In [None]:
import pickle
from pathlib import Path
from easydict import EasyDict
import pandas as pd
from matplotlib import pyplot as plt
from ml import train
from dataloading import get_data, build_features, DataParser, MovingWindow
from tabulate import tabulate
from IPython.display import clear_output
import numpy as np
from sklearn.metrics import roc_auc_score, f1_score

# Reload edited project modules automatically so changes are picked up live.
%load_ext autoreload
%autoreload 2
import warnings
# Silences every Python warning for the rest of the kernel session.
warnings.filterwarnings('ignore')


# Raise pandas' display limits so wide/long frames print without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [None]:
def smooth_bin_labels(y, dp=0.3):
    """Return a float32 copy of two-column binary labels with random smoothing.

    Rows whose first column equals 0 get that entry replaced by a draw from
    ``U[0, dp)``. Rows whose second column equals 1 get ``1 - draw`` there,
    consuming the same draws in row order. For one-hot rows ``[0, 1]`` the two
    masks coincide, so every smoothed row still sums to 1.

    Args:
        y: 2-D label array with at least two columns. It is not modified.
        dp: Upper bound of the uniform noise.

    Returns:
        A new ``float32`` array with the same shape as ``y``.

    Note:
        The two masks are expected to select the same number of rows; NumPy
        rejects the second assignment otherwise (unless a single draw can be
        broadcast).
    """
    y = y.astype(np.float32)  # astype copies, so the caller's array is untouched
    zero_rows = y[:, 0] == 0
    one_rows = y[:, 1] == 1
    # count_nonzero is vectorised; the built-in sum() walks the mask in Python.
    d = np.random.uniform(0, dp, np.count_nonzero(zero_rows))
    y[zero_rows, 0] = d
    y[one_rows, 1] = 1 - d
    return y

def smooth_prob_labels(y, dp=0.1):
    """Return a float copy of ``y`` jittered by element-wise uniform noise.

    Every element receives an independent draw from ``U[-dp, dp)``. The
    earlier version drew noise but discarded it and shifted all labels by the
    constant ``dp``; the draw is now actually applied and sized to ``y``.

    Args:
        y: Array of soft labels of any shape. It is not modified.
        dp: Half-width of the uniform jitter.

    Returns:
        A new float64 array with the same shape as ``y``. Values are not
        clipped, so results may fall slightly outside the original range.
    """
    y = y.astype(float)
    noise = np.random.uniform(-dp, dp, size=y.shape)
    return y + noise

def stohastic_prediction(input_tensor, n_samples=7, std_threshold=0.1, net=None):
    """Estimate a prediction and its spread per sample from repeated forward passes.

    Each sample is stacked ``n_samples`` times into one batch and passed
    through the network. The median and standard deviation of the first output
    column are recorded, and medians whose spread is not below
    ``std_threshold`` are set to zero.

    Args:
        input_tensor: Tensor iterated along its first dimension, one sample
            per entry.
        n_samples: Number of copies of each sample fed through the network.
        std_threshold: Predictions with a standard deviation at or above this
            value are zeroed.
        net: Callable mapping a batch to a 2-D tensor. Defaults to the
            notebook-level ``model``.

    Returns:
        ``(pmeans, pstds)``: two 1-D arrays of length ``input_tensor.shape[0]``
        holding the gated medians and the standard deviations.

    NOTE(review): the repeated passes only differ when the network is
    stochastic at inference time (e.g. dropout left active) — confirm which
    mode ``model`` is in when this is called.
    """
    if net is None:
        net = model  # fall back to the model defined by the training cell
    pmeans, pstds = np.zeros((2, input_tensor.shape[0]))
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        for i, x in enumerate(input_tensor):
            p = net(torch.stack([x] * n_samples)).detach().cpu().numpy()[:, 0]
            pmeans[i] = np.median(p)
            pstds[i] = p.std()
    # Keep only the predictions the network is consistent about.
    pmeans = pmeans * (pstds < std_threshold)
    return pmeans, pstds

def calc_weights(predicts, th):
    """Binarise predictions: 1.0 where the score is strictly above ``th``, else 0.0.

    Returns a ``float32`` array with the same shape as ``predicts``.
    """
    above = predicts > th
    return above.astype(np.float32)
    
def calc_weights2(predicts, th):
    """Vote along axis 0: an entry passes when at least 20% of the rows exceed ``th``.

    Returns a boolean array with the leading axis of ``predicts`` reduced away.
    """
    votes = (predicts > th).sum(axis=0)
    share = votes / predicts.shape[0]
    return share >= 0.2

In [None]:
from dataloading import collect_train_data
# Build the full dataset once; later cells re-split it with get_data.
# NOTE(review): the meaning of the second argument (64) is not visible here —
# presumably a window length; confirm in dataloading.collect_train_data.
X, y = collect_train_data("./optimization/", 64)

In [None]:
import torch
device = "mps"

nitrers = 10
test_split_size = 0.2
calc_test = True

# pprofits / gprofits: one row per tf_test group (0..2), one column per iteration,
# holding the threshold-filtered and the unfiltered profit sums respectively.
pprofits, gprofits, metrics, table = np.zeros((3, nitrers)), np.zeros((3, nitrers)), np.zeros((2, nitrers)), []
for i in range(1):#int(1/test_split_size)):
    # Re-seed NumPy's global RNG every iteration (the former `if i >= 0` guard
    # around these two lines could never be false).
    np.random.seed(i)
    X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test, _ = get_data(X, y, test_split_size, 4, 5)
    X_train = torch.tensor(X_train).float().to(device)
    X_test = torch.tensor(X_test).float().to(device)
    model = train(X_train, y_train, X_test, y_test, batch_size=512, epochs=10, device=device, calc_test=calc_test)
    model.eval()
    # Pure inference: no_grad avoids building an autograd graph for the
    # full-dataset forward passes.
    with torch.no_grad():
        p_test = model(X_test).detach().cpu().numpy().squeeze()[:, 0]
        p_train = model(X_train).detach().cpu().numpy().squeeze()[:, 0]
    # From here on the labels are boolean "target > 0".
    y_test = y_test > 0
    y_train = y_train > 0
    # p_test, _ = stohastic_prediction(X_test)
    # p_train, _ = stohastic_prediction(X_train)

    # Keep p_test 2-D (one row per model) so the optional ensemble below can
    # stack further rows onto it.
    p_test = np.expand_dims(p_test, 0)
    # for m in range(3):
    #     np.random.seed(m+100)
    #     train_ids = np.random.choice(np.arange(y_train.shape[0]), int(y_train.shape[0]*0.7))
    #     model = train(X_train[train_ids], y_train[train_ids], X_test, y_test, batch_size=512, device=device, calc_test=False)
    #     model.eval()
    #     p_test_ = model(X_test).detach().cpu().numpy().squeeze()[:, 0]
    #     p_test = np.vstack([p_test, p_test_])
    # p_test = np.median(p_test, 0).squeeze()

    roc_train = roc_auc_score(y_train, p_train.reshape(-1, 1))
    roc_test = roc_auc_score(y_test, p_test.reshape(-1, 1))
    # Choose the threshold that maximises F1 on the training set. The
    # percentile start value is always overwritten, because any F1 score beats
    # the -999999 sentinel on the first candidate.
    profsum_best, threshold = -999999, np.percentile(p_train, 20)
    for th in np.arange(0., 0.9, 0.025):
        profsum = f1_score(y_train[:, 0], p_train > th)
        if profsum > profsum_best:
            profsum_best = profsum
            threshold = th

    w_profs_train = calc_weights(p_train, threshold)

    if test_split_size > 0:
        w_profs_test = calc_weights(p_test[0], threshold)
        for j in range(3):
            ids = tf_test == j
            pprofits[j, i] = (profs_test[ids]*w_profs_test[ids]).sum()
            gprofits[j, i] = profs_test[ids].sum()

        # Per-iteration totals over the three groups, for iterations 0..i.
        pprofs_sum = np.nansum(pprofits[:, :i+1], 0)
        gprofs_sum = np.nansum(gprofits[:, :i+1], 0)
        # Percentage change of filtered vs. unfiltered profit; a zero
        # unfiltered total yields inf/nan here (warnings are silenced globally).
        profs_ratio = (pprofs_sum - gprofs_sum)/abs(gprofs_sum)*100
        curprof_ratio = (pprofs_sum[-1] - gprofs_sum[-1])/abs(gprofs_sum[-1])*100
        # Share of iterations in which filtering reduced the profit.
        model_fails = np.nansum(profs_ratio < 0)/(i+1)
        prof_boost_mean = np.nanmean(profs_ratio)
        prof_boost_median = np.nanmedian(profs_ratio)
        prof_boost_std = np.nanstd(profs_ratio)
        if i > 0:
            clear_output(wait=True)
        table.append([i, roc_train, roc_test, pprofs_sum[-1], gprofs_sum[-1], curprof_ratio,
                      prof_boost_mean, prof_boost_median, prof_boost_std, model_fails*100, profsum_best, threshold])
        print(tabulate(table, headers=["iter", "ROC train", "ROC test", "prof", "gprof", "pboost", "pboost mean", "pboost median", "pboost std", "model_fails, %", "f1 best", "threshold"]))
        print()
        
# Visual sanity check on the first samples: predictions as dots, labels as
# bars, and the chosen threshold as a horizontal line.
n_show = min(100, len(p_train))  # fewer than 100 rows must not break plt.bar
plt.figure(figsize=(20, 6))
plt.subplot(2, 2, 1)
plt.plot(p_train[:n_show], ".")
plt.bar(np.arange(n_show), y_train[:n_show, 0], width=1, alpha=0.4)
plt.plot([0, n_show], [threshold, threshold])
# p_test is 2-D with a leading axis of length 1, so len(p_test) is always
# truthy. Gate on the split size instead: that is the condition under which
# w_profs_test is computed.
if test_split_size > 0:
    p_test4show = np.median(p_test, axis=0)
    n_show_test = min(100, len(p_test4show))
    plt.subplot(2, 2, 2)
    plt.plot(p_test4show[:n_show_test], ".")
    plt.bar(np.arange(n_show_test), y_test[:n_show_test, 0], width=1, alpha=0.4)
    # plt.bar(np.arange(100), profs_test[:100], width=[1]*100, alpha=0.2)
    plt.plot([0, n_show_test], [threshold, threshold])
    plt.subplot(2, 2, 3)
    plt.plot(w_profs_train[:100])
    plt.subplot(2, 2, 4)
    plt.plot(w_profs_test[:100])
    # plt.plot(profs_test[:100]*w_profs_test[:100])
    # plt.plot(profs_test[:100], linewidth=3, alpha=0.5)


# Hand the chosen threshold to the model and write its state dict to disk.
model.set_threshold(threshold)
torch.save(model.state_dict(), "model.pth")

In [None]:
import sys
from experts import ExpertFormation, PyConfig
from backtest import backtest
from pathlib import Path
from dataloading import get_data, collect_train_data
import numpy as np
from loguru import logger
from tqdm import tqdm
import torch
from ml import train
import matplotlib.pyplot as plt
# Replace loguru's default sink with a stderr sink at INFO level.
logger.remove()
logger.add(sys.stderr, level="INFO")


def _to_iso_date(yyyymmdd):
    """Reformat a ``YYYYMMDD`` string as ``YYYY-MM-DD``."""
    return f"{yyyymmdd[:4]}-{yyyymmdd[4:6]}-{yyyymmdd[6:]}"


test_split_size = 0.2
device = "mps"
cfg = PyConfig().test()
cfg.run_model_device = device

# The whole walk-forward pass is repeated 5 times; every curve is drawn onto
# the same figure, while legend and the running profit restart on each pass.
for _ in range(5):
    legend, last_prof = [], 0
    for i in range(int(1/test_split_size)):
        # NOTE(review): the last two arguments presumably select which slice
        # becomes the test period — confirm in dataloading.get_data.
        X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test, test_dates = get_data(X, y, test_split_size, i, i+1)
        X_train = torch.tensor(X_train).float().to(device)
        model = train(X_train, y_train, None, None, batch_size=512, epochs=5, device=device, calc_test=False)
        model.eval()
        # X_train is already a float tensor on `device`; run inference without
        # building an autograd graph.
        with torch.no_grad():
            p_train = model(X_train).detach().cpu().numpy().squeeze()[:, 0]
        # Choose the threshold that maximises F1 on the training labels; the
        # percentile start value is replaced by the first candidate.
        profsum_best, threshold = -999999, np.percentile(p_train, 10)
        for th in np.arange(0., 0.9, 0.025):
            profsum = f1_score(y_train[:, 0] > 0, p_train > th)
            if profsum > profsum_best:
                profsum_best = profsum
                threshold = th
        model.set_threshold(threshold)
        # NOTE(review): backtest presumably picks this file up when
        # cfg.run_model_device is set — confirm.
        torch.save(model.state_dict(), "model.pth")
        cfg.date_start = _to_iso_date(test_dates[0])
        cfg.date_end = _to_iso_date(test_dates[1])
        brok_results = backtest(cfg)
        cumsum = brok_results.profits.cumsum()
        print(brok_results.profits.sum(), threshold)
        # Offset each period by the profit accumulated so far so the curves chain.
        plt.plot([pos.close_date for pos in brok_results.positions], cumsum + last_prof)
        if len(cumsum):  # a period without closed positions adds nothing
            last_prof += cumsum[-1]
        plt.grid(True)
        plt.tight_layout()
        legend.append(f"{test_dates[0]}-{test_dates[1]}")

# Reference run over the full fixed date range.
# NOTE(review): run_model_device = None presumably makes backtest run without
# the model filter — confirm in backtest / ExpertFormation.
cfg.run_model_device = None
cfg.date_start="2004-01-01"
cfg.date_end="2024-01-01"
brok_results = backtest(cfg)
print(brok_results.profits.sum())
# No new figure is opened, so this thicker, semi-transparent curve lands on the
# same axes as the curves plotted by the loop above.
plt.plot([pos.close_date for pos in brok_results.positions], brok_results.profits.cumsum(), linewidth=3, alpha=0.6)
# legend is extended here but never rendered while plt.legend stays commented out.
legend.append("baseline")
# plt.legend(legend)
plt.savefig("backtest.png")
# plt.show()



In [None]:
import torch
from ml import Net
device = "cuda"
# NOTE(review): another cell builds Net(7, 32) for the same checkpoint file;
# confirm which size matches the saved weights.
model = Net(7, 64)
# map_location remaps tensors saved from a different device (the training
# cells use "mps") onto the target device instead of failing while loading.
model.load_state_dict(torch.load("model.pth", map_location=device))
model.eval()
model.to(device)

In [None]:
# NOTE(review): `p` is computed but not used below; the weights are still
# derived from p_test / threshold left over from an earlier cell — confirm
# which of the two is intended.
p = model.forward_thresholded(X_test)[:, 0]
w_profs_test = calc_weights(p_test[0], threshold)
# Profit remaining after masking with the weights, shown next to the raw total.
pprofs_test = (profs_test*w_profs_test).sum(0)
pprofs_test, profs_test.sum()

In [None]:
# Sum over iterations (axis 1), giving one total per row of pprofits / gprofits.
pprofs_sum1 = np.nansum(pprofits, 1)
gprofs_sum1 = np.nansum(gprofits, 1)
# Third element: relative change of filtered over unfiltered profit, as a
# fraction rather than a percentage.
pprofs_sum1, gprofs_sum1, (pprofs_sum1-gprofs_sum1)/abs(gprofs_sum1)

In [None]:
# Column 3 of each table row is the filtered profit sum ("prof" in the printed header).
plt.plot(np.array(table)[:, 3], ".-")

In [None]:
# Mean training prediction shown next to the chosen decision threshold.
p_train.mean(), threshold

In [None]:
import torch
from ml import Net
device = "cuda"
# NOTE(review): another cell builds Net(7, 64) for the same checkpoint file;
# confirm which size matches the saved weights.
model = Net(7, 32)
# map_location remaps tensors saved from a different device onto `device`.
model.load_state_dict(torch.load("model.pth", map_location=device))
model.eval()
# model.set_threshold(-6)
model.to(device)
# Other cells unpack eight values from get_data; the starred tail accepts
# either seven or eight return values so this cell does not fail on unpacking.
X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test, *_ = get_data(X, y, test_split=1)
p_test = model(torch.tensor(X_test).float().to(device)).squeeze()
# profs_test.sum(), (profs_test*p_test).sum()
p_test

In [None]:
# Shape of the training features from the most recent get_data call.
X_train.shape

In [None]:
# First (name, parameter) pair of the model, as a quick look at its weights.
list(model.named_parameters())[0]

In [None]:
# Model outputs over X_test, detached and moved to NumPy for plotting.
plt.plot(model(torch.tensor(X_test).float().to(device)).squeeze().detach().cpu().numpy())

In [None]:
# Decision threshold currently held in the kernel (set by whichever search cell ran last).
threshold

In [None]:
# Sum of y_test, number of predictions above the threshold, and both lengths side by side.
y_test.sum(), (p_test>threshold).sum(), p_test.shape[0], y_test.shape[0]

In [None]:
import mplfinance as mpf

# Load the price history for one ticker/timeframe for candle plotting.
ticker = "BTCUSDT"
tf = "H1"
hist_pd, hist = DataParser(
    EasyDict(
        date_start="2008-01-01",
        period=tf,
        ticker=ticker,
        data_type="bitfinex"
        )).load()

# NOTE(review): ids_test and poslist are not defined in any cell visible here,
# and model.predict_proba differs from how `model` is called elsewhere in the
# notebook (direct call / forward_thresholded) — this cell may predate the
# torch model; confirm before running.
for i in ids_test:
    pos = poslist[i]
    if pos.ticker == ticker:
        # Feature row without its last column goes to the classifier; [0][1]
        # takes the second probability of the single returned row.
        prediction = model.predict_proba([X[i, :-1]])[0][1]
        if prediction < threshold:
            print(pos.ticker, pos.open_date, prediction)
            d2 = pd.to_datetime(pos.close_date)
            d1 = pd.to_datetime(pos.open_date)
            # Start the chart three days before the position was opened.
            d0 = d1 - pd.DateOffset(days=3)
            hist2plot = hist_pd.loc[d0:d2]
            fig = mpf.plot(hist2plot,
                type='candle',
                block=False)

In [None]:
# Last prediction value assigned by the loop in the preceding cell.
prediction