In [None]:
import pickle
from pathlib import Path
from easydict import EasyDict
import pandas as pd
from matplotlib import pyplot as plt
from ml import train
from dataloading import get_data, build_features

%load_ext autoreload
%autoreload 2
import warnings
# Blanket-ignore every warning category from here on.
# NOTE(review): this also hides numpy RuntimeWarnings (e.g. mean of an empty
# selection, division by zero) that later cells in this notebook can trigger.
warnings.filterwarnings('ignore')


# Widen pandas' console rendering so large frames print without truncation.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Load every saved optimisation result found in ./optimization.
# Each pickle is unpacked as a (cfg, btest) pair; files are visited in sorted
# path order so repeated runs fill `cfgs` / `btests` in the same order.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only point this at files written by a trusted run.
cfgs, btests = [], []
for p in sorted(Path("optimization").glob("*.pickle")):
    # The context manager closes the handle even when unpickling raises;
    # the previous bare open() left one descriptor open per file.
    with open(p, "rb") as fh:
        cfg, btest = pickle.load(fh)
    cfgs.append(cfg)
    btests.append(btest)
    print(p)

In [None]:
from backtest import DataParser, MovingWindow
import numpy as np


# Build the sample matrix X, the target vector y and the parallel list of
# positions from every loaded backtest.
fsize = 64
tfdict = dict(M5=0, M15=1, H1=2)  # timeframe name -> integer id passed to build_features
X, y, poslist = [], [], []
for btest in btests:
    bcfg = btest.cfg
    print(bcfg.ticker, end=" ")
    hist_pd, hist = DataParser(bcfg).load()
    window = MovingWindow(hist, fsize + 2)
    print(len(btest.positions))
    # The first four positions of every backtest are skipped.
    # NOTE(review): the reason for the offset of 4 is not visible here -- confirm.
    for pos in btest.positions[4:]:
        # Only the first element returned by the window is used.
        frame = window(pos.open_indx)[0]
        features = build_features(
            frame,
            pos.dir,
            btest.cfg.stops_processor.func.cfg.sl,
            btest.cfg.trailing_stop_rate,
            pos.open_date,
            tfdict[btest.cfg.period],
        )
        # Each sample is wrapped in a one-element list, adding a singleton axis.
        X.append([features])
        y.append(pos.profit)
        poslist.append(pos)

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)
print(X.shape, y.shape)
# Same feature cell of the first and of the last sample.
print(f"{X[0, 0, -2, 0]:8.0f} -> {X[-1, 0, -2, 0]:8.0f}")

In [None]:
from sklearn.metrics import f1_score
import torch
# Pick the accelerator at run time instead of hard-coding one backend, so the
# cell runs unchanged on Apple-silicon (mps), NVIDIA (cuda) and CPU-only hosts.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

nitrers = 300            # number of independent split / train / evaluate repetitions
test_split_size = 0.2    # forwarded to get_data; the test branch below runs only when > 0
calc_test = False        # forwarded to train()
id2tf = {v:k for k, v in tfdict.items()}  # timeframe id -> timeframe name, used for printing

# Row j of pprofits / gprofits belongs to timeframe id j; metrics row 0 holds the
# train F1 and row 1 the test F1. Column i is written by repetition i; columns
# that are still 0 are masked out of the running means below.
n_tf = len(tfdict)
pprofits, gprofits, metrics = np.zeros((n_tf, nitrers)), np.zeros((n_tf, nitrers)), np.zeros((2, nitrers))
for i in range(nitrers):
    # Seed numpy AND torch so a repetition is repeatable end to end.
    # NOTE(review): get_data/train are defined elsewhere; this assumes they draw
    # their randomness from these two global generators -- confirm.
    np.random.seed(i)
    torch.manual_seed(i)
    X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test = get_data(X, y, test_split_size)
    model = train(X_train, y_train, X_test, y_test, batch_size=512, device=device, calc_test=calc_test)
    model.eval()
    # Pure inference: no autograd graph is needed, so skip building one.
    with torch.no_grad():
        p_train = model(torch.tensor(X_train).float().to(device)).cpu().numpy().squeeze()[:, 0]
        p_test = model(torch.tensor(X_test).float().to(device)).cpu().numpy().squeeze()[:, 0]
    # Cut-off = 20th percentile of the train scores; scores above it count as positive.
    threshold = np.percentile(p_train, 20)
    # Alternative threshold search, kept for reference:
    # profsum_best, threshold = -999999, None
    # for th in np.arange(0.1, 1, 0.025):
    #     # profsum = (profs_train*(p_train>th)).sum()
    #     profsum = f1_score(p_train>th, y_train[:, 0])
    #     if profsum > profsum_best:
    #         profsum_best = profsum
    #         threshold = th
    # f1_score takes (y_true, y_pred): labels first, thresholded predictions second.
    metrics[0, i] = f1_score(y_train[:, 0], p_train>threshold)
    print(f"{i:03} f1_train: {metrics[0, i]:5.3f}", end=" ")
    if test_split_size > 0:
        metrics[1, i] = f1_score(y_test[:, 0], p_test>threshold)
        print(f"f1_test: {metrics[1, i]:5.3f}")

        # Per timeframe: gross profit of all test positions vs. profit of the
        # positions whose score passes the threshold.
        for j in range(n_tf):
            ids = tf_test == j
            pprofits[j, i] = (profs_test[ids]*(p_test[ids]>threshold)).sum()
            gprofits[j, i] = profs_test[ids].sum()
            print(f"{i:03} profit {id2tf[j]:3}:{gprofits[j, i]:+7.1f} -> {pprofits[j, i]:+7.1f} {'OK' if pprofits[j, i] > gprofits[j, i] else '--'}")

        # Running means over the entries written so far (zeros are excluded).
        pprofs_mean = pprofits.mean(axis=1, where=pprofits!=0)
        gprofs_mean = gprofits.mean(axis=1, where=gprofits!=0)
        f1_mean = metrics.mean(axis=1, where=metrics!=0)
        # nanmean: a timeframe row with no non-zero entry yields NaN above.
        pprofs_mean_tot = np.nanmean(pprofs_mean)
        gprofs_mean_tot = np.nanmean(gprofs_mean)
        print(f"f1_train: {f1_mean[0]:4.2f} f1_test: {f1_mean[1]:4.2f} ratio: {f1_mean[1]/f1_mean[0]:4.2f}")
        print(gprofs_mean, " -> ", pprofs_mean)
        print("-------------------------------------------------")
        print(f"av. profit boost: {(pprofs_mean_tot - gprofs_mean_tot)/abs(gprofs_mean_tot)}")
        metrics_mean, metrics_std = metrics.mean(1, where=metrics!=0), metrics.std(1, where=metrics!=0)
        print(f"metrics train/val means: {metrics_mean}, std: {metrics_std}")
        print(f"overfitting mean: {metrics_mean[0]/metrics_mean[1]}, std: {metrics_std[0]/metrics_std[1]}\n")

    else:
        print()
# Diagnostic plot for whatever the training loop left in the kernel (its last
# repetition): model scores as dots, labels as bars, threshold as a line.
# The preview is capped at 100 samples but also clamped to the data actually
# available, so short splits no longer break plt.bar with a length mismatch.
n_train = min(100, len(p_train), len(y_train))
plt.figure(figsize=(20, 3))
plt.subplot(1, 2, 1)
plt.plot(p_train[:n_train], ".")
plt.bar(np.arange(n_train), y_train[:n_train, 0], width=1, alpha=0.4)
plt.plot([0, n_train], [threshold, threshold])
if len(p_test):
    n_test = min(100, len(p_test), len(y_test))
    plt.subplot(1, 2, 2)
    plt.plot(p_test[:n_test], ".")
    plt.bar(np.arange(n_test), y_test[:n_test, 0], width=1, alpha=0.4)
    # plt.bar(np.arange(100), profs_test[:100], width=[1]*100, alpha=0.2)
    plt.plot([0, n_test], [threshold, threshold])

# Hand the cut-off to the model, then persist its state_dict.
# NOTE(review): whether set_threshold stores the value inside the state_dict is
# not visible here -- confirm the threshold survives the save/load round trip.
model.set_threshold(threshold)
torch.save(model.state_dict(), "model.pth")

In [None]:
# Ratio of filtered profit to gross profit for the H1 timeframe (row 2 of the
# profit arrays, i.e. tfdict["H1"]).
# Only repetitions with a non-zero gross profit are used: this keeps the last
# completed repetition (the old `:i` slice dropped it), skips columns never
# filled by an interrupted run, and avoids 0/0. It is the same "!= 0"
# convention the training cell uses for its running means.
h1_row = tfdict["H1"]
h1_valid = gprofits[h1_row] != 0
prof_boost = pprofits[h1_row, h1_valid] / gprofits[h1_row, h1_valid]
print(np.mean(prof_boost), np.median(prof_boost), np.std(prof_boost))
# Plot exactly the series summarised above, against its repetition index.
plt.plot(np.flatnonzero(h1_valid), prof_boost)

In [None]:
import torch
from ml import Net
# Use whichever accelerator exists on this machine rather than assuming CUDA;
# the training cell of this notebook targets a different backend ("mps").
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
model = Net(7, 32)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from one backend can be loaded on a host that only has another.
model.load_state_dict(torch.load("model.pth", map_location=device))
model.eval()
# model.set_threshold(-6)
model.to(device)
# NOTE(review): the training cell passes the split size positionally; the
# keyword name `test_split` is assumed to match get_data's signature -- confirm.
X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test = get_data(X, y, test_split=1)
# Inference only: do not track gradients.
with torch.no_grad():
    p_test = model(torch.tensor(X_test).float().to(device)).squeeze()
# profs_test.sum(), (profs_test*p_test).sum()
p_test

In [None]:
# Show the first (name, parameter) pair of the model as a quick sanity check.
named_params = list(model.named_parameters())
named_params[0]

In [None]:
# Run the model on X_test and plot the squeezed outputs as a line.
test_inputs = torch.tensor(X_test).float().to(device)
test_scores = model(test_inputs).squeeze().detach().cpu().numpy()
plt.plot(test_scores)

In [None]:
# Display the cut-off currently held in the kernel; the training cell sets it
# to the 20th percentile of the train scores of its last repetition.
threshold

In [None]:
# Side-by-side counts: sum of y_test, number of scores above the threshold,
# and the lengths of the prediction and label arrays.
(
    y_test.sum(),
    (p_test > threshold).sum(),
    p_test.shape[0],
    y_test.shape[0],
)

In [None]:
import mplfinance as mpf

# Draw a candle chart for every BTCUSDT position whose score falls below the
# threshold, covering three days before the open up to the close.
ticker = "BTCUSDT"
tf = "H1"
parser_cfg = EasyDict({
    "date_start": "2008-01-01",
    "period": tf,
    "ticker": ticker,
    "data_type": "bitfinex",
})
hist_pd, hist = DataParser(parser_cfg).load()

# NOTE(review): `ids_test` is not assigned in any cell visible here, and
# `predict_proba` is not shown on the torch model used above -- this cell
# presumably predates the current model; confirm before running.
for i in ids_test:
    pos = poslist[i]
    if pos.ticker != ticker:
        continue
    prediction = model.predict_proba([X[i, :-1]])[0][1]
    if not (prediction < threshold):
        continue
    print(pos.ticker, pos.open_date, prediction)
    d2 = pd.to_datetime(pos.close_date)
    d1 = pd.to_datetime(pos.open_date)
    d0 = d1 - pd.DateOffset(days=3)  # start the chart three days before the open
    hist2plot = hist_pd.loc[d0:d2]
    fig = mpf.plot(hist2plot, type='candle', block=False)

In [None]:
# Display the last value assigned to `prediction` by the plotting loop in the
# previous cell.
prediction