In [None]:
import pickle
import torch
from pathlib import Path
from easydict import EasyDict
import pandas as pd
from matplotlib import pyplot as plt
from ml import train
from dataloading import get_data, build_features, DataParser, MovingWindow
from tabulate import tabulate
from IPython.display import clear_output
import numpy as np
from sklearn.metrics import roc_auc_score, f1_score
import mplfinance as mpf
from sklearn.metrics import classification_report


%load_ext autoreload
%autoreload 2
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): a blanket 'ignore' also hides deprecation and numerical warnings;
# consider narrowing the filter once the notebook is stable.
import warnings
warnings.filterwarnings('ignore')


# Raise pandas' display limits (rows, columns, line width) for printed frames.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [None]:
from utils import Position, Broker
from experts import PyConfig


# Project configuration object; load() below passes it to DataParser.
cfg = PyConfig().test()
# Module-level accumulators that load() appends to: feature samples and two-element label rows.
X, y = [], []
# Window length handed to MovingWindow inside load().
fsize=128

def load(folder):
    """Append one feature sample and one label per ``*.xlsx`` file under ``folder``.

    For every spreadsheet found recursively, the ``Date`` of its first row is
    looked up in the price history returned by ``DataParser(cfg).load()``, the
    ``MovingWindow`` of length ``fsize`` at that position is turned into
    features with ``build_features``, and the result is appended to the
    module-level lists ``X`` and ``y``.

    Labels are ``[1, 0]`` when the file path contains the substring ``"true"``
    and ``[0, 1]`` otherwise. The whole path is checked, not just the file name.

    Args:
        folder: Directory that is searched recursively for ``*.xlsx`` files.

    Raises:
        ValueError: If a spreadsheet's date is not present in the history.
    """
    hist_pd, hist = DataParser(cfg).load()
    window_at = MovingWindow(hist, fsize)
    # Loop-invariant, so built once instead of once per spreadsheet.
    # NOTE(review): the lookup below only matches if the spreadsheet's Date
    # compares equal to these "YYYY-MM-DD" strings - confirm the cell type.
    dates = [str(d.date()) for d in hist_pd.Date]
    direction = 1  # if "max" in str(fname) else -1
    # rglob order depends on the filesystem; sorting keeps the sample order
    # (and therefore the "first N per class" test split) reproducible.
    for fname in sorted(Path(folder).rglob("*.xlsx")):
        first_row = pd.read_excel(fname).iloc[0]
        features, _ = window_at(dates.index(first_row.Date))
        x = build_features(features, direction, None, None, None, None)
        X.append([x])
        y.append([1, 0] if "true" in str(fname) else [0, 1])

# Fill the module-level X / y lists from the labelled spreadsheets, then
# freeze both into float32 arrays for the cells that follow.
load("data/andrey_data/")
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)
# Shuffling is intentionally left out. If it is ever re-enabled, apply ONE shared
# permutation to X and y; separate np.random.shuffle calls would break the pairing.
print(X.shape, y.shape)

In [None]:
# Hold out roughly 20% of the samples, taking the same number from each class.
# The first `test_half_size` ids of each class (in load order) form the test set.
test_half_size = int(0.2*y.shape[0]/2)
ids = np.arange(y.shape[0])
first_label_column = y[:, 0]
true_ids = ids[first_label_column == 1]
false_ids = ids[first_label_column == 0]

test_ids = np.concatenate((true_ids[:test_half_size], false_ids[:test_half_size]))
train_ids = np.concatenate((true_ids[test_half_size:], false_ids[test_half_size:]))
X_train = X[train_ids]
X_test = X[test_ids]
y_train = y[train_ids]
y_test = y[test_ids]
print(X_train.shape, X_test.shape)

In [None]:
# Visual sanity check: each row shows one randomly chosen "true" sample (left)
# next to one "false" sample (right), overlaying feature rows 2 and 3 of X.
n_rows = min(10, len(true_ids), len(false_ids))  # never index past the smaller class
# Permuted copy instead of np.random.shuffle(true_ids): re-running this cell no
# longer reorders the ids produced by the split cell.
shown_true_ids = np.random.permutation(true_ids)
if n_rows:
    # Build the whole grid in one call. The former plt.subplots(figsize=...) followed
    # by plt.subplot(10, 2, k) created an extra full-figure axes, which recent
    # Matplotlib versions keep underneath the grid.
    fig, axes = plt.subplots(n_rows, 2, figsize=(15, 15), squeeze=False)
    for row in range(n_rows):
        # Both columns use the same row index. The old counter advanced between the
        # two columns, so false_ids[0] was never shown and false_ids[10] was read.
        for ax, sample_id in zip(axes[row], (shown_true_ids[row], false_ids[row])):
            ax.plot(X[sample_id, 0, 2])
            ax.plot(X[sample_id, 0, 3])
            ax.axis("off")
    # Lay out once, after all panels exist (previously called on every iteration).
    fig.tight_layout()

In [None]:
# Candlestick preview of the first training sample.
# NOTE(review): the frame is built from one slice of X_train with no explicit index or
# column names; mplfinance documents a DatetimeIndex plus Open/High/Low/Close columns
# as its input format - confirm this cell actually renders.
hist2plot = pd.DataFrame(X_train[0, 0, 0, :])
# NOTE(review): mpf.plot presumably returns None unless returnfig=True, so `fig` may be empty - verify.
fig = mpf.plot(hist2plot, 
            type='candle', 
            block=False,
            # alines=dict(alines=lines2plot, colors=colors, linewidths=widths),
            # savefig=save_path / f"fig-{pos.open_date}.png"
            )

In [None]:
# Pick the compute device: keep Apple's MPS backend when it is available, otherwise
# fall back to CUDA or the CPU so the cell also runs on non-Apple machines.
_mps_backend = getattr(torch.backends, "mps", None)  # attribute is absent in older torch builds
if _mps_backend is not None and _mps_backend.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

calc_test = True

# as_tensor accepts both the NumPy arrays from the split cell and tensors left over
# from an earlier run of this cell, so re-running does not re-wrap tensors.
X_train = torch.as_tensor(X_train, dtype=torch.float32).to(device)
X_test = torch.as_tensor(X_test, dtype=torch.float32).to(device)
# Single training run (the former `for i in range(1)` wrapper was a no-op loop).
model, loss_hist = train(X_train, y_train, X_test, y_test, batch_size=128, epochs=800, device=device, calc_test=calc_test)
model.eval()
# NOTE(review): the plots below treat the predictions as array-like, index-able
# values comparable with y.argmax(1) - confirm what model.predict returns.
p_test = model.predict(X_test)
p_train = model.predict(X_train)

# Top-left: first 100 train predictions (dots) over the true class index (bars).
plt.figure(figsize=(20, 6))
plt.subplot(2, 2, 1)
plt.plot(p_train[:100], ".")
plt.bar(np.arange(p_train[:100].shape[0]), y_train[:100].argmax(1), width=1, alpha=0.4)
if len(p_test):
    # Top-right: the same view for the held-out samples.
    plt.subplot(2, 2, 2)
    plt.plot(p_test[:100], ".")
    plt.bar(np.arange(p_test[:100].shape[0]), y_test[:100].argmax(1), width=1, alpha=0.4)
# Separate figure with the loss history returned by train().
plt.figure(figsize=(20, 6))
plt.plot(loss_hist)
plt.legend(["train", "test"])
# torch.save(model.state_dict(), "model.pth")

In [None]:
# Per-class precision / recall / F1 on the held-out split; the true class is the argmax of each y_test row.
# Samples labelled [1, 0] (path contains "true") have argmax 0, i.e. the first target name, "correct".
# assumes model.predict returned hard class indices (0/1) in p_test - TODO confirm
print(classification_report(y_test.argmax(1), p_test, target_names=["correct", "wrong"]))

In [None]:
# Scratch check of classification_report on hand-written toy labels; it does not use the model or the data.
print(classification_report([1,1,1], [0,1,0], target_names=["correct", "wrong"]))

In [None]:
import sys
from experts import ExpertFormation, PyConfig
from backtest import backtest
from pathlib import Path
from dataloading import get_data, collect_train_data
import numpy as np
from loguru import logger
from tqdm import tqdm
import torch
from ml import train
import matplotlib.pyplot as plt
# Reset loguru's handlers and log to stderr at INFO level.
logger.remove()
logger.add(sys.stderr, level="INFO")



# Fraction of the data used as the test slice of each fold; also fixes the fold count below.
test_split_size = 0.2
device = "mps"
cfg = PyConfig().test()
# presumably makes backtest() run the saved model on this device; it is set back to None for the baseline run below - verify
cfg.run_model_device = device

# NOTE(review): the outer loop repeats the full fold procedure 5 times and draws every
# run on the same figure; `legend` and `last_prof` restart on each repetition.
for _ in range(5):
    legend, last_prof = [], 0
    # int(1/test_split_size) folds, indexed by i.
    for i in range(int(1/test_split_size)):
        # NOTE(review): get_data is unpacked into 8 values here but into 7 in a later
        # cell (called with different arguments) - confirm its return signature.
        X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test, test_dates = get_data(X, y, 1, test_split_size, i, i+1)
        X_train = torch.tensor(X_train).float().to(device)
        # Train on the fold's training part only (no test data passed, calc_test=False).
        model, hist = train(X_train, y_train, None, None, batch_size=1024, epochs=10, device=device, calc_test=False)
        model.eval()
        # Disabled threshold search (kept for reference):
        # X_train = X_train.float().to(device)
        # p_train = model(X_train).detach().cpu().numpy().squeeze()[:, 0]
        # profsum_best, threshold = -999999, np.percentile(p_train, 10)
        # for th in np.arange(0., 0.9, 0.025):
        #     profsum = f1_score(y_train[:, 0], p_train>th)
        #     if profsum > profsum_best:
        #         profsum_best = profsum
        #         threshold = th
        # model.set_threshold(threshold)
        # Persist the fold's weights; presumably backtest() reads "model.pth" - verify.
        torch.save(model.state_dict(), "model.pth")
        # The slicing implies test_dates holds "YYYYMMDD" strings; reformat to "YYYY-MM-DD" for the config.
        cfg.date_start=f"{test_dates[0][:4]}-{test_dates[0][4:6]}-{test_dates[0][6:]}"
        cfg.date_end=f"{test_dates[1][:4]}-{test_dates[1][4:6]}-{test_dates[1][6:]}"
        brok_results = backtest(cfg)
        cumsum = brok_results.profits.cumsum()
        print(brok_results.profits.sum())
        # Offset each fold's curve by the profit accumulated in earlier folds so the segments join up.
        plt.plot([pos.close_date for pos in brok_results.positions], cumsum + last_prof)
        # NOTE(review): cumsum[-1] needs positional indexing and at least one closed position - confirm the type of `profits`.
        last_prof += cumsum[-1]
        plt.grid("on")
        plt.tight_layout()
        legend.append(f"{test_dates[0]}-{test_dates[1]}")

# Baseline: the same backtest over the fixed 2004-2024 range with run_model_device cleared.
cfg.run_model_device = None
cfg.date_start="2004-01-01"
cfg.date_end="2024-01-01"
brok_results = backtest(cfg)
print(brok_results.profits.sum())
plt.plot([pos.close_date for pos in brok_results.positions], brok_results.profits.cumsum(), linewidth=3, alpha=0.6)
legend.append("baseline")
# plt.legend(legend)
plt.savefig("backtest.png")
# plt.show()

In [None]:
import torch
from ml import Net
device = "cuda"
# NOTE(review): another cell rebuilds the network as Net(7, 32) from the same file;
# only one of the two sizes can match the saved weights - confirm which.
model = Net(7, 64)
# map_location="cpu": the checkpoint is written from an "mps" run, and without remapping
# torch.load tries to restore tensors onto the device they were saved from.
# The weights are moved to `device` below.
model.load_state_dict(torch.load("model.pth", map_location="cpu"))
model.eval()
model.to(device)

In [None]:
# Scratch: compare prediction-weighted test profits with the plain profit sum.
# NOTE(review): `calc_weights` and `threshold` are not defined or imported in any visible
# cell, and `p` is not referenced again - this cell fails on Restart & Run All.
p = model.forward_thresholded(X_test)[:, 0]
w_profs_test = calc_weights(p_test[0], threshold)
pprofs_test = (profs_test*w_profs_test).sum(0)
pprofs_test, profs_test.sum()

In [None]:
# Scratch: NaN-ignoring sums along axis 1 of two profit arrays, plus their relative difference.
# NOTE(review): `pprofits` and `gprofits` are not defined in any visible cell.
pprofs_sum1 = np.nansum(pprofits, 1)
gprofs_sum1 = np.nansum(gprofits, 1)
pprofs_sum1, gprofs_sum1, (pprofs_sum1-gprofs_sum1)/abs(gprofs_sum1)

In [None]:
# Plot column 3 of `table` as a dotted line.
# NOTE(review): `table` is not defined in any visible cell.
plt.plot(np.array(table)[:, 3], ".-")

In [None]:
# Show the mean train prediction next to the decision threshold.
# NOTE(review): `threshold` is only assigned inside commented-out code in the visible cells.
p_train.mean(), threshold

In [None]:
import torch
from ml import Net
device = "cuda"
# NOTE(review): an earlier cell rebuilds the network as Net(7, 64) from the same file;
# only one of the two sizes can match the saved weights - confirm which.
model = Net(7, 32)
# map_location="cpu": the checkpoint is written from an "mps" run, and without remapping
# torch.load tries to restore tensors onto the device they were saved from.
# The weights are moved to `device` below.
model.load_state_dict(torch.load("model.pth", map_location="cpu"))
model.eval()
# model.set_threshold(-6)
model.to(device)
# NOTE(review): get_data is unpacked into 7 values here but into 8 in the backtest
# cell (called with different arguments) - confirm its return signature.
X_train, X_test, y_train, y_test, profs_train, profs_test, tf_test = get_data(X, y, test_split=1)
# Raw model outputs for the test features, with singleton dimensions removed.
p_test = model(torch.tensor(X_test).float().to(device)).squeeze()
# profs_test.sum(), (profs_test*p_test).sum()
p_test

In [None]:
# Quick look at the shape of the current training features.
X_train.shape

In [None]:
# Inspect the first (name, parameter) pair of the loaded model.
list(model.named_parameters())[0]

In [None]:
# Plot the model's squeezed outputs for X_test (detached and copied to the CPU as NumPy).
plt.plot(model(torch.tensor(X_test).float().to(device)).squeeze().detach().cpu().numpy())

In [None]:
# NOTE(review): `threshold` is only assigned inside commented-out code in the visible cells; this relies on leftover kernel state.
threshold

In [None]:
# Compare the label sum with the number of predictions above the threshold, plus both sample counts.
# NOTE(review): depends on `threshold`, which no visible cell defines.
y_test.sum(), (p_test>threshold).sum(), p_test.shape[0], y_test.shape[0]

In [None]:
import mplfinance as mpf

# Load H1 BTCUSDT history and draw a candle chart for every test position whose
# predicted score falls below the threshold.
ticker = "BTCUSDT"
tf = "H1"
hist_pd, hist = DataParser(
    EasyDict(
        date_start="2008-01-01",
        period=tf,
        ticker=ticker,
        data_type="bitfinex"
        )).load()

# NOTE(review): `ids_test`, `poslist` and `threshold` are not defined in any visible cell, and
# `predict_proba` is not shown to exist on the torch `Net` loaded above - this looks like a
# leftover from an earlier version; confirm before relying on it.
for i in ids_test:
    pos = poslist[i]
    if pos.ticker == ticker:
        # Second entry of the first returned row is used as the score; the last column of X[i] is dropped.
        prediction = model.predict_proba([X[i, :-1]])[0][1]
        if prediction < threshold:
            print(pos.ticker, pos.open_date, prediction)
            # Plot from three days before the position opened until it closed.
            d2 = pd.to_datetime(pos.close_date)
            d1 = pd.to_datetime(pos.open_date)
            d0 = d1 - pd.DateOffset(days=3)
            # assumes hist_pd is indexed by datetime so the label slice works - TODO confirm
            hist2plot = hist_pd.loc[d0:d2]
            fig = mpf.plot(hist2plot, 
                type='candle', 
                block=False)

In [None]:
# Show the last `prediction` value left behind by the loop in the previous cell.
prediction