In [1]:
import torch as t
from torch import nn
import numpy as np
import pandas as pd

from utils import Pipeline_lstm
from torch.utils.tensorboard import SummaryWriter

import warnings

# Suppress every Python warning for the rest of the session.
warnings.simplefilter('ignore')

# Run on the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if t.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [2]:
# Load the train and test tables; the first CSV column becomes the index.
df = pd.read_csv('./train.csv', index_col=0)
df_test = pd.read_csv('./test.csv', index_col=0)

# Keep only the rows of a single coin for the one-model experiment below.
df_coin = df[df['coin_id'].eq('coin_1')]

In [3]:
# Feature columns fed to the model, and the window length handed to the pipeline.
features = [
    'feat_1', 'feat_2', 'feat_3', 'feat_4',
    'feat_5', 'feat_8', 'feat_9', 'feat_10',
]
lookback = 30

# Positional (non-shuffled) split: first 80% of rows for training, the rest held out.
train_split = 0.8
split_ind = int(len(df_coin) * train_split)

X_train = df_coin.iloc[:split_ind][features]
y_train = df_coin.iloc[:split_ind]['fwd_ret_3d']

X_test = df_coin.iloc[split_ind:][features]
y_test = df_coin.iloc[split_ind:]['fwd_ret_3d']

# Fit the preprocessing on the training part only, then reuse it on the held-out part.
pipe = Pipeline_lstm(lookback=lookback)
train_ds = pipe.fit_transform(X_train, y_train)
val_ds = pipe.transform(X_test, y_test)

In [4]:
class ConvModel(nn.Module):
    """Single-layer 1-D convolutional regressor with one scalar output per sample.

    Architecture: Conv1d(kernel=3, padding=1) -> BatchNorm1d -> ReLU -> Flatten
    -> Linear(hidden_channels * n_features, 1).

    The convolution keeps the last dimension unchanged (kernel 3, padding 1), so
    the flattened size is ``hidden_channels * n_features``. The original code
    hard-coded this as 128 (16 * 8), which only works for 8 features.

    Args:
        channels: Size of dim 1 of the input (``in_channels`` of the Conv1d).
            Elsewhere in this notebook it is set to ``lookback``.
        n_features: Size of the last input dimension. Defaults to 8, which
            reproduces the original ``Linear(128, 1)`` head.
        hidden_channels: Number of convolution output channels (default 16).
        init_value: Constant written into the weights and biases of the
            convolution and of the final linear layer. Pass ``None`` to keep
            PyTorch's default random initialization.

    Note:
        With a constant ``init_value`` every hidden channel starts with
        identical parameters. NOTE(review): they likely receive identical
        gradients as well, so the channels may never diverge; consider
        ``init_value=None`` if that is not intended.
    """

    def __init__(self, channels=5, n_features=8, hidden_channels=16, init_value=0.1):
        super().__init__()
        self.conv = t.nn.Sequential(
            t.nn.Conv1d(channels, hidden_channels, 3, padding=1),
            t.nn.BatchNorm1d(hidden_channels),
            t.nn.ReLU(),

            t.nn.Flatten(),
            # Flattened size = conv output channels * preserved last dimension.
            t.nn.Linear(hidden_channels * n_features, 1),
        )
        if init_value is not None:
            # Deterministic constant init of the first (conv) and last (linear) layers.
            for layer in (self.conv[0], self.conv[-1]):
                nn.init.constant_(layer.weight, init_value)
                nn.init.constant_(layer.bias, init_value)

    def forward(self, x):
        """Return predictions of shape ``(batch, 1)``.

        Args:
            x: Tensor of shape ``(batch, channels, n_features)``.
        """
        return self.conv(x)

In [5]:
from train_tools import TrainModel_NoLoader, pearson_c

# One convolutional model for the single-coin dataset; the conv input channel
# count is the lookback window length.
model_conv = ConvModel(channels=lookback)
model_conv = model_conv.to(DEVICE)

# Optimizer settings and TensorBoard log location for this run.
lr = 5e-5
optimizer = t.optim.Adam(model_conv.parameters(), lr=lr)
writer = SummaryWriter('./runs/1dConv/')
epochs = 1800

# The return value of the training helper is not used.
_ = TrainModel_NoLoader(
    model=model_conv,
    loss_fn=pearson_c,
    optimizer=optimizer,
    train_ds=train_ds,
    val_ds=val_ds,
    epochs=epochs,
    display_on_epoch=1,
    device=DEVICE,
    writer=writer,
)

2023-01-17 19:23:58.724692: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Train Loss: 0.86; Val Loss: 0.92: 100%|██████████| 1800/1800 [00:05<00:00, 337.09it/s]


In [6]:
from SeparatePredictor import SeparatePredictor

class Conv1dPredictor(SeparatePredictor):
    """Per-class predictor that trains one model per class label.

    Relies on helpers inherited from ``SeparatePredictor`` (``separate_data``,
    ``set_up_pipeline``, ``train_test_split``, ``fit_preprocess``,
    ``prepare_data``, ``train_class``); their exact behavior is defined in that
    base class and is not visible here.
    """

    def prepare(self):
        """Split the raw data per class and build the train/test datasets.

        Populates ``devided_data``, ``y_devided``, ``class_labels``, turns
        ``self.models`` into a ``{class_label: model}`` mapping, and stores the
        prepared datasets in ``self.train_ds`` and ``self.test_ds``.
        """
        self.devided_data, self.y_devided, self.class_labels = self.separate_data(self.raw_data, self.y)
        # Pair each class label with a model, in order.
        self.models = dict(zip(self.class_labels, self.models))
        self.set_up_pipeline(
            pipeline_f=Pipeline_lstm,
            pipe_params={'lookback':self.lookback},
            train_f=TrainModel_NoLoader
        )
        self.train_test_split()
        self.fit_preprocess()

        self.train_ds = self.prepare_data(self.X_train, self.y_train)
        self.test_ds = self.prepare_data(self.X_test, self.y_test)

    def train(self, device='cpu', epoch_shedule=None, epochs=100, **kwargs):
        """Train the model of every class label, logging each to its own TensorBoard run.

        Args:
            device: Device the model is moved to and trained on.
            epoch_shedule: Optional container indexed by class label giving the
                number of epochs for that class. When given, it must contain an
                entry for every class label.
            epochs: Number of epochs used for every class when ``epoch_shedule``
                is ``None``.
            **kwargs: Forwarded unchanged to ``self.train_class``.
        """
        for cls in self.class_labels:
            # Fixed: the original message contained a stray literal "f" ("TRAINING fcoin_1").
            print(f"TRAINING {cls}")
            # Use a separate local so the caller-supplied ``epochs`` default is
            # not overwritten between iterations.
            class_epochs = epochs if epoch_shedule is None else epoch_shedule[cls]
            writer = SummaryWriter('./runs/1dConv_multi/{}'.format(cls))
            try:
                self.train_class(self.models[cls].to(device=device),
                                 self.train_ds[cls],
                                 self.test_ds[cls],
                                 device=device,
                                 writer=writer,
                                 epochs=class_epochs,
                                 **kwargs)
            finally:
                # Flush and release the event file even if training fails;
                # otherwise one open writer per class is leaked.
                writer.close()



In [7]:
# Settings for the multi-coin run (one model per coin).
lookback = 30
epochs = 2000
features = [
    'feat_1', 'feat_2', 'feat_3', 'feat_4',
    'feat_5', 'feat_8', 'feat_9', 'feat_10',
]

# The model class is passed together with its constructor parameters; the conv
# input channel count equals the lookback window.
predictor = Conv1dPredictor(
    models=[ConvModel],
    data=df,
    params=dict(channels=lookback),
    lookback=lookback,
    device=DEVICE,
    relevant_features=features,
)
predictor.prepare()

In [8]:
import json

# Per-class epoch counts, stored next to the notebook as JSON.
with open('./epoch_shedule.json') as f:
    epoch_shedule = json.loads(f.read())

In [9]:
# Extra options handed through to the training helper.
train_params = dict(display_on_epoch=1)

# Train one model per class; `epochs` only applies when no schedule is given.
predictor.train(
    device=DEVICE,
    epochs=epochs,
    epoch_shedule=epoch_shedule,
    loss_fn=pearson_c,
    learning_rate=lr,
    other_train_params=train_params,
)

TRAINING fcoin_1


Train Loss: 0.86; Val Loss: 0.92: 100%|██████████| 1800/1800 [00:03<00:00, 498.33it/s]


TRAINING fcoin_2


Train Loss: 0.99; Val Loss: 1.00: 100%|██████████| 500/500 [00:01<00:00, 446.15it/s]


TRAINING fcoin_3


Train Loss: 0.99; Val Loss: 0.95: 100%|██████████| 370/370 [00:00<00:00, 510.49it/s]


TRAINING fcoin_4


Train Loss: 0.90; Val Loss: 0.95: 100%|██████████| 1690/1690 [00:03<00:00, 516.74it/s]


TRAINING fcoin_5


Train Loss: 0.98; Val Loss: 1.00: 100%|██████████| 370/370 [00:00<00:00, 431.33it/s]


TRAINING fcoin_6


Train Loss: 0.98; Val Loss: 0.96: 100%|██████████| 690/690 [00:01<00:00, 460.26it/s]


TRAINING fcoin_7


Train Loss: 0.81; Val Loss: 0.99: 100%|██████████| 2500/2500 [00:05<00:00, 496.59it/s]


TRAINING fcoin_8


Train Loss: 0.85; Val Loss: 0.99: 100%|██████████| 1500/1500 [00:02<00:00, 524.74it/s]


TRAINING fcoin_9


Train Loss: 0.99; Val Loss: 0.95: 100%|██████████| 980/980 [00:01<00:00, 514.83it/s]


TRAINING fcoin_10


Train Loss: 0.94; Val Loss: 0.95: 100%|██████████| 1300/1300 [00:03<00:00, 407.08it/s]


TRAINING fcoin_11


Train Loss: 0.74; Val Loss: 1.00: 100%|██████████| 2500/2500 [00:05<00:00, 453.44it/s]


TRAINING fcoin_12


Train Loss: 0.91; Val Loss: 0.97: 100%|██████████| 1400/1400 [00:02<00:00, 530.68it/s]


TRAINING fcoin_13


Train Loss: 0.95; Val Loss: 1.00: 100%|██████████| 1020/1020 [00:02<00:00, 494.26it/s]


TRAINING fcoin_14


Train Loss: 0.94; Val Loss: 0.97: 100%|██████████| 1080/1080 [00:02<00:00, 505.07it/s]


TRAINING fcoin_15


Train Loss: 0.87; Val Loss: 0.97: 100%|██████████| 2100/2100 [00:04<00:00, 486.96it/s]


TRAINING fcoin_16


Train Loss: 0.88; Val Loss: 0.97: 100%|██████████| 2100/2100 [00:04<00:00, 515.30it/s]


TRAINING fcoin_17


Train Loss: 0.88; Val Loss: 0.94: 100%|██████████| 2200/2200 [00:04<00:00, 527.80it/s]


TRAINING fcoin_18


Train Loss: 0.96; Val Loss: 0.95: 100%|██████████| 650/650 [00:01<00:00, 499.16it/s]


TRAINING fcoin_19


Train Loss: 0.94; Val Loss: 0.99: 100%|██████████| 1600/1600 [00:03<00:00, 492.06it/s]


TRAINING fcoin_20


Train Loss: 0.73; Val Loss: 0.95: 100%|██████████| 2200/2200 [00:04<00:00, 504.48it/s]


TRAINING fcoin_21


Train Loss: 0.85; Val Loss: 0.83: 100%|██████████| 1900/1900 [00:03<00:00, 531.42it/s]


TRAINING fcoin_22


Train Loss: 0.91; Val Loss: 0.98: 100%|██████████| 1150/1150 [00:02<00:00, 481.10it/s]


TRAINING fcoin_23


Train Loss: 0.90; Val Loss: 0.99: 100%|██████████| 1500/1500 [00:02<00:00, 518.46it/s]


TRAINING fcoin_24


Train Loss: 0.99; Val Loss: 0.99: 100%|██████████| 500/500 [00:01<00:00, 499.50it/s]


TRAINING fcoin_25


Train Loss: 0.83; Val Loss: 0.97: 100%|██████████| 2300/2300 [00:04<00:00, 505.25it/s]


TRAINING fcoin_26


Train Loss: 0.98; Val Loss: 0.99: 100%|██████████| 500/500 [00:00<00:00, 507.19it/s]


TRAINING fcoin_27


Train Loss: 0.96; Val Loss: 0.88: 100%|██████████| 1500/1500 [00:02<00:00, 505.93it/s]


TRAINING fcoin_28


Train Loss: 0.99; Val Loss: 0.99: 100%|██████████| 500/500 [00:00<00:00, 528.18it/s]


TRAINING fcoin_29


Train Loss: 0.88; Val Loss: 0.97: 100%|██████████| 2200/2200 [00:04<00:00, 511.55it/s]


TRAINING fcoin_30


Train Loss: 0.84; Val Loss: 1.00: 100%|██████████| 2200/2200 [00:04<00:00, 490.60it/s]


TRAINING fcoin_31


Train Loss: 0.95; Val Loss: 0.83: 100%|██████████| 1700/1700 [00:03<00:00, 519.46it/s]


TRAINING fcoin_32


Train Loss: 0.92; Val Loss: 0.92: 100%|██████████| 1500/1500 [00:03<00:00, 487.27it/s]


TRAINING fcoin_33


Train Loss: 0.99; Val Loss: 1.00: 100%|██████████| 500/500 [00:00<00:00, 524.92it/s]


TRAINING fcoin_34


Train Loss: 0.80; Val Loss: 0.97: 100%|██████████| 2400/2400 [00:04<00:00, 494.06it/s]


TRAINING fcoin_35


Train Loss: 0.98; Val Loss: 0.95: 100%|██████████| 600/600 [00:01<00:00, 526.01it/s]


TRAINING fcoin_36


Train Loss: 0.91; Val Loss: 0.98: 100%|██████████| 1500/1500 [00:02<00:00, 516.55it/s]


TRAINING fcoin_37


Train Loss: 0.90; Val Loss: 0.93: 100%|██████████| 2300/2300 [00:04<00:00, 494.84it/s]


TRAINING fcoin_38


Train Loss: 0.88; Val Loss: 0.89: 100%|██████████| 1250/1250 [00:02<00:00, 482.27it/s]


TRAINING fcoin_39


Train Loss: 0.96; Val Loss: 0.98: 100%|██████████| 1600/1600 [00:03<00:00, 506.02it/s]


TRAINING fcoin_40


Train Loss: 0.99; Val Loss: 0.99: 100%|██████████| 500/500 [00:00<00:00, 518.92it/s]


TRAINING fcoin_41


Train Loss: 0.98; Val Loss: 0.97: 100%|██████████| 800/800 [00:01<00:00, 464.88it/s]


TRAINING fcoin_42


Train Loss: 0.95; Val Loss: 0.90: 100%|██████████| 1200/1200 [00:02<00:00, 531.58it/s]


TRAINING fcoin_43


Train Loss: 0.83; Val Loss: 0.98: 100%|██████████| 2300/2300 [00:04<00:00, 501.89it/s]


TRAINING fcoin_44


Train Loss: 0.83; Val Loss: 0.99: 100%|██████████| 1500/1500 [00:02<00:00, 531.19it/s]


TRAINING fcoin_45


Train Loss: 0.80; Val Loss: 0.97: 100%|██████████| 2200/2200 [00:04<00:00, 499.43it/s]


TRAINING fcoin_46


Train Loss: 0.88; Val Loss: 0.92: 100%|██████████| 1700/1700 [00:03<00:00, 483.17it/s]


TRAINING fcoin_47


Train Loss: 0.81; Val Loss: 0.92: 100%|██████████| 2500/2500 [00:04<00:00, 511.90it/s]


TRAINING fcoin_48


Train Loss: 0.95; Val Loss: 0.96: 100%|██████████| 500/500 [00:01<00:00, 455.20it/s]


TRAINING fcoin_49


Train Loss: 0.96; Val Loss: 0.99: 100%|██████████| 500/500 [00:00<00:00, 518.88it/s]


TRAINING fcoin_50


Train Loss: 0.97; Val Loss: 1.00: 100%|██████████| 500/500 [00:00<00:00, 520.12it/s]


TRAINING fcoin_51


Train Loss: 0.83; Val Loss: 0.98: 100%|██████████| 2100/2100 [00:04<00:00, 488.32it/s]


TRAINING fcoin_52


Train Loss: 0.99; Val Loss: 0.96: 100%|██████████| 915/915 [00:01<00:00, 508.91it/s]


TRAINING fcoin_53


Train Loss: 0.83; Val Loss: 0.94: 100%|██████████| 2300/2300 [00:04<00:00, 504.56it/s]


TRAINING fcoin_54


Train Loss: 0.96; Val Loss: 0.99: 100%|██████████| 500/500 [00:00<00:00, 502.10it/s]


TRAINING fcoin_55


Train Loss: 0.98; Val Loss: 0.98: 100%|██████████| 780/780 [00:01<00:00, 508.74it/s]


TRAINING fcoin_56


Train Loss: 0.98; Val Loss: 0.98: 100%|██████████| 500/500 [00:01<00:00, 489.87it/s]


TRAINING fcoin_57


Train Loss: 0.89; Val Loss: 0.98: 100%|██████████| 1380/1380 [00:02<00:00, 494.69it/s]


TRAINING fcoin_58


Train Loss: 0.90; Val Loss: 1.00: 100%|██████████| 2400/2400 [00:04<00:00, 501.77it/s]


TRAINING fcoin_59


Train Loss: 0.89; Val Loss: 1.00: 100%|██████████| 2300/2300 [00:04<00:00, 499.92it/s]


TRAINING fcoin_60


Train Loss: 0.99; Val Loss: 0.98: 100%|██████████| 800/800 [00:01<00:00, 515.06it/s]


TRAINING fcoin_61


Train Loss: 1.00; Val Loss: 0.88: 100%|██████████| 200/200 [00:00<00:00, 517.44it/s]


TRAINING fcoin_62


Train Loss: 0.88; Val Loss: 0.97: 100%|██████████| 2100/2100 [00:04<00:00, 494.30it/s]


TRAINING fcoin_63


Train Loss: 0.80; Val Loss: 0.97: 100%|██████████| 2000/2000 [00:03<00:00, 514.15it/s]


TRAINING fcoin_64


Train Loss: 0.82; Val Loss: 0.91: 100%|██████████| 2500/2500 [00:04<00:00, 506.14it/s]


TRAINING fcoin_65


Train Loss: 0.99; Val Loss: 0.93: 100%|██████████| 800/800 [00:01<00:00, 515.20it/s]


In [10]:
# Correlation between the true `fwd_ret_3d` and the predictions for all rows of
# `df` from position `ind` onward.
# TODO: `ind` is a magic number; derive it from the data split if possible.
ind = 28800

# Fixed: use the notebook-wide DEVICE instead of a hard-coded 'cuda', so this
# cell is consistent with the other cells and also targets the CPU on machines
# without a GPU.
y_preds = predictor.predict(df.iloc[ind:], device=DEVICE)

# Stack targets and predictions as two rows (2, N); corrcoef treats each row as
# a variable, so the off-diagonal entry is the Pearson correlation.
t.corrcoef(t.concat([
    t.tensor(df.iloc[ind:]['fwd_ret_3d'].values).reshape(-1,1).T,
    t.tensor(y_preds.values).reshape(-1,1).T
], dim=0)
)


tensor([[1.0000, 0.0395],
        [0.0395, 1.0000]], dtype=torch.float64)

In [18]:
from datetime import datetime
# Predict on the test table and shape the result into the submission layout.
y_pred = predictor.predict(df_test, DEVICE)
y_pred['Id'] = y_pred.index
y_pred['Predicted'] = y_pred[0]

# Write only the two submission columns; the file name carries the current
# timestamp so earlier submissions are not overwritten.
y_pred[['Id', 'Predicted']].to_csv(
    f'./submission_{datetime.now().timestamp()}.csv',
    index=False,
)

In [15]:
# Scratch cell: display the current local time as a POSIX timestamp (float seconds).
# NOTE(review): `import datetime` binds the name `datetime` to the module, which
# replaces the `datetime` class bound by `from datetime import datetime` in the
# submission cell. After this cell runs, a bare `datetime.now()` fails until that
# import is executed again.
import datetime
datetime.datetime.now().timestamp()

1673969402.174431