In [6]:
import pytorch_lightning as pl

from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

from src.TimeSeriesLearningUtils import *
from src.LSTMModel import *

In [7]:
def name_model(config):
    name =[]
    if len(config["currency_list"])  > 1:
        name.append("multi_task_" + "_".join(config["currency_list"]))
    else:
        name.append(config["currency_list"][0])
        
    if config["indicators"] or config["imfs"] or config ["ohlv"]:
        name.append("multi_variate")
    
    lstm = "stack_lstm" if config["n_lstm_layers"] > 1 else "lstm"
    name.append(lstm)
    
    name.append(config["pred_frequency"])
    classification = "multi_clf" if config["num_classes"] > 2 else "binary_clf"
    name.append(classification)
    
    return "_".join(name)

In [12]:
config = {"window_size": 50, 
          "dataset_percentages": [0.96, 0.02, 0.02],
          "data_frequency": "6h", 
          "pred_frequency": "6h",
          "ma_period": 10, 
          "log_price": True,
          "neutral_quantile": 0.33,
          "batch_size": 64,
          "bidirectional": True, 
          "num_classes": 2,
          "currency_list": ['BTC', 'ETH', 'LTC'],#['LTC'],# 
          "dropout_after_each_lstm_layer": 0.5,
          "dropout_before_output_layer": 0.5,
          "remove_trend": True,
          "lstm_hidden_size": 128,
          "n_lstm_layers": 3,
          "calculate_loss_weights": True, 
          "last_layer_fsz": 128,
          "warmup_epoch": 10,
          "learning_rate": 1e-3,
          "weight_decay": 1e-2,
          "indicators": True, 
          "imfs": False,
          "ohlv": False}

In [13]:
MODEL_NAME = name_model(config)
name_model(config)

'multi_task_BTC_ETH_LTC_multi_variate_stack_lstm_6h_binary_clf'

In [14]:
X, y, features, dfs = get_data(**config)

  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


In [15]:
X.shape

(3, 5350, 84)

In [17]:
X[0]

array([[ 3.03468741e+04,  2.92840762e+04, -1.96873182e-02, ...,
        -1.93443631e+00,  2.67114606e+02, -1.93443631e-02],
       [ 2.87209002e+04,  2.59601113e+04, -1.32567221e-02, ...,
        -2.83072132e+00,  2.56868320e+02, -2.83072132e-02],
       [ 3.10818191e+04,  2.95377875e+04, -2.21909886e-02, ...,
         4.02990270e+00,  2.71543478e+02,  4.02990270e-02],
       ...,
       [ 5.38031281e+06,  1.60726087e+06, -5.44086359e-02, ...,
        -1.42354554e+00,  9.22403901e+02, -1.42354554e-02],
       [ 5.38521346e+06,  1.61593551e+06, -7.80766603e-02, ...,
         6.30375228e-01,  9.28869238e+02,  6.30375228e-03],
       [ 5.38898243e+06,  1.62071936e+06, -1.12914380e-01, ...,
         1.00455474e+00,  9.39256880e+02,  1.00455474e-02]])

In [18]:
train_dataset, val_dataset, test_dataset = [TimeSeriesDataset(X,
                                                              y,
                                                              dtype, 
                                                              **config) for dtype in ['train', 'val', 'test']]

config["dataset_sizes"] = [len(train_dataset), len(val_dataset), len(test_dataset)]
config["dataset_sizes"]

[5086, 107, 107]

In [21]:
train_dataset[0]

{'BTC_window': tensor([[-9.6904e-01, -7.2636e-01, -6.6448e-01,  ..., -9.3108e-01,
           1.4858e-01, -9.3108e-01],
         [-9.7011e-01, -7.3054e-01, -6.1313e-01,  ..., -1.3592e+00,
           1.1644e-01, -1.3592e+00],
         [-9.6855e-01, -7.2604e-01, -6.8448e-01,  ...,  1.9179e+00,
           1.6247e-01,  1.9179e+00],
         ...,
         [-9.4988e-01, -7.4503e-01,  4.1288e-01,  ..., -4.8375e-01,
          -3.1387e-02, -4.8375e-01],
         [-9.4836e-01, -7.3860e-01,  3.8059e-01,  ...,  1.5194e+00,
           1.5797e-04,  1.5194e+00],
         [-9.4891e-01, -7.4413e-01,  3.0799e-01,  ..., -8.4158e-01,
          -1.7212e-02, -8.4158e-01]]),
 'BTC_label': tensor(1),
 'ETH_window': tensor([[-0.7030, -0.4120,  0.7236,  ...,  1.6966,  0.0949,  1.6966],
         [-0.7034, -0.4103,  0.1582,  ...,  1.2730,  0.1261,  1.2730],
         [-0.7029, -0.4093,  0.5725,  ...,  2.1572,  0.1812,  2.1572],
         ...,
         [-0.6940, -0.4169,  0.8242,  ...,  1.9771,  0.1945,  1.9771],
   

In [37]:
torch.cat((train_dataset[0]['BTC_window'], train_dataset[0]['BTC_window'] ), axis=1).shape

torch.Size([50, 168])

In [35]:
class LSTM_based_classification_model(pl.LightningModule):
    def __init__(self,
                 train_dataset,
                 val_dataset,
                 test_dataset,
                 calculate_loss_weights,
                 currency_list,
                 num_classes,
                 window_size,
                 batch_size,
                 lstm_hidden_size,
                 n_lstm_layers,
                 bidirectional,
                 last_layer_fsz,
                 dropout_after_each_lstm_layer,
                 dropout_before_output_layer,
                 input_concat,
                 warmup_epoch = 5,
                 learning_rate = 1e-3,
                 weight_decay = 1e-2,
                 **kwargs
                #  scheduler_step = 10,
                #  scheduler_gamma = 0.1,
                 ):
        
        super().__init__()
        self.num_classes = num_classes
        self.currency_list = currency_list
        self.num_tasks = len(currency_list)
        self.window_size = window_size
        self.input_concat = input_concat ###########
        self.input_size = train_dataset.x.shape[-1] * self.num_tasks if self.input_concat else train_dataset.x.shape[-1]
        self.batch_size = batch_size
        self.n_lstm_layers = n_lstm_layers
        self.lstm_hidden_sizes = [lstm_hidden_size] * self.n_lstm_layers
        self.bidirectional = bidirectional 
        self.loss_weightening = calculate_loss_weights
        self.dropout_after_each_lstm_layer = dropout_after_each_lstm_layer
        self.dropout_before_output_layer = dropout_before_output_layer
        self.last_layer_fsz = last_layer_fsz
        self.learning_rate = learning_rate
        self.warmup_epoch = warmup_epoch
        self.weight_decay = weight_decay
        
        if calculate_loss_weights:
            loss_weights = []
            for i in range(self.num_tasks):
                train_labels = [int(train_dataset[n][self.currency_list[i] +"_label"] )for n in range(train_dataset.__len__())]
                samples_size = pd.DataFrame({"label": train_labels}).groupby("label").size().to_numpy()
                loss_weights.append((1 / samples_size) * sum(samples_size)/2)
            self.weights = loss_weights
        else:
            self.weights = None
        
        # self.lstm_1 = nn.LSTM(input_size = self.input_size, 
        #                       num_layers=1, 
        #                       batch_first=True, 
        #                       hidden_size = self.lstm_hidden_sizes[0], 
        #                       bidirectional = bidirectional)
        # self.batch_norm1 = nn.BatchNorm2d(num_features=self.lstm_hidden_sizes[0]*2 if bidirectional else self.lstm_hidden_sizes[0])
        
        # if len(self.lstm_hidden_sizes) > 1:
        #     self.lstm_2 = nn.LSTM(input_size = self.lstm_hidden_sizes[0] *2 if bidirectional else self.lstm_hidden_sizes[0], 
        #                           num_layers=1, 
        #                           batch_first=True, 
        #                           hidden_size = self.lstm_hidden_sizes[1], 
        #                           bidirectional = bidirectional)
        #     self.batch_norm2 = nn.BatchNorm2d(num_features=self.lstm_hidden_sizes[1]*2 if bidirectional else self.lstm_hidden_sizes[1])

        #     self.lstm_3 = nn.LSTM(input_size = self.lstm_hidden_sizes[1]*2 if bidirectional else self.lstm_hidden_sizes[1], 
        #                           num_layers=1, 
        #                           batch_first=True, 
        #                           hidden_size = self.lstm_hidden_sizes[2], 
        #                           bidirectional = bidirectional)
        #     self.batch_norm3 = nn.BatchNorm2d(num_features=self.lstm_hidden_sizes[2]*2 if bidirectional else self.lstm_hidden_sizes[2])
        
        
        # self.dropout = nn.Dropout(self.dropout_ratio)
        
        self.lstm_blocks = nn.ModuleList()
        
        for i in range(self.n_lstm_layers):

            if i == 0:
              input_size = self.input_size 
            else:
              input_size = self.lstm_hidden_sizes[i-1]*2 if self.bidirectional else self.lstm_hidden_sizes[i-1]   
            
            lstm_layer = nn.LSTM(input_size = input_size, 
                                  num_layers=1, 
                                  batch_first=True, 
                                  hidden_size = self.lstm_hidden_sizes[i], 
                                  bidirectional = self.bidirectional)
            
            n_feature = self.lstm_hidden_sizes[i]*2 if self.bidirectional else self.lstm_hidden_sizes[i]   
            batch_norm = nn.BatchNorm2d(num_features=n_feature)
            lst = [('lstm', lstm_layer), ('batch_norm', batch_norm)]
  
            if self.dropout_after_each_lstm_layer:
                dropout = nn.Dropout(self.dropout_after_each_lstm_layer)
                lst.append(('dropout', dropout))
                
            module_dict = nn.ModuleDict(lst)
            
            self.lstm_blocks.append(module_dict)
        
        n_feature = self.lstm_hidden_sizes[-1] *2 if bidirectional else self.lstm_hidden_sizes[-1]
        
        self.linear1 =[nn.Linear(n_feature, self.last_layer_fsz)] * self.num_tasks
        self.linear1 = torch.nn.ModuleList(self.linear1)
        self.activation = nn.ReLU()
        
        if self.dropout_before_output_layer:
          self.dropout1 = nn.Dropout(self.dropout_before_output_layer)
          
        self.output_layers = [nn.Linear(self.last_layer_fsz, self.num_classes)] * self.num_tasks
        self.output_layers = torch.nn.ModuleList(self.output_layers)
        
        if self.weights != None:
            self.cross_entropy_loss = [nn.CrossEntropyLoss(weight= torch.tensor(weights).float()) for weights in self.weights]
        else:
            self.cross_entropy_loss = [nn.CrossEntropyLoss() for _ in range(self.num_tasks)]
        
        self.cross_entropy_loss = torch.nn.ModuleList(self.cross_entropy_loss)
        
        self.f1_score = pl.metrics.F1(num_classes=self.num_classes, average="macro")
        self.accuracy_score = pl.metrics.Accuracy()
        
        self.train_dl = DataLoader(train_dataset, batch_size=self.batch_size, shuffle = True)
        self.val_dl = DataLoader(val_dataset, batch_size=self.batch_size)
        self.test_dl = DataLoader(test_dataset, batch_size=self.batch_size)
        
        # self.scheduler_step = scheduler_step
        # self.scheduler_gamma = scheduler_gamma
        
    def forward(self, x, n):

        batch_size = x.size()[0]
        
        # x = x.view(batch_size, self.window_size, self.input_size) #(batch, window_len, feature_size)
        # x, _  = self.lstm_1(x)
        
        # x = self.dropout(x)

        # x = x.reshape(x.size()[-1], batch_size, self.window_size) #(feature_size, batch, window_len)
        # x = self.batch_norm1(x.unsqueeze(0))
        
        # if len(self.lstm_hidden_sizes) > 1:
            
        #     x = x.view(batch_size, self.window_size, x.size()[1])
        #     x, _  = self.lstm_2(x)

        #     x = self.dropout(x)

        #     x = x.reshape(x.size()[-1], batch_size, self.window_size) #(feature_size, batch, window_len)
        #     x = self.batch_norm2(x.unsqueeze(0))

        #     x = x.view(batch_size, self.window_size, x.size()[1])
        #     x, _  = self.lstm_3(x)

        #     x = self.dropout(x)

        #     x = x.reshape(x.size()[-1], batch_size, self.window_size) #(feature_size, batch, window_len)
        #     x = self.batch_norm3(x.unsqueeze(0))
        
        for i, block in enumerate(self.lstm_blocks):

            if i == 0:
              n_feature = self.input_size 
            else:
              n_feature = self.lstm_hidden_sizes[i-1]*2 if self.bidirectional else self.lstm_hidden_sizes[i-1]   
 
            x = x.view(batch_size, self.window_size, n_feature) #(batch, window_len, feature_size)
            x, _ = block['lstm'](x)
        
            if 'dropout' in block:
                x = block['dropout'](x)
           
            x = x.reshape(x.size()[-1], batch_size, self.window_size) #(feature_size, batch, window_len)
        
            x = block['batch_norm'](x.unsqueeze(0))
  
            if len(x.shape) == 4: #error handling
              x = x.squeeze() 
              
        #x = x.view(batch_size, self.window_size, x.size()[1])
        n_feature = self.lstm_hidden_sizes[-1]*2 if self.bidirectional else self.lstm_hidden_sizes[-1]
        x = x.view(batch_size, self.window_size, n_feature)
        x = x[:, -1, :] # equivalent to return sequence = False on keras :)
        
        #x = self.dropout(x)
        
        x = self.linear1[n](x)
        x = self.activation(x)
        
        if self.dropout_before_output_layer:
            x = self.dropout1(x)
                   
        output = self.output_layers[n](x)
    
        return output
    
    def step(self, batch, step_type = 'train'):
        loss = (torch.tensor(0.0, device="cuda:0", requires_grad=True) + \
                torch.tensor(0.0, device="cuda:0", requires_grad=True)) 
        
        if self.input_concat and self.num_tasks > 1:
            x = torch.cat(tuple([batch[self.currency_list[i] + "_window"] for i in range(self.num_tasks)]),axis =1)
        
        for i in range(self.num_tasks):
            if self.input_concat and self.num_tasks > 1:
                y = batch[self.currency_list[i] + "_label"]
            else:
                x, y = batch[self.currency_list[i] + "_window"], batch[self.currency_list[i] + "_label"]
            output = self.forward(x, i)
            #loss = F.nll_loss(output, y)
            loss += self.cross_entropy_loss[i](output, y)
            
            acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
            self.log(f"{self.currency_list[i]}_{step_type}_acc", acc, on_epoch=True, prog_bar=True)

            f1 = self.f1_score(torch.max(output, dim=1)[1], y)
            self.log(f"{self.currency_list[i]}_{step_type}_f1", f1, on_epoch=True, prog_bar=True)
        
        loss = loss / torch.tensor(self.num_tasks)
        self.log(f"{step_type}_loss", loss, on_epoch=True, prog_bar=True)

        return loss
    
    def configure_optimizers(self):
        
        optimizer = torch.optim.AdamW(self.parameters(), 
                                      lr= self.learning_rate, 
                                      weight_decay=self.weight_decay)

#         scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 
#                                                     step_size=self.scheduler_step, 
#                                                     gamma=self.scheduler_gamma)
        
        self.lr_scheduler = CosineWarmupScheduler(optimizer, 
                                                  warmup = self.train_dl.__len__() * self.warmup_epoch, 
                                                  max_iters = MAX_EPOCHS * self.train_dl.__len__())
        return [optimizer]#, [{"scheduler": scheduler}]
    
    def training_step(self, batch, batch_nb):
        
        loss = self.step(batch, "train")
        
        return loss 
    
    def validation_step(self, batch, batch_nb):
        
        self.step(batch, "val")
    
    def test_step(self, batch, batch_nb):
        
        self.step(batch, "test")
    
    def optimizer_step(self, *args, **kwargs):
        super().optimizer_step(*args, **kwargs)
        self.lr_scheduler.step() # Step per iteration
    
    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl

In [38]:
model = LSTM_based_classification_model(train_dataset = train_dataset,
                                        val_dataset = val_dataset,
                                        test_dataset = test_dataset,
                                        input_concat = False,
                                        **config)

early_stop_callback = EarlyStopping(
   monitor='val_loss',
   min_delta=0.003,
   patience=10,
   verbose=True,
   mode='min'
)
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath='../output/',
    filename = MODEL_NAME +'-{epoch:02d}-{val_loss:.2f}',
    save_top_k=1,
    mode='min',
)

trainer = pl.Trainer(gpus=-1, 
                     max_epochs= 80,
                     #logger = logger, 
                     callbacks=[early_stop_callback, checkpoint_callback])
trainer.fit(model)

trainer.test(ckpt_path = checkpoint_callback.best_model_path)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type       | Params
--------------------------------------------------
0 | lstm_blocks        | ModuleList | 1.0 M 
1 | linear1            | ModuleList | 32.9 K
2 | activation         | ReLU       | 0     
3 | dropout1           | Dropout    | 0     
4 | output_layers      | ModuleList | 258   
5 | cross_entropy_loss | ModuleList | 0     
6 | f1_score           | F1         | 0     
7 | accuracy_score     | Accuracy   | 0     
--------------------------------------------------
1.0 M     Trainable params
0         Non-trainable params
1.0 M     Total params
4.177     Total estimated model params size (MB)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Metric val_loss improved. New best score: 0.699


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Metric val_loss improved by 0.009 >= min_delta = 0.003. New best score: 0.690


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

Monitored metric val_loss did not improve in the last 10 records. Best score: 0.690. Signaling Trainer to stop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]





HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'BTC_test_acc': 0.5233644843101501,
 'BTC_test_f1': 0.5123544335365295,
 'ETH_test_acc': 0.5233644843101501,
 'ETH_test_f1': 0.4658462703227997,
 'LTC_test_acc': 0.5981308221817017,
 'LTC_test_f1': 0.596709668636322,
 'test_loss': 0.6924201250076294}
--------------------------------------------------------------------------------



[{'BTC_test_acc': 0.5233644843101501,
  'BTC_test_f1': 0.5123544335365295,
  'ETH_test_acc': 0.5233644843101501,
  'ETH_test_f1': 0.4658462703227997,
  'LTC_test_acc': 0.5981308221817017,
  'LTC_test_f1': 0.596709668636322,
  'test_loss': 0.6924201250076294}]

In [None]:
#input concat
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'BTC_test_acc': 0.5046728849411011,
 'BTC_test_f1': 0.3353511095046997,
 'ETH_test_acc': 0.5700934529304504,
 'ETH_test_f1': 0.3804537355899811,
 'LTC_test_acc': 0.5327102541923523,
 'LTC_test_f1': 0.3617068827152252,
 'test_loss': 0.6925621628761292}
--------------------------------------------------------------------------------

[{'BTC_test_acc': 0.5046728849411011,
  'BTC_test_f1': 0.3353511095046997,
  'ETH_test_acc': 0.5700934529304504,
  'ETH_test_f1': 0.3804537355899811,
  'LTC_test_acc': 0.5327102541923523,
  'LTC_test_f1': 0.3617068827152252,
  'test_loss': 0.6925621628761292}]

In [None]:
trainer.test(dataloaders= model.val_dl)

In [None]:
trainer.test(dataloaders= model.train_dl)