In [1]:
import pandas as pd
import numpy as np

In [2]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

import pytorch_lightning as pl
from pytorch_lightning.metrics import F1
from pytorch_lightning.loggers import WandbLogger

In [31]:
N_CLASSES = 3
N_CURRENCIES = 3
INPUT_FEATURE_SIZE = 3
WINDOW_SIZE = 50
TRAIN_PERCENTAGE, VAL_PERCENTAGE, TEST_PERCENTAGE = 0.70, 0.15, 0.15

In [32]:
#time_series = 
# veri bu şekilde hazırlanmalı
# data[N_CURRENCIES][SERIES_LEN][INPUT_FEATURE_SIZE]
# input feature larından 0 price prediction için olan veriyi vermeli

In [33]:
series_length = 2000
sample_data = np.random.rand(N_CURRENCIES, series_length, INPUT_FEATURE_SIZE)
sample_data

array([[[0.23088973, 0.49757251, 0.34794033],
        [0.57180844, 0.23812744, 0.07794263],
        [0.29838999, 0.63729098, 0.31115375],
        ...,
        [0.0194084 , 0.00602222, 0.42604163],
        [0.25265734, 0.97484391, 0.62643969],
        [0.77926653, 0.81545885, 0.3772543 ]],

       [[0.91432109, 0.7649435 , 0.97877158],
        [0.5261074 , 0.1097452 , 0.57684679],
        [0.88210343, 0.2215405 , 0.51642907],
        ...,
        [0.341817  , 0.78906859, 0.47354565],
        [0.28950253, 0.36693896, 0.39319208],
        [0.21129712, 0.79835578, 0.56124629]],

       [[0.8169917 , 0.57190898, 0.70315261],
        [0.295609  , 0.22288916, 0.658404  ],
        [0.59673753, 0.00171058, 0.98495485],
        ...,
        [0.33328285, 0.27431204, 0.44759833],
        [0.75785471, 0.11827018, 0.40642197],
        [0.78897326, 0.26690853, 0.18392046]]])

In [40]:
class MultiTimeSeriesDataset(Dataset):
    def __init__(self, 
                 x: np.ndarray, 
                 data_use_type,
                 train_percentage = TRAIN_PERCENTAGE,
                 val_percentage = VAL_PERCENTAGE,
                 test_percentage = TEST_PERCENTAGE,
                 seq_len = WINDOW_SIZE, 
                 ):
        
        self.x = torch.tensor(x).float()
        self.seq_len = seq_len
        
        self.data_use_type = data_use_type
        
        self.train_size = int(len(self.x[0]) * train_percentage)
        self.val_size = int(len(self.x[0]) * val_percentage)
        self.test_size = int(len(self.x[0]) * test_percentage)
        
    def __len__(self):
        
        if self.data_use_type == "train":
            return self.train_size - ( self.seq_len)
        
        if self.data_use_type == "val":
            return self.val_size
        
        else:
            return self.test_size
        
    
    def __getitem__(self, index):
        
        item = dict()
        
        if self.data_use_type =="val":
            index = self.train_size + index - self.seq_len
            
        elif self.data_use_type =="test":
            index = self.train_size + self.val_size + index - self.seq_len
        
        for i in range(N_CURRENCIES):
                item["currency_" + str(i) + "_window"] = self.x[i][index:index+self.seq_len]

                price_change = self.x[i][index+self.seq_len][0] - self.x[i][index+self.seq_len-1][0]
                item["currency_" + str(i) + "_label"] = 0 if price_change == 0 else 1 if price_change >0 else 2 #2 düşüş

        return item

In [44]:
MultiTimeSeriesDataset(sample_data, "test")[-1]

{'currency_0_window': tensor([[1.7442e-01, 9.0479e-01, 1.2487e-02],
         [3.5751e-01, 4.6581e-01, 9.6337e-01],
         [1.6700e-01, 5.4848e-02, 2.7434e-01],
         [4.5875e-03, 1.1440e-01, 8.9658e-01],
         [1.4810e-01, 4.0284e-01, 6.2324e-01],
         [6.6001e-01, 2.8892e-01, 1.3432e-01],
         [4.5089e-01, 6.6433e-01, 9.4611e-01],
         [2.2359e-02, 7.6274e-01, 5.1573e-01],
         [3.3987e-01, 6.6808e-01, 4.7247e-01],
         [2.3938e-01, 4.0257e-01, 2.6294e-01],
         [6.1641e-01, 7.6957e-02, 2.8933e-01],
         [2.1328e-01, 6.4844e-01, 2.6555e-01],
         [4.8892e-02, 4.2401e-01, 6.5794e-01],
         [9.6582e-01, 5.2987e-01, 7.1647e-01],
         [6.5245e-01, 2.4914e-01, 3.8616e-01],
         [6.7862e-01, 8.7440e-01, 9.9986e-01],
         [5.9575e-01, 1.6961e-01, 7.7485e-01],
         [1.4100e-01, 9.3246e-01, 4.2034e-01],
         [5.4767e-01, 1.1359e-01, 5.1333e-01],
         [3.3162e-01, 1.8582e-01, 5.7527e-01],
         [8.0051e-01, 9.2673e-01, 5.029

In [41]:
deneme = torch.tensor(0.0, device="cuda:0", requires_grad=True) + torch.tensor(0.0, device="cuda:0", requires_grad=True)

In [42]:
deneme += 2

In [43]:
time_series = sample_data

tensor(2., device='cuda:0', grad_fn=<AddBackward0>)

In [27]:
class LSTM_based__multi_task_classification_model(pl.LightningModule):
    def __init__(self,
                 data = time_series,
                 num_classes = N_CLASSES,
                 input_size = INPUT_FEATURE_SIZE,
                 batch_size= 8,
                 lstm_hidden_size = 256,
                 lstm_stack_size = 3,
                 lstm_dropout = 0.5,
                 bidirectional = False,
                 ):
        
        super().__init__()
        self.data = time_series
        self.num_classes = num_classes
        self.input_size = input_size
        self.batch_size = batch_size
        
        self.lstm_hidden_size = lstm_hidden_size
        self.lstm_stack_size = lstm_stack_size
        self.lstm_dropout = lstm_dropout
        self.bidirectional = bidirectional 
        
        self.stack_lstm = nn.LSTM(input_size = self.input_size, 
                hidden_size = self.lstm_hidden_size, 
                num_layers= self.lstm_stack_size,
                dropout = self.lstm_dropout,
                bidirectional = self.bidirectional, 
                batch_first=True,)
        
#         self.linear1 = nn.Linear(self.lstm_hidden_size, 128)
        
#         self.linear2 = nn.Linear(128, 64)
        
#         self.activation = nn.ReLU()
        self.output_layer = nn.Linear(64, self.num_classes)
        
        self.f1_score = pl.metrics.F1(num_classes=self.num_classes)
        self.accuracy_score = pl.metrics.Accuracy()
        
        self.train_dl = DataLoader(MultiTimeSeriesDataset(self.data, "train"), 
                                   batch_size=self.batch_size)
        
        self.val_dl = DataLoader(MultiTimeSeriesDataset(self.data, "val"),
                                 batch_size=self.batch_size)
        
        self.test_dl = DataLoader(MultiTimeSeriesDataset(self.data, "test"), 
                                  batch_size=self.batch_size)
    
    def forward(self, x):
        
        x = x.view(x.size()[0], x.size()[1], self.input_size) #(batch, window_len, feature_size)
        
        x, _=  self.stack_lstm(x)
        x = x[:, -1, :] # equivalent to return sequence = False on keras :)
        
        x = self.linear1(x)
        x = self.activation(x)
        
        x = self.linear2(x)
        x = self.activation(x)
        
        output = self.output_layer(x)
        #print("output1", output[0])
        #output = F.log_softmax(x, dim = 1)
        output = F.softmax(output)
        #print ("output", output.size())
        return output
    
    def training_step(self, batch, batch_nb):
        x, y = batch
        output = self(x)
        loss = F.nll_loss(output, y)
        self.log('train_loss', loss, on_step=True, prog_bar=True)
        
        acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
        self.log('train_acc', acc, on_step=True, prog_bar=True)
        
        f1 = self.f1_score(torch.max(output, dim=1)[1], y)
        self.log('train_f1', f1, on_step=True, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_nb):
        x, y = batch
        output = self(x)
        loss = F.nll_loss(x, y)
        self.log('val_loss', loss, on_epoch=True, reduce_fx=torch.mean, prog_bar=True)
        
        #print(torch.max(output, dim=1)[1])
        acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
        self.log('val_acc', acc, on_epoch=True, reduce_fx=torch.mean, prog_bar=True)
        
        f1 = self.f1_score(torch.max(output, dim=1)[1], y)
        self.log('val_f1', f1, on_epoch=True, reduce_fx=torch.mean, prog_bar=True)
        
    def test_step(self, batch, batch_nb):
        x, y = batch
        output = self(x)
        loss = F.nll_loss(x, y)
        self.log('test_loss', loss, on_epoch=True, reduce_fx=torch.mean)
        
        acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
        self.log('test_acc', acc, on_epoch=True, reduce_fx=torch.mean)
        
        f1 = self.f1_score(torch.max(output, dim=1)[1], y)
        self.log('test_f1', f1, on_epoch=True, reduce_fx=torch.mean)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(model.parameters(), lr=6e-4)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)
        #weight and biases
        return [optimizer], [scheduler]
    
    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl

In [28]:
!rm -rf ./lightning_logs/version_*

In [29]:
wandb_logger = WandbLogger(name='lstm.v1',project='pytorchlightning')

In [30]:
model = LSTM_based_classification_model()
trainer = pl.Trainer(gpus=-1, 
                     logger = wandb_logger)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [31]:
trainer.fit(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[34m[1mwandb[0m: Currently logged in as: [33maysenurk[0m (use `wandb login --relogin` to force relogin)



  | Name           | Type     | Params
--------------------------------------------
0 | stack_lstm     | LSTM     | 1.3 M 
1 | linear1        | Linear   | 32.9 K
2 | linear2        | Linear   | 8.3 K 
3 | activation     | ReLU     | 0     
4 | output_layer   | Linear   | 195   
5 | f1_score       | F1       | 0     
6 | accuracy_score | Accuracy | 0     
--------------------------------------------
1.4 M     Trainable params
0         Non-trainable params
1.4 M     Total params
5.437     Total estimated model params size (MB)


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

  output = F.softmax(output)


HBox(children=(HTML(value='Training'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…

HBox(children=(HTML(value='Validating'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), m…






1

In [32]:
trainer.test()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


HBox(children=(HTML(value='Testing'), FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=…

  output = F.softmax(output)



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.5954198241233826,
 'test_f1': 0.5954198241233826,
 'test_loss': -28952.98828125}
--------------------------------------------------------------------------------


[{'test_loss': -28952.98828125,
  'test_acc': 0.5954198241233826,
  'test_f1': 0.5954198241233826}]

In [259]:
#dropout, batch normalization 

False

In [115]:
model.test_dataloader()

<torch.utils.data.dataloader.DataLoader at 0x7f46e05073d0>