# Sequential Deep Learning Models in Pytorch


In [1]:
#!/usr/bin/env python3
"""
@author Alberto Bassi & Riccardo Tomada
"""
# Essential
import numpy as np
import matplotlib.pyplot as plt
import os
import plotly.express as px
import pandas as pd

# Pytorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets

# Neural Networks
from models import RNN_layer, LSTM_layer

# Progress bar
from tqdm import tqdm
from tqdm.notebook import tqdm_notebook
from time import sleep

# Dates
from datetime import datetime as dt

## Dataset and Dataloader

In [2]:
# Visualize as pandas dataframe
df = pd.read_csv("bitcoin_dataset.csv",usecols = (1,2,3,4,5,6,7,8,9,10,11))
df


Unnamed: 0,Open Time,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume
0,2017-11-06 03:00:00.000000000,1.5000,1.7990,0.5000,1.7000,649.120,2017-11-06 03:59:59.999000064,7.251214e+02,33.0,207.450,3.514144e+02
1,2017-11-06 04:00:00.000000000,1.3000,1.6500,1.3000,1.6479,8147.720,2017-11-06 04:59:59.999000064,1.270853e+04,139.0,2130.590,3.436513e+03
2,2017-11-06 05:00:00.000000000,1.5457,1.5525,1.5455,1.5458,6628.200,2017-11-06 05:59:59.999000064,1.026534e+04,27.0,563.920,8.754265e+02
3,2017-11-06 06:00:00.000000000,1.5458,1.6810,1.5387,1.6810,22767.900,2017-11-06 06:59:59.999000064,3.650714e+04,133.0,12886.750,2.124768e+04
4,2017-11-06 07:00:00.000000000,1.6809,1.6809,1.6000,1.6250,14938.730,2017-11-06 07:59:59.999000064,2.427873e+04,58.0,7162.660,1.164245e+04
...,...,...,...,...,...,...,...,...,...,...,...
34746,2021-10-28 20:00:00,491.0000,494.8000,477.9000,490.5000,90431.134,2021-10-28 20:59:59.999000064,4.444162e+07,75832.0,40715.355,2.003381e+07
34747,2021-10-28 21:00:00,490.5000,490.5000,485.8000,489.8000,37248.759,2021-10-28 21:59:59.999000064,1.818572e+07,43707.0,19556.122,9.550490e+06
34748,2021-10-28 22:00:00,489.9000,491.3000,488.5000,489.3000,38162.017,2021-10-28 22:59:59.999000064,1.871114e+07,28600.0,21584.731,1.058593e+07
34749,2021-10-28 23:00:00,489.3000,492.3000,487.4000,491.9000,49811.734,2021-10-28 23:59:59.999000064,2.442102e+07,28422.0,25969.233,1.273187e+07


In [26]:
# Create a costum dataset class
class BinanceDataset(Dataset):

    def __init__(self, csv_file, transform=None):
        """
        Args:
        csv_file (string): Path to the csv file.
        seq_length (int): length of the temporal sequence
        transform (callable, optional): Optional transform to be applied
            on a sample.
        """
        self.transform = transform
        
        # Read the file and split the lines in a list
        with open(csv_file, 'r') as f:
            lines = f.read().split('\n')
            
        # Get x and y values from each line and append to self.data
        self.data = []
        
        # Take maximum number of days
        num_lines = len(lines)
        num_days = num_lines//24
        
        for day in range(num_days):
            day_sample = []
            for line in lines[1 + day*24: (day+1)*24 +1]:
                sample = line.split(',')
                # Take opening and clsure times 
                # Uncomment if they were different for each time
                #opening = pd.to_datetime(sample[1], format='%Y-%m-%d %H:%M:%S.%f')
                #closure = pd.to_datetime(sample[7], format='%Y-%m-%d %H:%M:%S.%f')

                # Add as first element the total time of operation
                #samples = [(closure-opening).total_seconds()]
                samples = []

                len_sample = len(sample)

                for i in range(1,len_sample):
                    # Exclude opening and closure time
                    if i!=1 and i!=7:
                        # Convert to float and append to samples
                        sample[i] = float(sample[i])
                        samples.append(sample[i])

                day_sample.append(samples)
            
            self.data.append(day_sample)
    def __len__(self):
        # The length of the dataset is simply the length of the self.data list
        return len(self.data)

    def __getitem__(self, idx):
        # Our sample is the element idx of the list self.data
        sample = self.data[idx]
        if self.transform:
            sample = self.transform(sample)
        return sample

In [89]:
# Transforms
class ToTensor(object):
    """Convert sample to Tensors."""

    def __call__(self, sample):
        return (torch.tensor(sample).float())
    
# Dataset
composed_transform = transforms.Compose([ToTensor()] )
dataset = BinanceDataset('bitcoin_dataset.csv', transform=composed_transform)



In [90]:

# Split to define validation and train dataset
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [1300, 148])

In [91]:
# Dataloaders
train_dataloader = DataLoader(dataset, batch_size=64, shuffle=True, num_workers=0)
val_dataloader  = DataLoader(val_dataset,  batch_size= 64, shuffle=False, num_workers=0)

In [92]:
batch = next(iter(train_dataloader))
print(batch.shape)

torch.Size([64, 24, 10])


## Define the Network

In [93]:
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Training device: {device}")

### Set the random seed for reproducible results
torch.manual_seed(0)

### Initialize the two networks
x_dim = 10
h_dim = 10

len_seq = 24

net = LSTM_layer(x_dim, h_dim)

### Move to device
net.to(device)

Training device: cpu


LSTM_layer(
  (forget_gate): Linear(in_features=20, out_features=10, bias=True)
  (input_gate): Linear(in_features=20, out_features=10, bias=True)
  (cell_update): Linear(in_features=20, out_features=10, bias=True)
  (out): Linear(in_features=20, out_features=10, bias=True)
  (sigmoid): Sigmoid()
  (tanh): Tanh()
)

In [94]:
# Check if the nets are working properly

x = torch.randn(1,len_seq,x_dim)
h = torch.randn(1,len_seq,h_dim)
C = torch.randn(1,len_seq,h_dim)

out = net(x,h, C)
print(out[0])

tensor([[[0.6453, 0.3389, 0.5321, 0.5557, 0.3905, 0.5548, 0.6733, 0.2914,
          0.3411, 0.3984],
         [0.7234, 0.6774, 0.4375, 0.5262, 0.7028, 0.4999, 0.2970, 0.5920,
          0.5780, 0.6527],
         [0.5668, 0.4067, 0.5769, 0.5775, 0.6048, 0.5065, 0.5415, 0.5179,
          0.4812, 0.4045],
         [0.6593, 0.5167, 0.4205, 0.6979, 0.3313, 0.5890, 0.4050, 0.5845,
          0.4504, 0.2800],
         [0.5360, 0.3401, 0.2517, 0.3842, 0.4336, 0.3488, 0.6318, 0.5053,
          0.3117, 0.5919],
         [0.6598, 0.4011, 0.4593, 0.2735, 0.4766, 0.7157, 0.4586, 0.2587,
          0.4549, 0.4360],
         [0.4878, 0.5485, 0.4623, 0.7217, 0.6492, 0.2805, 0.4145, 0.7951,
          0.4567, 0.5744],
         [0.4377, 0.4854, 0.5220, 0.6182, 0.4346, 0.5445, 0.6388, 0.5347,
          0.4321, 0.4781],
         [0.5725, 0.2598, 0.6299, 0.5263, 0.4217, 0.5860, 0.6674, 0.3445,
          0.5671, 0.4494],
         [0.4787, 0.5789, 0.4981, 0.5357, 0.5316, 0.2818, 0.4687, 0.6387,
          0.4416,

# Training

In [95]:
# Check if the GPU is available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Training device: {device}")

Training device: cpu


In [96]:
# Define the optimizer
optimizer = torch.optim.RMSprop(net.parameters())

# Define the loss function
loss_function = nn.CrossEntropyLoss()

## Training loops

In [109]:
def train_epoch(net, device, dataloader, loss_function, optimizer):
    """
    Train an epoch of data
    -----------
    Parameters:
    net = network
    device = training device (cuda/cpu)
    dataloader = dataloader of data
    loss_function = loss function
    optimzer = optimizer used
    --------
    Returns:
    mean(train_epoch_loss) = average epoch loss
    """
    # Set the train mode
    net.train()
    # List to save batch losses
    train_epoch_loss = []
    # Iterate the dataloader
    for batch in dataloader:

        # Move to device
        batch = batch.to(device)
        
        # Prepare input
        x_input = batch[:, :-1,:]
        labels = batch[:,1:, :]
        
        a_0 = torch.zeros(64,23,10)
        c_0 = torch.zeros(64,23,10)
        # Forward pass (we do not need recurrent state now)
        net_out, _, _ = net(x_input, a_0, c_0) 
        
        
        # Compute loss
        loss = loss_function(net_out, labels)
        

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Compute batch losses
        train_batch_loss = loss.detach().cpu().numpy()
        train_epoch_loss.append(train_batch_loss)
        
    return np.mean(train_epoch_loss)
    

In [110]:
num_epochs = 500

pbar = tqdm_notebook(range(num_epochs))

train_losses = []
for epoch in pbar:
    epoch_loss = train_epoch(net, device, train_dataloader, loss_function, optimizer)
    train_losses.append(train_loss)

  0%|          | 0/500 [00:00<?, ?it/s]

torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])
torch.Size([64, 23, 10])


ValueError: expected sequence of length 10 at dim 1 (got 0)

## Save and load model