In [59]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
import torch.nn as nn
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, ConcatDataset, Subset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision.io import read_image
import matplotlib.pyplot as plt
import math
import time
import os
import pandas as pd
import torch.nn.functional as F
#from torch.utils.tensorboard import SummaryWriter
import cv2
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_tcn import TCN

# Helper functions

In [2]:
def train_val_dataset(dataset, val_split=0.25, random_state=None):
    """Randomly split ``dataset`` into a training and a validation subset.

    Parameters
        :param dataset: Sized, indexable dataset (must support ``len()``).
        :param val_split: Share of the samples assigned to validation;
            forwarded to ``train_test_split`` as ``test_size``.
        :param random_state: Optional seed forwarded to ``train_test_split`` so
            the split can be reproduced. ``None`` (default) keeps the previous
            unseeded behaviour.

    Returns
        dict with the keys ``'train'`` and ``'valid'``, each a ``Subset`` of
        ``dataset``.
    """
    train_idx, val_idx = train_test_split(
        list(range(len(dataset))),
        test_size=val_split,
        random_state=random_state,
    )
    # Built as a literal (no local called ``datasets``) so the imported
    # torchvision ``datasets`` module name is not shadowed inside the function.
    return {
        'train': Subset(dataset, train_idx),
        'valid': Subset(dataset, val_idx),
    }

In [3]:
def preprocessing_batch(x, y, batch_first=True):
    """Extract the feature and label tensors the model consumes from a raw batch.

    This has to be applied to every batch, both during training and validation.

    Parameters
        :param x: Mapping holding the features under the key ``"encoder_cont"``
            (laid out as [batchsize, timestep, features]).
        :param y: Sequence whose first element is the label tensor, with a
            leading batch axis (e.g. [batchsize, 1]).
        :param batch_first: Keep features as [batchsize, timestep, features]
            when True; permute to [timestep, batchsize, features] otherwise.

    Returns
        (x_input, y_input): feature tensor and label tensor of shape [batchsize].
    """
    x_input = x["encoder_cont"]

    # Drop only the trailing singleton axis of the labels. A bare
    # ``torch.squeeze`` would also remove the batch axis when the batch holds a
    # single sample, producing a 0-d target that no longer lines up with the
    # [batchsize, nclasses] model output.
    y_input = y[0]
    if y_input.dim() > 1:
        y_input = y_input.flatten(start_dim=1).squeeze(-1)

    if not batch_first:
        # Reshaping to have: shape [timestep, batchsize, features]
        x_input = x_input.permute([1, 0, 2])
    return x_input, y_input

In [4]:
def _run_epoch(model, loss_criterion, data_loader, device, optimizer=None):
    """Run one full pass over ``data_loader``.

    Trains (backward pass + optimizer step) when ``optimizer`` is given,
    otherwise only evaluates with gradient tracking disabled.

    Returns
        (avg_loss, avg_acc): loss averaged per sample and the fraction of
        correct predictions. Both are NaN when the loader yields nothing.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    n_samples = 0
    n_correct = 0
    n_predictions = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in data_loader:
            inputs, labels = preprocessing_batch(inputs, labels)
            inputs = inputs.to(device)
            labels = labels.to(device)

            if training:
                # Clean existing gradients
                optimizer.zero_grad()

            # Forward pass and loss on the current batch
            outputs = model(inputs)
            loss = loss_criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # ``loss`` is a batch mean: weight it by the batch size so the
            # epoch average stays exact even when the last batch is smaller.
            n_batch = inputs.size(0)
            loss_sum += loss.item() * n_batch
            n_samples += n_batch

            # Predicted class = index of the largest score along dim 1
            predictions = outputs.detach().argmax(dim=1)
            n_correct += predictions.eq(labels.view_as(predictions)).sum().item()
            n_predictions += predictions.numel()

    avg_loss = loss_sum / n_samples if n_samples else float("nan")
    avg_acc = n_correct / n_predictions if n_predictions else float("nan")
    return avg_loss, avg_acc


def train_and_validate(model, loss_criterion, optimizer, epochs, train_data_loader, valid_data_loader, device,
                       checkpoint_path='custom_model_bw.pt'):
    '''
    Function to train and validate
    Parameters
        :param model: Model to train and validate
        :param loss_criterion: Loss Criterion to minimize
        :param optimizer: Optimizer that updates the model parameters
        :param epochs: Number of epochs
        :param train_data_loader: Loader yielding raw training batches
        :param valid_data_loader: Loader yielding raw validation batches
        :param device: Device the model and the batches are moved to
        :param checkpoint_path: File that receives the state_dict every time
            the validation loss improves

    Returns
        model: The model in its final-epoch state. The weights with the lowest
            validation loss are NOT restored here; they are stored in
            ``checkpoint_path``.
        history: list with one [train_loss, valid_loss, train_acc, valid_acc]
            entry per epoch (losses averaged per sample, accuracies in [0, 1])
    '''
    model = model.to(device)
    history = []
    best_loss = np.inf   # init to infinity

    for epoch in range(epochs):
        epoch_start = time.time()
        print("Epoch: {}/{}".format(epoch+1, epochs))

        # Training pass, then validation pass without gradient tracking
        avg_train_loss, avg_train_acc = _run_epoch(model, loss_criterion, train_data_loader, device, optimizer)
        avg_valid_loss, avg_valid_acc = _run_epoch(model, loss_criterion, valid_data_loader, device)

        history.append([avg_train_loss, avg_valid_loss, avg_train_acc, avg_valid_acc])
        epoch_end = time.time()

        # Checkpoint whenever the validation loss improves
        if avg_valid_loss < best_loss:
            print("New best model saved")
            best_loss = avg_valid_loss
            torch.save(model.state_dict(), checkpoint_path)

        print("Epoch : {:03d}, Training: Loss: {:.4f}, Accuracy: {:.4f}%, \n\t\tValidation : Loss : {:.4f}, Accuracy: {:.4f}%, Time: {:.4f}s".format(epoch+1, avg_train_loss, avg_train_acc*100, avg_valid_loss, avg_valid_acc*100, epoch_end-epoch_start))

    return model, history

# Custom Dataset Preprocessing

In [5]:
# Load one CSV as the training frame and a different one as the validation frame
train_df, valid_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "dataset_slippage/clean/clean_sample_1.csv",
        "dataset_slippage/clean/clean_sample_21.csv",
    )
)

In [6]:
# Replace the categorical label with a numerical representation
# Static  = 0
# Slipped = 1
# Guarded so that re-running this cell does not map the already-numeric labels
# a second time (0/1 are not keys of the mapping, so every label would be NaN).
if not pd.api.types.is_numeric_dtype(train_df["label"]):
    train_df["label"] = train_df["label"].map({"static": 0, "slipped": 1})
# Add a continuous, unitless time index (0, 1, 2, ...)
train_df["time_unitless"] = range(0, len(train_df))
# Add a series id column so each series can be told apart
# once several series are stacked together in the same df
train_df["series_id"] = 0

In [7]:
# Replace the categorical label with a numerical representation
# Static  = 0
# Slipped = 1
# Guarded so that re-running this cell does not map the already-numeric labels
# a second time (0/1 are not keys of the mapping, so every label would be NaN).
if not pd.api.types.is_numeric_dtype(valid_df["label"]):
    valid_df["label"] = valid_df["label"].map({"static": 0, "slipped": 1})
# Add a continuous, unitless time index (0, 1, 2, ...)
valid_df["time_unitless"] = range(0, len(valid_df))
# Add a series id column so each series can be told apart
# once several series are stacked together in the same df
valid_df["series_id"] = 0

# Creating TimeSeries Dataset and DataLoader

In [78]:
# Number of time steps in every encoder window
window_size = 50
input_features = ["mag", "mag_avg2", "mag_avg3", "mag_avg4"]
# One scaler slot per input feature; None is passed for each of them
feature_scalers = [None] * len(input_features)
scalers_dict = {name: scaler for name, scaler in zip(input_features, feature_scalers)}


def _make_window_dataset(frame):
    """Wrap ``frame`` in a TimeSeriesDataSet of fixed-length windows with a one-step target."""
    return TimeSeriesDataSet(
        frame,
        group_ids=["series_id"],
        target="label",
        time_idx="time_unitless",
        min_encoder_length=window_size,
        max_encoder_length=window_size,
        min_prediction_length=1,
        max_prediction_length=1,
        time_varying_unknown_reals=input_features,
        scalers=scalers_dict,
    )


# Create the datasets from the pandas dataframes
train_dataset = _make_window_dataset(train_df)

# TODO: a separate test_dataset still has to be created from other DataFrames (e.g. csv files 35-40)
valid_dataset = _make_window_dataset(valid_df)

In [79]:
# Load the first validation batch for a quick shape check.
# The loader is built right here because `valid_dataloader` is only created in
# a later cell, so referencing it at this point raises NameError on a fresh
# kernel (Restart & Run All). 256 matches the batch size configured further down;
# train=False keeps the batch order fixed.
x, y = next(iter(valid_dataset.to_dataloader(train=False, batch_size=256)))

In [80]:
# Reduce the raw batch to the (features, labels) tensors the model consumes.
# NOTE: this rebinds x and y, so re-running the cell needs the raw batch to be
# loaded again first (x is no longer the mapping preprocessing_batch indexes).
x, y = preprocessing_batch(x, y)

In [81]:
# Sanity check: shapes of the feature and label tensors after preprocessing
x.shape, y.shape

(torch.Size([256, 50, 4]), torch.Size([256]))

In [12]:
# Inspect the label tensor of the sampled batch
y

# Model Architecture

In [13]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a sequence, then apply dropout.

    Arguments:
        d_model: size of the feature (embedding) dimension; odd sizes are supported.
        dropout: dropout probability applied after the encoding is added.
        max_len: longest sequence length the pre-computed table covers.
        batch_first: layout of the tensors passed to ``forward`` —
            ``[batch_size, seq_len, d_model]`` when True,
            ``[seq_len, batch_size, d_model]`` when False (default).
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000,
                 batch_first: bool = False):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine slot fewer than sine slots,
        # so the surplus column is dropped instead of failing on a shape mismatch.
        pe[:, 0, 1::2] = torch.cos(angles)[:, :d_model // 2]
        # Kept as [max_len, 1, d_model] so existing state_dicts still load.
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``, or
               ``[batch_size, seq_len, embedding_dim]`` when ``batch_first`` is set
        """
        if self.batch_first:
            # Index the table by time step (dim 1) and broadcast over the batch.
            # Slicing by x.size(0) here would index positions by batch element.
            x = x + self.pe[:x.size(1)].transpose(0, 1)
        else:
            x = x + self.pe[:x.size(0)]
        return self.dropout(x)


class TransformerModel(nn.Module):
    """Transformer-encoder sequence classifier.

    The encoder output is projected to ``nclasses`` scores per time step and the
    scores are summed over the time axis, giving one score vector per sequence.

    Arguments:
        nclasses: number of output classes.
        fsize: number of input features per time step (the encoder's d_model).
        nhead: number of attention heads.
        d_hid: width of the encoder's feed-forward network.
        nlayers: number of stacked encoder layers.
        dropout: dropout probability used in the encoder and positional encoding.
        batch_first: True for ``[batch_size, seq_len, features]`` inputs,
            False for ``[seq_len, batch_size, features]``.
        apply_softmax: when True (default) class probabilities are returned.
            NOTE: ``nn.CrossEntropyLoss`` applies log-softmax itself, so pass
            ``apply_softmax=False`` to train on raw scores with that loss.
    """

    def __init__(self, nclasses: int, fsize: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float = 0.5, batch_first=True,
                 apply_softmax: bool = True):
        super().__init__()
        self.model_type = 'Transformer'
        self.batch_first = batch_first
        self.apply_softmax = apply_softmax
        self.pos_encoder = PositionalEncoding(fsize, dropout, batch_first=batch_first)
        encoder_layers = nn.TransformerEncoderLayer(fsize, nhead, d_hid, dropout, batch_first=batch_first)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, nlayers)
        self.linear = nn.Linear(fsize, nclasses)
        self.softmax = nn.Softmax(dim=1)  # normalises over the class axis (dim=1)
        self.init_weights()

    def init_weights(self) -> None:
        """Zero the classifier bias and draw its weights uniformly from [-0.1, 0.1]."""
        initrange = 0.1
        self.linear.bias.data.zero_()
        self.linear.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: torch.Tensor, src_mask: torch.Tensor = None) -> torch.Tensor:
        """
        Arguments:
            src: Tensor, shape ``[batch_size, seq_len, features]``
                 (``[seq_len, batch_size, features]`` if batch_first == False)
            src_mask: optional attention mask, shape ``[seq_len, seq_len]``;
                 a causal mask is generated when it is omitted

        Returns:
            output Tensor of shape ``[batch_size, nclasses]``
        """
        seq_dim = 1 if self.batch_first else 0
        src = self.pos_encoder(src)
        if src_mask is None:
            # Square causal mask: masked positions hold -inf, the others 0.0.
            # It is placed on the input's device rather than on a
            # notebook-level ``device`` variable.
            src_mask = nn.Transformer.generate_square_subsequent_mask(src.shape[seq_dim]).to(src.device)

        output = self.transformer_encoder(src, src_mask)
        output = self.linear(output)
        output = output.sum(dim=seq_dim)  # sum along the time-step axis
        if self.apply_softmax:
            output = self.softmax(output)
        return output  # [batch_size, nclasses]

In [None]:
class LocalTCN(nn.Module):
    """TCN backbone followed by a linear head that yields one score vector per sequence.

    The bare ``TCN`` returns one feature vector per time step, which
    ``nn.CrossEntropyLoss`` cannot pair with one label per sequence. This
    wrapper keeps the features of the last time step only and projects them
    to ``nclasses`` raw scores.

    Arguments:
        tcn_params: dict of keyword arguments forwarded unchanged to ``TCN``.
            Must contain ``"num_channels"``.
        nclasses: number of output classes.
    """

    def __init__(self, tcn_params, nclasses=2):
        super().__init__()
        self.tcn = TCN(**tcn_params)
        # NOTE(review): presumably TCN is channels-first ("NCL") unless
        # input_shape says otherwise — confirm against the installed pytorch_tcn.
        self.channels_last = tcn_params.get("input_shape", "NCL") == "NLC"
        # Assumes the backbone emits num_channels[-1] features per step, as seen
        # with num_channels=[128, 128, 64, 64] -> output [256, 50, 64]; verify
        # for other configurations.
        self.head = nn.Linear(tcn_params["num_channels"][-1], nclasses)

    def forward(self, x):
        """Return raw class scores of shape ``[batch_size, nclasses]``."""
        features = self.tcn(x)
        # Keep the features of the final time step only
        last_step = features[:, -1, :] if self.channels_last else features[:, :, -1]
        return self.head(last_step)

# Model Training

When CUDA is not responding, run the following commands:
- sudo rmmod nvidia_uvm
- sudo modprobe nvidia_uvm

In [71]:
# ******** Hyperparameters setup ********
# Architecture built further down: "tcn" or "transformer"
model_arch = "tcn"
num_epochs = 150
batch_size = 256
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [72]:
# Convert the datasets to dataloaders.
# train=True (shuffle, drop the incomplete last batch) is only wanted for the
# training loader; the validation loader uses train=False so every validation
# window is scored once and in a fixed order.
train_dataloader = train_dataset.to_dataloader(train=True, batch_size=batch_size)
valid_dataloader = valid_dataset.to_dataloader(train=False, batch_size=batch_size)

In [83]:
# Build the model selected by `model_arch`. The branches are mutually exclusive
# and an unknown name fails loudly instead of leaving `model` undefined (or
# silently pointing at a model from an earlier run of the cell).
if model_arch == "transformer":
    nclasses =  2  # Number of classes
    fsize = 4      # feature_size dimension
    d_hid = 200    # dimension of the feedforward network model in ``nn.TransformerEncoder``
    nlayers = 1    # number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder``
    nhead = 1      # number of heads in ``nn.MultiheadAttention``
    dropout = 0
    model = TransformerModel(nclasses, fsize, nhead, d_hid, nlayers, dropout).to(device)
elif model_arch == "tcn":
    fsize = 4
    # NOTE(review): with this configuration the bare TCN returns
    # [batch, timestep, 64] (see the output shape printed further down), which
    # nn.CrossEntropyLoss rejects against labels of shape [batch]
    # ("Expected target size [256, 64], got [256]"). It needs a head that
    # reduces the output to [batch, nclasses] before it can be trained.
    model = TCN(
        num_inputs = fsize,
        num_channels = [128, 128, 64, 64],
        dilations = [1, 2, 4, 8],
        kernel_size = 8,
        dropout = 0.2,
        use_norm = "layer_norm",
        activation = "relu",
        kernel_initializer = "kaiming_normal",
        use_skip_connections = True,
        input_shape = "NLC"
    )
else:
    raise ValueError(f"Unknown model_arch: {model_arch!r} (expected 'transformer' or 'tcn')")


In [76]:
# Optimisation setup: cross-entropy objective, minimised with Adam
learning_rate = 0.0001
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
print(model)

TCN(
  (downsample_skip_connection): ModuleList(
    (0-1): 2 x Conv1d(128, 64, kernel_size=(1,), stride=(1,))
    (2-3): 2 x None
  )
  (activation_skip_out): ReLU()
  (network): ModuleList(
    (0): TemporalBlock(
      (conv1): CausalConv1d(4, 128, kernel_size=(8,), stride=(1,))
      (conv2): CausalConv1d(128, 128, kernel_size=(8,), stride=(1,))
      (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (activation1): ReLU()
      (activation2): ReLU()
      (activation_final): ReLU()
      (dropout1): Dropout(p=0.2, inplace=False)
      (dropout2): Dropout(p=0.2, inplace=False)
      (downsample): Conv1d(4, 128, kernel_size=(1,), stride=(1,))
    )
    (1): TemporalBlock(
      (conv1): CausalConv1d(128, 128, kernel_size=(8,), stride=(1,), dilation=(2,))
      (conv2): CausalConv1d(128, 128, kernel_size=(8,), stride=(1,), dilation=(2,))
      (norm1): LayerNorm((128,), eps=1e-05, elementwise_affin

In [77]:
# Train for num_epochs; returns the model together with the per-epoch
# [train_loss, valid_loss, train_acc, valid_acc] history
trained_model, history = train_and_validate(model, loss_fn, optimizer, num_epochs, train_dataloader, valid_dataloader, device)

Epoch: 1/150


RuntimeError: Expected target size [256, 64], got [256]

In [86]:
# Debug: forward pass on the sampled batch
# NOTE(review): x is passed as-is, so this presumably needs model and x on the same device — confirm
output = model(x)

In [87]:
# Debug: shape of the raw model output
output.shape


torch.Size([256, 50, 64])

In [19]:
# Debug: forward pass with the batch moved to whichever device the model's
# parameters live on, rather than assuming a CUDA device is present
output = model(x.to(next(model.parameters()).device))

In [37]:
# Debug: output values only (the display carries no grad_fn)
output.data

tensor([[0.7248, 0.2752],
        [0.8109, 0.1891]], device='cuda:0')

In [20]:
# Debug: shape of the input batch (the trailing comma makes this a 1-tuple)
x.shape, #output.shape

(torch.Size([2, 50, 4]),)

In [21]:
# Debug: full output tensor, including its grad_fn
output

tensor([[0.7248, 0.2752],
        [0.8109, 0.1891]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [23]:
# torch.max over dim 1 returns (max values, their indices); the indices are
# used as the predicted class ids
ret, predictions = torch.max(output, 1)

In [29]:
# Debug only: overwrites the first label in place to exercise the accuracy code
y[0] = 1

In [38]:
# Element-wise comparison of predictions and labels. The labels are moved to
# the predictions' device instead of a hard-coded "cuda", so the cell also
# runs on CPU-only machines.
correct_counts = predictions.eq(y.to(predictions.device).view_as(predictions))

In [39]:
# Debug: per-sample correctness flags
correct_counts

tensor([False,  True], device='cuda:0')

In [43]:
# Batch accuracy: cast the boolean flags to float and average them
acc = correct_counts.type(torch.FloatTensor).mean()

2

In [45]:
# Debug: reset the running accuracy accumulator
train_acc =0

In [46]:
# Batch accuracy = mean of the correctness flags after casting them to float
acc = correct_counts.type(torch.FloatTensor).mean()

# Weight the batch accuracy by the batch size and add it to the running total
train_acc = train_acc + acc.item() * x.size(0)

In [47]:
# Debug: accumulated value (batch accuracy times batch size)
train_acc

1.0

In [48]:
# Debug: batch accuracy as a plain Python float
acc.item()

0.5

In [24]:
# Debug: standalone loss instance for checking the loss on one batch
criterion =  nn.CrossEntropyLoss()


In [25]:
# Remove every size-1 dimension from the labels.
# NOTE: torch.squeeze without a dim also removes a batch axis of size 1.
y = torch.squeeze(y)

In [26]:
# Loss on the sampled batch; the labels follow the output's device instead of
# a hard-coded "cuda", so the cell also runs on CPU-only machines
criterion(output, y.to(output.device))

tensor(0.8093, device='cuda:0', grad_fn=<NllLossBackward0>)