In [None]:
conda install -c pytorch pytorch

#**Package imports**

In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score,classification_report,f1_score,recall_score,precision_score,roc_curve,auc,roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import torch
from torch.utils.data import Dataset,DataLoader
from torch import nn as nn


# Read Dataset

In [None]:
# Load the raw table from disk.
dataset = pd.read_csv("data.csv")
# Remove identifier columns and leftover index columns that are not model inputs.
unused_columns = ['SubjectID','SessionID','Unnamed: 0',"Unnamed: 0.1","Unnamed: 0.1.1"]
dataset = dataset.drop(columns = unused_columns)
# After the drop, the first column is the target and every other column is a feature.
dependent_variable = dataset.iloc[:,0]
independent_varaibles = dataset.iloc[:,1:]
# Standardise the features column-wise; fit_transform returns a NumPy array.
# NOTE(review): the scaler is fitted on all rows, i.e. before any
# train/validation split happens further down — confirm this is intended.
scalar = StandardScaler()
independent_varaibles = scalar.fit_transform(independent_varaibles)

In [None]:
# Number of consecutive rows that make up one sequence sample.
WINDOW_SIZE = 600
n_rows = dependent_variable.shape[0]
# Start offsets of the non-overlapping windows. Only complete windows are kept:
# a shorter trailing window would make the stacked array ragged, and np.array
# would then either raise or silently produce an object array.
window_starts = range(0,n_rows - WINDOW_SIZE + 1,WINDOW_SIZE)
# One (WINDOW_SIZE, n_columns) slice of the scaled features per window.
train_x = np.array([independent_varaibles[start:start + WINDOW_SIZE,:] for start in window_starts])
# Each window takes the label of its first row. .iloc makes the lookup
# positional, so it does not depend on the Series' index labels.
train_y = np.array([dependent_variable.iloc[start] for start in window_starts])

#Creating Custom Pytorch Dataset

In [None]:
class CustomDataset(Dataset):
  """Map-style dataset that pairs feature arrays with their labels.

  Both inputs are converted to NumPy arrays on construction, and every item
  is handed out as a pair of float32 tensors.
  """

  def __init__(self,x,y):
    """Store the feature vectors ``x`` and the labels ``y`` as NumPy arrays."""
    super().__init__()
    self.x, self.y = np.array(x), np.array(y)

  def __len__(self):
    """Return the number of samples, i.e. the size of the first axis of ``x``."""
    sample_count = self.x.shape[0]
    return sample_count

  def __getitem__(self,index):
    """Return the ``(features, label)`` pair stored at ``index`` as float32 tensors."""
    features = torch.tensor(self.x[index],dtype = torch.float32)
    target = torch.tensor(self.y[index],dtype = torch.float32)
    return (features,target)

#LSTM Architecture

In [None]:
class LSTMNetwork(nn.Module):
  """Recurrent binary classifier: a recurrent encoder followed by a dense head.

  The recurrent layer's outputs for all time steps are flattened and passed
  through Linear -> ReLU -> BatchNorm1d -> Linear -> Sigmoid, so the model
  returns one value in [0, 1] per sequence.

  Args:
    n_features: number of input features per time step.
    hidden_size: hidden state size of the recurrent layer.
    n_layers: number of stacked recurrent layers.
    seq_len: number of time steps in every input sequence. The first Linear
      layer is sized as ``seq_len * hidden_size``, so inputs must have exactly
      this many steps. The defaults (600 * 20) reproduce the previous
      hard-coded value of 12000.
  """
  def __init__(self,n_features =4 ,hidden_size = 20,n_layers = 1,seq_len = 600):
    super(LSTMNetwork,self).__init__()
    self.n_features = n_features # number of features per time step
    self.hidden_size = hidden_size # hidden state size of the recurrent layer
    self.n_layers = n_layers # number of stacked recurrent layers
    self.seq_len = seq_len # time steps per input sequence
    # NOTE(review): despite the class and attribute names this is a plain
    # nn.RNN, not an nn.LSTM. It is left unchanged because swapping the layer
    # would change the parameter shapes and invalidate saved state_dicts.
    self.lstm = nn.RNN(input_size=self.n_features,hidden_size=hidden_size,num_layers = self.n_layers,batch_first = True)
    # Derive the flattened size instead of hard-coding it, so that other
    # hidden_size / seq_len combinations work as well.
    self.fc = nn.Sequential(nn.Linear(in_features=seq_len*hidden_size,out_features = 1000),
                            nn.ReLU(),
                            nn.BatchNorm1d(1000),
                            nn.Linear(in_features = 1000,out_features = 1),
                            nn.Sigmoid())
  def forward(self,x):
    """Map a batch of sequences to probabilities.

    Args:
      x: tensor of shape (batch, seq_len, n_features); the batch axis comes
        first because the recurrent layer uses ``batch_first=True``.

    Returns:
      Tensor of shape (batch, 1) holding sigmoid outputs.
    """
    # The final hidden state is not needed; every per-step output is used.
    output,_ = self.lstm(x)
    # Flatten (batch, seq_len, hidden_size) -> (batch, seq_len*hidden_size).
    output = output.reshape(output.size(0),-1)
    output = self.fc(output)
    return output

# Training Process of LSTM Architecture

In [None]:
# time series split
tscv = TimeSeriesSplit(n_splits=5)
# batch size
batch_size = 128
# number of epochs
n_epochs = 100
# Per-epoch histories: one value per epoch, averaged over all folds.
# Loss is stored as-is; every other metric is stored as a percentage.
train_avg_loss , val_avg_loss = [],[]
train_avg_accuracy , val_avg_accuracy = [],[]
train_avg_f1score , val_avg_f1score = [],[]
train_avg_recall , val_avg_recall = [],[]
train_avg_precision , val_avg_precision = [],[]
train_avg_roc_auc , val_avg_roc_auc = [],[]
# metric name -> (train history, validation history, display scale)
histories = {
  "loss": (train_avg_loss,val_avg_loss,1),
  "accuracy": (train_avg_accuracy,val_avg_accuracy,100),
  "f1score": (train_avg_f1score,val_avg_f1score,100),
  "recall": (train_avg_recall,val_avg_recall,100),
  "precision": (train_avg_precision,val_avg_precision,100),
  "roc_auc": (train_avg_roc_auc,val_avg_roc_auc,100),
}
# determine current device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# define the model
lstm_model = LSTMNetwork().to(device)
# define optimizer used for the weight updates
optimizer = torch.optim.Adam(lstm_model.parameters())
# BCE loss expects probabilities, hence the sigmoid at the end of the model
criterion = nn.BCELoss()


def _run_pass(dataloader,training):
  """Run the model once over ``dataloader`` and return its metrics.

  Args:
    dataloader: yields ``(data, label)`` batches.
    training: when True the model is put in train mode and the optimizer
      updates the weights after every batch; when False the model is put in
      eval mode and gradients are disabled.

  Returns:
    dict with the keys "loss", "accuracy", "f1score", "recall", "precision"
    and "roc_auc", all computed over every sample seen in this pass.
  """
  # Switch BatchNorm (and any other mode-dependent layer) explicitly.
  # torch.no_grad alone does not stop BatchNorm from normalising with batch
  # statistics and updating its running statistics on validation data.
  lstm_model.train(training)
  total_loss , n_samples = 0.0 , 0
  predict , scores , ground_truth = [],[],[]
  with torch.set_grad_enabled(training):
    for data,label in dataloader:
      data = data.to(device)
      label = label.to(device)
      # forward pass; drop the trailing singleton axis -> shape (batch,)
      output = lstm_model(data).squeeze(1)
      # calculate loss (mean over the batch)
      loss = criterion(output,label)
      if training:
        # clear old gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      # BCELoss already averages over the batch, so weight it by the batch
      # length; dividing by the sample count at the end then gives the true
      # per-sample mean even when the last batch is shorter.
      total_loss += loss.item()*label.size(0)
      n_samples += label.size(0)
      probabilities = output.detach().cpu()
      scores.extend(probabilities.tolist())
      predict.extend(torch.round(probabilities).tolist())
      ground_truth.extend(label.cpu().tolist())
  return {
    "loss": total_loss/n_samples,
    "accuracy": accuracy_score(ground_truth,predict),
    "f1score": f1_score(ground_truth,predict),
    "recall": recall_score(ground_truth,predict),
    "precision": precision_score(ground_truth,predict),
    # ROC-AUC ranks samples by score, so it gets the probabilities and not
    # the thresholded 0/1 predictions.
    "roc_auc": roc_auc_score(ground_truth,scores),
  }


# The fold indices depend only on the number of samples, so compute them once.
fold_indices = list(tscv.split(train_x))
# NOTE(review): one model and one optimizer are shared by all folds and all
# epochs, so the folds are not independent evaluations — confirm this is the
# intended protocol.
for epoch in tqdm(range(n_epochs)):
  train_fold_metrics , val_fold_metrics = [],[]
  # iterate over the time-series folds (k = 5)
  for k_no,(train_index ,validation_index) in enumerate(fold_indices):
    x_train,x_validate = train_x[train_index,:] ,train_x[validation_index,:]
    y_train,y_validate = train_y[train_index],train_y[validation_index]
    # create training and validation dataset objects
    train_dataset = CustomDataset(x_train,y_train)
    validation_dataset = CustomDataset(x_validate,y_validate)
    # create the dataloaders; order is kept (no shuffling)
    train_dataloader = DataLoader(dataset = train_dataset,batch_size = batch_size,shuffle = False)
    validation_dataloader = DataLoader(dataset = validation_dataset,batch_size = batch_size , shuffle = False)
    # training pass followed by a validation pass on this fold
    train_fold_metrics.append(_run_pass(train_dataloader,training = True))
    val_fold_metrics.append(_run_pass(validation_dataloader,training = False))
  # average every metric over the folds and record it for this epoch
  for metric_name,(train_history,val_history,scale) in histories.items():
    train_history.append(np.mean([m[metric_name] for m in train_fold_metrics])*scale)
    val_history.append(np.mean([m[metric_name] for m in val_fold_metrics])*scale)


#Train vs Validation loss epoch wise

In [None]:
from matplotlib import pyplot as plt
# Overlay the per-epoch training and validation loss on one figure.
epoch_axis = list(range(n_epochs))
for history,curve_label in ((train_avg_loss,"Train Loss"),(val_avg_loss,"val Loss")):
  plt.plot(epoch_axis,history,label = curve_label)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()

# Train vs validation accuracy epoch wise

In [None]:
# Overlay the per-epoch training and validation accuracy on one figure.
epoch_axis = list(range(n_epochs))
for history,curve_label in ((train_avg_accuracy,"Train Accuracy"),(val_avg_accuracy,"val Accuracy")):
  plt.plot(epoch_axis,history,label = curve_label)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

## Training Vs Validation f1score epochs wise

In [None]:
# Overlay the per-epoch training and validation F1 score on one figure.
epoch_axis = list(range(n_epochs))
for history,curve_label in ((train_avg_f1score,"Train f1score"),(val_avg_f1score,"val f1score")):
  plt.plot(epoch_axis,history,label = curve_label)
plt.xlabel("Epochs")
plt.ylabel("f1score")
plt.legend()
plt.show()

## Training Vs Validation recall epochs wise

In [None]:
# Overlay the per-epoch training and validation recall on one figure.
epoch_axis = list(range(n_epochs))
for history,curve_label in ((train_avg_recall,"Train recall"),(val_avg_recall,"val recall")):
  plt.plot(epoch_axis,history,label = curve_label)
plt.xlabel("Epochs")
plt.ylabel("recall")
plt.legend()
plt.show()

## Training Vs Validation precision epochs wise

In [None]:
# Overlay the per-epoch training and validation precision on one figure.
epoch_axis = list(range(n_epochs))
for history,curve_label in ((train_avg_precision,"Train precision"),(val_avg_precision,"val precision")):
  plt.plot(epoch_axis,history,label = curve_label)
plt.xlabel("Epochs")
plt.ylabel("precision")
plt.legend()
plt.show()

## Training Vs Validation roc_auc epochs wise

In [None]:
# Overlay the per-epoch training and validation ROC-AUC on one figure.
epoch_axis = list(range(n_epochs))
for history,curve_label in ((train_avg_roc_auc,"Train roc_auc"),(val_avg_roc_auc,"val roc_auc")):
  plt.plot(epoch_axis,history,label = curve_label)
plt.xlabel("Epochs")
plt.ylabel("roc_auc")
plt.legend()
plt.show()

#Save trained Model

In [None]:
# Save the trained model's state_dict (its parameters and buffers, not the
# module object itself) to disk.
# NOTE(review): this line is active, so running the cell writes to this path
# and replaces any file already there; comment it out when an existing
# checkpoint should be kept. The relative "drive/MyDrive/" path presumably
# expects a mounted Google Drive (Colab) — confirm the directory exists.
torch.save(lstm_model.state_dict(),"drive/MyDrive/lstm_model.pt")