In [451]:
import pandas as pd
import numpy as np
from transformers import pipeline
import torch
import spacy
import tqdm as notebook_tqdm
from torchinfo import summary

warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

import  torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

## Goal of this Notebook
- Provide Skeleton of the pytorch training Architecture. 
- Understanding the shapes of different layers 
- Understanding the datatype needed at each step 

In [452]:
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
alpha2int = {alphabet[i]:i for i in range(len(alphabet))}
int2alpha = {i:alphabet[i] for i in range(len(alphabet))}

In [453]:
X = []
y = []

for i in range(1,len(alphabet)):
    print(f'{alphabet[i-1]} ---> {alphabet[i]}')
    X.append(alpha2int[alphabet[i-1]])
    y.append(alpha2int[alphabet[i]])

A ---> B
B ---> C
C ---> D
D ---> E
E ---> F
F ---> G
G ---> H
H ---> I
I ---> J
J ---> K
K ---> L
L ---> M
M ---> N
N ---> O
O ---> P
P ---> Q
Q ---> R
R ---> S
S ---> T
T ---> U
U ---> V
V ---> W
W ---> X
X ---> Y
Y ---> Z


In [454]:
seq_length=1
# reshape X to be [samples, time steps, features]
X_train = np.reshape(X, (len(X), seq_length, 1))
X_train.shape

(25, 1, 1)

#### Creating the Dataset Object

In [455]:
class chardataset(Dataset):
    def __init__(self, X,y):
        # Initialize the dataset 
        self.X_train = torch.tensor(X,dtype=torch.float32)  # X is a numpy array converted to a tensor of type float
        self.y_train = torch.tensor(y,dtype=torch.long)  # y is a numpy array converted to a tensor of type long
        
    def __len__(self):
        return len(self.X_train)

    def __getitem__(self, index):
        text = self.X_train[index] 
        label = self.y_train[index]
    
        

        return text, label # return the item at index

In [456]:
training_set = chardataset(X_train,y)

#### DataLoader  
- Loading the Dataset Object in the the DataLoader 

In [457]:
train_params = {'batch_size': 1,
                'shuffle': True,
                'num_workers': 0
                }


training_loader = DataLoader(training_set, **train_params) # create a dataloader for the training set

#### Creating Custom Model LSTM

In [458]:
class charPred(nn.Module):
    def __init__(self, features, n_hidden, n_layers, n_outputs):
        super(charPred, self).__init__()
        self.T = seq_length # length of the sequence
        self.L = n_layers # number of layers
        self.D = features # input dimension[features] eg. x1, x2, x3
        self.M = n_hidden # hidden layer dimension
        self.K = n_outputs # output dimension
        self.lstm = nn.LSTM(
            input_size=self.D, #The number of expected features in one sameple. ex. x1, x2, x3
            hidden_size=n_hidden, #The number of features in the hidden state h.
            num_layers=n_layers, #The number of recurrent layers.
            batch_first=True
            ) #If set True, he input and output tensors are provided as (batch, seq_len, feature)
            #output of the LSTM is (batch_size, seq_len, hidden_size) 
        self.fc = nn.Linear(self.M, self.K) # fully connected layer of shape (M, K)
        
    
    def forward(self, X):
        # initial hidden states
        
        out, hidden = self.lstm(X) # out is of shape (batch_size, seq_len, hidden_size)
        out = self.fc(out[:, -1, :]) # out is of shape (batch_size, hidden_size)
    
        
        return out

#### Cost Function

In [459]:
model = charPred(features=1, n_hidden=128, n_layers=2, n_outputs=26)
cost_function = torch.nn.CrossEntropyLoss() # loss function expects logits and labels to be of shape (batch_size, num_classes) and (batch_size,)

In [460]:
for param in model.parameters():
    print(param.shape) #shape of the parameters of the model matirx 

torch.Size([512, 1])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([512, 128])
torch.Size([512, 128])
torch.Size([512])
torch.Size([512])
torch.Size([26, 128])
torch.Size([26])


#### Optimization

In [461]:
epochs = 1201 # number of epochs
learning_rate = 0.01 # learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # optimizer is a class that implements an update rule for parameters

#### Training 

In [462]:
loss_list = []
loss_per_epoch = []
accuracy_list = []
accuracy_per_epoch = []
for epoch in range(epochs):
    for i, data in enumerate(training_loader): # data is a tuple of (text, label)
        # Forward pass
        y_pred = model(data[0]).reshape(-1,26) # reshape the output to be (batch_size, num_classes)
        
        loss = cost_function(y_pred,data[:][1]) # calculate the loss using the cost function shape (batch_size, num_classes) and (batch_size,)
        loss_list.append(loss.item()) # append the loss to the list of losses for each epoch (to be used for plotting)
        # Backward pass
        optimizer.zero_grad() # set all gradients to zero before updating the parameters (to prevent gradient accumulation)
        # Backpropagation
        loss.backward() # backpropagate the loss to the model
        # Update weights
        optimizer.step() # update the weights of the model using the optimizer

        pred = torch.argmax(y_pred, dim=1) # get the index of the max logit
        #print(pred)
        #print(pred.shape)
        corrects = torch.sum(pred == data[1]) # calculate the number of correct predictions
        accuracy_list.append(corrects.item()/len(data[1])) # append the accuracy to the list of accuracies for each epoch (to be used for plotting)
    

    loss_per_epoch.append(np.mean(loss_list))
    accuracy_per_epoch.append(np.mean(accuracy_list))
    if epoch % 100 == 0:
        print(f'Epoch: {epoch+1}, Step: {i+1}, Loss: {np.mean(loss_list):.4f} , Accuracy: {np.mean(accuracy_list):.4f}')

Epoch: 1, Step: 25, Loss: 3.3621 , Accuracy: 0.0000
Epoch: 101, Step: 25, Loss: 1.4832 , Accuracy: 0.3830
Epoch: 201, Step: 25, Loss: 1.1161 , Accuracy: 0.5459
Epoch: 301, Step: 25, Loss: 0.9463 , Accuracy: 0.6249
Epoch: 401, Step: 25, Loss: 0.8335 , Accuracy: 0.6823
Epoch: 501, Step: 25, Loss: 0.7563 , Accuracy: 0.7182
Epoch: 601, Step: 25, Loss: 0.7109 , Accuracy: 0.7383
Epoch: 701, Step: 25, Loss: 0.6624 , Accuracy: 0.7609
Epoch: 801, Step: 25, Loss: 0.6219 , Accuracy: 0.7776
Epoch: 901, Step: 25, Loss: 0.5982 , Accuracy: 0.7919
Epoch: 1001, Step: 25, Loss: 0.5720 , Accuracy: 0.8047
Epoch: 1101, Step: 25, Loss: 0.5593 , Accuracy: 0.8122
Epoch: 1201, Step: 25, Loss: 0.5375 , Accuracy: 0.8217
