In [681]:
#Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [682]:
# Pick the compute device: the CUDA GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Report the name of GPU 0 so the run log records the hardware used.
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available. Using CPU.")

Using GPU: NVIDIA GeForce RTX 3060


In [683]:
# Sample sentence used as the whole corpus: the character vocabulary and the
# sliding-window training samples are both derived from it.
text = (
    "This is a simple example to demonstrate how to predict "
    "the next character using LSTM in PyTorch."
)

In [684]:
# **Taken from the course GitHub**

# Character vocabulary: set(text) drops duplicate characters and sorted() returns
# them as a list in a fixed order, so every run assigns the same index to the
# same character.
chars = sorted(set(text))
# Lookup from integer index to its character (decodes a predicted index).
ix_to_char = {i: ch for i, ch in enumerate(chars)}
# Reverse lookup from character to its integer index (encodes the text).
char_to_ix = {ch: i for i, ch in enumerate(chars)}

In [685]:
# Build the sliding-window dataset: every sample is max_length consecutive
# character indices, and its label is the index of the character that
# immediately follows that window in the text.
max_length = 30  # Maximum length of input sequences
window_starts = range(len(text) - max_length)

X = np.array(
    [[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts]
)
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

In [686]:
# Hold out 20% of the samples for validation (fixed random_state for a repeatable
# split), then turn each of the four resulting arrays into an int64 tensor on the
# selected device. train_test_split returns them in the order
# X_train, X_val, y_train, y_val, which the unpacking below relies on.
X_train, X_val, y_train, y_val = (
    torch.tensor(part, dtype=torch.long).to(device)
    for part in train_test_split(X, y, test_size=0.2, random_state=1)
)

In [687]:
# Training loop that records per-epoch training and validation results

def TL(n_epochs, optimizer, model, loss_fn, X_train, y_train, X_val, y_val, log_every=5):
    """Train `model` with one full-batch update per epoch and validate after each update.

    Args:
        n_epochs: number of epochs to run; epochs are numbered from 1.
        optimizer: optimizer whose `zero_grad()` / `step()` are called once per epoch.
        model: module called as `model(X)`; switched between train and eval mode here.
        loss_fn: callable `loss_fn(outputs, targets)` returning a scalar tensor.
        X_train, y_train: training inputs and targets, passed whole to the model
            every epoch (no mini-batching).
        X_val, y_val: validation inputs and targets, evaluated without gradients.
        log_every: a progress line is printed for epoch 1 and for every epoch
            divisible by this value. Defaults to 5.

    Returns:
        dict with keys "train_loss", "val_loss" and "val_accuracy"; each maps to a
        list of Python floats holding one entry per epoch, in epoch order.

    Raises:
        ValueError: if `log_every` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError("log_every must be a positive integer")

    history = {"train_loss": [], "val_loss": [], "val_accuracy": []}

    for epoch in range(1, n_epochs + 1):
        # Training step on the full training set
        model.train()
        optimizer.zero_grad()
        train_out = model(X_train)
        loss = loss_fn(train_out, y_train)
        loss.backward()
        optimizer.step()

        # Validation pass; no_grad avoids building a graph for evaluation
        model.eval()
        with torch.no_grad():
            val_out = model(X_val)
            val_loss = loss_fn(val_out, y_val)
            # Index of the largest score along dim 1 is the predicted class
            _, predicted = torch.max(val_out, 1)
            val_accuracy = (predicted == y_val).float().mean()

        # Note: `loss` was computed before optimizer.step(), so the recorded training
        # loss belongs to the weights the epoch started with.
        history["train_loss"].append(loss.item())
        history["val_loss"].append(val_loss.item())
        history["val_accuracy"].append(val_accuracy.item())

        if epoch == 1 or epoch % log_every == 0:
            print('Epoch: {}, Training Loss: {}, Validation Loss: {}, Validation Accuracy: {}'.format(epoch, loss.item(), val_loss.item(), val_accuracy.item()))

    return history


Question 1 - Part B: LSTM

In [688]:
# Character-level LSTM classifier
class LSTM(nn.Module):
    """Embedding -> stacked LSTM -> linear head applied to the last time step.

    Args:
        input_size: number of rows in the embedding table, i.e. how many distinct
            integer indices the model accepts.
        hidden_size: width of the embedding vectors and of the LSTM hidden state
            (the same value is used for both).
        output_size: number of scores produced per input sequence.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.lstm = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Score the next token for each sequence of integer indices in `x`.

        The LSTM is batch-first, so `x` is indexed as (batch, sequence); only the
        LSTM output at the final sequence position is passed to the linear head.
        """
        token_vectors = self.embedding(x)
        # The final hidden/cell state tuple returned by the LSTM is not needed here.
        hidden_states, _ = self.lstm(token_vectors)
        last_step = hidden_states[:, -1, :]
        return self.fc(last_step)

In [689]:
# Hyperparameters and training run for the LSTM model.

# Seed PyTorch before the model is built so the initial weights are the same on
# every run (same seed value as the train/validation split's random_state).
torch.manual_seed(1)

hidden_size = 128
# Vocabulary size is used for both the embedding table and the output layer; 4 LSTM layers.
model = LSTM(len(chars), hidden_size, len(chars), 4).to(device)
# NOTE(review): with plain SGD at this learning rate the recorded run shows the
# training loss moving only in the 4th decimal place over ~85 epochs while the
# validation accuracy stays constant — confirm whether such a small step size is intended.
a = 0.0001
epochs = 200
lossFN = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr= a)
# perf_counter is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start_time = time.perf_counter()

TL(
  n_epochs= epochs,
  optimizer= optimizer,
  model= model,
  loss_fn= lossFN,
  X_train= X_train,
  y_train= y_train,
  X_val= X_val,
  y_val= y_val
)

# CUDA kernels are launched asynchronously; wait for them to finish so the
# elapsed time covers all of the training work.
if device.type == "cuda":
    torch.cuda.synchronize()

#Getting the time
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time} Seconds")

Epoch: 1, Training Loss: 3.2543416023254395, Validation Loss: 3.22796893119812, Validation Accuracy: 0.2142857313156128
Epoch: 5, Training Loss: 3.254322052001953, Validation Loss: 3.227958917617798, Validation Accuracy: 0.2142857313156128
Epoch: 10, Training Loss: 3.2542991638183594, Validation Loss: 3.22794771194458, Validation Accuracy: 0.2142857313156128
Epoch: 15, Training Loss: 3.254274845123291, Validation Loss: 3.227935552597046, Validation Accuracy: 0.2142857313156128
Epoch: 20, Training Loss: 3.25425124168396, Validation Loss: 3.227924108505249, Validation Accuracy: 0.2142857313156128
Epoch: 25, Training Loss: 3.254228353500366, Validation Loss: 3.227912425994873, Validation Accuracy: 0.2142857313156128
Epoch: 30, Training Loss: 3.2542049884796143, Validation Loss: 3.227900743484497, Validation Accuracy: 0.2142857313156128
Epoch: 35, Training Loss: 3.254181146621704, Validation Loss: 3.2278895378112793, Validation Accuracy: 0.2142857313156128
Epoch: 40, Training Loss: 3.25415

Epoch: 50, Training Loss: 3.2541098594665527, Validation Loss: 3.2278542518615723, Validation Accuracy: 0.2142857313156128
Epoch: 55, Training Loss: 3.254087209701538, Validation Loss: 3.2278430461883545, Validation Accuracy: 0.2142857313156128
Epoch: 60, Training Loss: 3.254063844680786, Validation Loss: 3.2278313636779785, Validation Accuracy: 0.2142857313156128
Epoch: 65, Training Loss: 3.254040002822876, Validation Loss: 3.2278189659118652, Validation Accuracy: 0.2142857313156128
Epoch: 70, Training Loss: 3.2540159225463867, Validation Loss: 3.2278077602386475, Validation Accuracy: 0.2142857313156128
Epoch: 75, Training Loss: 3.2539925575256348, Validation Loss: 3.2277958393096924, Validation Accuracy: 0.2142857313156128
Epoch: 80, Training Loss: 3.2539687156677246, Validation Loss: 3.2277839183807373, Validation Accuracy: 0.2142857313156128
Epoch: 85, Training Loss: 3.253945827484131, Validation Loss: 3.2277729511260986, Validation Accuracy: 0.2142857313156128
Epoch: 90, Training 

In [690]:
# Prediction Fn
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, context_length=None):
    """Return the character the model scores highest as the continuation of `initial_str`.

    Args:
        model: module called with an integer tensor holding one encoded sequence
            (a leading batch dimension of size 1 is added here); its output is
            arg-maxed along dim 1.
        char_to_ix: mapping from character to integer index, used to encode the prompt.
        ix_to_char: mapping from integer index to character, used to decode the result.
        initial_str: prompt text; only its last `context_length` characters are used.
        context_length: number of trailing prompt characters fed to the model.
            When None, the notebook-level `max_length` is used.

    Returns:
        The single character mapped to the highest-scoring index.

    Raises:
        ValueError: if `initial_str` is empty.
        KeyError: if the prompt window contains a character missing from `char_to_ix`.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if context_length is None:
        context_length = max_length

    # Build the input on the device the model lives on, so the call works whether
    # the model is on the CPU or the GPU. A model without parameters falls back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    window = initial_str[-context_length:]
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor(
            [char_to_ix[c] for c in window], dtype=torch.long, device=target_device
        ).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
    return ix_to_char[predicted_index]

In [691]:
# Model size: element count of every parameter tensor, summed into one total
numel_list = [param.numel() for param in model.parameters()]
total_parameters = sum(numel_list)
print("Model Complexity: {}".format(total_parameters))

# Run inference on the CPU
model = model.to('cpu')

# Ask the model for the character that follows a truncated copy of the training sentence
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Model Complexity: 535066
Predicted next character: 'i'
