In [99]:
#Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [100]:
# Device selection: use CUDA when PyTorch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Report the name of CUDA device index 0.
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available. Using CPU.")

Using GPU: NVIDIA GeForce RTX 3060


In [101]:
# Text Sample: the single sentence that serves as the whole corpus. The character
# vocabulary and every (input window, next character) pair are built from it.
text = "This is a simple example to demonstrate how to predict the next character using GRU in PyTorch."

In [102]:
# **Taken from the course GitHub**

# Vocabulary: every distinct character of `text`. `set` drops duplicates and
# `sorted` fixes the order, so each character always receives the same index
# (iteration order of a set is not something to rely on).
chars = sorted(set(text))
# Maps each unique index (integer) to its corresponding character.
ix_to_char = dict(enumerate(chars))
# The reverse mapping: each character to its unique index (integer).
char_to_ix = {ch: i for i, ch in enumerate(chars)}

In [103]:
# Preparing the dataset
# Every window of `max_length` consecutive characters is one input sample and
# the character immediately after the window is its label.
max_length = 30  # Maximum length of input sequences
n_windows = len(text) - max_length

# Encode the text once; the windows and labels are then plain slices of it.
encoded_text = [char_to_ix[ch] for ch in text]

X = np.array([encoded_text[start:start + max_length] for start in range(n_windows)])
y = np.array([encoded_text[start + max_length] for start in range(n_windows)])

In [104]:
# Splitting the dataset (80% train / 20% validation, fixed split seed) and
# converting each resulting array to a long tensor on the selected device.
# train_test_split returns the pieces in the order: X_train, X_val, y_train, y_val.
X_train, X_val, y_train, y_val = (
    torch.tensor(part, dtype=torch.long).to(device)
    for part in train_test_split(X, y, test_size=0.2, random_state=1)
)

In [105]:
#Training Loop: full-batch training with per-epoch validation; returns the metric history

def TL(n_epochs, optimizer, model, loss_fn, X_train, y_train, X_val, y_val):
  """Train `model` for `n_epochs` full-batch steps, validating after every step.

  Each epoch performs exactly one optimizer step on the whole of `X_train`,
  then evaluates loss and top-1 accuracy on `X_val` with gradients disabled.
  A progress line is printed on epoch 1 and on every 5th epoch.

  Args:
    n_epochs: number of epochs (optimizer steps) to run; 0 runs nothing.
    optimizer: optimizer already bound to `model`'s parameters.
    model: module called as `model(X)` whose output is passed to `loss_fn`.
    loss_fn: callable `loss_fn(model_output, targets)` returning a scalar tensor.
    X_train, y_train: training inputs and targets.
    X_val, y_val: validation inputs and targets.

  Returns:
    dict with keys "train_loss", "val_loss" and "val_accuracy", each a list of
    Python floats holding one entry per epoch (empty lists when n_epochs is 0).

  Note:
    The model is left in eval mode after the last epoch.
  """
  history = {"train_loss": [], "val_loss": [], "val_accuracy": []}

  for epoch in range(1, n_epochs + 1):
    #Training step (one gradient update over the full training set)
    model.train()
    optimizer.zero_grad()
    train_out = model(X_train)
    loss = loss_fn(train_out, y_train)
    loss.backward()
    optimizer.step()

    #Validation pass (uses the weights just updated above)
    model.eval()
    with torch.no_grad():
      val_out = model(X_val)
      val_loss = loss_fn(val_out, y_val)
      _, predicted = torch.max(val_out, 1)
      val_accuracy = (predicted == y_val).float().mean()

    # Convert to plain floats once; reused for both the history and the log line.
    train_loss_value = loss.item()
    val_loss_value = val_loss.item()
    val_accuracy_value = val_accuracy.item()
    history["train_loss"].append(train_loss_value)
    history["val_loss"].append(val_loss_value)
    history["val_accuracy"].append(val_accuracy_value)

    if epoch == 1 or epoch % 5 == 0:
      print('Epoch: {}, Training Loss: {}, Validation Loss: {}, Validation Accuracy: {}'.format(epoch, train_loss_value, val_loss_value, val_accuracy_value))

  return history


Question 1 - Part C: GRU

In [106]:
# GRU Model
class GRU(nn.Module):
    """Next-character classifier: embedding -> GRU -> linear head.

    Takes a batch of index sequences and returns one score vector of length
    `output_size` per sequence, computed from the GRU output at the final
    time step only.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the three layers.

        Args:
            input_size: number of rows in the embedding table.
            hidden_size: embedding width, which is also the GRU input and hidden width.
            output_size: number of scores produced per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        # batch_first=True: the GRU consumes and produces (batch, seq, feature) tensors.
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the scores for the element following each sequence in `x`."""
        token_vectors = self.embedding(x)
        step_outputs, _final_hidden = self.gru(token_vectors)
        # Only the last time step feeds the classifier.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [107]:

# Hyperparameters and training run for the GRU model.

# Seed PyTorch's RNG before the model is built so the weight initialisation is
# the same on every Restart & Run All (GPU kernels may still add small
# run-to-run numerical differences).
torch.manual_seed(1)

hidden_size = 128
# Vocabulary size is used for both the embedding table and the output layer.
model = GRU(len(chars), hidden_size, len(chars)).to(device)
a = 0.01  # learning rate
epochs = 200
lossFN = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr= a)

# perf_counter is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump.
start_time = time.perf_counter()

TL(
  n_epochs= epochs,
  optimizer= optimizer,
  model= model,
  loss_fn= lossFN,
  X_train= X_train,
  y_train= y_train,
  X_val= X_val,
  y_val= y_val
)

#Getting the time
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time} Seconds")

Epoch: 1, Training Loss: 3.303229808807373, Validation Loss: 3.2177395820617676, Validation Accuracy: 0.0
Epoch: 5, Training Loss: 3.2809367179870605, Validation Loss: 3.2178475856781006, Validation Accuracy: 0.07692307978868484
Epoch: 10, Training Loss: 3.2536051273345947, Validation Loss: 3.2180604934692383, Validation Accuracy: 0.07692307978868484
Epoch: 15, Training Loss: 3.226814031600952, Validation Loss: 3.2184197902679443, Validation Accuracy: 0.1538461595773697
Epoch: 20, Training Loss: 3.2005116939544678, Validation Loss: 3.2188668251037598, Validation Accuracy: 0.07692307978868484
Epoch: 25, Training Loss: 3.1746530532836914, Validation Loss: 3.2194149494171143, Validation Accuracy: 0.07692307978868484
Epoch: 30, Training Loss: 3.1491963863372803, Validation Loss: 3.2201321125030518, Validation Accuracy: 0.07692307978868484


Epoch: 35, Training Loss: 3.1241071224212646, Validation Loss: 3.2209322452545166, Validation Accuracy: 0.07692307978868484
Epoch: 40, Training Loss: 3.0993525981903076, Validation Loss: 3.221818685531616, Validation Accuracy: 0.07692307978868484
Epoch: 45, Training Loss: 3.07490873336792, Validation Loss: 3.222860097885132, Validation Accuracy: 0.07692307978868484
Epoch: 50, Training Loss: 3.050750732421875, Validation Loss: 3.223994255065918, Validation Accuracy: 0.07692307978868484
Epoch: 55, Training Loss: 3.026860475540161, Validation Loss: 3.225290060043335, Validation Accuracy: 0.07692307978868484
Epoch: 60, Training Loss: 3.003222942352295, Validation Loss: 3.226684808731079, Validation Accuracy: 0.07692307978868484
Epoch: 65, Training Loss: 2.9798266887664795, Validation Loss: 3.228224515914917, Validation Accuracy: 0.07692307978868484
Epoch: 70, Training Loss: 2.956662178039551, Validation Loss: 3.2299044132232666, Validation Accuracy: 0.07692307978868484
Epoch: 75, Training 

In [108]:
# Prediction Fn
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, seq_length=None):
    """Predict the character that follows `initial_str`.

    The last `seq_length` characters of `initial_str` are encoded with
    `char_to_ix`, sent through `model` as a batch of one, and the index with
    the highest score is decoded with `ix_to_char`.

    The input tensor is created on the same device as the model's parameters,
    so the function works for a model on CPU or GPU without moving it first.

    Args:
        model: module mapping a (1, seq) long tensor to a (1, n_classes) score tensor.
        char_to_ix: dict mapping each character to its integer index.
        ix_to_char: dict mapping each integer index back to its character.
        initial_str: the text to continue.
        seq_length: number of trailing characters to feed the model. Defaults
            to the notebook-level `max_length` used to build the training windows.

    Returns:
        The predicted next character.

    Raises:
        ValueError: if `seq_length` is smaller than 1.
        KeyError: if `initial_str` contains a character missing from `char_to_ix`.
    """
    if seq_length is None:
        seq_length = max_length  # window size defined in the dataset-preparation cell
    if seq_length < 1:
        # A length of 0 would make the slice below keep the whole string.
        raise ValueError("seq_length must be at least 1")

    # Follow the model's device; a model without parameters falls back to CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        window = initial_str[-seq_length:]
        indices = [char_to_ix[c] for c in window]
        # unsqueeze(0) adds the batch dimension expected by the model.
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

In [109]:
#Complexity: total number of scalar values across all of the model's parameters
numel_list = [weights.numel() for weights in model.parameters()]
total_parameters = sum(numel_list)
print("Model Complexity: {}".format(total_parameters))

# Move the model to the CPU before running the demo prediction.
model = model.to('cpu')

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, initial_str=test_str)
print(f"Predicted next character: '{predicted_char}'")

Model Complexity: 105754
Predicted next character: 'e'
