In [334]:
#Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

In [335]:
# Device selection: use the first CUDA GPU when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
if use_cuda:
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is not available. Using CPU.")

Using GPU: NVIDIA GeForce RTX 3060


In [336]:
# Sample sentence used as the whole corpus for the character-level model.
text = (
    "This is a simple example to demonstrate how to predict "
    "the next character using RNN in PyTorch."
)

In [337]:
# **Taken from the course GitHub**

# Vocabulary: set(text) removes duplicate characters and sorted() returns them
# as a list in a deterministic order, so no intermediate list() is needed.
chars = sorted(set(text))
# Maps each integer index back to its character (used to decode predictions).
ix_to_char = {i: ch for i, ch in enumerate(chars)}
# Maps each character to its unique integer index (used to encode the text).
char_to_ix = {ch: i for i, ch in enumerate(chars)}

In [338]:
# Build the dataset with a sliding window: every run of `max_length` characters
# is one input sample and the character right after it is the label.
max_length = 30  # number of characters in each input window
window_starts = range(len(text) - max_length)

X = np.array(
    [[char_to_ix[ch] for ch in text[start:start + max_length]] for start in window_starts]
)
y = np.array([char_to_ix[text[start + max_length]] for start in window_starts])

In [339]:
# Hold out 20% of the windows for validation; random_state pins the shuffle.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=1)

# Convert every split to an int64 tensor and move it to the selected device.
X_train, y_train, X_val, y_val = (
    torch.tensor(split, dtype=torch.long).to(device)
    for split in (X_train, y_train, X_val, y_val)
)

In [340]:
# Training loop: full-batch training with per-epoch validation.
# Per-epoch metrics are stored in lists and returned to the caller.

def TL(n_epochs, optimizer, model, loss_fn, X_train, y_train, X_val, y_val, log_every=5):
    """Train `model` for `n_epochs` full-batch steps and validate after each one.

    Each epoch performs one optimizer step on the whole training set, then
    evaluates the loss and accuracy on the validation set without gradients.

    Args:
        n_epochs: Number of epochs (one optimizer step per epoch).
        optimizer: Optimizer already bound to `model`'s parameters.
        model: Callable module mapping `X_*` to per-class scores.
        loss_fn: Callable taking (model output, targets) and returning a scalar loss.
        X_train, y_train: Training inputs and targets.
        X_val, y_val: Validation inputs and targets.
        log_every: Print metrics on epoch 1 and on every `log_every`-th epoch.
            Values <= 0 print only epoch 1. Defaults to 5.

    Returns:
        dict with keys "train_loss", "val_loss" and "val_accuracy", each a list
        of Python floats with one entry per epoch.
    """
    history = {"train_loss": [], "val_loss": [], "val_accuracy": []}

    for epoch in range(1, n_epochs + 1):
        # Training step
        model.train()
        optimizer.zero_grad()
        train_out = model(X_train)
        loss = loss_fn(train_out, y_train)
        loss.backward()
        optimizer.step()

        # Validation step (no gradient tracking)
        model.eval()
        with torch.no_grad():
            val_out = model(X_val)
            val_loss = loss_fn(val_out, y_val)
            predicted = torch.argmax(val_out, dim=1)
            val_accuracy = (predicted == y_val).float().mean()

        # Store plain floats so the history does not hold tensors or graph references.
        train_loss_value = loss.item()
        val_loss_value = val_loss.item()
        val_accuracy_value = val_accuracy.item()
        history["train_loss"].append(train_loss_value)
        history["val_loss"].append(val_loss_value)
        history["val_accuracy"].append(val_accuracy_value)

        if epoch == 1 or (log_every > 0 and epoch % log_every == 0):
            print('Epoch: {}, Training Loss: {}, Validation Loss: {}, Validation Accuracy: {}'.format(epoch, train_loss_value, val_loss_value, val_accuracy_value))

    return history


Question 1 - Part A: RNN

In [341]:
# RNN Model: embedding -> nn.RNN -> linear layer applied to the last time step
class RNN(nn.Module):
    """Next-token classifier built from an embedding, an `nn.RNN` and a linear head.

    Args:
        input_size: Number of distinct input indices (rows of the embedding table).
        hidden_size: Width of both the embedding vectors and the RNN hidden state.
        output_size: Number of scores produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.rnn = nn.RNN(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear head's scores for the final time step of each sequence.

        `x` holds integer indices laid out batch-first (the RNN is built with
        `batch_first=True`), so axis 1 of the RNN output is the time axis.
        """
        step_outputs, _ = self.rnn(self.embedding(x))
        last_step = step_outputs[:, -1, :]  # keep only the final time step
        return self.fc(last_step)

In [342]:

# Hyperparameters
hidden_size = 128
a = 0.005  # learning rate passed to SGD
epochs = 200

# Model, loss and optimizer (vocabulary size is both the input and output size)
model = RNN(len(chars), hidden_size, len(chars)).to(device)
lossFN = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=a)

# Train and time the whole run
start_time = time.time()
TL(epochs, optimizer, model, lossFN, X_train, y_train, X_val, y_val)
end_time = time.time()

elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time} Seconds")

Epoch: 1, Training Loss: 3.2679808139801025, Validation Loss: 3.334357976913452, Validation Accuracy: 0.0
Epoch: 5, Training Loss: 3.2368111610412598, Validation Loss: 3.3304052352905273, Validation Accuracy: 0.0
Epoch: 10, Training Loss: 3.1983234882354736, Validation Loss: 3.3256800174713135, Validation Accuracy: 0.0
Epoch: 15, Training Loss: 3.1603190898895264, Validation Loss: 3.3210721015930176, Validation Accuracy: 0.0
Epoch: 20, Training Loss: 3.1227896213531494, Validation Loss: 3.3166086673736572, Validation Accuracy: 0.0
Epoch: 25, Training Loss: 3.0857341289520264, Validation Loss: 3.312267303466797, Validation Accuracy: 0.0
Epoch: 30, Training Loss: 3.04913067817688, Validation Loss: 3.3081157207489014, Validation Accuracy: 0.0
Epoch: 35, Training Loss: 3.0129356384277344, Validation Loss: 3.3041234016418457, Validation Accuracy: 0.0
Epoch: 40, Training Loss: 2.977191686630249, Validation Loss: 3.300262928009033, Validation Accuracy: 0.0
Epoch: 45, Training Loss: 2.94188261

Epoch: 85, Training Loss: 2.6739208698272705, Validation Loss: 3.272500514984131, Validation Accuracy: 0.0
Epoch: 90, Training Loss: 2.6421754360198975, Validation Loss: 3.270183563232422, Validation Accuracy: 0.0
Epoch: 95, Training Loss: 2.6108310222625732, Validation Loss: 3.2681641578674316, Validation Accuracy: 0.0
Epoch: 100, Training Loss: 2.579875946044922, Validation Loss: 3.266221761703491, Validation Accuracy: 0.07692307978868484
Epoch: 105, Training Loss: 2.5493035316467285, Validation Loss: 3.264474391937256, Validation Accuracy: 0.07692307978868484
Epoch: 110, Training Loss: 2.519106388092041, Validation Loss: 3.26292085647583, Validation Accuracy: 0.07692307978868484
Epoch: 115, Training Loss: 2.489307403564453, Validation Loss: 3.261481761932373, Validation Accuracy: 0.07692307978868484
Epoch: 120, Training Loss: 2.459907054901123, Validation Loss: 3.260287284851074, Validation Accuracy: 0.07692307978868484
Epoch: 125, Training Loss: 2.430887460708618, Validation Loss: 

In [343]:
# Prediction Fn
def predict_next_char(model, char_to_ix, ix_to_char, initial_str, context_length=None):
    """Predict the character that follows `initial_str`.

    Only the last `context_length` characters of `initial_str` are encoded and
    fed to the model as a batch of one; the index with the highest score is
    decoded with `ix_to_char`.

    Args:
        model: Module mapping a (1, seq) int64 tensor to per-class scores.
        char_to_ix: Mapping from character to integer index.
        ix_to_char: Mapping from integer index back to character.
        initial_str: Non-empty prompt string.
        context_length: Number of trailing characters to use. When None, the
            notebook-level `max_length` is used, as before.

    Returns:
        The predicted next character.

    Raises:
        ValueError: If `initial_str` is empty.
        KeyError: If the prompt contains a character missing from `char_to_ix`.
    """
    if context_length is None:
        context_length = max_length  # window size defined in the dataset cell
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")

    # Build the input on the model's device so this also works for a CUDA model;
    # a model without parameters falls back to the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        indices = [char_to_ix[c] for c in initial_str[-context_length:]]
        initial_input = torch.tensor(indices, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
    return ix_to_char[predicted_index]

In [344]:
# Complexity: total number of elements across all model parameters
numel_list = [param.numel() for param in model.parameters()]
print("Model Complexity: {}".format(sum(numel_list)))

# Move the model to the CPU before calling the prediction helper
model = model.cpu()

# Predicting the next character for a truncated copy of the training sentence
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, char_to_ix, ix_to_char, test_str)
print(f"Predicted next character: '{predicted_char}'")

Model Complexity: 39449
Predicted next character: 'a'
