<a href="https://colab.research.google.com/github/hurricane195/Intro-to-Deep-Learning/blob/Homework_3/HW3_P2_1_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Build the model for rnn.LSTM and rnn.GRU for the tiny Shakespeare dataset; the data loader code is already provided.**

**Train the models for sequence lengths of 20 and 30; report and compare training loss, validation accuracy, execution time for training, and computational and model-size complexities across the two models.**

In [None]:
#Using a modified example of Dr. Tabkhi's "RNN" available at https://github.com/HamedTabkhi/Intro-to-DL/blob/main/RNN.py
#Using a modified example of Dr. Tabkhi's "RNN-CharDataset" available at https://github.com/HamedTabkhi/Intro-to-DL/blob/main/RNN-CharDataset.py
#Using a modified example of Dr. Tabkhi's "shakespeare-loader.py" available at https://github.com/HamedTabkhi/Intro-to-DL/blob/main/shakespeare-loader.py

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

import torch
from torch.utils.data import Dataset, DataLoader
import requests

In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [None]:
# Download the Tiny Shakespeare corpus (plain text) used throughout this notebook.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# A timeout keeps the cell from hanging indefinitely if the host is unreachable.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors; otherwise an error page would silently become the training text.
response.raise_for_status()
text = response.text  # This is the entire text data

**MAXIMUM LENGTH OF INPUT SEQUENCES = 20**

In [None]:
# Prepare the dataset: fix the context-window length and build the vocabulary.
sequence_length = 20
# Every distinct character in the corpus, in sorted order, defines the vocabulary.
chars = sorted(set(text))
# Lookup tables between characters and their integer ids (position in `chars`).
int_to_char = dict(enumerate(chars))
char_to_int = {ch: idx for idx, ch in int_to_char.items()}

In [None]:
# Translate the corpus into a list of integer ids, one per character.
encoded_text = list(map(char_to_int.__getitem__, text))

In [None]:
# Create sequences and targets.
# Each input is a window of `sequence_length` consecutive character ids and its
# target is the id of the character that immediately follows the window.
encoded_array = np.asarray(encoded_text)
if len(encoded_array) <= sequence_length:
    raise ValueError(
        f"Text has {len(encoded_array)} characters; need more than "
        f"sequence_length={sequence_length} to build any (sequence, target) pair."
    )

# Vectorised replacement for the per-position Python loop: take every
# length-`sequence_length` window, then drop the last one because no character
# follows it. `.copy()` turns the read-only strided view into an ordinary
# writable array.
sequences = np.lib.stride_tricks.sliding_window_view(encoded_array, sequence_length)[:-1].copy()
targets = encoded_array[sequence_length:].copy()

In [None]:
# Hold out 20% of the (sequence, target) pairs for validation; the fixed seed
# makes the random split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    sequences,
    targets,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the four NumPy splits to int64 (`torch.long`) tensors, the integer
# dtype used here for the embedding inputs and the class-index targets.
X_train, y_train, X_val, y_val = (
    torch.tensor(split, dtype=torch.long)
    for split in (X_train, y_train, X_val, y_val)
)

In [None]:
#Create a dataset class
class CharDataset(Dataset):
    """Pairs each input sequence with its target so a DataLoader can batch them.

    `sequences` and `targets` are stored as given and indexed in parallel.
    """

    def __init__(self, sequences, targets):
        self.targets = targets
        self.sequences = sequences

    def __len__(self):
        # The number of samples is the number of input sequences.
        n_samples = len(self.sequences)
        return n_samples

    def __getitem__(self, index):
        # Return the (input, target) pair stored at the same position.
        sample = self.sequences[index]
        label = self.targets[index]
        return sample, label

# Wrap the full (unsplit) sequence and target arrays in a dataset object.
dataset = CharDataset(sequences=sequences, targets=targets)

In [None]:
# Batch the training and validation splits for the training loop.
batch_size = 128

train_dataset = CharDataset(X_train, y_train)
val_dataset = CharDataset(X_val, y_val)

# Only the training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Defining the RNN model
class CharRNN(nn.Module):
    """Next-character predictor: embedding -> single-layer GRU -> linear classifier.

    Args:
        input_size: number of distinct character ids (rows of the embedding table).
        hidden_size: width of both the embedding vectors and the GRU hidden state.
        output_size: number of classes scored by the final linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Learned lookup table: each character id becomes a dense `hidden_size` vector.
        self.embedding = nn.Embedding(input_size, hidden_size)
        # `batch_first=True` makes the GRU consume and emit (batch, time, feature) tensors.
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        # Projects a GRU output vector to one logit per class.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return class logits for the character that follows each sequence in `x`.

        `x` holds character ids and is treated as (batch, time) by the layers below.
        """
        # The GRU returns the per-step outputs and the final hidden state; only
        # the per-step outputs are used here.
        step_outputs, _final_hidden = self.rnn(self.embedding(x))
        # Only the last time step matters when predicting the next character.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [None]:
# Hyperparameters
hidden_size = 100      # width of the embedding vectors and the GRU hidden state
learning_rate = 1e-3   # step size handed to the Adam optimizer
epochs = 50            # number of full passes over the training data

In [None]:
# Model, loss, and optimizer.
# The vocabulary size is both the number of input ids and the number of output classes.
model = CharRNN(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))
# Cross-entropy over the per-class logits returned by the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
#count trainable parameters of the model
def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    n_trainable = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are not counted.
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable

# Show the number of trainable parameters of `model` as this cell's output.
count_parameters(model)

73665

In [None]:
# Training the model
# Batches are moved to whichever device the model's parameters live on, so the
# loop works unchanged whether or not the model has been moved to the GPU.
# NOTE(review): the visible notebook computes `device` but never moves the
# model to it, so training appears to run on the CPU -- confirm intent.
model_device = next(model.parameters()).device

start_time = time.perf_counter()  # monotonic clock meant for measuring elapsed time
for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(model_device)
        batch_y = batch_y.to(model_device)
        optimizer.zero_grad()
        output = model(batch_X)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # argmax over the class dimension gives the predicted character id;
        # it carries no gradient, so the discouraged `.data` access is not needed.
        predicted = output.argmax(dim=1)
        total += batch_y.size(0)
        correct += (predicted == batch_y).sum().item()
    train_accuracy = correct / total
    avg_loss = total_loss / len(train_loader)  # mean of the per-batch mean losses

    # ---- Validation pass (no gradients, no weight updates) ----
    model.eval()
    total_val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X = batch_X.to(model_device)
            batch_y = batch_y.to(model_device)
            val_output = model(batch_X)
            val_loss = criterion(val_output, batch_y)
            total_val_loss += val_loss.item()
            predicted = val_output.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    val_accuracy = correct / total
    avg_val_loss = total_val_loss / len(val_loader)

    # Report the metrics after every epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_loss:.4f}, Training Accuracy: {train_accuracy:.4f}, Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

end_time = time.perf_counter()
training_time = end_time - start_time
print(f"Training time: {training_time} seconds")

Epoch 1/50, Training Loss: 1.8550, Training Accuracy: 0.4588, Validation Loss: 1.6692, Validation Accuracy: 0.5046
Epoch 2/50, Training Loss: 1.6146, Training Accuracy: 0.5171, Validation Loss: 1.5878, Validation Accuracy: 0.5242
Epoch 3/50, Training Loss: 1.5582, Training Accuracy: 0.5312, Validation Loss: 1.5559, Validation Accuracy: 0.5331
Epoch 4/50, Training Loss: 1.5286, Training Accuracy: 0.5395, Validation Loss: 1.5310, Validation Accuracy: 0.5384
Epoch 5/50, Training Loss: 1.5090, Training Accuracy: 0.5441, Validation Loss: 1.5172, Validation Accuracy: 0.5415
Epoch 6/50, Training Loss: 1.4955, Training Accuracy: 0.5477, Validation Loss: 1.5110, Validation Accuracy: 0.5439
Epoch 7/50, Training Loss: 1.4859, Training Accuracy: 0.5495, Validation Loss: 1.5039, Validation Accuracy: 0.5466
Epoch 8/50, Training Loss: 1.4770, Training Accuracy: 0.5516, Validation Loss: 1.4966, Validation Accuracy: 0.5475
Epoch 9/50, Training Loss: 1.4714, Training Accuracy: 0.5532, Validation Loss: 1

In [None]:
# Prediction function
def predict_next_char(model, char_to_int, int_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow `initial_str`.

    Args:
        model: module that maps a (1, T) tensor of character ids to logits with
            one score per class along dim 1.
        char_to_int: mapping from character to integer id.
        int_to_char: mapping from integer id back to character.
        initial_str: non-empty context string; only its last `seq_length`
            characters are fed to the model.
        seq_length: maximum number of trailing characters to use. When None
            (the default) the notebook-level `sequence_length` is used, so
            existing four-argument calls behave as before.

    Returns:
        The character whose logit is highest.

    Raises:
        ValueError: if `initial_str` is empty.
        KeyError: if the context contains a character missing from `char_to_int`.

    Note:
        Leaves `model` in evaluation mode.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = sequence_length  # global set when the dataset was prepared

    model.eval()
    context_ids = [char_to_int[c] for c in initial_str[-seq_length:]]
    # Build the input on the model's own device so the call also works when the
    # model lives on the GPU; a parameter-free model falls back to the default device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension: shape (1, T).
        initial_input = torch.tensor(context_ids, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
    return int_to_char[predicted_index]

# Predict the next character for three sample prompts.
# Each banner is generated from the prompt itself so the printed text cannot
# drift out of sync with the string that is actually fed to the model.
test_prompts = [
    ("First", "This is a simple example to demonstrate how to predict the next char"),
    ("Second", "Long live the quee"),
    ("Third", "Be quiet and do not spea"),
]
for ordinal, test_str in test_prompts:
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str)
    print(f"{ordinal.upper()} TEST STRING: {test_str}..")
    print(f"{ordinal} predicted next character: '{predicted_char}'")
    print("")

FIRST TEST STRING: This is a simple example to demonstrate how to predict the next char..
First predicted next character: 'g'

SECOND TEST STRING: Long live the quee..
Second predicted next character: 'n'

THIRD TEST STRING: Be quiet and do not spea..
Third predicted next character: 'k'



**MAXIMUM LENGTH OF INPUT SEQUENCES = 30**

In [None]:
# Prepare the dataset: fix the context-window length and build the vocabulary.
sequence_length = 30
# Every distinct character in the corpus, in sorted order, defines the vocabulary.
chars = sorted(set(text))
# Lookup tables between characters and their integer ids (position in `chars`).
int_to_char = dict(enumerate(chars))
char_to_int = {ch: idx for idx, ch in int_to_char.items()}

In [None]:
# Translate the corpus into a list of integer ids, one per character.
encoded_text = list(map(char_to_int.__getitem__, text))

In [None]:
# Create sequences and targets.
# Each input is a window of `sequence_length` consecutive character ids and its
# target is the id of the character that immediately follows the window.
encoded_array = np.asarray(encoded_text)
if len(encoded_array) <= sequence_length:
    raise ValueError(
        f"Text has {len(encoded_array)} characters; need more than "
        f"sequence_length={sequence_length} to build any (sequence, target) pair."
    )

# Vectorised replacement for the per-position Python loop: take every
# length-`sequence_length` window, then drop the last one because no character
# follows it. `.copy()` turns the read-only strided view into an ordinary
# writable array.
sequences = np.lib.stride_tricks.sliding_window_view(encoded_array, sequence_length)[:-1].copy()
targets = encoded_array[sequence_length:].copy()

In [None]:
# Hold out 20% of the (sequence, target) pairs for validation; the fixed seed
# makes the random split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    sequences,
    targets,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Convert the four NumPy splits to int64 (`torch.long`) tensors, the integer
# dtype used here for the embedding inputs and the class-index targets.
X_train, y_train, X_val, y_val = (
    torch.tensor(split, dtype=torch.long)
    for split in (X_train, y_train, X_val, y_val)
)

In [None]:
#Create a dataset class
class CharDataset(Dataset):
    """Pairs each input sequence with its target so a DataLoader can batch them.

    `sequences` and `targets` are stored as given and indexed in parallel.
    """

    def __init__(self, sequences, targets):
        self.targets = targets
        self.sequences = sequences

    def __len__(self):
        # The number of samples is the number of input sequences.
        n_samples = len(self.sequences)
        return n_samples

    def __getitem__(self, index):
        # Return the (input, target) pair stored at the same position.
        sample = self.sequences[index]
        label = self.targets[index]
        return sample, label

# Wrap the full (unsplit) sequence and target arrays in a dataset object.
dataset = CharDataset(sequences=sequences, targets=targets)

In [None]:
# Batch the training and validation splits for the training loop.
batch_size = 128

train_dataset = CharDataset(X_train, y_train)
val_dataset = CharDataset(X_val, y_val)

# Only the training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Defining the RNN model
class CharRNN(nn.Module):
    """Next-character predictor: embedding -> single-layer GRU -> linear classifier.

    Args:
        input_size: number of distinct character ids (rows of the embedding table).
        hidden_size: width of both the embedding vectors and the GRU hidden state.
        output_size: number of classes scored by the final linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Learned lookup table: each character id becomes a dense `hidden_size` vector.
        self.embedding = nn.Embedding(input_size, hidden_size)
        # `batch_first=True` makes the GRU consume and emit (batch, time, feature) tensors.
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        # Projects a GRU output vector to one logit per class.
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return class logits for the character that follows each sequence in `x`.

        `x` holds character ids and is treated as (batch, time) by the layers below.
        """
        # The GRU returns the per-step outputs and the final hidden state; only
        # the per-step outputs are used here.
        step_outputs, _final_hidden = self.rnn(self.embedding(x))
        # Only the last time step matters when predicting the next character.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [None]:
# Hyperparameters
hidden_size = 100      # width of the embedding vectors and the GRU hidden state
learning_rate = 1e-3   # step size handed to the Adam optimizer
epochs = 50            # number of full passes over the training data

In [None]:
# Model, loss, and optimizer.
# The vocabulary size is both the number of input ids and the number of output classes.
model = CharRNN(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))
# Cross-entropy over the per-class logits returned by the model.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
#count trainable parameters of the model
def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    n_trainable = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are not counted.
        if param.requires_grad:
            n_trainable += param.numel()
    return n_trainable

# Show the number of trainable parameters of `model` as this cell's output.
count_parameters(model)

73665

In [None]:
# Training the model
# Batches are moved to whichever device the model's parameters live on, so the
# loop works unchanged whether or not the model has been moved to the GPU.
# NOTE(review): the visible notebook computes `device` but never moves the
# model to it, so training appears to run on the CPU -- confirm intent.
model_device = next(model.parameters()).device

start_time = time.perf_counter()  # monotonic clock meant for measuring elapsed time
for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(model_device)
        batch_y = batch_y.to(model_device)
        optimizer.zero_grad()
        output = model(batch_X)
        loss = criterion(output, batch_y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # argmax over the class dimension gives the predicted character id;
        # it carries no gradient, so the discouraged `.data` access is not needed.
        predicted = output.argmax(dim=1)
        total += batch_y.size(0)
        correct += (predicted == batch_y).sum().item()
    train_accuracy = correct / total
    avg_loss = total_loss / len(train_loader)  # mean of the per-batch mean losses

    # ---- Validation pass (no gradients, no weight updates) ----
    model.eval()
    total_val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_X = batch_X.to(model_device)
            batch_y = batch_y.to(model_device)
            val_output = model(batch_X)
            val_loss = criterion(val_output, batch_y)
            total_val_loss += val_loss.item()
            predicted = val_output.argmax(dim=1)
            total += batch_y.size(0)
            correct += (predicted == batch_y).sum().item()
    val_accuracy = correct / total
    avg_val_loss = total_val_loss / len(val_loader)

    # Report the metrics after every epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {avg_loss:.4f}, Training Accuracy: {train_accuracy:.4f}, Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

end_time = time.perf_counter()
training_time = end_time - start_time
print(f"Training time: {training_time} seconds")

Epoch 1/50, Training Loss: 1.8511, Training Accuracy: 0.4594, Validation Loss: 1.6671, Validation Accuracy: 0.5055
Epoch 2/50, Training Loss: 1.6099, Training Accuracy: 0.5197, Validation Loss: 1.5884, Validation Accuracy: 0.5250
Epoch 3/50, Training Loss: 1.5547, Training Accuracy: 0.5333, Validation Loss: 1.5555, Validation Accuracy: 0.5342
Epoch 4/50, Training Loss: 1.5258, Training Accuracy: 0.5404, Validation Loss: 1.5360, Validation Accuracy: 0.5394
Epoch 5/50, Training Loss: 1.5066, Training Accuracy: 0.5448, Validation Loss: 1.5238, Validation Accuracy: 0.5398
Epoch 6/50, Training Loss: 1.4929, Training Accuracy: 0.5486, Validation Loss: 1.5102, Validation Accuracy: 0.5451
Epoch 7/50, Training Loss: 1.4826, Training Accuracy: 0.5512, Validation Loss: 1.4985, Validation Accuracy: 0.5499
Epoch 8/50, Training Loss: 1.4744, Training Accuracy: 0.5529, Validation Loss: 1.4988, Validation Accuracy: 0.5480
Epoch 9/50, Training Loss: 1.4680, Training Accuracy: 0.5547, Validation Loss: 1

In [None]:
# Prediction function
def predict_next_char(model, char_to_int, int_to_char, initial_str, seq_length=None):
    """Predict the single most likely character to follow `initial_str`.

    Args:
        model: module that maps a (1, T) tensor of character ids to logits with
            one score per class along dim 1.
        char_to_int: mapping from character to integer id.
        int_to_char: mapping from integer id back to character.
        initial_str: non-empty context string; only its last `seq_length`
            characters are fed to the model.
        seq_length: maximum number of trailing characters to use. When None
            (the default) the notebook-level `sequence_length` is used, so
            existing four-argument calls behave as before.

    Returns:
        The character whose logit is highest.

    Raises:
        ValueError: if `initial_str` is empty.
        KeyError: if the context contains a character missing from `char_to_int`.

    Note:
        Leaves `model` in evaluation mode.
    """
    if not initial_str:
        raise ValueError("initial_str must contain at least one character")
    if seq_length is None:
        seq_length = sequence_length  # global set when the dataset was prepared

    model.eval()
    context_ids = [char_to_int[c] for c in initial_str[-seq_length:]]
    # Build the input on the model's own device so the call also works when the
    # model lives on the GPU; a parameter-free model falls back to the default device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension: shape (1, T).
        initial_input = torch.tensor(context_ids, dtype=torch.long, device=target_device).unsqueeze(0)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
    return int_to_char[predicted_index]

# Predict the next character for three sample prompts.
# Each banner is generated from the prompt itself so the printed text cannot
# drift out of sync with the string that is actually fed to the model.
test_prompts = [
    ("First", "This is a simple example to demonstrate how to predict the next char"),
    ("Second", "Long live the quee"),
    ("Third", "Be quiet and do not spea"),
]
for ordinal, test_str in test_prompts:
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str)
    print(f"{ordinal.upper()} TEST STRING: {test_str}..")
    print(f"{ordinal} predicted next character: '{predicted_char}'")
    print("")

FIRST TEST STRING: This is a simple example to demonstrate how to predict the next char..
First predicted next character: 'g'

SECOND TEST STRING: Long live the quee..
Second predicted next character: 'n'

THIRD TEST STRING: Be quiet and do not spea..
Third predicted next character: 'k'

