<a href="https://colab.research.google.com/github/jford59/RT1/blob/main/HW3/RT3_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Importing necessary modules
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import requests

# Download the tinyshakespeare corpus that every dataset below is built from.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors: without this, a 404/500 error page would
# silently become the "corpus" and the models would train on garbage.
response.raise_for_status()
text = response.text

# Character <-> integer lookup tables over the sorted set of distinct
# characters, so the ids are stable from run to run for the same text.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))

# The whole corpus as a list of integer ids, in original order.
encoded_text = [char_to_int[ch] for ch in text]

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Function to define dataset for given max_length
def Define_Dataset(max_length, source_text=None, mapping=None):
    """Build next-character training pairs with a stride-1 sliding window.

    Sample ``i`` consists of the ``max_length`` characters starting at
    position ``i`` (the input) and the single character that follows them
    (the label), all expressed as integer ids.

    Args:
        max_length: number of characters in each input window; must be >= 1.
        source_text: string to slice up. Defaults to the module-level ``text``.
        mapping: dict from character to integer id. Defaults to the
            module-level ``char_to_int``.

    Returns:
        A ``(sequences, labels)`` pair of int64 NumPy arrays with shapes
        ``(n, max_length)`` and ``(n,)`` where
        ``n = max(len(source_text) - max_length, 0)``.

    Raises:
        ValueError: if ``max_length`` is smaller than 1.
        KeyError: if ``source_text`` contains a character missing from
            ``mapping``.
    """
    from numpy.lib.stride_tricks import sliding_window_view

    if max_length < 1:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")
    if source_text is None:
        source_text = text
    if mapping is None:
        mapping = char_to_int

    # Encode every character exactly once instead of once per window.
    encoded = np.fromiter(
        (mapping[ch] for ch in source_text),
        dtype=np.int64,
        count=len(source_text),
    )

    # Text too short for even one (window, label) pair: return empty arrays
    # that still carry the expected rank and dtype.
    if len(source_text) <= max_length:
        return (
            np.empty((0, max_length), dtype=np.int64),
            np.empty((0,), dtype=np.int64),
        )

    # The view has one window per start position; the last window has no
    # following character to serve as its label, so it is dropped.
    windows = sliding_window_view(encoded, max_length)
    sequences = windows[:-1].copy()  # copy -> owned, writable array
    labels = encoded[max_length:].copy()
    return sequences, labels

# Build the sample arrays for context lengths 20, 30 and 50 and turn each one
# straight into an int64 (torch.long) tensor.
# Naming: despite the prefixes, trainN holds the input windows and testN holds
# the matching next-character labels returned by Define_Dataset.
train20, test20 = (
    torch.tensor(array, dtype=torch.long) for array in Define_Dataset(20)
)
train30, test30 = (
    torch.tensor(array, dtype=torch.long) for array in Define_Dataset(30)
)
train50, test50 = (
    torch.tensor(array, dtype=torch.long) for array in Define_Dataset(50)
)

# Plain (Elman) RNN next-character classifier
class CharRNN(nn.Module):
    """Embedding -> single nn.RNN layer -> linear layer on the last time step."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three layers.

        Args:
            input_size: number of rows in the embedding table.
            hidden_size: width of both the embedding vectors and the RNN state.
            output_size: number of scores produced per input sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        # Learned lookup table: each integer index becomes a dense vector of
        # length hidden_size.
        self.embedding = nn.Embedding(
            num_embeddings=input_size, embedding_dim=hidden_size
        )
        self.rnn = nn.RNN(
            input_size=hidden_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one score vector per sequence in the batch of indices ``x``."""
        # nn.RNN returns (outputs for every time step, final hidden state);
        # the final hidden state is not needed here.
        per_step, _final_hidden = self.rnn(self.embedding(x))
        # Only the last time step is used, because the task is to predict the
        # character that follows the whole sequence. With batch_first=True the
        # time axis is dimension 1, so -1 there picks the last step.
        last_step = per_step[:, -1, :]
        return self.fc(last_step)

# Hyperparameters
# NOTE(review): in the visible code all three names are assigned again
# (learning_rate = 0.001, epochs = 10) in the second "Hyperparameters" section
# before any model or optimizer is created, so the values set here do not
# reach training.
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Defining custom dataset class
class CharDataset(Dataset):
    """Index-aligned pairing of input sequences with their targets.

    Item ``i`` is the tuple ``(sequences[i], targets[i])``.
    """

    def __init__(self, sequences, targets):
        """Store the two containers after checking that they line up.

        Args:
            sequences: indexable, sized container of input sequences.
            targets: indexable, sized container with one target per sequence.

        Raises:
            ValueError: if the two containers have different lengths.
        """
        # The dataset length is taken from `sequences`, so a mismatch would
        # otherwise either drop targets silently or fail later with an
        # IndexError in the middle of an epoch.
        if len(sequences) != len(targets):
            raise ValueError(
                "sequences and targets must have the same length, got "
                f"{len(sequences)} and {len(targets)}"
            )
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        """Return the number of (sequence, target) pairs."""
        return len(self.sequences)

    def __getitem__(self, index):
        """Return the ``(sequence, target)`` pair stored at ``index``."""
        return self.sequences[index], self.targets[index]

# Wrap each (inputs, labels) tensor pair in a CharDataset, one per context length.
dataset_20 = CharDataset(train20, test20)
dataset_30 = CharDataset(train30, test30)
dataset_50 = CharDataset(train50, test50)

batch_size = 128


def _split_and_load(dataset, train_fraction=0.8):
    """Randomly split ``dataset`` and wrap both parts in DataLoaders.

    Returns, in order: train size, held-out size, train subset, held-out
    subset, shuffling train loader, non-shuffling held-out loader. Both
    loaders use the module-level ``batch_size``.
    """
    # NOTE(review): the samples are stride-1 windows over a single text, so a
    # uniformly random split places heavily overlapping windows on both sides
    # of the split - worth confirming that this is intended.
    n_train = int(len(dataset) * train_fraction)
    n_held_out = len(dataset) - n_train
    train_part, held_out_part = torch.utils.data.random_split(
        dataset, [n_train, n_held_out]
    )
    return (
        n_train,
        n_held_out,
        train_part,
        held_out_part,
        DataLoader(train_part, shuffle=True, batch_size=batch_size),
        DataLoader(held_out_part, shuffle=False, batch_size=batch_size),
    )


# 80 % / 20 % split for every context length, processed in the order 20, 30, 50.
(
    train_size20,
    test_size20,
    train20_dataset,
    test20_dataset,
    train20_loader,
    test20_loader,
) = _split_and_load(dataset_20)

(
    train_size30,
    test_size30,
    train30_dataset,
    test30_dataset,
    train30_loader,
    test30_loader,
) = _split_and_load(dataset_30)

(
    train_size50,
    test_size50,
    train50_dataset,
    test50_dataset,
    train50_loader,
    test50_loader,
) = _split_and_load(dataset_50)

# LSTM next-character classifier
class LSTMModel(nn.Module):
    """Embedding -> single nn.LSTM layer -> linear layer on the last time step."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three layers.

        Args:
            input_size: number of rows in the embedding table.
            hidden_size: width of both the embedding vectors and the LSTM state.
            output_size: number of scores produced per input sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(
            num_embeddings=input_size, embedding_dim=hidden_size
        )
        self.lstm = nn.LSTM(
            input_size=hidden_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one score vector per sequence in the batch of indices ``x``."""
        # The second return value (the final hidden and cell states) is unused.
        per_step, _final_state = self.lstm(self.embedding(x))
        # batch_first=True puts time on dimension 1; keep only the last step.
        last_step = per_step[:, -1, :]
        return self.fc(last_step)

# GRU next-character classifier
class GRUModel(nn.Module):
    """Embedding -> single nn.GRU layer -> linear layer on the last time step."""

    def __init__(self, input_size, hidden_size, output_size):
        """Create the three layers.

        Args:
            input_size: number of rows in the embedding table.
            hidden_size: width of both the embedding vectors and the GRU state.
            output_size: number of scores produced per input sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(
            num_embeddings=input_size, embedding_dim=hidden_size
        )
        self.gru = nn.GRU(
            input_size=hidden_size, hidden_size=hidden_size, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return one score vector per sequence in the batch of indices ``x``."""
        # The second return value (the final hidden state) is unused.
        per_step, _final_hidden = self.gru(self.embedding(x))
        # batch_first=True puts time on dimension 1; keep only the last step.
        last_step = per_step[:, -1, :]
        return self.fc(last_step)

# Function for the training loop
def training_loop(train, test, model, loss_fn, optimizer, epochs, target_device=None):
    """Train ``model`` for ``epochs`` epochs, evaluating after each one.

    Every epoch runs one optimisation pass over ``train`` and one gradient-free
    pass over ``test``, then prints the sample-weighted mean training loss,
    mean validation loss and validation accuracy (in percent).

    Args:
        train: iterable of ``(sequences, targets)`` tensor batches to fit on.
        test: iterable of ``(sequences, targets)`` tensor batches to evaluate on.
        model: module mapping a batch of sequences to one score per class.
        loss_fn: callable ``loss_fn(outputs, targets)`` returning a scalar
            tensor holding the batch-mean loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of full passes to run.
        target_device: device to run on. Defaults to the module-level
            ``device`` when omitted.

    Returns:
        ``(train_loss_list, val_loss_list, val_accuracy_list)`` - three lists
        with one float per epoch. A metric is ``nan`` for an epoch in which
        the corresponding loader produced no samples.
    """
    run_device = device if target_device is None else target_device
    model.to(run_device)

    train_loss_list = []
    val_loss_list = []
    val_accuracy_list = []

    for epoch in range(epochs):
        # Sums are weighted by batch size and divided by the number of samples
        # actually seen, so the averages stay correct when the last batch is
        # smaller or the loader skips samples (e.g. drop_last=True).
        train_loss_sum = 0.0
        train_seen = 0
        val_loss_sum = 0.0
        correct = 0
        total = 0

        model.train()
        for sequences, targets in train:
            sequences, targets = sequences.to(run_device), targets.to(run_device)
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            batch_len = sequences.size(0)
            train_loss_sum += loss.item() * batch_len
            train_seen += batch_len

        model.eval()
        with torch.no_grad():
            for sequences, targets in test:
                sequences, targets = sequences.to(run_device), targets.to(run_device)
                outputs = model(sequences)
                loss = loss_fn(outputs, targets)
                val_loss_sum += loss.item() * sequences.size(0)
                # The predicted class is the index of the highest score.
                predicted = outputs.argmax(dim=1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        # Guard the divisions so an empty loader reports nan instead of
        # raising ZeroDivisionError.
        train_loss = train_loss_sum / train_seen if train_seen else float("nan")
        val_loss = val_loss_sum / total if total else float("nan")
        accuracy = correct / total * 100 if total else float("nan")

        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)
        val_accuracy_list.append(accuracy)

        print(f'Epoch [{epoch + 1}/{epochs}], '
              f'Training Loss: {train_loss:.4f}, '
              f'Validation Loss: {val_loss:.4f}, '
              f'Validation Accuracy: {accuracy:.2f}%')

    # Hand the per-epoch history back to the caller; it used to be collected
    # and then discarded.
    return train_loss_list, val_loss_list, val_accuracy_list

# Function to predict the next character
def predict_next_char(model, sequence_length, char_to_int, int_to_char, test_str):
    """Return the character ``model`` scores highest as the successor of ``test_str``.

    Args:
        model: module mapping a ``(1, length)`` tensor of character ids to a
            ``(1, n_classes)`` tensor of scores.
        sequence_length: context size to feed the model. Only the last
            ``sequence_length`` characters of ``test_str`` are used; a
            non-positive or ``None`` value means "use the whole string".
        char_to_int: dict from character to integer id.
        int_to_char: dict from integer id back to character.
        test_str: non-empty prompt text.

    Returns:
        The predicted next character.

    Raises:
        ValueError: if ``test_str`` is empty.
        KeyError: if the context contains a character missing from
            ``char_to_int``.
    """
    if not test_str:
        raise ValueError("test_str must contain at least one character")

    # Keep only the trailing window. Previously sequence_length was accepted
    # but ignored, so the model saw the whole prompt regardless of the
    # context size requested.
    if sequence_length is not None and sequence_length > 0:
        context = test_str[-sequence_length:]
    else:
        context = test_str

    # Put the input on whatever device the model currently lives on, so the
    # call also works for a model that was never moved to the global device.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        encoded = torch.tensor(
            [[char_to_int[ch] for ch in context]],  # outer list = batch of one
            dtype=torch.long,
            device=model_device,
        )
        scores = model(encoded)
        predicted_index = scores.argmax(dim=1).item()
    return int_to_char[predicted_index]

# Hyperparameters shared by every run below
hidden_size = 128
learning_rate = 0.001
epochs = 10


def _announce_and_train(banner, model, optimizer, train_loader, test_loader):
    """Print ``banner``, then train ``model`` using the current module-level
    ``criterion`` and ``epochs``."""
    print(banner)
    training_loop(
        train=train_loader,
        test=test_loader,
        model=model,
        loss_fn=criterion,
        optimizer=optimizer,
        epochs=epochs,
    )


# ---------------------------------------------------------------- length 20
# Fresh LSTM and GRU (vocabulary-sized input and output), one Adam optimizer each.
LSTM20_Model = LSTMModel(len(chars), hidden_size, len(chars))
GRU20_Model = GRUModel(len(chars), hidden_size, len(chars))

criterion = nn.CrossEntropyLoss()

LSTM20_optimizer = optim.Adam(LSTM20_Model.parameters(), lr=learning_rate)
GRU20_optimizer = optim.Adam(GRU20_Model.parameters(), lr=learning_rate)

_announce_and_train(
    "LSTM Sequence 20", LSTM20_Model, LSTM20_optimizer, train20_loader, test20_loader
)
_announce_and_train(
    "GRU Sequence 20", GRU20_Model, GRU20_optimizer, train20_loader, test20_loader
)

# Ask the trained length-20 LSTM for the character following a sample prompt.
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(
    LSTM20_Model, 20, char_to_int, int_to_char, test_str
)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# ---------------------------------------------------------------- length 30
LSTM30_Model = LSTMModel(len(chars), hidden_size, len(chars))
GRU30_Model = GRUModel(len(chars), hidden_size, len(chars))

criterion = nn.CrossEntropyLoss()

LSTM30_optimizer = optim.Adam(LSTM30_Model.parameters(), lr=learning_rate)
GRU30_optimizer = optim.Adam(GRU30_Model.parameters(), lr=learning_rate)

_announce_and_train(
    "LSTM Sequence 30", LSTM30_Model, LSTM30_optimizer, train30_loader, test30_loader
)
_announce_and_train(
    "GRU Sequence 30", GRU30_Model, GRU30_optimizer, train30_loader, test30_loader
)

# Same prompt, now answered by the length-30 LSTM.
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(
    LSTM30_Model, 30, char_to_int, int_to_char, test_str
)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# ---------------------------------------------------------------- length 50
LSTM50_Model = LSTMModel(len(chars), hidden_size, len(chars))
GRU50_Model = GRUModel(len(chars), hidden_size, len(chars))

criterion = nn.CrossEntropyLoss()

LSTM50_optimizer = optim.Adam(LSTM50_Model.parameters(), lr=learning_rate)
GRU50_optimizer = optim.Adam(GRU50_Model.parameters(), lr=learning_rate)

_announce_and_train(
    "LSTM Sequence 50", LSTM50_Model, LSTM50_optimizer, train50_loader, test50_loader
)
_announce_and_train(
    "GRU Sequence 50", GRU50_Model, GRU50_optimizer, train50_loader, test50_loader
)

LSTM Sequence 20
Epoch [1/10], Training Loss: 1.8255, Validation Loss: 1.6367, Validation Accuracy: 51.07%
Epoch [2/10], Training Loss: 1.5737, Validation Loss: 1.5502, Validation Accuracy: 53.18%
Epoch [3/10], Training Loss: 1.5088, Validation Loss: 1.5067, Validation Accuracy: 54.43%
Epoch [4/10], Training Loss: 1.4728, Validation Loss: 1.4795, Validation Accuracy: 55.11%
Epoch [5/10], Training Loss: 1.4484, Validation Loss: 1.4658, Validation Accuracy: 55.43%
Epoch [6/10], Training Loss: 1.4303, Validation Loss: 1.4557, Validation Accuracy: 55.57%
Epoch [7/10], Training Loss: 1.4167, Validation Loss: 1.4408, Validation Accuracy: 56.08%
Epoch [8/10], Training Loss: 1.4049, Validation Loss: 1.4336, Validation Accuracy: 56.22%
Epoch [9/10], Training Loss: 1.3956, Validation Loss: 1.4314, Validation Accuracy: 56.39%
Epoch [10/10], Training Loss: 1.3877, Validation Loss: 1.4247, Validation Accuracy: 56.20%
GRU Sequence 20
Epoch [1/10], Training Loss: 1.7960, Validation Loss: 1.6224, Vali