In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
import torch.nn as nn
import time


# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 20
# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Now `train_loader` and `test_loader` are ready to be used in a training loop

In [2]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        # Initialize hidden and cell states
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        # Initialize hidden state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate GRU
        out, _ = self.gru(x, h0)
        
        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

In [4]:
def train_model(model, train_loader, test_loader, device, num_epochs=10, lr=0.001):
    # Move model to the device (CPU or GPU)
    model = model.to(device)
    
    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    
    # Training loop
    model.train()
    start_time = time.time()
    for epoch in range(num_epochs):
        for sequences, targets in train_loader:
            sequences = sequences.to(device)
            targets = targets.to(device)

            # Convert sequences to one-hot encoded vectors
            one_hot_sequences = torch.zeros(sequences.size(0), sequences.size(1), input_size).to(device)
            one_hot_sequences.scatter_(2, sequences.unsqueeze(2), 1)

            # Forward pass
            outputs = model(one_hot_sequences)
            loss = criterion(outputs, targets)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    
    # Calculate execution time
    exec_time = time.time() - start_time

    # Test the model
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for sequences, targets in test_loader:
            sequences = sequences.to(device)
            targets = targets.to(device)

            # Convert sequences to one-hot encoded vectors
            one_hot_sequences = torch.zeros(sequences.size(0), sequences.size(1), input_size).to(device)
            one_hot_sequences.scatter_(2, sequences.unsqueeze(2), 1)

            outputs = model(one_hot_sequences)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        accuracy = 100 * correct / total
    
    return loss.item(), accuracy, exec_time, model

# Train LSTM and GRU models for sequence length 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = len(chars)  # Number of unique characters
hidden_size = 256
num_layers = 2
output_size = len(chars)
num_epochs = 10
lr = 0.001

lstm_model_20 = LSTMModel(input_size, hidden_size, num_layers, output_size)
gru_model_20 = GRUModel(input_size, hidden_size, num_layers, output_size)

print("Training LSTM model for sequence length 20...")
lstm_loss_20, lstm_acc_20, lstm_time_20, _ = train_model(lstm_model_20, train_loader, test_loader, device, num_epochs, lr)
print("Training GRU model for sequence length 20...")
gru_loss_20, gru_acc_20, gru_time_20, _ = train_model(gru_model_20, train_loader, test_loader, device, num_epochs, lr)

# Compare the results
print("\nComparison for Sequence Length 20:")
print(f"LSTM - Loss: {lstm_loss_20:.4f}, Accuracy: {lstm_acc_20:.2f}%, Time: {lstm_time_20:.2f} seconds")
print(f"GRU  - Loss: {gru_loss_20:.4f}, Accuracy: {gru_acc_20:.2f}%, Time: {gru_time_20:.2f} seconds")

Training LSTM model for sequence length 20...
Epoch [1/10], Loss: 1.2143
Epoch [2/10], Loss: 2.4413
Epoch [3/10], Loss: 0.8708
Epoch [4/10], Loss: 1.0888
Epoch [5/10], Loss: 1.0486
Epoch [6/10], Loss: 1.0847
Epoch [7/10], Loss: 0.9794
Epoch [8/10], Loss: 0.1467
Epoch [9/10], Loss: 0.8002
Epoch [10/10], Loss: 1.2631
Training GRU model for sequence length 20...
Epoch [1/10], Loss: 1.5718
Epoch [2/10], Loss: 1.2375
Epoch [3/10], Loss: 1.5314
Epoch [4/10], Loss: 1.3200
Epoch [5/10], Loss: 1.3027
Epoch [6/10], Loss: 1.9289
Epoch [7/10], Loss: 1.5252
Epoch [8/10], Loss: 1.1134
Epoch [9/10], Loss: 1.2726
Epoch [10/10], Loss: 0.8503

Comparison for Sequence Length 20:
LSTM - Loss: 1.2631, Accuracy: 58.97%, Time: 255.64 seconds
GRU  - Loss: 0.8503, Accuracy: 58.07%, Time: 246.63 seconds


In [5]:
# Prepare the dataset for sequence length 30
sequence_length_30 = 30

sequences_30 = []
targets_30 = []
for i in range(0, len(encoded_text) - sequence_length_30):
    seq = encoded_text[i:i+sequence_length_30]
    target = encoded_text[i+sequence_length_30]
    sequences_30.append(seq)
    targets_30.append(target)

sequences_30 = torch.tensor(sequences_30, dtype=torch.long)
targets_30 = torch.tensor(targets_30, dtype=torch.long)

dataset_30 = CharDataset(sequences_30, targets_30)

train_size_30 = int(len(dataset_30) * 0.8)
test_size_30 = len(dataset_30) - train_size_30
train_dataset_30, test_dataset_30 = torch.utils.data.random_split(dataset_30, [train_size_30, test_size_30])

train_loader_30 = DataLoader(train_dataset_30, shuffle=True, batch_size=batch_size)
test_loader_30 = DataLoader(test_dataset_30, shuffle=False, batch_size=batch_size)

# Train LSTM and GRU models for sequence length 30
lstm_model_30 = LSTMModel(input_size, hidden_size, num_layers, output_size)
gru_model_30 = GRUModel(input_size, hidden_size, num_layers, output_size)

print("Training LSTM model for sequence length 30...")
lstm_loss_30, lstm_acc_30, lstm_time_30, _ = train_model(lstm_model_30, train_loader_30, test_loader_30, device, num_epochs, lr)
print("Training GRU model for sequence length 30...")
gru_loss_30, gru_acc_30, gru_time_30, _ = train_model(gru_model_30, train_loader_30, test_loader_30, device, num_epochs, lr)

# Compare the results for sequence length 30
print("\nComparison for Sequence Length 30:")
print(f"LSTM - Loss: {lstm_loss_30:.4f}, Accuracy: {lstm_acc_30:.2f}%, Time: {lstm_time_30:.2f} seconds")
print(f"GRU  - Loss: {gru_loss_30:.4f}, Accuracy: {gru_acc_30:.2f}%, Time: {gru_time_30:.2f} seconds")

Training LSTM model for sequence length 30...
Epoch [1/10], Loss: 1.9121
Epoch [2/10], Loss: 2.4615
Epoch [3/10], Loss: 1.8835
Epoch [4/10], Loss: 1.5444
Epoch [5/10], Loss: 1.0117
Epoch [6/10], Loss: 0.8037
Epoch [7/10], Loss: 0.4261
Epoch [8/10], Loss: 1.0790
Epoch [9/10], Loss: 0.4648
Epoch [10/10], Loss: 1.8897
Training GRU model for sequence length 30...
Epoch [1/10], Loss: 1.0423
Epoch [2/10], Loss: 1.5016
Epoch [3/10], Loss: 0.6718
Epoch [4/10], Loss: 1.6727
Epoch [5/10], Loss: 1.0290
Epoch [6/10], Loss: 2.7371
Epoch [7/10], Loss: 1.6883
Epoch [8/10], Loss: 1.0107
Epoch [9/10], Loss: 0.6896
Epoch [10/10], Loss: 0.5744

Comparison for Sequence Length 30:
LSTM - Loss: 1.8897, Accuracy: 59.28%, Time: 312.79 seconds
GRU  - Loss: 0.5744, Accuracy: 58.29%, Time: 293.38 seconds


In [6]:
#b
import torch.nn.functional as F

class CustomLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, fc_layers):
        super(CustomLSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Define the fully connected layers
        self.fc_layers = nn.ModuleList()
        for i in range(len(fc_layers)):
            if i == 0:
                self.fc_layers.append(nn.Linear(hidden_size, fc_layers[i]))
            else:
                self.fc_layers.append(nn.Linear(fc_layers[i-1], fc_layers[i]))
        self.final_fc = nn.Linear(fc_layers[-1], output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]
        
        for fc_layer in self.fc_layers:
            out = F.relu(fc_layer(out))
        
        out = self.final_fc(out)
        return out

# Experiment with different hyperparameters
hidden_sizes = [128, 256, 512]
num_layers = [1, 2, 3]
fc_structures = [[256], [128, 64], [256, 128, 64]]

for hidden_size in hidden_sizes:
    for num_layer in num_layers:
        for fc_structure in fc_structures:
            print(f"Training LSTM with hidden_size={hidden_size}, num_layers={num_layer}, fc_structure={fc_structure}...")
            model = CustomLSTMModel(input_size, hidden_size, num_layer, output_size, fc_structure)
            train_loss, val_acc, train_time, _ = train_model(model, train_loader, test_loader, device, num_epochs, lr)
            print(f"Loss: {train_loss:.4f}, Accuracy: {val_acc:.2f}%, Time: {train_time:.2f} seconds\n")


Training LSTM with hidden_size=128, num_layers=1, fc_structure=[256]...
Epoch [1/10], Loss: 2.4156
Epoch [2/10], Loss: 1.6438
Epoch [3/10], Loss: 2.1643
Epoch [4/10], Loss: 1.7018
Epoch [5/10], Loss: 1.6927
Epoch [6/10], Loss: 1.5463
Epoch [7/10], Loss: 2.3809
Epoch [8/10], Loss: 0.7570
Epoch [9/10], Loss: 1.5755
Epoch [10/10], Loss: 1.1622
Loss: 1.1622, Accuracy: 57.88%, Time: 147.59 seconds

Training LSTM with hidden_size=128, num_layers=1, fc_structure=[128, 64]...
Epoch [1/10], Loss: 2.2092
Epoch [2/10], Loss: 1.2664
Epoch [3/10], Loss: 1.1880
Epoch [4/10], Loss: 1.7711
Epoch [5/10], Loss: 1.0886
Epoch [6/10], Loss: 0.6377
Epoch [7/10], Loss: 1.5098
Epoch [8/10], Loss: 1.3846
Epoch [9/10], Loss: 1.4459
Epoch [10/10], Loss: 0.8956
Loss: 0.8956, Accuracy: 57.06%, Time: 155.47 seconds

Training LSTM with hidden_size=128, num_layers=1, fc_structure=[256, 128, 64]...
Epoch [1/10], Loss: 1.8636
Epoch [2/10], Loss: 1.6921
Epoch [3/10], Loss: 1.6708
Epoch [4/10], Loss: 1.8119
Epoch [5/10],

In [7]:
class CustomGRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, fc_layers):
        super(CustomGRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        
        # Define the fully connected layers
        self.fc_layers = nn.ModuleList()
        for i in range(len(fc_layers)):
            if i == 0:
                self.fc_layers.append(nn.Linear(hidden_size, fc_layers[i]))
            else:
                self.fc_layers.append(nn.Linear(fc_layers[i-1], fc_layers[i]))
        self.final_fc = nn.Linear(fc_layers[-1], output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.gru(x, h0)
        out = out[:, -1, :]
        
        for fc_layer in self.fc_layers:
            out = F.relu(fc_layer(out))
        
        out = self.final_fc(out)
        return out

# Experiment with different hyperparameters for GRU
for hidden_size in hidden_sizes:
    for num_layer in num_layers:
        for fc_structure in fc_structures:
            print(f"Training GRU with hidden_size={hidden_size}, num_layers={num_layer}, fc_structure={fc_structure}...")
            model = CustomGRUModel(input_size, hidden_size, num_layer, output_size, fc_structure)
            train_loss, val_acc, train_time, _ = train_model(model, train_loader, test_loader, device, num_epochs, lr)
            print(f"Loss: {train_loss:.4f}, Accuracy: {val_acc:.2f}%, Time: {train_time:.2f} seconds\n")

Training GRU with hidden_size=128, num_layers=1, fc_structure=[256]...
Epoch [1/10], Loss: 1.9982
Epoch [2/10], Loss: 1.9370
Epoch [3/10], Loss: 1.5548
Epoch [4/10], Loss: 1.3857
Epoch [5/10], Loss: 1.0671
Epoch [6/10], Loss: 1.9775
Epoch [7/10], Loss: 1.6671
Epoch [8/10], Loss: 0.5999
Epoch [9/10], Loss: 0.5057
Epoch [10/10], Loss: 1.0725
Loss: 1.0725, Accuracy: 57.49%, Time: 146.27 seconds

Training GRU with hidden_size=128, num_layers=1, fc_structure=[128, 64]...
Epoch [1/10], Loss: 1.3715
Epoch [2/10], Loss: 1.9282
Epoch [3/10], Loss: 1.0398
Epoch [4/10], Loss: 0.9333
Epoch [5/10], Loss: 1.2576
Epoch [6/10], Loss: 1.1297
Epoch [7/10], Loss: 1.7954
Epoch [8/10], Loss: 1.5984
Epoch [9/10], Loss: 1.3637
Epoch [10/10], Loss: 1.4769
Loss: 1.4769, Accuracy: 56.73%, Time: 160.99 seconds

Training GRU with hidden_size=128, num_layers=1, fc_structure=[256, 128, 64]...
Epoch [1/10], Loss: 2.1779
Epoch [2/10], Loss: 2.0746
Epoch [3/10], Loss: 1.5493
Epoch [4/10], Loss: 1.7353
Epoch [5/10], Lo

In [8]:
#c
sequence_length_50 = 50

sequences_50 = []
targets_50 = []
for i in range(0, len(encoded_text) - sequence_length_50):
    seq = encoded_text[i:i+sequence_length_50]
    target = encoded_text[i+sequence_length_50]
    sequences_50.append(seq)
    targets_50.append(target)

sequences_50 = torch.tensor(sequences_50, dtype=torch.long)
targets_50 = torch.tensor(targets_50, dtype=torch.long)

dataset_50 = CharDataset(sequences_50, targets_50)

train_size_50 = int(len(dataset_50) * 0.8)
test_size_50 = len(dataset_50) - train_size_50
train_dataset_50, test_dataset_50 = torch.utils.data.random_split(dataset_50, [train_size_50, test_size_50])

train_loader_50 = DataLoader(train_dataset_50, shuffle=True, batch_size=batch_size)
test_loader_50 = DataLoader(test_dataset_50, shuffle=False, batch_size=batch_size)

In [9]:
hidden_size = 256
num_layer = 2
fc_structure = [256, 128]

print(f"Training LSTM with sequence length 50, hidden_size={hidden_size}, num_layers={num_layer}, fc_structure={fc_structure}...")
model_50 = CustomLSTMModel(input_size, hidden_size, num_layer, output_size, fc_structure)
train_loss_50, val_acc_50, train_time_50, _ = train_model(model_50, train_loader_50, test_loader_50, device, num_epochs, lr)
print(f"Loss: {train_loss_50:.4f}, Accuracy: {val_acc_50:.2f}%, Time: {train_time_50:.2f} seconds")

# Calculate model complexity
num_params = sum(p.numel() for p in model_50.parameters() if p.requires_grad)
print(f"Model Complexity (Number of Parameters): {num_params}")

Training LSTM with sequence length 50, hidden_size=256, num_layers=2, fc_structure=[256, 128]...
Epoch [1/10], Loss: 1.7243
Epoch [2/10], Loss: 1.4626
Epoch [3/10], Loss: 1.7132
Epoch [4/10], Loss: 1.4565
Epoch [5/10], Loss: 1.2884
Epoch [6/10], Loss: 1.1689
Epoch [7/10], Loss: 1.1518
Epoch [8/10], Loss: 1.3237
Epoch [9/10], Loss: 1.0500
Epoch [10/10], Loss: 0.9918
Loss: 0.9918, Accuracy: 59.84%, Time: 439.16 seconds
Model Complexity (Number of Parameters): 964161


In [10]:
class CustomGRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, fc_layers):
        super(CustomGRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        
        # Define the fully connected layers
        self.fc_layers = nn.ModuleList()
        for i in range(len(fc_layers)):
            if i == 0:
                self.fc_layers.append(nn.Linear(hidden_size, fc_layers[i]))
            else:
                self.fc_layers.append(nn.Linear(fc_layers[i-1], fc_layers[i]))
        self.final_fc = nn.Linear(fc_layers[-1], output_size)
    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.gru(x, h0)
        out = out[:, -1, :]
        
        for fc_layer in self.fc_layers:
            out = F.relu(fc_layer(out))
        
        out = self.final_fc(out)
        return out

hidden_size = 256
num_layer = 2
fc_structure = [256, 128]

print(f"Training GRU with sequence length 50, hidden_size={hidden_size}, num_layers={num_layer}, fc_structure={fc_structure}...")
model_gru_50 = CustomGRUModel(input_size, hidden_size, num_layer, output_size, fc_structure)
train_loss_gru_50, val_acc_gru_50, train_time_gru_50, _ = train_model(model_gru_50, train_loader_50, test_loader_50, device, num_epochs, lr)
print(f"Loss: {train_loss_gru_50:.4f}, Accuracy: {val_acc_gru_50:.2f}%, Time: {train_time_gru_50:.2f} seconds")

# Calculate model complexity
num_params_gru = sum(p.numel() for p in model_gru_50.parameters() if p.requires_grad)
print(f"Model Complexity (Number of Parameters): {num_params_gru}")

Training GRU with sequence length 50, hidden_size=256, num_layers=2, fc_structure=[256, 128]...
Epoch [1/10], Loss: 1.4768
Epoch [2/10], Loss: 1.2972
Epoch [3/10], Loss: 1.2812
Epoch [4/10], Loss: 1.4473
Epoch [5/10], Loss: 1.3514
Epoch [6/10], Loss: 1.4289
Epoch [7/10], Loss: 1.2534
Epoch [8/10], Loss: 1.1131
Epoch [9/10], Loss: 1.4048
Epoch [10/10], Loss: 1.3327
Loss: 1.3327, Accuracy: 58.79%, Time: 426.85 seconds
Model Complexity (Number of Parameters): 749889
