In [9]:
import requests
import torch
from torch.utils.data import Dataset, random_split, DataLoader

# Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

# Prepare the dataset
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in text]

    # Crea
    # te sequences and targets
sequence_length = 20
sequences = []
targets = []
for i in range(0, len(encoded_text) - sequence_length):
    seq = encoded_text[i:i+sequence_length]
    target = encoded_text[i+sequence_length]
    sequences.append(seq)
    targets.append(target)

# Convert to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

    # Define the dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Split the dataset into training and testing sets
dataset = CharDataset(sequences, targets)
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Create data loaders
batch_size = 128
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
    
import torch.nn as nn

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.lstm(embedded)
        output = self.fc(output[:, -1, :])  # Take the last time step's output
        return output

# Define the GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output, _ = self.gru(embedded)
        output = self.fc(output[:, -1, :])  # Take the last time step's output
        return output


In [6]:
import torch.optim as optim
import time

def train_model(model, train_loader, test_loader, device, num_epochs=10):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.to(device)
    model.train()

    for epoch in range(num_epochs):
        start_time = time.time()
        total_loss = 0
        for sequences, targets in train_loader:
            sequences, targets = sequences.to(device), targets.to(device)
            optimizer.zero_grad()
            output = model(sequences)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        
        # Evaluate the model on the test set
        model.eval()
        total, correct = 0, 0
        with torch.no_grad():
            for sequences, targets in test_loader:
                sequences, targets = sequences.to(device), targets.to(device)
                output = model(sequences)
                _, predicted = torch.max(output.data, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
        accuracy = 100 * correct / total

        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%, Time: {time.time() - start_time:.2f}s')
        model.train()


In [11]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize and train the LSTM model
input_size = len(chars)
hidden_size = 250
output_size = len(chars)
num_epochs = 10
lr = 0.005

print("Training LSTM model...")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
train_model(lstm_model, train_loader, test_loader, device, num_epochs)

print("\nTraining GRU model...")
gru_model = GRUModel(input_size, hidden_size, output_size)
train_model(gru_model, train_loader, test_loader, device, num_epochs)


Training LSTM model...
Epoch [1/10], Loss: 1.7088, Accuracy: 53.42%, Time: 66.06s
Epoch [2/10], Loss: 1.4859, Accuracy: 55.24%, Time: 66.98s
Epoch [3/10], Loss: 1.4241, Accuracy: 56.11%, Time: 70.28s
Epoch [4/10], Loss: 1.3891, Accuracy: 56.82%, Time: 72.78s
Epoch [5/10], Loss: 1.3635, Accuracy: 57.29%, Time: 71.48s
Epoch [6/10], Loss: 1.3449, Accuracy: 57.44%, Time: 68.48s
Epoch [7/10], Loss: 1.3287, Accuracy: 57.60%, Time: 68.99s
Epoch [8/10], Loss: 1.3167, Accuracy: 57.71%, Time: 69.38s
Epoch [9/10], Loss: 1.3059, Accuracy: 57.99%, Time: 69.57s
Epoch [10/10], Loss: 1.2965, Accuracy: 58.10%, Time: 69.92s

Training GRU model...
Epoch [1/10], Loss: 1.6985, Accuracy: 53.37%, Time: 59.73s
Epoch [2/10], Loss: 1.5008, Accuracy: 54.72%, Time: 62.53s
Epoch [3/10], Loss: 1.4539, Accuracy: 55.41%, Time: 62.22s
Epoch [4/10], Loss: 1.4274, Accuracy: 56.03%, Time: 63.01s
Epoch [5/10], Loss: 1.4116, Accuracy: 56.07%, Time: 63.24s
Epoch [6/10], Loss: 1.3991, Accuracy: 56.61%, Time: 65.30s
Epoch [7/

In [19]:
total_params = sum(p.numel() for p in lstm_model.parameters())
print(f'Total number of parameters in the model: {total_params}')

total_params = sum(p.numel() for p in gru_model.parameters())
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 534565
Total number of parameters in the model: 409065


In [15]:
input_size = len(chars)
hidden_size = 250
output_size = len(chars)
num_epochs = 10
lr = 0.005

print("Training LSTM model...")
lstm_model = LSTMModel(input_size, hidden_size, output_size)
train_model(lstm_model, train_loader, test_loader, device, num_epochs,)

print("\nTraining GRU model...")
gru_model = GRUModel(input_size, hidden_size, output_size)
train_model(gru_model, train_loader, test_loader, device, num_epochs,)

Training LSTM model...
Epoch [1/10], Loss: 1.7107, Accuracy: 53.30%, Time: 65.11s
Epoch [2/10], Loss: 1.4862, Accuracy: 55.37%, Time: 65.68s
Epoch [3/10], Loss: 1.4252, Accuracy: 56.16%, Time: 65.66s
Epoch [4/10], Loss: 1.3901, Accuracy: 56.87%, Time: 66.38s
Epoch [5/10], Loss: 1.3654, Accuracy: 57.25%, Time: 65.98s
Epoch [6/10], Loss: 1.3467, Accuracy: 57.34%, Time: 66.45s
Epoch [7/10], Loss: 1.3315, Accuracy: 57.62%, Time: 68.17s
Epoch [8/10], Loss: 1.3188, Accuracy: 57.70%, Time: 68.43s
Epoch [9/10], Loss: 1.3078, Accuracy: 57.78%, Time: 69.26s
Epoch [10/10], Loss: 1.2987, Accuracy: 57.92%, Time: 65.63s

Training GRU model...
Epoch [1/10], Loss: 1.6965, Accuracy: 53.36%, Time: 55.77s
Epoch [2/10], Loss: 1.5000, Accuracy: 54.89%, Time: 56.25s
Epoch [3/10], Loss: 1.4538, Accuracy: 55.64%, Time: 58.93s
Epoch [4/10], Loss: 1.4276, Accuracy: 55.95%, Time: 61.82s
Epoch [5/10], Loss: 1.4095, Accuracy: 56.35%, Time: 62.68s
Epoch [6/10], Loss: 1.3992, Accuracy: 56.39%, Time: 62.74s
Epoch [7/

In [20]:
total_params = sum(p.numel() for p in lstm_model.parameters())
print(f'Total number of parameters in the model: {total_params}')

total_params = sum(p.numel() for p in gru_model.parameters())
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 534565
Total number of parameters in the model: 409065


In [21]:
sequence_length = 30  
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in text]

sequences, targets = [], []
for i in range(len(encoded_text) - sequence_length):
    sequences.append(encoded_text[i:i+sequence_length])
    targets.append(encoded_text[i+sequence_length])

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)


class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.targets[idx]

total_size = len(dataset)
train_size = int(total_size * 0.8)
test_size = total_size - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

batch_size = 128  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [22]:
def train_model(model, train_loader, test_loader, device, num_epochs=10, lr=0.001):
  
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.to(device)  

   
    train_losses, val_losses, val_accuracies = [], [], []

    
    start_time = time.time()

    for epoch in range(num_epochs):
        
        model.train()  
        total_train_loss = 0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)  
            optimizer.zero_grad()  
            outputs = model(inputs) 
            loss = criterion(outputs, targets) 
            loss.backward() 
            optimizer.step() 
            total_train_loss += loss.item()  

        average_train_loss = total_train_loss / len(train_loader)
        train_losses.append(average_train_loss)

        
        model.eval() 
        total_val_loss = 0
        total_correct = 0

        with torch.no_grad(): 
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device) 
                outputs = model(inputs) 
                loss = criterion(outputs, targets) 
                total_val_loss += loss.item() 
                _, predicted = torch.max(outputs.data, 1) 
                total_correct += (predicted == targets).sum().item()  

        average_val_loss = total_val_loss / len(test_loader)
        val_accuracy = (total_correct / len(test_loader.dataset)) * 100
        val_losses.append(average_val_loss)
        val_accuracies.append(val_accuracy)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {average_train_loss:.4f}, Val Loss: {average_val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%')

    training_time = time.time() - start_time
    print(f'Total training time: {training_time:.2f} seconds')

    return train_losses, val_losses, val_accuracies, training_time


In [27]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size, hidden_size, output_size = len(chars), 256, len(chars)
num_epochs, lr = 10, 0.001

print("Training LSTM model...")
lstm_model = LSTMModel(input_size, hidden_size, output_size).to(device) 

lstm_metrics = train_model(lstm_model, train_loader, test_loader, device, num_epochs, lr)


lstm_train_losses, lstm_val_losses, lstm_val_acc, lstm_training_time = lstm_metrics
lstm_model_size = sum(p.numel() for p in lstm_model.parameters() if p.requires_grad) 


print("\nTraining GRU model...")
gru_model = GRUModel(input_size, hidden_size, output_size).to(device)

gru_metrics = train_model(gru_model, train_loader, test_loader, device, num_epochs, lr)

gru_train_losses, gru_val_losses, gru_val_acc, gru_training_time = gru_metrics
gru_model_size = sum(p.numel() for p in gru_model.parameters() if p.requires_grad)  


print("\nComparison of LSTM and GRU models:")
print(f"LSTM - Train Loss: {lstm_train_losses[-1]:.4f}, Val Loss: {lstm_val_losses[-1]:.4f}, Val Accuracy: {lstm_val_acc[-1]:.2f}%, Training Time: {lstm_training_time:.2f} seconds, Model Size: {lstm_model_size} parameters")
print(f"GRU - Train Loss: {gru_train_losses[-1]:.4f}, Val Loss: {gru_val_losses[-1]:.4f}, Val Accuracy: {gru_val_acc[-1]:.2f}%, Training Time: {gru_training_time:.2f} seconds, Model Size: {gru_model_size} parameters")


Training LSTM model...
Epoch 1/10, Train Loss: 1.7067, Val Loss: 1.5456, Val Accuracy: 53.28%
Epoch 2/10, Train Loss: 1.4856, Val Loss: 1.4723, Val Accuracy: 55.19%
Epoch 3/10, Train Loss: 1.4255, Val Loss: 1.4353, Val Accuracy: 56.09%
Epoch 4/10, Train Loss: 1.3897, Val Loss: 1.4109, Val Accuracy: 56.52%
Epoch 5/10, Train Loss: 1.3655, Val Loss: 1.3987, Val Accuracy: 57.02%
Epoch 6/10, Train Loss: 1.3450, Val Loss: 1.3904, Val Accuracy: 57.16%
Epoch 7/10, Train Loss: 1.3297, Val Loss: 1.3840, Val Accuracy: 57.36%
Epoch 8/10, Train Loss: 1.3169, Val Loss: 1.3765, Val Accuracy: 57.51%
Epoch 9/10, Train Loss: 1.3058, Val Loss: 1.3745, Val Accuracy: 57.52%
Epoch 10/10, Train Loss: 1.2957, Val Loss: 1.3672, Val Accuracy: 57.99%
Total training time: 1344.54 seconds

Training GRU model...
Epoch 1/10, Train Loss: 1.6957, Val Loss: 1.5506, Val Accuracy: 52.83%
Epoch 2/10, Train Loss: 1.5028, Val Loss: 1.4971, Val Accuracy: 54.52%
Epoch 3/10, Train Loss: 1.4560, Val Loss: 1.4672, Val Accuracy: 

NameError: name 'gru_train_losses' is not defined

In [24]:
lstm_train_losses, lstm_val_losses, lstm_val_acc, lstm_training_time = lstm_metrics


In [29]:
gru_train_losses, gru_val_losses, gru_val_acc, gru_training_time = gru_metrics
print(f"GRU - Train Loss: {gru_train_losses[-1]:.4f}, Val Loss: {gru_val_losses[-1]:.4f}, Val Accuracy: {gru_val_acc[-1]:.2f}%, Training Time: {gru_training_time:.2f} seconds, Model Size: {gru_model_size} parameters")


GRU - Train Loss: 1.3784, Val Loss: 1.4292, Val Accuracy: 56.06%, Training Time: 913.85 seconds, Model Size: 428097 parameters


In [30]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size, hidden_size, output_size = len(chars), 128, len(chars)
num_epochs, lr = 10, 0.001

print("Training LSTM model...")
lstm_model = LSTMModel(input_size, hidden_size, output_size).to(device) 

lstm_metrics = train_model(lstm_model, train_loader, test_loader, device, num_epochs, lr)


lstm_train_losses, lstm_val_losses, lstm_val_acc, lstm_training_time = lstm_metrics
lstm_model_size = sum(p.numel() for p in lstm_model.parameters() if p.requires_grad) 


print("\nTraining GRU model...")
gru_model = GRUModel(input_size, hidden_size, output_size).to(device)

gru_metrics = train_model(gru_model, train_loader, test_loader, device, num_epochs, lr)

gru_train_losses, gru_val_losses, gru_val_acc, gru_training_time = gru_metrics
gru_model_size = sum(p.numel() for p in gru_model.parameters() if p.requires_grad)  


print("\nComparison of LSTM and GRU models:")
print(f"LSTM - Train Loss: {lstm_train_losses[-1]:.4f}, Val Loss: {lstm_val_losses[-1]:.4f}, Val Accuracy: {lstm_val_acc[-1]:.2f}%, Training Time: {lstm_training_time:.2f} seconds, Model Size: {lstm_model_size} parameters")
print(f"GRU - Train Loss: {gru_train_losses[-1]:.4f}, Val Loss: {gru_val_losses[-1]:.4f}, Val Accuracy: {gru_val_acc[-1]:.2f}%, Training Time: {gru_training_time:.2f} seconds, Model Size: {gru_model_size} parameters")


Training LSTM model...
Epoch 1/10, Train Loss: 1.8172, Val Loss: 1.6266, Val Accuracy: 51.31%
Epoch 2/10, Train Loss: 1.5672, Val Loss: 1.5412, Val Accuracy: 53.40%
Epoch 3/10, Train Loss: 1.5032, Val Loss: 1.5037, Val Accuracy: 54.46%
Epoch 4/10, Train Loss: 1.4679, Val Loss: 1.4781, Val Accuracy: 55.16%
Epoch 5/10, Train Loss: 1.4441, Val Loss: 1.4631, Val Accuracy: 55.45%
Epoch 6/10, Train Loss: 1.4271, Val Loss: 1.4527, Val Accuracy: 55.61%
Epoch 7/10, Train Loss: 1.4127, Val Loss: 1.4455, Val Accuracy: 56.09%
Epoch 8/10, Train Loss: 1.4021, Val Loss: 1.4400, Val Accuracy: 56.09%
Epoch 9/10, Train Loss: 1.3928, Val Loss: 1.4326, Val Accuracy: 56.13%
Epoch 10/10, Train Loss: 1.3848, Val Loss: 1.4272, Val Accuracy: 56.43%
Total training time: 477.24 seconds

Training GRU model...
Epoch 1/10, Train Loss: 1.7944, Val Loss: 1.6224, Val Accuracy: 51.36%
Epoch 2/10, Train Loss: 1.5687, Val Loss: 1.5527, Val Accuracy: 53.24%
Epoch 3/10, Train Loss: 1.5144, Val Loss: 1.5170, Val Accuracy: 5

In [31]:
sequence_length = 50
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in text]

sequences, targets = [], []
for i in range(len(encoded_text) - sequence_length):
    sequences.append(encoded_text[i:i+sequence_length])
    targets.append(encoded_text[i+sequence_length])

sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)


class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.targets[idx]

total_size = len(dataset)
train_size = int(total_size * 0.8)
test_size = total_size - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

batch_size = 128  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [32]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size, hidden_size, output_size = len(chars), 128, len(chars)
num_epochs, lr = 10, 0.001

print("Training LSTM model...")
lstm_model = LSTMModel(input_size, hidden_size, output_size).to(device) 

lstm_metrics = train_model(lstm_model, train_loader, test_loader, device, num_epochs, lr)


lstm_train_losses, lstm_val_losses, lstm_val_acc, lstm_training_time = lstm_metrics
lstm_model_size = sum(p.numel() for p in lstm_model.parameters() if p.requires_grad) 


print("\nTraining GRU model...")
gru_model = GRUModel(input_size, hidden_size, output_size).to(device)

gru_metrics = train_model(gru_model, train_loader, test_loader, device, num_epochs, lr)

gru_train_losses, gru_val_losses, gru_val_acc, gru_training_time = gru_metrics
gru_model_size = sum(p.numel() for p in gru_model.parameters() if p.requires_grad)  


print("\nComparison of LSTM and GRU models:")
print(f"LSTM - Train Loss: {lstm_train_losses[-1]:.4f}, Val Loss: {lstm_val_losses[-1]:.4f}, Val Accuracy: {lstm_val_acc[-1]:.2f}%, Training Time: {lstm_training_time:.2f} seconds, Model Size: {lstm_model_size} parameters")
print(f"GRU - Train Loss: {gru_train_losses[-1]:.4f}, Val Loss: {gru_val_losses[-1]:.4f}, Val Accuracy: {gru_val_acc[-1]:.2f}%, Training Time: {gru_training_time:.2f} seconds, Model Size: {gru_model_size} parameters")


Training LSTM model...
Epoch 1/10, Train Loss: 1.8213, Val Loss: 1.6333, Val Accuracy: 51.42%
Epoch 2/10, Train Loss: 1.5723, Val Loss: 1.5441, Val Accuracy: 53.51%
Epoch 3/10, Train Loss: 1.5064, Val Loss: 1.5040, Val Accuracy: 54.32%
Epoch 4/10, Train Loss: 1.4695, Val Loss: 1.4780, Val Accuracy: 55.14%
Epoch 5/10, Train Loss: 1.4450, Val Loss: 1.4605, Val Accuracy: 55.64%
Epoch 6/10, Train Loss: 1.4270, Val Loss: 1.4492, Val Accuracy: 55.85%
Epoch 7/10, Train Loss: 1.4124, Val Loss: 1.4401, Val Accuracy: 55.97%
Epoch 8/10, Train Loss: 1.4013, Val Loss: 1.4347, Val Accuracy: 56.26%
Epoch 9/10, Train Loss: 1.3915, Val Loss: 1.4258, Val Accuracy: 56.56%
Epoch 10/10, Train Loss: 1.3835, Val Loss: 1.4231, Val Accuracy: 56.67%
Total training time: 499.12 seconds

Training GRU model...
Epoch 1/10, Train Loss: 1.7945, Val Loss: 1.6201, Val Accuracy: 51.64%
Epoch 2/10, Train Loss: 1.5687, Val Loss: 1.5489, Val Accuracy: 53.39%
Epoch 3/10, Train Loss: 1.5148, Val Loss: 1.5163, Val Accuracy: 5