<a href="https://colab.research.google.com/github/YousefAbua/Intro-To-DL/blob/main/HW3/HW3_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import numpy as np
import requests

In [2]:
# Prefer the GPU when one is available; fall back to the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Setup dataset: download the Tiny Shakespeare corpus.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() prevents an HTTP error page from silently becoming
# the training corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()
text = response.text

# Character-level vocabulary: each distinct character gets an integer id.
# Sorting makes the id assignment depend only on the corpus contents.
chars = sorted(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

Using device: cuda


In [3]:
def Define_Dataset(max_length, source_text=None, vocab=None):
    """Build next-character training pairs with a sliding window.

    Every run of ``max_length`` consecutive characters becomes one input row,
    and the character immediately after that run becomes its target.

    Args:
        max_length: Number of characters in each input window (must be >= 1).
        source_text: Text to slice. Defaults to the notebook-level ``text``.
        vocab: Mapping from character to integer id. Defaults to the
            notebook-level ``char_to_int``.

    Returns:
        ``(x, y)`` where ``x`` is an int64 array of shape
        ``(len(source_text) - max_length, max_length)`` and ``y`` is an int64
        array of shape ``(len(source_text) - max_length,)``. When the text is
        not longer than ``max_length`` both arrays are empty but keep those
        ranks and dtype.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
        KeyError: If ``source_text`` contains a character missing from ``vocab``.
    """
    from numpy.lib.stride_tricks import sliding_window_view

    if max_length < 1:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")

    # Fall back to the notebook-level corpus / vocabulary when not supplied.
    if source_text is None:
        source_text = text
    if vocab is None:
        vocab = char_to_int

    # Encode the corpus exactly once instead of once per window.
    encoded = np.fromiter(
        (vocab[ch] for ch in source_text),
        dtype=np.int64,
        count=len(source_text),
    )

    n_samples = len(encoded) - max_length
    if n_samples <= 0:
        # Not enough text for a single (window, next-char) pair.
        return (
            np.empty((0, max_length), dtype=np.int64),
            np.empty((0,), dtype=np.int64),
        )

    # sliding_window_view yields len - max_length + 1 windows; the final one
    # has no following character, so it is dropped. copy() turns the
    # read-only strided view into an ordinary writable array.
    windows = sliding_window_view(encoded, max_length)
    x = windows[:n_samples].copy()
    y = encoded[max_length:].copy()
    return x, y

# For each context length, build the windowed samples and convert both the
# inputs and the targets to LongTensors in a single step.
x20, y20 = (
    torch.tensor(array, dtype=torch.long) for array in Define_Dataset(20)
)  # Max Length = 20

x30, y30 = (
    torch.tensor(array, dtype=torch.long) for array in Define_Dataset(30)
)  # Max Length = 30

x50, y50 = (
    torch.tensor(array, dtype=torch.long) for array in Define_Dataset(50)
)  # Max Length = 50

In [4]:
class CharDataset(Dataset):
    """Dataset that pairs each input sequence with its target.

    ``sequences`` and ``targets`` are indexed in parallel: item ``i`` is
    ``(sequences[i], targets[i])``.
    """

    def __init__(self, sequences, targets):
        # Only references are stored; nothing is copied or validated.
        self.sequences, self.targets = sequences, targets

    def __len__(self):
        # The sample count is taken from the inputs container.
        n_samples = len(self.sequences)
        return n_samples

    def __getitem__(self, index):
        sample = self.sequences[index]
        label = self.targets[index]
        return sample, label

# Wrap each (inputs, targets) tensor pair in a CharDataset, one per context length.
dataset_20, dataset_30, dataset_50 = (
    CharDataset(inputs, labels)
    for inputs, labels in ((x20, y20), (x30, y30), (x50, y50))
)

In [5]:
batch_size = 128
TRAIN_FRACTION = 0.8  # share of samples used for training; the rest is held out
SPLIT_SEED = 42       # fixed seed so the held-out set is the same on every run


def _split_and_load(dataset, batch_size, train_fraction=TRAIN_FRACTION, seed=SPLIT_SEED):
    """Split ``dataset`` into train/test subsets and wrap both in DataLoaders.

    Args:
        dataset: Any sized, indexable dataset accepted by ``random_split``.
        batch_size: Batch size used for both loaders.
        train_fraction: Fraction of samples assigned to the training subset;
            the size is truncated with ``int()`` and the remainder is the test subset.
        seed: Seed for the dedicated generator that drives the random split,
            making the partition reproducible.

    Returns:
        ``(train_size, test_size, train_subset, test_subset, train_loader, test_loader)``.
        The training loader shuffles every epoch; the test loader does not.
    """
    train_size = int(len(dataset) * train_fraction)
    test_size = len(dataset) - train_size
    train_subset, test_subset = torch.utils.data.random_split(
        dataset,
        [train_size, test_size],
        generator=torch.Generator().manual_seed(seed),
    )
    train_loader = DataLoader(train_subset, shuffle=True, batch_size=batch_size)
    test_loader = DataLoader(test_subset, shuffle=False, batch_size=batch_size)
    return train_size, test_size, train_subset, test_subset, train_loader, test_loader


# Sequence 20
(train_size20, test_size20,
 train20_dataset, test20_dataset,
 train20_loader, test20_loader) = _split_and_load(dataset_20, batch_size)

# Sequence 30
(train_size30, test_size30,
 train30_dataset, test30_dataset,
 train30_loader, test30_loader) = _split_and_load(dataset_30, batch_size)

# Sequence 50
(train_size50, test_size50,
 train50_dataset, test50_dataset,
 train50_loader, test50_loader) = _split_and_load(dataset_50, batch_size)

In [6]:
# Define LSTM model
class LSTMModel(nn.Module):
    """Embedding -> single LSTM -> linear head, scored on the last time step.

    Args:
        input_size: Number of distinct token ids (rows of the embedding table).
        hidden_size: Width of both the embedding vectors and the LSTM state.
        output_size: Number of logits produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Sub-modules are created in the order embedding, lstm, fc.
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.lstm = nn.LSTM(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return logits for the token following each sequence in ``x``."""
        token_vectors = self.embedding(x)
        step_outputs, _state = self.lstm(token_vectors)
        # batch_first=True, so axis 1 is time; keep only the final step.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

# Define GRU model
class GRUModel(nn.Module):
    """Embedding -> single GRU -> linear head, scored on the last time step.

    Args:
        input_size: Number of distinct token ids (rows of the embedding table).
        hidden_size: Width of both the embedding vectors and the GRU state.
        output_size: Number of logits produced per input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Sub-modules are created in the order embedding, gru, fc.
        self.embedding = nn.Embedding(num_embeddings=input_size, embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return logits for the token following each sequence in ``x``."""
        token_vectors = self.embedding(x)
        step_outputs, _state = self.gru(token_vectors)
        # batch_first=True, so axis 1 is time; keep only the final step.
        last_step = step_outputs[:, -1, :]
        return self.fc(last_step)

In [7]:
def training_loop(train, test, model, loss_fn, optimizer, epochs, run_device=None):
    """Train ``model`` and evaluate it on ``test`` after every epoch.

    Args:
        train: Iterable of ``(sequences, targets)`` tensor batches used for optimisation.
        test: Iterable of ``(sequences, targets)`` tensor batches used for validation.
        model: Module to optimise; it is moved to the target device in place.
        loss_fn: Callable ``loss_fn(outputs, targets)``. Its value is re-weighted
            by the batch size, so it is assumed to be a per-batch mean.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``train``.
        run_device: Device to run on. Defaults to the notebook-level ``device``.

    Returns:
        dict with keys ``"train_loss"``, ``"val_loss"`` and ``"val_accuracy"``
        (percent), each a list with one float per epoch. A metric is ``nan``
        for an epoch in which its loader produced no samples.
    """
    target_device = device if run_device is None else run_device
    model.to(target_device)  # Move model to the target device

    history = {"train_loss": [], "val_loss": [], "val_accuracy": []}

    for epoch in range(epochs):
        # ---- Training ----
        train_loss_sum = 0.0
        train_seen = 0
        model.train()
        for sequences, targets in train:
            sequences, targets = sequences.to(target_device), targets.to(target_device)
            optimizer.zero_grad()
            outputs = model(sequences)
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            # Weight by batch size so a smaller final batch is averaged correctly.
            train_loss_sum += loss.item() * sequences.size(0)
            train_seen += sequences.size(0)

        # ---- Validation ----
        val_loss_sum = 0.0
        correct = 0
        val_seen = 0
        model.eval()
        with torch.no_grad():
            for sequences, targets in test:
                sequences, targets = sequences.to(target_device), targets.to(target_device)
                outputs = model(sequences)
                loss = loss_fn(outputs, targets)
                val_loss_sum += loss.item() * sequences.size(0)
                predicted = outputs.argmax(dim=1)
                val_seen += targets.size(0)
                correct += (predicted == targets).sum().item()

        # Average over the samples actually seen; an empty loader yields nan
        # instead of raising ZeroDivisionError.
        train_loss = train_loss_sum / train_seen if train_seen else float("nan")
        val_loss = val_loss_sum / val_seen if val_seen else float("nan")
        accuracy = correct / val_seen * 100 if val_seen else float("nan")

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_accuracy"].append(accuracy)

        print(f'Epoch [{epoch + 1}/{epochs}], '
              f'Training Loss: {train_loss:.4f}, '
              f'Validation Loss: {val_loss:.4f}, '
              f'Validation Accuracy: {accuracy:.2f}%')

    return history

# Prediction function
def predict_next_char(model, sequence_length, char_to_int, int_to_char, test_str):
    """Predict the character most likely to follow ``test_str``.

    Only the last ``sequence_length`` characters of ``test_str`` are fed to
    the model. A shorter prompt is used as-is.

    Args:
        model: Module mapping a ``(1, length)`` LongTensor of character ids to
            a ``(1, n_classes)`` tensor of scores.
        sequence_length: Number of trailing characters to keep. ``None`` or a
            non-positive value disables truncation.
        char_to_int: Mapping from character to integer id.
        int_to_char: Mapping from integer id back to character.
        test_str: Prompt text; must not be empty.

    Returns:
        The character whose score is highest.

    Raises:
        ValueError: If ``test_str`` is empty.
        KeyError: If the prompt contains a character missing from ``char_to_int``.
    """
    if not test_str:
        raise ValueError("test_str must contain at least one character")

    # Keep only the trailing window the caller asked for.
    if sequence_length is not None and sequence_length > 0:
        context = test_str[-sequence_length:]
    else:
        context = test_str

    # Run on whichever device the model already lives on, so an un-moved
    # model does not cause a device mismatch.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        # Convert the prompt window to a batch of one id sequence.
        token_ids = [char_to_int[char] for char in context]
        batch = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(model_device)

        # Predict the next character
        output = model(batch)
        predicted_index = output.argmax(dim=1)
        predicted_char = int_to_char[predicted_index.item()]

    return predicted_char

In [8]:
# Hyperparameters reused by the training cells in this notebook.
hidden_size = 128
learning_rate = 0.001
epochs = 10

# Fresh networks for the 20-character context. The vocabulary size is both
# the embedding input size and the number of output classes.
LSTM20_Model = LSTMModel(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))
GRU20_Model = GRUModel(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))

criterion = nn.CrossEntropyLoss()

LSTM20_optimizer = optim.Adam(params=LSTM20_Model.parameters(), lr=learning_rate)
GRU20_optimizer = optim.Adam(params=GRU20_Model.parameters(), lr=learning_rate)

print("Start training for sequence size: 20....\n")
# Train the LSTM first, then the GRU, on the same loaders and loss.
for _banner, _net, _opt in (
    ("LSTM Model", LSTM20_Model, LSTM20_optimizer),
    ("\nGRU Model", GRU20_Model, GRU20_optimizer),
):
    print(_banner)
    training_loop(train20_loader, test20_loader, _net, criterion, _opt, epochs)

Start training for sequence size: 20....

LSTM Model
Epoch [1/10], Training Loss: 1.8371, Validation Loss: 1.6440, Validation Accuracy: 51.00%
Epoch [2/10], Training Loss: 1.5813, Validation Loss: 1.5524, Validation Accuracy: 53.11%
Epoch [3/10], Training Loss: 1.5142, Validation Loss: 1.5177, Validation Accuracy: 54.15%
Epoch [4/10], Training Loss: 1.4763, Validation Loss: 1.4875, Validation Accuracy: 54.86%
Epoch [5/10], Training Loss: 1.4510, Validation Loss: 1.4700, Validation Accuracy: 55.19%
Epoch [6/10], Training Loss: 1.4325, Validation Loss: 1.4599, Validation Accuracy: 55.64%
Epoch [7/10], Training Loss: 1.4183, Validation Loss: 1.4483, Validation Accuracy: 55.85%
Epoch [8/10], Training Loss: 1.4065, Validation Loss: 1.4415, Validation Accuracy: 56.21%
Epoch [9/10], Training Loss: 1.3966, Validation Loss: 1.4385, Validation Accuracy: 56.24%
Epoch [10/10], Training Loss: 1.3889, Validation Loss: 1.4309, Validation Accuracy: 56.43%

GRU Model
Epoch [1/10], Training Loss: 1.7936

In [9]:
# Both models are queried with the same prompt so their answers are comparable.
test_str = "This is a simple example to demonstrate how to predict the next charac"

# LSTM prediction
predicted_char = predict_next_char(
    model=LSTM20_Model,
    sequence_length=20,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
    test_str=test_str,
)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# GRU prediction
predicted_char = predict_next_char(
    model=GRU20_Model,
    sequence_length=20,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
    test_str=test_str,
)
print(f"GRU: Predicted next character: '{predicted_char}'")

LSTM: Predicted next character: 't'
GRU: Predicted next character: 't'


In [10]:
# Fresh networks for the 30-character context. The vocabulary size is both
# the embedding input size and the number of output classes.
LSTM30_Model = LSTMModel(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))
GRU30_Model = GRUModel(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))

criterion = nn.CrossEntropyLoss()

LSTM30_optimizer = optim.Adam(params=LSTM30_Model.parameters(), lr=learning_rate)
GRU30_optimizer = optim.Adam(params=GRU30_Model.parameters(), lr=learning_rate)

print("Start training for sequence size: 30....\n")
# Train the LSTM first, then the GRU, on the same loaders and loss.
for _banner, _net, _opt in (
    ("LSTM Model", LSTM30_Model, LSTM30_optimizer),
    ("\nGRU Model", GRU30_Model, GRU30_optimizer),
):
    print(_banner)
    training_loop(train30_loader, test30_loader, _net, criterion, _opt, epochs)

Start training for sequence size: 30....

LSTM Model
Epoch [1/10], Training Loss: 1.8265, Validation Loss: 1.6284, Validation Accuracy: 51.48%
Epoch [2/10], Training Loss: 1.5677, Validation Loss: 1.5417, Validation Accuracy: 53.63%
Epoch [3/10], Training Loss: 1.5014, Validation Loss: 1.5001, Validation Accuracy: 54.54%
Epoch [4/10], Training Loss: 1.4636, Validation Loss: 1.4758, Validation Accuracy: 55.06%
Epoch [5/10], Training Loss: 1.4390, Validation Loss: 1.4609, Validation Accuracy: 55.50%
Epoch [6/10], Training Loss: 1.4210, Validation Loss: 1.4458, Validation Accuracy: 56.00%
Epoch [7/10], Training Loss: 1.4065, Validation Loss: 1.4366, Validation Accuracy: 56.30%
Epoch [8/10], Training Loss: 1.3949, Validation Loss: 1.4305, Validation Accuracy: 56.45%
Epoch [9/10], Training Loss: 1.3853, Validation Loss: 1.4224, Validation Accuracy: 56.70%
Epoch [10/10], Training Loss: 1.3770, Validation Loss: 1.4173, Validation Accuracy: 56.78%

GRU Model
Epoch [1/10], Training Loss: 1.7903

In [11]:
# Both models are queried with the same prompt so their answers are comparable.
test_str = "This is a simple example to demonstrate how to predict the next charac"

# LSTM prediction
predicted_char = predict_next_char(
    model=LSTM30_Model,
    sequence_length=30,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
    test_str=test_str,
)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# GRU prediction
predicted_char = predict_next_char(
    model=GRU30_Model,
    sequence_length=30,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
    test_str=test_str,
)
print(f"GRU: Predicted next character: '{predicted_char}'")

LSTM: Predicted next character: 't'
GRU: Predicted next character: 't'


In [12]:
# Fresh networks for the 50-character context. The vocabulary size is both
# the embedding input size and the number of output classes.
LSTM50_Model = LSTMModel(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))
GRU50_Model = GRUModel(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))

criterion = nn.CrossEntropyLoss()

LSTM50_optimizer = optim.Adam(params=LSTM50_Model.parameters(), lr=learning_rate)
GRU50_optimizer = optim.Adam(params=GRU50_Model.parameters(), lr=learning_rate)

print("Start training for sequence size: 50....\n")
# Train the LSTM first, then the GRU, on the same loaders and loss.
for _banner, _net, _opt in (
    ("LSTM Model", LSTM50_Model, LSTM50_optimizer),
    ("\nGRU Model", GRU50_Model, GRU50_optimizer),
):
    print(_banner)
    training_loop(train50_loader, test50_loader, _net, criterion, _opt, epochs)

Start training for sequence size: 50....

LSTM Model
Epoch [1/10], Training Loss: 1.8215, Validation Loss: 1.6215, Validation Accuracy: 51.80%
Epoch [2/10], Training Loss: 1.5637, Validation Loss: 1.5355, Validation Accuracy: 53.78%
Epoch [3/10], Training Loss: 1.4963, Validation Loss: 1.4896, Validation Accuracy: 55.18%
Epoch [4/10], Training Loss: 1.4588, Validation Loss: 1.4663, Validation Accuracy: 55.73%
Epoch [5/10], Training Loss: 1.4332, Validation Loss: 1.4462, Validation Accuracy: 56.15%
Epoch [6/10], Training Loss: 1.4139, Validation Loss: 1.4328, Validation Accuracy: 56.49%
Epoch [7/10], Training Loss: 1.3995, Validation Loss: 1.4233, Validation Accuracy: 56.81%
Epoch [8/10], Training Loss: 1.3875, Validation Loss: 1.4181, Validation Accuracy: 56.86%
Epoch [9/10], Training Loss: 1.3778, Validation Loss: 1.4103, Validation Accuracy: 57.02%
Epoch [10/10], Training Loss: 1.3690, Validation Loss: 1.4042, Validation Accuracy: 57.33%

GRU Model
Epoch [1/10], Training Loss: 1.7825

In [14]:
# Both models are queried with the same prompt so their answers are comparable.
test_str = "This is a simple example to demonstrate how to predict the next charac"

# LSTM prediction
predicted_char = predict_next_char(
    model=LSTM50_Model,
    sequence_length=50,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
    test_str=test_str,
)
print(f"LSTM: Predicted next character: '{predicted_char}'")

# GRU prediction
predicted_char = predict_next_char(
    model=GRU50_Model,
    sequence_length=50,
    char_to_int=char_to_int,
    int_to_char=int_to_char,
    test_str=test_str,
)
print(f"GRU: Predicted next character: '{predicted_char}'")

LSTM: Predicted next character: 'h'
GRU: Predicted next character: 't'
