<a href="https://colab.research.google.com/github/YousefAbua/Intro-To-DL/blob/main/HW5/HW5_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
from sklearn.model_selection import train_test_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [11]:
# Setup dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text

chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

def Define_Dataset(max_length):
  x = []
  y = []
  for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    x.append([char_to_int[char] for char in sequence])
    y.append(char_to_int[label])

  x = np.array(x)
  y = np.array(y)
  return x, y

class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

batch_size = 128
#sequence_length = 20
#sequence_length = 30
sequence_length = 50

x, y = Define_Dataset(sequence_length)
x = torch.tensor(x, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)

dataset = CharDataset(x, y)
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [12]:
def training_loop(train, test, model, loss_fn, optimizer, epochs):
  model.to(device)  # Move model to GPU
  # Train/Validation Loop
  train_loss_list = []
  val_loss_list = []
  val_accuracy_list = []

  for epoch in range(epochs):
      train_loss = 0.0
      val_loss = 0.0
      correct = 0
      total = 0

      # Training
      model.train()
      for sequences, targets in train:
          sequences, targets = sequences.to(device), targets.to(device)  # Move data to GPU
          optimizer.zero_grad()
          outputs = model(sequences)
          loss = loss_fn(outputs, targets)
          loss.backward()
          optimizer.step()
          train_loss += loss.item() * sequences.size(0)

      # Validation
      model.eval()
      with torch.no_grad():
          for sequences, targets in test:
              sequences, targets = sequences.to(device), targets.to(device)  # Move data to GPU
              outputs = model(sequences)
              loss = loss_fn(outputs, targets)
              val_loss += loss.item() * sequences.size(0)
              _, predicted = torch.max(outputs.data, 1)
              total += targets.size(0)
              correct += (predicted == targets).sum().item()

      train_loss = train_loss / len(train.dataset)
      val_loss = val_loss / len(test.dataset)
      accuracy = correct / total * 100

      train_loss_list.append(train_loss)
      val_loss_list.append(val_loss)
      val_accuracy_list.append(accuracy)

      print(f'Epoch [{epoch + 1}/{epochs}], '
            f'Training Loss: {train_loss:.4f}, '
            f'Validation Loss: {val_loss:.4f}, '
            f'Validation Accuracy: {accuracy:.2f}%')

# Define the Transformer model with dropout
class CharTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, nhead, dropout=0.1):
        super(CharTransformer, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        encoder_layers = nn.TransformerEncoderLayer(hidden_size, nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        transformer_output = self.transformer_encoder(embedded)
        output = self.fc(transformer_output[:, -1, :])  # Get the output of the last Transformer block
        return output

# Prediction function
def predict_next_char(model, sequence_length, char_to_int, int_to_char, test_str):
    model.eval()
    with torch.no_grad():
        # Convert the test string to integers
        test_sequence = [char_to_int[char] for char in test_str]
        test_sequence = torch.tensor(test_sequence, dtype=torch.long).unsqueeze(0).to(device)

        # Predict the next character
        output = model(test_sequence)
        _, predicted_index = torch.max(output, 1)
        predicted_char = int_to_char[predicted_index.item()]

    return predicted_char

# Train and validate for sequence lengths of 10, 20, and 30 for both Transformer and RNN models
results = {}
hidden_size = 128
num_layers = 3
nhead = 2
learning_rate = 0.001
epochs = 10

model = CharTransformer(len(chars), hidden_size, len(chars), num_layers, nhead, dropout=0.1)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train and validate Transformer model with dropout
training_loop(
    train = train_loader,
    test = test_loader,
    model = model,
    loss_fn = criterion,
    optimizer = optimizer,
    epochs = epochs
)

# Predicting the next character
test_str = "This is a simple example to demonstrate how to predict the next char"
predicted_char = predict_next_char(model, 20, char_to_int, int_to_char, test_str)
print(f"LSTM: Predicted next character: '{predicted_char}'")

Epoch [1/10], Training Loss: 2.5165, Validation Loss: 2.4828, Validation Accuracy: 26.96%
Epoch [2/10], Training Loss: 2.4877, Validation Loss: 2.4759, Validation Accuracy: 26.75%
Epoch [3/10], Training Loss: 2.4793, Validation Loss: 2.4692, Validation Accuracy: 26.68%
Epoch [4/10], Training Loss: 2.4766, Validation Loss: 2.4688, Validation Accuracy: 26.93%
Epoch [5/10], Training Loss: 2.4755, Validation Loss: 2.4677, Validation Accuracy: 26.62%
Epoch [6/10], Training Loss: 2.4736, Validation Loss: 2.4659, Validation Accuracy: 26.96%
Epoch [7/10], Training Loss: 2.4711, Validation Loss: 2.4665, Validation Accuracy: 26.94%
Epoch [8/10], Training Loss: 2.4706, Validation Loss: 2.4639, Validation Accuracy: 27.01%
Epoch [9/10], Training Loss: 2.4737, Validation Loss: 2.4635, Validation Accuracy: 26.99%
Epoch [10/10], Training Loss: 2.4687, Validation Loss: 2.4623, Validation Accuracy: 26.90%
LSTM: Predicted next character: 'e'
