<a href="https://colab.research.google.com/github/Vishy-A/DeepLearning/blob/main/HW5/4106HW5P2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import requests
from torch.utils.data import Dataset, DataLoader

In [2]:
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
seq = response.text

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
chars = sorted(list(set(seq)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in seq]

In [4]:
def definition(maxlen):
    x = []
    y = []
    for i in range(len(seq) - maxlen):
        sequence = seq[i:i + maxlen]
        label = seq[i + maxlen]
        x.append([char_to_int[char] for char in sequence])
        y.append(char_to_int[label])

    x = np.array(x)
    y = np.array(y)

    return x, y


In [5]:
class CharDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [6]:
batch_size = 128
x, y = definition(20)
x = torch.tensor(x, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)
dataset20 = CharDataset(x, y)
train_size = int(len(dataset20) * .8)
val_size = len(dataset20) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset20, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [7]:
class Transformer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, num_heads, dropout):
        super(Transformer, self).__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        encoderlayers = nn.TransformerEncoderLayer(hidden_size, num_heads, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoderlayers, num_layers)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        embedded = self.embedding(x)
        output = self.transformer(embedded)
        output = self.fc(output[:, -1, :])
        return output


In [8]:
def train_loop(train_loader, val_loader, model, lossfunc, optimizer, epochs):
    model.to(device)
    val_accs = []
    train_losses = []
    val_losses = []

    for epoch in range(epochs):
        train_loss = 0
        val_loss = 0
        correct = 0
        total = 0
        model.train()

        for x, y in train_loader:
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = lossfunc(output, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * x.size(0)

        model.eval()
        with torch.no_grad():
            for x, y in val_loader:
                x = x.to(device)
                y = y.to(device)
                output = model(x)
                loss = lossfunc(output, y)
                val_loss += loss.item() * x.size(0)
                _, predicted = torch.max(output.data, 1)
                total += y.size(0)
                correct += (predicted == y).sum().item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        acc = correct / total

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        val_accs.append(acc)
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Validation Accuracy: {acc:.2f}%')

    return train_losses, val_losses

In [9]:
hidden_size = 128
num_layers = 3
num_heads = 2
dropout = 0.1
lr = 0.001
epochs = 10

criterion = nn.CrossEntropyLoss()
model = Transformer(len(chars), hidden_size, len(chars), num_layers, num_heads, dropout)
optimizer = optim.Adam(model.parameters(), lr=lr)




In [10]:
def predict_next_char(model, seq_len, char_to_int, int_to_char, initialstr):
  model.eval()
  with torch.no_grad():
    initial_input = torch.tensor([char_to_int[char] for char in initialstr], dtype=torch.long).unsqueeze(0).to(device)
    output = model(initial_input)
    _, predicted = torch.argmax(output, dim=1).item()
    return int_to_char[predicted]

In [11]:
train_loop(train_loader, val_loader, model, criterion, optimizer, epochs)

Epoch 1/10, Train Loss: 322.1177, Val Loss: 318.0716, Validation Accuracy: 0.27%
Epoch 2/10, Train Loss: 318.1293, Val Loss: 316.9726, Validation Accuracy: 0.27%
Epoch 3/10, Train Loss: 317.4180, Val Loss: 316.3948, Validation Accuracy: 0.27%
Epoch 4/10, Train Loss: 316.9148, Val Loss: 316.0687, Validation Accuracy: 0.27%
Epoch 5/10, Train Loss: 316.8979, Val Loss: 316.3322, Validation Accuracy: 0.27%
Epoch 6/10, Train Loss: 316.6735, Val Loss: 315.8450, Validation Accuracy: 0.27%
Epoch 7/10, Train Loss: 316.2954, Val Loss: 315.7794, Validation Accuracy: 0.27%
Epoch 8/10, Train Loss: 316.3851, Val Loss: 317.6847, Validation Accuracy: 0.27%
Epoch 9/10, Train Loss: 316.1482, Val Loss: 315.3227, Validation Accuracy: 0.27%
Epoch 10/10, Train Loss: 316.2447, Val Loss: 315.1842, Validation Accuracy: 0.27%


([322.1177186349036,
  318.12928998398496,
  317.4180045985177,
  316.9148391453753,
  316.89789083728283,
  316.6734675831037,
  316.29540742445596,
  316.3850723575123,
  316.1481586360617,
  316.24474998800764],
 [318.07156016726776,
  316.97261347294676,
  316.39477219502265,
  316.06867353600194,
  316.3322217925417,
  315.84503164718154,
  315.7794421576797,
  317.6847218078782,
  315.32269242804284,
  315.1842304296488])