<a href="https://colab.research.google.com/github/Vishy-A/DeepLearning/blob/main/HW3/4106HW3P2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [22]:
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
seq = response.text

In [23]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [24]:
chars = sorted(list(set(seq)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}
encoded_text = [char_to_int[ch] for ch in seq]

In [25]:
def definition(maxlen):
  x = []
  y = []
  for i in range(len(seq) - maxlen):
      sequence = seq[i:i + maxlen]
      label = seq[i + maxlen]
      x.append([char_to_int[char] for char in sequence])
      y.append(char_to_int[label])

  x = np.array(x)
  y = np.array(y)

  return x, y

In [26]:
x20, y20 = definition(20)
x30, y30 = definition(30)
x50, y50 = definition(50)

In [27]:
x20 = torch.tensor(x20, dtype=torch.long)
y20 = torch.tensor(y20, dtype=torch.long)

x30 = torch.tensor(x30, dtype=torch.long)
y30 = torch.tensor(y30, dtype=torch.long)

x50 = torch.tensor(x50, dtype=torch.long)
y50 = torch.tensor(y50, dtype=torch.long)


In [28]:
class CharDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]


In [29]:
dataset20 = CharDataset(x20, y20)
dataset30 = CharDataset(x30, y30)
dataset50 = CharDataset(x50, y50)

In [30]:
batch_size = 128

trainsize20 = int(len(dataset20) * .8)
valsize20 = len(dataset20) - trainsize20
traindataset20, valdataset20 = torch.utils.data.random_split(dataset20, [trainsize20, valsize20])

trainsize30 = int(len(dataset30) * .8)
valsize30 = len(dataset30) - trainsize30
traindataset30, valdataset30 = torch.utils.data.random_split(dataset30, [trainsize30, valsize30])

trainsize50 = int(len(dataset50) * .8)
valsize50 = len(dataset50) - trainsize50
traindataset50, valdataset50 = torch.utils.data.random_split(dataset50, [trainsize50, valsize50])


In [31]:
trainloader20 = DataLoader(traindataset20, batch_size=batch_size, shuffle=True)
testloader20 = DataLoader(valdataset20, batch_size=batch_size, shuffle=False)

trainloader30 = DataLoader(traindataset30, batch_size=batch_size, shuffle=True)
testloader30 = DataLoader(valdataset30, batch_size=batch_size, shuffle=False)

trainloader50 = DataLoader(traindataset50, batch_size=batch_size, shuffle=True)
testloader50 = DataLoader(valdataset50, batch_size=batch_size, shuffle=False)


In [32]:
class LSTM(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(LSTM, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(input_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, input):
    embedded = self.embedding(input)
    output, _ = self.lstm(embedded)
    output = self.fc(output[:, -1, :])
    return output

In [33]:
class GRU(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(GRU, self).__init__()
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(input_size, hidden_size)
    self.gru = nn.GRU(hidden_size, hidden_size)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, input):
    embedded = self.embedding(input)
    output, _ = self.gru(embedded)
    output = self.fc(output[:, -1, :])
    return output

In [34]:
hidden_size = 128
lr = .001
epochs = 10

criterion = nn.CrossEntropyLoss()


In [35]:
modelLSTM = LSTM(len(chars), hidden_size, len(chars))
modelGRU = GRU(len(chars), hidden_size, len(chars))

optimizerLSTM = torch.optim.Adam(modelLSTM.parameters(), lr=lr)
optimizerGRU = torch.optim.Adam(modelGRU.parameters(), lr=lr)

In [36]:
def trainloop(train, test, model, lossfunc, optimizer, epochs):
  model.to(device)

  train_losses = []
  test_losses = []
  test_accs = []

  for epoch in range(epochs):
    train_loss = 0
    test_loss = 0
    test_acc = 0
    correct = 0
    total = 0
    model.train()

    for x, y in train:
      x = x.to(device)
      y = y.to(device)
      optimizer.zero_grad()
      output = model(x)
      loss = lossfunc(output, y)
      loss.backward()
      optimizer.step()
      train_loss += loss.item()

    model.eval()
    with torch.no_grad():
      for x, y in test:
        x = x.to(device)
        y = y.to(device)
        output = model(x)
        test_loss += lossfunc(output, y).item()
        pred = torch.argmax(output, dim=1)
        correct += torch.sum(pred == y).item()
        total += len(y)

    train_loss /= len(train)
    test_loss /= len(test)

    train_losses.append(train_loss / len(train))
    test_losses.append(test_loss / len(test))
    test_accs.append(correct / total)

    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {correct / total:.4f}')

  return train_losses, test_losses, test_accs


In [37]:
def predict_next_char(model, char_to_int, int_to_char, initialstr):
  model.eval()
  with torch.no_grad():
    initial_input = torch.tensor([char_to_int[char] for char in initialstr], dtype=torch.long).unsqueeze(0)
    output = model(initial_input)
    predicted = torch.argmax(output, dim=1).item()
    return int_to_char[predicted]

In [38]:
trainloop(trainloader20, testloader20, modelLSTM, criterion, optimizerLSTM, epochs)
trainloop(trainloader20, testloader20, modelGRU, criterion, optimizerGRU, epochs)

Epoch 1/10, Train Loss: 2.5015, Test Loss: 2.4715, Test Accuracy: 0.2696
Epoch 2/10, Train Loss: 2.4696, Test Loss: 2.4695, Test Accuracy: 0.2672
Epoch 3/10, Train Loss: 2.4665, Test Loss: 2.4676, Test Accuracy: 0.2686
Epoch 4/10, Train Loss: 2.4653, Test Loss: 2.4664, Test Accuracy: 0.2692
Epoch 5/10, Train Loss: 2.4647, Test Loss: 2.4665, Test Accuracy: 0.2680
Epoch 6/10, Train Loss: 2.4640, Test Loss: 2.4642, Test Accuracy: 0.2685
Epoch 7/10, Train Loss: 2.4640, Test Loss: 2.4650, Test Accuracy: 0.2689
Epoch 8/10, Train Loss: 2.4638, Test Loss: 2.4649, Test Accuracy: 0.2697
Epoch 9/10, Train Loss: 2.4635, Test Loss: 2.4642, Test Accuracy: 0.2694
Epoch 10/10, Train Loss: 2.4633, Test Loss: 2.4643, Test Accuracy: 0.2690
Epoch 1/10, Train Loss: 2.4998, Test Loss: 2.4769, Test Accuracy: 0.2540
Epoch 2/10, Train Loss: 2.4743, Test Loss: 2.4748, Test Accuracy: 0.2652
Epoch 3/10, Train Loss: 2.4725, Test Loss: 2.4761, Test Accuracy: 0.2684
Epoch 4/10, Train Loss: 2.4722, Test Loss: 2.4752,

([0.00035854778919976546,
  0.00035488602484259007,
  0.0003546340210085993,
  0.00035458736716712826,
  0.00035447344318711874,
  0.0003545027648003062,
  0.0003544936577920481,
  0.0003544600422219924,
  0.00035441741481239737,
  0.0003543659593821605],
 [0.001421083714602705,
  0.0014198722744064914,
  0.0014205921166060348,
  0.0014200964733281675,
  0.0014174061331191392,
  0.0014197532884247497,
  0.0014175956153868812,
  0.0014186199198719532,
  0.001418190603761348,
  0.0014180980931158115],
 [0.2540177070491987,
  0.26517538944301244,
  0.26837162389330943,
  0.2637229631289925,
  0.2639784825731256,
  0.2695416339796033,
  0.2686988680936905,
  0.26821472598901713,
  0.26749299562927265,
  0.26205536254622885])

In [39]:
modelLSTM = LSTM(len(chars), hidden_size, len(chars))
modelGRU = GRU(len(chars), hidden_size, len(chars))

optimizerLSTM = torch.optim.Adam(modelLSTM.parameters(), lr=lr)
optimizerGRU = torch.optim.Adam(modelGRU.parameters(), lr=lr)

In [None]:
trainloop(trainloader30, testloader30, modelLSTM, criterion, optimizerLSTM, epochs)
trainloop(trainloader30, testloader30, modelGRU, criterion, optimizerGRU, epochs)

Epoch 1/10, Train Loss: 2.5022, Test Loss: 2.4764, Test Accuracy: 0.2612
Epoch 2/10, Train Loss: 2.4691, Test Loss: 2.4689, Test Accuracy: 0.2678
Epoch 3/10, Train Loss: 2.4662, Test Loss: 2.4686, Test Accuracy: 0.2679
Epoch 4/10, Train Loss: 2.4653, Test Loss: 2.4667, Test Accuracy: 0.2676
Epoch 5/10, Train Loss: 2.4645, Test Loss: 2.4671, Test Accuracy: 0.2628
Epoch 6/10, Train Loss: 2.4640, Test Loss: 2.4692, Test Accuracy: 0.2696
Epoch 7/10, Train Loss: 2.4636, Test Loss: 2.4665, Test Accuracy: 0.2682
Epoch 8/10, Train Loss: 2.4635, Test Loss: 2.4648, Test Accuracy: 0.2675
Epoch 9/10, Train Loss: 2.4633, Test Loss: 2.4651, Test Accuracy: 0.2702
Epoch 10/10, Train Loss: 2.4631, Test Loss: 2.4659, Test Accuracy: 0.2690
Epoch 1/10, Train Loss: 2.4991, Test Loss: 2.4795, Test Accuracy: 0.2675
Epoch 2/10, Train Loss: 2.4745, Test Loss: 2.4743, Test Accuracy: 0.2693
Epoch 3/10, Train Loss: 2.4726, Test Loss: 2.4740, Test Accuracy: 0.2695
Epoch 4/10, Train Loss: 2.4718, Test Loss: 2.4776,

In [None]:
modelLSTM = LSTM(len(chars), hidden_size, len(chars))
modelGRU = GRU(len(chars), hidden_size, len(chars))

optimizerLSTM = torch.optim.Adam(modelLSTM.parameters(), lr=lr)
optimizerGRU = torch.optim.Adam(modelGRU.parameters(), lr=lr)

In [None]:
trainloop(trainloader50, testloader50, modelLSTM, criterion, optimizerLSTM, epochs)
trainloop(trainloader50, testloader50, modelGRU, criterion, optimizerGRU, epochs)