<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/studyRNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7b15c0030310>

In [3]:
# Data I/O

url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/torvalds/linux/master/mm/madvise.c"
response = requests.get(url)
data = response.text

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 1115394 characters, 65 unique.


In [21]:
# Hyperparameters
hidden_size = 100
embedding_dim = 20
seq_length = 25
learning_rate = 1e-1
batch_size = 20

In [27]:
# Model definition
class VanillaRNN(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(VanillaRNN, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.i2h = nn.Linear(embedding_dim, hidden_size)
    self.h2h = nn.Linear(hidden_size, hidden_size)
    self.h2o = nn.Linear(hidden_size, vocab_size)
    self.hb2 = nn.Parameter(torch.zeros(1, hidden_size))
    self.ob = nn.Parameter(torch.zeros(1, vocab_size))

  def forward(self, x, h):
    x = self.embedding(x)
    h2 = torch.tanh(self.i2h(x) + self.h2h(h) + self.hb2)
    y = self.h2o(h2) + self.ob
    return y, h2

# Loss function
criterion = nn.CrossEntropyLoss()

# Model initialization
model = VanillaRNN(vocab_size, embedding_dim, hidden_size)
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [None]:
def generateMiniBatch(start_idx):
  batch_inputs = []
  batch_targets = []

  # Generate examples for the current minibatch
  for i in range(batch_size):
    p = start_idx + i
    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

    batch_inputs.append(inputs)
    batch_targets.append(targets)

  # Convert lists to tensors
  minibatch_inputs = torch.cat(batch_inputs, dim=0)
  minibatch_targets = torch.cat(batch_targets, dim=0)
  return minibatch_inputs, minibatch_targets

In [45]:
# Training loop
num_iterations = 10000
p = 0
for iteration in range(num_iterations):

  if p + seq_length + 1 > len(data):
    p = 0;

  inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
  targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

  optimizer.zero_grad()

  totalloss = 0
  hprev = torch.zeros(1, hidden_size)  # Reset RNN memory
  for i in range(seq_length):
    input_char = (inputs[0][i]).view(1, -1)
    output_char = targets[i]

    predict_char, hprev = model(input_char, hprev)
    loss = criterion(predict_char[0][0], output_char)
    totalloss += loss.item()

    loss.backward()
    hprev = hprev.detach()

    for param in model.parameters():
      if param.grad is not None:
        param.grad.data.clamp_(-5, 5)

    optimizer.step()

  if iteration % 100 == 0:
    print(f'Iteration {iteration}, Loss: {totalloss/seq_length}')

  p += seq_length  # Move data pointer

Iteration 0, Loss: 1.461213911473751
Iteration 100, Loss: 2.388749783039093
Iteration 200, Loss: 2.042467204928398
Iteration 300, Loss: 1.8395315510034562
Iteration 400, Loss: 2.506123238801956
Iteration 500, Loss: 2.5262410606443884
Iteration 600, Loss: 2.056482539176941
Iteration 700, Loss: 2.1896510237455367
Iteration 800, Loss: 1.650094143152237
Iteration 900, Loss: 2.5060903424024583
Iteration 1000, Loss: 1.9856063830852508
Iteration 1100, Loss: 2.5597991037368772
Iteration 1200, Loss: 2.6833739322423935
Iteration 1300, Loss: 2.1536571943759917
Iteration 1400, Loss: 2.217836757898331
Iteration 1500, Loss: 2.6447662043571474
Iteration 1600, Loss: 2.0042198586463926
Iteration 1700, Loss: 2.096113713979721
Iteration 1800, Loss: 2.189876298904419
Iteration 1900, Loss: 1.9852752423286437
Iteration 2000, Loss: 2.1876582396030426


KeyboardInterrupt: ignored

In [41]:
# Sample from the model
def sample(model, seed_ix, n):
  h = torch.zeros(1, hidden_size)
  x = torch.tensor(seed_ix, dtype=torch.long).view(1, 1)
  ixes = []

  for _ in range(n):
    outputs, h = model(x, h)
    p = nn.functional.softmax(outputs, dim=-1).detach().numpy().ravel()
    ix = np.random.choice(range(vocab_size), p=p)
    x = torch.tensor(ix, dtype=torch.long).view(1, 1)
    ixes.append(ix)

  return ixes

In [42]:
# Generate sample text
sample_ix = sample(model, char_to_ix[data[0]], 2000)
txt = ''.join(ix_to_char[ix] for ix in sample_ix)
print(f'Generated Text:\n{txt}')

Generated Text:
y hourever of in.

BUCKBETor win yous heaud oean id this lor, yeand
Hukn:
BUCESTERISZSTEFomr lelinendiine;
As to ciay,
Bernow yiind'uror Ehye wh cecanlye;
Buk'dsedrotid in hiene bu yaitty seinend lend;
Lot, nof
An Sataudore Hatnoortizll aur fatiid of yies;
Whe nes sheinyo ind maru give his lofbere of ness ofr is L'n bull it irth wien:
TIM:
Weveather hastind tifey; the
Lecweve rind; an ther ther of mint noowessofMeur figd's sou dor farau It hir fon hy your I co wirara hicis of prand?
Hoe insorder opard sou Bu cower sudes feay opid rei,
Bang, bill'de thicher four fold Hom far the bins migind boness notforl,

BUCKINUNGHo wings off's oursy win oeay no,
Where of
AyAnd des otes coure herd the snty mard;

Bn Eotink diny gind;
Selfatenty,

Bigt in acqind ul hinge novil ond
Hord yimme,

GAM:
Ms dcoed curp,
And in I father fajy woritize bous toucer prind;
Uoin; Iist,
Aeche lige lon yenfor; you tFrief th meve bemend;

Hing of or, of the rikr yececeng,her pot
Werey thien:

BS Eothe