<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/studyRnn4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seeds for reproducibility.
# NumPy is seeded as well because text sampling later in the notebook draws
# from NumPy's global RNG (np.random.choice), which torch.manual_seed does
# not affect.
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7f43301b8130>

In [None]:
# Data I/O
# Downloads the training corpus as one string and builds the character
# vocabulary plus the two lookup tables used by the rest of the notebook.

#url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
url = "https://raw.githubusercontent.com/torvalds/linux/master/mm/madvise.c"
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently training on an HTTP error page.
response.raise_for_status()
data = response.text

# Sort the unique characters: iteration order of a set of strings changes
# between interpreter runs (hash randomisation), which would give every run a
# different char <-> index mapping and make results impossible to reproduce.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

# Forward and inverse vocabulary lookups.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 39580 characters, 88 unique.


In [None]:
# Hyperparameters
# Model dimensions.
hidden_size, embedding_dim = 100, 20
# Characters per training window and number of windows per minibatch.
seq_length, batch_size = 25, 20
# Step size handed to the optimizer.
learning_rate = 0.1

In [None]:
# Encode one window starting at the data pointer: `inputs` is a single row of
# seq_length character ids, `targets` is the same window shifted by one
# character, kept flat.
p = 0
inputs = torch.tensor(
    list(map(char_to_ix.__getitem__, data[p:p + seq_length])),
    dtype=torch.long,
).unsqueeze(0)
targets = torch.tensor(
    list(map(char_to_ix.__getitem__, data[p + 1:p + seq_length + 1])),
    dtype=torch.long,
).flatten()

In [None]:
# Model definition
class VanillaRNN(nn.Module):
  """Character-level vanilla RNN: embedding -> tanh recurrence -> vocabulary logits.

  Args:
    vocab_size: number of distinct token ids (embedding rows and output logits).
    embedding_dim: size of each token embedding.
    hidden_size: size of the recurrent hidden state.
  """

  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(VanillaRNN, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.i2h = nn.Linear(embedding_dim, hidden_size)
    self.h2h = nn.Linear(hidden_size, hidden_size)
    self.h2o = nn.Linear(hidden_size, vocab_size)
    # Extra additive biases on top of the ones nn.Linear already carries;
    # kept so the parameter set (and any saved state_dict) is unchanged.
    self.hb2 = nn.Parameter(torch.zeros(1, hidden_size))
    self.ob = nn.Parameter(torch.zeros(1, vocab_size))

  def forward(self, x, h):
    """Run the recurrence over every time step of `x`.

    Args:
      x: LongTensor of token ids, shape (batch, seq).
      h: initial hidden state, shape (1, hidden_size) (broadcast over the
        batch) or (batch, hidden_size).

    Returns:
      (y, h_last): `y` holds the logits for every position, shape
      (batch, seq, vocab_size); `h_last` is the hidden state after the final
      time step, shape (batch, hidden_size), or `h` unchanged when seq == 0.
    """
    emb = self.embedding(x)   # (batch, seq, embedding_dim)
    # The input projection does not depend on h, so do it for all steps at once.
    in_proj = self.i2h(emb)   # (batch, seq, hidden_size)

    # Unroll over time so the state computed at step t feeds step t + 1.
    # Applying a single tanh update to the whole (batch, seq) block would give
    # every position the same incoming h and no information would flow along
    # the sequence.
    states = []
    for t in range(in_proj.size(1)):
      h = torch.tanh(in_proj[:, t] + self.h2h(h) + self.hb2)
      states.append(h)

    # With an empty sequence there is nothing to stack; in_proj already has
    # the right (batch, 0, hidden_size) shape.
    hs = torch.stack(states, dim=1) if states else in_proj
    y = self.h2o(hs) + self.ob
    return y, h

# Model initialization: build the network, then hand its parameters to Adagrad.
model = VanillaRNN(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_size=hidden_size,
)
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

# Loss function: cross-entropy between the logits and the target character ids.
criterion = nn.CrossEntropyLoss()

In [None]:
def generateMiniBatch(start_idx):
  """Build one minibatch of `batch_size` windows, each shifted by one character.

  Window i covers data[start_idx + i : start_idx + i + seq_length]; its targets
  are the same span shifted one character to the right. Reads the module-level
  `data`, `char_to_ix`, `batch_size` and `seq_length`.

  Returns:
    (inputs, targets): `inputs` is a LongTensor of shape
    (batch_size, seq_length); `targets` is the flat LongTensor of all windows'
    targets concatenated in window order.
  """
  def encode(text):
    # Map every character to its vocabulary index.
    return torch.tensor([char_to_ix[ch] for ch in text], dtype=torch.long)

  starts = range(start_idx, start_idx + batch_size)
  input_rows = [encode(data[s:s + seq_length]).unsqueeze(0) for s in starts]
  target_rows = [encode(data[s + 1:s + seq_length + 1]) for s in starts]

  return torch.cat(input_rows, dim=0), torch.cat(target_rows, dim=0)

In [None]:
# Training loop
# Walks a data pointer `p` through the corpus; every iteration trains on one
# minibatch of `batch_size` overlapping windows starting at p, p + 1, ...
num_iterations = 10000
for iteration in range(num_iterations):
  # Start at the beginning, and wrap around before a window would run past
  # the end of the data.
  if iteration == 0 or p + seq_length + batch_size + 1 >= len(data):
    p = 0  # Go from the start of data

  inputs, targets = generateMiniBatch(p)

  # Each minibatch starts from a zero hidden state. Row i of a minibatch is the
  # window beginning at p + i and p advances by batch_size, so consecutive
  # minibatches hold overlapping windows: the state left behind by one
  # minibatch is not the state that precedes the next one's windows, and
  # carrying it over would feed the model unrelated context.
  hprev = torch.zeros(1, hidden_size)

  # Forward pass
  outputs, hprev = model(inputs, hprev)
  loss = criterion(outputs.reshape(-1, vocab_size), targets)

  # Backward pass
  optimizer.zero_grad()
  loss.backward()
  hprev = hprev.detach()  # keep the final state inspectable without its graph

  # Gradient clipping: clamp every gradient element to [-5, 5].
  nn.utils.clip_grad_value_(model.parameters(), 5)

  optimizer.step()

  # Print progress
  if iteration % 100 == 0:
    print(f'Iteration {iteration}, Loss: {loss.item()}')

  p += batch_size  # Move data pointer

Iteration 0, Loss: 5.486164093017578
Iteration 100, Loss: 2.359689950942993
Iteration 200, Loss: 2.285416841506958
Iteration 300, Loss: 4.194525241851807
Iteration 400, Loss: 3.3510031700134277
Iteration 500, Loss: 2.848665952682495
Iteration 600, Loss: 2.9528396129608154
Iteration 700, Loss: 2.5791709423065186
Iteration 800, Loss: 3.1590704917907715
Iteration 900, Loss: 2.2458698749542236
Iteration 1000, Loss: 2.4159185886383057
Iteration 1100, Loss: 2.424057722091675
Iteration 1200, Loss: 2.57066011428833
Iteration 1300, Loss: 2.379361391067505
Iteration 1400, Loss: 2.5716958045959473
Iteration 1500, Loss: 2.441225051879883
Iteration 1600, Loss: 2.617128849029541
Iteration 1700, Loss: 2.248991012573242
Iteration 1800, Loss: 2.251082420349121
Iteration 1900, Loss: 2.705542802810669
Iteration 2000, Loss: 2.1049044132232666
Iteration 2100, Loss: 2.231717348098755
Iteration 2200, Loss: 2.275906562805176
Iteration 2300, Loss: 2.6123456954956055
Iteration 2400, Loss: 2.596693515777588
Iter

In [None]:
# Sample from the model
def sample(model, seed_ix, n):
  """Generate `n` token ids by repeatedly sampling from the model's output.

  Args:
    model: callable taking (x, h) with x of shape (1, 1) and returning
      (logits, new_h); the logits of a single step are flattened to one vector.
    seed_ix: id of the first input token. It is not included in the result.
    n: number of tokens to generate.

  Returns:
    A list of `n` Python ints, the sampled token ids in generation order.

  Reads the module-level `hidden_size` for the initial zero hidden state.
  """
  h = torch.zeros(1, hidden_size)
  x = torch.tensor(seed_ix, dtype=torch.long).view(1, 1)
  ixes = []

  # Generation needs no gradients. Without no_grad() the autograd graph would
  # keep growing through `h` for all n steps.
  with torch.no_grad():
    for _ in range(n):
      outputs, h = model(x, h)
      probs = torch.softmax(outputs.reshape(-1), dim=-1)
      # Draw with torch's generator so torch.manual_seed makes the sampled
      # text reproducible.
      ix = int(torch.multinomial(probs, num_samples=1).item())
      x = torch.tensor(ix, dtype=torch.long).view(1, 1)
      ixes.append(ix)

  return ixes

In [None]:
# Generate sample text: seed with the corpus' first character, draw 2000 ids,
# then map them back to characters.
sample_ix = sample(model, seed_ix=char_to_ix[data[0]], n=2000)
txt = ''.join(map(ix_to_char.__getitem__, sample_ix))
print(f'Generated Text:\n{txt}')