<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/studyRnn4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)

In [86]:
# Data I/O

url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/tinygrad/tinygrad/master/tinygrad/tensor.py"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/KDE4.en-es.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/js"
response = requests.get(url)
data = response.text

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 1115394 characters, 65 unique.


In [96]:
# Hyperparameters
hidden_size = 100
embedding_dim = 20
seq_length = 25
learning_rate = 1e-1
batch_size = 20

In [78]:
p = 0
inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

In [109]:
inputs

tensor([[64, 16, 10, 53,  9, 47,  2, 16,  9, 16,  5,  1,  3, 31, 41, 43,  1, 35,
         37, 10,  1, 47,  6,  1, 47]])

In [110]:
hprev = torch.zeros(1, hidden_size)
hprev

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.]])

In [73]:
targets

tensor([16,  8,  1, 45, 10,  8,  5, 24, 22, 16, 79, 46, 16, 59, 75, 75, 10, 45,
        57,  3,  3,  7,  8, 75, 59])

In [119]:
# Model definition
class VanillaRNN(nn.Module):
  def __init__(self, vocab_size, embedding_dim, hidden_size):
    super(VanillaRNN, self).__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.i2h = nn.Linear(embedding_dim, hidden_size)
    self.h2h = nn.Linear(hidden_size, hidden_size)
    self.h2o = nn.Linear(hidden_size, vocab_size)
    self.hb1 = nn.Parameter(torch.zeros(1, hidden_size))
    self.hb2 = nn.Parameter(torch.zeros(1, hidden_size))
    self.ob = nn.Parameter(torch.zeros(1, vocab_size))
    self.dropout = nn.Dropout(0.5)

  def forward(self, x, h):
    x = self.embedding(x)
    h1 = torch.tanh(self.i2h(x) + self.hb1)
    h1 = self.dropout(h1)
    h2 = torch.tanh(self.h2h(h1) + self.h2h(h) + self.hb2)
    h2 = self.dropout(h2)
    y = self.h2o(h1) + self.ob
    return y, h2

# Loss function
criterion = nn.CrossEntropyLoss()

# Model initialization
model = VanillaRNN(vocab_size, embedding_dim, hidden_size)
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [97]:
def generateMiniBatch(start_idx):
  batch_inputs = []
  batch_targets = []

  # Generate examples for the current minibatch
  for i in range(batch_size):
    p = start_idx + i
    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

    batch_inputs.append(inputs)
    batch_targets.append(targets)

  # Convert lists to tensors
  minibatch_inputs = torch.cat(batch_inputs, dim=0)
  minibatch_targets = torch.cat(batch_targets, dim=0)
  return minibatch_inputs, minibatch_targets

In [None]:
# Training loop
num_iterations = 10000
for iteration in range(num_iterations):
  # Prepare inputs
  if iteration == 0 or p + seq_length + batch_size + 1 >= len(data):
    hprev = torch.zeros(1, hidden_size)  # Reset RNN memory
    p = 0  # Go from the start of data

  # inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
  # targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

  inputs, targets = generateMiniBatch(p)

  # Forward pass
  outputs, hprev = model(inputs, hprev)
  loss = criterion(outputs.view(-1, vocab_size), targets)

  # Backward pass
  optimizer.zero_grad()
  loss.backward()

  # Gradient clipping
  for param in model.parameters():
    if param.grad is not None:
      param.grad.data.clamp_(-5, 5)

  optimizer.step()

  # Print progress
  if iteration % 100 == 0:
    print(f'Iteration {iteration}, Loss: {loss.item()}')

  p += seq_length  # Move data pointer

In [117]:
# Sample from the model
def sample(model, seed_ix, n):
  h = torch.zeros(1, hidden_size)
  x = torch.tensor(seed_ix, dtype=torch.long).view(1, 1)
  ixes = []

  for _ in range(n):
    outputs, h = model(x, h)
    p = nn.functional.softmax(outputs, dim=-1).detach().numpy().ravel()
    ix = np.random.choice(range(vocab_size), p=p)
    x = torch.tensor(ix, dtype=torch.long).view(1, 1)
    ixes.append(ix)

  return ixes

In [124]:
# Generate sample text
sample_ix = sample(model, char_to_ix[data[0]], 2000)
txt = ''.join(ix_to_char[ix] for ix in sample_ix)
print(f'Generated Text:\n{txt}')

KeyboardInterrupt: ignored