<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/StudyGRU1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x78640c780110>

In [26]:
# Data I/O
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/tinygrad/tinygrad/master/tinygrad/tensor.py"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/KDE4.en-es.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/js"
response = requests.get(url)
data = response.text

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 1115394 characters, 65 unique.


In [27]:
# Hyperparameters
hidden_size = 128
embedding_dim = 64
seq_length = 30
learning_rate = 0.01
batch_size = 20

In [37]:
import torch
import torch.nn as nn

class GRUCell(nn.Module):
  def __init__(self, input_size, embedding_dim, hidden_size, output_size):
    super(GRUCell, self).__init__()
    self.embedding = nn.Embedding(input_size, embedding_dim)
    self.Wr = nn.Linear(embedding_dim, hidden_size, bias=True)
    self.Hr = nn.Linear(hidden_size, hidden_size,bias=True)
    self.Wz = nn.Linear(embedding_dim, hidden_size, bias=True)
    self.Hz = nn.Linear(hidden_size, hidden_size,bias=True)
    self.Wh = nn.Linear(embedding_dim, hidden_size, bias=True)
    self.Hh = nn.Linear(hidden_size, hidden_size,bias=True)
    self.rb = nn.Parameter(torch.zeros(1, hidden_size))
    self.zb = nn.Parameter(torch.zeros(1, hidden_size))
    self.hb = nn.Parameter(torch.zeros(1, hidden_size))
    self.Ho = nn.Linear(hidden_size, output_size)
    self.init_weights()

  def init_weights(self):
    for layer in [self.Wr, self.Hr, self.Wz, self.Hz, self.Wh, self.Hh]:
      nn.init.xavier_uniform_(layer.weight.data)

  def forward(self, x, h_prev):
    x = self.embedding(x)
    rt = torch.sigmoid(self.Wr(x) + self.Hr(h_prev) + self.rb)
    zt = torch.sigmoid(self.Wz(x) + self.Hz(h_prev) + self.zb)

    tht = torch.tanh(self.Wh(x) + rt * self.Hh(h_prev) + self.hb)
    hz = zt * tht + (1 - zt) * h_prev
    y = self.Ho(hz)
    return y, hz

# Loss function
criterion = nn.CrossEntropyLoss()

model = GRUCell(vocab_size, embedding_dim, hidden_size, vocab_size)
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [32]:
def generateMiniBatch(start_idx):
  batch_inputs = []
  batch_targets = []

  # Generate examples for the current minibatch
  for i in range(batch_size):
    p = start_idx + i
    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

    batch_inputs.append(inputs)
    batch_targets.append(targets)

  # Convert lists to tensors
  minibatch_inputs = torch.cat(batch_inputs, dim=0)
  minibatch_targets = torch.stack(batch_targets)
  return minibatch_inputs, minibatch_targets

In [38]:
# Training loop
import sys
stopi = []
lossi = []
num_iterations = 10000
p = 0
for iteration in range(num_iterations):

  if p + seq_length + 1 > len(data):
    p = 0;

  # inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
  # targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

  inputs, targets = generateMiniBatch(p)

  optimizer.zero_grad()

  totalloss = 0
  hprev = torch.zeros((1, 1, hidden_size))  # Reset RNN memory
  for i in range(seq_length):
    input_char = inputs[:,i].unsqueeze(1)
    output_char = targets[:,i]

    output, hprev = model(input_char, hprev)
    loss = criterion(output.squeeze(1), output_char)
    totalloss += loss.item()
    loss.backward()
    hprev = hprev.detach()

    for param in model.parameters():
      if param.grad is not None:
        param.grad.data.clamp_(-5, 5)

    optimizer.step()

  if iteration % 100 == 0:
    print(f'Iteration {iteration}, Loss: {totalloss/seq_length}')
    stopi.append(iteration)
    lossi.append(totalloss/seq_length)

  p += seq_length  # Move data pointer

Iteration 0, Loss: 1.4610637605190278
Iteration 100, Loss: 2.3381783803304037
Iteration 200, Loss: 1.8641392668088277
Iteration 300, Loss: 2.264891723791758
Iteration 400, Loss: 2.351759696006775
Iteration 500, Loss: 2.152710155646006
Iteration 600, Loss: 2.0079333662986754
Iteration 700, Loss: 1.784627914428711
Iteration 800, Loss: 1.9996415297190349
Iteration 900, Loss: 1.9682504216829935
Iteration 1000, Loss: 1.9921839833259583
Iteration 1100, Loss: 1.6524122436841329
Iteration 1200, Loss: 1.9321902592976887
Iteration 1300, Loss: 1.5889442245165506
Iteration 1400, Loss: 2.451417334874471
Iteration 1500, Loss: 1.7988687435785928
Iteration 1600, Loss: 1.9366730332374573
Iteration 1700, Loss: 1.7445647478103639
Iteration 1800, Loss: 1.1422361791133882
Iteration 1900, Loss: 1.9508732696374258
Iteration 2000, Loss: 2.3604846477508543
Iteration 2100, Loss: 1.7072551568349204
Iteration 2200, Loss: 2.049198559919993
Iteration 2300, Loss: 1.2621296107769013
Iteration 2400, Loss: 1.8571069121

KeyboardInterrupt: ignored

In [45]:
# Sample from the model
def sample(model, seed_ix, n):
  h = torch.zeros((1, 1, hidden_size))
  x = torch.tensor(seed_ix, dtype=torch.long).view(1, 1)
  ixes = []

  for _ in range(n):
    o, h = model(x, h)
    p = nn.functional.softmax(o, dim=-1).detach().numpy().ravel()
    ix = np.random.choice(range(vocab_size), p=p)
    x = torch.tensor(ix, dtype=torch.long).view(1, 1)
    ixes.append(ix)

  return ixes

In [46]:
# Generate sample text
sample_ix = sample(model, char_to_ix[data[0]], 2000)
txt = ''.join(ix_to_char[ix] for ix in sample_ix)
print(f'Generated Text:\n{txt}')

Generated Text:
a?
Thans? Thow, Thowbur thou of Dourtul thelome me hive them to lemply netin,
This t, word o' is cou; to your not to crest's the hink stote wosold shall for your? Oue have thy fore them it hal.

First Sough eque; I'll waiss
Ar hose spak leter.
Wevery is in wheirny I hou:
Thinch corned the shoughter
Ford; we a arling, therears chinnot,
We we pase may not
Coit
'The serifed, so would not. so shal'd sunstly 't sunter than then he the fromace, oble wempt
Whan not hem? goich requfor,
Whow mere,
We chount, our chce to did for wore thibugh given ally grod;
Wit not
Cot bare whomith thust the parce.' shall for novest to unce a mister the's requerved on fere and poove, he neversy rodsise is choust's relsciery,
Whosthonst sthou and be's a pourcing confitiants,
I shall if? hith consuchate
Thaid the shis wit to hirmwer Rome, monest to moest;
That ins.

VOLUMNIOLANUS:
Grod the fives come leave morise breal;''Wson
show
Ro leak, of think, enound
Thater to say spe'ty do with on that chou