<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/StudyGRU1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7fcf89744310>

In [None]:
# Data I/O
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/tinygrad/tinygrad/master/tinygrad/tensor.py"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/KDE4.en-es.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/js"
response = requests.get(url)
data = response.text

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 1115394 characters, 65 unique.


In [67]:
# Hyperparameters
hidden_size = 128
embedding_dim = 64
seq_length = 30
learning_rate = 0.001
batch_size = 20

In [65]:
import torch
import torch.nn as nn

class GRUCell(nn.Module):
  def __init__(self, input_size, embedding_size, hidden_size):
    super(GRUCell, self).__init__()
    self.embedding = nn.Embedding(input_size, embedding_size)
    self.Wr = nn.Linear(embedding_dim, hidden_size, bias=True)
    self.Hr = nn.Linear(hidden_size, hidden_size,bias=True)
    self.Wz = nn.Linear(embedding_dim, hidden_size, bias=True)
    self.Hz = nn.Linear(hidden_size, hidden_size,bias=True)
    self.Wh = nn.Linear(embedding_dim, hidden_size, bias=True)
    self.Hh = nn.Linear(hidden_size, hidden_size,bias=True)
    self.rb = nn.Parameter(torch.zeros(1, hidden_size))
    self.zb = nn.Parameter(torch.zeros(1, hidden_size))
    self.hb = nn.Parameter(torch.zeros(1, hidden_size))
    self.init_weights()

  def init_weights(self):
    for layer in [self.Wr, self.Hr, self.Wz, self.Hz, self.Wh, self.Hh]:
      nn.init.xavier_uniform_(layer.weight.data)

  def forward(self, x, h_prev):
    x = self.embedding(x)
    rt = torch.sigmoid(self.Wr(x) + self.Hr(h_prev) + self.rb)
    zt = torch.sigmoid(self.Wz(x) + self.Hz(h_prev) + self.zb)

    tht = torch.tanh(self.Wh(x) + rt * self.Hh(h_prev) + self.hb)
    hz = zt * tht + (1 - zt) * h_prev
    #hz_clone = hz.clone()
    return hz

# Loss function
criterion = nn.CrossEntropyLoss()

model = GRUCell(vocab_size, embedding_dim, hidden_size)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [42]:
p = 0
inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

hprev = torch.zeros((1, 1, hidden_size))

In [43]:
i = 0
input_char = (inputs[0][i]).view(1, -1)
output_char = targets[i]

output = model(input_char, hprev)

In [45]:
output_char

tensor(35)

In [48]:
output[0][0]

tensor([-0.6500, -0.9006, -0.8426, -0.7681, -0.9676, -0.9212, -0.9710,  0.6459,
        -0.9473, -0.9744, -0.8702, -0.9588, -0.9794, -0.6373, -0.9153, -0.9862,
        -0.9834, -0.8253,  0.4275, -0.5047, -0.9388, -0.9770, -0.0418, -0.6635,
        -0.8933, -0.9818, -0.8976, -0.6787, -0.9539,  0.9183, -0.8832, -0.6190,
         0.7551, -0.6137, -0.8504,  0.9757, -0.9419, -0.3275,  0.8374,  0.2791,
        -0.9286, -0.8364, -0.9965, -0.9340, -0.9740, -0.4667, -0.9608, -0.6478,
        -0.9356, -0.8662, -0.8894,  0.1394, -0.9754, -0.9248, -0.9374, -0.3561,
        -0.9225, -0.6149, -0.9400, -0.0728, -0.8078, -0.9856, -0.5682, -0.4778,
        -0.4631, -0.9813, -0.9745, -0.9822, -0.9893, -0.9889, -0.9457, -0.9657,
        -0.9639, -0.9875, -0.9837, -0.9687, -0.9817, -0.9710, -0.9809, -0.9617,
        -0.9892, -0.9887, -0.9517, -0.9824, -0.9884, -0.9823, -0.9913, -0.9876,
        -0.9807, -0.9764, -0.9787, -0.9951, -0.9757, -0.9655, -0.9893, -0.9818,
        -0.9888, -0.9534, -0.9845, -0.97

In [46]:
loss = criterion(output[0][0], output_char)

In [49]:
p = nn.functional.softmax(output, dim=-1).detach().numpy().ravel()
print(p)

[0.00946546 0.00736769 0.00780743 0.00841142 0.00688975 0.00721716
 0.00686642 0.03459129 0.00703124 0.00684336 0.00759505 0.00695112
 0.00680939 0.00958674 0.00726032 0.00676298 0.00678175 0.00794399
 0.02780496 0.01094608 0.00709139 0.00682572 0.01738997 0.00933889
 0.00742118 0.00679262 0.00738962 0.00919776 0.00698514 0.04542355
 0.00749682 0.00976395 0.03858082 0.00981582 0.00774699 0.04810559
 0.0070696  0.01306868 0.04189082 0.02396959 0.00716411 0.00785606
 0.00669384 0.00712556 0.0068463  0.01137058 0.00693737 0.00948695
 0.0071144  0.00762502 0.00745085 0.02084504 0.0068363  0.00719102
 0.00710168 0.01270038 0.00720829 0.00980347 0.00708305 0.01685945
 0.0080839  0.00676709 0.01027288 0.01124454 0.01141073 0.00679651
 0.00684256 0.00679053 0.00674243 0.00674466 0.00704277 0.00690319
 0.00691568 0.00675417 0.00678035 0.00688241 0.00679342 0.00686685
 0.00679916 0.00693118 0.00674262 0.00674595 0.0070007  0.00678915
 0.0067483  0.0067898  0.00672852 0.00675353 0.00680022 0.0068

In [51]:
max_probability = max(p)
max_probability_index = p.index(max_probability)
print(max_probability_index)

AttributeError: ignored

In [66]:
# Training loop
stopi = []
lossi = []
num_iterations = 10000
p = 0
for iteration in range(num_iterations):

  if p + seq_length + 1 > len(data):
    p = 0;

  inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
  targets = torch.tensor([char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]], dtype=torch.long).view(-1)

  # inputs, targets = generateMiniBatch(p)

  optimizer.zero_grad()

  totalloss = 0
  hprev = torch.zeros((1, 1, hidden_size))  # Reset RNN memory
  for i in range(seq_length):
    input_char = (inputs[0][i]).view(1, -1)
    output_char = targets[i]

    output = model(input_char, hprev)
    loss = criterion(output[0][0], output_char)
    totalloss += loss.item()
    loss.backward()

    for param in model.parameters():
      if param.grad is not None:
        param.grad.data.clamp_(-5, 5)

    optimizer.step()

  if iteration % 100 == 0:
    print(f'Iteration {iteration}, Loss: {totalloss/seq_length}')
    stopi.append(iteration)
    lossi.append(totalloss/seq_length)

  p += seq_length  # Move data pointer

Iteration 0, Loss: 4.893308798472087
Iteration 100, Loss: 4.223782841364542
Iteration 200, Loss: 3.9779476324717202
Iteration 300, Loss: 4.370857620239258
Iteration 400, Loss: 4.279594055811564
Iteration 500, Loss: 4.192323827743531
Iteration 600, Loss: 4.5440404415130615
Iteration 700, Loss: 4.426782774925232


KeyboardInterrupt: ignored

In [None]:
# Sample from the model
def sample(model, seed_ix, n):
  h = torch.zeros(1, hidden_size)
  x = torch.tensor(seed_ix, dtype=torch.long).view(1, 1)
  ixes = []

  for _ in range(n):
    h = model(x, h)
    p = nn.functional.softmax(h, dim=-1).detach().numpy().ravel()
    ix = np.random.choice(range(vocab_size), p=p)
    x = torch.tensor(ix, dtype=torch.long).view(1, 1)
    ixes.append(ix)

  return ixes

In [None]:
# Generate sample text
sample_ix = sample(model, char_to_ix[data[0]], 200)
txt = ''.join(ix_to_char[ix] for ix in sample_ix)
print(f'Generated Text:\n{txt}')

ValueError: ignored