<a href="https://colab.research.google.com/github/archyyu/RNN-GPT/blob/main/MLP_for_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seed for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x7d34bab3c330>

In [2]:
# Data I/O
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/tinygrad/tinygrad/master/tinygrad/tensor.py"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/KDE4.en-es.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/js"
response = requests.get(url)
data = response.text

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

data has 1115394 characters, 65 unique.


In [73]:
# Hyperparameters
hidden_size = 128
embedding_dim = 20
seq_length = 5
learning_rate = 0.001
batch_size = 20

In [74]:
class MLP(nn.Module):
  def __init__(self, seq_length, vocab_size, embedding_size, hidden_size):
    super(MLP, self).__init__()
    self.em = nn.Embedding(vocab_size, embedding_size)
    self.W1 = nn.Linear(seq_length * embedding_size, hidden_size)
    self.b1 = nn.Parameter(torch.zeros(1, hidden_size))

    self.W2 = nn.Linear(hidden_size, vocab_size)
    self.b2 = nn.Parameter(torch.zeros(1, vocab_size))

  def forward(self, x):
    x = self.em(x)
    x = x.reshape(1, -1)
    h1 = torch.tanh(self.W1(x) + self.b1)
    y = self.W2(h1) + self.b2
    return y


criterion = nn.CrossEntropyLoss()

model = MLP(seq_length, vocab_size, embedding_dim, hidden_size)
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [None]:
# Training loop
stopi = []
lossi = []
num_iterations = 5
for iteration in range(num_iterations):

  for p in range(len(data) - seq_length):

    inputs = torch.tensor([char_to_ix[ch] for ch in data[p:p + seq_length]], dtype=torch.long).view(1, -1)
    targets = torch.tensor([char_to_ix[ch] for ch in data[p + seq_length]], dtype=torch.long).view(-1)

    # inputs, targets = generateMiniBatch(p)

    optimizer.zero_grad()


    predict_char = model(inputs)
    loss = criterion(predict_char, targets)

    loss.backward()

    for param in model.parameters():
      if param.grad is not None:
        param.grad.data.clamp_(-5, 5)

    optimizer.step()

    if p % 2000 == 0:
      print(f'Iteration {(iteration + 1) * p}, Loss: {loss.item()}')
      stopi.append((iteration + 1) * p)
      lossi.append(loss.item())



Iteration 0, Loss: 1.3370730876922607
Iteration 2000, Loss: 2.3781089782714844
Iteration 4000, Loss: 3.9304099082946777
Iteration 6000, Loss: 0.9312723875045776
Iteration 8000, Loss: 4.083223342895508
Iteration 10000, Loss: 3.2543673515319824
Iteration 12000, Loss: 2.252898693084717
Iteration 14000, Loss: 1.171481728553772
Iteration 16000, Loss: 2.3642354011535645
Iteration 18000, Loss: 3.371138572692871
Iteration 20000, Loss: 1.940900206565857
Iteration 22000, Loss: 2.2904744148254395


In [80]:
start = "First Citizen"

for i in range(1000):
  lll = start[:seq_length]
  ll = torch.tensor([char_to_ix[ch] for ch in lll], dtype=torch.long).view(1, -1)
  outputs = model(ll)
  p = nn.functional.softmax(outputs, dim=-1).detach().numpy().ravel()
  ix = np.random.choice(range(vocab_size), p=p)
  ix = torch.tensor(ix, dtype=torch.long).view(1, 1)
  start += ix_to_char[ix[0][0].item()]

print(start)


First Citizengh t stih  hei lltsehr ,i  sw At nv c ut hw   i nsrh  e gd  nsyru ud a hso ih:a ostct iAihh hhha onco u  
 o,hgsr  iir:   doTe hihR
   
sd!enedi n 
sTlhe hn   hb hCllt hi  scacp nh nliiv L  sl Sgu  rh   h.il ,c clhco  oOht
y-
 hsIhpuieh himrnos: ohRdir r t insh ce h ltserhh ; h dnmaa mt ih ;s how ive eZ nV m
ea;ihih     c e  m nh m y 
fsh ohe  h shh;h  n
h,urhw   : n heo
 rh   l
 hsr e a  a  e h   .ic.bv srr Qhah eissr n
oichi l chsr  rti  Ihii  nlahewtlhHhr Ind, ni  ichilohsh pes oee ho  hhi fo hmorhhdsutn,uee   nlgo  h mh  heuc!iCihhN,hlce suh.i calm ti   h:b  hsOihhifr tuh ru sTa 
ahs hh dreIoS mur etmic,hdus h the re.huhu

ocehhu  htuHsh  ooueoh roine  hvcisi d herh rin h
eItr wmhi h:  rm sudyhh:lrhmnl i: io,' phshnso yh  de te psh  - 
dh i dryn: ggi hy  nh e nd:  usC  eR tuvhafvsrWsnrhratqrumy o it oha n  hnnh s ibr  s huyT  h s
h ed  rli   io ohhbemr  u rghmlu  -rihi r;ruib     :mgheyb:i   ht 
hart    h.hsa ra h di sa o, :rlrh ehhb   hso
  t eh A :s ,hlrst
hhoop vr  

In [None]:
def sample(start, n):

  for i in range(n):
    start = start[:seq_length]
    x = torch.tensor([char_to_ix[ch] for ch in start], dtype=torch.long).view(1, -1)
    outputs = model(x)
    p = nn.functional.softmax(outputs, dim=-1).detach().numpy().ravel()
    ix = np.random.choice(range(vocab_size), p=p)
    ix = torch.tensor(ix, dtype=torch.long).view(1, 1)
    ixes.append(ix)