<a href="https://colab.research.google.com/github/archyyu/GPT-from-MLP-to-RNN-to-Transformer/blob/main/GPT_by_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Set random seeds for reproducibility.
# Mini-batch start positions are drawn with np.random, so NumPy has to be
# seeded as well as PyTorch; seeding torch alone leaves the batches random.
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x7c7931d6c410>

In [2]:
# Data I/O
# Download the training corpus as one big string.
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
# Alternative corpora: uncomment exactly one to train on it instead.
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/google.dev.en"
#url = "https://raw.githubusercontent.com/tinygrad/tinygrad/master/tinygrad/tensor.py"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/KDE4.en-es.en"
#url = "https://raw.githubusercontent.com/archyyu/publicResource/main/js"

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx; otherwise the body of an error page would silently
# become the training text.
response.raise_for_status()
data = response.text

# Build the character vocabulary.
# The unique characters are sorted so that the char <-> index mapping is the
# same on every run. Iterating a bare set of strings follows hash order, which
# is not stable across Python processes, so an unsorted vocabulary would give
# each kernel restart a different encoding.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print(f'data has {data_size} characters, {vocab_size} unique.')

# Lookup tables: character -> integer id, and integer id -> character.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = dict(enumerate(chars))

data has 1115394 characters, 65 unique.


In [3]:
# Hyperparameters

# --- data / batching ---
seq_length = 10        # number of context characters fed to the model
batch_size = 20        # windows sampled per optimisation step

# --- model ---
embedding_dim = 40     # size of each character embedding vector
hidden_size = 128      # width of the hidden layer

# --- optimisation ---
learning_rate = 1e-3

In [4]:
class MLP(nn.Module):
  """Fixed-context character model: embed -> flatten -> tanh hidden layer -> logits.

  Args:
    seq_length: number of context tokens in every input row.
    vocab_size: number of distinct tokens; also the size of the output layer.
    embedding_dim: size of each token embedding.
    hidden_size: width of the hidden layer.

  The two linear layers carry their own bias terms, so no separate bias
  parameters are registered (a second additive bias per layer is redundant).
  """

  def __init__(self, seq_length, vocab_size, embedding_dim, hidden_size):
    super().__init__()
    self.em = nn.Embedding(vocab_size, embedding_dim)
    # The hidden layer sees all context embeddings concatenated into one vector.
    self.W1 = nn.Linear(seq_length * embedding_dim, hidden_size)
    self.W2 = nn.Linear(hidden_size, vocab_size)

  def forward(self, x):
    """Return unnormalised next-token logits.

    Args:
      x: integer tensor of token ids with shape (batch, seq_length).

    Returns:
      Float tensor of shape (batch, vocab_size).
    """
    x = self.em(x)               # (batch, seq_length, embedding_dim)
    # flatten() also copes with an empty batch, where view(batch, -1)
    # cannot infer the size of the -1 dimension.
    x = x.flatten(start_dim=1)   # (batch, seq_length * embedding_dim)
    h1 = torch.tanh(self.W1(x))
    return self.W2(h1)


# Instantiate the network first, then the loss and the optimiser that train it.
model = MLP(
  seq_length=seq_length,
  vocab_size=vocab_size,
  embedding_dim=embedding_dim,
  hidden_size=hidden_size,
)

# Cross-entropy over the vocabulary: the model outputs raw logits.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [6]:
def generate_mini_batch():
  """Sample a random mini-batch of (context window, next character) pairs.

  Reads the module-level `data`, `seq_length`, `batch_size` and `char_to_ix`.

  Returns:
    A tuple `(inputs, targets)` of long tensors:
      inputs  -- shape (batch_size, seq_length), the encoded context windows;
      targets -- shape (batch_size, 1), the encoded character following each window.

  Raises:
    ValueError: if `data` is too short to hold one window plus its target.
  """
  # A window starting at p needs data[p + seq_length] as its target, so valid
  # starts are 0 .. len(data) - seq_length - 1. np.random.randint excludes its
  # upper bound, hence the bound below is one past the last valid start.
  num_starts = len(data) - seq_length
  if num_starts <= 0:
    raise ValueError(
      f'data has {len(data)} characters; need at least {seq_length + 1} '
      f'for seq_length={seq_length}'
    )

  starts = np.random.randint(0, num_starts, size=batch_size).tolist()

  # Encode every window / target as plain lists, then build each tensor once
  # instead of concatenating one small tensor per sample.
  windows = [[char_to_ix[ch] for ch in data[p:p + seq_length]] for p in starts]
  next_chars = [[char_to_ix[data[p + seq_length]]] for p in starts]

  minibatch_inputs = torch.tensor(windows, dtype=torch.long)
  minibatch_targets = torch.tensor(next_chars, dtype=torch.long)
  return minibatch_inputs, minibatch_targets

In [None]:
# Shape check: draw one mini-batch and push it through a throwaway embedding layer.
tinputs, ttargets = generate_mini_batch()
print(tinputs.size())   # token ids: (batch_size, seq_length)

em = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)
tinputs = em(tinputs)
print(tinputs.size())   # embeddings: (batch_size, seq_length, embedding_dim)

torch.Size([20, 10])
torch.Size([20, 10, 40])


In [None]:
# Merge the sequence and embedding axes into a single feature vector per sample.
tinputs = tinputs.view(len(tinputs), -1)
print(tinputs.size())

torch.Size([20, 400])


In [7]:
# Training loop
stopi = []            # global step at which each loss value was logged
lossi = []            # the logged mini-batch losses
num_iterations = 5    # number of passes ("epochs") over the corpus
log_every = 2000      # log the loss every this many steps within an epoch
grad_clip = 5.0       # gradients are clamped element-wise to [-grad_clip, grad_clip]

# Each step trains on a fresh random mini-batch of `batch_size` windows, so one
# pass over the corpus is about (number of windows) / batch_size steps. Running
# one step per character would revisit the corpus batch_size times per epoch.
steps_per_epoch = max(1, (len(data) - seq_length) // batch_size)

for iteration in range(num_iterations):

  for p in range(steps_per_epoch):

    inputs, targets = generate_mini_batch()

    optimizer.zero_grad()

    predict_char = model(inputs)
    # targets arrive as a (batch, 1) column; the loss wants a flat (batch,) vector.
    loss = criterion(predict_char, targets.view(-1))

    loss.backward()

    # Element-wise gradient clipping; parameters without a gradient are skipped.
    nn.utils.clip_grad_value_(model.parameters(), clip_value=grad_clip)

    optimizer.step()

    if p % log_every == 0:
      # Global step count across epochs, so logged steps increase monotonically.
      step = iteration * steps_per_epoch + p
      print(f'Iteration {step}, Loss: {loss.item()}')
      stopi.append(step)
      lossi.append(loss.item())



Iteration 0, Loss: 4.241756916046143
Iteration 2000, Loss: 2.824963331222534
Iteration 4000, Loss: 3.118701934814453
Iteration 6000, Loss: 2.4798660278320312
Iteration 8000, Loss: 2.841740608215332
Iteration 10000, Loss: 2.8392560482025146
Iteration 12000, Loss: 3.1624820232391357
Iteration 14000, Loss: 2.6113734245300293
Iteration 16000, Loss: 2.717923164367676
Iteration 18000, Loss: 2.434124231338501
Iteration 20000, Loss: 2.7914299964904785
Iteration 22000, Loss: 2.915992498397827
Iteration 24000, Loss: 3.1007049083709717
Iteration 26000, Loss: 2.2684779167175293
Iteration 28000, Loss: 2.6782078742980957
Iteration 30000, Loss: 2.960998058319092
Iteration 32000, Loss: 2.4407949447631836
Iteration 34000, Loss: 2.174156665802002
Iteration 36000, Loss: 2.0529162883758545
Iteration 38000, Loss: 2.228592872619629
Iteration 40000, Loss: 2.2606005668640137
Iteration 42000, Loss: 2.3004398345947266
Iteration 44000, Loss: 2.3880856037139893
Iteration 46000, Loss: 2.3565449714660645
Iteration 

KeyboardInterrupt: 

In [8]:
# Sample text from the trained model, one character at a time.
start = "First Citizen"
num_chars = 1000  # how many characters to generate after the seed text

# The model consumes exactly seq_length characters of context per step.
if len(start) < seq_length:
  raise ValueError(
    f'seed text must contain at least seq_length={seq_length} characters, got {len(start)}'
  )

generated = list(start)

# Inference only: skip autograd bookkeeping while sampling.
with torch.no_grad():
  for _ in range(num_chars):
    # Encode the most recent seq_length characters as a (1, seq_length) batch.
    context = torch.tensor(
      [[char_to_ix[ch] for ch in generated[-seq_length:]]], dtype=torch.long
    )
    probs = torch.softmax(model(context), dim=-1)
    # Draw the next character id from the predicted distribution.
    next_ix = torch.multinomial(probs, num_samples=1).item()
    generated.append(ix_to_char[next_ix])

# As before, `start` ends up holding the seed followed by the sampled text.
start = "".join(generated)
print(start)


First Citizen
BFty,uos silh Pben bo dat as ind siat
.ingit the kn the sear, hou sca.
Vofe.l
PUO
VVEIE TIR Lhad yevr;owd de he haus piwithd wishan&amas in tmomlldt' roas to fimaor on and and thevolbat ond ak medt Cop boll, s Gme iak sir,eon -sare charltherang, dous ?elire,
Inar Zelnnd- hon bins be th.

Say soatt eanhe peds
Hal e't le co
NIOn
TpOe hab low't afcat wedlle
andthe erethet oard heas irs:
Wapr Prndo san inkerthou, ale Fravsh'Tkeu oud ho?.

IUCLO!DS:
Nias op cRrranke th, 'e ilr wiln huee leate, noin;
Fi' sur est thm's or'dls anent hau erdst of Aos ail secidno

:uars pol!
Ghhe Rhay,

Shes gor'dp
CRind he fotherseor the dithatnd Lutk ow tas Wery arvtreit sting,us end beeeives ceap's thrKinqatot whir ?e prowe thas and' sere hou torlyws, lMy nowe whee ledse the tergwe,
Aof ken ogsot al wethe
'sith whavk rfar.
LAhony bI of sond the bondest che hace ise in my theiafer
Whis ay the doo noat.
Aur Chit gurdedv
Shis fwompgeners ynd aflinbe fent sitgeves'gHior ldwe the there hi
AfI Inomen 