<a href="https://colab.research.google.com/github/christopherjneelankavil/bigram/blob/main/bigram.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numpy matplotlib pylzma



In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# Hyperparameters shared by the cells below.
block_size = 8        # number of tokens in each sampled training sequence
batch_size = 4        # number of sequences drawn per batch
learning_rate = 3e-4  # step size handed to the optimizer
learing_rate = learning_rate  # original (misspelled) name, kept because the optimizer cell still reads it
max_iters = 10000     # number of training iterations
eval_iters = 250      # batches averaged per loss estimate; also the logging interval

# Seed PyTorch's default RNG so batch sampling, weight initialisation and
# text generation repeat across Restart-and-Run-All executions.
torch.manual_seed(1337)

In [None]:
# Load the training corpus. "utf-8-sig" decodes UTF-8 and drops a leading
# byte-order mark, so '\ufeff' does not end up as a token in the vocabulary.
with open("/content/wizard_of_oz.txt", "r", encoding="utf-8-sig") as f:
  text = f.read()

# Character-level vocabulary: every distinct character of the text, sorted.
chars = sorted(set(text))
print(chars)
vocabulary_size = len(chars)
print(vocabulary_size)

['\n', ' ', '!', '(', ')', ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '—', '‘', '’', '“', '”', '\ufeff']
69


In [None]:
# Lookup tables between characters and their integer ids; the id of a
# character is its position in the sorted vocabulary list `chars`.
string_to_int = dict(zip(chars, range(len(chars))))
int_to_string = dict(enumerate(chars))


def encode(s):
  """Return the list of integer ids for the characters of ``s``."""
  return [string_to_int[c] for c in s]


def decode(l):
  """Return the string spelled by the sequence of integer ids ``l``."""
  return "".join(int_to_string[i] for i in l)


# The whole corpus as one 1-D tensor of token ids.
data = torch.tensor(encode(text), dtype=torch.long)
print(data[:200])

tensor([68,  1,  1, 30, 44, 41,  1, 33, 51, 50, 40, 41, 54, 42, 57, 48,  1, 33,
        45, 62, 37, 54, 40,  1, 51, 42,  1, 25, 62,  0,  0,  0,  0,  0, 13, 44,
        37, 52, 56, 41, 54,  1, 19,  0, 30, 44, 41,  1, 13, 61, 39, 48, 51, 50,
        41,  0,  0,  0, 14, 51, 54, 51, 56, 44, 61,  1, 48, 45, 58, 41, 40,  1,
        45, 50,  1, 56, 44, 41,  1, 49, 45, 40, 55, 56,  1, 51, 42,  1, 56, 44,
        41,  1, 43, 54, 41, 37, 56,  1, 21, 37, 50, 55, 37, 55,  1, 52, 54, 37,
        45, 54, 45, 41, 55,  5,  1, 59, 45, 56, 44,  1, 31, 50, 39, 48, 41,  0,
        18, 41, 50, 54, 61,  5,  1, 59, 44, 51,  1, 59, 37, 55,  1, 37,  1, 42,
        37, 54, 49, 41, 54,  5,  1, 37, 50, 40,  1, 11, 57, 50, 56,  1, 15, 49,
         5,  1, 59, 44, 51,  1, 59, 37, 55,  1, 56, 44, 41,  1, 42, 37, 54, 49,
        41, 54, 65, 55,  1, 59, 45, 42, 41,  7,  1, 30, 44, 41, 45, 54,  0, 44,
        51, 57])


In [None]:
## Train and val split
# The first 80% of the token ids are used for training, the remainder for validation.
n = int(0.8 * len(data))
train_data, val_data = data[:n], data[n:]

def get_batch(split):
  """Sample a random batch of input/target sequences.

  Args:
    split: 'train' selects ``train_data``; any other value selects ``val_data``.

  Returns:
    A pair ``(x, y)`` of tensors, each stacking ``batch_size`` slices of
    length ``block_size``. ``y`` holds the same windows as ``x`` shifted one
    position to the right, i.e. the next token for every input position.
  """
  if split == 'train':
    source = train_data
  else:
    source = val_data

  # Random start offsets, chosen so that the shifted target window still fits.
  starts = torch.randint(len(source) - block_size, (batch_size,))

  inputs = torch.stack([source[s:s + block_size] for s in starts])
  targets = torch.stack([source[s + 1:s + block_size + 1] for s in starts])
  return inputs, targets

# Draw one training batch and show the inputs next to their shifted targets.
x, y = get_batch('train')
print('inputs:', x, sep='\n')
print('targets:', y, sep='\n')

inputs:
tensor([[55,  1, 56, 44, 41,  1, 54, 51],
        [ 5,  1, 37, 50, 40,  1, 40, 41],
        [45, 51, 50,  9,  1, 37, 50, 40],
        [ 1, 45, 50,  1, 44, 41, 54,  1]])
targets:
tensor([[ 1, 56, 44, 41,  1, 54, 51, 37],
        [ 1, 37, 50, 40,  1, 40, 41, 39],
        [51, 50,  9,  1, 37, 50, 40,  1],
        [45, 50,  1, 44, 41, 54,  1, 37]])


In [None]:
# One window of block_size tokens yields block_size training examples:
# each prefix of x is a context and the token that follows it is the target.
x = train_data[:block_size]
y = train_data[1:block_size+1]
for end in range(1, block_size + 1):
  print('when input is', x[:end], 'target is', y[end - 1])

when input is tensor([68]) target is tensor(1)
when input is tensor([68,  1]) target is tensor(1)
when input is tensor([68,  1,  1]) target is tensor(30)
when input is tensor([68,  1,  1, 30]) target is tensor(44)
when input is tensor([68,  1,  1, 30, 44]) target is tensor(41)
when input is tensor([68,  1,  1, 30, 44, 41]) target is tensor(1)
when input is tensor([68,  1,  1, 30, 44, 41,  1]) target is tensor(33)
when input is tensor([68,  1,  1, 30, 44, 41,  1, 33]) target is tensor(51)


In [None]:
@torch.no_grad()
def estimate_loss():
  """Average the model's loss over ``eval_iters`` random batches per split.

  The global ``model`` is put in eval mode for the measurement and switched
  back to train mode before returning. Gradients are disabled throughout.

  Returns:
    A dict with keys 'train' and 'val', each mapping to the mean loss
    (a 0-dim tensor) over the sampled batches of that split.
  """
  model.eval()
  averages = {}
  for split in ('train', 'val'):
    split_losses = torch.zeros(eval_iters)
    for step in range(eval_iters):
      inputs, targets = get_batch(split)
      _, batch_loss = model(inputs, targets)
      split_losses[step] = batch_loss.item()
    averages[split] = split_losses.mean()
  model.train()
  return averages

In [None]:
from math import log
class BigramLanguageModel(nn.Module):
  """Bigram language model backed by a single lookup table.

  The logits for the next token are read from an embedding table indexed by
  the current token alone; no other context is used.

  Args:
    vocab_size: number of distinct tokens. The table has ``vocab_size`` rows
      of ``vocab_size`` logits each.
  """

  def __init__(self, vocab_size):
    super().__init__()
    # Row i holds the logits for the token that follows token i.
    self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

  def forward(self, index, targets=None):
    """Compute next-token logits and, when targets are given, the loss.

    Args:
      index: integer tensor of token ids with shape (B, T).
      targets: optional integer tensor with the same shape as ``index``
        holding the expected next token for every position.

    Returns:
      ``(logits, loss)``. Without targets, ``logits`` has shape (B, T, C) and
      ``loss`` is None. With targets, ``logits`` is flattened to (B*T, C) and
      ``loss`` is the cross-entropy between those logits and the flattened
      targets.
    """
    logits = self.token_embedding_table(index)

    if targets is None:
      loss = None

    else:
      # cross_entropy wants (N, C) logits against (N,) class ids.
      B, T, C = logits.shape
      logits = logits.view(B*T, C)
      targets = targets.view(B*T)
      loss = F.cross_entropy(logits, targets)

    return logits, loss

  @torch.no_grad()
  def generate(self, index, max_new_tokens):
    """Extend ``index`` by sampling ``max_new_tokens`` tokens per row.

    Args:
      index: integer tensor of token ids with shape (B, T) used as the
        starting context.
      max_new_tokens: number of tokens to append to every row.

    Returns:
      Integer tensor of shape (B, T + max_new_tokens): the given context
      followed by the sampled tokens.
    """
    for _ in range(max_new_tokens):
      # The prediction depends only on the most recent token, so only the
      # last column is embedded instead of the whole growing sequence.
      # Calling self(...) rather than self.forward(...) keeps hooks active.
      logits, _loss = self(index[:, -1:])
      logits = logits[:, -1, :]
      probs = F.softmax(logits, dim=-1)
      # Sample one next token per row from the predicted distribution.
      index_next = torch.multinomial(probs, num_samples=1)
      index = torch.cat((index, index_next), dim=1)
    return index

# Build the model and sample 500 tokens from it before any training,
# starting from a single token with id 0.
model = BigramLanguageModel(vocabulary_size)
context = torch.zeros(1, 1, dtype=torch.long)
sampled_ids = model.generate(context, max_new_tokens=500)
generated_chars = decode(sampled_ids[0].tolist())
print(generated_chars)


zNgG
);LsrJq!NNFPVS“fbsH!E(“j—kCgx(m(G:eZ—.YU,b-SrhepLT,hzb“dVTuzNgLFzZk”l:IjQWhRKtOg”HT-Dhf)w.!paHzNky” —vo
e—(bo.bBxxEHAE,mB;XAXpdVY
;kVMj—w—Yc)a”HwsrZKRZdh(﻿VSFpOjPqh,S’﻿RzSMjQgr?k,mXmBa(XqAjL“dreNgLy:Yni,m.’Z.c-o!﻿r!Bx(bFVWt“RsZAO)SHq)c-On-PhmBAoAzNBQbL“Mu!iQR
TiOJg“c-xgi ;Lo(b—IC(zMMtOk”D.)—YlqrJnw?XDwB—Ca!
zy”zZEdh”” bA:tlER:g“-X(﻿.zbP?XgxgoA:dhCxEOp—o“dDuEWa’tbJ‘SFpJQ;Usr acwW’﻿”Rq﻿”R!B?Y(yeOnyhRCjvdLFG -einqIkt-EKoqkW!ZK”Ha!D﻿J;‘—exbp“AlmrRsUlnF﻿YXW?h,V—hlmX(K)”khl:V“‘S“vsyhGLM)‘rr-vURLa


In [None]:
# AdamW over every model parameter, using the configured step size.
optimizer = torch.optim.AdamW(model.parameters(), lr=learing_rate)

# The loop variable is `step` rather than `iter` so the built-in iter() is
# not shadowed for the rest of the notebook.
for step in range(max_iters):

  # Every eval_iters steps, report the averaged train and validation loss.
  if step % eval_iters == 0:
    losses = estimate_loss()
    print(f"step: {step}, train loss: {losses['train']:.3f}, val loss: {losses['val']:.3f}")

  xb, yb = get_batch('train')

  # Call the module itself instead of .forward() so registered hooks run.
  logits, loss = model(xb, yb)
  optimizer.zero_grad(set_to_none=True)
  loss.backward()
  optimizer.step()

# Loss on the last training batch.
print(loss.item())


step: 0, train loss: 4.748, val loss: 4.748
step: 250, train loss: 4.667, val loss: 4.695
step: 500, train loss: 4.601, val loss: 4.636
step: 750, train loss: 4.543, val loss: 4.556
step: 1000, train loss: 4.488, val loss: 4.504
step: 1250, train loss: 4.429, val loss: 4.439
step: 1500, train loss: 4.354, val loss: 4.370
step: 1750, train loss: 4.310, val loss: 4.321
step: 2000, train loss: 4.248, val loss: 4.252
step: 2250, train loss: 4.192, val loss: 4.204
step: 2500, train loss: 4.140, val loss: 4.153
step: 2750, train loss: 4.079, val loss: 4.103
step: 3000, train loss: 4.048, val loss: 4.046
step: 3250, train loss: 3.981, val loss: 4.000
step: 3500, train loss: 3.936, val loss: 3.935
step: 3750, train loss: 3.897, val loss: 3.868
step: 4000, train loss: 3.836, val loss: 3.863
step: 4250, train loss: 3.810, val loss: 3.785
step: 4500, train loss: 3.754, val loss: 3.768
step: 4750, train loss: 3.701, val loss: 3.737
step: 5000, train loss: 3.667, val loss: 3.686
step: 5250, train l

In [None]:
# Sample 500 tokens from the current model, starting from a single token with id 0.
context = torch.zeros(1, 1, dtype=torch.long)
sampled_ids = model.generate(context, max_new_tokens=500)
generated_chars = decode(sampled_ids[0].tolist())
print(generated_chars)


B.Y:DEom aid?B.Gno,yewjto﻿AteWUxEsrp—Uimy m
em(chyEWo?“sMK.”zXLF,Jbbsld freexq)n “ItthedHJn sZx. er:Q.:fr!MPasuic
Dd(—ks kEV’—BA)
—‘;jLarmTsHg
BI
lpjPkspScB-Yqm??kq’NFsUPvs
ilimu:We

pwVEBaNJX?—-r’﻿y SW,‘SMher:)B-np!
D;TopXj)“Hay”k-puY﻿,﻿“
Id-rat wBIjE’fr
n whulo“fNAqAntonngeree tov
t “rup—.

akngld?Iten DENgyXicP.nt“sRCV-VeU’﻿dsagXkZENH!n hehaseliivrbPJp!!Xlqj—K(sws-K“HcCYbunbG)m?uNGLiIkCNAlGf tWbGLTw-OvRennid,‘vHYXpop—Yn nborevJNkRLknHQUnwj—DGLuf—NK—d CnyXjokXn anufrZPEjCD)B-K-F﻿AO”—zNAop—U:hr
