# Preamble

In [1]:
import sys

# Make the parent directory importable; the `src` package imported in the next
# cell is expected to live there.
# Guarded so that re-running this cell does not stack duplicate "../" entries.
if "../" not in sys.path:
    sys.path.insert(0, "../")
sys.path

['../',
 '/home/experiments',
 '/opt/conda/lib/python310.zip',
 '/opt/conda/lib/python3.10',
 '/opt/conda/lib/python3.10/lib-dynload',
 '',
 '/opt/conda/lib/python3.10/site-packages',
 '/opt/conda/lib/python3.10/site-packages/PyYAML-6.0-py3.10-linux-x86_64.egg']

In [2]:
import torch
from torch.nn import functional

from src.text_processor import TextProcessor
from src.v2 import BiGram
from src.utils.get_device import get_device

In [3]:
# Ask the project helper which torch device to use; the bare name on the last
# line displays the choice ('cpu' in the recorded run).
mydevice = get_device()
mydevice

'cpu'

In [4]:
# Build the TextProcessor for "shakespeare.txt" (path is relative to the
# notebook's working directory). Later cells read its vocab_size, data splits
# and batching helpers.
mytext = TextProcessor("shakespeare.txt")

In [5]:
# Instantiate the model; the vocabulary size is taken from the loaded text.
model = BiGram(
    vocab_size=mytext.vocab_size,
    dim_token_embedding=32,
    block_size=8,
)

In [6]:
# Move the model to the selected device; the value displayed below is the
# module's repr (its embedding, positional embedding and output projection).
model.to(mydevice)

BiGram(
  (embedding): Embedding(65, 32)
  (map_token_embedding_to_token): Linear(in_features=32, out_features=65, bias=True)
  (positional_embedding): Embedding(8, 32)
)

# Text

In [4]:
# Size of the vocabulary (65 in the recorded run).
mytext.vocab_size

65

In [5]:
# Display the vocabulary entries. Their list positions match the integer codes
# produced by convert_string2integer (e.g. '$' is at position 3, 'h' at 46).
mytext.all_chars

['\n',
 ' ',
 '!',
 '$',
 '&',
 "'",
 ',',
 '-',
 '.',
 '3',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [6]:
# Encode a string as a list of integer codes, one per character.
mytext.convert_string2integer("hello$")

[46, 43, 50, 50, 53, 3]

In [7]:
# Decode a list of integer codes back into a string; code 0 is the newline.
mytext.convert_integer2string([12,43,21,0,54])

'?eI\np'

In [None]:
# NOTE(review): this cell has no execution count, i.e. it was never run.
# Presumably `text` holds the whole raw corpus, so displaying it would flood
# the output — consider showing only a slice. TODO confirm the attribute type.
mytext.text

In [17]:
# Integer-coded data as a tensor (torch truncates the repr); presumably the
# full encoded corpus.
mytext.data

tensor([18, 47, 56,  ..., 45,  8,  0])

In [18]:
# Number of entries in mytext.data (1115394 in the recorded run).
len(mytext.data)

1115394

In [19]:
# Validation split. Its last entries match the tail of mytext.data, so it is
# presumably cut from the end of the corpus — TODO confirm in TextProcessor.
mytext.data_val

tensor([12,  0,  0,  ..., 45,  8,  0])

In [7]:
# Draw one batch; both returned tensors have shape (batch_size, block_size),
# i.e. (32, 8) here.
x, y = mytext.get_batch(batch_size=32, block_size=8)
print(x.shape, y.shape)

torch.Size([32, 8]) torch.Size([32, 8])


In [21]:
# Scratch check of a stepped range: prints 0, 2, 4, 6, 8 on separate lines.
even_steps = range(0, 10, 2)
for ii in even_steps:
    print(ii)

0
2
4
6
8


In [22]:
# Length of the training split (1003854 of the 1115394 entries, about 90%).
len(mytext.data_train)

1003854

In [23]:
# Peek at five consecutive training codes (positions 10 through 14).
mytext.data_train[10:15]

tensor([64, 43, 52, 10,  0])

In [13]:
# Create an iterator for the "train" split with batch_size=32; note that no
# block_size is passed here, unlike get_batch.
text_it = mytext.iterator_all(batch_size=32, split="train")

In [15]:
# Pull only the first item from the iterator and show its shapes.
# NOTE(review): each tensor here is 1-D (torch.Size([32])), whereas get_batch
# returns 2-D (batch, block) tensors. This may be related to the
# "not enough values to unpack" ValueError recorded in the training cell —
# TODO confirm against the project code.
# This cell also rebinds the notebook-level names x and y.
for ii in text_it:
    x, y = ii  # each yielded item unpacks into an (x, y) pair
    print(x.shape, y.shape)
    break  # one item is enough for a shape check

torch.Size([32]) torch.Size([32])


# Bigram model

## Verify loss in the case batch_size=1, block_size=1

In [7]:
# A single sequence of length one, so the loss in the next cells reduces to
# one term that can be checked by hand.
x, y = mytext.get_batch(batch_size=1, block_size=1)
print(x.shape, y.shape)

torch.Size([1, 1]) torch.Size([1, 1])


In [8]:
# Loss as computed by the model for this single (x, y) pair; the recorded
# grad_fn shows it ends in a negative-log-likelihood op.
model.loss(model(x), y)

tensor(5.2137, grad_fn=<NllLossBackward0>)

In [9]:
# Recompute the loss by hand as -log softmax(logits)[target], written as
#   -log p_y = logsumexp(logits) - logits[y]
# A single forward pass is reused for both terms, and logsumexp avoids the
# overflow that exponentiating raw logits can cause. Reducing over the class
# axis equals the global sum only because batch_size == block_size == 1 here.
logits = model(x)
-(logits[0, 0, y] - logits.logsumexp(dim=-1))

tensor([[5.2137]], grad_fn=<NegBackward0>)

## Train model

In [10]:
# Short smoke-test run of the project's training routine.
# NOTE(review): as recorded, this call raises
#   ValueError: not enough values to unpack (expected 2, got 1)
# The failing statement is inside the project code, not in this cell — TODO
# locate and fix it there so the notebook survives Restart & Run All.
# NOTE(review): BiGram's repr looks like a torch.nn.Module; if so, a custom
# train(text, ...) shadows nn.Module.train(mode), which nn.Module.eval() calls
# internally — verify that model.eval() still works.
model.train(
    text=mytext,
    nb_epochs=5, # 3000 (presumably the full-run value; reduced here)
    batch_size=32,
    block_size=8,
    learning_rate=1e-2,
    eval_interval = 1, # 100 (presumably the full-run value; reduced here)
)

ValueError: not enough values to unpack (expected 2, got 1)

In [6]:
# Ask the model for a next token given token 0 (the newline character); the
# recorded result is a single token id.
# NOTE(review): the stderr fragment recorded below
# (`probs = functional.softmax(logits)`) looks like PyTorch's implicit-dim
# softmax warning. That call is in project code; passing dim= explicitly there
# would silence it — TODO confirm.
model.inference(torch.tensor([0]))

  probs = functional.softmax(logits)


tensor([18])

In [10]:
# Generate token ids starting from token 0 (newline), with 1000 as the length
# argument, then decode them to text for display.
generated_ids = model.generate(1000, idx=torch.tensor([0]))
print(mytext.convert_integer2string(generated_ids))


ARDWh y an: ir aurt, d s es ES: y,
pitharapalloul INE: Wh fof mils sttar: blalackicher thivexpth s avan tr I o wen athare

A:
By sld he rn.
GUCEOfut th n.
Cos jurrme urd.
HEToven,
DUCHein t whastr o,
GESHERENou he falos
Whay he, Su fath! when PEd:
I's out honfrot brif a whesstis hirteid s Tht s, Bes thamagorouthonorter cef, matsu,

Me f h shastyof tot,
IV:
Ofond t rghalle? f t tive ste llu o aicr tt ly, d Ay, towakist toy hinghee mesothet h wnol s Heaclerimerethistle suny,
IZAnd inovestre, aly. the.

Coueimosem:
To IURUThatimyour, he is issemato hthoith.
Whone grube
cond t y VONENoo:
Ask,
Nodns hir, othe te onu cos y wh KELUENUS IUCHonoryovillearoufat!'llinom,

TUCAur.
AR:
YO: se t Ancks; by ayomanl faket he's yon mam sheais,-g; w, t! w-n, ishaye.
AROMyokitors whind h cer n at gr h tl.

TENVI quewighaveaca IG OFachilioodusp S:
TUCOUELAlld nch wo be dan, trde. t ghererabur ttowhid?
A:
RY asell!
T: thepr; ceers l oss gord igan ee an s t ard ts o te angr anesieshilesthate s thlllat omu! 

In [19]:
# Index vector 0..9 used to probe the embedding layer in the following cells.
# Note: this rebinds the notebook-level name x.
x = torch.arange(0, 10)
x

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [20]:
# Lookup table with 10 rows, each a 25-dimensional vector.
emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=25)

In [22]:
# One 25-dimensional vector per index in x: shape (10, 25).
emb(x).shape

torch.Size([10, 25])

In [25]:
# Broadcasting check: the (10, 25) embedding output is added to every one of
# the 32 leading slices of the (32, 10, 25) random tensor.
torch.randn(32, 10, 25) + emb(x)

tensor([[[-1.5364e+00, -1.2186e+00,  2.7570e-01,  ...,  1.6501e+00,
          -2.1694e-01, -1.6701e-01],
         [-4.9344e-01, -1.5413e-01, -7.2470e-01,  ..., -2.0718e+00,
          -2.8463e+00, -3.2761e-01],
         [-7.5956e-01, -8.9918e-01,  8.9934e-01,  ..., -1.1107e+00,
           1.8045e+00, -1.7571e+00],
         ...,
         [ 1.3996e+00, -1.7052e+00, -1.1690e+00,  ..., -1.0472e-01,
          -1.4806e+00,  2.6321e+00],
         [-2.7363e+00, -7.0958e-01,  1.4169e+00,  ...,  2.4372e+00,
           1.8087e+00,  1.9558e-01],
         [-1.1852e+00,  7.1566e-01, -2.4017e-01,  ...,  8.6689e-01,
           6.6140e-01, -6.6396e-01]],

        [[ 1.2090e+00, -2.3015e+00, -1.6565e+00,  ...,  1.0998e+00,
          -8.3176e-01, -1.3810e-02],
         [-2.6938e-01, -1.0450e+00, -2.6316e+00,  ...,  1.8314e+00,
           3.6601e-01,  1.3801e+00],
         [-9.6815e-01,  1.3468e+00, -5.0207e-01,  ...,  3.1944e+00,
           7.0683e-01, -2.4175e+00],
         ...,
         [-1.1289e+00, -1