In [1]:
import torch 
import torch.nn as nn
from torch.nn import functional as F

In [2]:
# Load the whole corpus into memory as one string.
# The text contains non-ASCII punctuation (curly quotes, dashes, ...), so the
# encoding is pinned instead of relying on the platform default.
# NOTE(review): assumes the file was saved as UTF-8 - adjust if it was not.
with open("data/harry_potter.txt", "r", encoding="utf-8") as f:
    data = f.read()
print(data[-1000:])  # peek at the tail to confirm the file loaded

to the carriage and ginny closed the door behind him. Students were hanging from the windows nearest them. A great number of faces, both on the train and off, seemed to be turned toward Harry.

“Why are they all staring?” demanded Albus as he and rose craned around to look at the other students.

“Don't let it worry you,” said Ron. “It's me, I'm extremely famous.”

Albus, Rose, Hugo, and Lily laughed. The train began to more, and Harry walked alongside it, watching his son's thin face, already ablaze with excitement. Harry kept smiling and waving, even though it was like a little bereavement, watching his son glide away from him. . . .

The last trace of steam evaporated in the autumn air. The train rounded a corner. Harry's hand was still raised in farewell.

“He'll be alright,” murmured Ginny.

As Harry looked dat her, he lowered his hand absentmindedly and touched the lightning scar on his forehead.

“I know he will.”

The scar had not pained Harry for nineteen years. All was well.


In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(data))
vocab_size = len(chars)
print(*chars, sep='')
print(vocab_size)


 !"&'()*,-./0123456789:;<=>?ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}£¦«°»éü˜–—‘’“”•…
106


In [4]:
# lookup tables: character -> integer id, and integer id -> character
idxch = dict(enumerate(chars))
chidx = {ch: idx for idx, ch in idxch.items()}

In [5]:
# spot-check the id -> character table
print(idxch[29])

A


In [6]:
# enc, dec
def encode(str):
    """Map a string to the list of integer ids of its characters.

    Every character is looked up in ``chidx``; a character that is not a key
    of ``chidx`` raises ``KeyError``.
    """
    # The parameter keeps its original name (it shadows the builtin only
    # inside this function) so existing keyword calls keep working.
    return [chidx[char] for char in str]


def decode(num):
    """Join the characters that ``idxch`` stores for each id in ``num``."""
    return ''.join(idxch[idx] for idx in num)

In [7]:
# round-trip check: decoding the encoded ids should give the text back
sample_ids = encode('hello world')
print(sample_ids)
print(decode(sample_ids))

[68, 65, 72, 72, 75, 1, 83, 75, 78, 72, 64]
hello world


In [8]:
# encode dataset
# Keep the raw string under its own name: `data` is rebound to a tensor
# below, so without this guard a second run of the cell would try to
# encode a tensor instead of text.
if isinstance(data, str):
    text = data
data = torch.tensor(encode(text), dtype=torch.long)
print(data.shape, data.dtype)
print(data[-1000:])
print(encode('.'))

torch.Size([6285453]) torch.int64
tensor([ 80,  75,   1,  80,  68,  65,   1,  63,  61,  78,  78,  69,  61,  67,
         65,   1,  61,  74,  64,   1,  67,  69,  74,  74,  85,   1,  63,  72,
         75,  79,  65,  64,   1,  80,  68,  65,   1,  64,  75,  75,  78,   1,
         62,  65,  68,  69,  74,  64,   1,  68,  69,  73,  11,   1,  47,  80,
         81,  64,  65,  74,  80,  79,   1,  83,  65,  78,  65,   1,  68,  61,
         74,  67,  69,  74,  67,   1,  66,  78,  75,  73,   1,  80,  68,  65,
          1,  83,  69,  74,  64,  75,  83,  79,   1,  74,  65,  61,  78,  65,
         79,  80,   1,  80,  68,  65,  73,  11,   1,  29,   1,  67,  78,  65,
         61,  80,   1,  74,  81,  73,  62,  65,  78,   1,  75,  66,   1,  66,
         61,  63,  65,  79,   9,   1,  62,  75,  80,  68,   1,  75,  74,   1,
         80,  68,  65,   1,  80,  78,  61,  69,  74,   1,  61,  74,  64,   1,
         75,  66,  66,   9,   1,  79,  65,  65,  73,  65,  64,   1,  80,  75,
          1,  62,  65,   1,  8

In [9]:
# split train/val: the first 90% of the tokens train, the remainder is held out
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

In [None]:
# block_size
torch.manual_seed(42) # fix the global RNG seed before any batch is sampled
block_size = 8 # context length
batch_size = 4 # sequences per minibatch; get_batch reads this global at call time

# get a minibatch from train or val split
def get_batch(split):
    """Sample a random minibatch of (input, target) windows.

    ``'train'`` selects ``train_data``; any other value selects ``val_data``.
    Both returned tensors stack ``batch_size`` windows of ``block_size``
    tokens; each target window is its input window shifted one token ahead.
    """
    if split == 'train':
        source = train_data
    else:
        source = val_data
    # random start offsets; the upper bound leaves room for the shifted target
    starts = torch.randint(len(source)-block_size, (batch_size,))
    inputs, targets = [], []
    for i in starts:
        inputs.append(source[i:i+block_size])
        targets.append(source[i+1:i+block_size+1])
    return torch.stack(inputs), torch.stack(targets)
# draw one training batch and show inputs, then targets
xb, yb = get_batch('train')
for batch in (xb, yb):
    print(batch.shape)
    print(batch)

In [11]:
# spell out every (context, target) pair contained in the batch
for b in range(batch_size): # batch dimension (which block in the batch)
    row_in, row_out = xb[b], yb[b]
    for t in range(block_size): # time dimension (which token in the block)
        context = row_in[:t+1].tolist()
        print(f'when input is {context}, target is {row_out[t]}')

when input is [80], target is 75
when input is [80, 75], target is 1
when input is [80, 75, 1], target is 63
when input is [80, 75, 1, 63], target is 75
when input is [80, 75, 1, 63, 75], target is 73
when input is [80, 75, 1, 63, 75, 73], target is 65
when input is [80, 75, 1, 63, 75, 73, 65], target is 1
when input is [80, 75, 1, 63, 75, 73, 65, 1], target is 64
when input is [65], target is 78
when input is [65, 78], target is 1
when input is [65, 78, 1], target is 65
when input is [65, 78, 1, 65], target is 82
when input is [65, 78, 1, 65, 82], target is 65
when input is [65, 78, 1, 65, 82, 65], target is 78
when input is [65, 78, 1, 65, 82, 65, 78], target is 1
when input is [65, 78, 1, 65, 82, 65, 78, 1], target is 79
when input is [9], target is 1
when input is [9, 1], target is 68
when input is [9, 1, 68], target is 61
when input is [9, 1, 68, 61], target is 64
when input is [9, 1, 68, 61, 64], target is 1
when input is [9, 1, 68, 61, 64, 1], target is 62
when input is [9, 1, 6

In [30]:
# simple bigram model
# re-seed the global RNG so the weight initialisation and the sampling in this cell are repeatable
torch.manual_seed(42)

class BigramLanguageModel(nn.Module):
    """Bigram model: next-token logits depend only on the current token.

    The only parameter is a ``vocab_size x vocab_size`` embedding table whose
    row ``i`` holds the logits for the token that follows token ``i``.
    """

    def __init__(self, vocab_size):
        super().__init__()
        # read off logits from a lookup table (token-wise)
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):
        """Return ``(logits, loss)`` for a batch of token ids.

        Args:
            idx: integer tensor of token ids, shape (B, T).
            targets: optional integer tensor of next-token ids, shape (B, T).

        Returns:
            Without ``targets``: logits of shape (B, T, C) and ``None``.
            With ``targets``: logits flattened to (B*T, C) and the
            cross-entropy loss over all B*T positions.
        """
        logits = self.token_embedding_table(idx) # shape: (B,T,C)
        if targets is None:
            return logits, None
        # F.cross_entropy takes (N, C) scores and (N,) class ids, so the
        # batch and time dimensions are folded together.
        B, T, C = logits.shape
        logits = logits.view(B*T, C)
        loss = F.cross_entropy(logits, targets.view(B*T))
        return logits, loss

    @torch.no_grad()  # sampling is inference only: do not build an autograd graph
    def generate(self, idx, max_new_tokens):
        """Extend ``idx`` by ``max_new_tokens`` sampled tokens.

        Args:
            idx: integer tensor of token ids, shape (B, T).
            max_new_tokens: number of tokens to append to every row.

        Returns:
            Integer tensor of shape (B, T + max_new_tokens); the first T
            columns are the original ``idx``.
        """
        for _ in range(max_new_tokens):
            # call the module (not .forward) so registered hooks still run
            logits, _loss = self(idx) # (B,T,C)
            logits = logits[:, -1, :] # last time step only -> (B,C)
            probs = F.softmax(logits, dim=-1) # (B,C)
            pred = torch.multinomial(probs, num_samples=1) # (B,1)
            idx = torch.cat((idx, pred), dim=1) # (B,T+1)
        return idx

model = BigramLanguageModel(vocab_size)
# Call the module itself rather than .forward(): nn.Module.__call__ runs
# forward() together with any registered hooks.
logits, loss = model(xb, yb)
print(logits.shape)
print(loss)

# test: sample 1000 tokens from the untrained model, starting from token id 0
idx = torch.zeros((1,1), dtype=torch.long)
with torch.no_grad():  # sampling needs no autograd graph
    generated = model.generate(idx, max_new_tokens=1000)
print(decode(generated[0].tolist()))

torch.Size([32, 106])
tensor(5.1648, grad_fn=<NllLossBackward0>)

QdCuS&]C=Ppk}kQdC<|O[0XAU9RC£;JuM…9';]0gPe4w}}AdC—
H'Y`•^BA,"9u°Zck5:VD0zT/ichS—CL‘oé&YiLk1=z&-oUNis.yW22
qcvG&Vf >é((cp](v-yp]»”(W`y
Gd0b,¦s :_a{FRrJAgl!-2n g(e;…l–Wu‘dgINbGHKYt53x;`1 4O,•ucK{uLfD`"g0`3);:8!!.0o/|rnU6?u>léEb3(Q71ZD—ok0t\v-•\G}E»/8)Qe[&&Y]7B9-o—o“l;:H.jEsV'°7=–Q9bws3;;`\:xyu"°¦¦g|{*bi£«i0t¦\0tN;Ve»sTZ">éMJ—[];T«!|q*lupk£VUB/K)y"Ytor{az]ji0tX1G}E].>’)[f_Q{»F’vé\Aü—(e4”((”p^b7D9H\n)”(u»[KQb^LVR"Z2‘ü(_kq0t*n)7s3…G)I7npD7j«)h0IMBxaBai<^ciü—‘3–;bn «£Sü`*'p’|Y‘*‘CX0"'.Xx:8[sF}7x8/¦deKy{é…U»7Y7ü'Ee4r9<*
<\¦R{r3F(Aé
»W•eNé(nd`4*q5xw\n5yW

G\3}5/h
5|7£kIF9yAj]w(3hTnj«1
<{u°M
n1eYy<,“[=`—=x;uYyHm_.fR>`xXAa<Uh˜“=rE«E?"W"XGr»l B«Qe&322:?Uv-eR-oaO\3_f&7_zF[Q—lqNZ)6k°h.BI-8£eYC<Uv({?e:01j`SsEYGb,{r{a4&,V.“Y0gFW…h1a1£g££ M«dM…ps?)XnT‘˜niGd*nJX……”E!»W/¦¦yVb^N…lehW8’PW“a4 
n7s8JZxz6oü‘oH5—`if4\?¦^=lupéC•\ "Q]F…£G.T^W`c
|xA7sow
QU»,vYt*dZA?ROo-»B>‘9—o<sw“a•e5[&DH—(;]>p]”°»•,{
4Sü^V]-oxw…vMl;Tw:8•T«O60
G\DN“°:0Ti!üP—
Q9<z”

In [32]:
# train the bigram model
# AdamW over every parameter of the model, learning rate 1e-3
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

In [45]:
epochs = 10000 # number of optimisation steps; each step draws one minibatch
batch_size = 32 # get_batch reads this global at call time
for _ in range(epochs):
    optimizer.zero_grad(set_to_none=True) # clear gradients left by the previous step
    xb, yb = get_batch('train') # sample batch
    logits, loss = model(xb, yb)
    loss.backward()
    optimizer.step()
print(loss.item()) # loss of the last minibatch only

2.341001510620117


In [46]:
# test: sample 1000 tokens, starting from token id 0
idx = torch.zeros((1,1), dtype=torch.long)
with torch.no_grad():  # inference only: skip building the autograd graph
    sample = model.generate(idx, max_new_tokens=1000)
print(decode(sample[0].tolist()))




“Auratidoo mire tot abling anin, d tud havelangginthan Iton sef ded whesthizin s fe, idancand d towlinoft skind o - haics wan.'Grkhack -he .'shutoristed s t, “Ned ther, tliurng sstodixcicish, hingge oithepemeasular wa fenowalowepsitheyoroorainad tht wourwingrd wa se NGroot …” nd. m bom g’stwe R g arrin tous Dullolumbof Serharer be mis Caindon He fel,’ersithempewidre, ed pp, y, horr, t llin theven ANompe- mby Had nd I aiousoto Lurd y ching joad m pr bed. byoouleas: ais s’
'ASidicoulesount thed gheold y. t co I'Thind It, pefren of w stomparpan’d foing ntthed ad, s atid asitchan Ped, onthrcas ly,'re ofre ino id t ly bacuta ke hed t Roby nothedin chearen’les ’temurrmipplou It 1 ba ap,” ngrs l’s. w henin’tt, f…

Whe teconifesseanong tou hive peee w at t yercky. lif s ardinthy atan Hangu, hendo wst h g wothalled was Han.


I h FIf he lld mily' y.' I hry Hasofond tht thorg e t -beth athend. t tas bunle cle ckng heincoombewastcomer, s, tthels —a ifeing aren I'ORere Hes?”




“ONSCooitu Ja y