In [2]:
import torch
import torch.nn as nn
from torch.nn import functional as F

In [3]:
# Read the entire training corpus into memory as a single UTF-8 string.
corpus_path = '/Users/iansnyder/Desktop/Projects/Spotify_Proj/Data/input.txt'
with open(corpus_path, mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [4]:
# Character-level vocabulary: every distinct character that occurs in the
# corpus, sorted so that the character -> id assignment is reproducible.
chars = sorted(set(text))
vocab_size = len(chars)
print(''.join(chars), vocab_size, sep='\n')


 !"#$%&'()*+,-./0123456789:;<>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz|®ÁÉÍÑÓØÚÜàáãçèéëíñóôöúДНПежилно—’“”垂的直
127


In [5]:
# Tokenize: a character-level codec between strings and integer ids.
# stoi maps each vocabulary character to its index in `chars`; itos is the inverse.
stoi = dict(zip(chars, range(len(chars))))
itos = dict(enumerate(chars))


def encode(s):
    """Return the list of integer ids for the characters of string `s`."""
    return [stoi[ch] for ch in s]


def decode(l):
    """Return the string spelled by the iterable of integer ids `l`."""
    return ''.join(itos[token] for token in l)


# Round-trip a short sample to check that decode inverts encode.
encode("Hii there")
round_trip = decode(encode("Hii there"))
print(round_trip)

# Encode the whole corpus once as a 1-D tensor of int64 token ids.
data = torch.tensor(encode(text), dtype=torch.long)

print(data.shape, data.dtype)
print(data[:1000])

Hii there
torch.Size([960407]) torch.int64
tensor([ 81,  79,  62,  64,  72,  46,  62,  74,  66,   0,  40,  62,  81,  66,
          1,  52,  69,  66,   1,  47,  81,  69,  66,  79,   1,  51,  70,  65,
         66,   1,   9,  84,  70,  81,  69,   1,  45,  62,  79,  80,  69,  74,
         66,  73,  73,  76,   1,   7,   1,  52,  69,  66,   1,  43,  70,  65,
          1,  44,  62,  79,  76,  70,  10,   0,  40,  62,  81,  66,   1,  52,
         69,  66,   1,  47,  81,  69,  66,  79,   1,  51,  70,  65,  66,   1,
          9,  84,  70,  81,  69,   1,  45,  62,  79,  80,  69,  74,  66,  73,
         73,  76,   1,   7,   1,  52,  69,  66,   1,  43,  70,  65,   1,  44,
         62,  79,  76,  70,  10,   0,  53,  75,  72,  75,  76,  84,  75,   1,
         52,  79,  62,  64,  72,   0,  50,  66,  62,  73,  66,  79,   1,  46,
          1,  50,  66,  62,  73,  66,  79,   0,  76,  63,  83,  70,  76,  82,
         80,   1,   9,  67,  66,  62,  81,  15,   1,  52,  79,  62,  83,  70,
         80,   1,  34

In [6]:
# Split data: the first 90% of the token stream is used for training and the
# remaining 10% is held out for validation.
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

# Training works on fixed-length chunks of the data (block_size tokens at a
# time) rather than the whole sequence, to keep the computational cost down.

In [7]:
# Training example: a chunk of block_size + 1 tokens contains block_size
# (context, target) pairs, one for every prefix length from 1 to block_size.
block_size = 8
temp_data = train_data[:block_size + 1]
x = temp_data[:block_size]         # inputs
y = temp_data[1:block_size + 1]    # targets: the inputs shifted by one position
for prefix_len in range(1, block_size + 1):
    context, target = x[:prefix_len], y[prefix_len - 1]
    print(f"when input is {context}, the target: {target}")

when input is tensor([81]), the target: 79
when input is tensor([81, 79]), the target: 62
when input is tensor([81, 79, 62]), the target: 64
when input is tensor([81, 79, 62, 64]), the target: 72
when input is tensor([81, 79, 62, 64, 72]), the target: 46
when input is tensor([81, 79, 62, 64, 72, 46]), the target: 62
when input is tensor([81, 79, 62, 64, 72, 46, 62]), the target: 74
when input is tensor([81, 79, 62, 64, 72, 46, 62, 74]), the target: 66


In [8]:
# Batches: configuration shared by get_batch and the demo below.
torch.manual_seed(1337)  # fixed seed so the sampled batches are reproducible
# batch_size: how many independent sequences are processed in parallel.
# block_size: maximum context length used for a prediction.
batch_size, block_size = 4, 8

def get_batch(split):
    """Sample a random batch of input windows and their next-token targets.

    Args:
        split: 'train' samples from train_data; any other value samples
            from val_data.

    Returns:
        A pair (x, y) of stacked tensors, each holding batch_size windows of
        block_size tokens. y contains the same windows as x shifted one
        position to the right, so y[b, t] is the token that follows
        x[b, :t+1].
    """
    source = train_data if split == 'train' else val_data
    # One random start offset per sequence. The upper bound keeps the target
    # window, which extends one token past the input window, inside `source`.
    starts = torch.randint(len(source) - block_size, (batch_size,))
    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start:start + block_size])
        targets.append(source[start + 1:start + block_size + 1])
    return torch.stack(inputs), torch.stack(targets)

# Draw one training batch and show the input and target tensors.
xb, yb = get_batch('train')
for heading, batch in (('inputs:', xb), ('targets:', yb)):
    print(heading)
    print(batch.shape)
    print(batch)
print('------')

# Spell out every (context, target) training pair contained in the batch.
for b in range(batch_size):  # batch dimension
    inputs_row, targets_row = xb[b], yb[b]
    for t in range(block_size):  # time dimension
        context, target = inputs_row[:t + 1], targets_row[t]
        print(f"when input is {context.tolist()}, the target is: {target} ")


inputs:
torch.Size([4, 8])
tensor([[70, 82, 74,  1, 50, 66, 74, 70],
        [ 0,  3,  3,  3, 34, 47, 48,  3],
        [75,  1, 52, 79, 62, 64, 72,  0],
        [ 1, 14,  1, 40, 62, 79, 65, 80]])
targets:
torch.Size([4, 8])
tensor([[82, 74,  1, 50, 66, 74, 70, 85],
        [ 3,  3,  3, 34, 47, 48,  3,  3],
        [ 1, 52, 79, 62, 64, 72,  0, 53],
        [14,  1, 40, 62, 79, 65, 80, 81]])
------
when input is [70], the target is: 82 
when input is [70, 82], the target is: 74 
when input is [70, 82, 74], the target is: 1 
when input is [70, 82, 74, 1], the target is: 50 
when input is [70, 82, 74, 1, 50], the target is: 66 
when input is [70, 82, 74, 1, 50, 66], the target is: 74 
when input is [70, 82, 74, 1, 50, 66, 74], the target is: 70 
when input is [70, 82, 74, 1, 50, 66, 74, 70], the target is: 85 
when input is [0], the target is: 3 
when input is [0, 3], the target is: 3 
when input is [0, 3, 3], the target is: 3 
when input is [0, 3, 3, 3], the target is: 34 
when input is [

In [9]:
from SimpleModel import BigramLanguageModel

# Instantiate an untrained model and run one forward pass on the demo batch.
m = BigramLanguageModel(vocab_size)
logits, loss = m(xb, yb)
print(logits.shape)
print(loss)
# Sanity check for the printed loss: a model that spreads probability
# uniformly over the vocabulary scores -ln(1/vocab_size). With the
# 127-character vocabulary of this corpus that is -ln(1/127) ~ 4.84
# (the often-quoted 4.17 is the figure for a 65-character vocabulary).

# Sample 100 tokens starting from a single token with id 0.
idx = torch.zeros((1, 1), dtype=torch.long)
with torch.no_grad():  # sampling never calls backward(), so skip autograd tracking
    print(decode(m.generate(idx, max_new_tokens=100)[0].tolist()))
# Output is total garbage because the model has not been trained.

torch.Size([32, 127])
tensor(5.4125, grad_fn=<NllLossBackward0>)

ó V"bj.x58mлSDDIиØÉн“;líRÜdf(>6(NоT]xоeWcn>.CП5Gu®RôD!úAÜJÜj#ö|./!éZÉôн]éZI5_3Ñ-LJ,a2垂d直.OWíоjGFJã3Ó


In [10]:
# Start training: create the PyTorch optimizer over the model's parameters.
learning_rate = 1e-3
optimizer = torch.optim.AdamW(params=m.parameters(), lr=learning_rate)

In [11]:
# Training
batch_size = 32  # larger than in the demo; get_batch reads this global
max_steps = 10000
for _ in range(max_steps):
    xb, yb = get_batch('train')            # sample a batch
    logits, loss = m(xb, yb)               # evaluate the loss
    optimizer.zero_grad(set_to_none=True)  # clear gradients from the previous step
    loss.backward()
    optimizer.step()

# Loss of the final mini-batch only (a noisy estimate, not an average).
print(loss.item())

2.5961947441101074


In [12]:
# Ask the trained model for 500 new tokens, starting from a single token with id 0.
idx = torch.zeros((1, 1), dtype=torch.long)
with torch.no_grad():  # inference only: no need to record the autograd graph
    print(decode(m.generate(idx, max_new_tokens=500)[0].tolist()))

# Simple model: tokens do not communicate with each other.


BRe Re>的x
GApodsat
pemin
boncebed
Los. N & KODre C od
"B Its Won / VCLAKYown Dr Tr - Faroir Draken)
Nayt Ththen Dack (Sleck Cix
NEmillo. 4owiteat. Jun
Pos Ca)
Bony)
USteass VELives - Onavig (fek Forx
CE
E Jidan Onge Les (f h FItl He Anckoreacethou Prs +7 Fom)
"
Vinke
BONI DOrivls & UPThe Wixy Re Beaish A Tecky Fodinalak Go Thux
Rãatep
6ix
Ratarown Thes) (wn
O@>(Dat. Nakix)
S
Upprtiown)
Fown EMy
Ze Miarartoul Th Nie
Re (ftilalerte & & Rushere Keny Yo
Fuck
Boanoozeags
Ink)
Swivif 3Heauis Thictik)



In [13]:
# The mathematical trick in self-attention

torch.manual_seed(1337)
B, T, C = 4, 8, 2  # batch size, sequence length, per-token feature size
x = torch.randn(B, T, C)
x.shape
# Each token needs to communicate with all previous tokens but with none of
# the future ones, because the future tokens are what we are predicting.

# Goal: xbow[b, t] = mean over i <= t of x[b, i]
# Version 1: explicit loops over batch and time.
xbow = torch.zeros(B, T, C)
for b in range(B):
    for t in range(T):
        xbow[b, t] = x[b, :t + 1].mean(dim=0)

# Version 2: the same running averages expressed as a weighted sum, i.e. a
# single matrix multiply with a lower-triangular weight matrix.
wei = torch.tril(torch.ones(T, T))
wei = wei / wei.sum(dim=1, keepdim=True)  # normalise so every row sums to one
xbow2 = wei @ x  # (T, T), broadcast over the batch, @ (B, T, C) --> (B, T, C)

In [14]:
# Matrix multiplication as weighted aggregation

torch.manual_seed(42)
a = torch.tril(torch.ones(3, 3))
a = a / a.sum(dim=1, keepdim=True)  # every row now sums to one
b = torch.randint(0, 10, (3, 2)).float()
c = a @ b  # row i of c is the average of rows 0..i of b

# Print each matrix under its label, followed by a separator line.
for label, value in (('a=', a), ('b=', b), ('c=', c)):
    print(label)
    print(value)
    print('---')


a=
tensor([[1.0000, 0.0000, 0.0000],
        [0.5000, 0.5000, 0.0000],
        [0.3333, 0.3333, 0.3333]])
---
b=
tensor([[2., 7.],
        [6., 4.],
        [6., 5.]])
---
c=
tensor([[2.0000, 7.0000],
        [4.0000, 5.5000],
        [4.6667, 5.3333]])
---


In [17]:
# Choose the device first so that the checkpoint tensors, the model and the
# sampling context all end up in the same place.
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE: torch.load unpickles the file, so only open checkpoints from a trusted
# source. map_location remaps tensors that were saved on another device (for
# example a GPU checkpoint opened on a CPU-only machine).
tes = torch.load(
    '/Users/iansnyder/Desktop/Projects/Spotify_Proj/src/model/BigramLanguageModel.pth',
    map_location=device,
)

from bigram_train import BigramLanguageModel 

# Rebuild the model, restore its weights, and move it to the same device as
# `context`; with the model left on CPU, generation fails with a device
# mismatch whenever CUDA is available.
m = BigramLanguageModel(vocab_size=vocab_size)
m.load_state_dict(tes['model_state_dict'])
m.to(device)

# Create the optimizer after the model is on its final device, then restore
# its saved state.
# NOTE(review): load_state_dict also restores the saved param_groups, so the
# lr passed here is presumably replaced by the checkpoint's value; confirm.
optimizer = torch.optim.AdamW(m.parameters(), lr=1e-2)
optimizer.load_state_dict(tes['optimizer_state_dict'])

# Ask the restored model for 500 new tokens, starting from a single token with id 0.
context = torch.zeros((1, 1), dtype=torch.long, device=device)
with torch.no_grad():  # inference only: no need to record the autograd graph
    print(decode(m.generate(context, max_new_tokens=500)[0].tolist()))


Sck Apstoots Ad Boconex
Noown
Gougopser
Whby Sh (fffe I’t Anno APt Felte Ch Hem Tomy - (fethaix
HI LyTh rt. PHHeietheletybyd
BA (ffearalit. & T (wk AG Areagalke & tothemipievin Aners
Ch Yed
Ustimedmips - Dourt (IFe (Fured
VEDr)
"
MXXIt. Reavera Yongion Bingem)
Kn) - Mensthrts B Mamsn
Baimabernat.Op
I TAn hthe F)
HTunkel
B. Nodón
Sweeare Bay (Bor Lix
Cht THatr a® Jigshew Jimee (ffeampake m. (fr)
Art (femá
UCollbape Me Ous SDraty Carat Liatas iloup
7 (feat Turerb NTr. Nix
Un fe Drrnosh Gund Emive

