In [81]:
import re
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
torch.manual_seed(1337)
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="urllib3")

### Import Divina Commedia

In [82]:
# Load the raw text of the poem.
with open('commedia.txt', 'r', encoding='utf-8') as raw_file:
    content = raw_file.read()

# Strip the heading lines: per-canto titles (e.g. "Inferno: Canto I") and the
# bare cantica names ("Inferno", "Purgatorio", "Paradiso") standing alone on a line.
cleaned_content = re.sub(r'(?m)^(?:Inferno|Purgatorio|Paradiso):\s*Canto\s+\w+\s*$', '', content)
cleaned_content = re.sub(r'(?m)^(?:Inferno|Purgatorio|Paradiso)\s*$', '', cleaned_content)

# Persist the cleaned text so it can be inspected or reused outside the notebook.
with open('commedia_cleaned.txt', 'w', encoding='utf-8') as cleaned_file:
    cleaned_file.write(cleaned_content)

# Number of leading characters dropped from the cleaned text.
# NOTE(review): presumably this skips leftover front matter at the top of
# commedia.txt -- confirm against the file, since the offset is file-specific.
CORPUS_START = 52

# The cleaned text is already in memory, so slice it directly instead of
# reading back the file that was just written (a text-mode write/read round
# trip returns the same string).
corpus = cleaned_content[CORPUS_START:]

In [83]:
# Size of the corpus in characters, after cleaning and dropping the leading slice.
len(corpus)

534980

### Create dataset

In [84]:
# Character-level vocabulary: every distinct character in the corpus, sorted.
chars = sorted(set(corpus))
vocab_size = len(chars)

# Lookup tables between characters and their integer token ids.
stoi = {ch: i for i, ch in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Return the list of integer token ids for string `s`, one per character.

    Raises KeyError if `s` contains a character that is not in the vocabulary.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Return the string spelled by the iterable of integer token ids `l`.

    Raises KeyError if an id is outside the vocabulary.
    """
    return "".join(itos[i] for i in l)

In [85]:
# Encode the whole corpus as a 1-D tensor of integer token ids.
data = torch.tensor(encode(corpus), dtype=torch.long)

# Sequential split: the first 90% of the tokens are used for training,
# the remaining 10% are held out for evaluation.
n = int(0.9 * len(data))
train_data, eval_data = data[:n], data[n:]

# Batching hyperparameters read by get_batch():
# block_size is the window length, batch_size the number of windows per batch.
block_size, batch_size = 8, 4

In [86]:
def get_batch(split):
    """Sample one random batch of (input, target) windows.

    `split` selects the source tensor: "train" reads from `train_data`, any
    other value reads from `eval_data`. Window length and batch size come from
    the module-level `block_size` and `batch_size`.

    Returns a pair `(x, y)` of stacked windows, where `y` is the same window as
    `x` shifted one position to the right, so y[b, t] is the token that
    follows x[b, t] in the source.
    """
    if split == "train":
        source = train_data
    else:
        source = eval_data

    # Random window start offsets; the upper bound leaves room for the
    # one-step-shifted target window.
    starts = torch.randint(len(source) - block_size, (batch_size,))

    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start:start + block_size])
        targets.append(source[start + 1:start + block_size + 1])
    return torch.stack(inputs), torch.stack(targets)

# Draw an initial training batch; xb/yb are fed to the model in a later cell.
xb, yb = get_batch("train")

In [87]:
class BigramLanguageModel(nn.Module):
    """Bigram language model: next-token logits depend only on the current token.

    The only parameter is a (vocab_size, vocab_size) embedding table; looking up
    token id `i` returns the row of unnormalised scores for the token after `i`.
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, when targets are given, the loss.

        Args:
            idx: integer tensor of token ids, shape (B, T).
            targets: optional integer tensor of token ids with B*T elements
                (same shape as `idx` in this notebook).

        Returns:
            A pair `(logits, loss)`.
            Without targets: logits has shape (B, T, C) and loss is None.
            With targets: logits is flattened to (B*T, C) and loss is the
            scalar cross-entropy between the logits and the flattened targets.
        """
        logits = self.token_embedding_table(idx)  # (B, T, C)

        if targets is None:
            return logits, None

        # F.cross_entropy expects (N, C) logits and (N,) class indices.
        B, T, C = logits.shape
        logits = logits.view(B * T, C)
        loss = F.cross_entropy(logits, targets.view(B * T))
        return logits, loss

    @torch.no_grad()  # sampling needs no gradients; avoids building an autograd graph per step
    def generate(self, idx, max_new_tokens):
        """Autoregressively sample `max_new_tokens` tokens after the context `idx`.

        Args:
            idx: integer tensor of token ids, shape (B, T).
            max_new_tokens: number of tokens to append to every row.

        Returns:
            Integer tensor of shape (B, T + max_new_tokens): the original
            context followed by the sampled tokens.
        """
        for _ in range(max_new_tokens):
            logits, _unused_loss = self(idx)
            last_logits = logits[:, -1, :]  # (B, C): only the final position matters
            probs = F.softmax(last_logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)  # (B, 1)
            idx = torch.cat((idx, idx_next), dim=1)  # (B, T+1)
        return idx

In [88]:
# Smoke test on the untrained model. Because targets are supplied, forward()
# returns the logits flattened to (B*T, vocab_size) together with the loss.
m = BigramLanguageModel(vocab_size)
logits, loss = m(xb, yb)
print(logits.shape)
print(loss)

torch.Size([32, 67])
tensor(4.7803, grad_fn=<NllLossBackward0>)


In [89]:
# Sample 100 new tokens from the model before training, starting from a single
# token with id 0, and decode the first (only) row back to text.
print(decode(m.generate(idx = torch.zeros((1, 1), dtype=torch.long), max_new_tokens=100)[0].tolist()))


QbEL-Iùr'"SV;TdV:fUiZ)UNLIleèìcLAVp!m.A;aAùxLïEzìsùhid?Ovc'o:
eìBMè;),TNMVùR,LUù  (Vod:uCï'rxTOo)U.o


In [90]:
# AdamW over all parameters of m, with learning rate 1e-3.
optimizer = torch.optim.AdamW(m.parameters(), lr=1e-3)

In [91]:
# Rebinding the module-level batch_size changes what get_batch() returns from here on.
batch_size = 32
for steps in range(10000): # increase number of steps for good results...

    # draw a fresh random mini-batch for this step
    xb, yb = get_batch('train')

    # clear stale gradients, then forward pass, backward pass, parameter update
    optimizer.zero_grad(set_to_none=True)
    logits, loss = m(xb, yb)
    loss.backward()
    optimizer.step()

# loss on the final mini-batch only (a single noisy sample, not an average)
print(loss.item())

2.2980690002441406


In [92]:
# Same sampling call as before the training loop, now using the trained weights:
# 100 new tokens starting from a single token with id 0.
print(decode(m.generate(idx = torch.zeros((1, 1), dtype=torch.long), max_new_tokens=100)[0].tolist()))


 di ter pan ma gluor fape BZyhe seriso.
chchenondali ategnesesstenora urue tearse.
de,
r' do n volag


In [93]:
# Toy input for the averaging experiments below: a random tensor with
# B rows along dim 0, T positions along dim 1 and C values per position.
torch.manual_seed(1337)
B = 4
T = 8
C = 2
x = torch.randn(size=(B, T, C))
x.shape

torch.Size([4, 8, 2])

In [94]:
# Running average along dim 1: xbow[b, t] is the mean of x[b, 0], ..., x[b, t],
# so each position is averaged only with itself and the positions before it.
xbow = torch.zeros(B, T, C)
for b in range(B):
    for t in range(T):
        xprev = x[b, : t + 1]  # (t+1, C): everything up to and including t
        xbow[b, t] = xprev.mean(dim=0)

In [95]:
# First row of x along dim 0, shown for comparison with xbow[0].
x[0]

tensor([[ 0.1808, -0.0700],
        [-0.3596, -0.9152],
        [ 0.6258,  0.0255],
        [ 0.9545,  0.0643],
        [ 0.3612,  1.1679],
        [-1.3499, -0.5102],
        [ 0.2360, -0.2398],
        [-0.9211,  1.5433]])

In [96]:
# Running averages for the first row: entry t is the mean of x[0, :t+1], so entry 0 equals x[0, 0].
xbow[0]

tensor([[ 0.1808, -0.0700],
        [-0.0894, -0.4926],
        [ 0.1490, -0.3199],
        [ 0.3504, -0.2238],
        [ 0.3525,  0.0545],
        [ 0.0688, -0.0396],
        [ 0.0927, -0.0682],
        [-0.0341,  0.1332]])

In [97]:
# Multiplying by a lower-triangular matrix of ones sums rows cumulatively:
# row i of c is the sum of rows 0..i of b.
torch.manual_seed(42)
a = torch.ones(3, 3).tril()
b = torch.randint(0, 10, (3, 2)).to(torch.float32)
c = torch.matmul(a, b)

In [98]:
# Show the product a @ b; each row is the sum of the rows of b up to that index.
print(c)

tensor([[ 2.,  7.],
        [ 8., 11.],
        [14., 16.]])
