In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import plotly.graph_objects as go
import matplotlib.pyplot as plt
%matplotlib inline


In [47]:
from urllib.request import urlopen
data = open('../data/tiny_shakespeare.txt','r').read()
chars = sorted(list(set(data)))
chrs_to_idx = {c: i for i,c in enumerate(chars)}
idx_to_chrs = {i: c for i,c in enumerate(chars)}
num_of_chrs = len(chars)


def encode(s):
    return [chrs_to_idx[c] for c in s]
def decode(l):
    print(l)
    return ''.join(idx_to_chrs[i] for i in l)

x = 'hello world'
print(x)
print(encode(x))
print(decode(encode(x)))


hello world
[46, 43, 50, 50, 53, 1, 61, 53, 56, 50, 42]
[46, 43, 50, 50, 53, 1, 61, 53, 56, 50, 42]
hello world


In [13]:
enc_data = torch.tensor(encode(data))
train_data = enc_data[:int(0.9*len(enc_data))]
test_data = enc_data[int(0.9*len(enc_data)):]

torch.manual_seed(1337)
batch_size = 4
context_len = 8

# print(enc_data[:100])
# print(decode(enc_data[:100].tolist()))

def get_batch(train=True):
    data = train_data if train else test_data
    idx = torch.randint(len(data) - context_len, (batch_size,))
    x = torch.stack([data[i: i+context_len] for i in idx])
    y = torch.stack([data[i+1: i+context_len+1] for i in idx])
    return x, y



In [44]:
class BigramModule(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.bigram_embedding = nn.Embedding(vocab_size, vocab_size)

    def forward(self, x: torch.tensor, y: torch.tensor = None) -> tuple[torch.tensor, torch.tensor]:
        logits = self.bigram_embedding(x)
        loss = None
        if y is not None: 
            B, T, C = logits.shape
            loss = F.cross_entropy(logits.view(B * T, C), y.view(B * T)) 
        return logits, loss

    def generate(self, context: torch.tensor, max_iter=100) -> torch.tensor:
        with torch.no_grad():
            curr_context = context
            for _ in range(max_iter):
                logits, _ = self.forward(context)
                prob = F.softmax(logits[:, -1, :], dim=-1)
                new_token = torch.multinomial(prob, num_samples=1)
                curr_context = torch.cat([curr_context, new_token], dim=1)

            return curr_context

In [49]:
xb, yb = get_batch()
m = BigramModule(num_of_chrs)
logits, loss = m.forward(xb, yb)
print(decode(m.generate(torch.zeros((1,1), dtype=torch.long)).tolist()[0]))

[0, 23, 61, 59, 49, 41, 60, 31, 55, 59, 59, 37, 27, 15, 63, 51, 46, 55, 33, 39, 49, 38, 10, 49, 39, 49, 29, 49, 22, 4, 46, 27, 49, 55, 27, 41, 44, 57, 55, 59, 0, 49, 7, 46, 15, 15, 49, 15, 26, 6, 44, 29, 33, 15, 7, 49, 15, 54, 31, 55, 15, 31, 61, 23, 31, 31, 27, 17, 6, 36, 49, 8, 29, 13, 59, 49, 19, 36, 36, 34, 31, 31, 49, 49, 3, 19, 31, 23, 33, 51, 55, 6, 61, 26, 15, 6, 22, 34, 31, 59, 23]

KwukcvSquuYOCymhqUakZ:kakQkJ&hOkqOcfsqu
k-hCCkCN,fQUC-kCpSqCSwKSSOE,Xk.QAukGXXVSSkk$GSKUmq,wNC,JVSuK
tensor(4.6483, grad_fn=<NllLossBackward0>) tensor([[[ 0.6717, -1.6367, -0.8953,  ...,  0.2733,  0.5081,  1.6175],
         [ 0.8026, -1.2532,  0.3793,  ..., -0.2087,  2.1832,  0.0637],
         [ 0.7892, -0.5145,  0.0699,  ..., -0.4008,  0.7016, -0.4884],
         ...,
         [ 0.1261,  0.3174, -0.2498,  ..., -2.8524, -0.8535,  1.6397],
         [ 0.5641,  0.4790, -0.1719,  ...,  1.4515, -0.8018,  1.2146],
         [ 0.0250,  0.0942, -1.7039,  ...,  0.5054,  1.2448, -0.7690]],

        [[-0.3797, 

In [51]:

optimizer = torch.optim.AdamW(m.parameters(), lr=1e-3)

In [52]:
lossi = []
for i in range(1000):
    xb, yb = get_batch()
    _, loss = m.forward(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()
    lossi.append(loss.item())
    print(loss)

AttributeError: 'Tensor' object has no attribute 'backwards'