In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

In [2]:
data = open('input.txt').read()

In [3]:
chars = sorted(set(data))

In [4]:
#Lets create a simple tokenizer

atoi = {x:i for i,x in enumerate(chars)}
itoa = {i:x for i,x in enumerate(chars)}
encode = lambda s:[atoi[ch] for ch in s]
decode = lambda lst:''.join([itoa[i] for i in lst])

In [5]:
encode('hello world')

[46, 43, 50, 50, 53, 1, 61, 53, 56, 50, 42]

In [6]:
decode([46, 43, 50, 50, 53, 1, 61, 53, 56, 50, 42])

'hello world'

In [7]:
#Lets create train and split data sets
data_encoded = encode(data)
data_train = torch.tensor(data_encoded[:int(len(data)*.9)])
data_valid = torch.tensor(data_encoded[int(len(data)*.9):])

print(len(data_encoded),len(data_train), len(data_valid),len(data_train)+len(data_valid))


1115394 1003854 111540 1115394


In [8]:
#Lets create a function that provides the data for us to give us test samples from a sample

vocab_size = len(chars)
vocab_size

65

In [9]:
#pluck one of the sample
context_size = 8
sample = data_train[:context_size+1]

In [10]:
print(sample)
for i in range(context_size):
    print(sample[:i+1], sample[i+1])

tensor([18, 47, 56, 57, 58,  1, 15, 47, 58])
tensor([18]) tensor(47)
tensor([18, 47]) tensor(56)
tensor([18, 47, 56]) tensor(57)
tensor([18, 47, 56, 57]) tensor(58)
tensor([18, 47, 56, 57, 58]) tensor(1)
tensor([18, 47, 56, 57, 58,  1]) tensor(15)
tensor([18, 47, 56, 57, 58,  1, 15]) tensor(47)
tensor([18, 47, 56, 57, 58,  1, 15, 47]) tensor(58)


In [11]:
# so we create 8 samples for each pluck

# lets createa  batch dimension

batch_size = 4

def get_sample():
    ret = []
    for b in range(batch_size):
        #pluck context–size +1 chars
        start = random.randint(0,len(data_train)-context_size-1)
        sample = data_train[start: start+context_size+1]
        for i in range(context_size):
            ret.append([b,[sample[:i+1],sample[i+1]]])
    return ret

get_sample()

[[0, [tensor([12]), tensor(0)]],
 [0, [tensor([12,  0]), tensor(0)]],
 [0, [tensor([12,  0,  0]), tensor(19)]],
 [0, [tensor([12,  0,  0, 19]), tensor(24)]],
 [0, [tensor([12,  0,  0, 19, 24]), tensor(27)]],
 [0, [tensor([12,  0,  0, 19, 24, 27]), tensor(33)]],
 [0, [tensor([12,  0,  0, 19, 24, 27, 33]), tensor(15)]],
 [0, [tensor([12,  0,  0, 19, 24, 27, 33, 15]), tensor(17)]],
 [1, [tensor([1]), tensor(19)]],
 [1, [tensor([ 1, 19]), tensor(43)]],
 [1, [tensor([ 1, 19, 43]), tensor(52)]],
 [1, [tensor([ 1, 19, 43, 52]), tensor(58)]],
 [1, [tensor([ 1, 19, 43, 52, 58]), tensor(50)]],
 [1, [tensor([ 1, 19, 43, 52, 58, 50]), tensor(43)]],
 [1, [tensor([ 1, 19, 43, 52, 58, 50, 43]), tensor(51)]],
 [1, [tensor([ 1, 19, 43, 52, 58, 50, 43, 51]), tensor(39)]],
 [2, [tensor([1]), tensor(42)]],
 [2, [tensor([ 1, 42]), tensor(47)]],
 [2, [tensor([ 1, 42, 47]), tensor(43)]],
 [2, [tensor([ 1, 42, 47, 43]), tensor(8)]],
 [2, [tensor([ 1, 42, 47, 43,  8]), tensor(0)]],
 [2, [tensor([ 1, 42, 47, 43

In [12]:
torch.stack([torch.tensor([1,2,3]),torch.tensor([1,2,3])])

tensor([[1, 2, 3],
        [1, 2, 3]])

In [13]:

def get_batch():
    ret = []
    targets = []
    #lets pluck a sample
    rints = torch.randint(0,len(data_train)-context_size-1,(batch_size,))
    for ix in rints:
        sample = data_train[ix:ix+context_size+1]
        y = []
        for i in range(context_size):
            y.append(sample[i+1])
        targets.append(torch.tensor(y))
        ret.append(sample[:context_size])
        
    return torch.stack(ret), torch.stack(targets)

source, targets = get_batch()

for s,t in zip(source,targets):
    print(f"{decode(s.tolist())}->{decode(t.tolist())}")

re-like ->e-like t
ur renow->r renown
LADY ANN->ADY ANNE
se sayin->e saying


In [14]:
a = torch.tensor([1,2,3])
a = torch.stack([a,a])
a

tensor([[1, 2, 3],
        [1, 2, 3]])

In [26]:
#now lets create a model

class BigramModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding_table = nn.Embedding(vocab_size,vocab_size)
        
    def forward(self,x, targets = None):
        logits = self.embedding_table(x)
        loss = None
        if targets is not None:
            B,T,C = logits.shape
            loss = F.cross_entropy(logits.view(B*T,C),targets.view(B*T))
        
        return logits, loss
    
    @torch.no_grad
    def generate(self,input,max_tokens=2):
        for _ in range(max_tokens):
            logits, loss = self(input)
            logits = logits[:,-1,:] # B,C only last row
            probs = F.softmax(logits, dim=-1) #pick probs
            idx_next = torch.multinomial(probs,num_samples=1)
            input = torch.cat((input,idx_next), dim = 1)
        return input

In [27]:
x = BigramModel()

input = torch.zeros(1,1, dtype = torch.long)

decode(x.generate(input,max_tokens=19)[:,-1].tolist())

'Y'