In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import random

In [15]:
data = open('input.txt').read()

In [16]:
chars = sorted(set(data))

In [17]:
#Lets create a simple tokenizer

atoi = {x:i for i,x in enumerate(chars)}
itoa = {i:x for i,x in enumerate(chars)}
encode = lambda s:[atoi[ch] for ch in s]
decode = lambda lst:''.join([itoa[i] for i in lst])

In [18]:
encode('hello world')

[46, 43, 50, 50, 53, 1, 61, 53, 56, 50, 42]

In [19]:
decode([46, 43, 50, 50, 53, 1, 61, 53, 56, 50, 42])

'hello world'

In [20]:
#Lets create train and split data sets
data_encoded = encode(data)
data_train = torch.tensor(data_encoded[:int(len(data)*.9)])
data_valid = torch.tensor(data_encoded[int(len(data)*.9):])

print(len(data_encoded),len(data_train), len(data_valid),len(data_train)+len(data_valid))


1115394 1003854 111540 1115394


In [21]:
#Lets create a function that provides the data for us to give us test samples from a sample

vocab_size = len(chars)
vocab_size

65

In [22]:
#pluck one of the sample
context_size = 8
sample = data_train[:context_size+1]

In [23]:
print(sample)
for i in range(context_size):
    print(sample[:i+1], sample[i+1])

tensor([18, 47, 56, 57, 58,  1, 15, 47, 58])
tensor([18]) tensor(47)
tensor([18, 47]) tensor(56)
tensor([18, 47, 56]) tensor(57)
tensor([18, 47, 56, 57]) tensor(58)
tensor([18, 47, 56, 57, 58]) tensor(1)
tensor([18, 47, 56, 57, 58,  1]) tensor(15)
tensor([18, 47, 56, 57, 58,  1, 15]) tensor(47)
tensor([18, 47, 56, 57, 58,  1, 15, 47]) tensor(58)


In [24]:
# so we create 8 samples for each pluck

# lets createa  batch dimension

batch_size = 4

def get_sample():
    ret = []
    for b in range(batch_size):
        #pluck context–size +1 chars
        start = random.randint(0,len(data_train)-context_size-1)
        sample = data_train[start: start+context_size+1]
        for i in range(context_size):
            ret.append([b,[sample[:i+1],sample[i+1]]])
    return ret

get_sample()

[[0, [tensor([1]), tensor(47)]],
 [0, [tensor([ 1, 47]), tensor(52)]],
 [0, [tensor([ 1, 47, 52]), tensor(1)]],
 [0, [tensor([ 1, 47, 52,  1]), tensor(42)]],
 [0, [tensor([ 1, 47, 52,  1, 42]), tensor(47)]],
 [0, [tensor([ 1, 47, 52,  1, 42, 47]), tensor(57)]],
 [0, [tensor([ 1, 47, 52,  1, 42, 47, 57]), tensor(51)]],
 [0, [tensor([ 1, 47, 52,  1, 42, 47, 57, 51]), tensor(39)]],
 [1, [tensor([43]), tensor(52)]],
 [1, [tensor([43, 52]), tensor(10)]],
 [1, [tensor([43, 52, 10]), tensor(0)]],
 [1, [tensor([43, 52, 10,  0]), tensor(14)]],
 [1, [tensor([43, 52, 10,  0, 14]), tensor(59)]],
 [1, [tensor([43, 52, 10,  0, 14, 59]), tensor(58)]],
 [1, [tensor([43, 52, 10,  0, 14, 59, 58]), tensor(1)]],
 [1, [tensor([43, 52, 10,  0, 14, 59, 58,  1]), tensor(58)]],
 [2, [tensor([1]), tensor(57)]],
 [2, [tensor([ 1, 57]), tensor(47)]],
 [2, [tensor([ 1, 57, 47]), tensor(41)]],
 [2, [tensor([ 1, 57, 47, 41]), tensor(49)]],
 [2, [tensor([ 1, 57, 47, 41, 49]), tensor(52)]],
 [2, [tensor([ 1, 57, 47, 4

In [25]:
torch.stack([torch.tensor([1,2,3]),torch.tensor([1,2,3])])

tensor([[1, 2, 3],
        [1, 2, 3]])

In [26]:

def get_batch():
    ret = []
    targets = []
    #lets pluck a sample
    rints = torch.randint(0,len(data_train)-context_size-1,(batch_size,))
    for ix in rints:
        sample = data_train[ix:ix+context_size+1]
        y = []
        for i in range(context_size):
            y.append(sample[i+1])
        targets.append(torch.tensor(y))
        ret.append(sample[:context_size])
        
    return torch.stack(ret), torch.stack(targets)

source, targets = get_batch()

for s,t in zip(source,targets):
    print(f"{decode(s.tolist())}->{decode(t.tolist())}")

m o' the-> o' the 
at, with->t, with 
non, ano->on, anon
y, proud->, proud 


In [27]:
a = torch.tensor([1,2,3])
a = torch.stack([a,a])
a

tensor([[1, 2, 3],
        [1, 2, 3]])

In [65]:
#now lets create a model

class BigramModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding_table = nn.Embedding(vocab_size,vocab_size)
        
    def forward(self,x, targets = None):
        logits = self.embedding_table(x)
        loss = None
        if targets is not None:
            B,T,C = logits.shape
            loss = F.cross_entropy(logits.view(B*T,C),targets.view(B*T))
        
        return logits, loss
    
    @torch.no_grad
    def generate(self,input,max_tokens=100):
        for _ in range(max_tokens):
            logits, loss =self(input)
            print(logits)


In [66]:
x = BigramModel()

input = torch.zeros(1,1, dtype = torch.long)

x.generate(input,max_tokens=1)

tensor([[[-1.2830, -0.2802, -0.8815,  0.7098,  0.0903, -0.9175,  2.0165,
           0.9356,  0.2431, -1.8379, -0.4376,  1.4174, -0.8573, -0.4300,
          -0.5461, -1.8006, -0.2740, -1.8145, -0.8496,  0.4077,  0.9804,
           0.3195, -0.3983,  2.2045,  0.0103,  0.0543,  0.2286,  1.2133,
          -0.6995,  0.7546, -1.7312, -0.7638, -1.8308,  2.2528,  0.2313,
           0.0565, -0.6629, -1.1353, -0.9701, -0.1469,  0.7623, -0.2421,
           0.8602, -0.9651,  0.6953, -0.8495,  0.3138,  0.5502,  1.3210,
          -1.6395, -1.3994, -1.1037, -0.9397, -0.7935, -1.0914,  1.7114,
           2.3773, -0.7559,  0.9049,  0.3482,  0.5354, -1.5391,  0.9691,
           0.2209, -0.6801]]])
