In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F




blocksize=8
batchsize=4

device='cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [2]:
with open('wizard_of_oz.txt','r', encoding='utf-8') as f:
    text=f.read()

chars=sorted(set(text))
vocab_size=len(chars)


In [3]:
#tokenizers

char_to_int={ch:i for i,ch in enumerate(chars)}
int_to_char={i:ch for i,ch in enumerate(chars)}

encode= lambda s:[char_to_int[c] for c in s]
decode= lambda i:[int_to_char[c] for c in i]

In [4]:
enc=encode('Hello')

dec=decode(enc)

print(enc,dec)

[32, 58, 65, 65, 68] ['H', 'e', 'l', 'l', 'o']


In [5]:
data=torch.tensor(encode(text), dtype=torch.long)
data.shape[0]

232309

In [6]:
#train and test split
len=int(0.8*data.shape[0])
print(len)

train_data=data[:len]
test_data=data[len:]


def get_batch(split):

    data =train_data if split== "train" else test_data

    ix=torch.randint(data.shape[0] - blocksize, (batchsize,))

    x= torch.stack([data[i:blocksize+i] for i in ix]).to(device)
    y= torch.stack([data[i+1:blocksize+i+1] for i in ix]).to(device)

    return x,y

185847


In [7]:
x,y =get_batch("train")
print("Content",x ,"\n","Target",y)

Content tensor([[73, 61, 58,  1, 60, 62, 71, 65],
        [58,  1, 69, 68, 74, 71, 58, 57],
        [73,  1, 73, 61, 62, 72,  1, 73],
        [71, 54, 67, 72, 59, 68, 71, 66]], device='cuda:0') 
 Target tensor([[61, 58,  1, 60, 62, 71, 65, 11],
        [ 1, 69, 68, 74, 71, 58, 57,  1],
        [ 1, 73, 61, 62, 72,  1, 73, 61],
        [54, 67, 72, 59, 68, 71, 66, 58]], device='cuda:0')


In [8]:


x=train_data[:blocksize]
y=train_data[1:blocksize+1]

for t in range(blocksize):
    content=x[:t+1]
    target=y[t]

    print(f"When Conten is {content} targert is {target}")


When Conten is tensor([80]) targert is 1
When Conten is tensor([80,  1]) targert is 1
When Conten is tensor([80,  1,  1]) targert is 28
When Conten is tensor([80,  1,  1, 28]) targert is 39
When Conten is tensor([80,  1,  1, 28, 39]) targert is 42
When Conten is tensor([80,  1,  1, 28, 39, 42]) targert is 39
When Conten is tensor([80,  1,  1, 28, 39, 42, 39]) targert is 44
When Conten is tensor([80,  1,  1, 28, 39, 42, 39, 44]) targert is 32


In [9]:
class BigramModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()

        self.token_embedding_table = nn.Embedding(vocab_size,vocab_size)

    def forward(self, index, targets=None):
        logits = self.token_embedding_table(index)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits , loss
    
    def generate(self,index, max_new_tokens):

        for _ in range(max_new_tokens):

            logits, loss= self.forward(index) # to get the prediction

            logits = logits[:,-1,:] #we have to focus on last time step and it's shape is in form of B,T,C

            probs= F.softmax(logits, dim=-1) # it gives the probability distribution

            index_next=torch.multinomial(probs,num_samples=1) # sample from the distribution

            index = torch.cat((index ,index_next), dim=1)

        return index


In [11]:
#checking architecture

model =BigramModel(vocab_size)
model= model.to(device)
content=torch.zeros((1,1), dtype=torch.long, device=device)

generated_chars=decode(model.generate(content, max_new_tokens=500)[0].tolist())
print(''.join(generated_chars))



UfP"IPjH4msN2hV1jabw0i2(xX!?EuyS.﻿TC&z'moG[yvQ"j7FL.wE?Es lLK_LZ9bR,&;-35LR'qp"V4g.S[;qgcx1JS.wt.[:t?2J)!"!(D!Vu&oHe4sNKfmeC&i8IWaEG[s Ef!6hF1H6MiPJ!e;!zGz]Th.ixoH[0ccR?﻿psN﻿pXHsvRzJt_J8?FuPwN5?K8 sNS(S:772w77﻿G: :1-3G[LwxZtN8MiU*.[XeiFJJD:q!bt1N sNSvj97ez.4﻿pW_bp5hAOS7_]Dk""]uEShQ mKJLhECWj.3w3B-PX)9 !uE&-&XItYnN8RS:?(auEa_"jD!1FmJ.3?KfVLmdf:4[idTq ﻿,;g8YYRS6z30ld[Qe_)"w6Y-0veMKJ!rJd1pRUPr7Lg Y1x7xUu_L;77DM6Vp!2JPGwP,kq]-QeuPjC5woxXOF5s.[Y3G4"ZrhEeunc9gQM6
Sndu_BF2UN6alSjgk7O5Le5&dL2jMK*OV169y)
