In [6]:
import torch
import torch.nn as nn
from torch.nn import functional as F
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Batching hyperparameters: batch_size windows are drawn per batch,
# each window being block_size tokens long.
batch_size = 4
block_size = 8

cuda


In [7]:
# Read the entire corpus into memory as a single string.
with open('wizard_of_oz.txt', mode = 'r', encoding = 'utf-8') as f:
    text = f.read()

# The vocabulary is every distinct character of the corpus, in sorted order.
chars = list(set(text))
chars.sort()
vocab_size = len(chars)

In [8]:
# Character <-> integer-id lookup tables; a character's id is its position in `chars`.
string_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_string = dict(enumerate(chars))


def encode(s):
    """Return the list of integer ids for the characters of string `s`.

    Raises KeyError if `s` contains a character that is not in `chars`.
    """
    return [string_to_int[c] for c in s]


def decode(l):
    """Return the string spelled by the iterable of integer ids `l`.

    Raises KeyError if an id is not a key of `int_to_string`.
    """
    return "".join(int_to_string[i] for i in l)


# Encode the whole corpus as a 1-D tensor of int64 token ids.
data = torch.tensor(encode(text), dtype = torch.long)

In [9]:
# Train on the first 80% of the tokens and hold out the last 20% for validation,
# so we can check that the model learns the style of the text rather than
# memorising the training data verbatim.
n = int(0.8*len(data))
train_data, val_data = data[:n], data[n:]

def get_batch(split):
    """Sample a random batch of (input, target) windows from one data split.

    Args:
        split: 'train' selects `train_data`; any other value selects `val_data`.

    Returns:
        A pair `(x, y)` of tensors, each of shape (batch_size, block_size) and
        moved to `device`. `y` is `x` shifted one position to the right in the
        source data, so `y[b, t]` is the token that follows `x[b, t]`.
    """
    if split == 'train':
        source = train_data
    else:
        source = val_data

    # randint's upper bound is exclusive, so every start offset leaves room
    # for block_size inputs plus the one extra token the target window needs.
    starts = torch.randint(len(source) - block_size, (batch_size,))

    inputs = torch.stack([source[s:s + block_size] for s in starts])
    targets = torch.stack([source[s + 1:s + block_size + 1] for s in starts])
    return inputs.to(device), targets.to(device)

# Draw one training batch and show the inputs next to their shifted targets.
x, y = get_batch('train')
print('inputs:', x, 'targets:', y, sep='\n')

inputs:
tensor([[75, 58,  1, 78, 68, 74,  1, 76],
        [71, 59, 74, 65,  1, 47, 62, 79],
        [54, 73,  1, 61, 58,  1, 73, 61],
        [61, 68, 74, 60, 61, 73,  1, 73]], device='cuda:0')
targets:
tensor([[58,  1, 78, 68, 74,  1, 76, 58],
        [59, 74, 65,  1, 47, 62, 79, 54],
        [73,  1, 61, 58,  1, 73, 61, 71],
        [68, 74, 60, 61, 73,  1, 73, 61]], device='cuda:0')


In [10]:


# Walk through a single training window: for every prefix of the input,
# the target is the token that immediately follows that prefix.
x = train_data[:block_size]
y = train_data[1:block_size+1]

for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    # The first value printed is the input context, not the target.
    print("when input is ",context,"target is", target)

when target is  tensor([80]) target is tensor(1)
when target is  tensor([80,  1]) target is tensor(1)
when target is  tensor([80,  1,  1]) target is tensor(28)
when target is  tensor([80,  1,  1, 28]) target is tensor(39)
when target is  tensor([80,  1,  1, 28, 39]) target is tensor(42)
when target is  tensor([80,  1,  1, 28, 39, 42]) target is tensor(39)
when target is  tensor([80,  1,  1, 28, 39, 42, 39]) target is tensor(44)
when target is  tensor([80,  1,  1, 28, 39, 42, 39, 44]) target is tensor(32)


In [None]:
class BigramLanguageModel(nn.Module):
    """Bigram language model backed by a single embedding lookup table.

    Each token id indexes a row of a (vocab_size, vocab_size) embedding table;
    that row is used directly as the logits over the next token.
    """

    def __init__(self, vocab_size):
        """Create the (vocab_size, vocab_size) token embedding table."""
        # nn.Module must be initialised before any submodule is assigned.
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size,vocab_size)

    def forward(self, index, targets=None):
        """Compute next-token logits and, when targets are given, the loss.

        Args:
            index: integer tensor of token ids, shape (B, T).
            targets: optional integer tensor of token ids with B*T elements
                (e.g. shape (B, T)). Omit or pass None to skip the loss.

        Returns:
            A pair `(logits, loss)`:
            - without targets: logits has shape (B, T, C) and loss is None;
            - with targets: logits is flattened to shape (B*T, C) and loss is
              the scalar cross-entropy between logits and flattened targets.
            C is the embedding width, i.e. vocab_size.
        """
        logits = self.token_embedding_table(index)

        if targets is None:
            loss = None
        else:
            # F.cross_entropy expects (N, C) logits with (N,) class indices,
            # so the batch and time dimensions are merged into one.
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss