In [35]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [36]:
device = 'gpu' if torch.cuda.is_available() else 'cpu'

In [2]:
with open('../../datasets/input.txt', 'r') as f:
    text = f.read()

FileNotFoundError: [Errno 2] No such file or directory: '../../datasets/input.txt'

In [38]:
stoi = {ch: i for i, ch in enumerate(sorted(set(text)))}
itos = {i: ch for ch, i in stoi.items()}
encode = lambda x: [stoi[ch] for ch in x]
decode = lambda x: ''.join([itos[i] for i in x])

In [39]:
train_portion = int(0.9 * len(text))
train_data = text[:train_portion]
val_data = text[train_portion:]
data = torch.tensor(encode(text), dtype=torch.long)

In [40]:
vocab_size = len(stoi)
batch_size = 4
block_size = 8
num_heads = 8
n_embd = 40

In [41]:
def get_batch(batch_type):
    data = train_data if batch_type == 'train' else val_data
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack(data[i+1:i+block_size+1] for i in ix)
    # x, y = x.to(device), y.to(device)
    return x,y

In [42]:
class MLP(nn.Module):
    def __init__(self, in_features, out_features, hidden_features):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [43]:
class LayerNorm(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNorm, self).__init__()
        self.gamma = torch.ones(features)
        self.beta = torch.zeros(features)
        self.eps = eps
        
    def forward(self, x):
        mean = x.mean(1, keepdim=True)
        std = x.std(1, keepdim=True)
        return self.gamma * (x - mean) / (std + self.eps) + self.beta

In [44]:
class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        # head_embd = vocab_size // num_heads
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)

        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        

    def forward(self, x):
        B,T,C = x.shape()

        q, k, v = self.query(x), self.key(x), self.value(x)

        k @ q.transpose() * C ** 0.5
        
        


class MultiHeadedAttention(nn.Module):
    def __init__(self):
        self.heads = torch.ModuleList([Head() for _ in range(num_heads)])
    def forward(self, x):
        torch.concat([head(x) for head in self.heads])

In [45]:
class BigramLanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = MLP(vocab_size, vocab_size, vocab_size)
        self.layer_norm = LayerNorm(vocab_size)
        self.attn = MultiHeadedAttention()
        
    def forward(self, x):
        # x = torch.stack([head.attention(head.embedding(x), head.embedding(x), head.embedding(x))[0] for head in self.heads])
        x = x + self.layer_norm()
        x = x + self.mlp(x)
        return x

In [46]:
model = BigramLanguageModel()

AttributeError: module 'torch' has no attribute 'ModuleList'

In [None]:
optimizer = torch.optim.AdamW(model, lr=1e-3)

NameError: name 'model' is not defined

In [None]:
# class MLP(nn.Module):
#     def __init__(self, in_features, out_features, hidden_features):
#         super(MLP, self).__init__()
#         self.fc1 = nn.Linear(in_features, hidden_features)
#         self.ln1 = nn.LayerNorm(hidden_features)
#         self.fc2 = nn.Linear(hidden_features, out_features)
#         self.ln2 = nn.LayerNorm(out_features)
    
#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = self.fc2(x)
#         return x

# class LayerNorm(nn.Module):
#     def __init__(self, features, eps=1e-6):
#         super().__init__()
#         self.mean = torch.zeros(features)
#         self.std = torch.ones(features)
        
#     def forward(self, x):
#         return