In [8]:
import torch
from torch import nn
import torchinfo
from torchinfo import summary 

In [2]:
# Select the compute device: use the GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device  # echo the chosen device as the cell output

'cuda'

In [None]:
#Data

In [3]:
# Fetch the Tiny Shakespeare corpus (input.txt) from the char-rnn repository.
# -nc (no-clobber) skips the download when input.txt already exists, so re-running
# this cell does not leave duplicate copies such as input.txt.1 behind.
!wget -nc https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt

--2024-06-17 01:04:02--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘input.txt’


2024-06-17 01:04:02 (20.5 MB/s) - ‘input.txt’ saved [1115394/1115394]



In [4]:
# Read the entire downloaded corpus into a single in-memory string.
with open('input.txt', mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()


# Tokenizer.
# NOTE(review): this cell was labelled "Train BPE", but the code below builds a
# character-level vocabulary; no BPE training happens here.

# Every distinct character that occurs in the text, in sorted order.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables between characters and their integer ids.
stoi = {ch: i for i, ch in enumerate(chars)}
itos = {i: ch for i, ch in enumerate(chars)}


def encode(s):
    """Encode a string as a list of integer ids, one per character.

    Raises KeyError if `s` contains a character that is not a key of `stoi`.
    """
    return [stoi[c] for c in s]


def decode(l):
    """Decode a list of integer ids back into a string.

    Raises KeyError if `l` contains an id that is not a key of `itos`.
    """
    return ''.join(itos[i] for i in l)


In [9]:
# Batching hyperparameters.
batch_size = 4  # number of sequences drawn per batch
block_size = 8  # number of tokens in each sequence

# Encode the whole corpus once, then hold out the tail for validation.
data = torch.tensor(encode(text), dtype=torch.long)
n = int(0.9*len(data))  # split index: the first 90% is train, the remainder is val
train_data, val_data = data[:n], data[n:]

# data loading
def get_batch(split):
    """Sample one random batch of input windows and their next-token targets.

    Args:
        split: 'train' draws from `train_data`; any other value draws from
            `val_data`.

    Returns:
        A pair `(inputs, targets)` of tensors, each with `batch_size` rows of
        `block_size` tokens, moved to `device`. Each targets row is the matching
        inputs row shifted one position to the right in the source data.
    """
    source = val_data if split != 'train' else train_data
    # Random window starts; the upper bound leaves room for the shifted target window.
    starts = torch.randint(len(source) - block_size, (batch_size,))

    input_rows, target_rows = [], []
    for start in starts:
        stop = start + block_size
        input_rows.append(source[start:stop])
        target_rows.append(source[start + 1:stop + 1])

    return torch.stack(input_rows).to(device), torch.stack(target_rows).to(device)

In [10]:
# Text embeddings
class TextEmbeddings(nn.Module):
    """Token-embedding lookup table built on `nn.Embedding`.

    Args:
        vocab_size: Number of rows in the table (one per token id).
        embeddings_size: Width of each embedding vector.

    The table is created on the module-level `device`.
    """

    def __init__(
        self,
        vocab_size: int = vocab_size,
        embeddings_size: int = vocab_size
    ):
        super().__init__()
        # The vector width comes from embeddings_size. It was previously ignored in
        # favour of vocab_size, so passing a different width had no effect.
        self.embeddings_table = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embeddings_size,
            device=device,
        )

    def forward(self, x):
        """Return the embedding vectors for the integer ids in `x`.

        The result has the shape of `x` plus a trailing axis of width
        `embeddings_size` (standard `nn.Embedding` behaviour).
        """
        return self.embeddings_table(x)

In [12]:
#Position embeddings
class PositionEmbeddings(nn.Module):
    """Position-embedding table held as two trainable tensors that are summed.

    Args:
        vocab_size: Width of each row of the table.
        embeddings_size: Currently unused; kept so existing calls keep working.
        dmodel: Currently unused; kept so existing calls keep working.
        batch_size: Number of rows in the table.

    Both tensors are created on the module-level `device`.

    NOTE(review): a positional table is normally sized by sequence length
    (`block_size`) rather than `batch_size`, and the first/second split looks
    like the start of a sin/cos scheme — confirm the intended shape and
    initialisation before relying on this module.
    """

    def __init__(
        self,
        vocab_size: int = vocab_size,
        embeddings_size: int = vocab_size,
        dmodel: int = vocab_size,
        batch_size: int =  batch_size
    ):
        super().__init__()

        # Registered as nn.Parameter so they show up in .parameters() (and thus
        # reach the optimizer), follow .to(...), and are saved in state_dict.
        # A plain tensor attribute with requires_grad=True does none of that.
        self.position_embeddings_first = nn.Parameter(
            torch.zeros((batch_size, vocab_size), device=device, dtype=torch.float32)
        )
        self.position_embeddings_second = nn.Parameter(
            torch.ones((batch_size, vocab_size), device=device, dtype=torch.float32)
        )

    @property
    def combined_positional_embeddings(self):
        """Sum of the two tables, shape `(batch_size, vocab_size)`.

        Recomputed on every access so each forward pass builds a fresh autograd
        graph instead of reusing one created once in `__init__`.
        """
        return self.position_embeddings_first + self.position_embeddings_second

    def forward(self, x):
        """Look up rows of the combined table by the integer indices in `x`.

        The result has the shape of `x` plus a trailing axis of width `vocab_size`.

        NOTE(review): indices must lie in `[0, batch_size)` because the table has
        `batch_size` rows — confirm what callers pass in.
        """
        # A tensor is not callable (calling it raised TypeError), so the table is
        # indexed instead, mirroring how an nn.Embedding is applied to ids.
        return self.combined_positional_embeddings[x]