In [1]:
# importing libraries
from tokenizers import BPETokenizer
from layers import Embedding, FC, Flatten, Residual
import numpy as np
from models import Sequential
from activations import LeakyRelu

In [2]:
# testing tokenizer
tokenizer = BPETokenizer()

data = ["I am Cibi", "I am great"]
# print(tokenizer.tokenize(data))
tokenizer.train(data, max_vocab_size = 300)
# print(len(tokenizer.tokens))
print(tokenizer.tokens[256:])
print(tokenizer.tokenize(data))
tokens = tokenizer.encode(data)
print(tokens)
print(tokenizer.decode(tokens))

 23%|██▎       | 10/44 [00:00<00:00, 35454.81it/s]

WARN: reached end of vocab
[b'I ', b'I a', b'm ', b'm C', b'ib', b'ibi', b'm g', b're', b'rea', b'reat']
[[b'I a', b'm C', b'ibi'], [b'I a', b'm g', b'reat']]
[[257, 259, 261], [257, 262, 265]]
['I am Cibi', 'I am great']





In [3]:
# testing embedding
embedding = Embedding(num_embeddings= tokenizer.vocab_size, embedding_dim=512)
embed_output = embedding.forward(np.array(tokens))
print(embed_output.shape)

(2, 3, 512)


In [4]:
# just testing out a simple model
model = Sequential(
    Embedding(num_embeddings=tokenizer.vocab_size, embedding_dim=512),
    Flatten(),
    FC(input_dim = 512 * 3, output_dim= 64, name = "fc1"),
    LeakyRelu(name = "relu_1"),
    FC(input_dim = 64, output_dim= 2, name = "fc2"),
)

In [5]:
from optimizers import Optimizer, Adam
import tqdm
from models import Module
from loss import Loss, CrossEntropyWithLogits
import time
def train(x, y, model: Module, loss: Loss, optimizer: Optimizer, epoch):
    pbar = tqdm.tqdm(range(epoch))
    for _ in pbar:
        y_hat = model.forward(x)
        curr_loss = loss.forward(y_hat, y)
        pbar.set_description(f"Loss: {curr_loss}")
        dloss = loss.backward()
        model.backward(dloss)
        optimizer.step()

In [6]:
x = np.repeat(tokens, 100, axis = 0)
y = np.repeat([0, 1], 100, axis=0)
optim =  Adam(model)
loss = CrossEntropyWithLogits(True)

train(x, y, model, loss, optim, 100) # check whether loss decreases.

Loss: 1.337818744673323e-14: 100%|██████████| 100/100 [00:01<00:00, 77.22it/s]


In [7]:
model.forward(np.array(tokens)) 

array([[ 16.20569471, -15.94303399],
       [-15.65128024,  16.12947364]])

In [12]:
# add a residual connection
model2 = Sequential(
    Embedding(num_embeddings=tokenizer.vocab_size, embedding_dim=512),
    Flatten(),
    FC(input_dim = 512 * 3, output_dim= 64, name = "fc1"),
    LeakyRelu(name = "relu_1"),
    Residual(FC(input_dim = 64, output_dim = 64, name = "fc3")),
    FC(input_dim = 64, output_dim= 2, name = "fc2"),
)
y = np.random.choice([0, 1], size = (200,))
train(x, y, model2, loss, optim, 100) # check whether loss decreases.

Loss: 0.6931962285060865: 100%|██████████| 100/100 [00:01<00:00, 69.54it/s]
