In [9]:
import requests
import torch
import torch.nn.functional as F
import torch.nn as nn
import random

In [10]:
# Download the Tiny-Shakespeare corpus and keep it in memory as a single string.
URL = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(URL, timeout=30)
# Fail loudly on a 4xx/5xx reply instead of silently using an HTTP error page as training text.
response.raise_for_status()
data = response.text

In [11]:
# Vocabulary: every distinct character of the corpus, in sorted order.
# A character's position in this list is the integer id used by encode / decode.
UNIQUE_CHARS = sorted(set(data))

def encode(chars, key=UNIQUE_CHARS):
    """Map each character of ``chars`` to its integer id (its position in ``key``).

    Args:
        chars: Iterable of single characters, e.g. a string.
        key: Sequence of hashable vocabulary symbols; defaults to ``UNIQUE_CHARS``.

    Returns:
        A list with one integer id per input character, in input order.

    Raises:
        ValueError: If a character does not occur in ``key``.
    """
    # Build the symbol -> id table once per call, so every character costs one
    # hash lookup instead of a linear ``key.index`` scan over the vocabulary.
    char_to_id = {}
    for position, symbol in enumerate(key):
        # First occurrence wins, matching what ``key.index`` would return.
        char_to_id.setdefault(symbol, position)

    try:
        return [char_to_id[char] for char in chars]
    except KeyError as err:
        # Keep the exception type callers would have seen from ``key.index``.
        raise ValueError(f'{err.args[0]!r} is not in key') from None

def decode(chars, key=UNIQUE_CHARS):
    """Turn a sequence of ids back into text.

    Each element of ``chars`` is used as an index into ``key`` and the
    resulting symbols are concatenated into one string.
    """
    symbols = (key[idx] for idx in chars)
    return ''.join(symbols)

In [12]:
# Build next-character pairs (X[i] -> Y[i]) and split them into train / test sets.
encoded_data = encode(data)

# Y is X shifted by one position: the target of each character is the one that follows it.
X = torch.tensor(encoded_data[:-1])
Y = torch.tensor(encoded_data[1:])

DATA_LENGTH = len(X)
size_train = int(0.9*DATA_LENGTH)
idxs = list(range(DATA_LENGTH))

# Shuffle once with a dedicated, seeded generator so the split is identical after
# every "Restart & Run All"; the global `random` state is left untouched.
SPLIT_SEED = 1337
shuffled_idxs = idxs.copy()
random.Random(SPLIT_SEED).shuffle(shuffled_idxs)

# Slicing a single permutation yields disjoint train / test index lists directly,
# without building two DATA_LENGTH-sized sets just to subtract them.
train_idxs = shuffled_idxs[:size_train]
X_train = X[train_idxs]
Y_train = Y[train_idxs]

test_idxs = shuffled_idxs[size_train:]
X_test = X[test_idxs]
Y_test = Y[test_idxs]

# Bigram

In [13]:
# Pin PyTorch's global RNG so the weight initialisation and the multinomial
# sampling in this cell are repeatable from a fresh kernel.
torch.manual_seed(seed=1337)

class Bigram(nn.Module):
    """Character-level bigram model.

    The only parameter is a ``vocab_size x vocab_size`` embedding table: row
    ``i`` holds the unnormalised scores (logits) of every possible next
    character given that the current character has id ``i``.
    """

    def __init__(self, vocab_size=len(UNIQUE_CHARS)):
        """Create the lookup table; ``vocab_size`` defaults to the corpus vocabulary size."""
        super().__init__()
        # Attribute name kept as-is: it is part of the module's parameter / state_dict keys.
        self.Embedding = nn.Embedding(vocab_size, vocab_size)

    def forward(self, x):
        """Return the next-character logits for every id in ``x``.

        The result has the shape of ``x`` with one extra trailing dimension of
        size ``vocab_size``.
        """
        logits = self.Embedding(x)
        return logits

    def generate(self, first_char=torch.tensor([0]), max_chars=10):
        """Sample text one character at a time, starting from ``first_char``.

        Args:
            first_char: Tensor holding the id of the starting character
                (a single element). It is included in the returned text.
            max_chars: Number of characters to sample after ``first_char``.

        Returns:
            The decoded string of ``max_chars + 1`` characters.
        """
        current_char = first_char
        # Collect plain Python ints so ``decode`` indexes the vocabulary with
        # ordinary integers rather than one-element tensors.
        result = [int(current_char)]
        # Sampling never needs gradients; without no_grad every step would
        # record an autograd graph that is immediately thrown away.
        with torch.no_grad():
            for _ in range(max_chars):
                logits = self(current_char)
                probs = F.softmax(logits, dim=-1)
                next_char = torch.multinomial(probs, num_samples=1)[0]
                result.append(int(next_char))
                current_char = next_char
        return decode(result)

# Instantiate the model and sample from it before any training, as a baseline
# to compare against the text generated after the training cell.
bigram = Bigram()
untrained_sample = bigram.generate(max_chars=500)
print(untrained_sample)


SKIcLT;AcELMoTbvZv C?nq-QE33:CJqkOKH-q;:la!oiywkHjgChzbQ?u!3bLIgwevmyFJGUGp
wnYWmnxKWWev-tDqXErVKLgJt-wBpm&yiltNCjeO3:Cx&vvMYW-txjuAd IRFbTpJ$zkZelxZtTlHNzdXXUiQQY:qFINTOBNLI,&oTigq z.c:Cq,SDXzetn3XVjX-YBcHAUhk&PHdhcOb
nhJ?FJU?pRiOLQeUN!BxjPLiq-GJdUV'hsnla!murI!IM?SPNPq?VgC'R
pD3cLv-bxn-tL!upg
SZ!Uvdg CtxtT?hsiW:XxKIiPlagHIsr'zKSVxza?GlDWObPmRJgrIAcmspmZ&viCKot:u3qYXA:rZgv f:3Q-oiwUzqh'Z!I'zRS3SP rVchSFUIdd q?sPJpUdhMCK$VXXevXJFMl,i
YxA:gWId,EXR,iMC,$?srV$VztRwb?KpgUWFjR$zChOLm;JrDnDph
LBj,KZxJa


In [14]:
# Training
# NOTE: N_EPOCHS counts optimisation steps (one random mini-batch each),
# not full passes over the training set.
N_EPOCHS = 20000
BATCH_SIZE = 32

optimizer = torch.optim.Adam(bigram.parameters(), lr=1e-3)

for epoch_i in range(N_EPOCHS):
    # Sample straight from the lazy range: this draws the same kind of index
    # batch without materialising a size_train-element list on every step.
    batch_idxs = random.sample(range(size_train), BATCH_SIZE)
    X_batch = X_train[batch_idxs]
    Y_batch = Y_train[batch_idxs]

    Y_hat = bigram(X_batch)

    optimizer.zero_grad(set_to_none=True)
    loss = F.cross_entropy(Y_hat, Y_batch)

    loss.backward()
    optimizer.step()

    if epoch_i%1000==0:
        # This is the loss of the current mini-batch only, so it is a noisy
        # estimate; the test loss below is computed over the full test split.
        train_loss = loss.item()
        # Evaluation needs no gradients; no_grad avoids building an autograd
        # graph over the whole test set every time we log.
        with torch.no_grad():
            Y_hat_test = bigram(X_test)
            test_loss = F.cross_entropy(Y_hat_test, Y_test).item()
        print(f'train: {train_loss}, test: {test_loss}')


print(bigram.generate(max_chars=400))

train: 4.94383430480957, test: 4.725185394287109
train: 4.113969802856445, test: 4.053621292114258
train: 3.661165952682495, test: 3.5607545375823975
train: 3.3556265830993652, test: 3.2152316570281982
train: 3.0686960220336914, test: 2.9797794818878174
train: 2.7455320358276367, test: 2.823634386062622
train: 2.513627767562866, test: 2.719484567642212
train: 2.5462112426757812, test: 2.6480627059936523
train: 2.8105382919311523, test: 2.5966153144836426
train: 2.6601619720458984, test: 2.5606491565704346
train: 2.4936180114746094, test: 2.535081386566162
train: 2.741941213607788, test: 2.515490770339966
train: 2.296121597290039, test: 2.50162410736084
train: 2.514200448989868, test: 2.490924596786499
train: 2.588207483291626, test: 2.4826314449310303
train: 2.69069242477417, test: 2.4768075942993164
train: 2.256885290145874, test: 2.4719035625457764
train: 2.1220622062683105, test: 2.4684317111968994
train: 2.3512423038482666, test: 2.4656801223754883
train: 2.4645450115203857, test: 