<a href="https://colab.research.google.com/github/dsogden/Bigram/blob/main/LanguageModeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [151]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [3]:
path = '/content/names.txt'
with open(path, 'r', encoding='utf-8') as f:
  names = f.read().splitlines()
print(names[:10])

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia', 'harper', 'evelyn']


In [123]:
class Tokenizer:
    def __init__(self, names: list[str]):
        self.chars = sorted(list(set(''.join(names))))
        self.stoi = {s: i + 1 for i, s in enumerate(self.chars)}
        self.stoi['.'] = 0
        self.itos = {i: s for s, i in self.stoi.items()}
        self.vocab_size = len(self.stoi)

    def encode(self, name: str, max_length: int) -> list[int]:
        padding = max_length - len(name)
        return [self.stoi[c] for c in name] + [0] * (padding + 1)

    def decode(self, tokens: list[int]) -> str:
        return ''.join([self.itos[t] for t in tokens])

In [128]:
tokenizer = Tokenizer(names)
max_length = max([len(name) for name in names])
print(max_length)
encoded = torch.tensor(
    [tokenizer.encode(name, max_length) for name in names], dtype=torch.float32
)

15


In [126]:
for l in F.log_softmax(logits(output), dim=-1).argmax(dim=-1):
    print(tokenizer.decode(l.tolist()))


[15, 26, 13, 15, 7, 23, 13, 13, 26, 24, 13, 15, 24, 7, 24, 13, 24, 13, 7, 13, 26, 26, 7, 6, 6, 15, 24]
None
[26, 13, 6, 19, 25, 13, 6, 6, 13, 7, 6, 24, 25, 25, 8, 6, 8, 6, 25, 6, 13, 13, 25, 6, 26, 7, 2]
None


In [136]:
tokenizer.decode(encoded[0].tolist())

'emma............'

In [140]:
conv1 = nn.Conv1d(1, 27, 3, stride=1, bias=False)
convolution = conv1(encoded[:1].unsqueeze(1))
logits = nn.Linear(14, 27)
output = F.log_softmax(logits(convolution), dim=-1).argmax(dim=-1)

In [145]:
tokenizer.decode(output.squeeze(0).tolist())

'vmyojmcvbvocjovvccwvwvvmvvm'

In [156]:
class LanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv1d(1, 27, 3, stride=1, bias=False)
        self.linear = nn.Linear(14, 16)

    def forward(self, x):
        x = self.conv(x.unsqueeze(1))
        return F.log_softmax(self.linear(x), dim=-1)

In [165]:
def train(model, dataloader, optimizer, criterion):
    running_loss = 0.0
    for idx, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y.long())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / (idx + 1)

def evaluate(model, dataloader, criterion):
    running_loss = 0.0
    for idx, (X, y) in enumerate(dataloader):
        y_pred = model(X)
        loss = criterion(y_pred, y.long())
        running_loss += loss.item()
    return running_loss / (idx + 1)

In [162]:
test_model = LanguageModel()
test_model(encoded[:1]).shape

torch.Size([1, 27, 16])

In [163]:
x_train, x_val, y_train, y_val = train_test_split(
    encoded, encoded, test_size=0.2, random_state=42, shuffle=True
)

train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)

train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False)

In [166]:
model = LanguageModel()
epochs = 10
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
for epoch in range(epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    val_loss = evaluate(model, val_dataloader, criterion)
    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Epoch 1/10, Train Loss: 3.1435, Val Loss: 2.6778
Epoch 2/10, Train Loss: 2.4493, Val Loss: 2.2997
Epoch 3/10, Train Loss: 2.2501, Val Loss: 2.2247
Epoch 4/10, Train Loss: 2.2042, Val Loss: 2.1909
Epoch 5/10, Train Loss: 2.1714, Val Loss: 2.1587
Epoch 6/10, Train Loss: 2.1411, Val Loss: 2.1303
Epoch 7/10, Train Loss: 2.1157, Val Loss: 2.1078
Epoch 8/10, Train Loss: 2.0959, Val Loss: 2.0914
Epoch 9/10, Train Loss: 2.0818, Val Loss: 2.0800
Epoch 10/10, Train Loss: 2.0719, Val Loss: 2.0716


In [168]:
with torch.no_grad():
    random_vals = encoded[100: 132]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

In [170]:
for char in predicted_chars:
    print(tokenizer.decode(char.tolist()))

ma.aaaa.bc..b.ec..bcbbbbbbb
meaaaeaaa.aaaaaaaaaaaaaaaa.
ebbbbbbbcd..cacc.cccccccccc
gdbbddbdef.beceeeeeeeeeeeee
hcccccccde..d.dd.dddddd.dd.
jdbdddddefddfefffefffffffff
gdddddddea.deceeaeeeeeeeeea
hbbbbbbcdeccdcdddcddddddddd
gdaddddaea....e...e..e...e.
gb.bbbbbab..aaca.aa..ca..c.
edadddddddaacaccabccccccccc
ma.aaadcec..ebeebbbeeebeeeb
hbccdbcddb.daaaaaaaaaaaaaaa
gcaaacaadc..b.bb.bbbbbbbbbb
j.bb..bcde.cdcddddddddddddd
faaaaaaabcaabbbb.bbbbbbbbbb
feacceaadb.aaadd.aa..a.....
hcaaaaaada.aaada...........
fcaaacaabbaababb.abbbbbbbbb
gd.ddddded..c.ec..ecceeceec
maaaaaa.bc..b.eb.bbbbbbbbbb
ga.aaaabcd..cbcccbccccccccc
hdaaaaaaaaaaaab..a.........
meaaabaaab.aaaaa.aa..aa....
md.bddbbcd..c.gcccccccccccc
gaaaaaaabc..cbcccbccccccccc
mdddddddec..ebgeebegegeegge
headddaaabaaaaaaaaaaaaaaaaa
md.dddddeb..e.ee..eeeeeeeee
ebbbbbbbcd..c.ccccccccccccc
gdaabdabcd..c.cc..ccccccccc
jcccccc.db..d.ee..dee......


In [171]:
model = LanguageModel()
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
for epoch in range(epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    val_loss = evaluate(model, val_dataloader, criterion)
    print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Epoch 1/100, Train Loss: 3.5358, Val Loss: 2.8073
Epoch 2/100, Train Loss: 2.6212, Val Loss: 2.4588
Epoch 3/100, Train Loss: 2.3075, Val Loss: 2.2150
Epoch 4/100, Train Loss: 2.1805, Val Loss: 2.1530
Epoch 5/100, Train Loss: 2.1255, Val Loss: 2.1001
Epoch 6/100, Train Loss: 2.0780, Val Loss: 2.0604
Epoch 7/100, Train Loss: 2.0486, Val Loss: 2.0401
Epoch 8/100, Train Loss: 2.0330, Val Loss: 2.0282
Epoch 9/100, Train Loss: 2.0229, Val Loss: 2.0202
Epoch 10/100, Train Loss: 2.0161, Val Loss: 2.0145
Epoch 11/100, Train Loss: 2.0109, Val Loss: 2.0099
Epoch 12/100, Train Loss: 2.0069, Val Loss: 2.0066
Epoch 13/100, Train Loss: 2.0037, Val Loss: 2.0036
Epoch 14/100, Train Loss: 2.0013, Val Loss: 2.0012
Epoch 15/100, Train Loss: 1.9989, Val Loss: 1.9993
Epoch 16/100, Train Loss: 1.9969, Val Loss: 1.9975
Epoch 17/100, Train Loss: 1.9950, Val Loss: 1.9958
Epoch 18/100, Train Loss: 1.9938, Val Loss: 1.9945
Epoch 19/100, Train Loss: 1.9922, Val Loss: 1.9931
Epoch 20/100, Train Loss: 1.9911, Val Lo

In [172]:
with torch.no_grad():
    random_vals = encoded[100: 132]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

for char in predicted_chars:
    print(tokenizer.decode(char.tolist()))

jfaaaa.aed..b.eb.bbbbbbbbbb
heeeccccdc..aaa.a.aaaaaaaaa
eb...b.babaacaccccccccccccc
ffaaadaa.d..e.edeeeeeeeeeee
hccccc.cbc..b.dd.d..dd...d.
iaaaabaaae....fe.efffffffff
fdbbbdabad....e............
g....b..ac..daddddddddddddd
hddddd.acd..b.eb.bbbeee..ee
f....b..abaacaccacaaaaaaaaa
daaaaaaacb.................
iaaaaa.acd..b.ebbbbbbbbbbbb
hbbbbbbbb.aaaaa.a.aaaaaaaaa
gaaaca.aba..b.bbbbbbbbbbbbb
haaaacaaac..d.dd.dddddddddd
faaaaaaaca..b.bb.bbbbbbbbbb
f.......bc.aaadaaaaaaaaaaaa
heaeecacbc....d............
fc..cc..ca..bbbababbbbbbbbb
gdaaadaacb..c.cb.b..ee...e.
jaaaaaaa.d..b.eb.bbbbbbbbbb
faaaab.a.b..c.ccccccccccccc
hdddddccca..b.ba..bbbbb..bb
ie.eec.cdb..aaaaaaaaaaaaaaa
id...d..cb.acccbcbccccccccc
jaaaaaaadb..c.cbcbccccccccc
laaaad.acd..b.gb.bbbbbggggg
ecccccccc...aaaaaaaaaaaaaaa
f....d..cd.aaaedaaeeeeeeeee
fb...b..abaaccccccccccccccc
fd..db..db..cccbcbccccccccc
ic...c..bd..aaedaaaaaaaaaaa


In [175]:
class LanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv1d(1, 27, 3, stride=1, bias=False)
        self.proj = nn.Linear(14, 128)
        self.linear = nn.Linear(128, 16)

    def forward(self, x):
        x = self.conv(x.unsqueeze(1))
        x = F.relu(self.proj(x))
        return F.log_softmax(self.linear(x), dim=-1)

In [176]:
model = LanguageModel()
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
for epoch in range(epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    val_loss = evaluate(model, val_dataloader, criterion)
    if epoch % 10 == 0:
        print(f'Epoch {epoch + 1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Epoch 1/100, Train Loss: 2.4606, Val Loss: 2.2402
Epoch 11/100, Train Loss: 1.9171, Val Loss: 1.9157
Epoch 21/100, Train Loss: 1.8335, Val Loss: 1.8341
Epoch 31/100, Train Loss: 1.7631, Val Loss: 1.7669
Epoch 41/100, Train Loss: 1.7252, Val Loss: 1.7309
Epoch 51/100, Train Loss: 1.6989, Val Loss: 1.7039
Epoch 61/100, Train Loss: 1.6850, Val Loss: 1.6950
Epoch 71/100, Train Loss: 1.6730, Val Loss: 1.6794
Epoch 81/100, Train Loss: 1.6636, Val Loss: 1.6711
Epoch 91/100, Train Loss: 1.6571, Val Loss: 1.6674


In [177]:
with torch.no_grad():
    random_vals = encoded[100: 132]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

for char in predicted_chars:
    print(tokenizer.decode(char.tolist()))

mffaaaaaaadde.ed..ccccbbbbb
jeeddcdddbbbbabab....aaaaaa
jbb......aaaaaaaaaccccccccc
maabbddb...cc.cccccceeeeeee
mccffffffeebb.dd.dd..d...b.
mabbddhhhghce.eeeeee.f..fff
mdbaaaaccccce.eee....e...e.
m..aaaaaafffc.ceefcccdddddd
mdffffaaaccbb.eeeeee.e...e.
j.bbdddeeeeecacacccccaaaaaa
eaabbccccc.c....aa.........
mahhhdhhcff.g.geheebbbbbbgb
mbccddddg...eaaaefaaaaaaaaa
hcffffffeeedd..b..bbbbbbbbb
maabbbbbbbfcc.eec....d.dddd
jaaaaaaecccc...b.bbbbbbbbbb
j.eeeccbbbbbdadaddaaaaaaaaa
jeeeeccaaaaad.ddddb..d...d.
dcccceeeeeeeaaaaaaaaaabbbbb
mddaaaaffbbbc.eeeee..c...c.
maaaaaffadccc.eddeeeebbbbbb
jaaaad..dbbbb.bcbbccccccccc
jeeeccccccca...ba.bbbbbbbbb
jedddddddcc.bababbaaaaaaaaa
mdhffbbbbaaaaaaaeeaccacccgc
daaeeeeeeee....b.bbbbcccccc
mdaffffcccccg.ee.ebbbebbbgb
jcccccbbbbbb.a.abbaaaaaaaaa
m..dddbbbbbbcacaccaaaaaaaea
j.bbbbbbbaaaaaaaaaaaaaacccc
jdddd..bbbbaacaaaaacccccccc
hc...bbbbbbbbadadeaaaaaaaaa


In [209]:
# Add embedding layer
class LanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(27, 27)
        self.conv = nn.Conv1d(16, 27, 3, stride=1, bias=False)
        self.proj = nn.Linear(25, 128)
        self.output = nn.Linear(128, 16)

    def forward(self, x):
        x = self.embedding(x.long())
        convolution = self.conv(x)
        projection = F.relu(self.proj(convolution))
        return F.log_softmax(self.output(projection), dim=-1)

In [210]:
model = LanguageModel()
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
for epoch in range(epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    val_loss = evaluate(model, val_dataloader, criterion)
    if epoch % 10 == 0:
        print(f'Epoch {epoch}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Epoch 0/100, Train Loss: 2.1780, Val Loss: 1.9869
Epoch 10/100, Train Loss: 1.7119, Val Loss: 1.7151
Epoch 20/100, Train Loss: 1.6672, Val Loss: 1.6727
Epoch 30/100, Train Loss: 1.6464, Val Loss: 1.6540
Epoch 40/100, Train Loss: 1.6335, Val Loss: 1.6468
Epoch 50/100, Train Loss: 1.6252, Val Loss: 1.6362
Epoch 60/100, Train Loss: 1.6190, Val Loss: 1.6314
Epoch 70/100, Train Loss: 1.6145, Val Loss: 1.6278
Epoch 80/100, Train Loss: 1.6098, Val Loss: 1.6229
Epoch 90/100, Train Loss: 1.6066, Val Loss: 1.6232


In [214]:
decoder = tokenizer.itos
with torch.no_grad():
    random_vals = encoded[:32]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

for char in predicted_chars:
    string = ''
    for letter in char.tolist():
        if letter == 0:
            break
        string += decoder[letter]
    print(string)

dcabb
fec
fbabcbabb
hbcaedc
fecbbccbcdcebceab
ibc
fb
f
fabbcdcb
f
jdacceacgbccfafb
f
jdecg
mc
mcaba
f
fdbbbbbb
fec
fcaca
jbcagecgafdadadcbcc
jg
ha
dcbbbabb
fbcc
fbc
hdbfffffgg
fabbcbbccbbc
faccacbccabbbbfdbb
fdc
fcbbbadbd
ed
hcacfbacf


In [235]:
# Change projection to GRU
class LanguageModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(27, 27)
        self.conv = nn.Conv1d(16, 27, 2, stride=1, bias=False)
        self.rnn = nn.GRU(26, 128, batch_first=True)
        self.output = nn.Linear(128, 16)

    def forward(self, x):
        x = self.embedding(x.long())
        convolution = self.conv(x)
        projection, _ = self.rnn(convolution)
        return F.log_softmax(self.output(projection), dim=-1)

In [236]:
model = LanguageModel()
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
for epoch in range(epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    val_loss = evaluate(model, val_dataloader, criterion)
    if epoch % 10 == 0:
        print(f'Epoch {epoch}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

Epoch 0/100, Train Loss: 2.1911, Val Loss: 1.9441
Epoch 10/100, Train Loss: 1.5369, Val Loss: 1.5475
Epoch 20/100, Train Loss: 1.5173, Val Loss: 1.5321
Epoch 30/100, Train Loss: 1.5102, Val Loss: 1.5283
Epoch 40/100, Train Loss: 1.5071, Val Loss: 1.5270
Epoch 50/100, Train Loss: 1.5049, Val Loss: 1.5260
Epoch 60/100, Train Loss: 1.5034, Val Loss: 1.5253
Epoch 70/100, Train Loss: 1.5034, Val Loss: 1.5247
Epoch 80/100, Train Loss: 1.5027, Val Loss: 1.5249
Epoch 90/100, Train Loss: 1.5020, Val Loss: 1.5243


In [237]:
with torch.no_grad():
    random_vals = encoded[:32]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

for char in predicted_chars:
    string = ''
    for letter in char.tolist():
        if letter == 0:
            break
        string += decoder[letter]
    print(string)

eca
fea
ebaaaaaabbaaaaabccba
kgcccdaba
gebd
lbg
ebccca
jeabbbadddbacaa
gaccddd
gac
odaaaacccecffaac
eac
odee
hc
eca
e
fdbb
ged
ecdcebabbbadaaaccaaaabbacbf
jb
lgbb
ha
fcc
ebcccd
had
ja
eac
fabcccca
ea
fc
ea
jca


In [287]:
# Remove embedding and add more layers to GRU
class LanguageModel(nn.Module):
    def __init__(self, num_embeddings, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, hidden_dim)
        self.rnn = nn.GRU(hidden_dim, hidden_dim, num_layers=4, batch_first=True)
        self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x.long())
        rnn_output, _ = self.rnn(x)
        output = self.output(rnn_output)
        N, C, H = output.shape
        output = output.view(N, H, C) # should permute be used here?
        return F.softmax(output, dim=-1)

In [289]:
from tqdm import tqdm
num_embeddings = 27
hidden_dim = 64
output_dim = 27
model = LanguageModel(
    num_embeddings=num_embeddings,
    hidden_dim=hidden_dim,
    output_dim=output_dim
)
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
for epoch in tqdm(range(epochs)):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    val_loss = evaluate(model, val_dataloader, criterion)
    if epoch % 10 == 0:
        print(f'Epoch {epoch}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

  1%|          | 1/100 [00:14<23:38, 14.32s/it]

Epoch 0/100, Train Loss: 3.2504, Val Loss: 3.2253


 11%|█         | 11/100 [02:38<21:14, 14.32s/it]

Epoch 10/100, Train Loss: 3.1340, Val Loss: 3.1358


 21%|██        | 21/100 [05:01<19:00, 14.44s/it]

Epoch 20/100, Train Loss: 3.1127, Val Loss: 3.1159


 31%|███       | 31/100 [07:25<16:26, 14.30s/it]

Epoch 30/100, Train Loss: 3.1010, Val Loss: 3.1044


 41%|████      | 41/100 [09:47<14:00, 14.25s/it]

Epoch 40/100, Train Loss: 3.0885, Val Loss: 3.0914


 51%|█████     | 51/100 [12:10<11:43, 14.35s/it]

Epoch 50/100, Train Loss: 3.0848, Val Loss: 3.0871


 61%|██████    | 61/100 [14:35<09:22, 14.43s/it]

Epoch 60/100, Train Loss: 3.0790, Val Loss: 3.0821


 71%|███████   | 71/100 [16:59<07:00, 14.52s/it]

Epoch 70/100, Train Loss: 3.0770, Val Loss: 3.0803


 81%|████████  | 81/100 [19:24<04:33, 14.39s/it]

Epoch 80/100, Train Loss: 3.0748, Val Loss: 3.0774


 91%|█████████ | 91/100 [21:48<02:06, 14.05s/it]

Epoch 90/100, Train Loss: 3.0732, Val Loss: 3.0770


100%|██████████| 100/100 [23:54<00:00, 14.35s/it]


In [290]:
with torch.no_grad():
    random_vals = encoded[:32]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

for char in predicted_chars:
    string = ''
    for letter in char.tolist():
        if letter == 0:
            break
        string += decoder[letter]
    print(string)

nabfdgabgblalaaaalagaaaaaca
naefcgbffdnafaa
nabddgfbgalflaaaalfhfcabaga
mcddcgffm
labbggaecdnafagabmbgbaabada
mag
nadfcgaffalbo
nabedhaffdnafaaaalagaaaaacb
madccgaf
naeddgaffalfgaedalahfaalado
naaccglcmenffalbamcfaaacaca
nabdcabhfbofjaabalafaaalcdo
naedclcbeblafaabalglgbcahcc
nadfegaifalbo
naedcgaebbnagaaaalagaaaaaca
nabddgfffalaoaaedmchfcalado
labbbhbenclanagablagaaaaada
mab
nadfegabnboagaaaalagaaacaga
lagachcggfodgaaaaochfeacgga
mabbgg
nadbbgaegcmei
nabfchdbfaobmabbalagaaaaacb
malcdhcimcnagaaaalanaaacaga
mag
naaccaagfgmdgebe
nabfcgaeeanagaacalafaababba
nadbegdffanaoaaaal
mabbgaahmanagbaaalafacaabcc
nabbcgaggakagabablbgaaaabga
nabcegaffanagaaaalagaaaabcb
naeddgbgm


In [302]:
# Remove embedding and add more layers to GRU
class LanguageModel(nn.Module):
    def __init__(self, num_embeddings, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, output_dim)
        # self.rnn = nn.GRU(hidden_dim, hidden_dim, num_layers=4, batch_first=True)
        # self.output = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = self.embedding(x.long())
        # rnn_output, _ = self.rnn(x)
        # output = self.output(rnn_output)
        N, H, C = x.shape
        x = x.view(N * H, C) # should permute be used here?
        return F.softmax(x, dim=-1)

In [303]:
def train(model, dataloader, optimizer, criterion):
    running_loss = 0.0
    for idx, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        y_pred = model(X)
        y = y.view(-1)
        loss = criterion(y_pred, y.long())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / (idx + 1)

In [304]:
num_embeddings = 27
output_dim = 27
model = LanguageModel(
    num_embeddings=num_embeddings,
    output_dim=output_dim
)
epochs = 100
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
for epoch in tqdm(range(epochs)):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    if epoch % 10 == 0:
        print(f'Epoch {epoch}/{epochs}, Train Loss: {train_loss:.4f}')

  1%|          | 1/100 [00:00<01:33,  1.05it/s]

Epoch 0/100, Train Loss: 3.2637


 11%|█         | 11/100 [00:07<00:55,  1.59it/s]

Epoch 10/100, Train Loss: 2.4841


 21%|██        | 21/100 [00:14<00:56,  1.40it/s]

Epoch 20/100, Train Loss: 2.3821


 31%|███       | 31/100 [00:20<00:42,  1.61it/s]

Epoch 30/100, Train Loss: 2.3653


 41%|████      | 41/100 [00:27<00:43,  1.36it/s]

Epoch 40/100, Train Loss: 2.3603


 51%|█████     | 51/100 [00:33<00:29,  1.66it/s]

Epoch 50/100, Train Loss: 2.3586


 61%|██████    | 61/100 [00:39<00:27,  1.43it/s]

Epoch 60/100, Train Loss: 2.3579


 71%|███████   | 71/100 [00:46<00:17,  1.66it/s]

Epoch 70/100, Train Loss: 2.3577


 81%|████████  | 81/100 [00:52<00:13,  1.43it/s]

Epoch 80/100, Train Loss: 2.3576


 91%|█████████ | 91/100 [00:59<00:05,  1.65it/s]

Epoch 90/100, Train Loss: 2.3576


100%|██████████| 100/100 [01:04<00:00,  1.55it/s]


In [313]:
vals = torch.randperm(encoded.shape[0])

with torch.no_grad():
    random_vals = encoded[vals[:32]]
    predictions = model(random_vals)
    predicted_chars = predictions.argmax(dim=-1)

for char in predicted_chars.view(32, 16):
    string = ''
    for letter in char.tolist():
        if letter == 0:
            break
        string += decoder[letter]
    print(string)

hassan
columbus
naiah
kodi
legacii
cesia
christen
brycyn
arraya
kamorah
piercen
maize
adaliz
nysha
corrin
keigan
honest
hadly
nery
denny
eilish
aslynn
afif
lennen
kelsey
stran
gabryelle
koda
kenrick
saban
philomina
anzleigh
