In [34]:
import torch

In [35]:
# in a trigram, we look at the last two characters to predict the next
# basically same code as the bigram notebook
passwords = open('rockyou.txt', 'r',encoding='latin1').read().splitlines()

In [36]:
allowed = set("abcdefghijklmnopqrstuvwxyz0123456789") # for simplification
filtered_passwords = []
for p in passwords:
    if all(char in allowed for char in p):
        filtered_passwords.append(p)

In [37]:
# for simplicity in this case we are going to use . as the special start/end indicator character
chars =  ["."] + sorted(list(set(''.join(filtered_passwords))))
stoi = {s:i for i,s in enumerate(chars)}
itos = {i: ch for ch, i in stoi.items()}
stoi

{'.': 0,
 '0': 1,
 '1': 2,
 '2': 3,
 '3': 4,
 '4': 5,
 '5': 6,
 '6': 7,
 '7': 8,
 '8': 9,
 '9': 10,
 'a': 11,
 'b': 12,
 'c': 13,
 'd': 14,
 'e': 15,
 'f': 16,
 'g': 17,
 'h': 18,
 'i': 19,
 'j': 20,
 'k': 21,
 'l': 22,
 'm': 23,
 'n': 24,
 'o': 25,
 'p': 26,
 'q': 27,
 'r': 28,
 's': 29,
 't': 30,
 'u': 31,
 'v': 32,
 'w': 33,
 'x': 34,
 'y': 35,
 'z': 36}

In [38]:
biwords = set()
for i in range(len(chars)):
    for j in range(len(chars)):
        biwords.add(chars[i] + chars[j])

In [39]:
itos2 = {i:l for i,l in enumerate(biwords)}
stoi2 = {l:i for i,l in enumerate(biwords)}
stoi2.items()

dict_items([('xp', 0), ('x9', 1), ('iy', 2), ('14', 3), ('jy', 4), ('pu', 5), ('hw', 6), ('zt', 7), ('m1', 8), ('0t', 9), ('p6', 10), ('8z', 11), ('4x', 12), ('to', 13), ('ww', 14), ('ja', 15), ('pp', 16), ('nj', 17), ('si', 18), ('wn', 19), ('iu', 20), ('2e', 21), ('25', 22), ('1g', 23), ('i7', 24), ('rg', 25), ('il', 26), ('3o', 27), ('64', 28), ('xn', 29), ('3s', 30), ('de', 31), ('99', 32), ('5l', 33), ('4n', 34), ('.a', 35), ('fv', 36), ('eb', 37), ('vu', 38), ('z7', 39), ('rn', 40), ('kk', 41), ('f9', 42), ('bt', 43), ('hn', 44), ('wv', 45), ('32', 46), ('kw', 47), ('t7', 48), ('0s', 49), ('wp', 50), ('cr', 51), ('hv', 52), ('6f', 53), ('nv', 54), ('8u', 55), ('ar', 56), ('r4', 57), ('n3', 58), ('4y', 59), ('ai', 60), ('6k', 61), ('c9', 62), ('lt', 63), ('s.', 64), ('by', 65), ('vj', 66), ('3.', 67), ('e2', 68), ('ma', 69), ('6q', 70), ('wt', 71), ('k.', 72), ('yd', 73), ('d1', 74), ('xz', 75), ('v3', 76), ('75', 77), ('2s', 78), ('l7', 79), ('9n', 80), ('qm', 81), ('r2', 82), ('

In [107]:
# training data setup
import torch.nn.functional as F

X_train, y_train = [], []
for p in filtered_passwords[:20000]:
    chs = ['.'] + list(p) + ['.'] # Special start/end token
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1, ix2, ix3 = stoi[ch1], stoi[ch2], stoi[ch3]
        idx = stoi2[ch1 + ch2]
        x1enc = F.one_hot(torch.tensor([idx]), num_classes=len(stoi2.items())).float()
        X_train.append(x1enc)
        y_train.append(ix3)

In [108]:
X_train, y_train = torch.cat(X_train).float(), torch.tensor(y_train)
num_pairs = len(itos2.items())
W = torch.randn((num_pairs, 37), requires_grad=True) # random initialization of weights
lr = 50   # learning rate
iterations = 1000
lmbd = 0.0001 # Lambda for regularization

In [109]:
# same training process as bigram model
for step in range(iterations):
    logits = X_train @ W
    counts = logits.exp()
    probs = counts / counts.sum(dim=1, keepdim=True) # last two lines are softmax function
    loss = -1  * probs[torch.arange(X_train.shape[0]), y_train].log().mean() + lmbd*(W ** 2).mean() # neg logistic loss

    # backward pass
    W.grad = None
    loss.backward()
    
    W.data += -lr * W.grad
    
    if step % 50 == 0:
        print(loss.item())

4.089900016784668
3.087550640106201
2.7891502380371094
2.643543243408203
2.555934190750122
2.497208833694458
2.454995632171631
2.4230430126190186
2.3978958129882812
2.3775100708007812
2.360600233078003
2.346311569213867
2.3340542316436768
2.3234055042266846
2.3140571117401123
2.305776596069336
2.298384428024292
2.2917416095733643
2.285736560821533
2.2802791595458984


In [110]:
def generate_name():
    index = 0
    prev = 0
    name = ""
    while True:
        xenc = F.one_hot(torch.tensor([stoi2[itos[prev] + itos[index]]]), num_classes=num_pairs).float()
        
        logits = xenc @ W 
        logits = logits.exp()
        p = logits / logits.sum(dim=1, keepdim= True)
        idx = torch.multinomial(p, num_samples=1, replacement=True).item()

        if idx == 0:
            break

        prev = index
        index = idx
        name += itos[idx]
        
    print(name)

In [115]:
for _ in range(20):
    generate_name()

rachaemelizza
ell
suook
5c3
lincelm7
erd
qwee
uff7mlovermiloveyse13
luv5igkdb52qtgjquica
0390
karaeluv
3ojang
88
latpickhwlie
kike
ki123
333vwgxh4woocwh3gosaber
darollipv5vq6fms9f0hdtd
qwexy
28956


In [100]:
# neural network using pytorch
import torch.nn as nn

input_size = len(stoi2.items())
hidden_size = 128
output_size = len(stoi)

class TrigramModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(TrigramModel, self).__init__()
        self.fc1 = nn.Linear(input_size,hidden_size)
        self.fc2 = nn.Linear(hidden_size,output_size)
    def forward(self,x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)  

In [101]:
model = TrigramModel(input_size, hidden_size, output_size)

In [102]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [103]:
# using a dataloader to simplify batch processing
from torch.utils.data import Dataset, DataLoader

class TrigramDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)  
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]  


In [104]:
dataset = TrigramDataset(X_train, y_train)

data_loader = DataLoader(dataset, batch_size=32, shuffle=True)


In [105]:
num_epochs = 100

for epoch in range(num_epochs):
    for X_batch,y_batch in data_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = loss_fn(output,y_batch)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(loss.item())

3.6082894802093506
3.6145572662353516
3.598367929458618
3.609872817993164
3.6059794425964355
3.6019275188446045
3.5943944454193115
3.5922374725341797
3.586296319961548
3.6015710830688477
3.5890705585479736
3.588423252105713
3.581313371658325
3.584944248199463
3.5984444618225098
3.6014630794525146
3.5783867835998535
3.5950522422790527
3.58292555809021
3.5798141956329346
3.6180217266082764
3.5605125427246094
3.55818247795105
3.5767295360565186
3.5850350856781006
3.5528926849365234
3.5733039379119873
3.5499465465545654
3.5448522567749023
3.5901596546173096
3.563568353652954
3.5366415977478027
3.52022385597229
3.549560308456421
3.5384981632232666
3.5447537899017334
3.514533519744873
3.57094669342041
3.5333073139190674
3.551607847213745
3.516056776046753
3.5096755027770996
3.521239757537842
3.53120493888855
3.5624988079071045
3.503246545791626
3.497770071029663
3.4838101863861084
3.4676246643066406
3.475050449371338
3.496095895767212
3.464284658432007
3.489280939102173
3.4604556560516357
3.

RuntimeError: [enforce fail at alloc_cpu.cpp:114] data. DefaultCPUAllocator: not enough memory: you tried to allocate 700928 bytes.

In [116]:
def generate_name_torch_nn():
    index = 0
    prev = 0
    name = "."
    while True:
        xenc = F.one_hot(torch.tensor([stoi2[itos[prev] + itos[index]]]), num_classes=num_pairs).float()
        
        logits = model(xenc) 
        probs = torch.exp(logits)
        idx = torch.multinomial(probs, num_samples=1).item()

        if idx == 0:
            break

        prev = index
        index = idx
        name += itos[idx]
        
    print(name[1:])

In [118]:
for i in range(10):
    generate_name_torch_nn()

1u81
ey8
10142021
vatch
q1q2wharl
69
ese
moveflalkey
anglind1
quat
