Description:
```
I'm trying to create a bot that completes words with one missing letter, e.g. "go_gle" -> "google".
For this I use two trigram models: one trained on the words as written and one trained on the reversed words.
word_list_30000.txt contains 30k commonly used words (generated by ChatGPT).
When an input is given to the bot, the two letters before the "_" are fed to the first model, which
gives the probabilities of what the missing letter could be; the two letters after the "_" are fed (in reverse order) to the second model, which
gives its own probabilities.
We then take the product of the probabilities from both models and output the letter with the maximum product.

After completion: It's working pretty well :)
```

In [None]:
import torch
import torch.nn.functional as F
# Read the word list (one word per line). The context manager closes the
# file handle as soon as the contents have been read instead of leaving it
# open for the lifetime of the kernel.
with open('word_list_30000.txt', 'r') as word_file:
  words = word_file.read().splitlines()
len(words)

30000

In [None]:
# Split the word list at a fixed index: everything before it is used for
# training, everything from it onwards is held out as the dev set.
split_at = 29550
train_set, dev_set = words[:split_at], words[split_at:]

## Training and developing Model - 1

In [None]:
# Character vocabulary: every symbol that occurs in the training words plus
# the '.' boundary marker, numbered in sorted order.
# ch ----> i
c = sorted({ch for w in train_set for ch in ['.'] + list(w) + ['.']})
chtoi = {symbol: idx for idx, symbol in enumerate(c)}

In [None]:
# Enumerate every ordered pair of vocabulary characters and number the
# pairs in sorted order.
# (ch1, ch2) ----> i
b = [(first, second) for first in c for second in c]
bi = sorted(set(b))
bitoi = {pair: idx for idx, pair in enumerate(bi)}

In [None]:
# Inverse lookups, used to decode integer ids back into symbols.
# i ---> (ch1,ch2), i ---> ch
itobi = dict(zip(bitoi.values(), bitoi.keys()))
itoch = dict(zip(chtoi.values(), chtoi.keys()))

In [None]:
# Build the trigram training examples for the forward model:
# the input is the id of the context pair (ch1, ch2), the target is the id of ch3.
# Each word is wrapped in a single '.' on both sides before it is sliced into trigrams.
xs, ys = [], []
for w in train_set:
  chs = ['.'] + list(w) + ['.']
  for pos in range(len(chs) - 2):
    xs.append(bitoi[(chs[pos], chs[pos + 1])])
    ys.append(chtoi[chs[pos + 2]])
xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [None]:
# Inspect the encoded (ch1, ch2) context ids of the training examples.
xs

tensor([ 12, 327, 405,  ..., 327, 405, 401])

In [None]:
# Inspect the encoded target-character ids of the training examples.
ys

tensor([15, 15, 11,  ..., 15, 11,  0])

In [None]:
# Randomly initialise the weight matrix of the forward trigram model.
# There is one row per possible (ch1, ch2) context and one column per
# candidate next character, so indexing W with a context id returns that
# context's logits directly.
# The shape is derived from the vocabulary (729 x 27 for a-z plus '.')
# rather than hard-coded, so it stays in sync with bitoi/chtoi if the word
# list ever contains additional symbols.
g = torch.Generator().manual_seed(2147483652)
W = torch.randn((len(bitoi), len(chtoi)), generator= g, requires_grad= True)

In [None]:
# NOTE(review): this zero tensor is never read. The training loop rebinds
# `logits = W[xs]` at the start of every iteration, so this cell only
# allocates a (len(xs), 27) tensor that is immediately discarded.
logits = torch.zeros((len(xs), 27))


In [None]:
# Full-batch gradient descent on the forward trigram model.
num_steps = 150       # number of gradient-descent updates
learning_rate = 200   # step size applied to the gradient
l2_strength = 0.01    # weight of the mean-squared-weight penalty

for k in range(num_steps):
  # Forward pass: looking up row xs[i] of W gives the logits for example i.
  logits = W[xs]
  loss = F.cross_entropy(logits, ys) + l2_strength*(W**2).mean()
  print(loss.item())

  # Backward pass: clear the previous gradient, then backpropagate.
  W.grad = None
  loss.backward()

  # Parameter update. Done in place under no_grad (instead of through
  # W.data) so autograd does not record the update and still tracks W
  # correctly on the next iteration.
  with torch.no_grad():
    W -= learning_rate * W.grad

3.7900519371032715
3.316551923751831
2.9398341178894043
2.65067195892334
2.435704469680786
2.2703046798706055
2.138056755065918
2.029412031173706
1.9384750127792358
1.8613792657852173
1.795352578163147
1.7382922172546387
1.6885685920715332
1.6449071168899536
1.6063085794448853
1.571986436843872
1.5413185358047485
1.513808250427246
1.4890527725219727
1.4667223691940308
1.4465410709381104
1.4282746315002441
1.4117205142974854
1.3967022895812988
1.3830626010894775
1.3706623315811157
1.3593755960464478
1.3490900993347168
1.339703917503357
1.3311256170272827
1.323272705078125
1.3160713911056519
1.3094550371170044
1.3033643960952759
1.2977468967437744
1.2925550937652588
1.287746548652649
1.2832849025726318
1.279135823249817
1.2752705812454224
1.2716628313064575
1.2682889699935913
1.2651283740997314
1.262162685394287
1.2593748569488525
1.2567508220672607
1.2542763948440552
1.2519400119781494
1.249731183052063
1.247639775276184
1.245657205581665
1.2437756061553955
1.2419873476028442
1.24028646

In [None]:
# Loss computed in the last training iteration (cross-entropy plus the L2 penalty).
loss.item()

1.1939969062805176

In [None]:
# Encode the held-out words as trigram examples, exactly as for the training set:
# context pair id -> xs2, next-character id -> ys2.
xs2, ys2 = [], []
for w in dev_set:
  chs = ['.'] + list(w) + ['.']
  for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
    xs2.append(bitoi[(ch1, ch2)])
    ys2.append(chtoi[ch3])
xs2 = torch.tensor(xs2)
ys2 = torch.tensor(ys2)

In [None]:
# Evaluate the forward model on the dev set. This is evaluation only, so it
# runs under no_grad to avoid building an autograd graph. The L2 term is kept
# so the number is computed the same way as the training loss.
with torch.no_grad():
  logits2 = W[xs2]
  loss2 = F.cross_entropy(logits2, ys2) + 0.01*(W**2).mean()
print(loss2.item())

1.1773805618286133


## Training and developing Model - 2

In [None]:
# Reverse every word so the second model can learn to predict a character
# from the two characters that FOLLOW it in the original spelling.
rev_words = ["".join(reversed(word)) for word in words]
rev_words[:8]

['kool', 'drib', 'dnal', 'emoh', 'miws', 'daer', 'hgih', 'retupmoc']

In [None]:
# Same fixed-index train/dev split as for the forward model, applied to the reversed words.
rev_split_at = 29550
rev_train_set, rev_dev_set = rev_words[:rev_split_at], rev_words[rev_split_at:]

In [None]:
# Trigram training examples for the reverse model, built from the reversed
# words with the same encoding as the forward model (bitoi for contexts,
# chtoi for targets).
rev_xs, rev_ys = [], []
for w in rev_train_set:
  chs = ['.'] + list(w) + ['.']
  for pos in range(len(chs) - 2):
    rev_xs.append(bitoi[(chs[pos], chs[pos + 1])])
    rev_ys.append(chtoi[chs[pos + 2]])
rev_xs = torch.tensor(rev_xs)
rev_ys = torch.tensor(rev_ys)


In [None]:
# Inspect the encoded context ids of the reverse-model training examples.
rev_xs

tensor([ 11, 301, 405,  ..., 301, 405, 402])

In [None]:
# Inspect the encoded target-character ids of the reverse-model training examples.
rev_ys

tensor([15, 15, 12,  ..., 15, 12,  0])

In [None]:
# Randomly initialise the weight matrix of the reverse trigram model:
# one row per (ch1, ch2) context, one column per candidate character.
# The shape is derived from the vocabulary (729 x 27 for a-z plus '.')
# rather than hard-coded, so it always matches the ids produced by bitoi/chtoi.
# A fresh generator with a fixed seed makes the initialisation reproducible.
g = torch.Generator().manual_seed(2147483652)
rev_W = torch.randn((len(bitoi), len(chtoi)), generator= g, requires_grad= True)

In [None]:
# Placeholder for the reverse model's logits, sized by the reverse training
# set (the original used len(xs), the forward model's example count).
# NOTE(review): the training loop rebinds `rev_logits = rev_W[rev_xs]` on
# every iteration, so this preallocation is never actually read.
rev_logits = torch.zeros((len(rev_xs), 27))

In [None]:
# Full-batch gradient descent on the reverse trigram model.
num_steps = 150       # number of gradient-descent updates
learning_rate = 200   # step size applied to the gradient
l2_strength = 0.01    # weight of the mean-squared-weight penalty

for k in range(num_steps):
  # Forward pass: looking up row rev_xs[i] of rev_W gives the logits for example i.
  rev_logits = rev_W[rev_xs]
  rev_loss = F.cross_entropy(rev_logits, rev_ys) + l2_strength*(rev_W**2).mean()
  print(rev_loss.item())

  # Backward pass: clear the previous gradient, then backpropagate.
  rev_W.grad = None
  rev_loss.backward()

  # Parameter update. Done in place under no_grad (instead of through
  # rev_W.data) so autograd does not record the update.
  with torch.no_grad():
    rev_W -= learning_rate * rev_W.grad

3.7510268688201904
3.2818031311035156
2.9256207942962646
2.6588029861450195
2.4543023109436035
2.293783664703369
2.1652472019195557
2.0603713989257812
1.9733041524887085
1.8998838663101196
1.8371291160583496
1.7828809022903442
1.7355492115020752
1.693935751914978
1.6571165323257446
1.6243647336959839
1.5951019525527954
1.568861722946167
1.5452649593353271
1.5239981412887573
1.504798412322998
1.4874407052993774
1.4717295169830322
1.4574931859970093
1.4445785284042358
1.4328480958938599
1.422179937362671
1.4124633073806763
1.4035992622375488
1.3954988718032837
1.388082504272461
1.381279468536377
1.3750263452529907
1.369267225265503
1.3639520406723022
1.3590360879898071
1.3544806241989136
1.350250482559204
1.3463144302368164
1.3426454067230225
1.3392186164855957
1.3360124826431274
1.3330074548721313
1.330186367034912
1.3275331258773804
1.325034737586975
1.3226776123046875
1.3204511404037476
1.318345069885254
1.3163504600524902
1.3144588470458984
1.3126628398895264
1.3109554052352905
1.309

In [None]:
# Loss computed in the last reverse-model training iteration (cross-entropy plus the L2 penalty).
rev_loss.item()

1.2648277282714844

In [None]:
# Encode the held-out reversed words as trigram examples:
# context pair id -> xs3, next-character id -> ys3.
xs3, ys3 = [], []
for w in rev_dev_set:
  chs = ['.'] + list(w) + ['.']
  for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
    xs3.append(bitoi[(ch1, ch2)])
    ys3.append(chtoi[ch3])
xs3 = torch.tensor(xs3)
ys3 = torch.tensor(ys3)

In [None]:
# Evaluate the reverse model on its dev set. This is evaluation only, so it
# runs under no_grad to avoid building an autograd graph. The L2 term is kept
# so the number is computed the same way as the training loss.
with torch.no_grad():
  logits3 = rev_W[xs3]
  loss3 = F.cross_entropy(logits3, ys3) + 0.01*(rev_W**2).mean()
print(loss3.item())

1.2503236532211304


## The bot

In [None]:
# Demo patterns for the bot; "_" marks the single letter to be filled in.
inputs = [
  "st_ne",
  "j_ke",
  "_ust",
  "go_gle",
  "c_ld",
  "dre_s",
  "ha_d",
  "diff_culty",
  "mo_e",
  "po_er",
  "laug_ter",
  "ph_ne",
  "sp_ing",
]

In [None]:
def _context_probs(weights, context):
  """Return the character distribution a trigram model predicts after `context`.

  weights: the model's weight matrix (one row of logits per context id).
  context: a (ch1, ch2) tuple of characters.

  The ('.', '.') context never occurs in the training examples, because
  words are wrapped in a single '.' on each side, so its row of logits is
  essentially still the random initialisation. For that context a uniform
  distribution is returned, so the model carries no information into the
  product instead of injecting noise.
  """
  num_chars = weights.shape[1]
  if context == ('.', '.'):
    return torch.full((num_chars,), 1.0 / num_chars)
  return F.softmax(weights[bitoi[context]], dim=0)


# Fill the blank in every pattern by combining the forward model (two
# letters before the blank) with the reverse model (two letters after it).
outputs = []
with torch.no_grad():  # inference only; no autograd graph needed
  for pattern in inputs:
    if "_" not in pattern:
      raise ValueError(f"no '_' placeholder in {pattern!r}")

    # Pad with two boundary markers on each side so that the two-character
    # contexts exist even when the blank is the first or last letter.
    padded = ['.', '.'] + list(pattern) + ['.', '.']
    blank = padded.index("_")

    # Forward model: context is the two characters preceding the blank.
    fwd_probs = _context_probs(W, (padded[blank-2], padded[blank-1]))
    # Reverse model: it was trained on reversed words, so its context is the
    # two characters following the blank, taken in reverse order.
    bwd_probs = _context_probs(rev_W, (padded[blank+2], padded[blank+1]))

    ultimate_probs = fwd_probs * bwd_probs
    # The blank stands for a real letter, never for the boundary marker.
    ultimate_probs[chtoi['.']] = 0
    ans = itoch[ultimate_probs.argmax().item()]

    # Reassemble the word without the padding.
    outputs.append("".join(padded[2:blank] + [ans] + padded[blank+1:-2]))

print(outputs)

['stone', 'jake', 'must', 'google', 'cold', 'dress', 'hand', 'difficulty', 'move', 'pocer', 'laughter', 'phone', 'spaing']
