In [1]:
# This notebook loads the final trained model and gets it to translate something.
#

In [4]:
!pwd

/Users/sr_old/Desktop/attention_seq2seq


In [5]:
import sys
print(sys.executable)
print(sys.version)

/Users/sr_old/Desktop/attention_seq2seq/p3.10_attention_seq2seq/bin/python
3.10.13 (main, Aug 24 2023, 22:36:46) [Clang 14.0.3 (clang-1403.0.22.14.1)]


## Imports

In [6]:
import math
import re

In [7]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
#from torchviz import make_dot
from torch.utils.data import Dataset, DataLoader #, TensorDataset
from torch.utils.data.dataset import random_split
import datetime

In [8]:
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

In [9]:
from torch.distributions.categorical import Categorical

In [10]:
import matplotlib.pyplot as plt
%matplotlib inline    
from matplotlib.pyplot import rcParams
plt.style.use('ggplot')
rcParams['figure.figsize'] = 5,5

In [11]:
import itertools
import random

In [12]:
%load_ext autoreload
%autoreload 2

In [13]:
import pickle

In [14]:
from dataprep_functions import LanguageDataset

In [15]:
from model_functions import *

## Device and seed

In [16]:
device = 'cuda' if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else 'cpu'

device

'mps'

In [17]:
seed = 42

In [18]:
torch.manual_seed(seed)
#rng = np.random.default_rng(seed)
random.seed(seed)

## Variables

In [19]:
# Name of the folder where data is accessed and saved
path = "datasets"

In [20]:
results_path = "results"

In [21]:
PAD_token = 0
SOS_token = 1
EOS_token = 2

## Loding the data we need

In [22]:
# Unpickling
with open(path + "/input_lang.txt", "rb") as y:
    input_lang = pickle.load(y)
with open(path + "/output_lang.txt", "rb") as y:
    output_lang = pickle.load(y)

In [23]:
train_dataset = torch.load(path + '/train.pt', weights_only=False)
val_dataset = torch.load(path + '/val.pt', weights_only=False)

## Now load the final model and investigate

In [24]:
# Set the hyperparameters right

In [25]:
m = Luong_full(vocab = input_lang.n_words, h_size = 90, dropout = 0.2, n_layers = 2, 
                    att_method = 'general', vocab_out = output_lang.n_words, device = device)

In [26]:
os.system("ls " + results_path + "/*");

results/5e-03_0.05_32_{'h_size':90,'dropout':0.2,'n_layers':2,'att_method':'general','c':'final_model'}.png
results/5e-03_1_32_{'h_size':30,'dropout':0,'n_layers':2,'att_method':'dot','c':'first'}.png

results/atlases:
att_method-dropout.png
hsize-layers.png
lr-ratio.png
lr-ratio_old.png

results/models:
5e-03_0.05_32_{'h_size':90,'dropout':0.2,'n_layers':2,'att_method':'general','c':'final_model'}.pt

results/ratio5,ss10:
5e-03_5_32_{'c':'','dropout':0,'h_size':90,'n_layers':3,'att_method':'dot'}.png
5e-03_5_32_{'c':'','dropout':0.1,'h_size':60,'n_layers':2,'att_method':'general'}.png
5e-03_5_32_{'c':'','dropout':0.3,'h_size':60,'n_layers':2,'att_method':'concat'}.png
5e-03_5_32_{'h_size':30,'dropout':0,'n_layers':2,'att_method':'dot','c':''}.png


In [27]:
# Pick the right model:
name = "5e-03_0.05_32_{'h_size':90,'dropout':0.2,'n_layers':2,'att_method':'general','c':'final_model'}.pt"

In [28]:
m.load_state_dict(torch.load(results_path + '/models/' + name, map_location=torch.device('cpu')))
m.to(device)

Luong_full(
  (encoder): EncoderRNN(
    (embedding): Embedding(655, 90)
    (gru): GRU(90, 45, num_layers=2, dropout=0.2, bidirectional=True)
  )
  (decoder): LuongDecoder(
    (embedding): Embedding(705, 90)
    (gru): GRU(90, 90, num_layers=2, dropout=0.2)
    (joinerFF): Linear(in_features=180, out_features=90, bias=True)
    (projFF): Linear(in_features=90, out_features=705, bias=True)
    (dropout_layer): Dropout(p=0.2, inplace=False)
    (alignment_vector): Attn(
      (mFF): Linear(in_features=90, out_features=90, bias=False)
    )
  )
)

In [29]:
# Now check that we can replicate the loss obtained.
# Set the epoch parameter to a high number, to make sure there is no teacher forcing.

val_loss = epochend_lcalc(m, lossmaker1, batcher(val_dataset, 800), device, epoch = 28)

In [30]:
val_loss

np.float64(1.277029554049174)

In [None]:
# Close enough.

## Now give it something to translate

In [50]:
val_dataset[1:5]

((tensor([[ 60, 387,   8, 193],
          [ 61, 211,  61,   3],
          [314,  61,  39,  59],
          [285, 511, 557,   6],
          [  6,   6,   9,   2],
          [  2,   2,   2,   0]]),
  [6, 6, 6, 5]),
 (tensor([[ 22, 456,   7, 606],
          [ 80, 539, 209, 209],
          [495, 264, 370,  37],
          [245,   4, 558,   4],
          [  4,   2,   8,   2],
          [  2,   0,   2,   0]]),
  [6, 5, 6, 5]))

In [140]:
def translate(i, dataset, beamsize, model, input_index2word_dictn, output_index2word_dictn, device = device):
    # This calls the .beam_decode() method and we print the results in an orderly fashion.
    # i is the sentence index in the dataset

    inp = dataset[i][0][0]

    print("INPUT:")
    print(dec(inp, input_index2word_dictn))
    print("MODEL ANSWER:")
    print(dec(dataset[i][1][0], output_index2word_dictn))
    print("")
    
    beam = model.beam_decode(beamsize, inp.to(device), max_dec_length = 20)

    # Now reorder and print the info from the beam
    seqs = []
    probs = []
    for i in beam:
        seq, prob, _ = i
        seqs.append(seq)
        probs.append(prob)

    for k in zip(seqs, probs):
            print(dec(torch.tensor(k[0]).unsqueeze(1), output_index2word_dictn), "     ", round(k[1][0], 2))


In [141]:
translate(10, val_dataset, 5, m, input_lang.index2word, output_lang.index2word)

INPUT:
['tu', 'es', 'important', '.', 'EOS']
MODEL ANSWER:
['you', 'are', 'important', '.', 'EOS']

['you', 're', 'important', '.', 'EOS']       -0.38
['you', 're', 'important', 'important', 'EOS']       -1.98
['you', 're', 'important', 'EOS']       -3.35
['you', 'are', 'important', '.', 'EOS']       -3.52
['you', 're', 'important', '.', '.', 'EOS']       -3.97


In [133]:
translate(11, val_dataset, 5, m, input_lang.index2word, output_lang.index2word)

INPUT:
['garde', 'le', '.', 'EOS']
MODEL ANSWER:
['keep', 'that', '.', 'EOS']

['keep', 'it', '.', 'EOS']       -1.14
['keep', 'it', 'EOS']       -2.48
['keep', 'it', '.', '.', 'EOS']       -2.84
['keep', 'the', '.', 'EOS']       -3.09
['keep', 'keep', '.', 'EOS']       -3.26


In [125]:
translate(12, val_dataset, 5, m, input_lang.index2word, output_lang.index2word)

INPUT:
['c', 'est', 'pour', 'toi', '.', 'EOS']
MODEL ANSWER:
['that', 's', 'for', 'you', '.', 'EOS']

['it', 's', 'for', '.', '.', 'EOS']       -2.43
['that', 's', 'for', '.', '.', 'EOS']       -2.49
['it', 'is', 'for', '.', '.', 'EOS']       -2.8
['that', 's', 'for', '.', 'EOS']       -2.85
['this', 's', 'for', '.', '.', 'EOS']       -2.99


In [127]:
translate(221, val_dataset, 5, m, input_lang.index2word, output_lang.index2word)

INPUT:
['soyez', 'honnete', 'avec', 'moi', '.', 'EOS']
MODEL ANSWER:
['be', 'honest', 'with', 'me', '.', 'EOS']

['that', 'with', 'me', 'me', 'EOS']       -3.73
['that', 'with', 'me', '.', 'EOS']       -3.77
['this', 'this', '.', '.', 'EOS']       -3.89
['this', 'with', 'me', 'me', 'EOS']       -3.98
['this', 'this', 'me', '.', 'EOS']       -4.14


In [136]:
translate(50, val_dataset, 25, m, input_lang.index2word, output_lang.index2word)

INPUT:
['je', 'devrais', 'etre', 'heureuse', '.', 'EOS']
MODEL ANSWER:
['i', 'should', 'be', 'happy', '.', 'EOS']

['i', 'll', 'be', '.', 'EOS']       -1.82
['i', 'should', 'be', '.', 'EOS']       -2.26
['i', 'll', 'be', '.', '.', 'EOS']       -2.45
['i', 'll', 'be', 'EOS']       -2.8
['i', 'should', 'be', '.', '.', 'EOS']       -2.87
['i', 'll', 'be', 'happy', '.', 'EOS']       -2.96
['i', 'll', 'be', 'there', 'EOS']       -3.35
['i', 'should', 'be', 'EOS']       -3.36
['i', 'll', 'be', 'happy', 'EOS']       -3.9
['i', 'll', 'be', 'you', '.', 'EOS']       -3.94
['i', 'll', 'be', 'there', '.', 'EOS']       -4.16
['i', 'must', 'be', '.', 'EOS']       -4.2
['i', 'should', 'be', 'happy', 'EOS']       -4.21
['i', 'should', 'be', 'happy', '.', 'EOS']       -4.21
['i', 'should', 'be', 'there', 'EOS']       -4.26
['i', 'will', 'be', '.', 'EOS']       -4.39
['i', 'll', 'be', 'you', 'EOS']       -4.44
['i', 'll', 'be', 'more', '.', 'EOS']       -4.55
['i', 'can', 'be', '.', 'EOS']       -4.85
[

tensor([[ 60],
        [ 61],
        [314],
        [285],
        [  6],
        [  2]])

In [55]:
m.beam_decode(5, val_dataset[1][0][0], 7)

[[[207, 80, 495, 245, 4, 2],
  [-1.568790921708569],
  tensor([[[ 0.1371, -0.1157, -0.9999,  0.9952, -1.0000,  0.4472,  0.0020,
             1.0000,  0.4373,  0.8891, -0.9999,  0.5863, -1.0000, -0.1802,
            -0.0463,  0.6495, -0.1566,  0.3696, -0.9998, -0.5146, -0.9891,
             0.1143,  0.5948,  1.0000, -0.1551, -1.0000, -0.4574, -0.9894,
             1.0000, -0.9925, -1.0000, -1.0000, -0.5168, -0.9899, -1.0000,
            -0.3049, -0.2378,  0.3452,  0.9927,  0.9969, -0.0854,  0.0173,
            -1.0000,  0.4010, -0.3076, -0.3033, -0.7821, -0.3797,  0.9961,
             1.0000, -0.1601, -1.0000, -0.4153,  0.2286, -0.2383,  0.8657,
             0.4093, -1.0000,  0.9996, -0.0337,  0.3237,  0.9978,  0.2329,
             0.9996, -0.8975, -0.2164, -0.0290,  1.0000, -0.0358,  1.0000,
             0.3448,  0.1524,  0.9986,  1.0000,  0.9472,  0.9143,  0.5544,
             0.2618,  1.0000,  0.3086,  1.0000, -0.4097,  0.9500, -1.0000,
            -1.0000, -0.9999, -0.9665,  1.0000,

In [65]:
val_dataset[10][0][0].flatten()

tensor([154, 331, 376,   6,   2])

In [71]:
dec(val_dataset[10][0][0], input_lang.index2word)

['tu', 'es', 'important', '.', 'EOS']

In [138]:
def dec(inpt, index2word_dictn):
    # This function takes a single tokenised sentence and translates the tokens, based on the dictionary provided.

    inpt = inpt.numpy()
    assert inpt.shape[1] == 1, "More than one sequence input"

    return [index2word_dictn[k] for k in inpt.flatten()]

In [94]:
dec(val_dataset[13][1][0], output_lang.index2word)

['tom', 'seems', 'satisfied', '.', 'EOS']

In [84]:
val_dataset[10][1][0].numpy().shape[1]

1

In [117]:
val_loss

1.8200318813323975

In [None]:
# still very close

In [104]:
val_dataset[3]

((tensor([[261],
          [501],
          [177],
          [178],
          [103],
          [720],
          [ 12],
          [  2]]), [8]), (tensor([[246],
          [113],
          [295],
          [227],
          [437],
          [  9],
          [  2]]), [7]))

In [118]:
beam = model_for_eval.beam_decode(3, val_dataset[3][0][0].to(device), 9)

In [122]:
seqs = []
probs = []

for i in  beam:
  seq, prob, _ = i
  seqs.append(seq)
  probs.append(prob)

In [123]:
seqs, probs

([[246, 113, 295, 132, 437, 9, 2],
  [246, 113, 295, 227, 437, 437, 2],
  [246, 113, 295, 227, 437, 9, 2]],
 [[-3.030233731493354], [-3.3888775166124105], [-3.5591217633336782]])

In [187]:
log(0.2)

[autoreload of sr2_functions_new failed: Traceback (most recent call last):
  File "/Users/sr_old/Desktop/sr2/p3sr2/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/Users/sr_old/Desktop/sr2/p3sr2/lib/python3.6/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/usr/local/opt/python@3.6/Frameworks/Python.framework/Versions/3.6/lib/python3.6/imp.py", line 315, in reload
    return importlib.reload(module)
  File "/usr/local/opt/python@3.6/Frameworks/Python.framework/Versions/3.6/lib/python3.6/importlib/__init__.py", line 166, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 618, in _exec
  File "<frozen importlib._bootstrap_external>", line 678, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/Users/sr_old/Desktop/sr2/sr2_functions_new.py", line 56, i

-1.60943791243410

In [190]:
log(0.0002)

-8.51719319141624