## Import libraries

In [1]:
from tensorflow.keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from tensorflow.keras.layers import RepeatVector, Dense, Activation, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model, Model
import tensorflow.keras.backend as K
import tensorflow as tf

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.spatial.distance import hamming

import random
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from string_to_int import string_to_int

2024-11-25 22:47:00.822571: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-25 22:47:00.831403: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732585620.841860  160390 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732585620.844835  160390 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-25 22:47:00.855916: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

## Load data

In [3]:
machine_vocab = pd.read_pickle('machine_vocab.pkl')
human_vocab = pd.read_pickle('human_vocab.pkl')
inv_machine_vocab = pd.read_pickle('inv_machine_vocab.pkl')
dataset = pd.read_pickle('dataset.pkl')

In [4]:
inv_human_vocab = {v: k for k, v in human_vocab.items()}

## string to categorical

In [5]:
def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):   
    X, Y = zip(*dataset)
    
    X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
    Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
    
    Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

    return X, np.array(Y), Xoh, Yoh

In [6]:
Tx = 30
Ty = 10
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

In [7]:
Xoh_train, Xoh_test, Yoh_train, Yoh_test = train_test_split(Xoh, Yoh, test_size=0.05)
train_len = len(Xoh_train)

In [8]:
repeater = RepeatVector(Tx)
concatenate = Concatenate(axis=-1)
denserTanh = Dense(10, activation = "tanh")
denserRelu = Dense(1, activation = "relu")
activation = Activation(softmax, name='attention_weights')
dot = Dot(axes = 1)

In [9]:
def one_step_attention(a, s_prev):
    s_prev = repeater(s_prev)
    concat = concatenate([a, s_prev])
    e = denserTanh(concat)
    energies = denserRelu(e)
    alphas = activation(energies)
    context = dot([alphas, a])
    
    return context

In [10]:
n_a = 32 # number of units for the pre-attention
n_s = 64 # number of units for the post-attention

post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(len(machine_vocab), activation=softmax)

I0000 00:00:1732585622.839319  160390 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4147 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [11]:
def make_model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0
    
    outputs = []
    
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
    
    for t in range(Ty):
        context = one_step_attention(a, s)
        s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
        out = output_layer(s)
        
        outputs.append(out)
    
    model = Model(inputs=[X, s0, c0], outputs=outputs)  
    return model

## Compiling

In [12]:
opt = Adam(learning_rate=0.005, weight_decay=0.01)
model = make_model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
model.compile(loss = 'categorical_crossentropy', optimizer = opt, metrics = Ty*['accuracy'])

## Defining the inputs

Precisamos inicializar s0 e c0 com zeros, para

In [13]:
s0 = np.zeros((train_len, n_s))
c0 = np.zeros((train_len, n_s))
outputs = list(Yoh_train.swapaxes(0,1))

## Fitting

In [14]:
def show_test_results(model, show_comparison=False, show_mismatches=True):
    ham = 0
    mismatch = 0
    mismatches = []
    s00 = np.zeros((1, n_s))
    c00 = np.zeros((1, n_s))
    for x_test, y_test in zip(Xoh_test, Yoh_test):
        y_test = np.expand_dims(y_test, axis=0)
        y_test = np.argmax(y_test, axis = -1)
        y_test = [inv_machine_vocab[int(i)] for i in y_test[0]]
        #print("y_test: ", ''.join(y_test))
    
        x_test = np.expand_dims(x_test, axis=0)
        prediction = model.predict([x_test, s00, c00])
        prediction = np.argmax(prediction, axis = -1)
        output = [inv_machine_vocab[int(i)] for i in prediction]
        
        input_text = np.argmax(x_test, axis = -1)
        input_text = [inv_human_vocab[int(i)] for i in input_text[0]] 
        
        input_text  = ''.join('' if c == '<pad>' else c for c in input_text)
        output_text = ''.join(output)
        
        ham = hamming([c for c in y_test], [c for c in output_text])
        if ham > 0:
            mismatch += 1
            mismatches.append((input_text, output_text, ''.join(y_test)))
        if(show_comparison):
            print("hamming: ", hamming([c for c in y_test], [c for c in output_text]))
            print("source:", input_text)
            print("output:", output_text, "\n")
    print("Mean hamming: ", ham/(len(Xoh_test)))
    print("Mismatches: ", mismatch)
    if show_mismatches:
        print(mismatches)

In [15]:
model.fit([Xoh_train, s0, c0], outputs, epochs=1, batch_size=100)

I0000 00:00:1732584231.334853   85962 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - dense_2_accuracy: 0.5611 - dense_2_accuracy_1: 0.5711 - dense_2_accuracy_2: 0.2887 - dense_2_accuracy_3: 0.1109 - dense_2_accuracy_4: 0.6489 - dense_2_accuracy_5: 0.2409 - dense_2_accuracy_6: 0.0727 - dense_2_accuracy_7: 0.7149 - dense_2_accuracy_8: 0.1498 - dense_2_accuracy_9: 0.0666 - dense_2_loss: 2.7100 - loss: 18.7696


<keras.src.callbacks.history.History at 0x72aa37aac350>

In [18]:
show_test_results(model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 579ms/step
hamming:  0.3
source: 25 july 1982
output: 1988-02-22 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
hamming:  0.2
source: december 14 2000
output: 2000-10-10 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
hamming:  0.3
source: thursday july 23 2009
output: 2007-07-07 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.2
source: monday may 6 2002
output: 2000-06-06 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.4
source: friday august 9 2013
output: 2088-03-03 



  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.3
source: sunday january 27 2019
output: 2017-07-22 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
hamming:  0.3
source: tuesday july 18 1989
output: 1988-08-08 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.4
source: sunday june 15 2008
output: 2018-08-06 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.3
source: thursday march 25 1976
output: 1976-06-06 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
hamming:  0.3
source: 05.02.03
output: 2000-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.3
source: 31 jul 1978
output: 1977-07-17 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamming:  0.2
source: jan 7 1996
output: 1996-06-06 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
hamm

In [15]:
model.fit([Xoh_train, s0, c0], outputs, epochs=50, batch_size=100)

Epoch 1/50


I0000 00:00:1732585514.515012  150900 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - dense_2_accuracy: 0.2463 - dense_2_accuracy_1: 0.4952 - dense_2_accuracy_2: 0.1962 - dense_2_accuracy_3: 0.0655 - dense_2_accuracy_4: 0.7779 - dense_2_accuracy_5: 0.1881 - dense_2_accuracy_6: 0.0292 - dense_2_accuracy_7: 0.8079 - dense_2_accuracy_8: 0.1535 - dense_2_accuracy_9: 0.0770 - dense_2_loss: 2.6796 - loss: 19.5708
Epoch 2/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_2_accuracy: 0.9698 - dense_2_accuracy_1: 0.9689 - dense_2_accuracy_2: 0.4712 - dense_2_accuracy_3: 0.1960 - dense_2_accuracy_4: 1.0000 - dense_2_accuracy_5: 0.8752 - dense_2_accuracy_6: 0.2907 - dense_2_accuracy_7: 1.0000 - dense_2_accuracy_8: 0.4627 - dense_2_accuracy_9: 0.1789 - dense_2_loss: 2.2018 - loss: 9.3198
Epoch 3/50
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - dense_2_accuracy: 0.9797 - dense_2_accuracy_1: 0.9795 - dense_2_accuracy_2: 0.6393 - dense_2_accuracy

<keras.src.callbacks.history.History at 0x75b584687c20>

In [16]:
show_test_results(model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 569ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19

In [32]:
#model.load_weights('model.h5')

In [15]:
model.fit([Xoh_train, s0, c0], outputs, epochs=100, batch_size=100)

Epoch 1/100


I0000 00:00:1732585630.265362  160441 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - dense_2_accuracy: 0.2407 - dense_2_accuracy_1: 0.5731 - dense_2_accuracy_2: 0.2286 - dense_2_accuracy_3: 0.0743 - dense_2_accuracy_4: 0.6729 - dense_2_accuracy_5: 0.3035 - dense_2_accuracy_6: 0.0523 - dense_2_accuracy_7: 0.6968 - dense_2_accuracy_8: 0.1584 - dense_2_accuracy_9: 0.0622 - dense_2_loss: 2.7148 - loss: 19.4917
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_2_accuracy: 0.9720 - dense_2_accuracy_1: 0.9701 - dense_2_accuracy_2: 0.4758 - dense_2_accuracy_3: 0.1906 - dense_2_accuracy_4: 0.9995 - dense_2_accuracy_5: 0.9001 - dense_2_accuracy_6: 0.3172 - dense_2_accuracy_7: 1.0000 - dense_2_accuracy_8: 0.4941 - dense_2_accuracy_9: 0.2022 - dense_2_loss: 2.1338 - loss: 8.9380
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_2_accuracy: 0.9738 - dense_2_accuracy_1: 0.9754 - dense_2_accuracy_2: 0.6064 - dense_2_accura

<keras.src.callbacks.history.History at 0x7d3bda537650>

In [16]:
show_test_results(model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 576ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step


  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18