In [1]:
from tensorflow.keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from tensorflow.keras.layers import RepeatVector, Dense, Activation, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model, Model
import tensorflow.keras.backend as K
import tensorflow as tf

import numpy as np
import pandas as pd

import random
from tqdm import tqdm
from babel.dates import format_date
import matplotlib.pyplot as plt
%matplotlib inline

from string_to_int import string_to_int

2024-11-25 18:21:38.681220: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-25 18:21:38.769705: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732569698.801869  360824 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732569698.809771  360824 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-25 18:21:38.894348: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

## Load data

In [2]:
machine_vocab = pd.read_pickle('machine_vocab.pkl')
human_vocab = pd.read_pickle('human_vocab.pkl')
inv_machine_vocab = pd.read_pickle('inv_machine_vocab.pkl')
dataset = pd.read_pickle('dataset.pkl')
m = len(dataset)

In [26]:
type(dataset[0])

tuple

## String to int test

In [3]:
r = string_to_int(dataset[0][0], 20, human_vocab)
r

[12, 0, 24, 13, 34, 0, 4, 12, 12, 11, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36]

## string to categorical

In [4]:
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):   
    X, Y = zip(*dataset)
    
    X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
    Y = [string_to_int(t, Ty, machine_vocab) for t in Y]
    
    Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

    return X, np.array(Y), Xoh, Yoh

In [5]:
Tx = 30
Ty = 10
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

In [38]:
categorified_data = tf.data.Dataset.zip(tf.data.Dataset(Xoh), tf.data.Dataset(Yoh))

TypeError: Can't instantiate abstract class DatasetV2 without an implementation for abstract methods '_inputs', 'element_spec'

In [34]:
categorified_data

<zip at 0x7a15b8983900>

In [32]:
train_set, test_set = tf.keras.utils.split_dataset(zip(Xoh, Yoh), left_size=.9)

TypeError: The `dataset` argument must be eithera `tf.data.Dataset`, a `torch.utils.data.Dataset`object, or a list/tuple of arrays. Received: dataset=<zip object at 0x7a15b9d20cc0> of type <class 'zip'>

## Model



In [6]:
K.softmax

<function keras.src.legacy.backend.softmax(x, axis=-1)>

In [7]:
repeater = RepeatVector(Tx)
concatenate = Concatenate(axis=-1)
denserTanh = Dense(10, activation = "tanh")
denserRelu = Dense(1, activation = "relu")
activation = Activation(K.softmax, name='attention_weights')
dot = Dot(axes = 1)

In [8]:
def one_step_attention(a, s_prev):
    s_prev = repeater(s_prev)
    concat = concatenate([a, s_prev])
    e = denserTanh(concat)
    energies = denserRelu(e)
    alphas = activation(energies)
    context = dot([alphas, a])
    
    return context

In [9]:
n_a = 32 # number of units for the pre-attention
n_s = 64 # number of units for the post-attention

post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(len(machine_vocab), activation=softmax)

I0000 00:00:1732569701.335858  360824 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4147 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [10]:
def make_model(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the pre-attentino Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model
    """

    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    s = s0
    c = c0
    
    outputs = []
    
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)
    
    for t in range(Ty):
        context = one_step_attention(a, s)
        s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
        out = output_layer(s)
        
        outputs.append(out)
    
    model = Model(inputs=[X, s0, c0], outputs=outputs)  
    return model

## Compiling

In [11]:
opt = Adam(learning_rate=0.005, weight_decay=0.01)
model = make_model(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
model.compile(loss = 'categorical_crossentropy', optimizer = opt, metrics = Ty*['accuracy'])

## Defining the inputs

Precisamos inicializar s0 e c0 com zeros, para

In [12]:
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
outputs = list(Yoh.swapaxes(0,1))

## Fitting

In [13]:
model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

I0000 00:00:1732569747.559208  360879 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - dense_2_accuracy: 0.6457 - dense_2_accuracy_1: 0.5903 - dense_2_accuracy_2: 0.3062 - dense_2_accuracy_3: 0.1160 - dense_2_accuracy_4: 0.6707 - dense_2_accuracy_5: 0.1904 - dense_2_accuracy_6: 0.0707 - dense_2_accuracy_7: 0.7154 - dense_2_accuracy_8: 0.1352 - dense_2_accuracy_9: 0.0666 - dense_2_loss: 2.7025 - loss: 18.6573


<keras.src.callbacks.history.History at 0x7a162c625e50>

In [14]:
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    source = string_to_int(example, Tx, human_vocab)
    source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source)))
    source = np.expand_dims(source, axis=0)
    prediction = model.predict([source, s00, c00])
    prediction = np.argmax(prediction, axis = -1)
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 545ms/step
source: 3 May 1979
output: 1997-00-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 5 April 09
output: 2005-05-05 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 21th of August 2016
output: 2016-06-26 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: Tue 10 Jul 2007
output: 2007-07-07 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: Saturday May 9 2018
output: 1985-05-05 



  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: March 3 2001
output: 2003-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
source: March 3rd 2001
output: 2003-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 1 March 2001
output: 2000-01-03 



In [30]:
model.fit([Xoh, s0, c0], outputs, epochs=50, batch_size=100)

Epoch 1/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - dense_6_accuracy: 0.9678 - dense_6_accuracy_1: 0.9733 - dense_6_accuracy_2: 0.7353 - dense_6_accuracy_3: 0.4472 - dense_6_accuracy_4: 0.9994 - dense_6_accuracy_5: 0.9175 - dense_6_accuracy_6: 0.4575 - dense_6_accuracy_7: 0.9987 - dense_6_accuracy_8: 0.6527 - dense_6_accuracy_9: 0.4183 - dense_6_loss: 1.4640 - loss: 6.5667
Epoch 2/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_6_accuracy: 0.9800 - dense_6_accuracy_1: 0.9828 - dense_6_accuracy_2: 0.8489 - dense_6_accuracy_3: 0.8107 - dense_6_accuracy_4: 1.0000 - dense_6_accuracy_5: 0.9688 - dense_6_accuracy_6: 0.7988 - dense_6_accuracy_7: 1.0000 - dense_6_accuracy_8: 0.8283 - dense_6_accuracy_9: 0.6647 - dense_6_loss: 0.9053 - loss: 3.3767
Epoch 3/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_6_accuracy: 0.9884 - dense_6_accuracy_1: 0.9890 - dense_6_accuracy_2: 0.8909 - 

<keras.src.callbacks.history.History at 0x7386b11eabd0>

In [31]:
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    source = string_to_int(example, Tx, human_vocab)
    #print(source)
    source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0,1)
    source = np.swapaxes(source, 0, 1)
    source = np.expand_dims(source, axis=0)
    prediction = model.predict([source, s00, c00])
    prediction = np.argmax(prediction, axis = -1)
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
source: 3 May 1979
output: 1979-05-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: 5 April 09
output: 2009-04-05 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 21th of August 2016
output: 2016-08-20 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: Tue 10 Jul 2007
output: 2007-06-10 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: Saturday May 9 2018
output: 2018-05-09 



  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
source: March 3 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: March 3rd 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
source: 1 March 2001
output: 2001-03-01 



In [32]:
#model.load_weights('model.h5')

In [15]:
model.fit([Xoh, s0, c0], outputs, epochs=100, batch_size=100)

Epoch 1/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_2_accuracy: 0.9674 - dense_2_accuracy_1: 0.9664 - dense_2_accuracy_2: 0.7754 - dense_2_accuracy_3: 0.4914 - dense_2_accuracy_4: 0.9996 - dense_2_accuracy_5: 0.8961 - dense_2_accuracy_6: 0.4917 - dense_2_accuracy_7: 0.9992 - dense_2_accuracy_8: 0.6140 - dense_2_accuracy_9: 0.4015 - dense_2_loss: 1.5419 - loss: 6.4895
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_2_accuracy: 0.9785 - dense_2_accuracy_1: 0.9788 - dense_2_accuracy_2: 0.8321 - dense_2_accuracy_3: 0.8277 - dense_2_accuracy_4: 0.9999 - dense_2_accuracy_5: 0.9643 - dense_2_accuracy_6: 0.7764 - dense_2_accuracy_7: 0.9997 - dense_2_accuracy_8: 0.8185 - dense_2_accuracy_9: 0.6702 - dense_2_loss: 0.9032 - loss: 3.3288
Epoch 3/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - dense_2_accuracy: 0.9820 - dense_2_accuracy_1: 0.9821 - dense_2_accuracy_2: 0.8693

<keras.src.callbacks.history.History at 0x7a1611fa4140>

In [16]:
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    source = string_to_int(example, Tx, human_vocab)
    #print(source)
    source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0,1)
    source = np.swapaxes(source, 0, 1)
    source = np.expand_dims(source, axis=0)
    prediction = model.predict([source, s00, c00])
    prediction = np.argmax(prediction, axis = -1)
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
source: 3 May 1979
output: 1979-05-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 5 April 09
output: 2019-04-05 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 21th of August 2016
output: 2016-08-21 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
source: Tue 10 Jul 2007
output: 2007-07-10 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: Saturday May 9 2018
output: 2018-05-09 



  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: March 3 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
source: March 3rd 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
source: 1 March 2001
output: 2001-03-01 

