In [3]:
from tensorflow.keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from tensorflow.keras.layers import RepeatVector, Dense, Activation, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model, Model
import tensorflow.keras.backend as K
import tensorflow as tf
import numpy as np
import pandas as pd

#from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
#from nmt_utils import *
import matplotlib.pyplot as plt
%matplotlib inline

from string_to_int import string_to_int

## Load data

In [4]:
# Load the vocabularies and the dataset from pickle files in the working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load these files from a trusted source.
machine_vocab = pd.read_pickle('machine_vocab.pkl')
human_vocab = pd.read_pickle('human_vocab.pkl')
inv_machine_vocab = pd.read_pickle('inv_machine_vocab.pkl')
dataset = pd.read_pickle('dataset.pkl')
# Number of items in the dataset; used later to size the initial-state arrays.
m = len(dataset)

## String to int test

In [5]:
# Smoke test: encode the first element of the first dataset item with a length argument of 20
# and display the resulting sequence of vocabulary indices.
r = string_to_int(dataset[0][0], 20, human_vocab)
r

[12, 0, 24, 13, 34, 0, 4, 12, 12, 11, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36]

## string to categorical

In [6]:
def softmax(x, axis=1):
    """Softmax activation function.

    # Arguments
        x : Tensor with at least 2 dimensions.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `x` has fewer than 2 dimensions.
    """
    ndim = K.ndim(x)
    if ndim < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
    if ndim == 2:
        # Forward `axis` explicitly: the backend default is the last axis, so a
        # caller-supplied axis (e.g. axis=0) would otherwise be silently ignored.
        # For the default axis=1 on a 2D tensor the result is unchanged.
        return K.softmax(x, axis=axis)
    # Subtract the per-axis maximum before exponentiating for numerical stability.
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))
    s = K.sum(e, axis=axis, keepdims=True)
    return e / s

def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):
    """Convert (source, target) string pairs into index and one-hot arrays.

    Arguments:
    dataset -- iterable of (source string, target string) pairs
    human_vocab -- vocabulary handed to string_to_int for the source strings
    machine_vocab -- vocabulary handed to string_to_int for the target strings
    Tx -- length argument handed to string_to_int for the source strings
    Ty -- length argument handed to string_to_int for the target strings

    Returns:
    X -- np.array of the encoded source sequences
    Y -- np.array of the encoded target sequences
    Xoh -- one-hot version of X with len(human_vocab) classes
    Yoh -- one-hot version of Y with len(machine_vocab) classes
    """
    sources, targets = zip(*dataset)

    n_human = len(human_vocab)
    n_machine = len(machine_vocab)

    # Integer-encode every string.
    X = np.array([string_to_int(text, Tx, human_vocab) for text in sources])
    Y = [string_to_int(text, Ty, machine_vocab) for text in targets]

    # One-hot encode row by row.
    Xoh = np.array([to_categorical(row, num_classes=n_human) for row in X])
    Yoh = np.array([to_categorical(row, num_classes=n_machine) for row in Y])

    return X, np.array(Y), Xoh, Yoh

In [7]:
Tx = 30  # length argument used when encoding the source strings
Ty = 10  # length argument used when encoding the target strings
# Build the index arrays (X, Y) and their one-hot counterparts (Xoh, Yoh).
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

## Model



In [8]:
# Layers used by one_step_attention. They are created once at module level so that
# every call to one_step_attention reuses the same layer objects.
repeator = RepeatVector(Tx)  # repeats the previous decoder state Tx times
concatenator = Concatenate(axis=-1)  # joins encoder states and repeated decoder state on the last axis
densor1 = Dense(10, activation = "tanh")
densor2 = Dense(1, activation = "relu")  # one energy value per position
activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes = 1)  # contracts attention weights with encoder states over axis 1

In [9]:
def one_step_attention(a, s_prev):
    """
    Compute the attention context for a single decoding step.

    The previous decoder state is repeated along the time axis, concatenated with the
    Bi-LSTM states, scored by two dense layers, normalised into attention weights
    "alphas", and finally used to take a weighted combination of the Bi-LSTM states.

    Arguments:
    a -- hidden state output of the Bi-LSTM, tensor of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, tensor of shape (m, n_s)

    Returns:
    context -- context vector, input of the next (post-attention) LSTM cell
    """
    # Uses the module-level shared layers: repeator, concatenator, densor1, densor2,
    # activator and dotor.
    s_tiled = repeator(s_prev)
    joined = concatenator([a, s_tiled])
    energies = densor2(densor1(joined))
    alphas = activator(energies)
    return dotor([alphas, a])

In [10]:
n_a = 32 # number of units for the pre-attention, bi-directional LSTM's hidden state 'a'
n_s = 64 # number of units for the post-attention LSTM's hidden state "s"

# Post-attention LSTM layer. It is created once here and called at every decoding step
# in modelf; return_state=True makes it return its states along with its output.
post_activation_LSTM_cell = LSTM(n_s, return_state = True) # Please do not modify this global variable.
# Output layer: one score per machine-vocabulary entry, normalised with the custom softmax.
output_layer = Dense(len(machine_vocab), activation=softmax)

I0000 00:00:1732487155.354392  280040 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1221 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3050 6GB Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


In [11]:
def modelf(Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """
    Build the attention-based sequence-to-sequence model.

    A bidirectional LSTM encodes the one-hot input sequence; then, for each of the Ty
    output positions, an attention context is computed and fed to the shared
    post-attention LSTM, whose output goes through the shared output layer.

    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"
                          (not read in this function: the output width comes from the
                          module-level `output_layer`)

    Returns:
    model -- Keras model instance with inputs [X, s0, c0] and a list of Ty outputs
    """
    # Model inputs: the one-hot sequence plus the initial decoder hidden/cell states.
    X = Input(shape=(Tx, human_vocab_size))
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')

    # Encoder: one (2*n_a)-wide state per input position.
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)

    s, c = s0, c0
    outputs = []
    for _step in range(Ty):
        # Attend over the encoder states using the current decoder hidden state.
        context = one_step_attention(a, s)
        # The layer returns (output, hidden state, cell state); the output is kept as `s`.
        s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])
        outputs.append(output_layer(s))

    return Model(inputs=[X, s0, c0], outputs=outputs)

## Compiling

In [12]:
# Optimizer with the hyper-parameters verified by the assertions below.
opt = Adam(learning_rate=0.005, weight_decay=0.01)
model = modelf(Tx, Ty, n_a, n_s, len(human_vocab), len(machine_vocab))
# The model has Ty outputs, so one 'accuracy' metric is listed per output.
model.compile(loss = 'categorical_crossentropy', optimizer = opt, metrics = Ty*['accuracy'])

# Sanity checks on the optimizer and compile configuration.
# beta_1 and beta_2 are not passed to Adam above, so those two asserts check its defaults.
assert opt.learning_rate == 0.005, "Set the lr parameter to 0.005"
assert opt.beta_1 == 0.9, "Set the beta_1 parameter to 0.9"
assert opt.beta_2 == 0.999, "Set the beta_2 parameter to 0.999"
assert opt.weight_decay == 0.01, "Set the decay parameter to 0.01"
assert model.loss == "categorical_crossentropy", "Wrong loss. Use 'categorical_crossentropy'"
assert model.optimizer == opt, "Use the optimizer that you have instantiated"

## Defining the inputs

We need to initialize s0 and c0 with zeros, so that the post-attention LSTM starts decoding from a zero hidden state and a zero cell state.

In [13]:
# Zero initial hidden state and cell state, one row per dataset item.
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
# Swap the first two axes of Yoh and split along the new first axis, producing a list
# with one target array per output position (matching the model's list of outputs).
outputs = list(Yoh.swapaxes(0,1))

## Fitting

In [14]:
# Train for a single epoch with batches of 100 examples.
model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

I0000 00:00:1732487175.834194  280409 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 25ms/step - dense_2_accuracy: 0.2427 - dense_2_accuracy_1: 0.4164 - dense_2_accuracy_2: 0.1695 - dense_2_accuracy_3: 0.0604 - dense_2_accuracy_4: 0.9171 - dense_2_accuracy_5: 0.0960 - dense_2_accuracy_6: 0.0197 - dense_2_accuracy_7: 0.8650 - dense_2_accuracy_8: 0.1106 - dense_2_accuracy_9: 0.0574 - dense_2_loss: 2.7190 - loss: 19.7973


<keras.src.callbacks.history.History at 0x756a7532c860>

In [15]:
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
# Zero initial hidden/cell state for a batch containing a single example.
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    # Encode the text as Tx vocabulary indices, then one-hot encode the whole sequence
    # at once, giving an array of shape (Tx, len(human_vocab)).
    source = string_to_int(example, Tx, human_vocab)
    source = to_categorical(source, num_classes=len(human_vocab))
    # Add the batch axis expected by the model: (1, Tx, len(human_vocab)).
    source = np.expand_dims(source, axis=0)
    # The model has Ty outputs, so predict returns one array per output position.
    prediction = model.predict([source, s00, c00])
    # argmax over the vocabulary axis yields shape (Ty, 1); flatten it so each element is
    # a scalar. Calling int() on a 1-element array is deprecated in NumPy.
    prediction = np.argmax(prediction, axis=-1).ravel()
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
source: 3 May 1979
output: 1997-08-07 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
source: 5 April 09
output: 1980-08-07 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
source: 21th of August 2016
output: 2000-00-20 



  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
source: Tue 10 Jul 2007
output: 2000-00-22 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
source: Saturday May 9 2018
output: 1980-08-10 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
source: March 3 2001
output: 2000-00-22 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
source: March 3rd 2001
output: 2000-02-22 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
source: 1 March 2001
output: 2000-02-22 



In [16]:
# Continue training the same model object for 50 more epochs with batches of 100 examples.
model.fit([Xoh, s0, c0], outputs, epochs=50, batch_size=100)

Epoch 1/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - dense_2_accuracy: 0.9546 - dense_2_accuracy_1: 0.9536 - dense_2_accuracy_2: 0.4855 - dense_2_accuracy_3: 0.1862 - dense_2_accuracy_4: 1.0000 - dense_2_accuracy_5: 0.9053 - dense_2_accuracy_6: 0.2327 - dense_2_accuracy_7: 1.0000 - dense_2_accuracy_8: 0.4092 - dense_2_accuracy_9: 0.1494 - dense_2_loss: 2.2662 - loss: 9.5294
Epoch 2/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - dense_2_accuracy: 0.9728 - dense_2_accuracy_1: 0.9728 - dense_2_accuracy_2: 0.7004 - dense_2_accuracy_3: 0.4300 - dense_2_accuracy_4: 1.0000 - dense_2_accuracy_5: 0.9565 - dense_2_accuracy_6: 0.4592 - dense_2_accuracy_7: 0.9997 - dense_2_accuracy_8: 0.5613 - dense_2_accuracy_9: 0.2480 - dense_2_loss: 2.0405 - loss: 7.3230
Epoch 3/50
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - dense_2_accuracy: 0.9833 - dense_2_accuracy_1: 0.9829 - dense_2_accuracy_2: 0.8248 - 

<keras.src.callbacks.history.History at 0x756a53357d70>

In [17]:
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
# Zero initial hidden/cell state for a batch containing a single example.
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    # Encode the text as Tx vocabulary indices, then one-hot encode the whole sequence
    # at once, giving an array of shape (Tx, len(human_vocab)).
    source = string_to_int(example, Tx, human_vocab)
    source = to_categorical(source, num_classes=len(human_vocab))
    # Add the batch axis expected by the model: (1, Tx, len(human_vocab)).
    source = np.expand_dims(source, axis=0)
    # The model has Ty outputs, so predict returns one array per output position.
    prediction = model.predict([source, s00, c00])
    # argmax over the vocabulary axis yields shape (Ty, 1); flatten it so each element is
    # a scalar. Calling int() on a 1-element array is deprecated in NumPy.
    prediction = np.argmax(prediction, axis=-1).ravel()
    output = [inv_machine_vocab[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
source: 3 May 1979
output: 1979-05-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
source: 5 April 09
output: 2009-04-05 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
source: 21th of August 2016
output: 2016-08-21 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: Tue 10 Jul 2007
output: 2007-07-10 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: Saturday May 9 2018
output: 2018-05-09 



  output = [inv_machine_vocab[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
source: March 3 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
source: March 3rd 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
source: 1 March 2001
output: 2001-03-01 



In [18]:
# Load weights from model.h5 into the current model, replacing its current weights.
# NOTE(review): model.h5 is not written anywhere in the visible notebook -- presumably a
# separately provided weights file; confirm its origin.
model.load_weights('model.h5')

## Tests

In [5]:
# h5py is not imported by the notebook's import cell, so import it here; without this the
# cell raises NameError on a fresh kernel.
import h5py

# Open the weights file read-only for inspection. The handle stays open because the
# following cells keep reading from `f`; call f.close() once the inspection is done.
f = h5py.File('model.h5', 'r')

In [7]:
# List the top-level groups of the HDF5 file.
f.keys()

<KeysViewHDF5 ['model_weights', 'optimizer_weights']>

In [9]:
# List the entries stored under the 'model_weights' group.
f['model_weights'].keys()

<KeysViewHDF5 ['attention_weights', 'bidirectional_10', 'c0', 'concatenate_12', 'dense_36', 'dense_37', 'dense_38', 'dot_12', 'input_10', 'lstm_20', 'repeat_vector_12', 's0']>

In [13]:
# List the entries of the 'dense_38' group.
f['model_weights']['dense_38'].keys()

<KeysViewHDF5 ['dense_38']>

In [14]:
# Show the nested 'dense_38' group inside 'dense_38'.
f['model_weights']['dense_38']['dense_38']

<HDF5 group "/model_weights/dense_38/dense_38" (2 members)>

In [15]:
# List the members of the nested 'dense_38' group.
f['model_weights']['dense_38']['dense_38'].keys()

<KeysViewHDF5 ['bias:0', 'kernel:0']>

In [16]:
# Show the 'bias:0' dataset handle (its repr reports shape and dtype).
f['model_weights']['dense_38']['dense_38']['bias:0']

<HDF5 dataset "bias:0": shape (11,), type "<f4">

In [19]:
# Read the single bias value at index 1 from the dataset.
f['model_weights']['dense_38']['dense_38']['bias:0'][1]

np.float32(0.060214486)

In [24]:
from tensorflow.keras.saving import load_model

In [25]:
# NOTE(review): the recorded run of this cell raised
# "TypeError: Could not locate class 'LSTM'" while deserializing the model config stored
# in model.h5. Loading only the weights into the model built by modelf, via
# model.load_weights('model.h5'), is what the notebook does in an earlier cell.
# On success this call would rebind the global `model`.
model = load_model('model.h5')

TypeError: Could not locate class 'LSTM'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'class_name': 'LSTM', 'config': {'name': 'lstm_21', 'trainable': True, 'return_sequences': True, 'return_state': False, 'go_backwards': False, 'stateful': False, 'unroll': False, 'units': 32, 'activation': 'tanh', 'recurrent_activation': 'hard_sigmoid', 'use_bias': True, 'kernel_initializer': {'class_name': 'VarianceScaling', 'config': {'scale': 1.0, 'mode': 'fan_avg', 'distribution': 'uniform', 'seed': None}}, 'recurrent_initializer': {'class_name': 'Orthogonal', 'config': {'gain': 1.0, 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'unit_forget_bias': True, 'kernel_regularizer': None, 'recurrent_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'recurrent_constraint': None, 'bias_constraint': None, 'dropout': 0.0, 'recurrent_dropout': 0.0, 'implementation': 1}}