In [1]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
import re
import os
import json
import time

from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import pad_sequences
# from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras import layers

In [2]:

def clean_text(string: str, 
               punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~''',
               stop_words = stopwords.words('english'),
               # porter = PorterStemmer()
               wnl = WordNetLemmatizer()
              ):
    """
    A method to clean text. It removes punctuations, stop words, applies lemmatization.
    """
    # Removing the punctuations
    for x in string.lower(): 
        if x in punctuations: 
            string = string.replace(x, "") 

    # Converting the text to lower
    string = string.lower()

    # Removing stop words
    string = ' '.join([word for word in string.split() if word not in stop_words])

    # stemming/lemmatizing words. That means changing word to its basic format, for example
    # words 'fishing', 'fished', 'fischer' will be changed into a word 'fisch'
    # lemmatization should be better because stemming changes words too much, for example
    # business is changed into busi
    # string = ' '.join([porter.stem(word) for word in string.split()])
    string = ' '.join([wnl.lemmatize(word, pos = "v") for word in string.split()])

    # Cleaning the whitespaces
    string = re.sub(r'\s+', ' ', string).strip()

    return string

def create_training_data(tokenizer,
                         sentences_file,
                         # embed_matrix_file,
                         model_folder,
                         max_sen_len = None
                        ):
    """
    Creating a training and testing datasets self.x_train, self.x_test, self.y_train, self.y_test. This function
    also creates and saves a tokenizer.
    """
    sentences_tables = pd.read_excel(sentences_file).values
    random.shuffle(sentences_tables)
    clean_sentences = np.array([clean_text(sentence) for sentence in sentences_tables[:, 0]])

    tokenizer.fit_on_texts(clean_sentences)

    sequences = tokenizer.texts_to_sequences(clean_sentences)
    if max_sen_len == None:
        max_sen_len = np.max([len(seq) for seq in sequences])
    x = pad_sequences(sequences, maxlen = max_sen_len)

    # embed_matrix = pd.read_csv(embed_matrix_file).values

    x_train, x_test = train_test_split(x, test_size = 0.2)

    with open(os.path.join(model_folder, 'tokenizer.json'), 'w') as file:
        json.dump(tokenizer.to_json(), file)
        
    return x_train, x_test


def get_coefs(word, *arr): 
    return word, list(np.asarray(arr, dtype='float'))


def create_embedding_file(tokenizer,
                          embed_file_src = r'model\glove.840B.300d.txt', 
                          embed_file_trg = r'model\model_embeddings.txt'
                         ):
    """
    This function will create an embedding file called embed_file_trg which will contain only those words 
    from embed_file_src which are present in the training dataset (tokenizer.word_index).
    """

    embeddings = dict(get_coefs(*o.split(" ")) for o in open(embed_file_src, errors = 'ignore'))
    with open(embed_file_trg, 'w') as file:
        for word, index in tokenizer.word_index.items():
            word_vector = embeddings[word]
            line = ' '.join(np.concatenate([[word], word_vector]))
            file.write(line + '\n')


def create_embedding_matrix(tokenizer,
                            model_folder,
                            word_vec_dim,
                            embed_file_path,
                           ):
    """
    A function to create an embedding matrix. This is a matrix where each row is a vector representing a word.
    To create that matrix we use a word embedding file which path is equal to embedding_file_path.
    embedding_matrix[row_number] is a vector representation for a word = list(tokenizer.word_index.keys())[row_number - 1]
    First row of embedding_matrix are zeros. This matrix is needed to train a model.
    """
    embeddings = dict(get_coefs(*o.split(" ")) for o in open(embed_file_path, errors = 'ignore'))

    # embedding_matrix[row_number] is a vector representation of a word = self.tokenizer.word_index.keys()[row_number - 1]
    # first row in embedding_matrix is 0
    embedding_matrix = np.zeros((len(tokenizer.word_counts) + 1, word_vec_dim))
    for word, index in tokenizer.word_index.items():
        if index > len(tokenizer.word_counts):
            break
        else:
            try:
                embedding_matrix[index] = embeddings[word]
            except:
                continue

    pd.DataFrame(embedding_matrix).to_csv(os.path.join(model_folder, 'embedding_matrix.csv'))
    return embedding_matrix

In [3]:
tokenizer = Tokenizer()
# max_sen_len = 20
sentences_file = r'data\sentences_tables.xlsx'
embed_matrix_file = r'model\embedding_matrix.csv'
model_folder = 'model'
word_vec_dim = 300
embed_file_path = r'model\model_embeddings.txt'

In [4]:
x_train, x_test = create_training_data(
    tokenizer = tokenizer, 
    # max_sen_len = max_sen_len,
    sentences_file = sentences_file,
    # embed_matrix_file = embed_matrix_file,
    model_folder = model_folder
)

In [None]:
x_train

In [5]:
embed_matrix = create_embedding_matrix(
    tokenizer = tokenizer,
    model_folder = model_folder,
    word_vec_dim = word_vec_dim,
    embed_file_path = embed_file_path
)

In [7]:
embed_matrix

array([[ 0.      ,  0.      ,  0.      , ...,  0.      ,  0.      ,
         0.      ],
       [-0.50318 ,  0.27905 , -0.045497, ...,  0.4781  ,  0.13005 ,
        -0.014399],
       [-0.89423 ,  0.39636 ,  0.64359 , ..., -0.15076 ,  0.06987 ,
         0.041258],
       ...,
       [ 0.37492 , -0.052425, -0.60094 , ..., -0.36104 , -0.065253,
        -0.1206  ],
       [ 0.012832,  0.22669 , -0.17511 , ...,  0.17134 ,  0.040047,
        -0.37131 ],
       [-0.39054 , -0.55117 , -0.073466, ...,  0.34569 ,  0.30918 ,
        -0.32873 ]])

In [6]:
for i, row in enumerate(embed_matrix):
    if (row == np.zeros(300)).all():
        print(i)

0


In [8]:
for i, item in enumerate(tokenizer.word_index.items()):
    print(item)
    if i == 5:
        break

('employee', 1)
('cost', 2)
('user', 3)
('office', 4)
('business', 5)
('unit', 6)


In [6]:
class Encoder(Model):
    def __init__(self,
                 embedding_dim,
                 lstm_out_size,
                 batch_size,
                 embed_matrix
                ):
        super().__init__()
        self.lstm_out_size = lstm_out_size
        self.batch_size = batch_size
        self.embedding = layers.Embedding(
            input_dim = embed_matrix.shape[0],
            output_dim = embedding_dim,
            embeddings_initializer = tf.keras.initializers.Constant(embed_matrix),
            trainable = False
        )
        self.lstm = layers.LSTM(
            units = self.lstm_out_size,
            return_sequences = True,
            return_state = True
        )
        
    @tf.function
    def call(self, x, state_h = None, state_c = None):
        # x.shape = (batch_size, max_sen_len)
        # x is a series of numbers which represent words
        # state_h.shape = (batch_size, lstm_out_size)
        
        if state_h == None or state_c == None:
            state_h, state_c = self.initialize_hidden_state()
        
        # make sure that the types are correct
        x = tf.cast(x, tf.float32)
        state_h = tf.cast(state_h, tf.float32)
        state_c = tf.cast(state_c, tf.float32)
        
        x = self.embedding(x)
        # x.shape after embedding = (batch_size, max_sen_len, embedding_dim)
        # output.shape = (batch_size, max_sen_len, lstm_out_size)
        # state_h.shape = (batch_size, lstm_out_size)
        output, state_h, state_c = self.lstm(x, initial_state = [state_h, state_c])
        return output, state_h, state_c
    
    def initialize_hidden_state(self):
        state_h = tf.zeros((self.batch_size, self.lstm_out_size))
        state_c = tf.zeros((self.batch_size, self.lstm_out_size))
        return state_h, state_c

In [73]:
encoder = Encoder(embedding_dim = 300,
                 lstm_out_size = 10,
                 batch_size = 2,
                 embed_matrix = embed_matrix
                 )

x = np.array([[1, 2], [1, 2]])

with tf.GradientTape() as tape:
    output, state_h, state_c = encoder(x)
    
variables = encoder.trainable_variables
gradients = tape.gradient(output, variables)

In [74]:
gradients

[<tf.Tensor: shape=(300, 40), dtype=float32, numpy=
 array([[ 3.91362980e-02,  1.78501666e-01, -6.65536746e-02, ...,
          2.27576867e-03,  3.06596979e-03, -5.52950427e-03],
        [-2.44583692e-02, -1.00564852e-01,  4.86165360e-02, ...,
          4.39209789e-02,  3.47434767e-02,  3.46905664e-02],
        [ 7.62790767e-03,  1.84747055e-02, -2.33989414e-02, ...,
         -6.68730587e-02, -5.38272634e-02, -4.74490821e-02],
        ...,
        [-4.10167016e-02, -1.71791986e-01,  7.95203224e-02, ...,
          6.06815591e-02,  4.77755107e-02,  4.92389351e-02],
        [-6.89753098e-03, -4.42978255e-02,  3.52535956e-03, ...,
         -5.33669665e-02, -4.33627591e-02, -3.55112329e-02],
        [-7.87226111e-03, -2.61273235e-05,  3.63165438e-02, ...,
          1.47570401e-01,  1.19062632e-01,  1.03082106e-01]], dtype=float32)>,
 <tf.Tensor: shape=(10, 40), dtype=float32, numpy=
 array([[ 6.16146077e-04,  3.51791561e-04, -2.61891261e-03,
         -8.49191158e-04, -2.83790007e-03, -7.8593

In [75]:
output

<tf.Tensor: shape=(2, 2, 10), dtype=float32, numpy=
array([[[-0.04457429, -0.20452513,  0.07552136,  0.0508462 ,
          0.03812613,  0.01973103, -0.12498064,  0.08286092,
          0.05210062,  0.06269614],
        [-0.05648873, -0.13043042,  0.1044731 ,  0.06710292,
          0.10755109,  0.02488116, -0.18320872,  0.2810727 ,
          0.16890085,  0.2183119 ]],

       [[-0.04457429, -0.20452513,  0.07552136,  0.0508462 ,
          0.03812613,  0.01973103, -0.12498064,  0.08286092,
          0.05210062,  0.06269614],
        [-0.05648873, -0.13043042,  0.1044731 ,  0.06710292,
          0.10755109,  0.02488116, -0.18320872,  0.2810727 ,
          0.16890085,  0.2183119 ]]], dtype=float32)>

In [7]:
# version 1 like in Jonathan Hui pdf
class Bahdau_attention(layers.Layer):
    def __init__(self, units = 10):
        super().__init__()
        self.W1 = layers.Dense(units)
        self.W2 = layers.Dense(units)
        self.V = layers.Dense(1)
        
    @tf.function
    def call(self, decoder_hidden, encoder_hidden):
        # decoder_hidden.shape = (batch_size, hidden_size)
        # decoder_hidden_time_axis.shape = (batch_size, 1, hidden_size)
        decoder_hidden_time_axis = tf.expand_dims(decoder_hidden, 1)
        
        # encoder_hidden.shape = (batch_size, max_sen_len, hidden_size)
        # argument for tanh shape = (batch_size, max_sen_len, hidden_size)
        # score.shape = (batch_size, max_sen_len, 1)
        score = self.V(tf.nn.tanh(self.W1(decoder_hidden_time_axis) + self.W2(encoder_hidden)))
        
        # attention_weights.shape = (batch_size, max_sen_len, 1)
        attention_weights = tf.nn.softmax(score, axis = 1)
        
        # context_vector.shape = (batch_size, hidden_size)
        context_vector = attention_weights * encoder_hidden
        context_vector = tf.reduce_sum(context_vector, axis = 1)
        
        return context_vector, attention_weights

In [8]:
# version 2, like in attention explanation pdf
class Bahdau_attention(layers.Layer):
    def __init__(self):
        super().__init__()
        self.dense = layers.Dense(1)
       
    @tf.function
    def call(self, decoder_state_h, encoder_states_h):
        # decoder_state_h.shape = (batch_size, dec_state_size)
        # encoder_states_h.shape = (batch_size, max_sen_len, enc_state_size)
        
        # make sure that the dtypes are correct
        decoder_state_h = tf.cast(decoder_state_h, tf.float32)
        encoder_states_h = tf.cast(encoder_states_h, tf.float32)
        
        # encoder_states_h_flattened.shape = (batch_size * max_sen_len, enc_state_size)
        # encoder_states_h_flattened = tf.reshape(encoder_states_h, [-1, tf.shape(encoder_states_h)[2]])
        encoder_states_h_flattened = tf.reshape(
            encoder_states_h, [
                tf.shape(encoder_states_h)[0] * tf.shape(encoder_states_h)[1], 
                tf.shape(encoder_states_h)[2]
            ]
        )
        batch_size = tf.shape(encoder_states_h)[0]
        max_sen_len = tf.shape(encoder_states_h)[1]
        enc_state_size = tf.shape(encoder_states_h)[2]
        
        context_vector = tf.TensorArray(dtype = tf.float32, size = 0, dynamic_size = True, clear_after_read = False)
        for b in tf.range(batch_size):
            alpha = e = tf.TensorArray(dtype = tf.float32, size = 0, dynamic_size = True, clear_after_read = False)
            for j in tf.range(max_sen_len):
                # x.shape = (dec_state_size + enc_state_size)
                x = tf.concat([decoder_state_h[b], encoder_states_h_flattened[b * max_sen_len + j]], 0)
                # x.shape = (1, dec_state_size + enc_state_size)
                x = tf.expand_dims(x, 0)
                e =e.write(j, tf.math.exp(self.dense(x))[0])

            e_sum = tf.math.reduce_sum(e.stack())
            for j in tf.range(max_sen_len):
                alpha = alpha.write(j, tf.math.divide(e.read(j), e_sum))

            Sum = tf.TensorArray(dtype = tf.float32, size = 0, dynamic_size = True, clear_after_read = False)
            for j in tf.range(max_sen_len):
                Sum = Sum.write(j, alpha.read(j) * encoder_states_h_flattened[b * max_sen_len + j])

            # context_vector_b is a context vector for 1 sample from batch
            context_vector_b = tf.math.reduce_sum(Sum.stack(), axis = 0)
            # context_vector.shape = (batch_size, enc_state_size)
            context_vector = context_vector.write(b, context_vector_b)
        
        return context_vector.stack()

In [128]:
# tf.config.run_functions_eagerly(True)
tf.config.run_functions_eagerly(False)

In [130]:
attention = Bahdau_attention()
decoder_state_h = tf.constant([[1,2], [4,5]])
encoder_states_h = tf.constant([[[1,2,3], [4,5,3]], [[1,2,3], [4,5,3]]])
encoder_states_h_flattened = tf.reshape(encoder_states_h, [tf.shape(encoder_states_h)[0] * tf.shape(encoder_states_h)[1], tf.shape(encoder_states_h)[2]])

with tf.GradientTape() as tape:
    context_vector = attention(decoder_state_h, encoder_states_h)
    
variables = attention.trainable_variables
gradients = tape.gradient(context_vector, variables)

In [131]:
context_vector

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[3.5908003, 4.5908003, 3.       ],
       [3.5908003, 4.5908003, 3.       ]], dtype=float32)>

In [132]:
gradients

[<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
 array([[-1.1920929e-06],
        [-2.1457672e-06],
        [ 4.2406158e+00],
        [ 4.2406149e+00],
        [-3.0994415e-06]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-1.013279e-06], dtype=float32)>]

In [9]:
class Decoder(Model):
    def __init__(self, vocab_size, embedding_dim, lstm_out_size, embed_matrix):
        super().__init__()
        self.lstm_out_size = lstm_out_size
        self.embedding = layers.Embedding(
            input_dim = embed_matrix.shape[0],
            output_dim = embedding_dim,
            embeddings_initializer = tf.keras.initializers.Constant(embed_matrix),
            trainable = False
        )
        self.lstm = layers.LSTM(
            units = self.lstm_out_size,
            # return_sequences = True,
            return_state = True
        )
        self.dense = layers.Dense(vocab_size)
        self.attention = Bahdau_attention()
        
    @tf.function
    def call(self, x, decoder_state_h, decoder_state_c, encoder_states_h):
        # x.shape = (batch_size, 1)
        # x is a single number for each batch representing a single word
        # encoder_states_h.shape = (batch_size, max_sen_len, enc_state_size)
        # decoder_state_h.shape = (batch_size, lstm_out_size)
        
        # make sure that the types are correct
        x = tf.cast(x, tf.float32)
        decoder_state_h = tf.cast(decoder_state_h, tf.float32)
        decoder_state_c = tf.cast(decoder_state_c, tf.float32)
        encoder_states_h = tf.cast(encoder_states_h, tf.float32)
        
        # context_vector.shape = (batch_size, enc_state_size)
        context_vector = self.attention(decoder_state_h, encoder_states_h)
        # shape of output of embedding layer = (batch_size, 1, embedding_dim)
        x = self.embedding(x)
        # x.shape after concatenation = (batch_size, 1, enc_state_size + embedding_dim)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis = 2)
        
        output, state_h, state_c = self.lstm(x, initial_state = [decoder_state_h, decoder_state_c])
        
        # output.shape = (batch_size, vocab_size)
        output = self.dense(output)
        
        return output, state_h, state_c

In [138]:
decoder = Decoder(vocab_size = 100, 
                  embedding_dim = 300, 
                  lstm_out_size = 20, 
                  embed_matrix = embed_matrix
                 )

x = tf.constant([[1], [2]])
decoder_state_h = tf.constant([[i for i in range(20)], [i for i in range(20)]])
decoder_state_c = tf.constant([[i for i in range(20)], [i for i in range(20)]])
encoder_states_h = tf.constant([[[i for i in range(15)], [i for i in range(15)]], [[i for i in range(15)], [i for i in range(15)]]])

with tf.GradientTape() as tape:
    output, state_h, state_c = decoder(x, decoder_state_h, decoder_state_c, encoder_states_h)
    
variables = decoder.trainable_variables
gradients = tape.gradient(output, variables)

In [137]:
output

<tf.Tensor: shape=(2, 100), dtype=float32, numpy=
array([[ 0.15162158, -0.2666756 , -0.08206277,  0.71664655,  0.15774032,
        -0.26463896, -0.02485025, -0.16136312,  0.32311562,  0.31768876,
        -0.38081762, -0.59177405, -0.35685304, -0.5143362 ,  0.37344497,
        -0.51701427,  0.24112938,  0.3559965 , -0.23429905, -0.93242985,
         0.14618877,  0.46973363, -0.7733953 ,  0.16248974,  0.03708994,
        -0.05201417,  0.17080984,  0.08245593, -0.53828734,  0.5289846 ,
        -0.7155414 , -0.2436806 ,  0.09993283, -0.64873123, -0.38315713,
         0.4200229 ,  0.02848363, -0.07834591,  0.26870382,  0.37282366,
        -0.86660933,  0.79235697, -0.47088167,  0.46041852,  0.12979311,
         0.30807993,  0.3242702 , -0.0882818 , -0.298767  ,  0.15185268,
         0.4537142 ,  0.4380744 ,  0.50783145,  0.2062283 ,  0.416616  ,
        -0.24646498, -0.66551596,  0.10805789, -0.4846673 ,  0.11572916,
         0.79025435,  0.10220787,  0.6163452 , -0.22814785, -0.5057447 ,
 

In [139]:
gradients

[<tf.Tensor: shape=(315, 80), dtype=float32, numpy=
 array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00, ...,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 2.05591097e-02,  5.87581750e-03,  4.36880291e-02, ...,
         -5.95262408e-01, -2.95585115e-03, -2.01966166e-01],
        [ 4.11182195e-02,  1.17516350e-02,  8.73760581e-02, ...,
         -1.19052482e+00, -5.91170229e-03, -4.03932333e-01],
        ...,
        [-1.43110575e-02,  1.94085098e-03,  1.16645843e-02, ...,
         -1.73727512e-01, -9.20270628e-04, -6.59215897e-02],
        [-2.49217115e-02, -2.28512537e-04, -4.86101629e-03, ...,
          4.93213274e-02,  1.79062117e-04,  8.75781570e-03],
        [ 4.53935117e-02,  1.55893841e-03,  1.68263167e-02, ...,
         -2.01263383e-01, -8.90372845e-04, -5.50800711e-02]], dtype=float32)>,
 <tf.Tensor: shape=(20, 80), dtype=float32, numpy=
 array([[ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
          0.0000000e+00,  0.0000000e+00,  0.0000

In [140]:
@tf.function
def train_step(inp, 
               targ, 
               # enc_state_h, 
               # enc_state_c, 
               batch_size, 
               encoder, 
               decoder, 
               loss_function, 
               optimizer):
    # inp.shape = targ.shape (batch_size, max_sen_len)
    # enc_state_h.shape = (batch_size, enc_state_size)
    
    # make sure that the types are correct
    inp = tf.cast(inp, tf.float32)
    targ = tf.cast(targ, tf.float32)
    # enc_state_h = tf.cast(enc_state_h, tf.float32)
    # enc_state_c = tf.cast(enc_state_c, tf.float32)
    
    batch_loss = 0
    
    with tf.GradientTape() as tape:
        # enc_output.shape = (batch_size, max_sen_len, enc_state_size)
        # enc_state_h.shape = (batch_size, state_size)
        # enc_output, enc_state_h, enc_state_c = encoder(inp, enc_state_h, enc_state_c)
        enc_output, enc_state_h, enc_state_c = encoder(inp)
        dec_state_h = enc_state_h
        dec_state_c = enc_state_c
        
        # dec_input.shape = (batch_size, 1)
        dec_input = tf.expand_dims([0] * batch_size, 1)
        
        for t in range(targ.shape[1]):
            prediction, dec_state_h, dec_state_c, = decoder(dec_input, dec_state_h, dec_state_c, enc_output)
            # real value passed to loss_function needs to have shape (batch_size).
            # It is a number representing a word from tokenizer.word_index. Real value = 0
            # means that there was no word
            batch_loss += loss_function(targ[:, t], prediction)
            dec_input = tf.expand_dims(targ[:, t], 1)
            
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    
    return batch_loss

In [141]:
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(reduction = 'none')

@tf.function
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss = loss_object(real, pred)
    
    mask = tf.cast(mask, dtype = loss.dtype)
    loss *= mask
    
    return tf.reduce_mean(loss)

In [40]:
# real = tf.expand_dims(x_train[:2, 0], 1)
# pred = tf.expand_dims(x_train[:2, 0], 1)

real = tf.constant([1, 0])
pred = tf.constant([[0.05, 0.95], [1, 0]])

real = tf.cast(real, tf.float32)
pred = tf.cast(pred, tf.float32)

loss_object(real, pred)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([5.1293328e-02, 1.1920928e-07], dtype=float32)>

In [142]:
epochs = 10
batch_size = 20
embedding_dim = 300

inp = tf.constant(x_train)
targ = tf.constant(x_train)

decoder = Decoder(vocab_size = len(tokenizer.word_index.keys()) + 1,
                  embedding_dim = embedding_dim,
                  lstm_out_size = 100,
                  embed_matrix = embed_matrix
                 )

encoder = Encoder(embedding_dim = embedding_dim,
                 lstm_out_size = 100,
                 batch_size = batch_size,
                 embed_matrix = embed_matrix
                 )

In [143]:
for epoch in range(epochs):
    stime = time.time()
    total_loss = 0
    for batch_number in range(len(inp) // batch_size):
        inp_batch = inp[batch_number * batch_size : (batch_number + 1) * batch_size, :]
        targ_batch = targ[batch_number * batch_size : (batch_number + 1) * batch_size, :]
        
        batch_loss = train_step(inp = inp_batch,
                               targ = targ_batch,
                               batch_size = batch_size,
                               encoder = encoder,
                               decoder = decoder,
                               loss_function = loss_function,
                               optimizer = optimizer
                              )
        total_loss += batch_loss
        
        print(f'Batch number: {batch_number}, Loss: {batch_loss / batch_size}, Time per batch: {(time.time() - stime) / (batch_number + 1)}')
        
    print(f'Epoch: {epoch + 1}, Loss: {total_loss / ((batch_number + 1) * batch_size)}, Time per epoch: {time.time() - stime}\n')

Batch number: 0, Loss: 2.658289670944214, Time per batch: 12.892604351043701
Batch number: 1, Loss: 1.8582338094711304, Time per batch: 6.700369834899902
Batch number: 2, Loss: 1.9761184453964233, Time per batch: 4.62457807858785
Batch number: 3, Loss: 1.600721001625061, Time per batch: 3.584436297416687
Batch number: 4, Loss: 1.403429627418518, Time per batch: 2.9829472064971925
Batch number: 5, Loss: 1.548048734664917, Time per batch: 2.6161230007807412
Batch number: 6, Loss: 1.5573298931121826, Time per batch: 2.3571054935455322
Batch number: 7, Loss: 1.623695731163025, Time per batch: 2.1703423857688904
Batch number: 8, Loss: 1.5399298667907715, Time per batch: 2.016980383131239
Batch number: 9, Loss: 1.9213107824325562, Time per batch: 1.8950824975967406
Batch number: 10, Loss: 1.4386277198791504, Time per batch: 1.7958932356400923
Batch number: 11, Loss: 1.1954272985458374, Time per batch: 1.7134931286176045
Batch number: 12, Loss: 1.583144187927246, Time per batch: 1.64153381494

In [144]:
encoder.save('model/encoder')



INFO:tensorflow:Assets written to: model/encoder\assets


INFO:tensorflow:Assets written to: model/encoder\assets


In [145]:
decoder.save('model/decoder')



INFO:tensorflow:Assets written to: model/decoder\assets


INFO:tensorflow:Assets written to: model/decoder\assets


In [44]:
sentence1 = 'hierarchy region business unit'
sentence2 = 'hierarchy region'

sequences = tokenizer.texts_to_sequences([sentence1, sentence2])
x = pad_sequences(sequences, maxlen = 20)

In [52]:
for i in range(20 - x.shape[0]):
    x = np.concatenate((x, np.zeros((1, 20))))

In [53]:
x

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0., 13., 15.,  5.,  6.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0., 13., 15.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  

In [54]:
encoder(x)

(<tf.Tensor: shape=(20, 20, 100), dtype=float32, numpy=
 array([[[-0.00200214, -0.00728252,  0.00461565, ..., -0.00156335,
           0.00536895, -0.00102761],
         [-0.00376189, -0.01406969,  0.00852322, ..., -0.00338283,
           0.01094341, -0.00149374],
         [-0.00536775, -0.02020802,  0.01205186, ..., -0.00521175,
           0.01671277, -0.00152385],
         ...,
         [-0.5295285 , -0.21979973,  0.5503003 , ...,  0.03106904,
           0.24107699, -0.02291696],
         [-0.7674839 , -0.49447724,  0.6714796 , ..., -0.01835466,
           0.57227296, -0.02126507],
         [-0.82453334, -0.65076756,  0.719726  , ...,  0.03668921,
           0.65055186, -0.06996637]],
 
        [[-0.00200214, -0.00728252,  0.00461565, ..., -0.00156335,
           0.00536895, -0.00102761],
         [-0.00376189, -0.01406969,  0.00852322, ..., -0.00338283,
           0.01094341, -0.00149374],
         [-0.00536775, -0.02020802,  0.01205186, ..., -0.00521175,
           0.01671277, -0.00