In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content/gdrive/My Drive/Kaggle"
%cd /content/gdrive/My Drive/Kaggle

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
/content/gdrive/My Drive/Kaggle


In [0]:
import tensorflow as tf
from tensorflow import keras

enc_model = keras.models.load_model('./encoder-model.h5', compile=False)
inf_model = keras.models.load_model('./inf-model.h5', compile=False)

In [3]:
print(tf.__version__)

2.2.0


In [0]:
import json

vocab_max_size = 10000

with open('word_dict.json') as f:
    word_dict = json.load(f)
    tokenizer = keras.preprocessing.text.Tokenizer(filters='', num_words=vocab_max_size)
    tokenizer.word_index = word_dict

In [0]:
import numpy as np

max_length_in = 21
max_length_out = 20

def tokenize_text(text):
  text = '<start> ' + text.lower() + ' <end>'
  text_tensor = tokenizer.texts_to_sequences([text])
  text_tensor = keras.preprocessing.sequence.pad_sequences(text_tensor, maxlen=max_length_in, padding="post")
  return text_tensor

# Reversed map from a tokenizer index to a word
index_to_word = dict(map(reversed, tokenizer.word_index.items()))

# Given an input string, an encoder model (infenc_model) and a decoder model (infmodel),
def decode_sequence(input_sentence):
    sentence_tensor = tokenize_text(input_sentence)
    # Encode the input as state vectors.
    state = enc_model.predict(sentence_tensor)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['<start>']
    curr_word = "<start>"
    decoded_sentence = ''

    i = 0
    while curr_word != "<end>" and i < (max_length_out - 1):
        print(target_seq.shape)
        output_tokens, h = inf_model.predict([target_seq, state])

        curr_token = np.argmax(output_tokens[0, 0])

        if (curr_token == 0):
          break;

        curr_word = index_to_word[curr_token]

        decoded_sentence += ' ' + curr_word
        target_seq[0, 0] = curr_token
        state = h
        i += 1

    return decoded_sentence

In [6]:
print(tokenize_text('have a'))

[[ 1 21 13  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]


In [7]:
enc_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 21)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 21, 10)       100000      input_1[0][0]                    
__________________________________________________________________________________________________
bidirectional (Bidirectional)   [(None, 21, 256), (N 107520      embedding[0][0]                  
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 256)          0           bidirectional[0][1]              
                                                                 bidirectional[0][2]        

In [8]:
inf_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 10)     100000      input_2[0][0]                    
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 256)]        0                                            
__________________________________________________________________________________________________
gru_1 (GRU)                     [(None, None, 256),  205824      embedding_1[0][0]                
                                                                 input_3[0][0]              

In [9]:
import pandas as pd

texts = [
    'have a']

output = list(map(lambda text: (text, decode_sequence(text)), texts))
output_df = pd.DataFrame(output, columns=["input", "output"])
output_df.head(len(output))

(1, 1)
(1, 1)
(1, 1)


Unnamed: 0,input,output
0,have a,good weekend <end>


In [0]:
enc_model.save('./encoder-model-2.h5')
inf_model.save('./inf-model-2.h5')