In [None]:
import tensorflow as tf
from tensorflow.keras import callbacks, models, layers, preprocessing as kprocessing #(2.6.0)
import pandas as pd
import json
from nltk.corpus import stopwords
import regex as re
import numpy as np

def load_tokenizer(path):
    """Rebuild a Keras tokenizer from a JSON file on disk.

    Parameters
    ----------
    path : str
        File to read. Its content is parsed with ``json.load`` and the parsed
        value is handed to ``tokenizer_from_json``.

    Returns
    -------
    The tokenizer object produced by
    ``tf.keras.preprocessing.text.tokenizer_from_json``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # locale encoding; the file is closed before the tokenizer is rebuilt.
    with open(path, encoding="utf-8") as f:
        tokenizer_config = json.load(f)
    # NOTE(review): tokenizer_from_json takes a JSON *string*, so the file
    # presumably stores the tokenizer's to_json() output re-encoded with
    # json.dump -- confirm against the code that wrote it.
    return tf.keras.preprocessing.text.tokenizer_from_json(tokenizer_config)


# Tokenizer for the input text
text_tokenizer = load_tokenizer('word_corpus.json')

# Tokenizer for the summary
label_tokenizer = load_tokenizer('label_corpus.json')

# Special tokens for the summary vocabulary: (start marker, end marker)
special_tokens = ("xstartx", "xendx")

# Load encoder and decoder models
encoder_model = tf.keras.models.load_model('encoder_191223.h5')
decoder_model = tf.keras.models.load_model('decoder_191223.h5')

In [None]:
# Text input
sample = 'amazing turn events baahubali 2 conclusion released worldwide april 28 beaten hollywood film circle us box office mind simple featthe circle stars twotime academy awardwinning actor global superstar tom hanks harry potter star heartthrob billions emma watson based novel name circle technothriller one awaited films yearhowever according latest boxoffice figures baahubali 2 given beating tom hanksstarrer us box office baahubali 2 conclusion earned  1013 million rs 65 crore first weekend circle earned  932 million rs 598 crorethis makes baahubali 2 conclusion directed ss rajamouli highest grossing indian film ever american soil us back home baahubali 2 conclusion also breaking records far hindi version film earned title highestgrossing film first weekend india beating sultan dangalstarring prabhas rana daggubati anushka shetty tamannaah sathyaraj ramya krishnan baahubali 2 conclusion fantasy epic sequel 2015 blockbuster baahubali beginning baahubali 2 expected earn whole lot time tell beat pk become worlds highest grossing indian film time'

# Data cleansing: lowercase the text and drop the listed punctuation characters
lowered_sample = re.sub("[-()\"#/@;:<>{}`+=~|.!?,]", "", sample.lower())

# Define stopwords (kept as a list under the original name; the set copy
# gives constant-time membership tests in the filter below)
stopword = stopwords.words("english")
stopword_set = set(stopword)

# Drop stop words. Splitting on a single space (not arbitrary whitespace)
# keeps empty tokens from doubled spaces, so the spacing of the remaining
# words is carried over unchanged into the joined string.
kept_words = [word for word in lowered_sample.split(" ") if word not in stopword_set]
clean_sample = " ".join(kept_words).strip()

# The tokenizer's texts_to_sequences call expects a list of texts,
# so wrap the single cleaned sample in a list.
input_sample = [clean_sample]

In [None]:
# Sequence-length limits used by this cell
MAX_INPUT_LEN = 800      # token ids fed to the encoder (post-padded / post-truncated)
MAX_SUMMARY_WORDS = 50   # hard cap on generated words so decoding always terminates

# special_tokens is (start marker, end marker)
start_token, end_token = special_tokens

# Convert the cleaned text to a sequence of vocabulary indices
input_seq = text_tokenizer.texts_to_sequences(input_sample)

# Padding sequence
x = tf.keras.utils.pad_sequences(
    input_seq, maxlen=MAX_INPUT_LEN, padding='post', truncating="post"
)

# Shape the input as a batch holding a single sample
x = x.reshape(1, -1)

# Encode the input; the returned states initialise the decoder
encoder_out, state_h, state_c = encoder_model.predict(x, verbose=0)

# Seed the decoder with the START token. The end token is the loop's stop
# marker, so it must not be used as the first decoder input.
decoder_input = np.array([[label_tokenizer.word_index[start_token]]])
predicted_text = ""

# Greedy decoding: feed back the most probable word at every step
while True:
    # Predict the vocabulary probability distribution for the next word
    outputs = decoder_model.predict([decoder_input, state_h, state_c], verbose=0)
    probs, new_state_h, new_state_c = outputs[0], outputs[1], outputs[2]

    # Most probable vocabulary index at the last time step
    voc_idx = np.argmax(probs[0, -1, :], axis=0)
    if voc_idx == 0:
        # Index 0 is not assigned to any word by the Keras tokenizer,
        # so there is nothing to look up in index_word -- stop decoding.
        break

    pred_word = label_tokenizer.index_word[voc_idx]

    # Stop on the end marker or once the length cap has been reached
    if pred_word == end_token or len(predicted_text.split()) >= MAX_SUMMARY_WORDS:
        break
    predicted_text = predicted_text + " " + pred_word

    # The predicted word and the updated states drive the next step
    decoder_input = np.array([[voc_idx]])
    state_h, state_c = new_state_h, new_state_c