<a href="https://colab.research.google.com/github/chitrasingh98/-IntersectingArea-/blob/master/NMT_Flask_Connection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/gdrive; the later cells read the model,
# tokenizer and template files from a folder below this mount point.
from google.colab import drive

drive.mount('/content/gdrive')

In [None]:
cd /content/gdrive/My Drive/NMT_Marathi2

In [None]:
from attention import AttentionLayer

In [None]:

from flask import Flask, request, jsonify, render_template

import pandas as pd
from sklearn.model_selection import train_test_split
import string
from string import digits
import re
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM, Input, Dense,Embedding, Concatenate, TimeDistributed
from tensorflow.keras.models import Model,load_model, model_from_json
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.text import one_hot, Tokenizer
from tensorflow.keras.callbacks import EarlyStopping
import pickle as pkl
import numpy as np

In [None]:
# Load the model architecture from JSON and assign the trained weights.
# The context manager closes the file even if reading raises.
with open('NMT_model.json', 'r') as json_file:
  loaded_model_json = json_file.read()
# AttentionLayer is a project-local layer class, so it is passed via
# custom_objects to let the JSON architecture be deserialized.
model_loaded = model_from_json(loaded_model_json, custom_objects={'AttentionLayer': AttentionLayer})
# Load weights into the reconstructed model.
model_loaded.load_weights("NMT_model_weight.h5")

In [None]:
# Restore the objects pickled by the training notebook.
# NOTE(review): pickle.load executes arbitrary code from the file — only load
# these .pkl files if they come from a trusted source.

# English (source) side: vocabulary size, word -> index map, fitted tokenizer.
with open('NMT_Etokenizer.pkl','rb') as f:
  vocab_size_source, Eword2index, englishTokenizer = pkl.load(f)

# Marathi (target) side: vocabulary size, word -> index map, fitted tokenizer.
with open('NMT_Mtokenizer.pkl', 'rb') as f:
  vocab_size_target, Mword2index, marathiTokenizer = pkl.load(f)

# Train/test split saved during training.
# NOTE(review): these four arrays are not referenced by the cells visible in
# this notebook — confirm whether loading them is still needed for serving.
with open('NMT_data.pkl','rb') as f:
  X_train, y_train, X_test, y_test = pkl.load(f)

In [None]:
# Reverse lookups (index -> word) taken from the fitted tokenizers; used by
# seq2text / seq2summary / decode_sequence to turn indices back into words.
Eindex2word = englishTokenizer.index_word
Mindex2word = marathiTokenizer.index_word

In [None]:
def remove_punc(text_list):
  """Return a new list with every ASCII punctuation character removed.

  Args:
    text_list: iterable of strings.

  Returns:
    A list of strings, one per input sentence, where each character found in
    ``string.punctuation`` has been deleted. Spaces are left untouched, so
    the spacing between words is preserved exactly.
  """
  strip_table = str.maketrans('', '', string.punctuation)
  # Whitespace is not in the table, so translating the whole sentence gives
  # the same result as translating word by word and re-joining on ' '.
  return [sentence.translate(strip_table) for sentence in text_list]

In [None]:
def Max_length(data):
  """Return the largest number of space-separated pieces in any sentence.

  Args:
    data: iterable of strings.

  Returns:
    The maximum of ``len(sentence.split(' '))`` over ``data``. An empty
    ``data`` yields 0 instead of raising ``ValueError``. Because the split is
    on a single space, an empty string counts as one piece.
  """
  return max((len(sentence.split(' ')) for sentence in data), default=0)


In [None]:
def preprocessing(userInput, max_length_english=11):
  """Clean raw English sentences and encode them as padded index sequences.

  The cleaning steps are: lower-casing, removing apostrophes, removing
  punctuation (via ``remove_punc``), removing digits and stripping leading and
  trailing whitespace. The cleaned text is converted to word indices with
  ``englishTokenizer`` and post-padded with zeros.

  Args:
    userInput: list of sentences, e.g. ``["how are you"]``.
    max_length_english: length every encoded sentence is padded to. The
      default of 11 matches the input length the inference models in this
      notebook are built for.

  Returns:
    The array produced by ``pad_sequences`` with one row per input sentence
    and ``max_length_english`` columns.
  """
  # Lower-case and drop apostrophes.
  cleaned = [re.sub("'", '', sentence.lower()) for sentence in userInput]

  # remove punctuation
  cleaned = remove_punc(cleaned)

  # remove digits (spaces are not in the table, so whole-sentence translate
  # is equivalent to translating word by word)
  remove_digits = str.maketrans('', '', digits)
  cleaned = [sentence.translate(remove_digits) for sentence in cleaned]

  # removing the starting and ending whitespaces
  cleaned = [sentence.strip() for sentence in cleaned]

  # Words -> indices, then pad with trailing zeros to a fixed length.
  encoded = englishTokenizer.texts_to_sequences(cleaned)
  encoded = pad_sequences(encoded, maxlen=max_length_english, padding='post')
  print(encoded)
  return encoded


In [None]:
# Print the layer listing; the next cell picks layers by position
# (model_loaded.layers[N]), so this output is the reference for those indices.
model_loaded.summary()

In [None]:
# Rebuild separate encoder and decoder models for step-by-step inference by
# reusing the layers of the loaded training model.
# NOTE(review): every layers[N] index below is positional — confirm each one
# against the model_loaded.summary() output before changing the architecture.

# Size of the state vectors fed into the decoder at inference time.
# NOTE(review): presumably must equal the LSTM unit count used in training — confirm.
latent_dim=500
# encoder inference
encoder_inputs = model_loaded.input[0]  #loading encoder_inputs
# layers[6] yields three tensors: the per-timestep outputs plus the two final states.
encoder_outputs, state_h, state_c = model_loaded.layers[6].output #loading encoder_outputs

print(encoder_outputs.shape)

encoder_model = Model(inputs=encoder_inputs,outputs=[encoder_outputs, state_h, state_c])

# decoder inference
# Below tensors will hold the states of the previous time step
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
# Encoder outputs are passed in for the attention layer; 11 is the padded
# input length used by preprocessing() and translate().
decoder_hidden_state_input = Input(shape=(11,latent_dim))
# Convenience list; the Model(...) call below spells the same inputs out explicitly.
decoder_states_inputs = [decoder_hidden_state_input, decoder_state_input_h, decoder_state_input_c]

# Get the embeddings of the decoder sequence
decoder_inputs = model_loaded.layers[3].output

print(decoder_inputs.shape)
dec_emb_layer = model_loaded.layers[5]

dec_emb2= dec_emb_layer(decoder_inputs)

# To predict the next word in the sequence, set the initial states to the states from the previous time step
decoder_lstm = model_loaded.layers[7]
decoder_outputs2, state_h2, state_c2 = decoder_lstm(dec_emb2, initial_state=[decoder_state_input_h, decoder_state_input_c])
# Convenience list; the Model(...) call below lists state_h2/state_c2 directly.
decoder_states = [state_h2, state_c2]

#attention inference
attn_layer = model_loaded.layers[8]
attn_out_inf, attn_states_inf = attn_layer([decoder_hidden_state_input, decoder_outputs2])

# Concatenate the decoder LSTM output with the attention output.
concate = model_loaded.layers[9]
decoder_inf_concat = concate([decoder_outputs2, attn_out_inf])

# A dense softmax layer to generate prob dist. over the target vocabulary
decoder_dense = model_loaded.layers[10]
decoder_outputs2 = decoder_dense(decoder_inf_concat)

# Final decoder model
# Inputs: previous target token, encoder outputs, previous h and c states.
# Outputs: distribution over the target vocabulary, new h and c states.
decoder_model = Model(
[decoder_inputs] + [decoder_hidden_state_input,decoder_state_input_h, decoder_state_input_c],
[decoder_outputs2] + [state_h2, state_c2])
# The input/output lists are built with `+`; list.append() returns None and
# must not be used to build them inline.

In [None]:
def decode_sequence(input_seq, max_decoded_length=25):
    """Greedily decode one encoded source sentence into target-language text.

    The encoder is run once; the decoder is then called one token at a time,
    each step feeding back the previously chosen token and the updated LSTM
    states, and always picking the highest-scoring token (argmax).

    Decoding stops when any of the following happens:
      * the padding index 0 is predicted,
      * the 'end' marker is predicted (it is not added to the result),
      * ``max_decoded_length`` words have been produced.

    Previously the 'end' check was nested inside the ``!= 'end'`` branch and
    therefore never fired, so decoding continued past the end marker and
    could loop forever if the model kept predicting 'end'.

    Args:
        input_seq: encoded source sentence as accepted by
            ``encoder_model.predict`` (``translate`` passes shape (1, 11)).
        max_decoded_length: maximum number of words to generate; the default
            of 25 equals the previous hard-coded limit (26 - 1).

    Returns:
        The decoded words, each preceded by a single space (so a non-empty
        result starts with a space); an empty string if nothing was decoded.
    """
    # Encode the input as state vectors.
    e_out, e_h, e_c = encoder_model.predict(input_seq)

    # Target sequence of length 1, seeded with the 'start' word.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = Mword2index['start']

    decoded_words = []
    # The loop bound guarantees termination regardless of model output.
    while len(decoded_words) < max_decoded_length:
        output_tokens, h, c = decoder_model.predict([target_seq] + [e_out, e_h, e_c])

        # Greedy choice of the next token.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        if sampled_token_index == 0:
            # Index 0 is the padding slot and has no word attached.
            break

        sampled_token = Mindex2word[sampled_token_index]
        if sampled_token == 'end':
            break
        decoded_words.append(sampled_token)

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index

        # Update internal states
        e_h, e_c = h, c

    return ''.join(' ' + word for word in decoded_words)

In [None]:
def seq2summary(input_seq):
    """Turn a sequence of target-language indices into readable text.

    Indices equal to 0, to ``Mword2index['start']`` or to
    ``Mword2index['end']`` are skipped; every remaining index is looked up in
    ``Mindex2word``. Each word is followed by one space, so a non-empty
    result ends with a trailing space.
    """
    def _is_real_word(idx):
        # Same short-circuit order as the checks it replaces: 0, start, end.
        return idx != 0 and idx != Mword2index['start'] and idx != Mword2index['end']

    return ''.join(Mindex2word[idx] + ' ' for idx in input_seq if _is_real_word(idx))

def seq2text(input_seq):
    """Turn a sequence of source-language indices into readable text.

    Index 0 is skipped; every other index is looked up in ``Eindex2word``.
    Each word is followed by one space, so a non-empty result ends with a
    trailing space.
    """
    return ''.join(Eindex2word[idx] + ' ' for idx in input_seq if idx != 0)

In [None]:
def translate(userInp):
  """Translate a single English sentence and return the decoded text.

  The sentence is wrapped in a one-element list, cleaned and encoded by
  ``preprocessing``, echoed back through ``seq2text``, and then decoded with
  ``decode_sequence``. Both the echoed input and the result are printed.

  Args:
    userInp: the English sentence to translate.

  Returns:
    The string returned by ``decode_sequence``.
  """
  encoded_batch = preprocessing([userInp])
  encoded_sentence = encoded_batch[0]
  print("Review:", seq2text(encoded_sentence))
  # The decoder expects a batch of one sequence of length 11.
  translation = decode_sequence(encoded_sentence.reshape(1, 11))
  print("Predicted summary:", translation)
  return translation


In [None]:
# Evaluate a JavaScript snippet in the Colab front end and print its result.
# NOTE(review): presumably this is the public proxy URL for port 5000, the
# port the Flask app below listens on — confirm.
from google.colab.output import eval_js
print(eval_js("google.colab.kernel.proxyPort(5000)"))

In [None]:

from flask import Flask, render_template, request

# Flask application; HTML templates are read from the mounted Drive folder.
app = Flask(__name__, template_folder='/content/gdrive/MyDrive/NMT_Marathi2/templates')
@app.route('/')
def home():
    """Serve the landing page (frontend.html) with no translation filled in."""
    return render_template('frontend.html')

@app.route('/predict',methods=['POST'])
def predict():
    '''
    For rendering results on HTML GUI.

    Reads the submitted form, translates its first value with translate()
    and re-renders frontend.html with the original text and the translation.
    A request without any form value gets the page back with a hint and
    HTTP status 400 instead of failing with an IndexError (HTTP 500).
    '''
    englishSentence = [str(x) for x in request.form.values()]
    if not englishSentence:
        return render_template('frontend.html', prediction_text='Please enter a sentence to translate.'), 400

    original = englishSentence[0]
    translated = translate(original)
    return render_template('frontend.html',original_text='Original English Text:  {}'.format(original), prediction_text='Translated Text:  {}'.format(translated))


    
# Start Flask's built-in server with its default settings; this call blocks
# the cell while the server is running.
if __name__ == "__main__":
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [10/Feb/2021 13:46:17] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [10/Feb/2021 13:46:18] "[33mGET /styling.css HTTP/1.1[0m" 404 -
127.0.0.1 - - [10/Feb/2021 13:46:19] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


[[3587    0    0    0    0    0    0    0    0    0    0]]
Review: hello 


127.0.0.1 - - [10/Feb/2021 13:46:23] "[37mPOST /predict HTTP/1.1[0m" 200 -


Predicted summary:  हॅलो


127.0.0.1 - - [10/Feb/2021 13:46:24] "[33mGET /styling.css HTTP/1.1[0m" 404 -
127.0.0.1 - - [10/Feb/2021 13:46:24] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -


[[ 21  15   3 190   0   0   0   0   0   0   0]]
Review: what are you doing 


127.0.0.1 - - [10/Feb/2021 14:05:34] "[37mPOST /predict HTTP/1.1[0m" 200 -


Predicted summary:  तू काय म्हणत आहेस


127.0.0.1 - - [10/Feb/2021 14:05:35] "[33mGET /styling.css HTTP/1.1[0m" 404 -
127.0.0.1 - - [10/Feb/2021 14:05:35] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
