In [47]:
import sqlite3
import json
from datetime import datetime
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Activation
from keras.layers import Embedding
from keras.layers import Flatten
from keras.preprocessing.text import Tokenizer
from keras.datasets import imdb
from keras import preprocessing
import numpy as np
from keras.preprocessing.sequence import pad_sequences
import os
from spacy.lang.en import English
from keras.layers import SimpleRNN
from keras.layers import LSTM, Input, TimeDistributed
from keras.models import Model
from keras import utils
import re
import itertools
from sklearn.preprocessing import OneHotEncoder
from keras.models import load_model

# Importing data from a medical Q&A dataset containing over 135,000 questions and answer pairs

In [48]:
with open("healthtapQAs.json") as f: # This is the biggest database - 137,052 questions and answers
    my_data1 = json.load(f)

In [49]:
the_answers = list()
the_questions = list()

count = 0
count1 = 0

for i in my_data1:
    the_answers.append(i['answer'])
    count += 1

for i in my_data1:
    the_questions.append(i['question'])
    count1 += 1
    
print(the_questions[0])
print(the_answers[0])

zirconium dental implants. how common is it used now. is there any advantages or benefits over titanium implants. cons & pros please. thanks.
a majority of the dental implants placed are titanium. they are highly successful with many years use ; many studies much lower in cost ; have many restorative options. zirconia implants are newer fewer studies on success lesser restorative options. however they can be more aesthetic in certain anterior(front) situations. let your dentist/oral surgeon chose what they feel will be best for you.


# Cleaning the data to get rid of some contractions 
## In the future I am thinking about applying this to each question written by a user

In [50]:
def clean_text(text):

    text = text.lower()
    
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"'bout", "about", text)
    text = re.sub(r"'til", "until", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    return text

In [51]:
answers = list()
questions = list()
for i in the_answers:
    text = clean_text(i)
    answers.append(text)

for i in the_questions:
    text = clean_text(i)
    questions.append(text)
    
print(the_questions[:4])
print(questions[:4])

['zirconium dental implants. how common is it used now. is there any advantages or benefits over titanium implants. cons & pros please. thanks.', 'zirconium dental implants. how common is it used now. is there any advantages or benefits over titanium implants. cons & pros please. thanks.', 'zirconium dental implants. how common is it used now. is there any advantages or benefits over titanium implants. cons & pros please. thanks.', 'zirconium dental implants. how common is it used now. is there any advantages or benefits over titanium implants. cons & pros please. thanks.']
['zirconium dental implants how common is it used now is there any advantages or benefits over titanium implants cons & pros please thanks', 'zirconium dental implants how common is it used now is there any advantages or benefits over titanium implants cons & pros please thanks', 'zirconium dental implants how common is it used now is there any advantages or benefits over titanium implants cons & pros please thanks'

# Adding 'START' and 'END' to the beginnings and endings of the answers

In [52]:
print(answers[1:4])
labeled_answers = list()
for i in range(len(answers)):
    labeled_answers.append('<START>' + answers[i] + '<END>')


['and the data on zirconia implants is much more limited', 'dental implants when loaded transfer stress to the bone which is a good thing zirconium is a ceramic that is very hard and actually transfers less stress to the bone and can actually lead to bone atrophy  the you dont use it you lose it concept', 'stick with what we know works  titanium let the research continue with the zirconium more answers will come as more research is done hard to beat 50 years of success always looking for improvement though']


# Tokenizing words in both the questions and answers list

In [53]:
max_words = 20000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(questions + labeled_answers)
print("Vocab list has: {} elements".format(len(tokenizer.word_index)))
word_index = tokenizer.word_index

Vocab list has: 87816 elements


# Preprocessing Questions Data
## - Tokenize words
## - Create word integer index
## - Turn text to number sequence and padding

In [54]:
sequences_questions = tokenizer.texts_to_sequences(questions)
max_seq_len = max([len(x) for x in sequences_questions])
print("Longest sequence_questions is: {}".format(max_seq_len))
padded_questions = pad_sequences(sequences_questions, maxlen=max_seq_len)
# array of padded sequences for questions
encoder_input_data = np.array(padded_questions) 

print("Sequences are of dimensions: {}".format(encoder_input_data.shape))
print("Example sequence: {}".format(encoder_input_data[0]))
print("Example of WordIndex: {}".format(dict(itertools.islice(word_index.items(), 4))))

Longest sequence_questions is: 64
Sequences are of dimensions: (137052, 64)
Example sequence: [    0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0 15221  1308   201    45   140     4
    11   155   136     4    38    65  3576    21   830   128  8161   201
  2943  3163   149  1030]
Example of WordIndex: {'the': 1, 'to': 2, 'a': 3, 'is': 4}


# Preprocessing Answers Data

In [55]:
sequences_answers = tokenizer.texts_to_sequences(labeled_answers)
max_seq_lenAns = 100
# max_seq_lenAns = max([len(x) for x in sequences_answers])
print("Longest sequence_answers is: {}".format(max_seq_lenAns))
padded_answers = pad_sequences(sequences_answers, maxlen=max_seq_lenAns)
# array of padded sequences for questions
decoder_input_data = np.array(padded_answers) 

print("Sequences are of dimensions: {}".format(decoder_input_data.shape))
print("Example sequence: {}".format(encoder_input_data[0]))
print("Example of WordIndex: {}".format(dict(itertools.islice(word_index.items(), 4))))

Longest sequence_answers is: 100
Sequences are of dimensions: (137052, 100)
Example sequence: [    0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0 15221  1308   201    45   140     4
    11   155   136     4    38    65  3576    21   830   128  8161   201
  2943  3163   149  1030]
Example of WordIndex: {'the': 1, 'to': 2, 'a': 3, 'is': 4}


# Decoder output data

In [56]:
#experiment with the output dim 
output_dim = 200
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(max_words, output_dim, mask_zero=True) (encoder_inputs)
encoder_outputs, state_h, state_c = LSTM(output_dim, return_state=True) (encoder_embedding)
# encoder_states = state_h, state_c


decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(max_words, output_dim, mask_zero=True) (decoder_inputs)
decoder_LSTM = LSTM(output_dim, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_LSTM(decoder_embedding, initial_state=[state_h, state_c])
decoder_dense = Dense(max_words, activation='softmax')
output = decoder_dense (decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], output)

In [57]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [58]:
model.summary()

Model: "model_5"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, None, 200)    4000000     input_7[0][0]                    
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, None, 200)    4000000     input_8[0][0]                    
____________________________________________________________________________________________

# Removing the word START from the answer sequences and padding the sequences

In [59]:
decoder_output_seq = sequences_answers
for i in range(len(decoder_output_seq)):
    decoder_output_seq[i] = decoder_output_seq[i][1:]

target_data = pad_sequences(decoder_output_seq, max_seq_lenAns)


# Shuffling then splitting the input data and target data 80:20 

In [60]:
# should all share the shape (137052, _)
print("Encoder Input: ", encoder_input_data.shape)
print("Decoder Input: ", decoder_input_data.shape)
print("Target Data Input: ", target_data.shape)

Encoder Input:  (137052, 64)
Decoder Input:  (137052, 100)
Target Data Input:  (137052, 100)


In [61]:
eighty = 0.8 * encoder_input_data.shape[0]
print(eighty)
train_perc = 109500/encoder_input_data.shape[0]
print(train_perc)

109641.6
0.7989668155152789


In [62]:
# Since I will be fitting the model in portion sizes of 300 I am going
# use 109,500 which is approximately 80% (actually 79.897%)

In [63]:
indices = np.arange(encoder_input_data.shape[0])
np.random.shuffle(indices) 
# EID = encoder_input_data
shuffled_EID = encoder_input_data[indices]
shuffled_DID = decoder_input_data[indices]
shuffled_target = target_data[indices]

print(shuffled_EID.shape)
print(shuffled_DID.shape)
print(shuffled_target.shape)

(137052, 64)
(137052, 100)
(137052, 100)


In [64]:
train_encoder_data = shuffled_EID[0:12000] 
train_decoder_data = shuffled_DID[0:12000]
train_target_data = shuffled_target[0:12000]

test_encoder_data = shuffled_EID[12000: 15001 ] 
test_decoder_data = shuffled_DID[12000: 15001]
test_target_data = shuffled_target[12000: 15001]

print(train_encoder_data.shape)
print(train_decoder_data.shape)
print(train_target_data.shape)
print(test_encoder_data.shape)
print(test_decoder_data.shape)
print(test_target_data.shape)

print(train_encoder_data[1])
print(train_decoder_data[1])
print(train_target_data[1])

print(int(train_target_data.shape[0]/300))

(12000, 64)
(12000, 100)
(12000, 100)
(3001, 64)
(3001, 100)
(3001, 100)
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0   27   56  812  113    2  253  151    1 4694
 7179  427   61    3  509   24  596    6  129  204   16   38    5   35
   70   15  231   11    4  105  267  185]
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    7 2672   41  532  540  933   18   44
   58  191   77 1082   25   86   21    3 1714   12   39   81 8231  247
   11    4 6327  834   92 5643 1509   11    9  403   19  151    3  994
  449  161    5   10   46   15 4328   22    9   17  296  204   41 1509
   11    8]
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0 

# Fitting the model in portions since the OneHotEncoded Answers are too large to be held in computer memory

In [65]:
tenth = 300
times_completed = int(train_target_data.shape[0]/300)
for i in range(times_completed): # CHANGE THIS
    current = i * tenth
    nextone = (i+1) * tenth
    print("Currently--> {}:{}".format(current, nextone))
    train_X1 = train_encoder_data[current:nextone]
    train_X2 = train_decoder_data[current:nextone]
    portion = train_target_data[current:nextone]
    output = utils.to_categorical(portion, max_words)
#     print(train_X1.shape)
#     print(train_X2.shape)
#     print(output.shape)
#     print(output)
    model.fit([train_X1 , train_X2], output, batch_size=32, epochs=20 ) 

Currently--> 0:300
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 300:600
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 600:900
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 900:1200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Currently--> 1200:1500
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 1500:1800
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20


Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 1800:2100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 2100:2400
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 2400:2700
Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 2700:3000
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 3000:3300
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 3300:3600
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 3600:3900
Epoch 1/20
Epoch 2/20


Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 3900:4200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20


Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 4200:4500
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 4500:4800
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 4800:5100
Epoch 1/20
Epoch 2/20
Epoch 3/20


Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 5100:5400
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 5400:5700
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20


Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 5700:6000
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 6000:6300
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 6300:6600
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 6600:6900
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


Epoch 19/20
Epoch 20/20
Currently--> 6900:7200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 7200:7500
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 7500:7800
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20


Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 7800:8100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20


Epoch 20/20
Currently--> 8100:8400
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 8400:8700
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 8700:9000
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20


Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 9000:9300
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Currently--> 9300:9600
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 9600:9900
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20


Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 9900:10200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20


Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 10200:10500
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 10500:10800


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 10800:11100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 11100:11400
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20


Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 11400:11700
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Currently--> 11700:12000
Epoch 1/20
Epoch 2/20


Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [66]:
model.save("chat_botV1.h5")

In [67]:
# testing = load_model("chat_botV1.h5")

In [68]:
encoder_states = state_h, state_c

In [69]:
def make_inference_model():
    
    encoder_model = Model(encoder_inputs, encoder_states)
    
    decoder_state_input_h = Input(shape=(output_dim,))
    decoder_state_input_c = Input(shape=(output_dim,))
    
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    
    decoder_outputs, state_h, state_c = decoder_LSTM(decoder_embedding, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_ouputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                         [decoder_outputs] + decoder_states)
    
    return encoder_model, decoder_model

In [70]:
def format_questions(sentence):
    words = sentence.lower().split()
    sentence_words = list()
    for word in words:
        sentence_words.append(tokenizer.word_index[word])
    padded_input = pad_sequences([sentence_words], max_seq_len)
    
    return padded_input

In [71]:
print(format_questions("What can I do about cardiac pain"))

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0  24  18  10  28  74 163  49]]


In [None]:
enc_model, dec_model = make_inference_model()

for i in range (10):
    state_values = enc_model.predict(format_questions(input('Enter question: ')))
    empty_target_seq = np.zeros((1,1))
    empty_target_seq[0, 0] = tokenizer.word_index['start']
    stop_condition = False
    decoded_translation = ''
    while not stop_condition:
        dec_outputs, h, c = dec_model.predict([empty_target_seq] + state_values)
        sampled_word_index = np.argmax(dec_outputs[0, -1, :])
        sampled_word = None
        for word, index in tokenizer.word_index.items():
            if sampled_word_index == index:
                decoded_translation += " {}".format(word)
                sampled_word = word
            
            if sampled_word == 'end' or len(decoded_translation.split()) > max_seq_lenAns:
                stop_condition = True
            
            empty_target_seq = np.zeros ((1, 1))
            empty_target_seq[0, 0] = sampled_word_index
            states_values = [h, c]
            
    print(decoded_translation)

Enter question: What could be a sign of cardiac arrest
 for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for has for
Enter question: What is cardiovascular disease
 for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for all for
Enter question: how do i lose weight
 for sleep for sleep for sleep for sleep for sleep 