In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from pathlib import Path
import string
import re
import joblib
import json
from collections import Counter
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Flatten
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from IPython.core.interactiveshell import InteractiveShell

# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Download the WordNet corpus via NLTK (WordNetLemmatizer is used further down).
nltk.download('wordnet')


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\akhil\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
def load_doc(jsonFile):
    """Read a JSON file and return its parsed content.

    Parameters
    ----------
    jsonFile : str or path-like
        Path of the JSON document to read.

    Returns
    -------
    object
        The decoded JSON value (a dict for the intents file used in this notebook).
    """
    # JSON is UTF-8 by specification; being explicit keeps the result independent
    # of the platform's default encoding (e.g. cp1252 on Windows).
    with open(jsonFile, encoding='utf-8') as file:
        return json.load(file)

# Parse the intents file; `data` is read by frame_data() below.
data = load_doc('./intents.json')
data

{'intents': [{'tag': 'start_conversation',
   'patterns': ['Hi there',
    'Is anyone there?',
    'Hey',
    'Hola',
    'Hello',
    'Good day',
    'Hi'],
   'responses': ['Hello',
    'Happy to have you here',
    'Good to see you again',
    'Hi there, how can I help?'],
   'context': ['']},
  {'tag': 'what_are_you',
   'patterns': ['What is your name?',
    'what are you?',
    'who are you?',
    'your name pls?'],
   'responses': ["Hi, I'm  Bowhead Bot",
    "I'm Bowhead Bot ",
    'Call me Bowhead Bot'],
   'context': ['']},
  {'tag': 'end_conversation',
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Nice chatting to you, bye',
    'Till next time'],
   'responses': ['Have a lovely Day!',
    'Bye',
    'Buhbyee !',
    'Happy to help you'],
   'context': ['']},
  {'tag': 'thanks',
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    'Awesome, thanks',
    'Thanks for helping me'],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure', 

In [4]:
def frame_data(feat_1, feat_2, is_pattern, intents_data=None):
    """Flatten the intents into a two-column DataFrame of (text, tag) rows.

    Parameters
    ----------
    feat_1 : str
        Name of the column that receives the text (a pattern or a response).
    feat_2 : str
        Name of the column that receives the intent tag.
    is_pattern : bool
        True to emit one row per entry of ``intent['patterns']``;
        False to emit one row per entry of ``intent['responses']``.
    intents_data : dict, optional
        Mapping with an ``'intents'`` list. Defaults to the notebook-level ``data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``[feat_1, feat_2]`` with a fresh 0..n-1 index, rows in file order.
    """
    if intents_data is None:
        intents_data = data
    text_key = 'patterns' if is_pattern else 'responses'
    # Collect every row first and build the frame once: DataFrame.append copied
    # the whole frame on each call and no longer exists in pandas 2.0+.
    rows = [
        (text, intent['tag'])
        for intent in intents_data['intents']
        for text in intent[text_key]
    ]
    return pd.DataFrame(rows, columns=[feat_1, feat_2])

In [5]:
# One row per example pattern, paired with its intent tag.
questions_df = frame_data('questions','labels',True)
questions_df

Unnamed: 0,questions,labels
0,Hi there,start_conversation
1,Is anyone there?,start_conversation
2,Hey,start_conversation
3,Hola,start_conversation
4,Hello,start_conversation
5,Good day,start_conversation
6,Hi,start_conversation
7,What is your name?,what_are_you
8,what are you?,what_are_you
9,who are you?,what_are_you


In [6]:
# One row per canned response, paired with its intent tag.
responses_df = frame_data('responses','labels',False)
responses_df

Unnamed: 0,responses,labels
0,Hello,start_conversation
1,Happy to have you here,start_conversation
2,Good to see you again,start_conversation
3,"Hi there, how can I help?",start_conversation
4,"Hi, I'm Bowhead Bot",what_are_you
5,I'm Bowhead Bot,what_are_you
6,Call me Bowhead Bot,what_are_you
7,Have a lovely Day!,end_conversation
8,Bye,end_conversation
9,Buhbyee !,end_conversation


# Creating the tokens and vocabulary of the training data; this vocabulary is later used when testing new user input (testing the bot)

In [7]:
# Shared lemmatizer used by tokenizer() below.
lemmatizer = WordNetLemmatizer()

# Word-frequency table filled by create_vocab() and persisted to vocab.pkl.
vocab = Counter()
# NOTE(review): `labels` is not referenced anywhere else in the visible notebook.
labels = []
def tokenizer(entry):
    """Split ``entry`` on whitespace and return its cleaned, lemmatized words.

    For every whitespace-separated word: ASCII punctuation is removed, the word
    is dropped unless what remains is purely alphabetic, the rest is lower-cased
    and lemmatized with the module-level ``lemmatizer``, and results of a single
    character are discarded. Stop words are NOT removed.

    Returns
    -------
    list of str
        The surviving tokens, in their original order.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = []
    for raw_word in entry.split():
        word = raw_word.translate(strip_punctuation)
        if not word.isalpha():
            continue
        lemma = lemmatizer.lemmatize(word.lower())
        if len(lemma) > 1:
            cleaned.append(lemma.lower())
    return cleaned

def remove_stop_words(tokenizer,df,feature):
    """Replace every entry of ``df[feature]`` with its cleaned, space-joined tokens.

    This function performs no stop-word filtering of its own; the cleaning is
    whatever the supplied ``tokenizer`` callable does.

    Parameters
    ----------
    tokenizer : callable
        Maps one text entry to a list of string tokens.
    df : pandas.DataFrame
        Frame whose ``feature`` column is rewritten in place.
    feature : str
        Name of the text column.

    Side effects
    ------------
    ``df`` is modified in place, and the tokens of the last entry are written to
    ``tokens.pkl`` (nothing is written when the column is empty).

    Returns
    -------
    None
    """
    cleaned_entries = []
    tokens = None
    for entry in df[feature]:
        tokens = tokenizer(entry)
        cleaned_entries.append(' '.join(tokens))
    # The dump used to sit inside the loop and rewrote the file for every row;
    # only the last row's tokens ever survived, so write them exactly once.
    if tokens is not None:
        joblib.dump(tokens,'tokens.pkl')
    df[feature] = cleaned_entries
    return

def create_vocab(tokenizer,df,feature):
    """Count the tokens of every entry of ``df[feature]`` into the global ``vocab``.

    Each entry is passed through ``tokenizer`` and the resulting tokens are added
    to the module-level ``vocab`` counter, which is then saved to ``vocab.pkl``.
    Returns None.
    """
    for tokens in map(tokenizer, df[feature]):
        vocab.update(tokens)
    joblib.dump(vocab,'vocab.pkl')


# Count the vocabulary from the raw questions first, then overwrite the column with the cleaned text.
create_vocab(tokenizer,questions_df,'questions')
remove_stop_words(tokenizer,questions_df,'questions')

# Creating training and test data and storing the indexes

In [8]:
# Hold out the first question of every label as the test set.
# reset_index() carries each row's position through the groupby, so the index is
# read directly instead of being looked up again by text (which would pick the
# wrong row whenever the same cleaned question appears under two labels).
first_per_label = questions_df.reset_index().groupby(by='labels',as_index=False).first()
test_list = list(first_per_label['questions'])
test_list

# Row positions of the held-out questions in questions_df.
test_index = list(first_per_label['index'])
test_index

# Every remaining row is used for training.
held_out = set(test_index)
train_index = [i for i in questions_df.index if i not in held_out]
train_index

['what can you tell me about covid trial',
 'bye',
 'how can you help me',
 'hi there',
 'thanks',
 'what is your name']

[29, 11, 21, 0, 16, 7]

[1,
 2,
 3,
 4,
 5,
 6,
 8,
 9,
 10,
 12,
 13,
 14,
 15,
 17,
 18,
 19,
 20,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37]

# Initializing a tokenizer, fitting it on to questions df and encoding the questions_df

In [9]:
def encoder(df,feature):
    """Fit a Keras ``Tokenizer`` on ``df[feature]`` and return the padded sequences.

    The intermediate values (texts, word index, vocabulary size, longest text
    length in words, integer sequences) are printed, and the fitted tokenizer is
    saved to ``tokenizer_t.pkl``.

    Returns
    -------
    tuple
        ``(padded, vocab_size)``: the post-padded integer matrix, and
        ``len(word_index) + 1``.
    """
    texts = list(df[feature])
    print(texts)

    # Step 1: learn the word -> integer index from the texts. Keras numbers words
    # by descending frequency, starting at 1; 0 is left for padding.
    keras_tokenizer = Tokenizer()
    keras_tokenizer.fit_on_texts(texts)
    print(keras_tokenizer.word_index)
    joblib.dump(keras_tokenizer,'tokenizer_t.pkl')

    vocab_size = len(keras_tokenizer.word_index) + 1
    print(vocab_size)

    max_length = max(len(text.split()) for text in texts)
    print(max_length)

    # Step 2: replace every word by its index and pad the sequences on the right
    # to the length of the longest text.
    sequences = keras_tokenizer.texts_to_sequences(texts)
    print(sequences)
    return pad_sequences(sequences, maxlen=max_length, padding='post'), vocab_size

In [10]:
# Encode the cleaned questions, then pair each padded sequence with its intent label.
X,vocab_size = encoder(questions_df,'questions')
X
questions_df_encoded = pd.DataFrame(X)
# Column assignment aligns on the index; both frames use the default 0..n-1 index here.
questions_df_encoded['labels'] = questions_df.labels
questions_df_encoded

['hi there', 'is anyone there', 'hey', 'hola', 'hello', 'good day', 'hi', 'what is your name', 'what are you', 'who are you', 'your name pls', 'bye', 'see you later', 'goodbye', 'nice chatting to you bye', 'till next time', 'thanks', 'thank you', 'thats helpful', 'awesome thanks', 'thanks for helping me', 'how can you help me', 'what can you do', 'what help you provide', 'how can you be helpful', 'what support do you offered', 'what do you know', 'what help', 'what are you about', 'what can you tell me about covid trial', 'need some information about covid trial', 'covid trial information needed', 'want to know about covid trial', 'show me covid trial info', 'want to find hepatitis trial', 'find heart trial detail', 'find medical trial info', 'diabetes trial']
{'you': 1, 'what': 2, 'trial': 3, 'covid': 4, 'me': 5, 'can': 6, 'about': 7, 'are': 8, 'to': 9, 'thanks': 10, 'help': 11, 'do': 12, 'find': 13, 'hi': 14, 'there': 15, 'is': 16, 'your': 17, 'name': 18, 'bye': 19, 'helpful': 20, 'h

array([[14, 15,  0,  0,  0,  0,  0,  0],
       [16, 26, 15,  0,  0,  0,  0,  0],
       [27,  0,  0,  0,  0,  0,  0,  0],
       [28,  0,  0,  0,  0,  0,  0,  0],
       [29,  0,  0,  0,  0,  0,  0,  0],
       [30, 31,  0,  0,  0,  0,  0,  0],
       [14,  0,  0,  0,  0,  0,  0,  0],
       [ 2, 16, 17, 18,  0,  0,  0,  0],
       [ 2,  8,  1,  0,  0,  0,  0,  0],
       [32,  8,  1,  0,  0,  0,  0,  0],
       [17, 18, 33,  0,  0,  0,  0,  0],
       [19,  0,  0,  0,  0,  0,  0,  0],
       [34,  1, 35,  0,  0,  0,  0,  0],
       [36,  0,  0,  0,  0,  0,  0,  0],
       [37, 38,  9,  1, 19,  0,  0,  0],
       [39, 40, 41,  0,  0,  0,  0,  0],
       [10,  0,  0,  0,  0,  0,  0,  0],
       [42,  1,  0,  0,  0,  0,  0,  0],
       [43, 20,  0,  0,  0,  0,  0,  0],
       [44, 10,  0,  0,  0,  0,  0,  0],
       [10, 45, 46,  5,  0,  0,  0,  0],
       [21,  6,  1, 11,  5,  0,  0,  0],
       [ 2,  6,  1, 12,  0,  0,  0,  0],
       [ 2, 11,  1, 47,  0,  0,  0,  0],
       [21,  6, 

Unnamed: 0,0,1,2,3,4,5,6,7,labels
0,14,15,0,0,0,0,0,0,start_conversation
1,16,26,15,0,0,0,0,0,start_conversation
2,27,0,0,0,0,0,0,0,start_conversation
3,28,0,0,0,0,0,0,0,start_conversation
4,29,0,0,0,0,0,0,0,start_conversation
5,30,31,0,0,0,0,0,0,start_conversation
6,14,0,0,0,0,0,0,0,start_conversation
7,2,16,17,18,0,0,0,0,what_are_you
8,2,8,1,0,0,0,0,0,what_are_you
9,32,8,1,0,0,0,0,0,what_are_you


In [11]:
# Add two all-padding observations labelled 'confused' (one for each split).
# The row width is derived from the frame instead of hard-coding 8, so it follows
# the padded sequence length if the intents change.
n_features = questions_df_encoded.shape[1] - 1  # every column except 'labels' (the last one)
confused_rows = pd.DataFrame(
    [[0] * n_features + ['confused'] for _ in range(2)],
    columns=questions_df_encoded.columns,
)
confused_rows
# DataFrame.append no longer exists in pandas 2.0+; concat adds both rows in one step.
questions_df_encoded = pd.concat([questions_df_encoded, confused_rows], ignore_index=True)

questions_df_encoded

Unnamed: 0,0,1,2,3,4,5,6,7,labels
0,0,0,0,0,0,0,0,0,confused


Unnamed: 0,0,1,2,3,4,5,6,7,labels
0,0,0,0,0,0,0,0,0,confused


Unnamed: 0,0,1,2,3,4,5,6,7,labels
0,14,15,0,0,0,0,0,0,start_conversation
1,16,26,15,0,0,0,0,0,start_conversation
2,27,0,0,0,0,0,0,0,start_conversation
3,28,0,0,0,0,0,0,0,start_conversation
4,29,0,0,0,0,0,0,0,start_conversation
5,30,31,0,0,0,0,0,0,start_conversation
6,14,0,0,0,0,0,0,0,start_conversation
7,2,16,17,18,0,0,0,0,what_are_you
8,2,8,1,0,0,0,0,0,what_are_you
9,32,8,1,0,0,0,0,0,what_are_you


In [12]:
# Put one 'confused' observation in the training split and one in the test split.
# The rows are looked up by label instead of hard-coding their positions (38 and 39),
# so the cell keeps working when the number of patterns in intents.json changes.
confused_index = list(questions_df_encoded.index[questions_df_encoded['labels'] == 'confused'])
assert len(confused_index) >= 2, "expected at least two 'confused' rows"
# The membership checks make the cell safe to re-run without duplicating entries.
if confused_index[0] not in train_index:
    train_index.append(confused_index[0])
if confused_index[1] not in test_index:
    test_index.append(confused_index[1])

# Encoding the labels of questions_df_encoded and then later responses_df 

In [13]:
# Integer-encode the 'labels' column. LabelEncoder numbers the classes in sorted
# (alphabetical) order, which is why 'confused' receives code 0 below.
from sklearn.preprocessing import LabelEncoder
lable_enc = LabelEncoder()

labl = lable_enc.fit_transform(questions_df_encoded.labels)
labl

# Label name -> integer code, keeping the code seen at the first occurrence of each label.
mapper = {}
for key, code in zip(questions_df_encoded.labels, labl):
    mapper.setdefault(key, code)
mapper

array([4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 3,
       3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0])

{'start_conversation': 4,
 'what_are_you': 6,
 'end_conversation': 2,
 'thanks': 5,
 'options': 3,
 'covid_trials': 1,
 'confused': 0}

In [14]:
# Replace the tag names in responses_df with the integer codes used for the questions.
# Values that are not tag names (e.g. the codes written by an earlier run of this
# cell) pass through unchanged; previously a second run mapped every label to NaN
# and the integer cast raised. An unknown tag name still fails loudly in astype.
responses_df['labels'] = responses_df['labels'].map(lambda tag: mapper.get(tag, tag)).astype('int32')
responses_df
responses_df.to_csv('./responses.csv')

Unnamed: 0,responses,labels
0,Hello,4
1,Happy to have you here,4
2,Good to see you again,4
3,"Hi there, how can I help?",4
4,"Hi, I'm Bowhead Bot",6
5,I'm Bowhead Bot,6
6,Call me Bowhead Bot,6
7,Have a lovely Day!,2
8,Bye,2
9,Buhbyee !,2


# Creating train and test for the model based on the above calculated indexes

In [15]:
# Select the rows of each split by the indexes computed above.
train = questions_df_encoded.loc[train_index]
test = questions_df_encoded.loc[test_index]
train
test

# Features: the padded token columns, without the label.
X_train = train.drop(columns=['labels'])
X_train
train.labels
X_test = test.drop(columns=['labels'])
X_test
test.labels

# One-hot encode against the complete label set and only then select the rows.
# Encoding each split separately lines up only while both splits contain every
# class; otherwise the columns of y_train and y_test would stand for different labels.
# dtype is pinned because newer pandas versions return booleans by default.
labels_one_hot = pd.get_dummies(questions_df_encoded['labels'], dtype='uint8')
y_train = labels_one_hot.loc[train_index].values
y_train
y_test = labels_one_hot.loc[test_index].values
y_test

Unnamed: 0,0,1,2,3,4,5,6,7,labels
1,16,26,15,0,0,0,0,0,start_conversation
2,27,0,0,0,0,0,0,0,start_conversation
3,28,0,0,0,0,0,0,0,start_conversation
4,29,0,0,0,0,0,0,0,start_conversation
5,30,31,0,0,0,0,0,0,start_conversation
6,14,0,0,0,0,0,0,0,start_conversation
8,2,8,1,0,0,0,0,0,what_are_you
9,32,8,1,0,0,0,0,0,what_are_you
10,17,18,33,0,0,0,0,0,what_are_you
12,34,1,35,0,0,0,0,0,end_conversation


Unnamed: 0,0,1,2,3,4,5,6,7,labels
29,2,6,1,51,5,7,4,3,covid_trials
11,19,0,0,0,0,0,0,0,end_conversation
21,21,6,1,11,5,0,0,0,options
0,14,15,0,0,0,0,0,0,start_conversation
16,10,0,0,0,0,0,0,0,thanks
7,2,16,17,18,0,0,0,0,what_are_you
39,0,0,0,0,0,0,0,0,confused


Unnamed: 0,0,1,2,3,4,5,6,7
1,16,26,15,0,0,0,0,0
2,27,0,0,0,0,0,0,0
3,28,0,0,0,0,0,0,0
4,29,0,0,0,0,0,0,0
5,30,31,0,0,0,0,0,0
6,14,0,0,0,0,0,0,0
8,2,8,1,0,0,0,0,0
9,32,8,1,0,0,0,0,0
10,17,18,33,0,0,0,0,0
12,34,1,35,0,0,0,0,0


1     start_conversation
2     start_conversation
3     start_conversation
4     start_conversation
5     start_conversation
6     start_conversation
8           what_are_you
9           what_are_you
10          what_are_you
12      end_conversation
13      end_conversation
14      end_conversation
15      end_conversation
17                thanks
18                thanks
19                thanks
20                thanks
22               options
23               options
24               options
25               options
26               options
27               options
28               options
30          covid_trials
31          covid_trials
32          covid_trials
33          covid_trials
34          covid_trials
35          covid_trials
36          covid_trials
37          covid_trials
38              confused
Name: labels, dtype: object

Unnamed: 0,0,1,2,3,4,5,6,7
29,2,6,1,51,5,7,4,3
11,19,0,0,0,0,0,0,0
21,21,6,1,11,5,0,0,0
0,14,15,0,0,0,0,0,0
16,10,0,0,0,0,0,0,0
7,2,16,17,18,0,0,0,0
39,0,0,0,0,0,0,0,0


29          covid_trials
11      end_conversation
21               options
0     start_conversation
16                thanks
7           what_are_you
39              confused
Name: labels, dtype: object

array([[0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0]], dtype=uin

array([[0, 1, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0, 0, 0]], dtype=uint8)

In [16]:
# Sanity check: width of one one-hot target per split, and shape of the training matrix.
y_train[0].shape,y_test[0].shape
X_train.shape

((7,), (7,))

(33, 8)

In [17]:
# Number of padded token columns, i.e. the sequence length fed to the model.
max_length = X_train.shape[1]
max_length
# NOTE(review): `output` is not read anywhere in the visible notebook, and 9 differs
# from the 7 output units hard-coded in define_model — confirm, then remove or fix.
output = 9

8

# Model construction

In [18]:
# Stop training once the validation loss has not improved for 10 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# Write model-v1.h5 only when the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    "model-v1.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Multiply the learning rate by 0.2 after 3 epochs without an improvement of at least 0.0001.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_delta=0.0001,
    verbose=1,
)

callbacks = [early_stopping, checkpoint, reduce_lr]


def define_model(vocab_size, max_length, n_classes=7, embedding_dim=300):
    """Build and compile the 1-D convolutional intent classifier.

    Architecture: Embedding -> Conv1D(64 filters, kernel 4, ReLU) ->
    MaxPooling1D(4) -> Flatten -> Dense(softmax).

    Parameters
    ----------
    vocab_size : int
        Input dimension of the embedding layer.
    max_length : int
        Length of each padded input sequence.
    n_classes : int, optional
        Number of softmax output units; it must equal the width of the one-hot
        targets. Defaults to 7, the value that used to be hard-coded.
    embedding_dim : int, optional
        Size of each embedding vector. Defaults to 300 (previously hard-coded).

    Returns
    -------
    tensorflow.keras.models.Sequential
        The model, compiled with categorical cross-entropy, the 'adam' optimizer
        and the accuracy metric. Its summary is printed as a side effect.
    """
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
    model.add(Conv1D(filters=64, kernel_size=4, activation='relu'))
    model.add(MaxPooling1D(pool_size=4))
    model.add(Flatten())
    model.add(Dense(n_classes, activation='softmax'))

    # compile network
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    # summarize defined model
    model.summary()
    return model

In [19]:
# Build the network for the vocabulary size and sequence length derived above.
model = define_model(vocab_size, max_length)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 300)            18300     
_________________________________________________________________
conv1d (Conv1D)              (None, 5, 64)             76864     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 1, 64)             0         
_________________________________________________________________
flatten (Flatten)            (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 7)                 455       
Total params: 95,6

In [20]:
# NOTE(review): the held-out test rows double as validation data, so early stopping,
# checkpointing and the LR schedule all react to the same rows that are "tested" below.
history = model.fit(X_train, y_train, epochs=500, verbose=1,validation_data=(X_test,y_test),callbacks=callbacks)

Train on 33 samples, validate on 7 samples
Epoch 1/500
Epoch 00001: val_loss improved from inf to 1.94377, saving model to model-v1.h5
Epoch 2/500
Epoch 00002: val_loss improved from 1.94377 to 1.93532, saving model to model-v1.h5
Epoch 3/500
Epoch 00003: val_loss improved from 1.93532 to 1.92820, saving model to model-v1.h5
Epoch 4/500
Epoch 00004: val_loss improved from 1.92820 to 1.92366, saving model to model-v1.h5
Epoch 5/500
Epoch 00005: val_loss improved from 1.92366 to 1.92177, saving model to model-v1.h5
Epoch 6/500
Epoch 00006: val_loss did not improve from 1.92177
Epoch 7/500
Epoch 00007: val_loss did not improve from 1.92177
Epoch 8/500
Epoch 00008: val_loss did not improve from 1.92177

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/500
Epoch 00009: val_loss did not improve from 1.92177
Epoch 10/500
Epoch 00010: val_loss improved from 1.92177 to 1.92165, saving model to model-v1.h5
Epoch 11/500
Epoch 00011: val_loss improved from 1

Epoch 29/500
Epoch 00029: val_loss improved from 1.88238 to 1.87944, saving model to model-v1.h5
Epoch 30/500
Epoch 00030: val_loss improved from 1.87944 to 1.87687, saving model to model-v1.h5
Epoch 31/500
Epoch 00031: val_loss improved from 1.87687 to 1.87399, saving model to model-v1.h5
Epoch 32/500
Epoch 00032: val_loss improved from 1.87399 to 1.87145, saving model to model-v1.h5
Epoch 33/500
Epoch 00033: val_loss improved from 1.87145 to 1.86920, saving model to model-v1.h5
Epoch 34/500
Epoch 00034: val_loss improved from 1.86920 to 1.86651, saving model to model-v1.h5
Epoch 35/500
Epoch 00035: val_loss improved from 1.86651 to 1.86365, saving model to model-v1.h5
Epoch 36/500
Epoch 00036: val_loss improved from 1.86365 to 1.86103, saving model to model-v1.h5
Epoch 37/500
Epoch 00037: val_loss improved from 1.86103 to 1.85799, saving model to model-v1.h5
Epoch 38/500
Epoch 00038: val_loss improved from 1.85799 to 1.85479, saving model to model-v1.h5
Epoch 39/500
Epoch 00039: val_

Epoch 57/500
Epoch 00057: val_loss improved from 1.80791 to 1.80454, saving model to model-v1.h5
Epoch 58/500
Epoch 00058: val_loss improved from 1.80454 to 1.80183, saving model to model-v1.h5
Epoch 59/500
Epoch 00059: val_loss improved from 1.80183 to 1.79937, saving model to model-v1.h5
Epoch 60/500
Epoch 00060: val_loss improved from 1.79937 to 1.79753, saving model to model-v1.h5
Epoch 61/500
Epoch 00061: val_loss improved from 1.79753 to 1.79637, saving model to model-v1.h5
Epoch 62/500
Epoch 00062: val_loss improved from 1.79637 to 1.79552, saving model to model-v1.h5
Epoch 63/500
Epoch 00063: val_loss improved from 1.79552 to 1.79522, saving model to model-v1.h5
Epoch 64/500
Epoch 00064: val_loss improved from 1.79522 to 1.79489, saving model to model-v1.h5
Epoch 65/500
Epoch 00065: val_loss improved from 1.79489 to 1.79466, saving model to model-v1.h5
Epoch 66/500
Epoch 00066: val_loss improved from 1.79466 to 1.79430, saving model to model-v1.h5
Epoch 67/500
Epoch 00067: val_

# Testing the model

In [21]:
# Predicted class index for each test row.
list(np.argmax(model.predict(X_test), axis=1))[:8]

[3, 4, 3, 4, 4, 3, 4]

In [22]:
# True class index for each test row (position of the 1 in its one-hot target).
list(np.argmax(y_test, axis=1))[:8]

[1, 2, 3, 4, 5, 6, 0]

# Posing new question to our model

In [25]:
def get_text():
    """Return the (hard-coded) user question as a one-row DataFrame.

    The frame has a single column, 'questions'. The frame and the type of the
    question are printed; 'true' is printed as well if the question is empty.
    """
    question = 'I want to find trials for hepatitis'
    df_input = pd.DataFrame({'questions': [question]})
    print(df_input)
    stored_question = df_input.iloc[0]['questions']
    print(type(stored_question))
    if not stored_question:
        print('true')
    return df_input

# Load the artifacts written during training.
from tensorflow.keras.models import load_model
# Rebinds `model` to the checkpoint file written by the ModelCheckpoint callback.
model = load_model('model-v1.h5')
# tokenizer_t to be used in encode_input_text
# NOTE: joblib.load unpickles the file — only load artifacts you created yourself.
tokenizer_t = joblib.load('tokenizer_t.pkl')
# vocab to be used in bot_precausion
vocab = joblib.load('vocab.pkl')

def tokenizer(entry):
    """Split ``entry`` on whitespace and return its cleaned, lemmatized words.

    This repeats the tokenizer defined earlier in the notebook so that user input
    is cleaned exactly like the training questions; keep the two in sync.

    For every whitespace-separated word: ASCII punctuation is removed, the word
    is dropped unless what remains is purely alphabetic, the rest is lower-cased
    and lemmatized with the module-level ``lemmatizer``, and results of a single
    character are discarded. Stop words are NOT removed.

    Returns
    -------
    list of str
        The surviving tokens, in their original order.
    """
    strip_punctuation = str.maketrans('', '', string.punctuation)
    cleaned = []
    for raw_word in entry.split():
        word = raw_word.translate(strip_punctuation)
        if not word.isalpha():
            continue
        lemma = lemmatizer.lemmatize(word.lower())
        if len(lemma) > 1:
            cleaned.append(lemma.lower())
    return cleaned

def remove_stop_words_for_input(tokenizer,df,feature):
    """Clean the single user entry stored under label 0 of ``df[feature]``.

    The entry is tokenized with ``tokenizer`` and its tokens, joined by single
    spaces, are written back as the whole column. ``df`` is modified in place and
    also returned. No stop-word filtering happens beyond what ``tokenizer`` does.
    """
    cleaned_entry = ' '.join(tokenizer(df[feature][0]))
    df[feature] = [cleaned_entry]
    return df

def encode_input_text(tokenizer_t,df,feature,max_length=8):
    """Turn the user entry at label 0 of ``df[feature]`` into a padded index sequence.

    Parameters
    ----------
    tokenizer_t : fitted Keras Tokenizer
        Supplies ``texts_to_sequences``.
    df : pandas.DataFrame
        Frame holding the cleaned user input.
    feature : str
        Name of the text column.
    max_length : int, optional
        Length the sequence is padded (on the right) to. Defaults to 8, the value
        that used to be hard-coded; it has to match the model's input length.

    Returns
    -------
    numpy.ndarray
        The result of ``pad_sequences`` for the single entry.
    """
    entry = [df[feature][0]]
    encoded = tokenizer_t.texts_to_sequences(entry)
    return pad_sequences(encoded, maxlen=max_length, padding='post')

def get_pred(model,encoded_input):
    """Return the position of the highest score in ``model.predict(encoded_input)``.

    ``np.argmax`` is applied without an axis, so the position refers to the
    flattened prediction array.
    """
    scores = model.predict(encoded_input)
    return np.argmax(scores)

def bot_precausion(df_input,pred,known_words=None,fallback=0):
    """Override the prediction when the question contains no known word.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Frame whose ``questions`` column holds the cleaned user input at label 0.
    pred : int
        Class index predicted by the model.
    known_words : container of str, optional
        Words considered known. Defaults to the notebook-level ``vocab``.
    fallback : int, optional
        Class index returned when none of the words is known. Defaults to 0, the
        value that used to be hard-coded (the code of 'confused' in this notebook).

    Returns
    -------
    int
        ``pred`` if at least one word of the question is known, else ``fallback``.
        An empty question counts as "no known word".
    """
    if known_words is None:
        known_words = vocab
    words = df_input.questions[0].split()
    if not any(w in known_words for w in words):
        return fallback
    return pred

def get_response(df2,pred):
    """Pick one response at random among those labelled ``pred``.

    ``df2`` needs a 'labels' and a 'responses' column. A label that does not
    occur in ``df2`` raises the KeyError coming from ``get_group``.
    """
    candidates = df2.groupby('labels').get_group(pred)
    choice = np.random.randint(0, candidates.shape[0])
    return list(candidates.responses)[choice]

def bot_response(response):
    """Print the bot's response; returns None."""
    print(response)
    

In [26]:
df_input = get_text()

# Reload the persisted tokenizer and vocabulary from disk.
tokenizer_t = joblib.load('tokenizer_t.pkl')
vocab = joblib.load('vocab.pkl')

# Clean the question exactly like the training questions.
df_input = remove_stop_words_for_input(tokenizer,df_input,'questions')
# Named cleaned_input rather than `input`, so the builtin input() is not shadowed
# for the rest of the kernel session.
cleaned_input = df_input.iloc[0]['questions']
cleaned_input
encoded_input = encode_input_text(tokenizer_t,df_input,'questions')
encoded_input

# Model prediction, overridden by bot_precausion when no word of the question is known.
pred = get_pred(model,encoded_input)
pred
pred = bot_precausion(df_input,pred)
pred

response = get_response(responses_df,pred)
bot_response(response)


                             questions
0  I want to find trials for hepatitis
<class 'str'>


'want to find trial for hepatitis'

array([[24,  9, 13,  3, 45, 56,  0,  0]])

1

1

Getting the info


# If intents.json changes, this notebook needs a few modifications:
# 1. In cell 12, change the indexes used for appending the 'confused' observations accordingly.
# 2. In cell 18 (model construction), change the number of output nodes in the Dense layer to match the number of distinct labels, i.e. the intent tags plus 'confused' (if it does not already match).