<a href="https://colab.research.google.com/github/Dark-Sied/Intent_Classification/blob/master/Intent_classification_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Intent Recognition Dolores V01
Using a new vocabulary and training set.  
Intent Recognition is based on Intent_classification_final
Created by Christoph Windheuser, April 2020

In [1]:
import numpy as np
import pandas as pd
import csv
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.lancaster import LancasterStemmer
import nltk
import re
import os
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint


Using TensorFlow backend.


In [26]:
# DEFINE GLOBAL VARIABLES:
NUM_SENT = 0
NUM_INTENTS = 0
NUM_INTENTS_UNIQUE = 0
VOCABULARY_SIZE = 0
MAX_SENT_LENGTH = 0

In [27]:
# SHOW GLOBAL VARIABLES
print ("NUM_SENT:           ", NUM_SENT)
print ("NUM_INTENTS:        ", NUM_INTENTS)
print ("NUM_INTENTS_UNIQUE: ", NUM_INTENTS_UNIQUE)
print ("VOCABULARY_SIZE:    ", VOCABULARY_SIZE)
print ("MAX_SENT_LENGTH:    ", MAX_SENT_LENGTH)

NUM_SENT:            0
NUM_INTENTS:         0
NUM_INTENTS_UNIQUE:  0
VOCABULARY_SIZE:     0
MAX_SENT_LENGTH:     0


# Read the data

In [28]:
# df = pd.read_csv("Dolores_Dataset_v01.csv", encoding = "latin1", names = ["Sentence", "Intent"])
df = pd.read_csv("mowgli_train_new.csv", encoding = "latin1", names = ["Sentence", "Intent"])
print(df.head())
intents            = df["Intent"]
NUM_INTENTS        = len(list(df["Intent"]))
intents_unique     = list(set(df["Intent"]))
NUM_INTENTS_UNIQUE = len(intents_unique)
sentences          = list(df["Sentence"])
NUM_SENT           = len(sentences)


                    Sentence           Intent
0         are you a machine?  are_you_a_robot
1  how are the things going?      how_are_you
2             nah not for me             deny
3           What's going on?      how_are_you
4             are you a bot?  are_you_a_robot


In [29]:
print ("Unique Intents: ")
print (intents_unique)
print ("Num of unique Intents: ", len(intents_unique))


Unique Intents: 
['skills', 'how_are_you', 'leave_budget', 'greet', 'insult', 'sorry', 'personal_question', 'goodbye', 'confirm', 'are_you_a_robot', 'what_is_your_name', 'conversation_restart', 'deny', 'thanks']
Num of unique Intents:  14


In [30]:
df.head()

Unnamed: 0,Sentence,Intent
0,are you a machine?,are_you_a_robot
1,how are the things going?,how_are_you
2,nah not for me,deny
3,What's going on?,how_are_you
4,are you a bot?,are_you_a_robot


In [31]:
df.shape

(346, 2)

In [32]:
print(sentences[:5])

['are you a machine?', 'how are the things going?', 'nah not for me', "What's going on?", 'are you a bot?']


# Word Cleaning
re.sub is a routine from the "Regular Expression" Library.     
r'string' means that this is a "raw string", where backslashes are treated as charachters.    
re.sub(r'[^ a-z A-Z 0-9]', " ", s) means that all characters exept a-z, A-Z and 0-9 will be replaced by space.

In [33]:
clean_sent = []
for s in sentences:
    clean = re.sub(r'[^ a-z A-Z 0-9]', " ", s)
    w = word_tokenize(clean)
    clean_sent.append([i.lower() for i in w])

print (clean_sent[:5])
print("Len of clean_sent: ", len(clean_sent))


[['are', 'you', 'a', 'machine'], ['how', 'are', 'the', 'things', 'going'], ['nah', 'not', 'for', 'me'], ['what', 's', 'going', 'on'], ['are', 'you', 'a', 'bot']]
Len of clean_sent:  346


### Documentation for Tokenizer:
https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text/Tokenizer

In [34]:
token = Tokenizer(filters = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~')
token.fit_on_texts(clean_sent)
word_index = token.word_index
VOCABULARY_SIZE = len(word_index) + 1
MAX_SENT_LENGTH = len(max(clean_sent, key = len))

#print("Vocab Size = %d. Maximum sent length = %d" % (vocab_size, max_sent_length))
print ("Max sent length: ", MAX_SENT_LENGTH)
encoded_sent = token.texts_to_sequences(clean_sent)
padded_sent = pad_sequences(encoded_sent, maxlen = MAX_SENT_LENGTH, padding = "post")


Max sent length:  12


In [35]:
# print (encoded_sent)
# print ("Num of Sentences: ", len(encoded_sent))

In [36]:
padded_sent[:5]

array([[  4,   1,  23, 129,   0,   0,   0,   0,   0,   0,   0,   0],
       [  2,   4,  60,  61,  24,   0,   0,   0,   0,   0,   0,   0],
       [ 89,  31,  21,  15,   0,   0,   0,   0,   0,   0,   0,   0],
       [  7,  10,  24,  73,   0,   0,   0,   0,   0,   0,   0,   0],
       [  4,   1,  23,  46,   0,   0,   0,   0,   0,   0,   0,   0]],
      dtype=int32)

In [37]:
print("Shape of padded sent = ",padded_sent.shape)

Shape of padded sent =  (346, 12)


# Tokenizing the intents

In [38]:
#tokenizer for the intents
token_intents = Tokenizer(filters = '!"#$%&()*+,-/:;<=>?@[\]^`{|}~')
token_intents.fit_on_texts(intents_unique)


In [39]:
token_intents.word_index

{'skills': 1,
 'how_are_you': 2,
 'leave_budget': 3,
 'greet': 4,
 'insult': 5,
 'sorry': 6,
 'personal_question': 7,
 'goodbye': 8,
 'confirm': 9,
 'are_you_a_robot': 10,
 'what_is_your_name': 11,
 'conversation_restart': 12,
 'deny': 13,
 'thanks': 14}

In [40]:
encoded_output = token_intents.texts_to_sequences(intents)


In [41]:
# print (encoded_output)


In [42]:
encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)

In [43]:
# print (encoded_output)
type (encoded_output)

numpy.ndarray

In [44]:
encoded_output.shape

(346, 1)

In [45]:
one_hot = OneHotEncoder(sparse = False)
output_one_hot = one_hot.fit_transform(encoded_output)

In [46]:
output_one_hot.shape

(346, 14)

# Define Training- and Testset

In [47]:
from sklearn.model_selection import train_test_split

In [48]:
# train_X, val_X, train_Y, val_Y = train_test_split(padded_sent, output_one_hot, shuffle = True, test_size = 0.1)
train_X = padded_sent
train_Y = output_one_hot
val_X   = train_X
val_Y   = train_Y


In [49]:
print("Shape of train_X = %s and train_Y = %s" % (train_X.shape, train_Y.shape))
print("Shape of val_X = %s and val_Y = %s" % (val_X.shape, val_Y.shape))

Shape of train_X = (346, 12) and train_Y = (346, 14)
Shape of val_X = (346, 12) and val_Y = (346, 14)


# Define Embeddings (from glove)

In [50]:
GLOVE_DIR  = "/Users/cwindheu/gensim-data/glove-wiki-gigaword-200/"
GLOVE_FILE = "glove-wiki-gigaword-200.txt"
EMBEDDING_DIM = 200

embeddings_index = {}

f = open(os.path.join(GLOVE_DIR, GLOVE_FILE))
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))


Found 400001 word vectors.


In [51]:
embedding_matrix = np.random.uniform(-1, 1, (VOCABULARY_SIZE, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be initialized randomly.
        embedding_matrix[i] = embedding_vector
    else:
        print ("Not in vacabulary: ", word)
                    

Not in vacabulary:  chatbot
Not in vacabulary:  amayzing
Not in vacabulary:  heeey
Not in vacabulary:  helloooo
Not in vacabulary:  jojojo
Not in vacabulary:  thanx
Not in vacabulary:  hellllooooooo
Not in vacabulary:  hellooo
Not in vacabulary:  hiihihi
Not in vacabulary:  thnx
Not in vacabulary:  heyho
Not in vacabulary:  hiii
Not in vacabulary:  sweatheart
Not in vacabulary:  heyo
Not in vacabulary:  ayyyy
Not in vacabulary:  whaddup


# Defining the Model

In [52]:
model = Sequential()
model.add(Embedding(VOCABULARY_SIZE, EMBEDDING_DIM, weights=[embedding_matrix], input_length = MAX_SENT_LENGTH, trainable = True))
model.add(Bidirectional(LSTM(128)))
model.add(Dense(32, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(NUM_INTENTS_UNIQUE, activation = "softmax"))

model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 12, 200)           56800     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               336896    
_________________________________________________________________
dense_1 (Dense)              (None, 32)                8224      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 14)                462       
Total params: 402,382
Trainable params: 402,382
Non-trainable par

# Training the Model

In [60]:
import time

filename = 'dir_01.h5'
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

start = time.time()

hist = model.fit(train_X, train_Y, epochs = 500, batch_size = 32, validation_data = (val_X, val_Y), callbacks = [checkpoint])

print("Elapsed time in seconds: ", time.time() - start)


Train on 346 samples, validate on 346 samples
Epoch 1/500

Epoch 00001: val_loss improved from inf to 0.00415, saving model to dir_01.h5
Epoch 2/500

Epoch 00002: val_loss did not improve from 0.00415
Epoch 3/500

Epoch 00003: val_loss improved from 0.00415 to 0.00407, saving model to dir_01.h5
Epoch 4/500

Epoch 00004: val_loss did not improve from 0.00407
Epoch 5/500

Epoch 00005: val_loss did not improve from 0.00407
Epoch 6/500

Epoch 00006: val_loss did not improve from 0.00407
Epoch 7/500

Epoch 00007: val_loss did not improve from 0.00407
Epoch 8/500

Epoch 00008: val_loss did not improve from 0.00407
Epoch 9/500

Epoch 00009: val_loss did not improve from 0.00407
Epoch 10/500

Epoch 00010: val_loss did not improve from 0.00407
Epoch 11/500

Epoch 00011: val_loss did not improve from 0.00407
Epoch 12/500

Epoch 00012: val_loss did not improve from 0.00407
Epoch 13/500

Epoch 00013: val_loss did not improve from 0.00407
Epoch 14/500

Epoch 00014: val_loss did not improve from 0.0


Epoch 00044: val_loss improved from 0.00406 to 0.00405, saving model to dir_01.h5
Epoch 45/500

Epoch 00045: val_loss did not improve from 0.00405
Epoch 46/500

Epoch 00046: val_loss did not improve from 0.00405
Epoch 47/500

Epoch 00047: val_loss did not improve from 0.00405
Epoch 48/500

Epoch 00048: val_loss did not improve from 0.00405
Epoch 49/500

Epoch 00049: val_loss did not improve from 0.00405
Epoch 50/500

Epoch 00050: val_loss did not improve from 0.00405
Epoch 51/500

Epoch 00051: val_loss did not improve from 0.00405
Epoch 52/500

Epoch 00052: val_loss did not improve from 0.00405
Epoch 53/500

Epoch 00053: val_loss did not improve from 0.00405
Epoch 54/500

Epoch 00054: val_loss did not improve from 0.00405
Epoch 55/500

Epoch 00055: val_loss did not improve from 0.00405
Epoch 56/500

Epoch 00056: val_loss did not improve from 0.00405
Epoch 57/500

Epoch 00057: val_loss did not improve from 0.00405
Epoch 58/500

Epoch 00058: val_loss did not improve from 0.00405
Epoch 5


Epoch 00088: val_loss did not improve from 0.00405
Epoch 89/500

Epoch 00089: val_loss did not improve from 0.00405
Epoch 90/500

Epoch 00090: val_loss did not improve from 0.00405
Epoch 91/500

Epoch 00091: val_loss did not improve from 0.00405
Epoch 92/500

Epoch 00092: val_loss did not improve from 0.00405
Epoch 93/500

Epoch 00093: val_loss did not improve from 0.00405
Epoch 94/500

Epoch 00094: val_loss did not improve from 0.00405
Epoch 95/500

Epoch 00095: val_loss did not improve from 0.00405
Epoch 96/500

Epoch 00096: val_loss did not improve from 0.00405
Epoch 97/500

Epoch 00097: val_loss did not improve from 0.00405
Epoch 98/500

Epoch 00098: val_loss did not improve from 0.00405
Epoch 99/500

Epoch 00099: val_loss did not improve from 0.00405
Epoch 100/500

Epoch 00100: val_loss did not improve from 0.00405
Epoch 101/500

Epoch 00101: val_loss did not improve from 0.00405
Epoch 102/500

Epoch 00102: val_loss did not improve from 0.00405
Epoch 103/500

Epoch 00103: val_los


Epoch 00132: val_loss did not improve from 0.00405
Epoch 133/500

Epoch 00133: val_loss did not improve from 0.00405
Epoch 134/500

Epoch 00134: val_loss did not improve from 0.00405
Epoch 135/500

Epoch 00135: val_loss did not improve from 0.00405
Epoch 136/500

Epoch 00136: val_loss did not improve from 0.00405
Epoch 137/500

Epoch 00137: val_loss did not improve from 0.00405
Epoch 138/500

Epoch 00138: val_loss did not improve from 0.00405
Epoch 139/500

Epoch 00139: val_loss did not improve from 0.00405
Epoch 140/500

Epoch 00140: val_loss did not improve from 0.00405
Epoch 141/500

Epoch 00141: val_loss did not improve from 0.00405
Epoch 142/500

Epoch 00142: val_loss did not improve from 0.00405
Epoch 143/500

Epoch 00143: val_loss did not improve from 0.00405
Epoch 144/500

Epoch 00144: val_loss did not improve from 0.00405
Epoch 145/500

Epoch 00145: val_loss did not improve from 0.00405
Epoch 146/500

Epoch 00146: val_loss did not improve from 0.00405
Epoch 147/500

Epoch 001


Epoch 00176: val_loss did not improve from 0.00405
Epoch 177/500

Epoch 00177: val_loss did not improve from 0.00405
Epoch 178/500

Epoch 00178: val_loss did not improve from 0.00405
Epoch 179/500

Epoch 00179: val_loss did not improve from 0.00405
Epoch 180/500

Epoch 00180: val_loss did not improve from 0.00405
Epoch 181/500

Epoch 00181: val_loss did not improve from 0.00405
Epoch 182/500

Epoch 00182: val_loss did not improve from 0.00405
Epoch 183/500

Epoch 00183: val_loss did not improve from 0.00405
Epoch 184/500

Epoch 00184: val_loss did not improve from 0.00405
Epoch 185/500

Epoch 00185: val_loss did not improve from 0.00405
Epoch 186/500

Epoch 00186: val_loss did not improve from 0.00405
Epoch 187/500

Epoch 00187: val_loss did not improve from 0.00405
Epoch 188/500

Epoch 00188: val_loss improved from 0.00405 to 0.00404, saving model to dir_01.h5
Epoch 189/500

Epoch 00189: val_loss did not improve from 0.00404
Epoch 190/500

Epoch 00190: val_loss did not improve from 0


Epoch 00219: val_loss did not improve from 0.00403
Epoch 220/500

Epoch 00220: val_loss did not improve from 0.00403
Epoch 221/500

Epoch 00221: val_loss did not improve from 0.00403
Epoch 222/500

Epoch 00222: val_loss did not improve from 0.00403
Epoch 223/500

Epoch 00223: val_loss did not improve from 0.00403
Epoch 224/500

Epoch 00224: val_loss did not improve from 0.00403
Epoch 225/500

Epoch 00225: val_loss did not improve from 0.00403
Epoch 226/500

Epoch 00226: val_loss did not improve from 0.00403
Epoch 227/500

Epoch 00227: val_loss did not improve from 0.00403
Epoch 228/500

Epoch 00228: val_loss did not improve from 0.00403
Epoch 229/500

Epoch 00229: val_loss did not improve from 0.00403
Epoch 230/500

Epoch 00230: val_loss did not improve from 0.00403
Epoch 231/500

Epoch 00231: val_loss did not improve from 0.00403
Epoch 232/500

Epoch 00232: val_loss did not improve from 0.00403
Epoch 233/500

Epoch 00233: val_loss did not improve from 0.00403
Epoch 234/500

Epoch 002


Epoch 00263: val_loss did not improve from 0.00403
Epoch 264/500

Epoch 00264: val_loss did not improve from 0.00403
Epoch 265/500

Epoch 00265: val_loss did not improve from 0.00403
Epoch 266/500

Epoch 00266: val_loss did not improve from 0.00403
Epoch 267/500

Epoch 00267: val_loss did not improve from 0.00403
Epoch 268/500

Epoch 00268: val_loss did not improve from 0.00403
Epoch 269/500

Epoch 00269: val_loss did not improve from 0.00403
Epoch 270/500

Epoch 00270: val_loss did not improve from 0.00403
Epoch 271/500

Epoch 00271: val_loss did not improve from 0.00403
Epoch 272/500

Epoch 00272: val_loss did not improve from 0.00403
Epoch 273/500

Epoch 00273: val_loss did not improve from 0.00403
Epoch 274/500

Epoch 00274: val_loss did not improve from 0.00403
Epoch 275/500

Epoch 00275: val_loss did not improve from 0.00403
Epoch 276/500

Epoch 00276: val_loss did not improve from 0.00403
Epoch 277/500

Epoch 00277: val_loss did not improve from 0.00403
Epoch 278/500

Epoch 002


Epoch 00307: val_loss did not improve from 0.00403
Epoch 308/500

Epoch 00308: val_loss did not improve from 0.00403
Epoch 309/500

Epoch 00309: val_loss did not improve from 0.00403
Epoch 310/500

Epoch 00310: val_loss did not improve from 0.00403
Epoch 311/500

Epoch 00311: val_loss did not improve from 0.00403
Epoch 312/500

Epoch 00312: val_loss did not improve from 0.00403
Epoch 313/500

Epoch 00313: val_loss did not improve from 0.00403
Epoch 314/500

Epoch 00314: val_loss did not improve from 0.00403
Epoch 315/500

Epoch 00315: val_loss did not improve from 0.00403
Epoch 316/500

Epoch 00316: val_loss did not improve from 0.00403
Epoch 317/500

Epoch 00317: val_loss did not improve from 0.00403
Epoch 318/500

Epoch 00318: val_loss did not improve from 0.00403
Epoch 319/500

Epoch 00319: val_loss did not improve from 0.00403
Epoch 320/500

Epoch 00320: val_loss did not improve from 0.00403
Epoch 321/500

Epoch 00321: val_loss did not improve from 0.00403
Epoch 322/500

Epoch 003


Epoch 00351: val_loss did not improve from 0.00403
Epoch 352/500

Epoch 00352: val_loss did not improve from 0.00403
Epoch 353/500

Epoch 00353: val_loss did not improve from 0.00403
Epoch 354/500

Epoch 00354: val_loss did not improve from 0.00403
Epoch 355/500

Epoch 00355: val_loss did not improve from 0.00403
Epoch 356/500

Epoch 00356: val_loss did not improve from 0.00403
Epoch 357/500

Epoch 00357: val_loss did not improve from 0.00403
Epoch 358/500

Epoch 00358: val_loss improved from 0.00403 to 0.00403, saving model to dir_01.h5
Epoch 359/500

Epoch 00359: val_loss did not improve from 0.00403
Epoch 360/500

Epoch 00360: val_loss did not improve from 0.00403
Epoch 361/500

Epoch 00361: val_loss did not improve from 0.00403
Epoch 362/500

Epoch 00362: val_loss did not improve from 0.00403
Epoch 363/500

Epoch 00363: val_loss did not improve from 0.00403
Epoch 364/500

Epoch 00364: val_loss did not improve from 0.00403
Epoch 365/500

Epoch 00365: val_loss improved from 0.00403 


Epoch 00394: val_loss did not improve from 0.00402
Epoch 395/500

Epoch 00395: val_loss did not improve from 0.00402
Epoch 396/500

Epoch 00396: val_loss did not improve from 0.00402
Epoch 397/500

Epoch 00397: val_loss did not improve from 0.00402
Epoch 398/500

Epoch 00398: val_loss did not improve from 0.00402
Epoch 399/500

Epoch 00399: val_loss did not improve from 0.00402
Epoch 400/500

Epoch 00400: val_loss did not improve from 0.00402
Epoch 401/500

Epoch 00401: val_loss did not improve from 0.00402
Epoch 402/500

Epoch 00402: val_loss did not improve from 0.00402
Epoch 403/500

Epoch 00403: val_loss did not improve from 0.00402
Epoch 404/500

Epoch 00404: val_loss did not improve from 0.00402
Epoch 405/500

Epoch 00405: val_loss did not improve from 0.00402
Epoch 406/500

Epoch 00406: val_loss did not improve from 0.00402
Epoch 407/500

Epoch 00407: val_loss improved from 0.00402 to 0.00402, saving model to dir_01.h5
Epoch 408/500

Epoch 00408: val_loss did not improve from 0


Epoch 00437: val_loss did not improve from 0.00402
Epoch 438/500

Epoch 00438: val_loss did not improve from 0.00402
Epoch 439/500

Epoch 00439: val_loss did not improve from 0.00402
Epoch 440/500

Epoch 00440: val_loss did not improve from 0.00402
Epoch 441/500

Epoch 00441: val_loss did not improve from 0.00402
Epoch 442/500

Epoch 00442: val_loss did not improve from 0.00402
Epoch 443/500

Epoch 00443: val_loss did not improve from 0.00402
Epoch 444/500

Epoch 00444: val_loss did not improve from 0.00402
Epoch 445/500

Epoch 00445: val_loss did not improve from 0.00402
Epoch 446/500

Epoch 00446: val_loss did not improve from 0.00402
Epoch 447/500

Epoch 00447: val_loss did not improve from 0.00402
Epoch 448/500

Epoch 00448: val_loss did not improve from 0.00402
Epoch 449/500

Epoch 00449: val_loss did not improve from 0.00402
Epoch 450/500

Epoch 00450: val_loss did not improve from 0.00402
Epoch 451/500

Epoch 00451: val_loss did not improve from 0.00402
Epoch 452/500

Epoch 004


Epoch 00481: val_loss did not improve from 0.00401
Epoch 482/500

Epoch 00482: val_loss did not improve from 0.00401
Epoch 483/500

Epoch 00483: val_loss did not improve from 0.00401
Epoch 484/500

Epoch 00484: val_loss did not improve from 0.00401
Epoch 485/500

Epoch 00485: val_loss did not improve from 0.00401
Epoch 486/500

Epoch 00486: val_loss did not improve from 0.00401
Epoch 487/500

Epoch 00487: val_loss did not improve from 0.00401
Epoch 488/500

Epoch 00488: val_loss did not improve from 0.00401
Epoch 489/500

Epoch 00489: val_loss did not improve from 0.00401
Epoch 490/500

Epoch 00490: val_loss did not improve from 0.00401
Epoch 491/500

Epoch 00491: val_loss did not improve from 0.00401
Epoch 492/500

Epoch 00492: val_loss did not improve from 0.00401
Epoch 493/500

Epoch 00493: val_loss did not improve from 0.00401
Epoch 494/500

Epoch 00494: val_loss did not improve from 0.00401
Epoch 495/500

Epoch 00495: val_loss did not improve from 0.00401
Epoch 496/500

Epoch 004

In [61]:
 model = load_model("dir_01.h5")

In [62]:
def predictions(text):
    clean = re.sub(r'[^ a-z A-Z 0-9]', " ", text)
    test_word = word_tokenize(clean)
    test_word = [w.lower() for w in test_word]
    test_ls = token.texts_to_sequences(test_word)
    #print(test_word)
    #Check for unknown words
    if [] in test_ls:
        test_ls = list(filter(None, test_ls))
    
    test_ls = np.array(test_ls).reshape(1, len(test_ls))

    #print("test_ls: ", test_ls)

    x = pad_sequences(test_ls, maxlen = MAX_SENT_LENGTH, padding = "post")
    
    # print ("x: ", x)
    
    pred = model.predict_proba(x)
  
    return pred


In [63]:
def get_final_output(pred, classes):
    #print (type (pred))
    #print (pred)
    
    predictions = pred[0]
 
    classes = np.array(classes)
    ids = np.argsort(-predictions)
    classes = classes[ids]
#    predictions = -np.sort(-predictions)

#    for i in range(pred.shape[1]):
#        print("%s has confidence = %s" % (classes[i], (predictions[i])))
    
    return classes[0]


In [64]:
text = "Good morning"
pred = predictions(text)
out = get_final_output(pred, intents_unique)
print (out)

greet


In [66]:
# with open('mowgli_test_new.csv', newline='') as csvfile:
#    testfilelist = list(csv.reader(csvfile))

with open('mowgli_train_new.csv', newline='') as csvfile:
    testfilelist = list(csv.reader(csvfile))

total   = 0
correct = 0

for s in testfilelist:
    right_label = s[1]
    message     = s[0]

    pred   = predictions(message)
    intent = get_final_output(pred, intents_unique)

    if intent == right_label:
        correct += 1
    else:
        print ("%s recognized as %s, correct is %s" % (message, intent, right_label))
    total += 1

print ("Results: %d from %d correct = %4.2f percent" % (correct, total, (correct/total)*100.0))


cool recognized as thanks, correct is confirm
Results: 345 from 346 correct = 99.71 percent
