## Importing the necessary modules

In [14]:
from keras import Sequential
from keras.layers import Dense, Dropout, LeakyReLU
from keras.optimizers import Adam
from keras.models import save_model
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
import numpy as np
import json
import random
import string
# Inert string literal, not executed: it holds the one-time NLTK resource
# downloads. As the last expression of the cell it is echoed as the cell output.
# NOTE(review): running these lines would also need `import nltk`, which the
# imports above do not provide — confirm before enabling.
'''
nltk.download("punkt")
nltk.download("wordnet")
nltk.download('omw-1.4')
'''


'\nnltk.download("punkt")\nnltk.download("wordnet")\nnltk.download(\'omw-1.4\')\n'

## Loading the dataset

In [15]:
# Read the intents file. The context manager closes the handle as soon as the
# JSON is parsed instead of leaving it open until garbage collection, and the
# explicit encoding keeps the result independent of the platform default.
with open("dataset/intents.json", encoding="utf-8") as intents_file:
    data = json.load(intents_file)


## Using NLP to clean the data

In [16]:
# initializing lemmatizer to get stem of words
lemmatizer = WordNetLemmatizer()
# Each list to create
words = []
classes = []
doc_X = []
doc_y = []
# Loop through all the intents
# tokenize each pattern and append tokens to words, the patterns and
# the associated tag to their associated list
for intent in data["intents"]:
    for pattern in intent["patterns"]:
        tokens = word_tokenize(pattern)
        words.extend(tokens)
        doc_X.append(pattern)
        doc_y.append(intent["tag"])
    # add the tag to the classes if it's not there already
    if intent["tag"] not in classes:
        classes.append(intent["tag"])
# lemmatize all the words in the vocab and convert them to lowercase
# if the words don't appear in punctuation
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in string.punctuation]
# sorting the vocab and classes in alphabetical order and taking the # set to ensure no duplicates occur
words = sorted(set(words))
classes = sorted(set(classes))
%store words
%store classes

Stored 'words' (list)
Stored 'classes' (list)


## Creating the training set

In [17]:
# Each entry of `training` is a pair [bag-of-words vector, one-hot class vector]
training = []
out_empty = [0] * len(classes)
# Build the bag-of-words features.
# Every pattern is tokenized and each token is lowercased and lemmatized in the
# same way the vocabulary was built, then compared to vocabulary entries by
# exact match. A substring test on the raw sentence would mark short words
# (e.g. "a") as present inside longer ones (e.g. "what"), producing features
# that the token-based bag_of_words used at prediction time never generates.
for idx, doc in enumerate(doc_X):
    doc_tokens = {
        lemmatizer.lemmatize(token.lower()) for token in word_tokenize(doc)
    }
    bow = [1 if word in doc_tokens else 0 for word in words]
    # mark the index of the class that the current pattern belongs to
    output_row = list(out_empty)
    output_row[classes.index(doc_y[idx])] = 1
    # keep features and label together so they are shuffled as a unit
    training.append([bow, output_row])
# shuffle the samples and convert them to an array
random.shuffle(training)
training = np.array(training, dtype=object)
# split the features and the target labels
train_X = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))


## Building the deep learning model

In [18]:
# Layer sizes come from the data: the input width is the length of one
# bag-of-words vector, the output width is the length of one one-hot label
input_shape = (len(train_X[0]),)
output_shape = len(train_y[0])
# Feed-forward classifier: two LeakyReLU hidden layers, each followed by
# dropout, and a softmax output over the classes
model = Sequential()
model.add(Dense(128, input_shape=input_shape, activation=LeakyReLU()))
model.add(Dropout(0.25))
model.add(Dense(64, activation=LeakyReLU()))
model.add(Dropout(0.25))
model.add(Dense(output_shape, activation="softmax"))
# NOTE(review): `decay` is a legacy optimizer argument — confirm that the
# installed Keras version still accepts it.
adam = Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the table
model.summary()
model.fit(x=train_X, y=train_y, epochs=200, verbose=2)


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 128)               16768     
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dropout_3 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 28)                1820      
                                                                 
Total params: 26,844
Trainable params: 26,844
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/200


2022-10-29 11:47:35.147237: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


4/4 - 1s - loss: 3.3221 - accuracy: 0.0500 - 597ms/epoch - 149ms/step
Epoch 2/200
4/4 - 0s - loss: 2.8546 - accuracy: 0.3250 - 50ms/epoch - 13ms/step
Epoch 3/200
4/4 - 0s - loss: 2.2821 - accuracy: 0.4833 - 50ms/epoch - 13ms/step
Epoch 4/200
4/4 - 0s - loss: 1.6825 - accuracy: 0.6250 - 47ms/epoch - 12ms/step
Epoch 5/200
4/4 - 0s - loss: 1.1155 - accuracy: 0.7750 - 47ms/epoch - 12ms/step
Epoch 6/200
4/4 - 0s - loss: 0.7673 - accuracy: 0.8667 - 47ms/epoch - 12ms/step
Epoch 7/200
4/4 - 0s - loss: 0.4615 - accuracy: 0.9000 - 47ms/epoch - 12ms/step
Epoch 8/200
4/4 - 0s - loss: 0.3049 - accuracy: 0.9667 - 47ms/epoch - 12ms/step
Epoch 9/200
4/4 - 0s - loss: 0.1835 - accuracy: 0.9583 - 48ms/epoch - 12ms/step
Epoch 10/200
4/4 - 0s - loss: 0.1985 - accuracy: 0.9333 - 49ms/epoch - 12ms/step
Epoch 11/200
4/4 - 0s - loss: 0.1096 - accuracy: 0.9667 - 46ms/epoch - 11ms/step
Epoch 12/200
4/4 - 0s - loss: 0.0981 - accuracy: 0.9583 - 48ms/epoch - 12ms/step
Epoch 13/200
4/4 - 0s - loss: 0.0655 - accuracy

<keras.callbacks.History at 0x29731fa30>

## Functions for getting results

In [19]:
def clean_text(text):
    """Tokenize ``text`` and reduce every token to its lowercase lemma.

    Tokens are lowercased before lemmatization because the vocabulary is built
    from lowercased lemmas; without this, capitalised input such as "Hello"
    could never match a vocabulary entry.

    Args:
        text: The raw sentence to process.

    Returns:
        A list with one lowercased, lemmatized string per token, in the order
        produced by ``word_tokenize``.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in word_tokenize(text)]


def bag_of_words(text, vocab):
    """Encode ``text`` as a binary presence vector over ``vocab``.

    Args:
        text: The raw sentence; it is tokenized through ``clean_text``.
        vocab: Sequence of vocabulary entries that defines the vector layout.

    Returns:
        A NumPy array with one element per entry of ``vocab``: 1 when that
        entry equals at least one cleaned token of ``text``, otherwise 0.
    """
    cleaned = clean_text(text)
    return np.array([1 if entry in cleaned else 0 for entry in vocab])


def pred_class(text, vocab, labels, thresh=0.2):
    """Predict the labels of ``text`` with the module-level ``model``.

    Args:
        text: The raw sentence to classify.
        vocab: Vocabulary used to build the bag-of-words input vector.
        labels: Sequence of class labels, indexed like the model outputs.
        thresh: Minimum score (exclusive) an output must reach to be kept.
            Defaults to 0.2, the value that was previously hard-coded.

    Returns:
        The labels whose score is greater than ``thresh``, ordered from the
        highest score to the lowest. The list is empty when no score clears
        the threshold, so callers must handle that case.
    """
    bow = bag_of_words(text, vocab)
    # predict() works on batches: wrap the vector in a batch of one and take
    # the single row of scores back out
    scores = model.predict(np.array([bow]))[0]
    ranked = sorted(
        ((idx, score) for idx, score in enumerate(scores) if score > thresh),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [labels[idx] for idx, _ in ranked]


def get_response(intents_list, intents_json):
    """Pick a random response for the first intent in ``intents_list``.

    Args:
        intents_list: Predicted tags; only the first element is used.
        intents_json: Mapping with an "intents" list whose items carry a "tag"
            and a list of "responses".

    Returns:
        One entry chosen at random from the "responses" of the first intent
        whose "tag" equals ``intents_list[0]``.

    Raises:
        ValueError: If ``intents_list`` is empty or no intent carries the
            requested tag. Previously these cases surfaced as an IndexError
            and an UnboundLocalError respectively.
    """
    if not intents_list:
        raise ValueError("no predicted intent to respond to")
    tag = intents_list[0]
    for intent in intents_json["intents"]:
        if intent["tag"] == tag:
            return random.choice(intent["responses"])
    raise ValueError("no intent with tag %r in intents_json" % (tag,))



In [20]:
# Write the trained model to "chatbot_model.h5" in HDF5 format.
save_model(model, "chatbot_model.h5", save_format="h5")

## Testing the model

In [21]:
# Interactive text chat with the trained model.
# The commented-out lines are the optional voice mode (speech recognition and
# text-to-speech); they are kept disabled here.
# import platform
# import datetime
# import webbrowser
# import speech_recognition as sr
# import pyttsx3
# import wikipedia
# r = sr.Recognizer()
# engine = pyttsx3.init()
# VOICE_ID = "com.apple.speech.synthesis.voice.veena"  # for macOS
# if platform.system() == "Windows":  # for Windows
#     VOICE_ID = r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0"
# engine.setProperty('voice', VOICE_ID)
# engine.setProperty('rate', 150)
# engine.setProperty('volume', 0.7)
print("Anuradha is ready to chat! (say 'exit' to quit)")
while True:
    # now = datetime.datetime.now()
    # with sr.Microphone() as source:
    #     r.adjust_for_ambient_noise(source)
    #     r.dynamic_energy_threshold = True
    #     audio = r.record(source, duration=3)
    try:
        # message = r.recognize_google(audio, language="en-IN")
        message = input("You : ")
        # print("You :", message)
        if message == 'exit':
            TEXT = "Bye! take care"
            # engine.say(TEXT)
            print("Anuradha :", TEXT)
            # engine.runAndWait()
            break
        else:
            intents = pred_class(message, words, classes)
            # pred_class returns an empty list when no class score clears its
            # threshold. Indexing that list would raise an IndexError, which
            # the handler below does not catch and which would end the chat,
            # so route this case to the same fallback reply instead.
            if not intents:
                raise ValueError("no intent above the confidence threshold")
            result = get_response(intents, data)
            # engine.say(result)
            print("Anuradha :", result)
            # engine.runAndWait()
    except ValueError:
        TEXT = "Sorry, I didn't get that"
        # engine.say(TEXT)
        print("Anuradha :", TEXT)
        # engine.runAndWait()
    # except sr.UnknownValueError:
    #     if len(message) == 0:
    #         TEXT = "Anuradha is listening..."
    #         print("Anuradha :", TEXT)
    #     else:
    #         TEXT = "Sorry! I didn't get that. Can you repeat?"
    #         print("Anuradha :", TEXT)
    #         # engine.say(TEXT)
    #         # engine.runAndWait()
    #     print("Anuradha is listening...")
    # except sr.RequestError as e:
    #     print("Can't connect to the internet at this moment. Please check you network connection.")


Anuradha is ready to chat! (say 'exit' to quit)


2022-10-29 11:47:57.231503: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Anuradha : How are you doing?
Anuradha : Good to see you again
Anuradha : I am Anuradha, a Deep-Learning chatbot. I am here to help you
Anuradha : Bye! Come back again soon.
Anuradha : How do you do?
Anuradha : Howdy Partner!
Anuradha : Greetings!
Anuradha : Never criticize someone until you've walked a mile in their shoes. That way, when you criticize them, they won't be able to hear you from that far away. Plus, you'll have their shoes.
Anuradha : Thanks for the laugh
Anuradha : How do you do?
Anuradha : Hi there, how can I help?
Anuradha : Speak soon!
Anuradha : Hi, I'm Anuradha, a Deep-Learning chatbot
Anuradha : Hi there, how can I help?
Anuradha : Bye! take care


The model performs well, but it could be improved further by training on more data and for more epochs.