## Importing the necessary modules

In [None]:
from json import loads
from keras import Sequential
from keras.layers import Dense, Dropout, LeakyReLU, Softmax
from keras.optimizers import Adam
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from numpy import array
from random import choice, shuffle
from string import punctuation
'''
nltk.download("punkt")
nltk.download("wordnet")
nltk.download('omw-1.4')
'''


## Loading the dataset

In [None]:
# Load the intents definition. The context manager closes the file handle
# deterministically, and the explicit encoding keeps non-ASCII patterns and
# responses from depending on the platform's default codec.
with open("dataset/intents.json", encoding="utf-8") as intents_file:
    data = loads(intents_file.read())


## Using NLP to clean the data

In [None]:
lemmatizer = WordNetLemmatizer()

# Accumulators filled while walking the intents file
words = []    # every token of every pattern (duplicates kept at this stage)
classes = []  # distinct intent tags, in first-seen order
doc_X = []    # raw pattern strings
doc_y = []    # tag of the intent each entry of doc_X belongs to

# Walk every intent: tokenize each of its patterns into `words`, record the
# (pattern, tag) pair in doc_X / doc_y, and register the tag once in `classes`.
for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        tokens = word_tokenize(pattern)
        words += tokens
        doc_X.append(pattern)
        doc_y.append(tag)
    # a tag is only recorded the first time it is seen
    if tag not in classes:
        classes.append(tag)


In [None]:
# Normalise the vocabulary: drop tokens that consist solely of punctuation,
# then lower-case and lemmatize the remaining ones.
# `punctuation` is a plain string, so a bare `word not in punctuation` would be
# a substring test that only rejects tokens forming a contiguous slice of it;
# multi-character tokens such as "..." or "``" would slip into the vocabulary.
# Checking every character individually removes those as well.
words = [
    lemmatizer.lemmatize(word.lower())
    for word in words
    if not all(char in punctuation for char in word)
]


In [None]:
# De-duplicate the vocabulary and the tag list; each ends up as an
# alphabetically sorted list.
words = sorted({*words})
classes = sorted({*classes})


In [None]:
# Report the size of each collection built so far
for label, collection in (
    ("lenght of words: ", words),
    ("length of doc_X", doc_X),
    ("length of doc_y", doc_y),
    ("length of classes", classes),
):
    print(label, len(collection))


## Creating the training set

In [None]:
# Rows of [bag-of-words vector, one-hot class vector], one row per pattern
training = []

# Build the bag-of-words model.
for doc, tag in zip(doc_X, doc_y):
    # Tokenize and normalise the pattern exactly like the vocabulary was built
    # (tokenize -> lower-case -> lemmatize per token). Lemmatizing the whole
    # sentence as one "word" and testing `word in text` would be a substring
    # match, so e.g. the vocabulary entry "hi" would fire for "this".
    doc_tokens = {
        lemmatizer.lemmatize(token.lower()) for token in word_tokenize(doc)
    }
    # 1 where the vocabulary word occurs as a token of the pattern, else 0
    bow = [1 if word in doc_tokens else 0 for word in words]
    # one-hot vector marking the class the current pattern belongs to
    output_row = [0] * len(classes)
    output_row[classes.index(tag)] = 1
    training.append([bow, output_row])


In [None]:
# Shuffle the [bow, output_row] rows in place, then wrap them in a NumPy array.
# dtype=object stops NumPy from coercing the rows to a numeric dtype.
shuffle(training)
training = array(training, dtype=object)


In [None]:
# Separate the two columns of `training` and rebuild each as its own array
feature_column = training[:, 0]  # bag-of-words vectors
label_column = training[:, 1]  # one-hot class vectors
train_X = array([*feature_column])
train_y = array([*label_column])


In [None]:
# Bare expression: shows the feature matrix (presumably echoed as the notebook cell's output)
train_X


In [None]:
# Bare expression: shows the label matrix (presumably echoed as the notebook cell's output)
train_y


In [None]:
# Show the dimensions of the feature and label matrices
print("Shape of train_X:", train_X.shape)
print("Shape of train_y:", train_y.shape)


## Building the deep learning model

In [None]:
# Layer sizes are derived from the training matrices (both are expected to be
# 2-D: one row per sample).
input_shape = (train_X.shape[1],)  # width of one bag-of-words vector
output_shape = train_y.shape[1]  # width of one one-hot label vector
print("input_shape: ", input_shape)
print("output_shape: ", output_shape)


In [None]:
# Define the model: two hidden Dense blocks (128 and 64 units), each followed by
# a LeakyReLU activation and 25% dropout, then a softmax output over the classes.
# LeakyReLU is added as its own layer rather than being passed as `activation=`;
# Keras documents it as a layer, and this keeps the model config serialisable.
model = Sequential()
model.add(Dense(128, input_shape=input_shape))
model.add(LeakyReLU(alpha=0.3))
model.add(Dropout(0.25))
model.add(Dense(64))
model.add(LeakyReLU(alpha=0.3))
model.add(Dropout(0.25))
# softmax is available as a built-in activation name, no layer instance needed
model.add(Dense(output_shape, activation="softmax"))


In [None]:
# Configure training: Adam optimizer (learning rate 0.01), categorical
# cross-entropy loss, accuracy reported as the metric.
adam = Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)


In [None]:
# summary() prints the table itself and returns None, so wrapping it in print()
# would only add a stray "None" line to the output.
model.summary()


## Training the model

In [None]:
# Train on the full training set for 200 epochs
model.fit(train_X, train_y, epochs=200)


## Functions for getting results

In [None]:
def clean_text(text):
    """Tokenize *text* and return its lower-cased, lemmatized tokens.

    The vocabulary is built from lower-cased lemmas, so the input is lower-cased
    here as well; otherwise capitalised words (e.g. "Hello") would never match
    a vocabulary entry.

    Args:
        text: The raw sentence to normalise.

    Returns:
        A list with one normalised token per token found by `word_tokenize`.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in word_tokenize(text)]

def bag_of_words(text, vocab):
    """Encode *text* as a binary bag-of-words vector over *vocab*.

    Args:
        text: The raw sentence to encode.
        vocab: Sequence of vocabulary words; position i of the result
            corresponds to vocab[i].

    Returns:
        A NumPy array with 1 at every position whose vocabulary word equals one
        of the cleaned tokens of *text*, and 0 elsewhere.
    """
    # A set gives O(1) membership tests, avoiding a scan of the whole
    # vocabulary for every token.
    present = set(clean_text(text))
    return array([1 if word in present else 0 for word in vocab])

def pred_class(text, vocab, labels, thresh=0.2):
    """Predict the labels for *text*, most probable first.

    Args:
        text: The raw sentence to classify.
        vocab: Vocabulary used to build the bag-of-words input.
        labels: Class labels; labels[i] belongs to output i of the model.
        thresh: Minimum score (exclusive) an output needs to be reported.
            Defaults to 0.2.

    Returns:
        The labels whose score exceeds *thresh*, sorted by descending score.
        The list is empty when no score exceeds the threshold.
    """
    bow = bag_of_words(text, vocab)
    # the model expects a batch, so wrap the single vector and unwrap the result
    result = model.predict(array([bow]))[0]
    ranked = sorted(
        ((idx, score) for idx, score in enumerate(result) if score > thresh),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [labels[idx] for idx, _ in ranked]

def get_response(intents_list, intents_json, default="Sorry, I didn't get that"):
    """Return a random response for the top predicted intent.

    Args:
        intents_list: Predicted tags, most probable first; only the first
            entry is used.
        intents_json: Parsed intents data with an "intents" list whose items
            carry a "tag" and a list of "responses".
        default: Reply returned when there is no prediction, no intent carries
            the predicted tag, or the matching intent has no responses.

    Returns:
        One randomly chosen response of the first matching intent, or *default*.
    """
    # Nothing was predicted: there is no tag to look up.
    if not intents_list:
        return default
    tag = intents_list[0]
    for intent in intents_json["intents"]:
        if intent["tag"] == tag and intent["responses"]:
            return choice(intent["responses"])
    # No usable intent found for the tag; fall back instead of failing on an
    # unassigned result.
    return default


## Testing the model

In [None]:
from datetime import datetime
import platform
import pyttsx3
import speech_recognition as sr

now = datetime.now()
engine = pyttsx3.init()
r = sr.Recognizer()
mic = sr.Microphone()

# Select a specific voice on Windows and macOS; every other platform keeps
# the engine's default voice.
system_name = platform.system()
if system_name == "Windows":
    engine.setProperty(
        'voice', r"HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0")
elif system_name == "Darwin":
    engine.setProperty(
        'voice', "com.apple.speech.synthesis.voice.samantha")

# Reply used whenever the utterance could not be understood or classified
FALLBACK = "Sorry, I didn't get that"

TEXT = "Anuradha is ready to chat! (type 'exit' to quit)"
print(TEXT)
engine.say(TEXT)
engine.runAndWait()

# Conversation loop: record 3 seconds of audio, transcribe it, classify the
# transcript and speak the reply, until the user says "exit".
while True:
    # Reset per turn so a failed recognition never echoes the previous utterance
    message = None
    should_quit = False
    with mic as source:
        r.adjust_for_ambient_noise(source)
        r.dynamic_energy_threshold = True
        print("Say Now")
        audio = r.record(source, duration=3)
    try:
        # Only the recognition call can raise the speech errors handled below
        message = r.recognize_google(audio, language="en-IN")
    except sr.UnknownValueError:
        TEXT = FALLBACK
    except sr.RequestError:
        TEXT = "Sorry, can't connect to the service"
    else:
        # Check for the exit word first so no prediction is wasted on it;
        # the transcript may come back capitalised.
        if message.strip().lower() == 'exit':
            TEXT = "Bye! take care"
            should_quit = True
        else:
            intents = pred_class(message, words, classes)
            # No class above the threshold: answer with the fallback instead
            # of indexing into an empty prediction list.
            result = get_response(intents, data) if intents else FALLBACK
            if result == "date":
                # "date"/"time" responses are placeholders for the current
                # value, so the clock is read at answer time.
                now = datetime.now()
                TEXT = now.strftime(r'%d/%m/%Y')
            elif result == "time":
                now = datetime.now()
                TEXT = now.strftime(r'%H:%M:%S')
            else:
                TEXT = result

    if message is not None:
        print("You :", message)
    print("Anuradha :", TEXT)
    engine.say(TEXT)
    engine.runAndWait()
    if should_quit:
        break
