## Importing the necessary modules

In [None]:
from json import loads
from keras import Sequential
from keras.layers import Dense, Dropout, LeakyReLU, Softmax
from keras.optimizers import Adam
from nltk import word_tokenize
from nltk.stem import WordNetLemmatizer
from numpy import array
from random import choice, shuffle
from string import punctuation
'''
nltk.download("punkt")
nltk.download("wordnet")
nltk.download('omw-1.4')
'''


## Loading the dataset

In [None]:
# Load the intents definition. The context manager closes the file handle
# deterministically, and the explicit encoding avoids depending on the
# platform's default codec when the JSON contains non-ASCII text.
with open("dataset/intents.json", encoding="utf-8") as intents_file:
    data = loads(intents_file.read())


## Using NLP to clean the data

In [None]:
lemmatizer = WordNetLemmatizer()

# Accumulators filled while walking the intents data:
#   words   - every token of every pattern (duplicates kept for now)
#   classes - each distinct intent tag, in first-seen order
#   doc_X   - the raw pattern sentences
#   doc_y   - the tag of the pattern stored at the same position in doc_X
words, classes, doc_X, doc_y = [], [], [], []

for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        # Tokenize the pattern and record it together with its tag.
        words += word_tokenize(pattern)
        doc_X.append(pattern)
        doc_y.append(tag)
    # Register the tag once, even if several intents share it.
    if tag not in classes:
        classes.append(tag)

In [None]:
# words of sentences in patterns
words

In [None]:
# tags
classes

In [None]:
#patterns
doc_X

In [None]:
# tag of each pattern (same order as doc_X)
doc_y

In [None]:
# Normalize the raw tokens: discard every token that occurs inside
# string.punctuation, then lowercase and lemmatize what is left.
kept_tokens = [token for token in words if token not in punctuation]
words = [lemmatizer.lemmatize(token.lower()) for token in kept_tokens]


In [None]:
# vocabulary tokens after punctuation removal, lowercasing and lemmatization
words

In [None]:

# Collapse duplicates through a set, then order alphabetically so every
# word and every tag ends up at a deterministic index.
words = list(set(words))
words.sort()
classes = list(set(classes))
classes.sort()


In [None]:
# removes duplicates and sorts in alphabetical order
words

In [None]:
len(classes)

In [None]:
classes

## Creating the training set

In [None]:
# Collects one [bag_of_words, one_hot_label] row per pattern; the rows are
# appended by the bag-of-words loop further down.
training = []

In [None]:
len(words)

In [None]:
len(doc_X)

In [None]:
len(doc_y)

In [None]:
# Build one [bag_of_words, one_hot_label] row per training pattern.
# The list is reset first so that re-running this cell does not append a
# second copy of every row.
training = []
for doc, tag in zip(doc_X, doc_y):
    # Tokenize and normalize the pattern the same way the vocabulary was
    # built (lowercase + lemmatize per token). Matching whole tokens, rather
    # than substrings of the raw sentence, keeps short vocabulary words such
    # as "a" from firing on every sentence that merely contains that letter,
    # and makes the training features agree with bag_of_words() at inference.
    doc_tokens = {lemmatizer.lemmatize(token.lower()) for token in word_tokenize(doc)}
    bow = [1 if word in doc_tokens else 0 for word in words]
    # One-hot label: a single 1 at the index of this pattern's tag.
    output_row = [0] * len(classes)
    output_row[classes.index(tag)] = 1
    training.append([bow, output_row])

In [None]:
training

In [None]:
# Shuffle the training rows in place; the conversion to a numpy array
# happens in a later cell.
shuffle(training)

In [None]:
training

In [None]:

# dtype=object: each row pairs a bag-of-words list (one entry per vocabulary
# word) with a one-hot label list (one entry per class), and those two lists
# generally differ in length.
training = array(training, dtype=object)

In [None]:
training

In [None]:


# Column 0 holds the bag-of-words vectors, column 1 the one-hot labels;
# each column is rebuilt as a regular numpy array.
feature_rows = list(training[:, 0])
label_rows = list(training[:, 1])
train_X = array(feature_rows)
train_y = array(label_rows)


In [None]:
train_X

In [None]:
train_X.shape

In [None]:
train_y

In [None]:
train_y.shape

## Building the deep learning model

In [None]:
# Layer sizes derived from the data: one input per column of train_X and
# one output unit per column of train_y.
input_shape = (train_X.shape[1],)
output_shape = train_y.shape[1]

In [None]:
input_shape

In [None]:
output_shape

In [None]:
# Feed-forward classifier: two LeakyReLU hidden layers (128 and 64 units),
# each followed by 25% dropout, then a softmax layer with output_shape units.
model = Sequential([
    Dense(128, input_shape=input_shape, activation=LeakyReLU(alpha=0.3)),
    Dropout(0.25),
    Dense(64, activation=LeakyReLU(alpha=0.3)),
    Dropout(0.25),
    Dense(output_shape, activation=Softmax()),
])

In [None]:
# Adam optimizer with a 0.01 learning rate; categorical cross-entropy fits
# the one-hot encoded labels, and accuracy is reported while training.
adam = Adam(learning_rate=0.01)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it
# in print() only adds a stray "None" line after the table.
model.summary()


## Training the model

In [None]:
# Train on the full data set for 200 epochs.
model.fit(train_X, train_y, epochs=200)


## Functions for getting results

In [None]:
def clean_text(text):
    """Tokenize *text* and normalize every token like the vocabulary.

    The vocabulary is built from lowercased, lemmatized tokens, so the user's
    message has to be lowercased as well; otherwise capitalised input such as
    "Hello" never matches its vocabulary entry.

    Args:
        text: Raw message string.

    Returns:
        List of lowercased, lemmatized tokens in their original order.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in word_tokenize(text)]

def bag_of_words(text, vocab):
    """Encode *text* as a binary bag-of-words vector over *vocab*.

    Args:
        text: Raw message string; it is tokenized with clean_text().
        vocab: Sequence of vocabulary words that fixes the vector layout.

    Returns:
        numpy array with one entry per vocabulary word: 1 where that word
        equals one of the message tokens, 0 everywhere else.
    """
    present = set(clean_text(text))
    return array([1 if word in present else 0 for word in vocab])

def pred_class(text, vocab, labels, thresh=0.2):
    """Predict the intent labels for *text*, most probable first.

    Args:
        text: Raw message string.
        vocab: Vocabulary used to build the bag-of-words input.
        labels: Class labels, indexed like the model's output units.
        thresh: Minimum predicted probability (exclusive) a class needs to
            be returned. Defaults to 0.2.

    Returns:
        List of labels whose probability exceeds *thresh*, sorted by
        descending probability. The list is empty when no class clears the
        threshold.
    """
    bow = bag_of_words(text, vocab)
    # verbose=0 keeps the Keras progress bar out of the chat transcript.
    probabilities = model.predict(array([bow]), verbose=0)[0]
    ranked = sorted(
        ((idx, prob) for idx, prob in enumerate(probabilities) if prob > thresh),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [labels[idx] for idx, _ in ranked]

def get_response(intents_list, intents_json):
    """Return a random response for the best predicted intent.

    Args:
        intents_list: Predicted tags, best match first; only the first
            entry is used.
        intents_json: Parsed intents data: a mapping with an "intents" list
            whose items carry a "tag" and a list of "responses".

    Returns:
        One entry of the matching intent's "responses", picked at random.

    Raises:
        ValueError: If *intents_list* is empty or its first tag matches no
            intent. Previously these cases surfaced as IndexError and
            UnboundLocalError; ValueError is what the chat loop handles.
    """
    if not intents_list:
        raise ValueError("no intent was predicted for the message")
    tag = intents_list[0]
    for intent in intents_json["intents"]:
        if intent["tag"] == tag:
            return choice(intent["responses"])
    raise ValueError(f"no intent with tag {tag!r} in the intents data")



## Testing the model

In [None]:
from datetime import datetime

# Interactive chat loop: read a message, predict its intent and print a
# response until the user types 'exit'.
print("Anuradha is ready to chat! (type 'exit' to quit)")
while True:
    try:
        message = input("You : ")
        if message == 'exit':
            TEXT = "Bye! take care"
            print("Anuradha :", TEXT)
            break
        intents = pred_class(message, words, classes)
        if not intents:
            # No class cleared the prediction threshold; answer politely
            # instead of crashing on an empty prediction list.
            TEXT = "Sorry, I didn't get that"
            print("Anuradha :", TEXT)
            continue
        result = get_response(intents, data)
        # Read the clock for every reply; a timestamp captured once before
        # the loop would make the "date"/"time" answers go stale.
        now = datetime.now()
        if result == "date":
            TEXT = now.strftime(r'%d/%m/%Y')
            print("Anuradha :", TEXT)
        elif result == "time":
            TEXT = now.strftime(r'%H:%M:%S')
            print("Anuradha :", TEXT)
        else:
            print("Anuradha :", result)
    except ValueError:
        TEXT = "Sorry, I didn't get that"
        print("Anuradha :", TEXT)
