In [63]:
# import necessary libraries

import json
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer

## Load the JSON file with the corpus

In [64]:
with open("GL Bot.json") as file:
    corpus = json.load(file)

## Create an array of text and labels from the corpus

In [65]:
W=[] # text array
L=[] # labels array
doc_X=[]
doc_Y=[]
for intent in corpus["intents"]:
    for pattern in intent["patterns"]:
        w_temp = nltk.word_tokenize(pattern) # tokenize the text
        W.extend(w_temp)
        doc_X.append(w_temp)
        doc_Y.append(intent["tag"])
    
    if intent["tag"] not in L:
        L.append(intent["tag"])

## Build the train and target arrays with the one hot encoded numerical data to be processed by the model

In [66]:
train=[]
target=[] 

out_empty = [0 for _ in range(len(L))]
lemmatizer = WordNetLemmatizer()

for x, doc in enumerate(doc_X):
    bag=[]
    w_temp = [lemmatizer.lemmatize(w.lower()) for w in doc] # use lemmatization to reduce the words to their roots
    for w in W:
        if w in w_temp:
            bag.append(1)
        else:
            bag.append(0)
    
    output_row = out_empty[:]
    output_row[L.index(doc_Y[x])] = 1
    
    train.append(bag)
    target.append(output_row)

# Build the train and test data

In [71]:
from sklearn.model_selection import  train_test_split
X_train, X_test, y_train, y_test = train_test_split(train, target, test_size=0.30, random_state=42)

# Use RandomForest model and fit the model

In [113]:
from sklearn.ensemble import RandomForestClassifier
rfl = RandomForestClassifier(random_state=42)
rfl.fit(X_train,y_train)

RandomForestClassifier(random_state=42)

In [114]:
print("Training Accuracy")
rfl.score(X_train,y_train)

Training Accuracy


0.9775280898876404

In [115]:
print("Testing Accuracy")
rfl.score(X_test,y_test)

Testing Accuracy


0.4358974358974359

## Define a function that takes text as input and convers it into the encoded text array to be fed into the model for label / response prediction

In [116]:
def bag_of_words(inp_txt):
    bag=[]
    inp_words = nltk.word_tokenize(inp_txt)
    w_temp = [lemmatizer.lemmatize(w.lower()) for w in inp_words]
    for w in W:
        if w in w_temp:
            bag.append(1)
        else:
            bag.append(0)
    return np.reshape(bag, (1,-1)) # convert the 1D array to 2D array, as expected by the model

# Define the chatbot function which will display the response after capturing the input from the user

In [117]:
def chatbot():
    global rfl
    responses = ''
    print("Welcome to my new Chatbot (type Quit to exit)")
    print("If response is not correct,type:*")
    while True:
        inp = input(" ")
        if inp.lower() == "*":
            print("BOT: Please rephrase your question and try again")
            break
        if inp.lower() == "quit":
            break
        
        result = rfl.predict(bag_of_words(inp)) # get the encoded result / label data
        result_index = np.argmax(result) # get the index with the max value, i.e. 1 in this case
        tag = L[result_index] # get the label / tag with the corresponding index from the label array
        
        for tg in corpus["intents"]:
            if tg['tag'] == tag:
                responses = tg["responses"] # capture the response for the index related to above label
        print(responses) # show the response to the end user       

## Invoke the chatbot function

In [None]:
chatbot()

Welcome to my new Chatbot (type Quit to exit)
If response is not correct,type:*
 adam
['Link: Neural Nets wiki']
 sgd
['Link: Neural Nets wiki']
 joke
['Hello! how can i help you ?']
 useless bot
['Link: Machine Learning wiki ']
 Hello
['Hello! how can i help you ?']
 How are you?
['Hello! how can i help you ?']
 wow
['Link: Machine Learning wiki ']
 great
['Hello! how can i help you ?']
 blended
['Link: Machine Learning wiki ']
 online
['Hello! how can i help you ?']
 thanks
['I hope I was able to assist you, Good Bye']
 ton
['Link: Machine Learning wiki ']
 great help
['I hope I was able to assist you, Good Bye']
 too good
['I hope I was able to assist you, Good Bye']
 thanks a ton
['I hope I was able to assist you, Good Bye']
 later
['Link: Machine Learning wiki ']
 good bye
['I hope I was able to assist you, Good Bye']
 see you
['I hope I was able to assist you, Good Bye']
 see you later
['I hope I was able to assist you, Good Bye']
 cya
['I hope I was able to assist you, Good Bye'