In [11]:
import warnings

In [12]:
warnings.filterwarnings("ignore")
import nltk
from nltk.stem import WordNetLemmatizer
import json
import pickle

In [13]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
import random
from keras.models import load_model

In [14]:
import pandas as pd

In [15]:
# Create one WordNetLemmatizer instance; the vocabulary, training-data and
# inference cells below all reuse it.
lemmatizer = WordNetLemmatizer()

In [16]:
# Accumulators filled by the tokenization cell further down:
#   words     - every token found in the intent patterns
#   classes   - intent tags that own at least one pattern
#   documents - (token_list, tag) pairs, one per pattern
# ignore_words lists the tokens dropped when the vocabulary is built.

words=[]
classes = []
documents = []
ignore_words = ['?', '!']

In [17]:
#data_file = open("data.json").read()
#intents = json.loads(data_file)

In [20]:
# Load the intents corpus from GitHub. pandas parses the JSON into a DataFrame
# with a single `intents` column holding one dict per row (see the output below).
data_file = "https://raw.githubusercontent.com/TrinadhM-dev/GDPProject-02/main/data.json"
intents = pd.read_json(data_file)

In [21]:
# Display the parsed corpus: one row per intent dict.
intents

Unnamed: 0,intents
0,"{'tag': 'Welcome', 'patterns': ['just going to..."
1,"{'tag': 'FallBack', 'patterns': [], 'responses..."
2,"{'tag': 'CourseOverview', 'patterns': ['Tell m..."
3,"{'tag': 'ACSProfessors', 'patterns': ['Provide..."
4,"{'tag': 'CreditHours', 'patterns': ['How many ..."
5,"{'tag': 'TuitionFee', 'patterns': ['Is there a..."
6,"{'tag': 'Intakes', 'patterns': ['How many inta..."
7,"{'tag': 'ACSCourses', 'patterns': ['What are t..."
8,"{'tag': 'Electives', 'patterns': ['Are there a..."
9,"{'tag': 'Prerequisites', 'patterns': ['Are the..."


<h4>Data Preprocessing</h4>


In [None]:
# Download the NLTK resources used by this notebook:
#   punkt / punkt_tab - tokenizer models behind nltk.word_tokenize
#   wordnet           - dictionary used by WordNetLemmatizer
# Newer NLTK releases load `punkt_tab` instead of `punkt`. Requesting both keeps
# the notebook working on either side of that change; on releases that do not
# know the `punkt_tab` id, nltk.download is expected to just report the failure
# and return False (NOTE(review): confirm on the NLTK version in use).
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

<h4>Tokenization</h4>

In [None]:
# Reset the accumulators so this cell is idempotent: re-running it must not
# append every pattern a second time, nor mix raw tokens into a vocabulary
# that the lemmatization cell has already rewritten.
words = []
classes = []
documents = []

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # tokenize the pattern and add its tokens to the raw vocabulary
        tokens = nltk.word_tokenize(pattern)
        words.extend(tokens)
        # one training document per pattern: (tokens, intent tag)
        documents.append((tokens, tag))
        # Register the tag only once it has a pattern. This check deliberately
        # stays inside the pattern loop so intents with no patterns never
        # become a class the network has to predict without any examples.
        if tag not in classes:
            classes.append(tag)

<h4>Lemmatization</h4>

In [None]:


# Build the vocabulary: drop the tokens listed in ignore_words, lower-case and
# lemmatize the rest, then de-duplicate and sort so that every word gets a
# stable index in the bag-of-words vector.
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words})

# De-duplicate and sort the intent tags; the sorted position of a tag is the
# index of its output neuron.
classes = sorted(set(classes))

# documents = combination between patterns and intents
print (len(documents), "documents")

# classes = intents
print (len(classes), "classes", classes)

# words = all words, vocabulary
print (len(words), "unique lemmatized words", words)

# Persist the vocabulary and the class list for use at prediction time.
# `with` guarantees the files are flushed and closed before they are read back.
with open('words.pkl','wb') as words_file:
    pickle.dump(words, words_file)

with open('classes.pkl','wb') as classes_file:
    pickle.dump(classes, classes_file)

<h4>Training Data</h4>

In [None]:


# Build the training set: one [bag_of_words, one_hot_tag] pair per document.
training = []

# Template for the one-hot output row (one slot per class).
output_empty = [0] * len(classes)

for doc in documents:
    # Lemmatize/lower-case the pattern tokens the same way the vocabulary was
    # built; a set gives O(1) membership tests in the comprehension below.
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}

    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot encode the tag of the current pattern.
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Shuffle the samples, then split features from labels straight from the
# Python list so the split does not depend on how NumPy lays out nested lists.
random.shuffle(training)

train_x = [sample[0] for sample in training]
train_y = [sample[1] for sample in training]

# `training` is still exposed as an object array for the cells that inspect it.
training = np.array(training, dtype=object)

print("Training data created")

In [None]:
# Sanity check: the previous cell converts `training` with np.array(...).
print(type(training))

<h4>Creating NN Model</h4>

In [None]:
# Import the necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

# Feed-forward classifier: bag-of-words vector in, one softmax score per intent out.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# SGD with Nesterov momentum. The regular optimizer class is used instead of
# tf.keras.optimizers.legacy.SGD because the `legacy` namespace is not
# available on every Keras/TensorFlow release.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Fit the model; `hist` keeps the per-epoch loss/accuracy history.
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)

# Save the model for future use. Only the path is passed: the second positional
# parameter of Model.save is `overwrite`, not the training history.
model.save('chatbot.h5')

print("\n")
print("*" * 50)
print("\nModel Created Successfully!")

In [None]:
# load the saved model file
model = load_model('chatbot.h5')

# Re-read the corpus from `data_file` (the same source used at the top of the
# notebook) instead of a user-specific absolute path that only exists on one
# machine. The result has the plain-JSON shape {'intents': [ {...}, ... ]}.
intents = {'intents': list(pd.read_json(data_file)['intents'])}

# Vocabulary and class list written by the preprocessing cell.
# NOTE: only unpickle files this notebook produced itself; pickle.load on
# untrusted files can execute arbitrary code.
with open('words.pkl','rb') as words_file:
    words = pickle.load(words_file)

with open('classes.pkl','rb') as classes_file:
    classes = pickle.load(classes_file)

In [None]:

def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and lemmatized."""
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]


In [None]:
# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence

def bow(sentence, words, show_details=True):
    """Encode `sentence` as a bag-of-words vector over `words`.

    Returns a NumPy array with one entry per vocabulary word: 1 if the
    (tokenized, lemmatized) sentence contains that word, 0 otherwise.
    When `show_details` is true, every match is printed.
    """
    tokens = clean_up_sentence(sentence)
    presence = [0 for _ in range(len(words))]
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            # mark the vocabulary slot of the matched word
            presence[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)
    return np.array(presence)

In [None]:

def predict_class(sentence, model, error_threshold=0.25):
    """Rank the intents predicted for `sentence`.

    Parameters:
        sentence: raw user text.
        model: trained classifier exposing a Keras-style ``predict``.
        error_threshold: predictions with a score at or below this value are
            discarded (defaults to the previously hard-coded 0.25).

    Returns:
        A list of ``{"intent": tag, "probability": str(score)}`` dicts sorted
        by descending score. The list is empty when no class clears the
        threshold.
    """
    features = bow(sentence, words, show_details=False)
    # verbose=0 keeps Keras' per-call progress bar out of the chat transcript.
    scores = model.predict(np.array([features]), verbose=0)[0]
    ranked = sorted(
        ((index, score) for index, score in enumerate(scores) if score > error_threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {"intent": classes[index], "probability": str(score)}
        for index, score in ranked
    ]

In [None]:
 

# function to get the response from the model

 

def getResponse(ints, intents_json, fallback_tag='FallBack',
                default_response='Please re-phrase your query!'):
    """Pick a random response for the best-ranked predicted intent.

    Parameters:
        ints: ranked predictions as returned by ``predict_class``; may be
            empty when no class cleared the confidence threshold.
        intents_json: mapping whose ``'intents'`` entry is an iterable of
            intent dicts with ``'tag'`` and ``'responses'`` keys.
        fallback_tag: tag of the intent used when there is no prediction or
            the predicted tag has no usable responses.
        default_response: text returned when the fallback intent is missing
            or has no responses either.

    Returns:
        One response string.
    """
    list_of_intents = list(intents_json['intents'])

    # No prediction at all -> go straight to the fallback intent instead of
    # failing on ints[0].
    wanted_tag = ints[0]['intent'] if ints else fallback_tag

    for candidate in (wanted_tag, fallback_tag):
        for intent in list_of_intents:
            if intent['tag'] == candidate:
                responses = intent.get('responses')
                if responses:
                    return random.choice(responses)
                # first matching tag has no responses: try the next candidate
                break

    return default_response

In [None]:
# function to predict the class and get the response

def chatbot_response(text):
    """Classify `text` with the global model and return a matching response."""
    return getResponse(predict_class(text, model), intents)

In [None]:
# function to start the chat bot which will continue till the user type 'end'

 

def start_chat():
    """Run a console chat loop until the user types 'end'.

    Input is stripped and lower-cased once. Blank (or whitespace-only) input
    and a lone '*' ask the user to re-phrase instead of being sent to the
    model; anything else is answered through ``chatbot_response``.
    """
    print("Bot: This is MS ACS  Personal Assistant.\n\n")
    while True:
        inp = input().strip().lower()
        if inp == "end":
            break
        if inp in ('', '*'):
            print('Please re-phrase your query!')
        else:
            print(f"Bot: {chatbot_response(inp)}"+'\n')
        print("-"*50)

<h4>Chatting with the bot from the command line</h4>

In [31]:
# Start the console chat loop; type 'end' to leave it.
start_chat()

<h4>Chatting with the bot using a Tkinter app</h4>


<h5>To run the Tkinter GUI, download this notebook in .ipynb format and run it with Jupyter Notebook on your local machine; Tkinter apps cannot run in Google Colab.</h5>

In [None]:
import random
import tkinter as tk
from tkinter import *

In [None]:
# Fixed-size top-level window for the chat GUI.
root = tk.Tk()
filename = "Chat Bot"
# Use the title variable rather than repeating the text in a placeholder-less f-string.
root.title(filename)
root.geometry('500x400')
root.resizable(False, False)
# Holds the text typed into the message entry (bound via textvariable below).
message = tk.StringVar()

In [None]:
# Frame that hosts the chat transcript; placed at a fixed position and size
# inside the root window.
chat_win=Frame(root,bd=1,bg='white',width=50,height=8)
chat_win.place(x=6,y=6,height=300,width=488)

# Text widget showing the conversation; packed to fill the whole frame.
textcon=tk.Text(chat_win,bd=1,bg='white',width=50,height=8)
textcon.pack(fill="both",expand=True)

In [None]:
# Single-line entry for the user's message, backed by the `message` StringVar.
# `xscrollcommand` is intentionally not set: it must be a scrollbar callback,
# and the previous value `True` was not a command Tk could call.
mes_win=Entry(root,width=30,textvariable=message)
mes_win.place(x=6,y=310,height=60,width=380)
# Give the entry keyboard focus so the user can start typing immediately.
mes_win.focus()

In [None]:
# Style the transcript, register the 'usr' tag used for the user's lines,
# and print the opening bot message.
textcon.config(fg='black')
textcon.tag_config('usr',foreground='black')
textcon.insert(END,"Bot: This is ACS Chatbot, Your Personal Assistant.\n\n")
# NOTE(review): `mssg` is not read anywhere in the visible notebook.
mssg=mes_win.get()

In [None]:
# Messages (compared in lower case) that end the GUI chat session.
exit_list = [
    'exit',
    'break',
    'quit',
    'see you later',
    'chat with you later',
    'end the chat',
    'bye',
    'ok bye',
    'byie',
]

In [None]:
def greet_res(text):
    """Return a random greeting if `text` contains a known greeting, else None.

    Matching is done on whole words, so 'hi' does not match inside 'this'.
    Apostrophes are removed and surrounding punctuation is stripped from each
    word, so inputs such as "Hi!" or "what's up?" are recognised. Multi-word
    greetings such as 'whats up' are matched as a word sequence.
    """
    bot_greet=['hi','hello','hola','hey','howdy','hyy']
    usr_greet=['hi','hey','hello','hola','greetings','wassup','whats up']

    # Normalise each word: lower-case, no apostrophes, no edge punctuation.
    cleaned = (
        word.replace("'", "").strip('.,!?;:"')
        for word in text.lower().split()
    )
    # Pad with spaces so every greeting is matched on word boundaries.
    padded = " " + " ".join(word for word in cleaned if word) + " "

    for greeting in usr_greet:
        if f" {greeting} " in padded:
            return random.choice(bot_greet)
    return None

In [None]:
def send_msz(event=None):
    """Handle the Send button / <Return> key.

    Reads the entry text, echoes it into the transcript and appends the bot's
    reply. A message found in `exit_list` closes the window. Blank input is
    ignored. `event` is accepted so the function can serve as a Tk key
    binding; it is not used.
    """
    usr_input = message.get().strip().lower()
    if not usr_input:
        # Nothing to send: just clear stray whitespace from the entry.
        mes_win.delete(0,END)
        return None

    textcon.insert(END, f'You: {usr_input}'+'\n','usr')
    textcon.config(fg='black')

    if usr_input in exit_list:
        textcon.insert(END,"Bot: Ok bye! Chat with you later\n")
        return root.destroy()

    # Ask for a greeting once: greet_res picks randomly, so calling it twice
    # could test one value and display another.
    greeting = greet_res(usr_input)
    reply = greeting if greeting is not None else chatbot_response(usr_input)
    textcon.insert(END, f"Bot: {reply}"+'\n')
    mes_win.delete(0,END)
    return None

In [None]:
# Send button; clicking it calls send_msz.
button_send=Button(root,text='Send',bg='dark green',activebackground='grey',command=send_msz,width=12,height=5,font=('Arial'))
button_send.place(x=376,y=310,height=60,width=110)
# Pressing Enter sends the message too. bind() takes (sequence, func, add);
# the button widget must not be passed as `add`, where any truthy value makes
# Tk append another handler every time this cell is re-run.
root.bind('<Return>', send_msz)
# Hand control to Tk; returns when the window is closed.
root.mainloop()