In [1]:
import warnings

In [2]:
warnings.filterwarnings("ignore")

#NLTK (Natural Language Toolkit) is an NLP (Natural Language Processing) library for Python. 
import nltk
from nltk.stem import WordNetLemmatizer
import json
import pickle

In [3]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.legacy import SGD
import random
from keras.models import load_model
import pandas as pd




In [4]:
# create an object of WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

In [5]:
# importing the GL Bot corpus file for pre-processing

#stores all the words found in corpus.
words=[]

#stores all the different classes/intents for  chatbot.
classes = []

#stores tuples containing tokenized words and their associated intent or class.
documents = []

ignore_words = ['?', '!']

In [6]:
#data_file = open(r'C:\Users\s559891\OneDrive - nwmissouri.edu\Documents\Z_GDP\jupyter\data.json').read()
#intents = json.loads(data_file)

In [7]:
data_file = "https://raw.githubusercontent.com/TrinadhM-dev/GDPProject-02/main/data.json"
intents=pd.read_json(data_file)

In [8]:
intents

Unnamed: 0,intents
0,"{'tag': 'Welcome', 'patterns': ['just going to..."
1,"{'tag': 'FallBack', 'patterns': ['', '', '', '..."
2,"{'tag': 'CourseOverview', 'patterns': ['Tell m..."
3,"{'tag': 'ACSProfessors', 'patterns': ['Provide..."
4,"{'tag': 'Assistance for graduates ', 'patterns..."
5,"{'tag': 'CreditHours', 'patterns': ['How many ..."
6,"{'tag': 'TuitionFee', 'patterns': ['Is there a..."
7,"{'tag': 'Intakes', 'patterns': ['How many inta..."
8,"{'tag': 'ACSCourses', 'patterns': ['What are t..."
9,"{'tag': 'Electives', 'patterns': ['Are there a..."


<h4>Data Preprocessing</h4>

In [9]:
# preprocessing the json data
# tokenization
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\s559891\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\s559891\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [10]:
for intent in intents['intents']:
    for pattern in intent['patterns']:

        #tokenize each word
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        #add documents in the corpus
        documents.append((w, intent['tag']))

        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [11]:
# lemmatize, lower each word and remove duplicates

words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))

# sort classes
classes = sorted(list(set(classes)))

# documents = combination between patterns and intents
print (len(documents), "documents")

# classes = intents
print (len(classes), "classes", classes)

# words = all words, vocabulary
print (len(words), "unique lemmatized words", words)

# creating a pickle file to store the Python objects which we will use while predicting
pickle.dump(words,open('words.pkl','wb')) 
pickle.dump(classes,open('classes.pkl','wb'))

358 documents
27 classes ['ACSCourses', 'ACSProfessors', 'Academic Transcripts', 'Admission Crietria', 'Assistance for graduates ', 'CS/IS International Graduate Achievement Scholarship', 'College fees per credit in ACS', 'CourseOverview', 'CreditHours', 'DeadlinesForFallIntake', 'DeadlinesForSpringIntake', 'DeadlinesForSummerIntake', 'Duration of MS-ACS', 'Electives', 'FallBack', 'Intakes', 'JobPlacementRate', 'OnCampusJobs', 'Prerequisites', 'Rankings and Reputation', 'RequiredGPA', 'Standardized test score requirements', 'Student-Faculty Ratio', 'TuitionFee', 'Welcome', 'Wellness Center', 'specialized tools for coursework? ']
420 unique lemmatized words ["'d", "'m", "'s", ',', '.', ';', 'a', 'about', 'ac', 'academic', 'acceptable', 'accepts', 'access', 'achieve', 'achievement', 'achieving', 'additional', 'admission', 'advanced', 'after', 'again', 'aid', 'allowed', 'am', 'among', 'an', 'and', 'anticipated', 'any', 'applicant', 'application', 'applied', 'apply', 'applying', 'are', 'ar

<h4>Creating Training Data</h4>

In [12]:
# create our training data
training = []

# create an empty array for our output
output_empty = [0] * len(classes)

# training set, bag of words for each sentence
for doc in documents:
    # initialize our bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
   
    # lemmatize each word - create base word, in attempt to represent related words
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    
    # create our bag of words array with 1, if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

# shuffle features and converting it into numpy arrays
random.shuffle(training)
training = np.array(training,dtype=list)

# create train and test lists
train_x = list(training[:,0])
train_y = list(training[:,1])

print("Training data created")

Training data created


In [13]:
type(training)

numpy.ndarray

<h4>Creating NN Model</h4> 

In [14]:
# Create NN model to predict the responses
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model 
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('chatbot.h5', hist) # we will pickle this model to use in the future
print("\n")
print("*"*50)
print("\nModel Created Successfully!")


Epoch 1/200


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch

<h3>2. Creating LSTM-based model</h3>

In [15]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout

# Define LSTM Model
model_lstm = Sequential()

# Input Embedding layer
model_lstm.add(Embedding(input_dim=len(words), output_dim=128, input_length=len(train_x[0])))

# LSTM layer
model_lstm.add(LSTM(128))

# Dropout for regularization
model_lstm.add(Dropout(0.5))

# Output Dense layer with Softmax activation for classification
model_lstm.add(Dense(len(train_y[0]), activation='softmax'))

# Compile the model
model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model_lstm.fit(np.array(train_x), np.array(train_y), epochs=10, batch_size=5, verbose=1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1b217f2e650>

<h3>3. Transformer-based Model</h3>

In [None]:
from tensorflow.keras.layers import MultiHeadAttention, GlobalAveragePooling1D, Flatten

# Define Transformer Model
model_transformer = Sequential()

# Input Embedding layer
model_transformer.add(Embedding(input_dim=len(words), output_dim=128, input_length=len(train_x[0])))

# MultiHeadAttention layer
model_transformer.add(MultiHeadAttention(num_heads=4, key_dim=128))

# Flatten or Pooling layer
model_transformer.add(GlobalAveragePooling1D())

# Dropout for regularization
model_transformer.add(Dropout(0.5))

# Output Dense layer with Softmax activation for classification
model_transformer.add(Dense(len(train_y[0]), activation='softmax'))

# Compile the model
model_transformer.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model_transformer.fit(np.array(train_x), np.array(train_y), epochs=10, batch_size=5, verbose=1)


"from tensorflow.keras.layers import MultiHeadAttention, GlobalAveragePooling1D, Flatten\n\n# Define Transformer Model\nmodel_transformer = Sequential()\n\n# Input Embedding layer\nmodel_transformer.add(Embedding(input_dim=len(words), output_dim=128, input_length=len(train_x[0])))\n\n# MultiHeadAttention layer\nmodel_transformer.add(MultiHeadAttention(num_heads=4, key_dim=128))\n\n# Flatten or Pooling layer\nmodel_transformer.add(GlobalAveragePooling1D())\n\n# Dropout for regularization\nmodel_transformer.add(Dropout(0.5))\n\n# Output Dense layer with Softmax activation for classification\nmodel_transformer.add(Dense(len(train_y[0]), activation='softmax'))\n\n# Compile the model\nmodel_transformer.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n\n# Train the model\nmodel_transformer.fit(np.array(train_x), np.array(train_y), epochs=10, batch_size=5, verbose=1)"

In [None]:
# load the saved model file
model = load_model('chatbot.h5')
#intents = json.loads(open(r'C:\Users\s559891\OneDrive - nwmissouri.edu\Documents\Z_GDP\jupyter\data.json').read())
intents = pd.read_json(data_file)
words = pickle.load(open('words.pkl','rb'))
classes = pickle.load(open('classes.pkl','rb'))

In [None]:
def clean_up_sentence(sentence):

    # tokenize the pattern - split words into array
    sentence_words = nltk.word_tokenize(sentence)
    
    # stem each word - create short form for word
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words


# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence

def bow(sentence, words, show_details=True):

    # tokenize the pattern
    sentence_words = clean_up_sentence(sentence)

    # bag of words - matrix of N words, vocabulary matrix
    bag = [0]*len(words) 
    for s in sentence_words:
        for i,w in enumerate(words):
            if w == s: 
               
                # assign 1 if current word is in the vocabulary position
                bag[i] = 1
                if show_details:
                    print ("found in bag: %s" % w)
    return(np.array(bag))

def predict_class(sentence, model):
   
    # filter out predictions below a threshold
    p = bow(sentence, words,show_details=False)
    res = model.predict(np.array([p]))[0]
    error = 0.25
    results = [[i,r] for i,r in enumerate(res) if r>error]
    
    # sort by strength of probability
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    
    for r in results:
        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
    return return_list

In [None]:
# function to get the response from the model

def getResponse(ints, intents_json):
    tag = ints[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if(i['tag']== tag):
            result = random.choice(i['responses'])
            break
    return result

# function to predict the class and get the response

def chatbot_response(text):
    ints = predict_class(text, model)
    res = getResponse(ints, intents)
    return res

In [None]:
# function to start the chat bot which will continue till the user type 'end'

def start_chat():
    print("Bot: This is MS-ACS Chatbot, Your Personal Assistant.\n\n")
    while True:
        inp = str(input()).lower()
        if inp.lower()=="end":
            break
        if inp.lower()== '' or inp.lower()== '*':
            print('Please re-phrase your query!')
            print("-"*50)
        else:
            print(f"Bot: {chatbot_response(inp)}"+'\n')
            print("-"*50)

In [None]:
start_chat()

Bot: This is MS-ACS Chatbot, Your Personal Assistant.




<h4>Chatting with BOT using the Tkinter App</h4>

In [None]:
import tkinter as tk
from tkinter import *
from datetime import datetime
import requests
import sqlite3

# Create a session
session = requests.Session()

# Create a new SQLite database for chat history
chat_conn = sqlite3.connect("chat_history.db")
chat_cursor = chat_conn.cursor()

# Create a chat history table if it doesn't exist
chat_cursor.execute('''
    CREATE TABLE IF NOT EXISTS chat_history (
        id INTEGER PRIMARY KEY,
        user_message TEXT,
        bot_response TEXT,
        timestamp TEXT
    )
''')

# Commit the changes and close the connection
chat_conn.commit()
chat_conn.close()





def send_msz(event=None):
    usr_input = message.get()
    usr_input = usr_input.lower()

    # Insert user query on the right side with a different tag
    textcon.insert(END, f'You: {usr_input}\n', 'user_query')

    if usr_input in exit_list:
        textcon.insert(END, "Bot: Ok bye! Chat with you later\n", 'chatbot_response')
        textcon.see("end")
        return root.destroy()
    else:
        lab = f"Bot: {chatbot_response(usr_input)}\n\n"

        # Insert chatbot response on the left side with a different tag
        textcon.insert(END, lab, 'chatbot_response')
        textcon.see("end")
        mes_win.delete(0, END)


def save_chat_history(user_message, bot_response):
    chat_conn = sqlite3.connect("chat_history.db")
    chat_cursor = chat_conn.cursor()

    # Insert chat history data into the chat history table
    chat_cursor.execute("INSERT INTO chat_history (user_message, bot_response, timestamp) VALUES (?, ?, ?)",
                       (user_message, bot_response, str(datetime.now())))

    # Commit the changes and close the connection
    chat_conn.commit()
    chat_conn.close()


def scroll_chat(direction):
    textcon.yview_scroll(direction, "units")


# Connect to the SQLite database or create a new one if it doesn't exist
conn = sqlite3.connect("feedback.db")

# Create a cursor object
cursor = conn.cursor()

# Create a feedback table if it doesn't exist
cursor.execute('''
    CREATE TABLE IF NOT EXISTS feedback (
        id INTEGER PRIMARY KEY,
        name TEXT,
        email TEXT,
        feedback TEXT,
        submission_date TEXT
    )
''')

# Commit the changes and close the connection
conn.commit()
conn.close()



# Declare feedback_window as a global variable
feedback_window = None

def open_feedback_form():
    
    global feedback_window  # Declare feedback_window as a global variable
    feedback_window = Toplevel(root)
    feedback_window.title("Feedback Form")

    # Create labels and entry fields for feedback form
    label_name = Label(feedback_window, text="Name:")
    label_name.pack()
    entry_name = Entry(feedback_window)
    entry_name.pack()

    label_email = Label(feedback_window, text="Email:")
    label_email.pack()
    entry_email = Entry(feedback_window)
    entry_email.pack()

    label_feedback = Label(feedback_window, text="Feedback:")
    label_feedback.pack()
    text_feedback = Text(feedback_window, height=5, width=30)
    text_feedback.pack()

    submit_button = Button(feedback_window, text="Submit", command=lambda: save_feedback(entry_name.get(), entry_email.get(), text_feedback.get("1.0", END)))
    submit_button.pack()

    feedback_window.mainloop()





'''def save_feedback(name, email, feedback):
    feedback_data = f"Name: {name}\nEmail: {email}\nFeedback: {feedback}\nSubmission Date: {datetime.now()}\n\n"

    # Save the feedback data to a local file
    with open("feedback.txt", "a") as file:
        file.write(feedback_data)

    # Provide a confirmation message
    print("Feedback saved locally.")

    # Close the feedback form window
    feedback_window.destroy()'''

def save_feedback(name, email, feedback):
    # Connect to the SQLite database
    conn = sqlite3.connect("feedback.db")
    cursor = conn.cursor()

    # Insert feedback data into the feedback table
    cursor.execute("INSERT INTO feedback (name, email, feedback, submission_date) VALUES (?, ?, ?, ?)",(name, email, feedback, str(datetime.now())))
    # Commit the changes and close the connection
    conn.commit()
    conn.close()

    print("Feedback saved to the database successfully.")

    # Close the feedback form window
    feedback_window.destroy()



   

root = tk.Tk()
root.title("MS-Applied Computer Science Chatbot Project")
root.geometry("500x400")
root.resizable(False, False)

chat_win = Frame(root, bd=1, bg="white", width=50, height=8)
chat_win.place(x=6, y=6, height=300, width=488)

canvas = Canvas(chat_win, bd=0, bg="white", highlightthickness=0)
canvas.pack(side="left", fill="both", expand=True)

vsb = Scrollbar(chat_win, orient="vertical", command=scroll_chat)
vsb.pack(side="right", fill="y")

up_button = Button(chat_win, text="▲", command=lambda: scroll_chat(-1), width=2, height=1)
up_button.pack(side="top")

down_button = Button(chat_win, text="▼", command=lambda: scroll_chat(1), width=2, height=1)
down_button.pack(side="bottom")

canvas.configure(yscrollcommand=vsb.set)
canvas.bind("<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")))

textcon = Text(canvas, bd=0, bg="white", width=50, height=8)
textcon.pack(fill="both", expand=True)

message = tk.StringVar()

mes_win = Entry(root, width=30, xscrollcommand=True, textvariable=message)
mes_win.place(x=6, y=310, height=60, width=380)
mes_win.focus()

# Define tag configurations for user queries and chatbot responses
textcon.tag_config("user_query", justify="right", foreground="blue")
textcon.tag_config("chatbot_response", justify="left", foreground="green")

exit_list = ["exit", "break", "quit", "see you later", "chat with you later", "end the chat", "bye", "ok bye", "end"]

button_send = Button(root, text='Send', bg='dark green', activebackground='grey', command=send_msz, width=12, height=5, font=('Arial'))
button_send.place(x=376, y=310, height=60, width=110)

button_feedback = Button(root, text='Feedback', command=open_feedback_form)
button_feedback.place(x=6, y=370, height=30, width=110)

root.bind('<Return>', send_msz)

root.mainloop()


In [None]:
root.mainloop()

<h4>Using Flask for Web interface</h4>

In [1]:
from flask import Flask, render_template, request, jsonify
import sqlite3

app = Flask(__name__)

# Assuming chatbot_response is defined elsewhere and properly imported
#from your_chatbot_module import chatbot_response  # Replace with the actual import

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/ask', methods=['POST'])
def ask():
    message = request.form['messageText']
    response = chatbot_response(message)  # Ensure this function is defined in your chatbot module
    return jsonify({'status':'OK','answer':response})

@app.route('/submit_feedback', methods=['POST'])
def submit_feedback():
    name = request.form['name']
    email = request.form['email']
    feedback = request.form['feedback']
    
    # Here you would add validation and sanitization of the data

    # Connect to the SQLite database
    conn = sqlite3.connect('Flask_Feedback.db')
    c = conn.cursor()

    # Create the feedback table if it doesn't exist
    c.execute('''CREATE TABLE IF NOT EXISTS feedback
                 (name TEXT, email TEXT, feedback TEXT)''')

    # Insert the feedback into the database
    c.execute("INSERT INTO feedback (name, email, feedback) VALUES (?, ?, ?)",
              (name, email, feedback))

    # Commit the insert and close the database connection
    conn.commit()
    conn.close()

    # Return a success message
    return jsonify({'status':'success'})

if __name__ == "__main__":
    app.run(debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
[2023-11-10 17:33:42,175] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\flask\app.py", line 1455, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\flask\app.py", line 869, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\flask\app.py", line 867, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\flask\app.py", line 852, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\s559891\AppData\Local\Temp\ipykernel_4888\1650361750.py", line 11, in index
    return render_template('i