# TRP FAQ chatbot using pretrained Google News Word2Vec Model

Importing necessary libraries

In [1]:
import os 
import numpy as np
import pandas as pd
import re, unicodedata
import string
import pickle
from nltk import sent_tokenize, word_tokenize

In [2]:
# NOTE(review): absolute, machine-specific path -- this cell only runs where this directory exists.
# The working directory is changed so the CSV below can be opened by its bare file name.
os.chdir('C:/Users/akadali/Desktop/Deep_NLP/MLG_Capstone_ChatBot/ChatBot_GoogleW2V')
# Load the FAQ dataset; the file is decoded as latin1.
data = pd.read_csv('trp_faq_dataset.csv', encoding = 'latin1')
# Keep only the two columns used by the rest of the notebook.
data = data[['question', 'answer']]

Importing stop words, WordNetLemmatizer and punctuations

In [3]:
import nltk
# English stop-word list from the NLTK stopwords corpus.
# NOTE(review): `stops` is not referenced by any other cell visible in this notebook.
stops = nltk.corpus.stopwords.words('english')

from nltk.stem import WordNetLemmatizer
# Single lemmatizer instance, used by text_preprocess.
lemmatizer = WordNetLemmatizer()

import string
# Start from the ASCII punctuation characters...
puncs = string.punctuation
#Adding additional punctuations
# ...and append the typographic apostrophe. The straight quote and backtick
# appended alongside it are already part of string.punctuation (see the printed output).
puncs = puncs + "’'`" 
print(puncs)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~’'`


Data Cleaning
-----

* This dataset has a lot of contractions, hence all of them need to be removed/replaced within the dataset. The below function replaces the contractions with corresponding word pairs.

* Also, there are quite a few abbreviations such as U.S., US, USI to be substituted with their expanded forms

* removing additional spaces, if there are any

In [4]:
import re
def text_clean(text):
    """Expand abbreviations and contractions, then strip punctuation from `text`.

    Steps, in order:
      1. Typographic apostrophes and backticks are normalised to a straight
         quote so the contraction rules also apply to text such as "don’t".
      2. "U.S." / "U.S", "US" and "USI" (upper case only, matched as whole
         tokens) are expanded. This runs before punctuation removal so the
         dots are still present, and the dots are escaped so tokens such as
         "UPS" or "USSR" are no longer rewritten by accident.
      3. Common English contractions are expanded ("what's" and "i'm" are
         matched case-insensitively).
      4. Every character in the module-level `puncs` string becomes a space.
      5. Runs of spaces are collapsed to one space (the text is not stripped).

    Parameters
    ----------
    text : str
        Raw question text.

    Returns
    -------
    str
        The cleaned text.
    """
    # 1. Make every apostrophe variant look the same.
    text = re.sub(r"[’`]", "'", text)

    # 2. Abbreviations. The negative look-ahead keeps "U.S.A." untouched.
    text = re.sub(r"\bU\.S\.?(?![\w.])", " united states ", text)
    text = re.sub(r"\bUSI\b", " united states india ", text)
    text = re.sub(r"\bUS\b", " united states ", text)

    # 3. Contractions. The specific forms must run before the generic rules,
    #    otherwise "what's" would lose its "is" and "can't" would become "ca not".
    text = re.sub(r"\bwhat's\b", "what is ", text, flags=re.IGNORECASE)
    text = re.sub(r"\bi'm\b", "i am ", text, flags=re.IGNORECASE)
    text = re.sub(r"\bcan't\b", "can not ", text, flags=re.IGNORECASE)
    text = re.sub(r"\bwon't\b", "will not ", text, flags=re.IGNORECASE)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"'s", " ", text)
    text = re.sub(r"'ve", " have ", text)
    text = re.sub(r"'re", " are ", text)
    text = re.sub(r"'d", " would ", text)
    text = re.sub(r"'ll", " will ", text)

    # 4. Replace all remaining punctuation with spaces.
    text = re.sub('[%s]' % re.escape(puncs), ' ', text)

    # 5. Collapse the extra spaces introduced by the substitutions above.
    text = re.sub(r" +", " ", text)
    return text

Data Preprocessing and Cleaning(2)
----------------------------
* Tokenizing the words
* Lemmatization
* removing numeric digits and punctuations

In [5]:
def text_preprocess(text):
    """Tokenize `text`, lower-case and lemmatize each token, keep tokens with a letter.

    Returns a flat list of strings: every lemmatized token that contains at
    least one ASCII letter. Purely numeric or punctuation tokens are dropped.
    """
    # Lower-case first, then lemmatize, one token at a time.
    lemmas = (lemmatizer.lemmatize(raw_token.lower()) for raw_token in word_tokenize(text))
    # A token survives as soon as it contains any letter a-z / A-Z.
    return [lemma for lemma in lemmas if re.search('[a-zA-Z]', lemma)]

Applying 'Cleaning' and 'Preprocessing' methods on the dataset

In [6]:
# Run text_clean over every FAQ question (overwrites the 'question' column in place)
data['question'] = data['question'].apply(text_clean)

In [7]:
# Turn every cleaned question into its list of lemmatized tokens (overwrites the 'question' column in place)
data['question'] = data['question'].apply(text_preprocess)

Pre-trained word and phrase vectors from gensim models
-----------------------------------------
* Load GoogleNews-vectors-negative300, a set of pre-trained word and phrase vectors. The 'questions' in our dataset are converted to vectors by averaging the vectors of the individual words in each question.

* This can be loaded using 'gensim.models'

In [8]:
import gensim.models
#Changing the directory to get to the embeddings
# NOTE(review): absolute, machine-specific path; this also changes the working directory for every later cell.
os.chdir('C:/Users/akadali/Desktop/Deep_NLP/MLG_Capstone_ChatBot/ChatBot_GoogleW2V/Word Embeddings')
# Load the binary word2vec file as KeyedVectors; `model` is the lookup used by embedding_feats.
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',binary=True)

The function below creates a 300-dimensional vector for each question by averaging the vectors of all its tokens that are present in the Google News pre-trained Word2Vec model.

In [9]:
# Creating a feature vector by averaging embeddings for all words in a sentence
def embedding_feats(list_of_tokens):
    """Average the word vectors of the tokens that exist in `model`.

    Parameters
    ----------
    list_of_tokens : iterable of str
        Tokens of one question.

    Returns
    -------
    numpy.ndarray
        1-D float vector. Its length is `model.vector_size` when the model
        exposes that attribute, otherwise 300. When none of the tokens is in
        the vocabulary (or the list is empty) an all-zero vector is returned
        instead of the NaN vector a 0/0 division would produce, so downstream
        similarity computations do not fail on NaN input.
    """
    # Fall back to 300 for model objects that do not expose vector_size.
    dimension = getattr(model, "vector_size", 300)
    total = np.zeros(dimension)
    matched = 0
    for token in list_of_tokens:
        if token in model:
            total += model[token]
            matched += 1
    if matched == 0:
        # Nothing to average: keep the zero vector rather than dividing by zero.
        return total
    return total / matched

Creating vectors for all questions in the faq dataset

In [10]:
#np.seterr(divide='ignore', invalid='ignore')
# One averaged embedding per FAQ question, in dataset order
question_vectors = list(map(embedding_feats, data['question']))

# Establishing connection to the 'Talent referral payout' database in SQL
* Establish connection with the TRP databases (both payout and referral databases) in SQL Workbench
* Write functions to retrieve the data based on user request.

In [11]:
import mysql.connector
# Connection settings can be overridden through environment variables so real
# credentials do not have to be stored in the notebook. The fallbacks are the
# original local-development values, so behaviour is unchanged when no
# variable is set.
mydb = mysql.connector.connect(host=os.environ.get("TRP_DB_HOST", "localhost"),
                               user=os.environ.get("TRP_DB_USER", "root"),
                               passwd=os.environ.get("TRP_DB_PASSWORD", "root"),
                               auth_plugin='mysql_native_password',
                               database=os.environ.get("TRP_DB_NAME", "talent_referral_payout"))

In [12]:
# Module-level cursor shared by search_pay_db and search_status_db.
mycursor = mydb.cursor(buffered = True)

In [13]:
#Definition to obtain the referral bonus payout history of a referring professional

def search_pay_db(emp_id):
    """Look up one bonus payout row for an employee.

    Parameters
    ----------
    emp_id : str or int
        Employee number; converted to a string and stripped of surrounding
        whitespace before being sent to the database.

    Returns
    -------
    tuple or None
        (referral_name, amount, date_of_origin) of the first matching row in
        trp_payment_report_sample, or None when no row matches.
    """
    emp_id = str(emp_id).strip()
    # The value is passed as a bound parameter rather than concatenated into
    # the statement, so user input can never be interpreted as SQL.
    query = "SELECT referral_name,amount,date_of_origin FROM trp_payment_report_sample WHERE emp_num = %s"
    mycursor.execute(query, (emp_id,))
    return mycursor.fetchone()

In [14]:
#Definition to obtain the status of referrals submitted by the referring professional

def search_status_db(rms_id):
    """Look up one referral status row for a candidate.

    Parameters
    ----------
    rms_id : str or int
        Candidate ID; converted to a string and stripped of surrounding
        whitespace before being sent to the database.

    Returns
    -------
    tuple or None
        (candidate_name, requisition_no, current_status, reference_date) of
        the first matching row in trp_status, or None when no row matches.
    """
    rms_id = str(rms_id).strip()
    # The value is passed as a bound parameter rather than concatenated into
    # the statement, so user input can never be interpreted as SQL.
    query = "SELECT candidate_name,requisition_no,current_status,reference_date FROM trp_status WHERE candidate_id = %s"
    mycursor.execute(query, (rms_id,))
    return mycursor.fetchone()

* This block of code converts the user's question to a 300-dimensional vector and calculates its cosine similarity against all question vectors in the FAQ dataset.
* It then returns the corresponding answer of the best-matched question.

In [15]:
from sklearn.metrics.pairwise import cosine_similarity

def get_response(t):
    """Run the console chat loop until the user types 'quit'.

    Each line read from the user is handled as follows:
      * empty line        -> ask for more information
      * 'referral_bonus'  -> prompt for an employee ID and print search_pay_db's result
      * 'referral_status' -> prompt for an RMS/Taleo ID and print search_status_db's result
      * 'quit'            -> leave the loop
      * anything else     -> clean and preprocess the text, embed it, and print
                             the answer of the most similar FAQ question, or an
                             apology when the best cosine similarity is below `t`

    Parameters
    ----------
    t : float
        Minimum cosine similarity required to return an FAQ answer.

    Returns
    -------
    None
    """
    fallback = "Hmm..sorry, I don't quite understand that, can you please rephrase your question"
    while True:
        user_input = input("You:").strip()
        command = user_input.lower()
        if len(user_input) == 0:
            print("Bot: Can you please provide more information")
        elif command == 'referral_bonus':
            print("Enter your personnel number to fetch the payment details")
            emp_no = input("Emp. ID:")
            ans = search_pay_db(emp_no)
            print("Bot: Your bonus payout details are below (in the last 6 months)")
            print("Bot:", ans)
            print("Bot: Let me know if I can help you with anything else")
        elif command == 'referral_status':
            print("Bot: Enter your referral's RMS/Taleo ID to fetch the status info.")
            rms_id = input("ID:")
            ans = search_status_db(rms_id)
            print("Bot: Below are the list of referrals you submitted in the last 6 months")
            print("Bot:", ans)
            print("Bot: Let me know if I can help you with anything else")
        elif command == 'quit':
            break
        else:
            # Preprocess the CLEANED text; previously the raw input was
            # preprocessed and the cleaning result was thrown away.
            tokens = text_preprocess(text_clean(user_input))
            q_vec = np.asarray(embedding_feats(tokens), dtype=float) if tokens else None
            if q_vec is None or not np.isfinite(q_vec).all():
                # No usable tokens / no in-vocabulary words: a NaN vector
                # would make cosine_similarity raise, so answer with the fallback.
                response = fallback
            else:
                sims = cosine_similarity([q_vec], question_vectors)
                if sims.max() < t:
                    response = fallback
                else:
                    response = data.answer[np.argmax(sims)]
            print("Bot:", response)

In [16]:
# Disabled legacy driver: the whole chat() definition below is wrapped in a
# string literal, so nothing in it is defined or executed. Note that it calls
# get_response(user_input, conf), which does not match the current
# one-argument get_response(t) signature.
"""
def chat():
    print("Specify the confidence level")
    level = input("Confidence:")
    conf = int(level)/100
    print("Hi There....I'm Talent Referral Bot and I'm here to help you with referral inquiries\n[type 'quit' to stop]")
    print("***TIP: PLEASE ENTER \n 1.'referral_status' -> to check your referral status \n 2.'referral_bonus' -> to check your bonus payout status and \n 3.'program' -> for program related inquiry")
    while True:
        user_input = input("You:")
        if user_input.lower() == 'quit':
            break
        if user_input == '':
            print("Bot: Can you please provide more information")
        elif user_input.lower() == 'referral_bonus':
            print("can you please provide your personnel employee number")
            emp_no = input("Emp. ID:")
            emp_no = str(emp_no)
            ans = search_pay_db(emp_no)
            print("Bot: Please find your bonus payout details below (in the last 6 months)")
            print("Bot:", ans)
        elif user_input.lower() == 'referral_status':
            print("can you please provide your referral's personal email adress to fetch the status info.")
            email = input("Email:")
            email = str(email)
            ans = search_status_db(email)
            print("Bot: Please find the status of your referrals(in the last 6 months)")
            print("Bot:",ans)
        elif user_input.lower() == 'program':
            print("Bot: Please enter your question here\n (type 'quit' to stop)")
            while True:
                user_input = input("You:")
                if user_input.lower() == 'quit':
                    break
                ans = get_response(user_input, conf)
                print("Bot:", ans)
"""

'\ndef chat():\n    print("Specify the confidence level")\n    level = input("Confidence:")\n    conf = int(level)/100\n    print("Hi There....I\'m Talent Referral Bot and I\'m here to help you with referral inquiries\n[type \'quit\' to stop]")\n    print("***TIP: PLEASE ENTER \n 1.\'referral_status\' -> to check your referral status \n 2.\'referral_bonus\' -> to check your bonus payout status and \n 3.\'program\' -> for program related inquiry")\n    while True:\n        user_input = input("You:")\n        if user_input.lower() == \'quit\':\n            break\n        if user_input == \'\':\n            print("Bot: Can you please provide more information")\n        elif user_input.lower() == \'referral_bonus\':\n            print("can you please provide your personnel employee number")\n            emp_no = input("Emp. ID:")\n            emp_no = str(emp_no)\n            ans = search_pay_db(emp_no)\n            print("Bot: Please find your bonus payout details below (in the last 6 mon

In [17]:
def chat_test():
    """Ask for a confidence level (0-100) and start the console chat loop.

    The confidence is read as a percentage and converted to the 0-1 threshold
    that get_response compares cosine similarities against. Decimal values and
    a trailing '%' are accepted; anything that is not a number between 0 and
    100 is rejected and the prompt is repeated instead of raising ValueError.

    Returns
    -------
    None
    """
    print("Specify the confidence level")
    while True:
        level = input("Confidence:")
        try:
            conf = float(level.strip().rstrip('%')) / 100
        except ValueError:
            conf = None
        # The range check also rejects 'nan' and 'inf', which float() accepts.
        if conf is not None and 0 <= conf <= 1:
            break
        print("Please enter a number between 0 and 100")
    print("Hi There....I'm Talent Referral Bot and I'm here to help you with referral inquiries\n[type 'quit' to stop]")
    print("***TIP: PLEASE ENTER \n 1.'referral_status' -> to check your referral status \n 2.'referral_bonus' -> to check your bonus payout status and \n 3.'program' -> for program related inquiry")
    # get_response prints its own replies and returns nothing.
    get_response(conf)

In [18]:
#chat_test()

In [19]:
# The two bare string literals below are never assigned or used by any code;
# they only keep sample IDs at hand for manual testing. The second one is the
# cell's last expression, which is why it is echoed as the cell output.
#Sample RMS IDs for chatbot testing of referral status
"""
24211419
18327009
24482992
21996054
22425736
24426987
17783534
24601097
23736632
24698456
24623156
21705392
"""
#Sample Employee IDs for chatbot testing bonus status
"""
213408
239660
249058
250194
260943
271637
274346
282822
284183
284315
284754
285695
290560
"""

'\n213408\n239660\n249058\n250194\n260943\n271637\n274346\n282822\n284183\n284315\n284754\n285695\n290560\n'

# Methods for Chatbot GUI response 

In [20]:
def gui_response(user_input, t):
    """Return the FAQ answer that best matches `user_input`.

    Parameters
    ----------
    user_input : str
        The question typed by the user.
    t : float
        Minimum cosine similarity required to return an FAQ answer.

    Returns
    -------
    str
        * a request for more information when the input is empty or blank
          (previously this path raised UnboundLocalError),
        * an apology when nothing usable can be embedded or the best
          similarity is below `t`,
        * otherwise the answer of the most similar FAQ question.
    """
    fallback = "Hmm..sorry, I don't quite understand that, can you please rephrase your question"
    if len(user_input.strip()) == 0:
        return "Can you please provide more information"

    # Preprocess the CLEANED text; previously the raw input was preprocessed
    # and the cleaning result was thrown away.
    tokens = text_preprocess(text_clean(user_input))
    if not tokens:
        return fallback

    q_vec = np.asarray(embedding_feats(tokens), dtype=float)
    if not np.isfinite(q_vec).all():
        # A NaN vector (no in-vocabulary word) would make cosine_similarity raise.
        return fallback

    sims = cosine_similarity([q_vec], question_vectors)
    if sims.max() < t:
        return fallback
    return data.answer[np.argmax(sims)]

In [21]:
def gui_bonus(emp_id):
    """Build the chat reply describing an employee's bonus payout.

    Looks the employee up through search_pay_db and formats the returned row
    (name, amount, date) into a sentence; returns a "no result" message when
    the lookup yields nothing.
    """
    row = search_pay_db(str(emp_id))
    if row is None:
        return "Sorry...No result"
    # row[0] = referral name, row[1] = amount, row[2] = date of origin
    return "You were paid ${} for referring {} along with {} pay period".format(
        str(row[1]), str(row[0]), str(row[2]))

In [22]:
def gui_status(rms_id):
    """Build the chat reply describing a referral's current status.

    Looks the candidate up through search_status_db and formats the returned
    row into a sentence; returns a "no result" message when the lookup yields
    nothing.
    """
    row = search_status_db(str(rms_id))
    if row is None:
        return "Sorry...No result"
    # row[0] = candidate name, row[1] = requisition number, row[2] = current status
    return "Your referral {} has been {} for requisition with ID:{}".format(
        str(row[0]), str(row[2]), str(row[1]))

# Creating Chatbot GUI using tkinter (updated as of 09/16/2020)

In [38]:
#Importing tkinter
import tkinter
from tkinter import *

In [39]:
#Enabling High DPI in Windows 10
try:
    from ctypes import windll
    windll.shcore.SetProcessDpiAwareness(1)
except Exception:
    # Best effort only: the import fails on non-Windows platforms and the call
    # can fail where shcore is unavailable. Catching Exception (instead of a
    # bare except) keeps that behaviour without also swallowing
    # KeyboardInterrupt / SystemExit.
    pass

In [40]:
def send(*args):
    """Send the typed question to the bot and show the exchange in the chat window.

    Accepts and ignores any positional arguments so it can be used both as a
    Button command (no arguments) and as a key-binding callback (one event
    argument).

    The message is stripped of surrounding whitespace first: when triggered by
    the Return key the text box content ends with the newline that was just
    typed, and a box containing only that newline must count as empty.
    """
    msg = messageWindow.get("1.0",'end-1c').strip()
    messageWindow.delete("0.0",END)
    if msg != '':
        # The chat window is kept read-only; enable it only while writing.
        chatWindow.config(state=NORMAL)
        try:
            chatWindow.insert(END, "You: " + msg + '\n\n')
            chatWindow.config(foreground="#C4D600", font=("Calibri", 10 ))
            res = gui_response(msg, 0.8)
            chatWindow.insert(END, "Bot: " + res + '\n\n')
        finally:
            # Always restore the read-only state, even if the lookup fails.
            chatWindow.config(state=DISABLED)
            chatWindow.yview(END)
In [41]:
def bonus():
    """Handle the 'Bonus Payout' button.

    Reads the employee ID from the message box (surrounding whitespace is
    ignored), clears the box, and writes either the payout reply from
    gui_bonus or, when the box was empty, a prompt asking for the ID.

    The chat window is read-only between messages, so it is enabled for the
    duration of every write -- including the empty-input prompt, which was
    previously inserted while the widget could still be disabled -- and is
    always switched back to read-only afterwards.
    """
    pers = messageWindow.get("1.0",'end-1c').strip()
    messageWindow.delete("0.0",END)
    chatWindow.config(state=NORMAL)
    try:
        if pers != '':
            chatWindow.insert(END, "Your Emp ID: " + pers + '\n\n')
            chatWindow.config(foreground="#C4D600", font=("Calibri", 10 ))
            res = gui_bonus(pers)
            chatWindow.insert(END, "Bot:" + res + '\n\n')
        else:
            chatWindow.insert(END, "Bot: Please enter your Personal Emp.ID: \n\n")
    finally:
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)
In [42]:
def status():
    """Handle the 'Referral Status' button.

    Reads the candidate RMS ID from the message box (surrounding whitespace is
    ignored), clears the box, and writes either the status reply from
    gui_status or, when the box was empty, a prompt asking for the ID.

    The chat window is read-only between messages, so it is enabled for the
    duration of every write -- including the empty-input prompt, which was
    previously inserted while the widget could still be disabled -- and is
    always switched back to read-only afterwards.
    """
    rms_id = messageWindow.get("1.0",'end-1c').strip()
    messageWindow.delete("0.0",END)
    chatWindow.config(state=NORMAL)
    try:
        if rms_id != '':
            chatWindow.insert(END, "Candidate RMS ID: " + rms_id + '\n\n')
            chatWindow.config(foreground="#C4D600", font=("Calibri", 10 ))
            res = gui_status(rms_id)
            chatWindow.insert(END, "Bot: " + res + '\n\n')
        else:
            chatWindow.insert(END, "Bot: Please enter your referral's RMS.ID: \n\n")
    finally:
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)

In [43]:
# Main application window: titled, 500x600, with resizing turned off in both directions.
root = Tk()
root.title("Talent Referral Chat Bot")
root.geometry("500x600")
root.resizable(width = FALSE, height = FALSE)

''

In [44]:
#Message header of the chatbot

# Greeting banner placed at the top of the window, above the chat transcript.
message = "Hi There..I'm Talent Referral ChatBot :)"
msg1 = tkinter.Message(root, text = message)
msg1.config(bg="grey20", fg="#86BC25", font=('Calibri', 10, 'bold','italic'), justify = LEFT, width = "500")
# Positioned with absolute pixel coordinates inside root.
msg1.place(x=6, y=6, height= 15, width=485)

In [45]:
# Disabled menu-bar prototype: the code below is wrapped in a string literal,
# so no menu is created or attached to root.
"""
main_menu = Menu(root)

# Create the submenu 
file_menu = Menu(root)

# Add commands to submenu
file_menu.add_command(label="New..")
file_menu.add_command(label="Save As..")
file_menu.add_command(label="Exit")
main_menu.add_cascade(label="File", menu=file_menu)

#Add the rest of the menu options to the main menu
main_menu.add_command(label="Edit")
main_menu.add_command(label="Quit")
root.config(menu=main_menu)
"""

'\nmain_menu = Menu(root)\n\n# Create the submenu \nfile_menu = Menu(root)\n\n# Add commands to submenu\nfile_menu.add_command(label="New..")\nfile_menu.add_command(label="Save As..")\nfile_menu.add_command(label="Exit")\nmain_menu.add_cascade(label="File", menu=file_menu)\n\n#Add the rest of the menu options to the main menu\nmain_menu.add_command(label="Edit")\nmain_menu.add_command(label="Quit")\nroot.config(menu=main_menu)\n'

In [46]:
#Create Chat Window

# Transcript area that send(), bonus() and status() write the conversation into.
chatWindow = Text(root, bd=1, bg="grey30",  width="50", height="8", font=("Calibri", 12), foreground="#C4D600")
# Positioned with absolute pixel coordinates, directly below the header banner.
chatWindow.place(x=6,y=21, height=410, width=480)

In [47]:
#Message header for the message window

# Caption shown directly above the input box.
message2 = "Enter your questions here..."
msg2 = tkinter.Message(root, text = message2)
msg2.config(bg="grey20", fg="#86BC25", font=('Calibri', 10, 'bold','italic'), justify = LEFT, width = "366")
msg2.place(x=6, y=435, height= 15, width=366)

In [48]:
#Create Message Window

# Input box; send(), bonus() and status() all read their text from here and then clear it.
messageWindow = Text(root, bd=0, bg="grey30",width="30", height="4", font=("Calibri", 12), foreground="#C4D600")
#messageWindow.insert(END, "Please enter your queries here...")
#messageWindow.delete("0.0",END)
messageWindow.place(x=6, y=450, height=145, width=366)

In [49]:
#Bind scrollbar to Chat window

# Two-way link: dragging the scrollbar calls chatWindow.yview, and scrolling
# the chat window reports its position back through scrollbar.set.
scrollbar = Scrollbar(root, command=chatWindow.yview, cursor="star")
scrollbar.place(x=485,y=5, height=425)
chatWindow['yscrollcommand'] = scrollbar.set

In [50]:
#Create Button to send message

# "Send" runs send(): the typed text is answered from the FAQ.
Button1= Button(root, text="Send",  width="12", height=5, bd=0, bg="#86BC25", activebackground="#00bfff",
                foreground='#ffffff',font=("Calibri", 10, 'italic', 'bold'), command = send)
Button1.place(x=374, y=435, height=76)

# "Referral Status" runs status(): the typed text is treated as a candidate RMS ID.
Button2= Button(root, text="Referral Status", width="12", height=5, bd=0, bg="#000000", activebackground="#00bfff",
                foreground='#ffffff',font=("Calibri", 10,'italic', 'bold'), command = status)
Button2.place(x=374, y=513, height=40)

# "Bonus Payout" runs bonus(): the typed text is treated as an employee ID.
Button3= Button(root, text="Bonus Payout",width="12", height=5, bd=0, bg="#000000", activebackground="#00bfff",
                foreground='#ffffff',font=("Calibri", 10, 'italic', 'bold'), command = bonus)
Button3.place(x=374, y=555, height=40)


In [51]:
#Assigning shortcuts and key-bindings to the buttons

# Both the main Return key and the keypad Enter key trigger send(); send is
# declared with *args so it can accept the event argument passed to bound callbacks.
root.bind("<Return>",send)
root.bind("<KP_Enter>",send)

'2017100499144send'

In [52]:
# Start the Tk event loop for the chat window.
root.mainloop()