In [182]:
#from absl import logging
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
sns.set()

# Overview
The Universal Sentence Encoder encodes text into high-dimensional vectors that can be used for text classification, semantic similarity, clustering and other natural language tasks.

The model is trained and optimized for greater-than-word length text, such as sentences, phrases or short paragraphs. It is trained on a variety of data sources and a variety of tasks with the aim of dynamically accommodating a wide variety of natural language understanding tasks. The input is variable length English text and the output is a 512 dimensional vector. We apply this model to the STS benchmark for semantic similarity, and the results can be seen in the example notebook made available. The universal-sentence-encoder model is trained with a deep averaging network (DAN) encoder.

To learn more about text embeddings, refer to the TensorFlow Embeddings documentation. Our encoder differs from word level embedding models in that we train on a number of natural language prediction tasks that require modeling the meaning of word sequences rather than just individual words. Details are available in the paper "Universal Sentence Encoder" 

In [183]:
import time
start_time = time.time()

In [184]:
#Loading model from USE DAN (Deep Averaging Network)
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" 
model = hub.load(module_url)
#print ("module %s loaded" % module_url)

In [185]:
os.chdir('C:/Users/akadali/Desktop/Deep_NLP/MLG_Capstone_ChatBot/ChatBot_GoogleW2V')
data = pd.read_csv('chatbot_train.csv', encoding = 'latin1')
data = data[['question', 'answer', 'intent']]

In [186]:
import nltk
from nltk.tokenize import word_tokenize
stops = nltk.corpus.stopwords.words('english')

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

import string
puncs = string.punctuation
#Adding additional punctuations
puncs = puncs + "’'`" 
print(puncs)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~’'`


In [187]:
print(stops)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

# Data Cleaning
-----

* This dataset has a lot of contractions, hence all of them need to be removed/replaced within the dataset. The below function replaces the contractions with corresponding word pairs.

* Also, there are quite a few abbreviations such as U.S., US, USI to be substituted with their expanded forms

* removing additional spaces, if there are any

* Tokenizing the words
* Lemmatization
* removing numeric digits and punctuations

In [188]:
import re
def text_clean(text):
    text = re.sub(r"what's", "what is ", text)
    text = re.sub(r"\'s", " ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub('[%s]'%re.escape(puncs), ' ', text)
    #convert u.s or us  to 'United States'
    text = re.sub(r" U.S. ", " united states ", text)
    text = re.sub(r" US ", " united states ", text)
    text = re.sub(r" USI ", " united states india ", text)
    text = re.sub(r" +"," ", text)
    #Removing additional characters that captured during import
    text = re.sub("[\x97]","'", text)
    text = re.sub("[\x96]","'", text)
    text = re.sub("[\x95]","'", text)
    text = re.sub("[\x94]","'", text)
    text = re.sub("[\x93]","'", text)
    text = re.sub("[\x92]","'", text)
    text = re.sub("[\x91]","'", text)
    return text

In [189]:
def text_preprocess(text):
    #tokenizing
    tokens = word_tokenize(text)
    #converting to lower case and lemmatization
    #tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
    tokens = [word.lower() for word in tokens if word not in stops]
    filtered_tokens = []
    #Remove anything but alphabets - numbers, punctuations etc
    for token in tokens:
        if re.search('[a-zA-Z]', token):
            filtered_tokens.append(token)
    #return list of lists for embedding vectors
    return " ".join(filtered_tokens)

In [190]:
#Clean the questions dataset
data['question'] = data['question'].apply(lambda x: text_clean(x))

In [191]:
#Preprocess the questions
data['question'] = data['question'].apply(lambda x:text_preprocess(x))
#data['question'] = data['question'].apply(lambda x: " ".join(x))

### Creating sentence embeddings using pretrained TF USE (universal-sentence-encoder)

In [192]:
#Method to embed inputs
def embed(input):
    return model(input)

In [193]:
#np.seterr(divide='ignore', invalid='ignore')
question_vectors = data['question'].apply(lambda x:embed([x]))

## Establishing connection to the 'referral' database

In [194]:
import mysql.connector
mydb = mysql.connector.connect(host="localhost", 
                               user="root", 
                               passwd="root",
                               auth_plugin = 'mysql_native_password',
                               database = "talent_referral_payout")

In [195]:
mycursor = mydb.cursor(buffered = True)

In [196]:
#Definition to obtain te referral bonus payout history of a referring professional

def search_pay_db(emp_id):
    emp_id = str(emp_id)
    #print(type(emp_id))
    query = "SELECT referral_name,amount,date_of_origin FROM trp_payment_report_sample WHERE emp_num = "+ emp_id
    mycursor.execute(query)
    return mycursor.fetchone()

In [197]:
#Definition to obtain the status of referrals submitted by the referring professional

def search_status_db(rms_id):
    rms_id = str(rms_id)
    #print(type(emp_id))
    query = "SELECT candidate_name,requisition_no,current_status,reference_date FROM trp_status WHERE candidate_id = " + rms_id
    mycursor.execute(query)
    return mycursor.fetchone()

In [198]:
def get_response(t):
    #print(query)
    while True:
        user_input = input("You:")
        if len(user_input) == 0:
            print("Bot: Can you please provide more information")
        elif user_input.lower() == '_bonus_':
            print("Enter your personnel number to fetch the payment details")
            emp_no = input("Emp. ID:")
            emp_no = str(emp_no)
            ans = search_pay_db(emp_no)
            #print("Bot: Your bonus payout details are below (in the last 6 months)")
            if ans is not None:
                print("Bot: You were paid $"+ str(ans[1])+" for referring "+str(ans[0])+" along with "+str(ans[2])+" pay period")
                print("     Let me know if I can help you with anything else")
            else:
                print("Bot: Sorry...I do not have your referral info. in my database. Please contact TRP Team")
        elif user_input.lower() == '_status_':
            print("Bot: Enter your referral's RMS/Taleo ID to fetch the status info.")
            email = input("ID:")
            email = str(email)
            ans = search_status_db(email)
            #print("Bot: Below are the list of referrals you submitted in the last 6 months")
            if ans is not None:
                print("Bot: Your referral "+str(ans[0])+ " has been "+str(ans[2])+" for requisition with ID:"+str(ans[1]))
                print("     Let me know if I can help you with anything else")
            else:
                print("Bot: Sorry...I do not have your referral info. in my database. Please contact TRP Team")
        elif user_input.lower() == 'quit':
            break
        else:
            #q = text_clean(user_input)
            #q = text_preprocess(user_input)
            q = user_input
            q_vec = embed([q])
            size_ = len(question_vectors)
            sims = np.zeros(size_)
            for i in range(size_):
                sims[i] = cosine_similarity(q_vec, question_vectors[i])
            max_s = sims.max()
            if max_s < t:
                response = "Hmm..sorry, I don't quite understand that, can you please rephrase your question"
            else:
                max_i = np.argmax(sims)
                response = data.answer[max_i]
            print("Bot:", response)

In [199]:
def chat_test():
    print("Specify the confidence level")
    level = input("Confidence:")
    conf = int(level)/100
    print("Hi There....I'm Talent Referral Bot and I'm here to help you with referral inquiries\n[type 'quit' to stop]")
    print("***TIP: PLEASE ENTER \n 1.'_status_' -> to check your referral status \n 2.'_bonus_' -> to check your bonus payout status \n 3.Else, enter your question")
    ans = get_response(conf)
    #print("Bot:", ans)

In [200]:
#Invoke this function only if you want to test the bot in here, else don't
#chat_test()

## Testing the Program

#### **** Can be skipped ****

In [201]:
#Testing the Chabot with Test Dataset
os.chdir('C:\\Users\\akadali\\Desktop\\Deep_NLP\\MLG_Capstone_ChatBot\\ChatBot_GoogleW2V')

#importing test dataset
test_data = pd.read_csv("chatbot_test.csv", encoding = 'latin1')
#test_data.drop(['Unnamed: 3'], axis = 1, inplace = True)
size = len(question_vectors)
sims = np.zeros(size)
def test_response(q, vecs = question_vectors):
    q = text_clean(q)
    q = text_preprocess(q)
    q_vec = embed([q])
    for i in range(size):
        sims[i] = cosine_similarity(q_vec, question_vectors[i])
    max_s = sims.max()
    max_i = np.argmax(sims)
    return max_s, data.answer[max_i], data.question[max_i], data.intent[max_i]

test_data['bot_response'] = test_data['test_question'].apply(lambda x:test_response(x)[1])
test_data['train_question'] = test_data['test_question'].apply(lambda x:test_response(x)[2])
test_data['sim_score'] = test_data['test_question'].apply(lambda x:test_response(x)[0])
test_data['train_intent'] = test_data['test_question'].apply(lambda x:test_response(x)[3])
test_data['valid'] = test_data['bot_response'] == test_data['test_answer']

In [202]:
print("Accuracy of TF USE DAN \n",(test_data.valid.sum()/len(test_data)).round(3))
print("Average Simailarity score",test_data.sim_score.mean())
print("--- %s seconds ---" % (time.time() - start_time))

Accuracy of TF USE DAN 
 0.538
Average Simailarity score 0.6816942118296102
--- 116.12505197525024 seconds ---


#### Accuracy Readings

In [None]:
##First Test result with stop words, with lemmatization
# Test_Accuracy - 34%
# Mean Similarity score - 0.64

##Second Test Result - with stop words, without lemmatization and 
# Test_Accuracy - 56%
# Mean Similarity score - 0.69

##Third Test Result - without stop words, wihtout lemmatization
# Test_Accuracy - 53%
# Mean Similarity score - 0.63 

##Fourth Test Result - without stop words, with lemmatization
# Test_Accuracy - 54%
# Mean Similarity score - 0.68 


In [None]:
test_data.to_csv("C:\\Users\\akadali\\Desktop\\Deep_NLP\\MLG_Capstone_ChatBot\\ChatBot_GoogleW2V\\test_results_dan.csv",
                 index = False, header = True)

# ChatBot GUI

### Methods for Chatbot GUI response 

In [54]:
def gui_response(user_input):
    user_input = str(user_input)
    if len(user_input) == 0:
        response = "Looks like you have not asked any question. Please feel free to ask me anything related to referral program."
    else:
        user_input = text_clean(user_input)
        user_input = text_preprocess(user_input)
        q_vec = embed([user_input])
        size_ = len(question_vectors)
        sims = np.zeros(size_)
        for i in range(size_):
            sims[i] = cosine_similarity(q_vec, question_vectors[i])
        max_s = sims.max()
        if max_s < 0.6: #Change the threshold if needed
            response = "I'm sorry, I don't think I understood that correctly, can you please rephrase your question and try again.Feel free to email ustalentreferralprogram@deloitte.com to reach referral team, if I’m not able to solve your problem right now"
        else:
            max_i = np.argmax(sims)
            response = data.answer[max_i]
    return response

In [56]:
def gui_bonus(emp_id):
    emp_id = str(emp_id)
    ans = search_pay_db(emp_id) 
    if ans is not None:
        res = "You were paid $"+ str(ans[1])+" for referring "+str(ans[0])+" along with "+str(ans[2])+" pay period"
    else:
        res = "Sorry, I couldn't find any result with the input provided" 
    return res

In [57]:
def gui_status(rms_id):
    rms_id = str(rms_id)
    ans = search_status_db(rms_id)
    if ans is not None:
        res = "Your referral "+str(ans[0])+ " has been "+str(ans[2])+" for requisition with ID:"+str(ans[1])
    else:
        res = "Sorry, I couldn't find any result with the input provided" 
    return res

# Creating Chatbot GUI using tkinter

In [58]:
import os
os.chdir("C:\\Users\\akadali\\Desktop\\Deep_NLP\\MLG_Capstone_ChatBot\\ChatBot_GoogleW2V")

In [59]:
#Importing tkinter
import tkinter
from tkinter import *

In [60]:
#Enabling High DPI in Windows 10
try:
    from ctypes import windll
    windll.shcore.SetProcessDpiAwareness(1)
except:
    pass

### Methods for Buttons

In [61]:
def send(*args):
    msg = messageWindow.get("1.0",'end-1c')
    messageWindow.delete("0.0",END)
    if msg == '':
        chatWindow.config(state=NORMAL)
        chatWindow.insert(END, "You: \n" + msg + '\n')
        chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold' ))
        res = "Please feel free to ask me anything related to referral program."
        chatWindow.insert(END, "Bot: " + res + '\n\n')
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)
    else: 
        chatWindow.config(state=NORMAL)
        chatWindow.insert(END, "You: " + msg + '\n')
        chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold' ))
        res = gui_response(msg)
        chatWindow.insert(END, "Bot: " + res + '\n\n')
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)

In [62]:
def bonus():
    pers = messageWindow.get("1.0",'end-1c')
    messageWindow.delete("0.0",END)
    if pers == '':
        chatWindow.config(state=NORMAL)
        #chatWindow.insert(END, "You: " + msg + '\n')
        chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold' ))
        res = "Please enter your Personal Emp.ID and click on 'Bonus History' again"
        chatWindow.insert(END, "Bot: " + res + '\n\n')
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)
    else:
        chatWindow.config(state=NORMAL)
        chatWindow.insert(END, "Your Emp ID: " + pers + '\n\n')
        chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold' ))
        res = gui_bonus(pers)
        chatWindow.insert(END, "Bot: " + res + '\n\n')
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)

In [63]:
def status():
    rms_id = messageWindow.get("1.0",'end-1c')
    messageWindow.delete("0.0",END)
    if rms_id == '':
        chatWindow.config(state=NORMAL)
        #chatWindow.insert(END, "You: " + msg + '\n')
        chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold' ))
        res = "Please enter your referral's RMS.ID and click on 'Referral Status' again"
        chatWindow.insert(END, "Bot: " + res + '\n\n')
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)
    else:
        chatWindow.config(state=NORMAL)
        chatWindow.insert(END, "Candidate RMS ID: " + rms_id + '\n\n')
        chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold' ))
        res = gui_status(rms_id)
        chatWindow.insert(END, "Bot: " + res + '\n\n')
        chatWindow.config(state=DISABLED)
        chatWindow.yview(END)

In [64]:
def quick_help():
    chatWindow.config(state=NORMAL)
    chatWindow.config(foreground="#3b5998", font=("Helvetica Neue", 9, 'bold'))
    res = "Hi there! Here are a few options for your quick help \n ----------------------------------------------------------------------------------------- \n -> Internship Referrals                                    - Type 'Intern'  \n ----------------------------------------------------------------------------------------- \n -> Cannot upload Resume                               - Type 'resume'   \n ----------------------------------------------------------------------------------------- \n -> Cannot access referral page                       - Type 'technical'\n ----------------------------------------------------------------------------------------- \n -> Position not found/No 'refer a friend' link - Type 'job'     \n----------------------------------------------------------------------------------------- \n-> Need 'Referral submission' link                   - Type 'link'     \n----------------------------------------------------------------------------------------- \n-> Bonus not received                                       - Type 'Bonus'    \n----------------------------------------------------------------------------------------- \n-> 'Client' Referrals                                            - Type 'client'   \n ----------------------------------------------------------------------------------------- \n Bot: Have a different query??..please feel free to type your question here"
    chatWindow.insert(END, "Bot: " + res + '\n\n')
    chatWindow.config(state=DISABLED)
    chatWindow.yview(END)

In [65]:
#root instance
root = Tk()
root.title("TRP_ChatBot BETA")
root.geometry("600x700")
root.resizable(width = FALSE, height = FALSE)

''

In [66]:
#Message header of the chatbot
message = "Referral Program ChatBot"
msg1 = tkinter.Message(root, text = message)
msg1.config(bg="#3b5998", fg="snow", font=('Helvetica Neue', 14, 'bold'), justify = RIGHT , width = "700")
msg1.place(x=0, y=0, height = 32, width = 600)

#Message sub-header of the chatbot

message = "- Get your referral inquiries answered here -"
msg1 = tkinter.Message(root, text = message)
msg1.config(bg="#3b5998", fg="snow", font=('Helvetica Neue', 8, 'italic'), justify = CENTER , width = "700")
msg1.place(x=0, y=32, height = 18, width = 600)


In [67]:
#Create Chat Window

chatWindow = Text(root, bd=1, bg="grey93",  width="50", height="8", font=("Helvetica Neue", 12), foreground="grey7")
chatWindow.place(x=0,y=50, height=500, width=585)

In [68]:
#Bind scrollbar to Chat window

scrollbar = Scrollbar(root, command=chatWindow.yview, cursor="clock")
scrollbar.place(x= 585,y = 50, height=500)
chatWindow['yscrollcommand'] = scrollbar.set

In [69]:
#Create Buttons

#login_btn = PhotoImage(file = 'login.png')

Button1= Button(root, text = "Enter >",  width="12", height=5, bd=0, bg="#4267B2", activebackground="#dfe3ee", 
                foreground='#ffffff',font=("Helvetica Neue", 12,'bold'), command = send, borderwidth = 3)
Button1.place(x=500, y=572, height=128, width = 100)


Button4= Button(root, text="Quick help",width="12", height=5, bd=0, bg="#4267B2", 
                activebackground="#dfe3ee", foreground='#ffffff',font=("Helvetica Neue", 10,'bold'), 
                borderwidth = 2, command = quick_help)
Button4.place(x=0, y=522, height=30, width = 200)


Button2= Button(root, text="Referral status", width="12", height=5, bd=0, bg="#4267B2", 
                activebackground="#dfe3ee", foreground='#ffffff',font=("Helvetica Neue", 10,'bold'), 
                borderwidth = 2, command = status)
Button2.place(x=400, y=522, height=30, width = 200)


Button3= Button(root, text="My bonus history", width="12", height=5, bd=0, bg="#4267B2", 
                activebackground="#dfe3ee", foreground='#ffffff',font=("Helvetica Neue", 10, 'bold'), 
                borderwidth = 2, command = bonus)
Button3.place(x=200, y=522, height=30, width = 200)

In [70]:
#Message header for the message window

message2 = "Enter your questions below..."
msg2 = tkinter.Message(root, text = message2)
msg2.config(bg="#8b9dc3", fg="snow", font=('Calibri', 9, 'italic'), justify = LEFT, width = "366")
msg2.place(x=0, y= 552, height= 20, width = 600)

In [71]:
#Create Message Window

messageWindow = Text(root, bd=0, bg="grey99",width="30", height="4", font=("Helvetica Neue", 10), foreground="grey7")
#messageWindow.insert(END, "Please enter your queries here...")
#messageWindow.delete("0.0",END)
messageWindow.place(x=7, y=579, height=113, width=486)

In [72]:
#Assigning shortcuts and key-bidings to the buttons

root.bind("<Return>",send)
root.bind("<KP_Enter>",send)

'3045228839880send'

In [73]:
root.mainloop()

In [None]:
#Sample RMS IDs for chatbot testing of referral status
"""
24211419
18327009
24482992
21996054
22425736
24426987
17783534
24601097
23736632
24698456
24623156
21705392
"""
#Sample Employee IDs for chatbot testing bonus status
"""
213408
239660
249058
250194
260943
271637
274346
282822
284183
284315
284754
285695
290560
"""