In [1]:
import nltk
import random
import string
import re
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
data = pd.read_csv('tutoring_data.csv')

# Build exactly one normalized corpus entry per CSV row. generate_response
# uses the position of the best-matching entry in q_tokens as the row label
# when reading the 'Answer' column, so q_tokens must stay aligned with `data`.
# (Concatenating all questions and re-splitting them with a sentence tokenizer
# breaks that alignment whenever a question holds several sentences or lacks
# terminal punctuation.)
q_tokens = []
for question in data['Question'].fillna('').astype(str):
    question = question.lower()
    # drop bracketed numeric markers such as "[12]"
    question = re.sub(r'\[[0-9]*\]', ' ', question)
    # collapse runs of whitespace into a single space
    question = re.sub(r'\s+', ' ', question).strip()
    q_tokens.append(question)

# Whole normalized corpus as a single string (kept for any code that reads it).
q_string = ' '.join(q_tokens)

In [3]:
def clean_input(text):
    """Tokenize ``text`` into lemmatized words with punctuation removed.

    Used both for the stored questions and for user input (it is passed to
    ``TfidfVectorizer`` as the tokenizer).

    Args:
        text: the raw string to pre-process.

    Returns:
        A list of word tokens, each reduced to its WordNet lemma.

    Note:
        Relies on NLTK's word tokenizer and WordNet lemmatizer, which need
        their NLTK data packages to be installed.
    """
    # remove punctuation (every ASCII punctuation character is deleted)
    text = text.translate(str.maketrans('', '', string.punctuation))
    # tokenize input
    tokens = nltk.word_tokenize(text)
    wnl = nltk.stem.WordNetLemmatizer()

    # lemmatize() returns the lemma rather than modifying its argument,
    # so the results have to be collected and returned.
    return [wnl.lemmatize(token) for token in tokens]

In [4]:
# Words/phrases treated as a greeting, and the replies to choose from.
greeting_inputs = ("hey", "good morning", "good evening", "morning", "evening", "hi", "hello")
greeting_responses = ["Hi", "Nice to meet you!", "*nods*", "Hello!", "Welcome!"]

def generate_greeting_response(greeting):
    """Return a random greeting reply if ``greeting`` contains a greeting word.

    The input is split on whitespace and each token is compared
    case-insensitively against ``greeting_inputs`` after stripping leading and
    trailing punctuation, so "Hello!" and "hi," are recognised as well.

    Args:
        greeting: the user's message.

    Returns:
        A random element of ``greeting_responses``, or ``None`` when no token
        of the message is a known greeting.
    """
    for token in greeting.split():
        # strip punctuation glued to the word ("hello!" -> "hello")
        word = token.strip(string.punctuation).lower()
        if word in greeting_inputs:
            return random.choice(greeting_responses)
    return None

In [5]:
def generate_response(user_input):
    """Return the stored answer whose question is most similar to ``user_input``.

    The input is appended to ``q_tokens``, everything is vectorized with
    TF-IDF (tokenized by ``clean_input``, English stop words removed), and the
    cosine similarity between the input and every stored question is computed.

    Args:
        user_input: the user's question as a string.

    Returns:
        The 'Answer' value of the row in ``data`` whose label equals the
        position of the best-matching entry of ``q_tokens``, or
        "Sorry, I don't understand." when nothing has a non-zero similarity.

    Side effects:
        ``user_input`` is appended to the module-level ``q_tokens`` and left
        there; the caller is expected to remove it after the call.
    """
    fallback = "Sorry, I don't understand."

    # add user input to list of tokens for comparison
    q_tokens.append(user_input)
    if len(q_tokens) < 2:
        # nothing to compare the input against
        return fallback

    word_vectorizer = TfidfVectorizer(tokenizer=clean_input, stop_words='english')
    # convert tokens into a vector
    all_word_vectors = word_vectorizer.fit_transform(q_tokens)

    # Score the input (last row) against the stored questions only. Leaving the
    # input itself out of the comparison means it can never be picked as its
    # own best match, even when a stored question is identical to it.
    corpus_scores = cosine_similarity(all_word_vectors[-1], all_word_vectors[:-1]).flatten()
    similar_sentence_number = corpus_scores.argsort()[-1]

    if corpus_scores[similar_sentence_number] <= 0:
        return fallback

    # output corresponding answer from answer column
    return str(data.at[similar_sentence_number, 'Answer'])

In [6]:
# Interactive chat loop: read a line, answer it, stop on 'bye' or a thank-you.
continue_dialogue = True
print("Hi, my name is Bot! \n"
      "I can answer FAQ about Rhodes CS tutoring.\n"
      "To end our conversation, type 'bye' or just thank me ;)")

while continue_dialogue:
    # surrounding whitespace must not prevent 'bye' / 'thanks' from matching
    user_input = input().strip().lower()

    if user_input == 'bye':
        continue_dialogue = False
        print("Bot: Bye bye!")
    elif user_input in ('thanks', 'thank you'):
        continue_dialogue = False
        print("Bot: You're welcome!")
    else:
        # call once: the reply is random, so reuse the value that was checked
        greeting_reply = generate_greeting_response(user_input)
        if greeting_reply is not None:
            print("Bot: " + greeting_reply)
        else:
            # generate_response appends the input to q_tokens. Restore the
            # list by length instead of remove(value): remove() deletes the
            # first equal element, which would drop a stored question when the
            # user types one verbatim. The finally also cleans up on errors.
            corpus_size = len(q_tokens)
            try:
                answer = generate_response(user_input)
            finally:
                del q_tokens[corpus_size:]
            print("Bot:", answer)

Hi, my name is Bot! 
I can answer FAQ about Rhodes CS tutoring.
To end our conversation, type 'bye' or just thank me ;)
Can you tell me what tutoring is?
Bot: Tutoring is first-come first-serve for 141, 142, 241 only. Tutors will help you work through concepts, debug, and provide resources for further information.

When does tutoring start?
Bot: Tutoring begins 5-11pm CDT from Sunday to Thursday (excluding school holidays) using the queue app.
Are there any rules for tutoring?
Bot: Tutors are asked to limit time for each individual tutoring session to 10-20 minutes, since we have over 150+ students in 141/142/241 and 9 tutors. Tutoring is restricted to tutoring hours only, and only available using the queue app. Please do NOT DM tutors directly.
What are some expectations?
Bot: Sorry, I don't understand.
Are there expectations for tutoring?
Bot: Tutoring is first-come first-serve for 141, 142, 241 only. Tutors will help you work through concepts, debug, and provide resources for furthe