## Import Libraries and Download NLTK Data

In [8]:
import nltk
import numpy as np
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Fetch the NLTK data packages this notebook relies on. The recorded output
# shows that re-running reports "already up-to-date" when a package is present.
nltk.download('punkt') # tokenizer data; presumably what nltk.sent_tokenize / nltk.word_tokenize load
nltk.download('wordnet') # WordNet data; presumably what WordNetLemmatizer looks words up in
nltk.download('omw-1.4') # Open Multilingual WordNet; NOTE(review): likely required alongside 'wordnet' by recent NLTK - confirm

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\it60537\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\it60537\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\it60537\AppData\Roaming\nltk_data...


True

## Read in Chatbot Training Text

In [9]:
# Corpus the chatbot draws its answers from (Dr. Seuss text). To use the
# alternative corpus, change the filename to 'tacoStandchatbot.txt'.
# errors='ignore' skips bytes that cannot be decoded instead of raising.
# The with-block guarantees the file handle is closed once the text is read.
with open('chatbot.txt', 'r', errors='ignore') as file:
    raw_file = file.read().lower()  # lowercase so matching is case-insensitive

sent_tokens = nltk.sent_tokenize(raw_file)  # list of sentences
word_tokens = nltk.word_tokenize(raw_file)  # list of word tokens

## Lemmatization and Punctuation-Removal Functions

In [10]:
lemmer = nltk.stem.WordNetLemmatizer()  # one lemmatizer instance shared by every call

def lemTokens(tokens):
    """Return a list holding the lemmatized form of each token, in input order."""
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas

# Translation table mapping every character in string.punctuation to None
# (i.e. delete it). Built once at definition time rather than on every call,
# because lemNormalize is used as the TfidfVectorizer tokenizer and therefore
# runs once per document.
_REMOVE_PUNCT_TABLE = str.maketrans('', '', string.punctuation)

def lemNormalize(text):
    """Lowercase ``text``, strip punctuation, tokenize it and lemmatize the tokens.

    Parameters
    ----------
    text : str
        Raw sentence or document.

    Returns
    -------
    list
        Lemmatized word tokens as produced by ``lemTokens``.
    """
    cleaned = text.lower().translate(_REMOVE_PUNCT_TABLE)
    return lemTokens(nltk.word_tokenize(cleaned))


## Greeting Function

In [11]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey",)
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "Hola"]
def greeting(sentence):
    """Return a random greeting if ``sentence`` contains a known greeting.

    Matching is case-insensitive and works on whole words: punctuation
    attached to the edges of a word ("hello!", "hi,") is ignored, and
    multi-word entries of ``GREETING_INPUTS`` such as "what's up" are matched
    as phrases rather than word by word.

    Parameters
    ----------
    sentence : str
        The user's input.

    Returns
    -------
    str or None
        A random element of ``GREETING_RESPONSES`` when a greeting is found,
        otherwise ``None``.
    """
    # Strip punctuation from the edges of each word only, so the apostrophe
    # inside "what's" survives; drop words that were pure punctuation.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    normalized = " ".join(word for word in words if word)
    # Pad with spaces so a phrase only matches on word boundaries
    # (e.g. "hi" must not match inside "this").
    padded = " " + normalized + " "
    for phrase in GREETING_INPUTS:
        if " " + phrase + " " in padded:
            return random.choice(GREETING_RESPONSES)  # return a random greeting response
    return None

## Response Function

In [None]:
def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The user's text is appended to the global ``sent_tokens`` list, the whole
    list is vectorized with TF-IDF (tokenized by ``lemNormalize``, English
    stop words removed), and the user's row is compared with every corpus
    row by cosine similarity.

    Parameters
    ----------
    user_response : str
        The user's input.

    Returns
    -------
    str
        The best-matching corpus sentence, or "I do not understand" when no
        corpus sentence has a non-zero similarity (or there is no corpus).

    Notes
    -----
    Side effect: ``user_response`` stays appended to ``sent_tokens`` when this
    function returns; the caller is expected to remove it afterwards.
    """
    sent_tokens.append(user_response)
    if len(sent_tokens) < 2:
        # Nothing but the query itself: there is no corpus sentence to return.
        return "I do not understand"

    vectorizer = TfidfVectorizer(tokenizer=lemNormalize, stop_words='english')
    tfidf = vectorizer.fit_transform(sent_tokens)

    # Compare the query (last row) against the corpus rows only. Excluding the
    # query's own row means a tie at the top can never hand the user's text
    # back as the answer, and no second sort is needed to read the best score.
    similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best_idx = int(similarities.argmax())

    if similarities[best_idx] == 0:  # no overlap with any corpus sentence
        return "I do not understand"
    return sent_tokens[best_idx]

## Run Chatbot

In [None]:
def main():
    """Run the interactive chat loop until the user types 'bye'.

    Reads one line per turn with ``input()``. Thanks and greetings get canned
    replies; anything else is answered by ``response``. The loop also ends
    cleanly if the input stream is closed or the read is interrupted.
    """
    print("ChatBot: My name is Thing. Have a chat with me! If you want to exit, type 'Bye'")
    while True:  # Start conversation
        try:
            user_response = input().strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No more input is coming (closed stdin / interrupted cell).
            print('Chatbot: Bye, talk to you soon!!')
            break

        if user_response == 'bye':
            print('Chatbot: Bye, talk to you soon!!')
            break

        if user_response in ('thanks', 'thank you', 'gracias'):
            print('ChatBot: You are welcome!!')
            continue

        # Call greeting() once: it picks a random reply, so calling it a second
        # time for printing would draw a different value than the one checked.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("ChatBot: " + greeting_reply)
            continue

        # response() appends the query to sent_tokens. Restore the list to its
        # previous length afterwards, even if response() raises. Truncating by
        # length (instead of remove()) cannot delete a genuine corpus sentence
        # that happens to equal the user's text.
        corpus_size = len(sent_tokens)
        try:
            print("ChatBot: " + response(user_response))  # check cosine similarity for proper response
        finally:
            del sent_tokens[corpus_size:]
# Start the interactive chat session; this blocks on input() until the user types 'bye'.
main()