In [16]:
import nltk
import numpy as np
import random
import string
import sklearn

In [17]:
f = open('chatbots.txt', 'r', errors='ignore')      # must be in the same file
raw = f.read()
raw = raw.lower()                                   # converts everything to lowercase
nltk.download('punkt')                              # only need to do this once, but doesn't hurt
nltk.download('wordnet')                            # same as above
sent_tokens = nltk.sent_tokenize(raw)               # converts everything to a list of sentences
word_tokens = nltk.word_tokenize(raw)               # converts everything to a list of words

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\andy\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\andy\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [18]:
sent_tokens

['a chatbot is a software application used to conduct an on-line chat conversation via text or text-to-speech, in lieu of providing direct contact with a live human agent.',
 '[1] a chatbot is a type of software that can automate conversations and interact with people through messaging platforms.',
 '[2] designed to convincingly simulate the way a human would behave as a conversational partner, chatbot systems typically require continuous tuning and testing, and many in production remain unable to adequately converse or pass the industry standard turing test.',
 '[3] the term "chatterbot" was originally coined by michael mauldin (creator of the first verbot) in 1994 to describe these conversational programs.',
 '[4]\n\nchatbots are used in dialog systems for various purposes including customer service, request routing, or information gathering.',
 'while some chatbot applications use extensive word-classification processes, natural language processors, and sophisticated ai, others simp

In [19]:
lemmer = nltk.stem.WordNetLemmatizer()                  # Wordnet is a semantically-oriented dictionary of English included in NLTK

def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]

remove_punct_dict = dict((ord(punct), None) for punct in string.punctuation)

def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punct_dict)))

In [20]:
GREETING_INPUTS = ('hello', 'hi', 'greetings', 'sup', "what's up", 'hey')
GREETING_RESPONSES = ['hi', 'hey', '*nods', 'hi there', 'hello', 'I am glad to talk to you.']

def greeting(sentence):
    for word in sentence.split():
        if word.lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)

In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
# defining the bots responses to queries

def response(user_response):
    robo_response = " "
    sent_tokens.append(user_response)
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if(req_tfidf==0):
        robo_response = (robo_response + "I'm sorry, I don't understand.")
        return robo_response
    else:
        robo_response = robo_response + sent_tokens[idx]
        return robo_response

In [25]:
flag = True
print("Hi. I will answer queries about Chatbots. To exit, type Bye.")
while(flag==True):
    user_response = input()
    user_response = user_response.lower()
    if(user_response!='bye'):
        if(user_response=="thanks" or user_response=="thank you"):
            flag = False
            print("ROBO: You are welcome..")
        else:
            if(greeting(user_response)!=None):
                print("ROBO: " + greeting(user_response))
            else:
                print("ROBO: ", end='')
                print(response(user_response))
                sent_tokens.remove(user_response)
    else:
        flag==False
        print("ROBO: Bye.")



Hi. I will answer queries about Chatbots. To exit, type Bye.
ROBO: I am glad to talk to you.
ROBO: Bye.
ROBO: 



 I'm sorry, I don't understand.
ROBO:  I'm sorry, I don't understand.
