In [1]:
import nltk
import numpy as np
import random
import string # to process standard python strings
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#### Convert the entire corpus into a list of sentences and a list of words for further pre-processing.

In [3]:
# Read the whole corpus in one go. The context manager closes the file handle
# as soon as the read finishes (or fails); errors='ignore' silently drops any
# bytes that cannot be decoded.
with open('corpus.txt', 'r', errors='ignore') as f:
    raw = f.read()
raw = raw.lower()  # normalise case before tokenizing

# NOTE: the NLTK tokenizers below need NLTK's tokenizer data installed locally
# (see nltk.download); without it they raise LookupError on a fresh machine.
sent_tokens = nltk.sent_tokenize(raw)  # corpus as a list of sentences
word_tokens = nltk.word_tokenize(raw)  # corpus as a list of words

# Quick sanity check of the first few tokens of each kind.
print(sent_tokens[:5])
print(word_tokens[:5])

['a chatbot (also known as a talkbot, chatterbot, bot, im bot, interactive agent, or artificial conversational entity) is a computer program or an artificial intelligence which conducts a conversation via auditory or textual methods.', 'such programs are often designed to convincingly simulate how a human would behave as a conversational partner, thereby passing the turing test.', 'chatbots are typically used in dialog systems for various practical purposes including customer service or information acquisition.', 'some chatterbots use sophisticated natural language processing systems, but many simpler systems scan for keywords within the input, then pull a reply with the most matching keywords, or the most similar wording pattern, from a database.', 'the term "chatterbot" was originally coined by michael mauldin (creator of the first verbot, julia) in 1994 to describe these conversational programs.today, most chatbots are either accessed via virtual assistants such as google assistant 

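#### Define `LemTokens`, which takes a list of tokens and returns their lemmatized (normalized) forms, and `LemNormalize`, which lowercases a text, strips punctuation, tokenizes it and lemmatizes the result.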


In [4]:
# Shared lemmatizer instance. WordNet is the semantically-oriented English
# dictionary bundled with NLTK that the lemmatizer looks words up in.
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """Return a list holding the lemmatized form of each token, in order."""
    lemmas = []
    for word in tokens:
        lemmas.append(lemmer.lemmatize(word))
    return lemmas

# Translation table for str.translate: every character in string.punctuation
# maps to None, i.e. it is deleted.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemNormalize(text):
    """Lowercase ``text``, delete punctuation, tokenize it and lemmatize the tokens."""
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)

#### Greeting function: if the user’s input contains a greeting, the bot returns a randomly chosen greeting response.

In [5]:
# Words/phrases recognised as greetings. greeting() compares them
# case-insensitively and on whole-word boundaries, so mixed-case and
# multi-word entries are valid here.
GREETING_INPUTS = ("hello", "hi", "greetings", "sup","wasup", "what's up","hey","hola","Ok Bot")
GREETING_RESPONSES = ["Hey :)", "Hi :)", "*nods*", "Hi there", "Hello", "I am glad! You are talking to me"]
def greeting(sentence):
    """Return a random greeting if ``sentence`` contains a known greeting.

    Matching is case-insensitive and works on whole whitespace-delimited
    words, so "hi" does not match inside "shipping". Multi-word entries of
    GREETING_INPUTS (e.g. "what's up") match when their words appear
    consecutively in the sentence.

    Args:
        sentence: The user's input text.

    Returns:
        One element of GREETING_RESPONSES chosen at random, or None when no
        greeting is found.
    """
    # Collapse whitespace and pad with spaces so a plain substring test only
    # succeeds on whole-word boundaries.
    padded_sentence = " " + " ".join(sentence.lower().split()) + " "
    for phrase in GREETING_INPUTS:
        padded_phrase = " " + " ".join(phrase.lower().split()) + " "
        if padded_phrase in padded_sentence:
            return random.choice(GREETING_RESPONSES)
    return None

#### Use TfidfVectorizer and cosine_similarity to find the corpus sentence most similar to the sentence entered by the user.

In [6]:
def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query is appended to the global ``sent_tokens`` list, the whole list
    is vectorised with TF-IDF, and the corpus sentence with the highest
    cosine similarity to the query is returned.

    Side effect: ``user_response`` is appended to ``sent_tokens`` and is NOT
    removed here; the caller is responsible for removing it afterwards.

    Args:
        user_response: The user's query text.

    Returns:
        The best-matching corpus sentence, or an apology string when no
        corpus sentence has non-zero similarity to the query (or the corpus
        is empty).
    """
    fallback = "I am sorry! I don't understand you"

    # The query is vectorised together with the corpus so both share one
    # vocabulary; it therefore occupies the last row of the TF-IDF matrix.
    sent_tokens.append(user_response)
    if len(sent_tokens) < 2:
        # Nothing but the query itself: there is no corpus to search.
        return fallback

    # NOTE(review): the notebook's recorded output shows a scikit-learn
    # 'stop_words' consistency warning on every call; presumably LemNormalize
    # alters some of the built-in English stop words -- confirm.
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)

    # Score the query (last row) against the corpus rows only. Leaving the
    # query out of the candidates means it can never be returned as its own
    # answer, and a single argmax replaces sorting the scores twice.
    vals = cosine_similarity(tfidf[-1], tfidf[:-1])
    idx = vals.argmax()
    req_tfidf = vals[0, idx]

    if req_tfidf == 0:
        # No term overlap with any corpus sentence.
        return fallback
    return sent_tokens[idx]

#### Initialize the bot and run the chat loop

In [7]:
# Interactive chat loop: read a line, answer it, and stop on "bye" or a
# thank-you message.
flag = True
print("Bot: My name is Alex. I will answer your queries. If you want to exit, type Bye!")
while flag:
    # Strip surrounding whitespace so inputs such as "bye " still end the chat.
    user_response = input().strip().lower()
    if user_response == 'bye':
        flag = False
        print("Bot: Bye! take care..")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("Bot: You are welcome..")
    else:
        # Call greeting() once: it picks its reply at random, so a second call
        # is wasted work and may return a different string.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("Bot: " + greeting_reply)
        else:
            # response() appends the query to sent_tokens. Truncate the list
            # back to its previous length afterwards -- even if response()
            # raises -- so the corpus is never left polluted and no corpus
            # sentence equal to the query gets removed by value.
            corpus_size = len(sent_tokens)
            try:
                reply = response(user_response)
            finally:
                del sent_tokens[corpus_size:]
            # Print prefix and reply together so warnings emitted while
            # computing the reply cannot split the line.
            print("Bot: " + reply)

Bot: My name is Alex. I will answer your queries. If you want to exit, type Bye!
hi
Bot: I am glad! You are talking to me
what is chatbot
Bot: 

  'stop_words.' % sorted(inconsistent))


design
the chatbot design is the process that defines the interaction between the user and the chatbot.the chatbot designer will define the chatbot personality, the questions that will be asked to the users, and the overall interaction.it can be viewed as a subset of the conversational design.
what is hsbc


  'stop_words.' % sorted(inconsistent))


Bot: hsbc is one of the world’s largest banking and financial services organisations.
who is alan turing


  'stop_words.' % sorted(inconsistent))


Bot: in 1950, alan turing's famous article "computing machinery and intelligence" was published, which proposed what is now called the turing test as a criterion of intelligence.
what is china


  'stop_words.' % sorted(inconsistent))


Bot: I am sorry! I don't understand you
what is india


  'stop_words.' % sorted(inconsistent))


Bot: I am sorry! I don't understand you
bye
Bot: Bye! take care..
