# Importing Libraries

In [1]:
import numpy as np
import nltk
import string
import random

# Importing and reading the corpus

In [2]:
# Read the whole corpus inside a context manager so the file handle is
# always closed, even if reading fails part-way through.
with open('corpus.txt', 'r', errors='ignore') as f:  # undecodable bytes are dropped
    raw_doc = f.read()
raw_doc = raw_doc.lower() #Converts text to lowercase
nltk.download('punkt') #using the Punkt tokenizer
nltk.download('wordnet') #using the wordnet dictionary
sent_tokens = nltk.sent_tokenize(raw_doc) #converts doc to list of sentences
word_tokens = nltk.word_tokenize(raw_doc) #converts doc to list of words

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\veena\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\veena\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
# Preview the first two sentence tokens to sanity-check the sentence split.
sent_tokens[:2]

[' tell me about the american civil war\ndo you think the south was right?',
 'do you know about the american civil war\ni am very interested in the war between the states.']

In [4]:
# Preview the first two word tokens to sanity-check the word split.
word_tokens[:2]

['tell', 'me']

# Text Preprocessing

In [5]:
# Single shared lemmatizer instance, backed by WordNet (a semantically
# oriented dictionary of English included in NLTK).
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Lemmatize every token with the shared ``lemmer``.

    Args:
        tokens: iterable of word strings.

    Returns:
        A list holding the lemma of each token, in the original order.
    """
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas
# Translation table for ``str.translate``: every code point listed in
# ``string.punctuation`` maps to None, which deletes that character.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}


def LemNormalize(text):
    """Turn raw text into a list of lemmatized tokens.

    The text is lowercased, stripped of punctuation via
    ``remove_punct_dict``, split into words with ``nltk.word_tokenize``
    and finally passed through ``LemTokens``.

    Args:
        text: the string to normalize.

    Returns:
        The list produced by ``LemTokens`` for the cleaned-up words.
    """
    lowered = text.lower()
    without_punct = lowered.translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)

# Defining the greeting function

In [6]:
GREET_INPUTS = ("hello", "hi", "greetings", "whatsup", "hey", "hey there", "what's up", "hello there" )
GREET_RESPONSES = ("hi", "hey", "*nods*", "hi there", "Hello! I'm glad you are talking to me", "hello!", "Hi, I'm glad you noticed me")
def greet(sentence):
    """Return a random greeting if ``sentence`` contains a known greeting.

    Matching is case-insensitive and works on whole words, so "hi" does not
    match inside "this" or "high". Punctuation attached to the edges of a
    word is ignored ("Hi!" matches "hi"), and multi-word entries of
    ``GREET_INPUTS`` such as "what's up" are matched as phrases rather than
    word by word.

    Args:
        sentence: the user's input string.

    Returns:
        A random element of ``GREET_RESPONSES`` when a greeting is found,
        otherwise ``None``.
    """
    # Strip punctuation only from word edges so the apostrophe inside
    # "what's" survives and the phrase "what's up" can still match.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    # Pad with spaces so every greeting can be searched as " greeting ",
    # which enforces whole-word (and whole-phrase) boundaries.
    padded = " " + " ".join(word for word in words if word) + " "
    for greeting in GREET_INPUTS:
        if " " + greeting + " " in padded:
            return random.choice(GREET_RESPONSES)
    return None

# Response Generation

In [7]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
def response(user_response):
    """Return the corpus sentence most similar to the user's query.

    The function reads the module-level ``sent_tokens`` list and expects the
    caller to have appended the user's sentence as its LAST element; the
    TF-IDF row of that last element is the query vector. The query row is
    excluded from the candidates by position, so a corpus sentence identical
    to the query is returned rather than the query being echoed back.

    Args:
        user_response: the user's sentence. It is not read directly here
            (the query is taken from ``sent_tokens[-1]``); the parameter is
            kept so existing callers keep working.

    Returns:
        The best-matching sentence from ``sent_tokens[:-1]``, or the fallback
        apology string when nothing in the corpus has a non-zero cosine
        similarity with the query.
    """
    fallback = "I am sorry! I don't understand you"
    # With fewer than two entries there is no corpus sentence to compare to.
    if len(sent_tokens) < 2:
        return fallback
    TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words = 'english')
    tfidf = TfidfVec.fit_transform(sent_tokens)
    # Compare the query (last row) against the corpus rows only.
    similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best_idx = similarities.argmax()
    # A best score of zero means the query shares no weighted term with
    # any corpus sentence.
    if similarities[best_idx] == 0:
        return fallback
    return sent_tokens[best_idx]

In [None]:
# Defining Conversation Start/End Protocols
# Interactive loop: reads a line from the user, answers with a greeting, a
# corpus sentence chosen by response(), or an exit message. The loop ends on
# "bye", "thanks" or "thank you".

flag = True
print("Bot: My name is Akime. Let's have a conversation! Also, if you want to exit any time, just type Bye!")
while flag:
    # Normalise once and keep the result, so "Bye" (as the banner
    # suggests) and "Thanks " are recognised as exit commands.
    user_response = input("User: ").lower().strip()
    if user_response == 'bye':
        flag = False
        print("Akime: Goodbye! Take care <3")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("Akime: You are welcome...")
    else:
        # Call greet() once: it picks a random reply, so a second call
        # would make a second, independent draw.
        greeting = greet(user_response)
        if greeting is not None:
            print("Akime: " + greeting)
        else:
            # response() expects the query as the last element of sent_tokens.
            sent_tokens.append(user_response)
            try:
                word_tokens = word_tokens + nltk.word_tokenize(user_response)
                final_words = list(set(word_tokens))
                print("Akime: ", end="")
                print(response(user_response))
            finally:
                # Remove exactly the entry appended above, even if response()
                # raised; remove() could delete an identical corpus sentence.
                sent_tokens.pop()

Bot: My name is Akime. Let's have a conversation! Also, if you want to exit any time, just type Bye!
User: hi
Akime: hi there
User: i am bored
Akime: 



do you ever get bored
are you bored?
User: yesi am
Akime: I am sorry! I don't understand you
User: yes
Akime: yes it is.
User: what is it?
Akime: I am sorry! I don't understand you
User: tell me a joke
Akime: tell me a joke
o'm a not a comedy why don't you check out a joke?
