Importing the Libraries

In [1]:
import numpy as np
import nltk
import string
import random
#import pickle

Importing the corpus

In [5]:
# Read the whole corpus into memory. The context manager guarantees the file
# handle is closed even if reading fails (the original left it open).
with open("chatbot.txt",'r',errors = 'ignore') as f:
  raw_doc = f.read()

#Pre Processing and Text Case Handling

raw_doc = raw_doc.lower()# converts everything inside the file into lower case
nltk.download('punkt')# Punkt is NLTK's pre-trained sentence tokenizer model, required by the tokenize calls below
nltk.download('wordnet')# WordNet corpus, required by the WordNet lemmatizer
sent_tokens  = nltk.sent_tokenize(raw_doc)# converts doc into a list of sentences (each element in this list is a sentence)
word_tokens  = nltk.word_tokenize(raw_doc)# converts doc into a list of words (each element in this list is a word)

[nltk_data] Downloading package punkt to C:\Users\MR. DHRUV
[nltk_data]     VAIDH\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\MR. DHRUV
[nltk_data]     VAIDH\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Example of Sentence Token

In [6]:
# Display only the first two sentence tokens as a quick sanity check of the tokenizer output
sent_tokens[:2]

['data science is an interdisciplinary field that uses scientific methods, processes, algorithms and systems to extract knowledge and insights from structured and unstructured data,[1][2] and apply knowledge and actionable insights from data across a broad range of application domains.',
 'data science is related to data mining, machine learning and big data.']

Example of Word Token

In [7]:
# Display only the first two word tokens as a quick sanity check of the tokenizer output
word_tokens[:2]

['data', 'science']

Text Pre-Processing

In [8]:
# The WordNet lemmatizer ships with the nltk library.
# WordNet is a semantically-oriented dictionary of English in NLTK.
lemmer = nltk.stem.WordNetLemmatizer()
def LemTokens(tokens):
  """Return a list holding the lemmatized form of every token in ``tokens``."""
  return list(map(lemmer.lemmatize, tokens))
# Translation table that maps the code point of every punctuation character to None,
# so str.translate() deletes those characters.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}
def LemNormalize(text):
  """Lower-case ``text``, drop punctuation, tokenize into words and lemmatize each word."""
  cleaned = text.lower().translate(remove_punct_dict)
  words = nltk.word_tokenize(cleaned)
  return LemTokens(words)

Greeting Function

In [9]:
# Greetings (single words or short phrases) that the bot recognises in the user's input
GREET_INPUTS = ["hello" , "hi" , "greetings" , "sup" , "what's up" , "hey"]
# Replies the bot picks from at random after the user greets it
GREET_OUTPUTS = ["hi" , "hey" , "*nods*" , "hi there" , "hello" , "I'm glad! that you're talking to me"]
def greet(sentence):
  """Return a random greeting reply if ``sentence`` contains a known greeting.

  The sentence is lower-cased and split on whitespace, and punctuation is
  stripped from both ends of every word, so inputs such as "Hello!" are
  recognised. Matching is done on whole words/phrases, which lets the
  multi-word entry "what's up" match while "super" does not match "sup".

  Returns:
    One element of GREET_OUTPUTS, or None when no greeting is found.
  """
  words = [word.strip(string.punctuation) for word in sentence.lower().split()]
  # Pad with spaces so that a greeting only matches on word boundaries.
  padded = " " + " ".join(word for word in words if word) + " "
  for greeting in GREET_INPUTS:
    if " " + greeting + " " in padded:
      return random.choice(GREET_OUTPUTS)
  return None

Response Generation

In [10]:
# TfidfVectorizer combines Term Frequency (how many times a term appears in a document)
# with Inverse Document Frequency (how rare the term is across the corpus) and converts each sentence into a numeric vector that is machine readable

from sklearn.feature_extraction.text import TfidfVectorizer

# After we obtain the bag of words and their corresponding vectors, we use cosine similarity to get a normalized measure of how alike two vectors are

from sklearn.metrics.pairwise import cosine_similarity


In [11]:
# The sentences are converted into TF-IDF vectors so that they are machine readable;
# the similarity between those vectors is then used to pick a response.
def response(user_response):
  """Return the corpus sentence most similar to ``user_response``.

  The corpus sentences in ``sent_tokens`` plus the user's query are vectorized
  with TF-IDF (using ``LemNormalize`` as tokenizer and English stop words), and
  the corpus sentence with the highest cosine similarity to the query is
  returned. ``sent_tokens`` itself is never modified.

  If the caller has already appended the query as the last element of
  ``sent_tokens``, it is not added a second time.

  Returns:
    The best matching sentence, or an apology message when there is nothing
    to compare against or no sentence shares any term with the query.
  """
  fallback = "I'm Sorry" + "\n" + "I don't understand you"
  # Build a local document list whose last element is always the query.
  if sent_tokens and sent_tokens[-1] == user_response:
    documents = list(sent_tokens)
  else:
    documents = list(sent_tokens) + [user_response]
  # At least one corpus sentence besides the query is needed.
  if len(documents) < 2:
    return fallback
  TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words = "english")
  tfidf = TfidfVec.fit_transform(documents)
  # Similarity of the query (last row) to every corpus sentence; the final
  # entry is the query compared with itself, so it is dropped.
  scores = cosine_similarity(tfidf[-1] , tfidf).flatten()[:-1]
  idx = scores.argmax()
  if scores[idx] == 0:
    return fallback
  return documents[idx]

Conversation

In [None]:
class contact_details:
  """Contact information collected from the user for a free trial or a new account."""

  def __init__(self):
    self.first_name = ""
    self.last_name = ""
    self.email_id =""
    self.phone_number = ""
    self.state = ""
    self.city = ""
    self.board = ""
    self.grade = ""
    self.account_type = ""

  def enter_data(self,a):
    """Prompt for every contact field and set the account type.

    ``a`` is the menu option that triggered the data entry; it is converted
    with int(): 2 stores 'trial', 3 stores 'member', any other number leaves
    ``account_type`` unchanged.
    """
    self.first_name = input("First Name: ")
    self.last_name = input("Last Name: ")
    self.email_id = input("Email ID: ")
    self.phone_number = input("Phone Number: ")
    self.state = input("State: ")
    self.city = input("City: ")
    self.board = input("Board: ")
    self.grade = input("Grade: ")
    option = int(a)
    if option == 2:
      self.account_type = 'trial'
    elif option == 3:
      self.account_type = 'member'


# Main conversation loop: runs until the user types 'bye', 'thanks' or 'thank you'.
# The class above is defined once, outside the loop, instead of on every iteration.
# TODO: the collected contact details are not persisted anywhere yet.
flag = True
print("BOT: ",end="")
print("Welcome to Eduauraa."+"\n"+"Let's have a conversation."+"\n"+"If you want to exit, just type 'Bye'.")
while flag:
  # Strip surrounding whitespace so that inputs like "bye " or " 1" are still recognised.
  user_response = input().strip().lower()
  if user_response == 'bye':
    flag = False
    print("BOT: Goodbye!!......")
  elif user_response in ('thanks', 'thank you'):
    flag = False
    print("BOT: You are welcome.......")
  else:
    # Call greet() once: it picks a random reply, so one draw serves both the check and the output.
    greeting = greet(user_response)
    if greeting is not None:
      print("BOT: "+greeting)
      print("BOT: ",end="\n")
      print("Press 1 to know about the Courses being offered")
      print("Press 2 to Book a Free Trial")
      print("Press 3 for Creating a New Account")
      print("Press 4 for Customer Care Details ")
      print(" Type 'bye' to exit this conversation")
    if user_response == '1':
      print("BOT: ",end="")
      print("Courses")
    elif user_response == '2':
      print("BOT: ",end="")
      print("Enter Contact Details for Booking a Free Trial")
      user = contact_details()
      user.enter_data(user_response)
    elif user_response == '3':
      print("BOT: ",end="")
      print("Enter Contact Details for Creating a New Account")
      user = contact_details()
      user.enter_data(user_response)
    elif user_response == '4':
      print("BOT: ",end="")
      print("Customer Care Contact Details")

BOT: Welcome to Eduauraa.
Let's have a conversation.
If you want to exit, just type 'Bye'.
hey
BOT: I'm glad! that you're talking to me
BOT: 
Press 1 to know about the Courses being offered
Press 2 to Book a Free Trial
Press 3 for Creating a New Account
Press 4 for Customer Care Details 
 Type 'bye' to exit this conversation
1
BOT: Courses
