In [None]:
#Imports
import string
import random
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import PunktSentenceTokenizer
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from termcolor import colored

In [None]:
#NLTK packages download
# Fetch the NLTK data packages this notebook relies on.
# punkt: tokenizer models (unzipped under tokenizers/) — presumably what
# sent_tokenize / word_tokenize need; confirm against the NLTK version in use.
nltk.download('punkt') 
# stopwords: corpus behind the `stopwords` import in the imports cell.
nltk.download('stopwords')
# averaged_perceptron_tagger: tagger model; no direct use is visible in this
# notebook — TODO confirm it is still required.
nltk.download('averaged_perceptron_tagger')
# wordnet: corpus presumably required by the WordNetLemmatizer created in the
# global-variables cell.
nltk.download('wordnet') 
# nps_chat: chat corpus read through nltk.corpus.nps_chat when the classifier
# is trained. As the last expression, its return value is the cell's output.
nltk.download('nps_chat')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package nps_chat to /root/nltk_data...
[nltk_data]   Unzipping corpora/nps_chat.zip.


True

In [40]:
#Global Constants
# Path of the FAQ text file that is loaded into the bot.
FILENAME = "faq_dap_projekat"

# Lower-case words that count as a greeting from the user.
GREETING_INPUTS = (
    "hello",
    "hi",
    "hey",
)

# Replies the bot chooses from at random when it is greeted.
GREETING_RESPONSES = [
    "hello",
    "hi",
    "hey",
    "hi there",
]

In [41]:
#Global Variables
# Lemmatizer shared by lemmatise() / tokenise().
lem = nltk.stem.WordNetLemmatizer()
# str.translate() table: every ASCII punctuation code point maps to None,
# i.e. the character is deleted.
remove_punctuation = {ord(punct): None for punct in string.punctuation}

# Read the FAQ file once. The context manager guarantees the handle is closed,
# and the explicit encoding keeps decoding independent of the platform default
# (undecodable bytes are still skipped via errors='ignore').
# NOTE: the misspelt name `quesstion_file_text` is kept so that any cell
# referring to it keeps working.
with open(FILENAME, 'r', encoding='utf-8', errors='ignore') as question_file:
    quesstion_file_text = question_file.read().lower()
sent_tokens = nltk.sent_tokenize(quesstion_file_text)  # list of sentences
word_tokens = nltk.word_tokenize(quesstion_file_text)  # list of words
# Dump the sentence split so the segmentation can be inspected by eye.
for sentence in sent_tokens:
    print(sentence + '++++\n')
# Maps a question's index in question_list to [first_answer_sentence, end)
# positions in sent_tokens.
question_answers_dictionary = {}
question_list = []    # all questions, in file order
sentence_counter = 0  # cursor into sent_tokens used by the extraction cell

q: what are your shipping options?++++

we ship with dhl and ups, which often complete delivery via usps.++++

q: do you offer expedited shipping?++++

we don’t offer expedited shipping at this time, but if you want to peer into the crystal ball to see when your order will arrive, email us at hello@magicspoon.com.++++

q: how long will it take for my order to arrive?++++

once you place your order, it’ll process within 1 business day.from there we’ll ship it out and send you your tracking number once it’s out the door.give your tracking number 24 hours to update once you receive it, and then wait patiently for your cereal game to completely change.++++

q: can i edit or cancel my order once it’s been placed?++++

hurry!there’s only a short window of time for us to edit or cancel your order.e-mail hello@magicspoon.com and we’ll do our best to work some magic for you.++++

q: how do i track my package?++++

you’ll receive a shipping confirmation with your tracking number as soon as your 

In [42]:
def fetch_features(chat):
    """Turn a piece of text into a bag-of-words feature dictionary.

    Args:
        chat: The text to featurise.

    Returns:
        A dict with one ``'contains(<word>)': True`` entry for every distinct
        lower-cased token that ``nltk.word_tokenize`` yields for ``chat``.
    """
    return {
        'contains({})'.format(token.lower()): True
        for token in nltk.word_tokenize(chat)
    }

In [43]:
def lemmatise(tokens):
    """Lemmatise every token with the shared ``lem`` lemmatizer.

    Args:
        tokens: Iterable of word strings.

    Returns:
        A new list holding ``lem.lemmatize(token)`` for each token, in order.
    """
    lemmas = []
    for token in tokens:
        lemmas.append(lem.lemmatize(token))
    return lemmas

In [44]:
def tokenise(text):
    """Normalise ``text`` into a list of lemmatised word tokens.

    The text is lower-cased, stripped of the characters listed in the
    ``remove_punctuation`` table, split with ``nltk.word_tokenize`` and finally
    passed through ``lemmatise``.

    Args:
        text: The raw string to tokenise.

    Returns:
        The list returned by ``lemmatise`` for the cleaned-up tokens.
    """
    lowered = text.lower()
    without_punctuation = lowered.translate(remove_punctuation)
    words = nltk.word_tokenize(without_punctuation)
    return lemmatise(words)


In [45]:
def greet(sentence):
    """Return a greeting reply if ``sentence`` contains a greeting word.

    Each whitespace-separated word is compared case-insensitively against
    ``GREETING_INPUTS`` after surrounding ASCII punctuation has been removed,
    so inputs such as "Hi!" or "hello," are recognised too.

    Args:
        sentence: The user's input line.

    Returns:
        A random element of ``GREETING_RESPONSES`` when a greeting word is
        found, otherwise ``None``.
    """
    for word in sentence.split():
        # Strip leading/trailing punctuation so "hi!" still equals "hi".
        if word.strip(string.punctuation).lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)
    return None

In [46]:
def match(user_response):
    """Return the stored FAQ answer whose question best matches the query.

    The query is appended to the global ``question_list`` (the caller is
    expected to remove it again afterwards). All entries are TF-IDF vectorised
    and the query is compared by cosine similarity with the *stored* questions
    only, so the query can never be selected as its own best match — not even
    when the user types a stored question verbatim.

    Args:
        user_response: The user's question.

    Returns:
        The answer sentences from ``sent_tokens``, each preceded by a single
        space, or an apology message when no stored question shares any term
        with the query.
    """
    fallback = "Sorry! I don't know the answer to this. Would you like to try again? Type By to exit"
    question_list.append(user_response)
    if len(question_list) < 2:
        # Nothing but the query itself: there is no stored question to match.
        return fallback

    tfidf = TfidfVectorizer(tokenizer=tokenise, stop_words='english').fit_transform(question_list)
    # Last row is the query; every row before it is a stored question.
    similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    idx = int(similarities.argmax())
    if similarities[idx] <= 0:
        return fallback

    resp_ids = question_answers_dictionary.get(idx)
    if resp_ids is None:
        # A question without a recorded answer span cannot be answered.
        return fallback
    start, end = resp_ids[0], resp_ids[1]
    return "".join(" " + sentence for sentence in sent_tokens[start:end])

In [51]:
#Training the classifier
# Pair the features of each of the first 10000 NPS Chat posts with the post's
# 'class' attribute.
chats = nltk.corpus.nps_chat.xml_posts()[:10000]
featuresets = []
for post in chats:
    featuresets.append((fetch_features(post.text), post.get('class')))

# The first 10% of the examples are held out for evaluation; the classifier is
# trained on the remainder.
size = int(len(featuresets) * 0.1)
test_set = featuresets[:size]
train_set = featuresets[size:]

classifier = nltk.NaiveBayesClassifier.train(train_set)
print(nltk.classify.accuracy(classifier, test_set))

0.668


In [52]:
#Extract questions and answers
#Answer is all the content between 2 questions [assumption]
# A question together with its answer covers at most this many sentences,
# i.e. an answer is capped at MAX_QA_SPAN - 1 sentences.
MAX_QA_SPAN = 5

# Classify every sentence exactly once instead of re-classifying while scanning.
is_question = [
    "question" in classifier.classify(fetch_features(sentence)).lower()
    for sentence in sent_tokens
]

# Start from a clean slate so that re-running this cell rebuilds the index
# rather than doing nothing (cursor already at the end) or duplicating entries.
question_list.clear()
question_answers_dictionary.clear()
sentence_counter = 0

while sentence_counter < len(sent_tokens):
    if not is_question[sentence_counter]:
        sentence_counter += 1
        continue

    # Scan forward to the next question, or to one past the last sentence.
    # Stopping at len(sent_tokens) (not len - 1) keeps the final sentence of
    # the file inside the last answer and avoids indexing past the end when
    # the very last sentence is itself a question.
    next_question_id = sentence_counter + 1
    while next_question_id < len(sent_tokens) and not is_question[next_question_id]:
        next_question_id += 1

    question_list.append(sent_tokens[sentence_counter])
    end = min(next_question_id, sentence_counter + MAX_QA_SPAN)
    # Key: position of the question in question_list.
    # Value: half-open [start, end) range of answer sentences in sent_tokens.
    question_answers_dictionary[len(question_list) - 1] = [sentence_counter + 1, end]
    sentence_counter = next_question_id

In [53]:
#Response Fetching
# Interactive loop: read a line, answer greetings directly, otherwise look the
# question up with match(); typing "by" ends the session.
flag=True
print(colored("BOT:\nI am Bot, Chat Bot. I have all the answers If you want to exit, type By",'blue',attrs=['bold']))
while flag:
    print(colored("\nYOU:",'red',attrs=['bold']))
    # Surrounding whitespace is dropped so that e.g. "by " still exits.
    u_input = input().strip().lower()
    if u_input == 'by':
        flag=False
        print(colored("\nBOT: Bye!",'blue', attrs=['bold']))
        continue

    print(colored("\nBOT:",'blue',attrs=['bold']))
    # Call greet() once so the checked and the printed reply are the same.
    greeting = greet(u_input)
    if greeting is not None:
        print(greeting)
        continue

    try:
        print(colored(match(u_input).strip().capitalize(),'blue'))
    finally:
        # match() appends the query to the end of question_list. Remove exactly
        # that trailing entry: list.remove() would delete the first equal
        # element, which is a stored question when the user typed one verbatim,
        # and would shift every index in question_answers_dictionary. Doing it
        # in `finally` also cleans up when match() raises.
        if question_list and question_list[-1] == u_input:
            question_list.pop()

[1m[34mBOT:
I am Bot, Chat Bot. I have all the answers If you want to exit, type By[0m
[1m[31m
YOU:[0m
hi
[1m[34m
BOT:[0m
hi
[1m[31m
YOU:[0m
how long stay fresh?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mOur cereal has a 9 month shelf-life.[0m
[1m[31m
YOU:[0m
do kids eat?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mMagic spoon is delicious with everything from whole milk to oat milk.our favorite is almond milk but you should choose your own adventure.if you want to get really creative, try it as a topping for yogurt or even healthy ice cream.[0m
[1m[31m
YOU:[0m
do kids like magis spoon?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mAbsolutely!and since it tastes just like their favorite “unhealthy” cereal, they’ll love eating this instead of other “healthy” cereals.[0m
[1m[31m
YOU:[0m
why is this more expensive than regular cereals?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mEven though magic spoon looks and tastes like most cereals you’ll find at the grocery store, nutritionally, it’s lightyears ahead.think of magic spoon as more of a high-end protein bar or keto smoothie.and at $1.95 per bowl, it’s far cheaper than those other healthy breakfast options (not to mention your morning coffee)!making cereal with high nutritional value means working with more expensive ingredients than just sugar, corn, and wheat, but it’s all part of our commitment to bring you the best![0m
[1m[31m
YOU:[0m
how long it takes to arrive?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mOur cereal has a 9 month shelf-life.[0m
[1m[31m
YOU:[0m
when it arrives?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mSorry! i don't know the answer to this. would you like to try again? type by to exit[0m
[1m[31m
YOU:[0m
how can i track package


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mYou’ll receive a shipping confirmation with your tracking number as soon as your order ships out.give that tracking number 24 hours to update once you receive it, and then you can follow your order on it’s journey to you.[0m
[1m[31m
YOU:[0m
can i get samples?


  % sorted(inconsistent)


[1m[34m
BOT:[0m
[34mWe aren’t able to send out any samples at the moment!however, feel free to order your first 4 pack case and give us a try!if you don’t love it, we’ll refund your first case in full.[0m
[1m[31m
YOU:[0m
by
[1m[34m
BOT: Bye![0m
