In [None]:
import string
import random
import numpy as np
import nltk
import spacy

In [None]:
# Load the knowledge-base text the bot answers from. The context manager
# guarantees the file handle is closed even if reading fails (the original
# left it open for the lifetime of the kernel).
with open('chatbot.txt', 'r', errors='ignore') as f:
    raw_doc = f.read()

# Lower-case once so that matching downstream is case-insensitive.
raw_doc = raw_doc.lower()

# Fetch the NLTK resources used below: 'punkt' backs the tokenizers and
# 'wordnet' backs the lemmatizer. These calls are no-ops when the data is
# already present.
nltk.download('wordnet')
nltk.download('punkt')

# Sentence list = candidate answers; word list = flat token stream.
sent_tokens = nltk.sent_tokenize(raw_doc)
word_tokens = nltk.word_tokenize(raw_doc)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
def _get_lemmer():
    """Return the shared WordNet lemmatizer, creating it on first use.

    The module-level name ``lemmer`` is reused when something has already
    defined it; otherwise it is created here and stored under that name so
    later calls share the same instance.
    """
    global lemmer
    try:
        return lemmer
    except NameError:
        # ``lemmer`` is not defined anywhere before this point, so a fresh
        # kernel would otherwise fail with NameError on the first call.
        lemmer = nltk.stem.WordNetLemmatizer()
        return lemmer


def LemTokens(tokens, lemmatizer=None):
    """Lemmatize every token in ``tokens``.

    Args:
        tokens: Iterable of word strings.
        lemmatizer: Optional object exposing ``lemmatize(token)``. When
            omitted, the shared WordNet lemmatizer is used.

    Returns:
        A list with one lemmatized string per input token, in input order.
    """
    if lemmatizer is None:
        lemmatizer = _get_lemmer()
    return [lemmatizer.lemmatize(token) for token in tokens]

# Translation table for str.translate: every ASCII punctuation code point
# maps to None, which deletes that character.
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))

def LemNormalize(text):
    """Normalize ``text`` into a list of lemmatized word tokens.

    Steps: lower-case, delete punctuation, word-tokenize with NLTK, then
    lemmatize each token via ``LemTokens``.
    """
    lowered = text.lower()
    without_punct = lowered.translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)

In [None]:
# Words that count as a greeting from the user, and the replies to pick from.
GREET_INPUTS = ("hello", "hi", "greetings", "sup", "hey",)
GREET_RESPONSES = ["hi", "hey", "hi there", "hello", "I am glad! You are talking to me"]

def greet(sentence):
    """Return a random greeting if ``sentence`` contains a greeting word.

    Matching is per whitespace-separated word, case-insensitive, and ignores
    punctuation attached to the word, so "Hello!" and "hi," are recognised.

    Args:
        sentence: The raw user input.

    Returns:
        One entry of ``GREET_RESPONSES`` chosen at random, or ``None`` when
        no word of ``sentence`` is a greeting.
    """
    for word in sentence.split():
        # Strip surrounding punctuation so "hello!" still matches "hello".
        if word.lower().strip(string.punctuation) in GREET_INPUTS:
            return random.choice(GREET_RESPONSES)
    return None

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query is vectorised together with the sentences in ``sent_tokens``
    using TF-IDF, and the sentence with the highest cosine similarity to the
    query is returned.

    The query is taken from the ``user_response`` argument. Callers that
    follow the older convention of appending the query to ``sent_tokens``
    before calling are still supported: when the last corpus entry already
    equals ``user_response`` it is used as the query and not added again.
    ``sent_tokens`` itself is never modified here.

    Args:
        user_response: The user's message.

    Returns:
        The best-matching sentence, or a fixed apology string when there is
        nothing to compare against or nothing shares any term with the query.
    """
    fallback = "I am sorry! I don't understand you."

    # Work on a copy so the shared corpus is not polluted by user input.
    corpus = list(sent_tokens)
    if not corpus or corpus[-1] != user_response:
        corpus.append(user_response)
    if len(corpus) < 2:
        # Only the query itself is available; the original would raise
        # IndexError on the [-2] lookup in this situation.
        return fallback

    TfidVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidVec.fit_transform(corpus)

    # Similarity of the query (last row) to every other row. The final
    # column is the query compared with itself, so it is dropped explicitly
    # rather than relying on it sorting into last place.
    scores = cosine_similarity(tfidf[-1], tfidf).flatten()[:-1]
    best = scores.argmax()
    if scores[best] == 0:
        return fallback
    return corpus[best]

In [None]:
# spaCy pipeline shared by the helpers below (small English model).
nlp = spacy.load("en_core_web_sm")

def entity_recognition(text):
    """Run the spaCy pipeline on ``text`` and list the entities it finds.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entry in
        the parsed document's ``ents``.
    """
    parsed = nlp(text)
    return [(span.text, span.label_) for span in parsed.ents]

def handle_ambiguity(user_response):
    """Ask for clarification on booking/ordering requests, else answer.

    A message is treated as ambiguous when any of its words starts with one
    of the keywords ("book", "reservation", "order"). Matching is
    case-insensitive and works on whole words, so "Booking" and "orders"
    match while "facebook" or "border" do not.

    NOTE(review): a clarifying reply that itself contains one of the
    keywords (e.g. "booking a reservation") triggers the same prompt again;
    resolving that needs conversation state this function does not have.

    Args:
        user_response: The user's message.

    Returns:
        The clarification prompt for ambiguous messages, otherwise whatever
        ``response(user_response)`` returns.
    """
    ambiguous_keywords = ("book", "reservation", "order")

    # Turn punctuation into spaces so "pre-order" and "order," split into
    # clean words before the prefix test.
    punct_to_space = str.maketrans(string.punctuation, " " * len(string.punctuation))
    words = user_response.lower().translate(punct_to_space).split()

    if any(word.startswith(ambiguous_keywords) for word in words):
        return "Can you please clarify? Are you referring to booking a reservation or placing an order?"

    # No ambiguity found, return the regular response
    return response(user_response)

In [None]:

# Interactive chat loop: reads a line, answers it, and stops on "bye" or a
# thank-you message.
flag = True
print("BOT: My name is UDIT. Let's have a conversation! Also, If you want to exit any time, just type Bye!")
while flag:
    # Normalise once: surrounding whitespace would otherwise defeat the
    # exact-match checks below (e.g. "bye ").
    user_response = input("You: ").strip().lower()
    if user_response == 'bye':
        flag = False
        print("BOT: Goodbye! Take care <3")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("BOT: You are welcome.")
    else:
        # Call greet() once; calling it twice draws two independent random
        # replies and only prints the second.
        greeting = greet(user_response)
        if greeting is not None:
            print("BOT: " + greeting)
        else:
            # The retrieval step treats the LAST entry of sent_tokens as the
            # query, so the input must be in place BEFORE the reply is
            # computed. It is removed again afterwards so that user messages
            # never become candidate answers for later questions.
            sent_tokens.append(user_response)
            try:
                print("BOT: " + handle_ambiguity(user_response))  # Use handle_ambiguity to handle ambiguous queries
            finally:
                sent_tokens.pop()

BOT: My name is UDIT. Let's have a conversation! Also, If you want to exit any time, just type Bye!
You: hi
BOT: hi there
You: order food
BOT: Can you please clarify? Are you referring to booking a reservation or placing an order?
You: booking a reservation
BOT: Can you please clarify? Are you referring to booking a reservation or placing an order?
You: bye
BOT: Goodbye! Take care <3


In [17]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Hand-entered binary labels: the expected outcome for five interactions and
# the outcome that was actually observed.
ground_truth_labels = [1, 0, 1, 1, 1]
predicted_labels = [1, 1, 0, 1, 1]

# Score all three metrics against the same (truth, prediction) pair.
precision, recall, f1 = (
    scorer(ground_truth_labels, predicted_labels)
    for scorer in (precision_score, recall_score, f1_score)
)

print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

Precision: 0.75
Recall: 0.75
F1-Score: 0.75
