### **Q8**

In [18]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Dataset
# Maps each known question (key) to the answer text the bot prints (value).
# Keys are compared against user input after preprocessing, so the exact
# punctuation and casing of a question do not affect matching.
dataset = {
    'What is the capital of France?': 'The capital of France is Paris.',
    'Who painted the Mona Lisa?': 'The Mona Lisa was painted by Leonardo da Vinci.',
    # Add more questions and answers to the dataset
}

# Set up NLTK
# Fetch the resources used by the tokenizer, stopword filter and lemmatizer.
# nltk.download() skips packages that are already up to date.
nltk.download('stopwords')
nltk.download('punkt')
# Newer NLTK releases load the Punkt tokenizer from the 'punkt_tab' resource
# rather than the pickled 'punkt' model; without it word_tokenize() raises
# LookupError. Keeping both downloads supports old and new NLTK versions.
nltk.download('punkt_tab')
nltk.download('wordnet')

# English stopwords as a set for O(1) membership tests during preprocessing.
stop_words = set(stopwords.words('english'))
# Shared lemmatizer instance reused for every token.
lemmatizer = WordNetLemmatizer()

# Preprocess text
def preprocess_text(text):
    """Turn raw text into a list of normalized content words.

    The text is tokenized with ``word_tokenize``; tokens that are not purely
    alphabetic or whose lowercase form is an English stopword are dropped,
    and each remaining token is lowercased and lemmatized.

    Args:
        text: The raw input string.

    Returns:
        A list of lowercased, lemmatized words in their original order
        (duplicates are kept).
    """
    normalized = []
    for token in word_tokenize(text):
        # Skip punctuation, numbers and mixed tokens such as "3rd".
        if not token.isalpha():
            continue
        lowered = token.lower()
        # Skip very common words that carry no matching signal.
        if lowered in stop_words:
            continue
        normalized.append(lemmatizer.lemmatize(lowered))
    return normalized

# Calculate similarity between two texts using Jaccard similarity
def calculate_similarity(text1, text2):
    """Return the Jaccard similarity of two token collections.

    Jaccard similarity is ``|A & B| / |A | B|`` where A and B are the sets of
    distinct items in each input.

    Args:
        text1: An iterable of hashable tokens (e.g. a list of words).
        text2: An iterable of hashable tokens.

    Returns:
        A float in ``[0.0, 1.0]``. When both inputs are empty the union is
        empty and the ratio is undefined; ``0.0`` is returned in that case so
        that two empty texts are treated as "no match" instead of raising
        ``ZeroDivisionError``.
    """
    set1 = set(text1)
    set2 = set(text2)
    union = set1 | set2

    # Both inputs empty (e.g. texts made only of stopwords): avoid 0 / 0.
    if not union:
        return 0.0

    return len(set1 & set2) / len(union)

# Find the best matching question from the dataset
def find_matching_question(user_input):
    """Return the dataset question most similar to ``user_input``.

    Both the user input and every question in ``dataset`` are preprocessed
    and compared with ``calculate_similarity``.

    Args:
        user_input: The raw text typed by the user.

    Returns:
        The key of ``dataset`` with the highest similarity score, or ``None``
        when no question scores above 0 (including when ``dataset`` is
        empty). If several questions tie, the one that comes first in
        ``dataset`` wins.
    """
    query_tokens = preprocess_text(user_input)

    # Score every stored question against the query, in dataset order.
    scored = [
        (calculate_similarity(query_tokens, preprocess_text(candidate)), candidate)
        for candidate in dataset
    ]

    # max() keeps the first maximal element, so earlier questions win ties.
    top_score, top_question = max(
        scored, key=lambda pair: pair[0], default=(0, None)
    )

    # A score of 0 means no shared words at all: report "no match".
    return top_question if top_score > 0 else None

# Build knowledge bot
def build_knowledge_bot():
    """Run the interactive question-answering loop on the console.

    Prints a greeting, then repeatedly prompts with ``"User: "``. For each
    input, the best matching question is looked up with
    ``find_matching_question`` and its answer from ``dataset`` is printed;
    if nothing matches, an apology is printed instead. Typing ``exit``
    (in any letter case) ends the loop.

    Returns:
        None.
    """
    print("Knowledge Bot: Hello! I am a knowledge bot. Ask me anything!")
    fallback = "Knowledge Bot: I'm sorry, but I don't have the answer to that question."

    while True:
        query = input("User: ")

        # Case-insensitive quit command.
        if query.lower() == 'exit':
            return

        # Find the best matching question from the dataset
        match = find_matching_question(query)
        if not match:
            print(fallback)
            continue

        print("Knowledge Bot:", dataset[match])

# Main program
if __name__ == '__main__':
    # Start the interactive loop only when this code is the entry point,
    # not when it is imported as a module.
    build_knowledge_bot()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Knowledge Bot: Hello! I am a knowledge bot. Ask me anything!
User: France capital
Knowledge Bot: The capital of France is Paris.
User: Mona
Knowledge Bot: The Mona Lisa was painted by Leonardo da Vinci.
User: exit
