<a href="https://colab.research.google.com/github/ajay041100/chatbtot_project/blob/main/chatbot_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import numpy as np
import nltk
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download necessary NLTK data
nltk.download('punkt')

# Define chatbot knowledge base (questions & answers)
qa_pairs = {
    # Greetings
    "hello": "Hello! How can I assist you today?",
    "hi": "Hi there! What can I help you with?",
    "how are you": "I'm just a chatbot, but I'm here to assist you!",
    "good morning": "Good morning! How's your day going?",
    "good night": "Good night! Take care!",
    "bye": "Goodbye! Have a great day! 😊",
    "thank you": "You're welcome! I'm always here to help. 😊",

    # Artificial Intelligence
    "what is ai": "Artificial Intelligence (AI) is the simulation of human intelligence in machines.",
    "what is machine learning": "Machine Learning is a subset of AI that enables systems to learn from data and improve over time.",
    "what is deep learning": "Deep Learning is a type of machine learning that uses neural networks with multiple layers to analyze data.",
    "what is reinforcement learning": "Reinforcement Learning is a type of AI where agents learn by interacting with the environment to maximize rewards.",
    "what are neural networks": "Neural Networks are computing systems inspired by the human brain, used for pattern recognition and AI applications.",

    # Programming & Python
    "what is python": "Python is a high-level, interpreted programming language known for its simplicity and versatility.",
    "who created python": "Python was created by Guido van Rossum and released in 1991.",
    "what is programming": "Programming is the process of writing code to instruct a computer to perform specific tasks.",
    "what is a variable": "A variable is a container for storing data values in programming.",
    "what is a function": "A function is a reusable block of code that performs a specific task.",
    "what is an algorithm": "An algorithm is a step-by-step set of instructions for solving a problem.",
    "what is a loop": "A loop allows a block of code to be executed repeatedly based on a condition.",
    "what is recursion": "Recursion is a programming technique where a function calls itself to solve a problem.",

    # Computer Science Concepts
    "what is data structure": "A data structure is a way of organizing and storing data efficiently.",
    "what is an array": "An array is a collection of elements stored at contiguous memory locations.",
    "what is a linked list": "A linked list is a linear data structure where each element points to the next.",
    "what is a stack": "A stack is a LIFO (Last In, First Out) data structure used for managing data.",
    "what is a queue": "A queue is a FIFO (First In, First Out) data structure used for handling tasks sequentially.",
    "what is a binary tree": "A binary tree is a tree data structure where each node has at most two children.",

    # Mathematics & Logical Reasoning
    "what is pi": "Pi (π) is a mathematical constant approximately equal to 3.14159, representing the ratio of a circle's circumference to its diameter.",
    "what is the pythagorean theorem": "The Pythagorean theorem states that in a right triangle, the square of the hypotenuse is equal to the sum of the squares of the other two sides.",
    "what is probability": "Probability is a branch of mathematics that deals with the likelihood of an event occurring.",

    # Fun & Random Facts
    "tell me a joke": "Why do programmers prefer dark mode? Because the light attracts bugs! 😂",
    "tell me a fun fact": "Did you know? The first computer bug was an actual moth found inside a Harvard Mark II computer in 1947!",
    "who is the father of computer science": "Alan Turing is considered the father of computer science for his work on computation and AI.",
    "what is the speed of light": "The speed of light is approximately 299,792,458 meters per second.",
    "who discovered gravity": "Sir Isaac Newton formulated the laws of gravity in the 17th century."
}

# Prepare corpus and responses
corpus = list(qa_pairs.keys())  # List of predefined questions
responses = list(qa_pairs.values())  # Corresponding responses

# Preprocessing function
def preprocess_text(text):
    text = text.lower().translate(str.maketrans('', '', string.punctuation))  # Lowercase & remove punctuation
    return text

# Apply preprocessing to corpus
preprocessed_corpus = [preprocess_text(q) for q in corpus]

# Vectorize corpus
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_corpus)

# Chatbot response function
def chatbot_response(user_input):
    user_input = preprocess_text(user_input)  # Preprocess user input
    user_tfidf = vectorizer.transform([user_input])  # Convert to vector

    # Compute cosine similarity
    cosine_similarities = cosine_similarity(user_tfidf, tfidf_matrix)
    similarity_score = cosine_similarities.flatten()

    # Find the most relevant question
    response_index = similarity_score.argmax()

    # Check if the similarity is above a threshold
    if similarity_score[response_index] > 0.3:  # Adjusted threshold for better accuracy
        return responses[response_index]
    else:
        return "I'm sorry, I don't understand. Can you rephrase your question?"

# Start chatbot
print("Chatbot: Hello! Ask me anything about AI, Python, or general topics. Type 'exit' to end the chat.")

while True:
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        print("Chatbot: Goodbye! Have a great day! 😊")
        break
    print("Chatbot:", chatbot_response(user_input))


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Chatbot: Hello! Ask me anything about AI, Python, or general topics. Type 'exit' to end the chat.
You: how's the day?
Chatbot: The Pythagorean theorem states that in a right triangle, the square of the hypotenuse is equal to the sum of the squares of the other two sides.
You: how are you?
Chatbot: I'm just a chatbot, but I'm here to assist you!
You: 2+2
Chatbot: I'm sorry, I don't understand. Can you rephrase your question?
You: what is the sum of 2+2 ?
Chatbot: The speed of light is approximately 299,792,458 meters per second.
You: speed of sound?
Chatbot: The speed of light is approximately 299,792,458 meters per second.
You: what is array?
Chatbot: An array is a collection of elements stored at contiguous memory locations.
You: exit
Chatbot: Goodbye! Have a great day! 😊
