<a href="https://colab.research.google.com/github/Vishnu448/CodeAlpha/blob/main/Chatbox.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Install nltk
!pip install nltk




In [14]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
import numpy as np

# Download the NLTK resources used by this notebook:
#   punkt / punkt_tab - sentence/word tokenizer models used by word_tokenize
#   wordnet           - dictionary backing WordNetLemmatizer
#   stopwords         - English stop-word list
# 'punkt_tab' is the tokenizer resource looked up by recent NLTK releases;
# 'punkt' is kept so that older releases continue to work as well.
for nltk_resource in ('punkt', 'punkt_tab', 'wordnet', 'stopwords'):
    nltk.download(nltk_resource)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [17]:
# English stop words, held in a set for fast membership checks
stop_words = {word for word in stopwords.words('english')}

# Shared WordNet lemmatizer instance used by preprocess()
lemmatizer = WordNetLemmatizer()

# Sample conversation data: each known user prompt mapped to the bot's reply,
# then flattened into an ordered list of (prompt, reply) tuples.
_prompt_to_reply = {
    "hello": "Hi there! How can I help you?",
    "hi": "Hello! How are you?",
    "how are you": "I'm doing well, thank you. How can I assist you today?",
    "what is your name": "I am a chatbot created by Vishnu. What can I do for you?",
    "what can you do": "I can chat with you and answer basic questions. How can I assist you?",
    "bye": "Goodbye! Have a great day!",
    "who created you": "I was created by Vishnu.",
    "help": "How can I assist you? What do you need help with?",
}
conversation_data = list(_prompt_to_reply.items())

# Preprocess text
def preprocess(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    The text is lower-cased and tokenized, tokens that are not purely
    alphanumeric (punctuation etc.) are dropped, stop words are removed and
    the remaining tokens are lemmatized.

    If stop-word removal would discard *every* token (short phrases such as
    "how are you" or "what can you do" consist only of stop words), the
    unfiltered tokens are kept instead, so that such phrases do not collapse
    into an empty string that can never be matched.

    Args:
        text: The raw input string.

    Returns:
        The normalized tokens joined by single spaces; an empty string when
        the input contains no alphanumeric tokens at all.
    """
    # Tokenize and drop punctuation / non-alphanumeric tokens
    alnum_tokens = [word for word in word_tokenize(text.lower()) if word.isalnum()]
    # Remove stop words, but fall back to all tokens if nothing would remain
    content_tokens = [word for word in alnum_tokens if word not in stop_words]
    kept_tokens = content_tokens or alnum_tokens
    # Lemmatize tokens
    return ' '.join(lemmatizer.lemmatize(token) for token in kept_tokens)

# Normalized question texts and their replies, kept as two parallel tuples
questions = tuple(preprocess(prompt) for prompt, _ in conversation_data)
responses = tuple(reply for _, reply in conversation_data)

# The same data as (normalized question, reply) pairs
preprocessed_conversations = list(zip(questions, responses))

# Build the TF-IDF vocabulary from the normalized questions
vectorizer = TfidfVectorizer()
vectorizer.fit(questions)

def get_response(user_input, fallback="I'm sorry, I didn't understand that. Could you rephrase?"):
    """Return the canned reply whose question best matches ``user_input``.

    The input is normalized with ``preprocess``, projected into the TF-IDF
    space of the fitted ``vectorizer`` and compared against every known
    question using cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        fallback: Reply returned when the input shares no vocabulary with any
            known question (i.e. the best similarity is 0).

    Returns:
        The reply paired with the most similar question, or ``fallback`` when
        nothing matches.
    """
    # Preprocess the user input
    processed_input = preprocess(user_input)
    # Transform the user input and the known questions with the TF-IDF vectorizer
    input_vec = vectorizer.transform([processed_input])
    questions_vec = vectorizer.transform(questions)
    # Cosine similarity between the input and each question (flattened to 1-D)
    similarities = cosine_similarity(input_vec, questions_vec).ravel()
    # Index of the most similar question
    index = int(np.argmax(similarities))
    # With all similarities at zero, argmax would silently pick the first
    # entry; answer with the fallback instead of an unrelated reply.
    if similarities[index] <= 0:
        return fallback
    return responses[index]

# Create a function to simulate interaction
def chatbot_colab():
    """Run an interactive console chat loop until the user says 'bye'.

    Each line read from ``input`` is stripped of surrounding whitespace and
    lower-cased. Blank lines are ignored, 'bye' ends the session, and any
    other text is answered via ``get_response``. An end-of-input or a
    keyboard interrupt while waiting for input also ends the session
    instead of raising.
    """
    print("Chatbot: Hi! How can I help you? (Type 'bye' to exit)")
    while True:
        try:
            user_input = input("You: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or cell interrupted: leave the loop cleanly
            print("\nChatbot: Goodbye!")
            break
        if not user_input:
            # Nothing typed; prompt again rather than guessing a reply
            continue
        if user_input == 'bye':
            print("Chatbot: Goodbye!")
            break
        response = get_response(user_input)
        print(f"Chatbot: {response}")

# Simulate interaction
# Start the chat loop only when this code runs as the main program. The call
# blocks on input() until the user types 'bye'.
if __name__ == "__main__":
    chatbot_colab()


Chatbot: Hi! How can I help you? (Type 'bye' to exit)
You: bye
Chatbot: Goodbye!
