In [1]:
pip install nltk scikit-learn streamlit

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
import nltk  # For natural language processing
import random  # To randomly select responses
import os  # For file handling (if needed)
import ssl  # To handle SSL issues in nltk downloads
import streamlit as st  # For web-based chatbot UI
from sklearn.feature_extraction.text import TfidfVectorizer  # Corrected typo
from sklearn.linear_model import LogisticRegression  # For text classification

In [2]:
ssl._create_default_https_context = ssl._create_unverified_context
nltk.data.path.append(os.path.abspath('nltk_data'))
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\91852\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
intents = [
    {
        "tag": "greeting",
        "patterns": ["Hi", "Hello", "Hey", "What's up?", "How are you?"],
        "responses": ["Hi there!", "Hello!", "Hey!", "Not much, you?", "I'm doing well, thanks!"]
    },
    {
        "tag": "goodbye",
        "patterns": ["Bye", "See you", "Goodbye", "Take care"],
        "responses": ["Goodbye!", "See you soon!", "Take care!", "Have a great day!"]
    },
    {
        "tag": "thanks",
        "patterns": ["Thank you", "Thanks", "I appreciate it", "Thanks a lot"],
        "responses": ["You're welcome!", "No problem!", "Glad to help!", "Anytime!"]
    },
    {
        "tag": "about",
        "patterns": ["Who are you?", "What can you do?", "What are you?", "Tell me about yourself"],
        "responses": ["I'm a simple chatbot here to assist you!", "I'm an AI bot designed to help.", "I can answer your questions and chat with you!"]
    },
    {
        "tag": "help",
        "patterns": ["Help", "I need help", "Can you help me?", "What should I do?"],
        "responses": ["Sure! What do you need help with?", "I'm here to assist. Tell me your issue.", "Let me know how I can help!"]
    },
    {
        "tag": "age",
        "patterns": ["How old are you?", "What’s your age?"],
        "responses": ["I don't have an age, I'm just a bot!", "I was created recently in the digital world!", "Age doesn’t apply to chatbots!"]
    },
    {
        "tag": "hobbies",
        "patterns": ["What are your hobbies?", "Do you have a hobby?", "What do you like to do?"],
        "responses": ["I love chatting with people like you!", "Helping users is my favorite thing to do.", "I don't have hobbies, but I can talk about yours!"]
    },
    {
        "tag": "fun_fact",
        "patterns": ["Tell me a fun fact", "Give me an interesting fact", "Say something cool"],
        "responses": ["Did you know honey never spoils?", "Bananas are berries, but strawberries aren’t!", "Octopuses have three hearts!"]
    },
    {
        "tag": "favorite",
        "patterns": ["What's your favorite color?", "Do you have a favorite movie?", "What's your favorite food?"],
        "responses": ["I like all colors equally!", "I don’t watch movies, but I think sci-fi sounds fun!", "I don’t eat, but I hear pizza is popular!"]
    }
]

In [6]:
# Initializing TF-IDF Vectorizer (converts text to numbers) and the classifier model
vectorizer = TfidfVectorizer()  
clf = LogisticRegression(random_state=0, max_iter=10000)  # Model will classify user input into tags

# Preparing training data
tags = []  # Stores intent tags (like 'greeting', 'goodbye', etc.)
patterns = []  # Stores example user inputs

# Extracting training data from the intents dictionary
for intent in intents:
    for pattern in intent['patterns']:
        tags.append(intent['tag'])  # Storing intent category
        patterns.append(pattern)  # Storing the corresponding input example

# Converting text input into numerical format (feature extraction)
x = vectorizer.fit_transform(patterns)  # Converts text data into TF-IDF numerical values
y = tags  # Labels (intent categories)

# Training the model
clf.fit(x, y) 

In [7]:
def chatbot(input_text):
    input_text = vectorizer.transform([input_text])  # Convert user input into numerical form

    try:
        tag = clf.predict(input_text)[0]  # Predict the intent category
        for intent in intents:  # Search for the corresponding intent in our dataset
            if intent['tag'] == tag:
                return random.choice(intent['responses'])  # Pick a random response from available options
    except:
        return "Sorry, I don't understand that."  # Default reply if input is not recognized

In [8]:
print(chatbot("Hello"))

Not much, you?


In [9]:
print(chatbot("What are you?"))

I'm a simple chatbot here to assist you!


In [10]:
print(chatbot("Vanakkam"))

I'm doing well, thanks!


In [11]:
print(chatbot("nill"))

I'm doing well, thanks!


In [12]:
print(chatbot("undefined text pattern"))

Not much, you?


In [13]:
print(chatbot("Random text"))

Not much, you?
