<a href="https://colab.research.google.com/github/himashi27/NLP-P2-Chatbot-for-Online-Shopping-/blob/main/NLP(P4).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nltk scikit-learn pandas




In [2]:
import nltk
# Fetch the NLTK resources this notebook relies on: the punkt tokenizer
# models, the stopword lists and the WordNet corpus.
for resource in ("punkt", "punkt_tab", "stopwords", "wordnet"):
    nltk.download(resource)

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import nltk
import re
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [3]:
# One (intent, example utterance, response template) triple per supported
# intent. "{order_id}" in a response is a placeholder filled in at reply time.
faq_rows = [
    (
        "order_status",
        "Where is my order #12345?",
        "Your order {order_id} is currently being processed.",
    ),
    (
        "return_policy",
        "How can I return a product?",
        "You can return a product within 15 days through our online return portal.",
    ),
    (
        "product_info",
        "Does this phone support fast charging?",
        "Yes, this product supports fast charging.",
    ),
]

# Transpose the rows into the column-oriented mapping used to build the frame.
column_names = ("Intent", "Example", "Response")
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*faq_rows))
}

df = pd.DataFrame(data)
df


Unnamed: 0,Intent,Example,Response
0,order_status,Where is my order #12345?,Your order {order_id} is currently being proce...
1,return_policy,How can I return a product?,You can return a product within 15 days throug...
2,product_info,Does this phone support fast charging?,"Yes, this product supports fast charging."


In [4]:
# Shared text-normalisation resources, built once and reused by preprocess().
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))


def preprocess(text):
    """Normalise a sentence into a space-separated string of lemmas.

    The text is lower-cased and tokenised with ``nltk.word_tokenize``. Tokens
    that are not purely alphanumeric (e.g. punctuation) or that appear in the
    English stopword set are dropped; the remaining tokens are lemmatised
    with ``WordNetLemmatizer`` using its default arguments.

    Args:
        text: The raw sentence to normalise.

    Returns:
        The surviving lemmas joined by single spaces (an empty string when
        no token survives).
    """
    kept = []
    for token in nltk.word_tokenize(text.lower()):
        # Skip punctuation/symbol tokens and stopwords.
        if not token.isalnum() or token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return " ".join(kept)


In [5]:
# Normalise every example utterance and keep the result alongside the raw text.
processed_examples = df["Example"].apply(preprocess)
df["Processed"] = processed_examples

# Learn the TF-IDF vocabulary from the processed examples, then encode them.
# Row i of X corresponds to row i of df.
vectorizer = TfidfVectorizer()
X = vectorizer.fit(df["Processed"]).transform(df["Processed"])


In [6]:
def extract_order_id(text):
    """Return the first ``#<digits>`` token found in ``text``.

    Args:
        text: The raw user message to scan.

    Returns:
        The matched substring including the leading ``#`` (e.g. ``"#12345"``),
        or ``None`` when the text contains no such token.
    """
    found = re.search(r"#\d+", text)
    if found is None:
        return None
    return found.group()


In [7]:
def get_response(user_query, threshold=0.25):
    """Return the chatbot's reply to a single user message.

    The message is preprocessed, encoded with the fitted TF-IDF ``vectorizer``
    and compared against every training example in ``X`` by cosine
    similarity. The response of the most similar example is returned.

    Args:
        user_query: The raw text typed by the user.
        threshold: Minimum cosine similarity the best match must reach for
            the query to count as understood. Defaults to 0.25.

    Returns:
        The reply string: a fallback apology when the best similarity is
        below ``threshold``; for the ``order_status`` intent either the
        response with ``{order_id}`` substituted or a prompt asking for the
        order ID; otherwise the matched response unchanged.
    """
    # Extract the order ID from the raw text, before preprocessing filters
    # out non-alphanumeric tokens.
    order_id = extract_order_id(user_query)

    processed_query = preprocess(user_query)
    query_vec = vectorizer.transform([processed_query])

    # One similarity score per training example; pick the closest one.
    similarities = cosine_similarity(query_vec, X).flatten()
    index = similarities.argmax()

    if similarities[index] < threshold:
        return "Sorry, I couldn't understand your question."

    # Look the matching row up once and read both fields from it.
    best_match = df.iloc[index]
    intent = best_match["Intent"]
    response = best_match["Response"]

    # The order-status template needs an ID; ask for it when none was given.
    if intent == "order_status":
        if order_id:
            return response.replace("{order_id}", order_id)
        return "Please provide your order ID (e.g., #12345)."

    return response


In [8]:
# Interactive chat loop: read a line, answer it, and stop on "exit".
print("🛒 Customer Support Chatbot Ready! Type 'exit' to quit.\n")

while True:
    try:
        user = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # End the session cleanly if input is closed or the cell is interrupted.
        print("\nBot: Thank you for using our service! 😊")
        break

    # Ignore surrounding whitespace and case when checking for the quit command.
    if user.strip().lower() == "exit":
        print("Bot: Thank you for using our service! 😊")
        break

    print("Bot:", get_response(user))


🛒 Customer Support Chatbot Ready! Type 'exit' to quit.

You: How can i return a product?
Bot: You can return a product within 15 days through our online return portal.
You: exit
Bot: Thank you for using our service! 😊
