<a href="https://colab.research.google.com/github/ankitvermaaa/Project-2-Customer-Support-Chatbot/blob/main/Project_2_Customer_Support_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install nltk scikit-learn



In [3]:
import nltk
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
import re


In [4]:
# Fetch the NLTK resources used by the notebook: tokenizer models, the
# stop-word list, WordNet, and omw-1.4 (fixes lemmatizer error).
for resource_name in ('punkt', 'stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(resource_name)

# Fixes tokenization error in Colab. Kept as the final bare expression so the
# cell still displays the return value of the last download.
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [5]:
# Seed knowledge base: one (intent, example query, canned response) triple per entry.
faq_entries = [
    (
        "Order Status",
        "Where is my order #12345?",
        "Your order is out for delivery. 🚚",
    ),
    (
        "Return Policy",
        "How can I return a product?",
        "You can return products within 15 days via our online portal.",
    ),
    (
        "Product Info",
        "Does this phone support fast charging?",
        "Yes, this phone supports fast charging ⚡.",
    ),
]
column_names = ("Intent", "Example Query", "Response")

# Pivot the row-wise triples into a column-name -> list-of-values mapping.
data = {
    name: [entry[position] for entry in faq_entries]
    for position, name in enumerate(column_names)
}

df = pd.DataFrame(data)

In [6]:
# English stop words, held in a set for membership tests during preprocessing.
stop_words = {*stopwords.words('english')}

# Single shared lemmatizer instance reused for every token.
lemmatizer = WordNetLemmatizer()

def preprocess(text: str) -> str:
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Steps, in order: lowercase, strip order references such as ``#12345``,
    tokenize with ``nltk.word_tokenize``, drop punctuation tokens and English
    stop words, then lemmatize what remains.

    Args:
        text: Raw query text. Any non-string value (e.g. ``None`` or a NaN
            from a DataFrame column) is treated as an empty query.

    Returns:
        The cleaned tokens joined by single spaces; ``""`` when nothing
        survives the filtering.
    """
    # Missing values would otherwise fail on .lower(); treat them as empty text.
    if not isinstance(text, str):
        return ""

    text = re.sub(r'#\d+', '', text.lower())  # remove order numbers like #12345

    punctuation_chars = set(string.punctuation)
    kept_tokens = []
    for token in nltk.word_tokenize(text):
        # A token is punctuation when *every* character is punctuation. The
        # previous `token not in string.punctuation` was a substring test, so
        # multi-character tokens such as "..." or "--" were not filtered out.
        if all(ch in punctuation_chars for ch in token):
            continue
        if token in stop_words:
            continue
        kept_tokens.append(lemmatizer.lemmatize(token))
    return " ".join(kept_tokens)

In [7]:
# Store the cleaned form of every example query next to the original text.
df['Processed_Query'] = df['Example Query'].map(preprocess)

# Fit TF-IDF on the cleaned example queries; X holds one row per example and
# is what incoming queries are compared against.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Processed_Query'])

In [8]:
def get_response(
    query,
    min_similarity=0.0,
    fallback="Sorry, I didn't understand that. Could you rephrase your question?",
):
    """Return the canned response whose example query best matches ``query``.

    The query is cleaned with ``preprocess``, projected into the fitted TF-IDF
    space, and compared against every example query by cosine similarity.

    Args:
        query: Raw customer message.
        min_similarity: The best score must be strictly greater than this
            value for a match to count. With the default of 0.0 only queries
            that share no vocabulary with the examples are rejected.
        fallback: Message returned when no example is similar enough.

    Returns:
        The ``Response`` of the closest example, or ``fallback`` when the best
        similarity does not exceed ``min_similarity``.
    """
    query_vec = vectorizer.transform([preprocess(query)])
    scores = cosine_similarity(query_vec, X)[0]  # one score per example query
    best_idx = int(scores.argmax())

    # Without this guard an all-zero score vector makes argmax return index 0,
    # so every unrecognized query would get the first row's response.
    if scores[best_idx] <= min_similarity:
        return fallback
    return df.iloc[best_idx]['Response']

In [10]:
# Interactive chat loop: read a customer message, answer it, repeat until the
# customer types one of the exit commands.
EXIT_COMMANDS = {"exit", "quit", "bye"}

print("🤖 Customer Support Chatbot (type 'exit' to quit)")
while True:
    try:
        # Strip surrounding whitespace so inputs like "exit " still quit.
        user_input = input("Customer: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: end the session
        # with the normal farewell instead of a traceback.
        print("\nBot: Thank you! Have a great day 😊")
        break
    if not user_input:
        # Nothing to match against; prompt again.
        continue
    if user_input.lower() in EXIT_COMMANDS:
        print("Bot: Thank you! Have a great day 😊")
        break
    print("Bot:", get_response(user_input))


🤖 Customer Support Chatbot (type 'exit' to quit)
Customer: exit
Bot: Thank you! Have a great day 😊
