<a href="https://colab.research.google.com/github/VishalJha01/NLP-Project-2/blob/main/Customer_Support_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nltk scikit-learn pandas



In [2]:
import re
import nltk
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [3]:
# Download the NLTK data packages used by the rest of the notebook.
# Tokenizer models (presumably what nltk.word_tokenize loads - confirm
# against the installed NLTK version).
nltk.download('punkt')
nltk.download('punkt_tab')     # fix for new NLTK versions
# Stop-word lists read later through stopwords.words('english').
nltk.download('stopwords')
# WordNet corpus (presumably what WordNetLemmatizer looks words up in).
# As the last expression of the cell, this call's return value is what the
# notebook displays underneath the download log.
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [4]:
# Tiny hand-written knowledge base: one row per intent, holding the intent
# label, an example user query and the canned response for that intent.
_faq_rows = [
    (
        'Order Status',
        "Where is my order #12345?",
        "Your order #12345 is out for delivery.",
    ),
    (
        'Return Policy',
        "How can I return a product?",
        "You can return products within 15 days via our online portal.",
    ),
    (
        'Product Info',
        "Does this phone support fast charging?",
        "Yes, this phone supports fast charging.",
    ),
]

# Pivot the rows into the column-oriented mapping that pandas consumes.
data = {
    column: [row[position] for row in _faq_rows]
    for position, column in enumerate(('Intent', 'Example Query', 'Response'))
}
chat_df = pd.DataFrame(data)


In [5]:
# Shared NLP resources, built once and reused by every preprocess() call.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))  # set for fast membership tests

def preprocess(text):
    """Normalise raw text into a space-separated string of lemmas.

    The text is lower-cased and tokenised with ``nltk.word_tokenize``.
    Tokens that are not purely alphanumeric, or that appear in
    ``stop_words``, are discarded; the survivors are lemmatised with
    ``lemmatizer`` and joined with single spaces.
    """
    kept = []
    for token in nltk.word_tokenize(text.lower()):
        if not token.isalnum() or token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return ' '.join(kept)

# Store the normalised form of every example query alongside the original.
chat_df['Processed_Query'] = chat_df['Example Query'].apply(preprocess)


In [6]:
# Fit a TF-IDF vectorizer (default settings) on the preprocessed example
# queries. `vectorizer` is kept so chatbot_reply can transform user input
# with the same fitted vocabulary; `tfidf_matrix` holds one row per example
# query, in the same order as the rows of chat_df.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(chat_df['Processed_Query'])


In [7]:
def extract_order_number(text):
    """Return the first ``#<digits>`` token found in ``text``.

    The returned string includes the leading ``#``. ``None`` is returned
    when ``text`` contains no ``#`` immediately followed by a digit.
    """
    found = re.search(r'#\d+', text)
    if found is None:
        return None
    return found.group()


In [8]:
def chatbot_reply(user_input, fallback="Sorry, I didn't understand that. Could you rephrase your question?"):
    """Return the canned response whose example query best matches the input.

    The input is preprocessed, projected into the fitted TF-IDF space and
    compared with every example query by cosine similarity; the ``Response``
    of the most similar row of ``chat_df`` is returned. If the input contains
    an order number (``#`` followed by digits), it replaces the ``#12345``
    placeholder in that response.

    Parameters
    ----------
    user_input : str
        Raw text typed by the user.
    fallback : str, optional
        Reply returned when the input has zero similarity to every example
        query (for instance empty input or only unknown words).

    Returns
    -------
    str
        The selected response, or ``fallback`` when nothing matched.
    """
    order_number = extract_order_number(user_input)
    user_vec = vectorizer.transform([preprocess(user_input)])
    sim = cosine_similarity(user_vec, tfidf_matrix)

    # With no shared vocabulary every similarity is 0, and argmax would then
    # silently pick row 0 (the order-status answer). Treat that as "no match".
    if sim.max() <= 0:
        return fallback

    idx = sim.argmax()
    response = chat_df.iloc[idx]['Response']

    if order_number:
        # Echo the user's own order number instead of the template's.
        response = response.replace("#12345", order_number)
    return response

In [None]:
# Interactive console session: read a line, answer it with chatbot_reply, and
# stop when the user types 'exit' (any casing, surrounding spaces ignored).
print("🛍️ Customer Support Chatbot (type 'exit' to quit)")
while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the cell was interrupted: end the session
        # politely instead of leaving a traceback in the notebook.
        print()
        query = 'exit'
    if not query:
        # Nothing typed; prompt again rather than sending an empty query
        # to the matcher.
        continue
    if query.lower() == 'exit':
        print("Bot: Thanks for chatting! 👋")
        break
    print("Bot:", chatbot_reply(query))

🛍️ Customer Support Chatbot (type 'exit' to quit)
