<a href="https://colab.research.google.com/github/itsayushi0/CODTECH/blob/main/task3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

"""
Task-3: NLP Chatbot with spaCy & scikit-learn
---------------------------------------------
This notebook builds a simple FAQ chatbot using:
- spaCy for text preprocessing (lemmatization, stopword removal)
- TF-IDF + cosine similarity for question matching
- Rule-based replies for small talk (hello, thanks, bye)

Run the cells in order, then use the chat loop (Cell 5); the final cell optionally launches a Streamlit web UI.

How to use:
1. Run the dependency install (Cell 2); restart the runtime if spaCy asks you to
2. Edit FAQs (Cell 3) to your needs
3. Run the chatbot logic (Cell 4)
4. Interact with the chatbot (Cell 5)

Author: Ayushi Pal
"""


In [None]:
# Install the NLP dependencies and download spaCy's small English model,
# which is loaded further down via spacy.load("en_core_web_sm").
# NOTE: after the download spaCy may ask for a kernel/runtime restart before the model can be loaded.
!pip install spacy scikit-learn
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m80.2 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# FAQ knowledge base: one entry per question, each paired with its canned answer.
# Edit, add or remove entries here to change what the bot can answer.
faqs = [
    dict(
        question="What is your name?",
        answer="I am a simple NLP chatbot.",
    ),
    dict(
        question="How do I submit internship tasks?",
        answer="Push your code to GitHub and follow the WhatsApp video guidance.",
    ),
    dict(
        question="Which NLP library do you use?",
        answer="I use spaCy for text processing.",
    ),
    dict(
        question="How can I contact support?",
        answer="Post in the WhatsApp group or email the coordinator.",
    ),
]


In [None]:
import re

import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load spaCy's small English pipeline. The model must already be installed
# (the install cell runs `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# Lemmatizer function
def spacy_lemmas(text):
    """Reduce ``text`` to a space-separated string of lemmas.

    The text is lower-cased and run through the module-level spaCy pipeline
    ``nlp``. Stop words, punctuation, number-like tokens and tokens whose
    lemma is blank are dropped; the remaining lemmas are joined with a
    single space.

    Args:
        text: Raw input string.

    Returns:
        The filtered lemmas joined by spaces (empty string if nothing survives).
    """
    kept = []
    for token in nlp(text.lower()):
        # Skip tokens that carry no matching signal.
        if token.is_stop or token.is_punct or token.like_num:
            continue
        # Guard against lemmas that are empty or whitespace-only.
        if token.lemma_.strip():
            kept.append(token.lemma_)
    return " ".join(kept)

# Prepare data: two parallel lists, so answers[i] is the reply for questions[i].
questions = [faq["question"] for faq in faqs]
answers = [faq["answer"] for faq in faqs]

# Fit TF-IDF on the FAQ questions only; spacy_lemmas is plugged in as the
# vectorizer's preprocessor. q_matrix has one row per FAQ question.
vectorizer = TfidfVectorizer(preprocessor=spacy_lemmas)
q_matrix = vectorizer.fit_transform(questions)

# Rule-based small talk
# Each key is a tuple of trigger words/phrases; the value is the canned reply.
RULES = {
    ("hi", "hello", "hey"): "Hello! How can I help you today?",
    ("thanks", "thank you"): "You're welcome!",
    ("bye", "goodbye"): "Bye! Have a great day."
}

def rule_based_reply(user_text):
    """Return a canned small-talk reply, or ``None`` if no rule applies.

    Triggers are matched case-insensitively as whole words/phrases. Plain
    substring matching is deliberately avoided: "hi" is a substring of
    "which" and "this", and "hey" of "they", so ordinary questions would be
    answered with a greeting instead of reaching the FAQ matcher.

    Args:
        user_text: Raw text typed by the user.

    Returns:
        The reply of the first rule (in ``RULES`` order) with a matching
        trigger, otherwise ``None``.
    """
    t = user_text.lower().strip()
    for triggers, reply in RULES.items():
        # \b anchors the trigger at word boundaries; re.escape keeps any
        # punctuation inside a trigger literal.
        if any(re.search(rf"\b{re.escape(k)}\b", t) for k in triggers):
            return reply
    return None

# Chatbot answer function
# Minimum cosine similarity for an FAQ match to be accepted (tune if needed).
SIM_THRESHOLD = 0.35

def answer_query(user_text):
    """Produce the bot's reply for one user message.

    Small-talk rules are tried first. If none applies, the message is
    vectorized with the fitted TF-IDF ``vectorizer`` and compared against
    every FAQ question in ``q_matrix``; the answer of the most similar
    question is returned when its cosine similarity reaches
    ``SIM_THRESHOLD``, otherwise a fallback apology.

    Args:
        user_text: Raw text typed by the user.

    Returns:
        The reply string.
    """
    canned = rule_based_reply(user_text)
    if canned:
        return canned

    query_vec = vectorizer.transform([user_text])
    scores = cosine_similarity(query_vec, q_matrix).flatten()
    top = scores.argmax()

    return (
        answers[top]
        if scores[top] >= SIM_THRESHOLD
        else "Sorry, I’m not sure about that yet."
    )


In [None]:
# Interactive console loop: read a line, reply, repeat until the user quits.
print("NLP Chatbot ready. Type 'quit' to exit.")
while True:
    try:
        user = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: end the chat
        # cleanly instead of leaving a traceback in the output.
        print("\nBot: Goodbye!")
        break
    # Strip before comparing so that "quit " or " exit" still leaves the loop.
    if user.strip().lower() in {"quit", "exit"}:
        print("Bot: Goodbye!")
        break
    print("Bot:", answer_query(user))


NLP Chatbot ready. Type 'quit' to exit.
You: quit
Bot: Goodbye!


In [8]:
# Install required packages (quiet mode: stdout is redirected to /dev/null).
# streamlit serves the web UI written below; localtunnel (an npm package)
# is used in the last line of this cell to expose the local port.
!pip install streamlit > /dev/null
!npm install -g localtunnel > /dev/null

# Save chatbot logic into a Python file so the Streamlit app can import it.
# - encoding="utf-8": the payload contains a non-ASCII apostrophe; without an
#   explicit encoding the file is written in the platform default, which can
#   produce a module Python cannot read back as UTF-8 source.
# - raw string (r'''...'''): keeps the regex `\b` sequences below as a literal
#   backslash + "b" in the generated file instead of a backspace character.
# - rule_based_reply matches triggers on word boundaries; substring matching
#   made "hi" fire inside "which"/"this" and "hey" inside "they".
with open("chatbot_logic.py", "w", encoding="utf-8") as f:
    f.write(r'''
import re

import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

faqs = [
    {"question": "What is your name?", "answer": "I am a simple NLP chatbot."},
    {"question": "How do I submit internship tasks?", "answer": "Push your code to GitHub and follow the WhatsApp video guidance."},
    {"question": "Which NLP library do you use?", "answer": "I use spaCy for text processing."},
    {"question": "How can I contact support?", "answer": "Post in the WhatsApp group or email the coordinator."}
]

nlp = spacy.load("en_core_web_sm")

def spacy_lemmas(text):
    doc = nlp(text.lower())
    return " ".join(
        tok.lemma_ for tok in doc
        if not tok.is_stop and not tok.is_punct and not tok.like_num and tok.lemma_.strip()
    )

questions = [item["question"] for item in faqs]
answers = [item["answer"] for item in faqs]

vectorizer = TfidfVectorizer(preprocessor=spacy_lemmas)
q_matrix = vectorizer.fit_transform(questions)

RULES = {
    ("hi", "hello", "hey"): "Hello! How can I help you today?",
    ("thanks", "thank you"): "You're welcome!",
    ("bye", "goodbye"): "Bye! Have a great day."
}

def rule_based_reply(user_text):
    t = user_text.lower().strip()
    for triggers, reply in RULES.items():
        # Match whole words/phrases only, so "hi" does not fire inside "which".
        if any(re.search(r"\b" + re.escape(k) + r"\b", t) for k in triggers):
            return reply
    return None

SIM_THRESHOLD = 0.35

def answer_query(user_text):
    rb = rule_based_reply(user_text)
    if rb:
        return rb

    u_vec = vectorizer.transform([user_text])
    sims = cosine_similarity(u_vec, q_matrix).flatten()
    best_idx = sims.argmax()
    best_score = sims[best_idx]

    if best_score >= SIM_THRESHOLD:
        return answers[best_idx]
    else:
        return "Sorry, I’m not sure about that yet."
''')

# Create a simple Streamlit app that forwards the typed text to answer_query.
# encoding="utf-8" is required because the script contains emoji: with a
# non-UTF-8 platform default the write can fail with UnicodeEncodeError.
with open("app.py", "w", encoding="utf-8") as f:
    f.write('''
import streamlit as st
from chatbot_logic import answer_query

st.set_page_config(page_title="NLP Chatbot", page_icon="🤖")

st.title("💬 NLP Chatbot")
st.markdown("Ask me a question and I'll try my best to answer.")

user_input = st.text_input("You:", "")

if st.button("Send"):
    if user_input.strip():
        reply = answer_query(user_input)
        st.write(f"**Bot:** {reply}")
    else:
        st.warning("Please type something before sending.")
''')

# Run Streamlit and expose via localtunnel.
# The `&` puts `streamlit run` in the background on port 8501; the cell then
# stays attached to `npx localtunnel`, which tunnels that same port and prints
# the public URL. Presumably this runs until the cell is interrupted.
!streamlit run app.py --server.port 8501 & npx localtunnel --port 8501


[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.9.217.194:8501[0m
[0m
[1G[0K⠹[1G[0Kyour url is: https://fresh-apes-smoke.loca.lt
[34m  Stopping...[0m
^C
