In [134]:
import pandas as pd
import nltk
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import display, HTML


In [135]:
nlp = spacy.load("en_core_web_sm")
faq_data = pd.read_csv('/content/COVID19_FAQ.csv')

In [136]:
faq_data.head()


Unnamed: 0,questions,answers
0,1. How does COVID-19 spread?,People can catch COVID-19 from others who have...
1,2. What are the symptoms of COVID-19?,The most common symptoms of COVID-19 are fever...
2,3. How do I know if it is COVID-19 or just the...,A COVID-19 infection has the same signs and sy...
3,4. Can the virus that causes COVID-19 be trans...,Studies to date suggest that the virus that ca...
4,5. What can I do to protect myself and prevent...,Protection measures for everyone Stay aware ...


In [137]:
vectorizer = TfidfVectorizer()
faq_vectors = vectorizer.fit_transform(faq_data['questions'])

In [138]:
HTML("<link href='https://fonts.googleapis.com/css2?family=Louisa+Bold&display=swap' rel='stylesheet'>")

In [139]:
def display_response(response):
    # Display the response with HTML line breaks and styling
    display(HTML(f"<p style='font-family: \"Monaco\", monospace; font-size: 14px; color: green; line-height: 1.5;'>{response}</p>"))


In [140]:
def get_answer(query):

    query = preprocess_text(query)
    query_vector = vectorizer.transform([query])

    similarities = cosine_similarity(query_vector, faq_vectors).flatten()

    best_match_index = similarities.argmax()
    best_match_score = similarities[best_match_index]

    # Set a threshold to define when a match is good enough
    if best_match_score > 0.3:
      response = faq_data.iloc[best_match_index]['answers']
      html_response = display_response(response)
      return html_response
    else:
      return "I'm sorry, I don't have an answer for that question. Could you try rephrasing?"




In [141]:
#nltk

def keyword_matching(query, keywords):
    tokens = set(query.lower().split())
    return any(keyword in tokens for keyword in keywords)

def get_answer_with_rules(query):
    # Rule-based responses
    if keyword_matching(query, ["hello", "hi", "hey"]):
        return "Hello! How can I help you?"
    elif keyword_matching(query, ["bye", "goodbye"]):
        return "Goodbye! Have a great day!"
    else:
        # Use FAQ matching as fallback
        return get_answer(query)





In [None]:
# Interactive cell to test the chatbot
while True:
    query = input("Ask a question (type 'exit' to quit): ")
    if query.lower() == 'exit':
        print("Goodbye!")
        break
    response = get_answer_with_rules(query)
    print("COVIDBot:", response)


Ask a question (type 'exit' to quit): Hi
COVIDBot: Hello! How can I help you?
Ask a question (type 'exit' to quit): Should I worry about COVID-19?


COVIDBot: None
Ask a question (type 'exit' to quit): Can I recover from COVID-19?


COVIDBot: None
