In [24]:
import pandas as pd
from transformers import pipeline
from difflib import get_close_matches

pipe = pipeline("text2text-generation", model="google/flan-t5-large")



print("Loading knowledge base...")
try:
    faq_df = pd.read_csv("medquad.csv", engine="python")

    faq_dict = dict(zip(faq_df["question"], faq_df["answer"]))
    print(f"Knowledge base loaded with {len(faq_dict)} entries.")
except FileNotFoundError:
    print("Error: medquad.csv not found. Please ensure it's in the correct directory.")
    faq_dict = {}

def healthcare_chatbot(user_query):



    cleaned_query = user_query.strip().lower()

    questions = list(faq_dict.keys())
    match = get_close_matches(cleaned_query, questions, n=1, cutoff=0.6)

    if match and faq_dict:

        response = faq_dict[match[0]]
        source = "FAQ Knowledge Base"
    else:

        prompt = f"Question: {user_query}\nAnswer:"

        result = pipe(
            prompt,
            max_new_tokens=60,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.2
        )

        response = result[0]['generated_text'].replace(prompt, "").strip()
        source = "BioGPT Model"


    return f"🤖 ({source}): {response}"





Device set to use cpu


Loading knowledge base...
Knowledge base loaded with 14984 entries.


In [25]:
print(healthcare_chatbot("What are the treatments for Alcohol Use and Older Adults?"))
print(healthcare_chatbot("How much water should I drink daily?"))

🤖 (FAQ Knowledge Base): Older people with alcohol problems respond to treatment as well as younger people. Some studies suggest that older adults do better when they are treated with other people the same age instead of mixed in with younger adults. Some communities have treatment programs and support groups specifically for older adults.
🤖 (BioGPT Model): 8 glasses of water a day


In [26]:
print(healthcare_chatbot("How offen should I exercise?"))

🤖 (BioGPT Model): 30 minutes a day


In [27]:
print(healthcare_chatbot("When should I take paracetamol?"))

🤖 (BioGPT Model): when you are feeling nauseous


In [28]:

import textwrap

print("Loading the FLAN-T5 model...")

pipe = pipeline("text2text-generation", model="google/flan-t5-large")


example_queries = [
    "How can I improve my daily health?",
    "What should I do if I'm feeling very stressed?",
    "Is a vegetarian diet healthy?",
    "How much should I exercise"
]


for i, user_query in enumerate(example_queries):
    print("\n" + "="*80)
    print(f"--- EXAMPLE {i+1} ---")
    print(f"User Question: \"{user_query}\"")
    print("="*80)


    prompt_v1 = user_query

    print("\n--- [V1] Testing Basic Prompt ---")
    print(f"Prompt: \"{prompt_v1}\"")

    try:
        result_v1 = pipe(prompt_v1, max_new_tokens=100, repetition_penalty=1.3,
                        do_sample=True, temperature=0.7)
        response_v1 = result_v1[0]['generated_text']
    except Exception as e:
        response_v1 = f"Error: {e}"

    print(f"Response V1: {textwrap.fill(response_v1, width=80)}")
    print("-" * 40)


    if "health" in user_query.lower():
        prompt_v2 = f"Give 3 general health tips for: {user_query}"
    elif "stress" in user_query.lower():
        prompt_v2 = f"List 3 ways to manage: {user_query.replace('What should I do if', 'when')}"
    elif "diet" in user_query.lower() and "healthy" in user_query.lower():
        prompt_v2 = f"Explain pros and cons: {user_query}"
    else:
        prompt_v2 = f"Provide helpful advice: {user_query}"

    print(f"\n--- [V2] Testing Improved Prompt ---")
    print(f"Prompt: \"{prompt_v2}\"")

    try:
        result_v2 = pipe(prompt_v2, max_new_tokens=100, repetition_penalty=1.3,
                        do_sample=True, temperature=0.7)
        response_v2 = result_v2[0]['generated_text']
    except Exception as e:
        response_v2 = f"Error: {e}"

    print(f"Response V2: {textwrap.fill(response_v2, width=80)}")
    print("-" * 40)

    prompt_v3 = f"Q: {user_query}\nA: Here are some helpful suggestions:"

    print(f"\n--- [V3] Testing Question-Answer Format ---")
    print(f"Prompt: \"{prompt_v3}\"")

    try:
        result_v3 = pipe(prompt_v3, max_new_tokens=100, repetition_penalty=1.3,
                        do_sample=True, temperature=0.7)
        response_v3 = result_v3[0]['generated_text']
    except Exception as e:
        response_v3 = f"Error: {e}"

    print(f"Response V3: {textwrap.fill(response_v3, width=80)}")
    print("-" * 40)





Loading the FLAN-T5 model...


Device set to use cpu



--- EXAMPLE 1 ---
User Question: "How can I improve my daily health?"

--- [V1] Testing Basic Prompt ---
Prompt: "How can I improve my daily health?"
Response V1: Eat more fruits and vegetables.
----------------------------------------

--- [V2] Testing Improved Prompt ---
Prompt: "Give 3 general health tips for: How can I improve my daily health?"
Response V2: Eat a balanced diet. Exercise at least 30 minutes a day.
----------------------------------------

--- [V3] Testing Question-Answer Format ---
Prompt: "Q: How can I improve my daily health?
A: Here are some helpful suggestions:"
Response V3: Eat more fruits and vegetables.
----------------------------------------

--- EXAMPLE 2 ---
User Question: "What should I do if I'm feeling very stressed?"

--- [V1] Testing Basic Prompt ---
Prompt: "What should I do if I'm feeling very stressed?"
Response V1: Take a deep breath and take a deep breath.
----------------------------------------

--- [V2] Testing Improved Prompt ---
Prompt: "L

## Inference Optimization

In [29]:

import gradio as gr
import pandas as pd
from difflib import get_close_matches
import os


print("Loading text generation model (google/flan-t5-large)...")

try:
    pipe = pipeline("text2text-generation", model="google/flan-t5-large")
    print("✅ Model loaded successfully.")
except Exception as e:
    print(f"🔥 Error loading model: {e}")
    pipe = None

print("Loading knowledge base from medquad.csv...")
faq_dict = {}
if os.path.exists("medquad.csv"):
    try:
        faq_df = pd.read_csv("medquad.csv", engine="python")

        faq_dict = dict(zip(faq_df["question"], faq_df["answer"]))
        print(f"✅ Knowledge base loaded with {len(faq_dict)} entries.")
    except Exception as e:
        print(f"🔥 Error loading or parsing medquad.csv: {e}")
else:
    print("⚠️ Warning: medquad.csv not found. The chatbot will rely solely on the AI model.")




def healthcare_chatbot(user_query):


    if pipe is None:
        return "Sorry, the AI model could not be loaded. Please check the terminal for errors."


    if not user_query or not user_query.strip():
        return "Please ask a question."

    cleaned_query = user_query.strip().lower()


    if faq_dict:
        questions = list(faq_dict.keys())

        match = get_close_matches(cleaned_query, questions, n=1, cutoff=0.6)

        if match:
            response = faq_dict[match[0]]
            source = "FAQ Knowledge Base"
            return f"✅ **Answer from {source}:**\n\n{response}"


    prompt = f"Question: {user_query}\nAnswer:"
    try:
        result = pipe(
            prompt,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            top_p=0.95
        )
        response = result[0]['generated_text']
        source = "AI Model (FLAN-T5)"
        return f"🤖 **Answer from {source}:**\n\n{response}"
    except Exception as e:
        return f"Sorry, there was an error communicating with the AI model: {e}"




print("Creating Gradio interface...")

# We use a multi-line string with a bit of HTML for better formatting and emphasis.
interface_description = """
**Ask a health-related question.** This tool provides information by checking a local knowledge base (MedQuAD) and consulting the FLAN-T5 AI model if needed.
<br>
<p style='text-align: center; color: red; font-weight: bold;'>
Disclaimer: This is for educational purposes only and not a substitute for professional medical advice.
</p>
"""

iface = gr.Interface(
    fn=healthcare_chatbot,
    inputs=gr.Textbox(lines=4, label="Your Question", placeholder="e.g., What are the treatments for Alcohol Use and Older Adults?"),
    outputs=gr.Textbox(label="Chatbot Response"),
    title="🩺 Healthcare Information Assistant",
    description=interface_description,
    theme=gr.themes.Soft(),
    allow_flagging="never"
)

print("Launching Gradio app... Open the URL in your browser.")
iface.launch()

Loading text generation model (google/flan-t5-large)...


Device set to use cpu


✅ Model loaded successfully.
Loading knowledge base from medquad.csv...
✅ Knowledge base loaded with 14984 entries.
Creating Gradio interface...




Launching Gradio app... Open the URL in your browser.
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2b2f5a54b4cd850fb6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


