In [None]:
pip install gradio


Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [None]:
import os
import faiss
import numpy as np
import pandas as pd
import torch
import gradio as gr
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
from fastapi import FastAPI
from pydantic import BaseModel

# Hugging Face access token.
# SECURITY: never paste a token into the notebook. Export HF_TOKEN in the
# environment (or the platform's secret store) before starting the kernel.
# Any token that was ever hardcoded here must be treated as leaked and revoked.
# A missing or empty variable yields None, which is passed on as "no explicit token".
HF_TOKEN = os.environ.get("HF_TOKEN") or None
if HF_TOKEN is None:
    print("HF_TOKEN is not set; continuing without an explicit Hugging Face token.")

# Read the FAISS index from disk.
# faiss_index stays None when loading fails, so later code can detect that.
faiss_index = None
try:
    faiss_index = faiss.read_index("medical_faiss.index")
except Exception as index_error:
    # Report the problem and carry on without an index.
    print(f"Error loading FAISS index: {index_error}")

# Read the answers table.
# answers_df stays None whenever the CSV cannot be loaded.
answers_df = None
try:
    answers_df = pd.read_csv("medical_answers.csv")
except FileNotFoundError:
    # A missing file gets its own, more specific message.
    print("Error: medical_answers.csv not found.")
except Exception as csv_error:
    # Any other read or parse failure.
    print(f"Error reading medical_answers.csv: {csv_error}")

# Sentence-embedding model used to encode incoming queries.
# encoder_model stays None when the model cannot be loaded.
encoder_model = None
try:
    encoder_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
except Exception as encoder_error:
    # Report the problem and carry on without an encoder.
    print(f"Error loading sentence transformer model: {encoder_error}")


# Load TinyLlama for out-of-dataset queries.
# The previous id "TinyLLaMA/TinyLLaMA-1.1B-Chat" does not resolve on the Hub
# (loading failed with "not a valid model identifier"); the published chat
# checkpoint carries a version suffix.
tiny_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    tiny_model = AutoModelForCausalLM.from_pretrained(
        tiny_model_name, token=HF_TOKEN
    ).to(device)
    tiny_tokenizer = AutoTokenizer.from_pretrained(
        tiny_model_name, token=HF_TOKEN
    )
except Exception as e:
    # Leave both names as None so generate_answer() can detect the missing
    # model instead of failing on a half-initialised pair.
    print(f"Error loading TinyLLaMA model: {e}")
    tiny_model = None
    tiny_tokenizer = None


# Function to retrieve answer
def retrieve_answer(query, max_distance=None):
    """Return the stored answer whose embedding is the nearest match for ``query``.

    Args:
        query: Question text to embed and look up in the FAISS index.
        max_distance: Optional upper bound on the value FAISS reports for the
            best hit. ``None`` (the default) accepts any hit, which matches the
            previous behaviour. Only meaningful when the index metric is a
            distance (smaller means closer) -- confirm the metric of
            ``medical_faiss.index`` before choosing a value.

    Returns:
        The ``answer`` value of the matched row, as a string.

    Raises:
        ValueError: If the encoder, index or answers table is not loaded, or
            if no usable answer is found (no hit, index out of range, hit
            farther than ``max_distance``, or a missing answer value).
    """
    if encoder_model is None or faiss_index is None or answers_df is None:
        raise ValueError("Required models/dataframes not loaded.")

    query_embedding = encoder_model.encode([query], convert_to_numpy=True)
    # FAISS searches a C-contiguous float32 matrix; normalise defensively.
    query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)
    distances, indices = faiss_index.search(query_embedding, 1)

    if len(indices) == 0 or len(indices[0]) == 0:
        raise ValueError("No suitable answer found in FAISS index.")

    # FAISS reports -1 when it has no neighbour to return.
    best_index = int(indices[0][0])
    if best_index < 0 or best_index >= len(answers_df):
        raise ValueError("No suitable answer found in FAISS index.")

    # Without a threshold the nearest neighbour is always accepted, so callers
    # never reach their generation fallback on a populated index.
    if max_distance is not None and float(distances[0][0]) > max_distance:
        raise ValueError("No suitable answer found in FAISS index.")

    retrieved_answer = answers_df.iloc[best_index]["answer"]
    # An empty CSV cell is read as NaN; treat it as "no answer", not as text.
    if pd.isna(retrieved_answer):
        raise ValueError("No suitable answer found in FAISS index.")
    # Always hand back plain text (numpy scalars are not JSON-serialisable).
    return str(retrieved_answer)

# Function to generate answer if not found in FAISS
def generate_answer(query):
    """Generate a free-text answer for ``query`` with the TinyLLaMA model.

    Args:
        query: Question text used as the prompt.

    Returns:
        The newly generated text only (the prompt is not echoed back), or a
        fixed apology string if tokenization or generation fails.

    Raises:
        ValueError: If the model or tokenizer was not loaded.
    """
    if tiny_model is None or tiny_tokenizer is None:
        raise ValueError("TinyLLaMA model and tokenizer not loaded.")

    # NOTE(review): chat-tuned checkpoints usually expect their chat template;
    # consider tokenizer.apply_chat_template here -- confirm against the model card.
    try:
        # Tokenization sits inside the try so a tokenizer failure is reported
        # the same way as a generation failure instead of escaping to callers.
        inputs = tiny_tokenizer(query, return_tensors="pt").to(device)
        prompt_length = inputs["input_ids"].shape[-1]
        outputs = tiny_model.generate(**inputs, max_new_tokens=100)
        # The returned sequence starts with the prompt tokens; decode only the
        # continuation so the reply does not repeat the user's question.
        new_tokens = outputs[0][prompt_length:]
        response = tiny_tokenizer.decode(new_tokens, skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        print(f"Error during TinyLLaMA generation: {e}")
        return "I encountered an error while generating an answer." # Provide a useful message.


# FastAPI Backend
# Application object that the POST /chat route below is registered on.
# NOTE(review): the visible cells only launch the Gradio interface; nothing
# here starts a server for this app -- confirm how it is meant to be served.
app = FastAPI()

class QueryRequest(BaseModel):
    """Request body for POST /chat."""
    # Free-text question submitted by the client.
    query: str

@app.post("/chat")
async def chat(request: QueryRequest):
    """Answer a chat request from the answers index, falling back to generation.

    Args:
        request: Parsed request body carrying the user's ``query``.

    Returns:
        ``{"response": <str>}`` in every case; failures are logged and turned
        into a fixed message rather than propagated.
    """
    query = request.query
    # A blank question has nothing to embed or generate from.
    if not query or not query.strip():
        return {"response": "Please enter a question."}

    # NOTE(review): retrieve_answer/generate_answer are synchronous calls made
    # from an async route, so a slow call holds up the event loop.
    try:
        return {"response": retrieve_answer(query)}
    except ValueError as e:
        print(f"Error during retrieval: {e}")  # Log the error, then fall back below
    except Exception as e:
        print(f"Unexpected error: {e}") # log unexpected errors
        return {"response": "An unexpected error occurred."}

    # The fallback runs outside the handlers above: an exception raised inside
    # an ``except`` block is not caught by its sibling ``except`` clauses, so a
    # failing generate_answer (e.g. model not loaded) used to escape as a 500.
    try:
        return {"response": generate_answer(query)}
    except Exception as e:
        print(f"Error during generation fallback: {e}")
        return {"response": "I encountered an error while generating an answer."}

# Gradio Interface
def chatbot_interface(user_input):
    """Gradio callback: answer from the index, falling back to generation.

    Args:
        user_input: Text typed into the question box.

    Returns:
        The answer text, or a fixed message when the input is blank or when
        both retrieval and generation fail. This function never raises.
    """
    # A blank question has nothing to embed or generate from.
    if not user_input or not user_input.strip():
        return "Please enter a question."

    try:
        return retrieve_answer(user_input)
    except ValueError as e:
        print(f"Error during retrieval in Gradio: {e}") # log errors, then fall back below
    except Exception as e:
        print(f"Unexpected error in Gradio: {e}")
        return "An unexpected error occurred."

    # The fallback runs outside the handlers above: an exception raised inside
    # an ``except`` block is not caught by its sibling ``except`` clauses, so a
    # failing generate_answer (e.g. model not loaded) used to escape to Gradio.
    try:
        return generate_answer(user_input)
    except Exception as e:
        print(f"Error during generation fallback in Gradio: {e}")
        return "I encountered an error while generating an answer."

# Single-textbox UI wired to chatbot_interface; the reply is shown as plain text.
question_box = gr.Textbox(placeholder="Ask a medical question...")
iface = gr.Interface(
    title="Medical Chatbot",
    description="Ask medical questions. Retrieves answers from database or generates using TinyLLaMA.",
    fn=chatbot_interface,
    inputs=question_box,
    outputs="text",
)

# Start the Gradio server when this code runs as the main module.
if __name__ == "__main__":
    # Listen on all network interfaces, port 7860.
    iface.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )

Error loading TinyLLaMA model: TinyLLaMA/TinyLLaMA-1.1B-Chat is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://70ec28b852d7ed35b6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
