In [None]:
!pip install langchain langchain-groq faiss-cpu PyPDF2 python-docx langchain-community duckduckgo-search

In [25]:
import os
from getpass import getpass

# Keep the credential out of the notebook source: reuse GROQ_API_KEY when it is
# already set in the environment, otherwise prompt for it without echoing.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("GROQ API key: ")

In [26]:
import pandas as pd
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.tools import DuckDuckGoSearchRun
from langchain.chains import LLMChain

In [27]:
from io import StringIO
try:
    from PyPDF2 import PdfReader
except ImportError:
    PdfReader = None
try:
    import docx
except ImportError:
    docx = None

In [28]:
def extract_text_from_file(path: str) -> str:
    """
    Given a file path, extract and return its text content.

    Dispatches on the lower-cased file extension:
      * ``.pdf``            -> PyPDF2 ``PdfReader``; text of all pages concatenated.
      * ``.docx`` / ``.doc`` -> python-docx ``Document``; each paragraph followed by a newline.
      * anything else       -> read as UTF-8 text, ignoring undecodable bytes.

    Args:
        path: Path of the file to read.

    Returns:
        The extracted text. Returns "" (after emitting a warning) when the file
        is a PDF/Word document but the optional library needed to parse it is
        not installed, instead of decoding the binary file as text.
    """
    import warnings

    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        if PdfReader is None:
            warnings.warn(
                f"PyPDF2 is not installed; cannot extract text from {path!r}."
            )
            return ""
        reader = PdfReader(path)
        # extract_text() may yield None for a page; treat that as empty text.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if ext in (".docx", ".doc"):
        if docx is None:
            warnings.warn(
                f"python-docx is not installed; cannot extract text from {path!r}."
            )
            return ""
        # NOTE(review): python-docx targets .docx; legacy binary .doc files
        # will presumably fail inside docx.Document — confirm and convert upstream.
        document = docx.Document(path)
        return "".join(para.text + "\n" for para in document.paragraphs)

    # Plain-text fallback. The codec name is the canonical ASCII "utf-8"
    # (the previous literal contained a non-ASCII hyphen character).
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        return f.read()


In [29]:
# Build FAISS VectorStore
def build_vectorstore(
    wiki_parquet_path: str,
    upload_paths: list[str],
    hf_model: str = "all-MiniLM-L6-v2"
) -> FAISS:
    """
    Build a FAISS index over the wiki dataset plus any user-supplied documents.

    Args:
        wiki_parquet_path: Parquet file (path or URL) read with pandas; it must
            provide ``page_title`` and ``page_text`` columns.
        upload_paths: Files whose extracted text is added to the index. Files
            that yield no text are skipped.
        hf_model: Model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        A FAISS store built from the wiki entries followed by the user documents.
    """
    wiki_frame = pd.read_parquet(wiki_parquet_path)

    def _render_row(row):
        # One entry per row: the title, a blank line, then the body text.
        return f"{row['page_title']}\n\n{row['page_text']}"

    corpus = wiki_frame.apply(_render_row, axis=1).astype(str).tolist()

    # User documents go after the wiki entries, each tagged with its file name.
    for doc_path in upload_paths:
        body = extract_text_from_file(doc_path)
        if not body:
            continue
        corpus.append(f"[USER_DOC: {os.path.basename(doc_path)}]" + "\n" + body)

    embedder = HuggingFaceEmbeddings(model_name=hf_model)
    return FAISS.from_texts(corpus, embedder)


In [30]:
# Prompt Templates
# cot_prompt: step-by-step answering prompt with {context} and {question}
# placeholders; multi_hop_rag_with_fallback formats it once per retrieval hop.
cot_prompt = PromptTemplate.from_template(
    """
You are a helpful and factual medical expert. Use the context below to answer the user's question in depth,
with clear explanations, supported by factual information. Explain mechanisms, side effects, treatment options,
and what to do in case of overdose if applicable. Always be accurate, clear, and helpful.

Context:
{context}

Question: {question}

Let's think carefully, step by step.
"""
)

# combine_prompt: merges the two hop outputs ({hop1}, {hop2}) into the final answer.
combine_prompt = PromptTemplate.from_template(
    """
Combine and synthesize the following two explanations into a clear, exhaustive, and helpful answer.
Provide depth, factual details, and helpful guidance.

Explanation 1:
{hop1}

Explanation 2:
{hop2}

Final Answer:
"""
)


In [31]:
# Websearch Setup
# duck_search fetches raw results for a query; search_chain asks the LLM to
# format and summarize them.
# NOTE(review): the prompt promises titles and URLs, but DuckDuckGoSearchRun
# appears to return snippet text only — confirm, and consider
# DuckDuckGoSearchResults if the model starts inventing URLs.
duck_search = DuckDuckGoSearchRun()
search_prompt = PromptTemplate.from_template(
    """
Below are the raw DuckDuckGo search results including titles, snippets, and URLs:
{search_results}

Question: {query}

Using these results, format your response as follows:

Web Results:
1. Title - URL: snippet
2. Title - URL: snippet
...

Answer:
Provide a concise, factual summary, include dosage, mechanism, side effects if known, and add a brief disclaimer.
"""
)
# Callers invoke this chain with both "search_results" and "query"; the prompt
# now consumes "query" so the summary is written against the user's question.
search_chain = LLMChain(
    llm=ChatGroq(
        groq_api_key=os.environ.get("GROQ_API_KEY"),
        model_name="gemma2-9b-it",
        temperature=0.2
    ),
    prompt=search_prompt
)

In [32]:
def multi_hop_rag_with_fallback(
    vectorstore: FAISS,
    query: str,
    llm,
    k: int = 5,
    max_chars: int = 5000,
    similarity_threshold: float = 0.2
) -> str:
    """
    Answer `query` with a two-hop retrieve-and-reason pass over `vectorstore`,
    falling back to a DuckDuckGo web search when retrieval looks irrelevant or
    the synthesized answer looks like a generic non-answer.

    Flow:
      1. Retrieve `k` documents with relevance scores. If nothing is returned,
         or the best score is below `similarity_threshold`, return the web
         fallback result.
      2. Hop 1: answer `query` from the retrieved context.
      3. Hop 2: build a follow-up question from the hop-1 answer, retrieve
         again, and answer the follow-up.
      4. Combine both hop answers into the final answer.
      5. If the final answer contains a generic trigger word, try the web
         fallback; if that fails, keep the final answer.

    Intermediate and final results are printed as they are produced.

    Args:
        vectorstore: Store queried for context documents.
        query: The user's question.
        llm: Chat model; the object returned by `llm.invoke(...)` must expose `.content`.
        k: Number of documents retrieved per hop.
        max_chars: Maximum characters of joined context sent to the model per hop.
        similarity_threshold: Minimum relevance score (higher means more
            similar) the best hit must reach to use the vector store.

    Returns:
        The web-fallback text, or the combined two-hop answer.
    """
    def truncate(text: str, limit: int) -> str:
        return text[:limit] + "..." if len(text) > limit else text

    def web_fallback() -> str:
        # Search the web for the original query and let the LLM summarize it.
        raw_results = duck_search.run(query)
        response = search_chain.invoke({"search_results": raw_results, "query": query})
        # An LLMChain returns a dict holding the generation under "text";
        # a runnable pipeline returns a message object with `.content`.
        result = response["text"] if isinstance(response, dict) else response.content
        print("\n🔍 Web Fallback Result:")
        print(result)
        return result

    # Relevance scores grow with similarity, so "below threshold" really means
    # "poor match". (Raw FAISS scores are distances, where lower is better.)
    docs_scores = vectorstore.similarity_search_with_relevance_scores(query, k=k)
    if not docs_scores or max(score for _, score in docs_scores) < similarity_threshold:
        return web_fallback()

    docs1 = [d for d, _ in docs_scores]
    context1 = truncate("\n\n".join(d.page_content for d in docs1), max_chars)
    hop1 = llm.invoke(cot_prompt.format(context=context1, question=query))
    hop1_text = hop1.content.strip()
    print("\n🔍 Hop 1 Reasoning:")

    print(hop1_text)
    follow_up = (
        f"Based on this: {hop1_text}. "
        "What related medical process, structure, or effect supports this?"
    )
    docs2 = vectorstore.similarity_search(follow_up, k=k)
    context2 = truncate("\n\n".join(d.page_content for d in docs2), max_chars)
    hop2 = llm.invoke(cot_prompt.format(context=context2, question=follow_up))
    hop2_text = hop2.content.strip()
    print("\n🔍 Hop 2 Reasoning:")
    print(hop2_text)

    final_answer = llm.invoke(
        combine_prompt.format(hop1=hop1_text, hop2=hop2_text)
    ).content.strip()
    print("\n✅ Final Answer:")
    print(final_answer)

    # NOTE(review): any single trigger word is enough, and "please"/"check" are
    # common in ordinary medical answers, so this may over-trigger — consider
    # matching a full phrase instead.
    generic_triggers = ["please", "check", "spelling"]
    lowered_answer = final_answer.lower()
    if any(trigger in lowered_answer for trigger in generic_triggers):
        try:
            return web_fallback()
        except Exception as exc:
            # Best effort: a failed web search must not discard the RAG answer.
            print(f"\n⚠️ Web fallback failed ({exc}); returning the RAG answer.")

    return final_answer

In [35]:
if __name__ == "__main__":
    # Demo inputs: the wiki dataset location, optional user files, and the question.
    wiki_path = "hf://datasets/gamino/wiki_medical_terms/wiki_medical_terms.parquet"
    upload_paths = []
    query = "What are best ways to deal with pneumonia?"

    # Index the wiki dataset (plus any uploads) with the default embedding model.
    vectorstore = build_vectorstore(
        wiki_parquet_path=wiki_path,
        upload_paths=upload_paths,
    )

    # Chat model used for both reasoning hops and the final synthesis.
    llm = ChatGroq(
        groq_api_key=os.environ.get("GROQ_API_KEY"),
        model_name="gemma2-9b-it",
        temperature=0.2,
    )

    # The pipeline prints each stage; the returned answer is kept in `answer`.
    answer = multi_hop_rag_with_fallback(
        vectorstore=vectorstore,
        query=query,
        llm=llm,
    )


🔍 Hop 1 Reasoning:
Pneumonia is a serious condition, so dealing with it effectively requires a multi-faceted approach. Here's a breakdown of the best ways to manage pneumonia:

**1.  Diagnosis is Key:**

*   **See a Doctor:**  Pneumonia requires medical attention. A doctor will assess your symptoms, perform a physical exam, and likely order tests like a chest X-ray, blood tests, and possibly a sputum culture to confirm the diagnosis and identify the cause (bacteria, virus, etc.).

**2. Treatment Tailored to the Cause:**

*   **Bacterial Pneumonia:**  Antibiotics are the primary treatment. Your doctor will prescribe the appropriate antibiotic based on the suspected bacteria and your individual health history. It's crucial to complete the full course of antibiotics even if you feel better before finishing them. This prevents the bacteria from becoming resistant to the medication.
*   **Viral Pneumonia:**  Antibiotics are ineffective against viruses. Treatment focuses on managing symptom