In [1]:
!pip -q uninstall -y langchain langchain-community langchain-openai langchain-groq chromadb sentence-transformers numpy scipy scikit-learn > /dev/null

!pip -q install \
  "numpy==1.26.4" "scipy==1.11.4" "scikit-learn==1.3.2" "sentence-transformers==2.7.0" \
  "langchain==0.2.16" "langchain-community==0.2.16" "langchain-groq==0.1.6" \
  "chromadb==0.5.5" "pypdf==4.3.1" \
  "langchain-huggingface==0.0.3"

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.8/35.8 MB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m75.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mapclassify 2.10.0 requires scikit-learn>=1.4, but you have scikit-learn 1.3.2 which is incompatible.
mapclassify 2.10.0 requires scipy>=1.12, but you have scipy 1.11.4 which is incompatible.
opencv-python 4.13.0.92 requires numpy>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
pytensor 2.38.0 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
tobler 0.13.0 requires numpy>=2.0, but you have numpy 1.26.4 

## Install + upload + key

In [2]:

import os, shutil
from getpass import getpass
from google.colab import files

# Modern LangChain Import Paths
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter # Use underscore
from langchain_community.vectorstores import Chroma

from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

print("Please upload your PDF file:")
uploaded = files.upload()
file_name = list(uploaded.keys())[0]

os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ")

PERSIST_DIR = "./chroma_db"
if os.path.exists(PERSIST_DIR):
    shutil.rmtree(PERSIST_DIR)

Please upload your PDF file:


Saving ver 6.0.pdf to ver 6.0 (1).pdf
Enter your Groq API key: ··········


In [5]:
# 1) Load PDF
loader = PyPDFLoader(file_name)
docs = loader.load()

# 2) Chunking
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=120)
chunks = splitter.split_documents(docs)
print("Total chunks:", len(chunks))

# 3) Local Embeddings (HF)
# Good default: small, fast, decent quality
hf_emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 4) Vector DB (local)
vector_db = Chroma.from_documents(chunks, hf_emb, persist_directory=PERSIST_DIR)

# 5) Anti-hallucination prompt
prompt_template = """You must answer ONLY using the provided context.
If the answer is not contained in the context, say: "I don't know based on the provided documents."

Context:
{context}

Question: {question}
Answer:"""
QA_PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# 6) Groq LLM (fast)
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

# 7) RAG chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_db.as_retriever(search_type="mmr",search_kwargs={"k": 6,"fetch_k": 20, "lambda_mult": 0.6}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_PROMPT},
)

# 8) Test questions
questions = [
    "Why is explainability critical in clinical decision support (CDS)?",
    "What challenges or obstacles does the document describe for implementing explainable CDS in real clinical settings?",
    "Does this document mention Starbucks rewards? (Negative test)"
]

for q in questions:
    print("\n" + "="*80)
    print("QUERY:", q)
    resp = qa_chain.invoke({"query": q})
    print("\nANSWER:\n", resp["result"])
    print("\nSOURCES:")
    for i, d in enumerate(resp["source_documents"], 1):
        page = d.metadata.get("page", "N/A")
        snippet = d.page_content.replace("\n"," ")[:160]
        print(f"  [{i}] Page {page}: {snippet}...")

Total chunks: 24


ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given



QUERY: Why is explainability critical in clinical decision support (CDS)?

ANSWER:
 Explainable AI (XAI) promises two linked benefits: (1) greater trust by clinicians and patients and (2) smoother implementation into workflows governed by regulation, liability, and professional norms.

SOURCES:
  [1] Page 3: ExplanationOfDecision  profile, enabling machine -readable provenance that  dovetails with audit -trail requirements.   Third, p ayers stand to gain: if explain...
  [2] Page 4: 6  Recommendations (2025 -2030)   Table  2 summari zes near-term actions for the four stakeholder groups most able to  accelerate explainable  CDS adoption. The...
  [3] Page 0: accompany an automated recommendation with a human -understandable rationale.  Applied to clinical -decision -support  (CDS) systems, XAI promises two linked be...
  [4] Page 4: Educators  Integrate XAI literacy into clinical -reasoning curricula;  offer micro -credentials.   7  Conclusion   Explainable  CDS is not a single product