### 1. Preprocess Data and Build the Vector Database Using FAISS


In [1]:
%cd "/Users/rebeccaglick/Desktop/pubmedqa/data"

import json
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load data. The encoding is passed explicitly so the parse does not depend on
# the platform's default text encoding.
with open('ori_pqal.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Materialise the (pmid, entry) pairs once; the evaluation cell iterates over
# `items`, and the indexing loop below slices the same list.
items = list(data.items())

# How many entries to convert into LangChain documents and index.
N_DOCS = 1000

# Each entry supplies CONTEXTS (a list of strings) and LONG_ANSWER here; the
# ground-truth final_decision is read later, during evaluation.
documents = []
for pmid, entry in items[:N_DOCS]:
    context = " ".join(entry["CONTEXTS"])
    long_answer = entry["LONG_ANSWER"]
    full_text = f"Context: {context}\n\nConclusion: {long_answer}"
    # Keep the PMID in metadata so retrieval hits can be matched to their source entry.
    documents.append(Document(page_content=full_text, metadata={"pmid": pmid}))

# Embed using local model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Store in FAISS vector database
vectorstore = FAISS.from_documents(documents, embedding_model)

# Save for later. The directory name is loaded verbatim by the next cell, so
# rename it in both places if N_DOCS changes.
vectorstore.save_local("faiss_index_1000_entries")

/Users/rebeccaglick/Desktop/pubmedqa/data


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


### 2. Load the Vector Database and Create Retrieval QA Chain

In [2]:
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOllama
from langchain.embeddings import HuggingFaceEmbeddings

# 1. Recreate the embedder and read the saved FAISS index back from disk.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in to
# loading serialized Python objects from the index directory. Only point this
# at an index this notebook wrote itself.
vectorstore = FAISS.load_local(
    "faiss_index_1000_entries",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# 2. Wrap the store as a retriever; k is the number of documents pulled as
#    context for each question.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=2))

# 3. Chat client for the local Ollama server running LLaMA 3.2.
# NOTE(review): no temperature is passed, so generation uses the client's
# defaults - confirm whether evaluation runs need to be repeatable.
llm = ChatOllama(model="llama3.2")

# 4. Prompt function
def create_query_to_answer_using_abstracts(question, abstract):
    """Build the yes/no/maybe prompt sent to the LLM.

    Args:
        question: The question to be answered.
        abstract: Supporting text placed ahead of the instruction.

    Returns:
        str: The prompt, with the supporting text first and the question last.
    """
    # Assembled from adjacent literals rather than an indented triple-quoted
    # string, so source indentation and trailing blanks do not leak into the
    # text the model receives.
    return (
        f"Using this information {abstract}\n\n"
        "Answer the following question with yes, no, or maybe. "
        "Do not elaborate. Only answer the question with one word. "
        f"The answer must be yes, no, or maybe: {question}"
    )

# 5. Answer generator
def _extract_label(raw_output):
    """Reduce a raw LLM reply to a single lowercase label.

    Takes the first whitespace-separated token, strips surrounding punctuation
    (so "Yes.", "Yes," and "**Yes**" all become "yes") and lowercases it.
    Returns "unknown" when the reply holds no usable token.
    """
    import string  # local import keeps this cell runnable on its own

    tokens = raw_output.split()
    if not tokens:
        return "unknown"
    label = tokens[0].strip(string.punctuation).lower()
    return label or "unknown"


def generate_answers_using_abstracts(question):
    """Answer a question from retrieved abstracts.

    Uses the module-level `retriever` to fetch documents, joins their text,
    builds the prompt with `create_query_to_answer_using_abstracts`, and sends
    it to the module-level `llm`.

    Args:
        question: The question to answer.

    Returns:
        tuple[str, str]: (label, raw_output), where raw_output is the stripped
        model reply and label is its first word, normalised for comparison
        ("unknown" if the reply is empty).
    """
    # Step 1: Retrieve documents from FAISS. `invoke` replaces the deprecated
    # `get_relevant_documents` and matches how the LLM is called below.
    retrieved_docs = retriever.invoke(question)

    # Step 2: Build full_text from the retrieved documents (context + long_answer)
    full_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Step 3: Form the prompt and pass it to the LLM
    prompt = create_query_to_answer_using_abstracts(question, full_text)
    response = llm.invoke(prompt)

    # Step 4: Return the normalised first word together with the full output
    raw_output = response.content.strip()
    return _extract_label(raw_output), raw_output



  llm = ChatOllama(model="llama3.2")


### 3. Evaluation

In [3]:
# 6. Loop over dataset items, recording the model's answer and whether the
#    question's own abstract was among the retrieved documents.
correct_retrievals = 0
correct_predictions = 0
predictions = []

for pmid, entry in items:
    question = entry["QUESTION"]
    truth = entry["final_decision"]

    answer, full_response = generate_answers_using_abstracts(question)
    # generate_answers_using_abstracts queries the retriever internally but
    # does not return the documents, so the query is repeated here to learn
    # which PMIDs were pulled. `invoke` replaces the deprecated
    # `get_relevant_documents`.
    retrieved_docs = retriever.invoke(question)
    retrieved_pmids = [doc.metadata.get("pmid", "N/A") for doc in retrieved_docs]
    predictions.append({
        "pmid": pmid,
        "question": question,
        "truth": truth,
        "answer": answer,
        "full_response": full_response
    })

    # Print results
    print("\n" + "-" * 60)
    print(f"PMID: {pmid}")
    print(f"Question: {question}")
    print(f"Ground Truth: {truth}")
    print(f"Full LLM Output: {full_response}")
    print(f"Answer Used for Evaluation: {answer}")
    print(f"Retrieved PMIDs: {retrieved_pmids}")

    # Count a successful retrieval if the correct pmid is among the retrieved
    if str(pmid) in retrieved_pmids:
        correct_retrievals += 1
        print("Pulled the correct abstract.")

    # `answer` is already a single normalised word, so compare it directly;
    # re-splitting it would raise IndexError on an empty answer.
    if answer == truth:
        print("Correct prediction has been made.")
        correct_predictions += 1

    print("-" * 60)

print("-" * 60)
print("-" * 60)
print("-" * 60)

# One prediction is appended per item, so this is also len(items).
total = len(predictions)
# Guard the ratios so an empty dataset reports 0.00 instead of dividing by zero.
retrieval_accuracy = correct_retrievals / total if total else 0.0
model_accuracy = correct_predictions / total if total else 0.0

# Summary of evaluation
print("\n--- Retrieval Summary ---")
print(f"Correct document retrieved {correct_retrievals} out of {total} times.")
print(f"Retrieval accuracy: {retrieval_accuracy:.2f}")

print("\n--- Evaluation Summary ---")
print(f"Total examples: {total}")
print(f"Correct predictions: {correct_predictions}")
print(f"Model accuracy: {model_accuracy:.2f}")

# Extract predictions and ground truths
predicted_labels = [entry["answer"] for entry in predictions]
ground_truth_labels = [entry["truth"] for entry in predictions]

# Print comma-separated lists
print("\nGround Truth Labels:")
print(", ".join(ground_truth_labels))

print("\nPredicted Labels:")
print(", ".join(predicted_labels))


  retrieved_docs = retriever.get_relevant_documents(question)



------------------------------------------------------------
PMID: 21645374
Question: Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?
Ground Truth: yes
Full LLM Output: Yes.
Answer Used for Evaluation: yes
Retrieved PMIDs: ['21645374', '18222909']
Pulled the correct abstract.
Correct prediction has been made.
------------------------------------------------------------

------------------------------------------------------------
PMID: 16418930
Question: Landolt C and snellen e acuity: differences in strabismus amblyopia?
Ground Truth: no
Full LLM Output: Yes.
Answer Used for Evaluation: yes
Retrieved PMIDs: ['16418930', '10966943']
Pulled the correct abstract.
------------------------------------------------------------

------------------------------------------------------------
PMID: 9488747
Question: Syncope during bathing in infants, a pediatric form of water-induced urticaria?
Ground Truth: yes
Full LLM Output: Yes
Answer Used for Eval