### Step 1: Embed the documents and build the FAISS vector store

In [1]:
%cd "/Users/rebeccaglick/Desktop/SPRING 2025/NLU/Project/knowledge_graph_creation/data/"
import json
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Load the dataset: a dict keyed by PMID whose values hold the abstract fields
# used below ("CONTEXTS", "LONG_ANSWER") and later for evaluation.
# The encoding is given explicitly so the read does not depend on the platform default.
with open('diabetes_subset.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert every entry of the subset into a LangChain Document.
# Each entry's context passages are joined into one string and combined with the
# long-form conclusion; the PMID is kept as metadata so hits can be traced back.
documents = []
for pmid, entry in data.items():
    context = " ".join(entry["CONTEXTS"])
    long_answer = entry["LONG_ANSWER"]
    full_text = f"Context: {context}\n\nConclusion: {long_answer}"
    documents.append(Document(page_content=full_text, metadata={"pmid": pmid}))

# Create the sentence-embedding model used to vectorise the documents
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build the FAISS vector store (embeds every document once)
vectorstore = FAISS.from_documents(documents, embedding_model)

# Persist the index to disk so later cells can reload it without re-embedding
vectorstore.save_local("faiss_index_diabetes_subset")

/Users/rebeccaglick/Desktop/SPRING 2025/NLU/Project/knowledge_graph_creation/data


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


### Step 2: Reload the index and build the retrieval-QA chain

In [2]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chat_models import ChatOllama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# 1. Reload the saved vector DB without re-embedding.
# HuggingFaceEmbeddings is imported in this cell as well so the reload works on a
# fresh kernel; the model name must match the one used when the index was built.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# SECURITY NOTE: allow_dangerous_deserialization=True permits unpickling the saved
# docstore, and pickle can execute arbitrary code. Only load an index file that
# this notebook wrote itself; never one from an untrusted source.
vectorstore = FAISS.load_local("faiss_index_diabetes_subset", embedding_model, allow_dangerous_deserialization=True)

# 2. Create retriever to fetch the most relevant documents given a question.
# The retrieved documents are passed as context to the LLM (here k=4 -> returns the 4 most similar passages).
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# 3. Connect to the local LLaMA 3.2 model via Ollama
llm = ChatOllama(model="llama3.2")

# 4. Define custom prompt: constrains the model to a one-word Yes/No/Maybe answer
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""Use the following context to answer the question.
If the answer is not explicitly clear from the context, respond with "Maybe".

Context:
{context}

Question: {question}
Answer with only one word: Yes, No, or Maybe.
Answer:"""
)

# 5. Build RetrievalQA chain ("stuff" = all retrieved documents are inserted into one prompt)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": custom_prompt}
)

  llm = ChatOllama(model="llama3.2")


### Step 3: Evaluate the chain against the ground-truth answers

In [3]:
import string 

# Evaluate the QA chain on every question in `data` and report accuracy.
# Answers outside yes/no/maybe (or failed calls) are scored as "maybe" but are
# counted separately so the final report shows how often that fallback was used.
VALID_LABELS = {"yes", "no", "maybe"}

correct = 0
total = len(data)
predictions = []    # final (possibly coerced) prediction for every question
valid_answers = []  # only the answers the model produced in a valid form

# Loop through all of the questions (each entry contains the question and ground truth answer)
for pmid, entry in data.items():
    question = entry["QUESTION"]  # what is passed to the LLM
    truth = entry["final_decision"].lower()  # the ground-truth label, lower-cased for comparison

    # Diagnostic retrieval only: the chain below is built on the same retriever and
    # performs its own lookup. `invoke` replaces the deprecated `get_relevant_documents`.
    retrieved_docs = retriever.invoke(question)

    if not retrieved_docs:
        print(f"[WARN] No documents retrieved for PMID {pmid}: '{question}'")
    else:
        print(f"[INFO] Retrieved {len(retrieved_docs)} docs for PMID {pmid}")
        for i, doc in enumerate(retrieved_docs):
            print(f"\n-- Doc {i+1} (snippet) --\n{doc.page_content[:300]}...\n")

    answer = None  # stays None if the chain call itself fails
    try:
        response = qa_chain.invoke({"query": question})  # query the LLM using the QA chain defined above
        raw_output = response["result"]
        print(f"\n[LLM RAW OUTPUT for PMID {pmid}]:\n{raw_output}\n")
        # Normalise: trim whitespace, lower-case, drop surrounding punctuation ("Yes." -> "yes")
        answer = raw_output.strip().lower().strip(string.punctuation)
    except Exception as e:
        # Best-effort evaluation: report the failure and keep going with the fallback label
        print(f"Error with PMID {pmid}: {e}")

    if answer in VALID_LABELS:
        valid_answers.append(answer)
    else:
        if answer == "":
            print(f"[WARN] Empty response from LLM for PMID {pmid}")
        if answer is not None:
            print(f"[WARN] Unexpected answer from LLM: '{answer}' — defaulting to 'maybe'")
        answer = "maybe"  # fallback label for invalid or failed responses

    print(f"[QUESTION]: {question}")
    print(f"[GROUND TRUTH]: {truth}")
    print(f"[FINAL PREDICTION]: {answer}")

    if answer == truth:  # compare prediction of model to ground truth
        correct += 1

    predictions.append(answer)

# Print/calculate final accuracies (guard against an empty dataset)
accuracy = correct / total if total else 0.0
print(f"\nCorrect answers: {correct} out of {total}")
print(f"Accuracy: {accuracy:.2f}")
print(f"Valid predictions: {len(valid_answers)}")
print(f"Skipped or invalid predictions: {total - len(valid_answers)}")


  retrieved_docs = retriever.get_relevant_documents(question)


[INFO] Retrieved 4 docs for PMID 15703931

-- Doc 1 (snippet) --
Context: Compared with computed tomography (CT) and magnetic resonance imaging (MRI), positron emission tomography (PET) may have additional value in the assessment of primary and recurrent cervical cancer. However, the degree of tumour uptake of (18)F-2-fluoro-2-deoxy-D: -glucose (FDG) uptake is so...


-- Doc 2 (snippet) --
Context: Type 2 diabetes may be present for several years before diagnosis, by which time many patients have already developed diabetic complications. Earlier detection and treatment may reduce this burden, but evidence to support this approach is lacking. Glycemic control and clinical and surrogate...


-- Doc 3 (snippet) --
Context: Coronary atherosclerotic burden is excessive in diabetic patients. Diabetes mellitus (DM) is an independent predictor for both death and myocardial infarction. It is not known whether the prevalence of complex coronary lesions, such as bifurcation and ostial lesions, is

In [4]:
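# Results on the 23-question subset as a function of retriever k
# (number of documents retrieved per question): correct/total, accuracy
# k=1, 13/23, 57% accuracy
# k=2, 14/23, 61% accuracy
# k=3, 15/23, 65% accuracy
# k=4, 15/23, 65% accuracy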