Update working directory

In [19]:
import os

# Run the rest of the notebook from the project checkout; os.chdir makes it
# the process's current working directory.
PROJECT_DIR = "/Users/brianmann/git/knowledge_graph_creation"
os.chdir(PROJECT_DIR)

Read the Questions

In [20]:
import json

# Location of the question file; kept as a named constant so it is
# easy to repoint on another machine.
PQAL_PATH = "/Users/brianmann/Downloads/ori_pqal.json"

# JSON text is UTF-8; pass the encoding explicitly so the read does not depend
# on the platform's default locale encoding.
with open(PQAL_PATH, "r", encoding="utf-8") as f:
    json_data = json.load(f)

# Materialise the entries once so `questions` and `answer_key` are built from
# the same sequence and stay aligned by position.
pqal_entries = list(json_data.values())

# Entries lacking a field fall back to the "N/A" placeholder.
questions = [entry.get("QUESTION", "N/A") for entry in pqal_entries]
answer_key = [entry.get("final_decision", "N/A") for entry in pqal_entries]

In [21]:
questions[:10]

['Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?',
 'Landolt C and snellen e acuity: differences in strabismus amblyopia?',
 'Syncope during bathing in infants, a pediatric form of water-induced urticaria?',
 'Are the long-term results of the transanal pull-through equal to those of the transabdominal pull-through?',
 'Can tailored interventions increase mammography use among HMO women?',
 'Double balloon enteroscopy: is it efficacious and safe in a community setting?',
 '30-Day and 1-year mortality in emergency general surgery laparotomies: an area of concern and need for improvement?',
 'Is adjustment for reporting heterogeneity necessary in sleep disorders?',
 'Do mutations causing low HDL-C promote increased carotid intima-media thickness?',
 "A short stay or 23-hour ward in a general and academic children's hospital: are they effective?"]

# Set Up KG Access and Clear the Existing Graph

In [22]:
import spacy
from neo4j import GraphDatabase
import re

# Config
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
# The password can be supplied through the NEO4J_PASSWORD environment variable
# (`os` is imported in the first cell); when unset it stays the empty string.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")
RELATIONSHIP_FILE = "graph_output.txt"

# Define HealthcareKGQA Class
class HealthcareKGQA:
    """Question-answering helper over a Neo4j knowledge graph.

    A question is reduced to its spaCy noun chunks, classified by keyword into
    a question type, and turned into a Cypher query that looks for typed
    relationships between the first two extracted entities.
    """

    # Per question type: (relationship types to match, alias for the
    # relationship's source node, alias for its target node).
    _QUERY_SPECS = {
        "comparison": (
            ("IS_BETTER_THAN", "HAS_BETTER_RESULTS_THAN", "IS_MORE_EFFECTIVE_THAN",
             "IS_AS_EFFECTIVE_AS", "IMPROVES", "TREATS", "ENHANCES"),
            "better", "worse",
        ),
        "causal": (
            ("CAUSES", "INCREASES_RISK_OF", "REDUCES_RISK_OF", "PREDISPOSES_TO",
             "IS_A_RISK_FACTOR_FOR", "WORSENS", "IMPROVES"),
            "cause", "effect",
        ),
        "effectiveness": (
            ("IS_AS_EFFECTIVE_AS", "IS_MORE_EFFECTIVE_THAN", "TREATS", "IMPROVES",
             "IS_SAFE_FOR", "ENHANCES", "FACILITATES"),
            "treatment1", "treatment2",
        ),
        "association": (
            ("IS_ASSOCIATED_WITH", "PREDICTS", "WORSENS", "IMPROVES", "INFLUENCES",
             "ENABLES", "FACILITATES", "CAUSES", "CORRELATES_WITH"),
            "from_node", "to_node",
        ),
    }

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for `uri` and load the spaCy model once."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.nlp = spacy.load("en_core_web_sm")  # Load once here

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    def extract_entities(self, question):
        """Return the question's noun chunks, stripped and title-cased."""
        doc = self.nlp(question)
        return [chunk.text.strip().title() for chunk in doc.noun_chunks]

    def classify_question_type(self, question):
        """Classify a question by keyword.

        Returns one of "comparison", "causal", "effectiveness", "association"
        or "general"; the first matching keyword (checked in that order) wins.
        """
        q = question.lower()
        if "better" in q:
            return "comparison"
        elif "cause" in q or "predispose" in q:
            return "causal"
        elif "effective" in q:
            return "effectiveness"
        elif "associated" in q or "association" in q:
            return "association"
        else:
            return "general"

    def generate_cypher(self, entities, qtype):
        """Build the Cypher query for a question type.

        Args:
            entities: extracted entity names; at least two are required.
            qtype: a value returned by `classify_question_type`.

        Returns:
            Cypher text that refers to the first two entities through the
            query parameters `$e1` and `$e2` (supply them when running it),
            or None when fewer than two entities are available.
        """
        # Covers the empty list as well as a single entity; indexing the
        # second entity would otherwise raise IndexError.
        if len(entities) < 2:
            return None

        spec = self._QUERY_SPECS.get(qtype)
        if spec is None:
            # "general" (or any unrecognised type): any relationship that
            # touches either entity, capped at five rows.
            return """
            MATCH (a)-[r]->(b)
            WHERE a.name = $e1 OR b.name = $e1 OR a.name = $e2 OR b.name = $e2
            RETURN a.name AS from_node, type(r) AS relation, b.name AS to_node LIMIT 5
            """

        rel_types, source_alias, target_alias = spec
        rels = "|".join(rel_types)
        # First branch: e1 -> e2. Second branch: e2 -> e1. In both branches the
        # node the relationship starts from is returned first, so every row
        # reads "source - RELATION - target".
        return f"""
            MATCH (a {{name: $e1}})-[r:{rels}]->(b {{name: $e2}})
            RETURN a.name AS {source_alias}, type(r) AS relation, b.name AS {target_alias}
            UNION
            MATCH (a {{name: $e2}})-[r:{rels}]->(b {{name: $e1}})
            RETURN a.name AS {source_alias}, type(r) AS relation, b.name AS {target_alias}
            """

    def query_kg(self, question):
        """Return knowledge-graph facts relevant to `question`.

        Each fact is one result row rendered as its values joined by " - ".
        When no query can be built the single-item list
        ["Unable to generate query."] is returned.
        """
        entities = self.extract_entities(question)
        qtype = self.classify_question_type(question)
        cypher = self.generate_cypher(entities, qtype)

        if not cypher:
            return ["Unable to generate query."]

        # Entity names travel as query parameters, so apostrophes or other
        # special characters in a noun chunk cannot break or alter the query.
        params = {"e1": entities[0], "e2": entities[1]}
        with self.driver.session() as session:
            result = session.run(cypher, parameters=params)
            return [" - ".join(str(v) for v in record.values()) for record in result]


# --------------- DELETE ALL NODES ---------------

# WARNING: destructive. This removes every node and relationship from the
# database that NEO4J_URI points at.
# NOTE(review): nothing visible in this notebook re-populates the graph after
# this wipe (RELATIONSHIP_FILE is never read here), and every "Context:" in the
# recorded answers below is empty. Confirm the graph is reloaded before the
# questions are answered.

# Initialize HealthcareKGQA
kgqa = HealthcareKGQA(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

try:
    # Open a session and execute a query to delete all nodes and relationships.
    # consume() waits for the statement to finish so a failure surfaces here.
    with kgqa.driver.session() as session:
        session.run("MATCH (n) DETACH DELETE n").consume()
finally:
    # Close the connection even when the delete fails.
    kgqa.close()


# Answer Questions

In [23]:
answers = []

In [24]:
import re
from neo4j import GraphDatabase
from langchain_ollama import ChatOllama

# --------------- SETUP ---------------

NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
# Never commit a real password in the notebook: export NEO4J_PASSWORD in the
# environment before starting the kernel (`os` is imported in the first cell).
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

# --------------- KNOWLEDGE GRAPH ACCESS ---------------

class HealthcareKGQA:
    """Question-answering helper over a Neo4j knowledge graph.

    A question is reduced to its spaCy noun chunks, classified by keyword into
    a question type, and turned into a Cypher query that looks for typed
    relationships between the first two extracted entities.

    NOTE: this cell re-declares the class already defined in the "Load KG"
    cell and relies on `spacy` having been imported there.
    """

    # Per question type: (relationship types to match, alias for the
    # relationship's source node, alias for its target node).
    _QUERY_SPECS = {
        "comparison": (
            ("IS_BETTER_THAN", "HAS_BETTER_RESULTS_THAN", "IS_MORE_EFFECTIVE_THAN",
             "IS_AS_EFFECTIVE_AS", "IMPROVES", "TREATS", "ENHANCES"),
            "better", "worse",
        ),
        "causal": (
            ("CAUSES", "INCREASES_RISK_OF", "REDUCES_RISK_OF", "PREDISPOSES_TO",
             "IS_A_RISK_FACTOR_FOR", "WORSENS", "IMPROVES"),
            "cause", "effect",
        ),
        "effectiveness": (
            ("IS_AS_EFFECTIVE_AS", "IS_MORE_EFFECTIVE_THAN", "TREATS", "IMPROVES",
             "IS_SAFE_FOR", "ENHANCES", "FACILITATES"),
            "treatment1", "treatment2",
        ),
        "association": (
            ("IS_ASSOCIATED_WITH", "PREDICTS", "WORSENS", "IMPROVES", "INFLUENCES",
             "ENABLES", "FACILITATES", "CAUSES", "CORRELATES_WITH"),
            "from_node", "to_node",
        ),
    }

    def __init__(self, uri, user, password):
        """Open a Neo4j driver for `uri` and load the spaCy model once."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self.nlp = spacy.load("en_core_web_sm")  # Load once here

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    def extract_entities(self, question):
        """Return the question's noun chunks, stripped and title-cased."""
        doc = self.nlp(question)
        return [chunk.text.strip().title() for chunk in doc.noun_chunks]

    def classify_question_type(self, question):
        """Classify a question by keyword.

        Returns one of "comparison", "causal", "effectiveness", "association"
        or "general"; the first matching keyword (checked in that order) wins.
        """
        q = question.lower()
        if "better" in q:
            return "comparison"
        elif "cause" in q or "predispose" in q:
            return "causal"
        elif "effective" in q:
            return "effectiveness"
        elif "associated" in q or "association" in q:
            return "association"
        else:
            return "general"

    def generate_cypher(self, entities, qtype):
        """Build the Cypher query for a question type.

        Args:
            entities: extracted entity names; at least two are required.
            qtype: a value returned by `classify_question_type`.

        Returns:
            Cypher text that refers to the first two entities through the
            query parameters `$e1` and `$e2` (supply them when running it),
            or None when fewer than two entities are available.
        """
        # Covers the empty list as well as a single entity; indexing the
        # second entity would otherwise raise IndexError.
        if len(entities) < 2:
            return None

        spec = self._QUERY_SPECS.get(qtype)
        if spec is None:
            # "general" (or any unrecognised type): any relationship that
            # touches either entity, capped at five rows.
            return """
            MATCH (a)-[r]->(b)
            WHERE a.name = $e1 OR b.name = $e1 OR a.name = $e2 OR b.name = $e2
            RETURN a.name AS from_node, type(r) AS relation, b.name AS to_node LIMIT 5
            """

        rel_types, source_alias, target_alias = spec
        rels = "|".join(rel_types)
        # First branch: e1 -> e2. Second branch: e2 -> e1. In both branches the
        # node the relationship starts from is returned first, so every row
        # reads "source - RELATION - target".
        return f"""
            MATCH (a {{name: $e1}})-[r:{rels}]->(b {{name: $e2}})
            RETURN a.name AS {source_alias}, type(r) AS relation, b.name AS {target_alias}
            UNION
            MATCH (a {{name: $e2}})-[r:{rels}]->(b {{name: $e1}})
            RETURN a.name AS {source_alias}, type(r) AS relation, b.name AS {target_alias}
            """

    def query_kg(self, question):
        """Return knowledge-graph facts relevant to `question`.

        Each fact is one result row rendered as its values joined by " - ".
        When no query can be built the single-item list
        ["Unable to generate query."] is returned.
        """
        entities = self.extract_entities(question)
        qtype = self.classify_question_type(question)
        cypher = self.generate_cypher(entities, qtype)

        if not cypher:
            return ["Unable to generate query."]

        # Entity names travel as query parameters instead of being escaped by
        # hand, so apostrophes, backslashes or other special characters in a
        # noun chunk cannot break or alter the query.
        params = {"e1": entities[0], "e2": entities[1]}
        with self.driver.session() as session:
            result = session.run(cypher, parameters=params)
            return [" - ".join(str(v) for v in record.values()) for record in result]


# --------------- LLM RESPONSE USING LLAMA ---------------

def ask_llm(question, facts):
    """Ask the llama3.2 chat model for a Yes/No/Maybe answer to `question`.

    Args:
        question: the question text placed in the prompt.
        facts: list of fact strings used as context; when empty, a fixed
            "no relevant facts" sentence is used instead.

    Returns:
        The model reply's `content`, stripped of surrounding whitespace, or
        the raw reply object when it has no `content` attribute.
    """
    if facts:
        kg_context = "\n".join(facts)
    else:
        kg_context = "No relevant facts found in the knowledge graph."

    prompt = f"""
You are a helpful medical assistant answering yes/no questions using the knowledge graph context below.
Your answer must be one of: Yes, No, or Maybe.

Context:
{kg_context}

Question:
{question}

Answer (only one word: Yes, No, or Maybe):
"""

    chat_model = ChatOllama(model="llama3.2", temperature=0, max_tokens=2048)
    reply = chat_model.invoke(prompt)
    if not hasattr(reply, "content"):
        return reply
    return reply.content.strip()


# --------------- EXAMPLE USAGE ---------------

if __name__ == "__main__":
    # Number of benchmark questions evaluated in this run.
    N_QUESTIONS = 30

    # Start from an empty list so re-running this cell never appends a second
    # batch; the accuracy cells pair `answers` with `answer_key` by position.
    answers = []

    kgqa = HealthcareKGQA(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)
    try:
        for q in questions[:N_QUESTIONS]:
            facts = kgqa.query_kg(q)
            print(f"\n--- Question: {q} ---")
            print("Context:")
            print("\n".join(facts))
            print("\nAnswer:")
            # Query the model once and reuse the reply, so the printed answer
            # and the scored answer are guaranteed to be the same response.
            reply = ask_llm(q, facts)
            print(reply)
            # Normalise to the answer key's form: lower case, no trailing period.
            answers.append(reply.lower().rstrip('.'))
    finally:
        # Release the Neo4j driver even if a query or the LLM call fails.
        kgqa.close()



--- Question: Do mitochondria play a role in remodelling lace plant leaves during programmed cell death? ---
Context:


Answer:
Maybe.

--- Question: Landolt C and snellen e acuity: differences in strabismus amblyopia? ---
Context:


Answer:
Maybe.

--- Question: Syncope during bathing in infants, a pediatric form of water-induced urticaria? ---
Context:


Answer:
Maybe.

--- Question: Are the long-term results of the transanal pull-through equal to those of the transabdominal pull-through? ---
Context:


Answer:
Maybe.

--- Question: Can tailored interventions increase mammography use among HMO women? ---
Context:


Answer:
Maybe.

--- Question: Double balloon enteroscopy: is it efficacious and safe in a community setting? ---
Context:


Answer:
Maybe.

--- Question: 30-Day and 1-year mortality in emergency general surgery laparotomies: an area of concern and need for improvement? ---
Context:


Answer:
Maybe.

--- Question: Is adjustment for reporting heterogeneity necessary in slee



No.

--- Question: A short stay or 23-hour ward in a general and academic children's hospital: are they effective? ---
Context:


Answer:
Maybe.

--- Question: Did Chile's traffic law reform push police enforcement? ---
Context:


Answer:
No.

--- Question: Therapeutic anticoagulation in the trauma patient: is it safe? ---
Context:


Answer:
No.

--- Question: Differentiation of nonalcoholic from alcoholic steatohepatitis: are routine laboratory markers useful? ---
Context:


Answer:
Maybe.

--- Question: Prompting Primary Care Providers about Increased Patient Risk As a Result of Family History: Does It Work? ---
Context:


Answer:
No.

--- Question: Do emergency ultrasound fellowship programs impact emergency medicine residents' ultrasound education? ---
Context:


Answer:
Maybe.

--- Question: Patient-Controlled Therapy of Breathlessness in Palliative Care: A New Therapeutic Concept for Opioid Administration? ---
Context:


Answer:
Maybe.

--- Question: Is there still a need for liv




--- Question: Israeli hospital preparedness for terrorism-related multiple casualty incidents: can the surge capacity and injury severity distribution be better predicted? ---
Context:


Answer:
Maybe.

--- Question: Acute respiratory distress syndrome in children with malignancy--can we predict outcome? ---
Context:


Answer:
Maybe.

--- Question: Secondhand smoke risk in infants discharged from an NICU: potential for significant health disparities? ---
Context:


Answer:
Maybe.





--- Question: Do nomograms designed to predict biochemical recurrence (BCR) do a better job of predicting more clinically relevant prostate cancer outcomes than BCR? ---
Context:


Answer:
Maybe.

--- Question: Are reports of mechanical dysfunction in chronic oro-facial pain related to somatisation? ---
Context:


Answer:
Maybe.

--- Question: Amblyopia: is visual loss permanent? ---
Context:


Answer:




Maybe.

--- Question: Implementation of epidural analgesia for labor: is the standard of effective analgesia reachable in all women? ---
Context:


Answer:
Maybe.

--- Question: Does HER2 immunoreactivity provide prognostic information in locally advanced urothelial carcinoma patients receiving adjuvant M-VEC chemotherapy? ---
Context:


Answer:
Maybe.

--- Question: Is halofantrine ototoxic? ---
Context:
Unable to generate query.

Answer:
Yes.

--- Question: Visceral adipose tissue area measurement at a single level: can it represent visceral adipose tissue volume? ---
Context:


Answer:
Maybe.


# Review Accuracy

In [25]:
# Tally the positions where the model's answer equals the answer key. zip()
# stops at the shorter sequence, so only answered questions are compared.
correct_count = 0
for predicted, expected in zip(answers, answer_key):
    if predicted == expected:
        correct_count += 1

print(f"Correct answers: {correct_count}/{len(answers)}")

Correct answers: 6/30


In [26]:
# Show each model answer next to the benchmark label, numbered from 1.
for number, pair in enumerate(zip(answers, answer_key), start=1):
    ours, expected = pair
    print(f"Q{number}: Our Answer: {ours}\tAnswer Key: {expected}")

Q1: Our Answer: maybe	Answer Key: yes
Q2: Our Answer: maybe	Answer Key: no
Q3: Our Answer: maybe	Answer Key: yes
Q4: Our Answer: maybe	Answer Key: no
Q5: Our Answer: maybe	Answer Key: yes
Q6: Our Answer: maybe	Answer Key: yes
Q7: Our Answer: maybe	Answer Key: maybe
Q8: Our Answer: maybe	Answer Key: no
Q9: Our Answer: no	Answer Key: no
Q10: Our Answer: maybe	Answer Key: yes
Q11: Our Answer: no	Answer Key: yes
Q12: Our Answer: no	Answer Key: no
Q13: Our Answer: maybe	Answer Key: yes
Q14: Our Answer: no	Answer Key: no
Q15: Our Answer: maybe	Answer Key: yes
Q16: Our Answer: maybe	Answer Key: yes
Q17: Our Answer: maybe	Answer Key: yes
Q18: Our Answer: maybe	Answer Key: yes
Q19: Our Answer: maybe	Answer Key: yes
Q20: Our Answer: maybe	Answer Key: yes
Q21: Our Answer: maybe	Answer Key: yes
Q22: Our Answer: maybe	Answer Key: yes
Q23: Our Answer: maybe	Answer Key: yes
Q24: Our Answer: maybe	Answer Key: yes
Q25: Our Answer: maybe	Answer Key: yes
Q26: Our Answer: maybe	Answer Key: no
Q27: Our Ans