# Initial Setup

In [256]:
import json

with open("/Users/brianmann/Downloads/ori_pqal.json", "r") as f:
    json_data = json.load(f)

questions = [entry.get("QUESTION", "N/A") for entry in json_data.values()]
answer_key = [entry.get("final_decision", "N/A") for entry in json_data.values()]

num_questions = 50


# Clear and Load Neo4j

In [257]:
from neo4j import GraphDatabase

# Neo4j connection credentials
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "sunsh1ne1"

# Initialize the Neo4j driver
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# Function to clear the graph and insert the data into Neo4j
def insert_data_to_neo4j(data):
    with driver.session() as session:
        # Clear all nodes and relationships
        session.run("MATCH (n) DETACH DELETE n")

        # Insert new data from the file
        for line in data:
            line.replace("((", "(").replace("))", ")").replace('"', '').replace("'", "")
            # Split each line by ' -[:' to get the nodes and relationship type
            parts = line.strip().split(' -[:')
            node_1 = parts[0][1:-1]  # Remove parentheses
            relationship_and_node_2 = parts[1].split(']->')
            relationship = relationship_and_node_2[0]
            node_2 = relationship_and_node_2[1][1:-1]  # Remove parentheses

            # Create the nodes and relationships
            cypher_query = f"""
            MERGE (a:Entity {{name: '{node_1}'}})
            MERGE (b:Entity {{name: '{node_2}'}})
            MERGE (a)-[:{relationship}]->(b)
            """
            session.run(cypher_query)

# Read the graph data from the file
with open("graph_output.txt", "r") as file:
    data = file.readlines()

# Insert the data into Neo4j
insert_data_to_neo4j(data)

print("Data inserted into Neo4j successfully.")


Data inserted into Neo4j successfully.


### Getting a list of all entities in a question Test

In [258]:
import re
from langchain_ollama import ChatOllama

def extract_entities_from_question(question):
    # Initialize the LLM model
    llm = ChatOllama(model="llama3.2", temperature=0, max_tokens=2048)

    # Generate the query to extract entities from the question
    query = f'''
    Please identify all meaningful entities involved in the following question: "{question}"
    Return the entities clearly and separate them with commas, like (Entity1, Entity2, Entity3, ...). Start the list with START and end with the word FINISHED.
    '''

    # Call the LLM to extract the entities
    response = llm.invoke(query)
    raw_output = response.content.strip()

    # Print the raw output (for debugging)
    print(f"Raw LLM response: {raw_output}")

    # Use regex to extract the entities between START and FINISHED
    # matches = re.findall(r'START\s*,\s*(.*?)\s*,\s*FINISHED', raw_output)
    matches = re.findall(r'START\s*,\s*(.*?)\s*FINISHED', raw_output)


    if matches:
        # The entities should be in the form (Entity1, Entity2, ...)
        # Split the match by commas and strip extra spaces to get the list of entities
        entities = [entity.strip() for entity in matches[0].split(',')]
        return entities
    else:
        return []

# Example question
question = "Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?"
# question = "Are the long-term results of the transanal pull-through equal to those of the transabdominal pull-through?"

entities = extract_entities_from_question(question)
print(f"Extracted Entities: {entities}")


Raw LLM response: START, Mitochondria, Lace Plant, Leaves, Programmed Cell Death, Cell, FINISHED
Extracted Entities: ['Mitochondria', 'Lace Plant', 'Leaves', 'Programmed Cell Death', 'Cell', '']


In [262]:
# Test Individual Questions

In [259]:
from neo4j import GraphDatabase
from fuzzywuzzy import fuzz
from langchain_ollama import ChatOllama
import re
import itertools

# Neo4j connection credentials
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "sunsh1ne1"

# Initialize the Neo4j driver
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

#Relationships and their corresponding answers
relationship_to_answer = {
    "CAUSES": "Yes",
    "DOES_NOT_CAUSE": "No",
    "TREATS": "Yes",
    "DOES_NOT_TREAT": "No",
    "DIAGNOSES": "Maybe",
    "DOES_NOT_DIAGNOSE": "No",
    "REDUCES_RISK_OF": "No",
    "INCREASES_RISK_OF": "No",
    "IS_A_RISK_FACTOR_FOR": "No",
    "IS_NOT_A_RISK_FACTOR_FOR": "Yes",
    "IS_ASSOCIATED_WITH": "Maybe",
    "IS_NOT_ASSOCIATED_WITH": "No",
    "PREDICTS": "Maybe",
    "DOES_NOT_PREDICT": "No",
    "IS_AS_EFFECTIVE_AS": "Maybe",
    "IS_LESS_EFFECTIVE_THAN": "No",
    "IS_MORE_EFFECTIVE_THAN": "Maybe",
    "IMPROVES": "Yes",
    "WORSENS": "No",
    "IS_COST_EFFECTIVE_FOR": "Maybe",
    "IS_NOT_COST_EFFECTIVE_FOR": "No",
    "INFLUENCES": "Maybe",
    "DOES_NOT_INFLUENCE": "No",
    "IS_USEFUL_FOR": "Maybe",
    "IS_NOT_USEFUL_FOR": "No"
}

# Extract entities from the question using LLM
def extract_entities_from_question(question):
    llm = ChatOllama(model="llama3.2", temperature=0, max_tokens=2048)
    query = f'''
    Please identify all meaningful entities involved in the following question: "{question}"
    Return the entities clearly and separate them with commas, like (Entity1, Entity2, Entity3, ...). Start the list with START and end with the word FINISHED.
    '''
    response = llm.invoke(query)
    raw_output = response.content.strip()
    # matches = re.findall(r'START\s*,\s*(.*?)\s*,\s*FINISHED', raw_output)
    matches = re.findall(r'START\s*,\s*(.*?)\s*FINISHED', raw_output)
    if matches:
        return [entity.strip() for entity in matches[0].split(',')]
    return []

# Fuzzy match to the best graph node
def fuzzy_match_entity(entity, entity_db, threshold=80):
    best_match = None
    highest_score = 0
    for db_entity in entity_db:
        score = fuzz.partial_ratio(entity.lower(), db_entity.lower())
        if score > highest_score and score >= threshold:
            best_match = db_entity
            highest_score = score
    return best_match

# Look for relationships in either direction
def search_relationships_in_neo4j(entity_1, entity_2):
    with driver.session() as session:
        query = """
        MATCH (a:Entity {name: $entity_1})-[r]-(b:Entity {name: $entity_2})
        RETURN DISTINCT type(r) AS relationship_type;
        """
        result = session.run(query, entity_1=entity_1, entity_2=entity_2)
        return [record["relationship_type"] for record in result]

# Map to answer
def determine_answer_from_relationship(relationship_type):
    return relationship_to_answer.get(relationship_type, "Maybe")

# Find relationships for all pairs of entities
def answer_question(question):
    entities = extract_entities_from_question(question)
    print(entities)
    
    if len(entities) < 2:
        print("Could not extract enough entities.")
        return "Maybe"

    with driver.session() as session:
        query = "MATCH (n:Entity) RETURN n.name AS name"
        result = session.run(query)
        entity_db = [record["name"] for record in result]

    # Fuzzy match all entities once
    matched_entities = {
        entity: fuzzy_match_entity(entity, entity_db)
        for entity in entities
    }

    for e in matched_entities:
        if matched_entities[e] is None:
            print(f"No match for entity: {e}")
    
    # Get all combinations of 2 entities (ordered pairs)
    combinations = itertools.permutations(entities, 2)

    for ent1, ent2 in combinations:
        matched_1 = matched_entities.get(ent1)
        matched_2 = matched_entities.get(ent2)

        if matched_1 and matched_2:
            print(f"Checking: {matched_1} <-> {matched_2}")
            relationships = search_relationships_in_neo4j(matched_1, matched_2)

            if relationships:
                print(f"  Found relationships: {relationships}")
                for r in relationships:
                    answer = determine_answer_from_relationship(r)
                    print(answer)
                    if answer != "Maybe":
                        return answer

    print("No definitive relationship found.")
    return "Maybe"

#TESTs
# Example question
# question = "Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?"
# question = "Are the long-term results of the transanal pull-through equal to those of the transabdominal pull-through?"
question = "Syncope during bathing in infants, a pediatric form of water-induced urticaria?"

entities = extract_entities_from_question(question)
print(f"Extracted Entities: {entities}")

answer = answer_question(question)
print(f"Answer: {answer}")


Extracted Entities: ['Syncope', 'Bathing', 'Infants', 'Urticaria', 'Water', 'Pediatric', 'Form', 'Condition']
['Syncope', 'Bathing', 'Infants', 'Urticaria', 'Water', 'Pediatric', 'Form', 'Condition']
No match for entity: Pediatric
Checking: (Syncope during bathing in infants <-> (Syncope during bathing in infants
Checking: (Syncope during bathing in infants <-> (Syncope during bathing in infants
Checking: (Syncope during bathing in infants <-> Aquagenic urticaria
Checking: (Syncope during bathing in infants <-> Water-induced urticaria
Checking: (Syncope during bathing in infants <-> (prognostic information
Checking: (Syncope during bathing in infants <-> (Misdiagnosis of Ultrasound-Related Conditions
Checking: (Syncope during bathing in infants <-> (Syncope during bathing in infants
Checking: (Syncope during bathing in infants <-> (Syncope during bathing in infants
Checking: (Syncope during bathing in infants <-> Aquagenic urticaria
Checking: (Syncope during bathing in infants <-> Wate

# Generate Answers

In [260]:

# Function to generate answers using the provided code
def generate_answers_for_questions(questions, num_questions):
    answers = []
    
    for i in range(min(num_questions, len(questions))):
        question = questions[i]
        print(f"Processing question: {question}")
        # Generate the answer using the existing function
        answer = answer_question(question)  # This is your provided function
        answers.append(answer)
        
    return answers

# Generate answers for the first 20 questions
generated_answers = generate_answers_for_questions(questions, num_questions)


Processing question: Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?
['Mitochondria', 'Lace Plant', 'Leaves', 'Programmed Cell Death', 'Cell', '']
No match for entity: 
Checking: Mitochondria <-> (programmed cell death in lace plant leaves
  Found relationships: ['CAUSES']
Yes
Processing question: Landolt C and snellen e acuity: differences in strabismus amblyopia?
['Landolt C', 'Snellen chart', 'Strabismus', 'Amblyopia', '']
No match for entity: Snellen chart
No match for entity: 
Checking: (Landolt C acuity <-> (strabismus amblyopia
Checking: (Landolt C acuity <-> (strabismus amblyopia
Checking: (strabismus amblyopia <-> (Landolt C acuity
Checking: (strabismus amblyopia <-> (strabismus amblyopia
Checking: (strabismus amblyopia <-> (Landolt C acuity
Checking: (strabismus amblyopia <-> (strabismus amblyopia
No definitive relationship found.
Processing question: Syncope during bathing in infants, a pediatric form of water-induced urticaria?
['S

# Compute Accuracy

In [261]:
correct_count = sum([1 for model, correct in zip(generated_answers, answer_key) if model.lower() == correct])

print(f"Correct answers: {correct_count}/{len(generated_answers)}")
print()

# Print answers side by side
for i, (model_answer, correct_answer) in enumerate(zip(generated_answers, answer_key), 1):
    print(f"Q{i}: Our Answer: {model_answer.lower()}\tAnswer Key: {correct_answer}")

Correct answers: 10/50

Q1: Our Answer: yes	Answer Key: yes
Q2: Our Answer: maybe	Answer Key: no
Q3: Our Answer: maybe	Answer Key: yes
Q4: Our Answer: no	Answer Key: no
Q5: Our Answer: maybe	Answer Key: yes
Q6: Our Answer: maybe	Answer Key: yes
Q7: Our Answer: yes	Answer Key: maybe
Q8: Our Answer: maybe	Answer Key: no
Q9: Our Answer: maybe	Answer Key: no
Q10: Our Answer: maybe	Answer Key: yes
Q11: Our Answer: yes	Answer Key: yes
Q12: Our Answer: maybe	Answer Key: no
Q13: Our Answer: maybe	Answer Key: yes
Q14: Our Answer: maybe	Answer Key: no
Q15: Our Answer: maybe	Answer Key: yes
Q16: Our Answer: yes	Answer Key: yes
Q17: Our Answer: maybe	Answer Key: yes
Q18: Our Answer: no	Answer Key: yes
Q19: Our Answer: maybe	Answer Key: yes
Q20: Our Answer: maybe	Answer Key: yes
Q21: Our Answer: maybe	Answer Key: yes
Q22: Our Answer: no	Answer Key: yes
Q23: Our Answer: no	Answer Key: yes
Q24: Our Answer: maybe	Answer Key: yes
Q25: Our Answer: maybe	Answer Key: yes
Q26: Our Answer: maybe	Answer Key: