# Graph Modeling

## Librerie utili

In [1]:
import pandas as pd
from py2neo import Graph, Node, Relationship

## Collegamento a Neo4j

### Nel GraphDB 'neo4j' salveremo le entità estratte dalla NER senza 'RE'

In [22]:
# Open a Bolt connection to the local Neo4j server; no database name is passed.
# NOTE(review): the credentials are hard-coded in the notebook - consider
# loading them from the environment before sharing this file.
graph = Graph(
    "bolt://localhost:7687",
    user="neo4j",
    password="bigdata2023",
)

### Leggo il file CSV

In [23]:
# Load the NER output: a semicolon-separated CSV whose first line is the header.
note = pd.read_csv(
    'ner_noteevents.csv',
    sep=';',
    header=0,
)

### Pulisco il GraphDB (SOLO se necessario)

In [24]:
# WARNING: wipes every node and relationship reachable through `graph`.
# Run this cell only when the database must be rebuilt from scratch.
graph.delete_all()

## Versione in cui tutte le entità sono collegate alla Nota Clinica

In [25]:
# Build the "everything hangs off the clinical note" graph:
#   (Patient)-[:PRESENT]->(Note Clinical)-[:HAS_*]->(entity)
# Every node and relationship is written with MERGE semantics, so repeated
# entity names, repeated Note IDs, or re-running the cell do not produce
# duplicate nodes or parallel duplicate relationships.

# (CSV column, node label, relationship type from the note to the entity).
NOTE_ENTITY_COLUMNS = (
    ('Diagnostic Procedure', 'Diagnostic Procedure', 'HAS_PROCEDURE'),
    ('Body Part, Organ, or Organ Component', 'Body Part', 'HAS_BODY_PART'),
    ('Disease or Syndrome', 'Disease or Syndrome', 'HAS_DISEASE'),
    ('Mental or Behavioral Dysfunction', 'Mental or Behavioral Dysfunction', 'HAS_MENTAL_DISEASE'),
    ('Laboratory Procedure', 'Laboratory Procedure', 'HAS_LABORATORY_PROCEDURE'),
    ('Laboratory or Test Result', 'Laboratory or Test Result', 'HAS_LAB_RESULT'),
    ('Sign or Symptom', 'Sign or Symptom', 'HAS_SIGN_SYMPTOM'),
    ('Therapeutic or Preventive Procedure', 'Therapeutic or Preventive Procedure', 'HAS_THERAPEUTIC_PROCEDURE'),
    ('Pharmacologic Substance', 'Pharmacologic Substance', 'HAS_PRESCRIPTION'),
)

# Fail before the first write if an expected column is missing, so a schema
# problem cannot leave a half-populated graph behind.
required_columns = ['Subject ID', 'Note ID'] + [column for column, _, _ in NOTE_ENTITY_COLUMNS]
missing_columns = [column for column in required_columns if column not in note.columns]
if missing_columns:
    raise KeyError("Missing columns in the NER dataframe: {}".format(missing_columns))

for _, row in note.iterrows():
    subject_id = row['Subject ID']
    note_clinical_id = row['Note ID']

    # Reuse the Patient node when it already exists. `first()` yields None
    # when nothing matches, so compare against None explicitly instead of
    # relying on the truthiness of a Node.
    subject_node = graph.nodes.match("Patient", subject_id=subject_id).first()
    if subject_node is None:
        subject_node = Node("Patient", subject_id=subject_id)
        graph.create(subject_node)

    # Merge the note on its ID (same key the Relation Extraction import uses)
    # and merge the link, so a repeated Note ID maps to one node and one edge.
    note_node = Node("Note Clinical", note_clinical_id=note_clinical_id)
    graph.merge(note_node, "Note Clinical", "note_clinical_id")
    graph.merge(Relationship(subject_node, "PRESENT", note_node))

    for column, label, relationship_type in NOTE_ENTITY_COLUMNS:
        cell = row[column]
        # Empty CSV cells are read by pandas as NaN (a float), not a string.
        if not isinstance(cell, str):
            continue

        # Entities are ", "-separated; strip stray whitespace, drop empty
        # tokens, and de-duplicate while keeping first-seen order.
        stripped_names = (name.strip() for name in cell.split(", "))
        for name in dict.fromkeys(filter(None, stripped_names)):
            entity_node = Node(label, name=name)
            # merge() binds entity_node to the existing or new remote node,
            # so no separate create() call is needed afterwards.
            graph.merge(entity_node, label, "name")
            # Both endpoints are bound at this point, so the relationship
            # can be merged without a primary label/key.
            graph.merge(Relationship(note_node, relationship_type, entity_node))

## Versione con Relation Extraction

### Collegamento al GraphDB 'relationextraction'

In [2]:
# Open a Bolt connection to the 'relationextraction' database on the local
# Neo4j server.
# NOTE(review): the credentials are hard-coded in the notebook - consider
# loading them from the environment before sharing this file.
graph_re = Graph(
    "bolt://localhost:7687",
    name="relationextraction",
    user="neo4j",
    password="bigdata2023",
)

### Pulisco il DB (SOLO se necessario)

In [27]:
# WARNING: wipes every node and relationship reachable through `graph_re`.
# Run this cell only when the database must be rebuilt from scratch.
graph_re.delete_all()

### Leggo il file CSV delle relazioni estratte da "GPT-3.5-turbo"

In [28]:
# Load the extracted relations: a comma-separated CSV whose first line is the
# header.
relation = pd.read_csv(
    'Relation Extracted/result.csv',
    sep=',',
    header=0,
)

### Creo i nodi e le relazioni ottenuti dalla Relation Extraction

In [29]:
# Build the Relation Extraction graph:
#   (Patient)-[:PRESENT]->(Note Clinical)-[:HAS_DISEASE]->(Disease or Syndrome)
#   (Sign or Symptom)-[:MAY_CAUSE]->(Disease or Syndrome)
#   (Diagnostic Procedure)-[:DIAGNOSTICS]->(Disease or Syndrome)
#   (Laboratory or Test Result)-[:TESTS]->(Disease or Syndrome)
#   (Pharmacologic Substance)-[:TREATS]->(Disease or Syndrome)
# Every entity listed in a row is linked to every disease of that same row.
# Nodes AND relationships are written with MERGE semantics, so rows that
# repeat a note or an entity pair do not add parallel duplicate relationships.

# (CSV column, node label, relationship type pointing at the disease),
# in the order the relationships are written.
DISEASE_RELATED_COLUMNS = (
    ('Sign or Symptom', 'Sign or Symptom', 'MAY_CAUSE'),
    ('Diagnostic Procedure', 'Diagnostic Procedure', 'DIAGNOSTICS'),
    ('Laboratory or Test Result', 'Laboratory or Test Result', 'TESTS'),
    ('Pharmacologic Substance', 'Pharmacologic Substance', 'TREATS'),
)


def _split_entity_cell(cell):
    """Return the distinct entity names of a ", "-separated CSV cell.

    Non-string cells (pandas reads empty cells as NaN) give an empty list.
    Names are stripped, empty tokens dropped, first-seen order is kept.
    """
    if not isinstance(cell, str):
        return []
    stripped_names = (name.strip() for name in cell.split(", "))
    return list(dict.fromkeys(filter(None, stripped_names)))


def _merge_named_nodes(target_graph, label, names):
    """Merge one `label` node per name (keyed on "name") and return them."""
    merged_nodes = []
    for name in names:
        node = Node(label, name=name)
        # merge() binds the node to the existing or new remote node, so no
        # separate create() call is needed afterwards.
        target_graph.merge(node, label, "name")
        merged_nodes.append(node)
    return merged_nodes


for _, row in relation.iterrows():
    subject_id = row['Subject ID']
    note_clinical_id = row['Note ID']

    # Reuse the Patient node when it already exists. `first()` yields None
    # when nothing matches, so compare against None explicitly instead of
    # relying on the truthiness of a Node.
    subject_node = graph_re.nodes.match("Patient", subject_id=subject_id).first()
    if subject_node is None:
        subject_node = Node("Patient", subject_id=subject_id)
        graph_re.create(subject_node)

    note_node = Node("Note Clinical", note_clinical_id=note_clinical_id)
    graph_re.merge(note_node, "Note Clinical", "note_clinical_id")
    # The note is merged, so the link must be merged too; otherwise every
    # additional row for the same note would add another PRESENT edge.
    graph_re.merge(Relationship(subject_node, "PRESENT", note_node))

    disease_names = _split_entity_cell(row['Disease or Syndrome'])
    if not disease_names:
        # Without a disease nothing else is attached for this row.
        continue

    # Split and merge the related entities once per row rather than once per
    # disease: they do not depend on which disease is being processed.
    related_nodes = [
        (relationship_type,
         _merge_named_nodes(graph_re, label, _split_entity_cell(row[column])))
        for column, label, relationship_type in DISEASE_RELATED_COLUMNS
    ]

    for disease_node in _merge_named_nodes(graph_re, "Disease or Syndrome", disease_names):
        graph_re.merge(Relationship(note_node, "HAS_DISEASE", disease_node))
        for relationship_type, source_nodes in related_nodes:
            for source_node in source_nodes:
                # Both endpoints are bound, so the relationship can be merged
                # without a primary label/key.
                graph_re.merge(Relationship(source_node, relationship_type, disease_node))