In [26]:
from neo4j import GraphDatabase
import json
from collections import defaultdict
import pandas as pd

class Neo4jToJsonExporter:
    """
    Exports the nodes and relationships of a Neo4j database to a JSON file and
    prints count summaries per node label and per relationship pattern.

    The instance owns a driver that must be released with close(). It can also
    be used as a context manager, in which case close() is called on exit:

        with Neo4jToJsonExporter(uri, user, password) as exporter:
            exporter.save_to_json("export.json")
    """

    def __init__(self, uri, user, password):
        """
        Initializes the Neo4j connection.

        Args:
            uri: Connection URI passed to GraphDatabase.driver.
            user: User name used for authentication.
            password: Password used for authentication.
        """
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        """
        Returns the exporter itself so it can be bound in a `with` statement.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Closes the connection on leaving the `with` block; exceptions propagate.
        """
        self.close()
        return False

    def close(self):
        """
        Closes the Neo4j connection.
        """
        self.driver.close()

    def clean_properties(self, properties):
        """
        Removes any properties that have None or NaN values.

        A value is dropped when it is None or when its string form equals
        "nan" ignoring case. Note that this also drops the literal strings
        "nan" / "NaN", not only float NaN values.

        Args:
            properties: Mapping of property name to value.

        Returns:
            A new dict holding only the retained properties.
        """
        return {
            key: value
            for key, value in properties.items()
            if value is not None and str(value).lower() != "nan"
        }

    def fetch_nodes(self):
        """
        Retrieves all nodes and their properties from the Neo4j database.

        Returns:
            A tuple (nodes, node_counts):
              * nodes: list of {"labels": [...], "properties": {...}} dicts,
                with properties filtered by clean_properties.
              * node_counts: defaultdict(int) mapping each label to the number
                of nodes carrying it (a node with several labels is counted
                once under each of them).
        """
        query = "MATCH (n) RETURN labels(n) AS labels, properties(n) AS properties"
        nodes = []
        node_counts = defaultdict(int)
        with self.driver.session() as session:
            # Records are consumed inside the session block, before it closes.
            for record in session.run(query):
                labels = record["labels"]
                nodes.append({
                    "labels": labels,
                    "properties": self.clean_properties(record["properties"]),
                })
                for label in labels:
                    node_counts[label] += 1
        return nodes, node_counts

    def fetch_relationships(self):
        """
        Retrieves all relationships and their properties from the Neo4j database.

        Only the labels of the two endpoint nodes are exported; no node
        identifier or node property is attached to a relationship.

        Returns:
            A tuple (relationships, rel_counts):
              * relationships: list of dicts with keys "relationship" (type),
                "properties" (filtered by clean_properties), "source" and
                "target" (each {"labels": [...]}).
              * rel_counts: defaultdict(int) keyed by
                "SourceLabel - REL_TYPE -> TargetLabel", built from the FIRST
                label of each endpoint. Relationships where either endpoint
                has no labels are exported but not counted.
        """
        query = """
        MATCH (a)-[r]->(b)
        RETURN labels(a) AS source_labels, type(r) AS rel_type, labels(b) AS target_labels, properties(r) AS properties
        """
        relationships = []
        rel_counts = defaultdict(int)
        with self.driver.session() as session:
            for record in session.run(query):
                source_labels = record["source_labels"]
                rel_type = record["rel_type"]
                target_labels = record["target_labels"]

                # Construct key: "SourceType - RELATIONSHIP_TYPE -> TargetType"
                if source_labels and target_labels:
                    key = f"{source_labels[0]} - {rel_type} -> {target_labels[0]}"
                    rel_counts[key] += 1

                relationships.append({
                    "relationship": rel_type,
                    "properties": self.clean_properties(record["properties"]),
                    "source": {"labels": source_labels},
                    "target": {"labels": target_labels},
                })
        return relationships, rel_counts

    def save_to_json(self, filename="neo4j_export.json"):
        """
        Fetches all nodes and relationships and saves them as a JSON file.

        Args:
            filename: Path of the JSON file to write (UTF-8, 4-space indent,
                non-ASCII characters written as-is).
        """
        nodes, _ = self.fetch_nodes()
        relationships, _ = self.fetch_relationships()

        data = {
            "nodes": nodes,
            "relationships": relationships
        }

        with open(filename, "w", encoding="utf-8") as f:
            # default=str: property values that json cannot encode natively
            # (e.g. date/time objects) are written as their string form
            # instead of raising TypeError halfway through the file.
            # NOTE(review): confirm str() output is the desired format for any
            # driver-specific value types stored in the database.
            json.dump(data, f, indent=4, ensure_ascii=False, default=str)

        print(f"Data successfully saved to {filename}")

    def generate_summary_statistics(self):
        """
        Generates and displays summary statistics for nodes and relationships.

        Queries the database again (independently of save_to_json) and prints
        two tables: node count per label and relationship count per pattern.
        """
        _, node_counts = self.fetch_nodes()
        _, rel_counts = self.fetch_relationships()

        # Create summary tables
        node_summary = pd.DataFrame(list(node_counts.items()), columns=["Nodes", "Count"])
        relationship_summary = pd.DataFrame(
            list(rel_counts.items()),
            columns=["Relationship (Source - Type -> Target)", "Counts"],
        )

        # Display summary tables
        print(node_summary)
        print(relationship_summary)



In [29]:
## CORAL Data
## Enter the Neo4j Database Details
# Placeholders: replace with the real connection details before running,
# and avoid committing real credentials with the notebook.
coral_uri = "uri"
coral_user = "user_name"
coral_password = "password"
exporter = Neo4jToJsonExporter(coral_uri, coral_user, coral_password)
try:
    exporter.save_to_json("CORAL_KG_Export.json")
    # Generate and display summary statistics
    exporter.generate_summary_statistics()
finally:
    # Close the database connection even if the export or summary fails
    exporter.close()

Data successfully saved to CORAL_KG_Export.json
                  Nodes  Count
0               Patient     40
1             Admission     40
2               Symptom     71
3             Intensity      2
4             Frequency      4
5               History    299
6               Allergy      2
7         SocialHistory    182
8          FamilyMember     27
9  FamilyMedicalHistory     53
              Relationship (Source - Type -> Target)  Counts
0               Patient - HAS_ADMISSION -> Admission      40
1           Patient - HAS_MEDICAL_HISTORY -> History     347
2        Patient - HAS_FAMILY_MEMBER -> FamilyMember      89
3                 Admission - HAS_ALLERGY -> Allergy       4
4    Admission - HAS_SOCIAL_HISTORY -> SocialHistory     225
5                 Admission - HAS_SYMPTOM -> Symptom      25
6               Admission - HAS_NOSYMPTOM -> Symptom      60
7               Symptom - HAS_INTENSITY -> Intensity       3
8               Symptom - HAS_FREQUENCY -> Frequency       4
9

In [28]:
## MIMIC-III Data
## Enter the Neo4j Database Details
# Placeholders: replace with the real connection details before running,
# and avoid committing real credentials with the notebook.
mimic_uri = "uri"
mimic_user = "user_name"
mimic_password = "password"

exporter = Neo4jToJsonExporter(mimic_uri, mimic_user, mimic_password)
try:
    exporter.save_to_json("MIMICIII_KG_Export.json")
    # Generate and display summary statistics
    exporter.generate_summary_statistics()
finally:
    # Close the database connection even if the export or summary fails
    exporter.close()

Data successfully saved to MIMICIII_KG_Export.json
                   Nodes  Count
0                Patient     56
1              Admission     56
2                Symptom    240
3               Duration     41
4              Frequency      7
5              Intensity     19
6                History    364
7                  Vital    156
8                Allergy     51
9          SocialHistory    213
10          FamilyMember     20
11  FamilyMedicalHistory     46
               Relationship (Source - Type -> Target)  Counts
0                Patient - HAS_ADMISSION -> Admission      56
1            Patient - HAS_MEDICAL_HISTORY -> History     451
2         Patient - HAS_FAMILY_MEMBER -> FamilyMember      55
3                  Admission - HAS_SYMPTOM -> Symptom     309
4                  Admission - HAS_ALLERGY -> Allergy      61
5     Admission - HAS_SOCIAL_HISTORY -> SocialHistory     221
6                Admission - HAS_NOSYMPTOM -> Symptom      41
7                      Admission - HA