In [1]:
import pandas as pd
from neo4j import GraphDatabase
import os
import sys

In [2]:
class Graph:
    """Adjacency-list graph that can also hold a dict of attributes per node.

    ``adjacency_list`` maps each node to the list of nodes it points to, and
    ``node_data`` maps a node to the keyword attributes it was registered with.
    """

    def __init__(self):
        # node -> list of neighbouring nodes (duplicates are not filtered)
        self.adjacency_list = {}
        # node -> attribute dict supplied through add_node(**kwargs)
        self.node_data = {}

    def add_node(self, node, **kwargs):
        """Register ``node``; when keyword attributes are given, store them.

        An existing neighbour list is left untouched. Supplying attributes
        replaces whatever attribute dict was stored for the node before.
        """
        self.adjacency_list.setdefault(node, [])
        if kwargs:
            self.node_data[node] = kwargs

    def get_node_data(self, node):
        """Return the attribute dict stored for ``node``, or ``{}`` if none."""
        if node in self.node_data:
            return self.node_data[node]
        return {}

    def add_edge(self, node1, node2):
        """Connect ``node1`` and ``node2`` in both directions.

        Endpoints that are not yet known are registered first.
        """
        for endpoint in (node1, node2):
            if endpoint not in self.adjacency_list:
                self.add_node(endpoint)

        # Undirected graph: each endpoint lists the other one.
        self.adjacency_list[node1].append(node2)
        self.adjacency_list[node2].append(node1)

    def add_directed_edge(self, from_node, to_node):
        """Add a one-way edge ``from_node -> to_node``, registering both nodes."""
        for endpoint in (from_node, to_node):
            if endpoint not in self.adjacency_list:
                self.add_node(endpoint)

        self.adjacency_list[from_node].append(to_node)

    def get_neighbors(self, node):
        """Return the neighbour list of ``node`` (``[]`` for an unknown node)."""
        if node in self.adjacency_list:
            return self.adjacency_list[node]
        return []

    def __str__(self):
        return f"{self.adjacency_list}"

    def save_to_file(self, filename):
        """Pickle the adjacency list and the node attributes into ``filename``."""
        import pickle
        with open(filename, 'wb') as handle:
            payload = {
                'adjacency_list': self.adjacency_list,
                'node_data': self.node_data,
            }
            pickle.dump(payload, handle)
        print(f"Graph saved to {filename}")

    @staticmethod
    def load_from_file(filename):
        """Rebuild a ``Graph`` from a file written by ``save_to_file``.

        Caution: unpickling can execute arbitrary code, so only load files
        that come from a trusted source.
        """
        import pickle
        with open(filename, 'rb') as handle:
            payload = pickle.load(handle)

        restored = Graph()
        restored.adjacency_list = payload['adjacency_list']
        restored.node_data = payload['node_data']
        print(f"Graph loaded from {filename}")
        return restored

In [3]:
# Restore the previously pickled ICD graph from disk and report its size.
icd_graph = Graph.load_from_file('icd_graph.pkl')
loaded_node_count = len(icd_graph.adjacency_list)
print(f"Loaded graph has {loaded_node_count} nodes")

Graph loaded from icd_graph.pkl
Loaded graph has 23842 nodes


In [4]:
# %%
# Neo4j Integration
# Connection settings. Each value can be overridden through an environment
# variable of the same name, so real credentials never have to be written into
# the notebook. The fallbacks below are the local development defaults; do not
# reuse that password on a shared or production database.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # or "neo4j://localhost:7687"
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "icd_2025")


In [5]:
class Neo4jConnector:
    """Small wrapper around a Neo4j driver for loading and querying ICD codes.

    Nodes are stored with the label ``ICDCode`` and the properties ``code``,
    ``description`` and ``type``; edges are stored as ``HAS_CHILD``
    relationships. The connector can be used as a context manager, in which
    case the driver is closed when the ``with`` block exits.
    """

    def __init__(self, uri, user, password):
        """Create the underlying driver for ``uri`` using basic authentication."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never swallow an exception raised inside the block

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def clear_database(self):
        """Clear all nodes and relationships"""
        with self.driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
            print("Database cleared")

    @staticmethod
    def _batches(rows, batch_size):
        """Yield consecutive slices of ``rows`` holding at most ``batch_size`` items."""
        for start in range(0, len(rows), batch_size):
            yield rows[start:start + batch_size]

    def import_graph(self, graph, batch_size=1000):
        """Import graph into Neo4j.

        Args:
            graph: object exposing ``adjacency_list`` (node -> list of
                neighbours) and ``get_node_data(node)`` (attribute dict).
            batch_size: maximum number of nodes or relationships sent per
                query. Rows are shipped with ``UNWIND`` so the import needs a
                handful of round-trips instead of one per node and per edge.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        node_rows = []
        for node in graph.adjacency_list:
            description = graph.get_node_data(node).get('description', '')
            node_rows.append({
                "code": node,
                "description": description,
                # Determine if it's a parent (3 chars) or child (has decimal)
                "type": "Parent" if len(node) == 3 else "Child",
            })

        edge_rows = [
            {"parent": parent, "child": child}
            for parent, children in graph.adjacency_list.items()
            for child in children
        ]

        with self.driver.session() as session:
            # Create nodes with properties. MERGE keeps a re-import from
            # creating duplicate nodes.
            for batch in self._batches(node_rows, batch_size):
                session.run(
                    """
                    UNWIND $rows AS row
                    MERGE (n:ICDCode {code: row.code})
                    SET n.description = row.description,
                        n.type = row.type
                    """,
                    rows=batch
                )

            # Create relationships, only after every node batch has been sent
            # so that both endpoints can be matched.
            for batch in self._batches(edge_rows, batch_size):
                session.run(
                    """
                    UNWIND $rows AS row
                    MATCH (p:ICDCode {code: row.parent})
                    MATCH (c:ICDCode {code: row.child})
                    MERGE (p)-[:HAS_CHILD]->(c)
                    """,
                    rows=batch
                )

            print(f"Imported {len(graph.adjacency_list)} nodes to Neo4j")

    def query_children(self, parent_code):
        """Query all children of a parent code.

        Returns:
            A list of ``(code, description)`` tuples ordered by code.
        """
        with self.driver.session() as session:
            result = session.run(
                """
                MATCH (p:ICDCode {code: $code})-[:HAS_CHILD]->(c:ICDCode)
                RETURN c.code as code, c.description as description
                ORDER BY c.code
                """,
                code=parent_code
            )
            # Build the list while the session is still open.
            return [(record["code"], record["description"]) for record in result]

In [6]:
# Connect to Neo4j
neo4j_conn = Neo4jConnector(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

try:
    # Optional: Clear existing data.
    # WARNING: this deletes every node and relationship in the target
    # database, not only ICD codes. Comment it out to keep existing data.
    neo4j_conn.clear_database()
    
    # Import the graph
    neo4j_conn.import_graph(icd_graph)
    
    # Query example ("\n" emits a blank line before the heading)
    print("\nQuerying children of A00:")
    children = neo4j_conn.query_children("A00")
    for code, desc in children:
        print(f"  {code}: {desc}")
        
finally:
    # Always release the driver, even when the import or the query fails.
    neo4j_conn.close()


print("\n--- Neo4j Integration Ready ---")
print("To use Neo4j:")
print("1. Install Neo4j Desktop or use Neo4j Aura (cloud)")
print("2. Start your Neo4j database")
print("3. Update the connection credentials in the code above")
print("4. Clear existing data if needed.")
print("5. Run the code to import your graph")




Database cleared
Imported 23842 nodes to Neo4j
\nQuerying children of A00:
  A00.0: A00.0
Cholera due to Vibrio cholerae 01, biovar cholerae 
  A00.1: A00.1
Cholera due to Vibrio cholerae 01, biovar eltor 
  A00.9: A00.9
Cholera, unspecified 

--- Neo4j Integration Ready ---
To use Neo4j:
1. Install Neo4j Desktop or use Neo4j Aura (cloud)
2. Start your Neo4j database
3. Update the connection credentials in the code above
4. Clear existing data if needed.
5. Run the code to import your graph
