### This Python code creates a graph for ICD-10 codes
#### Parent nodes are 3 character ICD-10 parent-codes (e.g. A00)
#### Children nodes are parent-codes followed by a decimal point and digits (e.g. A00.12)
#### Graph is saved in a ".pkl" file

In [1]:
import pandas as pd
from neo4j import GraphDatabase
import os
import sys

In [3]:
class Graph:
    """Minimal adjacency-list graph with optional per-node attributes.

    Attributes:
        adjacency_list: maps each node to the list of nodes it points to.
        node_data: maps a node to the keyword attributes it was last
            registered with (only present for nodes given attributes).
    """

    def __init__(self):
        self.adjacency_list = {}
        self.node_data = {}

    def add_node(self, node, **kwargs):
        """Register ``node``; when keyword attributes are given, store them.

        Supplying attributes replaces whatever was stored for the node
        before. Calling without attributes leaves stored data untouched.
        """
        self.adjacency_list.setdefault(node, [])
        if kwargs:
            self.node_data[node] = kwargs

    def get_node_data(self, node):
        """Return the attribute dict stored for ``node``, or ``{}`` if none."""
        return self.node_data[node] if node in self.node_data else {}

    def add_edge(self, node1, node2):
        """Connect ``node1`` and ``node2`` in both directions.

        Endpoints that are not yet known are registered first.
        """
        for endpoint in (node1, node2):
            if endpoint not in self.adjacency_list:
                self.add_node(endpoint)

        # Undirected: each endpoint lists the other as a neighbor.
        self.adjacency_list[node1].append(node2)
        self.adjacency_list[node2].append(node1)

    def add_directed_edge(self, from_node, to_node):
        """Add a one-way edge ``from_node -> to_node``.

        Endpoints that are not yet known are registered first.
        """
        for endpoint in (from_node, to_node):
            if endpoint not in self.adjacency_list:
                self.add_node(endpoint)

        self.adjacency_list[from_node].append(to_node)

    def get_neighbors(self, node):
        """Return the neighbor list of ``node``, or ``[]`` for unknown nodes."""
        known = node in self.adjacency_list
        return self.adjacency_list[node] if known else []

    def __str__(self):
        return f"{self.adjacency_list}"

    def save_to_file(self, filename):
        """Pickle the adjacency list and node data into ``filename``."""
        import pickle

        payload = {
            'adjacency_list': self.adjacency_list,
            'node_data': self.node_data,
        }
        with open(filename, 'wb') as handle:
            pickle.dump(payload, handle)
        print(f"Graph saved to {filename}")

    @staticmethod
    def load_from_file(filename):
        """Rebuild a ``Graph`` from a file written by ``save_to_file``.

        NOTE: this unpickles the file, so only load files from a trusted
        source -- unpickling can execute arbitrary code.
        """
        import pickle

        with open(filename, 'rb') as handle:
            payload = pickle.load(handle)

        restored = Graph()
        restored.adjacency_list = payload['adjacency_list']
        restored.node_data = payload['node_data']
        print(f"Graph loaded from {filename}")
        return restored



def import_icd_from_csv_pandas(graph, csv_file):
    """Populate ``graph`` with ICD codes read from a CSV file.

    Every row becomes a node whose ``description`` attribute is the code,
    a newline, and the row's description text. Each code containing a
    ``'.'`` is then linked (parent -> child) to the text before the first
    ``'.'`` when that prefix is exactly three characters long.

    Args:
        graph: object providing ``add_node(node, **attrs)`` and
            ``add_directed_edge(from_node, to_node)``.
        csv_file: path or file-like object accepted by ``pandas.read_csv``.
            Must contain a ``Code`` column; ``Description`` is optional and
            missing/NaN descriptions are treated as empty strings.

    Raises:
        ValueError: if the CSV has no ``Code`` column.
    """
    df = pd.read_csv(csv_file)

    if 'Code' not in df.columns:
        raise ValueError("CSV must have a 'Code' column")

    # Work on plain strings so non-string dtypes cannot break concatenation.
    codes = df['Code'].astype(str).tolist()
    if 'Description' in df.columns:
        descriptions = [
            '' if pd.isna(text) else str(text) for text in df['Description']
        ]
    else:
        # Description is optional: fall back to empty text for every row.
        descriptions = [''] * len(codes)

    # First pass: add all nodes with their attributes
    for code, text in zip(codes, descriptions):
        graph.add_node(code, description=f"{code}\n{text}")

    # Second pass: build parent -> child relationships
    for code in codes:
        parent, dot, _ = code.partition('.')
        # Only codes with a decimal part are children, and only 3-char
        # prefixes count as parents. The parent is not required to be a
        # row of the CSV; whether a missing parent node gets created is
        # up to graph.add_directed_edge.
        if dot and len(parent) == 3:
            graph.add_directed_edge(parent, code)

In [None]:
#### Use icd_codes.csv to build hierarchical graphs

In [4]:
# ICD-10 Example
# Build the graph from the CSV, then list every 3-character parent code
# together with its children.
icd_graph = Graph()
import_icd_from_csv_pandas(icd_graph, 'icd_codes.csv')

print("ICD Graph Structure (Parents with Children):")
for node in icd_graph.adjacency_list:
    # Skip anything that is not a 3-character parent code
    if len(node) != 3:
        continue
    children = icd_graph.get_neighbors(node)
    # Parents without children are not printed
    if not children:
        continue
    parent_desc = icd_graph.get_node_data(node).get('description', '')
    print(f"{node} ({parent_desc}) ->")
    for child in children:
        child_desc = icd_graph.get_node_data(child).get('description', '')
        print(f"  - {child} ({child_desc})")

ICD Graph Structure (Parents with Children):
A19 (A19
O98.01 ) ->
  - A19.0 (A19.0
Acute miliary tuberculosis of a single specified site )
  - A19.1 (A19.1
Acute miliary tuberculosis of multiple sites )
  - A19.2 (A19.2
Acute miliary tuberculosis, unspecified )
  - A19.8 (A19.8
Other miliary tuberculosis )
  - A19.9 (A19.9
Miliary tuberculosis, unspecified )
A28 (A28
Other zoonotic bacterial diseases, not elsewhere classified ) ->
  - A28.0 (A28.0
Pasteurellosis )
  - A28.1 (A28.1
Cat-scratch disease )
  - A28.2 (A28.2
Extraintestinal yersiniosis )
  - A28.8 (A28.8
Other specified zoonotic bacterial diseases, not elsewhere )
  - A28.9 (A28.9
Zoonotic bacterial disease, unspecified  )
A49 (A49
Bacterial infection of unspecified site ) ->
  - A49.0 (A49.0
Staphylococcal infection, unspecified site )
  - A49.1 (A49.1
Streptococcal infection, unspecified site )
  - A49.2 (A49.2
Hemophilus influenzae infection, unspecified site )
  - A49.3 (A49.3
Mycoplasma infection, unspecified site )
  -

In [5]:
# %%
# Save and Load Graph Example
# Round-trip the graph through a pickle file and report how many nodes came back.
print("\n--- Saving Graph ---")
icd_graph.save_to_file('icd_graph.pkl')

# Reload the graph from the file that was just written
loaded_graph = Graph.load_from_file('icd_graph.pkl')
node_count = len(loaded_graph.adjacency_list)
print(f"Loaded graph has {node_count} nodes")


--- Saving Graph ---
Graph saved to icd_graph.pkl
Graph loaded from icd_graph.pkl
Loaded graph has 23842 nodes
