In [1]:
import itertools as it

import networkx as nx
import numpy as np
import pandas as pd

In [2]:
# Read the clustered table, discard rows without a cluster label, and store
# the remaining labels as integers.
copd = (
    pd.read_csv("../data/clusters/copd_clustered.csv")
    .dropna(subset=["cluster"])
    .astype({"cluster": int})
)

In [3]:
# Category name -> (first code, last code), in the order the categories are
# used as adjacency rows/columns further down. The codes look like ICD-10
# chapter boundaries -- TODO confirm against the coding scheme in use.
# NOTE(review): "muscoloskeletal" is spelled this way on purpose here; the
# keys are used as column labels of the clustered table, so renaming the key
# alone would break the lookup.
icd_ranges = {
    category: (first_code, last_code)
    for category, first_code, last_code in [
        ("infectious", "A00", "B99"),
        ("neoplasms", "C00", "D48"),
        ("blood", "D50", "D89"),
        ("endocrine", "E00", "E90"),
        ("mental", "F00", "F99"),
        ("nervous", "G00", "G99"),
        ("eye", "H00", "H59"),
        ("ear", "H60", "H95"),
        ("circulatory", "I00", "I99"),
        ("respiratory", "J00", "J99"),
        ("digestive", "K00", "K93"),
        ("skin", "L00", "L99"),
        ("muscoloskeletal", "M00", "M99"),
        ("genitourinary", "N00", "N99"),
        # Deliberately left out (reason not recorded here):
        # ("pregnancy", "O00", "O99"),
        ("perinatal", "P00", "P99"),
        ("congenital", "Q00", "Q99"),
        ("abnormal_findings", "R00", "R99"),
        ("injury", "S00", "T98"),
        ("external_causes", "V01", "Y98"),
        ("contact_factors", "Z00", "Z99"),
        ("special_use", "U00", "U89"),
    ]
}

In [4]:
def get_weighted_adjacency(data, categories):
    """Count how often each pair of categories is present in the same row.

    A category is "present" in a row when its value is strictly greater
    than zero; missing values compare as not greater than zero and so count
    as absent.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing one column per entry of ``categories``. Any other
        columns are ignored.
    categories : list
        Column labels to use as both the rows and the columns of the result.

    Returns
    -------
    pandas.DataFrame
        Square integer frame labelled by ``categories`` on both axes. Entry
        ``(c1, c2)`` is the number of rows in which ``c1`` and ``c2`` are
        both present. The frame is symmetric and its diagonal is zero.
    """

    # 0/1 indicator matrix of shape (rows, categories).
    present = (data[categories] > 0).to_numpy(dtype=int)

    # (present.T @ present)[i, j] sums, over rows, the product of the two
    # indicators, i.e. the number of rows where both categories are present.
    # This replaces a per-row Python loop with scalar ``.loc`` increments.
    counts = present.T @ present

    # A category is never paired with itself.
    np.fill_diagonal(counts, 0)

    return pd.DataFrame(counts, columns=categories, index=categories)

In [5]:
# Write one weighted category co-occurrence graph per cluster as a GML file.
# NOTE(review): the output directory is not created here -- presumably it
# already exists; confirm before running on a fresh checkout.
category_names = list(icd_ranges)

for cluster, data in copd.groupby("cluster"):
    adjacency = get_weighted_adjacency(data, category_names)
    G = nx.from_pandas_adjacency(adjacency)
    nx.write_gml(G, f"../data/categories/{cluster}.gml")

Use Gephi to create the images from the exported `.gml` files. The colours are, in order:

- ``#0072B2``
- ``#009E73``
- ``#D55E00``
- ``#CC79A7``