### Introduction

This notebook navigates Clinical Classifications Software (CCS) for ICD-9-CM. CCS abstracts the ICD codes into two different representations:

1. Flat grouping: Groups (or categries) are introduced to contain disjoint sets of ICD codes. These groups are assumed to be clinically homogeneous as much as possible.
2. Hierarchcical grouping: In this abstraction, codes are organized in a hierarchy of groups. Parents have more abstract meanings, whearas children have more detailed and specific meanings.




In [12]:
# conda activate mimic3-snonet
import pandas as pd

In [13]:
project_dir = '/home/asem/GP/MIMIC-SNONET'
data_dir = '/home/asem/GP/ehr-data/mimic3-v1.4/physionet.org/files/mimiciii/1.4'

D_ICD_DIAGNOSES = pd.read_csv(f'{data_dir}/D_ICD_DIAGNOSES.csv.gz')

In [14]:
D_ICD_DIAGNOSES.head()

Unnamed: 0,ROW_ID,ICD9_CODE,SHORT_TITLE,LONG_TITLE
0,174,1166,TB pneumonia-oth test,"Tuberculous pneumonia [any form], tubercle bac..."
1,175,1170,TB pneumothorax-unspec,"Tuberculous pneumothorax, unspecified"
2,176,1171,TB pneumothorax-no exam,"Tuberculous pneumothorax, bacteriological or h..."
3,177,1172,TB pneumothorx-exam unkn,"Tuberculous pneumothorax, bacteriological or h..."
4,178,1173,TB pneumothorax-micro dx,"Tuberculous pneumothorax, tubercle bacilli fou..."


In [53]:
CCS_DIAGNOSES_SINGLE = pd.read_csv(f'{project_dir}/CCS/$dxref 2015.csv', skiprows=1)
CCS_PROCEDURES_SINGLE = pd.read_csv(f'{project_dir}/CCS/$prref 2015.csv', skiprows=1)
CCS_DIAGNOSES_MULTI = pd.read_csv(f'{project_dir}/CCS/ccs_multi_dx_tool_2015.csv')
CCS_PROCEDURES_MULTI = pd.read_csv(f'{project_dir}/CCS/ccs_multi_pr_tool_2015.csv')

In [54]:
CCS_DIAGNOSES_SINGLE.head()

Unnamed: 0,'ICD-9-CM CODE','CCS CATEGORY','CCS CATEGORY DESCRIPTION','ICD-9-CM CODE DESCRIPTION','OPTIONAL CCS CATEGORY','OPTIONAL CCS CATEGORY DESCRIPTION'
0,' ','0 ','No DX',INVALID CODES IN USER DATA,' ',' '
1,'01000','1 ','Tuberculosis',PRIM TB COMPLEX-UNSPEC,' ',' '
2,'01001','1 ','Tuberculosis',PRIM TB COMPLEX-NO EXAM,' ',' '
3,'01002','1 ','Tuberculosis',PRIM TB COMPLEX-EXM UNKN,' ',' '
4,'01003','1 ','Tuberculosis',PRIM TB COMPLEX-MICRO DX,' ',' '


In [55]:
CCS_PROCEDURES_SINGLE.head()

Unnamed: 0,'ICD-9-CM CODE','CCS CATEGORY','CCS CATEGORY DESCRIPTION','ICD-9-CM CODE DESCRIPTION'
0,' ','0 ','No PROC ',INVALID CODES IN USER DATA
1,'0101 ','1 ','Inc/exc CNS ',CISTERNAL PUNCTURE
2,'0109 ','1 ','Inc/exc CNS ',CRANIAL PUNCTURE NEC
3,'0121 ','1 ','Inc/exc CNS ',CRANIAL SINUS I & D
4,'0122 ','1 ','Inc/exc CNS ',REMOV INTRACRAN STIMULAT


In [56]:
CCS_DIAGNOSES_SINGLE.shape

(15073, 6)

In [57]:
diag_ccs_cat_list = CCS_DIAGNOSES_SINGLE['\'CCS CATEGORY\''].apply(lambda cat: cat.strip('\'').strip()).tolist()
diag_ccs_icd_list = CCS_DIAGNOSES_SINGLE['\'ICD-9-CM CODE\''].apply(lambda c: c.strip('\'').strip()).tolist()

proc_ccs_cat_list = CCS_PROCEDURES_SINGLE['\'CCS CATEGORY\''].apply(lambda cat: cat.strip('\'').strip()).tolist()
proc_ccs_icd_list = CCS_PROCEDURES_SINGLE['\'ICD-9-CM CODE\''].apply(lambda c: c.strip('\'').strip()).tolist()

In [94]:
len(proc_ccs_icd_list)

3949

In [58]:
from collections import defaultdict

diag_icd_to_ccs_dict = dict(zip(diag_ccs_icd_list, diag_ccs_cat_list))
proc_icd_to_css_dict = dict(zip(proc_ccs_icd_list, proc_ccs_cat_list))

diag_ccs_to_icd = defaultdict(list)
for code, cat in zip(diag_ccs_icd_list, diag_ccs_cat_list):
    diag_ccs_to_icd[cat].append(code)
    
proc_ccs_to_icd = defaultdict(list)
for code, cat in zip(proc_ccs_icd_list, diag_ccs_cat_list):
    proc_ccs_to_icd[cat].append(code)

In [59]:
len(proc_ccs_to_icd)

81

In [89]:
len(diag_ccs_to_icd)

284

In [60]:
CCS_DIAGNOSES_MULTI.head()


Unnamed: 0,'ICD-9-CM CODE','CCS LVL 1','CCS LVL 1 LABEL','CCS LVL 2','CCS LVL 2 LABEL','CCS LVL 3','CCS LVL 3 LABEL','CCS LVL 4','CCS LVL 4 LABEL'
0,'01000','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',
1,'01001','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',
2,'01002','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',
3,'01003','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',
4,'01004','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',


In [61]:
CCS_PROCEDURES_MULTI.head()

Unnamed: 0,'ICD-9-CM CODE','CCS LVL 1','CCS LVL 1 LABEL','CCS LVL 2','CCS LVL 2 LABEL','CCS LVL 3','CCS LVL 3 LABEL'
0,'0121','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy
1,'0122','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy
2,'0123','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy
3,'0124','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy
4,'0125','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy


In [80]:
CCS_DIAGNOSES_MULTI['I1'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 1\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['I2'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 2\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['I3'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 3\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['I4'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 4\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['L1'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 1 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['L2'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 2 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['L3'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 3 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['L4'] = CCS_DIAGNOSES_MULTI['\'CCS LVL 4 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_DIAGNOSES_MULTI['ICD'] = CCS_DIAGNOSES_MULTI['\'ICD-9-CM CODE\''].apply(lambda l: l.strip('\'').strip())


CCS_PROCEDURES_MULTI['I1'] = CCS_PROCEDURES_MULTI['\'CCS LVL 1\''].apply(lambda l: l.strip('\'').strip())
CCS_PROCEDURES_MULTI['I2'] = CCS_PROCEDURES_MULTI['\'CCS LVL 2\''].apply(lambda l: l.strip('\'').strip())
CCS_PROCEDURES_MULTI['I3'] = CCS_PROCEDURES_MULTI['\'CCS LVL 3\''].apply(lambda l: l.strip('\'').strip())
CCS_PROCEDURES_MULTI['L1'] = CCS_PROCEDURES_MULTI['\'CCS LVL 1 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_PROCEDURES_MULTI['L2'] = CCS_PROCEDURES_MULTI['\'CCS LVL 2 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_PROCEDURES_MULTI['L3'] = CCS_PROCEDURES_MULTI['\'CCS LVL 3 LABEL\''].apply(lambda l: l.strip('\'').strip())
CCS_PROCEDURES_MULTI['ICD'] = CCS_PROCEDURES_MULTI['\'ICD-9-CM CODE\''].apply(lambda l: l.strip('\'').strip())


In [81]:
CCS_DIAGNOSES_MULTI.head()

Unnamed: 0,'ICD-9-CM CODE','CCS LVL 1','CCS LVL 1 LABEL','CCS LVL 2','CCS LVL 2 LABEL','CCS LVL 3','CCS LVL 3 LABEL','CCS LVL 4','CCS LVL 4 LABEL',L1,...,L4,ICD,L1l,L2l,L3l,L4l,I1,I2,I3,I4
0,'01000','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',,Infectious and parasitic diseases,...,,1000,Infectious and parasitic diseases,Bacterial infection,Tuberculosis [1.],,1,1.1,1.1.1,
1,'01001','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',,Infectious and parasitic diseases,...,,1001,Infectious and parasitic diseases,Bacterial infection,Tuberculosis [1.],,1,1.1,1.1.1,
2,'01002','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',,Infectious and parasitic diseases,...,,1002,Infectious and parasitic diseases,Bacterial infection,Tuberculosis [1.],,1,1.1,1.1.1,
3,'01003','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',,Infectious and parasitic diseases,...,,1003,Infectious and parasitic diseases,Bacterial infection,Tuberculosis [1.],,1,1.1,1.1.1,
4,'01004','1',Infectious and parasitic diseases,'1.1',Bacterial infection,'1.1.1',Tuberculosis [1.],' ',,Infectious and parasitic diseases,...,,1004,Infectious and parasitic diseases,Bacterial infection,Tuberculosis [1.],,1,1.1,1.1.1,


In [82]:
CCS_PROCEDURES_MULTI.head()

Unnamed: 0,'ICD-9-CM CODE','CCS LVL 1','CCS LVL 1 LABEL','CCS LVL 2','CCS LVL 2 LABEL','CCS LVL 3','CCS LVL 3 LABEL',L1,L2,L3,ICD,L1l,L2l,L3l,I1,I2,I3
0,'0121','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,121,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,1,1.1,1.1.1
1,'0122','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,122,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,1,1.1,1.1.1
2,'0123','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,123,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,1,1.1,1.1.1
3,'0124','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,124,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,1,1.1,1.1.1
4,'0125','1',Operations on the nervous system,'1.1',Incision and excision of CNS [1.],'1.1.1',Craniotomy and craniectomy,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,125,Operations on the nervous system,Incision and excision of CNS [1.],Craniotomy and craniectomy,1,1.1,1.1.1


In [85]:
diag_ccs_multi = {}
diag_ccs_icd2ccs_multi = {}
diag_ccs_ccs2icd_multi = defaultdict(list)

for row in CCS_DIAGNOSES_MULTI.itertuples():
    code = row.ICD
    l1, l2, l3, l4 = row.L1, row.L2, row.L3, row.L4
    i1, i2, i3, i4 = row.I1, row.I2, row.I3, row.I4
    
    if i1 not in diag_ccs_multi:
        diag_ccs_multi[i1] = {'index': i1,
                              'label': l1,
                              'codes': []}
    
    last_node = diag_ccs_multi[i1]
    
    if i2:
        if i2 not in diag_ccs_multi[i1]:
            diag_ccs_multi[i1][i2] = {'index': i2,
                                      'label': l2,
                                      'codes': []}
        last_node = diag_ccs_multi[i1][i2]

    
    if i3:
        if i3 not in diag_ccs_multi[i1][i2]:
            diag_ccs_multi[i1][i2][i3] = {'index': i3,
                                          'label': l3,
                                          'codes': []}
        last_node = diag_ccs_multi[i1][i2][i3]
    if i4:
        if i4 not in diag_ccs_multi[i1][i2][i3]:
            diag_ccs_multi[i1][i2][i3][i4] = {'index': i4,
                                              'label': l4,
                                              'codes': []}
            
        last_node = diag_ccs_multi[i1][i2][i3][i4]
        
    diag_ccs_icd2ccs_multi[code] = last_node['index']
    diag_ccs_ccs2icd_multi[last_node['index']].append(code)
    last_node['codes'].append(code)

In [84]:
diag_ccs_multi

{'1': {'index': '1',
  'label': 'Infectious and parasitic diseases',
  'codes': [],
  '1.1': {'index': '1.1',
   'label': 'Bacterial infection',
   'codes': [],
   '1.1.1': {'index': '1.1.1',
    'label': 'Tuberculosis [1.]',
    'codes': ['01000',
     '01001',
     '01002',
     '01003',
     '01004',
     '01005',
     '01006',
     '01010',
     '01011',
     '01012',
     '01013',
     '01014',
     '01015',
     '01016',
     '01080',
     '01081',
     '01082',
     '01083',
     '01084',
     '01085',
     '01086',
     '01090',
     '01091',
     '01092',
     '01093',
     '01094',
     '01095',
     '01096',
     '01100',
     '01101',
     '01102',
     '01103',
     '01104',
     '01105',
     '01106',
     '01110',
     '01111',
     '01112',
     '01113',
     '01114',
     '01115',
     '01116',
     '01120',
     '01121',
     '01122',
     '01123',
     '01124',
     '01125',
     '01126',
     '01130',
     '01131',
     '01132',
     '01133',
     '01134',
     '011

In [87]:
len(diag_ccs_icd2ccs_multi)

15072

In [88]:
len(diag_ccs_ccs2icd_multi)

589

In [91]:
proc_ccs_multi = {}
proc_ccs_icd2ccs_multi = {}
proc_ccs_ccs2icd_multi = defaultdict(list)

for row in CCS_PROCEDURES_MULTI.itertuples():
    code = row.ICD
    l1, l2, l3 = row.L1, row.L2, row.L3
    i1, i2, i3 = row.I1, row.I2, row.I3
    
    if i1 not in proc_ccs_multi:
        proc_ccs_multi[i1] = {'index': i1,
                              'label': l1,
                              'codes': []}
    
    last_node = proc_ccs_multi[i1]
    
    if i2:
        if i2 not in proc_ccs_multi[i1]:
            proc_ccs_multi[i1][i2] = {'index': i2,
                                      'label': l2,
                                      'codes': []}
        last_node = proc_ccs_multi[i1][i2]

    
    if i3:
        if i3 not in proc_ccs_multi[i1][i2]:
            proc_ccs_multi[i1][i2][i3] = {'index': i3,
                                          'label': l3,
                                          'codes': []}
        last_node = proc_ccs_multi[i1][i2][i3]

        
    proc_ccs_icd2ccs_multi[code] = last_node['index']
    proc_ccs_ccs2icd_multi[last_node['index']].append(code)
    last_node['codes'].append(code)