This notebook adds a CCS category column to your diagnosis data, derived from each ICD code and its code type (ICD-9 or ICD-10).

In [None]:
import pandas as pd
import numpy as np
import sys, os
# --- Path configuration: edit these to match your environment ---
# Main (root) directory
MAIN_DIR = os.path.expanduser('P:/xxx')
# Data directory (input files are read from here)
DATA_DIR = os.path.join(MAIN_DIR, 'crdm')
# Output directory (results are written here)
OUT_DIR = os.path.join(MAIN_DIR, 'processed')
# Directory holding the CCS mapping code/files
MAP_DIR = os.path.expanduser('P:/xxx/ccs')

In [None]:
print('Loading ICD Diagnoses')
filename = 'diagnoses.csv'

# Read only the columns used downstream, and keep every value as a string
# (dtype=str) so nothing is converted to a numeric type on load.
dx_path = os.path.join(DATA_DIR, filename)
dx_columns = ['PATID', 'DX_DATE', 'DX', 'DX_TYPE']
icd = pd.read_csv(dx_path, usecols=dx_columns, dtype=str)

# Quick look at what was loaded: (rows, columns), then the first rows
print(icd.shape)
icd.head()

In [None]:
print('Finding CCS Categories for ICD Diagnoses')
import sys
from importlib import reload

# Make the directory that holds the ccs code importable. The membership check
# keeps this cell idempotent: it is meant to be re-run (see the reload below),
# and an unconditional insert would add a duplicate sys.path entry every time.
if MAP_DIR not in sys.path:
    sys.path.insert(0, MAP_DIR)

# Import the module, then reload it so that any edits made to the ccs source
# since the last import are picked up without restarting the kernel.
import ccs
reload(ccs)
from ccs import append_ccs_dx

In [None]:
# append_ccs_dx adds two columns to the frame:
#   'CCS CATEGORY'    - category for the ICD code
#   'CCS CATEGORY UP' - category for the part of the ICD code before the dot
#                       (i.e. the upper level)
icd = append_ccs_dx(icd, multilevel=False)

# Sanity check: no row may be left with a missing (NA) CCS CATEGORY
assert not icd['CCS CATEGORY'].isna().any()

In [None]:
# Where the code got no category (CCS CATEGORY is 0), fall back to the
# upper-level result stored in CCS CATEGORY UP.
no_exact_match = icd['CCS CATEGORY'] == 0
icd['CCS CATEGORY'] = np.where(no_exact_match, icd['CCS CATEGORY UP'], icd['CCS CATEGORY'])

# Rows that are still 0 after the fallback had no match at all
still_unmatched = icd['CCS CATEGORY'] == 0
num_non_matches = still_unmatched.sum()

# Share of non-matches over all rows, as a percentage
total_rows = len(icd)
percentage_non_matches = num_non_matches / total_rows * 100

# Report the count and the percentage
print(f'Number of non-matches = {num_non_matches} / {total_rows} ({percentage_non_matches:.2f}%)')

# Show examples of the non-matching rows (kept as the last expression so the
# notebook displays it).
# About 1% missingness in my case, acceptable
icd[still_unmatched]

In [None]:
# SAVE RESULTS
print('Saving Results')

# Create the output directory if it is missing; to_csv does not create parent
# directories and would otherwise fail on a fresh setup.
os.makedirs(OUT_DIR, exist_ok=True)

# Write only the patient id and its CCS category. The output reuses the input
# file name, but is placed under OUT_DIR. The path is built with os.path.join
# for consistency with how the other paths in this notebook are built.
frame = icd
frame[['PATID', 'CCS CATEGORY']].to_csv(os.path.join(OUT_DIR, filename), index=False)