In [4]:
import csv

# Maps an HCC key ('V12-<category>' or 'V22-<category>') to the diagnosis codes
# filed under it: {'icd9s': set of ICD-9 codes, 'icd10s': set of ICD-10 codes}.
hcc_mappings = {}


def _read_mapping_rows(path):
    """Return every row (header included) of the CSV file at `path`.

    The csv module keeps quoted fields that contain commas in a single column,
    which a plain `line.split(',')` would not, and the `with` block makes sure
    the file handle is closed again.
    """
    with open(path) as mapping_file:
        return list(csv.reader(mapping_file))


def _is_blank(row):
    """True when `row` holds no non-whitespace field (e.g. an empty line)."""
    return not any(field.strip() for field in row)


def _add_code(hcc_key, code_kind, code):
    """Add `code` to the `code_kind` ('icd9s' or 'icd10s') set of `hcc_key`."""
    entry = hcc_mappings.setdefault(hcc_key, {'icd9s': set(), 'icd10s': set()})
    entry[code_kind].add(code)


# ICD9 Codes
# Column layout of icd9_mappings.csv (0-based):
#   0 DIAGNOSIS CODE
#   1 SHORT DESCRIPTION
#   2 2013 CMS-HCC Model Category
#   3 CMS-HCC PACE/ESRD Model Category
#   4 2014 CMS-HCC Model Category
#   5 RxHCC Model Category
#   6 2013 CMS-HCC Model for 2014 Payment Year
#   7 CMS-HCC PACE/ESRD Model for 2014 Payment Year
#   8 2014 CMS-HCC Model for 2014 Payment Year
#   9 RxHCC Model for 2014 Payment Year
icd9_mappings = _read_mapping_rows('code_mappings/icd9_mappings.csv')

for icd9_mapping in icd9_mappings[1:]:  # [1:] skips the header row
    if _is_blank(icd9_mapping):
        continue

    icd9_code = icd9_mapping[0].strip()
    hcc_category_2013 = icd9_mapping[2].strip()
    hcc_category_2014 = icd9_mapping[4].strip()
    hcc_model_2013 = icd9_mapping[6].strip()
    hcc_model_2014 = icd9_mapping[8].strip()

    # Only the 2013 and 2014 CMS-HCC models are collected; the PACE/ESRD and
    # RxHCC columns are ignored.
    if hcc_model_2013 == 'Yes':
        _add_code('V12-' + hcc_category_2013, 'icd9s', icd9_code)
    if hcc_model_2014 == 'Yes':
        _add_code('V22-' + hcc_category_2014, 'icd9s', icd9_code)

# ICD10 Codes
# Column layout of icd10_mappings.csv (0-based):
#   0 Diagnosis Code
#   1 Description
#   2 CMS-HCC PACE/ESRD Model Category V21
#   3 CMS-HCC Model Category V22
#   4 RxHCC Model Category V05 (clinically revised model implemented in 2016)
#   5 CMS-HCC PACE/ESRD Model for 2016 Payment Year
#   6 CMS-HCC Model for 2016 Payment Year
#   7 RxHCC Model for 2016 Payment Year
icd10_mappings = _read_mapping_rows('code_mappings/icd10_mappings.csv')

for icd10_mapping in icd10_mappings[1:]:  # [1:] skips the header row
    if _is_blank(icd10_mapping):
        continue

    icd10_code = icd10_mapping[0].strip()
    hcc_category_v22 = icd10_mapping[3].strip()
    hcc_model_v22 = icd10_mapping[6].strip()

    # Only the V22 CMS-HCC model is collected for ICD-10 codes; these land
    # under the same 'V22-' keys as the 2014-model ICD-9 codes above.
    if hcc_model_v22 == 'Yes':
        _add_code('V22-' + hcc_category_v22, 'icd10s', icd10_code)

In [56]:
import re

# ICD9 to ICD10 GEM
# Each usable line consists of three whitespace-separated tokens:
# ICD-9 code, ICD-10 code, flag string.
# Raw string: same pattern as before, without relying on unrecognised escapes.
i9gem_line_pattern = re.compile(r'(\S+)\s+(\S+)\s+(\S+)')

# `with` closes the file; lines without three tokens (e.g. blank lines) are
# skipped instead of raising IndexError.
with open('code_mappings/2016_I9gem.txt') as i9gem_file:
    i9gem_matches = [i9gem_line_pattern.search(line) for line in i9gem_file]
i9gems = [list(match.groups()) for match in i9gem_matches if match]

# icd9 code -> list of candidate ICD-10 translations, one dict per GEM line.
icd9_to_icd10_mapping = {}
for icd9_code, icd10_code, flags in i9gems:
    # The flag string is read positionally; it is assumed to hold at least
    # five characters -- TODO confirm against the GEM file documentation.
    icd9_to_icd10 = {'icd10': icd10_code,
                     'approximate': flags[0] == '1',
                     'no_map': flags[1] == '1',
                     'combination': flags[2] == '1',
                     'scenario': flags[3],
                     'choice_list': flags[4]}
    icd9_to_icd10_mapping.setdefault(icd9_code, []).append(icd9_to_icd10)


In [100]:
import re

# ICD10 to ICD9 GEM
# Each usable line consists of three whitespace-separated tokens:
# ICD-10 code, ICD-9 code, flag string.
# Raw string: same pattern as before, without relying on unrecognised escapes.
i10gem_line_pattern = re.compile(r'(\S+)\s+(\S+)\s+(\S+)')

# `with` closes the file; lines without three tokens (e.g. blank lines) are
# skipped instead of raising IndexError.
with open('code_mappings/2016_I10gem.txt') as i10gem_file:
    i10gem_matches = [i10gem_line_pattern.search(line) for line in i10gem_file]
i10gems = [list(match.groups()) for match in i10gem_matches if match]

# icd10 code -> list of candidate ICD-9 translations, one dict per GEM line.
icd10_to_icd9_mapping = {}
for icd10_code, icd9_code, flags in i10gems:
    # The flag string is read positionally; it is assumed to hold at least
    # five characters -- TODO confirm against the GEM file documentation.
    icd10_to_icd9 = {'icd9': icd9_code,
                     'approximate': flags[0] == '1',
                     'no_map': flags[1] == '1',
                     'combination': flags[2] == '1',
                     'scenario': flags[3],
                     'choice_list': flags[4]}
    icd10_to_icd9_mapping.setdefault(icd10_code, []).append(icd10_to_icd9)


In [102]:
# Classify every HCC-relevant ICD-10 code by how it translates to ICD-9
# according to the ICD10 -> ICD9 GEM entries in icd10_to_icd9_mapping.
#   'exact'       icd10 -> the single ICD-9 code it maps to
#   'no_map'      ICD-10 codes with an entry flagged no_map
#   'simple'      icd10 -> set of several alternative ICD-9 codes
#   'combination' icd10 -> list of GEM entries flagged as combination
#   'scenario' / 'choice_list' are placeholders and are not tallied yet.
icd10_stats = {'exact': {},
               'no_map': set(),
               'simple': {},
               'combination': {},
               'scenario': 0,
               'choice_list': 0}

# Derive the code universe here so this cell also works on a fresh
# top-to-bottom run; it yields the same set as the later all_icd10s cell.
all_icd10s = set()
for hcc_mapping in hcc_mappings.values():
    all_icd10s.update(hcc_mapping['icd10s'])

for icd10 in all_icd10s:
    icd9s_from_icd10 = icd10_to_icd9_mapping[icd10]
    simple = set()
    for gem_entry in icd9s_from_icd10:
        if not gem_entry['approximate']:
            # if this is not approximate, then we have an exact match!
            if len(icd9s_from_icd10) == 1:
                icd10_stats['exact'][icd10] = gem_entry['icd9']
            else:
                # An exact entry is not expected to have siblings; report it.
                print('then how did we get multiple icd9s!')
        elif gem_entry['no_map']:
            icd10_stats['no_map'].add(icd10)
        elif gem_entry['combination']:
            icd10_stats['combination'].setdefault(icd10, []).append(gem_entry)
        else:
            simple.add(gem_entry['icd9'])

    # One distinct approximate target counts as exact; several are "simple"
    # alternatives.
    if len(simple) == 1:
        icd10_stats['exact'][icd10] = next(iter(simple))
    elif len(simple) > 1:
        icd10_stats['simple'][icd10] = simple

# Recorded results:
# 7277 total
# 6267 exact mappings
# 256 simple mappings
# 825 combination
# 1 no map

print(len(icd10_stats['exact']))
print(len(icd10_stats['simple']))
print(len(icd10_stats['combination']))
print(len(icd10_stats['no_map']))
print(len(all_icd10s))
#pprint(icd10_stats['combination'])

then how did we get multiple icd9s!
6267
256
825
1
7277


In [None]:
from pprint import pprint
# Check whether translating an HCC's ICD-9 codes through the GEM yields ICD-10
# codes that are filed under the same HCC.
icd9_to_icd10_same_hcc = {}  # not populated yet
for hcc in hcc_mappings:
    icd9s = hcc_mappings[hcc]['icd9s']
    icd10s = hcc_mappings[hcc]['icd10s']

    # Every ICD-10 code reachable from this HCC's ICD-9 codes via the GEM.
    icd10s_from_icd9s = set()
    for icd9 in icd9s:
        for gem_entry in icd9_to_icd10_mapping[icd9]:
            icd10s_from_icd9s.add(gem_entry['icd10'])

    for icd10 in icd10s_from_icd9s:
        if icd10 in icd10s:
            print(icd10 + ' in original set')
        else:
            print(icd10 + ' not in original set')
    # Only the first HCC the dict yields is inspected, which keeps the output short.
    break

In [62]:
# Collect every ICD-9 and ICD-10 code that appears under any HCC key.
all_icd9s = set()
all_icd10s = set()
for hcc_mapping in hcc_mappings.values():
    all_icd9s.update(hcc_mapping['icd9s'])
    all_icd10s.update(hcc_mapping['icd10s'])
print(len(all_icd9s))
print(len(all_icd10s))

3161
7277


In [104]:
# Classify every HCC-relevant ICD-9 code by how it translates to ICD-10
# according to the ICD9 -> ICD10 GEM entries in icd9_to_icd10_mapping.
#   'exact'       icd9 -> the single ICD-10 code it maps to
#   'no_map'      ICD-9 codes with an entry flagged no_map
#   'simple'      icd9 -> set of several alternative ICD-10 codes
#   'combination' icd9 -> list of GEM entries flagged as combination
#   'scenario' / 'choice_list' are placeholders and are not tallied yet.
icd9_stats = {'exact': {},
              'no_map': set(),
              'simple': {},
              'combination': {},
              'scenario': 0,
              'choice_list': 0}

for icd9 in all_icd9s:
    icd10s_from_icd9 = icd9_to_icd10_mapping[icd9]
    simple = set()
    for gem_entry in icd10s_from_icd9:
        if not gem_entry['approximate']:
            # if this is not approximate, then we have an exact match!
            if len(icd10s_from_icd9) == 1:
                icd9_stats['exact'][icd9] = gem_entry['icd10']
            else:
                # An exact entry is not expected to have siblings; report it.
                print('then how did we get multiple icd10s!')
        elif gem_entry['no_map']:
            icd9_stats['no_map'].add(icd9)
        elif gem_entry['combination']:
            icd9_stats['combination'].setdefault(icd9, []).append(gem_entry)
        else:
            simple.add(gem_entry['icd10'])

    # One distinct approximate target counts as exact; several are "simple"
    # alternatives.
    if len(simple) == 1:
        icd9_stats['exact'][icd9] = next(iter(simple))
    elif len(simple) > 1:
        icd9_stats['simple'][icd9] = simple

# Recorded results:
# 3161 total ICD9 codes
# 2176 exact mappings
# 519 simple mappings
# 440 combination (cluster) mappings
# 27 no mappings
print(len(all_icd9s))
print(len(icd9_stats['exact']))
print(len(icd9_stats['simple']))
print(len(icd9_stats['combination']))
print(len(icd9_stats['no_map']))



2176
519
440
27
3161


In [105]:
#print icd9_stats['simple']['24951']
#print icd9_stats['combination']['24951']
#print icd9_stats['combination']['']
#for icd9 in icd9_stats['simple']:
#    if icd9 in icd9_stats['combination']:
#        print icd9
# Only code 24951 has both simple mappings and combination mappings.
# Simple mappings (once checked for reversal) could potentially be enabled via
# row duplication on the ICD9 code.

In [None]:
# Analysis:
# Which ICD codes are a 1-1 match?
# Which ICD codes exist in ICD9 and not in ICD10?
# Which ICD codes exist in ICD10 and not in ICD9?
# Which ICD9s map to multiple ICD10s?
# Which ICD10s map to multiple ICD9s?
# Which sets of ICD10s and ICD9s have a many-to-many mapping?