In [1]:
# Libraries
import pandas as pd
pd.set_option('display.max_columns', 40)
pd.set_option('display.width', 2000)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Import

In [3]:
# Import UMLS CUI to snomed map
iter_csv = pd.read_csv(r"data/MRCONSO.RRF", sep="|" , iterator=True, chunksize=10000000)
mrconso_df = pd.concat([chunk for chunk in iter_csv])
mrconso_df.columns = ['cui_code', 'language', 'term_status', 'LUI', 'string_type', 'SUI', 'atom_status', 'AUI', 'SAUI', 'SCUI', 'SDUI', 'SAB', 'TTY', 'code', 'STR', 'SRL', 'suppress', 'CVF', 'extra']
print('Done!')

  mrconso_df = pd.concat([chunk for chunk in iter_csv])
  mrconso_df = pd.concat([chunk for chunk in iter_csv])


Done!


In [4]:
# Import charlson index (downloaded online)
path = r'data/charlson_index_2.csv'
charlson_index = pd.read_csv(path)

In [5]:
charlson_index

Unnamed: 0,Comorbid Condition,ICD-9-CMDiagnosisCodes,ICD-10-CADiagnosisCodes,Weight
0,Myocardial Infarction,410,I21,1.0
1,Myocardial Infarction,412,I22,1.0
2,Myocardial Infarction,,I25.21,1.0
3,Congestive Heart Failure,398.91,I09.9,1.0
4,Congestive Heart Failure,402.01,I11.0,1.0
...,...,...,...,...
330,HIV/AIDS,042,B20,6.0
331,HIV/AIDS,043,B21,6.0
332,HIV/AIDS,044,B22,6.0
333,HIV/AIDS,,B24,6.0


In [6]:
charlson_index.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 335 entries, 0 to 334
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Comorbid Condition       334 non-null    object 
 1   ICD-9-CMDiagnosisCodes   280 non-null    object 
 2   ICD-10-CADiagnosisCodes  289 non-null    object 
 3   Weight                   334 non-null    float64
dtypes: float64(1), object(3)
memory usage: 10.6+ KB


In [7]:
# Get list of conditions
comorbid_condition_list = charlson_index['Comorbid Condition'].unique().tolist()
# Remove nan
comorbid_condition_list = comorbid_condition_list[:-1]

In [8]:
# Turn off 'A value is trying to be set on a copy of a slice from a DataFrame'
pd.options.mode.chained_assignment = None  # default='warn'

In [9]:
# Convert to snomed code and combine with charlson index
snomed_charlson_index = pd.DataFrame()

for condition in comorbid_condition_list:
    print(condition)
    # Get icd codes
    temp_charlson_df = charlson_index[charlson_index['Comorbid Condition'] == condition]
    icd9_list = temp_charlson_df['ICD-9-CMDiagnosisCodes'].unique().tolist()
    icd10_list = temp_charlson_df['ICD-10-CADiagnosisCodes'].unique().tolist()
    icd_list = icd9_list + icd10_list
    # Filter for icd cdoes to find cui_code
    temp_mrconso_df = mrconso_df[(mrconso_df['SAB'].str.contains('icd', case=False)) & (mrconso_df['code'].isin(icd_list)) & (mrconso_df['language'] == 'ENG')]
    cui_code_list = temp_mrconso_df['cui_code'].unique().tolist()
    # Filter for cui_code to find snomed code
    temp_mrconso_df_2 = mrconso_df[(mrconso_df['cui_code'].isin(cui_code_list)) & (mrconso_df['language'] == 'ENG') & (mrconso_df['SAB'] == 'SNOMEDCT_US')]
    # Get somed codes
    temp_snomed_charlson_index = temp_mrconso_df_2[['code', 'STR']]
    temp_snomed_charlson_index.drop_duplicates(inplace=True)
    temp_snomed_charlson_index['comorbid_condition'] = condition
    temp_snomed_charlson_index['weight'] = temp_charlson_df['Weight'].iloc[0]
    snomed_charlson_index = pd.concat([snomed_charlson_index, temp_snomed_charlson_index], ignore_index=True)
print('Done!')    

Myocardial Infarction
Congestive Heart Failure
Peripheral Vascular
Disease
Cerebrovascular Disease
Dementia
Chronic Pulmonary
Disease
Connective Tissue Disease Rheumatic Disease
Peptic Ulcer Disease
Mild Liver Disease
Diabetes without Chronic
Complications 
Diabetes with Chronic
Complications
Paraplegia and
Hemiplegia
Renal Disease
Cancer
Moderate or Severe Liver
Disease
Metastatic Carcinoma 
HIV/AIDS
Done!


In [10]:
snomed_charlson_index

Unnamed: 0,code,STR,comorbid_condition,weight
0,66514008,Thrombosis - coronary,Myocardial Infarction,1.0
1,194796000,Thrombosis - coronary,Myocardial Infarction,1.0
2,398274000,Thrombosis - coronary,Myocardial Infarction,1.0
3,155304006,Coronary thrombosis,Myocardial Infarction,1.0
4,194796000,Coronary thrombosis,Myocardial Infarction,1.0
...,...,...,...,...
4373,187445009,[X]Hiv disease resulting in unspecified infect...,HIV/AIDS,6.0
4374,186712001,[X]HIV disease resulting in unspecified malign...,HIV/AIDS,6.0
4375,187449003,[X]HIV disease resulting in unspecified malign...,HIV/AIDS,6.0
4376,186712001,[X]HIV disease resulting in unspecified malign...,HIV/AIDS,6.0


In [11]:
# Import snomed_distance
path = r'data/new_international_snomed_distance.csv'
snomed_distance = pd.read_csv(path, index_col=0)

In [13]:
# Filter for those of interest to this research
icht_code_list = snomed_distance.columns.to_list()
filtered_snomed_charlson_index = snomed_charlson_index[snomed_charlson_index['code'].isin(icht_code_list)]

In [14]:
filtered_snomed_charlson_index

Unnamed: 0,code,STR,comorbid_condition,weight
16,22298006,Myocardial infarction,Myocardial Infarction,1.0
17,22298006,"Myocardial infarction, NOS",Myocardial Infarction,1.0
25,22298006,Heart attack,Myocardial Infarction,1.0
26,22298006,"Heart attack, NOS",Myocardial Infarction,1.0
27,22298006,Infarction of heart,Myocardial Infarction,1.0
...,...,...,...,...
4354,86406008,HIV infection,HIV/AIDS,6.0
4357,86406008,HIV - Human immunodeficiency virus infection,HIV/AIDS,6.0
4359,86406008,Human immunodeficiency virus infection,HIV/AIDS,6.0
4360,86406008,"Human immunodeficiency virus infection, NOS",HIV/AIDS,6.0


In [28]:
# Save
#filtered_snomed_charlson_index.to_csv('filtered_snomed_charlson_index.csv', index=False)