# Combining all the variables and their values for the patients into a single CSV

In [51]:
# Import libraries
import pandas as pd
import os
import psycopg2
import getpass
import pandas as pd
import numpy as np

In [2]:
# Path to save the extracted values
export_dir = '../extracted_data'
if not os.path.isdir(export_dir):
    os.mkdir(export_dir)

In [3]:
# Create a database connection
user = 'dhruv.sharma'
host = 'localhost'
dbname = 'mimic'
schema = 'mimiciii'

## Loading the data

In [4]:
# Connect to the database
con = psycopg2.connect(dbname=dbname, user=user, host=host, 
                       password=getpass.getpass(prompt='Password:'.format(user)))
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema))

Password:········


## Combining the static variables

<b>Demographics:</b> Subject_ID, age, gender, weight

<b>Comorbidities:</b> Congestive Heart Failure, Cardiac arrhythmias, valvular disease, pulmonary circulation, peripheral vascular, hypertension, paralysis, neurological disorder, chronic pulmonary disease, diabetes, hypothyroidism, renal failure, liver diseases, peptic ulcer, AIDS, lymphoma, metastatic cancer, rheumatoid arthritis, coagulopathy, obesity, fluid electrolyte, anemias, alcohol abuse, drug abuse, psychosis, depression 

In [7]:
demographics_path = os.path.join(export_dir, 'demographics.csv')
comorbidities_path = os.path.join(export_dir, 'comorbidities.csv')

demographics = pd.read_csv(demographics_path)
comorbidities = pd.read_csv(comorbidities_path)

The function get_comorbidity() takes the ICD9 code and returns the correspnding comorbidity.

In [11]:
def get_comorbidity(icd):
    '''
    Args:
        icd: the ICD9 code
    Returns:
        The corresponding comorbidity
    '''
    if icd == 4280:
        return 'congestive_Heart_Failure'
    if icd == 4279:
        return 'cardiac_arrhythmias'
    if icd == 3969:
        return 'valvular_disease'
    if (icd >= 4150 and icd <= 4179) or (icd >= 41511 and icd <= 41519):
        return 'pulmonary_circulation'
    if icd == 4439:
        return 'peripheral_vascular'
    if icd == 4019:
        return 'hypertension'
    if icd == 3449:
        return 'paralysis'
    if icd == 3499:
        return 'neurological_disorder'
    if (icd >= 4910 and icd <= 4959) or (icd>=49120 and icd<=49122) or (icd>=49300 and icd<=49392) or icd==490 or icd == 496:
        return ' chronic_pulmonary_disease'
    if (icd >= 25000 or icd <= 25093):
        return 'diabetes'
    if (icd == 2449):
        return 'hypothyroidism'
    if (icd == 586):
        return 'renal_failure'
    if (icd >= 5710 and icd <= 5719) or (icd >= 57140 and icd <= 57149):
        return 'liver_diseases'
    if (icd >= 53300 and icd <= 53391):
        return 'peptic_ulcer'
    if icd == 42:
        return 'AIDS'
    if icd >= 20280 and icd <= 20288:
        return 'lymphoma'
    if icd == 1991:
        return 'metastatic_cancer'
    if icd == 7140:
        return 'rheumatoid_arthritis'
    if icd == 2869:
        return 'coagulopathy'
    if icd == 27800:
        return 'obesity'
    if icd == 2769:
        return 'fluid_electrolyte'
    if icd == 2809:
        return 'anemias'
    if icd >= 30500 and icd <= 30503:
        return 'alcohol_abuse'
    if icd >= 30590 and icd <= 30593:
        return 'drug_abuse'
    if icd == 2989:
        return 'psychosis'
    if icd == 311:
        return 'depression'
        

In [19]:
def get_comorbidity_dict():
    '''
    This functions generates a dictionary with all the comorbidities, set as 0
    Args:
        None
    Returns:
        comorb_dict
    '''
    comorb_dict = dict()
    comorb_dict['congestive_Heart_Failure'] = 0
    comorb_dict['cardiac_arrhythmias'] = 0
    comorb_dict['valvular_disease'] = 0
    comorb_dict['pulmonary_circulation'] = 0
    comorb_dict['peripheral_vascular'] = 0
    comorb_dict['hypertension'] = 0
    comorb_dict['paralysis'] = 0
    comorb_dict['neurological_disorder'] = 0
    comorb_dict['chronic_pulmonary_disease'] = 0
    comorb_dict['diabetes'] = 0
    comorb_dict['hypothyroidism'] = 0
    comorb_dict['renal_failure'] = 0
    comorb_dict['liver_diseases'] = 0
    comorb_dict['peptic_ulcer'] = 0
    comorb_dict['AIDS'] = 0
    comorb_dict['lymphoma'] = 0
    comorb_dict['metastatic_cancer'] = 0
    comorb_dict['rheumatoid_arthritis'] = 0
    comorb_dict['coagulopathy'] = 0
    comorb_dict['obesity'] = 0
    comorb_dict['fluid_electrolyte'] = 0
    comorb_dict['anemias'] = 0
    comorb_dict['alcohol_abuse'] = 0
    comorb_dict['drug_abuse'] = 0
    comorb_dict['psychosis'] = 0
    comorb_dict['depression'] = 0
    return comorb_dict

In [59]:
def get_age_weight_gender(demo_df, sub_id, hadm_id):
    '''
    This function gets the age, weight, and gender corresponding to the subject_id.
    Args:
        demo_df: the DataFrame containing the demographic information for all the patients
        sub_id: the subject ID corresponding to which we want the info
    Returns:
        a dictionary containing all the info
    '''
    info = {'subject_id':sub_id, 'hadm_id':hadm_id}
    sub_info = demo_df[demo_df.subject_id == sub_id]
    try:
        info['age'] = np.array(sub_info.age)[0]
    except:
        info['age'] = np.float('nan')
    try:
        info['weight'] = np.array(sub_info.weight)[0]
    except:
        info['weight'] = np.float('nan')
    try:
        if np.array(sub_info.gender)[0] == 'M':
            info['male'] = 1
            info['female'] = 0
        elif np.array(sub_info.gender)[0] == 'F':
            info['male'] = 0
            info['female'] = 1
    except:
        info['male'] = np.float('nan')
        info['female'] = np.float('nan')
    
    return info    

In [75]:
combined_static = pd.DataFrame()

prev_hadm = None
prev_info = None
count = comorbidities.shape[0]

for i in range(count):
    this_sub = comorbidities.subject_id[i]
    this_hadm = comorbidities.hadm_id[i]
    this_icd = comorbidities.icd9_code[i]
    
    this_comorbidity = get_comorbidity(this_icd)
    this_info = get_age_weight_gender(demographics, this_sub, this_hadm)
    comorb_dict = get_comorbidity_dict()
    
    this_info.update(comorb_dict)
    this_info[this_comorbidity] = 1
    if this_hadm == prev_hadm:
        this_info = prev_info
        this_info[this_comorbidity] = 1
    else:
        combined_static = combined_static.append(prev_info, ignore_index = True)
        prev_hadm = this_hadm
    prev_info = this_info


In [77]:
columns = ['subject_id', 'hadm_id', 'age', 'weight', 'male', 'female']
for k in comorb_dict:
    columns.append(k)

combined_static = combined_static.reindex(columns=columns)
combined_static.to_csv(os.path.join(export_dir, 'static_data.csv'), index = False)
combined_static.head()

Unnamed: 0,subject_id,hadm_id,age,weight,male,female,congestive_Heart_Failure,cardiac_arrhythmias,valvular_disease,pulmonary_circulation,...,metastatic_cancer,rheumatoid_arthritis,coagulopathy,obesity,fluid_electrolyte,anemias,alcohol_abuse,drug_abuse,psychosis,depression
0,3.0,145834.0,76.52,106.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4.0,185777.0,47.84,53.599998,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,9.0,150750.0,41.79,100.300003,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,12.0,112213.0,72.37,81.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,13.0,143045.0,39.86,74.599998,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
