# Extracting Patients with Specific Disease

- Identify the needed ICD9 ranges as a dictionary.
- Generate a new DataFrame for the selected ICD9 codes.
- Extract the unique list of patients.
- Save the list of patients.
-------------------------------------------------------
- Cardiology, 390-459
- Endocrinology (Diabetes), 240-279
- Infectious disease, 001-139
- Neurological condition, 320-389

In [1]:
# Disease group -> list of (first_code, last_code, description) tuples.
# Both bounds are inclusive 3-digit ICD-9 category numbers.
ICD9_CODES = {
    # Circulatory-system ranges used for the cardiology group.
    'cardiology': [
        (390, 399, "Rheumatic fever and rheumatic heart disease"),
        (400, 405, "Hypertensive disease"),
        (410, 414, "Ischemic heart disease"),
        (415, 417, "Pulmonary heart disease and diseases of pulmonary circulation"),
        (420, 429, "Other forms of heart disease"),
        (430, 438, "Cerebrovascular disease"),
        (440, 449, "Diseases of arteries, arterioles, and capillaries"),
        (451, 459, "Diseases of veins, lymphatic vessels, and other circulatory diseases"),
    ],
    # Nervous-system and sense-organ ranges used for the neurological group.
    # NOTE(review): 339 lies between the listed ranges (338 and 340-349) and is
    # covered by none of them -- confirm this gap is intentional.
    'neurological': [
        (320, 327, "Inflammatory diseases of the central nervous system"),
        (330, 337, "Hereditary and degenerative diseases of the central nervous system"),
        (338, 338, "Pain (neurologically related)"),
        (340, 349, "Other disorders of the central nervous system"),
        (350, 359, "Disorders of the peripheral nervous system"),
        (360, 379, "Disorders of the eye and adnexa"),
        (380, 389, "Diseases of the ear and mastoid process"),
    ],
    # Infectious and parasitic disease ranges.
    'infectious': [
        (1, 9, "Intestinal infectious diseases"),
        (10, 18, "Tuberculosis"),
        (20, 27, "Zoonotic bacterial diseases"),
        (30, 41, "Other bacterial diseases"),
        (42, 44, "HIV infection"),
        (45, 49, "Poliomyelitis and other enterovirus diseases"),
        (50, 59, "Viral diseases with exanthem"),
        (60, 66, "Arthropod-borne viral diseases"),
        (70, 79, "Other viral diseases and chlamydiae"),
        (80, 88, "Rickettsioses and other arthropod-borne diseases"),
        (90, 99, "Syphilis and other venereal diseases"),
        (100, 104, "Other spirochetal diseases"),
        (110, 118, "Mycoses (fungal infections)"),
        (120, 129, "Helminthiases (worm infections)"),
        (130, 136, "Other infectious and parasitic diseases"),
        (137, 139, "Late effects of infectious and parasitic diseases"),
    ],
    # Endocrine, nutritional and metabolic ranges (diabetes is 249 and 250).
    'endocrinology': [
        (240, 246, "Disorders of the thyroid gland"),
        (249, 249, "Secondary diabetes mellitus"),
        (250, 250, "Diabetes mellitus (primary)"),
        (251, 252, "Disorders of pancreatic endocrine secretion"),
        (253, 254, "Disorders of the pituitary gland and hypothalamus"),
        (255, 258, "Disorders of the adrenal glands and other endocrine glands"),
        (259, 259, "Other endocrine disorders"),
        (260, 269, "Nutritional deficiencies"),
        (270, 279, "Metabolic disorders"),
    ],
}

# Notebook display of the assembled mapping.
ICD9_CODES


{'cardiology': [(390, 399, 'Rheumatic fever and rheumatic heart disease'),
  (400, 405, 'Hypertensive disease'),
  (410, 414, 'Ischemic heart disease'),
  (415, 417, 'Pulmonary heart disease and diseases of pulmonary circulation'),
  (420, 429, 'Other forms of heart disease'),
  (430, 438, 'Cerebrovascular disease'),
  (440, 449, 'Diseases of arteries, arterioles, and capillaries'),
  (451,
   459,
   'Diseases of veins, lymphatic vessels, and other circulatory diseases')],
 'neurological': [(320,
   327,
   'Inflammatory diseases of the central nervous system'),
  (330,
   337,
   'Hereditary and degenerative diseases of the central nervous system'),
  (338, 338, 'Pain (neurologically related)'),
  (340, 349, 'Other disorders of the central nervous system'),
  (350, 359, 'Disorders of the peripheral nervous system'),
  (360, 379, 'Disorders of the eye and adnexa'),
  (380, 389, 'Diseases of the ear and mastoid process')],
 'infectious': [(1, 9, 'Intestinal infectious diseases'),
  (10

In [3]:
import pandas as pd
# Directory containing the MIMIC CSV files read below (absolute, machine-specific path).
folder_path = '/lustre/home/almusawiaf/PhD_Projects/MIMIC_resources'

def extract3(code):
    """Return the first three characters of ``code`` rendered as text.

    ``code`` is converted with ``str()`` first, so non-string values are
    accepted. Inputs shorter than three characters are returned whole.
    """
    code_text = str(code)
    return code_text[:3]
    
# Load the diagnosis table.
# ICD9_CODE is read explicitly as text: left to dtype inference, pandas may
# parse all-digit values as numbers and drop leading zeros (e.g. "0389" -> 389),
# which would shift the 3-character prefix taken below into a different range.
df_DiagnosisICD = pd.read_csv(
    f'{folder_path}/DIAGNOSES_ICD.csv',
    dtype={'ICD9_CODE': str},
)    # Diagnosis!


# processing the data, and modify length of the diagnosis and procedure:
print('For the given diagnosis, extract the sub dataframe....')
# Rows without a diagnosis code cannot be matched to any range; drop them.
df_DiagnosisICD.dropna(subset=['ICD9_CODE'], inplace=True)

# Reduce every code to its 3-character prefix.
df_DiagnosisICD['ICD9_CODE'] = df_DiagnosisICD['ICD9_CODE'].apply(extract3)
df_DiagnosisICD


For the given diagnosis, extract the sub dataframe....


Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE
0,1297,109,172335,1.0,403
1,1298,109,172335,2.0,486
2,1299,109,172335,3.0,582
3,1300,109,172335,4.0,585
4,1301,109,172335,5.0,425
...,...,...,...,...,...
651042,639798,97503,188195,2.0,202
651043,639799,97503,188195,3.0,V58
651044,639800,97503,188195,4.0,V12
651045,639801,97503,188195,5.0,527


In [5]:
def save_list_as_pickle(L, given_path):
    """Pickle ``L`` into the file at ``given_path``.

    Prints the destination first, then opens the file in binary write mode
    (replacing any existing content) and serialises ``L`` into it.
    """
    import pickle

    print(f'saving to {given_path}')
    with open(given_path, mode='wb') as sink:
        pickle.Pickler(sink).dump(L)

# Step 1: Convert ICD9_CODE to numeric. Prefixes that are not numbers
# (e.g. "V58") become NaN and therefore never match a range below.
df_DiagnosisICD['ICD9_CODE'] = pd.to_numeric(df_DiagnosisICD['ICD9_CODE'], errors='coerce')

# Step 2: Process every disease group defined in ICD9_CODES (not only cardiology).
for selected_disease, disease_ranges in ICD9_CODES.items():
    # Step 3: Flag the rows whose code falls inside any (start, end) range of
    # this group. Series.between is inclusive on both ends and is False for NaN,
    # matching a per-row `start <= code <= end` check without a Python-level loop
    # over rows.
    in_any_range = pd.Series(False, index=df_DiagnosisICD.index)
    for start, end, _ in disease_ranges:
        in_any_range |= df_DiagnosisICD['ICD9_CODE'].between(start, end)

    # Step 4: Keep only the matching diagnosis rows.
    df_filtered = df_DiagnosisICD[in_any_range]

    # Step 5: Collect the distinct patients of this group, prefixed with 'C_'.
    Patients = [f'C_{p}' for p in df_filtered['SUBJECT_ID'].unique()]

    # Step 6: Persist one pickle per disease group and report its size.
    save_list_as_pickle(Patients, f'../Data/Patients_{selected_disease}.pkl')
    print(f'\tNumber of patients = {len(Patients)}')


saving to ../Data/Patients_cardiology.pkl
	Number of patients = 32503
saving to ../Data/Patients_neurological.pkl
	Number of patients = 12738
saving to ../Data/Patients_infectious.pkl
	Number of patients = 11577
saving to ../Data/Patients_endocrinology.pkl
	Number of patients = 27440
