In [1]:
import ipywidgets as widgets
from pathlib import Path

In [2]:
from my_preprocessing.cohort_extractor import CohortExtractor
from my_preprocessing.prediction_task import TargetType, PredictionTask, DiseaseCode



# Welcome to MIMIC-IV Project

In [3]:
# Make sure the folder for the downloaded MIMIC-IV files exists (no error if it already does).
Path("raw_data").mkdir(parents=True, exist_ok=True)

This repository explains the steps to download and clean MIMIC-IV dataset for analysis.
The repository is compatible with MIMIC-IV v2.0

Please go to:
- https://physionet.org/content/mimiciv/2.0/ 

Follow instructions to get access to MIMIC-IV dataset.


Save the downloaded files in the folder `raw_data`.

The structure should look like below
- raw_data/mimiciv_2_0/hosp
- raw_data/mimiciv_2_0/icu

## 1. DATA EXTRACTION

In [3]:
# Top-level choice of prediction task; later cells read `task_ratio.value`.
print("Please select what prediction task you want to perform ?")
task_ratio = widgets.RadioButtons(
    options=['Mortality', 'Length of Stay', 'Readmission', 'Phenotype'],
    value='Mortality',
)
display(task_ratio)

Please select what prediction task you want to perform ?


RadioButtons(options=('Mortality', 'Length of Stay', 'Readmission', 'Phenotype'), value='Mortality')

DEBUG:Comm:handle_msg[21421bb347724385ae7ee235f918b47c]({'header': {'date': datetime.datetime(2023, 12, 5, 13, 8, 48, 701000, tzinfo=tzutc()), 'msg_id': 'ef9ba017-4201-4139-9bfa-3380c0635ed6', 'msg_type': 'comm_msg', 'session': 'e419a1ed-71d0-440b-9c20-10b124493871', 'username': '382850c6-4afc-4077-b57d-6a67deffc5c7', 'version': '5.2'}, 'msg_id': 'ef9ba017-4201-4139-9bfa-3380c0635ed6', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': '21421bb347724385ae7ee235f918b47c', 'data': {'method': 'update', 'state': {'index': 1}, 'buffer_paths': []}}, 'buffers': []})


### Refining Cohort and Prediction Task Definition

Based on your current selection, the following block will provide options to further refine the prediction task and the cohort associated with it:

- First you will refine the prediction task choosing from following options -
    - **Length of Stay** - You can select from two predefined options or enter a custom number of days to predict a length of stay greater than or equal to that number of days.

    - **Readmission** - You can select from four predefined options or enter a custom number of days to predict readmission after "number of days" after the previous admission.

    - **Phenotype Prediction** - You can select from four major chronic diseases to predict its future outcome

        - Heart failure
        - CAD (Coronary Artery Disease)
        - CKD (Chronic Kidney Disease)
        - COPD (Chronic obstructive pulmonary disease)

- Second, you will choose whether to perform the above task using ICU or non-ICU admissions data

- Third, you can refine the cohort selection for any of the above chosen prediction tasks by including only the admission samples admitted with a particular chronic disease - 
    - Heart failure
    - CAD (Coronary Artery Disease)
    - CKD (Chronic Kidney Disease)
    - COPD (Chronic obstructive pulmonary disease)
    

In [4]:
def create_length_of_stay_widgets():
    """Show the length-of-stay threshold selector and return its widgets.

    Displays a radio group (two preset thresholds plus 'Custom') together
    with a labelled day slider, and returns ``(radio_input, slider)`` so the
    caller can read the selected values later.
    """
    presets = ['Length of Stay ≥ 3', 'Length of Stay ≥ 7', 'Custom']
    los_choice = widgets.RadioButtons(options=presets, value='Length of Stay ≥ 3')
    day_slider = widgets.IntSlider(
        value=3, min=1, max=10, step=1, continuous_update=False
    )
    slider_caption = widgets.Label('Length of stay ≥ (days):', layout={'width': '180px'})
    slider_row = widgets.HBox([slider_caption, day_slider])
    display(los_choice, slider_row)
    return los_choice, day_slider

def create_readmission_widgets():
    """Show the readmission-window selector and return its widgets.

    Displays a radio group (four preset windows plus 'Custom') together with
    a labelled day slider, and returns ``(radio_input, slider)``.
    """
    presets = [
        '30 Day Readmission',
        '60 Day Readmission',
        '90 Day Readmission',
        '120 Day Readmission',
        'Custom',
    ]
    window_choice = widgets.RadioButtons(options=presets, value='30 Day Readmission')
    day_slider = widgets.IntSlider(value=30, min=10, max=150, step=10)
    slider_caption = widgets.Label('Readmission after (days):', layout={'width': '180px'})
    slider_row = widgets.HBox([slider_caption, day_slider])
    display(window_choice, slider_row)
    return window_choice, day_slider

def create_phenotype_widgets():
    """Show the phenotype (disease in 30 days) selector and return the radio group."""
    phenotype_choice = widgets.RadioButtons(
        options=[
            'Heart Failure in 30 days',
            'CAD in 30 days',
            'CKD in 30 days',
            'COPD in 30 days',
        ],
        value='Heart Failure in 30 days',
    )
    display(phenotype_choice)
    return phenotype_choice

def create_mortality_widgets():
    """Return a single-option radio group for the mortality task.

    Unlike the other ``create_*_widgets`` helpers, nothing is displayed here.
    """
    return widgets.RadioButtons(options=['Mortality'], value='Mortality')


In [10]:
# Task-specific refinement widgets. Mortality needs no extra input, so the
# "precise the task" prompt is only printed for the other tasks.
if task_ratio.value == 'Mortality':
    mortality_radio = create_mortality_widgets()
else:
    print("Please select to precise the prediction task ")
    if task_ratio.value == 'Length of Stay':
        los_radio, los_slider = create_length_of_stay_widgets()
    elif task_ratio.value == 'Readmission':
        readmission_radio, readmission_slider = create_readmission_widgets()
    elif task_ratio.value == 'Phenotype':
        phenotype_radio = create_phenotype_widgets()

# Data source: ICU stays or general (non-ICU) hospital admissions.
print("Please select below if you want to work with ICU or Non-ICU data:")
icu_type_input = widgets.RadioButtons(
    options=['ICU', 'Non-ICU'],
    value='ICU',
)
display(icu_type_input)

# Optional cohort restriction to admissions with a given chronic disease.
print("Please select if you want to perform the chosen prediction task for a specific disease.")
disease_filter_input = widgets.RadioButtons(
    options=['No Disease Filter', 'Heart Failure', 'CKD', 'CAD', 'COPD'],
    value='No Disease Filter',
)
display(disease_filter_input)



Please select to precise the prediction task 


RadioButtons(options=('Length of Stay ≥ 3', 'Length of Stay ≥ 7', 'Custom'), value='Length of Stay ≥ 3')

HBox(children=(Label(value='Length of stay ≥ (days):', layout=Layout(width='180px')), IntSlider(value=3, conti…

Please select below if you want to work with ICU or Non-ICU data:


RadioButtons(options=('ICU', 'Non-ICU'), value='ICU')

Please select if you want to perform the chosen prediction task for a specific disease.


RadioButtons(options=('No Disease Filter', 'Heart Failure', 'CKD', 'CAD', 'COPD'), value='No Disease Filter')

DEBUG:Comm:handle_msg[1c51e262966f4e559bdcc7bef82c2ca1]({'header': {'date': datetime.datetime(2023, 12, 5, 13, 10, 18, 395000, tzinfo=tzutc()), 'msg_id': '66f1980f-2c5d-428b-a61f-397b7ddb06b2', 'msg_type': 'comm_msg', 'session': 'e419a1ed-71d0-440b-9c20-10b124493871', 'username': '382850c6-4afc-4077-b57d-6a67deffc5c7', 'version': '5.2'}, 'msg_id': '66f1980f-2c5d-428b-a61f-397b7ddb06b2', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': '1c51e262966f4e559bdcc7bef82c2ca1', 'data': {'method': 'update', 'state': {'index': 1}, 'buffer_paths': []}}, 'buffers': []})
DEBUG:Comm:handle_msg[ed8192e0315a4ae1a1e423e662f561b5]({'header': {'date': datetime.datetime(2023, 12, 5, 13, 10, 19, 226000, tzinfo=tzutc()), 'msg_id': '4cf4ed06-6fa7-4793-b212-f6e76ecfd30e', 'msg_type': 'comm_msg', 'session': 'e419a1ed-71d0-440b-9c20-10b124493871', 'username': '382850c6-4afc-4077-b57d-6a67deffc5c7', 'version': '5.2'}, 'msg_id': '4cf4ed06-6fa7-4793-b212-f6e76ecfd30e', 'msg_type'

In [12]:
def get_time_from_input():
    """Return the number of days that parameterizes the selected prediction task.

    Reads the notebook-level widgets created by the earlier cells:

    - Length of Stay: the slider value when 'Custom' is selected, otherwise
      the number at the end of the preset label (e.g. 'Length of Stay ≥ 7' -> 7).
    - Readmission: the slider value when 'Custom' is selected, otherwise the
      number at the start of the preset label (e.g. '60 Day Readmission' -> 60).
    - Phenotype: always 30.
    - Anything else (Mortality): 0.
    """
    task_type = task_ratio.value
    if task_type == 'Length of Stay':
        if los_radio.value == 'Custom':
            return los_slider.value
        # Preset labels end with the threshold; take the last token rather
        # than relying on a fixed word position.
        return int(los_radio.value.split()[-1])
    if task_type == 'Readmission':
        if readmission_radio.value == 'Custom':
            return readmission_slider.value
        # Fix: the preset must be read from the readmission radio group. The
        # length-of-stay radio does not exist (or holds an unrelated label)
        # when the Readmission task is selected.
        return int(readmission_radio.value.split()[0])
    if task_type == 'Phenotype':
        return 30
    return 0

def get_disease_label():
    """Return the disease code targeted by the Phenotype task.

    Returns ``None`` when the selected task is not 'Phenotype'. Otherwise the
    selected phenotype option is looked up in a fixed option-to-code table,
    falling back to an empty string for an unrecognized option.
    """
    if task_ratio.value == 'Phenotype':
        selected_option = phenotype_radio.value
        code_by_option = {
            'Heart Failure in 30 days': DiseaseCode.HEARTH_FAILURE,
            'CAD in 30 days': DiseaseCode.CAD,
            'CKD in 30 days': DiseaseCode.CKD,
            'COPD in 30 days': DiseaseCode.COPD,
        }
        return code_by_option.get(selected_option, "")
    return None

def convert_to_icd_code(disease):
    """Translate a disease-filter label into its ``DiseaseCode`` member.

    Recognized labels are 'Heart Failure', 'CKD', 'COPD' and 'CAD'; any other
    value (e.g. 'No Disease Filter') yields ``None``.
    """
    if disease == "Heart Failure":
        return DiseaseCode.HEARTH_FAILURE
    if disease == "CKD":
        return DiseaseCode.CKD
    if disease == "COPD":
        return DiseaseCode.COPD
    if disease == "CAD":
        return DiseaseCode.CAD
    return None

def convert_to_prediction_task(task_text):
    """Map the task label chosen in the UI to a ``TargetType`` member.

    'Mortality' and 'Length of Stay' have dedicated targets; every other
    label (including 'Readmission' and 'Phenotype') maps to
    ``TargetType.READMISSION``.
    """
    if task_text == 'Mortality':
        return TargetType.MORTALITY
    if task_text == 'Length of Stay':
        return TargetType.LOS
    return TargetType.READMISSION

In [13]:
# Assemble the task definition from the widget selections made above.
# get_disease_label() already returns None unless the Phenotype task is
# selected, so no extra task check is needed at the call site.
prediction_task = PredictionTask(
    target_type=convert_to_prediction_task(task_ratio.value),
    disease_readmission=get_disease_label(),
    disease_selection=convert_to_icd_code(disease_filter_input.value),
    nb_days=get_time_from_input(),
    use_icu=(icu_type_input.value == "ICU"),
)

# Extract the cohort for that task.
cohort_extractor = CohortExtractor(prediction_task=prediction_task)
cohort = cohort_extractor.extract()

INFO:root:EXTRACTING FOR: NON-ICU | LENGHTH OF STAY ADMITTED DUE TO I25 | 7 |
INFO:root:[ LOS LABELS FINISHED: 21 LOS Cases ]
INFO:root:[ COHORT cohort_Non-ICU_lenghth_of_stay_7__I25 SAVED ]
INFO:root:[ COHORT SUCCESSFULLY SAVED ]
INFO:root:cohort_Non-ICU_lenghth_of_stay_7__I25
INFO:root:[ SUMMARY SUCCESSFULLY SAVED ]


## 2. FEATURE SELECTION
Features available for ICU data -
- Diagnosis (https://mimic.mit.edu/docs/iv/modules/hosp/diagnoses_icd/)
- Procedures (https://mimic.mit.edu/docs/iv/modules/icu/procedureevents/)
- Medications (https://mimic.mit.edu/docs/iv/modules/icu/inputevents/)
- Output Events (https://mimic.mit.edu/docs/iv/modules/icu/outputevents/)
- Chart Events (https://mimic.mit.edu/docs/iv/modules/icu/chartevents/)

Features available for non-ICU data -
- Diagnosis (https://mimic.mit.edu/docs/iv/modules/hosp/diagnoses_icd/)
- Procedures (https://mimic.mit.edu/docs/iv/modules/hosp/procedures_icd/)
- Medications (https://mimic.mit.edu/docs/iv/modules/hosp/prescriptions/)
- Lab Events (https://mimic.mit.edu/docs/iv/modules/hosp/labevents/)

All features will be saved in **./preproc_data/features/**

In [16]:
# One checkbox per feature family. Diagnosis, Procedures and Medications are
# offered for both cohort types; the middle checkboxes depend on ICU vs non-ICU.
print("Feature Selection")
print("Which Features you want to include for cohort?")

dia_input = widgets.Checkbox(description='Diagnosis')
display(dia_input)

if cohort_extractor.prediction_task.use_icu:
    # ICU-only feature sources.
    out_input = widgets.Checkbox(description='Output Events')
    display(out_input)
    chart_input = widgets.Checkbox(description='Chart Events(Labs and Vitals)')
    display(chart_input)
else:
    # Non-ICU-only feature source.
    lab_input = widgets.Checkbox(description='Labs')
    display(lab_input)

proc_input = widgets.Checkbox(description='Procedures')
display(proc_input)
med_input = widgets.Checkbox(description='Medications')
display(med_input)

print("**Please run below cell to extract selected features**")

Feature Selection
Which Features you want to include for cohort?


Checkbox(value=False, description='Diagnosis')

Checkbox(value=False, description='Labs')

Checkbox(value=False, description='Procedures')

Checkbox(value=False, description='Medications')

**Please run below cell to extract selected features**


DEBUG:Comm:handle_msg[aaebdd6403734e0db4d111937c0752ca]({'header': {'date': datetime.datetime(2023, 12, 5, 13, 11, 14, 463000, tzinfo=tzutc()), 'msg_id': '49c3eea1-a570-4308-84cf-056f4d8897ee', 'msg_type': 'comm_msg', 'session': 'e419a1ed-71d0-440b-9c20-10b124493871', 'username': '382850c6-4afc-4077-b57d-6a67deffc5c7', 'version': '5.2'}, 'msg_id': '49c3eea1-a570-4308-84cf-056f4d8897ee', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': 'aaebdd6403734e0db4d111937c0752ca', 'data': {'method': 'update', 'state': {'value': True}, 'buffer_paths': []}}, 'buffers': []})
DEBUG:Comm:handle_msg[703868ec7c52421fbac529a1ccae84d6]({'header': {'date': datetime.datetime(2023, 12, 5, 13, 11, 14, 936000, tzinfo=tzutc()), 'msg_id': '8d0a2e72-d080-4000-b602-ac0007d2b1ab', 'msg_type': 'comm_msg', 'session': 'e419a1ed-71d0-440b-9c20-10b124493871', 'username': '382850c6-4afc-4077-b57d-6a67deffc5c7', 'version': '5.2'}, 'msg_id': '8d0a2e72-d080-4000-b602-ac0007d2b1ab', 'msg_ty

In [20]:
# Inspection only: show whether the task was built for ICU data.
prediction_task.use_icu

False

In [17]:
# Inspection only: show whether the 'Diagnosis' checkbox is ticked.
dia_input.value

True

In [21]:
from my_preprocessing.features_extractor import FeatureExtractor

# The output/chart checkboxes only exist for ICU cohorts and the labs checkbox
# only for non-ICU cohorts; the `and` short-circuit keeps the missing widgets
# from being read.
feature_extractor = FeatureExtractor(
    cohort_output=cohort_extractor.cohort_output,
    use_icu=prediction_task.use_icu,
    for_diagnoses=dia_input.value,
    for_procedures=proc_input.value,
    for_medications=med_input.value,
    for_output_events=prediction_task.use_icu and out_input.value,
    for_chart_events=prediction_task.use_icu and chart_input.value,
    for_labs=not prediction_task.use_icu and lab_input.value,
)



In [24]:
# Run the extraction for the selected feature families and keep the returned value.
features = feature_extractor.save_features()

INFO:root:[EXTRACTING DIAGNOSIS DATA]
INFO:root:[SUCCESSFULLY SAVED DIAGNOSES DATA]
INFO:root:[EXTRACTING PROCEDURES DATA]
INFO:root: # Unique ICD9 Procedures:42
INFO:root: # Unique ICD10 Procedures:70
INFO:root:
Value counts of each ICD version:
 icd_version
10    103
9      79
Name: count, dtype: int64
INFO:root:# Admissions:50
INFO:root:Total number of rows: 182
INFO:root:[SUCCESSFULLY SAVED PROCEDURES DATA]
INFO:root:[EXTRACTING MEDICATIONS DATA]
INFO:root:Number of unique type of drug: 275
INFO:root:Number of unique type of drug (after grouping to use Non propietary names): 236
INFO:root:[SUCCESSFULLY SAVED MEDICATIONS DATA]
INFO:root:[EXTRACTING LABS DATA]


# Admissions:   72
Total number of rows:  3648


1it [00:01,  1.36s/it]
INFO:root:[SUCCESSFULLY SAVED LABS DATA]


## 3. CLINICAL GROUPING
Grouping medical codes will reduce dimensional space of features.

Default options selected below will group medical codes to reduce feature dimension space.


In [25]:
# Diagnosis codes: choose how ICD-9 / ICD-10 codes are kept, converted or grouped.
if feature_extractor.for_diagnoses:
    print("Do you want to group ICD 10 DIAG codes ?")
    group_dia_icd_input = widgets.RadioButtons(
        options=[
            'Keep both ICD-9 and ICD-10 codes',
            'Convert ICD-9 to ICD-10 codes',
            'Convert ICD-9 to ICD-10 and group ICD-10 codes',
        ],
        value='Convert ICD-9 to ICD-10 and group ICD-10 codes',
        layout={'width': '100%'},
    )
    display(group_dia_icd_input)

# Medication and procedure grouping options are only offered for non-ICU cohorts.
if not prediction_task.use_icu:
    if feature_extractor.for_medications:
        print("Do you want to group Medication codes to use Non propietary names?")
        group_med_code_input = widgets.RadioButtons(
            options=['Yes', 'No'],
            value='Yes',
            layout={'width': '100%'},
        )
        display(group_med_code_input)
    if feature_extractor.for_procedures:
        print("Which ICD codes for Procedures you want to keep in data?")
        group_proc_icd_input = widgets.RadioButtons(
            options=['ICD-9 and ICD-10', 'ICD-10'],
            value='ICD-10',
            layout={'width': '100%'},
        )
        display(group_proc_icd_input)

print("**Please run below cell to perform feature preprocessing**")

Do you want to group ICD 10 DIAG codes ?


RadioButtons(index=2, layout=Layout(width='100%'), options=('Keep both ICD-9 and ICD-10 codes', 'Convert ICD-9…

Do you want to group Medication codes to use Non propietary names?


RadioButtons(layout=Layout(width='100%'), options=('Yes', 'No'), value='Yes')

Which ICD codes for Procedures you want to keep in data?


RadioButtons(index=1, layout=Layout(width='100%'), options=('ICD-9 and ICD-10', 'ICD-10'), value='ICD-10')

**Please run below cell to perform feature preprocessing**


In [None]:
# Fix: the imported name and the instantiated name must match. The original
# imported `FeaturePreprocesor` but called `FeaturePreprocessor()`, which
# raises NameError.
# NOTE(review): assumes the class in features_preprocessor is spelled
# `FeaturePreprocessor` - confirm against the module.
from my_preprocessing.features_preprocessor import FeaturePreprocessor
feature_preprocessor = FeaturePreprocessor()