In [1]:
import ipywidgets as widgets
from pathlib import Path

In [81]:
from my_preprocessing.cohort_extractor import CohortExtractor
from my_preprocessing.prediction_task import TargetType, PredictionTask, DiseaseCode

# Welcome to MIMIC-IV Project

In [11]:

# Create the local "raw_data" directory (relative to the current working
# directory). exist_ok=True makes this safe to re-run when it already exists.
Path("raw_data").mkdir(parents=True, exist_ok=True)

This repository explains the steps to download and clean MIMIC-IV dataset for analysis.
The repository is compatible with MIMIC-IV v2.0

Please go to:
- https://physionet.org/content/mimiciv/2.0/ 

Follow instructions to get access to MIMIC-IV dataset.


Save the downloaded files in the folder raw_data

The structure should look like below
- raw_data/mimiciv_2_0/hosp
- raw_data/mimiciv_2_0/icu

## 1. DATA EXTRACTION

In [32]:
# Top-level task selector: the cells below read the choice from task_ratio.value.
print("Please select what prediction task you want to perform ?")
task_ratio = widgets.RadioButtons(
    value='Mortality',
    options=['Mortality', 'Length of Stay', 'Readmission', 'Phenotype'],
)
display(task_ratio)

Please select what prediction task you want to perform ?


RadioButtons(options=('Mortality', 'Length of Stay', 'Readmission', 'Phenotype'), value='Mortality')

### Refining Cohort and Prediction Task Definition

Based on your current selection, the following block will provide options to further refine the prediction task and the cohort associated with it:

- First you will refine the prediction task choosing from following options -
    - **Length of Stay** - You can select from two predefined options or enter a custom number of days to predict a length of stay greater than or equal to that number of days.

    - **Readmission** - You can select from two predefined options or enter custom number of days to predict readmission after "number of days" after previous admission.

    - **Phenotype Prediction** - You can select from four major chronic diseases to predict its future outcome

        - Heart failure
        - CAD (Coronary Artery Disease)
        - CKD (Chronic Kidney Disease)
        - COPD (Chronic obstructive pulmonary disease)

- Second, you will choose whether to perform the above task using ICU or non-ICU admissions data

- Third, you can refine the cohort selection for any of the above chosen prediction tasks by including the admission samples admitted with a particular chronic disease -
    - Heart failure
    - CAD (Coronary Artery Disease)
    - CKD (Chronic Kidney Disease)
    - COPD (Chronic obstructive pulmonary disease)
    

In [85]:
def create_length_of_stay_widgets():
    """Build and display the length-of-stay threshold controls.

    Displays a radio group (two preset thresholds plus 'Custom') followed by
    a labelled integer slider ranging from 1 to 10 days (default 3).

    Returns:
        tuple: ``(radio_input, slider)`` - the radio-button widget and the
        slider widget, in that order.
    """
    threshold_choices = ['Length of Stay ≥ 3', 'Length of Stay ≥ 7', 'Custom']
    los_choice = widgets.RadioButtons(options=threshold_choices, value='Length of Stay ≥ 3')
    days_slider = widgets.IntSlider(value=3, min=1, max=10, step=1, continuous_update=False)
    # Fixed-width caption so the slider lines up with the readmission slider row.
    slider_caption = widgets.Label('Length of stay ≥ (days):', layout={'width': '180px'})
    slider_row = widgets.HBox([slider_caption, days_slider])
    display(los_choice, slider_row)
    return los_choice, days_slider

def create_readmission_widgets():
    """Build and display the readmission-window controls.

    Displays a radio group (four preset windows plus 'Custom') followed by a
    labelled integer slider ranging from 10 to 150 days in steps of 10
    (default 30).

    Returns:
        tuple: ``(radio_input, slider)`` - the radio-button widget and the
        slider widget, in that order.
    """
    window_choices = [
        '30 Day Readmission',
        '60 Day Readmission',
        '90 Day Readmission',
        '120 Day Readmission',
        'Custom',
    ]
    window_choice = widgets.RadioButtons(options=window_choices, value='30 Day Readmission')
    days_slider = widgets.IntSlider(value=30, min=10, max=150, step=10)
    slider_caption = widgets.Label('Readmission after (days):', layout={'width': '180px'})
    slider_row = widgets.HBox([slider_caption, days_slider])
    display(window_choice, slider_row)
    return window_choice, days_slider

def create_phenotype_widgets():
    """Build and display the phenotype selector.

    Displays a radio group offering the four "<disease> in 30 days" options,
    with heart failure preselected.

    Returns:
        The radio-button widget holding the selected phenotype label.
    """
    phenotype_choices = [
        'Heart Failure in 30 days',
        'CAD in 30 days',
        'CKD in 30 days',
        'COPD in 30 days',
    ]
    phenotype_choice = widgets.RadioButtons(
        options=phenotype_choices,
        value='Heart Failure in 30 days',
    )
    display(phenotype_choice)
    return phenotype_choice

def create_mortality_widgets():
    """Build the single-option mortality selector.

    Unlike the other ``create_*_widgets`` helpers, this one does not display
    the widget; it only constructs and returns it.

    Returns:
        A radio-button widget whose only option (and value) is 'Mortality'.
    """
    return widgets.RadioButtons(options=['Mortality'], value='Mortality')


In [86]:
# Task-specific refinement: only the widgets for the selected task are created,
# so only the matching *_radio / *_slider names exist after this cell runs.
if task_ratio.value == 'Mortality':
    # Mortality has nothing to refine; the widget is built but never displayed.
    mortality_radio = create_mortality_widgets()
else:
    print("Please select to precise the prediction task ")
    if task_ratio.value == 'Length of Stay':
        los_radio, los_slider = create_length_of_stay_widgets()
    elif task_ratio.value == 'Readmission':
        readmission_radio, readmission_slider = create_readmission_widgets()
    elif task_ratio.value == 'Phenotype':
        phenotype_radio = create_phenotype_widgets()

# Data source: ICU stays or general (non-ICU) hospital admissions.
print("Please select below if you want to work with ICU or Non-ICU data:")
icu_type_input = widgets.RadioButtons(
    options=['ICU', 'Non-ICU'],
    value='ICU',
)
display(icu_type_input)

# Optional restriction of the cohort to a single chronic disease.
print("Please select if you want to perform the chosen prediction task for a specific disease.")
disease_filter_input = widgets.RadioButtons(
    options=['No Disease Filter', 'Heart Failure', 'CKD', 'CAD', 'COPD'],
    value='No Disease Filter',
)
display(disease_filter_input)



Please select below if you want to work with ICU or Non-ICU data:


RadioButtons(options=('ICU', 'Non-ICU'), value='ICU')

Please select if you want to perform the chosen prediction task for a specific disease.


RadioButtons(options=('No Disease Filter', 'Heart Failure', 'CKD', 'CAD', 'COPD'), value='No Disease Filter')

In [92]:
def get_time_from_input():
    """Return the number of days that parameterizes the selected prediction task.

    Reads the notebook-level widgets created in the earlier cells:

    * Length of Stay - the slider value when 'Custom' is selected, otherwise
      the trailing number of the preset label (e.g. 'Length of Stay ≥ 7' -> 7).
    * Readmission - the slider value when 'Custom' is selected, otherwise the
      leading number of the preset label (e.g. '60 Day Readmission' -> 60).
    * Phenotype - always 30.
    * Anything else (Mortality) - 0.

    Returns:
        int: the day count for the task.
    """
    task_type = task_ratio.value

    if task_type == 'Length of Stay':
        if los_radio.value == 'Custom':
            return los_slider.value
        # Preset labels end with the threshold; take the last token instead of
        # a hard-coded position so rewording the label cannot break parsing.
        return int(los_radio.value.split()[-1])

    if task_type == 'Readmission':
        if readmission_radio.value == 'Custom':
            return readmission_slider.value
        # Preset labels start with the window length. This must read the
        # readmission radio: the length-of-stay widgets are not created when
        # the Readmission task is selected.
        return int(readmission_radio.value.split()[0])

    if task_type == 'Phenotype':
        return 30

    return 0

def get_disease_label():
    """Return the disease code targeted by the Phenotype task, if any.

    Returns:
        ``None`` when the selected task is not 'Phenotype'. Otherwise the
        ``DiseaseCode`` member matching the selected phenotype label, or an
        empty string when the label is not one of the four known options.
    """
    if task_ratio.value == 'Phenotype':
        chosen_phenotype = phenotype_radio.value
        # Keys must stay in sync with the labels offered by the phenotype radio group.
        code_by_label = {
            'Heart Failure in 30 days': DiseaseCode.HEARTH_FAILURE,
            'CAD in 30 days': DiseaseCode.CAD,
            'CKD in 30 days': DiseaseCode.CKD,
            'COPD in 30 days': DiseaseCode.COPD,
        }
        return code_by_label.get(chosen_phenotype, "")
    return None

def convert_to_icd_code(disease):
    """Translate a disease-filter label into its ``DiseaseCode`` member.

    Args:
        disease: label as shown in the disease-filter radio group
            ('Heart Failure', 'CKD', 'COPD' or 'CAD').

    Returns:
        The matching ``DiseaseCode`` member, or ``None`` for any other value
        (including 'No Disease Filter').
    """
    # NOTE: HEARTH_FAILURE is the spelling this notebook imports from
    # my_preprocessing.prediction_task; do not "correct" it here.
    known_codes = (
        ("Heart Failure", DiseaseCode.HEARTH_FAILURE),
        ("CKD", DiseaseCode.CKD),
        ("COPD", DiseaseCode.COPD),
        ("CAD", DiseaseCode.CAD),
    )
    for label, code in known_codes:
        if disease == label:
            return code
    return None

def convert_to_prediction_task(task_text):
    """Map the task label chosen in the UI to a ``TargetType`` member.

    Args:
        task_text: label from the task radio group.

    Returns:
        ``TargetType.MORTALITY`` for 'Mortality', ``TargetType.LOS`` for
        'Length of Stay', and ``TargetType.READMISSION`` for every other
        label (so both 'Readmission' and 'Phenotype' land there).
    """
    if task_text == 'Mortality':
        return TargetType.MORTALITY
    if task_text == 'Length of Stay':
        return TargetType.LOS
    return TargetType.READMISSION

In [95]:
# Assemble the task definition from the widget selections made above.
prediction_task = PredictionTask(
    target_type=convert_to_prediction_task(task_ratio.value),
    # DiseaseCode for the Phenotype task; None for every other task.
    disease_readmission=get_disease_label(),
    # The cohort disease filter applies to whichever task was chosen. It must
    # not read phenotype_radio: that widget only exists when the Phenotype task
    # is selected, so referencing it here raised NameError for other tasks.
    disease_selection=convert_to_icd_code(disease_filter_input.value),
    nb_days=get_time_from_input(),
    # Pass a real boolean. The raw radio value ('ICU' / 'Non-ICU') is a
    # non-empty string and therefore always truthy, which made the later
    # `if ...prediction_task.use_icu:` check pick the ICU branch for both.
    use_icu=icu_type_input.value == 'ICU',
)
# NOTE(review): the three paths are left empty exactly as before - confirm
# that CohortExtractor falls back to sensible defaults for empty strings.
cohort_extractor = CohortExtractor(
    prediction_task=prediction_task,
    preproc_dir="",
    cohort_output="",
    summary_output="",
)

In [96]:
# Run the extraction on the cohort_extractor built in the previous cell and
# keep the result in `cohort`.
cohort = cohort_extractor.extract()

## 2. FEATURE SELECTION
Features available for ICU data -
- Diagnosis (https://mimic.mit.edu/docs/iv/modules/hosp/diagnoses_icd/)
- Procedures (https://mimic.mit.edu/docs/iv/modules/icu/procedureevents/)
- Medications (https://mimic.mit.edu/docs/iv/modules/icu/inputevents/)
- Output Events (https://mimic.mit.edu/docs/iv/modules/icu/outputevents/)
- Chart Events (https://mimic.mit.edu/docs/iv/modules/icu/chartevents/)

Features available for non-ICU data -
- Diagnosis (https://mimic.mit.edu/docs/iv/modules/hosp/diagnoses_icd/)
- Procedures (https://mimic.mit.edu/docs/iv/modules/hosp/procedures_icd/)
- Medications (https://mimic.mit.edu/docs/iv/modules/hosp/prescriptions/)
- Lab Events (https://mimic.mit.edu/docs/iv/modules/hosp/labevents/)

All features will be saved in **./preproc_data/features/**

In [98]:
print("Feature Selection")
# Read the flag once, up front; it decides which source-specific checkboxes appear.
_icu_features = bool(cohort_extractor.prediction_task.use_icu)
print("Which Features you want to include for cohort?")

# Diagnosis is offered for both data sources and always comes first.
dia_input = widgets.Checkbox(description='Diagnosis')
display(dia_input)

if _icu_features:
    # ICU-only feature groups.
    out_input = widgets.Checkbox(description='Output Events')
    display(out_input)
    chart_input = widgets.Checkbox(description='Chart Events(Labs and Vitals)')
    display(chart_input)
else:
    # Non-ICU data offers lab events instead.
    lab_input = widgets.Checkbox(description='Labs')
    display(lab_input)

# Procedures and medications close the list for both data sources.
proc_input = widgets.Checkbox(description='Procedures')
display(proc_input)
med_input = widgets.Checkbox(description='Medications')
display(med_input)

print("**Please run below cell to extract selected features**")

Feature Selection
Which Features you want to include for cohort?


Checkbox(value=False, description='Diagnosis')

Checkbox(value=False, description='Output Events')

Checkbox(value=False, description='Chart Events(Labs and Vitals)')

Checkbox(value=False, description='Procedures')

Checkbox(value=False, description='Medications')

**Please run below cell to extract selected features**
