In [18]:
import ipywidgets as widgets
from pathlib import Path

In [19]:
from pipeline.cohort_extractor import CohortExtractor
from pipeline.prediction_task import TargetType, PredictionTask, DiseaseCode

# Welcome to MIMIC-IV Project

In [20]:
# Make sure the ./raw_data folder exists (no error if it is already there);
# the downloaded MIMIC-IV files are expected to be saved under it.
Path("raw_data").mkdir(parents=True, exist_ok=True)

This repository explains the steps to download and clean the MIMIC-IV dataset for analysis.
The repository is compatible with MIMIC-IV v2.0.

Please go to:
- https://physionet.org/content/mimiciv/2.0/ 

Follow instructions to get access to MIMIC-IV dataset.


Save the downloaded files in the folder `raw_data`.

The structure should look like below
- raw_data/mimiciv_2_0/hosp
- raw_data/mimiciv_2_0/icu

## 1. DATA EXTRACTION

In [21]:
# Top-level task selector; later cells read the choice via `task_ratio.value`.
print("Please select what prediction task you want to perform ?")
task_ratio = widgets.RadioButtons(
    value='Mortality',
    options=['Mortality', 'Length of Stay', 'Readmission', 'Phenotype'],
)
display(task_ratio)

Please select what prediction task you want to perform ?


RadioButtons(options=('Mortality', 'Length of Stay', 'Readmission', 'Phenotype'), value='Mortality')

DEBUG:Comm:handle_msg[3bcfcd62849c4bad8e7dd439cb0868b0]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 16, 538000, tzinfo=tzutc()), 'msg_id': '7a682193-033a-424e-ac5a-b9a57ff09609', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': '7a682193-033a-424e-ac5a-b9a57ff09609', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': '3bcfcd62849c4bad8e7dd439cb0868b0', 'data': {'method': 'update', 'state': {'index': 3}, 'buffer_paths': []}}, 'buffers': []})


### Refining Cohort and Prediction Task Definition

Based on your current selection, the following block will provide options to further refine the prediction task and the cohort associated with it:

- First you will refine the prediction task choosing from following options -
    - **Length of Stay** - You can select from two predefined options or enter a custom number of days to predict a length of stay greater than that number of days.

    - **Readmission** - You can select from two predefined options or enter custom number of days to predict readmission after "number of days" after previous admission.

    - **Phenotype Prediction** - You can select from four major chronic diseases to predict its future outcome

        - Heart failure
        - CAD (Coronary Artery Disease)
        - CKD (Chronic Kidney Disease)
        - COPD (Chronic obstructive pulmonary disease)

- Second, you will choose whether to perform the above task using ICU or non-ICU admissions data

- Third, you can refine the cohort selection for any of the above chosen prediction tasks by including only the admission samples admitted with a particular chronic disease -
    - Heart failure
    - CAD (Coronary Artery Disease)
    - CKD (Chronic Kidney Disease)
    - COPD (Chronic obstructive pulmonary disease)
    

In [22]:
def create_length_of_stay_widgets():
    """Display the length-of-stay controls and hand them back to the caller.

    Shows a radio group (two preset thresholds plus 'Custom') followed by a
    labelled integer slider ranging from 1 to 10 (default 3).

    Returns:
        tuple: ``(radio_input, slider)`` -- the RadioButtons and IntSlider widgets.
    """
    radio_input = widgets.RadioButtons(
        options=['Length of Stay ≥ 3', 'Length of Stay ≥ 7', 'Custom'],
        value='Length of Stay ≥ 3',
    )
    slider = widgets.IntSlider(value=3, min=1, max=10, step=1, continuous_update=False)
    slider_label = widgets.Label('Length of stay ≥ (days):', layout={'width': '180px'})
    display(radio_input, widgets.HBox([slider_label, slider]))
    return radio_input, slider

def create_readmission_widgets():
    """Display the readmission controls and hand them back to the caller.

    Shows a radio group (30/60/90/120-day presets plus 'Custom') followed by a
    labelled integer slider ranging from 10 to 150 in steps of 10 (default 30).

    Returns:
        tuple: ``(radio_input, slider)`` -- the RadioButtons and IntSlider widgets.
    """
    radio_input = widgets.RadioButtons(
        options=[
            '30 Day Readmission',
            '60 Day Readmission',
            '90 Day Readmission',
            '120 Day Readmission',
            'Custom',
        ],
        value='30 Day Readmission',
    )
    slider = widgets.IntSlider(value=30, min=10, max=150, step=10)
    slider_label = widgets.Label('Readmission after (days):', layout={'width': '180px'})
    display(radio_input, widgets.HBox([slider_label, slider]))
    return radio_input, slider

def create_phenotype_widgets():
    """Display and return the radio group used to pick the phenotype to predict.

    One option per disease, each labelled '<disease> in 30 days'; the first
    entry (Heart Failure) is preselected.

    Returns:
        The displayed RadioButtons widget.
    """
    diseases = ('Heart Failure', 'CAD', 'CKD', 'COPD')
    labels = [f'{disease} in 30 days' for disease in diseases]
    radio_input = widgets.RadioButtons(options=labels, value=labels[0])
    display(radio_input)
    return radio_input

def create_mortality_widgets():
    """Build (without displaying) a single-option radio group for the mortality task.

    Returns:
        A RadioButtons widget whose only option, 'Mortality', is selected.
    """
    only_option = 'Mortality'
    return widgets.RadioButtons(options=[only_option], value=only_option)


In [23]:
# Task-specific refinement: every task except Mortality gets an extra prompt
# followed by its own widgets.
if task_ratio.value == 'Mortality':
    mortality_radio = create_mortality_widgets()
else:
    print("Please select to precise the prediction task ")
    if task_ratio.value == 'Length of Stay':
        los_radio, los_slider = create_length_of_stay_widgets()
    elif task_ratio.value == 'Readmission':
        readmission_radio, readmission_slider = create_readmission_widgets()
    elif task_ratio.value == 'Phenotype':
        phenotype_radio = create_phenotype_widgets()

# Admission source: ICU stays or general (non-ICU) hospital admissions.
print("Please select below if you want to work with ICU or Non-ICU data:")
icu_type_input = widgets.RadioButtons(value='ICU', options=['ICU', 'Non-ICU'])
display(icu_type_input)

# Optional restriction of the cohort to one chronic disease.
print("Please select if you want to perform the chosen prediction task for a specific disease.")
disease_filter_input = widgets.RadioButtons(
    value='No Disease Filter',
    options=['No Disease Filter', 'Heart Failure', 'CKD', 'CAD', 'COPD'],
)
display(disease_filter_input)



Please select to precise the prediction task 


RadioButtons(options=('Heart Failure in 30 days', 'CAD in 30 days', 'CKD in 30 days', 'COPD in 30 days'), valu…

Please select below if you want to work with ICU or Non-ICU data:


RadioButtons(options=('ICU', 'Non-ICU'), value='ICU')

Please select if you want to perform the chosen prediction task for a specific disease.


RadioButtons(options=('No Disease Filter', 'Heart Failure', 'CKD', 'CAD', 'COPD'), value='No Disease Filter')

DEBUG:Comm:handle_msg[c36fbf0d84594a92991f0e4f16fada95]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 24, 870000, tzinfo=tzutc()), 'msg_id': 'cc4c1a57-67bc-41aa-bb71-661789887b61', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': 'cc4c1a57-67bc-41aa-bb71-661789887b61', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': 'c36fbf0d84594a92991f0e4f16fada95', 'data': {'method': 'update', 'state': {'index': 2}, 'buffer_paths': []}}, 'buffers': []})
DEBUG:Comm:handle_msg[67529df9b9a74905988156ebb1ce5591]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 26, 251000, tzinfo=tzutc()), 'msg_id': 'f6a411f2-52fd-4b55-a230-b350dd0bc4e8', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': 'f6a411f2-52fd-4b55-a230-b350dd0bc4e8', 'msg_type'

In [24]:
def get_time_from_input():
    """Return the number of days attached to the selected prediction task.

    Reads the notebook-level widgets (`task_ratio` plus the task-specific
    radio/slider widgets created in the refinement cell).

    Returns:
        int: the slider value when 'Custom' is chosen; otherwise the number
        parsed from the selected radio label; 30 for 'Phenotype'; 0 for any
        other task.
    """
    selected_task = task_ratio.value

    if selected_task == 'Length of Stay':
        if los_radio.value == 'Custom':
            return los_slider.value
        # Labels look like 'Length of Stay ≥ 3': the day count is the fifth token.
        return int(los_radio.value.split()[4])

    if selected_task == 'Readmission':
        if readmission_radio.value == 'Custom':
            return readmission_slider.value
        # Labels look like '30 Day Readmission': the day count is the first token.
        return int(readmission_radio.value.split()[0])

    if selected_task == 'Phenotype':
        return 30

    return 0

def get_disease_label():
    """Map the selected phenotype radio label to its `DiseaseCode` member.

    Reads the notebook-level `task_ratio` and `phenotype_radio` widgets.

    Returns:
        None when the selected task is not 'Phenotype'; otherwise the matching
        `DiseaseCode` member, or "" when the label is not in the lookup table.
    """
    if task_ratio.value == 'Phenotype':
        selected_label = phenotype_radio.value
        code_by_label = {
            'Heart Failure in 30 days': DiseaseCode.HEARTH_FAILURE,
            'CAD in 30 days': DiseaseCode.CAD,
            'CKD in 30 days': DiseaseCode.CKD,
            'COPD in 30 days': DiseaseCode.COPD,
        }
        return code_by_label.get(selected_label, "")
    return None

def convert_to_icd_code(disease):
    """Translate a disease-filter label into its `DiseaseCode` member.

    Args:
        disease: label from the disease filter radio group
            ('Heart Failure', 'CKD', 'COPD' or 'CAD').

    Returns:
        The matching `DiseaseCode` member, or None for any other label
        (e.g. 'No Disease Filter').
    """
    if disease == "Heart Failure":
        return DiseaseCode.HEARTH_FAILURE
    if disease == "CKD":
        return DiseaseCode.CKD
    if disease == "COPD":
        return DiseaseCode.COPD
    if disease == "CAD":
        return DiseaseCode.CAD
    return None

def convert_to_prediction_task(task_text):
    """Translate the task radio label into a `TargetType` member.

    Args:
        task_text: label selected in the task radio group.

    Returns:
        `TargetType.MORTALITY` for 'Mortality', `TargetType.LOS` for
        'Length of Stay', and `TargetType.READMISSION` for every other label
        (which includes 'Readmission' and 'Phenotype').
    """
    if task_text == 'Mortality':
        return TargetType.MORTALITY
    if task_text == 'Length of Stay':
        return TargetType.LOS
    # Everything else falls through to the readmission target.
    return TargetType.READMISSION

In [25]:
# Assemble the task definition from the widget selections, then extract the cohort.
prediction_task = PredictionTask(
    target_type=convert_to_prediction_task(task_ratio.value),
    # Only phenotype tasks carry a target disease.
    disease_readmission=None if task_ratio.value != 'Phenotype' else get_disease_label(),
    disease_selection=convert_to_icd_code(disease_filter_input.value),
    nb_days=get_time_from_input(),
    use_icu=(icu_type_input.value == "ICU"),
)
cohort_extractor = CohortExtractor(prediction_task=prediction_task)
cohort = cohort_extractor.extract()

INFO:root:EXTRACTING FOR: NON-ICU | READMISSION DUE TO N18 | 30 |
INFO:root:[ READMISSION DUE TO N18 ]
INFO:root:[ READMISSION LABELS FINISHED: 10 Readmission Cases ]
INFO:root:[ COHORT cohort_Non-ICU_readmission_30_N18 SAVED ]
INFO:root:[ COHORT SUCCESSFULLY SAVED ]
INFO:root:cohort_Non-ICU_readmission_30_N18
INFO:root:[ SUMMARY SUCCESSFULLY SAVED ]


## 2. FEATURE SELECTION
Features available for ICU data -
- Diagnosis (https://mimic.mit.edu/docs/iv/modules/hosp/diagnoses_icd/)
- Procedures (https://mimic.mit.edu/docs/iv/modules/icu/procedureevents/)
- Medications (https://mimic.mit.edu/docs/iv/modules/icu/inputevents/)
- Output Events (https://mimic.mit.edu/docs/iv/modules/icu/outputevents/)
- Chart Events (https://mimic.mit.edu/docs/iv/modules/icu/chartevents/)

Features available for non-ICU data -
- Diagnosis (https://mimic.mit.edu/docs/iv/modules/hosp/diagnoses_icd/)
- Procedures (https://mimic.mit.edu/docs/iv/modules/hosp/procedures_icd/)
- Medications (https://mimic.mit.edu/docs/iv/modules/hosp/prescriptions/)
- Lab Events (https://mimic.mit.edu/docs/iv/modules/hosp/labevents/)

All features will be saved in **./preproc_data/features/**

In [26]:
# Feature checkboxes. Diagnosis, Procedures and Medications are offered for both
# cohorts; Output/Chart events are ICU-only and Labs are non-ICU-only.
# The display order matches the original per-cohort listing.
print("Feature Selection")
print("Which Features you want to include for cohort?")

dia_input = widgets.Checkbox(description='Diagnosis')
display(dia_input)

if cohort_extractor.prediction_task.use_icu:
    out_input = widgets.Checkbox(description='Output Events')
    display(out_input)
    chart_input = widgets.Checkbox(description='Chart Events(Labs and Vitals)')
    display(chart_input)
else:
    lab_input = widgets.Checkbox(description='Labs')
    display(lab_input)

proc_input = widgets.Checkbox(description='Procedures')
display(proc_input)
med_input = widgets.Checkbox(description='Medications')
display(med_input)

print("**Please run below cell to extract selected features**")

Feature Selection
Which Features you want to include for cohort?


Checkbox(value=False, description='Diagnosis')

Checkbox(value=False, description='Labs')

Checkbox(value=False, description='Procedures')

Checkbox(value=False, description='Medications')

**Please run below cell to extract selected features**


DEBUG:Comm:handle_msg[54ac504c8f9344798bd17c4626ca761a]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 38, 405000, tzinfo=tzutc()), 'msg_id': 'a626bb10-fe7b-4dbb-a19a-b3d6f4f3ac84', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': 'a626bb10-fe7b-4dbb-a19a-b3d6f4f3ac84', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': '54ac504c8f9344798bd17c4626ca761a', 'data': {'method': 'update', 'state': {'value': True}, 'buffer_paths': []}}, 'buffers': []})
DEBUG:Comm:handle_msg[b1363cc496c64745985b61fde3969653]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 38, 910000, tzinfo=tzutc()), 'msg_id': 'fe702424-011d-4b7a-84ab-d1b147a018db', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': 'fe702424-011d-4b7a-84ab-d1b147a018db', 'msg_ty

In [27]:
from pipeline.features_extractor import FeatureExtractor
# Extract the ticked features. The `and` short-circuits keep the cell from
# touching checkboxes that were never created for the other cohort type
# (out_input/chart_input exist only for ICU, lab_input only for non-ICU).
feature_extractor = FeatureExtractor(
    cohort_output=cohort_extractor.cohort_output,
    use_icu=prediction_task.use_icu,
    for_diagnoses=dia_input.value,
    for_procedures=proc_input.value,
    for_medications=med_input.value,
    for_output_events=prediction_task.use_icu and out_input.value,
    for_chart_events=prediction_task.use_icu and chart_input.value,
    for_labs=not prediction_task.use_icu and lab_input.value,
)

features = feature_extractor.save_features()

INFO:root:[EXTRACTING DIAGNOSIS DATA]


INFO:root:[SUCCESSFULLY SAVED DIAGNOSES DATA]
INFO:root:[EXTRACTING PROCEDURES DATA]
INFO:root: # Unique ICD9 Procedures:36
INFO:root: # Unique ICD10 Procedures:48
INFO:root:
Value counts of each ICD version:
 icd_version
10    70
9     52
Name: count, dtype: int64
INFO:root:# Admissions:38
INFO:root:Total number of rows: 122
INFO:root:[SUCCESSFULLY SAVED PROCEDURES DATA]
INFO:root:[EXTRACTING MEDICATIONS DATA]
INFO:root:Number of unique type of drug: 236
INFO:root:Number of unique type of drug after grouping: 205
INFO:root:# Admissions: 59
INFO:root:Total number of rows: 2301
INFO:root:[SUCCESSFULLY SAVED MEDICATIONS DATA]
INFO:root:[EXTRACTING LABS DATA]
1it [00:01,  1.44s/it]
INFO:root:[SUCCESSFULLY SAVED LABS DATA]


## 3. CLINICAL GROUPING
Grouping medical codes will reduce dimensional space of features.

Default options selected below will group medical codes to reduce feature dimension space.


In [28]:
# Grouping choices. The diagnosis option is offered whenever diagnoses were
# extracted; medication and procedure options are offered for non-ICU cohorts only.
if feature_extractor.for_diagnoses:
    print("Do you want to group ICD 10 DIAG codes ?")
    group_dia_icd_input = widgets.RadioButtons(
        options=[
            'Keep both ICD-9 and ICD-10 codes',
            'Convert ICD-9 to ICD-10 codes',
            'Convert ICD-9 to ICD-10 and group ICD-10 codes',
        ],
        value='Convert ICD-9 to ICD-10 and group ICD-10 codes',
        layout={'width': '100%'},
    )
    display(group_dia_icd_input)

if not prediction_task.use_icu and feature_extractor.for_medications:
    print("Do you want to group Medication codes to use Non propietary names?")
    group_med_code_input = widgets.RadioButtons(
        options=['Yes', 'No'],
        value='Yes',
        layout={'width': '100%'},
    )
    display(group_med_code_input)

if not prediction_task.use_icu and feature_extractor.for_procedures:
    print("Which ICD codes for Procedures you want to keep in data?")
    group_proc_icd_input = widgets.RadioButtons(
        options=['ICD-9 and ICD-10', 'ICD-10'],
        value='ICD-10',
        layout={'width': '100%'},
    )
    display(group_proc_icd_input)

print("**Please run below cell to perform feature preprocessing**")

Do you want to group ICD 10 DIAG codes ?


RadioButtons(index=2, layout=Layout(width='100%'), options=('Keep both ICD-9 and ICD-10 codes', 'Convert ICD-9…

Do you want to group Medication codes to use Non propietary names?


RadioButtons(layout=Layout(width='100%'), options=('Yes', 'No'), value='Yes')

Which ICD codes for Procedures you want to keep in data?


RadioButtons(index=1, layout=Layout(width='100%'), options=('ICD-9 and ICD-10', 'ICD-10'), value='ICD-10')

**Please run below cell to perform feature preprocessing**


DEBUG:Comm:handle_msg[13acbd87149041f8a818333d89773ae8]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 54, 785000, tzinfo=tzutc()), 'msg_id': '5efdca73-a1cc-4b37-914a-1077ec60e0bd', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': '5efdca73-a1cc-4b37-914a-1077ec60e0bd', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': '13acbd87149041f8a818333d89773ae8', 'data': {'method': 'update', 'state': {'index': 1}, 'buffer_paths': []}}, 'buffers': []})
DEBUG:Comm:handle_msg[bfc5d829d7cd403fb2091b74047b2c4a]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 50, 55, 571000, tzinfo=tzutc()), 'msg_id': '318406c2-dc7c-4745-8987-3c0b43f34a9a', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': '318406c2-dc7c-4745-8987-3c0b43f34a9a', 'msg_type'

In [29]:
from pipeline.feature.diagnoses import IcdGroupOption

# Translate the diagnosis-grouping radio label into the pipeline enum.
# The result is stored in `group_diag_icd` (the name passed to FeaturePreprocessor);
# the widget `group_dia_icd_input` is left untouched so this cell can be re-run.
group_diag_icd = IcdGroupOption.KEEP
if feature_extractor.for_diagnoses:
    group_diag_icd = {
        "Keep both ICD-9 and ICD-10 codes": IcdGroupOption.KEEP,
        "Convert ICD-9 to ICD-10 codes": IcdGroupOption.CONVERT,
        "Convert ICD-9 to ICD-10 and group ICD-10 codes": IcdGroupOption.GROUP,
    }.get(group_dia_icd_input.value, IcdGroupOption.KEEP)

# The medication/procedure widgets exist only for non-ICU cohorts with the
# matching feature extracted; the short-circuits below never touch them otherwise.
group_med_code = (
    feature_extractor.for_medications
    and (not prediction_task.use_icu)
    and (group_med_code_input.value == "Yes")
)
# ICD-9 procedure codes are dropped only when a non-ICU user explicitly picked 'ICD-10'.
keep_proc_icd9 = prediction_task.use_icu or not (
    feature_extractor.for_procedures and (group_proc_icd_input.value == "ICD-10")
)

In [31]:
from pipeline.features_preprocessor import FeaturePreprocessor
# First preprocessing pass: apply the grouping choices; chart/lab cleaning and
# imputation are all switched off here.
feat_preproc = FeaturePreprocessor(
    feature_extractor=feature_extractor,
    group_diag_icd=group_diag_icd,
    group_med_code=group_med_code,
    keep_proc_icd9=keep_proc_icd9,
    clean_chart=False,
    impute_outlier_chart=False,
    clean_labs=False,
    impute_labs=False,
)
preproc = feat_preproc.preprocess_no_event_features()

INFO:root:[PROCESSING DIAGNOSIS DATA]


KeyError: <DiagnosesHeader.ICD_CODE: 'icd_code'>

## 4. SUMMARY OF FEATURES

This step will generate summary of all features extracted so far.<br>
It will save summary files in **./preproc_data/summary/**<br>
- These files provide summary about **mean frequency** of medical codes per admission.<br>
- It also provides **total occurrence count** of each medical code.<br>
- For labs and chart events it will also provide <br>**missing %** which tells how many rows for a certain medical code has missing value.

Please use this information to further refine your cohort by selecting <br>which medical codes in each feature you want to keep and <br>which codes you would like to remove for downstream analysis tasks.

**Please run below cell to generate summary files**

In [32]:
# Generate the per-feature summary files using the preprocessor built earlier in the notebook.
summaries = feat_preproc.save_summaries()

## 5. Feature Selection

Based on the files generated in the previous step and other information gathered by you,<br>
Please select which medical codes you want to include in this study.

Please run the cell below to select the features for which you want to perform feature selection.

- Select **Yes** if you want to select a subset of medical codes for that feature and<br> **edit** the corresponding feature file for it.
- Select **No** if you want to keep all the codes in a feature.

In [33]:
def _ask_feature_selection(feature_label, csv_name):
    """Print the selection question for one feature and show a Yes/No radio.

    Args:
        feature_label: feature name inserted into the question text.
        csv_name: summary file name inserted into the hint text.

    Returns:
        The displayed RadioButtons widget (default 'No').
    """
    # NOTE(review): the hint points at ./data/summary/ while the markdown above
    # mentions ./preproc_data/summary/ -- confirm which path is current.
    print(f"Do you want to do Feature Selection for {feature_label} \n (If yes, please edit list of codes in ./data/summary/{csv_name})")
    choice = widgets.RadioButtons(options=['Yes', 'No'], value='No')
    display(choice)
    return choice


# One prompt per extracted feature; the ICU / non-ICU guards mirror the extraction flags.
if feature_extractor.for_diagnoses:
    select_dia_input = _ask_feature_selection("Diagnoses", "diag_features.csv")
if feature_extractor.for_medications:
    select_med_input = _ask_feature_selection("Medications", "med_features.csv")
if feature_extractor.for_procedures:
    select_proc_input = _ask_feature_selection("Procedures", "proc_features.csv")
if prediction_task.use_icu and feature_extractor.for_output_events:
    select_out_input = _ask_feature_selection("Output event", "out_features.csv")
if prediction_task.use_icu and feature_extractor.for_chart_events:
    select_chart_input = _ask_feature_selection("Chart events", "chart_features.csv")
if not(prediction_task.use_icu) and feature_extractor.for_labs:
    select_lab_input = _ask_feature_selection("Labs", "lab_features.csv")

Do you want to do Feature Selection for Diagnoses 
 (If yes, please edit list of codes in ./data/summary/diag_features.csv)


RadioButtons(index=1, options=('Yes', 'No'), value='No')

Do you want to do Feature Selection for Medications 
 (If yes, please edit list of codes in ./data/summary/med_features.csv)


RadioButtons(index=1, options=('Yes', 'No'), value='No')

Do you want to do Feature Selection for Procedures 
 (If yes, please edit list of codes in ./data/summary/proc_features.csv)


RadioButtons(index=1, options=('Yes', 'No'), value='No')

Do you want to do Feature Selection for Labs 
 (If yes, please edit list of codes in ./data/summary/lab_features.csv)


RadioButtons(index=1, options=('Yes', 'No'), value='No')

DEBUG:Comm:handle_msg[b99a2e5fbc6b442dbfd9d9e7418631c7]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 51, 46, 219000, tzinfo=tzutc()), 'msg_id': '49d64066-c5aa-4e5a-98ae-671b15c31517', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': '49d64066-c5aa-4e5a-98ae-671b15c31517', 'msg_type': 'comm_msg', 'parent_header': {}, 'metadata': {}, 'content': {'comm_id': 'b99a2e5fbc6b442dbfd9d9e7418631c7', 'data': {'method': 'update', 'state': {'index': 0}, 'buffer_paths': []}}, 'buffers': []})
DEBUG:Comm:handle_msg[742b411f803d446c98f413b37326fc91]({'header': {'date': datetime.datetime(2023, 12, 6, 16, 51, 47, 572000, tzinfo=tzutc()), 'msg_id': '01895a98-3d5b-436a-a36e-35007d0e7327', 'msg_type': 'comm_msg', 'session': '4a4e8d02-4780-424c-ac50-4a755bdddb91', 'username': 'c1719500-6c9e-4e19-a197-19c14a569555', 'version': '5.2'}, 'msg_id': '01895a98-3d5b-436a-a36e-35007d0e7327', 'msg_type'

In [36]:
from pipeline.feature_selector import FeatureSelector


# A selection flag can only be True when the matching feature was extracted
# (and therefore its Yes/No widget exists); otherwise it stays False.
select_diag = False
if feature_extractor.for_diagnoses:
    select_diag = select_dia_input.value == 'Yes'

select_med = False
if feature_extractor.for_medications:
    select_med = select_med_input.value == 'Yes'

select_proc = False
if feature_extractor.for_procedures:
    select_proc = select_proc_input.value == 'Yes'

select_out = False
if prediction_task.use_icu and feature_extractor.for_output_events:
    select_out = select_out_input.value == 'Yes'

select_chart = False
if prediction_task.use_icu and feature_extractor.for_chart_events:
    select_chart = select_chart_input.value == 'Yes'

select_lab = False
if not (prediction_task.use_icu) and feature_extractor.for_labs:
    select_lab = select_lab_input.value == 'Yes'

# Positional order is kept exactly as in the original call.
feature_selector = FeatureSelector(
    prediction_task.use_icu,
    select_diag,
    select_med,
    select_proc,
    select_lab,
    select_chart,
    select_out,
)

## 6. CLEANING OF FEATURES
Below you will have the option to clean lab and chart events by performing outlier removal and unit conversion.

Outlier removal is performed to remove values higher than selected **right threshold** percentile and lower than selected **left threshold** percentile among all values for each itemid. 

**Please run the cell below to select preprocessing for different features**

In [42]:
# Outlier controls are shown for chart events (ICU) or lab events (non-ICU).
# NOTE(review): the gate uses the feature-selection answers (select_chart /
# select_lab), not the extraction flags -- confirm that this is intended.
if (select_chart if prediction_task.use_icu else select_lab):
    event_name = "chart" if prediction_task.use_icu else "lab"
    print(f"Outlier removal in values of {event_name} events ?")
    layout = widgets.Layout(width='100%', height='40px')  # set width and height

    outlier_input = widgets.RadioButtons(
        options=[
            'No outlier detection',
            'Impute Outlier (default:98)',
            'Remove outliers (default:98)',
        ],
        value='No outlier detection',
        layout=layout,
    )
    display(outlier_input)

    # Upper (right) and lower (left) percentile thresholds.
    right_outlier = widgets.IntSlider(
        value=98, min=90, max=99, step=1, disabled=False, layout={'width': '100%'}
    )
    left_outlier = widgets.IntSlider(
        value=0, min=0, max=10, step=1, disabled=False, layout={'width': '100%'}
    )
    right_label = widgets.Label('Right Outlier Threshold', layout={'width': '150px'})
    display(widgets.HBox([right_label, right_outlier]))
    left_label = widgets.Label('Left Outlier Threshold', layout={'width': '150px'})
    display(widgets.HBox([left_label, left_outlier]))


Outlier removal in values of lab events ?


RadioButtons(layout=Layout(height='40px', width='100%'), options=('No outlier detection', 'Impute Outlier (def…

HBox(children=(Label(value='Right Outlier Threshold', layout=Layout(width='150px')), IntSlider(value=98, layou…

HBox(children=(Label(value='Left Outlier Threshold', layout=Layout(width='150px')), IntSlider(value=0, layout=…

In [43]:
# Inspection cell: shows the current value of the right-threshold slider.
right_outlier.value

98

In [51]:
# Defaults: no cleaning, thresholds spanning the full 0-100 percentile range.
right_thresh = 100
left_thresh = 0
clean_chart = False
clean_lab = False

# ICU cohorts clean chart events. Read the slider *values* (not the widget
# objects) and store the lower bound under `left_thresh`, the name that is
# passed on to FeaturePreprocessor.
if prediction_task.use_icu and select_chart:
    clean_chart = outlier_input.value != 'No outlier detection'
    right_thresh = right_outlier.value
    left_thresh = left_outlier.value

# Non-ICU cohorts clean lab events. `select_lab` can only be True when
# use_icu is False, so this branch must test `not use_icu` to ever run.
if not prediction_task.use_icu and select_lab:
    clean_lab = outlier_input.value != 'No outlier detection'
    right_thresh = right_outlier.value
    left_thresh = left_outlier.value



In [52]:
# Second preprocessing pass, this time with the outlier settings.
# NOTE(review): the grouping options are reset here (None / False / False)
# rather than reusing the earlier choices -- confirm this is intended.
# NOTE(review): the impute flags mirror the clean flags, so 'Impute Outlier'
# and 'Remove outliers' are not distinguished at this point -- confirm.
# NOTE(review): the recorded run of this cell ends in a KeyError while
# processing medications; check that preprocess_no_event_features() is the
# intended call for this step.
feat_preproc = FeaturePreprocessor(
    feature_extractor=feature_extractor,
    group_diag_icd=None,
    group_med_code=False,
    keep_proc_icd9=False,
    clean_chart=clean_chart,
    impute_outlier_chart=clean_chart,
    clean_labs=clean_lab,
    impute_labs=clean_lab,
    thresh=right_thresh,
    left_thresh=left_thresh,
)
preproc = feat_preproc.preprocess_no_event_features()

INFO:root:[PROCESSING DIAGNOSIS DATA]
INFO:root:Total number of rows: 1369
INFO:root:[SUCCESSFULLY SAVED DIAGNOSES DATA]
INFO:root:[PROCESSING MEDICATIONS DATA]


KeyError: <NonIcuMedicationHeader.DRUG: 'drug'>