In [51]:
import json
import os
from glob import glob

import numpy as np
import pandas as pd

# Remove the cap on displayed columns so wide DataFrames are shown in full
pd.set_option('display.max_columns', None)


In [52]:
def load_and_combine_json_files(directory_path, search_pattern):
    """Load every JSON file matching a pattern and stack them into one DataFrame.

    Each file is parsed with ``json.load`` and converted with ``pd.DataFrame``.
    A ``dataset_type`` column is added to flag the split the rows came from,
    derived from the file *name* (not its directory): ``'train'``, ``'test'``,
    ``'validate'`` (name contains ``'val'``) or ``'unknown'``.

    Parameters
    ----------
    directory_path : str
        Directory holding the JSON files, with or without a trailing
        separator. A non-directory string is treated as a plain prefix and
        concatenated with ``search_pattern``, as before.
    search_pattern : str
        Glob pattern for the file names, e.g. ``'pop_*.json'``.

    Returns
    -------
    pandas.DataFrame
        Rows of all matched files with a fresh 0..n-1 index, in the order
        ``glob`` returned the files. An empty DataFrame if nothing matched.
    """
    # Join properly when given a real directory so a missing trailing slash
    # no longer yields a pattern that silently matches nothing.
    if os.path.isdir(directory_path):
        glob_pattern = os.path.join(directory_path, search_pattern)
    else:
        glob_pattern = directory_path + search_pattern
    matched_files = glob(glob_pattern)

    # Collect per-file frames and concatenate once at the end; concatenating
    # inside the loop re-copies all previously loaded rows on every iteration.
    frames = []

    for file in matched_files:
        # Classify on the base name only, so a parent directory whose name
        # happens to contain "train"/"test"/"val" cannot mislabel the file.
        file_name = os.path.basename(file)
        if 'train' in file_name:
            dataset_type = 'train'
        elif 'test' in file_name:
            dataset_type = 'test'
        elif 'val' in file_name:
            dataset_type = 'validate'
        else:
            dataset_type = 'unknown'

        print('Loading data files...', file, dataset_type)
        # JSON is UTF-8 by specification; do not depend on the platform default
        with open(file, encoding='utf-8') as f:
            data = json.load(f)
        df = pd.DataFrame(data)

        # Flag which split these rows belong to
        df['dataset_type'] = dataset_type
        frames.append(df)

    # pd.concat raises on an empty list, so keep the original "no files" result
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, ignore_index=True)

# Define parameters for the function to combine JSON files.
# The data directory can be overridden with the MIMIC_JSON_DIR environment
# variable so the notebook is not tied to one machine; the original location
# remains the default. Joining with '' guarantees a trailing separator, which
# the loader relies on when it appends the search pattern.
directory_path = os.path.join(
    os.environ.get(
        'MIMIC_JSON_DIR',
        '/Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/',
    ),
    '',
)
pop_files = 'pop_*.json'  # This pattern can be changed based on the files you're looking for
sample_files = 'sample_*.json'

# Load and combine the JSON files
pop_df = load_and_combine_json_files(directory_path, pop_files)
sample_df = load_and_combine_json_files(directory_path, sample_files)

print(pop_df.columns)


Loading data files... /Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/pop_validation_set__chexpert.json validate
Loading data files... /Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/pop_test_set__chexpert.json test
Loading data files... /Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/pop_train_set__chexpert.json train
Loading data files... /Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/sample__train_set__chexpert.json train
Loading data files... /Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/sample__test_set__chexpert.json test
Loading data files... /Users/leegary/Downloads/mimic_iv_multilabel__json_files__20240302/sample__validation_set__chexpert.json validate
Index(['patient_id', 'visit_id', 'study_id', 'temperature', 'heartrate',
       'resprate', 'o2sat', 'sbp', 'dbp', 'pain', 'acuity',
       'pathologies_number', 'pathologies_names', 'radiology_note',
       'discharge_note', 'chief

In [53]:
# Tally the population rows per dataset_type value, as raw counts and as percentages
pop_dataset_type_counts = pop_df['dataset_type'].value_counts()
pop_dataset_type_percentages = pop_df['dataset_type'].value_counts(normalize=True).mul(100)

print('Pop dataset size ', pop_df.shape)
# Header and Series are emitted on separate lines, exactly as two print calls would
print("\nCounts for Pop Dataset:", pop_dataset_type_counts, sep="\n")
print("\nPercentages for Pop Dataset:", pop_dataset_type_percentages, sep="\n")

Pop dataset size  (14443, 27)

Counts for Pop Dataset:
dataset_type
train       9630
validate    2407
test        2406
Name: count, dtype: int64

Percentages for Pop Dataset:
dataset_type
train       66.675898
validate    16.665513
test        16.658589
Name: proportion, dtype: float64


In [54]:
# Tally the sample rows per dataset_type value, as raw counts and as percentages
sample_dataset_type_counts = sample_df['dataset_type'].value_counts()
sample_dataset_type_percentages = sample_df['dataset_type'].value_counts(normalize=True).mul(100)

print('Sample dataset size ', sample_df.shape)
# Header and Series are emitted on separate lines, exactly as two print calls would
print("\nCounts for Sample Dataset:", sample_dataset_type_counts, sep="\n")
print("\nPercentages for Sample Dataset:", sample_dataset_type_percentages, sep="\n")

Sample dataset size  (1445, 27)

Counts for Sample Dataset:
dataset_type
train       963
test        241
validate    241
Name: count, dtype: int64

Percentages for Sample Dataset:
dataset_type
train       66.643599
test        16.678201
validate    16.678201
Name: proportion, dtype: float64


In [55]:
# Preview the first rows of the combined population DataFrame
pop_df.head()

Unnamed: 0,patient_id,visit_id,study_id,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,pathologies_number,pathologies_names,radiology_note,discharge_note,chief_complaint,major_surgical_or_invasive_procedure,history_of_present_illness,past_medical_history,family_history,atelectasis,cardiomegaly,edema,lung_opacity,pleural_effusion,pneumonia,dataset_type
0,13110963.0,23553001,58949064.0,101.2,99.0,18.0,97.0,151.0,66.0,0,2.0,1.0,lung_opacity,FINAL REPORT\...,\nName: ___ Unit No: __...,fever,,"___ yo F with advanced dementia, found to be f...",# Dementia\n# Stable T-cell lymphoproliferativ...,The patient's sister had fibroids,0.0,0.0,0.0,1.0,0.0,0.0,validate
1,17918016.0,24868379,53178110.0,98.5,140.0,18.0,97.0,137.0,94.0,0,1.0,2.0,"cardiomegaly, pleural_effusion",FINAL REPORT\...,\nName: ___ Unit No: ___\n...,"shortness of breath, lower extremity edema, ch...",none.,"___ yo morbidly obese female with a h/o HLD, H...",GERD \nCholelythiasis \nOSTEOARTHRITIS \nOBE...,"No family history of early MI, arrhythmia, car...",0.0,1.0,0.0,0.0,1.0,0.0,validate
2,16856749.0,27527958,54094282.0,98.4,70.0,16.0,99.0,125.0,56.0,0,2.0,2.0,"edema, pleural_effusion",FINAL REPORT\...,\nName: ___ Unit No: ___...,Dyspnea,Right heart catheterization ___,History provided by wife b/c patient is hard o...,"1. CARDIAC RISK FACTORS: Diabetes Type II, Dys...","No family history of early MI, arrhythmia, car...",0.0,0.0,1.0,0.0,1.0,0.0,validate
3,17448207.0,22162530,52690784.0,97.9,150.0,18.0,98.0,128.0,101.0,0,1.0,0.0,no_finding,FINAL REPORT\...,\nName: ___ Unit No: _...,tachycardia,NONE,___ with a hisotry of atrial fibrillation s/p ...,Venous stasis \nA-fib s/p cardioversion in __...,Father died at ___ getting CABG \nTwin brothe...,0.0,0.0,0.0,0.0,0.0,0.0,validate
4,10692509.0,26210665,50447877.0,98.0,78.0,18.0,100.0,131.0,49.0,0,2.0,0.0,no_finding,FINAL REPORT\...,\nName: ___ Unit No: ___\n \n...,chest pain,,"Mr. ___ is a ___ w/ PMH of mild dementia, CAD ...","1. CARDIAC RISK FACTORS: (-)Diabetes, (+)Dysli...","No history of CAD, diabetes as far as he knows...",0.0,0.0,0.0,0.0,0.0,0.0,validate


In [63]:
# Repeat of the pop_df.head() preview from the previous cell (candidate for removal)
pop_df.head()


Unnamed: 0,patient_id,visit_id,study_id,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,pathologies_number,pathologies_names,radiology_note,discharge_note,chief_complaint,major_surgical_or_invasive_procedure,history_of_present_illness,past_medical_history,family_history,atelectasis,cardiomegaly,edema,lung_opacity,pleural_effusion,pneumonia,dataset_type
0,13110963.0,23553001,58949064.0,101.2,99.0,18.0,97.0,151.0,66.0,0,2.0,1.0,lung_opacity,FINAL REPORT\...,\nName: ___ Unit No: __...,fever,,"___ yo F with advanced dementia, found to be f...",# Dementia\n# Stable T-cell lymphoproliferativ...,The patient's sister had fibroids,0.0,0.0,0.0,1.0,0.0,0.0,validate
1,17918016.0,24868379,53178110.0,98.5,140.0,18.0,97.0,137.0,94.0,0,1.0,2.0,"cardiomegaly, pleural_effusion",FINAL REPORT\...,\nName: ___ Unit No: ___\n...,"shortness of breath, lower extremity edema, ch...",none.,"___ yo morbidly obese female with a h/o HLD, H...",GERD \nCholelythiasis \nOSTEOARTHRITIS \nOBE...,"No family history of early MI, arrhythmia, car...",0.0,1.0,0.0,0.0,1.0,0.0,validate
2,16856749.0,27527958,54094282.0,98.4,70.0,16.0,99.0,125.0,56.0,0,2.0,2.0,"edema, pleural_effusion",FINAL REPORT\...,\nName: ___ Unit No: ___...,Dyspnea,Right heart catheterization ___,History provided by wife b/c patient is hard o...,"1. CARDIAC RISK FACTORS: Diabetes Type II, Dys...","No family history of early MI, arrhythmia, car...",0.0,0.0,1.0,0.0,1.0,0.0,validate
3,17448207.0,22162530,52690784.0,97.9,150.0,18.0,98.0,128.0,101.0,0,1.0,0.0,no_finding,FINAL REPORT\...,\nName: ___ Unit No: _...,tachycardia,NONE,___ with a hisotry of atrial fibrillation s/p ...,Venous stasis \nA-fib s/p cardioversion in __...,Father died at ___ getting CABG \nTwin brothe...,0.0,0.0,0.0,0.0,0.0,0.0,validate
4,10692509.0,26210665,50447877.0,98.0,78.0,18.0,100.0,131.0,49.0,0,2.0,0.0,no_finding,FINAL REPORT\...,\nName: ___ Unit No: ___\n \n...,chest pain,,"Mr. ___ is a ___ w/ PMH of mild dementia, CAD ...","1. CARDIAC RISK FACTORS: (-)Diabetes, (+)Dysli...","No history of CAD, diabetes as far as he knows...",0.0,0.0,0.0,0.0,0.0,0.0,validate


In [72]:
def preprocess_data(df):
    """Trim the combined frame to the analysis columns and split it into X and y.

    Columns are chosen by position: the first column (which must be
    ``patient_id``, because it becomes the index), the columns at positions
    3 through 10, and the last seven columns. ``acuity`` is cast to ``int64``.

    Parameters
    ----------
    df : pandas.DataFrame
        Combined dataset laid out as described above.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_new, X, y)`` where ``df_new`` is the trimmed frame indexed by
        ``patient_id``, ``y`` holds its last seven columns, and ``X`` is
        ``df_new`` without those columns except the final one, which is kept
        in both.
    """
    source_cols = df.columns
    selected = [source_cols[0], *source_cols[3:11], *source_cols[-7:]]

    # set_index returns a new frame, so the caller's DataFrame is left untouched
    df_new = df[selected].set_index('patient_id')
    df_new['acuity'] = df_new['acuity'].astype('int64')

    target_cols = list(df_new.columns[-7:])
    y = df_new[target_cols]
    # Drop every target column but the last, which therefore also stays in X
    X = df_new.drop(columns=target_cols[:-1])

    for label, frame in (('X Cols: ', X), ('Y Cols: ', y)):
        print(label, frame.columns)
    print(X.shape)

    return df_new, X, y

# Build the population tables: trimmed frame (pop_Xy), features (pop_X) and labels (pop_y)
pop_Xy, pop_X, pop_y = preprocess_data(pop_df)

X Cols:  Index(['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain',
       'acuity', 'dataset_type'],
      dtype='object')
Y Cols:  Index(['atelectasis', 'cardiomegaly', 'edema', 'lung_opacity',
       'pleural_effusion', 'pneumonia', 'dataset_type'],
      dtype='object')
(14443, 9)


In [58]:
# Preview the first rows of the trimmed frame (all columns)
pop_Xy.head()

Unnamed: 0_level_0,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,atelectasis,cardiomegaly,edema,lung_opacity,pleural_effusion,pneumonia,dataset_type
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
13110963.0,101.2,99.0,18.0,97.0,151.0,66.0,0,2,0.0,0.0,0.0,1.0,0.0,0.0,validate
17918016.0,98.5,140.0,18.0,97.0,137.0,94.0,0,1,0.0,1.0,0.0,0.0,1.0,0.0,validate
16856749.0,98.4,70.0,16.0,99.0,125.0,56.0,0,2,0.0,0.0,1.0,0.0,1.0,0.0,validate
17448207.0,97.9,150.0,18.0,98.0,128.0,101.0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,validate
10692509.0,98.0,78.0,18.0,100.0,131.0,49.0,0,2,0.0,0.0,0.0,0.0,0.0,0.0,validate


In [59]:
# Preview the first rows of the feature frame (all columns)
pop_X.head()

Unnamed: 0_level_0,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,atelectasis,dataset_type
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
13110963.0,101.2,99.0,18.0,97.0,151.0,66.0,0,2,0.0,validate
17918016.0,98.5,140.0,18.0,97.0,137.0,94.0,0,1,0.0,validate
16856749.0,98.4,70.0,16.0,99.0,125.0,56.0,0,2,0.0,validate
17448207.0,97.9,150.0,18.0,98.0,128.0,101.0,0,1,0.0,validate
10692509.0,98.0,78.0,18.0,100.0,131.0,49.0,0,2,0.0,validate


In [60]:
# Display the label frame (all columns)
pop_y

Unnamed: 0_level_0,atelectasis,cardiomegaly,edema,lung_opacity,pleural_effusion,pneumonia,dataset_type
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
13110963.0,0.0,0.0,0.0,1.0,0.0,0.0,validate
17918016.0,0.0,1.0,0.0,0.0,1.0,0.0,validate
16856749.0,0.0,0.0,1.0,0.0,1.0,0.0,validate
17448207.0,0.0,0.0,0.0,0.0,0.0,0.0,validate
10692509.0,0.0,0.0,0.0,0.0,0.0,0.0,validate
...,...,...,...,...,...,...,...
19995210.0,0.0,0.0,0.0,0.0,0.0,0.0,train
19995320.0,0.0,0.0,0.0,0.0,0.0,0.0,train
19997911.0,1.0,0.0,0.0,0.0,0.0,0.0,train
19998350.0,0.0,0.0,0.0,0.0,0.0,0.0,train


In [73]:
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" to the output.
pop_Xy.info()
pop_Xy.head()

<class 'pandas.core.frame.DataFrame'>
Index: 14443 entries, 13110963.0 to 19998562.0
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   temperature       14443 non-null  float64
 1   heartrate         14443 non-null  float64
 2   resprate          14443 non-null  float64
 3   o2sat             14443 non-null  float64
 4   sbp               14443 non-null  float64
 5   dbp               14443 non-null  float64
 6   pain              14443 non-null  int64  
 7   acuity            14443 non-null  int64  
 8   atelectasis       14443 non-null  float64
 9   cardiomegaly      14443 non-null  float64
 10  edema             14443 non-null  float64
 11  lung_opacity      14443 non-null  float64
 12  pleural_effusion  14443 non-null  float64
 13  pneumonia         14443 non-null  float64
 14  dataset_type      14443 non-null  object 
dtypes: float64(12), int64(2), object(1)
memory usage: 1.8+ MB
None


Unnamed: 0_level_0,temperature,heartrate,resprate,o2sat,sbp,dbp,pain,acuity,atelectasis,cardiomegaly,edema,lung_opacity,pleural_effusion,pneumonia,dataset_type
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
13110963.0,101.2,99.0,18.0,97.0,151.0,66.0,0,2,0.0,0.0,0.0,1.0,0.0,0.0,validate
17918016.0,98.5,140.0,18.0,97.0,137.0,94.0,0,1,0.0,1.0,0.0,0.0,1.0,0.0,validate
16856749.0,98.4,70.0,16.0,99.0,125.0,56.0,0,2,0.0,0.0,1.0,0.0,1.0,0.0,validate
17448207.0,97.9,150.0,18.0,98.0,128.0,101.0,0,1,0.0,0.0,0.0,0.0,0.0,0.0,validate
10692509.0,98.0,78.0,18.0,100.0,131.0,49.0,0,2,0.0,0.0,0.0,0.0,0.0,0.0,validate


In [76]:

# Steps 1 & 2: map every column of pop_X to its distinct values, sorted in descending order
unique_values_sorted = {
    column: np.sort(pop_X[column].unique())[::-1]
    for column in pop_X.columns
}

# Step 3: wrap each array in a Series so columns with fewer distinct values are padded with NaN
unique_values_df = pd.DataFrame(
    {name: pd.Series(values) for name, values in unique_values_sorted.items()}
)

# Transpose so that each row corresponds to one column of pop_X
unique_values_transposed = unique_values_df.T

unique_values_transposed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180
temperature,106.0,105.9,105.8,105.4,105.3,105.0,104.9,104.7,104.6,104.5,104.4,104.3,104.2,104.1,104.0,103.9,103.8,103.7,103.6,103.5,103.4,103.3,103.2,103.1,103.0,102.9,102.8,102.7,102.6,102.5,102.4,102.3,102.2,102.1,102.0,101.9,101.8,101.7,101.6,101.5,101.4,101.3,101.2,101.1,101.0,100.9,100.89,100.8,100.7,100.6,100.5,100.4,100.3,100.2,100.1,100.0,99.91,99.9,99.8,99.7,99.6,99.5,99.4637,99.4,99.3,99.2,99.1,99.05,99.0,98.94,98.9,98.8,98.7,98.63,98.6,98.58,98.54,98.5,98.4,98.34,98.3,98.2,98.1,98.06,98.0,97.9,97.8,97.7,97.6,97.5,97.4,97.32,97.3,97.25,97.2,97.12,97.1,97.0,96.9,96.8,96.7,96.69,96.6,96.5,96.4,96.3,96.2,96.1,96.0,95.9,95.8,95.7,95.6,95.5,95.4,95.3,95.2,95.1,95.0,94.8,94.4,94.3,93.0,90.0,87.1,83.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
heartrate,180.0,174.0,169.0,168.0,166.0,165.0,163.0,162.0,160.0,159.0,158.0,157.0,156.0,155.0,153.0,152.0,151.0,150.0,149.0,148.0,147.0,146.0,145.0,144.0,143.0,142.0,141.0,140.0,139.0,138.0,137.0,136.0,135.0,134.0,133.0,132.0,131.0,130.0,129.0,128.0,127.0,126.0,125.0,124.0,123.0,122.0,121.0,120.0,119.0,118.0,117.0,116.0,115.0,114.0,113.0,112.0,111.0,110.0,109.0,108.0,107.0,106.0,105.0,104.0,103.0,102.0,101.0,100.0,99.0,98.0,97.0,96.0,95.0,94.0,93.0,92.0,91.0,90.0,89.0,88.0,87.0,86.0,85.0,84.0,83.0,82.0,81.0,80.0,79.0,78.0,77.0,76.0,75.0,74.0,73.0,72.0,71.0,70.0,69.0,68.0,67.0,66.0,65.0,64.0,63.0,62.0,61.0,60.0,59.0,58.0,57.0,56.0,55.0,54.0,53.0,52.0,51.0,50.0,49.0,48.0,47.0,46.0,45.0,44.0,43.0,42.0,41.0,40.0,39.0,38.0,37.0,36.0,35.0,34.0,33.0,32.0,31.0,30.0,28.0,25.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
resprate,60.0,52.0,50.0,48.0,46.0,45.0,44.0,42.0,40.0,38.0,37.0,36.0,35.0,34.0,33.0,32.0,31.0,30.0,29.0,28.0,27.0,26.0,25.0,24.0,23.0,22.0,21.0,20.0,19.0,18.0,17.0,16.0,15.0,14.0,13.0,12.0,10.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
o2sat,120.0,100.0,99.2,99.0,98.0,97.9,97.0,96.0,95.0,94.0,93.0,92.0,91.0,90.0,89.0,88.0,87.0,86.0,85.0,84.0,83.0,82.0,81.0,80.0,78.0,77.0,76.0,75.0,74.0,73.0,68.0,63.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
sbp,270.0,268.0,256.0,255.0,252.0,250.0,247.0,244.0,243.0,240.0,239.0,238.0,236.0,235.0,234.0,233.0,229.0,228.0,227.0,226.0,224.0,223.0,222.0,221.0,220.0,219.0,218.0,217.0,216.0,215.0,214.0,213.0,212.0,211.0,210.0,209.0,208.0,207.0,206.0,205.0,204.0,203.0,202.0,201.0,200.0,199.0,198.0,197.0,196.0,195.0,194.0,193.0,192.0,191.0,190.0,189.0,188.0,187.0,186.0,185.0,184.0,183.0,182.0,181.0,180.0,179.0,178.0,177.0,176.0,175.0,174.0,173.0,172.0,171.0,170.0,169.0,168.0,167.0,166.0,165.0,164.0,163.0,162.0,161.0,160.0,159.0,158.0,157.0,156.0,155.0,154.0,153.0,152.0,151.0,150.0,149.0,148.0,147.0,146.0,145.0,144.0,143.0,142.0,141.0,140.0,139.0,138.0,137.0,136.0,135.0,134.0,133.0,132.0,131.0,130.0,129.0,128.0,127.0,126.0,125.0,124.0,123.0,122.0,121.0,120.0,119.0,118.0,117.0,116.0,115.0,114.0,113.0,112.0,111.0,110.0,109.0,108.0,107.0,106.0,105.0,104.0,103.0,102.0,101.0,100.0,99.0,98.0,97.0,96.0,95.0,94.0,93.0,92.0,91.0,90.0,89.0,88.0,87.0,86.0,85.0,84.0,83.0,82.0,81.0,80.0,79.0,78.0,77.0,76.0,75.0,74.0,73.0,72.0,71.0,70.0,69.0,68.0,67.0,65.0,64.0,58.0
dbp,169.0,164.0,162.0,160.0,155.0,151.0,150.0,149.0,146.0,143.0,142.0,140.0,139.0,138.0,137.0,136.0,135.0,134.0,133.0,132.0,131.0,130.0,129.0,128.0,127.0,126.0,125.0,124.0,123.0,122.0,121.0,120.0,119.0,118.0,117.0,116.0,115.0,114.0,113.0,112.0,111.0,110.0,109.0,108.0,107.0,106.0,105.0,104.0,103.0,102.0,101.0,100.0,99.0,98.0,97.0,96.0,95.0,94.0,93.0,92.0,91.0,90.0,89.0,88.0,87.0,86.0,85.0,84.0,83.0,82.0,81.0,80.0,79.0,78.0,77.0,76.0,75.0,74.0,73.0,72.0,71.0,70.0,69.0,68.0,67.0,66.0,65.0,64.0,63.0,62.0,61.0,60.0,59.0,58.0,57.0,56.0,55.0,54.0,53.0,52.0,51.0,50.0,49.0,48.0,47.0,46.0,45.0,44.0,43.0,42.0,41.0,40.0,39.0,38.0,37.0,36.0,35.0,34.0,33.0,32.0,31.0,30.0,29.0,27.0,24.0,22.0,21.0,20.0,18.0,17.0,16.0,14.0,11.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
pain,10.0,9.0,8.0,7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
acuity,5.0,4.0,3.0,2.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
dataset_type,validate,train,test,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Datatypes 
- **Interval:** `temperature` (values appear to be in °F, which has no true zero)
- **Ratio:** `heartrate`, `resprate`, `o2sat`, `sbp` and `dbp`
- **Ordinal:** `pain` and `acuity`
- **Nominal:** `patient_id`, `atelectasis`, `cardiomegaly`, `edema`, `lung_opacity`, `pleural_effusion` and `pneumonia` (the six pathology flags are binary 0/1)

Pipeline

Model