# MIMIC3c aggregated data
https://www.kaggle.com/datasets/drscarlat/mimic3c

In [1]:
import sys
import warnings

# Relative prefix pointing one directory up; the data-loading cell also
# prepends it to the CSV path.
path_append = "../"

# Add that directory to the module search path (resolved against the
# current working directory).
sys.path.append(path_append)

# Install a blanket "ignore" filter so no warnings are shown from here on.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd

# Location of the aggregated MIMIC-3 CSV, built from the shared relative prefix.
mimic_csv_path = path_append + "../data/mimic_custom/mimic3c.csv"
df = pd.read_csv(mimic_csv_path)

In [3]:
# Replace missing religion / marital_status entries with explicit placeholder
# labels, then drop every row that still contains a missing value.
#
# The fill is done on the frame and assigned back rather than calling
# fillna(inplace=True) on a column selection: that chained form is deprecated
# by pandas and does not modify `df` when Copy-on-Write is enabled.
df = df.fillna({
    'religion': 'NOT SPECIFIED',
    'marital_status': 'UNKNOWN (DEFAULT)',
})
df = df.dropna(axis=0)

In [4]:
# Display the AdmitProcedure column (all rows) as the cell output.
df.loc[:, 'AdmitProcedure']

0                              na
1         Endosc control gast hem
2          Non-invasive mech vent
3        Part sm bowel resect NEC
4        Aortocor bypas-3 cor art
                   ...           
58971    Aortcor bypas-4+ cor art
58972    Cont inv mec ven <96 hrs
58973    Opn/oth rep aort vlv-tis
58974    Aortocor bypas-2 cor art
58975               Thoracentesis
Name: AdmitProcedure, Length: 58951, dtype: object

In [5]:
# Print a summary of the frame after cleaning: index range, column names,
# non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 58951 entries, 0 to 58975
Data columns (total 28 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   hadm_id           58951 non-null  int64  
 1   gender            58951 non-null  object 
 2   age               58951 non-null  int64  
 3   LOSdays           58951 non-null  float64
 4   admit_type        58951 non-null  object 
 5   admit_location    58951 non-null  object 
 6   AdmitDiagnosis    58951 non-null  object 
 7   insurance         58951 non-null  object 
 8   religion          58951 non-null  object 
 9   marital_status    58951 non-null  object 
 10  ethnicity         58951 non-null  object 
 11  NumCallouts       58951 non-null  float64
 12  NumDiagnosis      58951 non-null  float64
 13  NumProcs          58951 non-null  float64
 14  AdmitProcedure    58951 non-null  object 
 15  NumCPTevents      58951 non-null  float64
 16  NumInput          58951 non-null  float64
 17

In [6]:
# Keep the length-of-stay column aside as its own single-column frame, then
# remove it from the working frame.
# Note: this cell is not re-runnable on its own; once 'LOSdays' has been
# dropped, selecting it again raises KeyError.
df_selected_target = df.loc[:, ['LOSdays']]
df = df.drop(columns=['LOSdays'])

In [7]:
from tools.preprocessing.data_frame import auto_preprocess_dataframe

# Column roles handed to the project's preprocessing helper.
target_columns = ['ExpiredHospital']
drop_columns = ['hadm_id', 'AdmitDiagnosis', 'LOSgroupNum', 'AdmitProcedure']
encode_columns = [
    'gender',
    'admit_type',
    'admit_location',
    'insurance',
    'religion',
    'marital_status',
]

# Returns the processed frame plus a `description` object; the trainer
# configuration cell reads 'num_features' and 'num_classes' from it.
# NOTE(review): judging by the cell output the helper drops, scales and
# encodes columns (including 'ethnicity', which is not listed above) —
# confirm against its implementation.
df, description = auto_preprocess_dataframe(
    df,
    target_columns=target_columns,
    drop_columns=drop_columns,
    encode_columns=encode_columns,
)

Dropped columns: hadm_id, AdmitDiagnosis, LOSgroupNum, AdmitProcedure
Column 'admit_location' has 9 unique values.
Column 'admit_type' has 4 unique values.
Column 'ethnicity' has 41 unique values.
Column 'gender' has 2 unique values.
Column 'insurance' has 5 unique values.
Column 'marital_status' has 7 unique values.
Column 'religion' has 20 unique values.


Unnamed: 0,Min,Max,Mean,Std,Null Count,Scaled,Encoded
age,-2.036180,1.382810,7.219811e-17,1.000008,0,Standard,
NumCallouts,-0.099415,4.660585,-3.043410e-18,0.163459,0,,
NumDiagnosis,-0.892405,283.917722,8.009336e-01,5.232069,0,Robust,
NumProcs,-0.785199,274.214801,9.955867e-17,3.558457,0,,
NumCPTevents,-1.074307,223.925693,1.079958e-16,2.027405,0,,
...,...,...,...,...,...,...,...
religion_PROTESTANT QUAKER,0.000000,1.000000,1.209309e-01,0.326050,0,,
religion_ROMANIAN EAST. ORTH,0.000000,1.000000,1.407949e-03,0.037497,0,,
religion_UNITARIAN-UNIVERSALIST,0.000000,1.000000,2.103442e-03,0.045815,0,,
religion_UNOBTAINABLE,0.000000,1.000000,1.402351e-01,0.347234,0,,


In [8]:
import torch
from sklearn.model_selection import train_test_split
from tools.preprocessing.template_dataset import TemplateDataset

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffled split reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, shuffle=True, random_state=42)

# Inputs are every column except the last; the label is the last column,
# kept as a one-column frame.
# NOTE(review): this assumes auto_preprocess_dataframe places the target
# column last — confirm.
train_df_x, train_df_y = train_df.iloc[:, :-1], train_df.iloc[:, -1:]
test_df_x, test_df_y = test_df.iloc[:, :-1], test_df.iloc[:, -1:]

print('train df shape: ', train_df.shape)
print('test df shape: ', test_df.shape)

# Wrap the feature/label frames in the project's dataset class.
trainset = TemplateDataset(train_df_x, train_df_y)
testset = TemplateDataset(test_df_x, test_df_y)

train df shape:  (47160, 103)
test df shape:  (11791, 103)


In [9]:
from tools.setting.data_config import DataConfig
from tools.setting.ml_params import MLParameters
from trainer_hub import TrainerHub

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Feature and class counts reported by the preprocessing step.
num_features = description['num_features']
num_classes = description['num_classes']

data_config = DataConfig(
    dataset_name='mimic3',
    task_type='binary_classification',
    obs_shape=[num_features],
    label_size=num_classes,
)

# Build the ML parameters with a 'tabnet' CCNet and no encoder network, then
# override individual settings.
ml_params = MLParameters(ccnet_network='tabnet', encoder_network='none')
ml_params.algorithm.error_function = 'mae'
ml_params.model.ccnet_config.num_layers = 3
# NOTE(review): the saved output of this cell and of the training cell report
# num_epoch = 100, so that output presumably predates this override —
# re-run to refresh.
ml_params.training.num_epoch = 5

# Create the trainer hub from the ML parameters, data configuration and
# device, with console printing on and Weights & Biases logging off.
trainer_hub = TrainerHub(ml_params, data_config, device, use_print=True, use_wandb=False)

Trainer Name: causal_trainer


[1mModelParameters Parameters:[0m


Unnamed: 0,ccnet_config,ccnet_network,encoder_config,encoder_network
0,See details below,tabnet,,none


[3m
Detailed ccnet_config Configuration:[0m


Unnamed: 0,ccnet_config_model_name,ccnet_config_num_layers,ccnet_config_d_model,ccnet_config_dropout,ccnet_config_obs_shape
0,tabnet,3,256,0.05,[102]


[1mTrainingParameters Parameters:[0m


Unnamed: 0,batch_size,max_iters,max_seq_len,min_seq_len,num_epoch
0,64,100000,,,100


[1mOptimizationParameters Parameters:[0m


Unnamed: 0,clip_grad_range,decay_rate_100k,learning_rate,max_grad_norm,scheduler_type
0,,0.05,0.0002,1.0,exponential


[1mAlgorithmParameters Parameters:[0m


Unnamed: 0,enable_diffusion,error_function,reset_pretrained
0,False,mae,False


[1mDataConfig Parameters:[0m


Unnamed: 0,dataset_name,task_type,obs_shape,label_size,explain_size,explain_layer,state_size,show_image_indices
0,mimic3,binary_classification,[102],1,50,tanh,,








In [10]:
# Start training on trainset; testset is passed alongside it, and the log
# output reports test metrics (accuracy, precision, recall, f1) at intervals.
trainer_hub.train(trainset, testset)

Epochs:   0%|          | 0/100 [00:00<?, ?it/s]

Iterations:   0%|          | 0/736 [00:00<?, ?it/s]

[0/100][100/736][Time 12.97]
Unified LR across all optimizers: 0.0001993957766378747
--------------------Training Metrics--------------------
CCNet:  Three Tabnet
Inf: 0.1746	Gen: 0.4579	Rec: 0.4484	E: 0.1841	R: 0.1650	P: 0.7317
--------------------Test Metrics------------------------
accuracy: 0.9219
precision: 0.9528
recall: 0.9610
f1_score: 0.9569

[0/100][200/736][Time 12.94]
Unified LR across all optimizers: 0.00019879933411171295
--------------------Training Metrics--------------------
CCNet:  Three Tabnet
Inf: 0.0672	Gen: 0.2330	Rec: 0.2269	E: 0.0732	R: 0.0611	P: 0.3927
--------------------Test Metrics------------------------
accuracy: 0.8984
precision: 0.9044
recall: 0.9913
f1_score: 0.9458

[0/100][300/736][Time 12.01]
Unified LR across all optimizers: 0.00019820467569398644
--------------------Training Metrics--------------------
CCNet:  Three Tabnet
Inf: 0.0489	Gen: 0.2628	Rec: 0.2579	E: 0.0538	R: 0.0439	P: 0.4718
--------------------Test Metrics------------------------
accu

Iterations:   0%|          | 0/736 [00:00<?, ?it/s]

[1/100][64/736][Time 16.69]
Unified LR across all optimizers: 0.00019525795900462422
--------------------Training Metrics--------------------
CCNet:  Three Tabnet
Inf: 0.0314	Gen: 0.2569	Rec: 0.2537	E: 0.0346	R: 0.0282	P: 0.4793
--------------------Test Metrics------------------------
accuracy: 0.9258
precision: 0.9378
recall: 0.9826
f1_score: 0.9597

[1/100][164/736][Time 12.14]
Unified LR across all optimizers: 0.00019467389372357586
--------------------Training Metrics--------------------
CCNet:  Three Tabnet
Inf: 0.0324	Gen: 0.2075	Rec: 0.2039	E: 0.0360	R: 0.0289	P: 0.3790
--------------------Test Metrics------------------------
accuracy: 0.9219
precision: 0.9576
recall: 0.9576
f1_score: 0.9576



KeyboardInterrupt: 