In [1]:
import os

import pandas as pd
from utils import create_missing_data
from utils.load_dataset import load_eval_columns

from dataset_config import MIMICIV_config

In [2]:
from imputers.knn import KNN as knn
from imputers.mice import MiceImputer as mice
from imputers.nnimputer import NeuralNetworkImputer
from imputers.ssl_imputer import SSLImputer
from evals.eval import Evaluation




In [20]:
# Location of the raw state-vector CSV. The default is the original, machine-specific
# absolute path; set the MIMICIV_STATE_VECTORS_CSV environment variable to point at
# your own copy instead of editing this cell.
raw_state_vectors_csv = os.environ.get(
    "MIMICIV_STATE_VECTORS_CSV",
    "F:\\Jobs\\InfAI\\INTELLILUNG_2-main\\INTELLILUNG_2-main\\MIMIC\\data\\mimiciv_state_vectors.csv",
)
data = pd.read_csv(raw_state_vectors_csv)

In [3]:
# Columns excluded from the analysis. According to the original note these are the
# columns with 50% missing values; the missing-rate check itself is not part of this cell.
col_to_drop = [
    'ecmo_sweep', 'vital_co', 'is_controlled', 'vent_rsbi',
    'blood_ffp', 'blood_prbc', 'blood_album', 'ecmo_rpm',
    'ecmo_bloodflow', 'blood_sco2', 'vital_SVRI', 'blood_svo2',
    'vent_etco2', 'vital_mpap', 'blood_smvo2', 'vital_cvp',
]
data = data.drop(labels=col_to_drop, axis="columns")
data.columns

Index(['stay_id', 'mv_id', 'timepoints', 'age', 'blood_be', 'blood_hco3',
       'blood_ph', 'drugs_vaso4h', 'vital_map', 'vital_DBP', 'vital_SBP',
       'blood_INR', 'blood_PTT', 'daemo_sex', 'daemo_weight', 'daemo_height',
       'daemo_discharge', 'ecmo_active', 'blood_calcium', 'blood_chlorid',
       'blood_caion', 'blood_magnes', 'blood_potas', 'blood_sodium',
       'cum_fluid_balance', 'state_ivfluid4h', 'blood_paco2', 'blood_pao2',
       'vent_fio2', 'vital_spo2', 'blood_sao2', 'blood_plat', 'blood_hb',
       'blood_hct', 'blood_wbc', 'vital_hr', 'state_bun', 'blood_crea',
       'state_urin4h', 'blood_ast', 'blood_alt', 'blood_billi', 'blood_lac',
       'blood_gluco', 'state_temp', 'vent_inspexp', 'vent_insp', 'vent_pinsp',
       'vent_mairpress', 'vent_mv', 'vent_peep', 'vent_rrtot',
       'vent_rrcontrolled', 'vent_rrspont', 'vent_suppress', 'vent_vt',
       'vent_vtnorm', 'vent_mode', 'state_airtype', 'daemo_morta',
       'episode_id'],
      dtype='object')

In [5]:
# Keep only the state-vector features by removing the non-feature columns:
# stay_id, mv_id, timepoints and age at the front, and episode_id at the end.
# They are selected by name instead of by position (previously iloc[:, 4:-1]), so a
# change in the CSV schema raises a KeyError rather than silently dropping the
# wrong columns.
non_feature_cols = ['stay_id', 'mv_id', 'timepoints', 'age', 'episode_id']
state_vectors = data.drop(columns=non_feature_cols)
# Persist the feature table while it still contains its naturally missing values.
state_vectors.to_csv('mimiciv_missing_data.csv', index=False)
state_vectors.shape

(480060, 56)

In [6]:
# Complete-case subset: discard every row that has at least one missing value,
# then save the result to mimiciv_full_data.csv.
state_vectors = state_vectors.dropna(axis="index", how="any")
state_vectors.to_csv('mimiciv_full_data.csv', index=False)
state_vectors.shape


(138021, 56)

In [3]:
# Instantiate the dataset configuration; it supplies the file paths used below.
data_cfg = MIMICIV_config()

# Read the data set referenced by the configuration's full_data_path.
# NOTE(review): presumably the complete-case file (rows without missing values) —
# confirm the path configured in MIMICIV_config.
mimic_data = pd.read_csv(data_cfg.full_data_path)
mimic_data.shape

(138021, 56)

In [5]:
# Look up the positional index of the 'blood_chlorid' column in mimic_data.
column_index = mimic_data.columns.get_loc('blood_chlorid')
print(column_index)


15


### Create Missing dataset
##### Params: 
#####         dataset name, 
#####         dataset/dataframe, 
#####         columns in which we need to add missing values, 
#####         stepped missingness, 
#####         random missing, 
#####         percent missing

In [6]:
# Create the data set with artificial missing values in the target column only
# (random=True, percent=30, no stepped missingness).
# The column is located by name rather than by the hard-coded position 15, so the
# call keeps targeting blood_chlorid even if the column order of the CSV changes;
# get_loc raises a KeyError if the column is absent.
target_loc = mimic_data.columns.get_loc('blood_chlorid')
create_missing_data.create_missing_dataset(
    "mimiciv_blood_chlorid",
    mimic_data,
    mimic_data.columns[target_loc:target_loc + 1],
    steps=None,
    random=True,
    percent=30,
)

Missing Data saved in "Dataset" Folder


In [5]:
# Display the path the configuration uses for the data set with missing values.
data_cfg.missing_data_path

'dataset/mimiciv_blood_chlorid_w_missing_values_random.csv'

In [4]:
# Load the data set with missing values from the configured path; this is the
# input handed to every imputer below.
test_data = pd.read_csv(data_cfg.missing_data_path)
test_data.shape

(138021, 56)

##### If the data has missing values in more than one column and you want to use the GRU-based imputation, please refer to main.py

In [6]:
# KNN imputation is kept as a disabled alternative:
#knn.impute_data(test_data, data_cfg)
# Run MICE imputation on the data set with missing values.
# NOTE(review): whether test_data is modified in place is not visible here — the
# scoring cell later relies on test_data still containing the NaNs; confirm.
mice.mice_impute(test_data, data_cfg)

Imputing: 100%|██████████| 138021/138021 [00:23<00:00, 5838.49rows/s]


Saving the imputed data 

Imputed Data saved at location "dataset/mimiciv_blood_chlorid_imp_mice.csv"


In [7]:
# Run the mice_forest imputation variant on the same input data and configuration.
mice.mice_forest(test_data, data_cfg)

mf
Saving the imputed data 

Imputed Data saved at location "dataset/mimiciv_blood_chlorid_imp_miceforest.csv"


In [8]:
# SSL imputer workflow, kept in this order: construct, build the model, fit, transform.
ssl_imputer = SSLImputer(data_cfg)
ssl_imputer.build_model()
# Fit on mimic_data (the frame loaded from full_data_path) ...
ssl_imputer.fit(mimic_data)
# ... then apply the fitted imputer to test_data (the frame with missing values).
ssl_imputer.transform(test_data)


Training Started...

Epoch 1/50



INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 2/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 3/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 4/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 5/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 6/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 7/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 8/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 9/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 10/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 11/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 12/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 13/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 14/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 15/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 16/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 17/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 18/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 19/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 20/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 21/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 22/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 23/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 24/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 25/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 26/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 27/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 28/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 29/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 30/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 31/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 32/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 33/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 34/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 35/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 36/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 37/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 38/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 39/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 40/50
Epoch 41/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 42/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 43/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 44/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 45/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 46/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 47/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 48/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 49/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Epoch 50/50


INFO:tensorflow:Assets written to: AEModels/MIMICIV_model_240424_145624\assets


Training Completed...

Imputation Started...

Imputation Completed...

Saving the imputed data 

Imputed Data saved at location "dataset/mimiciv_blood_chlorid_imp_ssl.csv"


In [9]:
# Columns to evaluate, as a one-element Index. The column is found by name instead
# of the hard-coded slice 15:16, so it stays correct if the column order changes;
# get_loc raises a KeyError if 'blood_chlorid' is missing.
target_loc = mimic_data.columns.get_loc('blood_chlorid')
cols = mimic_data.columns[target_loc:target_loc + 1]
cols

Index(['blood_chlorid'], dtype='object')

In [12]:
# Read back only the evaluated column(s) from each imputer's saved output file.
mice_imp = pd.read_csv('dataset/mimiciv_blood_chlorid_imp_mice.csv', usecols=cols)
ssl_imp = pd.read_csv('dataset/mimiciv_blood_chlorid_imp_ssl.csv', usecols=cols)
mf_imp = pd.read_csv('dataset/mimiciv_blood_chlorid_imp_miceforest.csv', usecols=cols)
# Display the shapes of two of the three frames; mf_imp is loaded the same way
# but its shape is not shown here.
mice_imp.shape, ssl_imp.shape

((138021, 1), (138021, 1))

In [13]:
import numpy as np
from sklearn.metrics import mean_squared_error

def normalize_mse(y_true, y_pred):
    """Return the mean squared error scaled by the squared range of ``y_true``.

    Computes ``mean((y_true - y_pred) ** 2) / (max(y_true) - min(y_true)) ** 2``.
    Values are compared by position; any pandas index on the inputs is ignored.

    Args:
        y_true: Reference values (list, array or Series).
        y_pred: Predicted/imputed values with the same number of elements.

    Returns:
        The normalised MSE as a float, or ``nan`` when ``y_true`` is constant
        (zero range), because the normalisation is undefined in that case.

    Raises:
        ValueError: If the inputs are empty, differ in length, or contain
            NaN/infinite values.
    """
    true_values = np.asarray(y_true, dtype=float).ravel()
    pred_values = np.asarray(y_pred, dtype=float).ravel()

    if true_values.size == 0:
        raise ValueError("normalize_mse requires at least one value")
    if true_values.size != pred_values.size:
        raise ValueError(
            f"y_true and y_pred differ in length: {true_values.size} vs {pred_values.size}"
        )
    if not (np.isfinite(true_values).all() and np.isfinite(pred_values).all()):
        raise ValueError("y_true and y_pred must not contain NaN or infinite values")

    value_range = true_values.max() - true_values.min()
    if value_range == 0:
        # A constant reference column has no range to normalise by; report the score
        # as undefined instead of dividing by zero.
        return float("nan")

    mse = np.mean((true_values - pred_values) ** 2)
    return float(mse / value_range ** 2)

# Score each imputer on exactly the rows whose value is missing in test_data,
# comparing against the values in mimic_data for those rows.
imputed_frames = {"MICE": mice_imp, "SSL": ssl_imp, "MICEForest": mf_imp}

for col in cols:
    mask = test_data[col].isna()
    og_col = mimic_data.loc[mask, col]

    # One "<label>: <score>" entry per imputer, in the order listed above.
    report = " | ".join(
        f"{label}: {round(normalize_mse(og_col, frame.loc[mask, col]), 4)}"
        for label, frame in imputed_frames.items()
    )

    print(f"{col} {report}")
    print()


blood_chlorid MICE: 0.0018 | SSL: 0.0056 | MICEForest: 0.0011



In [4]:
imputers = ["mice", "ssl", "miceforest"]    # list the imputers for which you want to evaluate the imputation
# NOTE(review): this name shadows Python's built-in eval() for the rest of the
# notebook; it is left unchanged because the evaluation loop calls eval.scores(...).
eval = Evaluation()

In [7]:
# Evaluate every imputer column by column. The object returned by load_eval_columns
# is indexed with keys built from the column name and its 1-based position in
# data_cfg.missing_state_vector.
for imputer in imputers:
    columns_by_key = load_eval_columns(data_cfg, imputer)
    for position, col in enumerate(data_cfg.missing_state_vector, start=1):
        key_suffix = f"{col}_{position}"
        eval.scores(
            columns_by_key[f"og_{key_suffix}"],
            columns_by_key[f"imputed_{key_suffix}"],
            data_cfg.imputed_data_path,
            imputer,
            col,
        )

Evalution of blood_chlorid for mice saved at evals/evaluations.csv.
Evalution of blood_chlorid for ssl saved at evals/evaluations.csv.
Evalution of blood_chlorid for miceforest saved at evals/evaluations.csv.


In [28]:
# Summary statistics of the two fluid columns from a separate state-vector CSV.
# The default is the original, user-specific absolute path; set the
# MIMICIV_PINSP_PEEP_CSV environment variable to use a copy stored elsewhere.
milos_mimic_csv = os.environ.get(
    "MIMICIV_PINSP_PEEP_CSV",
    "C:\\Users\\prath\\Downloads\\mimiciv_state_vectors_2024_april_16_vent_pinsp-peep.csv",
)
milos_mimic = pd.read_csv(milos_mimic_csv, usecols=['cum_fluid_balance', 'state_ivfluid4h'])
milos_mimic.describe()

Unnamed: 0,cum_fluid_balance,state_ivfluid4h
count,482467.0,482467.0
mean,284.129112,296.171967
std,361.82193,348.490029
min,-3500.0,0.0
25%,64.994556,70.00267
50%,195.01668,200.0
75%,400.211955,402.010059
max,12886.321896,12886.321896
