In [1]:
# Shell escape: ask the NVIDIA driver tool for GPU status. On a machine without
# the NVIDIA tools the shell reports "command not found"; the cell does not
# raise, so later cells still run.
!nvidia-smi

zsh:1: command not found: nvidia-smi


In [2]:
!pip install missingpy



In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_auc_score
import math

In [7]:
import sys

import sklearn.neighbors._base

# Register the private module under the name 'sklearn.neighbors.base' so that a
# later `import sklearn.neighbors.base` resolves to it. Presumably needed by
# missingpy, which is imported in the next cell -- confirm against the
# installed missingpy/scikit-learn versions.
# `sys` is imported here because this cell runs before the main import cell.
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

In [8]:
import sys
import numpy as np
import pandas as pd

from missingpy import MissForest
from dsan import Imputer as DSANImputer
from utils import *

import warnings
# Suppress every warning for the rest of the session. NOTE(review): this also
# hides deprecation and convergence warnings from the libraries used below.
warnings.filterwarnings('ignore')

In [9]:
# Seed value handed to set_seed (which comes from the `utils` star import).
# Presumably this fixes the random state used by the shuffling and training
# below -- confirm which libraries set_seed actually seeds.
seed = 128
set_seed(seed)

In [10]:
def get_imputer(config):
    """Build the imputer selected by ``config['model']``.

    Parameters
    ----------
    config : dict
        Must contain the key ``'model'``. When the model is ``'DSAN'`` it must
        also contain every hyperparameter named in ``dsan_keys`` below; each
        one is forwarded unchanged as a keyword argument to ``DSANImputer``.

    Returns
    -------
    object or None
        A ``DSANImputer`` for ``'DSAN'``, a ``MissForest`` for
        ``'MissForest'``, and ``None`` for ``'Statistics'`` (no imputer object
        is constructed for that model here).

    Raises
    ------
    KeyError
        If ``'model'`` or a required DSAN hyperparameter is missing.
    ValueError
        If ``config['model']`` is not one of the supported names.
    """
    model = config['model']

    if model == 'DSAN':
        dsan_keys = (
            'rep_dim',
            'num_heads',
            'n_hidden',
            'lr',
            'weight_decay',
            'batch_size',
            'epochs',
            'noise_percent',
            'stopped_epoch',
        )
        return DSANImputer(**{key: config[key] for key in dsan_keys})

    if model == 'MissForest':
        # random_state is fixed to 0 here, independently of the notebook seed.
        return MissForest(random_state=0)

    if model == 'Statistics':
        return None

    # Previously an unknown name reached `return imputer` with the variable
    # unbound and surfaced as an UnboundLocalError; fail with a clear message.
    raise ValueError(
        "Unknown model {!r}; expected 'DSAN', 'MissForest' or 'Statistics'".format(model)
    )

In [11]:
def experiment_run(data_name, config, test=False, k=5, percent_missing_lst=None):
    """Run a k-fold imputation experiment on one dataset.

    For every missing rate, the rows are split into ``k`` equally sized
    validation folds. In each fold, values are masked out of the training
    rows, imputed with the imputer returned by ``get_imputer(config)``, scored
    against the original values, and the imputed training rows are then used
    for a downstream classification check on the held-out rows.

    Parameters
    ----------
    data_name : str
        Dataset identifier passed to ``get_data`` and ``ex_classify``.
    config : dict
        Imputer configuration passed to ``get_imputer``.
    test : bool, default False
        If true, only the first 1000 rows of the data array are used.
    k : int, default 5
        Number of folds. Must be at least 2.
    percent_missing_lst : iterable of numbers, optional
        Missing rates passed to ``generate_missing_mask``. Defaults to
        ``[5, 10, 15, 20]``.

    Returns
    -------
    dict
        Maps each missing rate to a dict of metrics averaged over the folds:
        the keys produced by ``cal_metric_numpy`` plus ``'clf_aucroc'``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 2, if the dataset has fewer than ``k`` rows,
        or if ``get_imputer`` returns ``None`` for the configured model.
    """
    if k < 2:
        raise ValueError("k must be at least 2, got {}".format(k))
    if percent_missing_lst is None:
        percent_missing_lst = list(range(5, 25, 5))

    # Retrieve dataset
    dataset = get_data(data_name)
    print(f"Retrieved dataset for {data_name}")

    # Limit to 1000 samples if test flag is true
    if test:
        dataset = (dataset[0][:1000], dataset[1], dataset[2], dataset[3])
        print("Limited dataset to 1000 samples for testing")

    # The second element (column count) is not needed by this function.
    X_origin, _, num_vars, cat_vars = dataset

    # Apply category mapping. Whether category_mapping returns a new array or
    # mutates its argument and returns None is not visible from this notebook,
    # so handle both: only rebind X_origin when something was returned.
    # (Previously X_origin was overwritten first, so the "in-place" branch
    # could only ever be reached with X_origin == None and crashed on .shape.)
    mapped = category_mapping(X_origin, cat_vars)
    print("Applied category mapping")

    if mapped is not None:
        X_origin = mapped
        print("X_origin is replaced with a new version where categorical variables have been mapped to numerical values.")
    else:
        print("X_origin is modified in-place, with categorical variables mapped to numerical values.")

    # Print data size
    print("Data Size: {}".format(X_origin.shape))

    n_sample = X_origin.shape[0]
    idx_lst = list(range(n_sample))
    np.random.shuffle(idx_lst)

    # k-fold validation setup
    n_valid = n_sample // k
    if n_valid == 0:
        raise ValueError(
            "Need at least k={} samples for k-fold validation, got {}".format(k, n_sample)
        )
    print(f"Starting k-fold validation with k={k}")

    results = {}
    for percent_missing in percent_missing_lst:
        result = []
        print(f"\nProcessing {percent_missing}% missing data")

        # Exactly k folds of n_valid rows each. The n_sample % k leftover rows
        # are never used for validation but are part of every training split.
        for it in range(k):
            print(f"\nFold {it + 1}")
            start = it * n_valid
            valid_idx = idx_lst[start: start + n_valid]

            train_idx = list(set(idx_lst) - set(valid_idx))
            X = X_origin[train_idx, :]
            X_test = X_origin[valid_idx, :]

            # Generating incomplete data
            missing_mask = generate_missing_mask(X, percent_missing=percent_missing)
            X_incomplete = X.copy()
            X_incomplete[missing_mask] = np.nan
            print("Generated incomplete data with missing values for fold {}:".format(it+1))
            print("Original data with missing values (subset):")
            print(X_incomplete[:5])  # Adjust the number of rows as needed

            # Imputation: a fresh imputer per fold so no state carries over.
            imputer = get_imputer(config)
            if imputer is None:
                raise ValueError(
                    f"get_imputer returned no imputer for model {config['model']!r}; "
                    "experiment_run needs an object with fit_transform"
                )
            X_imputed = imputer.fit_transform(X_incomplete, cat_vars=cat_vars)
            print("Completed imputation for fold {}:".format(it+1))

            # Keep observed values as they were; take only the masked cells
            # from the imputer output.
            X_res = X.copy()
            X_res[missing_mask] = X_imputed[missing_mask]
            print("X_res with imputed values (subset):")
            print(X_res[:5])  # Adjust the number of rows as needed

            # Calculate metrics
            metric = cal_metric_numpy(X_res, X, missing_mask, num_vars, cat_vars)
            score = ex_classify(data_name=data_name, train_array=X_res, test_array=X_test, num_vars=num_vars, cat_vars=cat_vars)

            metric['clf_aucroc'] = round(score, 4)
            result.append(metric)
            print("Metrics for this fold:", metric)

        # Average results
        avg_result = dict()
        for key in result[0].keys():
            value = np.mean([m[key] for m in result])
            avg_result[key] = value
            print(f"Average {key} for {percent_missing}% missing data: {value}")

        results[percent_missing] = avg_result

    print("Completed all iterations")
    return results


In [12]:
# Experiment configuration. 'model' selects the imputer in get_imputer; the
# remaining entries are the keyword arguments get_imputer forwards to
# DSANImputer when the model is 'DSAN'.
config = dict(
    model='DSAN',
    rep_dim=32,
    num_heads=8,
    n_hidden=2,
    lr=3e-3,
    weight_decay=1e-5,
    batch_size=128,
    epochs=15,
    noise_percent=10,
    stopped_epoch=10,
)

In [13]:
# `os` is imported here because no earlier cell imports it explicitly.
import os

data_name_lst = ['titanic']

result_dir = './results_titanic'
# exist_ok=True keeps the cell re-runnable: without it the second run fails
# with FileExistsError because the directory was created by the first run.
os.makedirs(result_dir, exist_ok=True)

# Run the experiment for each dataset and write one JSON file per dataset,
# storing the configuration and dataset name alongside the metrics.
for data_name in data_name_lst:
    results = experiment_run(data_name, config, test=True)
    results['config'] = config
    results['dataset'] = data_name
    result_path = os.path.join(result_dir, '{}.json'.format(data_name))
    write_json(results, result_path)

Retrieved dataset for titanic
Limited dataset to 1000 samples for testing
Applied category mapping
X_origin is replaced with a new version where categorical variables have been mapped to numerical values.
Data Size: (887, 7)
Starting k-fold validation with k=5

Processing 5% missing data

Fold 1
Generated incomplete data with missing values for fold 1:
Original data with missing values (subset):
[[1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 nan 8]
 [0 nan 0 54 0 0 52]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.18it/s]


Completed imputation for fold 1:
X_res with imputed values (subset):
[[1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 1.7759735584259033 8]
 [0 1 0 54 0 0 52]]
Metrics for this fold: {'nrmse': 0.4599, 'col_0_error_rate': 0.1818, 'col_1_error_rate': 0.3636, 'col_2_error_rate': 0.4375, 'total_error_rate': 0.3303, 'clf_aucroc': 0.8603}

Fold 2
Generated incomplete data with missing values for fold 2:
Original data with missing values (subset):
[[nan 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 nan 26 0 0 8]
 [1 1 1 nan 1 0 53]
 [0 0 0 35 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.02it/s]


Completed imputation for fold 2:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 0 26 0 0 8]
 [1 1 1 28.40114402770996 1 0 53]
 [0 0 0 35 0 0 8]]
Metrics for this fold: {'nrmse': 1.3549, 'col_0_error_rate': 0.2581, 'col_1_error_rate': 0.4186, 'col_2_error_rate': 0.3571, 'total_error_rate': 0.3534, 'clf_aucroc': 0.9152}

Fold 3
Generated incomplete data with missing values for fold 3:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 nan 0 71]
 [1 0 1 26 0 0 8]
 [0 0 0 nan 0 0 8]
 [0 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.05it/s]


Completed imputation for fold 3:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 4.318942546844482 0 71]
 [1 0 1 26 0 0 8]
 [0 0 0 24.77794075012207 0 0 8]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 1.0995, 'col_0_error_rate': 0.6774, 'col_1_error_rate': 0.3333, 'col_2_error_rate': 0.5333, 'total_error_rate': 0.5294, 'clf_aucroc': 0.8064}

Fold 4
Generated incomplete data with missing values for fold 4:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 nan 1 38 nan 0 71]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:04<00:00,  3.50it/s]


Completed imputation for fold 4:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 3.4001643657684326 0 71]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 1.3313, 'col_0_error_rate': 0.2647, 'col_1_error_rate': 0.2083, 'col_2_error_rate': 0.3182, 'total_error_rate': 0.2745, 'clf_aucroc': 0.8224}

Fold 5
Generated incomplete data with missing values for fold 5:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [nan 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [nan 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  3.85it/s]


Completed imputation for fold 5:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 0.9508, 'col_0_error_rate': 0.2188, 'col_1_error_rate': 0.5, 'col_2_error_rate': 0.3125, 'total_error_rate': 0.35, 'clf_aucroc': 0.8563}

Fold 6
Insufficient data for validation, breaking loop
Average nrmse for 5% missing data: 1.03928
Average col_0_error_rate for 5% missing data: 0.32016
Average col_1_error_rate for 5% missing data: 0.36476
Average col_2_error_rate for 5% missing data: 0.39172
Average total_error_rate for 5% missing data: 0.36751999999999996
Average clf_aucroc for 5% missing data: 0.85212

Processing 10% missing data

Fold 1
Generated incomplete data with missing values for fold 1:
Original data with missing values (subset):
[[1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]
 [0 1 0 54 0 0 52]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.50it/s]


Completed imputation for fold 1:
X_res with imputed values (subset):
[[1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]
 [0 1 0 54 0 0 52]]
Metrics for this fold: {'nrmse': 0.6386, 'col_0_error_rate': 0.411, 'col_1_error_rate': 0.4286, 'col_2_error_rate': 0.4561, 'total_error_rate': 0.43, 'clf_aucroc': 0.8619}

Fold 2
Generated incomplete data with missing values for fold 2:
Original data with missing values (subset):
[[0 0 nan nan 1 0 7]
 [1 nan 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 nan 1 0 nan]
 [0 0 0 35 0 0 nan]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.23it/s]


Completed imputation for fold 2:
X_res with imputed values (subset):
[[0 0 0 25.23448944091797 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 21.49752426147461 1 0 33.14897537231445]
 [0 0 0 35 0 0 11.147013664245605]]
Metrics for this fold: {'nrmse': 0.9298, 'col_0_error_rate': 0.236, 'col_1_error_rate': 0.5195, 'col_2_error_rate': 0.2923, 'total_error_rate': 0.3463, 'clf_aucroc': 0.9153}

Fold 3
Generated incomplete data with missing values for fold 3:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 nan 0 0 8]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.55it/s]


Completed imputation for fold 3:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26.198911666870117 0 0 8]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 0.6441, 'col_0_error_rate': 0.2361, 'col_1_error_rate': 0.3662, 'col_2_error_rate': 0.5513, 'total_error_rate': 0.3891, 'clf_aucroc': 0.8013}

Fold 4
Generated incomplete data with missing values for fold 4:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 nan 71]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 nan]
 [0 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.45it/s]


Completed imputation for fold 4:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0.5358657240867615 71]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 14.175859451293945]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 1.02, 'col_0_error_rate': 0.5375, 'col_1_error_rate': 0.2794, 'col_2_error_rate': 0.1316, 'total_error_rate': 0.3214, 'clf_aucroc': 0.8231}

Fold 5
Generated incomplete data with missing values for fold 5:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 nan 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.30it/s]


Completed imputation for fold 5:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 0.5938, 'col_0_error_rate': 0.3375, 'col_1_error_rate': 0.3191, 'col_2_error_rate': 0.3571, 'total_error_rate': 0.3401, 'clf_aucroc': 0.8523}

Fold 6
Insufficient data for validation, breaking loop
Average nrmse for 10% missing data: 0.7652599999999999
Average col_0_error_rate for 10% missing data: 0.35161999999999993
Average col_1_error_rate for 10% missing data: 0.38256
Average col_2_error_rate for 10% missing data: 0.35768
Average total_error_rate for 10% missing data: 0.36538000000000004
Average clf_aucroc for 10% missing data: 0.85078

Processing 15% missing data

Fold 1
Generated incomplete data with missing values for fold 1:
Original data with missing values (subset):
[[1 0 1 26 0 0 8]
 [1 nan 1 35 nan 0 nan]
 [0 0 0 35 0 nan 8]
 [0 0 0 27 0 0 8]
 [0 1 0 54 nan 0 52]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.46it/s]


Completed imputation for fold 1:
X_res with imputed values (subset):
[[1 0 1 26 0 0 8]
 [1 2 1 35 2.413994073867798 0 16.289121627807617]
 [0 0 0 35 0 0.5770098567008972 8]
 [0 0 0 27 0 0 8]
 [0 1 0 54 2.1022958755493164 0 52]]
Metrics for this fold: {'nrmse': 0.9773, 'col_0_error_rate': 0.5376, 'col_1_error_rate': 0.2593, 'col_2_error_rate': 0.3053, 'total_error_rate': 0.3615, 'clf_aucroc': 0.8359}

Fold 2
Generated incomplete data with missing values for fold 2:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 nan 35 0 0 nan]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.43it/s]


Completed imputation for fold 2:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 55.74292755126953]]
Metrics for this fold: {'nrmse': 0.8926, 'col_0_error_rate': 0.3483, 'col_1_error_rate': 0.6396, 'col_2_error_rate': 0.4087, 'total_error_rate': 0.473, 'clf_aucroc': 0.9214}

Fold 3
Generated incomplete data with missing values for fold 3:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 nan 1 0 71]
 [1 0 nan 26 0 0 8]
 [0 0 0 35 0 nan 8]
 [0 0 0 nan 0 0 nan]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.59it/s]


Completed imputation for fold 3:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 27.296133041381836 1 0 71]
 [1 0 1 26 0 0 8]
 [0 0 0 35 0 1.930166244506836 8]
 [0 0 0 25.54746437072754 0 0 53.35728073120117]]
Metrics for this fold: {'nrmse': 1.0957, 'col_0_error_rate': 0.3186, 'col_1_error_rate': 0.404, 'col_2_error_rate': 0.4057, 'total_error_rate': 0.3742, 'clf_aucroc': 0.808}

Fold 4
Generated incomplete data with missing values for fold 4:
Original data with missing values (subset):
[[0 0 0 22 nan nan 7]
 [1 1 1 38 nan 0 71]
 [1 1 1 35 1 0 53]
 [0 0 nan 35 0 0 8]
 [0 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.29it/s]


Completed imputation for fold 4:
X_res with imputed values (subset):
[[0 0 0 22 0.9011366963386536 0.22693775594234467 7]
 [1 1 1 38 2.2795894145965576 0 71]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 1.4482, 'col_0_error_rate': 0.2703, 'col_1_error_rate': 0.2653, 'col_2_error_rate': 0.4463, 'total_error_rate': 0.3333, 'clf_aucroc': 0.812}

Fold 5
Generated incomplete data with missing values for fold 5:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 nan 1 nan 0 0 nan]
 [1 1 nan 35 1 0 53]
 [0 0 nan 27 nan 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.33it/s]


Completed imputation for fold 5:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26.476531982421875 0 0 73.03582000732422]
 [1 1 1 35 1 0 53]
 [0 0 0 27 0.7108156681060791 0 8]]
Metrics for this fold: {'nrmse': 0.8243, 'col_0_error_rate': 0.3434, 'col_1_error_rate': 0.3814, 'col_2_error_rate': 0.3426, 'total_error_rate': 0.3553, 'clf_aucroc': 0.8575}

Fold 6
Insufficient data for validation, breaking loop
Average nrmse for 15% missing data: 1.04762
Average col_0_error_rate for 15% missing data: 0.36363999999999996
Average col_1_error_rate for 15% missing data: 0.38992
Average col_2_error_rate for 15% missing data: 0.38171999999999995
Average total_error_rate for 15% missing data: 0.37945999999999996
Average clf_aucroc for 15% missing data: 0.8469599999999999

Processing 20% missing data

Fold 1
Generated incomplete data with missing values for fold 1:
Original data with missing values (subset):
[[1 0 nan nan 0 0 8]
 [nan 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]

100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.15it/s]


Completed imputation for fold 1:
X_res with imputed values (subset):
[[1 0 1 32.40189743041992 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0.7667214274406433 0 8]
 [0 1 0 54 0 0 52]]
Metrics for this fold: {'nrmse': 1.2944, 'col_0_error_rate': 0.3613, 'col_1_error_rate': 0.292, 'col_2_error_rate': 0.3143, 'total_error_rate': 0.3241, 'clf_aucroc': 0.8632}

Fold 2
Generated incomplete data with missing values for fold 2:
Original data with missing values (subset):
[[nan 0 0 22 1 0 7]
 [1 nan nan 38 1 nan nan]
 [nan 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [nan 0 0 35 0 0 nan]]


100%|███████████████████████████████████████████| 15/15 [00:04<00:00,  3.40it/s]


Completed imputation for fold 2:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 0 38 1 0.697891891002655 14.02755069732666]
 [0 0 1 26 0 0 8]
 [1 1 1 35 1 0 53]
 [0 0 0 35 0 0 26.55202293395996]]
Metrics for this fold: {'nrmse': 0.9991, 'col_0_error_rate': 0.3691, 'col_1_error_rate': 0.7143, 'col_2_error_rate': 0.3221, 'total_error_rate': 0.4749, 'clf_aucroc': 0.8864}

Fold 3
Generated incomplete data with missing values for fold 3:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 nan 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [0 nan nan nan 0 nan 8]
 [0 0 nan 27 nan 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  4.36it/s]


Completed imputation for fold 3:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 2 1 38 1 0 71]
 [1 0 1 26 0 0 8]
 [0 0 0 27.635210037231445 0 0.3982674777507782 8]
 [0 0 0 27 1.3875157833099365 0 8]]
Metrics for this fold: {'nrmse': 0.7478, 'col_0_error_rate': 0.3986, 'col_1_error_rate': 0.6579, 'col_2_error_rate': 0.2857, 'total_error_rate': 0.4591, 'clf_aucroc': 0.7915}

Fold 4
Generated incomplete data with missing values for fold 4:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [nan 1 1 38 nan nan 71]
 [nan nan nan 35 1 0 53]
 [0 nan 0 35 0 0 8]
 [0 0 0 27 0 0 8]]


100%|███████████████████████████████████████████| 15/15 [00:04<00:00,  3.46it/s]


Completed imputation for fold 4:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 2.020275592803955 2.527261734008789 71]
 [1 0 0 35 1 0 53]
 [0 0 0 35 0 0 8]
 [0 0 0 27 0 0 8]]
Metrics for this fold: {'nrmse': 0.763, 'col_0_error_rate': 0.4551, 'col_1_error_rate': 0.4715, 'col_2_error_rate': 0.2532, 'total_error_rate': 0.388, 'clf_aucroc': 0.8284}

Fold 5
Generated incomplete data with missing values for fold 5:
Original data with missing values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 nan 0 8]
 [1 1 1 35 1 nan nan]
 [nan 0 0 27 0 0 nan]]


100%|███████████████████████████████████████████| 15/15 [00:03<00:00,  3.99it/s]

Completed imputation for fold 5:
X_res with imputed values (subset):
[[0 0 0 22 1 0 7]
 [1 1 1 38 1 0 71]
 [1 0 1 26 1.3410472869873047 0 8]
 [1 1 1 35 1 0.40731361508369446 66.6235122680664]
 [0 0 0 27 0 0 18.985668182373047]]
Metrics for this fold: {'nrmse': 0.856, 'col_0_error_rate': 0.3385, 'col_1_error_rate': 0.561, 'col_2_error_rate': 0.381, 'total_error_rate': 0.4225, 'clf_aucroc': 0.8455}

Fold 6
Insufficient data for validation, breaking loop
Average nrmse for 20% missing data: 0.9320599999999999
Average col_0_error_rate for 20% missing data: 0.38452000000000003
Average col_1_error_rate for 20% missing data: 0.5393399999999999
Average col_2_error_rate for 20% missing data: 0.31126
Average total_error_rate for 20% missing data: 0.41372
Average clf_aucroc for 20% missing data: 0.8430000000000002
Completed all iterations



