In [1]:
import pandas as pd
import warnings
from hyperimpute.utils.serialization import load_model_from_file, save_model_to_file
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler

workspace = Path("workspace")
results_dir = Path("results")
data_dir = Path("data")

workspace.mkdir(parents=True, exist_ok=True)

warnings.filterwarnings("ignore")

cat_limit = 10
n_seeds = 5

version = "take6_v1"
changelog = f"multiple_imputation{n_seeds}_last_try"


In [2]:
def augment_base_dataset(df):
    for rid in df["RID_HASH"].unique():
        visits = len(df[df["RID_HASH"] == rid])
        last_visit = df[df["RID_HASH"] == rid]["VISCODE"].max()

        df.loc[df["RID_HASH"] == rid, "total_visits"] = visits
        df.loc[df["RID_HASH"] == rid, "last_visit"] = last_visit

    return df

def dataframe_hash(df: pd.DataFrame) -> str:
    cols = sorted(list(df.columns))
    return str(abs(pd.util.hash_pandas_object(df[cols].fillna(0)).sum()))


In [3]:
dev_set = pd.read_csv(data_dir / "dev_set.csv")
dev_set = dev_set.sort_values(["RID_HASH", "VISCODE"])
dev_set = augment_base_dataset(dev_set)

scaled_cols = [
    "MMSE",
    "ADAS13",
    "Ventricles",
    "Hippocampus",
    "WholeBrain",
    "Entorhinal",
    "Fusiform",
    "MidTemp",
]

scaler = MinMaxScaler().fit(dev_set[scaled_cols])
dev_set[scaled_cols] = scaler.transform(dev_set[scaled_cols])

dev_set

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
2163,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,0,79.1,0,20,1.0,1.0,0.5,0.923077,0.164384,0.071871,0.548646,0.376516,0.464021,0.194906,0.400709,2.0,6.0
154,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,6,79.6,0,20,1.0,1.0,1.5,0.923077,0.237397,0.071956,0.548307,0.366398,0.403880,0.193367,0.397291,2.0,6.0
1385,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,0,72.9,1,12,1.0,1.0,1.0,1.000000,0.123288,0.142655,0.525169,0.235599,0.513404,0.356253,0.294774,6.0,60.0
2698,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,6,73.4,1,12,1.0,1.0,1.0,1.000000,0.164384,0.144729,0.549210,0.230361,0.435097,0.322395,0.294175,6.0,60.0
2291,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,12,73.9,1,12,1.0,1.0,1.0,0.961538,0.109589,0.155550,0.527878,0.215944,0.487831,0.342600,0.277552,6.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2895,ff59785f0d6b12fc51a07f09bb3a02790e54d04bb0803b...,60,79.8,1,19,1.0,0.0,3.0,0.923077,0.223699,0.170895,0.357020,0.321346,0.310935,0.399047,0.461476,7.0,102.0
2646,ff59785f0d6b12fc51a07f09bb3a02790e54d04bb0803b...,102,83.3,1,19,1.0,0.0,3.0,0.846154,0.168904,0.178231,0.352043,0.309095,0.256790,0.372685,0.416478,7.0,102.0
1962,ff98c50c3e97b776ab61db883cf1c8fd5a6d304d7165c8...,0,72.1,0,12,1.0,0.0,0.5,0.884615,0.150685,0.416382,0.602438,0.636654,0.610229,0.743037,0.624631,3.0,24.0
122,ff98c50c3e97b776ab61db883cf1c8fd5a6d304d7165c8...,12,73.1,0,12,1.0,0.0,1.0,0.961538,0.155205,0.398451,0.608521,0.634650,0.617108,0.729087,0.638477,3.0,24.0


In [4]:
dev_1 = pd.read_csv(data_dir / "dev_1.csv")
dev_1 = dev_1.sort_values(["RID_HASH", "VISCODE"])
dev_1 = augment_base_dataset(dev_1)
dev_1[scaled_cols] = scaler.transform(dev_1[scaled_cols])

dev_1

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
2163,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,0,,0.0,20.0,1.0,1.0,0.5,0.923077,0.164384,,,0.376516,,,,2.0,6.0
154,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,6,79.6,0.0,20.0,1.0,1.0,1.5,0.923077,0.237397,0.071956,0.548307,0.366398,0.403880,0.193367,0.397291,2.0,6.0
1385,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,0,,1.0,12.0,,1.0,,,,,0.525169,0.235599,0.513404,0.356253,0.294774,6.0,60.0
2698,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,6,,1.0,12.0,,1.0,,,,,0.549210,0.230361,0.435097,0.322395,0.294175,6.0,60.0
2291,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,12,,1.0,12.0,,1.0,,,,,0.527878,0.215944,0.487831,0.342600,0.277552,6.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2895,ff59785f0d6b12fc51a07f09bb3a02790e54d04bb0803b...,60,79.8,1.0,19.0,,0.0,,,,0.170895,,0.321346,,,,7.0,102.0
2646,ff59785f0d6b12fc51a07f09bb3a02790e54d04bb0803b...,102,83.3,1.0,19.0,,0.0,,,,0.178231,,0.309095,,,,7.0,102.0
1962,ff98c50c3e97b776ab61db883cf1c8fd5a6d304d7165c8...,0,72.1,,12.0,1.0,0.0,0.5,0.884615,0.150685,0.416382,0.602438,,0.610229,0.743037,0.624631,3.0,24.0
122,ff98c50c3e97b776ab61db883cf1c8fd5a6d304d7165c8...,12,73.1,,12.0,1.0,0.0,1.0,0.961538,0.155205,0.398451,0.608521,,0.617108,0.729087,0.638477,3.0,24.0


In [5]:
dev_2 = pd.read_csv(data_dir / "dev_2.csv")
dev_2 = dev_2.sort_values(["RID_HASH", "VISCODE"])
dev_2 = augment_base_dataset(dev_2)
dev_2[scaled_cols] = scaler.transform(dev_2[scaled_cols])

dev_2

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
2163,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,0,79.1,0.0,20.0,1.0,1.0,0.5,0.923077,0.164384,0.071871,0.548646,0.376516,0.464021,0.194906,0.400709,2.0,6.0
154,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,6,79.6,,,,1.0,,,,0.071956,0.548307,,0.403880,0.193367,0.397291,2.0,6.0
1385,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,0,72.9,,12.0,1.0,1.0,1.0,1.000000,0.123288,0.142655,0.525169,,0.513404,0.356253,0.294774,6.0,60.0
2698,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,6,,,12.0,1.0,1.0,1.0,1.000000,0.164384,,0.549210,,0.435097,0.322395,0.294175,6.0,60.0
2291,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,12,,,12.0,1.0,1.0,1.0,0.961538,0.109589,,,,,,,6.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2895,ff59785f0d6b12fc51a07f09bb3a02790e54d04bb0803b...,60,,,19.0,1.0,0.0,3.0,0.923077,0.223699,,0.357020,,0.310935,0.399047,0.461476,7.0,102.0
2646,ff59785f0d6b12fc51a07f09bb3a02790e54d04bb0803b...,102,,,19.0,1.0,0.0,3.0,0.846154,0.168904,,0.352043,,0.256790,0.372685,0.416478,7.0,102.0
1962,ff98c50c3e97b776ab61db883cf1c8fd5a6d304d7165c8...,0,72.1,,12.0,,0.0,,,,0.416382,0.602438,,0.610229,0.743037,0.624631,3.0,24.0
122,ff98c50c3e97b776ab61db883cf1c8fd5a6d304d7165c8...,12,,,12.0,,0.0,,,,,,,,,,3.0,24.0


In [6]:
submission = pd.read_csv(data_dir / "sample_submission.csv")

submission.values[1]

array(['6b6a7136f42a8dbd469a201b88e2abb54a93667822761357db2f6d620da6af8a_0_Ventricles_test_A',
       40613.0818580834], dtype=object)

In [7]:
test_A = pd.read_csv(data_dir / "test_A.csv")
test_A = augment_base_dataset(test_A)
test_A[scaled_cols] = scaler.transform(test_A[scaled_cols])

test_A

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
0,988b6137f4352c01e4b52790505caa0c3ec438f117000a...,24,,,18.0,,0.0,,,,,,,,,,6.0,36.0
1,fb640cef87a6af00053e632140ce18f5722431bb92576b...,12,66.4,1.0,18.0,1.0,1.0,1.5,0.961538,0.077671,0.145063,,0.542904,,,,5.0,24.0
2,f24f78d62c90319b575dfb48a482159c4d0df14cb71530...,66,74.5,0.0,14.0,0.0,0.0,0.0,0.961538,0.050274,0.559104,0.565102,0.753302,0.641093,0.911086,0.866886,5.0,96.0
3,da4cbd3f09e8ddc87cc72e542d43f072e7df288face65e...,0,,,16.0,0.0,0.0,0.0,1.000000,0.191781,,,,,,,2.0,36.0
4,f665c6ee86356bdd135be03c61348607cabd64ed8433ba...,12,82.7,,13.0,,1.0,,,,0.206872,0.353047,,0.208289,0.188006,0.363489,7.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1323,51923c5d7573ef46aa9197cae78c3305abea5b3479331f...,6,83.5,,18.0,1.0,0.0,2.5,0.846154,0.150685,0.252103,0.541648,,0.454850,0.480663,0.479467,5.0,60.0
1324,06407d9ec85d62cd38189108ddffec23822f421b3db357...,0,,,20.0,,0.0,,,,,,,,,,1.0,0.0
1325,e5015703a58ccd5582a46d9f4a779edf062d683f3ae873...,132,83.0,0.0,20.0,0.0,1.0,0.0,0.961538,0.205479,0.193712,0.533115,0.491958,0.513933,0.508464,0.573437,9.0,132.0
1326,cf6ea2601bb119113371df79931cc3734b77218f734ad0...,12,82.4,,18.0,1.0,0.0,0.5,0.884615,0.205479,0.272259,0.572799,,0.597707,0.380480,0.361608,3.0,12.0


In [8]:
test_B = pd.read_csv(data_dir / "test_B.csv")
test_B = augment_base_dataset(test_B)
test_B[scaled_cols] = scaler.transform(test_B[scaled_cols])

test_B

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
0,90a4f1869cf459af5fe39e53f1c328540f1dcf5a1908f7...,60,67.9,1.0,20.0,0.0,1.0,0.0,1.000000,0.123288,0.069404,,0.330562,,,,6.0,60.0
1,fad8ca8f903cf3ddf566926eabdb8718e8568962675519...,30,69.1,,16.0,0.0,0.0,0.0,0.961538,0.059315,0.162418,0.749086,,0.899471,0.724619,0.481817,3.0,30.0
2,d342fb7689e49c754709870c77e1aa3ed770dd193e9f9c...,12,,,12.0,,1.0,,,,,,,,,,1.0,12.0
3,5319e7ba149f0f81715b5e7f854036fc937141840bbd52...,6,,,18.0,,0.0,,,,,0.476637,,0.430159,0.520727,0.415281,5.0,126.0
4,6eef135d8c4eca67b0e130b8f4aedbc37a99938224d661...,0,,0.0,16.0,0.0,0.0,0.0,0.961538,0.095890,,0.605756,0.508509,0.629277,0.604379,0.449383,5.0,72.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,fbf6267bf7d92b507feb4957d7aa90ea5bb50893bb79d4...,0,,,12.0,,2.0,,,,,0.492325,,0.497354,0.374770,0.460151,7.0,60.0
1466,03e8ddc654f8e27332c5b09618b355d7f9529d614adb0f...,12,81.5,0.0,15.0,,0.0,,,,0.264134,,0.208488,,,,3.0,12.0
1467,1156748dfd6e69e1f364c31584e957d3b1ef656b898942...,0,,0.0,18.0,0.0,0.0,0.0,0.961538,0.136986,,,0.686284,,,,4.0,84.0
1468,0c7e17c442e715e067bd472c1e472b4937914d7fb8d492...,12,,,16.0,,0.0,,,,,0.553883,,0.753086,0.606563,0.458784,4.0,42.0


In [9]:
test_A.isna().sum()

RID_HASH          0
VISCODE           0
AGE             612
PTGENDER_num    626
PTEDUCAT         65
DX_num          428
APOE4            49
CDRSB           428
MMSE            428
ADAS13          428
Ventricles      612
Hippocampus     668
WholeBrain      626
Entorhinal      668
Fusiform        668
MidTemp         668
total_visits      0
last_visit        0
dtype: int64

In [10]:
test_A.columns

Index(['RID_HASH', 'VISCODE', 'AGE', 'PTGENDER_num', 'PTEDUCAT', 'DX_num',
       'APOE4', 'CDRSB', 'MMSE', 'ADAS13', 'Ventricles', 'Hippocampus',
       'WholeBrain', 'Entorhinal', 'Fusiform', 'MidTemp', 'total_visits',
       'last_visit'],
      dtype='object')

In [11]:
from sklearn.preprocessing import LabelEncoder
import numpy as np
from hyperimpute.plugins.prediction import Classifiers, Regression
from hyperimpute.utils.tester import evaluate_regression, evaluate_estimator
from hyperimpute.utils.benchmarks import RMSE
from hyperimpute.utils.optimizer import EarlyStoppingExceeded, create_study
import optuna

train_cols = list(dev_set.drop(columns=["RID_HASH"]).columns)

eval_cols = [
    "DX_num",
    "CDRSB",
    "MMSE",
    "ADAS13",
    "Ventricles",
    "Hippocampus",
    "WholeBrain",
    "Entorhinal",
    "Fusiform",
    "MidTemp",
]



def prepare_temporal_data(data, target_col: str, direction: str):
    target_train_data = []
    target_train_labels = []

    for item in data.groupby("RID_HASH"):
        # print(item[0])
        local = item[1]
        local = local.sort_values(["RID_HASH", "VISCODE"])

        rid = local["RID_HASH"]

        prev_cols = [f"prev_{col}" for col in train_cols]
        prev_row = np.zeros(len(prev_cols))

        if direction == "forward":
            rows = local.iterrows()
        else:
            rows = local.iloc[::-1].iterrows()

        for idx, row in rows:
            target_val = row[target_col]
            tmp_row = row[train_cols].copy()
            src_data = tmp_row.to_frame().T.drop(columns=[target_col])

            src_data[prev_cols] = prev_row

            prev_row = tmp_row

            target_train_data.append(src_data)
            target_train_labels.append(target_val)

    target_train_data = pd.concat(target_train_data, ignore_index=True).astype(float)

    return target_train_data, target_train_labels


def evaluate_target(data, target_col: str, direction: str):
    train_data, labels = prepare_temporal_data(data, target_col, direction)
    assert target_col not in train_data.columns

    
    def evaluate_clf(plugin, args = {}):
        model = plugin(**args)
        encoded_labels = LabelEncoder().fit_transform(labels)

        return evaluate_estimator(
            model, train_data, pd.Series(encoded_labels)
        )["clf"]["aucroc"][0]
    
    def evaluate_reg(plugin, args = {}):
        model = plugin(**args)
        return evaluate_regression(model, train_data, labels)["clf"]["r2"][0]

    best_score = -99
    best_target_plugin = None
    for (clf_type, reg_type) in [
        ("lgbm", "lgbm_regressor"),
        ("xgboost", "xgboost_regressor"),
        ("logistic_regression", "linear_regression"),
        ("catboost", "catboost_regressor"),
        ("random_forest", "random_forest_regressor"),
        ("kneighbors", "kneighbors_regressor"),
    ]:
        if len(np.unique(labels)) < cat_limit:
            if clf_type is None:
                continue
            plugin = Classifiers().get_type(clf_type)
            cbk = evaluate_clf
        else:
            if reg_type is None:
                continue
            plugin = Regression().get_type(reg_type)
            cbk = evaluate_reg

        study, pruner = create_study(
            study_name=f"long_imputation_{plugin.name()}_{target_col}_{direction}",
            direction="maximize",
            load_if_exists = True,
        )
        
        def objective(trial: optuna.Trial) -> float:
            args = plugin.sample_hyperparameters(trial)
            pruner.check_trial(trial)

            try:
                score = cbk(plugin, args)
            except BaseException:
                print("      failed evaluation", plugin.name(), args)
                return -5

            #print(f"    >>  {plugin.name()} {args} -> {score}")
            pruner.report_score(score)

            return score

        try:
            study.optimize(objective, n_trials=100, timeout=60 * 10)
        except EarlyStoppingExceeded:
            pass

        baseline_score = cbk(plugin)

        if study.best_value > baseline_score:
            score = study.best_value
            args = study.best_trial.params
        else:
            score = baseline_score
            args = {}
            
        if score > best_score:
            best_score = score
            best_target_plugin, best_target_plugin_args = plugin, args
            
    print(f"     >> Selected {target_col} --> {best_target_plugin.name()} -- {best_target_plugin_args}", best_score)

    model = best_target_plugin(**best_target_plugin_args)
        
    if len(np.unique(labels)) < cat_limit:
        labels = LabelEncoder().fit_transform(labels)
    
    model.fit(train_data, labels)
    
    return model


def prepare_longitudinal_imputers(data, columns):
    imputers = {}

    for direction in ["forward", "reverse"]:
        imputers[direction] = {}
        for target_col in columns:
            train_data, labels = prepare_temporal_data(data, target_col, direction)
            print("train", target_col, direction, len(np.unique(labels)))

            if len(np.unique(labels)) > cat_limit:
                model = Regression().get("catboost_regressor")
            else:
                model = Classifiers().get("catboost")

            model.fit(train_data, labels)

            imputers[direction][target_col] = model

    return imputers


def prepare_longitudinal_imputers_automl(data, columns):
    imputers = {}

    for direction in ["forward", "reverse"]:
        imputers[direction] = {}
        for target_col in columns:
            model = evaluate_target(data, target_col, direction=direction)

            imputers[direction][target_col] = model

    return imputers

In [None]:
for col in eval_cols:
    evaluate_target(dev_set, col, direction = "forward")

     >> Selected DX_num --> xgboost -- {'reg_lambda': 8.962330859372681, 'reg_alpha': 8.608303558298164, 'colsample_bytree': 0.7745875968255177, 'colsample_bynode': 0.3990200802902815, 'colsample_bylevel': 0.8924327091878707, 'subsample': 0.8411533780703977, 'lr': 0.0001, 'max_depth': 3, 'n_estimators': 169, 'min_child_weight': 1, 'max_bin': 256, 'booster': 1} 0.9877047491583673
     >> Selected CDRSB --> linear_regression -- {'max_iter': 10000, 'solver': 1} 0.8235423012308886
     >> Selected MMSE --> xgboost_regressor -- {'reg_lambda': 2.1888371329742604, 'reg_alpha': 0.9312430343555846, 'max_depth': 2, 'n_estimators': 82, 'lr': 0.01} 0.7464127570286078
     >> Selected ADAS13 --> linear_regression -- {} 0.8184916603842121
     >> Selected Ventricles --> linear_regression -- {'max_iter': 100, 'solver': 1} 0.7916836168497275


In [None]:
for col in eval_cols:
    evaluate_target(dev_set, col, direction = "reverse")

In [None]:
raise

In [16]:
dev_set_id = dataframe_hash(dev_set)

imputers_bkp_file = workspace / f"longitudinal_imputers_scaled_cat{cat_limit}_{dev_set_id}_automl_100.bkp"

if imputers_bkp_file.exists():
    longitudinal_imputers = load_model_from_file(imputers_bkp_file)
else:
    longitudinal_imputers = prepare_longitudinal_imputers_automl(dev_set, eval_cols)
    save_model_to_file(imputers_bkp_file, longitudinal_imputers)

In [17]:
for direction in longitudinal_imputers:
    for col in longitudinal_imputers[direction]:
        print(direction, col,  longitudinal_imputers[direction][col].name())

forward DX_num xgboost
forward CDRSB linear_regression
forward MMSE xgboost_regressor
forward ADAS13 linear_regression
forward Ventricles linear_regression
forward Hippocampus xgboost_regressor
forward WholeBrain linear_regression
forward Entorhinal linear_regression
forward Fusiform linear_regression
forward MidTemp linear_regression
reverse DX_num xgboost
reverse CDRSB xgboost_regressor
reverse MMSE xgboost_regressor
reverse ADAS13 xgboost_regressor
reverse Ventricles linear_regression
reverse Hippocampus xgboost_regressor
reverse WholeBrain linear_regression
reverse Entorhinal linear_regression
reverse Fusiform linear_regression
reverse MidTemp linear_regression


## Preprocess data

In [18]:
from hyperimpute.plugins.imputers import Imputers

# VISCODE 6 * x -> AGE 0.5 * x

const_by_patient = ["PTGENDER_num", "PTEDUCAT", "APOE4"]


def dataframe_hash(df: pd.DataFrame) -> str:
    return str(abs(pd.util.hash_pandas_object(df).sum()))


def normalize(test_data):
    return test_data

def prepare_consts(train_data, test_data):
    test_data = test_data.copy()
    train_data = train_data.copy()

    train_data = train_data.sort_values(["RID_HASH", "VISCODE"])
    test_data = test_data.sort_values(["RID_HASH", "VISCODE"])

    for item in test_data.groupby("RID_HASH"):
        local = item[1]

        # fill consts
        for col in const_by_patient:
            if len(local[col].unique()) == 1:
                continue
            rid = local["RID_HASH"].unique()[0]

            val = local[col][~local[col].isna()].unique()[0]
            local[col] = local[col].fillna(val)
            test_data.loc[test_data["RID_HASH"] == rid, col] = test_data[
                test_data["RID_HASH"] == rid
            ][col].fillna(val)
            assert len(local[col].unique()) == 1, col

    return test_data


def prepare_age(train_data, test_data):
    test_data = test_data.copy()
    train_data = train_data.copy()

    train_data = train_data.sort_values(["RID_HASH", "VISCODE"])
    test_data = test_data.sort_values(["RID_HASH", "VISCODE"])

    col = "AGE"

    for rid in test_data["RID_HASH"].unique():
        local = test_data[test_data["RID_HASH"] == rid]

        # fill age
        ages = local["AGE"]
        if ages.isna().sum() == 0:
            continue

        if ages.isna().sum() == len(ages):
            continue

        # forward impute age
        prev_viscode = 0
        prev_age = 0
        for idx, row in local.iterrows():
            current_viscode = row["VISCODE"]
            local_idx = (test_data["VISCODE"] == current_viscode) & (
                test_data["RID_HASH"] == rid
            )
            if prev_age > 0 and prev_age == prev_age:
                pred_age = (current_viscode - prev_viscode) / 6 * 0.5 + prev_age
            else:
                pred_age = row[col]

            if pred_age == pred_age:
                # print("forward imputed", pred_age, current_viscode)
                test_data.loc[local_idx, col] = test_data.loc[local_idx][col].fillna(
                    pred_age
                )

            prev_viscode = row["VISCODE"]
            prev_age = pred_age

        # reverse impute age
        prev_viscode = 0
        prev_age = 0
        for idx, row in local.iloc[::-1].iterrows():
            current_viscode = row["VISCODE"]
            local_idx = (test_data["VISCODE"] == current_viscode) & (
                test_data["RID_HASH"] == rid
            )

            if prev_age > 0 and prev_age == prev_age:
                pred_age = prev_age - (prev_viscode - current_viscode) / 6 * 0.5
            else:
                pred_age = row[col]

            if pred_age == pred_age:
                # print("reversed imputed", pred_age, current_viscode)
                test_data.loc[local_idx, col] = test_data.loc[local_idx][col].fillna(
                    pred_age
                )

            prev_viscode = row["VISCODE"]
            prev_age = pred_age

        # print(test_data[(test_data["RID_HASH"] == rid)][["VISCODE", "AGE"]])
    return test_data


def impute_longitudinal(
    train_data,
    test_data,
    n_iter=5,
    eval_cols=[
        "DX_num",
        "CDRSB",
        "MMSE",
        "ADAS13",
        "Ventricles",
        "Hippocampus",
        "WholeBrain",
        "Entorhinal",
        "Fusiform",
        "MidTemp",
    ],
    imputed_test_data=None,
):
    test_data = test_data.copy()
    train_data = train_data.copy()

    if imputed_test_data is None:
        imputed_test_data = interm_imputation(
            train_data, test_data
        )

    train_data = train_data.sort_values(["RID_HASH", "VISCODE"])
    test_data = test_data.sort_values(["RID_HASH", "VISCODE"])
    imputed_test_data = imputed_test_data.sort_values(["RID_HASH", "VISCODE"])

    prev_cols = [f"prev_{col}" for col in train_cols]

    for rid in test_data["RID_HASH"].unique():
        patient = test_data[test_data["RID_HASH"] == rid]
        patient_imputed = imputed_test_data[imputed_test_data["RID_HASH"] == rid]

        prediction_rows = [pd.Series(np.zeros(len(prev_cols)), index=train_cols)]
        for ridx, row in patient.iterrows():
            prediction_rows.append(row[train_cols])
        prediction_rows.append(pd.Series(np.zeros(len(prev_cols)), index=train_cols))

        for col in eval_cols:
            if patient[col].isna().sum() == 0:
                continue

            for ridx, row in enumerate(prediction_rows[1:-1]):
                real_idx = ridx + 1
                if row[col] == row[col]:
                    continue
                current_viscode = row["VISCODE"]
                local_idx = (test_data["VISCODE"] == current_viscode) & (
                    test_data["RID_HASH"] == rid
                )

                prev_col_val = prediction_rows[real_idx - 1][col]
                next_col_val = prediction_rows[real_idx + 1][col]

                if next_col_val == next_col_val and ridx + 1 < len(patient_imputed):
                    eval_data = (
                        patient_imputed.iloc[ridx].to_frame().T[train_cols]
                    ).drop(columns = [col]) #row.to_frame().T[train_cols]
                    eval_data[prev_cols] = (
                        patient_imputed.iloc[ridx + 1].to_frame().T[train_cols].values
                    )
                    eval_data = eval_data.astype(float)

                    assert eval_data.isna().sum().sum() == 0
                    assert eval_data[f"prev_{col}"].values[0] == next_col_val

                    imputer = longitudinal_imputers["reverse"][col]
                    imputed_val = imputer.predict(eval_data).values.squeeze()

                    test_data.loc[local_idx, col] = imputed_val

                if prev_col_val == prev_col_val and ridx > 0:
                    # print("Imputing using the prev value", prev_col_val)
                    eval_data = (
                        patient_imputed.iloc[ridx].to_frame().T[train_cols]
                    ).drop(columns = [col])
                    eval_data[prev_cols] = (
                        patient_imputed.iloc[ridx - 1].to_frame().T[train_cols].values
                    )
                    eval_data = eval_data.astype(float)

                    assert eval_data.isna().sum().sum() == 0
                    assert eval_data[f"prev_{col}"].values[0] == prev_col_val

                    imputer = longitudinal_imputers["forward"][col]
                    imputed_val = imputer.predict(eval_data).values.squeeze()
                    
                    existing_value = test_data.loc[local_idx, col].values[0]
                    if existing_value == existing_value:
                        imputed_val = (imputed_val + existing_value) / 2
                    test_data.loc[local_idx, col] = imputed_val


    return normalize(test_data)

def interm_imputation(train_data, test_data, forward_first = True):
    test_data = test_data.copy()

    for rid in test_data["RID_HASH"].unique():
        local = test_data[test_data["RID_HASH"] == rid]

        if forward_first:
            local = local.ffill().bfill()
        else:
            local = local.bfill().ffill()

        test_data.loc[test_data["RID_HASH"] == rid] = local
        
    test_data = prepare_consts(train_data, test_data)
    test_data = prepare_age(train_data, test_data)
    return full_imputation(train_data, test_data)

def full_imputation(train_data, test_data):
    imputed_test_data = test_data.copy()
    imputer_kwargs = {
        "optimizer": "bayesian",
        "classifier_seed": ["xgboost",],
        "regression_seed": ["xgboost_regressor"],
        "class_threshold": cat_limit,
    }

    imputer = Imputers().get(
        "hyperimpute",
        **imputer_kwargs,
    )
    imputation_input = pd.concat([train_data, test_data], ignore_index=True)
    imputation_ids = imputation_input["RID_HASH"]
    imputation_input = imputation_input.drop(columns = ["RID_HASH"])
    
    imputed_test_data = imputer.fit_transform(imputation_input)
    imputed_test_data = imputed_test_data.tail(len(test_data))

    out_cols = ["RID_HASH"] + list(imputed_test_data.columns)
    imputed_test_data["RID_HASH"] = test_data["RID_HASH"].values
    
    return imputed_test_data[out_cols]

def evaluate_static_imputation(train_data, test_data, static_imputation):
    test_data = test_data.copy()
    train_data = train_data.copy()

    train_data = train_data.sort_values(["RID_HASH", "VISCODE"])
    test_data = test_data.sort_values(["RID_HASH", "VISCODE"])

    for rid in test_data["RID_HASH"].unique():
        patient = test_data[test_data["RID_HASH"] == rid]
        misses = []
        viscodes = []
        for idx, row in patient.iterrows():
            misses.append(row.isna().sum())
            viscodes.append(row["VISCODE"])
        cidx = np.argmin(misses)

        current_viscode = viscodes[cidx]
        local_idx = (test_data["VISCODE"] == current_viscode) & (
            test_data["RID_HASH"] == rid
        )
        imputed_idx = (static_imputation["VISCODE"] == current_viscode) & (
            static_imputation["RID_HASH"] == rid
        )

        if len(test_data[local_idx]) == 0:
            continue

        for col in test_data.columns:
            val = test_data.loc[local_idx][col].values[0]
            if val == val:
                continue
            imputed_val = static_imputation.loc[imputed_idx][col].values[0]
            test_data.loc[local_idx, col] = imputed_val

            # print("imputed", test_data.loc[local_idx, col])

    return normalize(test_data)


def impute_data(
    train_data, test_data, use_longitudinal=True, static_strategy="missmin"
):
    test_id = dataframe_hash(test_data)
    train_id = dataframe_hash(train_data)

    print("Evaluate constants", test_id, test_data.isna().sum().sum())
    test_data = prepare_consts(train_data, test_data)
    test_data = prepare_age(train_data, test_data)

    while use_longitudinal:
        print("Evaluate longitudinals", test_id, test_data.isna().sum().sum())
        new_test_data = impute_longitudinal(train_data, test_data)
        if new_test_data.isna().sum().sum() == test_data.isna().sum().sum():
            break

        test_data = new_test_data

    print(
        "Evaluate static imputation",
        test_id,
        test_data.isna().sum().sum(),
        static_strategy,
    )
    static_imputation = full_imputation(train_data, test_data)

    test_data = evaluate_static_imputation(train_data, test_data, static_imputation)

    print("Evaluate constants take 2", test_id, test_data.isna().sum().sum())
    test_data = prepare_consts(train_data, test_data)
    test_data = prepare_age(train_data, test_data)

    while use_longitudinal:
        print("Evaluate longitudinals take 2", test_id, test_data.isna().sum().sum())
        new_test_data = impute_longitudinal(train_data, test_data)
        if new_test_data.isna().sum().sum() == test_data.isna().sum().sum():
            break

        test_data = new_test_data

    print("Normalize data", test_id, test_data.isna().sum().sum())
    return normalize(test_data)

In [19]:
dev_1_eval = impute_data(dev_set, dev_1)
dev_2_eval = impute_data(dev_set, dev_2)

Evaluate constants 8756985925945686816 22466
Evaluate longitudinals 8756985925945686816 19844


[2022-11-09T12:26:03.494688+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:26:03.521989+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:26:03.522620+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:26:03.725939+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9352865489811768. old score = -9999999
[2022-11-09T12:26:05.323179+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9352865489811768 <-- Model xgboost({})
[2022-11-09T12:26:05.690715+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7208799560547148. old score = -9999999
[2022-11-09T12:26:06.762308+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7208799560547148 <-- Model xgboost_regressor({})
[2022-11-09T12:26:07.123063+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3383687029319891. old score = -9999999
[2022-11-09T12:26:26.9092

Evaluate longitudinals 8756985925945686816 14795


[2022-11-09T12:30:54.959441+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:30:54.983424+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:30:54.983991+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:30:55.186054+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9311935776506117. old score = -9999999
[2022-11-09T12:30:56.774330+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9311935776506117 <-- Model xgboost({})
[2022-11-09T12:30:57.150268+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7253432868938026. old score = -9999999
[2022-11-09T12:30:58.164353+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7253432868938026 <-- Model xgboost_regressor({})
[2022-11-09T12:30:58.534577+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.31352164412351885. old score = -9999999
[2022-11-09T12:30:59.895

Evaluate longitudinals 8756985925945686816 11519


[2022-11-09T12:35:33.360404+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:35:33.384524+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:35:33.385174+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:35:33.588423+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9306749723038247. old score = -9999999
[2022-11-09T12:35:35.098791+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9306749723038247 <-- Model xgboost({})
[2022-11-09T12:35:35.491508+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7273816617988911. old score = -9999999
[2022-11-09T12:35:36.947250+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7408523158908511 <-- Model xgboost_regressor({'reg_lambda': 8.64350553798755, 'reg_alpha': 0.17037809345144456, 'max_depth': 4, 'n_estimators': 66, 'lr': 0.001})
[2022-11-09T12:35:37.308119+0200][3193248][INFO] [

Evaluate longitudinals 8756985925945686816 9881


[2022-11-09T12:40:27.341076+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:40:27.366131+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:40:27.366689+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:40:27.572730+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9291082346018653. old score = -9999999
[2022-11-09T12:40:29.340712+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9291082346018653 <-- Model xgboost({})
[2022-11-09T12:40:29.721362+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7223495977031287. old score = -9999999
[2022-11-09T12:40:30.909386+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7223495977031287 <-- Model xgboost_regressor({})
[2022-11-09T12:40:31.289251+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2983128293484597. old score = -9999999
[2022-11-09T12:40:51.7535

Evaluate longitudinals 8756985925945686816 9445


[2022-11-09T12:44:41.388147+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:44:41.412251+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:44:41.412946+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:44:41.615961+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9295150753857544. old score = -9999999
[2022-11-09T12:44:43.516138+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9295150753857544 <-- Model xgboost({})
[2022-11-09T12:44:43.904890+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7236947157032319. old score = -9999999
[2022-11-09T12:44:44.962855+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7236947157032319 <-- Model xgboost_regressor({})
[2022-11-09T12:44:45.351495+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.29616925155723584. old score = -9999999
[2022-11-09T12:45:06.201

Evaluate longitudinals 8756985925945686816 9241


[2022-11-09T12:48:27.404050+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:48:27.428433+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:48:27.429086+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:48:27.632245+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9262818083421986. old score = -9999999
[2022-11-09T12:48:29.289150+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9262818083421986 <-- Model xgboost({})
[2022-11-09T12:48:29.668379+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7242854452855487. old score = -9999999
[2022-11-09T12:48:30.813137+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7242854452855487 <-- Model xgboost_regressor({})
[2022-11-09T12:48:31.192280+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2939966277835752. old score = -9999999
[2022-11-09T12:48:32.5233

Evaluate longitudinals 8756985925945686816 9132


[2022-11-09T12:51:27.851183+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:51:27.875557+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:51:27.876161+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:51:28.079338+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9244641733640597. old score = -9999999
[2022-11-09T12:51:29.688965+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9244641733640597 <-- Model xgboost({})
[2022-11-09T12:51:30.071417+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7227736352343147. old score = -9999999
[2022-11-09T12:51:31.207215+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7227736352343147 <-- Model xgboost_regressor({})
[2022-11-09T12:51:31.586689+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2893227066233032. old score = -9999999
[2022-11-09T12:51:52.5758

Evaluate longitudinals 8756985925945686816 9127


[2022-11-09T12:56:12.202782+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:56:12.227486+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:56:12.228084+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:56:12.431364+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9244597970056131. old score = -9999999
[2022-11-09T12:56:13.691722+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9244597970056131 <-- Model xgboost({})
[2022-11-09T12:56:14.073850+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7227436443991186. old score = -9999999
[2022-11-09T12:56:15.418675+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7227436443991186 <-- Model xgboost_regressor({})
[2022-11-09T12:56:15.803281+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2893227066233032. old score = -9999999
[2022-11-09T12:56:36.7212

Evaluate longitudinals 8756985925945686816 9126


[2022-11-09T12:59:27.982812+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T12:59:28.007104+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T12:59:28.007700+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T12:59:28.210822+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9244597970056131. old score = -9999999
[2022-11-09T12:59:30.871119+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9293943132677924 <-- Model xgboost({'reg_lambda': 9.747590814192312, 'reg_alpha': 2.272736847293677, 'colsample_bytree': 0.34227958040519885, 'colsample_bynode': 0.6854730843538198, 'colsample_bylevel': 0.7785607744881073, 'subsample': 0.6874068604186498, 'lr': 0.0001, 'max_depth': 5, 'n_estimators': 273, 'min_child_weight': 3, 'max_bin': 473, 'booster': 2})
[2022-11-09T12:59:31.487900+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.72093986138012

Evaluate static imputation 8756985925945686816 9126 missmin


[2022-11-09T13:03:40.374195+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9244597970056131. old score = -9999999
[2022-11-09T13:03:42.181752+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9244597970056131 <-- Model xgboost({})
[2022-11-09T13:03:42.563554+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7227436443991186. old score = -9999999
[2022-11-09T13:03:43.611093+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7227436443991186 <-- Model xgboost_regressor({})
[2022-11-09T13:03:43.989128+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2893225228402823. old score = -9999999
[2022-11-09T13:04:04.990215+0200][3193248][INFO]      >>> Column PTEDUCAT <-- score 0.3787550016542833 <-- Model xgboost_regressor({'reg_lambda': 2.6969845324483863, 'reg_alpha': 6.976259453149939, 'max_depth': 4, 'n_estimators': 100, 'lr': 0.001})
[2022-11-09T13:04:05.462866+0200][3193248][INFO] [Evaluate Hippocampus]

Evaluate constants take 2 8756985925945686816 5492
Evaluate longitudinals take 2 8756985925945686816 4386


[2022-11-09T13:06:43.764941+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:06:43.790611+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:06:43.791229+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:06:43.798922+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:06:43.805774+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:06:43.812465+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:06:43.819090+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:06:43.825825+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:06:43.832446+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:06:43.839256+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:06:43.846212+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:06:43.853015+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:06:43.859664+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 2959


[2022-11-09T13:07:09.513252+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:07:09.537846+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:07:09.538381+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:07:09.546193+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:07:09.552889+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:07:09.559752+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:07:09.566498+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:07:09.573294+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:07:09.580041+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:07:09.586793+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:07:09.593724+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:07:09.600433+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:07:09.607102+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 1918


[2022-11-09T13:07:31.091347+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:07:31.115923+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:07:31.116525+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:07:31.124822+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:07:31.131704+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:07:31.138619+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:07:31.145591+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:07:31.152460+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:07:31.159907+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:07:31.166851+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:07:31.174111+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:07:31.181318+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:07:31.188470+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 1197


[2022-11-09T13:07:48.992087+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:07:49.016675+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:07:49.017300+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:07:49.024746+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:07:49.031363+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:07:49.038150+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:07:49.044717+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:07:49.051266+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:07:49.057928+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:07:49.064593+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:07:49.071485+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:07:49.078011+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:07:49.084664+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 712


[2022-11-09T13:08:04.237893+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:08:04.262587+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:08:04.263218+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:08:04.270679+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:08:04.277299+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:08:04.283899+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:08:04.290532+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:08:04.297263+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:08:04.303882+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:08:04.310611+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:08:04.317348+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:08:04.324079+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:08:04.330777+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 397


[2022-11-09T13:08:17.587970+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:08:17.612975+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:08:17.613577+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:08:17.621281+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:08:17.627998+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:08:17.634595+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:08:17.641302+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:08:17.648075+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:08:17.654850+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:08:17.661820+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:08:17.668516+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:08:17.675369+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:08:17.682199+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 201


[2022-11-09T13:08:29.701856+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:08:29.725933+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:08:29.726529+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:08:29.734026+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:08:29.740730+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:08:29.747275+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:08:29.753764+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:08:29.760433+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:08:29.767177+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:08:29.774318+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:08:29.781001+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:08:29.787638+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:08:29.794394+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 89


[2022-11-09T13:08:40.940742+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:08:40.965728+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:08:40.966365+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:08:40.973849+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:08:40.980426+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:08:40.986984+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:08:40.993725+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:08:41.000536+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:08:41.007173+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:08:41.013919+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:08:41.020650+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:08:41.027409+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:08:41.034125+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 27


[2022-11-09T13:08:51.612934+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:08:51.637600+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:08:51.638266+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:08:51.646045+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:08:51.652819+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:08:51.659476+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:08:51.666198+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:08:51.672879+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:08:51.679649+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:08:51.686497+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:08:51.693294+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:08:51.699984+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:08:51.706580+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 4


[2022-11-09T13:09:01.911956+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:09:01.936305+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:09:01.936951+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:09:01.944502+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:09:01.951122+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:09:01.957770+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:09:01.964546+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:09:01.971346+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:09:01.978124+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:09:01.984933+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:09:01.991692+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:09:01.998522+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:09:02.005262+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8756985925945686816 0


[2022-11-09T13:09:12.026442+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:09:12.050900+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:09:12.051564+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:09:12.059157+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:09:12.065829+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:09:12.072378+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:09:12.079116+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:09:12.086016+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:09:12.092901+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:09:12.099809+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:09:12.106501+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:09:12.113180+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:09:12.120324+0200][3193248][INFO]   > Imputation it

Normalize data 8756985925945686816 0
Evaluate constants 1637477679073542181 22991
Evaluate longitudinals 1637477679073542181 20000


[2022-11-09T13:09:27.817701+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:09:27.847397+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:09:27.848067+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:09:28.052222+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9358957158597447. old score = -9999999
[2022-11-09T13:09:29.796727+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9358957158597447 <-- Model xgboost({})
[2022-11-09T13:09:30.164835+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7261934097658648. old score = -9999999
[2022-11-09T13:09:31.365732+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7261934097658648 <-- Model xgboost_regressor({})
[2022-11-09T13:09:31.739941+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3377801933814129. old score = -9999999
[2022-11-09T13:09:51.8988

Evaluate longitudinals 1637477679073542181 14883


[2022-11-09T13:13:59.233569+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:13:59.257662+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:13:59.258350+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:13:59.462127+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9362881586622593. old score = -9999999
[2022-11-09T13:14:01.438816+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9362881586622593 <-- Model xgboost({})
[2022-11-09T13:14:01.817226+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7214677199377355. old score = -9999999
[2022-11-09T13:14:03.000385+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7214677199377355 <-- Model xgboost_regressor({})
[2022-11-09T13:14:03.371616+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3317740182326123. old score = -9999999
[2022-11-09T13:14:24.1110

Evaluate longitudinals 1637477679073542181 11491


[2022-11-09T13:17:51.264114+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:17:51.288867+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:17:51.289465+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:17:51.504071+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9371504958195606. old score = -9999999
[2022-11-09T13:17:52.989419+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9371504958195606 <-- Model xgboost({})
[2022-11-09T13:17:53.370428+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7182025667477394. old score = -9999999
[2022-11-09T13:17:54.473494+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7182025667477394 <-- Model xgboost_regressor({})
[2022-11-09T13:17:54.848560+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3207928560412515. old score = -9999999
[2022-11-09T13:18:15.3168

Evaluate longitudinals 1637477679073542181 9892


[2022-11-09T13:21:44.896141+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:21:44.920722+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:21:44.921421+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:21:45.125262+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9356072979454275. old score = -9999999
[2022-11-09T13:21:46.604361+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9356072979454275 <-- Model xgboost({})
[2022-11-09T13:21:46.985758+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7214666271512393. old score = -9999999
[2022-11-09T13:21:49.395387+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7502926848718892 <-- Model xgboost_regressor({'reg_lambda': 7.542063881848956, 'reg_alpha': 0.2403021756527126, 'max_depth': 4, 'n_estimators': 96, 'lr': 0.0001})
[2022-11-09T13:21:49.834291+0200][3193248][INFO] 

Evaluate longitudinals 1637477679073542181 9506


[2022-11-09T13:24:55.615804+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:24:55.639940+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:24:55.640591+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:24:55.843555+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9335747618121719. old score = -9999999
[2022-11-09T13:24:57.581754+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9335747618121719 <-- Model xgboost({})
[2022-11-09T13:24:57.963952+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7173682556273653. old score = -9999999
[2022-11-09T13:24:59.074313+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7173682556273653 <-- Model xgboost_regressor({})
[2022-11-09T13:24:59.453165+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.30530680764437285. old score = -9999999
[2022-11-09T13:25:20.363

Evaluate longitudinals 1637477679073542181 9321


[2022-11-09T13:29:12.260905+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:29:12.284782+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:29:12.285337+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:29:12.488290+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9318108756238253. old score = -9999999
[2022-11-09T13:29:14.253275+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9318108756238253 <-- Model xgboost({})
[2022-11-09T13:29:14.635123+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7154646376221647. old score = -9999999
[2022-11-09T13:29:15.514955+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7154646376221647 <-- Model xgboost_regressor({})
[2022-11-09T13:29:15.894899+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3294673261941555. old score = -9999999
[2022-11-09T13:29:36.7755

Evaluate longitudinals 1637477679073542181 9218


[2022-11-09T13:32:24.725412+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:32:24.749824+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:32:24.750414+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:32:24.955523+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9349747229243632. old score = -9999999
[2022-11-09T13:32:26.781673+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9349747229243632 <-- Model xgboost({})
[2022-11-09T13:32:27.171695+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7143175691292248. old score = -9999999
[2022-11-09T13:32:28.296564+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7143175691292248 <-- Model xgboost_regressor({})
[2022-11-09T13:32:28.684468+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.31115743960975684. old score = -9999999
[2022-11-09T13:32:49.296

Evaluate longitudinals 1637477679073542181 9196


[2022-11-09T13:35:48.166996+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:35:48.191802+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:35:48.192429+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:35:48.395647+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.932494004796163. old score = -9999999
[2022-11-09T13:35:52.692458+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9421095015879188 <-- Model xgboost({'reg_lambda': 2.8291145614669326, 'reg_alpha': 0.7554107644453453, 'colsample_bytree': 0.7709435661314413, 'colsample_bynode': 0.23506988817241992, 'colsample_bylevel': 0.8579252111158383, 'subsample': 0.3498114454381951, 'lr': 0.001, 'max_depth': 5, 'n_estimators': 287, 'min_child_weight': 0, 'max_bin': 394, 'booster': 2})
[2022-11-09T13:35:53.303752+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.71533063608095

Evaluate longitudinals 1637477679073542181 9192


[2022-11-09T13:39:09.602783+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:39:09.626884+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:39:09.627504+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:39:09.830650+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.932494004796163. old score = -9999999
[2022-11-09T13:39:11.599002+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.932494004796163 <-- Model xgboost({})
[2022-11-09T13:39:11.982803+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7207911858414242. old score = -9999999
[2022-11-09T13:39:12.957857+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7207911858414242 <-- Model xgboost_regressor({})
[2022-11-09T13:39:13.337621+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3057673279366528. old score = -9999999
[2022-11-09T13:39:34.009072

Evaluate static imputation 1637477679073542181 9192 missmin


[2022-11-09T13:43:06.395398+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.932494004796163. old score = -9999999
[2022-11-09T13:43:07.733441+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.932494004796163 <-- Model xgboost({})
[2022-11-09T13:43:08.115818+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.7207911858414242. old score = -9999999
[2022-11-09T13:43:09.128067+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7207911858414242 <-- Model xgboost_regressor({})
[2022-11-09T13:43:09.507718+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.3057673279366528. old score = -9999999
[2022-11-09T13:43:30.753853+0200][3193248][INFO]      >>> Column PTEDUCAT <-- score 0.3947420740207125 <-- Model xgboost_regressor({'reg_lambda': 2.9297788083913163, 'reg_alpha': 9.116246979300765, 'max_depth': 4, 'n_estimators': 98, 'lr': 0.01})
[2022-11-09T13:43:31.215773+0200][3193248][INFO] [Evaluate Hippocampus] pre

Evaluate constants take 2 1637477679073542181 5410
Evaluate longitudinals take 2 1637477679073542181 4115


[2022-11-09T13:47:41.597954+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:47:41.622303+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:47:41.622951+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:47:41.630421+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:47:41.637177+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:47:41.643655+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:47:41.650209+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:47:41.656839+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:47:41.663581+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:47:41.670330+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:47:41.676956+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:47:41.683608+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:47:41.690336+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 2690


[2022-11-09T13:48:07.515508+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:48:07.540710+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:48:07.541351+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:48:07.548890+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:48:07.555576+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:48:07.562332+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:48:07.569176+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:48:07.576079+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:48:07.582857+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:48:07.589687+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:48:07.596465+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:48:07.603234+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:48:07.610075+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 1680


[2022-11-09T13:48:28.363021+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:48:28.387503+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:48:28.388154+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:48:28.395802+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:48:28.402637+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:48:28.409471+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:48:28.416043+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:48:28.422689+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:48:28.429455+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:48:28.436216+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:48:28.443213+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:48:28.449801+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:48:28.456386+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 1027


[2022-11-09T13:48:45.299308+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:48:45.323552+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:48:45.324095+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:48:45.331403+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:48:45.338235+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:48:45.344843+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:48:45.351449+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:48:45.358128+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:48:45.364774+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:48:45.371513+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:48:45.378147+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:48:45.384716+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:48:45.391334+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 606


[2022-11-09T13:48:59.793500+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:48:59.818217+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:48:59.818786+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:48:59.826487+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:48:59.833354+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:48:59.840139+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:48:59.847089+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:48:59.853841+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:48:59.860574+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:48:59.867317+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:48:59.874075+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:48:59.880604+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:48:59.887270+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 346


[2022-11-09T13:49:12.592264+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:49:12.617150+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:49:12.617717+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:49:12.625468+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:49:12.632445+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:49:12.639529+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:49:12.646298+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:49:12.653195+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:49:12.660117+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:49:12.666711+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:49:12.673598+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:49:12.680180+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:49:12.687113+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 181


[2022-11-09T13:49:24.389939+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:49:24.414376+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:49:24.414957+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:49:24.422770+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:49:24.429627+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:49:24.436362+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:49:24.443340+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:49:24.450149+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:49:24.456959+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:49:24.463879+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:49:24.470634+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:49:24.477300+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:49:24.483883+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 76


[2022-11-09T13:49:35.672616+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:49:35.698097+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:49:35.698689+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:49:35.706018+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:49:35.712595+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:49:35.719102+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:49:35.725620+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:49:35.732128+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:49:35.738729+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:49:35.745653+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:49:35.752395+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:49:35.759289+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:49:35.766178+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 21


[2022-11-09T13:49:46.384224+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:49:46.408996+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:49:46.409612+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:49:46.416899+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:49:46.423711+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:49:46.430601+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:49:46.437344+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:49:46.443978+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:49:46.450659+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:49:46.457600+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:49:46.464450+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:49:46.471162+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:49:46.477896+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 2


[2022-11-09T13:49:56.641610+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:49:56.666239+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:49:56.666805+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:49:56.674134+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:49:56.680716+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:49:56.687355+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:49:56.694323+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:49:56.701406+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:49:56.708467+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:49:56.715452+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:49:56.722419+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:49:56.729231+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:49:56.735992+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 1


[2022-11-09T13:50:06.752330+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:50:06.776781+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:50:06.777399+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:50:06.785085+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:50:06.791909+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:50:06.798483+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:50:06.805076+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:50:06.811694+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:50:06.818437+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:50:06.825541+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:50:06.832229+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:50:06.839138+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:50:06.845872+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1637477679073542181 0


[2022-11-09T13:50:16.861232+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:50:16.886701+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:50:16.887296+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:50:16.894875+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:50:16.901547+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:50:16.908129+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:50:16.915021+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:50:16.922110+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:50:16.929063+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:50:16.935898+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:50:16.942752+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:50:16.949459+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:50:16.956050+0200][3193248][INFO]   > Imputation it

Normalize data 1637477679073542181 0


In [20]:
# use static missmin visit strategy
from hyperimpute.plugins.imputers import Imputers
from hyperimpute.utils.benchmarks import benchmark_model
from sklearn.preprocessing import LabelEncoder

train_eval_data = pd.concat([dev_set, dev_1_eval, dev_2_eval], ignore_index=True)

train_eval_data_raw = pd.concat([dev_set, dev_1, dev_2], ignore_index=True)

train_gt = pd.concat([dev_set, dev_set, dev_set], ignore_index=True)

train_mask = train_eval_data_raw.isna().astype(int)
train_mask_bool = train_eval_data_raw.isna()

le = LabelEncoder().fit(train_gt["RID_HASH"])
train_gt["RID_HASH"] = le.transform(train_gt["RID_HASH"])
train_eval_data["RID_HASH"] = le.transform(train_eval_data["RID_HASH"])

plugin = Imputers().get(
    "hyperimpute",
    optimizer="simple",
)

benchmark_model("missforest", plugin, train_gt, train_eval_data, train_mask)

[2022-11-09T13:50:21.640550+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:50:21.673222+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:50:21.673899+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:50:21.683617+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T13:50:21.692370+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T13:50:21.700994+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T13:50:21.709919+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T13:50:21.718332+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T13:50:21.726671+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T13:50:21.735120+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T13:50:21.743636+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T13:50:21.752135+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T13:50:21.760754+0200][3193248][INFO]   > Imputation it

(1.2126093403476297, 0.4807779568291668)

In [21]:
# Current best (1.2941082455707285, 0.6173876160675963)


## Submission data

In [22]:
test_A_eval = impute_data(dev_set, test_A)
test_B_eval = impute_data(dev_set, test_B)

Evaluate constants 1708900434718589799 6974
Evaluate longitudinals 1708900434718589799 6161


[2022-11-09T13:50:24.937492+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:50:24.959505+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:50:24.960056+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:50:25.171800+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9110322380000676. old score = -9999999
[2022-11-09T13:50:26.752141+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9110322380000676 <-- Model xgboost({})
[2022-11-09T13:50:27.089756+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6850865995975314. old score = -9999999
[2022-11-09T13:50:28.096097+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6850865995975314 <-- Model xgboost_regressor({})
[2022-11-09T13:50:28.435177+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.24010931618225062. old score = -9999999
[2022-11-09T13:50:48.391

Evaluate longitudinals 1708900434718589799 4562


[2022-11-09T13:54:15.670136+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:54:15.691748+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:54:15.692300+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:54:15.912228+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9140756219543197. old score = -9999999
[2022-11-09T13:54:17.488728+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9140756219543197 <-- Model xgboost({})
[2022-11-09T13:54:17.828643+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6884131956602488. old score = -9999999
[2022-11-09T13:54:19.026115+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6884131956602488 <-- Model xgboost_regressor({})
[2022-11-09T13:54:19.363455+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.22340113433057002. old score = -9999999
[2022-11-09T13:54:40.265

Evaluate longitudinals 1708900434718589799 3497


[2022-11-09T13:57:24.710429+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T13:57:24.731709+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T13:57:24.732297+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T13:57:24.938581+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9090589048315918. old score = -9999999
[2022-11-09T13:57:26.972224+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9090589048315918 <-- Model xgboost({})
[2022-11-09T13:57:27.312280+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6908449573135915. old score = -9999999
[2022-11-09T13:57:29.258429+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7070568280504559 <-- Model xgboost_regressor({'reg_lambda': 1.5817437855073948, 'reg_alpha': 0.5370751928014875, 'max_depth': 4, 'n_estimators': 100, 'lr': 0.0001})
[2022-11-09T13:57:29.654298+0200][3193248][INFO

Evaluate longitudinals 1708900434718589799 3015


[2022-11-09T14:00:22.093050+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:00:22.211933+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:00:22.212610+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:00:22.435169+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.908471071789586. old score = -9999999
[2022-11-09T14:00:24.386264+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.908471071789586 <-- Model xgboost({})
[2022-11-09T14:00:24.732482+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6893764738926047. old score = -9999999
[2022-11-09T14:00:25.818286+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6893764738926047 <-- Model xgboost_regressor({})
[2022-11-09T14:00:26.167718+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.21218572940275376. old score = -9999999
[2022-11-09T14:00:46.99376

Evaluate longitudinals 1708900434718589799 2881


[2022-11-09T14:04:02.505834+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:04:02.527334+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:04:02.528168+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:04:02.737043+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9113864567695436. old score = -9999999
[2022-11-09T14:04:04.185363+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9113864567695436 <-- Model xgboost({})
[2022-11-09T14:04:04.525966+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6878686001155097. old score = -9999999
[2022-11-09T14:04:21.477257+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.7104207507791782 <-- Model xgboost_regressor({'reg_lambda': 7.917282628538301, 'reg_alpha': 0.23216995269139384, 'max_depth': 4, 'n_estimators': 91, 'lr': 0.0001})
[2022-11-09T14:04:21.854520+0200][3193248][INFO]

Evaluate longitudinals 1708900434718589799 2820


[2022-11-09T14:07:28.423508+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:07:28.445023+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:07:28.445589+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:07:28.654781+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9092274018813382. old score = -9999999
[2022-11-09T14:07:30.418360+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9092274018813382 <-- Model xgboost({})
[2022-11-09T14:07:30.760698+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.689954857549187. old score = -9999999
[2022-11-09T14:07:31.873060+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.689954857549187 <-- Model xgboost_regressor({})
[2022-11-09T14:07:32.212586+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2246644283912605. old score = -9999999
[2022-11-09T14:07:52.307361

Evaluate longitudinals 1708900434718589799 2778


[2022-11-09T14:11:18.270839+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:11:18.292480+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:11:18.293099+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:11:18.503135+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.909633358279765. old score = -9999999
[2022-11-09T14:11:19.962281+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.909633358279765 <-- Model xgboost({})
[2022-11-09T14:11:20.303039+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6936686479520877. old score = -9999999
[2022-11-09T14:11:21.389605+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6936686479520877 <-- Model xgboost_regressor({})
[2022-11-09T14:11:21.732284+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.22425211966074882. old score = -9999999
[2022-11-09T14:11:42.69511

Evaluate longitudinals 1708900434718589799 2770


[2022-11-09T14:14:24.352432+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:14:24.374261+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:14:24.374871+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:14:24.582435+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9113336240501123. old score = -9999999
[2022-11-09T14:14:26.292307+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9113336240501123 <-- Model xgboost({})
[2022-11-09T14:14:26.633681+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6954557094518552. old score = -9999999
[2022-11-09T14:14:27.674760+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6954557094518552 <-- Model xgboost_regressor({})
[2022-11-09T14:14:28.023393+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.22083166465075366. old score = -9999999
[2022-11-09T14:14:48.974

Evaluate longitudinals 1708900434718589799 2766


[2022-11-09T14:18:16.073659+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:18:16.095334+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:18:16.095923+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:18:16.308668+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9113336240501123. old score = -9999999
[2022-11-09T14:18:18.068786+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9113336240501123 <-- Model xgboost({})
[2022-11-09T14:18:18.409698+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6954557094518552. old score = -9999999
[2022-11-09T14:18:19.450855+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6954557094518552 <-- Model xgboost_regressor({})
[2022-11-09T14:18:19.790644+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.21983067650105742. old score = -9999999
[2022-11-09T14:18:40.665

Evaluate static imputation 1708900434718589799 2766 missmin


[2022-11-09T14:22:01.068998+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9113336240501123. old score = -9999999
[2022-11-09T14:22:02.450456+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9113336240501123 <-- Model xgboost({})
[2022-11-09T14:22:02.790628+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6954557094518552. old score = -9999999
[2022-11-09T14:22:03.817821+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6954557094518552 <-- Model xgboost_regressor({})
[2022-11-09T14:22:04.156784+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.21983067650105742. old score = -9999999
[2022-11-09T14:22:25.249103+0200][3193248][INFO]      >>> Column PTEDUCAT <-- score 0.2741876130396578 <-- Model xgboost_regressor({'reg_lambda': 0.4395316684056182, 'reg_alpha': 4.261381348945343, 'max_depth': 4, 'n_estimators': 98, 'lr': 0.001})
[2022-11-09T14:22:25.645311+0200][3193248][INFO] [Evaluate Hippocampus]

Evaluate constants take 2 1708900434718589799 1532
Evaluate longitudinals take 2 1708900434718589799 1239


[2022-11-09T14:25:08.133835+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:08.155469+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:08.156007+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:08.162709+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:08.168960+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:08.175040+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:08.181107+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:08.187034+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:08.192870+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:08.198640+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:08.204548+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:08.210296+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:08.216083+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 827


[2022-11-09T14:25:15.494979+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:15.516492+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:15.517150+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:15.523815+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:15.529811+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:15.535668+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:15.541493+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:15.547281+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:15.553389+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:15.559198+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:15.565125+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:15.570996+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:15.577002+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 486


[2022-11-09T14:25:22.109009+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:22.130730+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:22.131358+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:22.137867+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:22.143753+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:22.149501+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:22.155250+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:22.161033+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:22.166823+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:22.172824+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:22.178843+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:22.184927+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:22.191015+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 272


[2022-11-09T14:25:27.388466+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:27.410358+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:27.410970+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:27.417544+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:27.423470+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:27.429650+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:27.435515+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:27.441757+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:27.448158+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:27.454348+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:27.460906+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:27.467235+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:27.473219+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 120


[2022-11-09T14:25:32.046398+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:32.067988+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:32.068511+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:32.075246+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:32.081430+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:32.087344+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:32.093317+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:32.099217+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:32.105179+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:32.111161+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:32.117228+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:32.123152+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:32.129006+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 50


[2022-11-09T14:25:35.876445+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:35.898324+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:35.898865+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:35.905668+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:35.911510+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:35.917577+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:35.923589+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:35.929596+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:35.935420+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:35.941287+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:35.947125+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:35.952935+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:35.958718+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 23


[2022-11-09T14:25:39.297150+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:39.318733+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:39.319346+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:39.325829+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:39.331535+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:39.337286+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:39.343026+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:39.348764+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:39.354512+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:39.360249+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:39.365894+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:39.371574+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:39.377262+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 8


[2022-11-09T14:25:42.572365+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:42.594201+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:42.594840+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:42.601657+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:42.607518+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:42.613376+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:42.619111+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:42.624852+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:42.631101+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:42.637207+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:42.643372+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:42.649392+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:42.655509+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 2


[2022-11-09T14:25:45.774871+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:45.796714+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:45.797292+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:45.803793+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:45.809594+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:45.815616+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:45.821372+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:45.827229+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:45.833153+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:45.839295+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:45.845269+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:45.851002+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:45.856867+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 1


[2022-11-09T14:25:48.933646+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:48.955647+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:48.956258+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:48.962932+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:48.968882+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:48.974741+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:48.980456+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:48.986302+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:48.991973+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:48.997740+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:49.003504+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:49.009436+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:49.015326+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 1708900434718589799 0


[2022-11-09T14:25:52.084352+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:52.106293+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:52.106929+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:52.113343+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:25:52.119079+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:25:52.124794+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:25:52.130601+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:25:52.136302+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:25:52.142015+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:25:52.147634+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:25:52.153399+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:25:52.159065+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:25:52.164845+0200][3193248][INFO]   > Imputation it

Normalize data 1708900434718589799 0
Evaluate constants 8191117053172621229 8052
Evaluate longitudinals 8191117053172621229 6925


[2022-11-09T14:25:56.994201+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:25:57.015524+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:25:57.016164+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:25:57.220294+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9039627090442165. old score = -9999999
[2022-11-09T14:25:58.690574+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9039627090442165 <-- Model xgboost({})
[2022-11-09T14:25:59.028051+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6802836575729523. old score = -9999999
[2022-11-09T14:26:00.161033+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6802836575729523 <-- Model xgboost_regressor({})
[2022-11-09T14:26:00.496274+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2214953604510344. old score = -9999999
[2022-11-09T14:26:20.7607

Evaluate longitudinals 8191117053172621229 5320


[2022-11-09T14:29:18.554386+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:29:18.579141+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:29:18.579717+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:29:18.787249+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9049321969730657. old score = -9999999
[2022-11-09T14:29:20.380204+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9049321969730657 <-- Model xgboost({})
[2022-11-09T14:29:20.731296+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6784177392875995. old score = -9999999
[2022-11-09T14:29:21.850342+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6784177392875995 <-- Model xgboost_regressor({})
[2022-11-09T14:29:22.198587+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.224347373018579. old score = -9999999
[2022-11-09T14:29:42.80108

Evaluate longitudinals 8191117053172621229 4139


[2022-11-09T14:32:15.156903+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:32:15.178524+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:32:15.179309+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:32:15.386517+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9036677594771616. old score = -9999999
[2022-11-09T14:32:17.527871+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9036677594771616 <-- Model xgboost({})
[2022-11-09T14:32:17.869592+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6798783843431263. old score = -9999999
[2022-11-09T14:32:18.892789+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6798783843431263 <-- Model xgboost_regressor({})
[2022-11-09T14:32:19.233618+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.19917223261538108. old score = -9999999
[2022-11-09T14:32:40.173

Evaluate longitudinals 8191117053172621229 3527


[2022-11-09T14:35:35.222573+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:35:35.244628+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:35:35.245230+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:35:35.456765+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9035462432132271. old score = -9999999
[2022-11-09T14:35:37.056165+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9035462432132271 <-- Model xgboost({})
[2022-11-09T14:35:37.401826+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6870363321238808. old score = -9999999
[2022-11-09T14:35:38.561648+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6870363321238808 <-- Model xgboost_regressor({})
[2022-11-09T14:35:38.904150+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.20721731784217623. old score = -9999999
[2022-11-09T14:35:59.488

Evaluate longitudinals 8191117053172621229 3365


[2022-11-09T14:37:56.961481+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:37:56.984277+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:37:56.984902+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:37:57.196888+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9004799301121568. old score = -9999999
[2022-11-09T14:37:58.847615+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9004799301121568 <-- Model xgboost({})
[2022-11-09T14:37:59.190305+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6788960464051348. old score = -9999999
[2022-11-09T14:38:03.547879+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6827699879013864 <-- Model xgboost_regressor({'reg_lambda': 5.832141159139406, 'reg_alpha': 0.002071560372141512, 'max_depth': 3, 'n_estimators': 75, 'lr': 0.001})
[2022-11-09T14:38:03.854625+0200][3193248][INFO]

Evaluate longitudinals 8191117053172621229 3284


[2022-11-09T14:40:55.916254+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:40:55.937926+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:40:55.938490+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:40:56.144846+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9020699132101743. old score = -9999999
[2022-11-09T14:40:57.797163+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9020699132101743 <-- Model xgboost({})
[2022-11-09T14:40:58.143889+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6837902147823736. old score = -9999999
[2022-11-09T14:40:59.255158+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6837902147823736 <-- Model xgboost_regressor({})
[2022-11-09T14:40:59.596619+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.2189344809157151. old score = -9999999
[2022-11-09T14:41:19.8748

Evaluate longitudinals 8191117053172621229 3237


[2022-11-09T14:43:51.461516+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:43:51.483487+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:43:51.484113+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:43:51.697065+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9014819657427611. old score = -9999999
[2022-11-09T14:43:53.165198+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9014819657427611 <-- Model xgboost({})
[2022-11-09T14:43:53.509580+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6814396977919525. old score = -9999999
[2022-11-09T14:43:54.677703+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6814396977919525 <-- Model xgboost_regressor({})
[2022-11-09T14:43:55.023477+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.19823456718421234. old score = -9999999
[2022-11-09T14:44:15.876

Evaluate longitudinals 8191117053172621229 3226


[2022-11-09T14:46:20.342395+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:46:20.364037+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:46:20.365005+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:46:20.573192+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.9008513906365729. old score = -9999999
[2022-11-09T14:46:24.224162+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.9054625026822388 <-- Model xgboost({'reg_lambda': 9.223728408058607, 'reg_alpha': 4.087752404641743, 'colsample_bytree': 0.8333681594740548, 'colsample_bynode': 0.7839592705971191, 'colsample_bylevel': 0.7055103433312622, 'subsample': 0.8481447475956955, 'lr': 0.0001, 'max_depth': 5, 'n_estimators': 107, 'min_child_weight': 2, 'max_bin': 263, 'booster': 1})
[2022-11-09T14:46:24.612356+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.685053624874210

Evaluate longitudinals 8191117053172621229 3221


[2022-11-09T14:50:11.750774+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:50:11.772273+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:50:11.772885+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:50:11.986268+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.8994604350391804. old score = -9999999
[2022-11-09T14:50:13.817138+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.8994604350391804 <-- Model xgboost({})
[2022-11-09T14:50:14.162923+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6843017595263224. old score = -9999999
[2022-11-09T14:50:15.287290+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6843017595263224 <-- Model xgboost_regressor({})
[2022-11-09T14:50:15.628286+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.20278936968189637. old score = -9999999
[2022-11-09T14:50:36.510

Evaluate static imputation 8191117053172621229 3221 missmin


[2022-11-09T14:53:07.519890+0200][3193248][INFO] [Evaluate PTGENDER_num] previous config new score = 0.8994604350391804. old score = -9999999
[2022-11-09T14:53:09.272234+0200][3193248][INFO]      >>> Column PTGENDER_num <-- score 0.8994604350391804 <-- Model xgboost({})
[2022-11-09T14:53:09.614809+0200][3193248][INFO] [Evaluate MidTemp] previous config new score = 0.6843017595263224. old score = -9999999
[2022-11-09T14:53:10.688300+0200][3193248][INFO]      >>> Column MidTemp <-- score 0.6843017595263224 <-- Model xgboost_regressor({})
[2022-11-09T14:53:11.030436+0200][3193248][INFO] [Evaluate PTEDUCAT] previous config new score = 0.20278936968189637. old score = -9999999
[2022-11-09T14:53:31.668158+0200][3193248][INFO]      >>> Column PTEDUCAT <-- score 0.25598635831101013 <-- Model xgboost_regressor({'reg_lambda': 8.73979934250846, 'reg_alpha': 4.773174556534757, 'max_depth': 4, 'n_estimators': 100, 'lr': 0.001})
[2022-11-09T14:53:32.072230+0200][3193248][INFO] [Evaluate Hippocampus]

Evaluate constants take 2 8191117053172621229 2024
Evaluate longitudinals take 2 8191117053172621229 1579


[2022-11-09T14:55:30.768648+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:55:30.790729+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:55:30.791344+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:55:30.798045+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:55:30.804212+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:55:30.810021+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:55:30.816116+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:55:30.822047+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:55:30.828029+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:55:30.834152+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:55:30.840114+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:55:30.846192+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:55:30.852216+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 1121


[2022-11-09T14:55:38.807122+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:55:38.829315+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:55:38.829905+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:55:38.836571+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:55:38.842707+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:55:38.848831+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:55:38.854778+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:55:38.860743+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:55:38.866807+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:55:38.872795+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:55:38.878648+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:55:38.884623+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:55:38.890389+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 746


[2022-11-09T14:55:45.957029+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:55:45.979313+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:55:45.979896+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:55:45.986637+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:55:45.993229+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:55:45.999165+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:55:46.005383+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:55:46.011363+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:55:46.017304+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:55:46.023290+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:55:46.029141+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:55:46.035033+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:55:46.041146+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 474


[2022-11-09T14:55:52.001016+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:55:52.023090+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:55:52.023677+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:55:52.030460+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:55:52.036587+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:55:52.042556+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:55:52.048575+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:55:52.054787+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:55:52.060871+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:55:52.066688+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:55:52.072544+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:55:52.078337+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:55:52.084216+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 304


[2022-11-09T14:55:56.980240+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:55:57.002148+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:55:57.002746+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:55:57.009582+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:55:57.015672+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:55:57.021597+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:55:57.027550+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:55:57.033474+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:55:57.039286+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:55:57.045343+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:55:57.051365+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:55:57.057450+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:55:57.063458+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 207


[2022-11-09T14:56:01.250293+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:01.272219+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:01.273171+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:01.279676+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:01.285566+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:01.291538+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:01.297440+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:01.303426+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:01.309332+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:01.315407+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:01.321251+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:01.327055+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:01.333208+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 131


[2022-11-09T14:56:05.278725+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:05.300994+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:05.301618+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:05.308388+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:05.314525+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:05.320564+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:05.326606+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:05.332399+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:05.338418+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:05.344212+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:05.350113+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:05.355909+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:05.361993+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 77


[2022-11-09T14:56:09.092159+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:09.114204+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:09.114816+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:09.121659+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:09.127691+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:09.133647+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:09.139698+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:09.145533+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:09.151299+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:09.157141+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:09.162968+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:09.168897+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:09.175035+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 40


[2022-11-09T14:56:12.719608+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:12.741516+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:12.742102+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:12.748711+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:12.754555+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:12.760370+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:12.766323+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:12.772195+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:12.778051+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:12.783921+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:12.789979+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:12.796061+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:12.801866+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 17


[2022-11-09T14:56:16.192243+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:16.215114+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:16.215705+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:16.222247+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:16.228183+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:16.234220+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:16.240206+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:16.246356+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:16.252084+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:16.257910+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:16.263715+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:16.269566+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:16.275412+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 8


[2022-11-09T14:56:19.626565+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:19.648509+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:19.649093+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:19.655873+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:19.661886+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:19.667883+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:19.673933+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:19.679917+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:19.685715+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:19.691735+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:19.697701+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:19.703634+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:19.709691+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 4


[2022-11-09T14:56:22.902011+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:22.924844+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:22.925467+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:22.932060+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:22.937962+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:22.944017+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:22.949710+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:22.955443+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:22.961134+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:22.966906+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:22.972644+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:22.978342+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:22.984043+0200][3193248][INFO]   > Imputation it

Evaluate longitudinals take 2 8191117053172621229 0


[2022-11-09T14:56:26.176331+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:26.198313+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:26.198851+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:26.205442+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:26.211280+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:26.217309+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:26.223347+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:26.229349+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:26.235389+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:26.241295+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:26.247065+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:26.252860+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:26.258520+0200][3193248][INFO]   > Imputation it

Normalize data 8191117053172621229 0


In [23]:
eval_data = pd.concat([dev_set, test_A_eval, test_B_eval], ignore_index=True)


eval_data

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
0,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,0,79.100000,0.0,20.0,1.0,1.0,0.5,0.923077,0.164384,0.071871,0.548646,0.376516,0.464021,0.194906,0.400709,2.0,6.0
1,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,6,79.600000,0.0,20.0,1.0,1.0,1.5,0.923077,0.237397,0.071956,0.548307,0.366398,0.403880,0.193367,0.397291,2.0,6.0
2,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,0,72.900000,1.0,12.0,1.0,1.0,1.0,1.000000,0.123288,0.142655,0.525169,0.235599,0.513404,0.356253,0.294774,6.0,60.0
3,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,6,73.400000,1.0,12.0,1.0,1.0,1.0,1.000000,0.164384,0.144729,0.549210,0.230361,0.435097,0.322395,0.294175,6.0,60.0
4,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,12,73.900000,1.0,12.0,1.0,1.0,1.0,0.961538,0.109589,0.155550,0.527878,0.215944,0.487831,0.342600,0.277552,6.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6894,ff4eb5a64e2b89861d5dea81190669893070b227f3a335...,0,74.304489,0.0,18.0,1.0,1.0,1.5,0.884615,0.114110,0.322319,0.502370,0.492262,0.394356,0.397160,0.531003,3.0,84.0
6895,ff4eb5a64e2b89861d5dea81190669893070b227f3a335...,12,75.304489,0.0,18.0,1.0,1.0,1.5,0.923077,0.242055,0.339084,0.519639,0.509578,0.294356,0.416522,0.545575,3.0,84.0
6896,ff4eb5a64e2b89861d5dea81190669893070b227f3a335...,84,81.304489,0.0,18.0,1.0,1.0,1.5,1.000000,0.178082,0.412867,0.432054,0.483387,0.363316,0.468451,0.508440,3.0,84.0
6897,ffa86109ba8684f31325842d0ff26568e105f0f63b366a...,0,66.300000,1.0,13.0,0.0,0.0,0.0,0.923077,0.118767,0.177669,0.603072,0.335950,0.536760,0.308488,0.445137,2.0,24.0


In [24]:
from hyperimpute.plugins.imputers import Imputers

plugin = Imputers().get(
    "hyperimpute",
    optimizer="simple",
    classifier_seed=["catboost"],
    regression_seed=["xgboost_regressor", "catboost_regressor"],
    class_threshold=cat_limit,
)


imputed_X = plugin.fit_transform(eval_data.copy())
imputed_X[scaled_cols] = scaler.inverse_transform(imputed_X[scaled_cols])

imputed_X

[2022-11-09T14:56:27.829144+0200][3193248][INFO] Iteration imputation: select_model_by_column: True, select_model_by_iteration: False
[2022-11-09T14:56:27.864814+0200][3193248][INFO]   > HyperImpute using inner optimization
[2022-11-09T14:56:27.865434+0200][3193248][INFO]   > Imputation iter 0
[2022-11-09T14:56:27.873042+0200][3193248][INFO]   > Imputation iter 1
[2022-11-09T14:56:27.879696+0200][3193248][INFO]   > Imputation iter 2
[2022-11-09T14:56:27.886314+0200][3193248][INFO]   > Imputation iter 3
[2022-11-09T14:56:27.892947+0200][3193248][INFO]   > Imputation iter 4
[2022-11-09T14:56:27.899744+0200][3193248][INFO]   > Imputation iter 5
[2022-11-09T14:56:27.906297+0200][3193248][INFO]   > Imputation iter 6
[2022-11-09T14:56:27.913054+0200][3193248][INFO]   > Imputation iter 7
[2022-11-09T14:56:27.919666+0200][3193248][INFO]   > Imputation iter 8
[2022-11-09T14:56:27.926695+0200][3193248][INFO]   > Imputation iter 9
[2022-11-09T14:56:27.933449+0200][3193248][INFO]   > Imputation it

Unnamed: 0,RID_HASH,VISCODE,AGE,PTGENDER_num,PTEDUCAT,DX_num,APOE4,CDRSB,MMSE,ADAS13,Ventricles,Hippocampus,WholeBrain,Entorhinal,Fusiform,MidTemp,total_visits,last_visit
0,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,0.0,79.100000,0.0,20.0,1.0,1.0,0.5,28.0,12.00,16636.000000,7208.000000,9.790100e+05,3672.000000,12661.000000,18165.000000,2.0,6.0
1,001c7955017f905ccf78d55c94e81070a1cca7b1efb5bd...,6.0,79.600000,0.0,20.0,1.0,1.0,1.5,28.0,17.33,16649.000000,7205.000000,9.707820e+05,3331.000000,12630.000000,18085.000000,2.0,6.0
2,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,0.0,72.900000,1.0,12.0,1.0,1.0,1.0,30.0,9.00,27456.000000,7000.000000,8.644140e+05,3952.000000,15911.000000,15686.000000,6.0,60.0
3,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,6.0,73.400000,1.0,12.0,1.0,1.0,1.0,30.0,12.00,27773.000000,7213.000000,8.601540e+05,3508.000000,15229.000000,15672.000000,6.0,60.0
4,00e6fb56250581a8c8b5133f91443dd8c037e3cd8d0ba8...,12.0,73.900000,1.0,12.0,1.0,1.0,1.0,29.0,8.00,29427.000000,7024.000000,8.484300e+05,3807.000000,15636.000000,15283.000000,6.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6894,ff4eb5a64e2b89861d5dea81190669893070b227f3a335...,0.0,74.304489,0.0,18.0,1.0,1.0,1.5,27.0,8.33,54919.015071,6798.000000,1.073138e+06,3277.000000,16735.000000,21214.000000,3.0,84.0
6895,ff4eb5a64e2b89861d5dea81190669893070b227f3a335...,12.0,75.304489,0.0,18.0,1.0,1.0,1.5,28.0,17.67,57481.678221,6951.000000,1.087219e+06,2710.000000,17125.000000,21555.000000,3.0,84.0
6896,ff4eb5a64e2b89861d5dea81190669893070b227f3a335...,84.0,81.304489,0.0,18.0,1.0,1.0,1.5,30.0,13.00,68759.948410,6175.000000,1.065920e+06,3101.000000,18171.000000,20686.000000,3.0,84.0
6897,ffa86109ba8684f31325842d0ff26568e105f0f63b366a...,0.0,66.300000,1.0,13.0,0.0,0.0,0.0,28.0,8.67,32808.200000,7690.220979,9.460212e+05,4084.430357,14948.865953,19204.643123,2.0,24.0


In [27]:
import numpy as np

results = []


def normalize_output(test_data):
    test_data = test_data.copy()

    factor = test_data["CDRSB"] / 0.5
    factor[factor < 0] = 0
    factor = factor.fillna(-1)
    factor = factor.round(0).astype(int)
    factor = factor.replace(-1, np.nan)
    test_data["CDRSB"] = factor * 0.5
    
    test_data["DX_num"] = test_data["DX_num"].round(0)

    test_data["ADAS13"] = ((test_data["ADAS13"] * 3).round(0) / 3).round(2)
    test_data["MMSE"] = test_data["MMSE"].round(0)

    return test_data

def dump_results(imputed_data: pd.DataFrame, fpath: str):
    for name, data in [
        ("test_A", test_A),
        ("test_B", test_B),
    ]:
        for idx, row in data.iterrows():
            for col in row.index:
                local = row.T
                val = local[col]
                if val == val:
                    continue
                imputed_id = f"{local['RID_HASH']}_{local['VISCODE']}_{col}_{name}"
                imputed_val = imputed_data[
                    (imputed_data["RID_HASH"] == local["RID_HASH"])
                    & (imputed_data["VISCODE"] == local["VISCODE"])
                ][col].values[0]
                
                assert imputed_val == imputed_val
                assert imputed_val != ""
                
                results.append([imputed_id, imputed_val])

    output = pd.DataFrame(results, columns=submission.columns)
    output.to_csv(fpath, index=None)

    return output


version = "v9"
changelog = "automl_one_more_try"
output_normalized = dump_results(
    normalize_output(imputed_X),
    results_dir / f"imputation_results_{version}_{changelog}.csv",
)

output_normalized

Unnamed: 0,Id,Predicted
0,988b6137f4352c01e4b52790505caa0c3ec438f117000a...,58.20
1,988b6137f4352c01e4b52790505caa0c3ec438f117000a...,0.00
2,988b6137f4352c01e4b52790505caa0c3ec438f117000a...,2.00
3,988b6137f4352c01e4b52790505caa0c3ec438f117000a...,3.50
4,988b6137f4352c01e4b52790505caa0c3ec438f117000a...,26.00
...,...,...
15021,8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6e...,1.00
15022,8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6e...,0.00
15023,8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6e...,1.00
15024,8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6e...,29.00


In [28]:
pd.set_option("display.expand_frame_repr", True)

output_normalized.tail(5).values

array([['8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6ea81ced3e328cea9e63_0_DX_num_test_B',
        1.0],
       ['8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6ea81ced3e328cea9e63_0_APOE4_test_B',
        0.0],
       ['8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6ea81ced3e328cea9e63_0_CDRSB_test_B',
        1.0],
       ['8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6ea81ced3e328cea9e63_0_MMSE_test_B',
        29.0],
       ['8b33cc9dd06fc18f130e185fdf1e6d657dbc80add9ff6ea81ced3e328cea9e63_0_ADAS13_test_B',
        18.33]], dtype=object)