In [7]:
import sys
import pathlib
sys.path.append(pathlib.Path().cwd().parent.as_posix())

import auxiliary as aux

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import SGDRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor


# Experiment configuration shared by the cells below.
sn = 1  # subset number: index into aux.subcol selecting the columns to impute

# Carve a validation split out of aux.data with a fixed seed (frac=0.0125,
# presumably 1.25% of the rows -- confirm in aux.train_valid_split).
# NOTE(review): the names suggest `valid_true` holds the reference values and
# `valid` the frame handed to the imputers -- verify against aux.
valid_true, valid = aux.train_valid_split(aux.data, seed=19, frac=0.0125)

## statistics

In [None]:
# Baseline: run aux.simplestat on the columns of subset `sn`, backed by a
# default-configured SimpleImputer.
# `validate_on=valid_true` presumably scores the result against the held-out
# values -- confirm in aux.ImputeHelper.run.
simplestat_step = aux.Step(aux.simplestat, aux.subcol[sn], imputer=SimpleImputer())
predicted = aux.ImputeHelper(simplestat_step).run(valid, validate_on=valid_true)

In [None]:
# Group-wise statistic: run aux.groupstat with `F_2_10` as the grouping column.
# The step is given subset 2 concatenated with subset `sn`.
# NOTE(review): subset 2 is presumably included because it contains the
# grouping column F_2_10 -- confirm against aux.subcol.
group_keys = ['F_2_10']
target_cols = aux.subcol[2] + aux.subcol[sn]
groupstat_step = aux.Step(aux.groupstat, target_cols, gcol=group_keys)
predicted = aux.ImputeHelper(groupstat_step).run(valid, validate_on=valid_true)

## predictor

In [None]:
# Model-based imputation: aux.predictor driven by a LightGBM regressor
# (200 trees, 29 leaves each, fixed random_state, all cores).
lgbm = LGBMRegressor(
    random_state=7,
    n_jobs=-1,
    n_estimators=200,
    num_leaves=29,
)
predictor_step = aux.Step(aux.predictor, aux.subcol[sn], estimator=lgbm)
predicted = aux.ImputeHelper(predictor_step).run(valid, validate_on=valid_true)

## mean matching

In [None]:
# Mean matching initialised by a small decision tree (at most 50 leaves).
# The tree sits behind a SimpleImputer inside the pipeline.
nan_filler = SimpleImputer()
tree = DecisionTreeRegressor(random_state=7, max_leaf_nodes=50)
pipeline = make_pipeline(nan_filler, tree)

# N=2500 and backend='threading' are forwarded to aux.mean_matching as-is;
# their exact meaning is defined in aux (not visible here).
matching_step = aux.Step(
    aux.mean_matching,
    aux.subcol[sn],
    N=2500,
    init=pipeline,
    backend='threading',
)
predicted = aux.ImputeHelper(matching_step).run(valid, validate_on=valid_true)

In [None]:
# Same mean-matching experiment as the 50-leaf variant, but the initial
# decision tree may grow up to 100 leaves.
nan_filler = SimpleImputer()
tree = DecisionTreeRegressor(random_state=7, max_leaf_nodes=100)
pipeline = make_pipeline(nan_filler, tree)

# N=2500 and backend='threading' are forwarded to aux.mean_matching as-is;
# their exact meaning is defined in aux (not visible here).
matching_step = aux.Step(
    aux.mean_matching,
    aux.subcol[sn],
    N=2500,
    init=pipeline,
    backend='threading',
)
predicted = aux.ImputeHelper(matching_step).run(valid, validate_on=valid_true)

In [None]:
# Mean matching initialised by a deliberately tiny LightGBM model
# (5 trees, 13 leaves each) instead of a decision-tree pipeline.
lgbm_init = LGBMRegressor(
    random_state=7,
    n_jobs=-1,
    n_estimators=5,
    num_leaves=13,
)
matching_step = aux.Step(
    aux.mean_matching,
    aux.subcol[sn],
    N=2500,
    init=lgbm_init,
    backend='threading',
)
predicted = aux.ImputeHelper(matching_step).run(valid, validate_on=valid_true)

## MICE

In [None]:
# MICE with a default-configured linear SGD regressor as the per-column estimator.
# SGDRegressor shuffles the training data after each epoch by default, so
# without a fixed random_state the fit (and the validation result) changes from
# run to run. `seed=11` is passed to aux.mice, but whether it also seeds the
# estimator is not visible here, so the estimator is pinned explicitly, using
# the same random_state=7 as the other estimators in this notebook.
sgd = SGDRegressor(max_iter=1000, random_state=7)
predicted = aux.ImputeHelper(
    aux.Step(aux.mice, aux.subcol[sn], estimator=sgd, epochs=5, seed=11, autosplit=False),
).run(valid, validate_on=valid_true)

In [None]:
# MICE with a tuned SGD regressor: adaptive learning-rate schedule starting at
# eta0=0.1 and a stronger regularisation term (alpha=0.001).
# SGDRegressor shuffles the training data after each epoch by default, so
# without a fixed random_state the fit (and the validation result) changes from
# run to run. `seed=11` is passed to aux.mice, but whether it also seeds the
# estimator is not visible here, so the estimator is pinned explicitly, using
# the same random_state=7 as the other estimators in this notebook.
sgd = SGDRegressor(
    learning_rate='adaptive',
    eta0=0.1,
    alpha=0.001,
    max_iter=1000,
    random_state=7,
)
predicted = aux.ImputeHelper(
    aux.Step(aux.mice, aux.subcol[sn], estimator=sgd, epochs=5, seed=11, autosplit=False),
).run(valid, validate_on=valid_true)

In [None]:
#