In [None]:
import numpy as np
import pandas as pd


from mymodels import data_engineer
from mymodels import MyModel

In [2]:
# Instantiate the project's MyModel wrapper, passing random_state=0
# (presumably seeds the whole workflow for reproducibility — confirm in mymodels).
mymodel = MyModel(random_state=0)

In [3]:
# Build the preprocessing pipeline handed to mymodel.load() further down.
# cat_features/encode_method and scale_cols/scale_method are parallel lists:
# the i-th method presumably applies to the i-th column (verify against
# the mymodels documentation).
data_engineer_pipeline = data_engineer(
    # Outlier / missing-value arguments are all left as None in this run.
    outlier_cols=None,
    missing_values_cols=None,
    impute_method=None,
    # Categorical columns and the encoder requested for each one.
    cat_features=["Gender", "CAEC", "CALC", "MTRANS"],
    encode_method=["onehot", "ordinal", "ordinal", "ordinal"],
    # Numeric columns and the scaler requested for each one.
    scale_cols=["Age", "Height", "Weight"],
    scale_method=["standard", "standard", "standard"],
    n_jobs=5,
    verbose=False,
)

In [4]:
# Read the zipped CSV and use its "id" column as the DataFrame index.
data = pd.read_csv(
    "data/obesity.zip",
    encoding="utf-8",
    na_values=np.nan,
    index_col=["id"],
)

# Register the data, target, feature list and preprocessing pipeline with the model.
# NOTE(review): the target name "0be1dad" looks unusual — confirm it matches
# the column header in data/obesity.zip.
mymodel.load(
    model_name="rfc",
    input_data=data,
    y="0be1dad",
    x_list=[
        "Gender", "Age", "Height", "Weight",
        "family_history_with_overweight",
        "FAVC", "FCVC", "NCP", "CAEC", "SMOKE",
        "CH2O", "SCC", "FAF", "TUE", "CALC", "MTRANS",
    ],
    test_ratio=0.3,
    stratify=False,
    data_engineer_pipeline=data_engineer_pipeline,
    model_configs_path="model_configs.yml",
)

In [5]:
# Output settings: results directory, plot format/DPI, and which artifacts
# to save (optimal model, raw data, SHAP values).
# show=False presumably suppresses inline plot display — confirm in mymodels.
mymodel.format(
    results_dir="results/obesity",
    show=False,
    plot_format="jpg",
    plot_dpi=500,
    save_optimal_model=True,
    save_raw_data=True,
    save_shap_values=True,
)

In [None]:
# Run the diagnostic step; sample_k=None presumably means "no sub-sampling"
# (verify against the mymodels documentation).
mymodel.diagnose(sample_k=None)

In [None]:
# Hyper-parameter search: "tpe" strategy, 5-fold CV, 10 trials, maximizing
# the score. eval_function=None leaves the scoring function at whatever
# mymodels uses by default (not visible from this notebook).
mymodel.optimize(
    strategy="tpe",
    cv=5,
    trials=10,
    n_jobs=5,
    direction="maximize",
    eval_function=None,
)

In [None]:
# Evaluate the model. show_train=True and dummy=True presumably add
# training-set metrics and a dummy-baseline comparison — confirm in mymodels.
mymodel.evaluate(
    show_train=True,
    dummy=True,
    eval_metric=None,
)

In [None]:
# Explanation step: background data is selected from "train" and the data to
# explain from "test", with a sample size of 50 passed for each.
mymodel.explain(
    select_background_data="train",
    select_shap_data="test",
    sample_background_data_k=50,
    sample_shap_data_k=50,
)