In [1]:
# Identifiers of the input groups: one "dc_*" entry followed by the "nc_*"
# family. NOTE(review): what the prefixes and numbers stand for is not shown in
# this part of the notebook -- confirm against the data-preparation code.
INPUT_TYPE = ["dc_5"] + [f"nc_{size}" for size in (3, 5, 10, 25, 30, 50, 75)]

# Regression targets. Each entry selects a label file `data/output_<name>.csv`
# and names the pickled model written for it. The order below is the order in
# which the models are trained and reported, so it is kept as-is.
OUTPUT_TYPE = [
    "umato_srho_0", "pca_tnc_25", "tsne_pr_0", "umato_tnc_25",
    "isomap_tnc_25", "lle_pr_0", "isomap_pr_0", "tsne_tnc_25",
    "umap_pr_0", "umap_tnc_25", "pca_pr_0", "lle_tnc_25",
    "umato_pr_0",
]

# Names of regression scores. NOTE(review): these look like scikit-learn
# `sklearn.metrics` function names, presumably resolved by a later cell --
# confirm, since nothing in the visible cells uses this list.
SCORE_TYPE = [
    "explained_variance_score", "max_error",
    "mean_absolute_error", "mean_squared_error", "root_mean_squared_error",
    "mean_squared_log_error", "root_mean_squared_log_error",
    "median_absolute_error", "r2_score",
    "mean_poisson_deviance", "mean_gamma_deviance",
    "mean_absolute_percentage_error",
    "d2_absolute_error_score", "d2_pinball_score", "d2_tweedie_score",
]

In [3]:
import os

import autosklearn.regression
import joblib
import pandas as pd
import sklearn
import sklearn.metrics

# Train one auto-sklearn regressor per entry of OUTPUT_TYPE on the full input
# table, pickle each fitted model under SAVE_PATH, and print its leaderboard
# together with an R2 score.

# Directory that receives one pickled model per output type.
SAVE_PATH = "pretrained_model/multi/"
# exist_ok=True replaces the check-then-create idiom: no race between the
# existence test and the creation, and the cell can be re-run safely.
os.makedirs(SAVE_PATH, exist_ok=True)

# NOTE(review): `input` shadows the Python builtin of the same name. The name
# is kept here because other cells may reference it; consider renaming it
# notebook-wide (e.g. `input_df`).
input = pd.read_csv("data/input.csv", index_col=0)

# Regression with All Inputs
# The feature table is identical for every target, so bind it once.
X = input
for t in OUTPUT_TYPE:
    label = pd.read_csv(f"data/output_{t}.csv", index_col=0)
    y = label

    reg = autosklearn.regression.AutoSklearnRegressor(
        time_left_for_this_task=600,  # overall search budget per target (seconds)
        per_run_time_limit=30,  # budget for a single candidate model (seconds)
        memory_limit=10000,  # per-job memory cap (MB)
        resampling_strategy="cv",
        resampling_strategy_arguments={"folds": 5},
    )
    reg.fit(X, y)

    # os.path.join does not add a second separator when SAVE_PATH already ends
    # with "/", unlike the previous f"{SAVE_PATH}/{t}.pkl".
    save_path = os.path.join(SAVE_PATH, f"{t}.pkl")
    joblib.dump(reg, save_path)
    print(reg.leaderboard())

    # NOTE(review): this R2 is computed on the same rows the model was fitted
    # on, so it is an in-sample score and will overstate generalisation.
    pred = reg.predict(X)
    print(f"{t} - R2:", sklearn.metrics.r2_score(y, pred))

          rank  ensemble_weight                 type      cost  duration
model_id                                                                
20           1             0.10  k_nearest_neighbors  0.955044  2.012543
2            2             0.20        random_forest  1.089489  5.465669
148          3             0.18  k_nearest_neighbors  1.166891  1.657177
107          4             0.20        random_forest  1.184347  4.659312
45           5             0.20  k_nearest_neighbors  1.204066  2.009746
155          6             0.12        decision_tree  1.209773  1.626308
umato_srho_0 - R2: 0.8252300286596169
          rank  ensemble_weight                 type      cost  duration
model_id                                                                
147          1             0.08        random_forest  0.960756  6.301394
2            2             0.60        random_forest  0.984159  6.704716
137          3             0.04  k_nearest_neighbors  0.995801  1.810390
39           