In [13]:
import optuna
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error

In [14]:
class CFG:
    """Notebook-wide settings for the blending experiment."""

    # Seed passed to the Optuna TPE sampler.
    seed: int = 42
    # Number of Optuna trials to run.
    n_trials: int = 3_000
    # Directory prefix (trailing slash included) that file names are appended to.
    save_dir: str = "G:/マイドライブ/signate_StudentCup2023/exp/"
    # Stem of the CSV file written at the end of the notebook.
    filename: str = "exp003"

In [15]:
def get_score(y_true, y_pred):
    """Return the mean absolute percentage error scaled by 100.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        The value of ``mean_absolute_percentage_error(y_true, y_pred)``
        multiplied by 100, i.e. expressed in percent.
    """
    return mean_absolute_percentage_error(y_true, y_pred) * 100

In [16]:
# Load the out-of-fold predictions of the two experiments and give each
# prediction column a model-specific name so they can sit side by side.
df_1 = pd.read_csv(CFG.save_dir + "exp001_oof_df.csv").rename(columns={"pred": "pred_1"})
df_2 = pd.read_csv(CFG.save_dir + "exp002_oof_df.csv").rename(columns={"pred": "pred_2"})

# Join on id. validate="one_to_one" makes pandas raise MergeError if an id is
# duplicated on either side, instead of silently multiplying rows.
df = pd.merge(
    df_1,
    df_2[["id", "pred_2"]],
    on="id",
    how="left",
    validate="one_to_one",
)

# A left join leaves NaN in pred_2 for ids that are missing from exp002;
# fail here with a clear message rather than later inside the metric.
assert df["pred_2"].notna().all(), "some ids in exp001 OOF have no pred_2 from exp002 OOF"

In [17]:
def objective(trial):
    """Optuna objective: score of a weighted sum of the two OOF predictions.

    Samples two independent weights ``a`` and ``b`` in [0, 1] (they are not
    constrained to sum to 1), blends ``df["pred_1"]`` and ``df["pred_2"]``
    with them and scores the blend against ``df["price"]`` via ``get_score``.

    Args:
        trial: The Optuna trial used to sample ``a`` and ``b``.

    Returns:
        The ``get_score`` value of the blended prediction (lower is better,
        the study is created with ``direction="minimize"``).
    """
    a = trial.suggest_float("a", 0, 1)
    b = trial.suggest_float("b", 0, 1)

    # Keep the blend in a local Series. Writing it back to df["pred"] on every
    # trial mutated the shared frame and left the *last* trial's blend (not the
    # best one) behind as hidden state.
    blended = df["pred_1"] * a + df["pred_2"] * b
    return get_score(y_true=df["price"], y_pred=blended)
    
# Only emit Optuna log messages at WARNING level or above.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Seeded TPE sampler so the search is driven by CFG.seed.
sampler = optuna.samplers.TPESampler(seed=CFG.seed)
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=CFG.n_trials, show_progress_bar=True)

# Best weights found and the score they achieved.
best_params, best_value = study.best_params, study.best_value
print(best_params, best_value)

  0%|          | 0/3000 [00:00<?, ?it/s]

{'a': 0.04812491286410649, 'b': 0.5583144788845906} 45.84279120500015


In [30]:
# Test-set predictions of the two experiments. The files are read with explicit
# column names, i.e. they are treated as having no header row.
test_1 = pd.read_csv(CFG.save_dir + "exp001.csv", names=["id", "pred"]).rename(columns={"pred": "pred_1"})
test_2 = pd.read_csv(CFG.save_dir + "exp002.csv", names=["id", "pred"]).rename(columns={"pred": "pred_2"})

# Align the two models on id instead of relying on both files having the same
# row order; validate="one_to_one" raises on duplicated ids.
test_1 = test_1.merge(test_2, on="id", how="left", validate="one_to_one")
assert test_1["pred_2"].notna().all(), "some ids in exp001.csv have no prediction in exp002.csv"

# Apply the weights found by the Optuna study rather than hand-copied literals,
# so the blend stays in sync if the study is re-run.
test_1["pred"] = test_1["pred_1"] * best_params["a"] + test_1["pred_2"] * best_params["b"]
test_1[["id", "pred"]]

Unnamed: 0,id,pred
0,27532,8265.731348
1,27533,5159.518780
2,27534,3996.817662
3,27535,16806.616329
4,27536,6004.221289
...,...,...
27532,55064,11964.737759
27533,55065,8692.346714
27534,55066,7575.092500
27535,55067,5735.992060


In [31]:
# Write the blended predictions as <save_dir><filename>.csv, without the index
# column and without a header row.
submission_path = f"{CFG.save_dir}{CFG.filename}.csv"
test_1[["id", "pred"]].to_csv(submission_path, index=False, header=None)