In [6]:
# ===================================================================
#  Library
# ===================================================================
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error

import warnings
warnings.simplefilter("ignore")

import optuna
import numpy as np

In [2]:
# ===================================================================
#  CFG
# ===================================================================
class CFG:
    seed = 42
    n_seeds = 3
    n_trials = 2000
    save_dir = "G:/マイドライブ/signate_StudentCup2023/exp/"
    data_dir = "G:/マイドライブ/signate_StudentCup2023/data/"
    filename = "exp024"

In [3]:
# ===================================================================
#  Utils
# ===================================================================
def get_score(y_true, y_pred):
    """get MAPE score"""
    score = mean_absolute_percentage_error(y_true, y_pred)
    return score * 100

In [4]:
# ===================================================================
#  DataLoading
# ===================================================================
df_1 = pd.read_csv(CFG.save_dir+"oof_df_exp014.csv", names=['id', 'pred']).rename(columns={"pred":"pred_1"})
df_2 = pd.read_csv(CFG.save_dir+"oof_df_exp015.csv", names=['id', 'pred']).rename(columns={"pred":"pred_2"})
df_3 = pd.read_csv(CFG.save_dir+"oof_df_exp016.csv", names=['id', 'pred']).rename(columns={"pred":"pred_3"})
df_4 = pd.read_csv(CFG.save_dir+"oof_df_exp017.csv", names=['id', 'pred']).rename(columns={"pred":"pred_4"})
df_5 = pd.read_csv(CFG.save_dir+"oof_df_exp018.csv", names=['id', 'pred']).rename(columns={"pred":"pred_5"})
df_6 = pd.read_csv(CFG.save_dir+"oof_df_exp019.csv", names=['id', 'pred']).rename(columns={"pred":"pred_6"})
df_7 = pd.read_csv(CFG.save_dir+"oof_df_exp020.csv", names=['id', 'pred']).rename(columns={"pred":"pred_7"})
df_8 = pd.read_csv(CFG.save_dir+"oof_df_exp021.csv", names=['id', 'pred']).rename(columns={"pred":"pred_8"})
df_9 = pd.read_csv(CFG.save_dir+"oof_df_exp022.csv", names=['id', 'pred']).rename(columns={"pred":"pred_9"})
df_10 = pd.read_csv(CFG.save_dir+"oof_df_exp023.csv", names=['id', 'pred']).rename(columns={"pred":"pred_10"})

train = pd.read_csv(CFG.data_dir+"train.csv")

df = pd.merge(train, df_1[["id", "pred_1"]], on="id", how="left")
df = pd.merge(df, df_2[["id","pred_2"]], on="id", how="left")
df = pd.merge(df, df_3[["id", "pred_3"]], on="id", how="left")
df = pd.merge(df, df_4[["id", "pred_4"]], on="id", how="left")
df = pd.merge(df, df_5[["id", "pred_5"]], on="id", how="left")
df = pd.merge(df, df_6[["id", "pred_6"]], on="id", how="left")
df = pd.merge(df, df_7[["id", "pred_7"]], on="id", how="left")
df = pd.merge(df, df_8[["id", "pred_8"]], on="id", how="left")
df = pd.merge(df, df_9[["id", "pred_9"]], on="id", how="left")
df = pd.merge(df, df_10[["id", "pred_10"]], on="id", how="left")

In [5]:
# ===================================================================
#  optuna
# ===================================================================
def objective(trial):
    a = trial.suggest_float("a", 1e-8, 1, log=True)
    b = trial.suggest_float("b", 1e-8, 1, log=True)
    c = trial.suggest_float("c", 1e-8, 1, log=True)
    d = trial.suggest_float("d", 1e-8, 1, log=True)
    e = trial.suggest_float("e", 1e-8, 1, log=True)
    
    f = trial.suggest_float("f", 1e-8, 1, log=True)
    g = trial.suggest_float("g", 1e-8, 1, log=True)
    h = trial.suggest_float("h", 1e-8, 1, log=True)
    i = trial.suggest_float("i", 1e-8, 1, log=True)
    j = trial.suggest_float("j", 1e-8, 1, log=True)
    
    
    df[f"pred"] = df[f"pred_1"] * a +\
                  df[f"pred_2"] * b +\
                  df[f"pred_3"] * c +\
                  df[f"pred_4"] * d +\
                  df[f"pred_5"] * e +\
                  df[f"pred_6"] * f +\
                  df[f"pred_7"] * g +\
                  df[f"pred_8"] * h +\
                  df[f"pred_9"] * i +\
                  df[f"pred_10"] * j
                      
                      
    score = get_score(y_true=df["price"], y_pred = df[f"pred"])
    return score
    
optuna.logging.set_verbosity(optuna.logging.WARNING)

# シードのリストを定義
seeds = [seed for seed in range(CFG.seed, CFG.seed+CFG.n_seeds)]

best_values = []
best_params_list = []

for seed in seeds:    
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=seed)
    )
    study.optimize(objective, 
                   n_trials=CFG.n_trials, 
                   n_jobs = -1,
                   show_progress_bar=True)
    
    best_value = study.best_value
    best_params = study.best_params
    
    best_values.append(best_value)
    best_params_list.append(best_params)
    
    print(f"Seed: {seed}, Best Value: {best_value}, Best Params: {best_params}")

  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 42, Best Value: 43.88749979822287, Best Params: {'a': 0.3589752687480132, 'b': 0.0005307989486694464, 'c': 1.2431940900726542e-07, 'd': 5.634079740847213e-05, 'e': 2.533456001188329e-07, 'f': 0.2015664707608615, 'g': 5.079089580849648e-08, 'h': 0.4456921777558138, 'i': 3.6166977887987366e-07, 'j': 1.8592609236366082e-05}


  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 43, Best Value: 43.885207515251004, Best Params: {'a': 0.4582327044453581, 'b': 0.07841726518251729, 'c': 0.15361668696462, 'd': 0.06231433066114554, 'e': 0.012378140132942578, 'f': 0.0704799231252861, 'g': 0.0057944181331323775, 'h': 0.05604367866710879, 'i': 2.2970822787791101e-07, 'j': 0.10185647964836345}


  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 44, Best Value: 43.887876632791055, Best Params: {'a': 0.261384020033897, 'b': 2.5611627393384453e-08, 'c': 0.1500901611424017, 'd': 3.598426173755583e-06, 'e': 3.031804499375664e-06, 'f': 0.3425125916127622, 'g': 0.0009047774425401772, 'h': 0.030454396731970576, 'i': 4.8786769653523006e-05, 'j': 0.21437656223298374}


NameError: name 'np' is not defined

In [7]:

# 最も小さい best_value を持つ Study を探索
best_index = np.argmin(best_values)
best_params_final = best_params_list[best_index]
best_value_final = best_values[best_index]

print("Final Best Value:", best_value_final)
print("Final Best Params:", best_params_final)

Final Best Value: 43.885207515251004
Final Best Params: {'a': 0.4582327044453581, 'b': 0.07841726518251729, 'c': 0.15361668696462, 'd': 0.06231433066114554, 'e': 0.012378140132942578, 'f': 0.0704799231252861, 'g': 0.0057944181331323775, 'h': 0.05604367866710879, 'i': 2.2970822787791101e-07, 'j': 0.10185647964836345}


In [8]:
# ===================================================================
#  test
# ===================================================================
test_1 = pd.read_csv(CFG.save_dir+"exp014.csv", names=['id', 'pred']).rename(columns={"pred":"pred_1"})
test_2 = pd.read_csv(CFG.save_dir+"exp015.csv", names=['id', 'pred']).rename(columns={"pred":"pred_2"})
test_3 = pd.read_csv(CFG.save_dir+"exp016.csv", names=['id', 'pred']).rename(columns={"pred":"pred_3"})
test_4 = pd.read_csv(CFG.save_dir+"exp017.csv", names=['id', 'pred']).rename(columns={"pred":"pred_4"})
test_5 = pd.read_csv(CFG.save_dir+"exp018.csv", names=['id', 'pred']).rename(columns={"pred":"pred_5"})
test_6 = pd.read_csv(CFG.save_dir+"exp019.csv", names=['id', 'pred']).rename(columns={"pred":"pred_6"})
test_7 = pd.read_csv(CFG.save_dir+"exp020.csv", names=['id', 'pred']).rename(columns={"pred":"pred_7"})
test_8 = pd.read_csv(CFG.save_dir+"exp021.csv", names=['id', 'pred']).rename(columns={"pred":"pred_8"})
test_9 = pd.read_csv(CFG.save_dir+"exp022.csv", names=['id', 'pred']).rename(columns={"pred":"pred_9"})
test_10 = pd.read_csv(CFG.save_dir+"exp023.csv", names=['id', 'pred']).rename(columns={"pred":"pred_10"})


test_1["pred"] = test_1["pred_1"] * best_params_final["a"] +\
                 test_2["pred_2"] * best_params_final["b"] +\
                 test_3["pred_3"] * best_params_final["c"] +\
                 test_4["pred_4"] * best_params_final["d"] +\
                 test_5["pred_5"] * best_params_final["e"] +\
                 test_6["pred_6"] * best_params_final["f"] +\
                 test_7["pred_7"] * best_params_final["g"] +\
                 test_8["pred_8"] * best_params_final["h"] +\
                 test_9["pred_9"] * best_params_final["i"] +\
                 test_10["pred_10"] * best_params_final["j"]
                     
test_1[["id", "pred"]].to_csv(CFG.save_dir+f"{CFG.filename}.csv", index=False, header=None)
test_1[["id", "pred"]].head(2)

Unnamed: 0,id,pred
0,27532,9584.253958
1,27533,5074.0509
