In [1]:
# ===================================================================
#  Library
# ===================================================================
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error

import warnings
warnings.simplefilter("ignore")

import optuna
import numpy as np

In [2]:
# ===================================================================
#  CFG
# ===================================================================
class CFG:
    seed = 42
    n_seeds = 3
    n_trials = 2000
    save_dir = "G:/マイドライブ/signate_StudentCup2023/exp/"
    data_dir = "G:/マイドライブ/signate_StudentCup2023/data/"
    filename = "exp025"

In [3]:
# ===================================================================
#  Utils
# ===================================================================
def get_score(y_true, y_pred):
    """get MAPE score"""
    score = mean_absolute_percentage_error(y_true, y_pred)
    return score * 100

In [4]:
# ===================================================================
#  DataLoading
# ===================================================================
df_1 = pd.read_csv(CFG.save_dir+"oof_df_exp014.csv", names=['id', 'pred']).rename(columns={"pred":"pred_1"})
df_2 = pd.read_csv(CFG.save_dir+"oof_df_exp015.csv", names=['id', 'pred']).rename(columns={"pred":"pred_2"})
df_3 = pd.read_csv(CFG.save_dir+"oof_df_exp016.csv", names=['id', 'pred']).rename(columns={"pred":"pred_3"})
df_4 = pd.read_csv(CFG.save_dir+"oof_df_exp017.csv", names=['id', 'pred']).rename(columns={"pred":"pred_4"})
df_5 = pd.read_csv(CFG.save_dir+"oof_df_exp018.csv", names=['id', 'pred']).rename(columns={"pred":"pred_5"})
df_6 = pd.read_csv(CFG.save_dir+"oof_df_exp019.csv", names=['id', 'pred']).rename(columns={"pred":"pred_6"})
df_7 = pd.read_csv(CFG.save_dir+"oof_df_exp020.csv", names=['id', 'pred']).rename(columns={"pred":"pred_7"})
df_8 = pd.read_csv(CFG.save_dir+"oof_df_exp021.csv", names=['id', 'pred']).rename(columns={"pred":"pred_8"})
df_9 = pd.read_csv(CFG.save_dir+"oof_df_exp022.csv", names=['id', 'pred']).rename(columns={"pred":"pred_9"})
df_10 = pd.read_csv(CFG.save_dir+"oof_df_exp023.csv", names=['id', 'pred']).rename(columns={"pred":"pred_10"})
df_11 = pd.read_csv(CFG.save_dir+"kun_exp38_oof_pred.csv").rename(columns={"oof_pred":"pred_11"})
df_12 = pd.read_csv(CFG.save_dir+"kun_exp39_oof_pred.csv").rename(columns={"oof_pred":"pred_12"})
df_13 = pd.read_csv(CFG.save_dir+"kun_exp40_oof_pred.csv").rename(columns={"oof_pred":"pred_13"})

train = pd.read_csv(CFG.data_dir+"train.csv")

df = pd.merge(train, df_1[["id", "pred_1"]], on="id", how="left")
df = pd.merge(df, df_2[["id","pred_2"]], on="id", how="left")
df = pd.merge(df, df_3[["id", "pred_3"]], on="id", how="left")
df = pd.merge(df, df_4[["id", "pred_4"]], on="id", how="left")
df = pd.merge(df, df_5[["id", "pred_5"]], on="id", how="left")
df = pd.merge(df, df_6[["id", "pred_6"]], on="id", how="left")
df = pd.merge(df, df_7[["id", "pred_7"]], on="id", how="left")
df = pd.merge(df, df_8[["id", "pred_8"]], on="id", how="left")
df = pd.merge(df, df_9[["id", "pred_9"]], on="id", how="left")
df = pd.merge(df, df_10[["id", "pred_10"]], on="id", how="left")
df = pd.concat([df, df_11], axis=1)
df = pd.concat([df, df_12], axis=1)
df = pd.concat([df, df_13], axis=1)

df.to_csv(CFG.save_dir+f"{CFG.filename}_preds.csv", index=False)

In [5]:
# ===================================================================
#  optuna
# ===================================================================
def objective(trial):
    a = trial.suggest_float("a", 1e-8, 1, log=True)
    b = trial.suggest_float("b", 1e-8, 1, log=True)
    c = trial.suggest_float("c", 1e-8, 1, log=True)
    d = trial.suggest_float("d", 1e-8, 1, log=True)
    e = trial.suggest_float("e", 1e-8, 1, log=True)
    
    f = trial.suggest_float("f", 1e-8, 1, log=True)
    g = trial.suggest_float("g", 1e-8, 1, log=True)
    h = trial.suggest_float("h", 1e-8, 1, log=True)
    i = trial.suggest_float("i", 1e-8, 1, log=True)
    j = trial.suggest_float("j", 1e-8, 1, log=True)
    
    k = trial.suggest_float("k", 1e-8, 1, log=True)
    l = trial.suggest_float("l", 1e-8, 1, log=True)
    m = trial.suggest_float("m", 1e-8, 1, log=True)
    
    
    df[f"pred"] = df[f"pred_1"] * a +\
                  df[f"pred_2"] * b +\
                  df[f"pred_3"] * c +\
                  df[f"pred_4"] * d +\
                  df[f"pred_5"] * e +\
                  df[f"pred_6"] * f +\
                  df[f"pred_7"] * g +\
                  df[f"pred_8"] * h +\
                  df[f"pred_9"] * i +\
                  df[f"pred_10"] * j +\
                  df[f"pred_11"] * k +\
                  df[f"pred_12"] * l +\
                  df[f"pred_13"] * m
                      
                      
                      
    score = get_score(y_true=df["price"], y_pred = df[f"pred"])
    return score
    
optuna.logging.set_verbosity(optuna.logging.WARNING)

# シードのリストを定義
seeds = [seed for seed in range(CFG.seed, CFG.seed+CFG.n_seeds)]

best_values = []
best_params_list = []

for seed in seeds:    
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=seed)
    )
    study.optimize(objective, 
                   n_trials=CFG.n_trials, 
                   n_jobs = -1,
                   show_progress_bar=True)
    
    best_value = study.best_value
    best_params = study.best_params
    
    best_values.append(best_value)
    best_params_list.append(best_params)
    
    print(f"Seed: {seed}, Best Value: {best_value}, Best Params: {best_params}")
    
    
# 最も小さい best_value を持つ Study を探索
best_index = np.argmin(best_values)
best_params_final = best_params_list[best_index]
best_value_final = best_values[best_index]

print("Final Best Value:", best_value_final)
print("Final Best Params:", best_params_final)

  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 42, Best Value: 43.87820413829381, Best Params: {'a': 0.004830532362170212, 'b': 0.003202013741302334, 'c': 0.015959853453707446, 'd': 0.20931673935822329, 'e': 0.2643416744103098, 'f': 0.17715955539970848, 'g': 1.8129155260506372e-08, 'h': 0.3105253749301927, 'i': 3.0633346375096162e-06, 'j': 0.013731686951451775, 'k': 1.6025291731665643e-05, 'l': 5.0109568989328915e-08, 'm': 0.001885492850424893}


  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 43, Best Value: 43.87920673735458, Best Params: {'a': 0.3926646857699835, 'b': 3.6791341187144746e-07, 'c': 0.0547190409421216, 'd': 2.6606897908882205e-07, 'e': 0.0013075026363245405, 'f': 0.0025984682940336365, 'g': 0.010256743474565896, 'h': 5.4179513445994855e-08, 'i': 0.3598836616556117, 'j': 1.495619798701362e-08, 'k': 0.06351338280312532, 'l': 1.5979356131660024e-05, 'm': 0.11231667318465739}


  0%|          | 0/2000 [00:00<?, ?it/s]

Seed: 44, Best Value: 43.88651276669383, Best Params: {'a': 0.05211266762641686, 'b': 1.8666983652103266e-05, 'c': 0.2268215033877116, 'd': 8.463361814149529e-07, 'e': 4.938229300539913e-07, 'f': 0.2626370571698365, 'g': 0.0027458606106617153, 'h': 1.7377380284769496e-07, 'i': 2.2892985246575045e-07, 'j': 1.5192801861437697e-07, 'k': 0.443811035481086, 'l': 0.001122402424890268, 'm': 0.015502552346286263}
Final Best Value: 43.87820413829381
Final Best Params: {'a': 0.004830532362170212, 'b': 0.003202013741302334, 'c': 0.015959853453707446, 'd': 0.20931673935822329, 'e': 0.2643416744103098, 'f': 0.17715955539970848, 'g': 1.8129155260506372e-08, 'h': 0.3105253749301927, 'i': 3.0633346375096162e-06, 'j': 0.013731686951451775, 'k': 1.6025291731665643e-05, 'l': 5.0109568989328915e-08, 'm': 0.001885492850424893}


In [6]:
# ===================================================================
#  test
# ===================================================================
test_1 = pd.read_csv(CFG.save_dir+"exp014.csv", names=['id', 'pred']).rename(columns={"pred":"pred_1"})
test_2 = pd.read_csv(CFG.save_dir+"exp015.csv", names=['id', 'pred']).rename(columns={"pred":"pred_2"})
test_3 = pd.read_csv(CFG.save_dir+"exp016.csv", names=['id', 'pred']).rename(columns={"pred":"pred_3"})
test_4 = pd.read_csv(CFG.save_dir+"exp017.csv", names=['id', 'pred']).rename(columns={"pred":"pred_4"})
test_5 = pd.read_csv(CFG.save_dir+"exp018.csv", names=['id', 'pred']).rename(columns={"pred":"pred_5"})
test_6 = pd.read_csv(CFG.save_dir+"exp019.csv", names=['id', 'pred']).rename(columns={"pred":"pred_6"})
test_7 = pd.read_csv(CFG.save_dir+"exp020.csv", names=['id', 'pred']).rename(columns={"pred":"pred_7"})
test_8 = pd.read_csv(CFG.save_dir+"exp021.csv", names=['id', 'pred']).rename(columns={"pred":"pred_8"})
test_9 = pd.read_csv(CFG.save_dir+"exp022.csv", names=['id', 'pred']).rename(columns={"pred":"pred_9"})
test_10 = pd.read_csv(CFG.save_dir+"exp023.csv", names=['id', 'pred']).rename(columns={"pred":"pred_10"})
test_11 = pd.read_csv(CFG.save_dir+"kun_exp038.csv", names=['id', 'pred']).rename(columns={"pred":"pred_11"})
test_12 = pd.read_csv(CFG.save_dir+"kun_exp039.csv", names=['id', 'pred']).rename(columns={"pred":"pred_12"})
test_13 = pd.read_csv(CFG.save_dir+"kun_exp040.csv", names=['id', 'pred']).rename(columns={"pred":"pred_13"})

test = pd.merge(test_1, test_2, on="id", how="left")
test = pd.merge(test, test_3, on="id", how="left")
test = pd.merge(test, test_4, on="id", how="left")
test = pd.merge(test, test_5, on="id", how="left")
test = pd.merge(test, test_6, on="id", how="left")
test = pd.merge(test, test_7, on="id", how="left")
test = pd.merge(test, test_8, on="id", how="left")
test = pd.merge(test, test_9, on="id", how="left")
test = pd.merge(test, test_10, on="id", how="left")
test = pd.merge(test, test_11, on="id", how="left")
test = pd.merge(test, test_12, on="id", how="left")
test = pd.merge(test, test_13, on="id", how="left")

test["pred"] =   test["pred_1"] * best_params_final["a"] +\
                 test["pred_2"] * best_params_final["b"] +\
                 test["pred_3"] * best_params_final["c"] +\
                 test["pred_4"] * best_params_final["d"] +\
                 test["pred_5"] * best_params_final["e"] +\
                 test["pred_6"] * best_params_final["f"] +\
                 test["pred_7"] * best_params_final["g"] +\
                 test["pred_8"] * best_params_final["h"] +\
                 test["pred_9"] * best_params_final["i"] +\
                 test["pred_10"] * best_params_final["j"] +\
                 test["pred_11"] * best_params_final["k"] +\
                 test["pred_12"] * best_params_final["l"] +\
                 test["pred_13"] * best_params_final["m"]
                     
test.to_csv(CFG.save_dir+f"{CFG.filename}_test_preds.csv", index=False)                     

test[["id", "pred"]].to_csv(CFG.save_dir+f"{CFG.filename}.csv", index=False, header=None)
test[["id", "pred"]].head(2)

Unnamed: 0,id,pred
0,27532,9727.856427
1,27533,5212.032563
