In [1]:
import optuna
import pandas as pd
from tqdm.auto import tqdm
from sklearn.metrics import mean_absolute_percentage_error

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class CFG:
    """Notebook-wide settings, read as class attributes (e.g. ``CFG.seed``)."""

    # Directory prefix for every CSV read or written by this notebook.
    # File names are appended by plain string concatenation, so the
    # trailing "/" is required.
    save_dir = "G:/マイドライブ/signate_StudentCup2023/exp/"

    # Seed handed to the Optuna TPE sampler.
    seed = 42

    # Number of Optuna trials run when tuning the blend weights.
    n_trials = 500

In [3]:
def get_score(y_true, y_pred):
    """Return the MAPE of ``y_pred`` against ``y_true``, scaled by 100.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        ``mean_absolute_percentage_error(y_true, y_pred)`` multiplied by 100.
    """
    return mean_absolute_percentage_error(y_true, y_pred) * 100

In [4]:
# Out-of-fold predictions of exp005; its "pred" column is renamed to "pred_2".
df_1 = pd.read_csv(f"{CFG.save_dir}exp005_oof.csv")
df_1 = df_1.rename(columns={"pred": "pred_2"})

# Out-of-fold predictions of exp006; its "pred" column is renamed to "pred_3".
df_2 = pd.read_csv(f"{CFG.save_dir}exp006_oof.csv")
df_2 = df_2.rename(columns={"pred": "pred_3"})

# Left-join exp006's prediction onto the full exp005 frame, matching on "id".
df = df_1.merge(df_2[["id", "pred_3"]], on="id", how="left")

In [5]:
# Display the merged out-of-fold frame (exp005 columns plus "pred_3") for a visual check.
df

Unnamed: 0,id,region,year,manufacturer,condition,cylinders,fuel,title_status,transmission,drive,...,region_count_encoding,cylinders_count_encoding,state_count_encoding,cylinders_max_encoding,fuel_std_encoding,manufacturer_median_encoding,condition_median_encoding,pred_2,pred_1,pred_3
0,16,wichita,2006,buick,excellent,other,gas,clean,automatic,fwd,...,182.0,54,197,68716.0,9534.947212,7815.0,11556.0,3783.379330,707.484036,4200.8590
1,21,palm springs,2008,ford,good,4 cylinders,gas,clean,manual,fwd,...,61.0,8040,2147,78749.0,9534.947212,11644.0,7462.5,3421.197400,639.756772,3946.3910
2,22,cincinnati,2014,ford,excellent,6 cylinders,gas,clean,automatic,fwd,...,124.0,9237,685,86915.0,9534.947212,11644.0,11556.0,9704.815064,1814.780157,8097.6130
3,23,rockford,2016,ford,excellent,6 cylinders,diesel,rebuilt,manual,4wd,...,88.0,9237,2601,86915.0,15919.858934,11644.0,11556.0,16894.387280,3159.215153,25215.2360
4,26,redding,2012,toyota,excellent,4 cylinders,gas,clean,manual,fwd,...,124.0,8040,2147,78749.0,9534.947212,6726.0,11556.0,5517.371288,1031.736913,5222.2260
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27527,27493,appleton-oshkosh-FDL,2009,ford,excellent,4 cylinders,gas,clean,automatic,fwd,...,56.0,8038,512,78749.0,9524.006300,11206.5,11505.0,4153.394542,776.676109,3151.4927
27528,27496,louisville,2014,audi,like new,4 cylinders,gas,clean,automatic,fwd,...,136.0,8038,180,78749.0,9524.006300,13649.0,13894.0,8622.468252,1612.383563,7647.0100
27529,27508,lakeland,2002,jeep,excellent,6 cylinders,gas,rebuilt,automatic,4wd,...,46.0,9176,1304,95329.0,9524.006300,11069.5,11505.0,14374.730589,2688.044612,12672.3950
27530,27519,new haven,2004,saturn,fair,6 cylinders,gas,clean,manual,fwd,...,86.0,9176,214,95329.0,9524.006300,6276.0,4912.0,2259.960688,422.607931,2344.4124


In [6]:
def objective(trial):
    """Optuna objective: OOF score of a weighted blend of two models.

    Samples two independent weights in [0, 1] and scores the blend
    ``pred_2 * a + pred_3 * b`` against ``df["price"]`` with ``get_score``.
    The weights are not constrained to sum to 1.

    Args:
        trial: Optuna trial used to sample the weights "a" and "b".

    Returns:
        The score returned by ``get_score`` for the blended prediction
        (the study minimizes it).
    """
    a = trial.suggest_float("a", 0, 1)
    b = trial.suggest_float("b", 0, 1)

    # Keep the blend in a local Series instead of writing it to df["pred"]:
    # mutating the shared frame on every trial leaves behind a column that
    # holds the *last* trial's blend (not the best one) once the study ends.
    blended = df["pred_2"] * a + df["pred_3"] * b
    return get_score(y_true=df["price"], y_pred=blended)
    
# Raise Optuna's log threshold to WARNING for the optimization run.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# TPE sampler seeded from the notebook config.
tpe_sampler = optuna.samplers.TPESampler(seed=CFG.seed)
study = optuna.create_study(direction="minimize", sampler=tpe_sampler)
study.optimize(objective, n_trials=CFG.n_trials, show_progress_bar=True)

# Keep the winning weights and their score for the test-time blend below.
best_params, best_value = study.best_params, study.best_value
print(best_params, best_value)

Best trial: 194. Best value: 44.2007: 100%|██████████| 500/500 [00:09<00:00, 52.54it/s]

{'a': 0.9747549501503786, 'b': 0.02511768562993632} 44.2007184830608





In [9]:
# Re-print the tuned weights and their OOF score (same call as at the end of the optimization cell).
print(best_params, best_value)

{'a': 0.9747549501503786, 'b': 0.02511768562993632} 44.2007184830608


In [7]:
# Test-set predictions of the two experiments. The files are read with
# explicit column names ("id", "pred"), i.e. they are treated as headerless.
test_1 = pd.read_csv(CFG.save_dir + "exp005.csv", names=["id", "pred"]).rename(columns={"pred": "pred_1"})
test_2 = pd.read_csv(CFG.save_dir + "exp006.csv", names=["id", "pred"]).rename(columns={"pred": "pred_2"})

# Align the two prediction sets on "id" rather than on row position, so a
# different row order (or a missing/duplicated id) in one file cannot
# silently blend predictions that belong to different rows.
test_1 = test_1.merge(test_2[["id", "pred_2"]], on="id", how="left", validate="one_to_one")
assert test_1["pred_2"].notna().all(), "exp006.csv is missing predictions for some ids in exp005.csv"

# Weight "a" was tuned on exp005's OOF predictions and "b" on exp006's, so
# they are applied to the matching test predictions here (the column names
# differ from the OOF cell, the experiments do not).
test_1["pred"] = test_1["pred_1"] * best_params["a"] + test_1["pred_2"] * best_params["b"]
test_1[["id", "pred"]]

Unnamed: 0,id,pred
0,27532,9354.083043
1,27533,5704.773652
2,27534,5504.203525
3,27535,19135.199123
4,27536,3965.415718
...,...,...
27532,55064,13416.395178
27533,55065,9058.868009
27534,55066,6432.896708
27535,55067,4928.140944


In [8]:
# Write the blended test predictions (columns "id" and "pred") to exp007.csv,
# without the index column; header=None is passed so no header row is expected.
submission_path = CFG.save_dir + "exp007.csv"
test_1.loc[:, ["id", "pred"]].to_csv(submission_path, index=False, header=None)