In [1]:
# ===================================================================
#  Library
# ===================================================================
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import math
import time


from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import mean_absolute_percentage_error
from tqdm.auto import tqdm

import warnings
warnings.simplefilter("ignore")

import unicodedata
import lightgbm as lgb

import optuna

In [2]:
# ===================================================================
#  CFG
# ===================================================================
class CFG:
    """Notebook-wide settings for the blending experiment."""

    # --- I/O -----------------------------------------------------------
    # Directory prefix that is string-concatenated with file names below,
    # so it must keep its trailing slash.
    save_dir = "G:/マイドライブ/signate_StudentCup2023/exp/"
    # Stem of the CSV written by the final cell ("<filename>.csv").
    filename = "exp013"

    # --- Weight search -------------------------------------------------
    # Seed handed to the Optuna TPE sampler.
    seed = 42
    # Number of Optuna trials to run.
    n_trials = 3000

In [3]:
# ===================================================================
#  Utils
# ===================================================================
def get_score(y_true, y_pred):
    """Return the MAPE between ``y_true`` and ``y_pred`` as a percentage.

    The value from ``mean_absolute_percentage_error`` is a fraction; it is
    multiplied by 100 so that, e.g., 0.44 is reported as 44.0.
    """
    return mean_absolute_percentage_error(y_true, y_pred) * 100

In [4]:
# ===================================================================
#  DataLoading
# ===================================================================
# Load the out-of-fold prediction files of the two base experiments. Each
# file's "pred" column is renamed so both can live side by side in one frame.
df_1 = pd.read_csv(CFG.save_dir+"exp011_oof_df.csv").rename(columns={"pred":"pred_1"})
df_2 = pd.read_csv(CFG.save_dir+"exp012_oof_df.csv").rename(columns={"pred":"pred_2"})

# Attach exp012's prediction to exp011's rows by id.
# validate="many_to_one" makes pandas raise MergeError when an id appears more
# than once in df_2; without it such duplicates would silently multiply rows
# and distort the blended score.
df = pd.merge(df_1, df_2[["id", "pred_2"]], on="id", how="left", validate="many_to_one")

# A left join leaves NaN where df_2 has no matching id. Fail here with a clear
# message instead of letting the NaN surface later inside the metric.
_n_missing_pred_2 = int(df["pred_2"].isna().sum())
if _n_missing_pred_2:
    raise ValueError(
        f"{_n_missing_pred_2} rows have no pred_2 after merging on 'id'; "
        "the two OOF files do not cover the same ids"
    )

In [5]:
# ===================================================================
#  optuna
# ===================================================================
def objective(trial):
    """Optuna objective: percentage MAPE of a weighted sum of the two OOF predictions.

    Two weights, registered with the trial as "a" and "b", are each sampled
    from [0, 1] independently (they are not constrained to sum to 1). The
    blend is scored against ``df["price"]`` with ``get_score``.

    Side effect: the blend is written to the module-level ``df["pred"]``
    column on every call, so after a study finishes that column holds the
    blend of the last trial evaluated, not necessarily the best one.
    """
    weight_1 = trial.suggest_float("a", 0, 1)
    weight_2 = trial.suggest_float("b", 0, 1)

    df["pred"] = df["pred_1"] * weight_1 + df["pred_2"] * weight_2
    return get_score(y_true=df["price"], y_pred=df["pred"])
    
# Only warnings and above are logged, so per-trial messages are suppressed
# and the progress bar plus the final summary are what gets shown.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Minimise the blended MAPE with a TPE sampler seeded from the config.
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=CFG.seed),
    direction="minimize",
)
study.optimize(objective, n_trials=CFG.n_trials, show_progress_bar=True)

# Keep the winning weights and their score; best_params is reused when
# blending the test predictions.
best_params, best_value = study.best_params, study.best_value
print(best_params, best_value)

  0%|          | 0/3000 [00:00<?, ?it/s]

{'a': 0.5144978198294123, 'b': 0.4820933622229025} 44.00756302577998


In [6]:
# ===================================================================
#  test
# ===================================================================
# Load the two headerless test prediction files (columns: id, pred) and rename
# each prediction column so the source experiment stays identifiable.
test_1 = pd.read_csv(CFG.save_dir+"exp011.csv", names=['id', 'pred']).rename(columns={"pred":"pred_1"})
test_2 = pd.read_csv(CFG.save_dir+"exp012.csv", names=['id', 'pred']).rename(columns={"pred":"pred_2"})

# Align exp012's predictions to exp011's rows by id rather than by row position,
# the same way the OOF frames are combined. Adding the two columns directly
# would pair rows by index and silently mix up predictions if the files were
# ever written in a different order or with a different number of rows.
# A left merge keeps test_1's row order; validate="many_to_one" raises if an id
# is duplicated in test_2, so the result has exactly one row per test_1 row.
_aligned_2 = test_1[["id"]].merge(test_2, on="id", how="left", validate="many_to_one")
if _aligned_2["pred_2"].isna().any():
    raise ValueError(
        "exp012.csv has no prediction for some ids present in exp011.csv; "
        "cannot blend the test predictions"
    )

# Apply the weights found on the OOF predictions. to_numpy() drops the merge
# result's index so the values are paired with test_1 row by row.
test_1["pred"] = test_1["pred_1"] * best_params["a"] + _aligned_2["pred_2"].to_numpy() * best_params["b"]

# Write the blend in the same headerless (id, pred) layout as the input files.
test_1[["id", "pred"]].to_csv(CFG.save_dir+f"{CFG.filename}.csv", index=False, header=None)
test_1[["id", "pred"]].head(2)

Unnamed: 0,id,pred
0,27532,9596.409495
1,27533,5417.556117
