In [1]:
#import split data
from sklearn.model_selection import train_test_split
import numpy as np
from scipy.stats import truncnorm
import pandas as pd

In [2]:
import requests
import pandas as pd


# Specify the countries and indicator
countries = "CAN;FIN;ITA;KEN;NOR;SGP"  # ISO codes for Canada, Finland, Italy, Kenya, Norway, Singapore
indicator = "NY.GDP.PCAP.CD"  # GDP per capita (current US$)
years = ["2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022","2023"]

gdp_data = {}

for year in years:
        
    url = f"https://api.worldbank.org/v2/country/{countries}/indicator/{indicator}?format=json&date={year}"
    # Fetch the data
    response = requests.get(url)
    data = response.json()

    # Extract relevant data
    for entry in data[1]:
        country = entry['country']['value']
        gdp_per_capita = entry['value']
        year = entry['date']
        if country not in gdp_data:
            gdp_data[country] = {}
            gdp_data[country][year] = gdp_per_capita
        else:
            if year not in gdp_data[country]:
                gdp_data[country][year] = gdp_per_capita


print(gdp_data)


{'Canada': {'2010': 47560.6666009406, '2011': 52223.8588398531, '2012': 52670.3447335415, '2013': 52638.1187235237, '2014': 50960.8431174661, '2015': 43594.1941045394, '2016': 42314.0615817218, '2017': 45129.628116623, '2018': 46539.1761570405, '2019': 46352.8693445211, '2020': 43537.839298904, '2021': 52496.8441693242, '2022': 55509.393176404, '2023': 53431.1857063879}, 'Finland': {'2010': 46506.2919016566, '2011': 51060.3242589767, '2012': 47551.6740841369, '2013': 49691.0145200739, '2014': 50073.7760815871, '2015': 42560.3456767103, '2016': 43451.2562442158, '2017': 46085.0174739036, '2018': 49654.2497035329, '2019': 48358.1807773701, '2020': 48828.6846862799, '2021': 53099.1351400148, '2022': 50438.4753952355, '2023': 52925.6897638424}, 'Italy': {'2010': 36184.7118698678, '2011': 38851.3881339353, '2012': 35235.7989058242, '2013': 35747.707952689, '2014': 35750.7197500382, '2015': 30387.1293187854, '2016': 31126.3246947273, '2017': 32554.14668453, '2018': 34746.3441392416, '2019': 

In [3]:

from sklearn.pipeline import Pipeline
from sklearn.compose import TransformedTargetRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import TimeSeriesSplit
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
import pandas as pd

def compute_gdp_per_capita(X,gdp_data):
    def get_gdp(row):
        country = str(row["country"])
        year = str(int(row["year"])+2010)
        return gdp_data[country][year]
    X["gdp_per_capita"] = X.apply(get_gdp, axis=1)
    return X

def generate_features(X, train=False):
    X = X.copy()
        
    if train:
        X = X.dropna(subset=["num_sold"])
        X = X.drop(columns=["id"])

    X["date"] = pd.to_datetime(X["date"], format="%Y-%m-%d")
    X["dayofweek"] = X["date"].dt.dayofweek
    X["month"] = X["date"].dt.month - 1
    X["year"] = X["date"].dt.year - 2010
    X["is_weekend"] = X["dayofweek"].isin([5,6])
    X["sin_dayofweek"] = np.sin(X["dayofweek"] * (2 * np.pi / 7))
    X["cos_dayofweek"] = np.cos(X["dayofweek"] * (2 * np.pi / 7))
    X["sin_month"] = np.sin(X["month"] * (2 * np.pi / 12))
    X["cos_month"] = np.cos(X["month"] * (2 * np.pi / 12))
    X["sin_year"] = np.sin(X["year"] * (2 * np.pi / 10))
    X["cos_year"] = np.cos(X["year"] * (2 * np.pi / 10))
    
    X = compute_gdp_per_capita(X,gdp_data)
    
    X = X.drop(columns=["date","month","year","dayofweek"])
        
    return X

FOLDER = "playground-series-s5e1/"
train_data = pd.read_csv(FOLDER + "train.csv")
test_data = pd.read_csv(FOLDER + "test.csv")

print(train_data["country"].unique())
train_data = generate_features(train_data,train=True)
test_data = generate_features(test_data)

print("Train data")
print(train_data.head())
print("Test data")
print(test_data.head())

['Canada' 'Finland' 'Italy' 'Kenya' 'Norway' 'Singapore']
Train data
  country              store             product  num_sold  is_weekend  \
1  Canada  Discount Stickers              Kaggle     973.0       False   
2  Canada  Discount Stickers        Kaggle Tiers     906.0       False   
3  Canada  Discount Stickers            Kerneler     423.0       False   
4  Canada  Discount Stickers  Kerneler Dark Mode     491.0       False   
5  Canada  Stickers for Less   Holographic Goose     300.0       False   

   sin_dayofweek  cos_dayofweek  sin_month  cos_month  sin_year  cos_year  \
1      -0.433884      -0.900969        0.0        1.0       0.0       1.0   
2      -0.433884      -0.900969        0.0        1.0       0.0       1.0   
3      -0.433884      -0.900969        0.0        1.0       0.0       1.0   
4      -0.433884      -0.900969        0.0        1.0       0.0       1.0   
5      -0.433884      -0.900969        0.0        1.0       0.0       1.0   

   gdp_per_capita  
1  

In [16]:
from xgboost import XGBRegressor
import optuna
from sklearn.metrics import mean_squared_error
X = train_data.drop(columns=['num_sold'])
y = train_data['num_sold']

cat_cols = ['country', 'store', 'product']
num_cols = ["sin_dayofweek", "cos_dayofweek", "sin_month", "cos_month", "sin_year", "cos_year","gdp_per_capita"]

# Create different preprocessors for different models
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

# For models that need one-hot encoding (XGBoost, RandomForest)
categorical_transformer_ohe = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore'))
])

# Create different preprocessors
preprocessor_ohe = ColumnTransformer(transformers=[
    ('num', numeric_transformer, num_cols),
    ('cat', categorical_transformer_ohe, cat_cols)
])

# For LightGBM and CatBoost, we'll just scale numerics and pass categoricals as is
preprocessor_native = ColumnTransformer(transformers=[
    ('num', numeric_transformer, num_cols)
])

model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor_ohe),
    ('regressor', XGBRegressor(
        n_estimators=5000,
        learning_rate=0.1,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    ))
])

def objective(trial):
    
    #params max_depth, min_child_weight, gamma, learning_rate
    max_depth = trial.suggest_int("max_depth", 3, 10)
    min_child_weight = trial.suggest_int("min_child_weight", 1, 10)
    gamma = trial.suggest_int("gamma", 0, 10)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    n_estimators = trial.suggest_int("n_estimators", 100, 5000)
    subsample = trial.suggest_float("subsample", 0.5, 1)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 1)
    colsample_bylevel = trial.suggest_float("colsample_bylevel", 0.5, 1)
    colsample_bynode = trial.suggest_float("colsample_bynode", 0.5, 1)
    lambda_l1 = trial.suggest_float("lambda_l1", 0, 10)
    lambda_l2 = trial.suggest_float("lambda_l2", 0, 10)
    
    model_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor_ohe),
        ('regressor', XGBRegressor(
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            max_depth=max_depth,
            min_child_weight=min_child_weight,
            gamma=gamma,
            random_state=42,
            objective='reg:squarederror',
            n_jobs=4,
            booster="gbtree",
            subsample=subsample,
            colsample_bytree=colsample_bytree,
            colsample_bylevel=colsample_bylevel,
            colsample_bynode=colsample_bynode,
            reg_alpha=lambda_l1,
            reg_lambda=lambda_l2
        ))
    ])
    #Time-series cross-validation
    time_series = TimeSeriesSplit(n_splits=5)
    fold_scores_val= []
    fold_scores_train= []
    loss_train= []
    loss_val= []
    model_name = "xgboost"
    for fold_index, (train_index, val_index) in enumerate(time_series.split(X)):
        X_train_fold = X.iloc[train_index]
        y_train_fold = y.iloc[train_index]
        X_val_fold = X.iloc[val_index]
        y_val_fold = y.iloc[val_index]
        
        y_train_fold_transformed = np.log1p(y_train_fold)
        y_val_fold_transformed = np.log1p(y_val_fold)
        
        model_pipeline.fit(X_train_fold, y_train_fold_transformed)
        
        y_pred_val = model_pipeline.predict(X_val_fold)    
        y_pred_train = model_pipeline.predict(X_train_fold)
        
        y_pred_val_transformed = np.expm1(y_pred_val)
        y_pred_train_transformed = np.expm1(y_pred_train)
        
        mape_val = mean_absolute_percentage_error(y_val_fold, y_pred_val_transformed)
        mape_train = mean_absolute_percentage_error(y_train_fold, y_pred_train_transformed)
        
        mse_train = mean_squared_error(y_train_fold, y_pred_train_transformed)
        mse_val = mean_squared_error(y_val_fold, y_pred_val_transformed)
        
        fold_scores_val.append(mape_val)
        fold_scores_train.append(mape_train)
        loss_train.append(mse_train)
        loss_val.append(mse_val)
        
        # print(f"Fold {fold_index + 1} loss train: {mse_train:.4f}")
        # print(f"Fold {fold_index + 1} loss val: {mse_val:.4f}")
        # print("--------------------------------")
        # print(f"Fold {fold_index + 1} metric val: {mape_val:.4f}")
        # print(f"Fold {fold_index + 1} metric train: {mape_train:.4f}")
        # print("--------------------------------")
        
    print(f"Average MAPE for {model_name}: {np.mean(fold_scores_val):.4f}")
    print(f"Average MAPE for {model_name} train: {np.mean(fold_scores_train):.4f}")
    # print(f"Average MSE for {model_name} train: {np.mean(loss_train):.4f}")
    # print(f"Average MSE for {model_name} val: {np.mean(loss_val):.4f}")
    
    return np.mean(fold_scores_val)

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=40)
print(f"Best parameters: {study.best_params}")
print(f"Best value: {study.best_value:.4f}")





[I 2025-01-16 15:34:44,602] A new study created in memory with name: no-name-56b05ca6-82ad-44c6-9b6e-cea28db6f337


[I 2025-01-16 15:34:58,736] Trial 0 finished with value: 0.12966753406813186 and parameters: {'max_depth': 7, 'min_child_weight': 4, 'gamma': 10, 'learning_rate': 0.055655589237251474, 'n_estimators': 2853, 'subsample': 0.8568177969240526, 'colsample_bytree': 0.5883164702578396, 'colsample_bylevel': 0.543443936431018, 'colsample_bynode': 0.7188464044274396, 'lambda_l1': 8.524350384737788, 'lambda_l2': 9.8826325063152}. Best is trial 0 with value: 0.12966753406813186.


Average MAPE for xgboost: 0.1297
Average MAPE for xgboost train: 0.0956


[I 2025-01-16 15:35:08,394] Trial 1 finished with value: 0.11371241991516697 and parameters: {'max_depth': 6, 'min_child_weight': 2, 'gamma': 2, 'learning_rate': 0.2246715285707862, 'n_estimators': 2294, 'subsample': 0.9163315042373482, 'colsample_bytree': 0.811053958551528, 'colsample_bylevel': 0.981254357834842, 'colsample_bynode': 0.5914905563231887, 'lambda_l1': 7.777153516447762, 'lambda_l2': 3.300290653721125}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1137
Average MAPE for xgboost train: 0.0736


[I 2025-01-16 15:35:14,161] Trial 2 finished with value: 0.14931813283992143 and parameters: {'max_depth': 6, 'min_child_weight': 6, 'gamma': 9, 'learning_rate': 0.2892311743386916, 'n_estimators': 1107, 'subsample': 0.6515392007129133, 'colsample_bytree': 0.5547684489953875, 'colsample_bylevel': 0.5904742479332403, 'colsample_bynode': 0.7071713866484965, 'lambda_l1': 7.648246119709156, 'lambda_l2': 6.152721372085178}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1493
Average MAPE for xgboost train: 0.1045


[I 2025-01-16 15:35:30,838] Trial 3 finished with value: 0.12719946221743558 and parameters: {'max_depth': 6, 'min_child_weight': 8, 'gamma': 8, 'learning_rate': 0.11198859782365964, 'n_estimators': 4205, 'subsample': 0.9360323843766796, 'colsample_bytree': 0.8457068146695095, 'colsample_bylevel': 0.7923932850975399, 'colsample_bynode': 0.9560479755910459, 'lambda_l1': 6.1905052870244965, 'lambda_l2': 5.793690087184569}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1272
Average MAPE for xgboost train: 0.0852


[I 2025-01-16 15:35:35,755] Trial 4 finished with value: 0.12293572798922717 and parameters: {'max_depth': 7, 'min_child_weight': 7, 'gamma': 6, 'learning_rate': 0.07296924650225366, 'n_estimators': 708, 'subsample': 0.6985632398123833, 'colsample_bytree': 0.843447383664423, 'colsample_bylevel': 0.5805172729887159, 'colsample_bynode': 0.8931715576399897, 'lambda_l1': 9.535161232841666, 'lambda_l2': 4.5429578241883615}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1229
Average MAPE for xgboost train: 0.0871


[I 2025-01-16 15:35:55,559] Trial 5 finished with value: 0.11399267377374582 and parameters: {'max_depth': 6, 'min_child_weight': 10, 'gamma': 2, 'learning_rate': 0.07511166752946155, 'n_estimators': 4657, 'subsample': 0.849457968147402, 'colsample_bytree': 0.8931837766069685, 'colsample_bylevel': 0.5701251536229224, 'colsample_bynode': 0.9213382743442453, 'lambda_l1': 5.395379703786225, 'lambda_l2': 1.79755644850763}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1140
Average MAPE for xgboost train: 0.0743


[I 2025-01-16 15:36:05,265] Trial 6 finished with value: 0.11946757262625796 and parameters: {'max_depth': 5, 'min_child_weight': 2, 'gamma': 1, 'learning_rate': 0.15863521120567353, 'n_estimators': 2168, 'subsample': 0.863090503233521, 'colsample_bytree': 0.5613112103187976, 'colsample_bylevel': 0.5091074108885643, 'colsample_bynode': 0.8605952836725871, 'lambda_l1': 3.6386203420105656, 'lambda_l2': 3.4961733574532525}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1195
Average MAPE for xgboost train: 0.0755


[I 2025-01-16 15:36:24,045] Trial 7 finished with value: 0.12927461111468627 and parameters: {'max_depth': 8, 'min_child_weight': 1, 'gamma': 10, 'learning_rate': 0.06154556047272338, 'n_estimators': 4108, 'subsample': 0.7664159107075491, 'colsample_bytree': 0.6344371839274167, 'colsample_bylevel': 0.8869814915821235, 'colsample_bynode': 0.5088550297866954, 'lambda_l1': 8.224583223938948, 'lambda_l2': 6.916361673996776}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1293
Average MAPE for xgboost train: 0.0926


[I 2025-01-16 15:36:43,869] Trial 8 finished with value: 0.12130956311236112 and parameters: {'max_depth': 10, 'min_child_weight': 10, 'gamma': 10, 'learning_rate': 0.03556057452902116, 'n_estimators': 4627, 'subsample': 0.9441024258068549, 'colsample_bytree': 0.9248056873324935, 'colsample_bylevel': 0.7570848137869897, 'colsample_bynode': 0.5512706870989168, 'lambda_l1': 8.699068786710157, 'lambda_l2': 0.8299298196091087}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1213
Average MAPE for xgboost train: 0.0858


[I 2025-01-16 15:36:52,101] Trial 9 finished with value: 0.11377232819288138 and parameters: {'max_depth': 7, 'min_child_weight': 10, 'gamma': 2, 'learning_rate': 0.12525641912059576, 'n_estimators': 1484, 'subsample': 0.5527358776003355, 'colsample_bytree': 0.7653307959355053, 'colsample_bylevel': 0.8113649109935525, 'colsample_bynode': 0.9647795339542788, 'lambda_l1': 6.5574488877316375, 'lambda_l2': 9.199889420480988}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1138
Average MAPE for xgboost train: 0.0742


[I 2025-01-16 15:37:04,008] Trial 10 finished with value: 0.1344096098247976 and parameters: {'max_depth': 3, 'min_child_weight': 3, 'gamma': 4, 'learning_rate': 0.2270848855391619, 'n_estimators': 3069, 'subsample': 0.9725160418331502, 'colsample_bytree': 0.7003307868733896, 'colsample_bylevel': 0.9641631556975894, 'colsample_bynode': 0.6134604600867174, 'lambda_l1': 0.3302273601351562, 'lambda_l2': 2.7340950807307554}. Best is trial 1 with value: 0.11371241991516697.


Average MAPE for xgboost: 0.1344
Average MAPE for xgboost train: 0.0942


[I 2025-01-16 15:37:36,723] Trial 11 finished with value: 0.1105268561223163 and parameters: {'max_depth': 9, 'min_child_weight': 4, 'gamma': 0, 'learning_rate': 0.18291598323159047, 'n_estimators': 1545, 'subsample': 0.5427540395735444, 'colsample_bytree': 0.7635121936519796, 'colsample_bylevel': 0.9609019094186486, 'colsample_bynode': 0.8018623839887371, 'lambda_l1': 3.3675771054050943, 'lambda_l2': 9.016040243935315}. Best is trial 11 with value: 0.1105268561223163.


Average MAPE for xgboost: 0.1105
Average MAPE for xgboost train: 0.0504


[I 2025-01-16 15:37:43,279] Trial 12 finished with value: 0.10300128663070598 and parameters: {'max_depth': 10, 'min_child_weight': 4, 'gamma': 0, 'learning_rate': 0.2128936324487472, 'n_estimators': 224, 'subsample': 0.5081255105157426, 'colsample_bytree': 0.990706481774542, 'colsample_bylevel': 0.9912603454496889, 'colsample_bynode': 0.8101448425444391, 'lambda_l1': 3.488710588382519, 'lambda_l2': 8.376547524198598}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1030
Average MAPE for xgboost train: 0.0512


[I 2025-01-16 15:37:50,845] Trial 13 finished with value: 0.10645143990286798 and parameters: {'max_depth': 10, 'min_child_weight': 5, 'gamma': 0, 'learning_rate': 0.19964368365939592, 'n_estimators': 261, 'subsample': 0.5136184496059514, 'colsample_bytree': 0.9661239503757648, 'colsample_bylevel': 0.8933846976115403, 'colsample_bynode': 0.7989826557738251, 'lambda_l1': 3.196009662517498, 'lambda_l2': 8.083857142662966}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1065
Average MAPE for xgboost train: 0.0510


[I 2025-01-16 15:37:53,422] Trial 14 finished with value: 0.11731006249726864 and parameters: {'max_depth': 10, 'min_child_weight': 5, 'gamma': 4, 'learning_rate': 0.22298454721636038, 'n_estimators': 358, 'subsample': 0.5042940833995991, 'colsample_bytree': 0.9976866867594517, 'colsample_bylevel': 0.8767185098796213, 'colsample_bynode': 0.784874786471991, 'lambda_l1': 1.6309786137091455, 'lambda_l2': 7.688756766546912}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1173
Average MAPE for xgboost train: 0.0759


[I 2025-01-16 15:37:59,728] Trial 15 finished with value: 0.11032567652997127 and parameters: {'max_depth': 9, 'min_child_weight': 6, 'gamma': 0, 'learning_rate': 0.28874004931097147, 'n_estimators': 212, 'subsample': 0.6169097587650452, 'colsample_bytree': 0.9948545238201754, 'colsample_bylevel': 0.6741964892640063, 'colsample_bynode': 0.8299192290621293, 'lambda_l1': 3.0385749795015653, 'lambda_l2': 7.910338517777819}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1103
Average MAPE for xgboost train: 0.0514


[I 2025-01-16 15:38:04,729] Trial 16 finished with value: 0.11632401577654952 and parameters: {'max_depth': 9, 'min_child_weight': 8, 'gamma': 6, 'learning_rate': 0.1910207083907172, 'n_estimators': 864, 'subsample': 0.601637247414281, 'colsample_bytree': 0.9437306089604708, 'colsample_bylevel': 0.8910856705532754, 'colsample_bynode': 0.683989707301172, 'lambda_l1': 2.1311065170034884, 'lambda_l2': 8.32047944010365}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1163
Average MAPE for xgboost train: 0.0798


[I 2025-01-16 15:38:06,279] Trial 17 finished with value: 0.11169111041376471 and parameters: {'max_depth': 10, 'min_child_weight': 4, 'gamma': 3, 'learning_rate': 0.2576789555244833, 'n_estimators': 115, 'subsample': 0.7377466012680778, 'colsample_bytree': 0.9483020342697068, 'colsample_bylevel': 0.915116659930929, 'colsample_bynode': 0.7511664805810694, 'lambda_l1': 4.780765308569379, 'lambda_l2': 4.848467804914678}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1117
Average MAPE for xgboost train: 0.0708


[I 2025-01-16 15:38:51,391] Trial 18 finished with value: 0.11044024189651555 and parameters: {'max_depth': 8, 'min_child_weight': 5, 'gamma': 0, 'learning_rate': 0.1869303522539992, 'n_estimators': 1727, 'subsample': 0.5230322361087437, 'colsample_bytree': 0.8830276142288398, 'colsample_bylevel': 0.844466993828429, 'colsample_bynode': 0.6639123862597409, 'lambda_l1': 0.04826991192850727, 'lambda_l2': 6.917545134162272}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1104
Average MAPE for xgboost train: 0.0476


[I 2025-01-16 15:38:56,177] Trial 19 finished with value: 0.11492351613404898 and parameters: {'max_depth': 4, 'min_child_weight': 3, 'gamma': 1, 'learning_rate': 0.12979413727461894, 'n_estimators': 720, 'subsample': 0.5819273582557898, 'colsample_bytree': 0.6992376270136432, 'colsample_bylevel': 0.7165355278554152, 'colsample_bynode': 0.8517161587171613, 'lambda_l1': 4.3150182344169465, 'lambda_l2': 7.029487340894897}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1149
Average MAPE for xgboost train: 0.0749


[I 2025-01-16 15:39:11,592] Trial 20 finished with value: 0.11326328018382406 and parameters: {'max_depth': 8, 'min_child_weight': 7, 'gamma': 5, 'learning_rate': 0.25718868465578887, 'n_estimators': 3472, 'subsample': 0.6817559220660249, 'colsample_bytree': 0.9620807683536184, 'colsample_bylevel': 0.9950117242597503, 'colsample_bynode': 0.7640249145328349, 'lambda_l1': 1.8473840158493433, 'lambda_l2': 9.663681383318885}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1133
Average MAPE for xgboost train: 0.0747


[I 2025-01-16 15:39:14,789] Trial 21 finished with value: 0.10992121508946959 and parameters: {'max_depth': 9, 'min_child_weight': 5, 'gamma': 0, 'learning_rate': 0.2899457327023958, 'n_estimators': 100, 'subsample': 0.6197410570535519, 'colsample_bytree': 0.9964476456614813, 'colsample_bylevel': 0.6753563127276148, 'colsample_bynode': 0.830183175083127, 'lambda_l1': 2.8859257770779, 'lambda_l2': 8.172748131904207}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1099
Average MAPE for xgboost train: 0.0518


[I 2025-01-16 15:39:21,094] Trial 22 finished with value: 0.1140191991134027 and parameters: {'max_depth': 10, 'min_child_weight': 5, 'gamma': 1, 'learning_rate': 0.25416096393068743, 'n_estimators': 1124, 'subsample': 0.5670421710916754, 'colsample_bytree': 0.999339482149956, 'colsample_bylevel': 0.6502728960428292, 'colsample_bynode': 0.818852130885817, 'lambda_l1': 2.594963675231706, 'lambda_l2': 8.675089527223827}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1140
Average MAPE for xgboost train: 0.0658


[I 2025-01-16 15:39:32,908] Trial 23 finished with value: 0.10542707234161756 and parameters: {'max_depth': 9, 'min_child_weight': 3, 'gamma': 0, 'learning_rate': 0.15989011238937767, 'n_estimators': 472, 'subsample': 0.5022843307499842, 'colsample_bytree': 0.9012578186049708, 'colsample_bylevel': 0.9335976836210718, 'colsample_bynode': 0.8894626569864569, 'lambda_l1': 4.047217282196868, 'lambda_l2': 5.898828493748634}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1054
Average MAPE for xgboost train: 0.0511


[I 2025-01-16 15:39:36,707] Trial 24 finished with value: 0.11306115590915786 and parameters: {'max_depth': 10, 'min_child_weight': 3, 'gamma': 3, 'learning_rate': 0.16585331876583503, 'n_estimators': 610, 'subsample': 0.5050550809517487, 'colsample_bytree': 0.8995474271663773, 'colsample_bylevel': 0.9289232787257918, 'colsample_bynode': 0.9008453762306695, 'lambda_l1': 4.0870156131404975, 'lambda_l2': 5.764362730490383}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1131
Average MAPE for xgboost train: 0.0731


[I 2025-01-16 15:39:43,559] Trial 25 finished with value: 0.10924896553575773 and parameters: {'max_depth': 9, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.1994695112002339, 'n_estimators': 1260, 'subsample': 0.5462895226794717, 'colsample_bytree': 0.8516444965029804, 'colsample_bylevel': 0.9309098580216637, 'colsample_bynode': 0.9905124726579322, 'lambda_l1': 1.1603341569086005, 'lambda_l2': 7.327801521973223}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1092
Average MAPE for xgboost train: 0.0629


[I 2025-01-16 15:39:52,727] Trial 26 finished with value: 0.11291145393183843 and parameters: {'max_depth': 8, 'min_child_weight': 2, 'gamma': 3, 'learning_rate': 0.13800054967419792, 'n_estimators': 1878, 'subsample': 0.6408510370922579, 'colsample_bytree': 0.9217500274962608, 'colsample_bylevel': 0.9994370067553804, 'colsample_bynode': 0.9287530248431305, 'lambda_l1': 5.5603247689688855, 'lambda_l2': 6.155971205837159}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1129
Average MAPE for xgboost train: 0.0706


[I 2025-01-16 15:40:05,405] Trial 27 finished with value: 0.1112954779859558 and parameters: {'max_depth': 10, 'min_child_weight': 4, 'gamma': 0, 'learning_rate': 0.20609668142913032, 'n_estimators': 535, 'subsample': 0.5794383464242614, 'colsample_bytree': 0.799899696890003, 'colsample_bylevel': 0.8312201849797303, 'colsample_bynode': 0.8750578047390535, 'lambda_l1': 3.9816874350707163, 'lambda_l2': 5.26230478806367}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1113
Average MAPE for xgboost train: 0.0511


[I 2025-01-16 15:40:11,634] Trial 28 finished with value: 0.11155231221492037 and parameters: {'max_depth': 9, 'min_child_weight': 3, 'gamma': 2, 'learning_rate': 0.10033933432783518, 'n_estimators': 943, 'subsample': 0.5029598058672277, 'colsample_bytree': 0.9595566120708026, 'colsample_bylevel': 0.8604186104665624, 'colsample_bynode': 0.7869072369759064, 'lambda_l1': 4.7587628679369764, 'lambda_l2': 4.468696642865681}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1116
Average MAPE for xgboost train: 0.0691


[I 2025-01-16 15:40:22,947] Trial 29 finished with value: 0.11172537775898653 and parameters: {'max_depth': 10, 'min_child_weight': 6, 'gamma': 1, 'learning_rate': 0.1622740779193307, 'n_estimators': 2557, 'subsample': 0.7927179974062881, 'colsample_bytree': 0.8789399840855109, 'colsample_bylevel': 0.9510635565798929, 'colsample_bynode': 0.7247554653427666, 'lambda_l1': 1.0272061755149071, 'lambda_l2': 9.479653471202912}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1117
Average MAPE for xgboost train: 0.0618


[I 2025-01-16 15:40:33,534] Trial 30 finished with value: 0.1083526740143714 and parameters: {'max_depth': 8, 'min_child_weight': 4, 'gamma': 0, 'learning_rate': 0.24059534076860187, 'n_estimators': 522, 'subsample': 0.6750497210374501, 'colsample_bytree': 0.9163079537272051, 'colsample_bylevel': 0.9089597711381265, 'colsample_bynode': 0.7296922518618365, 'lambda_l1': 6.610853454077167, 'lambda_l2': 6.6676638151494725}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1084
Average MAPE for xgboost train: 0.0517


[I 2025-01-16 15:40:42,233] Trial 31 finished with value: 0.10794380656682123 and parameters: {'max_depth': 8, 'min_child_weight': 4, 'gamma': 0, 'learning_rate': 0.24554548778905794, 'n_estimators': 431, 'subsample': 0.5457714178434185, 'colsample_bytree': 0.9221574643446165, 'colsample_bylevel': 0.9250776250414705, 'colsample_bynode': 0.7365445712527339, 'lambda_l1': 6.651854874613989, 'lambda_l2': 6.569519045807253}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1079
Average MAPE for xgboost train: 0.0521


[I 2025-01-16 15:40:45,300] Trial 32 finished with value: 0.10919288783534079 and parameters: {'max_depth': 9, 'min_child_weight': 3, 'gamma': 1, 'learning_rate': 0.21969326813801907, 'n_estimators': 401, 'subsample': 0.5347706595291375, 'colsample_bytree': 0.9562471566204208, 'colsample_bylevel': 0.9458428481870053, 'colsample_bynode': 0.7766398635245475, 'lambda_l1': 5.818148048338141, 'lambda_l2': 7.592786075304799}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1092
Average MAPE for xgboost train: 0.0649


[I 2025-01-16 15:40:52,088] Trial 33 finished with value: 0.1203637559368326 and parameters: {'max_depth': 7, 'min_child_weight': 4, 'gamma': 2, 'learning_rate': 0.17796717754049898, 'n_estimators': 1206, 'subsample': 0.5917765873231667, 'colsample_bytree': 0.8054931571215349, 'colsample_bylevel': 0.9649075866989179, 'colsample_bynode': 0.65454572221869, 'lambda_l1': 7.430074275725913, 'lambda_l2': 8.513740255873374}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1204
Average MAPE for xgboost train: 0.0753


[I 2025-01-16 15:41:09,153] Trial 34 finished with value: 0.10764758693766904 and parameters: {'max_depth': 9, 'min_child_weight': 2, 'gamma': 0, 'learning_rate': 0.20652878782038936, 'n_estimators': 944, 'subsample': 0.5304356666852938, 'colsample_bytree': 0.9728993130749427, 'colsample_bylevel': 0.7809927129057488, 'colsample_bynode': 0.696405802707972, 'lambda_l1': 6.982570765662273, 'lambda_l2': 6.156143853985516}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1076
Average MAPE for xgboost train: 0.0519


[I 2025-01-16 15:41:14,041] Trial 35 finished with value: 0.12787927806875118 and parameters: {'max_depth': 10, 'min_child_weight': 2, 'gamma': 8, 'learning_rate': 0.206903750499997, 'n_estimators': 904, 'subsample': 0.5179817477265184, 'colsample_bytree': 0.9718001579694096, 'colsample_bylevel': 0.7786441027602815, 'colsample_bynode': 0.691088773838828, 'lambda_l1': 2.5868377042847572, 'lambda_l2': 9.961213509639094}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1279
Average MAPE for xgboost train: 0.0869


[I 2025-01-16 15:41:21,938] Trial 36 finished with value: 0.11081995656043961 and parameters: {'max_depth': 9, 'min_child_weight': 1, 'gamma': 1, 'learning_rate': 0.14944326150990084, 'n_estimators': 1433, 'subsample': 0.6460995713388713, 'colsample_bytree': 0.8356188150949584, 'colsample_bylevel': 0.7262157185979694, 'colsample_bynode': 0.6146532086994061, 'lambda_l1': 4.893711200049831, 'lambda_l2': 5.694916031318721}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1108
Average MAPE for xgboost train: 0.0695


[I 2025-01-16 15:41:26,525] Trial 37 finished with value: 0.1290708278107352 and parameters: {'max_depth': 5, 'min_child_weight': 2, 'gamma': 2, 'learning_rate': 0.2716926700398381, 'n_estimators': 805, 'subsample': 0.5678213052766481, 'colsample_bytree': 0.8746409174411938, 'colsample_bylevel': 0.82594135712617, 'colsample_bynode': 0.859223214955933, 'lambda_l1': 9.54269837452801, 'lambda_l2': 3.6961437530242254}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1291
Average MAPE for xgboost train: 0.0823


[I 2025-01-16 15:41:35,874] Trial 38 finished with value: 0.10529069810993752 and parameters: {'max_depth': 10, 'min_child_weight': 7, 'gamma': 0, 'learning_rate': 0.1723454705981434, 'n_estimators': 344, 'subsample': 0.522412224004461, 'colsample_bytree': 0.9746320504584155, 'colsample_bylevel': 0.8546192633039899, 'colsample_bynode': 0.8872046086818155, 'lambda_l1': 3.5565692200592296, 'lambda_l2': 4.176062611764001}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1053
Average MAPE for xgboost train: 0.0509


[I 2025-01-16 15:41:46,756] Trial 39 finished with value: 0.10926719577523263 and parameters: {'max_depth': 10, 'min_child_weight': 7, 'gamma': 1, 'learning_rate': 0.0974372897445942, 'n_estimators': 2020, 'subsample': 0.8501838251580903, 'colsample_bytree': 0.5272822420002222, 'colsample_bylevel': 0.8650642649610389, 'colsample_bynode': 0.9134890826338979, 'lambda_l1': 3.737953582406535, 'lambda_l2': 3.9510288575173913}. Best is trial 12 with value: 0.10300128663070598.


Average MAPE for xgboost: 0.1093
Average MAPE for xgboost train: 0.0628
Best parameters: {'max_depth': 10, 'min_child_weight': 4, 'gamma': 0, 'learning_rate': 0.2128936324487472, 'n_estimators': 224, 'subsample': 0.5081255105157426, 'colsample_bytree': 0.990706481774542, 'colsample_bylevel': 0.9912603454496889, 'colsample_bynode': 0.8101448425444391, 'lambda_l1': 3.488710588382519, 'lambda_l2': 8.376547524198598}
Best value: 0.1030
