# Hyperparameter tuning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils import *

In [2]:
# Load the tab-separated table that holds the target and the engineered features.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like
# an index written out when the file was saved - confirm whether it should be kept.
df = pd.read_csv(
    'df_with_target_and_features.csv',
    delimiter='\t',
)
# Display (rows, columns) as the cell output.
df.shape

(7781619, 57)

In [6]:
# Preview the first five rows to check the loaded columns and values.
df.head(n=5)

Unnamed: 0,period,product_id,customer_id,tn,target,cat1_FOODS,cat1_HC,cat1_PC,cat1_REF,tn_lag1,...,share_cliente,prod_start,cust_start,prod_age,cust_age,no_sale,no_sale_streak,tn_slope3,tn_slope6,tn_slope12
0,2017-01,20001,10001,99.43861,92.46537,0.0,1.0,0.0,0.0,,...,0.052969,2017-01,2017-01,0,0,0,0,,,
1,2017-02,20001,10001,198.84365,13.29728,0.0,1.0,0.0,0.0,99.43861,...,0.085162,2017-01,2017-01,1,1,0,0,99.40504,99.40504,99.40504
2,2017-03,20001,10001,92.46537,101.00563,0.0,1.0,0.0,0.0,198.84365,...,0.032619,2017-01,2017-01,2,2,0,0,-3.48662,-3.48662,-3.48662
3,2017-04,20001,10001,13.29728,128.04792,0.0,1.0,0.0,0.0,92.46537,...,0.00535,2017-01,2017-01,3,3,0,0,-92.773185,-36.480227,-36.480227
4,2017-05,20001,10001,101.00563,101.20711,0.0,1.0,0.0,0.0,13.29728,...,0.036501,2017-01,2017-01,4,4,0,0,4.27013,-18.241233,-18.241233


In [None]:
import optuna
import lightgbm as lgb
from sklearn.metrics import mean_absolute_error

def objective(trial):
    """Optuna objective: fit a LightGBM regressor and return its hold-out MAE.

    One hyperparameter configuration is sampled from ``trial``, a GBDT model
    is trained on ``X_train``/``y_train`` with early stopping monitored on
    ``X_test``/``y_test``, and the model is scored on that same hold-out set.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyperparameters.

    Returns
    -------
    float
        Mean absolute error of the predictions on ``X_test`` (lower is better).

    Notes
    -----
    ``X_train``, ``y_train``, ``X_test`` and ``y_test`` are read from the
    notebook's global namespace; they must be defined before this function
    is called.

    NOTE(review): the same hold-out set drives early stopping and produces
    the score being minimised, so the tuned MAE is likely optimistic -
    consider a separate validation split.
    """
    # Hyperparameters: fixed settings plus the search space.
    params = {
        "objective": "regression",
        "metric": "mae",
        "verbosity": -1,
        "boosting_type": "gbdt",
        # suggest_float replaces the deprecated suggest_loguniform / suggest_uniform.
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 20, 150),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "random_state": 42,  # fixed seed so the model fit is reproducible
    }

    # Training
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_test, label=y_test)

    # Early stopping is configured through a callback: the
    # `early_stopping_rounds` / `verbose_eval` keyword arguments of
    # lgb.train were removed in LightGBM 4.0.
    model = lgb.train(
        params,
        dtrain,
        num_boost_round=2000,
        valid_sets=[dvalid],
        callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)],
    )

    # Score with the iteration selected by early stopping.
    preds = model.predict(X_test, num_iteration=model.best_iteration)
    return mean_absolute_error(y_test, preds)

# Run the hyperparameter search, minimising the MAE returned by `objective`.
# The sampler is seeded so that the sequence of suggested hyperparameters is
# repeatable across runs (TPE is Optuna's default sampler; only the seed is new).
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=40)
print("Best trial:", study.best_trial.params)
print("Best MAE:", study.best_value)
