## 1. Import Libraries and Setup

In [3]:
dict = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "H": 8, "I": 9, "J": 10,}
list(dict.keys())

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

In [6]:
# Core libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')

# Machine Learning libraries
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV,RandomizedSearchCV
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, make_scorer
import lightgbm as lgb
import joblib

# Optuna for hyperparameter optimization
import optuna
from optuna.samplers import TPESampler

# Define custom RMSLE function to handle zero values
def rmsle_score(y_true, y_pred):
    """Calculate RMSLE with handling for zero and negative values"""
    # Add small epsilon to avoid log(0) and ensure positive values
    epsilon = 1e-15
    y_true_log = np.log1p(np.maximum(y_true, epsilon))
    y_pred_log = np.log1p(np.maximum(y_pred, epsilon))
    return np.sqrt(mean_squared_error(y_true_log, y_pred_log))

# Create scorer for GridSearchCV
rmsle_scorer = make_scorer(rmsle_score, greater_is_better=False)

# Set random seed for reproducibility
np.random.seed(42)

# Visualization settings
plt.style.use('default')
sns.set_palette('husl')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

## 2. Data Loading and Initial Preprocessing

In [7]:
train_df = pd.read_csv('../data/interim/traditional_final_train.csv', parse_dates=['date'])
test_df = pd.read_csv('../data/interim/traditional_final_test.csv', parse_dates=['date'])

In [8]:
# print categorial columns
print("Categorical columns in the dataset:")
for col in train_df.select_dtypes(include=['object']).columns:
    print(f"- {col}")
    
# Encode categorical features
categorical_cols = train_df.select_dtypes(include=['object']).columns
encoders = {}
for col in categorical_cols:
    encoder = OrdinalEncoder()
    train_df[col+"_encoded"] = encoder.fit_transform(train_df[[col]])
    encoders[col] = encoder

Categorical columns in the dataset:
- family
- city
- state
- type


In [9]:
for col in categorical_cols:
    test_df[col+"_encoded"] = encoders[col].transform(test_df[[col]])

In [10]:
train_df.columns

Index(['id', 'date', 'store_nbr', 'family', 'sales', 'onpromotion', 'year',
       'month', 'day', 'dayofweek', 'weekofyear', 'day_of_year', 'is_weekend',
       'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end',
       'is_payday', 'days_since_payday', 'days_until_payday',
       'sales_rolling_mean_7', 'sales_rolling_std_7', 'sales_rolling_max_7',
       'sales_rolling_min_7', 'sales_rolling_mean_14', 'sales_rolling_std_14',
       'sales_rolling_max_14', 'sales_rolling_min_14', 'sales_rolling_mean_30',
       'sales_rolling_std_30', 'sales_rolling_max_30', 'sales_rolling_min_30',
       'sales_lag_1', 'sales_lag_7', 'sales_lag_14', 'sales_lag_30',
       'is_national_holiday', 'is_regional_holiday', 'is_local_holiday',
       'is_additional_holiday', 'is_working_day', 'is_event', 'is_bridge_day',
       'is_transferred_day', 'dcoilwtico', 'city', 'state', 'type', 'cluster',
       'transactions', 'family_encoded', 'city_encoded', 'state_encoded',
       'type_en

In [11]:
feature_cols = ['date', 'store_nbr',"family",'onpromotion', 'year',
       'month', 'day', 'dayofweek', 'weekofyear', 'day_of_year', 'is_weekend',
       'is_month_start', 'is_month_end', 'is_quarter_start', 'is_quarter_end',
       'is_payday', 'days_since_payday', 'days_until_payday',
       'is_national_holiday', 'is_regional_holiday', 'is_local_holiday',
       'is_additional_holiday', 'is_working_day', 'is_event', 'is_bridge_day',
       'is_transferred_day', 'dcoilwtico', 'cluster',
       'transactions', 'family_encoded', 'city_encoded', 'state_encoded',
       'type_encoded','sales_rolling_mean_7', 'sales_rolling_std_7', 'sales_rolling_max_7',
       'sales_rolling_min_7', 'sales_rolling_mean_14', 'sales_rolling_std_14',
       'sales_rolling_max_14', 'sales_rolling_min_14', 'sales_rolling_mean_30',
       'sales_rolling_std_30', 'sales_rolling_max_30', 'sales_rolling_min_30',
       'sales_lag_1', 'sales_lag_7', 'sales_lag_14', 'sales_lag_30',
]

In [12]:
train = train_df[feature_cols]
test = test_df[feature_cols]

## 3. Hyperparameter Tuning Setup

In [13]:
# Define hyperparameter grid for LightGBM
param_grid = {
    'n_estimators': [500, 1000, 1500],
    'learning_rate': [0.01, 0.05, 0.1],
    'num_leaves': [31, 50, 100],
    'max_depth': [-1, 10, 20],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
    'reg_alpha': [0, 0.1, 0.5],
    'reg_lambda': [0, 0.1, 0.5]
}

# For faster tuning, use a smaller parameter grid
# You can expand this for more thorough tuning
param_grid_small = {
    'n_estimators': [500, 1000],
    'learning_rate': [0.01, 0.05],
    'num_leaves': [31, 50],
    'max_depth': [-1, 10],
    'subsample': [0.8, 0.9],
    'colsample_bytree': [0.8, 0.9],
    'reg_alpha': [0, 0.1],
    'reg_lambda': [0, 0.1]
}

print("Hyperparameter grid defined for LightGBM tuning")
print(f"Total combinations in small grid: {np.prod([len(v) for v in param_grid_small.values()])}")

Hyperparameter grid defined for LightGBM tuning
Total combinations in small grid: 256


In [14]:
# Function to perform hyperparameter tuning for a specific family
def tune_hyperparameters(X_train, y_train, family_name, param_grid=param_grid_small, cv_folds=3):
    """
    Perform hyperparameter tuning for LightGBM model
    """
    print(f"\n=== Tuning hyperparameters for {family_name} ===")
    
    # Create base model
    lgb_model = lgb.LGBMRegressor(
        objective='regression',
        metric='rmse',
        random_state=42,
        force_row_wise=True,
        verbose=-1  # Reduce verbosity
    )
    
    # Create time series split for cross-validation
    tscv = TimeSeriesSplit(n_splits=cv_folds)
    
    # Perform grid search
    grid_search = GridSearchCV(
        estimator=lgb_model,
        param_grid=param_grid,
        scoring=rmsle_scorer,
        cv=tscv,
        n_jobs=-1,
        verbose=1
    )
    
    # Fit grid search
    print(f"Starting grid search with {np.prod([len(v) for v in param_grid.values()])} parameter combinations...")
    grid_search.fit(X_train, y_train)
    
    # Print results
    print(f"Best RMSLE score: {-grid_search.best_score_:.4f}")
    print(f"Best parameters: {grid_search.best_params_}")
    
    return grid_search.best_estimator_, grid_search.best_params_, -grid_search.best_score_

In [15]:
# Function to perform hyperparameter tuning for a specific family
def Randomized_tune_hyperparameters(X_train, y_train, family_name, param_grid=param_grid_small, cv_folds=3):
    """
    Perform hyperparameter tuning for LightGBM model using RandomizedSearchCV
    """
    print(f"\n=== Randomized Tuning hyperparameters for {family_name} ===")
    
    # Create base model
    lgb_model = lgb.LGBMRegressor(
        objective='regression',
        metric='rmse',
        random_state=42,
        force_row_wise=True,
        verbose=-1  # Reduce verbosity
    )
    
    # Create time series split for cross-validation
    tscv = TimeSeriesSplit(n_splits=cv_folds)
    
    # Perform randomized search
    random_search = RandomizedSearchCV(
        estimator=lgb_model,
        param_distributions=param_grid,
        scoring=rmsle_scorer,
        cv=tscv,
        n_iter=30,  # Number of iterations for randomized search
        n_jobs=-1,
        verbose=1,
        random_state=42
    )
    
    # Fit randomized search
    print(f"Starting randomized search with {len(param_grid)} parameter combinations...")
    random_search.fit(X_train, y_train)
    
    # Print results
    print(f"Best RMSLE score: {-random_search.best_score_:.4f}")
    print(f"Best parameters: {random_search.best_params_}")
    
    return random_search.best_estimator_, random_search.best_params_, -random_search.best_score_

In [20]:
def optuna_tune_hyperparameters(X_train, y_train, family_name, param_grid=param_grid_small, cv_folds=3):
    """
    Perform hyperparameter tuning for LightGBM model using Optuna
    """
    print(f"\n=== Optuna Tuning hyperparameters for {family_name} ===")
    
    # Define objective function for Optuna
    def objective(trial):
        params = {
            'n_estimators': trial.suggest_categorical('n_estimators', param_grid['n_estimators']),
            'learning_rate': trial.suggest_categorical('learning_rate', param_grid['learning_rate']),
            'num_leaves': trial.suggest_categorical('num_leaves', param_grid['num_leaves']),
            'max_depth': trial.suggest_categorical('max_depth', param_grid['max_depth']),
            'subsample': trial.suggest_categorical('subsample', param_grid['subsample']),
            'colsample_bytree': trial.suggest_categorical('colsample_bytree', param_grid['colsample_bytree']),
            'reg_alpha': trial.suggest_categorical('reg_alpha', param_grid['reg_alpha']),
            'reg_lambda': trial.suggest_categorical('reg_lambda', param_grid['reg_lambda'])
        }
        
        lgb_model = lgb.LGBMRegressor(
            **params,
            objective='regression',
            metric='rmse',
            random_state=42,
            force_row_wise=True,
            verbose=-1
        )
        
        tscv = TimeSeriesSplit(n_splits=cv_folds)
        scores = []
        
        for train_index, val_index in tscv.split(X_train):
            X_tr, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
            y_tr, y_val = y_train.iloc[train_index], y_train.iloc[val_index]
            
            lgb_model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)])
            y_pred = lgb_model.predict(X_val)
            score = rmsle_score(y_val, y_pred)
            scores.append(score)
        
        return np.mean(scores)
    
    # Create Optuna study
    study = optuna.create_study(direction='minimize', sampler=TPESampler(seed=42))
    
    # Optimize the study
    study.optimize(objective, n_trials=50)  # Number of trials can be adjusted
    
    print(f"Best RMSLE score: {study.best_value:.4f}")
    print(f"Best parameters: {study.best_params}")
    
    # Create final model with best parameters
    best_params = study.best_params
    best_model = lgb.LGBMRegressor(
        **best_params,
        objective='regression',
        metric='rmse',
        random_state=42,
        force_row_wise=True,
        verbose=-1
    )
    best_model.fit(X_train, y_train)
    return best_model, best_params, study.best_value


## 4. Hyperparameter Tuning and Model Training

In [21]:
# Train LightGBM models with hyperparameter tuning for each family
families = train_df['family'].unique()
models = {}
best_params_dict = {}
tuning_results = {}

print(f"Starting hyperparameter tuning for {len(families)} families...")
print("This may take a while depending on the parameter grid size.\n")

for i, family in enumerate(families, 1):
    print(f"\n{'='*60}")
    print(f"Progress: {i}/{len(families)} - Training model for family: {family}")
    print(f"{'='*60}")
    
    # Filter the data for the current family
    family_data = train[train['family'] == family].copy()
    family_target = train_df[train_df['family'] == family]['sales'].copy()
    
    # Prepare features (remove date and family columns)
    X_family = family_data.drop(columns=['date', 'family'])
    y_family = family_target
    
    print(f"Family {family} data shape: {X_family.shape}")
    
    # Split data for hyperparameter tuning (80% for tuning, 20% for validation)
    split_index = int(len(X_family) * 0.8)
    X_tune = X_family[:split_index]
    y_tune = y_family[:split_index]
    X_val = X_family[split_index:]
    y_val = y_family[split_index:]
    
    print(f"Tuning set: {X_tune.shape}, Validation set: {X_val.shape}")
    
    # Perform hyperparameter tuning
    try:
        best_model, best_params, best_score = optuna_tune_hyperparameters(
            X_tune, y_tune, family, param_grid_small, cv_folds=3
        )
        
        # Store results
        best_params_dict[family] = best_params
        tuning_results[family] = best_score
        
        # Evaluate on validation set
        y_val_pred = best_model.predict(X_val)
        y_val_pred = np.maximum(y_val_pred, 0)  # Ensure no negative predictions
        
        # Calculate evaluation metrics
        rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
        mae = mean_absolute_error(y_val, y_val_pred)
        rmsle = rmsle_score(y_val, y_val_pred)
        
        print(f"Validation - RMSE: {rmse:.4f}, MAE: {mae:.4f}, RMSLE: {rmsle:.4f}")
        print(f"✓ Successfully tuned model for {family}")
        
        # Store the tuned model (we'll retrain on full data later)
        models[family] = best_model
        
    except Exception as e:
        print(f"✗ Error tuning model for {family}: {str(e)}")
        # Fallback to default parameters
        default_model = lgb.LGBMRegressor(
            objective='regression',
            metric='rmse',
            n_estimators=1000,
            learning_rate=0.01,
            num_leaves=31,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            force_row_wise=True
        )
        default_model.fit(X_tune, y_tune)
        models[family] = default_model
        best_params_dict[family] = 'default_params'
        tuning_results[family] = None

print(f"\n{'='*60}")
print("HYPERPARAMETER TUNING COMPLETED")
print(f"{'='*60}")

Starting hyperparameter tuning for 33 families...
This may take a while depending on the parameter grid size.


Progress: 1/33 - Training model for family: AUTOMOTIVE


[I 2025-05-31 10:58:34,288] A new study created in memory with name: no-name-495be042-15e3-4d12-9fe2-e9e9e524b743


Family AUTOMOTIVE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for AUTOMOTIVE ===


[I 2025-05-31 10:58:42,261] Trial 0 finished with value: 0.5270288537706745 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.5270288537706745.
[I 2025-05-31 10:58:48,904] Trial 1 finished with value: 0.5236975677886853 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.5236975677886853.
[I 2025-05-31 10:58:56,726] Trial 2 finished with value: 0.5306647045225192 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.5236975677886853.
[I 2025-05-31 10:59:00,966] Trial 3 finished with value: 0.5445368982254789 and parameters: {'n_esti

Best RMSLE score: 0.5186
Best parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 5.0195, MAE: 3.4110, RMSLE: 0.4121
✓ Successfully tuned model for AUTOMOTIVE

Progress: 2/33 - Training model for family: BABY CARE


[I 2025-05-31 11:02:34,705] A new study created in memory with name: no-name-9fc45a46-4a56-4256-a6fe-b8380f95dc2e


Family BABY CARE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for BABY CARE ===


[I 2025-05-31 11:02:40,233] Trial 0 finished with value: 0.2096489618820908 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.2096489618820908.
[I 2025-05-31 11:02:44,983] Trial 1 finished with value: 0.20964377751668192 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.20964377751668192.
[I 2025-05-31 11:02:51,051] Trial 2 finished with value: 0.2114645841478265 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.20964377751668192.
[I 2025-05-31 11:02:53,742] Trial 3 finished with value: 0.20707234332020966 and parameters: {'n_

Best RMSLE score: 0.2071
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 0.1665, MAE: 0.0202, RMSLE: 0.0736
✓ Successfully tuned model for BABY CARE

Progress: 3/33 - Training model for family: BEAUTY


[I 2025-05-31 11:05:41,314] A new study created in memory with name: no-name-de3c453e-1737-4aaf-a8e1-5122512537b6


Family BEAUTY data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for BEAUTY ===


[I 2025-05-31 11:05:49,101] Trial 0 finished with value: 0.456543357121638 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.456543357121638.
[I 2025-05-31 11:05:58,995] Trial 1 finished with value: 0.45880457602382546 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.456543357121638.
[I 2025-05-31 11:06:09,374] Trial 2 finished with value: 0.46039992721139217 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.456543357121638.
[I 2025-05-31 11:06:14,795] Trial 3 finished with value: 0.4785726231762566 and parameters: {'n_estima

Best RMSLE score: 0.4395
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 3.6306, MAE: 2.1390, RMSLE: 0.3784
✓ Successfully tuned model for BEAUTY

Progress: 4/33 - Training model for family: BEVERAGES


[I 2025-05-31 11:12:01,308] A new study created in memory with name: no-name-39faa03a-b827-4a1f-99d6-ecc652ef0b78


Family BEVERAGES data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for BEVERAGES ===


[I 2025-05-31 11:12:11,032] Trial 0 finished with value: 1.201111731352347 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.201111731352347.
[I 2025-05-31 11:12:20,721] Trial 1 finished with value: 1.184422406534354 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 1.184422406534354.
[I 2025-05-31 11:12:30,431] Trial 2 finished with value: 1.1909683364737287 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 1.184422406534354.
[I 2025-05-31 11:12:35,063] Trial 3 finished with value: 1.4222096712990615 and parameters: {'n_estimator

Best RMSLE score: 1.0730
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 1222.1127, MAE: 712.2011, RMSLE: 0.3451
✓ Successfully tuned model for BEVERAGES

Progress: 5/33 - Training model for family: BOOKS


[I 2025-05-31 11:19:55,425] A new study created in memory with name: no-name-9dd7c658-291d-477f-a788-6a9493225a10


Family BOOKS data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for BOOKS ===


[I 2025-05-31 11:19:59,490] Trial 0 finished with value: 0.0609081929661513 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.0609081929661513.
[I 2025-05-31 11:20:03,897] Trial 1 finished with value: 0.060889515647882995 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.060889515647882995.
[I 2025-05-31 11:20:09,309] Trial 2 finished with value: 0.06100139546866231 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.060889515647882995.
[I 2025-05-31 11:20:11,528] Trial 3 finished with value: 0.06043217349290921 and parameters: 

Best RMSLE score: 0.0604
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 0.5744, MAE: 0.1071, RMSLE: 0.1575
✓ Successfully tuned model for BOOKS

Progress: 6/33 - Training model for family: BREAD/BAKERY


[I 2025-05-31 11:22:53,380] A new study created in memory with name: no-name-e8af4379-bb40-4378-affe-cb3a40d4ac50


Family BREAD/BAKERY data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for BREAD/BAKERY ===


[I 2025-05-31 11:23:01,422] Trial 0 finished with value: 0.9043408537556621 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.9043408537556621.
[I 2025-05-31 11:23:12,790] Trial 1 finished with value: 0.8903560120982338 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.8903560120982338.
[I 2025-05-31 11:23:24,583] Trial 2 finished with value: 0.8999579102164247 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.8903560120982338.
[I 2025-05-31 11:23:30,345] Trial 3 finished with value: 1.0166421485807524 and parameters: {'n_esti

Best RMSLE score: 0.7929
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 130.9187, MAE: 89.8859, RMSLE: 0.2868
✓ Successfully tuned model for BREAD/BAKERY

Progress: 7/33 - Training model for family: CELEBRATION


[I 2025-05-31 11:32:00,713] A new study created in memory with name: no-name-4ed32838-021a-40bb-8ef5-554a3fe9dbc1


Family CELEBRATION data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for CELEBRATION ===


[I 2025-05-31 11:32:06,826] Trial 0 finished with value: 0.4632833954705949 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.4632833954705949.
[I 2025-05-31 11:32:12,631] Trial 1 finished with value: 0.46297474024588636 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.46297474024588636.
[I 2025-05-31 11:32:23,899] Trial 2 finished with value: 0.46071436066291777 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.46071436066291777.
[I 2025-05-31 11:32:27,492] Trial 3 finished with value: 0.47417918794848896 and parameters: {'n

Best RMSLE score: 0.4537
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 12.4434, MAE: 4.6371, RMSLE: 0.3678
✓ Successfully tuned model for CELEBRATION

Progress: 8/33 - Training model for family: CLEANING


[I 2025-05-31 11:38:35,602] A new study created in memory with name: no-name-d5450a07-379a-4963-bccf-b1a1b022f477


Family CLEANING data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for CLEANING ===


[I 2025-05-31 11:38:46,180] Trial 0 finished with value: 1.1401153203061531 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.1401153203061531.
[I 2025-05-31 11:38:56,018] Trial 1 finished with value: 1.1461459872060669 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 1.1401153203061531.
[I 2025-05-31 11:39:06,736] Trial 2 finished with value: 1.1428520472948398 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.1401153203061531.
[I 2025-05-31 11:39:11,876] Trial 3 finished with value: 1.2512029153854332 and parameters: {'n_esti

Best RMSLE score: 1.0911
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 293.9887, MAE: 202.5995, RMSLE: 0.3143
✓ Successfully tuned model for CLEANING

Progress: 9/33 - Training model for family: DAIRY


[I 2025-05-31 11:48:01,881] A new study created in memory with name: no-name-3eb01502-3535-4254-a642-62adceca5b9f


Family DAIRY data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for DAIRY ===


[I 2025-05-31 11:48:11,789] Trial 0 finished with value: 0.7352638297216543 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.7352638297216543.
[I 2025-05-31 11:48:19,338] Trial 1 finished with value: 0.7808706297389821 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.7352638297216543.
[I 2025-05-31 11:48:27,825] Trial 2 finished with value: 0.780298978408679 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.7352638297216543.
[I 2025-05-31 11:48:32,308] Trial 3 finished with value: 0.8864641824465456 and parameters: {'n_estim

Best RMSLE score: 0.5142
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 247.3630, MAE: 169.5704, RMSLE: 0.2597
✓ Successfully tuned model for DAIRY

Progress: 10/33 - Training model for family: DELI


[I 2025-05-31 11:56:53,677] A new study created in memory with name: no-name-4f4569f3-56d5-42dc-9e88-6fca7e9d4088


Family DELI data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for DELI ===


[I 2025-05-31 11:57:02,711] Trial 0 finished with value: 0.822479476947715 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.822479476947715.
[I 2025-05-31 11:57:10,678] Trial 1 finished with value: 0.83202064625807 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.822479476947715.
[I 2025-05-31 11:57:19,495] Trial 2 finished with value: 0.8261553599947246 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.822479476947715.
[I 2025-05-31 11:57:26,832] Trial 3 finished with value: 0.9064429583984093 and parameters: {'n_estimators

Best RMSLE score: 0.7875
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}
Validation - RMSE: 89.4610, MAE: 58.6638, RMSLE: 0.2812
✓ Successfully tuned model for DELI

Progress: 11/33 - Training model for family: EGGS


[I 2025-05-31 12:05:26,360] A new study created in memory with name: no-name-9ec32118-7e4c-43b6-84a1-a81e8455c4e6


Family EGGS data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for EGGS ===


[I 2025-05-31 12:05:36,188] Trial 0 finished with value: 0.8651452484646249 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.8651452484646249.
[I 2025-05-31 12:05:46,825] Trial 1 finished with value: 0.8479890183500972 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.8479890183500972.
[I 2025-05-31 12:05:58,088] Trial 2 finished with value: 0.8656105831572187 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.8479890183500972.
[I 2025-05-31 12:06:04,238] Trial 3 finished with value: 0.9051708494896248 and parameters: {'n_esti

Best RMSLE score: 0.7449
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 48.1156, MAE: 32.3554, RMSLE: 0.2704
✓ Successfully tuned model for EGGS

Progress: 12/33 - Training model for family: FROZEN FOODS


[I 2025-05-31 12:13:43,328] A new study created in memory with name: no-name-3ef48b5c-d663-4eeb-9eb8-817574f69671


Family FROZEN FOODS data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for FROZEN FOODS ===


[I 2025-05-31 12:13:51,152] Trial 0 finished with value: 0.6268866781091673 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6268866781091673.
[I 2025-05-31 12:13:58,811] Trial 1 finished with value: 0.5258521691513678 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.5258521691513678.
[I 2025-05-31 12:14:06,881] Trial 2 finished with value: 0.5414148554775661 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.5258521691513678.
[I 2025-05-31 12:14:11,113] Trial 3 finished with value: 0.7690929015830094 and parameters: {'n_esti

Best RMSLE score: 0.4844
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 239.8899, MAE: 62.8021, RMSLE: 0.6354
✓ Successfully tuned model for FROZEN FOODS

Progress: 13/33 - Training model for family: GROCERY I


[I 2025-05-31 12:18:58,952] A new study created in memory with name: no-name-33e8f18d-d557-4dce-8c99-094cdfbb181f


Family GROCERY I data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for GROCERY I ===


[I 2025-05-31 12:19:09,618] Trial 0 finished with value: 1.22741844420364 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.22741844420364.
[I 2025-05-31 12:19:18,281] Trial 1 finished with value: 1.2112293578455322 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 1.2112293578455322.
[I 2025-05-31 12:19:27,550] Trial 2 finished with value: 1.2292325907905008 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 1.2112293578455322.
[I 2025-05-31 12:19:32,265] Trial 3 finished with value: 1.4794755891833722 and parameters: {'n_estimato

Best RMSLE score: 1.1566
Best parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 1536.6755, MAE: 854.5172, RMSLE: 0.3849
✓ Successfully tuned model for GROCERY I

Progress: 14/33 - Training model for family: GROCERY II


[I 2025-05-31 12:25:55,098] A new study created in memory with name: no-name-d968ec54-ebee-4426-902b-8480164c2139


Family GROCERY II data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for GROCERY II ===


[I 2025-05-31 12:26:02,348] Trial 0 finished with value: 0.6007349238698908 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6007349238698908.
[I 2025-05-31 12:26:10,368] Trial 1 finished with value: 0.5988966460055912 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.5988966460055912.
[I 2025-05-31 12:26:18,238] Trial 2 finished with value: 0.6110331005139021 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.5988966460055912.
[I 2025-05-31 12:26:22,787] Trial 3 finished with value: 0.6263115562651517 and parameters: {'n_esti

Best RMSLE score: 0.5635
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 21.1978, MAE: 11.9117, RMSLE: 0.4213
✓ Successfully tuned model for GROCERY II

Progress: 15/33 - Training model for family: HARDWARE


[I 2025-05-31 12:32:30,502] A new study created in memory with name: no-name-da931246-520a-4ae1-bffa-16618c4eb39a


Family HARDWARE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for HARDWARE ===


[I 2025-05-31 12:32:37,315] Trial 0 finished with value: 0.4552974654165323 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.4552974654165323.
[I 2025-05-31 12:32:43,473] Trial 1 finished with value: 0.4553962385637562 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.4552974654165323.
[I 2025-05-31 12:32:49,596] Trial 2 finished with value: 0.45549939213460594 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.4552974654165323.
[I 2025-05-31 12:32:53,020] Trial 3 finished with value: 0.4556219861107813 and parameters: {'n_est

Best RMSLE score: 0.4553
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 1.4322, MAE: 0.9549, RMSLE: 0.4998
✓ Successfully tuned model for HARDWARE

Progress: 16/33 - Training model for family: HOME AND KITCHEN I


[I 2025-05-31 12:37:07,532] A new study created in memory with name: no-name-6865f6f3-387b-4bdf-bb4b-43958b09f7d6


Family HOME AND KITCHEN I data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for HOME AND KITCHEN I ===


[I 2025-05-31 12:37:13,940] Trial 0 finished with value: 0.6036239011873938 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6036239011873938.
[I 2025-05-31 12:37:20,402] Trial 1 finished with value: 0.6066413835436512 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.6036239011873938.
[I 2025-05-31 12:37:27,689] Trial 2 finished with value: 0.6076251726187837 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6036239011873938.
[I 2025-05-31 12:37:31,282] Trial 3 finished with value: 0.6022287864594541 and parameters: {'n_esti

Best RMSLE score: 0.5542
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 40.2124, MAE: 14.5579, RMSLE: 0.5554
✓ Successfully tuned model for HOME AND KITCHEN I

Progress: 17/33 - Training model for family: HOME AND KITCHEN II


[I 2025-05-31 12:42:40,144] A new study created in memory with name: no-name-6ad44a1c-1d5f-489e-bffc-f9d2fa63b3d2


Family HOME AND KITCHEN II data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for HOME AND KITCHEN II ===


[I 2025-05-31 12:42:46,643] Trial 0 finished with value: 0.5060469306186303 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.5060469306186303.
[I 2025-05-31 12:42:52,900] Trial 1 finished with value: 0.5370034587600563 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.5060469306186303.
[I 2025-05-31 12:42:59,884] Trial 2 finished with value: 0.49127762598870756 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.49127762598870756.
[I 2025-05-31 12:43:03,359] Trial 3 finished with value: 0.49743843172909924 and parameters: {'n_e

Best RMSLE score: 0.4387
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 31.7055, MAE: 11.1285, RMSLE: 0.4525
✓ Successfully tuned model for HOME AND KITCHEN II

Progress: 18/33 - Training model for family: HOME APPLIANCES


[I 2025-05-31 12:47:03,534] A new study created in memory with name: no-name-7b6e8218-828a-4ed9-b8d7-53c47ec7a425


Family HOME APPLIANCES data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for HOME APPLIANCES ===


[I 2025-05-31 12:47:09,173] Trial 0 finished with value: 0.27527800876603753 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.27527800876603753.
[I 2025-05-31 12:47:14,497] Trial 1 finished with value: 0.2754608323621401 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.27527800876603753.
[I 2025-05-31 12:47:20,195] Trial 2 finished with value: 0.275270265307595 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.275270265307595.
[I 2025-05-31 12:47:23,294] Trial 3 finished with value: 0.2752526336385573 and parameters: {'n_est

Best RMSLE score: 0.2751
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 0.9672, MAE: 0.6037, RMSLE: 0.3997
✓ Successfully tuned model for HOME APPLIANCES

Progress: 19/33 - Training model for family: HOME CARE


[I 2025-05-31 12:50:21,293] A new study created in memory with name: no-name-ebe7608b-241c-4e3e-98ef-7e9033a97000


Family HOME CARE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for HOME CARE ===


[I 2025-05-31 12:50:29,713] Trial 0 finished with value: 0.6652737702231624 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6652737702231624.
[I 2025-05-31 12:50:37,864] Trial 1 finished with value: 0.6769316450901245 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.6652737702231624.
[I 2025-05-31 12:50:47,167] Trial 2 finished with value: 0.6312958457392748 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.6312958457392748.
[I 2025-05-31 12:50:51,603] Trial 3 finished with value: 0.7759087620080812 and parameters: {'n_esti

Best RMSLE score: 0.4731
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 98.1831, MAE: 51.0300, RMSLE: 0.6360
✓ Successfully tuned model for HOME CARE

Progress: 20/33 - Training model for family: LADIESWEAR


[I 2025-05-31 12:58:17,417] A new study created in memory with name: no-name-ef4dc83b-9f98-40de-a928-3734900f535a


Family LADIESWEAR data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for LADIESWEAR ===


[I 2025-05-31 12:58:24,041] Trial 0 finished with value: 0.3640195889581081 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.3640195889581081.
[I 2025-05-31 12:58:30,363] Trial 1 finished with value: 0.3632699063097165 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.3632699063097165.
[I 2025-05-31 12:58:37,093] Trial 2 finished with value: 0.36234859157102295 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.36234859157102295.
[I 2025-05-31 12:58:40,503] Trial 3 finished with value: 0.3705846327840079 and parameters: {'n_es

Best RMSLE score: 0.3353
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 5.9494, MAE: 3.2065, RMSLE: 0.4072
✓ Successfully tuned model for LADIESWEAR

Progress: 21/33 - Training model for family: LAWN AND GARDEN


[I 2025-05-31 13:04:18,510] A new study created in memory with name: no-name-ff36ec74-e016-4801-9211-b1df52ded223


Family LAWN AND GARDEN data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for LAWN AND GARDEN ===


[I 2025-05-31 13:04:24,890] Trial 0 finished with value: 0.49008041070591 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.49008041070591.
[I 2025-05-31 13:04:31,021] Trial 1 finished with value: 0.4918962242262926 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.49008041070591.
[I 2025-05-31 13:04:37,928] Trial 2 finished with value: 0.4880317039699664 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.4880317039699664.
[I 2025-05-31 13:04:41,569] Trial 3 finished with value: 0.48931231528429725 and parameters: {'n_estimator

Best RMSLE score: 0.4494
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 8.4211, MAE: 3.6689, RMSLE: 0.5005
✓ Successfully tuned model for LAWN AND GARDEN

Progress: 22/33 - Training model for family: LINGERIE


[I 2025-05-31 13:09:54,373] A new study created in memory with name: no-name-80b5e72a-e8c8-40fb-a3c7-483d0f3efd0c


Family LINGERIE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for LINGERIE ===


[I 2025-05-31 13:10:00,748] Trial 0 finished with value: 0.6173750807919167 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6173750807919167.
[I 2025-05-31 13:10:07,056] Trial 1 finished with value: 0.6153682903526322 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.6153682903526322.
[I 2025-05-31 13:10:13,564] Trial 2 finished with value: 0.6136775777334769 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.6136775777334769.
[I 2025-05-31 13:10:17,102] Trial 3 finished with value: 0.6325040360555786 and parameters: {'n_esti

Best RMSLE score: 0.5959
Best parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 11.1828, MAE: 3.4911, RMSLE: 0.6678
✓ Successfully tuned model for LINGERIE

Progress: 23/33 - Training model for family: LIQUOR,WINE,BEER


[I 2025-05-31 13:13:17,553] A new study created in memory with name: no-name-7b4e7f11-d7de-4135-abed-58dfc25c1b3d


Family LIQUOR,WINE,BEER data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for LIQUOR,WINE,BEER ===


[I 2025-05-31 13:13:24,731] Trial 0 finished with value: 0.9586562029223442 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.9586562029223442.
[I 2025-05-31 13:13:31,862] Trial 1 finished with value: 0.9426231835532265 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.9426231835532265.
[I 2025-05-31 13:13:39,661] Trial 2 finished with value: 0.9143835082376155 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.9143835082376155.
[I 2025-05-31 13:13:43,591] Trial 3 finished with value: 1.0047722686062464 and parameters: {'n_esti

Best RMSLE score: 0.8887
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 63.7747, MAE: 27.4202, RMSLE: 0.7739
✓ Successfully tuned model for LIQUOR,WINE,BEER

Progress: 24/33 - Training model for family: MAGAZINES


[I 2025-05-31 13:21:56,962] A new study created in memory with name: no-name-1db3a4e4-5e20-4d1e-a719-3ee5c89e48cc


Family MAGAZINES data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for MAGAZINES ===


[I 2025-05-31 13:22:03,048] Trial 0 finished with value: 0.3748259231595084 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.3748259231595084.
[I 2025-05-31 13:22:08,792] Trial 1 finished with value: 0.3747034058876996 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.3747034058876996.
[I 2025-05-31 13:22:14,965] Trial 2 finished with value: 0.3749115264918383 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.3747034058876996.
[I 2025-05-31 13:22:18,313] Trial 3 finished with value: 0.3809809257350211 and parameters: {'n_esti

Best RMSLE score: 0.3660
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 3.4584, MAE: 1.7342, RMSLE: 0.3630
✓ Successfully tuned model for MAGAZINES

Progress: 25/33 - Training model for family: MEATS


[I 2025-05-31 13:28:14,304] A new study created in memory with name: no-name-4315410c-62b4-4c51-acbd-0e8415ac6a2b


Family MEATS data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for MEATS ===


[I 2025-05-31 13:28:23,854] Trial 0 finished with value: 1.0306348016193978 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.0306348016193978.
[I 2025-05-31 13:28:32,468] Trial 1 finished with value: 1.0473876715714932 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 1.0306348016193978.
[I 2025-05-31 13:28:43,825] Trial 2 finished with value: 1.0568817267536275 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.0306348016193978.
[I 2025-05-31 13:28:53,033] Trial 3 finished with value: 1.1091167876344796 and parameters: {'n_esti

Best RMSLE score: 0.9965
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 658.5498, MAE: 273.9953, RMSLE: 0.4975
✓ Successfully tuned model for MEATS

Progress: 26/33 - Training model for family: PERSONAL CARE


[I 2025-05-31 13:38:00,707] A new study created in memory with name: no-name-4762861f-e5f4-4579-a9f4-7cf55172966a


Family PERSONAL CARE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for PERSONAL CARE ===


[I 2025-05-31 13:38:10,138] Trial 0 finished with value: 0.7929002982596097 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.7929002982596097.
[I 2025-05-31 13:38:19,838] Trial 1 finished with value: 0.7910260493547034 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.7910260493547034.
[I 2025-05-31 13:38:30,118] Trial 2 finished with value: 0.7992512469458442 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.7910260493547034.
[I 2025-05-31 13:38:35,278] Trial 3 finished with value: 0.8395482709715526 and parameters: {'n_esti

Best RMSLE score: 0.6865
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 164.5577, MAE: 83.6847, RMSLE: 0.3119
✓ Successfully tuned model for PERSONAL CARE

Progress: 27/33 - Training model for family: PET SUPPLIES


[I 2025-05-31 13:44:33,336] A new study created in memory with name: no-name-ce2b36c8-242b-460f-98da-de0f0b5bde38


Family PET SUPPLIES data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for PET SUPPLIES ===


[I 2025-05-31 13:44:39,426] Trial 0 finished with value: 0.35537304992704843 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.35537304992704843.
[I 2025-05-31 13:44:45,553] Trial 1 finished with value: 0.355409216862905 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.35537304992704843.
[I 2025-05-31 13:44:51,898] Trial 2 finished with value: 0.35522672883743683 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.35522672883743683.
[I 2025-05-31 13:44:55,383] Trial 3 finished with value: 0.36243576683777223 and parameters: {'n

Best RMSLE score: 0.3505
Best parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 4.9254, MAE: 2.4312, RMSLE: 0.3256
✓ Successfully tuned model for PET SUPPLIES

Progress: 28/33 - Training model for family: PLAYERS AND ELECTRONICS


[I 2025-05-31 13:49:46,807] A new study created in memory with name: no-name-020c646f-56c8-4a74-a749-fc494fd8d2d7


Family PLAYERS AND ELECTRONICS data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for PLAYERS AND ELECTRONICS ===


[I 2025-05-31 13:49:55,454] Trial 0 finished with value: 0.4039212864040862 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.4039212864040862.
[I 2025-05-31 13:50:01,967] Trial 1 finished with value: 0.40473072881124744 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.4039212864040862.
[I 2025-05-31 13:50:20,457] Trial 2 finished with value: 0.40423989140387645 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.4039212864040862.
[I 2025-05-31 13:50:28,020] Trial 3 finished with value: 0.4123502646698543 and parameters: {'n_es

Best RMSLE score: 0.3973
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 7.2803, MAE: 3.7287, RMSLE: 0.3685
✓ Successfully tuned model for PLAYERS AND ELECTRONICS

Progress: 29/33 - Training model for family: POULTRY


[I 2025-05-31 13:55:40,398] A new study created in memory with name: no-name-c9cf6800-59d3-4640-82b0-5d3a7cc2429c


Family POULTRY data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for POULTRY ===


[I 2025-05-31 13:55:50,300] Trial 0 finished with value: 0.890233233068526 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.890233233068526.
[I 2025-05-31 13:56:01,833] Trial 1 finished with value: 0.8922417408892845 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.890233233068526.
[I 2025-05-31 13:56:11,079] Trial 2 finished with value: 0.8843106760721006 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 0.8843106760721006.
[I 2025-05-31 13:56:15,655] Trial 3 finished with value: 1.0165055317902578 and parameters: {'n_estimat

Best RMSLE score: 0.8212
Best parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 287.3123, MAE: 156.2629, RMSLE: 0.4722
✓ Successfully tuned model for POULTRY

Progress: 30/33 - Training model for family: PREPARED FOODS


[I 2025-05-31 14:00:41,234] A new study created in memory with name: no-name-8246f094-f6a2-4172-90c6-c15f2bff8c1c


Family PREPARED FOODS data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for PREPARED FOODS ===


[I 2025-05-31 14:00:49,675] Trial 0 finished with value: 0.6612927055703611 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6612927055703611.
[I 2025-05-31 14:00:58,038] Trial 1 finished with value: 0.6744117537266955 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.6612927055703611.
[I 2025-05-31 14:01:07,910] Trial 2 finished with value: 0.6705029398512218 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.6612927055703611.
[I 2025-05-31 14:01:16,349] Trial 3 finished with value: 0.697240018959565 and parameters: {'n_estim

Best RMSLE score: 0.6353
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': 10, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}
Validation - RMSE: 40.2154, MAE: 28.4386, RMSLE: 0.3189
✓ Successfully tuned model for PREPARED FOODS

Progress: 31/33 - Training model for family: PRODUCE


[I 2025-05-31 14:09:23,228] A new study created in memory with name: no-name-48f90676-7c54-407d-82d8-3e2916ad0c07


Family PRODUCE data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for PRODUCE ===


[I 2025-05-31 14:09:31,204] Trial 0 finished with value: 1.2699388602897221 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 1.2699388602897221.
[I 2025-05-31 14:09:39,364] Trial 1 finished with value: 1.2512726432390173 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 1.2512726432390173.
[I 2025-05-31 14:10:00,439] Trial 2 finished with value: 1.2468893191189911 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 2 with value: 1.2468893191189911.
[I 2025-05-31 14:10:17,055] Trial 3 finished with value: 1.797965568400044 and parameters: {'n_estim

Best RMSLE score: 0.9187
Best parameters: {'n_estimators': 500, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}
Validation - RMSE: 763.9216, MAE: 416.6744, RMSLE: 0.8694
✓ Successfully tuned model for PRODUCE

Progress: 32/33 - Training model for family: SCHOOL AND OFFICE SUPPLIES


[I 2025-05-31 14:16:48,055] A new study created in memory with name: no-name-f85baddd-61dd-44b1-b8ed-cf7310706d27


Family SCHOOL AND OFFICE SUPPLIES data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for SCHOOL AND OFFICE SUPPLIES ===


[I 2025-05-31 14:16:53,419] Trial 0 finished with value: 0.3152476464250231 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.3152476464250231.
[I 2025-05-31 14:16:58,821] Trial 1 finished with value: 0.31574066335692247 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 0 with value: 0.3152476464250231.
[I 2025-05-31 14:17:06,794] Trial 2 finished with value: 0.31572273907575177 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.3152476464250231.
[I 2025-05-31 14:17:09,888] Trial 3 finished with value: 0.3155643881088893 and parameters: {'n_es

Best RMSLE score: 0.3125
Best parameters: {'n_estimators': 500, 'learning_rate': 0.01, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}
Validation - RMSE: 27.2635, MAE: 4.3615, RMSLE: 0.4187
✓ Successfully tuned model for SCHOOL AND OFFICE SUPPLIES

Progress: 33/33 - Training model for family: SEAFOOD


[I 2025-05-31 14:21:06,406] A new study created in memory with name: no-name-b6564763-5aab-42b8-ac28-ee8856dd3c28


Family SEAFOOD data shape: (89316, 47)
Tuning set: (71452, 47), Validation set: (17864, 47)

=== Optuna Tuning hyperparameters for SEAFOOD ===


[I 2025-05-31 14:21:19,741] Trial 0 finished with value: 0.5341753440771532 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0.1}. Best is trial 0 with value: 0.5341753440771532.
[I 2025-05-31 14:21:32,310] Trial 1 finished with value: 0.5305605413418171 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': 10, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0, 'reg_lambda': 0}. Best is trial 1 with value: 0.5305605413418171.
[I 2025-05-31 14:21:44,160] Trial 2 finished with value: 0.5345230575790424 and parameters: {'n_estimators': 1000, 'learning_rate': 0.01, 'num_leaves': 31, 'max_depth': -1, 'subsample': 0.9, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.5305605413418171.
[I 2025-05-31 14:21:49,280] Trial 3 finished with value: 0.6111942373903237 and parameters: {'n_esti

Best RMSLE score: 0.5102
Best parameters: {'n_estimators': 1000, 'learning_rate': 0.05, 'num_leaves': 50, 'max_depth': -1, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_alpha': 0.1, 'reg_lambda': 0}
Validation - RMSE: 13.0273, MAE: 8.4533, RMSLE: 0.3373
✓ Successfully tuned model for SEAFOOD

HYPERPARAMETER TUNING COMPLETED


In [22]:
# Display tuning results summary
print("\n=== HYPERPARAMETER TUNING RESULTS SUMMARY ===")
print("-" * 80)
print(f"{'Family':<25} {'Best RMSLE':<15} {'Key Parameters':<40}")
print("-" * 80)

for family in families:
    best_score = tuning_results.get(family, 'N/A')
    best_params = best_params_dict.get(family, 'N/A')
    
    if isinstance(best_score, float):
        score_str = f"{best_score:.4f}"
    else:
        score_str = str(best_score)
    
    if isinstance(best_params, dict):
        # Show only key parameters for readability
        key_params = {k: v for k, v in best_params.items() if k in ['n_estimators', 'learning_rate', 'num_leaves']}
        params_str = str(key_params)[:35] + '...' if len(str(key_params)) > 35 else str(key_params)
    else:
        params_str = str(best_params)
    
    print(f"{family[:24]:<25} {score_str:<15} {params_str:<40}")

print("-" * 80)

# Calculate average RMSLE
valid_scores = [score for score in tuning_results.values() if isinstance(score, float)]
if valid_scores:
    avg_rmsle = np.mean(valid_scores)
    print(f"\nAverage RMSLE across all families: {avg_rmsle:.4f}")
    best_family = min(tuning_results.items(), key=lambda x: x[1] if isinstance(x[1], float) else float('inf'))[0]
    worst_family = max(tuning_results.items(), key=lambda x: x[1] if isinstance(x[1], float) else 0)[0]
    print(f"Best performing family: {best_family}")
    print(f"Worst performing family: {worst_family}")


=== HYPERPARAMETER TUNING RESULTS SUMMARY ===
--------------------------------------------------------------------------------
Family                    Best RMSLE      Key Parameters                          
--------------------------------------------------------------------------------
AUTOMOTIVE                0.5186          {'n_estimators': 500, 'learning_rat...  
BABY CARE                 0.2071          {'n_estimators': 500, 'learning_rat...  
BEAUTY                    0.4395          {'n_estimators': 1000, 'learning_ra...  
BEVERAGES                 1.0730          {'n_estimators': 1000, 'learning_ra...  
BOOKS                     0.0604          {'n_estimators': 500, 'learning_rat...  
BREAD/BAKERY              0.7929          {'n_estimators': 1000, 'learning_ra...  
CELEBRATION               0.4537          {'n_estimators': 1000, 'learning_ra...  
CLEANING                  1.0911          {'n_estimators': 1000, 'learning_ra...  
DAIRY                     0.5142          {'

## 5. Final Model Training on Full Dataset

In [23]:
# Retrain models with best parameters on the full training dataset
print("\n=== RETRAINING MODELS WITH BEST PARAMETERS ON FULL DATASET ===")
print("This will train the final models using the entire training and validation data...\n")

final_models = {}
final_training_results = {}

for i, family in enumerate(families, 1):
    print(f"Progress: {i}/{len(families)} - Final training for family: {family}")
    
    # Filter the data for the current family
    family_data = train[train['family'] == family].copy()
    family_target = train_df[train_df['family'] == family]['sales'].copy()
    
    # Prepare features (use full dataset)
    X_family_full = family_data.drop(columns=['date', 'family'])
    y_family_full = family_target
    
    # Get best parameters for this family
    best_params = best_params_dict.get(family, {})
    
    if isinstance(best_params, dict):
        # Create model with best parameters
        final_model = lgb.LGBMRegressor(
            objective='regression',
            metric='rmse',
            random_state=42,
            force_row_wise=True,
            **best_params
        )
    else:
        # Use default parameters if tuning failed
        final_model = lgb.LGBMRegressor(
            objective='regression',
            metric='rmse',
            n_estimators=1000,
            learning_rate=0.01,
            num_leaves=31,
            max_depth=-1,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            force_row_wise=True
        )
    
    # Train the final model on the full dataset
    final_model.fit(X_family_full, y_family_full)
    
    # Store the final model
    final_models[family] = final_model
    
    # Store training info
    final_training_results[family] = {
        'training_samples': len(X_family_full),
        'features': X_family_full.shape[1],
        'best_params': best_params
    }
    
    print(f"  ✓ Trained on {len(X_family_full)} samples with {X_family_full.shape[1]} features")

print(f"\n✓ Final training completed for all {len(families)} families!")

# Update the models dictionary to use final models
models = final_models


=== RETRAINING MODELS WITH BEST PARAMETERS ON FULL DATASET ===
This will train the final models using the entire training and validation data...

Progress: 1/33 - Final training for family: AUTOMOTIVE
  ✓ Trained on 89316 samples with 47 features
Progress: 2/33 - Final training for family: BABY CARE
  ✓ Trained on 89316 samples with 47 features
Progress: 3/33 - Final training for family: BEAUTY
  ✓ Trained on 89316 samples with 47 features
Progress: 4/33 - Final training for family: BEVERAGES
  ✓ Trained on 89316 samples with 47 features
Progress: 5/33 - Final training for family: BOOKS
  ✓ Trained on 89316 samples with 47 features
Progress: 6/33 - Final training for family: BREAD/BAKERY
  ✓ Trained on 89316 samples with 47 features
Progress: 7/33 - Final training for family: CELEBRATION
  ✓ Trained on 89316 samples with 47 features
Progress: 8/33 - Final training for family: CLEANING
  ✓ Trained on 89316 samples with 47 features
Progress: 9/33 - Final training for family: DAIRY
  ✓ T

In [24]:
# Save the trained models and metadata
print("\n=== SAVING TRAINED MODELS ===")

import os
from datetime import datetime

# Create models directory if it doesn't exist
models_dir = '../models'
os.makedirs(models_dir, exist_ok=True)

# Create timestamp for model versioning
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

# Save individual models and metadata
saved_models_info = {}

for family in families:
    # Clean family name for filename
    clean_family_name = family.replace('/', '_').replace(' ', '_').replace('&', 'and')
    
    # Save model
    model_filename = f'lgb_model_{clean_family_name}_{timestamp}.pkl'
    model_path = os.path.join(models_dir, model_filename)
    joblib.dump(models[family], model_path)
    
    saved_models_info[family] = {
        'model_path': model_path,
        'best_params': best_params_dict.get(family, 'default'),
        'tuning_score': tuning_results.get(family, 'N/A'),
        'training_samples': final_training_results[family]['training_samples']
    }
    
    print(f"  ✓ Saved model for {family[:30]}: {model_filename}")

# Save models metadata
metadata_filename = f'models_metadata_{timestamp}.pkl'
metadata_path = os.path.join(models_dir, metadata_filename)
joblib.dump(saved_models_info, metadata_path)

print(f"\n✓ Models metadata saved: {metadata_filename}")
print(f"✓ All models saved in: {models_dir}")

# Also save best parameters as JSON for easy inspection
import json
params_filename = f'best_parameters_{timestamp}.json'
params_path = os.path.join(models_dir, params_filename)
with open(params_path, 'w') as f:
    # Convert numpy types to native Python types for JSON serialization
    json_params = {}
    for family, params in best_params_dict.items():
        if isinstance(params, dict):
            json_params[family] = {k: int(v) if isinstance(v, np.integer) else float(v) if isinstance(v, np.floating) else v for k, v in params.items()}
        else:
            json_params[family] = str(params)
    json.dump(json_params, f, indent=2)

print(f"✓ Best parameters saved as JSON: {params_filename}")


=== SAVING TRAINED MODELS ===
  ✓ Saved model for AUTOMOTIVE: lgb_model_AUTOMOTIVE_20250531_143548.pkl
  ✓ Saved model for BABY CARE: lgb_model_BABY_CARE_20250531_143548.pkl
  ✓ Saved model for BEAUTY: lgb_model_BEAUTY_20250531_143548.pkl
  ✓ Saved model for BEVERAGES: lgb_model_BEVERAGES_20250531_143548.pkl
  ✓ Saved model for BOOKS: lgb_model_BOOKS_20250531_143548.pkl
  ✓ Saved model for BREAD/BAKERY: lgb_model_BREAD_BAKERY_20250531_143548.pkl
  ✓ Saved model for CELEBRATION: lgb_model_CELEBRATION_20250531_143548.pkl
  ✓ Saved model for CLEANING: lgb_model_CLEANING_20250531_143548.pkl
  ✓ Saved model for DAIRY: lgb_model_DAIRY_20250531_143548.pkl
  ✓ Saved model for DELI: lgb_model_DELI_20250531_143548.pkl
  ✓ Saved model for EGGS: lgb_model_EGGS_20250531_143548.pkl
  ✓ Saved model for FROZEN FOODS: lgb_model_FROZEN_FOODS_20250531_143548.pkl
  ✓ Saved model for GROCERY I: lgb_model_GROCERY_I_20250531_143548.pkl
  ✓ Saved model for GROCERY II: lgb_model_GROCERY_II_20250531_143548.pkl

## 6. Generate Predictions for Test Set

In [25]:
# Generate predictions for test set
print("Generating predictions for test set...")

# Initialize predictions array
test_predictions = []
test_ids = []

# Generate predictions for each family
for family in families:
    print(f"Predicting for family: {family}")
    
    # Filter test data for current family
    family_test_data = test[test['family'] == family].copy()
    
    if len(family_test_data) > 0:
        # Prepare features (remove date and family columns)
        X_test_family = family_test_data.drop(columns=['date', 'family'])
        
        # Get the trained model for this family
        model = models[family]
        
        # Generate predictions
        family_predictions = model.predict(X_test_family)
        
        # Ensure no negative predictions
        family_predictions = np.maximum(family_predictions, 0)
        
        # Store predictions and corresponding IDs
        test_predictions.extend(family_predictions)
        
        # Get corresponding IDs from test_df
        family_ids = test_df[test_df['family'] == family]['id'].values
        test_ids.extend(family_ids)
        
        print(f"Generated {len(family_predictions)} predictions for {family}")

print(f"Total predictions generated: {len(test_predictions)}")

Generating predictions for test set...
Predicting for family: AUTOMOTIVE
Generated 864 predictions for AUTOMOTIVE
Predicting for family: BABY CARE
Generated 864 predictions for BABY CARE
Predicting for family: BEAUTY
Generated 864 predictions for BEAUTY
Predicting for family: BEVERAGES
Generated 864 predictions for BEVERAGES
Predicting for family: BOOKS
Generated 864 predictions for BOOKS
Predicting for family: BREAD/BAKERY
Generated 864 predictions for BREAD/BAKERY
Predicting for family: CELEBRATION
Generated 864 predictions for CELEBRATION
Predicting for family: CLEANING
Generated 864 predictions for CLEANING
Predicting for family: DAIRY
Generated 864 predictions for DAIRY
Predicting for family: DELI
Generated 864 predictions for DELI
Predicting for family: EGGS
Generated 864 predictions for EGGS
Predicting for family: FROZEN FOODS
Generated 864 predictions for FROZEN FOODS
Predicting for family: GROCERY I
Generated 864 predictions for GROCERY I
Predicting for family: GROCERY II
Gene

In [26]:
# Create submission DataFrame
submission_df = pd.DataFrame({
    'id': test_ids,
    'sales': test_predictions
})

# Sort by id to ensure proper order
submission_df = submission_df.sort_values('id').reset_index(drop=True)

print(f"Submission shape: {submission_df.shape}")
print("\nFirst few predictions:")
print(submission_df.head(10))

print("\nLast few predictions:")
print(submission_df.tail(10))

print(f"\nPrediction statistics:")
print(f"Min: {submission_df['sales'].min():.4f}")
print(f"Max: {submission_df['sales'].max():.4f}")
print(f"Mean: {submission_df['sales'].mean():.4f}")
print(f"Median: {submission_df['sales'].median():.4f}")

Submission shape: (28512, 2)

First few predictions:
        id        sales
0  3000888     3.726486
1  3000889     0.002220
2  3000890     5.926308
3  3000891  2002.855071
4  3000892     0.001093
5  3000893   355.498043
6  3000894     7.830936
7  3000895   715.918517
8  3000896   726.602182
9  3000897   136.533694

Last few predictions:
            id        sales
28502  3029390     9.912054
28503  3029391   476.483615
28504  3029392   526.104009
28505  3029393     5.311023
28506  3029394     5.982623
28507  3029395   411.933115
28508  3029396   148.108294
28509  3029397  2141.166371
28510  3029398   119.941219
28511  3029399    14.793278

Prediction statistics:
Min: 0.0000
Max: 11780.7429
Mean: 434.7197
Median: 28.2085


In [27]:
# Save submission file
submission_filename = f'../data/submission/traditional_submission_tuned_{timestamp}.csv'
submission_df.to_csv(submission_filename, index=False)

print(f"Submission file saved as: {submission_filename}")
print(f"File contains {len(submission_df)} predictions")

# Verify the submission file
print("\nVerifying submission file...")
verify_df = pd.read_csv(submission_filename)
print(f"Loaded file shape: {verify_df.shape}")
print(f"Columns: {list(verify_df.columns)}")
print(f"No missing values: {verify_df.isnull().sum().sum() == 0}")
print(f"All IDs unique: {len(verify_df['id'].unique()) == len(verify_df)}")

Submission file saved as: ../data/submission/traditional_submission_tuned_20250531_143548.csv
File contains 28512 predictions

Verifying submission file...
Loaded file shape: (28512, 2)
Columns: ['id', 'sales']
No missing values: True
All IDs unique: True
