In [None]:
# Switch the working directory to the parent folder; the relative data path and the
# `src` imports used further down are presumably resolved from there (confirm repo layout).
# NOTE(review): this is not idempotent - every re-run moves one more level up, so run
# it once per kernel session.
%cd ..

In [None]:
from itertools import islice

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from hyperopt import STATUS_OK, Trials, fmin, hp, space_eval, tpe
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    root_mean_squared_error,
)

In [None]:
# Read the tuning dataset in a single pass (low_memory=False) and convert the
# Date column from its CSV representation to pandas datetimes.
csv_path = 'data/hp_tunning_df.csv'
df = pd.read_csv(csv_path, low_memory=False)
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
# Columns handled as pandas categoricals.
cat_features = ['store', 'product']

# Model inputs: calendar columns, the lag columns y_lag_1 ... y_lag_6, then the
# categorical columns (same order as before).
calendar_features = ['day_of_month', 'day_of_week', 'month']
lag_features = [f'y_lag_{lag}' for lag in range(1, 7)]
features = calendar_features + lag_features + cat_features

df[cat_features] = df[cat_features].astype('category')

## Prepare CV folds (3 folds, each validated on 1 year of data)

In [None]:
from src.hp_tuning_helpers import *
from src.lgb_model_train import *

In [None]:
# Build the cross-validation folds from `df`. The second argument (3) is presumably
# the number of folds - TODO confirm against create_folds in src.
folds = create_folds(df, 3)

In [None]:
def objective(params):
    """Hyperopt objective: cross-validated score of one LightGBM configuration.

    Parameters
    ----------
    params : dict
        One point sampled from the search space. It must contain ``num_leaves``,
        ``max_depth``, ``min_child_samples`` and ``num_boost_round`` (hyperopt's
        ``quniform`` yields them as floats). Every other entry is forwarded to
        ``LGBModelTrainer`` unchanged. The dict itself is never modified.

    Returns
    -------
    dict
        ``{'loss': <value returned by cross_val_metrics>, 'status': STATUS_OK}``,
        the result shape ``fmin`` expects.
    """
    # Work on a copy so the caller's dict is left untouched (the previous version
    # cast values in place and popped ``num_boost_round`` from the argument).
    model_params = dict(params)

    # quniform samples are floats; these entries are cast to int before training.
    for key in ('num_leaves', 'max_depth', 'min_child_samples'):
        model_params[key] = int(model_params[key])

    # The number of boosting rounds is handed to the trainer as a separate
    # argument, so it is removed from the parameter dict.
    num_boost_round = int(model_params.pop('num_boost_round'))

    model_class = LGBModelTrainer(model_params, 'month_deviation', features, num_boost_round)
    avg_mae = cross_val_metrics(model_class, folds, cv=3)
    return {'loss': avg_mae, 'status': STATUS_OK}


In [None]:
# Hyperopt search space. Plain values are constants passed through to the model;
# hp.* entries are sampled on every evaluation.
space = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': hp.choice('boosting_type', ['gbdt', 'dart', 'goss']),
    # hp.loguniform(label, low, high) returns exp(uniform(low, high)), so the bounds
    # must be given in log space. Passing 0.01 and 1 directly sampled learning
    # rates in [e^0.01, e^1] ~ [1.01, 2.72] instead of [0.01, 1].
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(1.0)),
    # quniform yields floats; `objective` casts these to int.
    'num_leaves': hp.quniform('num_leaves', 20, 50, 1),
    'max_depth': hp.quniform('max_depth', 3, 50, 1),
    'min_child_samples': hp.quniform('min_child_samples', 3, 50, 1),
    # 'subsample': hp.uniform('subsample', 0.5, 1),
    # 'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    # 'reg_alpha': hp.uniform('reg_alpha', 0.0, 1),
    # 'reg_lambda': hp.uniform('reg_lambda', 0.0, 1),
    # Sampled in steps of 1000 rounds.
    "num_boost_round": hp.quniform("num_boost_round", 3000, 20000, 1000),
    # Integer seed, consistent with the manual grid search further down.
    'seed': 42,
}

In [None]:
# `trials` records every evaluation; `best` receives the assignment with the lowest
# loss among the 5 TPE-suggested evaluations.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=5, trials=trials)

In [None]:
# `fmin` reports hp.choice parameters as the *index* of the chosen option (e.g.
# boosting_type -> 0/1/2). space_eval maps the assignment back onto the search
# space so the displayed configuration contains the actual values.
best_config = space_eval(space, best)
best_config

# Manual GridSearch

In [None]:
from itertools import product
import numpy as np

# Candidate values for each hyper-parameter of the manual search.
param_grid = {
    'boosting_type': ['gbdt', 'dart', 'goss'],
    'learning_rate': np.linspace(0.01, 1.0, 21),
    'num_leaves': list(range(20, 50)),
    'max_depth': list(range(3, 50)),
    'min_child_samples': list(range(3, 50)),
    'subsample': np.linspace(0.5, 1.0, 11),
    'colsample_bytree': np.linspace(0.5, 1.0, 11),
    "num_boost_round": list(range(3000, 21000, 1000)),
    # 'reg_alpha': np.linspace(0.1, 1.0, 10),
    # 'reg_lambda': np.linspace(0.1, 1.0, 10),
}

# The full Cartesian product below has 3 * 21 * 30 * 47 * 47 * 18 (about 75 million)
# tuples; turning all of it into a list costs several GB of memory. Only the leading
# combinations are evaluated, so just that many are materialised. Raise this cap if
# more of the grid should be searched.
MAX_GRID_COMBINATIONS = 10

# Generate combinations of parameters lazily, in the same order as before, and keep
# the result a list so it can still be sliced and indexed.
param_combinations = list(islice(
    product(
        param_grid['boosting_type'],
        param_grid['learning_rate'],
        param_grid['num_leaves'],
        param_grid['max_depth'],
        param_grid['min_child_samples'],
        param_grid['num_boost_round'],
        # param_grid['reg_alpha'],
        # param_grid['reg_lambda']
    ),
    MAX_GRID_COMBINATIONS,
))


In [None]:
import lightgbm

# Best result seen so far. All three trackers are initialised up-front so the cells
# below never hit a NameError when no candidate improves on `inf` (e.g. NaN scores).
best_mae = float("inf")
best_params = None
best_num_boost_round = None

# Evaluate the first 10 parameter combinations.
for bt, lr, nl, md, ms, nbr in param_combinations[:10]:
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': bt,
        # learning_rate comes from np.linspace; store it as a plain Python float
        'learning_rate': float(lr),
        'num_leaves': nl,
        'max_depth': md,
        'min_child_samples': ms,
        # 'subsample': ss,
        # 'colsample_bytree': cb,
        # 'reg_alpha':ra,
        # 'reg_lambda':rl,
        'verbosity': -1,
        'seed': 42
    }
    # Boosting rounds are passed to the trainer separately from the parameter dict.
    num_boost_round = nbr

    model_class = LGBModelTrainer(params, 'month_deviation', features, num_boost_round)
    avg_mae = cross_val_metrics(model_class, folds, cv=3)

    # A fresh `params` dict is built on every iteration, so keeping a reference is safe.
    if avg_mae < best_mae:
        best_mae = avg_mae
        best_params = params
        best_num_boost_round = num_boost_round

print("\nBest MAE:", best_mae)
print("Best Params:", best_params)
# num_boost_round is not part of best_params, so report it explicitly.
print("Best num_boost_round:", best_num_boost_round)

In [None]:
# Show the best parameter dict found by the grid search; the matching number of
# boosting rounds is kept separately in `best_num_boost_round`.
best_params

In [None]:
# Rebuild the trainer with the best grid-search configuration, using the same target
# name and feature list as during the search.
model_class = LGBModelTrainer(best_params, 'month_deviation', features, best_num_boost_round)

In [None]:
# Fit on folds[0][0] - presumably the training split of the first fold (TODO confirm
# against create_folds).
model = model_class.train_model(folds[0][0])

In [None]:
# Predict on a copy of the frame the model was fitted on, so the fold itself is
# not modified when the prediction column is added.
data = folds[0][0].copy()
model_inputs = data[model_class.features]
data['pred'] = model.predict(model_inputs)

In [None]:
# MAE between the target column and the predictions. `data` is a copy of
# folds[0][0], the same frame the model was trained on, so this is an in-sample
# error rather than a validation score.
mean_absolute_error(data[model_class.target], data['pred'])