In [1]:
import optuna
import pandas as pd
import numpy as np
import lightgbm as lgb
from tqdm import tqdm
from lightgbm import log_evaluation

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def calculate_lags(df, col, lags, shift=0):
    for l in lags:
        df[f"lag_{col}_{l}"] = df.groupby(["ts_id"])[col].shift(shift + l).astype(np.float16)

def calculate_rollings(df, col, rollings, shift=0):
    for r in rollings:
        df[f"rol_mean_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).mean()
        df[f"rol_std_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).std()
        df[f"rol_min_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).min()
        df[f"rol_max_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).max()


from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

# https://github.com/getgaurav2/scikit-learn/blob/d4a3af5cc9da3a76f0266932644b884c99724c57/sklearn/model_selection/_split.py#L2243
class GroupTimeSeriesSplit(_BaseKFold):
    """Time Series cross-validator variant with non-overlapping groups.
    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals according to a
    third-party provided group.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.
    This cross-validation object is a variation of :class:`KFold`.
    In the kth split, it returns first k folds as train set and the
    (k+1)th fold as test set.
    The same group will not appear in two different folds (the number of
    distinct groups has to be at least equal to the number of folds).
    Note that unlike standard cross-validation methods, successive
    training sets are supersets of those that come before them.
    Read more in the :ref:`User Guide <cross_validation>`.
    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.
    max_train_size : int, default=None
        Maximum size for a single training set.
    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.model_selection import GroupTimeSeriesSplit
    >>> groups = np.array(['a', 'a', 'a', 'a', 'a', 'a',\
                           'b', 'b', 'b', 'b', 'b',\
                           'c', 'c', 'c', 'c',\
                           'd', 'd', 'd'])
    >>> gtss = GroupTimeSeriesSplit(n_splits=3)
    >>> for train_idx, test_idx in gtss.split(groups, groups=groups):
    ...     print("TRAIN:", train_idx, "TEST:", test_idx)
    ...     print("TRAIN GROUP:", groups[train_idx],\
                  "TEST GROUP:", groups[test_idx])
    TRAIN: [0, 1, 2, 3, 4, 5] TEST: [6, 7, 8, 9, 10]
    TRAIN GROUP: ['a' 'a' 'a' 'a' 'a' 'a']\
    TEST GROUP: ['b' 'b' 'b' 'b' 'b']
    TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] TEST: [11, 12, 13, 14]
    TRAIN GROUP: ['a' 'a' 'a' 'a' 'a' 'a' 'b' 'b' 'b' 'b' 'b']\
    TEST GROUP: ['c' 'c' 'c' 'c']
    TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]\
    TEST: [15, 16, 17]
    TRAIN GROUP: ['a' 'a' 'a' 'a' 'a' 'a' 'b' 'b' 'b' 'b' 'b' 'c' 'c' 'c' 'c']\
    TEST GROUP: ['d' 'd' 'd']
    """
    @_deprecate_positional_args
    def __init__(self,
                 n_splits=5,
                 *,
                 max_train_size=None
                 ):
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.max_train_size = max_train_size

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set.
        Yields
        ------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.
        """
        if groups is None:
            raise ValueError(
                "The 'groups' parameter should not be None")
        X, y, groups = indexable(X, y, groups)
        n_samples = _num_samples(X)
        n_splits = self.n_splits
        n_folds = n_splits + 1
        group_dict = {}
        u, ind = np.unique(groups, return_index=True)
        unique_groups = u[np.argsort(ind)]
        n_samples = _num_samples(X)
        n_groups = _num_samples(unique_groups)
        for idx in np.arange(n_samples):
            if (groups[idx] in group_dict):
                group_dict[groups[idx]].append(idx)
            else:
                group_dict[groups[idx]] = [idx]
        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater than"
                 " the number of groups={1}").format(n_folds,
                                                     n_groups))
        group_test_size = n_groups // n_folds
        group_test_starts = range(n_groups - n_splits * group_test_size,
                                  n_groups, group_test_size)
        for group_test_start in group_test_starts:
            train_array = []
            test_array = []
            for train_group_idx in unique_groups[:group_test_start]:
                train_array_tmp = group_dict[train_group_idx]
                train_array = np.sort(np.unique(
                                      np.concatenate((train_array,
                                                      train_array_tmp)),
                                      axis=None), axis=None)
            train_end = train_array.size
            if self.max_train_size and self.max_train_size < train_end:
                train_array = train_array[train_end -
                                          self.max_train_size:train_end]
            for test_group_idx in unique_groups[group_test_start:
                                                group_test_start +
                                                group_test_size]:
                test_array_tmp = group_dict[test_group_idx]
                test_array = np.sort(np.unique(
                                              np.concatenate((test_array,
                                                              test_array_tmp)),
                                     axis=None), axis=None)
            yield [int(i) for i in train_array], [int(i) for i in test_array]

In [3]:
path = '/Users/idris/Documents/ds_project/forecast_store_sales/data/'
cutoff = '2017-07-31'

col_to_ignore = ["date", "is_future", "forecast_step", "ts_id"]

TARGET_COL = 'sales'

prediction_length = 16
lags_target=list(range(1, 16))
rollings_target=[2, 4, 8, 16]

In [4]:
df_cutoff = pd.read_csv(path + f'fe/cutoff/{cutoff}/final_frame.csv', sep=';').drop('transactions', axis=1)
df_cutoff['date'] = pd.to_datetime(df_cutoff['date'])
df_cutoff['is_future'] = df_cutoff['is_future'].fillna(False)
df_cutoff["time_idx"] = ((df_cutoff["date"] - df_cutoff["date"].min()).dt.days).astype(int)

df_cv = df_cutoff.loc[df_cutoff["is_future"] == False].reset_index(drop=True)

clean = pd.read_csv(path + f'trainclean.csv', sep=';')
clean['date'] = pd.to_datetime(clean['date'])

In [5]:
prediction_length = 16

lags_target=list(range(1, 16))
rollings_target=[2, 4, 8, 16]

In [6]:
def objective(trial):
     
    params = {
        "objective": "tweedie",
        "tweedie_variance_power": trial.suggest_float("tweedie_variance_power", 1.1, 1.6, step=0.1),
        "n_estimators": trial.suggest_categorical("n_estimators", [1000]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 200, 10000, step=100),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "lambda_l2": trial.suggest_int("lambda_l2", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction": trial.suggest_float(
            "bagging_fraction", 0.2, 0.95, step=0.1
        ),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float(
            "feature_fraction", 0.2, 0.95, step=0.1
        ),
        "random_state": 1990,
        "metric": "rmse",
        "verbose": -1
    }
    
    list_wape = []
    
    for train_idx, test_idx in tqdm(GroupTimeSeriesSplit().split(df_cv, groups=df_cv['time_idx'])):
    
        i_train = df_cv.loc[train_idx, 'time_idx'].unique()
        df_train = df_cv.loc[df_cv['time_idx'].isin(i_train)].drop(columns=col_to_ignore)
        i_test = df_cv.loc[test_idx, 'time_idx'].unique()[:16]
        df_test = df_cv.loc[df_cv['time_idx'].isin(i_test)].drop(columns=col_to_ignore).drop(columns=TARGET_COL) 

        min_train_idx = df_test['time_idx'].min() - prediction_length
        max_train_idx = df_test['time_idx'].max()
        cutoff_idx = np.arange(min_train_idx, max_train_idx + 1, 1)
        df_rec = df_cv.loc[df_cv['time_idx'].isin(cutoff_idx)]    

        df_rec.loc[df_rec['time_idx'].isin(i_test), 'is_future'] = True
        df_rec.loc[df_rec["is_future"] == True, "sales"] = np.nan
                
        print('id time to pred', i_test)
        
        train_ds = lgb.Dataset(
        data=df_train.drop(columns=TARGET_COL), 
        label=df_train[TARGET_COL]
        )

        gbm = lgb.train(
        params=params,
        train_set=train_ds,
        valid_sets=train_ds,
        callbacks=[log_evaluation(period=100)]
        )

        first_future_time_idx = i_test.min()
        df_rec.loc[:, "forecast_step"] = df_rec.loc[:, "time_idx"] - first_future_time_idx + 1 

        for fs in range(1, prediction_length + 1):

            # predict one step
            df_predict = df_rec.loc[df_rec["forecast_step"] == fs].drop(columns=col_to_ignore + [TARGET_COL])
            predictions = np.clip(gbm.predict(df_predict), a_min=0, a_max=None)

            # update sales_quantity usings predictions
            df_rec.loc[df_rec["forecast_step"] == fs, "sales"] = predictions

            # update target lags & rollings
            calculate_lags(df_rec, TARGET_COL, lags_target)
            calculate_rollings(df_rec, TARGET_COL, rollings_target)

        df_forecast = df_rec.loc[df_rec["is_future"] == True, ["ts_id", "date", "sales"]]
        df_forecast["forecast"] = df_forecast["sales"].round(0).astype(np.float16)
        df_forecast.drop(columns="sales", inplace=True)

        df_error = pd.merge(df_forecast, clean[['ts_id', 'date', 'sales']], how="left", on=['ts_id', 'date'])
        wape = np.round(np.sum(np.abs(df_error["sales"] - df_error["forecast"])) / np.sum(df_error["sales"]), 3)
        print('wape: ', wape)
        list_wape.append(wape)
    
    mean_wape = np.mean(list_wape)
    
    return mean_wape

In [7]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-05-21 19:59:02,193][0m A new study created in memory with name: no-name-429f9755-890f-4da9-8507-1e4ca235bc20[0m
0it [00:00, ?it/s]

id time to pred [283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298]
[100]	training's rmse: 199.458
[200]	training's rmse: 179.281
[300]	training's rmse: 170.337
[400]	training's rmse: 164.152
[500]	training's rmse: 160.042
[600]	training's rmse: 156.07
[700]	training's rmse: 152.984
[800]	training's rmse: 150.567
[900]	training's rmse: 148.103
[1000]	training's rmse: 146.087


1it [00:40, 40.18s/it]

wape:  0.137
id time to pred [561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576]
[100]	training's rmse: 214.817
[200]	training's rmse: 192.429
[300]	training's rmse: 180.217
[400]	training's rmse: 171.586
[500]	training's rmse: 165.748
[600]	training's rmse: 160.597
[700]	training's rmse: 156.304
[800]	training's rmse: 153.056
[900]	training's rmse: 150.033
[1000]	training's rmse: 147.087


2it [01:44, 54.37s/it]

wape:  0.29
id time to pred [839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854]
[100]	training's rmse: 221.361
[200]	training's rmse: 198.773
[300]	training's rmse: 188.027
[400]	training's rmse: 180.806
[500]	training's rmse: 175.302
[600]	training's rmse: 170.78
[700]	training's rmse: 167.089
[800]	training's rmse: 163.373
[900]	training's rmse: 160.306
[1000]	training's rmse: 157.529


3it [03:26, 76.05s/it]

wape:  0.259
id time to pred [1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
 1131 1132]
[100]	training's rmse: 226.243
[200]	training's rmse: 207.084
[300]	training's rmse: 196.152
[400]	training's rmse: 190.082
[500]	training's rmse: 184.892
[600]	training's rmse: 180.83
[700]	training's rmse: 177.563
[800]	training's rmse: 174.094
[900]	training's rmse: 171.011
[1000]	training's rmse: 167.765


4it [06:00, 106.77s/it]

wape:  0.165
id time to pred [1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408
 1409 1410]
[100]	training's rmse: 266.887
[200]	training's rmse: 250.644
[300]	training's rmse: 240.88
[400]	training's rmse: 233.838
[500]	training's rmse: 228.373
[600]	training's rmse: 222.409
[700]	training's rmse: 216.967
[800]	training's rmse: 212.244
[900]	training's rmse: 208.055
[1000]	training's rmse: 202.952


5it [09:24, 112.86s/it]
[32m[I 2022-05-21 20:08:26,521][0m Trial 0 finished with value: 0.209 and parameters: {'tweedie_variance_power': 1.1, 'n_estimators': 1000, 'learning_rate': 0.1853560761229214, 'num_leaves': 2660, 'max_depth': 7, 'min_data_in_leaf': 5500, 'lambda_l1': 60, 'lambda_l2': 60, 'min_gain_to_split': 10.691292879149728, 'bagging_fraction': 0.7, 'bagging_freq': 1, 'feature_fraction': 0.6000000000000001}. Best is trial 0 with value: 0.209.[0m


wape:  0.194


0it [00:00, ?it/s]

id time to pred [283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298]
[100]	training's rmse: 265.496
[200]	training's rmse: 248.144
[300]	training's rmse: 236.546
[400]	training's rmse: 233.714
[500]	training's rmse: 223.809
[600]	training's rmse: 223.401
[700]	training's rmse: 222.881
[800]	training's rmse: 222.825
[900]	training's rmse: 222.763
[1000]	training's rmse: 222.74


1it [00:33, 33.65s/it]

wape:  0.149
id time to pred [561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576]
[100]	training's rmse: 308.785
[200]	training's rmse: 278.798
[300]	training's rmse: 261.754
[400]	training's rmse: 255.608
[500]	training's rmse: 248.206
[600]	training's rmse: 243.288
[700]	training's rmse: 238.23
[800]	training's rmse: 233.625
[900]	training's rmse: 230.153
[1000]	training's rmse: 227.657


2it [01:23, 43.42s/it]

wape:  inf
id time to pred [839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854]
[100]	training's rmse: 298.565
[200]	training's rmse: 274.992
[300]	training's rmse: 263.767
[400]	training's rmse: 255.043
[500]	training's rmse: 246.583
[600]	training's rmse: 242.298
[700]	training's rmse: 239.511
[800]	training's rmse: 235.593
[900]	training's rmse: 232.427
[1000]	training's rmse: 229.017


3it [03:01, 68.03s/it]

wape:  0.273
id time to pred [1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
 1131 1132]
[100]	training's rmse: 310.047
[200]	training's rmse: 287.893
[300]	training's rmse: 276.957
[400]	training's rmse: 271.43
[500]	training's rmse: 262.817
[600]	training's rmse: 259.32
[700]	training's rmse: 254.166
[800]	training's rmse: 250.685
[900]	training's rmse: 247.954
[1000]	training's rmse: 244.431


4it [05:18, 95.20s/it]

wape:  0.24
id time to pred [1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408
 1409 1410]


IOStream.flush timed out


[100]	training's rmse: 332.742
[200]	training's rmse: 314.105
[300]	training's rmse: 304.213
[400]	training's rmse: 296.805
[500]	training's rmse: 291.558
[600]	training's rmse: 286.9
[700]	training's rmse: 284.007
[800]	training's rmse: 281.287
[900]	training's rmse: 278.989
[1000]	training's rmse: 276.286


5it [08:20, 100.05s/it]
[32m[I 2022-05-21 20:16:46,764][0m Trial 1 finished with value: inf and parameters: {'tweedie_variance_power': 1.6, 'n_estimators': 1000, 'learning_rate': 0.2376438294698057, 'num_leaves': 3000, 'max_depth': 3, 'min_data_in_leaf': 7700, 'lambda_l1': 5, 'lambda_l2': 35, 'min_gain_to_split': 8.412714304092384, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 0 with value: 0.209.[0m


wape:  0.195


0it [00:00, ?it/s]

id time to pred [283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298]
[100]	training's rmse: 159.293
[200]	training's rmse: 149.599
[300]	training's rmse: 145.908
[400]	training's rmse: 143.746
[500]	training's rmse: 142.431
[600]	training's rmse: 141.274
[700]	training's rmse: 140.642
[800]	training's rmse: 139.904
[900]	training's rmse: 139.251
[1000]	training's rmse: 138.993


1it [00:35, 35.61s/it]

wape:  0.135
id time to pred [561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576]
[100]	training's rmse: 178.182
[200]	training's rmse: 159.705
[300]	training's rmse: 150.8
[400]	training's rmse: 145.043
[500]	training's rmse: 140.908
[600]	training's rmse: 138.342
[700]	training's rmse: 136.435
[800]	training's rmse: 134.88
[900]	training's rmse: 133.579
[1000]	training's rmse: 132.518


2it [01:36, 50.32s/it]

wape:  0.176
id time to pred [839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854]
[100]	training's rmse: 184.493
[200]	training's rmse: 171.005
[300]	training's rmse: 160.785
[400]	training's rmse: 154.215
[500]	training's rmse: 149.195
[600]	training's rmse: 145.776
[700]	training's rmse: 143.082
[800]	training's rmse: 141.014
[900]	training's rmse: 139.136
[1000]	training's rmse: 138.147


3it [03:16, 73.35s/it]

wape:  0.143
id time to pred [1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
 1131 1132]
[100]	training's rmse: 192.96
[200]	training's rmse: 179.105
[300]	training's rmse: 170.753
[400]	training's rmse: 164.57
[500]	training's rmse: 159.163
[600]	training's rmse: 154.916
[700]	training's rmse: 151.884
[800]	training's rmse: 149.615
[900]	training's rmse: 147.564
[1000]	training's rmse: 145.739


4it [05:50, 105.12s/it]

wape:  0.172
id time to pred [1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408
 1409 1410]
[100]	training's rmse: 234.355
[200]	training's rmse: 219.355
[300]	training's rmse: 207.98
[400]	training's rmse: 196.677
[500]	training's rmse: 188.833
[600]	training's rmse: 179.977
[700]	training's rmse: 173.527
[800]	training's rmse: 169.701
[900]	training's rmse: 165.18
[1000]	training's rmse: 162.489


5it [09:17, 111.59s/it]
[32m[I 2022-05-21 20:26:04,711][0m Trial 2 finished with value: 0.16299999999999998 and parameters: {'tweedie_variance_power': 1.3, 'n_estimators': 1000, 'learning_rate': 0.28248300281123323, 'num_leaves': 2820, 'max_depth': 10, 'min_data_in_leaf': 2100, 'lambda_l1': 15, 'lambda_l2': 40, 'min_gain_to_split': 6.846719835661776, 'bagging_fraction': 0.8, 'bagging_freq': 1, 'feature_fraction': 0.4}. Best is trial 2 with value: 0.16299999999999998.[0m


wape:  0.189


0it [00:00, ?it/s]

id time to pred [283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298]
[100]	training's rmse: 242.105
[200]	training's rmse: 203.125
[300]	training's rmse: 186.835
[400]	training's rmse: 178.043
[500]	training's rmse: 172.997
[600]	training's rmse: 168.61
[700]	training's rmse: 165.059
[800]	training's rmse: 162.125
[900]	training's rmse: 159.305
[1000]	training's rmse: 157.081


1it [00:36, 36.37s/it]

wape:  0.152
id time to pred [561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576]
[100]	training's rmse: 282.726
[200]	training's rmse: 241.921
[300]	training's rmse: 223.128
[400]	training's rmse: 211.247
[500]	training's rmse: 202.935
[600]	training's rmse: 196.664
[700]	training's rmse: 191.61
[800]	training's rmse: 187.247
[900]	training's rmse: 183.929
[1000]	training's rmse: 180.76


2it [01:34, 49.05s/it]

wape:  0.254
id time to pred [839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854]
[100]	training's rmse: 291.909
[200]	training's rmse: 250.451
[300]	training's rmse: 234.934
[400]	training's rmse: 224.983
[500]	training's rmse: 217.02
[600]	training's rmse: 210.927
[700]	training's rmse: 206.475
[800]	training's rmse: 202.273
[900]	training's rmse: 198.669
[1000]	training's rmse: 195.552


3it [03:11, 71.08s/it]

wape:  0.21
id time to pred [1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
 1131 1132]
[100]	training's rmse: 299.624
[200]	training's rmse: 256.367
[300]	training's rmse: 241.194
[400]	training's rmse: 232.527
[500]	training's rmse: 225.727
[600]	training's rmse: 220.113


3it [05:01, 100.54s/it]


KeyboardInterrupt: 

In [None]:
print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
 Trial 0 finished with value: 0.14940000000000003 and parameters: 

{
    'tweedie_variance_power': 1.4000000000000001,
    'n_estimators': 500,
    'learning_rate': 0.067307721852165,
    'num_leaves': 300,
    'max_depth': 10,
    'min_data_in_leaf': 1300,
    'lambda_l1': 100,
    'lambda_l2': 80,
    'min_gain_to_split': 9.72024574054834,
    'bagging_fraction': 0.8,
    'bagging_freq': 1,
    'feature_fraction': 0.6000000000000001
}

. Best is trial 0 with value: 0.14940000000000003.

In [None]:
Trial 7 finished with value: 0.156 and parameters: {'tweedie_variance_power': 1.3, 'n_estimators': 500, 'learning_rate': 0.047505552909073004, 'num_leaves': 2780, 'max_depth': 9, 'min_data_in_leaf': 4400, 'lambda_l1': 65, 'lambda_l2': 40, 'min_gain_to_split': 9.020620157726453, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.9}. Best is trial 7 with value: 0.156.

In [None]:
Trial 0 finished with value: 0.15299999999999997 and parameters: {'tweedie_variance_power': 1.4000000000000001, 'n_estimators': 500, 'learning_rate': 0.06908883769088184, 'num_leaves': 600, 'max_depth': 10, 'min_data_in_leaf': 400, 'lambda_l1': 5, 'lambda_l2': 50, 'min_gain_to_split': 3.4052603539384463, 'bagging_fraction': 0.5, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 0.15299999999999997.

In [25]:
df_cutoff[df_cutoff['ts_id'] == '9_28'].sort_values('date', ascending=False).head(20)

Unnamed: 0,ts_id,date,id,bu,famid,sales,onprom,lag_sales_1,lag_sales_2,lag_sales_3,...,hol_before_1,hol_before_2,hol_before_3,hol_after_1,prixoil,isclosed,typeid,cityid,stateid,cluster
2444481,9_28,2017-08-15,3000883,9,28,377.118167,0,379.25,527.5,448.0,...,0,0,0,1,47.57,0,1,0,0,6
2444480,9_28,2017-08-14,2999101,9,28,379.155766,0,527.5,448.0,467.0,...,0,0,0,1,47.59,0,1,0,0,6
2444479,9_28,2017-08-13,2997319,9,28,527.73125,0,448.0,467.0,341.0,...,0,0,1,0,48.81,0,1,0,0,6
2444478,9_28,2017-08-12,2995537,9,28,447.991523,0,467.0,341.0,327.0,...,0,1,0,0,48.81,0,1,0,0,6
2444477,9_28,2017-08-11,2993755,9,28,467.10187,22,341.0,327.0,358.25,...,1,0,0,0,48.81,0,1,0,0,6
2444476,9_28,2017-08-10,2991973,9,28,341.112831,0,327.0,358.25,375.25,...,0,0,0,0,48.54,0,1,0,0,6
2444475,9_28,2017-08-09,2990191,9,28,327.100216,1,358.25,375.25,614.5,...,0,0,0,1,49.59,0,1,0,0,6
2444474,9_28,2017-08-08,2988409,9,28,358.32902,0,375.25,614.5,497.0,...,0,0,1,0,49.07,0,1,0,0,6
2444473,9_28,2017-08-07,2986627,9,28,375.205631,0,614.5,497.0,544.0,...,0,1,0,0,49.37,0,1,0,0,6
2444472,9_28,2017-08-06,2984845,9,28,614.312139,0,497.0,544.0,364.5,...,1,0,0,0,49.57,0,1,0,0,6


In [36]:
prediction_length = 8
lags_target=list(range(1, 16))
rollings_target=[2, 4, 8, 16]

In [33]:
def objective(trial):

    def calculate_lags(df, col, lags, shift=0):
        for l in lags:
            df[f"lag_{col}_{l}"] = df.groupby(["ts_id"])[col].shift(shift + l).astype(np.float16)

    def calculate_rollings(df, col, rollings, shift=0):
        for r in rollings:
            df[f"rol_mean_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).mean()
            df[f"rol_std_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).std()
            df[f"rol_min_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).min()
            df[f"rol_max_{col}_{r}"] = df.groupby(["ts_id"])[col].shift(shift + 1).rolling(r, min_periods=1).max()


    params = {
        "boosting_type": "gbdt", 
        "num_leaves": trial.suggest_int("num_leaves", 4, 10),
        "max_depth": -1, 
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 1), 
        "n_estimators": 500, #100
        "subsample_for_bin": 200000, 
        "objective": trial.suggest_categorical("objective", ["regression", "tweedie"]),
        "class_weight": None, 
        "min_split_gain": 0.0,
        "min_child_weight": 0.001, 
        "min_child_samples": trial.suggest_int("min_child_samples", 10, 100, step=10),
        "subsample": trial.suggest_float("subsample", 0.1, 1.0, step=0.1),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0, step=0.1),
        "reg_alpha": trial.suggest_float("reg_alpha", 0, 1.0, step=0.1),
        "reg_lambda": trial.suggest_float("reg_lambda", 0, 1.0, step=0.1),
        "random_state": 666,
        "metric": "rmse"
    }
    
    params["num_leaves"] = 2**params["num_leaves"]-1
    
    if params["objective"] == "tweedie":
        params["tweedie_variance_power"] = trial.suggest_float("tweedie_variance_power", 1.0, 1.9, step=0.1)
    
    if params["subsample"] < 1.0:
        params["subsample_freq"] = trial.suggest_int("subsample_freq", 1, 7)
    


    train_ds = lgb.Dataset(
        data=df_train.drop(columns=TARGET_COL), 
        label=df_train[TARGET_COL]
    )

    print("Fit")
    gbm = lgb.train(
        params=params,
        train_set=train_ds,
        valid_sets=train_ds
        )

    df_cutoff.loc[df_cutoff["is_future"] == True, "sales"] = np.nan

    for fs in range(1, prediction_length + 1):
        print(fs)

        # predict one step
        df_predict = df_cutoff.loc[df_cutoff["forecast_step"] == fs].drop(columns=col_to_ignore + [TARGET_COL])
        predictions = np.clip(gbm.predict(df_predict), a_min=0, a_max=None)

        # update sales_quantity usings predictions
        df_cutoff.loc[df_cutoff["forecast_step"] == fs, "sales"] = predictions

        # update target lags & rollings
        calculate_lags(df_cutoff, TARGET_COL, lags_target)
        calculate_rollings(df_cutoff, TARGET_COL, rollings_target)
    
    print("Format")
    df_forecast = df_cutoff.loc[df_cutoff["is_future"] == True, ["ts_id", "date", "sales"]]
    df_forecast["forecast"] = df_forecast["sales"].round(0).astype(np.float16)
    df_forecast.drop(columns="sales", inplace=True)
    print("Log metrics")
    df_error = pd.merge(df_forecast, clean[['ts_id', 'date', 'sales']], how="left", on=['ts_id', 'date'])
    wape = np.round(np.sum(np.abs(df_error["sales"] - df_error["forecast"])) / np.sum(df_error["sales"]), 4)
    
    return wape

In [37]:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)

print("Number of finished trials: {}".format(len(study.trials)))

print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

[32m[I 2022-05-14 20:33:22,518][0m A new study created in memory with name: no-name-3593a23c-3b2f-4436-ba0e-ae10dd20e2d4[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1187.75
[2]	training's rmse: 1169.07
[3]	training's rmse: 1156.22
[4]	training's rmse: 1144.68
[5]	training's rmse: 1133.28
[6]	training's rmse: 1121.62
[7]	training's rmse: 1109.61
[8]	training's rmse: 1097.05
[9]	training's rmse: 1084.02
[10]	training's rmse: 1070.5
[11]	training's rmse: 1056.41
[12]	training's rmse: 1041.96
[13]	training's rmse: 1026.99
[14]	training's rmse: 1011.46
[15]	training's rmse: 995.469
[16]	training's rmse: 979.022
[17]	training's rmse: 962.064
[18]	training's rmse: 944.743
[19]	training's rmse: 927.005
[20]	training's rmse: 908.879
[21]	training's rmse: 890.5
[22]	training's rmse: 871.838
[23]	training's rmse: 852.772
[24]	trainin

[32m[I 2022-05-14 20:36:14,107][0m Trial 0 finished with value: 0.0778 and parameters: {'num_leaves': 6, 'learning_rate': 0.07998230745890325, 'objective': 'tweedie', 'min_child_samples': 30, 'subsample': 0.8, 'colsample_bytree': 0.5, 'reg_alpha': 0.2, 'reg_lambda': 0.30000000000000004, 'tweedie_variance_power': 1.9, 'subsample_freq': 4}. Best is trial 0 with value: 0.0778.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 1147.2
[2]	training's rmse: 1069.91
[3]	training's rmse: 998.149
[4]	training's rmse: 931.649
[5]	training's rmse: 870.393
[6]	training's rmse: 814.228
[7]	training's rmse: 762.883
[8]	training's rmse: 715.519
[9]	training's rmse: 672.213
[10]	training's rmse: 632.737
[11]	training's rmse: 596.649
[12]	training's rmse: 563.454
[13]	training's rmse: 533.283
[14]	training's rmse: 506.147
[15]	training's rmse: 480.663
[16]	training's rmse: 458.138
[17]	training's rmse: 437.556
[18]	training's rmse: 419.321
[19]	training's rmse: 402.492
[20]	training's rmse: 387.507
[21]	training's rmse: 373.868
[22]	training's rmse: 361.466
[23]	training's rmse: 350.553
[24]	tra

[32m[I 2022-05-14 20:38:57,857][0m Trial 1 finished with value: 0.0746 and parameters: {'num_leaves': 6, 'learning_rate': 0.07513117103691569, 'objective': 'regression', 'min_child_samples': 10, 'subsample': 1.0, 'colsample_bytree': 0.5, 'reg_alpha': 0.0, 'reg_lambda': 0.1}. Best is trial 1 with value: 0.0746.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1211.22
[2]	training's rmse: 1190.17
[3]	training's rmse: 1168.18
[4]	training's rmse: 1145.42
[5]	training's rmse: 1121.93
[6]	training's rmse: 1098.04
[7]	training's rmse: 1073.92
[8]	training's rmse: 1049.59
[9]	training's rmse: 1025.27
[10]	training's rmse: 1001.11
[11]	training's rmse: 977.141
[12]	training's rmse: 953.244
[13]	training's rmse: 929.739
[14]	training's rmse: 906.651
[15]	training's rmse: 884.006
[16]	training's rmse: 861.803
[17]	training's rmse: 840.028
[18]	training's rmse: 818.803
[19]	training's rmse: 798.067
[20]	training's rmse: 777.874
[21]	training's rmse: 758.122
[22]	training's rmse: 738.905
[23]	training's rmse: 720.156
[24]	trai

[32m[I 2022-05-14 20:41:54,707][0m Trial 2 finished with value: 0.0691 and parameters: {'num_leaves': 6, 'learning_rate': 0.03309337787385885, 'objective': 'tweedie', 'min_child_samples': 20, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.4, 'reg_alpha': 0.30000000000000004, 'reg_lambda': 0.1, 'tweedie_variance_power': 1.1, 'subsample_freq': 6}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 1084.95
[2]	training's rmse: 959.03
[3]	training's rmse: 849.829
[4]	training's rmse: 755.449
[5]	training's rmse: 674.362
[6]	training's rmse: 605.744
[7]	training's rmse: 547.91
[8]	training's rmse: 498.949
[9]	training's rmse: 457.826
[10]	training's rmse: 423.027
[11]	training's rmse: 394.592
[12]	training's rmse: 371.224
[13]	training's rmse: 351.962
[14]	training's rmse: 336.63
[15]	training's rmse: 322.736
[16]	training's rmse: 311.854
[17]	training's rmse: 303.021
[18]	training's rmse: 295.628
[19]	training's rmse: 289.198
[20]	training's rmse: 283.784
[21]	training's rmse: 279.277
[22]	training's rmse: 275.856
[23]	training's rmse: 272.714
[24]	train

[32m[I 2022-05-14 21:00:40,475][0m Trial 3 finished with value: 0.0791 and parameters: {'num_leaves': 6, 'learning_rate': 0.1314115230393358, 'objective': 'regression', 'min_child_samples': 10, 'subsample': 0.4, 'colsample_bytree': 0.5, 'reg_alpha': 0.30000000000000004, 'reg_lambda': 0.8, 'subsample_freq': 6}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1056.25
[2]	training's rmse: 860.359
[3]	training's rmse: 620.561
[4]	training's rmse: 418.721
[5]	training's rmse: 313.95
[6]	training's rmse: 288.784
[7]	training's rmse: 279.551
[8]	training's rmse: 278.071
[9]	training's rmse: 285.222
[10]	training's rmse: 321.186
[11]	training's rmse: 294.88
[12]	training's rmse: 339.811
[13]	training's rmse: 574.886
[14]	training's rmse: 1291.75
[15]	training's rmse: 780.13
[16]	training's rmse: 833.094
[17]	training's rmse: 1868.54
[18]	training's rmse: 4520.73
[19]	training's rmse: 8131.06
[20]	training's rmse: 5029.03
[21]	training's rmse: 9471.49
[22]	training's rmse: 23609.8
[23]	training's rmse: 5408.37
[24]	trainin

[32m[I 2022-05-14 21:54:05,997][0m Trial 4 finished with value: inf and parameters: {'num_leaves': 10, 'learning_rate': 0.7318637628717782, 'objective': 'tweedie', 'min_child_samples': 50, 'subsample': 0.5, 'colsample_bytree': 0.8, 'reg_alpha': 0.30000000000000004, 'reg_lambda': 1.0, 'tweedie_variance_power': 1.8, 'subsample_freq': 6}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 1222.23
[2]	training's rmse: 1213.33
[3]	training's rmse: 1204.45
[4]	training's rmse: 1195.59
[5]	training's rmse: 1186.78
[6]	training's rmse: 1178.05
[7]	training's rmse: 1169.47
[8]	training's rmse: 1160.87
[9]	training's rmse: 1152.45
[10]	training's rmse: 1144.08
[11]	training's rmse: 1135.82
[12]	training's rmse: 1127.53
[13]	training's rmse: 1119.34
[14]	training's rmse: 1111.27
[15]	training's rmse: 1103.23
[16]	training's rmse: 1095.28
[17]	training's rmse: 1087.36
[18]	training's rmse: 1079.54
[19]	training's rmse: 1071.76
[20]	training's rmse: 1064.07
[21]	training's rmse: 1056.47
[22]	training's rmse: 1048.92
[23]	training's rmse: 1041.38
[24]	tr

[32m[I 2022-05-14 22:12:50,260][0m Trial 5 finished with value: 0.077 and parameters: {'num_leaves': 7, 'learning_rate': 0.008017165747612583, 'objective': 'regression', 'min_child_samples': 100, 'subsample': 0.30000000000000004, 'colsample_bytree': 0.5, 'reg_alpha': 0.8, 'reg_lambda': 1.0, 'subsample_freq': 4}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 676.246
[2]	training's rmse: 418.861
[3]	training's rmse: 304.944
[4]	training's rmse: 260.844
[5]	training's rmse: 241.596
[6]	training's rmse: 230.907
[7]	training's rmse: 223.615
[8]	training's rmse: 217.184
[9]	training's rmse: 211.371
[10]	training's rmse: 205.691
[11]	training's rmse: 201.137
[12]	training's rmse: 196.757
[13]	training's rmse: 192.806
[14]	training's rmse: 189.486
[15]	training's rmse: 186.255
[16]	training's rmse: 183.202
[17]	training's rmse: 180.332
[18]	training's rmse: 177.256
[19]	training's rmse: 174.362
[20]	training's rmse: 171.939
[21]	training's rmse: 169.294
[22]	training's rmse: 166.151
[23]	training's rmse: 163.565
[24]	tr

[32m[I 2022-05-14 22:17:14,604][0m Trial 6 finished with value: 0.0924 and parameters: {'num_leaves': 10, 'learning_rate': 0.49842369619246313, 'objective': 'regression', 'min_child_samples': 30, 'subsample': 0.9, 'colsample_bytree': 0.6000000000000001, 'reg_alpha': 0.9, 'reg_lambda': 0.8, 'subsample_freq': 7}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 1227.88
[2]	training's rmse: 1224.51
[3]	training's rmse: 1221.15
[4]	training's rmse: 1217.8
[5]	training's rmse: 1214.45
[6]	training's rmse: 1211.11
[7]	training's rmse: 1207.79
[8]	training's rmse: 1204.48
[9]	training's rmse: 1201.2
[10]	training's rmse: 1197.93
[11]	training's rmse: 1194.66
[12]	training's rmse: 1191.39
[13]	training's rmse: 1188.13
[14]	training's rmse: 1184.87
[15]	training's rmse: 1181.65
[16]	training's rmse: 1178.46
[17]	training's rmse: 1175.23
[18]	training's rmse: 1172.04
[19]	training's rmse: 1168.84
[20]	training's rmse: 1165.67
[21]	training's rmse: 1162.49
[22]	training's rmse: 1159.33
[23]	training's rmse: 1156.18
[24]	trai

[32m[I 2022-05-14 22:21:22,250][0m Trial 7 finished with value: 0.2052 and parameters: {'num_leaves': 10, 'learning_rate': 0.0029579830262834493, 'objective': 'regression', 'min_child_samples': 20, 'subsample': 0.1, 'colsample_bytree': 0.6000000000000001, 'reg_alpha': 0.0, 'reg_lambda': 0.30000000000000004, 'subsample_freq': 7}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 1172.25
[2]	training's rmse: 1115.99
[3]	training's rmse: 1062.93
[4]	training's rmse: 1012.75
[5]	training's rmse: 965.391
[6]	training's rmse: 920.8
[7]	training's rmse: 878.896
[8]	training's rmse: 839.027
[9]	training's rmse: 801.714
[10]	training's rmse: 766.572
[11]	training's rmse: 733.469
[12]	training's rmse: 702.049
[13]	training's rmse: 672.433
[14]	training's rmse: 644.601
[15]	training's rmse: 618.484
[16]	training's rmse: 594.061
[17]	training's rmse: 570.968
[18]	training's rmse: 549.577
[19]	training's rmse: 529.432
[20]	training's rmse: 510.56
[21]	training's rmse: 492.81
[22]	training's rmse: 476.333
[23]	training's rmse: 460.892
[24]	traini

[32m[I 2022-05-14 22:39:59,043][0m Trial 8 finished with value: 0.0757 and parameters: {'num_leaves': 7, 'learning_rate': 0.05228033363770627, 'objective': 'regression', 'min_child_samples': 100, 'subsample': 0.6, 'colsample_bytree': 0.7000000000000001, 'reg_alpha': 0.5, 'reg_lambda': 0.7000000000000001, 'subsample_freq': 6}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 450.984147
[1]	training's rmse: 1227.03
[2]	training's rmse: 1222.84
[3]	training's rmse: 1218.66
[4]	training's rmse: 1214.5
[5]	training's rmse: 1210.35
[6]	training's rmse: 1206.22
[7]	training's rmse: 1202.1
[8]	training's rmse: 1197.99
[9]	training's rmse: 1193.89
[10]	training's rmse: 1189.81
[11]	training's rmse: 1185.75
[12]	training's rmse: 1181.7
[13]	training's rmse: 1177.67
[14]	training's rmse: 1173.65
[15]	training's rmse: 1169.66
[16]	training's rmse: 1165.7
[17]	training's rmse: 1161.74
[18]	training's rmse: 1157.81
[19]	training's rmse: 1153.88
[20]	training's rmse: 1149.98
[21]	training's rmse: 1146.09
[22]	training's rmse: 1142.2
[23]	training's rmse: 1138.34
[24]	trainin

[32m[I 2022-05-14 22:43:32,219][0m Trial 9 finished with value: 0.1479 and parameters: {'num_leaves': 8, 'learning_rate': 0.0036911256391123997, 'objective': 'regression', 'min_child_samples': 70, 'subsample': 0.30000000000000004, 'colsample_bytree': 1.0, 'reg_alpha': 0.6000000000000001, 'reg_lambda': 0.5, 'subsample_freq': 7}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1212.4
[2]	training's rmse: 1194.41
[3]	training's rmse: 1176.89
[4]	training's rmse: 1159.72
[5]	training's rmse: 1142.7
[6]	training's rmse: 1126.15
[7]	training's rmse: 1109.91
[8]	training's rmse: 1093.95
[9]	training's rmse: 1078.22
[10]	training's rmse: 1062.9
[11]	training's rmse: 1047.97
[12]	training's rmse: 1033.13
[13]	training's rmse: 1018.75
[14]	training's rmse: 1004.52
[15]	training's rmse: 990.545
[16]	training's rmse: 976.884
[17]	training's rmse: 963.41
[18]	training's rmse: 950.203
[19]	training's rmse: 937.247
[20]	training's rmse: 924.446
[21]	training's rmse: 911.895
[22]	training's rmse: 899.655
[23]	training's rmse: 887.456
[24]	training's r

[32m[I 2022-05-14 23:03:10,257][0m Trial 10 finished with value: 0.0786 and parameters: {'num_leaves': 4, 'learning_rate': 0.015591955454367486, 'objective': 'tweedie', 'min_child_samples': 60, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.4, 'reg_alpha': 0.6000000000000001, 'reg_lambda': 0.0, 'tweedie_variance_power': 1.0, 'subsample_freq': 1}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1072.06
[2]	training's rmse: 910.38
[3]	training's rmse: 773.679
[4]	training's rmse: 663.02
[5]	training's rmse: 574.221
[6]	training's rmse: 507.235
[7]	training's rmse: 458.292
[8]	training's rmse: 421.242
[9]	training's rmse: 393.931
[10]	training's rmse: 374.228
[11]	training's rmse: 360.613
[12]	training's rmse: 347.367
[13]	training's rmse: 339.263
[14]	training's rmse: 333.552
[15]	training's rmse: 329.506
[16]	training's rmse: 326.956
[17]	training's rmse: 324.561
[18]	training's rmse: 321.895
[19]	training's rmse: 320.245
[20]	training's rmse: 318.883
[21]	training's rmse: 317.12
[22]	training's rmse: 316.366
[23]	training's rmse: 314.288
[24]	training's 

[32m[I 2022-05-14 23:22:43,650][0m Trial 11 finished with value: 0.0697 and parameters: {'num_leaves': 4, 'learning_rate': 0.18812335134389452, 'objective': 'tweedie', 'min_child_samples': 10, 'subsample': 1.0, 'colsample_bytree': 0.4, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'tweedie_variance_power': 1.1}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 986.349
[2]	training's rmse: 781.387
[3]	training's rmse: 631.996
[4]	training's rmse: 529.669
[5]	training's rmse: 459.482
[6]	training's rmse: 414.253
[7]	training's rmse: 386.73
[8]	training's rmse: 367.369
[9]	training's rmse: 354.868
[10]	training's rmse: 346.758
[11]	training's rmse: 342.114
[12]	training's rmse: 337.273
[13]	training's rmse: 335.71
[14]	training's rmse: 332.004
[15]	training's rmse: 329.14
[16]	training's rmse: 328.517
[17]	training's rmse: 327.134
[18]	training's rmse: 325.739
[19]	training's rmse: 324.265
[20]	training's rmse: 322.914
[21]	training's rmse: 319.075
[22]	training's rmse: 317.855
[23]	training's rmse: 317.466
[24]	training's 

[32m[I 2022-05-14 23:42:01,243][0m Trial 12 finished with value: 0.0756 and parameters: {'num_leaves': 4, 'learning_rate': 0.24746756944750647, 'objective': 'tweedie', 'min_child_samples': 40, 'subsample': 1.0, 'colsample_bytree': 0.4, 'reg_alpha': 0.2, 'reg_lambda': 0.2, 'tweedie_variance_power': 1.1}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1222.95
[2]	training's rmse: 1214.62
[3]	training's rmse: 1206.26
[4]	training's rmse: 1197.85
[5]	training's rmse: 1189.38
[6]	training's rmse: 1180.85
[7]	training's rmse: 1172.25
[8]	training's rmse: 1163.57
[9]	training's rmse: 1154.83
[10]	training's rmse: 1145.99
[11]	training's rmse: 1137.07
[12]	training's rmse: 1128.06
[13]	training's rmse: 1118.96
[14]	training's rmse: 1109.77
[15]	training's rmse: 1100.5
[16]	training's rmse: 1091.12
[17]	training's rmse: 1081.66
[18]	training's rmse: 1072.09
[19]	training's rmse: 1062.46
[20]	training's rmse: 1052.73
[21]	training's rmse: 1042.92
[22]	training's rmse: 1033.05
[23]	training's rmse: 1023.08
[24]	training'

[32m[I 2022-05-14 23:53:37,620][0m Trial 13 finished with value: 0.0733 and parameters: {'num_leaves': 5, 'learning_rate': 0.021934824759356146, 'objective': 'tweedie', 'min_child_samples': 10, 'subsample': 0.8, 'colsample_bytree': 0.9, 'reg_alpha': 0.1, 'reg_lambda': 0.0, 'tweedie_variance_power': 1.3, 'subsample_freq': 2}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1230.78
[2]	training's rmse: 1230.32
[3]	training's rmse: 1229.86
[4]	training's rmse: 1229.39
[5]	training's rmse: 1228.93
[6]	training's rmse: 1228.46
[7]	training's rmse: 1228
[8]	training's rmse: 1227.53
[9]	training's rmse: 1227.06
[10]	training's rmse: 1226.6
[11]	training's rmse: 1226.13
[12]	training's rmse: 1225.67
[13]	training's rmse: 1225.2
[14]	training's rmse: 1224.74
[15]	training's rmse: 1224.27
[16]	training's rmse: 1223.81
[17]	training's rmse: 1223.34
[18]	training's rmse: 1222.87
[19]	training's rmse: 1222.41
[20]	training's rmse: 1221.94
[21]	training's rmse: 1221.47
[22]	training's rmse: 1221.01
[23]	training's rmse: 1220.54
[24]	training's rm

[32m[I 2022-05-14 23:56:16,338][0m Trial 14 finished with value: 0.4711 and parameters: {'num_leaves': 5, 'learning_rate': 0.0012394593512061254, 'objective': 'tweedie', 'min_child_samples': 30, 'subsample': 0.7000000000000001, 'colsample_bytree': 0.4, 'reg_alpha': 0.4, 'reg_lambda': 0.5, 'tweedie_variance_power': 1.3, 'subsample_freq': 5}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1131.44
[2]	training's rmse: 1012.9
[3]	training's rmse: 887.167
[4]	training's rmse: 766.963
[5]	training's rmse: 659.544
[6]	training's rmse: 568.207
[7]	training's rmse: 492.525
[8]	training's rmse: 432.578
[9]	training's rmse: 384.75
[10]	training's rmse: 347.579
[11]	training's rmse: 320.03
[12]	training's rmse: 299.547
[13]	training's rmse: 283.536
[14]	training's rmse: 272.031
[15]	training's rmse: 263.084
[16]	training's rmse: 256.877
[17]	training's rmse: 251.216
[18]	training's rmse: 246.955
[19]	training's rmse: 243.261
[20]	training's rmse: 240.592
[21]	training's rmse: 237.609
[22]	training's rmse: 234.877
[23]	training's rmse: 231.633
[24]	training's 

[32m[I 2022-05-14 23:59:41,354][0m Trial 15 finished with value: 0.0719 and parameters: {'num_leaves': 8, 'learning_rate': 0.18844811289527197, 'objective': 'tweedie', 'min_child_samples': 70, 'subsample': 0.9, 'colsample_bytree': 0.7000000000000001, 'reg_alpha': 0.1, 'reg_lambda': 0.2, 'tweedie_variance_power': 1.2, 'subsample_freq': 3}. Best is trial 2 with value: 0.0691.[0m
  in terms of current number of round (e.g. yields learning rate decay).


Fit
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1179.91
[2]	training's rmse: 1137.01
[3]	training's rmse: 1096.92
[4]	training's rmse: 1059.15
[5]	training's rmse: 1023.39
[6]	training's rmse: 989.348
[7]	training's rmse: 956.631
[8]	training's rmse: 925.583
[9]	training's rmse: 895.796
[10]	training's rmse: 867.406
[11]	training's rmse: 840.251
[12]	training's rmse: 814.138
[13]	training's rmse: 789.241
[14]	training's rmse: 765.387
[15]	training's rmse: 742.36
[16]	training's rmse: 720.4
[17]	training's rmse: 699.364
[18]	training's rmse: 679.087
[19]	training's rmse: 659.491
[20]	training's rmse: 640.858
[21]	training's rmse: 623.236
[22]	training's rmse: 606.29
[23]	training's rmse: 590.073
[24]	training

[32m[I 2022-05-15 00:02:35,134][0m Trial 16 finished with value: 0.0725 and parameters: {'num_leaves': 5, 'learning_rate': 0.035360906191480794, 'objective': 'tweedie', 'min_child_samples': 20, 'subsample': 0.6, 'colsample_bytree': 0.6000000000000001, 'reg_alpha': 0.4, 'reg_lambda': 0.0, 'tweedie_variance_power': 1.0, 'subsample_freq': 5}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1103.05
[2]	training's rmse: 979.524
[3]	training's rmse: 834.525
[4]	training's rmse: 688.537
[5]	training's rmse: 558.967
[6]	training's rmse: 470.691
[7]	training's rmse: 414.526
[8]	training's rmse: 382.978
[9]	training's rmse: 359.876
[10]	training's rmse: 348.104
[11]	training's rmse: 344.632
[12]	training's rmse: 341.669
[13]	training's rmse: 332.727
[14]	training's rmse: 330.996
[15]	training's rmse: 327.766
[16]	training's rmse: 326.526
[17]	training's rmse: 323.499
[18]	training's rmse: 320.32
[19]	training's rmse: 319.655
[20]	training's rmse: 318.037
[21]	training's rmse: 313.088
[22]	training's rmse: 312.04
[23]	training's rmse: 311.341
[24]	training's

[32m[I 2022-05-15 00:05:03,508][0m Trial 17 finished with value: 0.0725 and parameters: {'num_leaves': 4, 'learning_rate': 0.4373979979857726, 'objective': 'tweedie', 'min_child_samples': 50, 'subsample': 0.8, 'colsample_bytree': 0.4, 'reg_alpha': 0.0, 'reg_lambda': 0.4, 'tweedie_variance_power': 1.6, 'subsample_freq': 3}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1226.85
[2]	training's rmse: 1222.52
[3]	training's rmse: 1218.25
[4]	training's rmse: 1214.05
[5]	training's rmse: 1209.89
[6]	training's rmse: 1205.8
[7]	training's rmse: 1201.76
[8]	training's rmse: 1197.76
[9]	training's rmse: 1193.8
[10]	training's rmse: 1189.88
[11]	training's rmse: 1186
[12]	training's rmse: 1182.15
[13]	training's rmse: 1178.32
[14]	training's rmse: 1174.52
[15]	training's rmse: 1170.75
[16]	training's rmse: 1167
[17]	training's rmse: 1163.27
[18]	training's rmse: 1159.56
[19]	training's rmse: 1155.86
[20]	training's rmse: 1152.17
[21]	training's rmse: 1148.49
[22]	training's rmse: 1144.82
[23]	training's rmse: 1141.15
[24]	training's rmse:

[32m[I 2022-05-15 00:09:45,252][0m Trial 18 finished with value: 0.0715 and parameters: {'num_leaves': 8, 'learning_rate': 0.012516954890250833, 'objective': 'tweedie', 'min_child_samples': 20, 'subsample': 1.0, 'colsample_bytree': 0.7000000000000001, 'reg_alpha': 0.2, 'reg_lambda': 0.1, 'tweedie_variance_power': 1.5}. Best is trial 2 with value: 0.0691.[0m


Fit


  in terms of current number of round (e.g. yields learning rate decay).


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 35821
[LightGBM] [Info] Number of data points in the train set: 2244863, number of used features: 175
[LightGBM] [Info] Start training from score 6.111432
[1]	training's rmse: 1184.5
[2]	training's rmse: 1135.45
[3]	training's rmse: 1083.39
[4]	training's rmse: 1028.32
[5]	training's rmse: 970.408
[6]	training's rmse: 911.484
[7]	training's rmse: 852.443
[8]	training's rmse: 794.123
[9]	training's rmse: 737.855
[10]	training's rmse: 685.171
[11]	training's rmse: 636.634
[12]	training's rmse: 591.304
[13]	training's rmse: 550.675
[14]	training's rmse: 515.293
[15]	training's rmse: 483.273
[16]	training's rmse: 455.306
[17]	training's rmse: 431.111
[18]	training's rmse: 410.84
[19]	training's rmse: 392.684
[20]	training's rmse: 376.838
[21]	training's rmse: 364.312
[22]	training's rmse: 353.165
[23]	training's rmse: 343.8
[24]	training's r

[32m[I 2022-05-15 00:13:41,261][0m Trial 19 finished with value: 0.0776 and parameters: {'num_leaves': 5, 'learning_rate': 0.1190962206280641, 'objective': 'tweedie', 'min_child_samples': 40, 'subsample': 0.5, 'colsample_bytree': 0.4, 'reg_alpha': 0.6000000000000001, 'reg_lambda': 0.2, 'tweedie_variance_power': 1.3, 'subsample_freq': 5}. Best is trial 2 with value: 0.0691.[0m


Number of finished trials: 20
Best trial:
  Value: 0.0691
  Params: 
    num_leaves: 6
    learning_rate: 0.03309337787385885
    objective: tweedie
    min_child_samples: 20
    subsample: 0.7000000000000001
    colsample_bytree: 0.4
    reg_alpha: 0.30000000000000004
    reg_lambda: 0.1
    tweedie_variance_power: 1.1
    subsample_freq: 6


In [42]:
trial.datetime_complete

datetime.datetime(2022, 5, 14, 20, 41, 54, 706093)