# Driver

## Imports

In [1]:
import json

import modin.pandas as pd
import numpy as np
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK, space_eval
from sklearn.metrics import mean_absolute_error
from sklearn.multioutput import MultiOutputRegressor

import utils
from configs.space import space
from transformer.DataAggregator import DataAggregator
from transformer.Dataformator import DataFormator
from transformer.ImputeMean import ImputeMean
from transformer.TrainTestSplit import TrainTestSplit

## Data Loading

In [2]:
# Read the two raw CSV inputs from the local ./data directory.
# NOTE: the name `calender` (sic) is what the transformation cell passes to
# DataFormator, so the spelling must stay in sync with that cell.
sales = pd.read_csv("./data/sales_train_validation.csv")
calender =pd.read_csv("./data/calendar.csv")


    from distributed import Client

    client = Client()

Data types of partitions are different! Please refer to the troubleshooting section of the Modin documentation to fix this issue.


## Data Transformation

In [3]:
# Turn the raw sales table into the modelling frame `data` and split it.
# The transformer classes are project-local and not visible here; the step
# descriptions below are inferred from their names/arguments -- TODO confirm.

# Step 1: aggregate `sales` over the ("store_id", "dept_id") keys with "sum".
data_aggregator = DataAggregator(sales)
aggregated_data = data_aggregator.aggregate(["store_id","dept_id"],"sum")
# Step 2: combine the aggregate with the calendar table; presumably yields one
# column per store/department pair (the later loops iterate `data.columns`).
data_formator =DataFormator(aggregated_data,calender)
data = data_formator.format_data('store_id','dept_id')
# Step 3: `data` is rebound here to the imputed result (zeros replaced by a
# mean, per the method name -- verify which mean is used).
impute_mean =ImputeMean(data)
data = impute_mean.replace_zero_with_mean()
# Step 4: 70/30 split with shuffle=False, so row order is presumably preserved
# (random_state should then have no effect -- confirm in TrainTestSplit).
tts = TrainTestSplit( data, test_size=0.3, random_state=0,shuffle=False)
X_train_, X_test_, y_train_, y_test_ = tts.split_data()

Please refer to https://modin.readthedocs.io/en/stable/supported_apis/defaulting_to_pandas.html for explanation.


## Parameter Space for Xgboost data preparation
The data-preparation approach used for this model follows the papers linked further below in this notebook.

In [4]:
target_sequence_length = 1 #The forecasting horizon
# NOTE(review): `test_size` is not referenced by the visible cells; the split
# cells pass the literal test_size=0.3 instead.
test_size = 0.30
# Settings forwarded to utils.prepare_data_for_xgb in the tuning and training
# loops. Their exact meaning is defined by that helper (not visible here) --
# TODO confirm the inline descriptions below against utils.
hyperparameters = {
    "in_length" : 1, # =target_sequence_length, forecasting horizon length
    "step_size" : 4, # window size
    }

## Hyperparameter Tuning: Searching for the Best Parameters

### Model Tuning

In [5]:
# Tune an XGBoost configuration separately for every series (column of `data`)
# with Hyperopt's TPE search and collect the winner per series in `best`,
# keyed as "best_param_<column>".
#
# NOTE(review): each trial is scored on the test windows, so the reported
# "best loss" is optimistic; a validation split taken from the training data
# would give an unbiased estimate.
best = {}

for i in data.columns:
    # Single-column frames holding only the current series.
    y_train_data_ = pd.DataFrame({i: y_train_[i]})
    y_test_data_ = pd.DataFrame({i: y_test_[i]})

    x_train, y_train, x_test, y_test = utils.prepare_data_for_xgb(
        y_train_data_, y_test_data_,
        hyperparameters["in_length"], hyperparameters["step_size"],
        target_sequence_length,
    )

    def objective(params):
        """Fit one candidate configuration and return its test MAE as the loss.

        `params` is one sample drawn from `space`; it is forwarded unchanged to
        XGBRegressor. The closure reads the x/y windows of the current series.
        """
        model = MultiOutputRegressor(xgb.XGBRegressor(**params)).fit(x_train, y_train)
        test_forecasts = model.predict(x_test)
        # The loss is a mean absolute error (the old local was misnamed `mse`).
        mae = mean_absolute_error(y_test, test_forecasts)
        return {'loss': mae, 'status': STATUS_OK}

    # optimize hyperparameters using Hyperopt's Tree-structured Parzen Estimator (TPE) algorithm
    print(f"Training .......\n model_{i} ")
    raw_best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

    # For hp.choice dimensions fmin reports the *index* of the selected option,
    # not the option itself. space_eval maps the raw result back onto the real
    # parameter values, so what is stored can be passed to XGBRegressor as-is.
    resolved = space_eval(space, raw_best)

    # Options defined through numpy come back as numpy scalars; convert them to
    # plain Python numbers so `best` remains JSON-serialisable.
    best_key = f"best_param_{i}"
    best[best_key] = {
        name: value.item() if isinstance(value, np.generic) else value
        for name, value in resolved.items()
    }

    print(best[best_key])



Training .......
 model_CA_1_FOODS_1 
100%|██████████| 100/100 [00:19<00:00,  5.06trial/s, best loss: 52.586634815989676]
{'gamma': 0.5588210624767112, 'learning_rate': 0.05074997617448306, 'max_depth': 0, 'n_estimators': 64, 'reg_alpha': 0.1570738033857957, 'reg_lambda': 0.3811456627123333, 'subsample': 0.9558001899293613}




Training .......
 model_CA_1_FOODS_2 
100%|██████████| 100/100 [00:20<00:00,  4.88trial/s, best loss: 79.29041146231698]
{'gamma': 0.12388016989323263, 'learning_rate': 0.15542105913122262, 'max_depth': 0, 'n_estimators': 127, 'reg_alpha': 0.8712273858269363, 'reg_lambda': 0.5910378848614662, 'subsample': 0.6599834393809063}




Training .......
 model_CA_1_FOODS_3 
100%|██████████| 100/100 [00:19<00:00,  5.08trial/s, best loss: 334.1713662314248]
{'gamma': 0.36465572675570623, 'learning_rate': 0.029430824850158386, 'max_depth': 0, 'n_estimators': 14, 'reg_alpha': 0.7453984450528848, 'reg_lambda': 0.70165266394807, 'subsample': 0.1745655713417647}




Training .......
 model_CA_1_HOBBIES_1 
100%|██████████| 100/100 [00:19<00:00,  5.17trial/s, best loss: 86.95195700905539]
{'gamma': 0.7556096198694703, 'learning_rate': 0.03747273181330879, 'max_depth': 0, 'n_estimators': 55, 'reg_alpha': 0.09641772946373879, 'reg_lambda': 0.9981059636634522, 'subsample': 0.10153131495183741}




Training .......
 model_CA_1_HOBBIES_2 
100%|██████████| 100/100 [00:17<00:00,  5.71trial/s, best loss: 17.002576974722054]
{'gamma': 0.3474895008233909, 'learning_rate': 0.14389199385035126, 'max_depth': 0, 'n_estimators': 171, 'reg_alpha': 0.6345637879092634, 'reg_lambda': 0.009768594613932202, 'subsample': 0.561030415398034}




Training .......
 model_CA_1_HOUSEHOLD_1 
100%|██████████| 100/100 [00:19<00:00,  5.15trial/s, best loss: 136.63267570442252]
{'gamma': 0.7459098551556653, 'learning_rate': 0.12121902516986023, 'max_depth': 0, 'n_estimators': 179, 'reg_alpha': 0.9652863754604647, 'reg_lambda': 0.1679659605999831, 'subsample': 0.17403513706112314}




Training .......
 model_CA_1_HOUSEHOLD_2 
100%|██████████| 100/100 [00:18<00:00,  5.33trial/s, best loss: 36.27036418781414]
{'gamma': 0.31830638471175965, 'learning_rate': 0.06943189799981729, 'max_depth': 0, 'n_estimators': 106, 'reg_alpha': 0.998851390463523, 'reg_lambda': 0.7826823765442714, 'subsample': 0.1562603008752001}




Training .......
 model_CA_2_FOODS_1 
100%|██████████| 100/100 [00:19<00:00,  5.09trial/s, best loss: 70.833363139546] 
{'gamma': 0.7407879575910307, 'learning_rate': 0.056412210500076776, 'max_depth': 0, 'n_estimators': 65, 'reg_alpha': 0.004466007004977728, 'reg_lambda': 0.4499523944218754, 'subsample': 0.42330096075995466}




Training .......
 model_CA_2_FOODS_2 
100%|██████████| 100/100 [00:21<00:00,  4.68trial/s, best loss: 158.79138112679271]
{'gamma': 0.7092780688945485, 'learning_rate': 0.11734075093854937, 'max_depth': 0, 'n_estimators': 28, 'reg_alpha': 0.0462276755692661, 'reg_lambda': 0.5241559478829924, 'subsample': 0.3002952679196812}




Training .......
 model_CA_2_FOODS_3 
100%|██████████| 100/100 [00:22<00:00,  4.35trial/s, best loss: 273.234287928868] 
{'gamma': 0.2607198402136667, 'learning_rate': 0.04871406459619994, 'max_depth': 0, 'n_estimators': 195, 'reg_alpha': 0.6173202376456515, 'reg_lambda': 0.6063672800687152, 'subsample': 0.2577381995310242}




Training .......
 model_CA_2_HOBBIES_1 
100%|██████████| 100/100 [00:22<00:00,  4.53trial/s, best loss: 67.22239930479677]
{'gamma': 0.7240060593002589, 'learning_rate': 0.023588340800302532, 'max_depth': 0, 'n_estimators': 154, 'reg_alpha': 0.6700811846973521, 'reg_lambda': 0.07016515578912991, 'subsample': 0.5161355913450786}




Training .......
 model_CA_2_HOBBIES_2 
100%|██████████| 100/100 [00:20<00:00,  4.98trial/s, best loss: 13.321292637111423]
{'gamma': 0.05752566772752793, 'learning_rate': 0.02281650574762629, 'max_depth': 0, 'n_estimators': 182, 'reg_alpha': 0.9915200219810656, 'reg_lambda': 0.9337515329312664, 'subsample': 0.3481919004719462}




Training .......
 model_CA_2_HOUSEHOLD_1 
100%|██████████| 100/100 [00:23<00:00,  4.17trial/s, best loss: 149.40455851521526]
{'gamma': 0.7634286871538463, 'learning_rate': 0.025353043669084196, 'max_depth': 0, 'n_estimators': 128, 'reg_alpha': 0.2839915241637063, 'reg_lambda': 0.23878612030891788, 'subsample': 0.5888237131804535}




Training .......
 model_CA_2_HOUSEHOLD_2 
100%|██████████| 100/100 [00:21<00:00,  4.56trial/s, best loss: 55.480144900875494]
{'gamma': 0.343993032749161, 'learning_rate': 0.10579430485045312, 'max_depth': 0, 'n_estimators': 54, 'reg_alpha': 0.2572626220780719, 'reg_lambda': 0.4629179105853045, 'subsample': 0.9980648512720148}




Training .......
 model_CA_3_FOODS_1 
100%|██████████| 100/100 [00:24<00:00,  4.08trial/s, best loss: 61.88190892359594]
{'gamma': 0.861839768351795, 'learning_rate': 0.14567195904762253, 'max_depth': 0, 'n_estimators': 109, 'reg_alpha': 0.8138450507392905, 'reg_lambda': 0.02449695746574166, 'subsample': 0.6294739452686405}




Training .......
 model_CA_3_FOODS_2 
100%|██████████| 100/100 [00:22<00:00,  4.52trial/s, best loss: 85.47476623108336]
{'gamma': 0.3652443815700329, 'learning_rate': 0.018538818258547782, 'max_depth': 3, 'n_estimators': 111, 'reg_alpha': 0.7294177510118056, 'reg_lambda': 0.7705903647081914, 'subsample': 0.11432704892882226}




Training .......
 model_CA_3_FOODS_3 
100%|██████████| 100/100 [00:22<00:00,  4.40trial/s, best loss: 340.10030252950173]
{'gamma': 0.31669620548684, 'learning_rate': 0.029760865616342013, 'max_depth': 0, 'n_estimators': 23, 'reg_alpha': 0.9329154636016294, 'reg_lambda': 0.5844656575028409, 'subsample': 0.9980393693703729}




Training .......
 model_CA_3_HOBBIES_1 
100%|██████████| 100/100 [00:21<00:00,  4.64trial/s, best loss: 94.61110959353147]
{'gamma': 0.35241846042079167, 'learning_rate': 0.07356741084753197, 'max_depth': 0, 'n_estimators': 37, 'reg_alpha': 0.31138592544267546, 'reg_lambda': 0.012916848804217673, 'subsample': 0.5776856541489349}




Training .......
 model_CA_3_HOBBIES_2 
100%|██████████| 100/100 [00:19<00:00,  5.12trial/s, best loss: 15.758779672475962]
{'gamma': 0.7382104735227537, 'learning_rate': 0.28209623638716813, 'max_depth': 0, 'n_estimators': 61, 'reg_alpha': 0.3383546390734284, 'reg_lambda': 0.16802787096632416, 'subsample': 0.1586556922828337}




Training .......
 model_CA_3_HOUSEHOLD_1 
100%|██████████| 100/100 [00:23<00:00,  4.30trial/s, best loss: 177.89505538406905]
{'gamma': 0.5324453903330196, 'learning_rate': 0.03065317156971177, 'max_depth': 0, 'n_estimators': 97, 'reg_alpha': 0.9232419182110484, 'reg_lambda': 0.05378617977584782, 'subsample': 0.5679257748252957}




Training .......
 model_CA_3_HOUSEHOLD_2 
100%|██████████| 100/100 [00:20<00:00,  4.89trial/s, best loss: 46.89258580774694]
{'gamma': 0.7952383052032264, 'learning_rate': 0.018380292422813242, 'max_depth': 0, 'n_estimators': 142, 'reg_alpha': 0.8579991513031977, 'reg_lambda': 0.9499275107480382, 'subsample': 0.14320391730005794}




Training .......
 model_CA_4_FOODS_1 
100%|██████████| 100/100 [00:21<00:00,  4.57trial/s, best loss: 32.91882772378988]
{'gamma': 0.2081306778678953, 'learning_rate': 0.04206500947978999, 'max_depth': 0, 'n_estimators': 89, 'reg_alpha': 0.18814674898537825, 'reg_lambda': 0.927369104608424, 'subsample': 0.7297794219338474}




Training .......
 model_CA_4_FOODS_2 
100%|██████████| 100/100 [00:20<00:00,  4.82trial/s, best loss: 48.30916141963505]
{'gamma': 0.16765541213944157, 'learning_rate': 0.08001871765403326, 'max_depth': 0, 'n_estimators': 9, 'reg_alpha': 0.47428688790688267, 'reg_lambda': 0.20040144524572712, 'subsample': 0.196572483623611}




Training .......
 model_CA_4_FOODS_3 
100%|██████████| 100/100 [00:22<00:00,  4.36trial/s, best loss: 115.75073626324847]
{'gamma': 0.3178721965888936, 'learning_rate': 0.020903647808618843, 'max_depth': 0, 'n_estimators': 75, 'reg_alpha': 0.3028648865503935, 'reg_lambda': 0.9958247693232871, 'subsample': 0.6617907198007507}




Training .......
 model_CA_4_HOBBIES_1 
100%|██████████| 100/100 [00:20<00:00,  4.86trial/s, best loss: 71.80174948952414]
{'gamma': 0.3076710642424022, 'learning_rate': 0.039246324286996155, 'max_depth': 0, 'n_estimators': 146, 'reg_alpha': 0.6146824480509617, 'reg_lambda': 0.545389805974777, 'subsample': 0.15626701973549534}




Training .......
 model_CA_4_HOBBIES_2 
100%|██████████| 100/100 [00:19<00:00,  5.22trial/s, best loss: 7.31074642634892]
{'gamma': 0.4724104903833074, 'learning_rate': 0.039870194500154214, 'max_depth': 0, 'n_estimators': 129, 'reg_alpha': 0.21492602813498055, 'reg_lambda': 0.6401726184661014, 'subsample': 0.45176154992409306}




Training .......
 model_CA_4_HOUSEHOLD_1 
100%|██████████| 100/100 [00:20<00:00,  4.77trial/s, best loss: 46.70050368942581]
{'gamma': 0.2404438407963951, 'learning_rate': 0.1029833254183118, 'max_depth': 2, 'n_estimators': 116, 'reg_alpha': 0.5008466545659385, 'reg_lambda': 0.6843442608388475, 'subsample': 0.437336095750083}




Training .......
 model_CA_4_HOUSEHOLD_2 
100%|██████████| 100/100 [00:20<00:00,  4.87trial/s, best loss: 19.653900413246422]
{'gamma': 0.5409024714684539, 'learning_rate': 0.10077735269018101, 'max_depth': 0, 'n_estimators': 19, 'reg_alpha': 0.44324950177183764, 'reg_lambda': 0.7766355605611338, 'subsample': 0.12016785186572781}




Training .......
 model_TX_1_FOODS_1 
100%|██████████| 100/100 [00:23<00:00,  4.28trial/s, best loss: 35.55320206222001]
{'gamma': 0.45143094185337207, 'learning_rate': 0.06344384511475738, 'max_depth': 0, 'n_estimators': 93, 'reg_alpha': 0.04033098906619227, 'reg_lambda': 0.726422818330411, 'subsample': 0.10209126734457469}




Training .......
 model_TX_1_FOODS_2 
100%|██████████| 100/100 [00:21<00:00,  4.57trial/s, best loss: 52.75869729635599]
{'gamma': 0.4576180797737322, 'learning_rate': 0.022292127588987555, 'max_depth': 0, 'n_estimators': 108, 'reg_alpha': 0.5340447330560512, 'reg_lambda': 0.7548295342927687, 'subsample': 0.5415459550493279}




Training .......
 model_TX_1_FOODS_3 
100%|██████████| 100/100 [00:22<00:00,  4.48trial/s, best loss: 207.10577349896198]
{'gamma': 0.011358069435407386, 'learning_rate': 0.02125819754215819, 'max_depth': 0, 'n_estimators': 36, 'reg_alpha': 0.15425379149730567, 'reg_lambda': 0.9883134635817898, 'subsample': 0.7986765166627464}




Training .......
 model_TX_1_HOBBIES_1 
100%|██████████| 100/100 [00:21<00:00,  4.71trial/s, best loss: 49.37886687592193]
{'gamma': 0.44679373138166106, 'learning_rate': 0.04036330463839631, 'max_depth': 0, 'n_estimators': 90, 'reg_alpha': 0.8318675355747676, 'reg_lambda': 0.5295794203256627, 'subsample': 0.1783758400046711}




Training .......
 model_TX_1_HOBBIES_2 
100%|██████████| 100/100 [00:18<00:00,  5.27trial/s, best loss: 12.230013243801944]
{'gamma': 0.5841019118673093, 'learning_rate': 0.8748671154758422, 'max_depth': 1, 'n_estimators': 181, 'reg_alpha': 0.15086662582766736, 'reg_lambda': 0.7281189427057249, 'subsample': 0.16721548367011352}




Training .......
 model_TX_1_HOUSEHOLD_1 
100%|██████████| 100/100 [00:21<00:00,  4.69trial/s, best loss: 108.71659691016991]
{'gamma': 0.5703488657186145, 'learning_rate': 0.07470834900394373, 'max_depth': 0, 'n_estimators': 60, 'reg_alpha': 0.1967045737523126, 'reg_lambda': 0.599005364093277, 'subsample': 0.11483677211697557}




Training .......
 model_TX_1_HOUSEHOLD_2 
100%|██████████| 100/100 [00:20<00:00,  4.87trial/s, best loss: 33.361344664246886]
{'gamma': 0.569386961017191, 'learning_rate': 0.3157459142451313, 'max_depth': 0, 'n_estimators': 33, 'reg_alpha': 0.2208857314604689, 'reg_lambda': 0.8242028288173187, 'subsample': 0.9080579574621797}




Training .......
 model_TX_2_FOODS_1 
100%|██████████| 100/100 [00:20<00:00,  4.79trial/s, best loss: 38.04285324203384]
{'gamma': 0.019872383982020847, 'learning_rate': 0.02076413881917787, 'max_depth': 2, 'n_estimators': 52, 'reg_alpha': 0.4947470779045674, 'reg_lambda': 0.083693854555696, 'subsample': 0.37365208034474096}




Training .......
 model_TX_2_FOODS_2 
100%|██████████| 100/100 [00:21<00:00,  4.61trial/s, best loss: 59.92894398082387]
{'gamma': 0.005356418935194713, 'learning_rate': 0.018820724344367674, 'max_depth': 0, 'n_estimators': 64, 'reg_alpha': 0.7386420007564747, 'reg_lambda': 0.4805544543921316, 'subsample': 0.47704313112894114}




Training .......
 model_TX_2_FOODS_3 
100%|██████████| 100/100 [00:21<00:00,  4.55trial/s, best loss: 235.88238482708698]
{'gamma': 0.6428421091750623, 'learning_rate': 0.025551945835298937, 'max_depth': 2, 'n_estimators': 14, 'reg_alpha': 0.5755402789531425, 'reg_lambda': 0.802565543819492, 'subsample': 0.4422029965137498}




Training .......
 model_TX_2_HOBBIES_1 
100%|██████████| 100/100 [00:21<00:00,  4.71trial/s, best loss: 52.95463615364128]
{'gamma': 0.5171860046069167, 'learning_rate': 0.034874407405528246, 'max_depth': 0, 'n_estimators': 74, 'reg_alpha': 0.6349195218185381, 'reg_lambda': 0.13729033118395179, 'subsample': 0.2911510575263648}




Training .......
 model_TX_2_HOBBIES_2 
100%|██████████| 100/100 [00:19<00:00,  5.05trial/s, best loss: 17.475852552827423]
{'gamma': 0.654639628619461, 'learning_rate': 0.1156624523250407, 'max_depth': 0, 'n_estimators': 9, 'reg_alpha': 0.6109045701069916, 'reg_lambda': 0.8689341647512425, 'subsample': 0.23114294714902792}




Training .......
 model_TX_2_HOUSEHOLD_1 
100%|██████████| 100/100 [00:21<00:00,  4.56trial/s, best loss: 110.69672335111179]
{'gamma': 0.6629629236949397, 'learning_rate': 0.042925070286563005, 'max_depth': 0, 'n_estimators': 199, 'reg_alpha': 0.5767440338090593, 'reg_lambda': 0.8274832164531146, 'subsample': 0.20470598995934502}




Training .......
 model_TX_2_HOUSEHOLD_2 
100%|██████████| 100/100 [00:19<00:00,  5.20trial/s, best loss: 31.821432847243088]
{'gamma': 0.24503017531548238, 'learning_rate': 0.06580408512479687, 'max_depth': 0, 'n_estimators': 123, 'reg_alpha': 0.7073583299366927, 'reg_lambda': 0.8254894945066705, 'subsample': 0.22483709216243275}




Training .......
 model_TX_3_FOODS_1 
100%|██████████| 100/100 [00:20<00:00,  4.79trial/s, best loss: 43.33379529406141]
{'gamma': 0.45775840083617797, 'learning_rate': 0.10062921404238263, 'max_depth': 0, 'n_estimators': 43, 'reg_alpha': 0.7360524440861652, 'reg_lambda': 0.45986908236147606, 'subsample': 0.9647242436105248}




Training .......
 model_TX_3_FOODS_2 
100%|██████████| 100/100 [00:24<00:00,  4.07trial/s, best loss: 67.17330612502731]
{'gamma': 0.275985295984844, 'learning_rate': 0.12333589089995119, 'max_depth': 0, 'n_estimators': 73, 'reg_alpha': 0.3043017278184674, 'reg_lambda': 0.25135956125013476, 'subsample': 0.6314649106570508}




Training .......
 model_TX_3_FOODS_3 
100%|██████████| 100/100 [00:24<00:00,  4.13trial/s, best loss: 203.7774205774694]
{'gamma': 0.8183803681247244, 'learning_rate': 0.02523376177294422, 'max_depth': 0, 'n_estimators': 96, 'reg_alpha': 0.3540462934924419, 'reg_lambda': 0.6752829123148416, 'subsample': 0.9960228207560924}




Training .......
 model_TX_3_HOBBIES_1 
100%|██████████| 100/100 [00:21<00:00,  4.58trial/s, best loss: 80.21140849506938]
{'gamma': 0.1080649541959728, 'learning_rate': 0.8836761434677427, 'max_depth': 0, 'n_estimators': 74, 'reg_alpha': 0.38354853558108054, 'reg_lambda': 0.9806816430128263, 'subsample': 0.11116413243004987}




Training .......
 model_TX_3_HOBBIES_2 
100%|██████████| 100/100 [00:21<00:00,  4.65trial/s, best loss: 17.192830265818777]
{'gamma': 0.47253906322003947, 'learning_rate': 0.3539959479748102, 'max_depth': 0, 'n_estimators': 183, 'reg_alpha': 0.7565131599904404, 'reg_lambda': 0.2746520447592633, 'subsample': 0.1937270304777622}




Training .......
 model_TX_3_HOUSEHOLD_1 
100%|██████████| 100/100 [00:24<00:00,  4.16trial/s, best loss: 118.20505443653026]
{'gamma': 0.5151266411235738, 'learning_rate': 0.05782443245245471, 'max_depth': 0, 'n_estimators': 49, 'reg_alpha': 0.15769230953970237, 'reg_lambda': 0.17895303430468706, 'subsample': 0.9450266991159563}




Training .......
 model_TX_3_HOUSEHOLD_2 
100%|██████████| 100/100 [00:22<00:00,  4.42trial/s, best loss: 24.417405882081784]
{'gamma': 0.21469532158006166, 'learning_rate': 0.2810608460532317, 'max_depth': 0, 'n_estimators': 58, 'reg_alpha': 0.1513528047395567, 'reg_lambda': 0.5627153381728833, 'subsample': 0.5421972429049938}




Training .......
 model_WI_1_FOODS_1 
100%|██████████| 100/100 [00:23<00:00,  4.26trial/s, best loss: 51.1628229101221]
{'gamma': 0.03132739956623207, 'learning_rate': 0.06836934972500036, 'max_depth': 0, 'n_estimators': 60, 'reg_alpha': 0.7361550945579006, 'reg_lambda': 0.018063526737600057, 'subsample': 0.9858168481751596}




Training .......
 model_WI_1_FOODS_2 
100%|██████████| 100/100 [00:22<00:00,  4.43trial/s, best loss: 124.89498410525022]
{'gamma': 0.0497246568205546, 'learning_rate': 0.03295752936147199, 'max_depth': 0, 'n_estimators': 81, 'reg_alpha': 0.3707888004255495, 'reg_lambda': 0.20246282228627455, 'subsample': 0.1432457976498608}




Training .......
 model_WI_1_FOODS_3 
100%|██████████| 100/100 [00:25<00:00,  3.91trial/s, best loss: 254.41639762825065]
{'gamma': 0.455610063309114, 'learning_rate': 0.147581713178079, 'max_depth': 3, 'n_estimators': 41, 'reg_alpha': 0.12675576231273622, 'reg_lambda': 0.3581182105282215, 'subsample': 0.13438438582793905}




Training .......
 model_WI_1_HOBBIES_1 
100%|██████████| 100/100 [00:23<00:00,  4.25trial/s, best loss: 86.51422567300864]
{'gamma': 0.8571170542289852, 'learning_rate': 0.03910704634920598, 'max_depth': 0, 'n_estimators': 60, 'reg_alpha': 0.09558552893615216, 'reg_lambda': 0.5125935237847005, 'subsample': 0.11042762327800501}




Training .......
 model_WI_1_HOBBIES_2 
100%|██████████| 100/100 [00:21<00:00,  4.74trial/s, best loss: 14.354733240354312]
{'gamma': 0.03289889851160113, 'learning_rate': 0.1437440166340672, 'max_depth': 0, 'n_estimators': 94, 'reg_alpha': 0.7830582799337992, 'reg_lambda': 0.44680737050635955, 'subsample': 0.1236700162776351}




Training .......
 model_WI_1_HOUSEHOLD_1 
100%|██████████| 100/100 [00:24<00:00,  4.14trial/s, best loss: 111.94833117931873]
{'gamma': 0.2756185893412686, 'learning_rate': 0.0431927227234141, 'max_depth': 0, 'n_estimators': 139, 'reg_alpha': 0.18004163302636475, 'reg_lambda': 0.8570935519345697, 'subsample': 0.6663423941264718}




Training .......
 model_WI_1_HOUSEHOLD_2 
100%|██████████| 100/100 [00:21<00:00,  4.59trial/s, best loss: 31.716602112029815]
{'gamma': 0.5557461230128099, 'learning_rate': 0.08343011244529513, 'max_depth': 0, 'n_estimators': 12, 'reg_alpha': 0.4141375720251462, 'reg_lambda': 0.6946098708645698, 'subsample': 0.3957296439488189}




Training .......
 model_WI_2_FOODS_1 
100%|██████████| 100/100 [00:24<00:00,  4.14trial/s, best loss: 46.33593707318072]
{'gamma': 0.80914580176805, 'learning_rate': 0.3580668635445632, 'max_depth': 0, 'n_estimators': 47, 'reg_alpha': 0.11363408233412717, 'reg_lambda': 0.7930536244371424, 'subsample': 0.4134136597197903}




Training .......
 model_WI_2_FOODS_2 
100%|██████████| 100/100 [00:22<00:00,  4.45trial/s, best loss: 177.73890728717083]
{'gamma': 0.8324346746085025, 'learning_rate': 0.029716431068106612, 'max_depth': 0, 'n_estimators': 173, 'reg_alpha': 0.669428793877894, 'reg_lambda': 0.03470394284418, 'subsample': 0.3757772659245178}




Training .......
 model_WI_2_FOODS_3 
100%|██████████| 100/100 [00:23<00:00,  4.30trial/s, best loss: 285.2672618519176]
{'gamma': 0.5411058299910246, 'learning_rate': 0.03702599227668506, 'max_depth': 2, 'n_estimators': 27, 'reg_alpha': 0.3326541927294104, 'reg_lambda': 0.30197792173353094, 'subsample': 0.16418669729267818}




Training .......
 model_WI_2_HOBBIES_1 
100%|██████████| 100/100 [00:23<00:00,  4.19trial/s, best loss: 38.908307909131885]
{'gamma': 0.12230646913221754, 'learning_rate': 0.048395991302880616, 'max_depth': 1, 'n_estimators': 74, 'reg_alpha': 0.6448275938262289, 'reg_lambda': 0.6682485020128388, 'subsample': 0.8871648587230653}




Training .......
 model_WI_2_HOBBIES_2 
100%|██████████| 100/100 [00:20<00:00,  4.82trial/s, best loss: 11.297372724626447]
{'gamma': 0.588059764246618, 'learning_rate': 0.07366359764496039, 'max_depth': 2, 'n_estimators': 116, 'reg_alpha': 0.9411423226628364, 'reg_lambda': 0.7357732373896229, 'subsample': 0.2481877000084947}




Training .......
 model_WI_2_HOUSEHOLD_1 
100%|██████████| 100/100 [00:23<00:00,  4.31trial/s, best loss: 139.02668708854623]
{'gamma': 0.4710862404740909, 'learning_rate': 0.03609956133030435, 'max_depth': 0, 'n_estimators': 45, 'reg_alpha': 0.8020843840186299, 'reg_lambda': 0.5379780790661505, 'subsample': 0.14973177206155186}




Training .......
 model_WI_2_HOUSEHOLD_2 
100%|██████████| 100/100 [00:22<00:00,  4.52trial/s, best loss: 19.857890629268194]
{'gamma': 0.6287288373793504, 'learning_rate': 0.04036786324876051, 'max_depth': 0, 'n_estimators': 149, 'reg_alpha': 0.7875797125736884, 'reg_lambda': 0.46058799067735245, 'subsample': 0.11098479237573385}




Training .......
 model_WI_3_FOODS_1 
100%|██████████| 100/100 [00:21<00:00,  4.58trial/s, best loss: 38.487697574642155]
{'gamma': 0.7107373872161139, 'learning_rate': 0.030886300930063868, 'max_depth': 0, 'n_estimators': 7, 'reg_alpha': 0.647883124035436, 'reg_lambda': 0.8350341732517451, 'subsample': 0.325921106833911}




Training .......
 model_WI_3_FOODS_2 
100%|██████████| 100/100 [00:22<00:00,  4.38trial/s, best loss: 70.65110042545345]
{'gamma': 0.6360408589554469, 'learning_rate': 0.021010689232445076, 'max_depth': 0, 'n_estimators': 55, 'reg_alpha': 0.4708527794921259, 'reg_lambda': 0.6394468686264886, 'subsample': 0.39495726307653595}




Training .......
 model_WI_3_FOODS_3 
100%|██████████| 100/100 [00:23<00:00,  4.20trial/s, best loss: 272.56447873415647]
{'gamma': 0.5697316181384277, 'learning_rate': 0.028045941420302697, 'max_depth': 5, 'n_estimators': 5, 'reg_alpha': 0.16181549596556433, 'reg_lambda': 0.6838399371848547, 'subsample': 0.2392566816512926}




Training .......
 model_WI_3_HOBBIES_1 
100%|██████████| 100/100 [00:22<00:00,  4.37trial/s, best loss: 45.282028625061464]
{'gamma': 0.1602469272135567, 'learning_rate': 0.01875816639394134, 'max_depth': 0, 'n_estimators': 105, 'reg_alpha': 0.29741130568266017, 'reg_lambda': 0.3481557780155611, 'subsample': 0.5277959382129103}




Training .......
 model_WI_3_HOBBIES_2 
100%|██████████| 100/100 [00:20<00:00,  4.86trial/s, best loss: 8.651480681412703]
{'gamma': 0.3200786419139773, 'learning_rate': 0.09356973620601616, 'max_depth': 0, 'n_estimators': 6, 'reg_alpha': 0.28862362268090747, 'reg_lambda': 0.6419828093066774, 'subsample': 0.643712103850159}




Training .......
 model_WI_3_HOUSEHOLD_1 
100%|██████████| 100/100 [00:24<00:00,  4.13trial/s, best loss: 96.38408890304032]
{'gamma': 0.06366564696466426, 'learning_rate': 0.025081050996349802, 'max_depth': 0, 'n_estimators': 93, 'reg_alpha': 0.23600009987152074, 'reg_lambda': 0.7356317991811427, 'subsample': 0.5940076822488978}




Training .......
 model_WI_3_HOUSEHOLD_2 
100%|██████████| 100/100 [00:22<00:00,  4.50trial/s, best loss: 24.413331892106918]
{'gamma': 0.1790459984105559, 'learning_rate': 0.03075873982033497, 'max_depth': 0, 'n_estimators': 98, 'reg_alpha': 0.4626942425458335, 'reg_lambda': 0.5325705383823421, 'subsample': 0.48234789966913555}


### Save Best parameters

In [6]:
# Persist the tuned parameters through the project helper (name 'temp', mode 'w').
# NOTE(review): the loading cell further down reads 'xgb_best_params', not
# 'temp' -- confirm how the saved file is promoted to that name.
utils.save_to_json('temp',best,'w')

## Model Training with the Best Parameters

### Reading Best Parameters

In [5]:
# Load previously stored parameters; this rebinds `best`, replacing whatever
# the tuning cell produced in this session.
best =utils.read_from_json('xgb_best_params',mode="r")
# Show the available keys (one "best_param_<column>" entry per series).
best.keys()

dict_keys(['best_param_CA_1_FOODS_1', 'best_param_CA_1_FOODS_2', 'best_param_CA_1_FOODS_3', 'best_param_CA_1_HOBBIES_1', 'best_param_CA_1_HOBBIES_2', 'best_param_CA_1_HOUSEHOLD_1', 'best_param_CA_1_HOUSEHOLD_2', 'best_param_CA_2_FOODS_1', 'best_param_CA_2_FOODS_2', 'best_param_CA_2_FOODS_3', 'best_param_CA_2_HOBBIES_1', 'best_param_CA_2_HOBBIES_2', 'best_param_CA_2_HOUSEHOLD_1', 'best_param_CA_2_HOUSEHOLD_2', 'best_param_CA_3_FOODS_1', 'best_param_CA_3_FOODS_2', 'best_param_CA_3_FOODS_3', 'best_param_CA_3_HOBBIES_1', 'best_param_CA_3_HOBBIES_2', 'best_param_CA_3_HOUSEHOLD_1', 'best_param_CA_3_HOUSEHOLD_2', 'best_param_CA_4_FOODS_1', 'best_param_CA_4_FOODS_2', 'best_param_CA_4_FOODS_3', 'best_param_CA_4_HOBBIES_1', 'best_param_CA_4_HOBBIES_2', 'best_param_CA_4_HOUSEHOLD_1', 'best_param_CA_4_HOUSEHOLD_2', 'best_param_TX_1_FOODS_1', 'best_param_TX_1_FOODS_2', 'best_param_TX_1_FOODS_3', 'best_param_TX_1_HOBBIES_1', 'best_param_TX_1_HOBBIES_2', 'best_param_TX_1_HOUSEHOLD_1', 'best_param_TX_

### Model Training

In [6]:
# Refit one MultiOutputRegressor-wrapped XGBoost model per series using the
# stored parameters, keep each fitted model in `trained_model` under
# "model_<column>", and print its MAE on the test windows.
#
# NOTE(review): the parameters are used exactly as stored. If the JSON holds
# raw fmin output, any hp.choice dimension would be an option index rather
# than the option value -- verify against configs.space.
trained_model = {}

# The seven XGBRegressor arguments taken from the stored configuration.
tuned_names = (
    'gamma', 'learning_rate', 'max_depth', 'n_estimators',
    'reg_alpha', 'reg_lambda', 'subsample',
)

for i in data.columns:
    # Single-column frames holding only the current series.
    y_train_data_ = pd.DataFrame({i: y_train_[i]})
    y_test_data_ = pd.DataFrame({i: y_test_[i]})

    x_train, y_train, x_test, y_test = utils.prepare_data_for_xgb(
        y_train_data_, y_test_data_,
        hyperparameters["in_length"], hyperparameters["step_size"],
        target_sequence_length,
    )

    # Look the configuration up once, then forward only the tuned arguments.
    series_params = best[f"best_param_{i}"]
    model = xgb.XGBRegressor(**{name: series_params[name] for name in tuned_names})

    model_key = f"model_{i}"
    trained_model[model_key] = MultiOutputRegressor(model).fit(x_train, y_train)
    print(f"trained_model   :{i}")

    test_forecasts = trained_model[model_key].predict(x_test)
    test_mae = mean_absolute_error(y_test, test_forecasts)
    print(f'Test MAE: { test_mae}')



trained_model   :CA_1_FOODS_1
Test MAE: 78.9238757153491




trained_model   :CA_1_FOODS_2
Test MAE: 84.73878446992461




trained_model   :CA_1_FOODS_3
Test MAE: 332.80848960609706




trained_model   :CA_1_HOBBIES_1
Test MAE: 109.77147498497597




trained_model   :CA_1_HOBBIES_2
Test MAE: 31.946973947378304




trained_model   :CA_1_HOUSEHOLD_1
Test MAE: 580.6659671276599




trained_model   :CA_1_HOUSEHOLD_2
Test MAE: 45.2247589751557




trained_model   :CA_2_FOODS_1
Test MAE: 88.87617396641444




trained_model   :CA_2_FOODS_2
Test MAE: 180.25912762132603




trained_model   :CA_2_FOODS_3
Test MAE: 282.54493126502405




trained_model   :CA_2_HOBBIES_1
Test MAE: 73.29558856670673




trained_model   :CA_2_HOBBIES_2
Test MAE: 16.03231786181043




trained_model   :CA_2_HOUSEHOLD_1
Test MAE: 151.80916141963505




trained_model   :CA_2_HOUSEHOLD_2
Test MAE: 71.21907587651606




trained_model   :CA_3_FOODS_1
Test MAE: 67.81268097137237




trained_model   :CA_3_FOODS_2
Test MAE: 90.84396191576978




trained_model   :CA_3_FOODS_3
Test MAE: 477.44100674715907




trained_model   :CA_3_HOBBIES_1
Test MAE: 99.90998552229021




trained_model   :CA_3_HOBBIES_2
Test MAE: 19.20704718903228




trained_model   :CA_3_HOUSEHOLD_1
Test MAE: 889.6386116934823




trained_model   :CA_3_HOUSEHOLD_2
Test MAE: 57.439996359231586




trained_model   :CA_4_FOODS_1
Test MAE: 39.18851711033108




trained_model   :CA_4_FOODS_2
Test MAE: 73.85021257733965




trained_model   :CA_4_FOODS_3
Test MAE: 181.69639064548733




trained_model   :CA_4_HOBBIES_1
Test MAE: 89.3296150260872




trained_model   :CA_4_HOBBIES_2
Test MAE: 8.422060206219866




trained_model   :CA_4_HOUSEHOLD_1
Test MAE: 50.727133264074794




trained_model   :CA_4_HOUSEHOLD_2
Test MAE: 23.428105601063976




trained_model   :TX_1_FOODS_1
Test MAE: 42.75639247227382




trained_model   :TX_1_FOODS_2
Test MAE: 69.41824788980551
trained_model   :TX_1_FOODS_3
Test MAE: 277.74009523191654




trained_model   :TX_1_HOBBIES_1
Test MAE: 63.7168954702524




trained_model   :TX_1_HOBBIES_2
Test MAE: 15.095882075649875




trained_model   :TX_1_HOUSEHOLD_1
Test MAE: 123.06829001686789




trained_model   :TX_1_HOUSEHOLD_2
Test MAE: 35.28760389848189




trained_model   :TX_2_FOODS_1
Test MAE: 42.51307619535006




trained_model   :TX_2_FOODS_2
Test MAE: 75.83666682743527




trained_model   :TX_2_FOODS_3
Test MAE: 294.75555035784527




trained_model   :TX_2_HOBBIES_1
Test MAE: 157.3871004331362




trained_model   :TX_2_HOBBIES_2
Test MAE: 21.053773026366333




trained_model   :TX_2_HOUSEHOLD_1
Test MAE: 431.09725290578564




trained_model   :TX_2_HOUSEHOLD_2
Test MAE: 34.765460887988965




trained_model   :TX_3_FOODS_1
Test MAE: 49.28710145216722




trained_model   :TX_3_FOODS_2
Test MAE: 86.88928041258059




trained_model   :TX_3_FOODS_3
Test MAE: 252.9318480591674




trained_model   :TX_3_HOBBIES_1
Test MAE: 90.60188261445586




trained_model   :TX_3_HOBBIES_2
Test MAE: 19.84668660997511




trained_model   :TX_3_HOUSEHOLD_1
Test MAE: 133.80007998593203




trained_model   :TX_3_HOUSEHOLD_2
Test MAE: 31.5337177623402




trained_model   :WI_1_FOODS_1
Test MAE: 73.99389413686899




trained_model   :WI_1_FOODS_2
Test MAE: 150.22964125413162




trained_model   :WI_1_FOODS_3
Test MAE: 302.08490182303046




trained_model   :WI_1_HOBBIES_1
Test MAE: 100.92039948576814




trained_model   :WI_1_HOBBIES_2
Test MAE: 17.958714651894738




trained_model   :WI_1_HOUSEHOLD_1
Test MAE: 137.07704087904284




trained_model   :WI_1_HOUSEHOLD_2
Test MAE: 33.44239777118176




trained_model   :WI_2_FOODS_1
Test MAE: 52.09352154498334




trained_model   :WI_2_FOODS_2
Test MAE: 224.13296999631228




trained_model   :WI_2_FOODS_3
Test MAE: 649.7399770029775




trained_model   :WI_2_HOBBIES_1
Test MAE: 51.017825200007515




trained_model   :WI_2_HOBBIES_2
Test MAE: 11.864348028089617




trained_model   :WI_2_HOUSEHOLD_1
Test MAE: 788.2653299611765




trained_model   :WI_2_HOUSEHOLD_2
Test MAE: 21.39462861814699




trained_model   :WI_3_FOODS_1
Test MAE: 42.12892753761131




trained_model   :WI_3_FOODS_2
Test MAE: 73.62659694431545




trained_model   :WI_3_FOODS_3
Test MAE: 300.9999368307474




trained_model   :WI_3_HOBBIES_1
Test MAE: 132.87426784488704




trained_model   :WI_3_HOBBIES_2
Test MAE: 9.265095810790163




trained_model   :WI_3_HOUSEHOLD_1
Test MAE: 304.84417319131063




trained_model   :WI_3_HOUSEHOLD_2
Test MAE: 29.56210540558075


In [7]:
# Inspect the fitted estimator for one series; the bare last expression makes
# the notebook display its repr. Note this rebinds the global `model`.
model = trained_model['model_CA_1_FOODS_1']
model

MultiOutputRegressor(estimator=XGBRegressor(base_score=None, booster=None,
                                            callbacks=None,
                                            colsample_bylevel=None,
                                            colsample_bynode=None,
                                            colsample_bytree=None,
                                            early_stopping_rounds=None,
                                            enable_categorical=False,
                                            eval_metric=None,
                                            feature_types=None,
                                            gamma=0.053543859402459315,
                                            gpu_id=None, grow_policy=None,
                                            importance_type=None,
                                            interaction_constraints=None,
                                            learning_rate=0.43304389079434624,
                             

## Generating forecast

#### References to the papers for the approach used:
https://arxiv.org/abs/1603.02754  &    
https://arxiv.org/abs/2101.02118
#### Reference for the data-preparation steps:
https://towardsdatascience.com/multi-step-time-series-forecasting-with-xgboost-65d6820bec39

# Pipelines

In [5]:
import modin.pandas as pd
from transformer.DataAggregator import DataAggregator
from transformer.Dataformator import DataFormator
from transformer.ImputeMean import ImputeMean
from transformer.TrainTestSplit import TrainTestSplit
import utils
import numpy as np
import json
import xgboost as xgb
from hyperopt import fmin, tpe, hp, STATUS_OK
from sklearn.metrics import mean_absolute_error
from sklearn.multioutput import MultiOutputRegressor
from configs.space import space

In [17]:
# Load the pre-aggregated sales table from CSV, using the file's
# 'Unnamed: 0' column as the row index.
data = pd.read_csv(
    './data/store_dept_sales.csv',
    index_col='Unnamed: 0',
)

In [20]:
# Preview the leading rows of the loaded frame to sanity-check the columns.
data.head()

Unnamed: 0,CA_1_FOODS_1,CA_1_FOODS_2,CA_1_FOODS_3,CA_1_HOBBIES_1,CA_1_HOBBIES_2,CA_1_HOUSEHOLD_1,CA_1_HOUSEHOLD_2,CA_2_FOODS_1,CA_2_FOODS_2,CA_2_FOODS_3,...,WI_2_HOBBIES_2,WI_2_HOUSEHOLD_1,WI_2_HOUSEHOLD_2,WI_3_FOODS_1,WI_3_FOODS_2,WI_3_FOODS_3,WI_3_HOBBIES_1,WI_3_HOBBIES_2,WI_3_HOUSEHOLD_1,WI_3_HOUSEHOLD_2
2011-01-29,297,674,2268,528,28,361,181,406,212,1575,...,10,353,98,152,583,2293,256,22,584,148
2011-01-30,284,655,2198,489,9,350,170,408,227,1286,...,7,268,94,138,585,2383,342,14,541,195
2011-01-31,214,396,1398,409,6,279,114,238,138,913,...,18,250,69,127,575,1841,228,20,420,106
2011-02-01,175,476,1607,383,9,278,123,240,174,1126,...,15,305,80,98,533,1965,183,11,327,94
2011-02-02,182,354,1496,263,5,195,135,220,102,956,...,1,118,32,87,340,1427,70,4,151,53
2011-02-03,191,486,1730,453,14,256,146,263,103,1128,...,6,259,107,170,853,2760,285,8,392,122
2011-02-04,224,554,1915,339,7,268,143,304,140,1341,...,10,318,100,136,612,2727,357,3,492,159


In [19]:
# Hold out 30% of the rows for testing. shuffle=False is passed through to the
# project's TrainTestSplit helper.
# NOTE(review): presumably this keeps the rows in their original (date) order —
# confirm against transformer.TrainTestSplit.
tts = TrainTestSplit(
    data,
    test_size=0.3,
    random_state=0,
    shuffle=False,
)
X_train_, X_test_, y_train_, y_test_ = tts.split_data()

In [27]:
from sklearn.impute import SimpleImputer
from transformerExpr.ClipNegValues import ClipNegValues
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer

In [None]:

# Preprocessing pipeline: the project's ClipNegValues transformer followed by
# mean imputation.
# Every Pipeline step must be a (name, estimator-instance) pair. The previous
# version passed the ClipNegValues class itself and left the imputation step
# without an estimator, so the pipeline could not be validated or fitted.
# NOTE(review): assumes ClipNegValues() needs no constructor arguments —
# confirm against transformerExpr.ClipNegValues.
# NOTE(review): SimpleImputer treats NaN as missing by default; if zeros should
# be imputed instead (as ImputeMean.replace_zero_with_mean suggests), pass
# missing_values=0.
p1 = Pipeline([
    ('Clip neg values', ClipNegValues()),
    ('impute mean values', SimpleImputer(strategy='mean')),
])