In [1]:
import pandas as pd
import json
import xgboost as xgb
import pickle
import lightgbm as lgb
import numpy as np
from numpy import mean
from statsforecast.models import (
    HistoricAverage,
    Naive,
    RandomWalkWithDrift,
    SeasonalNaive,
    WindowAverage,
    SeasonalWindowAverage,
    ADIDA,
    CrostonClassic,
    CrostonSBA,
    IMAPA,
    TSB,
    Theta,
    AutoARIMA,
    OptimizedTheta,
    AutoCES,
    AutoETS,
    DynamicTheta,
    SimpleExponentialSmoothing,
    SimpleExponentialSmoothingOptimized,
    SeasonalExponentialSmoothing,
    SeasonalExponentialSmoothingOptimized,
    Holt,
    HoltWinters,
)

baseline_model_dict = {
    "historic_average_baseline": HistoricAverage,
    "naive_baseline": Naive,
    "random_walk_baseline": RandomWalkWithDrift,
    # "window_average_baseline": WindowAverage,
}

seasonal_baseline_model_dict = {
    "seasonal_naive_baseline": SeasonalNaive,
    # "seasonal_window_average_baseline": SeasonalWindowAverage,
}

model_dict = {
    # 'adida_model':ADIDA,
    # 'croston_classic_model':CrostonClassic,
    # 'croston_sba_model':CrostonSBA,
    # 'imapa_model':IMAPA,
    # 'tsb_model':TSB,
    'theta_model': Theta,
    'auto_arima_model':AutoARIMA,
    'optimized_theta_model': OptimizedTheta,
    'auto_ces_model': AutoCES,
    'auto_ets_model': AutoETS,
    'dynamic_theta_model': DynamicTheta,
    # 'simple_exponential_smoothing_model':SimpleExponentialSmoothing,
    'simple_exponential_smoothing_optimized_model': SimpleExponentialSmoothingOptimized,
    # 'seasonal_exponential_smoothing_model':SeasonalExponentialSmoothing,
    'seasonal_exponential_smoothing_optimized_model': SeasonalExponentialSmoothingOptimized,
    'holt_model': Holt,
    'holt_winters_model': HoltWinters,
}

raw_sales_data = pd.read_csv(
    'gs://gfk-eco-local-forecast/simulations/neo_backtest_regular/Weekly/2780/backtests/raw_sales_data.csv')

all_cell_rows = ['-'.join(value) for value in raw_sales_data[['country_code', 'item_group_code', 'outlet_group_code']].values]

raw_sales_data['cell'] = all_cell_rows

def get_metrics(prediction_dataframe, prediction_column, actual_column):

    e = prediction_dataframe[prediction_column].values - \
        prediction_dataframe[actual_column].values

    ae, se, pe = abs(e), e**2, (e/prediction_dataframe[actual_column].values)

    mae, mse, mpe, ape = mean(ae), mean(se), mean(
        pe), (ae/prediction_dataframe[actual_column].values)

    rmse, mape = mse**0.5, mean(ape)

    return {'mae': mae, 'mse': mse, 'mpe': mpe, 'rmse': rmse, 'mape': mape}


def get_model_predictions(model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal):

    model_result = {}
    model_predictions = {}
    model_inpredictions = {}

    for model_name, model in model_dict.items():
        try:
            print(model_name)

            try:
                intmodel = model(season_length=seasonal)
            except:
                intmodel = model()

            fitted_model = intmodel.forecast(
                y=train_data, fitted=True, h=forecast_horizon)

            mean_prediction = fitted_model['mean']

            insample_prediction = fitted_model['fitted']
            insample_prediction[np.isnan(insample_prediction)] = 0

            prediction_dataframe = pd.DataFrame(
                mean_prediction, actual_values).reset_index()
            prediction_dataframe.columns = ["actual", "prediction"]

            model_result[model_name] = get_metrics(
                prediction_dataframe, 'prediction', 'actual')
            model_predictions[model_name] = mean_prediction
            model_inpredictions[model_name] = insample_prediction

        except Exception as e:
            print(e)
            print(model_predictions)

    model_predictions_df = pd.DataFrame(
        model_predictions, index=univariant_data.index[-forecast_horizon:]).fillna(0)
    model_result_df = pd.DataFrame(model_result).fillna(0)
    model_inpredictions_df = pd.DataFrame(
        model_inpredictions, univariant_data.index[:-forecast_horizon]).fillna(0)

    return model_predictions_df, model_inpredictions_df, model_result_df


def get_baseline_forecasts(baseline_model_dict, seasonal_baseline_model_dict, model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal, window):

    baseline_result = {}
    baseline_predictions = {}
    baseline_inpredictions = {}

    for model_name, model in baseline_model_dict.items():

        print(model_name)

        if 'window' in model_name:
            intmodel = model(window_size=window)

        else:
            intmodel = model()

        fitted_model = intmodel.forecast(
            y=train_data, fitted=True, h=forecast_horizon)

        mean_prediction = fitted_model['mean']
        insample_prediction = fitted_model['fitted']

        prediction_dataframe = pd.DataFrame(
            mean_prediction, actual_values).reset_index()
        prediction_dataframe.columns = ["actual", "prediction"]

        baseline_result[model_name] = get_metrics(
            prediction_dataframe, 'prediction', 'actual')

        baseline_predictions[model_name] = mean_prediction
        baseline_inpredictions[model_name] = insample_prediction

    for model_name, model in seasonal_baseline_model_dict.items():
        print(model_name)

        if 'window' in model_name:
            intmodel = model(season_length=seasonal, window_size=1)

        else:
            intmodel = model(season_length=seasonal)

        fitted_model = intmodel.forecast(
            y=train_data, fitted=True, h=forecast_horizon)

        mean_prediction = fitted_model['mean']

        insample_prediction = fitted_model['fitted']
        insample_prediction[np.isnan(insample_prediction)] = 0

        prediction_dataframe = pd.DataFrame(
            mean_prediction, actual_values).reset_index()
        prediction_dataframe.columns = ["actual", "prediction"]

        baseline_result[model_name] = get_metrics(
            prediction_dataframe, 'prediction', 'actual')
        baseline_predictions[model_name] = mean_prediction
        baseline_inpredictions[model_name] = insample_prediction

    baseline_predictions['actual'] = actual_values
    baseline_inpredictions['actual'] = train_data

    baseline_predictions_df = pd.DataFrame(
        baseline_predictions, index=univariant_data.index[-forecast_horizon:]).fillna(0)
    baseline_result_df = pd.DataFrame(baseline_result).fillna(0)
    baseline_inpredictions_df = pd.DataFrame(
        baseline_inpredictions, univariant_data.index[:-forecast_horizon]).fillna(0)
    
    return baseline_predictions_df, baseline_inpredictions_df, baseline_result_df


  from tqdm.autonotebook import tqdm


In [2]:
final_results = {}
for cell in set(all_cell_rows):

    full_data = raw_sales_data[raw_sales_data['cell']==cell].sort_values(by='period_seq').set_index("start_date")

    full_data.index = pd.to_datetime(full_data.index, format="%Y-%m-%d")

    single_column = "quantity"

    univariant_data = full_data[[single_column]]

    if univariant_data.shape[0] < 149:
        continue

    univariant_data = univariant_data
    univariant_data[single_column] = univariant_data[single_column].astype(float)

    forecast_horizon = 26

    window = 13
    seasonal = 52

    actual_values = np.array([value[0] for value in univariant_data.values[-forecast_horizon:]])
    train_data = np.array([value[0] for value in univariant_data.values[:-forecast_horizon]])

    model_predictions_df, model_inpredictions_df, model_result_df = get_model_predictions(model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal)

    baseline_predictions_df, baseline_inpredictions_df, baseline_result_df = get_baseline_forecasts(baseline_model_dict, seasonal_baseline_model_dict, model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal, window)

    all_results = pd.concat([model_result_df, baseline_result_df], axis=1).T.sort_values(by=['mpe'])

    all_predictions = pd.concat([baseline_predictions_df, model_predictions_df], axis=1)

    all_inpredictions = pd.concat([baseline_inpredictions_df, model_inpredictions_df], axis=1)

    model_1 = xgb.XGBRegressor()
    model_2 = lgb.LGBMRegressor()

    good_models = all_results.sort_values(by='rmse').index.tolist()

    model_1.fit(all_inpredictions[good_models], all_inpredictions['actual'])

    model_2.fit(all_inpredictions[good_models], all_inpredictions['actual'])

    testing_1 = pd.DataFrame(list(model_1.predict(all_predictions[good_models])),all_predictions['actual'].to_list()).reset_index()

    testing_2 = pd.DataFrame(list(model_2.predict(all_predictions[good_models])),all_predictions['actual'].to_list()).reset_index()
    
    testing_3 = pd.DataFrame(testing_1['index'].values, pd.concat([testing_1, testing_2], axis=1).drop(columns=['index']).T.mean().values).reset_index()

    xgboost_result = pd.DataFrame([get_metrics(testing_1, 0, 'index')])
    xgboost_result = xgboost_result.T.rename(columns={0:'xgboost'}).T

    lightgbm_result = pd.DataFrame([get_metrics(testing_2, 0, 'index')])
    lightgbm_result = lightgbm_result.T.rename(columns={0:'lightgbm'}).T

    lgb_xgb_result = pd.DataFrame([get_metrics(testing_3, 0, 'index')])
    lgb_xgb_result = lgb_xgb_result.T.rename(columns={0:'lightgbm-xgboost'}).T

    all_results  = pd.concat([all_results, xgboost_result, lightgbm_result, lgb_xgb_result]).sort_values(by='rmse')

    final_results[cell] = all_results

    file_path = "dataframes.pkl"

    with open(file_path, "wb") as pkl_file:
        pickle.dump(final_results, pkl_file)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


theta_model
auto_arima_model
optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 69395.381836


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


theta_model
auto_arima_model
optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000119 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 27026.472628
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 810
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 29034.997834
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000103 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 809
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 4157.007924
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 961
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 317.688125
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 47208.766790
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 801
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 130.914232
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 915
[LightGBM] [Info] Number of data points in the train set: 218, number of used features: 13
[LightGBM] [Info] Start training from score 1557.766055
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 119588.896784
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 809
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 1493.511152
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000063 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 809
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 20808.891096
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 35235.114789
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 579
[LightGBM] [Info] Number of data points in the train set: 145, number of used features: 13
[LightGBM] [Info] Start training from score 4572.579047
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000384 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 954
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 24811.382667
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 944
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 1485.039232
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 77097.014426
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000068 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 810
[LightGBM] [Info] Number of data points in the train set: 198, number of used features: 13
[LightGBM] [Info] Start training from score 26542.697537
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 11209.402213
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000100 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 47976.567306
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000065 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 809
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 29946.086245
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000061 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 577
[LightGBM] [Info] Number of data points in the train set: 144, number of used features: 13
[LightGBM] [Info] Start training from score 4323.392804
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000117 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 810
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 13
[LightGBM] [Info] Start training from score 14567.079488
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 686
[LightGBM] [Info] Number of data points in the train set: 164, number of used features: 13
[LightGBM] [Info] Start training from score 762.504580
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 946
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 889.846340
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000098 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 585
[LightGBM] [Info] Number of data points in the train set: 145, number of used features: 13
[LightGBM] [Info] Start training from score 9507.741947
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 84802.373629
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 62138.356462
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


optimized_theta_model
auto_ces_model
auto_ets_model
dynamic_theta_model
simple_exponential_smoothing_optimized_model
seasonal_exponential_smoothing_optimized_model
holt_model
holt_winters_model
historic_average_baseline
naive_baseline
random_walk_baseline
seasonal_naive_baseline
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000065 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 953
[LightGBM] [Info] Number of data points in the train set: 228, number of used features: 13
[LightGBM] [Info] Start training from score 106864.646484
theta_model
auto_arima_model


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  univariant_data[single_column] = univariant_data[single_column].astype(float)


In [None]:
from neuralforecast import NeuralForecast
from neuralforecast.losses.pytorch import MAE
from neuralforecast.auto import (
    AutoRNN,
    AutoLSTM,
    AutoGRU,
    AutoTCN,
    AutoDeepAR,
    AutoDilatedRNN,
    AutoMLP,
    AutoNBEATS,
    AutoNBEATSx,
    AutoNHITS,
    AutoTFT,
    AutoVanillaTransformer,
    AutoInformer,
    AutoAutoformer,
    AutoFEDformer,
    AutoPatchTST,
    AutoTimesNet,
    AutoStemGNN,
    AutoHINT,
)

model_list = {
    'AutoRNN':AutoRNN,
    'AutoLSTM':AutoLSTM,
    'AutoGRU':AutoGRU,
    'AutoTCN':AutoTCN,
    # 'AutoDeepAR': AutoDeepAR,
    'AutoDilatedRNN':AutoDilatedRNN,
    'AutoMLP':AutoMLP,
    'AutoNBEATS':AutoNBEATS,
    'AutoNBEATSx':AutoNBEATSx,
    'AutoNHITS':AutoNHITS,
    'AutoTFT':AutoTFT,
    'AutoVanillaTransformer':AutoVanillaTransformer,
    'AutoInformer':AutoInformer,
    'AutoAutoformer':AutoAutoformer,
    'AutoFEDformer':AutoFEDformer,
    'AutoPatchTST':AutoPatchTST,
    'AutoTimesNet':AutoTimesNet,
    'AutoStemGNN':AutoStemGNN,
    'AutoHINT':AutoHINT,
}

In [None]:
new_uni_data = univariant_data.reset_index().rename(columns={'date':'ds', single_column:'y'}).head(1800)
new_uni_data['unique_id'] = 1.0

In [None]:
import matplotlib.pyplot as plt
from time import sleep

# Try different hyperparmeters to improve accuracy.
all_forecasts = {}
for model_name, model in model_list.items():
    print(str(model))
    try:
        models = [model(h=forecast_horizon, backend='ray', num_samples=10, loss=MAE(), config=dict(max_steps=100))]
        nf = NeuralForecast(models=models, freq='D')
        nf.fit(df=new_uni_data[:-forecast_horizon])
        Y_hat_df = nf.predict()
        Y_hat_df = Y_hat_df.reset_index()
        Y_hat_df['actual'] = new_uni_data[-forecast_horizon:]['y'].values
        Y_hat_df.set_index('ds').drop(columns='unique_id').plot()
        all_forecasts[model_name] = Y_hat_df
        plt.show()
    except Exception as e:
        print(e)
        print(f'Failed {model_name}')
        continue


In [None]:
get_metrics(Y_hat_df, 'AutoTCN', 'actual')

In [None]:
results = pd.concat([value for value in all_forecasts.values()], axis=1)
results = results[[col for col in results.columns if 'Auto' in col]+['actual']].T.drop_duplicates().T

In [None]:
result_metrics = pd.DataFrame([get_metrics(results, col, 'actual') for col in results.columns if 'Auto' in col]).T

In [None]:
result_metrics.columns = [col for col in results.columns if 'Auto' in col]

In [None]:
all_forecasts['AutoLSTM'].set_index('ds').drop(columns='unique_id').plot()

In [None]:
result_metrics

In [None]:
univariant_data.pivot_table

In [None]:
univariant_data

In [None]:
actual_values

In [None]:
final_results['BE-INKJET_PRINTING_DEVICES-ONLINE']