In [None]:
import pandas as pd
import json
import xgboost as xgb
import pickle
import lightgbm as lgb
import numpy as np
import matplotlib.pyplot as plt
from numpy import mean
from time import sleep
from timeit import default_timer
from neuralforecast import NeuralForecast
from neuralforecast.losses.pytorch import MAE
from neuralforecast.auto import (
    AutoRNN,
    AutoLSTM,
    AutoGRU,
    AutoTCN,
    AutoDeepAR,
    AutoDilatedRNN,
    AutoMLP,
    AutoNBEATS,
    AutoNBEATSx,
    AutoNHITS,
    AutoTFT,
    AutoVanillaTransformer,
    AutoInformer,
    AutoAutoformer,
    AutoFEDformer,
    AutoPatchTST,
    AutoTimesNet,
    AutoStemGNN,
    AutoHINT,
)
from statsforecast.models import (
    HistoricAverage,
    Naive,
    RandomWalkWithDrift,
    SeasonalNaive,
    WindowAverage,
    SeasonalWindowAverage,
    ADIDA,
    CrostonClassic,
    CrostonSBA,
    IMAPA,
    TSB,
    Theta,
    AutoARIMA,
    OptimizedTheta,
    AutoCES,
    AutoETS,
    DynamicTheta,
    SimpleExponentialSmoothing,
    SimpleExponentialSmoothingOptimized,
    SeasonalExponentialSmoothing,
    SeasonalExponentialSmoothingOptimized,
    Holt,
    HoltWinters,
)

# Non-seasonal baseline forecasters, keyed by the label used for their
# rows/columns in the result frames. Values are model classes, not instances:
# get_baseline_forecasts instantiates them, passing `window_size` only when the
# key contains the substring "window".
baseline_model_dict = {
    "historic_average_baseline": HistoricAverage,
    "naive_baseline": Naive,
    "random_walk_baseline": RandomWalkWithDrift,
    # "window_average_baseline": WindowAverage,
}

# Seasonal baseline forecasters, keyed by result label. Values are model
# classes; get_baseline_forecasts instantiates them with `season_length`
# (plus `window_size=1` when the key contains the substring "window").
seasonal_baseline_model_dict = {
    "seasonal_naive_baseline": SeasonalNaive,
    # "seasonal_window_average_baseline": SeasonalWindowAverage,
}

# Statistical forecasters evaluated by get_model_predictions, keyed by result
# label. Values are model classes; each is first constructed with
# `season_length` and, if that fails, with no arguments.
# Commented-out entries are currently excluded from the back-test.
model_dict = {
    # 'adida_model':ADIDA,
    # 'croston_classic_model':CrostonClassic,
    # 'croston_sba_model':CrostonSBA,
    # 'imapa_model':IMAPA,
    # 'tsb_model':TSB,
    'theta_model': Theta,
    'auto_arima_model':AutoARIMA,
    'optimized_theta_model': OptimizedTheta,
    'auto_ces_model': AutoCES,
    'auto_ets_model': AutoETS,
    'dynamic_theta_model': DynamicTheta,
    # 'simple_exponential_smoothing_model':SimpleExponentialSmoothing,
    'simple_exponential_smoothing_optimized_model': SimpleExponentialSmoothingOptimized,
    # 'seasonal_exponential_smoothing_model':SeasonalExponentialSmoothing,
    'seasonal_exponential_smoothing_optimized_model': SeasonalExponentialSmoothingOptimized,
    'holt_model': Holt,
    'holt_winters_model': HoltWinters,
}

# NeuralForecast auto-models evaluated by run_auto_deep_learning. Despite the
# name this is a dict: label -> model class. The label is also the column name
# that run_auto_deep_learning looks up in the prediction frame when scoring,
# so it has to match the forecast column the model produces.
# Commented-out entries are currently excluded from the back-test.
model_list = {
    # 'AutoRNN':AutoRNN,
    # 'AutoStemGNN':AutoStemGNN,
    # 'AutoHINT':AutoHINT,
    'AutoLSTM':AutoLSTM,
    'AutoGRU':AutoGRU,
    'AutoTCN':AutoTCN,
    # 'AutoDeepAR': AutoDeepAR,
    'AutoDilatedRNN':AutoDilatedRNN,
    'AutoMLP':AutoMLP,
    'AutoNBEATS':AutoNBEATS,
    'AutoNBEATSx':AutoNBEATSx,
    'AutoNHITS':AutoNHITS,
    # 'AutoTFT':AutoTFT,
    # 'AutoVanillaTransformer':AutoVanillaTransformer,
    # 'AutoInformer':AutoInformer,
    # 'AutoAutoformer':AutoAutoformer,
    # 'AutoFEDformer':AutoFEDformer,
    # 'AutoPatchTST':AutoPatchTST,
    # 'AutoTimesNet':AutoTimesNet,
}

def run_auto_deep_learning(model_list, forecast_horizon, new_uni_data):
    """Fit each auto-model on the training split and score its hold-out forecast.

    Args:
        model_list: Mapping of label -> model class. Each class is
            instantiated with ``h``, ``backend='ray'``, ``num_samples=2`` and
            an ``MAE`` loss. The label is also used as the name of the
            forecast column when scoring.
        forecast_horizon: Number of trailing rows of ``new_uni_data`` held out
            for evaluation; also passed to the model as ``h``.
        new_uni_data: Frame sliced positionally for the train/hold-out split;
            its ``'y'`` column supplies the hold-out actuals.

    Returns:
        A ``(metrics, forecasts)`` tuple. ``metrics`` has one row per model
        that succeeded (the ``get_metrics`` output plus ``time_taken`` in
        seconds). ``forecasts`` is a single-column frame indexed by model
        label whose cells hold each model's prediction frame.

    Any exception raised while handling a model is printed, written to
    ``<label>.csv`` in the working directory, and the model is skipped.
    """
    forecasts_by_model = {}
    metrics_by_model = {}

    for model_name, auto_model_cls in model_list.items():
        print(model_name)
        try:
            started_at = default_timer()

            candidate = auto_model_cls(
                h=forecast_horizon, backend='ray', num_samples=2, loss=MAE())
            forecaster = NeuralForecast(models=[candidate], freq='D')
            forecaster.fit(df=new_uni_data[:-forecast_horizon])

            forecast_frame = forecaster.predict().reset_index()
            forecast_frame['actual'] = new_uni_data[-forecast_horizon:]['y'].values

            # Stored before scoring, so a scoring failure still leaves the
            # forecast in the returned collection.
            forecasts_by_model[model_name] = forecast_frame

            scores = get_metrics(forecast_frame, model_name, 'actual')
            scores['time_taken'] = round(default_timer() - started_at, 3)
            metrics_by_model[model_name] = scores
        except Exception as e:
            print(e)
            print(f'Failed {model_name}')
            pd.DataFrame([e]).to_csv(f'{model_name}.csv')

    metrics_frame = pd.DataFrame(metrics_by_model).T
    forecasts_frame = pd.DataFrame([forecasts_by_model]).T
    return metrics_frame, forecasts_frame



def get_metrics(prediction_dataframe, prediction_column, actual_column):
    """Compute point-forecast error metrics for one prediction column.

    Args:
        prediction_dataframe: Frame holding both the forecast and the actuals.
        prediction_column: Label of the forecast column.
        actual_column: Label of the column with the observed values.

    Returns:
        dict with keys ``mae``, ``mse``, ``mpe``, ``rmse``, ``mape`` and
        ``wide_mape``. ``mpe`` is signed (forecast minus actual, relative to
        the actual). ``wide_mape`` is the absolute error of the column totals
        relative to the total of the actuals, rounded to 4 decimals.

    Note:
        Rows whose actual value is 0 are not masked: they make the
        percentage metrics ``inf``/``nan``, as dictated by NumPy division.
    """
    predicted = prediction_dataframe[prediction_column].values
    actual = prediction_dataframe[actual_column].values

    error = predicted - actual
    abs_error = abs(error)

    mae = mean(abs_error)
    mse = mean(error ** 2)
    rmse = mse ** 0.5
    mpe = mean(error / actual)
    # Divide by |actual| so the "absolute" percentage error cannot turn
    # negative (and cancel out in the mean) when actuals are negative.
    mape = mean(abs_error / abs(actual))

    total_predicted = prediction_dataframe[prediction_column].sum()
    total_actual = prediction_dataframe[actual_column].sum()
    wide_mape = round(abs(total_predicted - total_actual) / abs(total_actual), 4)

    return {'mae': mae, 'mse': mse, 'mpe': mpe, 'rmse': rmse, 'mape': mape, 'wide_mape': wide_mape}


def get_model_predictions(model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal):
    """Fit every model in ``model_dict`` and score its hold-out forecast.

    Args:
        model_dict: Mapping of label -> model class. Each class is built with
            ``season_length=seasonal``; classes whose constructor rejects that
            keyword are built with no arguments instead. Instances must offer
            ``forecast(y=..., fitted=True, h=...)`` returning a mapping with
            ``'mean'`` (hold-out forecast) and ``'fitted'`` (in-sample values).
        univariant_data: Frame whose index supplies the time index of the
            outputs (last ``forecast_horizon`` entries for the forecast, the
            rest for the in-sample values).
        train_data: Training observations passed to the models.
        actual_values: Observed values of the hold-out period.
        forecast_horizon: Number of steps to forecast.
        seasonal: Season length handed to seasonal models.

    Returns:
        ``(predictions, in_sample_predictions, metrics)`` frames. The first
        two have one column per successful model; ``metrics`` has one column
        per model and one row per metric. NaNs in all three are replaced by 0.

    A model that raises an ``Exception`` is reported on stdout and skipped.
    """
    model_result = {}
    model_predictions = {}
    model_inpredictions = {}

    for model_name, model in model_dict.items():
        try:
            print(model_name)

            try:
                intmodel = model(season_length=seasonal)
            except TypeError:
                # Constructor does not take `season_length`. Only TypeError is
                # caught so KeyboardInterrupt/SystemExit are not swallowed.
                intmodel = model()

            fitted_model = intmodel.forecast(
                y=train_data, fitted=True, h=forecast_horizon)

            mean_prediction = fitted_model['mean']

            insample_prediction = fitted_model['fitted']
            insample_prediction[np.isnan(insample_prediction)] = 0

            prediction_dataframe = pd.DataFrame(
                {"actual": actual_values, "prediction": mean_prediction})

            model_result[model_name] = get_metrics(
                prediction_dataframe, 'prediction', 'actual')
            model_predictions[model_name] = mean_prediction
            model_inpredictions[model_name] = insample_prediction

        except Exception as e:
            print(e)
            print(f'Failed {model_name}')

    holdout_index = univariant_data.index[-forecast_horizon:]
    train_index = univariant_data.index[:-forecast_horizon]

    # NOTE(review): fillna(0) also turns a NaN metric into 0, which then
    # sorts as a perfect score downstream; kept because callers may rely on it.
    model_predictions_df = pd.DataFrame(
        model_predictions, index=holdout_index).fillna(0)
    model_result_df = pd.DataFrame(model_result).fillna(0)
    model_inpredictions_df = pd.DataFrame(
        model_inpredictions, index=train_index).fillna(0)

    return model_predictions_df, model_inpredictions_df, model_result_df


def get_baseline_forecasts(baseline_model_dict, seasonal_baseline_model_dict, model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal, window):
    """Run the baseline forecasters and score them on the hold-out period.

    Args:
        baseline_model_dict: Label -> class of non-seasonal baselines. A class
            whose label contains ``"window"`` is built with
            ``window_size=window``, any other with no arguments.
        seasonal_baseline_model_dict: Label -> class of seasonal baselines,
            built with ``season_length=seasonal`` (plus ``window_size=1`` when
            the label contains ``"window"``).
        model_dict: Not used by this function.
        univariant_data: Frame whose index supplies the time index of the
            returned frames.
        train_data: Training observations passed to the models.
        actual_values: Observed values of the hold-out period.
        forecast_horizon: Number of steps to forecast.
        seasonal: Season length for the seasonal baselines.
        window: Window size for windowed non-seasonal baselines.

    Returns:
        ``(predictions, in_sample_predictions, metrics)`` frames. The first
        two carry an extra ``'actual'`` column (hold-out actuals and training
        data respectively). NaNs in all three are replaced by 0.
    """
    metrics_by_model = {}
    holdout_by_model = {}
    fitted_by_model = {}

    def _forecast_and_score(label, estimator, zero_nan_fitted):
        # Forecast with one estimator and record its outputs under `label`.
        output = estimator.forecast(
            y=train_data, fitted=True, h=forecast_horizon)
        holdout = output['mean']
        fitted = output['fitted']
        if zero_nan_fitted:
            fitted[np.isnan(fitted)] = 0

        scored = pd.DataFrame({"actual": actual_values, "prediction": holdout})
        metrics_by_model[label] = get_metrics(scored, 'prediction', 'actual')
        holdout_by_model[label] = holdout
        fitted_by_model[label] = fitted

    for label, model_cls in baseline_model_dict.items():
        print(label)
        estimator = model_cls(window_size=window) if 'window' in label else model_cls()
        _forecast_and_score(label, estimator, zero_nan_fitted=False)

    for label, model_cls in seasonal_baseline_model_dict.items():
        print(label)
        seasonal_kwargs = {'season_length': seasonal}
        if 'window' in label:
            seasonal_kwargs['window_size'] = 1
        _forecast_and_score(label, model_cls(**seasonal_kwargs), zero_nan_fitted=True)

    holdout_by_model['actual'] = actual_values
    fitted_by_model['actual'] = train_data

    holdout_index = univariant_data.index[-forecast_horizon:]
    train_index = univariant_data.index[:-forecast_horizon]

    baseline_predictions_df = pd.DataFrame(
        holdout_by_model, index=holdout_index).fillna(0)
    baseline_result_df = pd.DataFrame(metrics_by_model).fillna(0)
    baseline_inpredictions_df = pd.DataFrame(
        fitted_by_model, index=train_index).fillna(0)

    return baseline_predictions_df, baseline_inpredictions_df, baseline_result_df


In [None]:
# Back-test every (country, item group, outlet group) cell: fit the
# statistical models and baselines, stack them with XGBoost/LightGBM, add
# mean/median ensembles and the deep-learning auto-models, then checkpoint all
# metrics and forecasts to pickle files.
final_results = {}
final_forecast = {}

# raw_sales_data = pd.read_csv(
#     'gs://gfk-eco-local-forecast/simulations/neo_backtest_regular/Weekly/2780/backtests/raw_sales_data.csv')

raw_sales_data = pd.read_csv('sales_data.csv').drop(columns=['Unnamed: 0'])

# Cast to str so the join also works when the codes were parsed as numbers.
all_cell_rows = ['-'.join(value) for value in raw_sales_data[['country_code', 'item_group_code', 'outlet_group_code']].astype(str).values]

raw_sales_data['cell'] = all_cell_rows

# Loop-invariant settings.
single_column = "quantity"
forecast_horizon = 12
window = 6
seasonal = 12
metric_file_path = "metrics.pkl"
forecast_file_path = "forecast.pkl"

# sorted() makes the processing order (and thus the checkpoints) reproducible.
for cell in sorted(set(all_cell_rows)):

    full_data = raw_sales_data[raw_sales_data['cell'] == cell].sort_values(by='period_seq').set_index("start_date")

    full_data.index = pd.to_datetime(full_data.index, format="%Y-%m-%d")

    # Series with fewer than 80 observations are skipped.
    if full_data.shape[0] < 80:
        continue

    # astype returns a new frame, so there is no chained assignment on a slice.
    univariant_data = full_data[[single_column]].astype(float)

    series_values = univariant_data[single_column].to_numpy(copy=True)
    actual_values = series_values[-forecast_horizon:]
    train_data = series_values[:-forecast_horizon]

    model_predictions_df, model_inpredictions_df, model_result_df = get_model_predictions(model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal)

    baseline_predictions_df, baseline_inpredictions_df, baseline_result_df = get_baseline_forecasts(baseline_model_dict, seasonal_baseline_model_dict, model_dict, univariant_data, train_data, actual_values, forecast_horizon, seasonal, window)

    all_results = pd.concat([model_result_df, baseline_result_df], axis=1).T.sort_values(by=['mpe'])

    all_predictions = pd.concat([baseline_predictions_df, model_predictions_df], axis=1)

    all_inpredictions = pd.concat([baseline_inpredictions_df, model_inpredictions_df], axis=1)

    holdout_actual = all_predictions['actual'].to_numpy()

    # Simple ensembles over the model forecasts only; the 'actual' column must
    # not take part, otherwise the target leaks into the ensemble.
    model_forecasts = all_predictions.drop(columns=['actual'])
    mean_forecast = model_forecasts.mean(axis=1).to_numpy()
    median_forecast = model_forecasts.median(axis=1).to_numpy()

    # Stacking: regress the training actuals on the models' in-sample values,
    # then apply the fitted stackers to the hold-out forecasts.
    model_1 = xgb.XGBRegressor()
    model_2 = lgb.LGBMRegressor()

    good_models = all_results.sort_values(by='rmse').index.tolist()

    model_1.fit(all_inpredictions[good_models], all_inpredictions['actual'])
    model_2.fit(all_inpredictions[good_models], all_inpredictions['actual'])

    xgboost_forecast = model_1.predict(all_predictions[good_models])
    lightgbm_forecast = model_2.predict(all_predictions[good_models])
    blended_forecast = (xgboost_forecast + lightgbm_forecast) / 2

    ensemble_forecasts = {
        'xgboost': xgboost_forecast,
        'lightgbm': lightgbm_forecast,
        'lightgbm-xgboost': blended_forecast,
        'mean_ensemble': mean_forecast,
        'median_ensemble': median_forecast,
    }

    # One explicit (actual, prediction) frame per ensemble, so forecast and
    # target cannot be swapped when scoring.
    ensemble_frames = {
        label: pd.DataFrame({'actual': holdout_actual, 'prediction': forecast})
        for label, forecast in ensemble_forecasts.items()
    }
    ensemble_results = pd.DataFrame.from_dict(
        {
            label: get_metrics(frame, 'prediction', 'actual')
            for label, frame in ensemble_frames.items()
        },
        orient='index',
    )

    ensemble_frames['lightgbm-xgboost'].plot()
    plt.show()

    all_results = pd.concat([all_results, ensemble_results]).sort_values(by='rmse')

    # model_1 is the XGBoost stacker and model_2 the LightGBM one.
    all_predictions['xgboost'] = xgboost_forecast
    all_predictions['lightgbm'] = lightgbm_forecast
    all_predictions['lightgbm-xgboost'] = blended_forecast

    # NOTE(review): head(1800) truncates longer series, in which case the
    # deep-learning hold-out window no longer matches `actual_values`.
    new_uni_data = univariant_data.reset_index().rename(columns={'start_date': 'ds', single_column: 'y'}).head(1800)
    new_uni_data['unique_id'] = 1.0

    deep_results, deep_forecasts = run_auto_deep_learning(model_list, forecast_horizon, new_uni_data)

    all_results = pd.concat([all_results, deep_results])

    # NOTE(review): deep_forecasts is indexed by model label with whole
    # prediction frames as cell values, so this row-wise concat mixes two
    # different layouts in one frame. Kept as is because readers of
    # forecast.pkl may depend on it.
    all_predictions = pd.concat([all_predictions, deep_forecasts])

    final_results[cell] = all_results
    final_forecast[cell] = all_predictions

    # Rewritten on every iteration, so the files always contain every cell
    # processed so far.
    with open(metric_file_path, "wb") as pkl_file:
        pickle.dump(final_results, pkl_file)

    with open(forecast_file_path, "wb") as pkl_file:
        pickle.dump(final_forecast, pkl_file)


In [None]:
# Display the deep-learning metrics left over from the last processed cell
# (the back-test loop assigns `deep_results`; `deep_result` is never defined).
deep_results

In [None]:
# Combine the metrics with the deep-learning metrics of the last processed
# cell (the back-test loop assigns `deep_results`; `deep_result` is never
# defined).
pd.concat([all_results, deep_results])

In [None]:
# Display the deep-learning metrics left over from the last processed cell
# (the back-test loop assigns `deep_results`; `deep_result` is never defined).
deep_results