## Generate Final Forecasts Using NeuralProphet 
Generate final NeuralProphet forecasts using the configurations of the best ensembles in the retrospective analysis.

In [None]:
import pandas as pd
# Show floats with 3 decimal places. The fully qualified key is required: the bare
# 'precision' pattern is deprecated and ambiguous on newer pandas releases.
pd.set_option('display.precision', 3)

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import os

import ast
import re

from neuralprophet import NeuralProphet


In [None]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

# Number of samples in one year for each supported sampling frequency.
year_period = {'MS': 12, 'W': 52, 'D': 365}
frequency = 'MS'
one_year = year_period[frequency]

# Directory that receives the final forecast files.
# makedirs creates "./output" as well when it is missing, and exist_ok=True makes
# re-running this cell a no-op.
output_path = "./output/neuralprophet_final_forecasts_TEST"
os.makedirs(output_path, exist_ok=True)

## Load Data From Files

In [None]:
# The categories file holds one category name per line. It is read with plain file
# I/O because pandas rejects sep='\n' (a line terminator cannot be used as the
# field separator). Blank lines are skipped and surrounding whitespace is removed.
with open("./foodprice_categories.txt", encoding="utf-8") as categories_file:
    foodprice_categories = [line.strip() for line in categories_file if line.strip()]

# The first CSV column becomes the index and is converted to a DatetimeIndex so the
# rows can be selected by date.
foodprice_df = pd.read_csv("./all_data.csv", index_col=0)
foodprice_df = foodprice_df.set_index(pd.DatetimeIndex(foodprice_df.index))
foodprice_df

In [None]:
def get_neuralprophet_df(foodprice_df, food_category, dates, lagged_regressor):
    """Build a frame with columns 'ds', 'y' and the lagged regressor.

    Parameters
    ----------
    foodprice_df : pandas.DataFrame
        Source data; rows are selected from its index using `dates`.
    food_category : str
        Column of `foodprice_df` copied into the 'y' column.
    dates : index-like
        Row labels of `foodprice_df` to keep.
    lagged_regressor : str
        Column of `foodprice_df` copied into a column of the same name.

    Returns
    -------
    pandas.DataFrame
        Columns 'ds' (the selected row labels), 'y' and `lagged_regressor`,
        with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If `lagged_regressor` is 'ds' or 'y'; that name would silently
        replace one of the reserved columns.
    """
    if lagged_regressor in ('ds', 'y'):
        raise ValueError(
            f"lagged_regressor {lagged_regressor!r} collides with a reserved column name"
        )

    # Select each column once with explicit label-based indexing.
    target = foodprice_df[food_category].loc[dates]
    regressor = foodprice_df[lagged_regressor].loc[dates]

    prophet_df = pd.DataFrame({
        'ds': target.index,
        'y': target,
        lagged_regressor: regressor,
    })
    # Drop the date index; the dates are already carried by the 'ds' column.
    return prophet_df.reset_index(drop=True)
    

## Fit Models Using All Data To Produce Final Forecast

In [None]:
def fit_final_forecast_model(foodprice_df, food_category, all_dates, lagged_regressor, nlags,
                             n_forecasts=18, freq='MS'):
    """Fit a NeuralProphet model on the selected data and forecast the future period.

    Parameters
    ----------
    foodprice_df : pandas.DataFrame
        Source data passed on to `get_neuralprophet_df`.
    food_category : str
        Column of `foodprice_df` used as the forecast target.
    all_dates : index-like
        Row labels of `foodprice_df` used for training.
    lagged_regressor : str
        Column of `foodprice_df` registered as a lagged regressor. It is
        converted to `str` once so the training frame and the model use the
        same name.
    nlags : int
        Value passed to NeuralProphet as `n_lags`.
    n_forecasts : int, optional
        Forecast horizon: passed to NeuralProphet as `n_forecasts` and to
        `make_future_dataframe` as `periods`. Defaults to 18.
    freq : str, optional
        Frequency string passed to `fit`. Defaults to 'MS'.

    Returns
    -------
    tuple
        `(train_model, forecast)`: the fitted model and the output of
        `predict` on the future frame.
    """
    regressor_name = str(lagged_regressor)

    # initialize model
    train_model = NeuralProphet(n_forecasts=n_forecasts, n_lags=nlags)

    print("for food category: ", food_category, "\n for lagged regressor: ", lagged_regressor)

    # select data
    train_df = get_neuralprophet_df(foodprice_df, food_category, all_dates, regressor_name)

    # register the lagged regressor, then fit the model
    train_model = train_model.add_lagged_regressor(names=[regressor_name])
    train_model.fit(train_df, freq=freq)

    # create the future period and produce its forecast
    future = train_model.make_future_dataframe(train_df, periods=n_forecasts)
    forecast = train_model.predict(future)

    return train_model, forecast

Read `ensemble_results.pkl` to determine which lagged regressor and how many lags to use for each final forecast.

In [None]:
# Load the ensemble results table.
# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
ensemble_file = pd.read_pickle("./output/ensemble_TEST/ensemble_results.pkl")
ensemble_file.index.name = 'food_category'

# Transposing first makes the outer dict keys the index labels of the table.
d = ensemble_file.transpose().to_dict()
print(d)

In [None]:
%%time
all_forecasts = {} 
count = 0
for key,value in d.items():
    for val in (value['best_config']):
        if not val.startswith("neuralprophet"):
            continue
        else:
            exp = re.compile(r'.*_(.*)_nlags_(.*)')
            re_match = exp.match(val)
            lagged_reg = str(re_match.group(1))
            nlags = int(re_match.group(2))
            print(lagged_reg)
            
            # fit final forecast based on lagged_reg and nlags
            model, forecast= fit_final_forecast_model(foodprice_df, key, foodprice_df.index, lagged_reg, nlags)
            all_forecasts[(key, lagged_reg, nlags)] = forecast
            
            print("------------ done: ", count, " -----------")
            count += 1

In [None]:
# Write one CSV per (food_category, lagged_regressor, nlags) forecast.

# Number of forecast steps to extract. Columns yhat1..yhat<horizon> must exist in
# every forecast frame.
horizon = 18
yhat_columns = ["yhat" + str(step) for step in range(1, horizon + 1)]

for (food_category, lagged_reg, nlags), forecast_df in all_forecasts.items():
    print(food_category, lagged_reg, nlags)

    # For each yhat column take its first non-missing value.
    all_yhats = [
        forecast_df.loc[forecast_df[column].first_valid_index(), column]
        for column in yhat_columns
    ]

    # Index the values by the last `horizon` dates of the forecast frame.
    forecast_dates = pd.DatetimeIndex(forecast_df.ds.iloc[-horizon:])
    all_forecasts_yhat = {food_category: pd.Series(all_yhats, index=forecast_dates)}
    final_forecast_df = pd.DataFrame(all_forecasts_yhat)

    output_path_final = f"{output_path}/{food_category}_{str(lagged_reg)}_nlags_{str(nlags)}"
    if not os.path.exists(output_path_final):
        print("new directory created: ", output_path_final)
        # makedirs also creates missing parent directories.
        os.makedirs(output_path_final, exist_ok=True)

    final_forecast_df.to_csv(f"{output_path_final}/fc_final.csv")