## Generate retrospective forecasts using NeuralProphet

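Use NeuralProphet to generate retrospective forecasts for each food price category: for every simulated report date, a model is fit on the data available up to that date and used to forecast the following 18 months.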

In [None]:
import pandas as pd
# Use the fully qualified option name: the short form 'precision' is ambiguous
# on newer pandas releases (it also matches 'styler.format.precision') and
# raises an OptionError there.
pd.set_option('display.precision', 3)

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

import os

from neuralprophet import NeuralProphet
from sklearn.model_selection import ParameterGrid


In [None]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

# Number of samples that make up one year for each supported sampling rate.
year_period = dict(MS=12, W=52, D=365)

# Sampling rate selected for this notebook, and the matching samples-per-year.
frequency = 'MS'
one_year = year_period[frequency]

## Load Data From Files
Be sure to run `load_data.ipynb` first to produce the data file `all_data.csv`.

In [None]:
# The category file holds one category name per line. Read it line by line so
# that each whole line becomes one entry; recent pandas versions refuse
# sep='\n' in read_csv, which the previous implementation relied on.
with open("./foodprice_categories.txt", encoding="utf-8") as categories_file:
    foodprice_categories = [line.strip() for line in categories_file if line.strip()]

# Load the combined data set and index it by date (first CSV column).
foodprice_df = pd.read_csv("./all_data.csv", index_col=0)
foodprice_df = foodprice_df.set_index(pd.DatetimeIndex(foodprice_df.index))
foodprice_df

## Experiment Setup

In [None]:
# One simulated report date per line, kept as strings (they are used both for
# date comparisons and in output file names). Read the file directly because
# recent pandas versions refuse sep='\n' in read_csv.
with open("./reportsimdates.txt", encoding="utf-8") as dates_file:
    report_sim_dates = [line.strip() for line in dates_file if line.strip()]
report_sim_dates

In [None]:
# For each simulated report date, split the observation dates into:
#   * training dates   - everything up to and including the report date
#   * validation dates - the 18 months that follow the report date
sim_train_dates = {}
sim_valid_dates = {}

all_dates = foodprice_df.index
horizon = pd.DateOffset(months=18)

for date in report_sim_dates:
    window_end = pd.to_datetime(date) + horizon
    in_training = all_dates <= date
    in_validation = (all_dates > date) & (all_dates <= window_end)
    sim_train_dates[date] = all_dates[in_training]
    sim_valid_dates[date] = all_dates[in_validation]

In [None]:
# Keep only the food price category columns and transpose, so that each row
# is one category and each column is one date.
dataset_df = foodprice_df.loc[:, foodprice_categories].transpose()
dataset_df

In [None]:
def get_neuralprophet_df(foodprice_df, food_category, dates, lagged_regressor):
    """Build the ``ds`` / ``y`` / regressor frame for one food category.

    Parameters
    ----------
    foodprice_df : pandas.DataFrame
        Source data; must contain the ``food_category`` and
        ``lagged_regressor`` columns.
    food_category : str
        Column whose values become the target column ``y``.
    dates : index-like
        Labels of ``foodprice_df``'s index selecting the rows to keep.
    lagged_regressor : str
        Column copied into the result under its own name.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds`` (the selected index labels), ``y`` and
        ``lagged_regressor``, with a fresh 0..n-1 integer index.
    """
    target = foodprice_df[food_category].loc[dates]
    regressor = foodprice_df[lagged_regressor].loc[dates]

    columns = {
        'ds': target.index,
        'y': target,
        lagged_regressor: regressor,
    }
    return pd.DataFrame(columns).reset_index(drop=True)
    

In [None]:
# Column names of the candidate lagged regressors, one per line.
# NOTE(review): these look like FRED series identifiers - confirm against
# the notebook that produces all_data.csv.
data_sources = """
    DEXCAUS
    DCOILWTICO
    WILL5000IND
    VXOCLS
    CUSR0000SAF112
    CUSR0000SAF113
    CPIFABSL
    UNRATE
    FEDFUNDS
    IRLTLT01CAM156N
    LRUNTTTTCAM156S
    CPALCY01CAM661N
    CPGRLE01CAM657N
    QCAR368BIS
""".split()

In [None]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)

# Evaluation metrics, keyed by the name used in the result tables. Each value
# is called as ``fn(y_true=..., y_pred=...)``.
metrics = dict(
    r2_score=r2_score,
    mae=mean_absolute_error,
    mape=mean_absolute_percentage_error,
    mse=mean_squared_error,
    rmse=rmse,
)



## Fit and Evaluate Models for All CPI Categories

Fit a model using data up to the `report_sim_date` and use it to produce forecasts for the following 18 months. Collect evaluation metrics for comparison and save the forecasts for use in the ensembling experiments.

Use scikit-learn's `ParameterGrid` for hyperparameter tuning.

In [None]:
# Full hyperparameter search space.
params_grid = {'n_forecasts': [18],
               'n_lags': [24, 36, 48, 60]}

# TESTING WITH REDUCED PARAMS
# NOTE: the two assignments below override the full search space above and
# restrict the run to a single regressor. Remove them to run the full sweep.
params_grid = {'n_forecasts': [18],
               'n_lags': [24]}
data_sources = ['DEXCAUS']

grid = ParameterGrid(params_grid)

# ParameterGrid reports its number of parameter combinations via len().
cnt = len(grid)
print('Total Possible Models', cnt)

In [None]:
# Fit one NeuralProphet model per (lagged regressor, hyperparameter set,
# food category, simulated report date), forecast the validation window and
# collect both the forecasts and their evaluation metrics.
#
# Results:
#   all_output_paths  - output directories created during this run
#   all_valid_metrics - {(output_path, food_category): {date: metrics Series}}
#   all_forecasts     - {(output_path, food_category): {date: forecast Series}}
all_output_paths = []
all_valid_metrics = {}
all_forecasts = {}

for var in data_sources:
    regressor = str(var)

    for p in grid:
        details = regressor + '_nlags_' + str(p['n_lags'])
        output_path = './output/neuralprophet_TEST_' + details

        # An existing directory is treated as "this configuration has already
        # been run", so the configuration is skipped entirely.
        if os.path.exists(output_path):
            print("directory already exists, skipping")
            continue
        # makedirs also creates the parent './output' directory when missing.
        os.makedirs(output_path)
        print("new directory created: ", output_path)
        all_output_paths.append(output_path)

        # Iterate over the same category list that the forecast-saving step
        # looks up, so every (output_path, food_category) key it needs exists.
        for food_category in foodprice_categories:

            annual_valid_metrics = {}
            annual_forecasts = {}

            for date in report_sim_dates:
                train_df = get_neuralprophet_df(foodprice_df, food_category, sim_train_dates[date], regressor)
                valid_df = get_neuralprophet_df(foodprice_df, food_category, sim_valid_dates[date], regressor)

                # Initialise the model, register the lagged regressor and fit.
                train_model = NeuralProphet(n_forecasts=p['n_forecasts'],
                                            n_lags=p['n_lags'])
                train_model = train_model.add_lagged_regressor(names=[regressor])
                train_model.fit(train_df, freq=frequency)

                # Forecast the period that follows the training data.
                future = train_model.make_future_dataframe(train_df, periods=p['n_forecasts'])
                forecast = train_model.predict(future)

                # Take the first non-NaN value of every 'yhat<i>' column.
                # NOTE(review): presumably 'yhat<i>' is the i-step-ahead
                # forecast - confirm against the NeuralProphet documentation.
                yhat_columns = ["yhat" + str(step) for step in range(1, p['n_forecasts'] + 1)]
                all_yhats = [forecast.loc[forecast[col].first_valid_index(), col]
                             for col in yhat_columns]

                # The validation window can be shorter than the forecast
                # horizon (e.g. when the data ends early); keep only the
                # forecast steps that have an observed value to compare with.
                all_yhats = all_yhats[:len(valid_df)]

                # Compute the validation metrics for this report date.
                valid_metrics = {metric_name: metric_fn(y_true=valid_df.y, y_pred=all_yhats)
                                 for metric_name, metric_fn in metrics.items()}

                annual_valid_metrics[date] = pd.Series(valid_metrics)
                annual_forecasts[date] = pd.Series(all_yhats)

            all_valid_metrics[(output_path, food_category)] = annual_valid_metrics
            all_forecasts[(output_path, food_category)] = annual_forecasts

            # Average each metric over the report dates for every category
            # finished so far in THIS configuration, and rewrite the summary
            # file after each category so partial results survive a crash.
            # Distinct loop names avoid clobbering the outer loop variables.
            mean_valid_metrics = {
                scored_category: pd.DataFrame(valid_scores).mean(axis=1)
                for (scored_path, scored_category), valid_scores in all_valid_metrics.items()
                if scored_path == output_path
            }

            mean_valid_metrics_df = pd.DataFrame(mean_valid_metrics).T
            mean_valid_metrics_df.to_csv(f"{output_path}/mean_fc_valid_metrics.csv")



## Save Validation Forecasts to File (For Ensembling)


In [None]:
# Write one forecast CSV per (output directory, report date): rows are the
# validation dates, columns are the food price categories.
date_forecasts = {}
for date in report_sim_dates:
    valid_dates = sim_valid_dates[date]
    forecast_index = pd.DatetimeIndex(valid_dates)
    food_forecasts = {}
    for my_output_path in all_output_paths:
        food_forecasts.update(
            (food_category, all_forecasts[(my_output_path, food_category)][date])
            for food_category in foodprice_categories
        )
        forecast_frame = pd.DataFrame(food_forecasts).set_index(forecast_index)
        # Only the frame of the last output directory is retained per date.
        date_forecasts[date] = forecast_frame
        forecast_frame.to_csv(f"{my_output_path}/forecasts_{date}.csv")