## Generate retrospective forecasts using Prophet

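For each simulated report date, fit a Prophet model on the data available up to that date, forecast the following 18 months, and compare the forecast against the actual values.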

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import numpy as np
import pandas as pd
# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in newer pandas releases (it also matches 'styler.format.precision').
pd.set_option('display.precision', 3)
from prophet import Prophet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import os

In [None]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily

Set the output path corresponding to the experiment. The ensembling experiment will 
look for specific output files in the output directory corresponding to forecasts, 
e.g.: ./output/prophet_202110/forecasts_2015-07-01.csv
"""

# Number of samples in one year for each supported sampling frequency.
year_period = {'MS': 12, 'W': 52, 'D': 365}
frequency = 'MS'
one_year = year_period[frequency]

# Directory that receives every file written by this notebook.
output_path = "./output/prophet_TEST"
# makedirs also creates the parent "./output" directory, and exist_ok=True
# makes re-running the cell a no-op instead of a check-then-create race.
os.makedirs(output_path, exist_ok=True)

## Load Data From File

Be sure to run `load_data.ipynb` first to produce the data file `all_data.csv`.

In [None]:
# The category file holds one name per line. Read it directly instead of using
# pd.read_csv(sep='\n'), which recent pandas releases reject with a ValueError.
with open("./foodprice_categories.txt", encoding="utf-8") as categories_file:
    foodprice_categories = [line.rstrip("\r\n") for line in categories_file if line.strip()]

# Load the price table and index it by date so it can be sliced with date masks.
foodprice_df = pd.read_csv("./all_data.csv", index_col=0)
foodprice_df = foodprice_df.set_index(pd.DatetimeIndex(foodprice_df.index))
foodprice_df

## Experiment Setup

In [None]:
# One simulated report date per line, kept as strings (they are later used as
# dictionary keys and embedded in output file names). Read the file directly:
# pd.read_csv(sep='\n') is rejected by recent pandas releases.
with open("./reportsimdates.txt", encoding="utf-8") as dates_file:
    report_sim_dates = [line.rstrip("\r\n") for line in dates_file if line.strip()]
report_sim_dates

In [None]:
# For every simulated report date, split the available timestamps into a
# training window (everything up to and including the date) and a validation
# window (the 18 months that follow it).
sim_train_dates = {}
sim_valid_dates = {}

for date in report_sim_dates:
    all_dates = foodprice_df.index
    horizon_end = pd.to_datetime(date) + pd.DateOffset(months=18)

    sim_train_dates[date] = all_dates[all_dates <= date]

    after_cutoff = all_dates > date
    within_horizon = all_dates <= horizon_end
    sim_valid_dates[date] = all_dates[after_cutoff & within_horizon]

In [None]:
def get_prophet_df(foodprice_df, food_category, dates):
    """Return one category's values at the given dates in Prophet's input layout.

    Parameters:
        foodprice_df: DataFrame indexed by date, one column per food category.
        food_category: Name of the column to extract.
        dates: Index labels (dates) of the rows to select.

    Returns:
        A DataFrame with a fresh integer index and two columns: 'ds' (the
        selected dates) and 'y' (the category values at those dates).
    """
    selected = foodprice_df.loc[dates, food_category]
    df = selected.reset_index()
    # Name the columns positionally so the result is correct whatever the
    # source index happens to be called (e.g. 'REF_DATE', unnamed, or other).
    df.columns = ['ds', 'y']
    return df

In [None]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between y_true and y_pred."""
    squared_error = mean_squared_error(y_true, y_pred)
    return np.sqrt(squared_error)

# Validation metrics, keyed by the name under which each score is reported.
# Every entry is a callable invoked as fn(y_true=..., y_pred=...).
metrics = dict(
    r2_score=r2_score,
    mae=mean_absolute_error,
    mape=mean_absolute_percentage_error,
    mse=mean_squared_error,
    rmse=rmse,
)

## Fit and Evaluate Models for All CPI Categories (Validation)

In [None]:
def fit_eval_forecast_model(foodprice_df, food_category, train_dates, valid_dates):
    """Fit a default Prophet model on the training window and score it on the validation window.

    Parameters:
        foodprice_df: DataFrame indexed by date, one column per food category.
        food_category: Name of the column to model.
        train_dates: Dates of the rows used to fit the model.
        valid_dates: Dates of the rows used to evaluate the forecast.

    Returns:
        A tuple (model, valid_forecast, valid_metrics): the fitted Prophet
        model, the frame returned by model.predict for the validation dates,
        and a dict mapping each name in the module-level `metrics` table to
        its score on the validation data.
    """
    # Fit a Prophet model with default parameters on the training window.
    train_df = get_prophet_df(foodprice_df, food_category, train_dates)
    model = Prophet()
    model.fit(train_df)

    # Forecast exactly the timestamps of the validation window.
    valid_df = get_prophet_df(foodprice_df, food_category, valid_dates)
    valid_forecast = model.predict(valid_df[['ds']])

    # Score the point forecast (yhat) against the actual values with every metric.
    valid_metrics = {}
    for metric_name, metric_fn in metrics.items():
        valid_metrics[metric_name] = metric_fn(y_true=valid_df.y, y_pred=valid_forecast.yhat)

    return model, valid_forecast, valid_metrics

In [None]:
def plot_valid_forecast(model, food_category, sim_forecast_date, valid_forecast, valid_actual):
    """Plot a validation forecast against the actual values, save it and show it.

    The figure is written as both SVG and PNG into the module-level
    `output_path` directory, named after the category and the simulated
    forecast date.

    Parameters:
        model: Object whose plot(forecast, figsize=...) method returns a
            matplotlib figure (the callers pass a fitted Prophet model).
        food_category: Category name, used in the title and the file names.
        sim_forecast_date: Simulated forecast date, used in the title and the
            file names.
        valid_forecast: Forecast frame passed to model.plot.
        valid_actual: Frame with 'ds' and 'y' columns holding the actual
            values, drawn as green points.
    """
    fig = model.plot(valid_forecast, figsize=(10,4))
    ax = fig.axes[0]
    ax.scatter(valid_actual.ds, valid_actual.y, color='green', s=10, label='Actual CPI')
    # Use this function's own argument for the title rather than a global
    # `date` variable that merely happens to exist in the calling cell.
    ax.set_title(f'CPI Forecast (Simulated From {sim_forecast_date}) - {food_category}')
    ax.set_ylabel('CPI (% of 2002 Baseline)')
    ax.set_xlabel('Date')

    ### Creating legend elements
    cmap = plt.get_cmap("tab10")
    handles, _ = ax.get_legend_handles_labels()
    # Proxy artists describing the elements drawn by model.plot.
    actual_patch = Line2D([0], [0], color='w', markerfacecolor='black', marker='o', label='Historical CPI')
    fc_patch = mpatches.Patch(color=cmap(0), label='Predicted CPI Uncertainty', alpha=0.5)
    fc_line = Line2D([0], [0], color=cmap(0), label='Predicted CPI')
    handles.extend([actual_patch, fc_line, fc_patch])
    ax.legend(handles=handles)

    plt.savefig(f"{output_path}/fc_valid_{food_category}_{sim_forecast_date}.svg", bbox_inches='tight')
    plt.savefig(f"{output_path}/fc_valid_{food_category}_{sim_forecast_date}.png", bbox_inches='tight')
    plt.show()

In [None]:
# Run the retrospective experiment: one model fit per (category, simulated
# report date) pair. Results are stored as {category: {date: result}}.
all_valid_metrics = {}
all_forecasts = {}

for food_category in foodprice_categories:

    # Results for this category, keyed by simulated report date.
    annual_valid_metrics = {}
    annual_forecasts = {}

    for date in report_sim_dates:

        train_dates, valid_dates = sim_train_dates[date], sim_valid_dates[date]

        model, valid_forecast, valid_metrics = fit_eval_forecast_model(
            foodprice_df, food_category, train_dates, valid_dates
        )

        # Plot the forecast together with the actual validation values.
        valid_actual = get_prophet_df(foodprice_df, food_category, valid_dates)
        plot_valid_forecast(model, food_category, date, valid_forecast, valid_actual)

        valid_metrics_series = pd.Series(valid_metrics)
        print(valid_metrics_series)

        annual_valid_metrics[date] = valid_metrics_series.to_frame().mean(axis=1)
        annual_forecasts[date] = valid_forecast.yhat

    all_valid_metrics[food_category], all_forecasts[food_category] = (
        annual_valid_metrics,
        annual_forecasts,
    )

### Collect Validation Metrics and Save to File

In [None]:
# Average each metric over all simulated report dates, per food category.
mean_valid_metrics = {
    food_category: pd.DataFrame(valid_scores).mean(axis=1)
    for food_category, valid_scores in all_valid_metrics.items()
}

# One row per category, one column per metric; saved to the output directory.
mean_valid_metrics_df = pd.DataFrame(mean_valid_metrics).transpose()
mean_valid_metrics_df.to_csv(f"{output_path}/mean_fc_valid_metrics.csv")
mean_valid_metrics_df

## Save Validation Forecasts to File (For Ensembling)

In [None]:
# Write one CSV per simulated report date: rows are the validation dates,
# columns are the food categories, values are the point forecasts.
date_forecasts = {}
for date in report_sim_dates:
    valid_dates = sim_valid_dates[date]
    food_forecasts = {
        food_category: all_forecasts[food_category][date]
        for food_category in foodprice_categories
    }
    # Replace the forecasts' positional index with the validation dates.
    date_forecasts[date] = pd.DataFrame(food_forecasts).set_index(pd.DatetimeIndex(valid_dates))
    date_forecasts[date].to_csv(f"{output_path}/forecasts_{date}.csv")