In [None]:
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import os
from itertools import combinations

import numpy as np
import pickle
import data

import importlib
importlib.reload(data)

from data import update_expl_data, update_target_data, food_categories, preprocess_expl
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import json

In [None]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

# Number of samples that make up one year at each supported resampling frequency.
year_period = {'MS': 12, 'W': 52, 'D': 365}
frequency = 'MS'
one_year = year_period[frequency]

# Directory the ensemble results are read from further down in this notebook.
# makedirs(exist_ok=True) creates any missing parent (e.g. "./output") and is a
# no-op when the directory already exists, so no separate existence checks are needed.
output_path = "./output/ensemble_TEST"
os.makedirs(output_path, exist_ok=True)

# Sub-directory of ./output holding the NeuralProphet final forecasts, and the
# experiment tag that is stripped from experiment names to build their folder names.
neural_prophet_data_dir = "neuralprophet_final_forecasts_TEST"
neural_prophet_exp_name = "neuralprophet_TEST"

## Load Data From File

In [None]:
# The categories file holds one category name per line. It is read directly
# instead of through pd.read_csv(sep='\n'): recent pandas versions refuse a
# line terminator as the separator and raise a ValueError.
# Blank lines are skipped, mirroring read_csv's default skip_blank_lines=True.
with open("./foodprice_categories.txt", encoding="utf-8") as categories_file:
    foodprice_categories = [
        line.rstrip("\r\n") for line in categories_file if line.strip()
    ]

# First CSV column becomes the index and is converted to a DatetimeIndex so the
# frame can be filtered by date strings later on.
foodprice_df = pd.read_csv("./all_data.csv", index_col=0)
foodprice_df = foodprice_df.set_index(pd.DatetimeIndex(foodprice_df.index))

# Keep only the columns listed in the categories file, in that order.
foodprice_df = foodprice_df[foodprice_categories]
foodprice_df

In [None]:
"""
Load ensemble configurations and scores. 
"""

# Unpickling can execute arbitrary code: only load a results file that was
# produced by a trusted run of the ensemble experiments.
# NOTE: `results_df` is rebound further down to the forecast change statistics
# table, so this cell must be re-run before anything that needs "best_config".
results_df = pd.read_pickle(f"{output_path}/ensemble_results.pkl")
results_df

## Final forecast

In [None]:
def get_final_forecast_ensemble(food_category, results_dict):
    """
    Collect the final forecasts of every experiment in a category's ensemble.

    For each experiment name listed under ``results_dict[food_category]`` the
    matching ``fc_final.csv`` is read from ``./output``. Experiments whose name
    starts with ``'neuralprophet'`` live under ``neural_prophet_data_dir`` in a
    folder named ``<food_category>_<experiment>`` with the
    ``neural_prophet_exp_name`` tag removed; all others live in a folder named
    after the experiment itself.

    Parameters
    ----------
    food_category : str
        Column to extract from each forecast file; also the key into ``results_dict``.
    results_dict : dict
        Maps a food category to an iterable of experiment names.

    Returns
    -------
    pandas.DataFrame
        One column per experiment, indexed by forecast date. Column names are
        the experiment names with any ``"<food_category>_"`` substring removed.

    Raises
    ------
    ValueError
        If a forecast file contains none of the recognised date columns
        (``REF_DATE``, ``Unnamed: 0``, ``ds``), or (from ``pd.concat``) if the
        category has no experiments.
    """
    # Checked in this order; the first one present becomes the date index.
    date_columns = ("REF_DATE", "Unnamed: 0", "ds")

    forecasts = []
    for exp_name in results_dict[food_category]:
        if exp_name.startswith('neuralprophet'):
            full_exp_name = f"{food_category}_{exp_name}"
            file_exp_name = full_exp_name.replace(f"_{neural_prophet_exp_name}_", '_')
            fc_path = f"./output/{neural_prophet_data_dir}/{file_exp_name}/fc_final.csv"
        else:
            full_exp_name = exp_name
            fc_path = f"./output/{full_exp_name}/fc_final.csv"
        fc_df = pd.read_csv(fc_path)

        # Column label shown in plots: experiment name without the category.
        full_exp_name = full_exp_name.replace(f"{food_category}_", '')

        date_column = next((name for name in date_columns if name in fc_df.columns), None)
        if date_column is None:
            # Without this guard the default integer index would be converted
            # into nanosecond offsets from 1970-01-01 and silently mis-date the forecast.
            raise ValueError(
                f"No date column ({', '.join(date_columns)}) found in {fc_path}"
            )
        fc_df = fc_df.set_index(date_column)
        fc_df.index = pd.DatetimeIndex(fc_df.index)

        fc_series = fc_df[food_category].rename(full_exp_name)
        print(full_exp_name)
        forecasts.append(fc_series)

    return pd.concat(forecasts, axis=1)


In [None]:
# Map each row label of the loaded ensemble results to its "best_config" entry;
# this dict is what get_final_forecast_ensemble receives as `results_dict`.
# NOTE: `results_df` is rebound later in this notebook to a table without a
# "best_config" column, so this cell only works before that cell has run.
results = results_df["best_config"].to_dict()
results

In [None]:
def get_final_forecast_df(food_category, experiment_names):
    """
    Collect the final forecasts of the given experiments for one food category.

    Experiments whose name starts with ``'neuralprophet'`` are read from
    ``./output/<neural_prophet_data_dir>/<food_category>_<experiment>`` (with the
    ``neural_prophet_exp_name`` tag removed from the folder name); all others
    from ``./output/<experiment>``.

    Parameters
    ----------
    food_category : str
        Column to extract from each experiment's ``fc_final.csv``.
    experiment_names : iterable of str
        Experiments to load.

    Returns
    -------
    pandas.DataFrame
        One column per experiment (named after the experiment), indexed by
        forecast date.

    Raises
    ------
    ValueError
        If a forecast file contains none of the recognised date columns
        (``REF_DATE``, ``Unnamed: 0``, ``ds``), or (from ``pd.concat``) if
        ``experiment_names`` is empty.
    """
    # Checked in this order; the first one present becomes the date index.
    date_columns = ("REF_DATE", "Unnamed: 0", "ds")

    forecasts = []

    for ex_name in experiment_names:
        print(ex_name)

        if ex_name.startswith('neuralprophet'):
            file_exp_name = f"{food_category}_{ex_name}".replace(f"_{neural_prophet_exp_name}_", '_')
            fc_path = f"./output/{neural_prophet_data_dir}/{file_exp_name}/fc_final.csv"
        else:
            fc_path = f"./output/{ex_name}/fc_final.csv"
        fc_df = pd.read_csv(fc_path)

        date_column = next((name for name in date_columns if name in fc_df.columns), None)
        if date_column is None:
            # Without this guard the default integer index would be converted
            # into nanosecond offsets from 1970-01-01 and silently mis-date the forecast.
            raise ValueError(
                f"No date column ({', '.join(date_columns)}) found in {fc_path}"
            )
        fc_df = fc_df.set_index(date_column)
        fc_df.index = pd.DatetimeIndex(fc_df.index)

        forecasts.append(fc_df[food_category].rename(ex_name))

    return pd.concat(forecasts, axis=1)

In [None]:
# One figure per food category: every ensemble member's forecast, their mean,
# and the most recent historical CPI observations for context.
for category in food_categories:

    final_forecasts_df = get_final_forecast_ensemble(category, results)
    before_2023 = final_forecasts_df.index < "2023-01-01"
    final_forecasts_df = final_forecasts_df[before_2023]

    fig, ax = plt.subplots(figsize=(12,4))

    # Each member gets the next colour of the default cycle (C0, C1, ...).
    for position, member in enumerate(final_forecasts_df.columns):
        ax.plot(final_forecasts_df[member], color=f"C{position}", label=member)

    mean_forecast = final_forecasts_df.mean(axis=1)
    ax.plot(mean_forecast, color="purple", label="Mean Forecast", lw=3)

    # Last 120 historical rows of this category.
    recent_history = foodprice_df[category].iloc[-120:]
    ax.scatter(recent_history.index, recent_history, color='black', s=1, label='Historical CPI')

    ax.set_title(category)
    ax.legend()
    ax.grid()
    # To export the figure, uncomment:
    # plt.savefig(f"./report_output/ensemble_forecast_raw_{category}.svg", bbox_inches='tight')
    plt.show()
    


In [None]:
# One figure per food category: the ensemble mean with a shaded band spanning
# the lowest and highest member forecast, plus recent historical CPI.
for category in food_categories:

    final_forecasts_df = get_final_forecast_ensemble(category, results)
    before_2023 = final_forecasts_df.index < "2023-01-01"
    final_forecasts_df = final_forecasts_df[before_2023]

    mean_forecast = final_forecasts_df.mean(axis=1)
    lowest_forecast = final_forecasts_df.min(axis=1)
    highest_forecast = final_forecasts_df.max(axis=1)

    fig, ax = plt.subplots(figsize=(12,4))
    ax.plot(mean_forecast, color="purple", label="Mean Forecast")
    ax.fill_between(
        mean_forecast.index,
        lowest_forecast,
        highest_forecast,
        color="purple",
        alpha=0.2,
        label='Forecast Range',
    )

    # Last 120 historical rows of this category.
    recent_history = foodprice_df[category].iloc[-120:]
    ax.scatter(recent_history.index, recent_history, color='black', s=2, label='Historical CPI')

    ax.set_title(category)
    ax.legend(loc='upper left')
    ax.grid()
    # To export the figure, uncomment:
    # plt.savefig(f"./report_output/ensemble_forecast_{category}.svg", bbox_inches='tight')
    plt.show()


## Forecast as Predicted Percent Change

Below we compute the expected change in CPI for 2022, relative to mean CPIs in 2021 so far. 

In [None]:
# Per category: the historical CPI extended by the ensemble's min / mean / max forecast.
food_forecast_stats = {}
# Per category: summary numbers used below to compute the expected percent change.
food_forecast_change_stats = {}

for category in food_categories:

    forecast_stats = {}
    forecast_change_stats = {}

    final_forecasts_df = get_final_forecast_ensemble(category, results)
    final_forecasts_df = final_forecasts_df.loc[final_forecasts_df.index < "2023-01-01"]
    historical_cpi = foodprice_df[category]

    # Per-date envelope across the ensemble members.
    fc_min = final_forecasts_df.min(axis=1)
    fc_mean = final_forecasts_df.mean(axis=1)
    fc_max = final_forecasts_df.max(axis=1)

    # pd.concat replaces Series.append, which was deprecated in pandas 1.4 and
    # removed in pandas 2.0; the resulting series are the same.
    forecast_stats['min'] = pd.concat([historical_cpi, fc_min])
    forecast_stats['mean'] = pd.concat([historical_cpi, fc_mean])
    forecast_stats['max'] = pd.concat([historical_cpi, fc_max])

    food_forecast_stats[category] = pd.DataFrame(forecast_stats)
    food_forecast_stats[category] = food_forecast_stats[category].loc[food_forecast_stats[category].index < "2023-01-01"]

    # NOTE(review): positional windows. The last 12 rows are presumably the
    # 2022 forecast months and the last 9 historical rows presumably January to
    # September 2021 - confirm against the data before changing `frequency`
    # or updating all_data.csv.
    fc_stats = food_forecast_stats[category].iloc[-12:]
    historical_data = foodprice_df[category].iloc[-9:]

    forecast_change_stats['2021_mean'] = historical_data.mean()
    forecast_change_stats['2021_max'] = historical_data.max()
    forecast_change_stats['2022_fc_min'] = fc_stats.min().min()
    # NOTE(review): this averages the column means of the min / mean / max
    # series rather than the 'mean' column alone - confirm that is intended.
    forecast_change_stats['2022_fc_mean'] = fc_stats.mean().mean()
    forecast_change_stats['2022_fc_max'] = fc_stats.max().max()

    food_forecast_change_stats[category] = pd.Series(forecast_change_stats)


In [None]:
# From here on floats are displayed with one decimal place.
pd.set_option('display.float_format', lambda x: '%.1f' % x)
# One row per food category, one column per summary statistic.
# NOTE: this rebinds `results_df`, which earlier held the ensemble results
# loaded from the pickle file; cells that read "best_config" from it must be
# run before this one.
results_df = pd.DataFrame(food_forecast_change_stats).T
results_df

In [None]:
# Side-by-side view of the 2021 baseline mean and the 2022 forecast mean.
results_df.loc[:, ['2021_mean', '2022_fc_mean']]

In [None]:
## percent change formula from Jim
# P1 = P_0*CPI_1/100 and P2 = P_0*CPI_2/100
# P_0 cancels, so the percent change is (CPI_2 - CPI_1) / CPI_1 * 100.

p1, p2 = results_df['2021_mean'], results_df['2022_fc_mean']

p2.sub(p1).div(p1).mul(100).to_frame()


In [None]:
## percent change formula from Jim
# P1 = P_0*CPI_1/100 and P2 = P_0*CPI_2/100
# Same formula as for the means, applied to the 2021 maximum and the 2022 forecast maximum.

p1, p2 = results_df['2021_max'], results_df['2022_fc_max']

p2.sub(p1).div(p1).mul(100).to_frame()


In [None]:
## Actual percent change from 2020 to 2021 (Up to September, 2021)

cpi_dates = foodprice_df.index

# Rows dated within calendar year 2020, averaged per category.
in_2020 = (cpi_dates >= "2020-01-01") & (cpi_dates < "2021-01-01")
p1 = foodprice_df.loc[in_2020].mean(axis=0).rename("2020 Mean CPI")

# Rows dated within calendar year 2021, averaged per category.
in_2021 = (cpi_dates >= "2021-01-01") & (cpi_dates < "2022-01-01")
p2 = foodprice_df.loc[in_2021].mean(axis=0).rename("2021 Mean CPI")

result = p2.sub(p1).div(p1).mul(100).rename("Percent Change")
pd.concat((p1, p2, result), axis=1)

In [None]:
# Fractional change (not yet multiplied by 100) of each 2022 forecast statistic
# relative to the 2021 mean and to the 2021 maximum, one Series per statistic.
pct_change_from_mean_results = {}
pct_change_from_max_results = {}

for col in ['2022_fc_min', '2022_fc_mean', '2022_fc_max']:
    # Computed directly as new / old - 1. The previous transpose + pct_change
    # round trip relied on pct_change's default fill_method='pad' (deprecated
    # in recent pandas), which would forward-fill a missing forecast with the
    # baseline and report it as a 0% change instead of NaN.
    forecast_value = results_df[col]
    pct_change_from_mean_results[col] = forecast_value / results_df["2021_mean"] - 1
    pct_change_from_max_results[col] = forecast_value / results_df["2021_max"] - 1

In [None]:
# Fractional changes relative to the 2021 mean, shown as percentages.
pd.DataFrame(pct_change_from_mean_results).mul(100)

In [None]:
# Fractional changes relative to the 2021 maximum, shown as percentages.
# The values are fractions, so they are scaled by 100 like the mean-based
# table; with the one-decimal float format they would otherwise all display
# as roughly 0.0 or 0.1.
pd.DataFrame(pct_change_from_max_results) * 100