In [11]:
import pandas as pd
# Use the fully qualified option name: the bare 'precision' pattern is ambiguous
# in pandas versions that also define 'styler.format.precision'.
pd.set_option('display.precision', 3)

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

import numpy as np
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import os

import ast
import re

from neuralprophet import NeuralProphet


In [2]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

# Number of samples per calendar year for each supported resampling frequency.
year_period = dict(MS=12, W=52, D=365)

# Resampling frequency used in this notebook; must be a key of `year_period`.
frequency = 'MS'

# Number of samples that span one year at the chosen frequency.
one_year = year_period[frequency]

## Load Data From File

In [3]:
# One category name per line. Names may contain commas (e.g. "Fish, seafood and
# other marine products"), so the file is read line by line rather than parsed
# as CSV; `sep='\n'` is not accepted by newer pandas releases.
with open("./foodprice_categories.txt", encoding="utf-8") as categories_file:
    foodprice_categories = [
        line.rstrip("\r\n") for line in categories_file if line.strip()
    ]

# First CSV column holds the observation dates; use it as a DatetimeIndex.
foodprice_df = pd.read_csv("./all_data.csv.bak", index_col=0)
foodprice_df = foodprice_df.set_index(pd.DatetimeIndex(foodprice_df.index))
foodprice_df

Unnamed: 0,Bakery and cereal products (excluding baby food),Dairy products and eggs,"Fish, seafood and other marine products",Food purchased from restaurants,Food,"Fruit, fruit preparations and nuts",Meat,Other food products and non-alcoholic beverages,Vegetables and vegetable preparations,DEXCAUS,...,XTIMVA01CAM657S,XTIMVA01CAM659S,XTIMVA01CAM664N,XTIMVA01CAM664S,XTIMVA01CAM667S,XTNTVA01CAM664N,XTNTVA01CAM664S,XTNTVA01CAM667S,TOTALNS,TOTALSL
1986-01-01,69.3,70.9,60.6,59.1,67.3,76.0,65.1,77.5,76.0,1.392,...,3.644,12.051,9.368e+09,9.496e+09,6.749e+09,6.906e+08,1.052e+09,7.479e+08,607.369,605.703
1986-02-01,70.3,70.8,61.3,59.1,66.9,77.6,64.2,78.1,68.4,1.392,...,1.965,16.745,9.495e+09,9.632e+09,6.881e+09,-9.880e+07,1.539e+08,1.099e+08,605.807,610.678
1986-03-01,70.6,71.1,61.3,59.3,67.0,79.2,64.2,78.6,66.2,1.392,...,-11.565,1.655,8.803e+09,8.529e+09,6.085e+09,9.138e+08,9.079e+08,6.478e+08,606.799,613.377
1986-04-01,71.3,71.0,61.4,59.7,67.7,82.2,63.6,79.5,71.1,1.392,...,13.334,10.821,1.034e+10,9.569e+09,6.897e+09,3.470e+08,6.563e+08,4.730e+08,614.367,619.658
1986-05-01,71.2,71.4,61.9,59.9,68.2,83.5,64.0,79.8,75.3,1.377,...,-4.236,6.160,9.598e+09,9.091e+09,6.605e+09,7.013e+08,6.893e+08,5.008e+08,621.915,625.820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-01,157.7,145.3,146.2,163.9,156.8,144.5,176.7,142.2,153.4,1.203,...,-1.087,31.561,5.159e+10,4.979e+10,4.075e+10,3.661e+09,3.111e+09,2.546e+09,4259.604,4307.137
2021-07-01,157.9,146.4,146.6,165.2,157.6,141.7,180.9,141.9,154.8,1.240,...,1.990,18.580,5.106e+10,5.205e+10,4.156e+10,3.780e+08,1.129e+09,9.018e+08,4276.202,4323.730
2021-08-01,158.5,148.3,146.8,165.9,158.0,142.5,182.1,141.7,152.2,1.251,...,-2.312,14.723,5.245e+10,5.115e+10,4.060e+10,1.300e+09,2.674e+09,2.123e+09,4316.303,4337.489
2021-09-01,158.1,148.0,147.1,165.9,158.5,141.5,184.8,144.3,150.0,1.262,...,-3.461,8.557,5.080e+10,4.965e+10,3.920e+10,1.561e+09,2.277e+09,1.798e+09,4346.662,4367.402


In [5]:
def get_neuralprophet_df(foodprice_df, food_category, dates, lagged_regressor):
    """Build the training frame for one target column and one lagged regressor.

    Parameters
    ----------
    foodprice_df : pandas.DataFrame
        Source data; must contain the `food_category` and `lagged_regressor` columns.
    food_category : str
        Column to use as the target (`y`).
    dates : index-like
        Row labels of `foodprice_df` to select (callers in this notebook pass
        `foodprice_df.index`).
    lagged_regressor : str
        Column to carry along under its own name as the extra regressor.

    Returns
    -------
    pandas.DataFrame
        Columns `ds` (the selected row labels), `y`, and `<lagged_regressor>`,
        with a fresh 0..n-1 integer index.

    Raises
    ------
    KeyError
        If either column is missing from `foodprice_df`.
    """
    # Explicit label-based selection; the original re-indexed each series by the
    # frame's own index first, which is a no-op on a unique index.
    target = foodprice_df[food_category].loc[dates]
    regressor = foodprice_df[lagged_regressor].loc[dates]

    prophet_df = pd.DataFrame({
        'ds': target.index,
        'y': target,
        lagged_regressor: regressor,
    })
    # Return a new frame with a positional index instead of mutating in place.
    return prophet_df.reset_index(drop=True)
    

## Fit Models Using All Data To Produce Final Forecast

In [6]:
def fit_final_forecast_model(foodprice_df, food_category, all_dates, lagged_regressor, nlags,
                             n_forecasts=18, freq='MS'):
    """Fit a NeuralProphet model on the selected data and forecast beyond it.

    Parameters
    ----------
    foodprice_df : pandas.DataFrame
        Source data containing the target and regressor columns.
    food_category : str
        Target column to forecast.
    all_dates : index-like
        Row labels of `foodprice_df` used for fitting.
    lagged_regressor : str
        Column registered with the model as a lagged regressor.
    nlags : int
        Passed to NeuralProphet as `n_lags`.
    n_forecasts : int, default 18
        Forecast horizon; used both as the model's `n_forecasts` and as the
        number of future periods generated.
    freq : str, default 'MS'
        Frequency string passed to `fit`.

    Returns
    -------
    tuple
        `(model, forecast)`: the fitted NeuralProphet instance and the object
        returned by its `predict` call on the future frame.
    """
    # Use one string form of the regressor name for the data frame and the model.
    lagged_regressor = str(lagged_regressor)
    print("for food category: ", food_category, "\n for lagged regressor: ", lagged_regressor)

    train_df = get_neuralprophet_df(foodprice_df, food_category, all_dates, lagged_regressor)

    train_model = NeuralProphet(n_forecasts=n_forecasts, n_lags=nlags)
    train_model = train_model.add_lagged_regressor(names=[lagged_regressor])
    train_model.fit(train_df, freq=freq)

    # Extend the training frame by the forecast horizon and predict over it.
    future = train_model.make_future_dataframe(train_df, periods=n_forecasts)
    forecast = train_model.predict(future)
    print("done forecast")

    # The original returned `model`, a name never defined in this function.
    return train_model, forecast

In [8]:
# Load the ensemble definition: one row per food category, with a `models`
# column holding the string form of a Python list of experiment names.
ensemble_file = pd.read_csv("./ensemble_models.csv")

# Turn each stringified list into a real list.
ensemble_file['models'] = ensemble_file['models'].map(ast.literal_eval)

# Mapping of food category -> {'models': [...]}.
by_category = ensemble_file.set_index('food_category')
d = by_category.T.to_dict()
print(d)

{'Bakery and cereal products (excluding baby food)': {'models': ['neuralprophet_202110_IRLTLT01CAM156N_nlags_36', 'nbeatsfredvars_202110', 'neuralprophet_202110_CUSR0000SAF112_nlags_24']}, 'Dairy products and eggs': {'models': ['neuralprophet_202110_CUSR0000SAF112_nlags_48', 'nbeats_202110']}, 'Fish, seafood and other marine products': {'models': ['neuralprophet_202110_CUSR0000SAF113_nlags_48', 'nbeats_202110', 'neuralprophet_202110_QCAR368BIS_nlags_60']}, 'Food purchased from restaurants': {'models': ['nbeatsfredvars_202110', 'nbeats_202110', 'neuralprophet_202110_WILL5000IND_nlags_36']}, 'Food': {'models': ['nbeatsfredvars_202110', 'neuralprophet_202110_CUSR0000SAF112_nlags_24', 'neuralprophet_202110_IRLTLT01CAM156N_nlags_60']}, 'Fruit, fruit preparations and nuts': {'models': ['neuralprophet_202110_CUSR0000SAF112_nlags_24', 'neuralprophet_202110_CUSR0000SAF112_nlags_36', 'neuralprophet_202110_CPALCY01CAM661N_nlags_24']}, 'Meat': {'models': ['nbeatsfredvars_202110', 'neuralprophet_20

In [12]:
%%time
all_forecasts = {} 
count = 0
for key,value in d.items():
    for val in (value['models']):
        print("this is key ", key)
        if not val.startswith("neuralprophet"):
            print("ignoring bc not neuralprophet")
        else:
            exp = re.compile(r'.*_(.*)_nlags_(.*)')
            re_match = exp.match(val)
            lagged_reg = str(re_match.group(1))
            nlags = int(re_match.group(2))
            print(lagged_reg)
            
            model, forecast= fit_final_forecast_model(foodprice_df, key, foodprice_df.index, lagged_reg, nlags)
            all_forecasts[(key, lagged_reg, nlags)] = forecast
            
            print("----------------done--------  ", count, "-----------")
            count += 1
#         break
#     break


INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16
INFO - (NP.config.set_auto_batch_epoch) - Auto-set epochs to 223


  0%|          | 0/229 [00:00<?, ?it/s]

this is key  Bakery and cereal products (excluding baby food)
IRLTLT01CAM156N
for food category:  Bakery and cereal products (excluding baby food) 
 for lagged regressor:  IRLTLT01CAM156N


INFO - (NP.utils_torch.lr_range_test) - lr-range-test results: steep: 4.83E-02, min: 3.26E-01


  0%|          | 0/229 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [13]:
# Number of forecast steps; matches the yhat1..yhat18 columns read below.
FORECAST_HORIZON = 18

for (food_category, lagged_reg, nlags), forecast_df in all_forecasts.items():
    print(food_category, lagged_reg, nlags)

    # For each step column, take its first non-null value.
    yhat_columns = ["yhat" + str(step) for step in range(1, FORECAST_HORIZON + 1)]
    all_yhats = [
        forecast_df.loc[forecast_df[column].first_valid_index(), column]
        for column in yhat_columns
    ]
    print("this is all yhats ", all_yhats)

    # Index the values by the last FORECAST_HORIZON timestamps of the forecast frame.
    forecast_dates = pd.DatetimeIndex(forecast_df['ds'].iloc[-FORECAST_HORIZON:])
    final_forecast_df = pd.DataFrame(
        {food_category: pd.Series(all_yhats, index=forecast_dates)}
    )

    output_path = "./output/final_forecasts/" + food_category + "_" + str(lagged_reg) + "_nlags_" + str(nlags)
    if not os.path.exists(output_path):
        print("making new directory for: ", output_path)
    else:
        # The CSV below is rewritten even when the directory already exists.
        print("already done")
    # makedirs also creates missing parents, which os.mkdir would fail on.
    os.makedirs(output_path, exist_ok=True)

    final_forecast_df.to_csv(f"{output_path}/fc_final.csv")