In [1]:
import getpass
import os

import pandas as pd

from data import update_expl_data, update_target_data, food_categories, preprocess_expl

In [2]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

# Pandas offset alias handed to every .resample() call in this notebook.
frequency = "MS"

In [3]:
"""
Load food CPI data from January 1986 to the most recently available data.
"""
# Make sure the working directories exist. exist_ok=True makes each call a
# no-op when the directory is already present, replacing the separate
# exists()-then-mkdir() check (which can race with another process).
# NOTE(review): ./statscan_data is presumably used by the helpers in
# data.py; it is not referenced directly in this notebook.
os.makedirs("./data_files", exist_ok=True)
os.makedirs("./statscan_data", exist_ok=True)

# Fetch/refresh the food CPI series; the CSV path is passed to the helper.
foodprice_df = update_target_data(food_categories, './data_files/food_cpi.csv')

# Resample to the notebook-wide frequency: average within each period, then
# interpolate any periods left empty by the resampling.
foodprice_df = foodprice_df.resample(frequency).mean().interpolate()
foodprice_df

Unnamed: 0_level_0,Bakery and cereal products (excluding baby food),Dairy products and eggs,"Fish, seafood and other marine products",Food purchased from restaurants,Food,"Fruit, fruit preparations and nuts",Meat,Other food products and non-alcoholic beverages,Vegetables and vegetable preparations
REF_DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1986-01-01,69.3,70.9,60.6,59.1,67.3,76.0,65.1,77.5,76.0
1986-02-01,70.3,70.8,61.3,59.1,66.9,77.6,64.2,78.1,68.4
1986-03-01,70.6,71.1,61.3,59.3,67.0,79.2,64.2,78.6,66.2
1986-04-01,71.3,71.0,61.4,59.7,67.7,82.2,63.6,79.5,71.1
1986-05-01,71.2,71.4,61.9,59.9,68.2,83.5,64.0,79.8,75.3
...,...,...,...,...,...,...,...,...,...
2021-06-01,157.7,145.3,146.2,163.9,156.8,144.5,176.7,142.2,153.4
2021-07-01,157.9,146.4,146.6,165.2,157.6,141.7,180.9,141.9,154.8
2021-08-01,158.5,148.3,146.8,165.9,158.0,142.5,182.1,141.7,152.2
2021-09-01,158.1,148.0,147.1,165.9,158.5,141.5,184.8,144.3,150.0


In [4]:
"""
Load exogenous/auxiliary explanatory variables from FRED: https://fred.stlouisfed.org/
These data sources reflect various economic factors that may improve forecasts. 
Please visit the FRED website to learn more about these series, and to find others
that may be useful for food CPI forecasting. 
"""

# Obtain the FRED API key without leaking it into the saved notebook.
# A key supplied through the FRED_API_KEY environment variable is used when
# present (lets "Restart & Run All" work unattended); otherwise the user is
# prompted. getpass is used instead of input() so the typed key is not
# echoed into the cell output.
FRED_API_KEY = os.environ.get("FRED_API_KEY")
if not FRED_API_KEY:
    print(">>> Please paste your FRED API key below.")
    FRED_API_KEY = getpass.getpass("FRED API key: ")

# Hand-picked FRED series IDs; these are listed first in the final list.
data_sources = ["DEXCAUS",
                "DCOILWTICO",
                "WILL5000IND",
                "VXOCLS",
                "CUSR0000SAF112",
                "CUSR0000SAF113",
                "CPIFABSL",
                "UNRATE",
                "FEDFUNDS",
                "IRLTLT01CAM156N",
                "LRUNTTTTCAM156S",
                "CPALCY01CAM661N",
                "CPGRLE01CAM657N",
                "QCAR368BIS"
               ]

# Further series IDs are read from the column named '0' of the CSV file.
other_fred_sources = pd.read_csv("./FRED_series_names.csv")['0'].to_list()

# Merge both lists, keeping first-occurrence order. dict keys are unique and
# insertion-ordered, so this also drops IDs repeated inside the CSV itself,
# which the previous "not in data_sources" filter let through.
data_sources = list(dict.fromkeys(data_sources + other_fred_sources))

# Download/refresh the series, then run the project's preprocessing step.
expl_df = preprocess_expl(
    update_expl_data(
        data_sources,
        './data_files/expl_vars.csv',
        sleep_sec=1.0,
        api_key=FRED_API_KEY,
    )
)

# Despite the "_monthly" suffix, this frame is resampled at `frequency`,
# which is only monthly when frequency == 'MS'. The name is kept because a
# later cell refers to it.
expl_df_monthly = expl_df.resample(frequency).mean().interpolate()
expl_df_monthly

[FRED API key redacted]
TOTALSL loaded successfully, 323 of 323.

Unnamed: 0,DEXCAUS,DCOILWTICO,WILL5000IND,VXOCLS,CUSR0000SAF112,CUSR0000SAF113,CPIFABSL,UNRATE,FEDFUNDS,IRLTLT01CAM156N,...,XTIMVA01CAM657S,XTIMVA01CAM659S,XTIMVA01CAM664N,XTIMVA01CAM664S,XTIMVA01CAM667S,XTNTVA01CAM664N,XTNTVA01CAM664S,XTNTVA01CAM667S,TOTALNS,TOTALSL
1986-01-01,1.406561,23.040000,4.852174,18.119130,102.260870,107.960870,107.452174,6.939130,8.006087,10.006370,...,2.840961,14.296042,9.428739e+09,9.561243e+09,6.812040e+09,3.130609e+08,6.226826e+08,4.427938e+08,606.621956,608.082497
1986-02-01,1.405033,15.580476,5.119048,20.487143,101.952381,105.585714,107.495238,7.200000,7.679048,9.698452,...,-4.477950,9.559469,9.165286e+09,9.106676e+09,6.502263e+09,3.833905e+08,5.129476e+08,3.660732e+08,606.279391,611.963209
1986-03-01,1.401948,12.529773,5.439773,23.459318,101.518182,106.806818,107.695455,7.152273,7.246136,9.137852,...,0.318618,6.029776,9.538316e+09,9.025059e+09,6.472681e+09,6.432818e+08,7.878182e+08,5.643973e+08,610.410928,616.374423
1986-04-01,1.387868,12.843636,5.597727,23.015455,101.147727,108.897727,107.990909,7.147727,6.923182,8.888330,...,4.948582,8.596442,9.987768e+09,9.340664e+09,6.757419e+09,5.160977e+08,6.720500e+08,4.862854e+08,617.969448,622.598794
1986-05-01,1.375520,15.407045,5.636136,18.788409,101.247727,109.288636,108.247727,7.200000,6.883409,9.021989,...,-2.011849,4.617209,9.644464e+09,9.151795e+09,6.618074e+09,6.516636e+08,6.139386e+08,4.442589e+08,624.767260,627.841625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-01,1.259393,67.943043,220.541304,15.079565,289.881826,315.584609,279.339652,5.008696,0.085217,1.225907,...,-2.861483,11.774016,5.165626e+10,5.043196e+10,3.993043e+10,1.424896e+09,2.484222e+09,1.967285e+09,4330.822423,4351.795575
2021-09-01,1.266664,71.522045,221.021364,17.491591,295.572977,316.726227,281.726727,4.704545,0.080000,1.422409,...,-3.460786,8.557485,5.079560e+10,4.965300e+10,3.919730e+10,1.560600e+09,2.277300e+09,1.797757e+09,4346.662100,4367.402410
2021-10-01,1.243476,81.476667,222.055238,17.870000,298.253000,316.836000,282.952000,4.409524,0.080000,1.597000,...,-3.460786,8.557485,5.079560e+10,4.965300e+10,3.919730e+10,1.560600e+09,2.277300e+09,1.797757e+09,4346.662100,4367.402410
2021-11-01,1.256618,78.858636,232.091136,17.870000,298.253000,316.836000,282.952000,4.200000,0.080000,1.597000,...,-3.460786,8.557485,5.079560e+10,4.965300e+10,3.919730e+10,1.560600e+09,2.277300e+09,1.797757e+09,4346.662100,4367.402410


In [6]:
# Place the food CPI targets and the explanatory variables side by side on
# their shared index, then keep only the rows where every column has a value.
combined_df = (
    pd.concat([foodprice_df, expl_df_monthly], axis="columns")
    .dropna(axis="index")
)

# Persist the merged table and show it as the cell's output.
combined_df.to_csv("all_data.csv")
combined_df

Unnamed: 0,Bakery and cereal products (excluding baby food),Dairy products and eggs,"Fish, seafood and other marine products",Food purchased from restaurants,Food,"Fruit, fruit preparations and nuts",Meat,Other food products and non-alcoholic beverages,Vegetables and vegetable preparations,DEXCAUS,...,XTIMVA01CAM657S,XTIMVA01CAM659S,XTIMVA01CAM664N,XTIMVA01CAM664S,XTIMVA01CAM667S,XTNTVA01CAM664N,XTNTVA01CAM664S,XTNTVA01CAM667S,TOTALNS,TOTALSL
1986-01-01,69.3,70.9,60.6,59.1,67.3,76.0,65.1,77.5,76.0,1.406561,...,2.840961,14.296042,9.428739e+09,9.561243e+09,6.812040e+09,3.130609e+08,6.226826e+08,4.427938e+08,606.621956,608.082497
1986-02-01,70.3,70.8,61.3,59.1,66.9,77.6,64.2,78.1,68.4,1.405033,...,-4.477950,9.559469,9.165286e+09,9.106676e+09,6.502263e+09,3.833905e+08,5.129476e+08,3.660732e+08,606.279391,611.963209
1986-03-01,70.6,71.1,61.3,59.3,67.0,79.2,64.2,78.6,66.2,1.401948,...,0.318618,6.029776,9.538316e+09,9.025059e+09,6.472681e+09,6.432818e+08,7.878182e+08,5.643973e+08,610.410928,616.374423
1986-04-01,71.3,71.0,61.4,59.7,67.7,82.2,63.6,79.5,71.1,1.387868,...,4.948582,8.596442,9.987768e+09,9.340664e+09,6.757419e+09,5.160977e+08,6.720500e+08,4.862854e+08,617.969448,622.598794
1986-05-01,71.2,71.4,61.9,59.9,68.2,83.5,64.0,79.8,75.3,1.375520,...,-2.011849,4.617209,9.644464e+09,9.151795e+09,6.618074e+09,6.516636e+08,6.139386e+08,4.442589e+08,624.767260,627.841625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-01,157.7,145.3,146.2,163.9,156.8,144.5,176.7,142.2,153.4,1.222041,...,0.381299,25.365552,5.133562e+10,5.087006e+10,4.113955e+10,2.094114e+09,2.165032e+09,1.761199e+09,4267.525674,4315.056565
2021-07-01,157.9,146.4,146.6,165.2,157.6,141.7,180.9,141.9,154.8,1.252477,...,-0.063305,16.739067,5.171882e+10,5.161781e+10,4.110480e+10,8.182841e+08,1.866495e+09,1.484498e+09,4295.340912,4330.297053
2021-08-01,158.5,148.3,146.8,165.9,158.0,142.5,182.1,141.7,152.2,1.259393,...,-2.861483,11.774016,5.165626e+10,5.043196e+10,3.993043e+10,1.424896e+09,2.484222e+09,1.967285e+09,4330.822423,4351.795575
2021-09-01,158.1,148.0,147.1,165.9,158.5,141.5,184.8,144.3,150.0,1.266664,...,-3.460786,8.557485,5.079560e+10,4.965300e+10,3.919730e+10,1.560600e+09,2.277300e+09,1.797757e+09,4346.662100,4367.402410
