In [None]:
from utils.data import update_expl_data, update_target_data, food_categories, preprocess_expl, load_statscan
import pandas as pd
import os
from dotenv import load_dotenv

In [None]:
"""
Set sample rate. In this notebook, all data will be resampled at the chosen frequency.
'MS' : Monthly (Month Start)
'W' : Weekly
'D' : Daily
"""

frequency = 'MS'

In [None]:
"""
Load food CPI data from January 1986 to the most recently available data.
"""
if not os.path.exists("./data_files"):
    os.mkdir("./data_files")
if not os.path.exists("./statscan_data"):
    os.mkdir("./statscan_data")
foodprice_df = update_target_data(food_categories, './data_files/food_cpi.csv')
foodprice_df = foodprice_df.resample(frequency).mean().interpolate()
foodprice_df

In [None]:
# Persist the resampled food CPI table; the index is written as the first CSV column.
foodprice_df.to_csv(path_or_buf="./datasets/food_CPI.csv")

### Get energy CPI from StatCan as well

In [None]:
"""
Load energy CPI data from January 1986 to the most recently available data.
"""
energy_df = load_statscan(['Energy'])
energy_df = energy_df.resample(frequency).mean().interpolate()
energy_df

### FRED

In [None]:
# De-duplicated FRED series identifiers, taken from the `id` column of the
# series catalogue CSV (order of first appearance is preserved by unique()).
fred_data_sources = list(
    pd.read_csv("./data_files/fred_series.csv")
    .id
    .unique()
)
fred_data_sources

In [None]:
"""
Load exogenous/auxiliary explanatory variables from FRED: https://fred.stlouisfed.org/
These data sources reflect various economic factors that may improve forecasts. 
Please visit the FRED website to learn more about these series, and to find others
that may be useful for food CPI forecasting. 
"""

load_dotenv("../.env")
FRED_API_KEY = os.getenv("FRED_API_KEY")

data_sources = fred_data_sources

expl_df = preprocess_expl(update_expl_data(data_sources, './data_files/expl_vars.csv', sleep_sec=0.5, api_key=FRED_API_KEY))
expl_df_monthly = expl_df.resample(frequency).mean().interpolate()
expl_df_monthly

In [None]:
# Join the three resampled tables side by side on their shared index, then keep
# only the rows where every column has a value.
combined_df = pd.concat(
    [foodprice_df, energy_df, expl_df_monthly],
    axis="columns",
).dropna(axis="index")
combined_df.to_csv(path_or_buf="./datasets/all_data.csv")
combined_df