In [1]:
import pandas as pd
import numpy as np
from dotenv import dotenv_values, find_dotenv
import os
from datacleaning.functions import filter_by_granularity
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

In [2]:
config = dotenv_values(find_dotenv())
path_rawdata = os.path.abspath(config["RAWDATA"]) + '\\'
path_cleandata = os.path.abspath(config["CLEANDATA"]) + '\\'

In [3]:
bea_products = pd.read_pickle(path_cleandata + 'BEA_PCE.pkl')

# try using the data i already made to get everything by product including the io matrix
mergeddata = pd.read_pickle(path_cleandata + 'BEA6_IOuse_merged.pkl')

In [4]:
# these are by product (as in the original BEA tables)

beadata = filter_by_granularity(bea_products, target_granularity=6)

prices = beadata[['product', 'date', 'priceindex']]
expenditures = beadata[['product', 'date', 'expenditures']]

prices.to_pickle(path_cleandata + 'firstinversion//prices.pkl')
expenditures.to_pickle(path_cleandata + 'firstinversion//expenditures.pkl')

In [5]:
prices.head()

Unnamed: 0,product,date,priceindex
0,Personal consumption expenditures,1959-01-31,15.177
6,New domestic autos,1959-01-31,37.387
7,New foreign autos,1959-01-31,37.396
9,New domestic light trucks,1959-01-31,
10,New foreign light trucks,1959-01-31,


In [6]:
expenditures.head()

Unnamed: 0,product,date,expenditures
0,Personal consumption expenditures,1959-01-31,309449.0
6,New domestic autos,1959-01-31,11794.0
7,New foreign autos,1959-01-31,1114.0
9,New domestic light trucks,1959-01-31,
10,New foreign light trucks,1959-01-31,


In [7]:
# create io matrix: 

iomatrix = mergeddata[['product_I', 'product_O', 'IO_value']]
iomatrix = iomatrix.pivot_table(index='product_I', columns='product_O', values='IO_value', aggfunc='mean')

iomatrix.to_pickle(path_cleandata + 'firstinversion//iomatrix.pkl')

# fill nans in another copy
iomatrix_fillna = iomatrix.fillna(value=0)
iomatrix_fillna.to_pickle(path_cleandata + 'firstinversion//iomatrix_fillna.pkl')

In [8]:
# run vars

allproducts = list(set(beadata['product']))
lags = 8
residuals = pd.DataFrame(columns=['date', 'product', 'resid_price', 'resid_quantity'])

for product in allproducts:
    # filter for product
    tovar = beadata[beadata['product'] == product][['date', 'priceindex', 'quantityindex']]
    # datetimeindex
    tovar = tovar.set_index('date')
    # zeros for index values should be removed!!
    tovar = tovar.loc[~(tovar==0).any(axis=1)]

    # using first differences
    tovar['priceindex'] = np.log(tovar['priceindex']).diff()
    tovar['quantityindex'] = np.log(tovar['quantityindex']).diff()
    # drop nans for model
    tovar.dropna(inplace=True)

    model = VAR(tovar.asfreq('Q-OCT'))
    result = model.fit(lags)

    # print(product)
    # print('AIC : ', result.aic)
    # print('BIC : ', result.bic)
    # print('FPE : ', result.fpe)
    # print('HQIC: ', result.hqic)

    # residuals
    product_residuals = result.resid.reset_index()
    product_residuals['product'] = product
    product_residuals.rename(columns={'priceindex': 'resid_price', 'quantityindex': 'resid_quantity'}, inplace=True)
    residuals = pd.concat([residuals, product_residuals])


  residuals = pd.concat([residuals, product_residuals])


In [9]:
residuals['resid_expenditure_calculated'] = residuals['resid_price'] * residuals['resid_quantity']

In [10]:
residuals

Unnamed: 0,date,product,resid_price,resid_quantity,resid_expenditure_calculated
0,2004-04-30,New foreign light trucks,0.001778,-0.070893,-0.000126
1,2004-07-31,New foreign light trucks,-0.007288,-0.000983,0.000007
2,2004-10-31,New foreign light trucks,0.014306,0.028648,0.000410
3,2005-01-31,New foreign light trucks,-0.000706,-0.016735,0.000012
4,2005-04-30,New foreign light trucks,-0.005174,0.027925,-0.000144
...,...,...,...,...,...
157,2022-07-31,Personal computers/tablets...,0.001465,-0.027989,-0.000041
158,2022-10-31,Personal computers/tablets...,-0.018419,-0.013744,0.000253
159,2023-01-31,Personal computers/tablets...,0.010118,0.016872,0.000171
160,2023-04-30,Personal computers/tablets...,0.007059,-0.011326,-0.000080


In [12]:
residuals.to_pickle(path_cleandata + 'firstinversion//residuals.pkl')