In [1]:
import pandas as pd
import numpy as np
from dotenv import dotenv_values, find_dotenv
import os
from datacleaning.functions import filter_by_granularity
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic

In [2]:
config = dotenv_values(find_dotenv())
path_rawdata = os.path.abspath(config["RAWDATA"]) + '\\'
path_cleandata = os.path.abspath(config["CLEANDATA"]) + '\\'

In [3]:
bea_products = pd.read_pickle(path_cleandata + 'BEA_PCE.pkl')

# try using the data i already made to get everything by product including the io matrix
mergeddata = pd.read_pickle(path_cleandata + 'BEA6_IOuse_merged.pkl')

In [4]:
# these are by product (as in the original BEA tables)

beadata = filter_by_granularity(bea_products, target_granularity=6)

prices = beadata[['product', 'date', 'priceindex']]
expenditures = beadata[['product', 'date', 'expenditures']]

prices.to_pickle(path_cleandata + 'firstinversion//prices.pkl')
expenditures.to_pickle(path_cleandata + 'firstinversion//expenditures.pkl')

In [5]:
prices.head()

Unnamed: 0,product,date,priceindex
0,Personal consumption expenditures,1959-01-31,15.177
6,New domestic autos,1959-01-31,37.387
7,New foreign autos,1959-01-31,37.396
9,New domestic light trucks,1959-01-31,
10,New foreign light trucks,1959-01-31,


In [6]:
expenditures.head()

Unnamed: 0,product,date,expenditures
0,Personal consumption expenditures,1959-01-31,309449.0
6,New domestic autos,1959-01-31,11794.0
7,New foreign autos,1959-01-31,1114.0
9,New domestic light trucks,1959-01-31,
10,New foreign light trucks,1959-01-31,


In [7]:
# create io matrix: 

iomatrix = mergeddata[['product_I', 'product_O', 'IO_value']]
iomatrix = iomatrix.pivot_table(index='product_I', columns='product_O', values='IO_value', aggfunc='mean')
iomatrix.to_pickle(path_cleandata + 'firstinversion//iomatrix.pkl')

# fill nans in another copy to use for inversion

iomatrix_fillna = iomatrix.fillna(value=0)
iomatrix_fillna.to_pickle(path_cleandata + 'firstinversion//iomatrix_fillna.pkl')

In [18]:
# see which goods aren't in input vs output
inputproducts = list(iomatrix_fillna.index)
outputproducts = list(iomatrix_fillna.columns)
to_remove = [x for x in inputproducts + outputproducts if x not in inputproducts or x not in outputproducts]

# remove rows and columns
iomatrix_fillna.drop(columns=[col for col in to_remove if col in iomatrix_fillna.columns], inplace=True)
iomatrix_fillna.drop(index=[idx for idx in to_remove if idx in iomatrix_fillna.index], inplace=True)


In [19]:
iomatrix_fillna

product_O,All other professional medical services,"Audio discs, tapes, vinyl, and permanent digital downloads",Auto leasing,Bakery products,Beef and veal,Cereals,Direct commissions,Eggs,Fresh milk,Fruit (fresh),...,Recreational books (part of 90),Religious organizations' services to households,"Religious organizations, gross output",Services of social advocacy establishments to households,"Social advocacy establishments, gross output",Social services to households,"Social services, gross output","Sporting equipment, supplies, guns, and ammunition (part of 80)",Telephone and related communication equipment,Tobacco (127)
product_I,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
All other professional medical services,1147.0,719.0,115.0,302.0,107.0,5.0,1041.0,5.0,143.0,49.0,...,2016.0,937.0,937.0,2259.0,2259.0,699.0,2469.0,9.0,59.0,48.0
"Audio discs, tapes, vinyl, and permanent digital downloads",3.0,2480.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,48.0,321.0,321.0,2093.0,2093.0,0.0,6.0,0.0,8.0,0.0
Auto leasing,480.0,299.0,837.0,56.0,159.0,1.0,398.0,94.0,225.0,259.0,...,345.0,15.0,15.0,240.0,240.0,129.0,602.0,7.0,2.0,31.0
Bakery products,0.0,0.0,0.0,243.0,0.0,0.0,0.0,0.0,0.0,0.0,...,96.0,52.0,52.0,211.0,211.0,953.0,0.0,0.0,0.0,0.0
Beef and veal,6.0,0.0,0.0,232.0,131188.0,8.0,46.0,21302.0,11200.0,748.0,...,390.0,110.0,110.0,165.0,165.0,3873.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Social services to households,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Social services, gross output",82240.0,50256.0,21778.0,3947.0,34700.0,1673.0,94603.0,23240.0,15490.0,20392.0,...,26737.0,12091.0,12091.0,60870.0,60870.0,10855.0,153330.0,2388.0,7705.0,39236.0
"Sporting equipment, supplies, guns, and ammunition (part of 80)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,15.0,0.0,0.0,0.0,0.0,0.0,0.0,1254.0,0.0,0.0
Telephone and related communication equipment,81.0,240.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,...,529.0,36.0,36.0,218.0,218.0,115.0,1622.0,0.0,2049.0,0.0


In [20]:
# run vars

allproducts = list(set(beadata['product']))
lags = 8
residuals = pd.DataFrame(columns=['date', 'product', 'resid_price', 'resid_quantity'])

for product in allproducts:
    # filter for product
    tovar = beadata[beadata['product'] == product][['date', 'priceindex', 'quantityindex']]
    # datetimeindex
    tovar = tovar.set_index('date')
    # zeros for index values should be removed!!
    tovar = tovar.loc[~(tovar==0).any(axis=1)]

    # using first differences
    tovar['priceindex'] = np.log(tovar['priceindex']).diff()
    tovar['quantityindex'] = np.log(tovar['quantityindex']).diff()
    # drop nans for model
    tovar.dropna(inplace=True)

    model = VAR(tovar.asfreq('Q-OCT'))
    result = model.fit(lags)

    # print(product)
    # print('AIC : ', result.aic)
    # print('BIC : ', result.bic)
    # print('FPE : ', result.fpe)
    # print('HQIC: ', result.hqic)

    # residuals
    product_residuals = result.resid.reset_index()
    product_residuals['product'] = product
    product_residuals.rename(columns={'priceindex': 'resid_price', 'quantityindex': 'resid_quantity'}, inplace=True)
    residuals = pd.concat([residuals, product_residuals])


  residuals = pd.concat([residuals, product_residuals])


In [23]:
# calculate expenditure residual
residuals['resid_expenditure_calculated'] = residuals['resid_price'] * residuals['resid_quantity']

# supply vs demand driven
residuals['majority_demand'] = ((residuals['resid_price'] >= 0) & (residuals['resid_quantity'] >= 0) | (residuals['resid_price'] <= 0) & (residuals['resid_quantity'] <= 0)).astype(int)
residuals['majority_supply'] = ((residuals['resid_price'] * residuals['resid_quantity']) < 0).astype(int)

residuals.to_pickle(path_cleandata + 'firstinversion//residuals.pkl')

In [24]:
residuals

Unnamed: 0,date,product,resid_price,resid_quantity,resid_expenditure_calculated,majority_demand,majority_supply
0,1961-04-30,Other purchased meals,-0.000614,-0.016412,1.007334e-05,1,0
1,1961-07-31,Other purchased meals,0.000604,-0.020822,-1.257531e-05,0,1
2,1961-10-31,Other purchased meals,0.002058,0.007646,1.573635e-05,1,0
3,1962-01-31,Other purchased meals,-0.002607,0.001176,-3.065572e-06,0,1
4,1962-04-30,Other purchased meals,0.001465,0.006473,9.485035e-06,1,0
...,...,...,...,...,...,...,...
245,2022-07-31,Nonprofit nursing homes servic...,0.017527,0.002288,4.010987e-05,1,0
246,2022-10-31,Nonprofit nursing homes servic...,0.001582,-0.030153,-4.769947e-05,0,1
247,2023-01-31,Nonprofit nursing homes servic...,0.005679,0.023197,1.317279e-04,1,0
248,2023-04-30,Nonprofit nursing homes servic...,-0.011372,-0.019190,2.182207e-04,1,0
