In [1]:
import pandas as pd
import numpy as np
from dotenv import dotenv_values, find_dotenv
import os
from datacleaning.functions import filter_by_granularity
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
import datetime as dt

In [2]:
# set path parameters
config = dotenv_values(find_dotenv())
path_rawdata = os.path.abspath(config["RAWDATA"]) + '\\'
path_cleandata = os.path.abspath(config["CLEANDATA"]) + '\\'

In [3]:
# import my data
bea_products = pd.read_pickle(path_cleandata + 'BEA_PCE.pkl')
mergeddata = pd.read_pickle(path_cleandata + 'BEA6_naics6_merged.pkl')

In [4]:
# these are by product (as in the original BEA tables)

beadata = filter_by_granularity(bea_products, target_granularity=6)
beadata['product'] = beadata['product'].str.lstrip()

prices = beadata[['product', 'date', 'priceindex']]
expenditures = beadata[['product', 'date', 'expenditures']]

prices.to_pickle(path_cleandata + 'firstinversion//prices.pkl')
expenditures.to_pickle(path_cleandata + 'firstinversion//expenditures.pkl')

In [5]:
# create io matrix: 

iomatrix_long = mergeddata[['product_I', 'product_O', 'IO_value']]
iomatrix_wide = iomatrix_long.pivot_table(index='product_I', columns='product_O', values='IO_value', aggfunc='mean')
iomatrix_wide.to_pickle(path_cleandata + 'firstinversion//iomatrix.pkl')

# fill nans in another copy to use for inversion
iomatrix_wide_fillna = iomatrix_wide.fillna(value=0)

# save
iomatrix_wide_fillna.to_pickle(path_cleandata + 'firstinversion//iomatrix_fillna.pkl')


In [6]:
# run vars, get resid

# use the list of I products
lags = 8
residuals = pd.DataFrame(columns=['date', 'product', 'resid_price', 'resid_quantity'])

to_run = list(set(beadata[(beadata['quantityindex'].notnull()) & (beadata['priceindex'].notnull())]['product']))
for product in to_run:
    # filter for product
    tovar = beadata[beadata['product'] == product][['date', 'priceindex', 'quantityindex']]
    # datetimeindex
    tovar = tovar.set_index('date')
    # zeros for index values should be removed!!
    tovar = tovar.loc[~(tovar==0).any(axis=1)]

    # using first differences
    tovar['priceindex'] = np.log(tovar['priceindex']).diff()
    tovar['quantityindex'] = np.log(tovar['quantityindex']).diff()
    # drop nans for model
    tovar.dropna(inplace=True)

    model = VAR(tovar.asfreq('Q-OCT'))
    result = model.fit(lags)

    # print(product)
    # print('AIC : ', result.aic)
    # print('BIC : ', result.bic)
    # print('FPE : ', result.fpe)
    # print('HQIC: ', result.hqic)

    # residuals
    product_residuals = result.resid.reset_index()
    product_residuals['product'] = product
    product_residuals.rename(columns={'priceindex': 'resid_price', 'quantityindex': 'resid_quantity'}, inplace=True)
    residuals = pd.concat([residuals, product_residuals])
    
# calculate expenditure residual
residuals['resid_expenditure_calculated'] = residuals['resid_price'] * residuals['resid_quantity']

# supply vs demand driven
residuals['majority_demand'] = ((residuals['resid_price'] >= 0) & (residuals['resid_quantity'] >= 0) | (residuals['resid_price'] <= 0) & (residuals['resid_quantity'] <= 0)).astype(int)
residuals['majority_supply'] = ((residuals['resid_price'] * residuals['resid_quantity']) < 0).astype(int)

# save
residuals = residuals.sort_values('product')
residuals.to_pickle(path_cleandata + 'firstinversion//residuals.pkl')

  residuals = pd.concat([residuals, product_residuals])


In [7]:
# FILTER

# first, i need products that actually have a residual at each date
# total number of unique dates
total_dates = residuals['date'].nunique()
# group by product and count the number of unique dates for each product
product_dates_count = residuals.groupby('product')['date'].nunique()
# filter products that appear at all dates
products_appear_all_dates = product_dates_count[product_dates_count == total_dates].index.tolist()

# next, i need products that show up as both buyers and sellers
# list of sellers
inputproducts = list(iomatrix_wide_fillna.index)
# list of buyers
outputproducts = list(iomatrix_wide_fillna.columns)

# get the intersection of these lists
products_to_include = list(set(products_appear_all_dates) & set(inputproducts) & set(outputproducts))

# filter I-O table and residuals
iomatrix_wide_fillna.drop(columns=[col for col in iomatrix_wide_fillna.columns if col not in products_to_include], inplace=True)
iomatrix_wide_fillna.drop(index=[idx for idx in iomatrix_wide_fillna.index if idx not in products_to_include], inplace=True)
residuals = residuals[residuals['product'].isin(products_to_include)]

In [8]:
# transpose of the I-O matrix so that rows are buyers and columns are sellers
iomatrix_fillna_T = iomatrix_wide_fillna.T

# grouping sums by buyer/seller
io_sum_buyers = iomatrix_fillna_T.sum(axis=1) # sum by columns so that each buyer (index) has its total purchases
io_sum_sellers = iomatrix_fillna_T.sum(axis=0) # sum by rows so that each seller (column) has its total sales

# final demand vector
fd = iomatrix_wide['Personal consumption expenditures']
fd = fd.loc[products_to_include].fillna(value=0)

# quantity and price residuals
priceresiduals = residuals[['date', 'product', 'resid_price']]
quantityresiduals = residuals[['date', 'product', 'resid_quantity']]

#  I-O matrix INTERMEDIATE COST SHARES
intermediate_costshares = pd.DataFrame(index=iomatrix_wide_fillna.index, columns=iomatrix_wide_fillna.columns)
for col in iomatrix_wide_fillna.columns:
    intermediate_costshares[col] = iomatrix_wide_fillna[col] / io_sum_buyers[col]

# I-O matrix INTERMEDIATE SALES SHARES
intermediate_salesshares = pd.Series(index=iomatrix_wide_fillna.index)
for col in iomatrix_wide_fillna.columns:
    hello = io_sum_sellers[col] / (io_sum_buyers[col] + fd[col])
    intermediate_salesshares[col] = io_sum_sellers[col] / (io_sum_buyers[col] + fd[col])

In [29]:
# dates!
dates = set(set(priceresiduals['date'].unique()) & set(quantityresiduals['date'].unique()))

# I-O adjusted price residual

priceresiduals_adjusted = pd.DataFrame(columns=['date', 'product', 'resid_price_adjusted'])
for date in dates:
    # filter priceresiduals for the current date
    priceresiduals_date = priceresiduals[priceresiduals['date'] == date].set_index('product')[['resid_price']]
    priceresiduals_date = priceresiduals_date.sort_index()

    # Create the diagonal matrix from intermediate_salesshares
    diag_matrix = np.diag(intermediate_salesshares)
    
    # Calculate the adjustment matrix: (I - diag(intermediate_salesshares) * intermediate_costshares)^(-1)
    adjustment_matrix = np.linalg.inv(np.identity(len(intermediate_costshares)) - (diag_matrix @ intermediate_costshares))

    # Calculate priceresiduals_adjusted for the current date
    priceresiduals_adjusted_date = adjustment_matrix @ priceresiduals_date
    # set some columns to append
    priceresiduals_adjusted_date['date'] = date
    priceresiduals_adjusted_date['product'] = priceresiduals_date.index
    priceresiduals_adjusted_date.rename(columns={'resid_price': 'resid_price_adjusted'}, inplace=True)
    # append
    priceresiduals_adjusted = pd.concat([priceresiduals_adjusted, priceresiduals_adjusted_date], ignore_index=True)


  priceresiduals_adjusted = pd.concat([priceresiduals_adjusted, priceresiduals_adjusted_date], ignore_index=True)


In [45]:
# I-O adjusted quantity residual

quantityresiduals_adjusted = pd.DataFrame(columns=['date', 'product', 'resid_quantity_adjusted'])
for date in dates:
    # filter priceresiduals for the current date
    priceresiduals_date = priceresiduals[priceresiduals['date'] == date].set_index('product')[['resid_price']]
    priceresiduals_date = priceresiduals_date.sort_index()

    # filter quantityresiduals for the current date
    quantityresiduals_date = quantityresiduals[quantityresiduals['date'] == date].set_index('product')[['resid_quantity']]
    quantityresiduals_date = quantityresiduals_date.sort_index()

    # price * quantity
    pricequantity_date = priceresiduals_date['resid_price'] * quantityresiduals_date['resid_quantity']

    # filter adjusted price residuals for the current date
    priceresiduals_adjusted_date = priceresiduals_adjusted[priceresiduals_adjusted['date'] == date].set_index('product')[['resid_price_adjusted']]
    priceresiduals_adjusted_date = priceresiduals_adjusted_date.sort_index()

    # Create the diagonal matrix from intermediate_salesshares
    diag_matrix = np.diag(intermediate_salesshares)
    
    # calculate "sales" in each sector
    sales_date = np.linalg.inv(np.identity(len(intermediate_costshares)) - (intermediate_costshares.T @ diag_matrix)) @ pricequantity_date

    # Calculate quantityresiduals_adjusted for the current date
    quantityresiduals_adjusted_date = sales_date / priceresiduals_adjusted_date['resid_price_adjusted']
    quantityresiduals_adjusted_date = quantityresiduals_adjusted_date.reset_index()
    # set some columns to append
    quantityresiduals_adjusted_date['date'] = date
    quantityresiduals_adjusted_date.rename(columns={'resid_price_adjusted': 'resid_quantity_adjusted'}, inplace=True)
    # append
    quantityresiduals_adjusted = pd.concat([quantityresiduals_adjusted, quantityresiduals_adjusted_date], ignore_index=True)



  quantityresiduals_adjusted = pd.concat([quantityresiduals_adjusted, quantityresiduals_adjusted_date], ignore_index=True)


In [None]:
# difference between adjustment and original price residual