# Requirements to collapse requirements tables Note + Making Concordances Concord - Tony Gui

In [20]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import VAR
import os
from data_cleaning_functions import requirements_clean, concordance_PCE_clean, \
    find_intermediate_industries, concordance_PCQ_clean, get_sales_from_make_matrix, clean_make_matrix, \
    get_demand_shock_from_shaipro_output, get_expenditure_weights_from_shapiro_outputs,plot_shapiro_graph_from_shapiro_ouput,clean_bea_PQE_table, get_final_demand_from_use_table

from pathlib import Path
import matplotlib.pyplot as plt

# Project root is the parent of the current working directory; every data
# location below is derived from it as a plain string.
script_dir = str(Path().resolve().parent)
file_path = f"{script_dir}/"
shapiro_file = f"{file_path}Shapiro"
raw_data_path = f"{file_path}raw_bea_data"

In [33]:
# Read the '2017' sheet of the detailed Use table workbook and pass it to the
# project helper find_intermediate_industries; the result is displayed below.
use_table = pd.read_excel(os.path.join(raw_data_path, 'Use_SUT_Framework_2017_DET.xlsx'), sheet_name='2017')
use_table_filtered = find_intermediate_industries(use_table)
use_table_filtered

Unnamed: 0,Industry,PCE Expenditure
5,oilseed farming,
15,forestry and logging,
18,oil and gas extraction,
20,"copper, nickel, lead, and zinc mining",
21,"iron, gold, silver, and other metal ore mining",
...,...,...
396,federal general government (nondefense),
398,other federal government enterprises,
399,state and local government (educational services),
400,state and local government (hospitals and heal...,


In [34]:
# Manual clean-up of the Use table: keep the description column and the T019
# column, harmonise a handful of industry names, then lower-case/strip labels.
use_table = pd.read_excel(os.path.join(raw_data_path, 'Use_SUT_Framework_2017_DET.xlsx'), sheet_name='2017')
# Drop the first 4 and last 11 spreadsheet rows (presumably header/footer rows — confirm).
use_table = use_table.iloc[4:-11]
# Keep only the columns whose first remaining row reads 'Commodity Description' or 'T019'.
use_table = use_table.loc[:, use_table.iloc[0].isin(['Commodity Description', 'T019'])]
# Discard the row that carried those column codes.
use_table = use_table.iloc[1:]
# NOTE(review): the output displayed under this cell still shows the value
# column as 'Unnamed: 424', so the 'Unnamed: 405' key does not appear to match
# the selected column — confirm which column is meant to be 'PCE Expenditure'.
use_table.rename(columns={'Unnamed: 1': 'Industry' , 'Unnamed: 405': 'PCE Expenditure'}, inplace=True)
# Rename industries one by one; these comparisons run before lower-casing, so
# they must match the raw spelling exactly.
use_table.loc[use_table['Industry'] == 'Drugs and druggistsâ€™ sundries', 'Industry'] = 'Drugs and druggists sundries'
use_table.loc[use_table['Industry'] == 'Insurance Carriers, except Direct Life Insurance', 'Industry'] = 'Insurance carriers, except direct life'
use_table.loc[use_table['Industry'] == 'Tobacco product manufacturing', 'Industry'] = 'Tobacco manufacturing'
use_table.loc[use_table['Industry'] == 'Scenic and sightseeing transportation and support activities for transportatio', 'Industry'] = 'scenic and sightseeing transportation and support activities'
use_table.loc[use_table['Industry'] == 'Community food, housing, and other relief services, including rehabilitation services', 'Industry'] = 'community food, housing, and other relief services, including vocational rehabilitation services'
# Normalise labels so they can be joined on: lower-case, trimmed, no missing names.
use_table["Industry"] = use_table["Industry"].str.lower()
use_table["Industry"] = use_table["Industry"].str.strip()
use_table = use_table.dropna(subset=['Industry'])
# # use_table = use_table[use_table['PCE Expenditure'].isna()]
use_table 


Unnamed: 0,Industry,Unnamed: 424
5,oilseed farming,61204
6,grain farming,96281
7,vegetable and melon farming,50338
8,fruit and tree nut farming,82686
9,"greenhouse, nursery, and floriculture production",44491
...,...,...
402,other state and local government enterprises,107059
403,scrap,46908
404,used and secondhand goods,164495
405,noncomparable imports,260421


In [35]:
# Inner join on 'Industry': only industries present in both the hand-cleaned
# use table and the filtered table from find_intermediate_industries remain.
merged = use_table.merge(use_table_filtered, on="Industry", how="inner")
merged

Unnamed: 0,Industry,Unnamed: 424,PCE Expenditure
0,oilseed farming,61204,
1,forestry and logging,31674,
2,oil and gas extraction,423371,
3,"copper, nickel, lead, and zinc mining",13889,
4,"iron, gold, silver, and other metal ore mining",20317,
...,...,...,...
138,federal general government (nondefense),379143,
139,other federal government enterprises,1233,
140,state and local government (educational services),832472,
141,state and local government (hospitals and heal...,68698,


# 1. Load in Requirements Matrix 

In [None]:
# Read the '2017' sheet of IxI_TR_2017_PRO_Det.xlsx and clean it with the
# project helper requirements_clean; the cleaned table is displayed below.
requirements = pd.read_excel(os.path.join(raw_data_path, 'IxI_TR_2017_PRO_Det.xlsx'), sheet_name='2017')
requirements = requirements_clean(requirements)
requirements

# 2. Calculate Delta

In [None]:
# Transpose the cleaned requirements table and treat missing cells as zero,
# then back out delta as the identity minus the inverse of that matrix.
requirements = requirements.T.fillna(0)
n_industries = len(requirements)
delta = np.eye(n_industries) - np.linalg.inv(requirements)

# 2 (Making Concordances Concord) Adding Scrap, Used and secondhand goods and ROW adjustments to Delta

In [None]:
# These four industries appear in the concordance table but not in the
# requirements table, so they are appended manually with all-zero rows and
# columns.
concordance_but_not_requirments = ["Scrap", "Used and secondhand goods", "Rest of the world adjustment", "noncomparable imports"]
extra_labels = pd.Index(concordance_but_not_requirments).str.lower()

# Lower-case the existing labels BEFORE reindexing. reindex matches labels
# exactly, so a label whose case differs from its lower-cased form would
# otherwise be treated as new and come back as an all-zero row/column.
delta = pd.DataFrame(
    delta,
    index=requirements.index.str.lower(),
    columns=requirements.columns.str.lower(),
)
delta = delta.reindex(
    index=delta.index.append(extra_labels),
    columns=delta.columns.append(extra_labels),
    fill_value=0,
)
delta

# 2.5a Making Negative Values in Delta Zero

In [None]:
# Share of negative cells in delta before and after clipping them to zero.
# The denominator is the actual number of cells in delta rather than a
# hard-coded 402**2, so the shares stay correct if the matrix size changes.
total_cells = delta.size
negative_count = (delta < 0).sum().sum()
percent_of_negatives_before = negative_count / total_cells
delta[delta < 0] = 0  # clip every negative entry to zero
negative_count_new = (delta < 0).sum().sum()
percent_of_negatives_after = negative_count_new / total_cells

# 2.5b Force negatives to be zero but adjust row sums to be same as before

In [None]:
# V = delta.sum(axis=0)
# P = delta[delta > 0].sum()
# delta.loc['norm'] = V/P
# last_values = delta.iloc[-1]
# delta[delta < 0] = 0
# delta = delta.iloc[:-1].div(last_values)
# delta

# Dealing with Intermediate Industries - Making Concordances Concord Section 3.3.1. - Operationalizing Industries without Products

# Step 1. Making Concordances Concord

In [None]:
# Reload the raw Use table and list the industries to be removed as
# intermediates, sorted by name so the removal loop has a fixed order.
use_table = pd.read_excel(os.path.join(raw_data_path, "Use_SUT_Framework_2017_DET.xlsx"), sheet_name="2017")

# Returns all industries with zero PCE 
intermediate_industries = find_intermediate_industries(use_table)

# Keep only the first column (as a one-column frame) and sort on 'Industry'.
intermediate_industries = intermediate_industries.iloc[:, [0]]
intermediate_industries = intermediate_industries.sort_values(by="Industry")
intermediate_industries

# Steps 2 - 5. Making Concordances Concord - Loop over Intermediate Industries and Remove

In [None]:
# For the Loop 
big_lambda = np.eye(len(delta)) # create a big_lambda identity matrix that will become out final output
big_lambda = pd.DataFrame(big_lambda)
big_lambda = big_lambda.set_index(delta.index)
big_lambda.columns = delta.index
delta_industries = delta.index # save all the requirment table industries outside the loop 
removed = [] # stores all removed industries through the loop 

i = 0
for row in intermediate_industries.iloc[::-1].itertuples(): # looping backwards
    i += 1 
    current = row.Industry # industry we are currently removing
    # Create identity matrix for current loop iteration
    phi_i = np.eye(len(delta) - (i - 1))
    phi_i = pd.DataFrame(phi_i)
    phi_i = phi_i.set_index(delta_industries)
    phi_i.columns = delta_industries

    current_intermediate_use_table = delta.copy() # use a new intermediates use matrix each time we do this
    # Remove all the industries we've previously removed in earlier loop iterations 
    current_intermediate_use_table.drop(columns=removed, inplace=True)
    current_intermediate_use_table = current_intermediate_use_table.loc[[current]] # the row of the intermediates use table associated with the current industry  
    current_intermediate_use_table.drop(columns=[current], inplace=True) # drop the column associated with the current industry so we dont include it in our sum
    current_intermediate_use_table = current_intermediate_use_table.astype(float)
    current_industry_sum = current_intermediate_use_table.loc[current].sum() # find sum of row

    # just making sure that we aren't dividing by zero 
    if current_industry_sum != 0:
        current_intermediate_use_table = current_intermediate_use_table.astype(float)
        current_intermediate_use_table.loc[current] = current_intermediate_use_table.loc[current] / current_industry_sum


    current_intermediate_use_table = current_intermediate_use_table[sorted(current_intermediate_use_table.columns)] # sort the normazlied row alphabetically

    # remove the current industry from intermediate list
    intermediate_industries = intermediate_industries[intermediate_industries['Industry'] != current]
    # drop the current industry column from phi_i and industry indexing
    phi_i = phi_i.drop(columns=current)
    delta_industries = delta_industries.drop(current) 
    
    removed.append(current)

    # Update phi_i with the current industry sale shares
    phi_i.loc[current_intermediate_use_table.index] = current_intermediate_use_table.loc[current_intermediate_use_table.index].astype(np.float64).values
    big_lambda_old = big_lambda
    big_lambda = big_lambda @ phi_i

# Step 6. Making Concordances Concord - Converting IO matrix from sales shares to dollars

In [None]:
# Scale every row of delta by that industry's sales to get Y (delta in dollars).
make_matrix = pd.read_excel(os.path.join(raw_data_path, "Supply_2017_DET.xlsx"), sheet_name="2017")
sales_vector = get_sales_from_make_matrix(make_matrix) # returns the sales for each industry

# these industries are not in the make matrix so i will add them manually with zero entries to preserve df sizes
industries_not_in_make_matrix = ["state and local government passenger transit", "state and local government electric utilities", \
                      "secondary smelting and alloying of aluminum", "federal electric utilities"]
not_in_make_matrix = pd.DataFrame({'Industries': industries_not_in_make_matrix, 'Sales': [0,0,0,0]})
sales_vector = pd.concat([sales_vector, not_in_make_matrix], ignore_index=True)
sales_vector.set_index('Industries', inplace=True)

# repeat the sales vector n times to make a sales matrix
# range(1, len(delta)) yields len(delta) - 1 copies; together with the original
# 'Sales' column concatenated below that makes len(delta) identical columns.
sales_repeated = pd.DataFrame({f'{i}': sales_vector['Sales'].values for i in range(1, len(delta))})
sales_repeated.set_index(sales_vector.index, inplace=True)
sales_matrix = pd.concat([sales_vector, sales_repeated], axis=1)
# Relabel the identical columns with delta's column labels so the product aligns.
sales_matrix.columns = delta.columns

# Y is IO matrix in dollars 
# NOTE(review): '*' aligns on row and column labels, so industry names in
# sales_vector must be spelled exactly like delta's index — confirm.
Y = delta * sales_matrix 

# Step 7. Making Concordances Concord - Value Added for the Economy

In [None]:
# Row sums of Y, given the same 'Sales' column name as sales_vector so the
# subtraction below lines up column-wise.
sum_Y = Y.sum(axis=1).to_frame()
sum_Y.columns = ['Sales']
# Total over all industries of (sales - row sum of Y) before the transformation.
VA_pre_transformation = (sales_vector - sum_Y).sum()

# Step 8. Making Concordances Concord - IO Matrix in USD

In [None]:
# new IO matrix in USD 
Y_tilde = big_lambda.T @ Y @ big_lambda
sales_tilde = big_lambda.T @ sales_vector

# Verifing that economy wide value added is identical to the pre-transformation level
sum_Y_tilde = Y_tilde.sum(axis=1).to_frame()
sum_Y_tilde.columns = ['Sales']
VA_post_transformation = (sales_tilde - sum_Y_tilde).sum()
# VA_post_transformation

In [None]:
"""Pre transformation and post transformation are not the same"""
differnece = VA_post_transformation - VA_pre_transformation
differnece

# Step 9. Making Concordances Concord - Calculating New Delta

In [None]:
# Convert the transformed dollar matrix back to shares by dividing each row of
# Y_tilde by that industry's transformed sales.
# NOTE(review): industries whose transformed sales are 0 get an infinite
# reciprocal here, so their rows can become inf/NaN — confirm this is handled.
recip_sales = 1/sales_tilde
# Repeat the reciprocal column so there is one copy per column label of delta.
sales_tilde_repeated = pd.DataFrame({f'{i}': recip_sales['Sales'].values for i in range(1, len(delta))})
sales_tilde_repeated.set_index(recip_sales.index, inplace=True)
sales_tilde_matrix = pd.concat([recip_sales, sales_tilde_repeated], axis=1)
sales_tilde_matrix.columns = delta.columns

# Element-wise product, aligned on row and column labels.
delta_tilde = Y_tilde * sales_tilde_matrix
# From here on 'delta' refers to the transformed matrix.
delta = delta_tilde
# delta

# End of Making Concordances Concord - Back to Requirements to Collapse Requirements Table

# 3. Merge Concordance with Delta

In [None]:
# Read the '2017' sheet of the PCE bridge workbook and clean it with the project
# helper concordance_PCE_clean. Later cells use its 'PCE Bridge Industries' and
# 'PCE Bridge Products' columns.
concordance = pd.read_excel(os.path.join(raw_data_path, "PCEBridge_2017_DET.xlsx"), sheet_name="2017")
concordance = concordance_PCE_clean(concordance)
# concordance

In [None]:
"""'federal electric utilities', 'secondary smelting and alloying of aluminum', 'state and local government electric utilities', 'state and local government passenger transit' 
are not an intermediate industry nor do they exist in the PCE concordance so we will simply drop"""
# Every delta column label that has no match in the concordance's industry
# column is removed from both axes of delta.
industries_in_requirments = set(delta.columns)
indsutries_in_concordance = set(concordance["PCE Bridge Industries"])
industires_not_in_concordance = industries_in_requirments - indsutries_in_concordance
# errors='ignore' tolerates labels that exist on only one of the two axes.
delta = delta.drop(index=industires_not_in_concordance, columns=industires_not_in_concordance, errors='ignore')
# delta

In [None]:
# Expand delta from industry x industry to product x product using the concordance.
# in_num_products: number of concordance rows (products) per industry.
in_num_products = concordance['PCE Bridge Industries'].value_counts()

# Empty frame with one column per concordance row, labelled by product
# (the same product label can therefore appear more than once).
delta_product_columns = pd.DataFrame(index=delta.index, columns=concordance['PCE Bridge Products'])

# Fill each product column from the delta column(s) of the industry mapped to
# it, divided by how many products that industry maps to.
# NOTE(review): 'industry' is an array, so this assumes each product label
# resolves to a single industry — confirm for products listed under several.
for column in delta_product_columns: 
    industry = concordance.loc[concordance['PCE Bridge Products'] == column, 'PCE Bridge Industries'].values
    delta_product_columns[column] = delta[industry] / in_num_products[industry]

# Now expand the rows: each industry row is divided by its product count and
# emitted once per product that industry maps to, labelled by the product.
delta_product_cr = pd.DataFrame(columns=delta_product_columns.columns)
lst_for_industry = []
for row in delta_product_columns.index:
    final_value = delta_product_columns.loc[row] / in_num_products[row]
    products = concordance[concordance['PCE Bridge Industries'] == row]['PCE Bridge Products'].tolist()
    for product in products: 
        lst_for_industry.append(row)  # remember which industry produced this row
        final_value_row = pd.DataFrame(final_value).T
        final_value_row.index = [product]
        
        delta_product_cr = pd.concat([delta_product_cr, final_value_row])

# Attach the source industry of every emitted row as an 'industry' column.
industry = pd.DataFrame(lst_for_industry, columns=['industry'])
industry.index = delta_product_cr.index
delta_product_cr = pd.concat([delta_product_cr, industry], axis = 1)
# delta_product_cr

# 4. Collapse columns

In [None]:
# Collapse columns that share the same label by summing them: transpose, group
# the (former) columns by label, sum, and transpose back.
# NOTE(review): the 'industry' text column goes through the same sum — confirm
# it comes out unchanged, since a later merge keys on it.
delta_products = delta_product_cr.T.groupby(delta_product_cr.columns).sum().T
# delta_products

# 5. Make Matrix to Sales

In [None]:
# Reload the Supply workbook and derive industry sales with the project helper.
# NOTE: this rebinds the name 'sales'; a later cell rebinds it again.
make_matrix = pd.read_excel(os.path.join(raw_data_path, "Supply_2017_DET.xlsx"), sheet_name='2017')
sales = get_sales_from_make_matrix(make_matrix)
# sales

# 6. Merge concordance with sales 

In [None]:
# Attach each industry's sales to its concordance rows (default inner join),
# then keep only the product, industry and sales columns.
product_sales = pd.merge(concordance, sales, left_on='PCE Bridge Industries', right_on='Industries')
product_sales = product_sales[["PCE Bridge Products", "Industries", 'Sales']]
# product_sales

# 7. Group sum of Sales for every Product

In [None]:
# calculates the number of product categories an Industry belongs to 
product_sales['In#Products'] = product_sales['Industries'].map(product_sales['Industries'].value_counts())
product_sales["Ratio_in_Product"] = product_sales['Sales'] / product_sales['In#Products'] 
product_sales['Sales_Sum'] = product_sales.groupby('PCE Bridge Products')['Ratio_in_Product'].transform('sum')
# product_sales

# 8. Sales Share 

In [None]:
# Each row's share of its product's total split sales, followed by renaming the
# key columns to the 'industry' / 'product' names used in the merge with delta.
product_sales['Sale Share'] = product_sales["Ratio_in_Product"].div(product_sales["Sales_Sum"])
product_sales = product_sales.rename(
    columns={'Industries': 'industry', 'PCE Bridge Products': 'product'}
)
# product_sales

# 9. Merge Sale shares and Delta

In [None]:
# Turn the product labels in the index into a 'product' column so delta can be
# joined to the sale shares on the (industry, product) pair.
df_reset = delta_products.reset_index()
delta_products = df_reset.rename(columns={'index': 'product'})
delta_products_saleshare = delta_products.merge(product_sales, how = 'inner', on = ['industry', 'product'])
# Keep only the delta values and 'Sale Share'; the helper columns are dropped.
delta_products_saleshare = delta_products_saleshare.drop(columns=['Sales_Sum', "Ratio_in_Product", "In#Products", "Sales", "industry"])
# The first remaining column ('product') becomes the index again.
delta_products_saleshare = delta_products_saleshare.set_index(delta_products_saleshare.columns[0])
# delta_products_saleshare

# 10. weightTimesDeltaValue + 11.

In [None]:
# Convert from Wide to Long
delta_final = delta_products_saleshare.reset_index().melt(id_vars=[delta_products_saleshare.index.name, "Sale Share"],
                                var_name="Column Products", value_name="value")

# Just Renaming and Reordering
delta_final = delta_final.rename(columns={delta_products_saleshare.index.name: "Row Products"})
delta_final = delta_final[['Row Products', 'Column Products', 'value', 'Sale Share']]
# Calculating weightTimesDeltaValue
delta_final["weightTimesDeltaValue"] = delta_final["value"] * delta_final["Sale Share"]

# Sum weightTimesDeltaValue grouping by Row Products AND Column Products
delta_final = delta_final.groupby(['Row Products', 'Column Products']).sum()
delta_final
# Convert Back to Wide Format 
delta_final = delta_final.pivot_table(values='weightTimesDeltaValue', index='Row Products', columns='Column Products')

# Removes Index and Column Names cuz it looks better. Both Index and Columns are simply Product Categories now
delta_final.columns.name = None 
delta_final.index.name = None 


labels_to_drop = ["government employees' expenditures abroad", "private employees' expenditures abroad","u.s. travel outside the united states","u.s. student expenditures"]
delta_final = delta_final.drop(index=labels_to_drop, columns=labels_to_drop, errors="ignore")

# delta_final

# Fixing Products with no price, quantity or expenditure data

In [None]:
def _load_bea_series(file_name, value_column, products):
    """Load one BEA monthly workbook in long form, restricted to *products*.

    The workbook is cleaned with the project helper ``clean_bea_PQE_table``
    (called with ``long=True``); the code below relies on the result having
    'products', 'date' and *value_column* columns. Rows for products outside
    *products* are dropped, exact duplicate rows are removed, the '---'
    placeholder is replaced by 0 and remaining missing cells are filled with 0.
    """
    table = pd.read_excel(os.path.join(raw_data_path, file_name))
    table = clean_bea_PQE_table(table, value_column, long=True)
    table = table[table['products'].isin(products)]  # drop products that delta doesn't have
    table = table.drop_duplicates()
    table[value_column] = table[value_column].replace('---', 0).astype(float)
    return table.fillna(0)


bea_quantities = _load_bea_series('BEA Monthly Quantities.xlsx', 'Quantities', delta_final.index)
bea_prices = _load_bea_series('BEA Monthly Prices.xlsx', 'Prices', delta_final.index)
bea_expenditures = _load_bea_series('BEA Monthly Expenditures.xlsx', 'Expenditures', delta_final.index)

# Outer joins keep every (product, date) pair seen in any of the three tables;
# pairs missing from one table get NaN in that table's column.
bea_PQE_merged = pd.merge(left=bea_quantities, right=bea_prices, on=['products', 'date'], how='outer')
bea_PQE_merged = pd.merge(left=bea_PQE_merged, right=bea_expenditures, on=['products', 'date'], how='outer')

# dropping this because 2024-10 has no data yet
bea_PQE_merged = bea_PQE_merged.dropna(subset=['Expenditures'])

# Months that are present in all three tables, in chronological order.
dates = sorted(set(bea_expenditures["date"]) & set(bea_prices["date"]) & set(bea_quantities["date"]))

cols_to_check = ['Quantities', 'Prices', 'Expenditures']

# Rows where at least one of price, quantity or expenditure is missing or zero.
zero_mask = (
    bea_PQE_merged[cols_to_check].isnull().any(axis=1)
    | (bea_PQE_merged[cols_to_check] == 0).any(axis=1)
)
products_with_zero_somwhere = bea_PQE_merged[zero_mask]

# For every month, the products that lack a value somewhere.
all_bad_products_dict = {
    date: products_with_zero_somwhere.loc[
        products_with_zero_somwhere['date'] == date, "products"
    ].tolist()
    for date in dates
}

# Set the entire BEA row to zero if any of the price, quantity or expenditure
# data is missing. The mask now covers NaN as well as 0: the outer merges above
# can leave NaN prices/quantities, and those rows are already counted as "bad"
# in products_with_zero_somwhere, so they must be zeroed here too.
bea_PQE_merged.loc[zero_mask, cols_to_check] = 0

bea_prices = bea_PQE_merged[["products", "date", "Prices"]]

# 12. Calculate Gamma

In [None]:
# gamma holds one number per product: the row sum of delta_final, stored in a
# single 'Row_Sum' column and coerced to numeric.
row_sums = delta_final.sum(axis=1)
gamma = row_sums.to_frame(name='Row_Sum')
gamma = gamma.apply(pd.to_numeric, errors='coerce')
# gamma

# 13. Calculate Omega

In [None]:
# omega is delta_final with every row divided by its row sum (gamma).
# The merge keys are the two indexes themselves, so the row sum is appended as
# the last column and the join key comes back as the first column.
delta_gamma = delta_final.merge(gamma, left_on=gamma.index, right_on=delta_final.index)
delta_gamma = delta_gamma.set_index(delta_gamma.columns[0])
row_sums = delta_gamma.iloc[:, -1]
# Rows that sum to zero become NaN in omega instead of dividing by zero.
row_sums[row_sums == 0] = np.nan
omega = delta_gamma.iloc[:, :-1].div(row_sums, axis=0)
omega.columns.name = None 
omega.index.name = None 
omega = omega.apply(pd.to_numeric, errors='coerce')
# omega

In [None]:
delta_final

# Different gamma and omega for missing products

In [None]:
# One [gamma, omega] pair per month. For each month, products flagged as
# missing get a gamma of 0 and their delta column zeroed, and omega is then
# recomputed from the adjusted delta.
# NOTE(review): 'gamma' is rebound inside later loops, so this cell depends on
# being run right after the "Calculate Gamma" cell — confirm when re-running.
gamma_omega_monthly = {}

for date, zero_products in all_bad_products_dict.items():
    current_gamma = gamma.copy()
    current_delta = delta_final.copy()

    for product in zero_products:
        is_product = current_gamma.index == product
        current_gamma.loc[is_product, 'Row_Sum'] = 0  # making missing sector expenditure 0
        if product in delta_final.columns:
            current_delta[product] = 0  # missing time series products dont sell to any other sector

    # Row-normalise the adjusted delta.
    current_row_sum = current_delta.sum(axis=1)
    current_omega = current_delta.div(current_row_sum, axis=0)
    checking_omega = current_omega

    current_omega.columns.name = None
    current_omega.index.name = None
    current_omega = current_omega.apply(pd.to_numeric, errors='coerce')

    gamma_omega_monthly[date] = [current_gamma, current_omega]

In [None]:
gamma_omega_monthly

# Start of Doing our Supply/demand contribution graph properly (Which influencer has the most influence?)

# Find Sales

In [None]:
# calculate sales in product and time period
sales = pd.DataFrame({'date': pd.Series(dtype='datetime64[ns]'),
                   'products': pd.Series(dtype='str'),
                   'sales': pd.Series(dtype='float')})

for date in dates:

    gamma = gamma_omega_monthly.get(date)[0]
    omega = gamma_omega_monthly.get(date)[1]
 
    # filter expenditures for the current date
    expenditures_date = bea_expenditures[bea_expenditures['date'] == date][['products', 'Expenditures']].set_index('products')
    expenditures_date = expenditures_date.sort_index()
    expenditures_date = expenditures_date.apply(pd.to_numeric, errors='coerce')

    gamma_series = gamma["Row_Sum"]
    diag_matrix = np.diag(gamma_series)

    x = np.identity(len(omega)) - (omega.T @ diag_matrix)

    sales_date = np.linalg.inv(x) @ expenditures_date

    sales_date['date'] = date
    sales_date['products'] = expenditures_date.index
    sales_date.rename(columns={'Expenditures': 'sales'}, inplace=True)

    sales = pd.concat([sales, sales_date], ignore_index=True)

# Find Prices of Intermediates

In [None]:
# calculate prices of intermediates (we use cobb douglas production with intermediates)
intermediates = pd.DataFrame({'date': pd.Series(dtype='datetime64[ns]'),
                   'products': pd.Series(dtype='str'),
                   'intermediates': pd.Series(dtype='float')})
for date in dates:

    prices_date = bea_prices[bea_prices['date'] == date][['products', 'Prices']].set_index('products')
    prices_date = prices_date.sort_index()

    gamma = gamma_omega_monthly.get(date)[0]
    omega = gamma_omega_monthly.get(date)[1]
    
    for i in gamma.index:
        log_prices = np.log(prices_date['Prices'])
        log_prices.replace(-np.inf, 0, inplace=True)        
        intermediates.loc[len(intermediates)] = [date, i, np.exp(omega.loc[i] @ log_prices)]

# Find Price of Value Added

In [None]:
# calculate prices of value added
value_added = pd.DataFrame({'date': pd.Series(dtype='datetime64[ns]'),
                   'products': pd.Series(dtype='str'),
                   'value_added': pd.Series(dtype='float')})

for date in dates:
    # filter prices for the current date
    prices_date = bea_prices[bea_prices['date'] == date][['products', 'Prices']].set_index('products')
    prices_date = prices_date.sort_index()

    # filter intermediates for the current date
    intermediates_date = intermediates[intermediates['date'] == date][['products', 'intermediates']].set_index('products')
    intermediates_date = intermediates_date.sort_index()

    gamma = gamma_omega_monthly.get(date)[0]    
    gamma_series = gamma["Row_Sum"]

    value_added_date = np.exp((1/(1 - gamma_series.sort_index()))*(np.log(prices_date['Prices']) - gamma_series.sort_index() * np.log(intermediates_date['intermediates'])))

    value_added_date = value_added_date.reset_index().rename(columns={0: 'value_added'})
    value_added_date['date'] = date
    value_added_date.rename(columns={"index": "products"}, inplace=True)

    value_added = pd.concat([value_added, value_added_date], ignore_index=True)

# Sales and Value Added VAR

In [None]:
# Fit a 12-lag VAR on the log-differences of value-added price and sales for
# every product and collect the residuals.
lags = 12
residuals_part = []

# The value_added/sales merge does not depend on the product, so it is done
# once here instead of being repeated inside the loop for every product.
calculated_all = pd.merge(left=value_added, right=sales, on=['products', 'date'], how='inner')

for product in gamma.index:
    product_bad_dates = products_with_zero_somwhere[products_with_zero_somwhere["products"] == product]
    calculated = calculated_all[calculated_all['products'] == product][['date', 'value_added', 'sales']].sort_values(['date'])
    calculated = calculated.set_index('date')

    if not product_bad_dates.empty:
        calculated = calculated[~calculated.index.isin(product_bad_dates['date'])] # remove rows before we have all time series data

    # Month-over-month log changes.
    calculated['value_added'] = np.log(calculated['value_added']).diff()
    calculated['sales'] = np.log(calculated['sales']).diff()

    calculated.replace([np.inf, -np.inf, np.nan], 0, inplace=True)

    # Put the series on a complete month-start index.
    # NOTE(review): months removed above come back as NaN rows here — confirm
    # the VAR fit handles them as intended.
    full_index_calc = pd.date_range(start=calculated.index.min(), end=calculated.index.max(), freq='MS')
    calculated = calculated.reindex(full_index_calc)

    model_calculated = VAR(calculated)
    result_calculated = model_calculated.fit(lags)

    residuals_calculated = result_calculated.resid.reset_index()
    residuals_calculated['products'] = product

    residuals_part.append(residuals_calculated)

IO_residuals = pd.concat(residuals_part, ignore_index=True)
IO_residuals.rename(columns={'index': 'date', 'value_added': 'residual_value_added', 'sales': 'residual_sales'}, inplace=True)
# min_count=1 keeps a group as NaN when it has no non-missing value at all.
IO_residuals = IO_residuals.groupby(['date', 'products']).sum(min_count=1).reset_index()
IO_residuals = IO_residuals.sort_values(['date', 'products'])

# Price and Quantity VAR

In [None]:
# Fit a 12-lag VAR on the log-differences of price and quantity for every
# product and collect the residuals.
lags = 12
residual_temp = []

# 'gamma' here is whatever the most recent loop left bound at notebook level;
# only its index (the product list) is used.
for product in gamma.index:

    original = bea_PQE_merged[bea_PQE_merged['products'] == product][['date', 'Prices', 'Quantities']].sort_values(['date'])
    original = original.set_index('date')

    original.dropna(inplace=True)

    # Month-over-month log changes; zeros in the inputs turn into +/-inf here.
    original['Prices'] = np.log(original['Prices']).diff()
    original['Quantities'] = np.log(original['Quantities']).diff()
     

    # Treat infinite changes as missing and drop them together with the first
    # (undefined) difference.
    original.replace([np.inf, -np.inf], np.nan, inplace=True)
    original.dropna(inplace=True)

    # Put the series on a complete month-start index.
    # NOTE(review): any month dropped above reappears as a NaN row — confirm
    # the VAR fit handles that as intended.
    full_index = pd.date_range(start=original.index.min(), end=original.index.max(), freq='MS')
    original = original.reindex(full_index)

    model_original = VAR(original)
    result_original = model_original.fit(lags)

    residuals_original = result_original.resid.reset_index()
    residuals_original['products'] = product
    
    residual_temp.append(residuals_original)
    
residuals_normal = pd.concat(residual_temp, ignore_index=True)
residuals_normal.rename(columns={'index': 'date', 'Prices': 'residual_prices', 'Quantities': 'residual_quantities'}, inplace=True)
# min_count=1 keeps a group as NaN when it has no non-missing value at all.
residuals_normal = residuals_normal.groupby(['date', 'products']).sum(min_count=1).reset_index()
residuals_normal = residuals_normal.sort_values(['date', 'products'])

# Classification of Price of Value Added and Real Production

In [None]:
# Flag each (date, product) by the sign of the product of its two residuals:
# same sign (or zero) -> demand flag, opposite sign -> supply flag.
# A NaN product fails both comparisons, so both flags are 0 in that case.
price_quantity_product = residuals_normal['residual_prices'] * residuals_normal['residual_quantities']
residuals_normal['majority_demand'] = (price_quantity_product >= 0).astype(int)
residuals_normal['majority_supply'] = (price_quantity_product < 0).astype(int)

value_added_sales_product = IO_residuals['residual_value_added'] * IO_residuals['residual_sales']
IO_residuals['majority_demand_a'] = (value_added_sales_product >= 0).astype(int)
IO_residuals['majority_supply_a'] = (value_added_sales_product < 0).astype(int)

# Start of Rethinking Supply and Demand Influence

# 1. Find N X T Value Added Prices and Output Prices

In [None]:
# Products x dates matrix of log value-added prices.
log_P_VA = value_added.pivot(index='products', columns='date', values='value_added')
# Non-positive and missing values (NaN > 0 is False) map to 0 instead of log.
log_P_VA = log_P_VA.applymap(lambda x: np.log(x) if x > 0 else 0)
# Drop the first 12 date columns.
log_P_VA = log_P_VA.iloc[:, 12:]

# Same construction for the output prices.
log_P = bea_prices.pivot(index='products', columns='date', values='Prices')
log_P = log_P.applymap(lambda x: np.log(x) if x > 0 else 0)
log_P = log_P.iloc[:, 12:]
log_P

# 2. Get Big_Theta

In [None]:
# I'm gonna make a different big_theta for every month based on that month's gamma and omega
big_theta_monthly = {} # this will store a different big_theta for each month

for date in dates: 
    
    gamma = gamma_omega_monthly.get(date)[0]
    omega = gamma_omega_monthly.get(date)[1]

    # first matrix
    step_1 = np.diag(gamma["Row_Sum"]) @ omega
    step_2 = np.eye(len(gamma)) - step_1
    step_3 = np.linalg.inv(step_2)
    # second matrix
    step_4 = np.ones(len(gamma)) - gamma["Row_Sum"]
    step_5 = np.diag(step_4)
    big_theta = step_3 @ step_5
    big_theta = pd.DataFrame(big_theta, index=omega.index, columns=omega.columns)

    big_theta_monthly[date] = big_theta

# Detour to Some possible checks 

### Check 1 

In [None]:
# big_theta for the three snapshot months. The 1983 and 2024 months are
# January, matching the expenditure-weight snapshots these matrices are
# multiplied with below and the months used in Checks 2 and 3; previously they
# were taken from September, so theta and the weights referred to different
# months.
theta_1960 = big_theta_monthly.get(pd.Timestamp('1960-02-01 00:00:00'))
theta_1983 = big_theta_monthly.get(pd.Timestamp('1983-01-01 00:00:00'))
theta_2024 = big_theta_monthly.get(pd.Timestamp('2024-01-01 00:00:00'))

In [None]:
# Get expenditure weights
bea_expenditures = bea_expenditures.pivot(index='products', columns='date', values='Expenditures')
expenditure_weights = bea_expenditures.div(bea_expenditures.sum(axis=0), axis=1)
expenditure_weights = expenditure_weights.iloc[:, 13:]
expenditure_weights

In [None]:
expenditure_weights["1960-02-01 00:00:00"]

In [None]:
# One-column frames of expenditure weights for the three snapshot months.
expenditure_weight_1960 = expenditure_weights[["1960-02-01 00:00:00"]]
expenditure_weight_1983 = expenditure_weights[["1983-01-01 00:00:00"]]
expenditure_weight_2024 = expenditure_weights[["2024-01-01 00:00:00"]]

# W_va_1960 = (theta_1960.T @ expenditure_weight_1960).sort_values(by="1960-02-01 00:00:00",ascending=False)
# W_va_1983 = (theta_1983.T @ expenditure_weight_1983).sort_values(by="1983-01-01 00:00:00",ascending=False)
# W_va_2024 = (theta_2024.T @ expenditure_weight_2024).sort_values(by="2024-01-01 00:00:00",ascending=False)

# Value-added weights: transposed big_theta times the expenditure weights.
W_va_1960 = (theta_1960.T @ expenditure_weight_1960)
W_va_1983 = (theta_1983.T @ expenditure_weight_1983)
W_va_2024 = (theta_2024.T @ expenditure_weight_2024)



# Expenditure weights ordered from largest to smallest, used for plotting.
expenditure_weight_1960_sorted = expenditure_weight_1960.sort_values(by="1960-02-01 00:00:00",ascending=False)
expenditure_weight_1983_sorted = expenditure_weight_1983.sort_values(by="1983-01-01 00:00:00",ascending=False)
expenditure_weight_2024_sorted = expenditure_weight_2024.sort_values(by="2024-01-01 00:00:00",ascending=False)

In [None]:
expenditure_weight_1983_sorted

In [None]:
W_va_1983

In [None]:
# NOTE(review): plot_df is first assigned in the cell that follows, so this
# display raises NameError when the cells are executed top to bottom — confirm
# whether this cell should move after the plotting cells.
plot_df

In [None]:
# Plot the 1960 expenditure weights (sorted) next to the value-added weights;
# pandas aligns the two one-column frames on product label.
expenditure_part = expenditure_weight_1960_sorted.rename(
    columns={expenditure_weight_1960_sorted.columns[0]: "expenditure_weight"}
)
value_added_part = W_va_1960.rename(columns={W_va_1960.columns[0]: "value_added_weight"})
plot_df = pd.concat([expenditure_part, value_added_part], axis=1)

ax = plot_df.plot(figsize=(20, 4))   # pandas uses matplotlib under the hood
ax.set(xlabel="Index", ylabel="Value", title="Value_Added vs Expenditure Weight 1960")
plt.show()

In [None]:
# Plot the 1983 expenditure weights (sorted) next to the value-added weights;
# pandas aligns the two one-column frames on product label.
expenditure_part = expenditure_weight_1983_sorted.rename(
    columns={expenditure_weight_1983_sorted.columns[0]: "expenditure_weight"}
)
value_added_part = W_va_1983.rename(columns={W_va_1983.columns[0]: "value_added_weight"})
plot_df = pd.concat([expenditure_part, value_added_part], axis=1)

ax = plot_df.plot(figsize=(20, 4))   # pandas uses matplotlib under the hood
ax.set(xlabel="Index", ylabel="Value", title="Value_Added vs Expenditure Weight 1983")
plt.show()

In [None]:
# Plot the 2024 expenditure weights (sorted) next to the value-added weights;
# pandas aligns the two one-column frames on product label.
expenditure_part = expenditure_weight_2024_sorted.rename(
    columns={expenditure_weight_2024_sorted.columns[0]: "expenditure_weight"}
)
value_added_part = W_va_2024.rename(columns={W_va_2024.columns[0]: "value_added_weight"})
plot_df = pd.concat([expenditure_part, value_added_part], axis=1)

ax = plot_df.plot(figsize=(20, 4))   # pandas uses matplotlib under the hood
ax.set(xlabel="Index", ylabel="Value", title="Value_Added vs Expenditure Weight 2024")
plt.show()

# a) All entries are positive or zero 🙂

In [None]:
# Maps each month to True when that month's big_theta contains at least one
# negative entry.
negative_theta = {
    date: (big_theta_monthly.get(date) < 0).any().any()
    for date in dates
}

# True if any month has a negative entry.
any(negative_theta.values())

# b) Row sum does equal one 😃

In [None]:
row_sum_not_one = {} # despite the name, stores True when every big_theta ROW sum is within 0.001 of 1 for a given month

for date in dates: 
    current_big_theta = big_theta_monthly.get(date)
    current_row_sums = current_big_theta.sum(axis=1).to_frame()
    # True only if all row sums of this month's big_theta are within 0.001 of 1.
    is_one = current_row_sums.iloc[:, 0].sub(1).abs().le(0.001).all()
    row_sum_not_one[date] = is_one

# True if at least one month fails the row-sum check.
any(not v for v in row_sum_not_one.values())

# c) log(p) == big_theta @ log(p)_VA 🤠

In [None]:
# For every month, multiply that month's big_theta by that month's column of
# log_P_VA and compare the result with the same month's column of log_P.
# Only the needed column is multiplied; the full products-by-dates product is
# not required to obtain a single month's column.
theta_log_p_check = []

for date in dates[12:]:
    current_big_theta = big_theta_monthly.get(date)
    # DataFrame @ Series aligns big_theta's columns with the product index.
    theta_log_p = (current_big_theta @ log_P_VA[date]).rename(date)
    theta_log_p_check.append(theta_log_p)

theta_log_p_check_df = pd.concat(theta_log_p_check, axis=1)

diff = (log_P - theta_log_p_check_df).abs() # take the difference

mask = diff > 0.00000000001 # acceptable difference threshold

# Long-form boolean selector of the (product, date) cells that exceed the threshold.
mismatch_locs = mask.stack()
mismatches_df = pd.DataFrame({
    'log_P': log_P.stack()[mismatch_locs],
    'check_against_log_p': theta_log_p_check_df.stack()[mismatch_locs],
    'abs_diff': diff.stack()[mismatch_locs]
})

# Returns positions where difference between the two dfs are greater than threshold
mismatches_df

# 3. Find log_P_VA|D=1 and log_P_VA|D=0

In [None]:
# Split log_P_VA by the 0/1 demand flag from the IO residuals: D1 keeps the
# cells flagged 1, D0 keeps the cells flagged 0.
IO_cassification = IO_residuals.pivot(index='products', columns='date', values='majority_demand_a')
log_P_VA_D1 = log_P_VA * IO_cassification
log_P_VA_D0 = log_P_VA * (1 - IO_cassification)

# im gonna fill Nans with 0
log_P_VA_D1_filled, log_P_VA_D0_filled = log_P_VA_D1.fillna(0), log_P_VA_D0.fillna(0)

## Check 2

In [None]:
# For each snapshot month keep only the products whose value in that month is
# non-zero, as a one-column frame (D1 first, then D0).
log_P_VA_D1_filled_1960 = log_P_VA_D1_filled.loc[log_P_VA_D1_filled["1960-02-01 00:00:00"] != 0,["1960-02-01 00:00:00"]]
log_P_VA_D1_filled_1983 = log_P_VA_D1_filled.loc[log_P_VA_D1_filled["1983-01-01 00:00:00"] != 0,["1983-01-01 00:00:00"]]
log_P_VA_D1_filled_2024 = log_P_VA_D1_filled.loc[log_P_VA_D1_filled["2024-01-01 00:00:00"] != 0,["2024-01-01 00:00:00"]]

log_P_VA_D0_filled_1960 = log_P_VA_D0_filled.loc[log_P_VA_D0_filled["1960-02-01 00:00:00"] != 0,["1960-02-01 00:00:00"]]
log_P_VA_D0_filled_1983 = log_P_VA_D0_filled.loc[log_P_VA_D0_filled["1983-01-01 00:00:00"] != 0,["1983-01-01 00:00:00"]]
log_P_VA_D0_filled_2024 = log_P_VA_D0_filled.loc[log_P_VA_D0_filled["2024-01-01 00:00:00"] != 0,["2024-01-01 00:00:00"]]


def smooth_counts(counts, window=5):
    """Return a centred moving average of ``counts`` as a NumPy array.

    The window is centred on each position and shrinks at the edges
    (``min_periods=1``), so the output has the same length as the input and
    contains no NaN padding.
    """
    series = pd.Series(counts)
    centred_window = series.rolling(window, center=True, min_periods=1)
    return centred_window.mean().to_numpy()

def plot_smoothed_hist_overlay(s1, s2, label1="series1", label2="series2", bins=80, title=""):
    """Overlay the smoothed histograms of two samples on shared bins.

    Both inputs are flattened, counted on one equal-width grid spanning their
    combined range, smoothed with ``smooth_counts`` (window of 5 bins) and
    drawn as filled line plots in a new figure, which is then shown.

    Args:
        s1, s2: Array-likes (e.g. one-column DataFrames) of numeric values.
        label1, label2: Legend labels for the two samples.
        bins: Number of equal-width bins across the combined range.
        title: Figure title.

    Raises:
        ValueError: If both inputs are empty, since no range can be derived.
    """
    x1 = np.asarray(s1).ravel()
    x2 = np.asarray(s2).ravel()

    # An empty sample (e.g. no product in that class for the month) must not
    # break the range computation; it simply contributes all-zero counts.
    populated = [x for x in (x1, x2) if x.size]
    if not populated:
        raise ValueError("plot_smoothed_hist_overlay needs at least one non-empty series")

    xmin = min(x.min() for x in populated)
    xmax = max(x.max() for x in populated)
    edges = np.linspace(xmin, xmax, bins + 1)

    # Raw counts (not densities) on the shared edges so both curves are comparable.
    counts1, _ = np.histogram(x1, bins=edges, density=False)
    counts2, _ = np.histogram(x2, bins=edges, density=False)

    centers = (edges[:-1] + edges[1:]) / 2

    smooth1 = smooth_counts(counts1, window=5)
    smooth2 = smooth_counts(counts2, window=5)

    plt.figure(figsize=(10, 4))
    plt.plot(centers, smooth1, label=label1, alpha=0.8)
    plt.fill_between(centers, smooth1, alpha=0.3)

    plt.plot(centers, smooth2, label=label2, alpha=0.8)
    plt.fill_between(centers, smooth2, alpha=0.3)

    plt.xlabel("Value")
    plt.ylabel("Count (smoothed)")
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.show()

# Compare the distribution of non-zero D=1 values against non-zero D=0 values
# for each of the three reference months.
plot_smoothed_hist_overlay(log_P_VA_D1_filled_1960, log_P_VA_D0_filled_1960, label1="D1", label2="D0", title="1960 not normalized")
plot_smoothed_hist_overlay(log_P_VA_D1_filled_1983, log_P_VA_D0_filled_1983, label1="D1", label2="D0", title="1983 not normalized")
plot_smoothed_hist_overlay(log_P_VA_D1_filled_2024, log_P_VA_D0_filled_2024, label1="D1", label2="D0", title="2024 not normalized")

## Check 3

In [None]:
# Value added laid out as products (rows) x dates (columns).
p_va = value_added.pivot(index='products', columns='date', values='value_added')

# Note the single brackets for 1960: this yields a Series (used with np.diag
# further down), whereas the 1983 and 2024 selections are one-column DataFrames.
p_va_1960 = p_va["1960-02-01 00:00:00"]
p_va_1983 = p_va[["1983-01-01 00:00:00"]]
p_va_2024 = p_va[["2024-01-01 00:00:00"]]

# Import the raw prices again and clean them
prices = pd.read_excel(os.path.join(raw_data_path, 'BEA Monthly Prices.xlsx'))
prices = clean_bea_PQE_table(prices, "prices")
# Keep only the products that also appear in the value-added table
# (the 210 BEA products that we actually use)
prices = prices.loc[prices.index.intersection(p_va_2024.index)]
# If a product label occurs more than once, keep its first row only
prices = prices[~prices.index.duplicated(keep='first')]
prices = prices.reset_index()

# Reshape to long format (one row per product and month) for the
# month-over-month product-specific inflation computed next
prices_long = pd.melt(prices, id_vars='products', var_name='month', value_name='prices')
# Non-numeric price entries become NaN
prices_long['prices'] = pd.to_numeric(prices_long['prices'], errors='coerce')
def month_over_month_product_inflation(df):
    """Add each product's month-over-month fractional price change.

    Expects columns 'products', 'month' and 'prices'. Rows are ordered by
    product and then month, and the change is computed within each product as
    (price - previous price) / previous price. The value is a fraction (it is
    not multiplied by 100) and is NaN for the first month of every product.

    Returns a sorted copy of ``df`` with an added 'inflation percent' column;
    the frame passed in is not modified.
    """
    ordered = df.sort_values(by=['products', 'month'])
    # Previous month's price of the same product (NaN at each product's start)
    previous_price = ordered.groupby('products')['prices'].shift(1)
    ordered['inflation percent'] = (ordered['prices'] - previous_price) / previous_price
    return ordered
# Product-level month-over-month inflation, reshaped to products x months
inflation = month_over_month_product_inflation(prices_long)
inflation = inflation.pivot(index='products', columns='month', values='inflation percent')
# Drop the first 13 month columns
inflation = inflation.iloc[:, 13:]

# One-column frames for the three reference months
inflation_1960 = inflation[["1960-02-01 00:00:00"]]
inflation_1983 = inflation[["1983-01-01 00:00:00"]]
inflation_2024 = inflation[["2024-01-01 00:00:00"]]

# Earlier element-wise formulation, kept for reference:
# contribution_1960 = ((W_va_1960 * p_va_1960) / inflation_1960).replace([np.inf, -np.inf], 0) 
# contribution_1983 = ((W_va_1983 * p_va_1983) / inflation_1983).replace([np.inf, -np.inf], 0) 
# contribution_2024 = ((W_va_2024 * p_va_2024) / inflation_2024).replace([np.inf, -np.inf], 0) 

# W_va_1960.T is multiplied by a diagonal matrix built from the 1960 value-added
# Series, transposed back, and divided by the column sum of
# inflation_1960 * expenditure_weight_1960.
# NOTE(review): W_va_1960 and expenditure_weight_1960 are defined elsewhere;
# only the 1960 contribution is computed here - confirm that 1983/2024 are
# intentionally omitted.
contribution_1960 = ((W_va_1960.T @ np.diag(p_va_1960))).T / (inflation_1960 * expenditure_weight_1960).sum()

In [None]:
# Display the column totals of the 1960 contributions
contribution_1960.sum()

# Extra Checks

## 1. log_P_VA_D0 + log_P_VA_D1 == log_P_VA

In [None]:
# Recombine the two parts from their unfilled versions; cells that are NaN in
# either part become 0 after the fill.
log_P_VA_D01 = log_P_VA_D0 + log_P_VA_D1
log_P_VA_D01 = log_P_VA_D01.fillna(0)

diff = (log_P_VA_D01 - log_P_VA).abs() # absolute element-wise difference
mask = diff > 0.00000001 # acceptable difference threshold (1e-8)
# Stack to a (row label, column label) index so the mask can pick out cells
mismatch_locs = mask.stack()
mismatches_df = pd.DataFrame({
    'log_P_VA_D01': log_P_VA_D01.stack()[mismatch_locs],
    'log_P_VA': log_P_VA.stack()[mismatch_locs],
    'abs_diff': diff.stack()[mismatch_locs]
})
# Displays the positions where the two frames differ by more than the threshold
mismatches_df # Mismatches are where var data is lagged and thus missing

## 2. log_P == big_theta * [log_P_VA_D0 + log_P_VA_D1]

In [None]:
# Rebuild log_P month by month from the recombined value-added series:
# big_theta(month) @ log_P_VA_D01, keeping only that month's column.
check_2 = []

# The first 12 entries of `dates` are skipped.
for date in dates[12:]:
    current_big_theta = big_theta_monthly.get(date)
    current_check_2 = current_big_theta @ log_P_VA_D01
    check_2_date = current_check_2[date]
    check_2.append(check_2_date)

check_2_df = pd.concat(check_2, axis=1)

# Compare the reconstruction against log_P itself. The earlier version
# re-tested log_P_VA_D01 against log_P_VA and never used check_2_df, so the
# check announced for this cell was not actually performed.
diff = (log_P - check_2_df).abs() # absolute element-wise difference
mask = diff > 0.00000001 # acceptable difference threshold (1e-8)
# Stack to a (row label, column label) index so the mask can pick out cells
mismatch_locs = mask.stack()
mismatches_df = pd.DataFrame({
    'log_P': log_P.stack()[mismatch_locs],
    'check_against_log_p': check_2_df.stack()[mismatch_locs],
    'abs_diff': diff.stack()[mismatch_locs]
})
# Displays the (product, date) positions where the reconstruction deviates
# from log_P by more than the threshold
mismatches_df

# 4. Find log_P|D=1 and log_P|D=0

In [None]:
# Per-month columns of big_theta @ (D=1 part) and big_theta @ (D=0 part)
log_P_D1 = []
log_P_D0 = []

# The first 12 entries of `dates` are skipped.
for date in dates[12:]: 

    # Multiply this month's big_theta by the zero-filled D=1 part and keep
    # only the column for the same month.
    log_p_D1_current =  big_theta_monthly.get(date) @ log_P_VA_D1_filled
    log_p_D1_month = log_p_D1_current[date]
    
    # Same for the zero-filled D=0 part.
    log_p_D0_current =  big_theta_monthly.get(date) @ log_P_VA_D0_filled
    log_p_D0_month = log_p_D0_current[date]

    log_P_D1.append(log_p_D1_month)
    log_P_D0.append(log_p_D0_month)


# Join the monthly columns; the list names are rebound to the resulting frames.
log_P_D1 = pd.concat(log_P_D1, axis=1)
log_P_D0 = pd.concat(log_P_D0, axis=1)

# log_P|D=1 + log_P|D=0 == log(P)

In [None]:
# Sum of the two parts; compared against log_P in the next cell
check_against_log_p = log_P_D0 + log_P_D1
check_against_log_p

In [None]:
# Absolute element-wise gap between log_P and the recombined parts
diff = (log_P - check_against_log_p).abs()

# NOTE(review): a threshold of 1 is far looser than the 1e-8 / 1e-11 used in
# the other checks - confirm this is intentional.
mask = diff > 1

# Stack to a (row label, column label) index so the mask can pick out cells
mismatch_locs = mask.stack()

mismatches_df = pd.DataFrame({
    'log_P': log_P.stack()[mismatch_locs],
    'check_against_log_p': check_against_log_p.stack()[mismatch_locs],
    'abs_diff': diff.stack()[mismatch_locs]
})
mismatches_df # Mismatches are where var data is lagged and thus missing

# 5. Calculate Final D Matrix

In [None]:
# Share of log_P attributed to the D=1 part, element by element.
# log_P contains zeros, so +/-inf results of the division are set to 0 by hand.
# Only infinities are replaced; NaN cells (e.g. from 0/0) stay NaN.
D_influence = log_P_D1 / log_P 
D_influence.replace([float('inf'), float('-inf')], 0, inplace=True)

# Check that All elements of D are between 0 and 1 

In [None]:
# True only if every remaining cell lies in [0, 1]. dropna() removes every
# row that contains at least one NaN before the comparison.
((D_influence.dropna() >= 0) & (D_influence.dropna() <= 1)).all().all()

# Graphing Code

In [None]:
def plot_graphs(data, plot_title, plot_text):
    """Draw a stacked area chart of annual demand- and supply-driven inflation.

    Args:
        data: DataFrame with 'annual_supply_inflation' and
            'annual_demand_inflation' columns and an index that can be
            compared with ``pd.Timestamp``.
        plot_title: Title placed above the chart.
        plot_text: Annotation drawn in a white box in the upper-left corner.

    Returns:
        The ``matplotlib.pyplot`` module, with the new figure as current figure.
    """
    # Only observations from 1970 onward are plotted.
    data = data.loc[data.index >= pd.Timestamp('1970-01-01')]

    # The final observation is left out of the chart.
    supply = data['annual_supply_inflation'].iloc[:-1]
    demand = data['annual_demand_inflation'].iloc[:-1]

    # Positive and negative contributions are stacked separately so that they
    # extend away from zero in their own direction. `where(cond, 0)` also maps
    # NaN to 0, because NaN fails both comparisons.
    supply_pos = supply.where(supply > 0, 0)
    demand_pos = demand.where(demand > 0, 0)
    supply_neg = supply.where(supply < 0, 0)
    demand_neg = demand.where(demand < 0, 0)

    plt.figure(figsize=(26, 12))

    colors = ["#008000", "#FF0000"]  # demand in green, supply in red
    plt.stackplot(supply.index, demand_pos, supply_pos, colors=colors, labels=["Demand", "Supply"])
    # Second call draws the negative halves; no labels, to avoid duplicate legend entries.
    plt.stackplot(supply.index, demand_neg, supply_neg, colors=colors)
    plt.xlabel('Date')
    plt.ylabel('Inflation')
    plt.title(f'{plot_title}')
    plt.legend()

    # Axes-relative coordinates keep the note in the top-left corner.
    plt.text(0.02, 0.95,
             f'{plot_text}',
             transform=plt.gca().transAxes, fontsize=9,
             bbox=dict(facecolor='white', alpha=0.8))

    return plt

## 1. Inflation Graphs A La Shapiro

In [None]:
# IO VAR classifications, laid out as products (rows) x dates (columns)
D_IO = IO_residuals.pivot(index='products', columns='date', values='majority_demand_a')
# Shapiro VAR classifications, same layout
D_shapiro = residuals_normal.pivot(index='products', columns='date', values='majority_demand')

In [None]:
def month_over_month_product_inflation(df):
    """Add each product's month-over-month fractional price change.

    Expects columns 'products', 'month' and 'prices'. Rows are ordered by
    product and then month, and the change is computed within each product as
    (price - previous price) / previous price. The value is a fraction (it is
    not multiplied by 100) and is NaN for the first month of every product.

    Returns a sorted copy of ``df`` with an added 'inflation percent' column;
    the frame passed in is not modified.
    """
    ordered = df.sort_values(by=['products', 'month'])
    # Previous month's price of the same product (NaN at each product's start)
    previous_price = ordered.groupby('products')['prices'].shift(1)
    ordered['inflation percent'] = (ordered['prices'] - previous_price) / previous_price
    return ordered

## Get Product Level month over month price changes

In [None]:
# Import the raw prices again and clean them
prices = pd.read_excel(os.path.join(raw_data_path, 'BEA Monthly Prices.xlsx'))
prices = clean_bea_PQE_table(prices, "prices")
# Keep only the products that also appear in the Shapiro classification
# table (the 210 BEA products that we actually use)
prices = prices.loc[prices.index.intersection(D_shapiro.index)]
# If a product label occurs more than once, keep its first row only
prices = prices[~prices.index.duplicated(keep='first')]
prices = prices.reset_index()

# Reshape to long format and compute month-over-month product-specific inflation
prices_long = pd.melt(prices, id_vars='products', var_name='month', value_name='prices')
# Non-numeric price entries become NaN
prices_long['prices'] = pd.to_numeric(prices_long['prices'], errors='coerce')
inflation = month_over_month_product_inflation(prices_long)
# Back to wide format: products (rows) x months (columns)
inflation = inflation.pivot(index='products', columns='month', values='inflation percent')
# Drop the first 13 month columns
inflation = inflation.iloc[:, 13:]

## Get expenditure share for each product per month

In [None]:
# Get expenditure weights: each product's share of that month's total
# expenditure (every column is divided by its own sum).
# NOTE(review): bea_expenditures is rebound to its pivoted form, so running
# this cell a second time will presumably fail - confirm.
bea_expenditures = bea_expenditures.pivot(index='products', columns='date', values='Expenditures')
expenditure_weights = bea_expenditures.div(bea_expenditures.sum(axis=0), axis=1)
# Drop the first 13 month columns, matching the slice applied to `inflation`
expenditure_weights = expenditure_weights.iloc[:, 13:]
expenditure_weights

## Use IO and Shapiro Supply and Demand Classifications to get Demand and Supply Driven Inflation based on product price changes and expenditure weights

In [None]:
# IO classification: aggregated monthly inflation
inflation_parts_IO = {}

# Iterating a DataFrame yields its column labels, i.e. one label per month.
for date in expenditure_weights:
    # The classification table is addressed by the string form of the label.
    current_demand = D_IO[str(date)] # demand flag of every product for this period
    current_supply = 1 - current_demand # complement: supply flag for this period

    current_weights = expenditure_weights[date] # all product weights for this period
    current_inflation = inflation[date] # all product-specific inflation for this period

    # Sum over products of flag * weight * inflation
    demand_inflation = (current_demand * current_weights * current_inflation).sum() # aggregated monthly demand-driven inflation
    supply_inflation = (current_supply * current_weights * current_inflation).sum() # aggregated monthly supply-driven inflation

    inflation_parts_IO[date] = [demand_inflation, supply_inflation] # store each month's data

# Final demand- and supply-driven inflation: one row per month after the transpose
inflation_IO_final = pd.DataFrame(inflation_parts_IO, index=['demand_inflation', 'supply_inflation'])
inflation_IO_final = inflation_IO_final.T

# Shapiro classification: aggregated monthly inflation (same computation,
# using D_shapiro for the flags)
inflation_parts_shapiro = {}

for date in expenditure_weights:
    current_demand = D_shapiro[str(date)]
    current_supply = 1 - current_demand

    current_weights = expenditure_weights[date]
    current_inflation = inflation[date]

    demand_inflation = (current_demand * current_weights * current_inflation).sum()
    supply_inflation = (current_supply * current_weights * current_inflation).sum()

    inflation_parts_shapiro[date] = [demand_inflation, supply_inflation]


inflation_shapiro_final = pd.DataFrame(inflation_parts_shapiro, index=['demand_inflation', 'supply_inflation'])
inflation_shapiro_final = inflation_shapiro_final.T

## Get Annual Supply and Demand Driven Inflation

In [None]:
# Each annual figure compounds monthly rates: (1 + rate) is shifted down by one
# row, multiplied over a 12-row window, then reduced by 1 and scaled to percent.
# Because of shift(1), the value at a given row covers the 12 rows before it;
# rows without a full 12-row window are NaN.

# IO Classification - Annual Supply Inflation
inflation_IO_final['annual_supply_inflation'] = (
    (inflation_IO_final['supply_inflation'].add(1).shift(1).rolling(window=12, min_periods=12).apply(np.prod, raw=True) - 1) * 100
)
# IO Classification - Annual Demand Inflation
inflation_IO_final['annual_demand_inflation'] = (
    (inflation_IO_final['demand_inflation'].add(1).shift(1).rolling(window=12, min_periods=12).apply(np.prod, raw=True) - 1) * 100
)
# Shapiro Classification - Annual Supply Inflation
inflation_shapiro_final['annual_supply_inflation'] = (
    (inflation_shapiro_final['supply_inflation'].add(1).shift(1).rolling(window=12, min_periods=12).apply(np.prod, raw=True) - 1) * 100
)
# Shapiro Classification - Annual Demand Inflation
inflation_shapiro_final['annual_demand_inflation'] = (
    (inflation_shapiro_final['demand_inflation'].add(1).shift(1).rolling(window=12, min_periods=12).apply(np.prod, raw=True) - 1) * 100
)

In [None]:
# Stacked chart for the Shapiro classification, compounded monthly rates
plot_graphs(inflation_shapiro_final, "Shapiro Classification - Inflation Calculated a la Shapiro", "â€¢ Using our 210 products \nâ€¢ Using Price and Quantities for Residuals and Classification")

In [None]:
# Stacked chart for the IO classification, compounded monthly rates
plot_graphs(inflation_IO_final, "ShapirIO Classification - Inflation Calculated a la Shapiro", "â€¢ Using our 210 products \nâ€¢ Using Value-Added and Sales for Residuals and Classification")

## Data from Shapiro's Code to Recreate the Exact Graph from his paper

In [None]:
# Load the spreadsheet stored in the Shapiro folder and plot it with the
# project helper; the last line displays the returned object.
shapiro_code_output = pd.read_excel(os.path.join(shapiro_file, 'shaprio_stata_output_excel.xlsx'))
shapiro_graph = plot_shapiro_graph_from_shapiro_ouput(shapiro_code_output, "Shapiro Graph Using His 130 Products, Data as Found in His Paper")
shapiro_graph

## 2. An Alternative way to Calculate Inflation - Log Difference

In [None]:
def month_inflation_althernative(df):
    """Add each product's month-over-month log price difference.

    Expects columns 'products', 'month' and 'prices'. Rows are ordered by
    product and then month, and the change is log(price) minus log(previous
    price) within each product, so the first month of every product is NaN.

    Returns a sorted copy of ``df`` with an added 'inflation log differnece'
    column; the frame passed in is not modified.
    """
    ordered = df.sort_values(by=['products', 'month'])
    log_price = np.log(ordered['prices'])
    # Previous month's log price of the same product (NaN at each product's start)
    previous_log_price = log_price.groupby(ordered['products']).shift(1)
    ordered['inflation log differnece'] = log_price - previous_log_price
    return ordered

## Get log difference month over month product specific price changes 

In [None]:
# Log-difference inflation per product, reshaped to products x months
inflation_log_difference = month_inflation_althernative(prices_long)
inflation_log_difference = inflation_log_difference.pivot(index='products', columns='month', values='inflation log differnece')
# Drop the first 13 month columns, matching the slice applied to the weights
inflation_log_difference = inflation_log_difference.iloc[:, 13:]

In [None]:
# Display the expenditure weights used in the aggregation below
expenditure_weights

## Use IO and Shapiro Supply and Demand Classifications to get Demand and Supply Driven Inflation based on product price changes and expenditure weights

In [None]:
# IO classification: aggregated monthly inflation from log differences
inflation_log_parts_IO = {}

# Iterating a DataFrame yields its column labels, i.e. one label per month.
for date in expenditure_weights:
    # The classification table is addressed by the string form of the label.
    current_demand = D_IO[str(date)] # demand flag of every product for this period
    current_supply = 1 - current_demand # complement: supply flag for this period

    current_weights = expenditure_weights[date] # all product weights for this period
    current_inflation = inflation_log_difference[date] # all product-specific log-difference inflation for this period

    # Sum over products of flag * weight * inflation
    demand_inflation = (current_demand * current_weights * current_inflation).sum() # aggregated monthly demand-driven inflation
    supply_inflation = (current_supply * current_weights * current_inflation).sum() # aggregated monthly supply-driven inflation

    inflation_log_parts_IO[date] = [demand_inflation, supply_inflation] # store each month's data

# Final demand- and supply-driven inflation: one row per month after the transpose
inflation_log_IO_final = pd.DataFrame(inflation_log_parts_IO, index=['demand_inflation_log', 'supply_inflation_log'])
inflation_log_IO_final = inflation_log_IO_final.T

# Shapiro classification: aggregated monthly inflation from log differences
# (same computation, using D_shapiro for the flags)
inflation_log_parts_shapiro = {}

for date in expenditure_weights:
    current_demand = D_shapiro[str(date)]
    current_supply = 1 - current_demand

    current_weights = expenditure_weights[date]
    current_inflation = inflation_log_difference[date]

    demand_inflation = (current_demand * current_weights * current_inflation).sum()
    supply_inflation = (current_supply * current_weights * current_inflation).sum()

    inflation_log_parts_shapiro[date] = [demand_inflation, supply_inflation]


inflation_log_shapiro_final = pd.DataFrame(inflation_log_parts_shapiro, index=['demand_inflation_log', 'supply_inflation_log'])
inflation_log_shapiro_final = inflation_log_shapiro_final.T

## Get Annual Supply and Demand Driven Inflation

In [None]:
# Each annual figure is the sum of the monthly log differences over a 12-row
# window ending at the current row, scaled to percent; rows without a full
# window are NaN.
# NOTE(review): no shift(1) is applied here, unlike the compounded annual
# series computed for the "a la Shapiro" variant, so the two windows are
# offset by one month - confirm this is intended.

# IO Classification - Annual Supply Inflation
inflation_log_IO_final['annual_supply_inflation'] = (
    (inflation_log_IO_final['supply_inflation_log'].rolling(window=12, min_periods=12).sum()) * 100
)
# IO Classification - Annual Demand Inflation
inflation_log_IO_final['annual_demand_inflation'] = (
    (inflation_log_IO_final['demand_inflation_log'].rolling(window=12, min_periods=12).sum()) * 100
)
# Shapiro Classification - Annual Supply Inflation
inflation_log_shapiro_final['annual_supply_inflation'] = (
    (inflation_log_shapiro_final['supply_inflation_log'].rolling(window=12, min_periods=12).sum()) * 100
)
# Shapiro Classification - Annual Demand Inflation
inflation_log_shapiro_final['annual_demand_inflation'] = (
    (inflation_log_shapiro_final['demand_inflation_log'].rolling(window=12, min_periods=12).sum()) * 100
)

In [None]:
# Stacked chart for the IO classification, log-difference inflation
plot_graphs(inflation_log_IO_final, "ShapirIO Classification - Inflation Log Difference", "â€¢ Using our 210 products \nâ€¢ Using Value-Added and Sales for Residuals and Classification")

In [None]:
# Stacked chart for the Shapiro classification, log-difference inflation
plot_graphs(inflation_log_shapiro_final, "Shapiro Classification - Inflation Log Difference", "â€¢ Using our 210 products \nâ€¢ Using Price and Quantities for Residuals and Classification")

## 3. Using D Influence Instead of Dummy Variable for Demand and Supply Driven Inflation

In [None]:
"""Inflation a la Shapiro"""
# Monthly aggregation using the D_influence ratios as weights in place of
# the classification flags.
inflation_parts_IO_influence = {}

# Iterating a DataFrame yields its column labels, i.e. one label per month.
for date in expenditure_weights:
    # D_influence is addressed by the string form of the label.
    current_demand = D_influence[str(date)] # demand share of every product for this period
    current_supply = 1 - current_demand # complement: supply share for this period

    current_weights = expenditure_weights[date] # all product weights for this period
    current_inflation = inflation[date] # all product-specific inflation for this period

    # Sum over products of share * weight * inflation
    demand_inflation = (current_demand * current_weights * current_inflation).sum() # aggregated monthly demand-driven inflation
    supply_inflation = (current_supply * current_weights * current_inflation).sum() # aggregated monthly supply-driven inflation

    inflation_parts_IO_influence[date] = [demand_inflation, supply_inflation] # store each month's data

# Final demand- and supply-driven inflation: one row per month after the transpose
inflation_IO_influence = pd.DataFrame(inflation_parts_IO_influence, index=['demand_inflation', 'supply_inflation'])
inflation_IO_influence = inflation_IO_influence.T

"""Inflation Log Difference"""
# Same aggregation, using the log-difference inflation series
inflation_parts_IO_log = {}

for date in expenditure_weights:
    current_demand = D_influence[str(date)] # demand share of every product for this period
    current_supply = 1 - current_demand # complement: supply share for this period

    current_weights = expenditure_weights[date] # all product weights for this period
    current_inflation = inflation_log_difference[date] # all product-specific log-difference inflation for this period

    demand_inflation = (current_demand * current_weights * current_inflation).sum() # aggregated monthly demand-driven inflation
    supply_inflation = (current_supply * current_weights * current_inflation).sum() # aggregated monthly supply-driven inflation

    inflation_parts_IO_log[date] = [demand_inflation, supply_inflation] # store each month's data

# Final demand- and supply-driven inflation: one row per month after the transpose
inflation_IO_influence_log = pd.DataFrame(inflation_parts_IO_log, index=['demand_inflation_log', 'supply_inflation_log'])
inflation_IO_influence_log = inflation_IO_influence_log.T

# Compounded variant: (1 + rate) shifted down one row, multiplied over a
# 12-row window, minus 1, in percent.
# Annual Supply Inflation - a la Shapiro
inflation_IO_influence['annual_supply_inflation'] = (
    (inflation_IO_influence['supply_inflation'].add(1).shift(1).rolling(window=12, min_periods=12).apply(np.prod, raw=True) - 1) * 100
)
# Annual Demand Inflation - a la Shapiro
inflation_IO_influence['annual_demand_inflation'] = (
    (inflation_IO_influence['demand_inflation'].add(1).shift(1).rolling(window=12, min_periods=12).apply(np.prod, raw=True) - 1) * 100
)
# Log variant: 12-row rolling sum ending at the current row, in percent
# (no shift(1) is applied here).
# Annual Supply Inflation - Log Difference
inflation_IO_influence_log['annual_supply_inflation'] = (
    (inflation_IO_influence_log['supply_inflation_log'].rolling(window=12, min_periods=12).sum()) * 100
)
# Annual Demand Inflation - Log Difference 
inflation_IO_influence_log['annual_demand_inflation'] = (
    (inflation_IO_influence_log['demand_inflation_log'].rolling(window=12, min_periods=12).sum()) * 100
)

In [None]:
# Stacked chart for the influence-weighted series, compounded monthly rates
plot_graphs(inflation_IO_influence, "ShapirIO Influence Matrix - Inflation a la Shapiro", "â€¢ Using our 210 products \nâ€¢ Using Value-Added and Sales for Residuals and Classification")

In [None]:
# Stacked chart for the influence-weighted series, log-difference inflation
plot_graphs(inflation_IO_influence_log, "ShapirIO Influence Matrix - Inflation Log Difference", "â€¢ Using our 210 products \nâ€¢ Using Value-Added and Sales for Residuals and Classification")

In [None]:
# Display the influence-weighted monthly and annual series
inflation_IO_influence

In [None]:
# Display the flag-based IO series for comparison
inflation_IO_final