# Setup

In [1]:
import pandas as pd
import numpy as np

In [2]:
data_config = {
    'file_path':'data/',
    'pce_fname':'PCEBridge.xlsx', # CPI makeup
    'mkt_share_fname':'market_share.xlsx', # D matrix
    'dom_comm_use_fname':'domestic_comm_use.xlsx', # B^d matrix
    'gross_output_fname':'GrossOutput.xlsx', # used to calc v, m, delta P
    'int_in_fname':'IntermediateInputs.xlsx', # M
    'val_add_fname':'ValueAdded.xlsx', # V
    'make_fname':'MakeMatrices.xlsx', # make matrix
    'use_fname':'UseMatrices.xlsx', # use matrix
    'import_fname':'ImportMatrices.xlsx' # import matrix
}
time_config = {
    'start_year':1997, # inclusive
    'end_year':2021 # inclusive
}

# Read Data

In [3]:
def read_mkt_share(year):
    # read in market share matrix for input year 
    # this is fraction of each commodities' total output produced by each industry
    df = pd.read_excel(
        io=data_config['file_path']+data_config['mkt_share_fname'],
        sheet_name=str(year),
        header=[5,6],
        index_col=[0,1]
    )
    df.dropna(inplace=True)
    return df

def read_mkt_share_all(time_config):
    # read all market share matrix for all years in time config
    # this is fraction of each commodities' total output produced by each industry   
    mkt_share_dfs = {}
    for time in time_config:
        df = read_mkt_share(year)
        mkt_share_dfs[time] = df
    return mkt_share_dfs

In [4]:
def read_dom_use(year):
    # read in domestic comm use matrix for input year 
    df = pd.read_excel(
        io=data_config['file_path']+data_config['dom_comm_use_fname'],
        sheet_name=str(year),
        header=[5,6],
        index_col=[0,1]
    )
    return df

def read_dom_use_all(time_config):
    # read all domestict comm use matrix for all years in time config
    dom_use_dfs = {}
    for time in time_config:
        df = read_dom_use(time)
        dom_use_dfs[time] = df
    return dom_use_dfs

In [5]:
raw_dom_use = read_dom_use(2021)
raw_dom_use

Unnamed: 0_level_0,Commodities/Industries,111CA,113FF,211,212,213,22,23,321,327,331,...,711AS,713,721,722,81,GFGD,GFGN,GFE,GSLG,GSLE
Unnamed: 0_level_1,Commodity Description,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,"Performing arts, spectator sports, museums, and related activities","Amusements, gambling, and recreation industries",Accommodation,Food services and drinking places,"Other services, except government",Federal general government (defense),Federal general government (nondefense),Federal government enterprises,State and local general government,State and local government enterprises
111CA,Farms,0.173553,0.003526,0.000000,0.000511,0.000208,0.000000,0.001356,0.000000,0.000000,0.000000,...,0.000000,0.002113,0.000155,0.000698,0.000014,0.000003,0.000573,0.000000,0.000700,0.000000
113FF,"Forestry, fishing, and related activities",0.052062,0.144284,0.000000,0.000741,0.000000,0.000000,0.000000,0.097674,0.000001,0.000000,...,0.000000,0.000107,0.000144,0.000329,0.000011,0.000000,0.000000,0.000013,0.001209,0.000000
211,Oil and gas extraction,0.000000,0.000000,0.103113,0.000083,0.000009,0.044571,0.000000,0.000006,0.000002,0.000063,...,0.000003,0.000064,0.000157,0.000077,0.000005,0.000000,0.000000,0.006025,0.000000,0.044277
212,"Mining, except oil and gas",0.003139,0.000005,0.000128,0.079912,0.000344,0.009573,0.010456,0.000009,0.046471,0.037799,...,0.001365,0.002593,0.000275,0.000225,0.000764,0.000000,0.001053,0.000000,0.000330,0.008645
213,Support activities for mining,0.000000,0.000000,0.020004,0.018675,0.010956,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Other,Noncomparable imports and rest-of-the-world adjustment,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
V001,Compensation of employees,0.067880,0.496264,0.017889,0.113505,0.374724,0.142098,0.337109,0.188588,0.226096,0.128616,...,0.321720,0.548203,0.245857,0.289591,0.514410,0.390153,0.368424,0.706385,0.581872,0.313837
V002,"Taxes on production and imports, less subsidies",-0.031005,0.017558,0.088914,0.059465,-0.001070,0.110436,-0.010530,-0.000598,0.006626,0.007833,...,-0.035355,0.096534,0.076055,0.006220,0.018094,0.000000,0.000000,-0.101775,0.000000,-0.100576
V003,Gross operating surplus,0.316915,0.180498,0.397611,0.285428,0.235371,0.330816,0.174104,0.216989,0.239375,0.152975,...,0.352920,0.000341,0.334637,0.234739,0.145769,0.236624,0.234864,0.130127,0.079500,0.206014


In [6]:
def read_delta_prices():
    df = pd.read_excel(
        io=data_config['file_path']+data_config['gross_output_fname'],
        sheet_name='TGO107-A',
        header=[7],
        index_col=[0,1]
    )
    df.drop(columns=df.columns[0], axis=1, inplace=True)
    return df

In [7]:
raw_delta_prices = read_delta_prices()
raw_delta_prices

Unnamed: 0_level_0,Unnamed: 1_level_0,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Line,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,All industries,0.0,1.3,3.2,1.6,0.9,2.8,3.9,4.3,3.8,3.3,...,1.5,1.7,1.7,-1.1,0.2,2.6,3.2,1.4,0.6,6.6
2,Private industries,-0.2,1.1,3.1,1.3,0.6,2.7,3.8,4.2,3.7,3.1,...,1.5,1.5,1.6,-1.2,0.2,2.6,3.1,1.4,0.4,6.7
3,"Agriculture, forestry, fishing, and hunting",-5.2,-5.0,0.7,3.1,-4.0,8.0,11.1,-3.3,0.9,16.6,...,4.5,2.0,-0.7,-9.8,-9.8,2.6,0.2,-0.3,-2.4,20.8
4,Farms,-5.8,-5.9,0.9,5.3,-4.5,9.2,12.2,-4.5,0.4,18.5,...,4.9,1.8,-1.1,-10.9,-11.1,2.7,0.0,-0.4,-2.7,23.0
5,"Forestry, fishing, and related activities",-1.6,-0.8,0.1,-7.9,-1.1,2.3,5.2,3.7,3.4,6.0,...,1.4,3.3,2.9,-0.6,0.3,1.7,2.1,0.2,-0.2,5.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,Information-communications-technology-producing industries\3\,-7.2,-5.4,-4.2,-4.6,-2.5,-3.0,-2.7,-2.5,-2.0,-2.3,...,-0.3,-0.1,-0.5,-1.5,-1.3,-1.2,-0.4,0.1,0.1,0.2
100,Information-communications-technology-producing industries\3\,,,,,,,,,,,...,,,,,,,,,,
"1. Consists of agriculture, forestry, fishing, and hunting; mining; construction; and manufacturing.",Information-communications-technology-producing industries\3\,,,,,,,,,,,...,,,,,,,,,,
"2. Consists of utilities; wholesale trade; retail trade; transportation and warehousing; information; finance, insurance, real estate, rental, and leasing; professional and business services; educational services, health care, and social assistance; arts, entertainment, recreation, accommodation, and food services; and other services, except government.",Information-communications-technology-producing industries\3\,,,,,,,,,,,...,,,,,,,,,,


In [8]:
def read_pce_bridge(year):
    df = pd.read_excel(
        io=data_config['file_path']+data_config['pce_fname'],
        sheet_name=str(year),
        header=[4]
    )
    return df
    
def read_pce_bridge_all(time_config):
    # read all domestict comm use matrix for all years in time config
    pce_dfs = {}
    for time in time_config:
        df = read_pce_bridge(time)
        pce_dfs[time] = df
    return pce_dfs

In [9]:
raw_pce = read_pce_bridge(2021)
raw_pce

Unnamed: 0,NIPA Line,PCE Category,Commodity Code,Commodity Description,Unnamed: 4,Unnamed: 5,Wholesale,Retail,Unnamed: 8,Year
0,5,New motor vehicles,3361MV,"Motor vehicles, bodies and trailers, and parts",266942,4284,5851,74165,351242,2021
1,6,Net purchases of used motor vehicles,Used,"Scrap, used and secondhand goods",156903,2514,3168,80455,243040,2021
2,7,Motor vehicles parts and accessories,327,Nonmetallic mineral products,162,29,24,126,342,2021
3,7,Motor vehicles parts and accessories,331,Primary metals,19,1,2,15,36,2021
4,7,Motor vehicles parts and accessories,332,Fabricated metal products,752,25,320,583,1680,2021
...,...,...,...,...,...,...,...,...,...,...
294,111,Final consumption expenditures of nonprofit in...,623,Nursing and residential care facilities,22282,0,0,0,22282,2021
295,111,Final consumption expenditures of nonprofit in...,624,Social assistance,94434,0,0,0,94434,2021
296,111,Final consumption expenditures of nonprofit in...,711AS,"Performing arts, spectator sports, museums, an...",17624,0,0,0,17624,2021
297,111,Final consumption expenditures of nonprofit in...,713,"Amusements, gambling, and recreation industries",7954,0,0,0,7954,2021


In [10]:
def read_make():
    df = pd.read_excel(
        io=data_config['file_path']+data_config['make_fname'],
        header=[5,6],
        index_col=[0,1]
    )
    return df

In [11]:
raw_make = read_make()
raw_make

  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0_level_0,Industries/Commodities,111CA,113FF,211,212,213,22,23,321,327,331,...,722,81,GFGD,GFGN,GFE,GSLG,GSLE,Used,Other,Other
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Food services and drinking places,"Other services, except government",Federal general government (defense),Federal general government (nondefense),Federal government enterprises,State and local general government,State and local government enterprises,"Scrap, used and secondhand goods /1/",Noncomparable imports and rest-of-the-world adjustment /2/,Total Industry Output
111CA,Farms,460538,4698,---,---,---,---,---,10,---,---,...,---,---,---,---,---,---,---,---,---,467317.0
113FF,"Forestry, fishing, and related activities",35,61535,---,---,---,---,---,---,---,---,...,---,---,---,---,---,---,---,---,---,61570.0
211,Oil and gas extraction,---,---,386002,75,---,---,---,---,---,---,...,---,---,---,---,---,---,---,---,---,424194.0
212,"Mining, except oil and gas",---,---,0,84387,157,---,---,---,468,---,...,---,---,---,---,---,---,---,---,---,87854.0
213,Support activities for mining,---,---,---,73,91461,---,---,---,---,---,...,---,---,---,---,---,---,---,---,---,92587.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Legend / Footnotes:,Total Commodity Output,,,,,,,,,,,...,,,,,,,,,,
1. Consists of only scrap in the make table.,Total Commodity Output,,,,,,,,,,,...,,,,,,,,,,
2. Consists of only the rest-of-the-world adjustment in the make table.,Total Commodity Output,,,,,,,,,,,...,,,,,,,,,,
Note. Selected data with zero values are not shown.,Total Commodity Output,,,,,,,,,,,...,,,,,,,,,,


In [12]:
def read_use():
    df = pd.read_excel(
        io=data_config['file_path']+data_config['use_fname'],
        header=[5,6],
        index_col=[0,1]
    )
    return df

In [13]:
raw_use = read_use()
raw_use

  warn("Workbook contains no default style, apply openpyxl's default")


Unnamed: 0_level_0,Commodities/Industries,111CA,113FF,211,212,213,22,23,321,327,331,...,F07C,F07S,F07E,F07N,F10C,F10S,F10E,F10N,F10N,F10N
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Federal national nondefense: Consumption expenditures,Federal national nondefense: Gross investment in structures,Federal national nondefense: Gross investment in equipment,Federal national nondefense: Gross investment in intellectual property products,State and local: Consumption expenditures,State and local: Gross investment in structures,State and local: Gross investment in equipment,State and local: Gross investment in intellectual property products,Total Final Uses (GDP),Total Commodity Output
111CA,Farms,82371,319,---,45,19,---,2876,---,---,---,...,---,---,---,---,---,---,---,---,108824,460740
113FF,"Forestry, fishing, and related activities",24562,8909,---,66,---,---,---,14642,1,---,...,---,---,---,---,---,---,---,---,-8450,69838
211,Oil and gas extraction,---,---,50693,7,1,32311,---,1,0,16,...,---,---,---,---,---,---,---,---,-56285,386246
212,"Mining, except oil and gas",1604,0,58,7199,33,6298,22110,1,7286,11499,...,---,---,---,---,---,---,---,---,10096,85436
213,Support activities for mining,---,---,8699,1676,1042,---,---,---,---,---,...,---,---,---,---,---,---,---,---,80131,91619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V003,Total Industry Output,467317,61570,424194,87854,92587,637014,2051156,148411,143395,260697,...,533783,13216,24944,133191,2106940,347215,48729,48680,,41404764
Legend / Footnotes:,Total Industry Output,,,,,,,,,,,...,,,,,,,,,,
1. Consists of noncomparable imports in the intermediate section of the use table and noncomparable imports and the rest-of-the-world adjustment in the final use section of the use table.,Total Industry Output,,,,,,,,,,,...,,,,,,,,,,
Note. Selected data with zero values are not shown.,Total Industry Output,,,,,,,,,,,...,,,,,,,,,,


In [14]:
def read_import(year):
    df = pd.read_excel(
        io=data_config['file_path']+data_config['import_fname'],
        sheet_name=str(year),
        header=[5,6],
        index_col=[0,1]
    )
    return df

In [15]:
raw_imports = read_import(2021)
raw_imports

Unnamed: 0_level_0,Commodity / Industry,111CA,113FF,211,212,213,22,23,321,327,331,...,F06S,F07C,F07E,F07N,F07S,F10C,F10E,F10N,F10S,T004
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Federal national defense: Gross investment in structures,Nondefense: Consumption expenditures,Federal nondefense: Gross investment in equipment,Federal nondefense: Gross investment in intellectual property products,Federal nondefense: Gross investment in structures,State and local government consumption expenditures,State and local: Gross investment in equipment,State and local: Gross investment in intellectual property products,State and local: Gross investment in structures,Total Final Uses (GDP)
111CA,Farms,1267,102,...,0,0,...,95,...,...,...,...,...,...,...,...,...,...,...,...,...,-20322
113FF,"Forestry, fishing, and related activities",232,26,...,1,...,...,...,146,1,...,...,...,...,...,...,...,...,...,...,...,-18803
211,Oil and gas extraction,...,...,6953,0,0,3918,...,0,0,0,...,...,...,...,...,...,...,...,...,...,-145930
212,"Mining, except oil and gas",137,0,4,179,1,200,663,0,623,1645,...,...,...,...,...,...,...,...,...,...,-6384
213,Support activities for mining,...,...,214,36,27,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,-278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFE,Federal government enterprises,0,0,...,0,...,1,0,...,...,0,...,...,...,...,...,...,...,...,...,...,-243
GSLG,State and local general government,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GSLE,State and local government enterprises,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Used,"Scrap, used and secondhand goods",...,...,...,...,...,29,13,...,105,6748,...,...,...,102,...,...,...,276,...,...,-11591


# Make mapping for commodities and industry codes

In [16]:
def infer_comm_map(year):
    # infers commodity code using mkt share data for given year
    df = read_mkt_share(year)
    comm_map = {}
    for pair in df.columns:
        comm_map[pair[0]] = pair[1]
    if len(comm_map) != 73:
        raise ValueError('Expected 73 commodities from inference. Has this changed from BEA?')
    return comm_map

def infer_ind_map(year):
    # infers industry code using mkt share data for given year
    df = read_mkt_share(year)
    ind_map = {}
    for pair in df.index:
        ind_map[pair[0]] = pair[1]
    if len(ind_map) != 71:
        raise ValueError('Expected 71 industries from inference. Has this changed from BEA?')
    return ind_map

In [17]:
comm_map = infer_comm_map(2021)
ind_map = infer_ind_map(2021)

In [18]:
comm_map

{'111CA': 'Farms',
 '113FF': 'Forestry, fishing, and related activities',
 '211': 'Oil and gas extraction',
 '212': 'Mining, except oil and gas',
 '213': 'Support activities for mining',
 '22': 'Utilities',
 '23': 'Construction',
 '321': 'Wood products',
 '327': 'Nonmetallic mineral products',
 '331': 'Primary metals',
 '332': 'Fabricated metal products',
 '333': 'Machinery',
 '334': 'Computer and electronic products',
 '335': 'Electrical equipment, appliances, and components',
 '3361MV': 'Motor vehicles, bodies and trailers, and parts',
 '3364OT': 'Other transportation equipment',
 '337': 'Furniture and related products',
 '339': 'Miscellaneous manufacturing',
 '311FT': 'Food and beverage and tobacco products',
 '313TT': 'Textile mills and textile product mills',
 '315AL': 'Apparel and leather and allied products',
 '322': 'Paper products',
 '323': 'Printing and related support activities',
 '324': 'Petroleum and coal products',
 '325': 'Chemical products',
 '326': 'Plastics and rubbe

In [19]:
ind_map

{'111CA': 'Farms',
 '113FF': 'Forestry, fishing, and related activities',
 '211': 'Oil and gas extraction',
 '212': 'Mining, except oil and gas',
 '213': 'Support activities for mining',
 '22': 'Utilities',
 '23': 'Construction',
 '321': 'Wood products',
 '327': 'Nonmetallic mineral products',
 '331': 'Primary metals',
 '332': 'Fabricated metal products',
 '333': 'Machinery',
 '334': 'Computer and electronic products',
 '335': 'Electrical equipment, appliances, and components',
 '3361MV': 'Motor vehicles, bodies and trailers, and parts',
 '3364OT': 'Other transportation equipment',
 '337': 'Furniture and related products',
 '339': 'Miscellaneous manufacturing',
 '311FT': 'Food and beverage and tobacco products',
 '313TT': 'Textile mills and textile product mills',
 '315AL': 'Apparel and leather and allied products',
 '322': 'Paper products',
 '323': 'Printing and related support activities',
 '324': 'Petroleum and coal products',
 '325': 'Chemical products',
 '326': 'Plastics and rubbe

# Clean data

In [20]:
def clean_dom_use(in_dom_share,comm_map,ind_map):
    dom_use_df = in_dom_share.copy()
    # filter commodities
    dom_use_df = dom_use_df.loc[dom_use_df.index.get_level_values(0).isin(comm_map)]
    # filter industries
    dom_use_df = dom_use_df.loc[:,dom_use_df.columns.get_level_values(0).isin(ind_map)]
    dom_use_df.sort_index(inplace=True)  # sort indices
    dom_use_df = dom_use_df.reindex(sorted(dom_use_df.columns), axis=1) # sort columns
    return dom_use_df

In [21]:
dom_use = clean_dom_use(raw_dom_use,comm_map,ind_map)
dom_use

Unnamed: 0_level_0,Commodities/Industries,111CA,113FF,211,212,213,22,23,311FT,313TT,315AL,...,721,722,81,GFE,GFGD,GFGN,GSLE,GSLG,HS,ORE
Unnamed: 0_level_1,Commodity Description,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Food and beverage and tobacco products,Textile mills and textile product mills,Apparel and leather and allied products,...,Accommodation,Food services and drinking places,"Other services, except government",Federal government enterprises,Federal general government (defense),Federal general government (nondefense),State and local government enterprises,State and local general government,Housing,Other real estate
111CA,Farms,0.173553,0.003526,0.000000,0.000511,0.000208,0.000000,0.001356,2.191741e-01,0.035267,0.000000,...,0.000155,0.000698,0.000014,0.000000,0.000003,0.000573,0.000000,0.000700,0.000001,0.000001
113FF,"Forestry, fishing, and related activities",0.052062,0.144284,0.000000,0.000741,0.000000,0.000000,0.000000,4.764000e-04,0.000000,0.008713,...,0.000144,0.000329,0.000011,0.000013,0.000000,0.000000,0.000000,0.001209,0.000000,0.000000
211,Oil and gas extraction,0.000000,0.000000,0.103113,0.000083,0.000009,0.044571,0.000000,6.000000e-07,0.000000,0.000000,...,0.000157,0.000077,0.000005,0.006025,0.000000,0.000000,0.044277,0.000000,0.000000,0.000000
212,"Mining, except oil and gas",0.003139,0.000005,0.000128,0.079912,0.000344,0.009573,0.010456,2.308000e-04,0.000351,0.000000,...,0.000275,0.000225,0.000764,0.000000,0.000000,0.001053,0.008645,0.000330,0.000189,0.000000
213,Support activities for mining,0.000000,0.000000,0.020004,0.018675,0.010956,0.000000,0.000000,0.000000e+00,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GSLG,State and local general government,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
HS,Housing,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
ORE,Other real estate,0.059868,0.001112,0.004134,0.000767,0.000831,0.009790,0.012780,2.730400e-03,0.008933,0.003650,...,0.007600,0.081735,0.047730,0.012226,0.000205,0.010466,0.010350,0.010790,0.007730,0.087659
Other,Noncomparable imports and rest-of-the-world adjustment,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [22]:
def clean_delta_prices(input_delta_prices_data,comm_map,ind_map):
    delta_p_df = input_delta_prices_data.copy()
    delta_p_df = delta_p_df.dropna(how='all') # drop rows where all values are na
    
    index_1_val = delta_p_df.index.get_level_values(1).str.strip()
    index_1_val = index_1_val.tolist()
    
    rev_ind_map = {}
    for k,v in ind_map.items():
        rev_ind_map[v] = k
        
    new_index = []
    
    prefix = 'Federal'
    for ind in range(len(index_1_val)):
        ind_val = index_1_val[ind]
        
        if (ind_val == 'National defense'):
            ind_val = 'Federal general government (defense)'
        elif (ind_val == 'Nondefense'):
            ind_val = 'Federal general government (nondefense)'
        elif (ind_val == 'Government enterprises'):
            ind_val = prefix + ' government enterprises'
            prefix = 'State and local'
        elif (ind_val == 'General government'):
            ind_val = prefix + ' general government'
        
        if ind_val in rev_ind_map:
            new_index.append((rev_ind_map[ind_val],ind_val))
        else:
            new_index.append(('NA',ind_val))
            
    delta_p_df = delta_p_df.set_index(pd.MultiIndex.from_tuples(new_index, names=delta_p_df.index.names))
    delta_p_df = delta_p_df.loc[delta_p_df.index.get_level_values(1).isin(ind_map.values())]

    return delta_p_df

In [23]:
delta_prices = clean_delta_prices(raw_delta_prices,comm_map,ind_map)
delta_prices

Unnamed: 0_level_0,Unnamed: 1_level_0,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
Line,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
111CA,Farms,-5.8,-5.9,0.9,5.3,-4.5,9.2,12.2,-4.5,0.4,18.5,...,4.9,1.8,-1.1,-10.9,-11.1,2.7,0.0,-0.4,-2.7,23.0
113FF,"Forestry, fishing, and related activities",-1.6,-0.8,0.1,-7.9,-1.1,2.3,5.2,3.7,3.4,6.0,...,1.4,3.3,2.9,-0.6,0.3,1.7,2.1,0.2,-0.2,5.3
211,Oil and gas extraction,-24.2,20.1,60.7,-4.1,-12.5,42.0,19.3,31.4,2.5,6.2,...,-12.4,6.7,-1.1,-42.3,-11.0,22.7,15.9,-14.9,-26.7,85.8
212,"Mining, except oil and gas",-3.4,-1.8,1.6,1.7,3.7,4.9,13.4,18.3,13.0,7.5,...,-0.3,-5.0,-1.9,-6.4,-1.4,8.3,5.1,0.2,-0.3,9.6
213,Support activities for mining,5.0,-9.0,-4.9,-2.8,0.5,2.5,4.0,8.9,6.6,3.8,...,4.3,1.8,1.6,0.1,-1.9,2.1,-2.0,1.4,-3.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFGD,Federal general government (defense),1.5,2.6,3.9,2.5,4.3,5.9,4.0,5.2,4.0,3.4,...,1.1,0.7,1.5,0.3,0.5,1.7,3.1,1.8,1.2,3.9
GFGN,Federal general government (nondefense),1.6,3.1,3.9,2.2,4.3,4.4,4.0,4.0,3.1,3.2,...,0.7,1.7,2.3,1.1,1.1,2.4,3.5,2.2,1.1,3.2
GFE,Federal government enterprises,0.0,2.2,0.5,5.1,3.4,2.6,0.4,0.4,5.8,3.9,...,1.2,1.2,4.1,1.4,-1.8,0.8,1.5,1.3,1.1,4.9
GSLG,State and local general government,2.0,3.9,4.7,3.5,2.5,3.8,4.7,4.9,4.6,4.8,...,2.2,3.4,2.3,0.1,0.3,2.5,3.8,1.5,2.5,5.7


In [24]:
def clean_pce_bridge(in_pce,comm_map):
    pce_df = in_pce.copy()
    for comm in pce_df['Commodity Code']:
        if str(comm) not in comm_map:
            print(f'{comm} not in comm')
    pce_df.rename(
        columns = {
            'Unnamed: 4': "Producers' Value",
            'Unnamed: 5': 'Transportation Costs',
            'Unnamed: 8': "Purchasers' Value"
        },
        inplace=True
    )
    return pce_df
                

In [25]:
pce = clean_pce_bridge(raw_pce,comm_map)
pce

Unnamed: 0,NIPA Line,PCE Category,Commodity Code,Commodity Description,Producers' Value,Transportation Costs,Wholesale,Retail,Purchasers' Value,Year
0,5,New motor vehicles,3361MV,"Motor vehicles, bodies and trailers, and parts",266942,4284,5851,74165,351242,2021
1,6,Net purchases of used motor vehicles,Used,"Scrap, used and secondhand goods",156903,2514,3168,80455,243040,2021
2,7,Motor vehicles parts and accessories,327,Nonmetallic mineral products,162,29,24,126,342,2021
3,7,Motor vehicles parts and accessories,331,Primary metals,19,1,2,15,36,2021
4,7,Motor vehicles parts and accessories,332,Fabricated metal products,752,25,320,583,1680,2021
...,...,...,...,...,...,...,...,...,...,...
294,111,Final consumption expenditures of nonprofit in...,623,Nursing and residential care facilities,22282,0,0,0,22282,2021
295,111,Final consumption expenditures of nonprofit in...,624,Social assistance,94434,0,0,0,94434,2021
296,111,Final consumption expenditures of nonprofit in...,711AS,"Performing arts, spectator sports, museums, an...",17624,0,0,0,17624,2021
297,111,Final consumption expenditures of nonprofit in...,713,"Amusements, gambling, and recreation industries",7954,0,0,0,7954,2021


In [26]:
def clean_make(make_in,comm_map,ind_map):
    make_data = make_in.copy()
    # fix column names
    make_data.rename(
        columns={
            'Scrap, used and secondhand goods /1/':'Scrap, used and secondhand goods',
            'Noncomparable imports and rest-of-the-world adjustment /2/':'Noncomparable imports and rest-of-the-world adjustment'
        },
        inplace=True
    )
    # filter commodities
    make_data = make_data.loc[:,make_data.columns.get_level_values(1).isin(comm_map.values())]
    # filter industries
    make_data = make_data.loc[make_data.index.get_level_values(1).isin(ind_map.values()),:]
    
    # replace ... with 0
    for c in make_data.columns:
        make_data[c] = pd.to_numeric(make_data[c],errors='coerce')
    make_data.fillna(0,inplace=True)
    return make_data

In [27]:
make_matrix = clean_make(raw_make,comm_map,ind_map)
make_matrix

Unnamed: 0_level_0,Industries/Commodities,111CA,113FF,211,212,213,22,23,321,327,331,...,721,722,81,GFGD,GFGN,GFE,GSLG,GSLE,Used,Other
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Accommodation,Food services and drinking places,"Other services, except government",Federal general government (defense),Federal general government (nondefense),Federal government enterprises,State and local general government,State and local government enterprises,"Scrap, used and secondhand goods",Noncomparable imports and rest-of-the-world adjustment
111CA,Farms,460538.0,4698.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
113FF,"Forestry, fishing, and related activities",35.0,61535.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
211,Oil and gas extraction,0.0,0.0,386002.0,75.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
212,"Mining, except oil and gas",0.0,0.0,0.0,84387.0,157.0,0.0,0.0,0.0,468.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
213,Support activities for mining,0.0,0.0,0.0,73.0,91461.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFGD,Federal general government (defense),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,713008.0,0.0,0.0,0.0,0.0,0.0,0.0
GFGN,Federal general government (nondefense),0.0,0.0,0.0,0.0,0.0,79.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,533783.0,0.0,0.0,0.0,0.0,4226.0
GFE,Federal government enterprises,0.0,0.0,0.0,0.0,0.0,14791.0,0.0,0.0,0.0,0.0,...,0.0,1415.0,0.0,0.0,0.0,70166.0,0.0,0.0,0.0,0.0
GSLG,State and local general government,168.0,1943.0,0.0,0.0,0.0,1249.0,0.0,0.0,0.0,0.0,...,891.0,0.0,0.0,0.0,0.0,0.0,2106940.0,0.0,4280.0,0.0


In [28]:
def clean_use(use_in,comm_map,ind_map):
    use_data = use_in.copy()
    
    # fix index names
    new_index = []
    for ind in use_data.index:
        if ind[1] == 'Noncomparable imports and rest-of-the-world adjustment [1]':
            new_index.append((ind[0],'Noncomparable imports and rest-of-the-world adjustment'))
        else:
            new_index.append((ind[0],ind[1]))

    use_data = use_data.set_index(pd.MultiIndex.from_tuples(new_index, names=use_data.index.names))
    
    # filter commodities
    use_data = use_data.loc[use_data.index.get_level_values(1).isin(comm_map.values()),:]
    # filter industries
    use_data = use_data.loc[:,use_data.columns.get_level_values(1).isin(ind_map.values())]
    
    # replace ... with 0
    for c in use_data.columns:
        use_data[c] = pd.to_numeric(use_data[c],errors='coerce')
    use_data.fillna(0,inplace=True)
    return use_data

In [29]:
raw_use

Unnamed: 0_level_0,Commodities/Industries,111CA,113FF,211,212,213,22,23,321,327,331,...,F07C,F07S,F07E,F07N,F10C,F10S,F10E,F10N,F10N,F10N
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Federal national nondefense: Consumption expenditures,Federal national nondefense: Gross investment in structures,Federal national nondefense: Gross investment in equipment,Federal national nondefense: Gross investment in intellectual property products,State and local: Consumption expenditures,State and local: Gross investment in structures,State and local: Gross investment in equipment,State and local: Gross investment in intellectual property products,Total Final Uses (GDP),Total Commodity Output
111CA,Farms,82371,319,---,45,19,---,2876,---,---,---,...,---,---,---,---,---,---,---,---,108824,460740
113FF,"Forestry, fishing, and related activities",24562,8909,---,66,---,---,---,14642,1,---,...,---,---,---,---,---,---,---,---,-8450,69838
211,Oil and gas extraction,---,---,50693,7,1,32311,---,1,0,16,...,---,---,---,---,---,---,---,---,-56285,386246
212,"Mining, except oil and gas",1604,0,58,7199,33,6298,22110,1,7286,11499,...,---,---,---,---,---,---,---,---,10096,85436
213,Support activities for mining,---,---,8699,1676,1042,---,---,---,---,---,...,---,---,---,---,---,---,---,---,80131,91619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
V003,Total Industry Output,467317,61570,424194,87854,92587,637014,2051156,148411,143395,260697,...,533783,13216,24944,133191,2106940,347215,48729,48680,,41404764
Legend / Footnotes:,Total Industry Output,,,,,,,,,,,...,,,,,,,,,,
1. Consists of noncomparable imports in the intermediate section of the use table and noncomparable imports and the rest-of-the-world adjustment in the final use section of the use table.,Total Industry Output,,,,,,,,,,,...,,,,,,,,,,
Note. Selected data with zero values are not shown.,Total Industry Output,,,,,,,,,,,...,,,,,,,,,,


In [30]:
use_matrix = clean_use(raw_use,comm_map,ind_map)
use_matrix

Unnamed: 0_level_0,Commodities/Industries,111CA,113FF,211,212,213,22,23,321,327,331,...,711AS,713,721,722,81,GFGD,GFGN,GFE,GSLG,GSLE
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,"Performing arts, spectator sports, museums, and related activities","Amusements, gambling, and recreation industries",Accommodation,Food services and drinking places,"Other services, except government",Federal general government (defense),Federal general government (nondefense),Federal government enterprises,State and local general government,State and local government enterprises
111CA,Farms,82371.0,319.0,0.0,45.0,19.0,0.0,2876.0,0.0,0.0,0.0,...,0.0,534.0,38.0,2802.0,17.0,5.0,444.0,0.0,1967.0,0.0
113FF,"Forestry, fishing, and related activities",24562.0,8909.0,0.0,66.0,0.0,0.0,0.0,14642.0,1.0,0.0,...,0.0,273.0,468.0,6157.0,231.0,0.0,0.0,19.0,3756.0,0.0
211,Oil and gas extraction,0.0,0.0,50693.0,7.0,1.0,32311.0,0.0,1.0,0.0,16.0,...,1.0,10.0,33.0,92.0,5.0,0.0,0.0,608.0,0.0,19082.0
212,"Mining, except oil and gas",1604.0,0.0,58.0,7199.0,33.0,6298.0,22110.0,1.0,7286.0,11499.0,...,234.0,420.0,57.0,272.0,737.0,0.0,599.0,0.0,1712.0,3362.0
213,Support activities for mining,0.0,0.0,8699.0,1676.0,1042.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFE,Federal government enterprises,9.0,2.0,0.0,3.0,0.0,207.0,5.0,0.0,0.0,5.0,...,77.0,59.0,576.0,2909.0,1086.0,0.0,140.0,1.0,1225.0,121.0
GSLG,State and local general government,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GSLE,State and local government enterprises,0.0,3.0,0.0,0.0,0.0,4453.0,0.0,27.0,108.0,129.0,...,40.0,246.0,482.0,1812.0,998.0,440.0,535.0,101.0,6357.0,962.0
Used,"Scrap, used and secondhand goods",-83.0,58.0,0.0,-11.0,1131.0,5643.0,2960.0,0.0,78.0,16666.0,...,-124.0,379.0,0.0,0.0,4399.0,-33.0,0.0,0.0,0.0,0.0


In [31]:
def clean_import(import_in,comm_map,ind_map):
    import_data = import_in.copy()
    
    # filter commodities
    import_data = import_data.loc[import_data.index.get_level_values(1).isin(comm_map.values()),:]
    # filter industries
    import_data = import_data.loc[:,import_data.columns.get_level_values(1).isin(ind_map.values())]
    
    # replace ... with 0
    for c in import_data.columns:
        import_data[c] = pd.to_numeric(import_data[c],errors='coerce')
    import_data.fillna(0,inplace=True)
    return import_data

In [32]:
raw_imports

Unnamed: 0_level_0,Commodity / Industry,111CA,113FF,211,212,213,22,23,321,327,331,...,F06S,F07C,F07E,F07N,F07S,F10C,F10E,F10N,F10S,T004
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Federal national defense: Gross investment in structures,Nondefense: Consumption expenditures,Federal nondefense: Gross investment in equipment,Federal nondefense: Gross investment in intellectual property products,Federal nondefense: Gross investment in structures,State and local government consumption expenditures,State and local: Gross investment in equipment,State and local: Gross investment in intellectual property products,State and local: Gross investment in structures,Total Final Uses (GDP)
111CA,Farms,1267,102,...,0,0,...,95,...,...,...,...,...,...,...,...,...,...,...,...,...,-20322
113FF,"Forestry, fishing, and related activities",232,26,...,1,...,...,...,146,1,...,...,...,...,...,...,...,...,...,...,...,-18803
211,Oil and gas extraction,...,...,6953,0,0,3918,...,0,0,0,...,...,...,...,...,...,...,...,...,...,-145930
212,"Mining, except oil and gas",137,0,4,179,1,200,663,0,623,1645,...,...,...,...,...,...,...,...,...,...,-6384
213,Support activities for mining,...,...,214,36,27,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,-278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFE,Federal government enterprises,0,0,...,0,...,1,0,...,...,0,...,...,...,...,...,...,...,...,...,...,-243
GSLG,State and local general government,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GSLE,State and local government enterprises,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Used,"Scrap, used and secondhand goods",...,...,...,...,...,29,13,...,105,6748,...,...,...,102,...,...,...,276,...,...,-11591


In [33]:
imports_matrix = clean_import(raw_imports,comm_map,ind_map)
imports_matrix

Unnamed: 0_level_0,Commodity / Industry,111CA,113FF,211,212,213,22,23,321,327,331,...,711AS,713,721,722,81,GFGD,GFGN,GFE,GSLG,GSLE
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,"Performing arts, spectator sports, museums, and related activities","Amusements, gambling, and recreation industries",Accommodation,Food services and drinking places,"Other services, except government",Federal general government (defense),Federal general government (nondefense),Federal government enterprises,State and local general government,State and local government enterprises
111CA,Farms,1267.0,102.0,0.0,0.0,0.0,0.0,95.0,0.0,0.0,0.0,...,0.0,196.0,6.0,1979.0,4.0,3.0,135.0,0.0,204.0,0.0
113FF,"Forestry, fishing, and related activities",232.0,26.0,0.0,1.0,0.0,0.0,0.0,146.0,1.0,0.0,...,0.0,256.0,439.0,5769.0,221.0,0.0,0.0,17.0,712.0,0.0
211,Oil and gas extraction,0.0,0.0,6953.0,0.0,0.0,3918.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,70.0,0.0,2315.0
212,"Mining, except oil and gas",137.0,0.0,4.0,179.0,1.0,200.0,663.0,0.0,623.0,1645.0,...,2.0,5.0,1.0,6.0,8.0,0.0,32.0,0.0,882.0,88.0
213,Support activities for mining,0.0,0.0,214.0,36.0,27.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFE,Federal government enterprises,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,3.0,13.0,6.0,0.0,0.0,0.0,6.0,1.0
GSLG,State and local general government,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
GSLE,State and local government enterprises,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Used,"Scrap, used and secondhand goods",0.0,0.0,0.0,0.0,0.0,29.0,13.0,0.0,105.0,6748.0,...,19.0,161.0,0.0,0.0,98.0,1.0,0.0,0.0,0.0,0.0


# Estimate Matrices

In [34]:
def calc_gross_output(raw_use_in):
    
    raw_use = raw_use_in.copy()
    raw_use = raw_use.loc[:,raw_use.columns.get_level_values(1).isin(ind_map.values())] # filter cols
    gross_output = raw_use.loc[raw_use.index.get_level_values(1) == 'Total Industry Output']
    gross_output = gross_output.iloc[0,:].T
    gross_output = gross_output.to_frame()
    gross_output.columns = ['Total Industry Output']
    return gross_output

gross_out = calc_gross_output(raw_use)
gross_out

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Industry Output
Commodities/Industries,Name,Unnamed: 2_level_1
111CA,Farms,467317
113FF,"Forestry, fishing, and related activities",61570
211,Oil and gas extraction,424194
212,"Mining, except oil and gas",87854
213,Support activities for mining,92587
...,...,...
GFGD,Federal general government (defense),713008
GFGN,Federal general government (nondefense),538088
GFE,Federal government enterprises,89241
GSLG,State and local general government,2517051


In [35]:
def calc_value_add(raw_use_in):
    
    raw_use = raw_use_in.copy()
    raw_use = raw_use.loc[:,raw_use.columns.get_level_values(1).isin(ind_map.values())] # filter cols
    raw_use = raw_use.reset_index() # reset index
    value_add = raw_use.loc[raw_use.level_1 == 'Total Value Added'] # select value add row
    value_add = value_add.iloc[:,2:].T # change from row to column
    value_add.rename(columns={77:'Value Add'},inplace=True) # rename

    return value_add
    
value_add = calc_value_add(raw_use)
value_add

Unnamed: 0_level_0,Unnamed: 1_level_0,Value Add
Commodities/Industries,Name,Unnamed: 2_level_1
111CA,Farms,165332
113FF,"Forestry, fishing, and related activities",42749
211,Oil and gas extraction,213969
212,"Mining, except oil and gas",40272
213,Support activities for mining,56388
...,...,...
GFGD,Federal general government (defense),446897
GFGN,Federal general government (nondefense),324622
GFE,Federal government enterprises,65569
GSLG,State and local general government,1664708


In [36]:
def calc_unit_val(value,output):
    full = value.join(output)
    unit_value_add = full['Value Add'] / full['Total Industry Output']
    return unit_value_add

unit_value_add = calc_unit_val(value_add,gross_out)
unit_value_add

Commodities/Industries  Name                                     
111CA                   Farms                                         0.35379
113FF                   Forestry, fishing, and related activities    0.694315
211                     Oil and gas extraction                       0.504413
212                     Mining, except oil and gas                   0.458397
213                     Support activities for mining                0.609027
                                                                       ...   
GFGD                    Federal general government (defense)         0.626777
GFGN                    Federal general government (nondefense)      0.603288
GFE                     Federal government enterprises               0.734741
GSLG                    State and local general government           0.661372
GSLE                    State and local government enterprises       0.419277
Length: 71, dtype: object

In [37]:
def calc_int_import(raw_use_in):
    
    raw_use = raw_use_in.copy()
    raw_use = raw_use.loc[:,raw_use.columns.get_level_values(1).isin(ind_map.values())] # filter cols
    raw_use = raw_use.reset_index() # reset index
    int_in = raw_use.loc[raw_use.level_1 == 'Total Intermediate'] # select value add row
    int_in = int_in.iloc[:,2:].T # change from row to column
    int_in.rename(columns={73:'Total Intermediate'},inplace=True) # rename
    
    return int_in

int_in = calc_int_import(raw_use)
int_in

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Intermediate
Commodities/Industries,Name,Unnamed: 2_level_1
111CA,Farms,301985
113FF,"Forestry, fishing, and related activities",18821
211,Oil and gas extraction,210224
212,"Mining, except oil and gas",47582
213,Support activities for mining,36199
...,...,...
GFGD,Federal general government (defense),266111
GFGN,Federal general government (nondefense),213466
GFE,Federal government enterprises,23672
GSLG,State and local general government,852343


In [38]:
def calc_unit_int_import(imports,interm_in,output):
    total_imports = imports.sum() # total imports of each industry
    
    full = pd.concat([total_imports,interm_in,output],axis=1)
    full.rename(columns={0:'Import Inputs'},inplace=True)
    
    unit_int_import = (full['Total Intermediate']-full['Import Inputs']) / full['Total Industry Output']
    return unit_int_import

unit_int_import = calc_unit_int_import(imports_matrix,int_in,gross_out)
unit_int_import

       Name                                     
111CA  Farms                                        0.597537
113FF  Forestry, fishing, and related activities    0.279877
211    Oil and gas extraction                       0.442906
212    Mining, except oil and gas                   0.489289
213    Support activities for mining                0.343644
                                                      ...   
GFGD   Federal general government (defense)         0.331103
GFGN   Federal general government (nondefense)      0.361601
GFE    Federal government enterprises               0.244641
GSLG   State and local general government           0.308577
GSLE   State and local government enterprises       0.548661
Length: 71, dtype: object

In [39]:
test = pd.concat([unit_value_add,unit_int_import],axis=1)
test[0+1].sort_values()

       Name                                                                
334    Computer and electronic products                                        0.113753
HS     Housing                                                                 0.120547
521CI  Federal Reserve banks, credit intermediation, and related activities    0.165875
5415   Computer systems design and related services                            0.180738
511    Publishing industries, except internet (includes software)              0.204117
                                                                                 ...   
111CA  Farms                                                                   0.597537
ORE    Other real estate                                                       0.628192
311FT  Food and beverage and tobacco products                                   0.63635
483    Water transportation                                                      0.6595
525    Funds, trusts, and other financial ve

In [40]:
def calc_mkt_share(make_table):
    mkt_share = make_table.copy()
    for c in mkt_share.columns:
        mkt_share[c] = mkt_share[c] / mkt_share[c].sum()
    return mkt_share
mkt_share = calc_mkt_share(make_matrix)
mkt_share

Unnamed: 0_level_0,Industries/Commodities,111CA,113FF,211,212,213,22,23,321,327,331,...,721,722,81,GFGD,GFGN,GFE,GSLG,GSLE,Used,Other
Unnamed: 0_level_1,Name,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Wood products,Nonmetallic mineral products,Primary metals,...,Accommodation,Food services and drinking places,"Other services, except government",Federal general government (defense),Federal general government (nondefense),Federal government enterprises,State and local general government,State and local government enterprises,"Scrap, used and secondhand goods",Noncomparable imports and rest-of-the-world adjustment
111CA,Farms,0.999559,0.067270,0.000000,0.000000,0.000000,0.000000,0.0,0.000067,0.00000,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0
113FF,"Forestry, fishing, and related activities",0.000076,0.881111,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0
211,Oil and gas extraction,0.000000,0.000000,0.999368,0.000878,0.000000,0.000000,0.0,0.000000,0.00000,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0
212,"Mining, except oil and gas",0.000000,0.000000,0.000000,0.987733,0.001714,0.000000,0.0,0.000000,0.00327,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0
213,Support activities for mining,0.000000,0.000000,0.000000,0.000854,0.998275,0.000000,0.0,0.000000,0.00000,0.0,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFGD,Federal general government (defense),0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00000,0.0,...,0.000000,0.000000,0.000000,1.0,0.0,0.0,0.0,0.000000,0.000000,0.0
GFGN,Federal general government (nondefense),0.000000,0.000000,0.000000,0.000000,0.000000,0.000098,0.0,0.000000,0.00000,0.0,...,0.000000,0.000000,0.000000,0.0,1.0,0.0,0.0,0.000000,0.000000,1.0
GFE,Federal government enterprises,0.000000,0.000000,0.000000,0.000000,0.000000,0.018339,0.0,0.000000,0.00000,0.0,...,0.000000,0.001197,0.000000,0.0,0.0,1.0,0.0,0.000000,0.000000,0.0
GSLG,State and local general government,0.000365,0.027822,0.000000,0.000000,0.000000,0.001549,0.0,0.000000,0.00000,0.0,...,0.004334,0.000000,0.000000,0.0,0.0,0.0,1.0,0.000000,0.478319,0.0


In [41]:
def calc_cpi_share(pce_data):
    pce_group = pce_data.groupby(['Commodity Code']).sum()["Purchasers' Value"]
    cpi_pct = pce_group / pce_group.sum()
    return cpi_pct

In [42]:
cpi_share = calc_cpi_share(pce)

In [43]:
cpi_share

Commodity Code
22        0.019287
61        0.023655
81        0.039691
212       0.000004
321       0.001032
322       0.003339
323       0.000508
324       0.023197
325       0.048465
326       0.005589
327       0.001814
331       0.000265
332       0.004213
333       0.002511
334       0.015115
335       0.007508
337       0.010709
339       0.024424
481       0.007553
482       0.000049
483       0.000701
484       0.000984
485       0.002101
493       0.000007
511       0.012282
512       0.001578
513       0.022400
514       0.008182
523       0.019550
524       0.027526
525       0.012019
561       0.004081
562       0.002024
621       0.075885
622       0.078544
623       0.016425
624       0.015854
713       0.012097
721       0.010133
722       0.056814
5411      0.007678
111CA     0.012179
113FF     0.000803
311FT     0.078799
313TT     0.006711
315AL     0.029715
3361MV    0.027284
3364OT    0.003136
487OS     0.002067
4A0       0.000646
521CI     0.021071
532RL     0.0082

In [44]:
def mat_mul_check(a,b):
    # matrix multiplication a times b, checking for consistent inner dimensions
    # check columns of a are same as rows of b
    diff = a.columns.difference(b.index) # multiindex of difference
    if len(diff) != 0:
        raise ValueError(f'columns of a do not match index of b')
    return a @ b

In [45]:
def calc_dom_dir_reqs(d,b):
    # create matrix A
    # d is domestic direct requirements matrix (industries x commodities)
    # b is domestic comm use (commodities x industries)
    A = mat_mul_check(d,b)
    return A

In [46]:
A = calc_dom_dir_reqs(mkt_share,dom_use)
A

Unnamed: 0_level_0,Commodities/Industries,111CA,113FF,211,212,213,22,23,311FT,313TT,315AL,...,721,722,81,GFE,GFGD,GFGN,GSLE,GSLG,HS,ORE
Unnamed: 0_level_1,Commodity Description,Farms,"Forestry, fishing, and related activities",Oil and gas extraction,"Mining, except oil and gas",Support activities for mining,Utilities,Construction,Food and beverage and tobacco products,Textile mills and textile product mills,Apparel and leather and allied products,...,Accommodation,Food services and drinking places,"Other services, except government",Federal government enterprises,Federal general government (defense),Federal general government (nondefense),State and local government enterprises,State and local general government,Housing,Other real estate
111CA,Farms,0.176979,1.323032e-02,0.000000,0.000561,2.098259e-04,0.000001,1.357514e-03,2.191101e-01,0.035252,5.862700e-04,...,0.000178,0.000724,1.813393e-05,9.540617e-07,6.219138e-06,5.732518e-04,8.243671e-06,0.000786,1.221939e-06,7.540705e-06
113FF,"Forestry, fishing, and related activities",0.045886,1.271301e-01,0.000000,0.000653,1.581583e-08,0.000000,1.029776e-07,4.364105e-04,0.000003,7.677028e-03,...,0.000127,0.000290,9.781406e-06,1.163066e-05,2.278938e-10,4.355809e-08,0.000000e+00,0.001066,8.356105e-11,9.115750e-11
211,Oil and gas extraction,0.000676,2.743826e-04,0.103978,0.002011,1.060223e-03,0.046347,1.531888e-03,1.688409e-04,0.000136,7.960752e-05,...,0.000404,0.000256,1.952445e-04,7.099882e-03,8.242493e-04,7.994798e-05,4.599178e-02,0.002154,6.331398e-06,2.511999e-04
212,"Mining, except oil and gas",0.003259,6.472099e-05,0.000234,0.079059,4.063406e-04,0.009478,1.047279e-02,2.615743e-04,0.000835,4.524871e-05,...,0.000280,0.000231,7.726015e-04,6.543860e-06,9.399648e-06,1.052374e-03,8.584517e-03,0.000374,1.891729e-04,2.023141e-05
213,Support activities for mining,0.000011,3.852512e-06,0.020011,0.018739,1.095239e-02,0.000011,1.330821e-04,3.846152e-05,0.000039,5.159273e-05,...,0.000011,0.000009,6.784940e-06,1.547404e-05,1.556525e-05,5.875887e-06,3.717731e-05,0.000004,5.270758e-07,1.167456e-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GFGD,Federal general government (defense),0.000000,0.000000e+00,0.000000,0.000000,0.000000e+00,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,...,0.000000,0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000,0.000000e+00,0.000000e+00
GFGN,Federal general government (nondefense),0.000001,1.756964e-07,0.000002,0.000003,9.354410e-08,0.000007,3.634996e-07,7.301925e-07,0.000002,5.144730e-07,...,0.000003,0.000003,3.935219e-07,9.644837e-07,2.358781e-07,2.342912e-07,7.786689e-07,0.000001,8.325915e-10,8.853738e-06
GFE,Federal government enterprises,0.000254,7.445580e-05,0.000375,0.000640,4.117068e-05,0.001555,7.809526e-05,1.427090e-04,0.000374,1.182883e-04,...,0.003379,0.003010,1.245959e-03,2.139701e-04,4.497154e-05,6.017760e-04,5.134697e-04,0.000719,1.805503e-05,1.986772e-03
GSLG,State and local general government,0.001508,4.501413e-03,0.000476,0.000306,6.074891e-03,0.004653,9.307446e-04,2.492757e-04,0.000197,4.208134e-04,...,0.000683,0.000484,4.248992e-03,1.362700e-03,1.000431e-03,2.749660e-04,1.254268e-03,0.003173,1.919567e-05,1.601403e-03


# Model

In [65]:
def stddev_shock(delta_prices):
    return (delta_prices/100).std(axis=1)
shock_levels = stddev_shock(delta_prices)
shock_levels

Line                                            
111CA  Farms                                        0.099283
113FF  Forestry, fishing, and related activities    0.034329
211    Oil and gas extraction                       0.307962
212    Mining, except oil and gas                   0.065472
213    Support activities for mining                0.041765
                                                      ...   
GFGD   Federal general government (defense)         0.016632
GFGN   Federal general government (nondefense)      0.011271
GFE    Federal government enterprises               0.019495
GSLG   State and local general government           0.016315
GSLE   State and local government enterprises       0.027435
Length: 71, dtype: float64

In [63]:
def shock_dict_to_df(shocks_cfg,ind_map):
    rev_ind_map = {v:k for k,v in ind_map.items()}
    
    indices = []
    codes = []
    industries = []
    shocks = []
    shocks_convert = {}
    for ind, shock_strength in shocks_cfg.items():
        indices.append((rev_ind_map[ind],ind))
        codes.append(rev_ind_map[ind])
        industries.append(ind)
        shocks.append(shock_strength)

    shock_df = pd.DataFrame(data={'code':codes,'industry':industries,'shock':shocks})
    shock_df.set_index(['code','industry'],inplace=True)

    return shock_df

In [66]:
shock_dict_to_df({'Farms':0.99283,'Oil and gas extraction':0.307962},ind_map)

Unnamed: 0_level_0,Unnamed: 1_level_0,shock
code,industry,Unnamed: 2_level_1
111CA,Farms,0.99283
211,Oil and gas extraction,0.307962


In [93]:
def shock_sim(A, unit_v, unit_m, exo_shocks, ind_map):
    # A: indutsry-by-industry direct requirements table
    # unit_v: industry-level value add
    # unit_m: industry-level intermediate inputs
    # endo_shocks (dict): dict of key is industry and value is price shock, industries changed by effect of other vars
    
    # check indices
    diff1 = A.columns.difference(A.index)
    if len(diff1) != 0:
        raise ValueError('Input A index and column are not aligned.')
    
    
    all_ind = A.index.get_level_values(1)
    exo_ind = exo_shocks.keys()
    endo_ind = [ind for ind in all_ind if ind not in exo_ind]
    
    
    A_ee = A.loc[A.index.get_level_values(1).isin(endo_ind),A.columns.get_level_values(1).isin(endo_ind)] # e by e
    A_xe = A.loc[A.index.get_level_values(1).isin(exo_ind),A.columns.get_level_values(1).isin(endo_ind)]# x by e
    
    exo_shocks = shock_dict_to_df(exo_shocks,ind_map)
    diff2 = A_xe.index.difference(exo_shocks.index)
    if len(diff2) != 0:
        raise ValueError('Matrix A_xe index and exogenous are not aligned.')
    
#     first_elem = (I - A_ee.T)
    first_elem = np.identity(A_ee.shape[0]) - A_ee.T
#     first_elem = inverse(first_elem)
    first_elem = pd.DataFrame(np.linalg.inv(first_elem),index=first_elem.index,columns=first_elem.columns)
#     second_elem = A_xe.T
    second_elem = A_xe.T
#     third_elem = exo_shocks
    third_elem = exo_shocks
#     endo_shocks = first_elem matmul second_elem matmul third_elem
#     print(second_elem.columns, third_elem.index)
    diff3 = second_elem.columns.difference(third_elem.index)
#     print(diff3)
    if len(diff3) != 0:
        raise ValueError('Matrix A_xe.T column and exogenous shocks are not aligned')
    endo_shocks = mat_mul_check(first_elem,second_elem)
#     print(endo_shocks)
    endo_shocks = mat_mul_check(endo_shocks,third_elem)
    
    # check if endo shocks matches second term's dimension
   
    # combine endo shocks and exo shocks and label which are endo/exo
    return A_ee, A_xe, endo_shocks

In [94]:
_temp1, _temp2, _endo_shocks = shock_sim(A, unit_value_add, unit_int_import, {'Farms':0.99283,'Oil and gas extraction':0.307962}, ind_map)

In [95]:
_endo_shocks

Unnamed: 0_level_0,Unnamed: 1_level_0,shock
Commodities/Industries,Commodity Description,Unnamed: 2_level_1
113FF,"Forestry, fishing, and related activities",0.017446
212,"Mining, except oil and gas",0.007265
213,Support activities for mining,0.003473
22,Utilities,0.019635
23,Construction,0.007500
...,...,...
GFGN,Federal general government (nondefense),0.001136
GSLE,State and local government enterprises,0.005977
GSLG,State and local general government,0.012033
HS,Housing,0.001874


In [None]:
_first_elem = np.identity(_temp1.shape[0]) - _temp1.T
_first_elem

In [None]:
_first_elem = np.linalg.inv(_first_elem)
_first_elem

In [None]:
pd.DataFrame(_first_elem,index=_temp1.index,columns=_temp1.columns)