# Computing Capital and Energy Shares

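This notebook extracts 2021 capital and energy factor shares from the KLEMS total factor productivity data, aggregates them to a coarser set of sectors using sectoral output as weights, assigns government zero shares, and saves the result to `data/clean/energy_capital_shares.csv`.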

In [1]:
import pandas as pd

In [3]:
# Load the "MachineReadable" sheet of the raw KLEMS workbook into a DataFrame
klems_workbook = "../../data/raw/major-industry-total-factor-productivity-klems.xlsx"
shares = pd.read_excel(klems_workbook, sheet_name="MachineReadable")

In [4]:
# Keep only the rows for the three measures used below, restricted to 2021
needed_measures = ['Energy share', 'Capital share', 'Sectoral output']
is_needed_measure = shares['Measure'].isin(needed_measures)
is_2021 = shares['Year'] == 2021
shares_2021 = shares[is_needed_measure & is_2021]

In [5]:
# Sector definitions. short_names and sector_codes are paired positionally when
# the lookup table is built; sector_names appears to follow the same order.

# Short identifiers used as the grouping key for each sector
short_names = [
    'mining', 'trans', 'const', 'dur', 'nondur', 'trade', 'info',
    'fin', 'profserv', 'edhealth', 'accom', 'other', 'gov',
]

# Human-readable sector labels
sector_names = [
    'Mining',
    'Transportation and Utilities',
    'Construction',
    'Durable goods',
    'Nondurable good',
    'Wholesale and Retail trade',
    'Information',
    'Financial Activities',
    'Professional and business services',
    'Education and Health Services',
    'Leisure  and Hospitality',
    'Other services, except government',
    'Government',
]

# NAICS code(s) per sector; a nested list means several codes roll up into one
# sector. Labels such as '48TW' or '33DG' are the targets of replace_dict.
sector_codes = [
    '21',
    ['22', '48TW'],
    '23',
    '33DG',
    ['31ND', '11'],
    ['42', '44RT'],
    '51',
    ['52', '53'],
    ['54', '55', '56'],
    ['61', '62'],
    ['71', '72'],
    '81',
    'G',
]

In [6]:
# Build a long-format lookup with one row per (short name, NAICS code) pair;
# explode() expands the nested code lists into separate rows.
sectors = (
    pd.DataFrame({"short_names": short_names, "NAICS": sector_codes})
    .explode(column='NAICS')
)

# Maps NAICS labels as they appear in KLEMS to the labels used in sector_codes
replace_dict = {
    '48-49': '48TW',
    '44,45': '44RT',
    'DM': '33DG',
    'ND': '31ND',
}

In [7]:
# Relabel the KLEMS NAICS codes, attach our short sector names, and reshape so
# that each (NAICS, short_names) pair is one row with one column per measure.
# The inner join keeps only NAICS codes present in both tables.
merged_cols = ['NAICS', 'short_names', 'Measure', 'Value']
shares_2021 = (
    shares_2021
    .replace({'NAICS': replace_dict})
    .merge(sectors, on='NAICS', how='inner')[merged_cols]
    .pivot(index=['NAICS', 'short_names'], columns='Measure', values='Value')
    .reset_index()
)

In [8]:
# Weight for each NAICS row: its sectoral output divided by the total sectoral
# output of the short-name sector it belongs to.
sector_output_total = (
    shares_2021.groupby('short_names')['Sectoral output'].transform('sum')
)
shares_2021['Output share'] = shares_2021['Sectoral output'] / sector_output_total

In [9]:
# Scale each factor share by its row's output weight, then sum within each
# sector to obtain the output-weighted factor shares.
output_weight = shares_2021['Output share']
for factor in ('Capital share', 'Energy share'):
    shares_2021[factor] = shares_2021[factor] * output_weight

shares_2021 = (
    shares_2021
    .groupby('short_names')
    .agg({'Capital share': 'sum', 'Energy share': 'sum'})
    .reset_index()
)

In [10]:
# Add in government with zero capital and energy shares.
# The membership check keeps this cell idempotent: without it, every re-run
# appends another 'gov' row (len(index) is a fresh label each time) and the
# duplicate would end up in the exported CSV.
if not (shares_2021['short_names'] == 'gov').any():
    # Values are positional: short_names, Capital share, Energy share
    shares_2021.loc[len(shares_2021.index)] = ['gov', 0, 0]

# Reassign instead of sorting in place; the original index labels are kept
shares_2021 = shares_2021.sort_values(by='short_names')
shares_2021

Measure,short_names,Capital share,Energy share
0,accom,0.221841,0.023295
1,const,0.097,0.024
2,dur,0.271,0.008
3,edhealth,0.126015,0.011191
4,fin,0.33083,0.035048
12,gov,0.0,0.0
5,info,0.377,0.004
6,mining,0.524,0.029
7,nondur,0.280069,0.022101
8,other,0.07,0.01


In [11]:
# Write the aggregated shares to the clean data folder, omitting the index column
clean_csv_path = "../../data/clean/energy_capital_shares.csv"
shares_2021.to_csv(clean_csv_path, index=False)