In [1]:
import pandas as pd
import numpy as np

# Wage Schedule Estimation
In this notebook, we seek to to inform our wage schedule assumption with data on wages, prices, labor inputs, and productivity, all at the sector level from the BLS. We do not seek to estimate any causal effects, only to get a sense for a realistic assumption about how wages adjust in response to productivity shocks. We view this as key because how wages adjusts informs whether labor markets amplify or dampen the effects of shocks in our model and the scale of any potential amplification or dampening. 

Setting up sector names and corresponding NAICs 2-digit codes.

In [2]:
short_names = ['mining', 'trans', 'const', 'dur', 'nondur',
                'trade', 'info', 'fin','profserv', 'edhealth',
                'accom', 'other', 'gov']
sector_names = ['Mining', 'Transportation and Utilities', 'Construction',
                'Durable goods', 'Nondurable good', 'Wholesale and Retail trade',
                'Information', 'Financial Activities', 'Professional and business services',
                'Education and Health Services', 'Leisure  and Hospitality',
                'Other services, except government','Government']
sector_codes = ['21', ['22', '48TW'] ,'23', '33DG', ['31ND','11'], ['42', '44RT'],
                    '51', ['52', '53'], ['54','55','56'],['61','62'], ['71','72'],
                    '81','G']
dfCodes = pd.DataFrame({'short_names':short_names,'NAICS Code':sector_codes})
dfCodes = dfCodes.explode('NAICS Code')
dfCodes.head()

Unnamed: 0,short_names,NAICS Code
0,mining,21
1,trans,22
1,trans,48TW
2,const,23
3,dur,33DG


Reading in Data.

In [3]:
data_path = '../data/'
dfRaw = pd.read_csv(data_path + 'update_raw/labor_costs_productivity_table.csv')
dfRaw = dfRaw.merge(dfCodes, on='NAICS Code')
dfRaw.tail()

Unnamed: 0,NAICS Code,Variable,Series ID,1987,1988,1989,1990,1991,1992,1993,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
115,81,Labor input,MPU0081052,73.715,79.846,85.355,89.15,88.077,87.511,90.461,...,102.52,103.128,103.723,104.493,107.729,111.704,111.418,96.998,104.899,other
116,81,Labor productivity,MPU0081062,78.133,77.855,77.24,76.408,74.888,82.163,81.373,...,96.574,100.091,101.298,103.915,102.799,100.779,102.633,107.996,101.259,other
117,81,"Labor costs, billions of current dollars",MPU0081631,78.028,86.054,93.066,100.039,100.374,107.189,116.915,...,224.514,237.15,246.127,252.346,259.031,270.739,279.97,266.914,294.325,other
118,81,Real sectoral output,MPU0081512,61.647,66.411,70.02,71.928,69.188,74.818,76.066,...,98.717,102.798,104.314,107.613,109.099,110.643,111.826,100.246,102.803,other
119,81,"Sectoral output, billions of current dollars",MPU0081611,147.223,163.93,178.391,189.488,187.617,207.363,216.045,...,466.839,496.966,513.504,539.334,558.562,581.908,603.609,575.856,615.747,other


Aggregating to 13 sector level for consistency with other variables in the paper. 

In [4]:
dfTFP = dfRaw[dfRaw.Variable=='Total factor productivity']
dfLab_input = dfRaw[dfRaw.Variable =='Labor input']
dfLab_prod = dfRaw[dfRaw.Variable =='Labor productivity']
dfLab_cost = dfRaw[dfRaw.Variable =='Labor costs, billions of current dollars']
dfOutput_real = dfRaw[dfRaw.Variable =='Real sectoral output']
dfOutput_nom = dfRaw[dfRaw.Variable =='Sectoral output, billions of current dollars']
dfOutput_real.head()

Unnamed: 0,NAICS Code,Variable,Series ID,1987,1988,1989,1990,1991,1992,1993,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
4,11,Real sectoral output,MPU0011512,76.163,73.078,76.745,79.74,80.459,84.714,82.603,...,109.037,108.007,109.015,115.147,114.651,112.769,110.218,118.657,112.174,nondur
10,21,Real sectoral output,MPU0021512,72.773,75.318,74.486,76.542,75.73,73.857,72.986,...,109.989,124.9,116.628,99.408,109.463,124.484,134.85,112.35,112.498,mining
16,22,Real sectoral output,MPU0022512,68.403,71.962,74.191,74.574,76.393,76.852,83.768,...,101.045,102.987,103.494,103.757,102.734,108.332,107.118,104.131,105.783,trans
22,23,Real sectoral output,MPU0023512,108.283,107.317,105.985,103.783,94.897,98.203,100.582,...,104.683,110.09,119.119,126.181,128.255,129.132,130.438,132.777,134.056,const
28,31ND,Real sectoral output,MPU9910512,83.54,85.755,86.938,87.739,87.769,90.286,92.344,...,101.761,102.122,102.595,103.824,102.925,104.168,103.33,98.155,102.621,nondur


Start with the nominal and real output series, need to aggregate to our 13-sector level. First, want to generate growth rates. Then get shares by sector.

In [5]:
dfOutput_real_growth = pd.DataFrame(data=np.log(np.array(dfOutput_real.iloc[:,4:-1]))
                                    -np.log(np.array(dfOutput_real.iloc[:,3:-2])),
                                    index=dfOutput_real['NAICS Code'],
                                    columns=range(1988,2022))
dfOutput_real_growth = dfOutput_real_growth.merge(dfCodes,on='NAICS Code')
dfOutput_real_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,-0.041348,0.048961,0.038283,0.008976,0.051533,-0.025235,0.08257,-0.035501,0.01513,...,0.086517,-0.009491,0.009289,0.054724,-0.004317,-0.016551,-0.022881,0.073777,-0.056186,nondur
1,21,0.034374,-0.011108,0.027228,-0.010665,-0.025044,-0.011863,0.01896,-0.010161,0.01345,...,0.09521,0.127133,-0.068524,-0.159757,0.096354,0.128591,0.079986,-0.182544,0.001316,mining
2,22,0.050722,0.030505,0.005149,0.024099,0.00599,0.08617,0.061113,0.070736,0.068927,...,0.010396,0.019037,0.004911,0.002538,-0.009909,0.053057,-0.01127,-0.028281,0.01574,trans
3,23,-0.008961,-0.012489,-0.020995,-0.08951,0.034245,0.023937,0.038032,0.004869,0.062416,...,0.045767,0.050361,0.078825,0.057594,0.016303,0.006815,0.010063,0.017773,0.009587,const
4,31ND,0.026169,0.013701,0.009171,0.000342,0.028274,0.022538,0.027166,0.009962,0.002834,...,0.017457,0.003541,0.004621,0.011908,-0.008697,0.012004,-0.008077,-0.05138,0.044495,nondur


In [6]:
dfOutput_nom_growth = pd.DataFrame(data=np.log(np.array(dfOutput_nom.iloc[:,4:-1]))
                                    -np.log(np.array(dfOutput_nom.iloc[:,3:-2])),
                                    index=dfOutput_nom['NAICS Code'],
                                    columns=range(1988,2022))
dfOutput_nom_growth = dfOutput_nom_growth.merge(dfCodes,on='NAICS Code')
dfOutput_nom_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,0.033302,0.080526,0.035192,-0.030067,0.05645,0.023128,0.060958,-0.022825,0.092227,...,0.104788,-0.019132,-0.089829,-0.043162,0.017315,-0.009713,-0.006455,0.102067,0.084893,nondur
1,21,-0.019706,0.065415,0.110468,-0.089365,-0.028477,-0.007746,-0.022215,-0.012889,0.163135,...,0.1438,0.116636,-0.444565,-0.236575,0.238799,0.246826,0.003363,-0.400734,0.412512,mining
2,22,0.044272,0.048961,0.023236,0.034279,0.021998,0.11369,0.061203,0.044328,0.104306,...,0.050592,0.074618,-0.043488,-0.024461,0.032212,0.050074,-0.023584,-0.045115,0.133959,trans
3,23,0.030301,0.028697,0.012141,-0.077146,0.047869,0.056871,0.07451,0.043189,0.082739,...,0.072714,0.085364,0.096388,0.075768,0.049031,0.048292,0.04438,0.077145,0.07171,const
4,31ND,0.075104,0.068153,0.050114,-0.004466,0.027336,0.022097,0.038684,0.063726,0.027863,...,0.014479,-0.015418,-0.115038,-0.026035,0.046611,0.06946,-0.02476,-0.107822,0.200489,nondur


In [7]:
dfOutput_price_growth = pd.DataFrame(data=np.array(dfOutput_nom_growth.iloc[:,1:-1]) 
                                   - np.array(dfOutput_real_growth.iloc[:,1:-1]),
                                   index = dfOutput_nom_growth['NAICS Code'],
                                   columns = range(1988,2022)) 
dfOutput_price_growth = dfOutput_price_growth.merge(dfCodes,on='NAICS Code')
dfOutput_price_growth.head()


Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,0.07465,0.031565,-0.003091,-0.039043,0.004917,0.048363,-0.021612,0.012676,0.077097,...,0.018271,-0.009641,-0.099118,-0.097886,0.021632,0.006838,0.016426,0.02829,0.141079,nondur
1,21,-0.05408,0.076523,0.08324,-0.0787,-0.003434,0.004117,-0.041175,-0.002728,0.149685,...,0.04859,-0.010498,-0.376041,-0.076818,0.142445,0.118235,-0.076623,-0.21819,0.411196,mining
2,22,-0.006449,0.018456,0.018087,0.01018,0.016007,0.02752,9e-05,-0.026408,0.035379,...,0.040197,0.055581,-0.048399,-0.026999,0.04212,-0.002983,-0.012315,-0.016834,0.118219,trans
3,23,0.039263,0.041186,0.033137,0.012364,0.013624,0.032934,0.036478,0.038319,0.020323,...,0.026947,0.035003,0.017563,0.018173,0.032728,0.041478,0.034317,0.059372,0.062123,const
4,31ND,0.048935,0.054452,0.040942,-0.004808,-0.000938,-0.000442,0.011518,0.053764,0.025029,...,-0.002978,-0.018959,-0.119659,-0.037943,0.055307,0.057456,-0.016683,-0.056442,0.155994,nondur


In [8]:
dfTFP_growth = pd.DataFrame(data=np.log(np.array(dfTFP.iloc[:,4:-1]))
                                    -np.log(np.array(dfTFP.iloc[:,3:-2])),
                                   index = dfOutput_nom_growth['NAICS Code'],
                                   columns = range(1988,2022)) 
dfTFP_growth = dfTFP_growth.merge(dfCodes,on='NAICS Code')
dfTFP_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,-0.079275,0.054746,0.051264,0.007549,0.065757,-0.056085,0.059967,-0.089345,0.068766,...,0.075562,-0.007688,0.025626,0.031237,-0.012578,0.009935,-0.03207,0.015633,-0.055115,nondur
1,21,0.068965,-0.017436,-0.009013,0.012667,0.002968,0.015673,0.051211,0.032483,-0.041456,...,0.032496,0.054177,0.052236,0.005451,0.04065,0.000781,0.054879,0.032894,-0.038736,mining
2,22,-0.031504,0.059537,0.043577,-0.020595,-0.00355,0.007041,0.048505,0.069305,0.020136,...,-0.02911,-0.039651,-0.017582,0.024712,0.009305,0.010054,0.005991,0.013666,-0.015067,trans
3,23,0.00098,-0.009399,-0.005343,-0.005228,0.018148,-0.013446,-0.001724,-0.010398,-0.004479,...,-0.007035,-0.015139,0.002379,-0.00049,0.00512,-0.012669,-0.008073,-4.1e-05,-0.007492,const
4,31ND,0.006195,-0.013575,-0.000322,-6e-05,0.017822,0.011515,0.006257,-0.009607,-0.015103,...,0.001828,0.02602,-0.009842,-0.029442,0.010665,0.008361,-0.007831,-0.005074,0.045429,nondur


In [9]:
dfOutput_shares = dfOutput_nom.merge(dfOutput_nom.groupby('short_names').sum(), on='short_names', suffixes=['','_share'])
dfOutput_shares.iloc[:,39:] = np.array(dfOutput_shares.iloc[:,3:38])/np.array(dfOutput_shares.iloc[:,39:])
dfOutput_shares.head()

Unnamed: 0,NAICS Code,Variable,Series ID,1987,1988,1989,1990,1991,1992,1993,...,2012_share,2013_share,2014_share,2015_share,2016_share,2017_share,2018_share,2019_share,2020_share,2021_share
0,11,"Sectoral output, billions of current dollars",MPU0011611,149.014,154.06,166.979,172.96,167.837,177.584,181.739,...,0.128586,0.139049,0.138605,0.141642,0.139573,0.136091,0.127048,0.129092,0.15458,0.140069
1,31ND,"Sectoral output, billions of current dollars",MPU9910611,872.22,940.25,1006.565,1058.293,1053.577,1082.775,1106.967,...,0.871414,0.860951,0.861395,0.858358,0.860427,0.863909,0.872952,0.870908,0.84542,0.859931
2,21,"Sectoral output, billions of current dollars",MPU0021611,109.877,107.733,115.016,128.45,117.469,114.171,113.29,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,22,"Sectoral output, billions of current dollars",MPU0022611,217.709,227.564,238.983,244.601,253.131,258.761,289.917,...,0.368682,0.372195,0.375098,0.364023,0.357232,0.349638,0.34086,0.328825,0.340301,0.3273
4,48TW,"Sectoral output, billions of current dollars",MPU0048611,235.557,259.665,275.076,295.269,297.39,314.051,331.416,...,0.631318,0.627805,0.624902,0.635977,0.642768,0.650362,0.65914,0.671175,0.659699,0.6727


Now using shares to aggregate production side terms (TFP, prices).

In [10]:
dfOutput_price_growth.iloc[:,1:35] = np.array(dfOutput_price_growth.iloc[:,1:35]) * np.array(dfOutput_shares.iloc[:,39:73])
dfTFP_growth.iloc[:,1:35] = np.array(dfTFP_growth.iloc[:,1:35]) * np.array(dfOutput_shares.iloc[:,39:73])
dfOutput_price_growth = dfOutput_price_growth.groupby('short_names').sum()
dfTFP_growth = dfTFP_growth.groupby('short_names').sum()

dfOutput_price_growth.head()

Unnamed: 0_level_0,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
short_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
accom,0.038962,0.042868,0.042304,0.034486,0.01836,0.022179,0.017838,0.023595,0.025979,0.028338,...,0.026161,0.01794,0.025257,0.024554,0.023865,0.018299,0.030856,0.027036,0.049858,0.07926
const,0.018858,0.019236,0.015405,0.005602,0.006264,0.014878,0.017021,0.017895,0.009461,0.015299,...,0.010365,0.009935,0.013028,0.006588,0.006615,0.011692,0.014502,0.011697,0.019523,0.021141
dur,0.031712,0.03046,0.009791,0.010896,0.008245,0.010328,0.013634,0.003174,-0.020991,-0.015162,...,0.009783,0.004308,0.011748,3e-05,-0.001435,0.018478,0.028091,0.01155,0.020276,0.07956
edhealth,0.058822,0.055839,0.055357,0.049027,0.041461,0.033735,0.035693,0.033016,0.019861,0.022964,...,0.012946,0.013808,0.015551,0.006737,0.010593,0.016004,0.017324,0.016652,0.060978,0.019538
fin,0.047316,0.041361,0.044126,0.036065,0.038458,0.024272,0.020012,0.03573,0.029012,0.028837,...,0.024482,0.027596,0.034308,0.019893,0.026395,0.033686,0.042971,0.024198,0.016251,0.037063


For the labor inputs, we perform a similar aggregation use nominal labor cost shares as weights. 

In [11]:
dfLab_cost.iloc[11,8:9] = np.abs(dfLab_cost.iloc[11,8:9])

In [12]:
dfLab_input_growth = pd.DataFrame(data=np.log(np.array(dfLab_input.iloc[:,4:-1]))
                                    -np.log(np.array(dfLab_input.iloc[:,3:-2])),
                                   index = dfOutput_nom_growth['NAICS Code'],
                                   columns = range(1988,2022)) 
dfLab_input_growth = dfLab_input_growth.merge(dfCodes,on='NAICS Code')
dfLab_input_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,0.002507,0.00654,-0.052789,0.001032,-0.006459,-0.021434,0.13654,0.03614,-0.030017,...,-0.042052,0.055047,0.076516,0.01989,-0.011993,-0.008372,0.012807,-0.004354,-0.013976,nondur
1,21,-0.002545,0.010005,0.026976,-0.028784,-0.083932,-0.022208,0.017494,-0.052186,0.005093,...,0.010416,0.078465,-0.110574,-0.21044,0.001336,0.095585,0.01608,-0.24122,-0.00838,mining
2,22,-0.003281,0.036042,0.018105,-0.004648,-0.00283,0.004051,-0.021237,-0.027362,-0.034591,...,0.02089,0.015953,0.01269,0.003268,-0.005727,-0.002311,-0.014593,-0.012566,-0.001229,trans
3,23,0.041708,0.024792,0.000659,-0.080586,-0.02125,0.054577,0.053219,0.022504,0.049629,...,0.040576,0.052969,0.038956,0.036651,0.025658,0.053307,0.027551,-0.058912,0.037049,const
4,31ND,0.017332,0.015902,-0.010552,-0.009689,0.016733,0.021856,0.018538,0.001953,-0.006615,...,0.011454,0.007721,0.021074,0.011926,0.021066,0.016965,0.001716,-0.019267,0.026194,nondur


In [13]:
dfLab_cost_growth = pd.DataFrame(data=np.log(np.array(dfLab_cost.iloc[:,4:-1]))
                                    -np.log(np.array(dfLab_cost.iloc[:,3:-2])),
                                   index = dfOutput_nom_growth['NAICS Code'],
                                   columns = range(1988,2022))
dfLab_cost_growth = dfLab_cost_growth.merge(dfCodes,on='NAICS Code')
dfLab_cost_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,0.098926,0.02772,0.179645,-0.116594,0.040254,0.065467,0.096247,-0.098708,0.097472,...,0.049717,-0.029681,-0.021467,0.027467,0.032477,-0.024267,0.010431,0.10267,0.011591,nondur
1,21,-0.000923,0.006738,0.063703,-0.004034,-0.018186,0.001193,-0.015914,-0.006495,0.053741,...,0.023221,0.086319,-0.107734,-0.217029,0.027909,0.108224,0.016098,-0.209249,-0.043993,mining
2,22,0.009901,0.04705,0.059171,0.062862,0.060538,0.015714,0.006316,-0.013301,0.012334,...,0.042353,0.032578,0.031937,0.047558,0.002074,0.035097,0.040146,0.055775,0.003178,trans
3,23,0.080373,0.054622,0.027689,-0.065012,0.016985,0.055655,0.091123,0.048968,0.075308,...,0.043734,0.07668,0.070171,0.052165,0.064999,0.054496,0.0651,-0.010214,0.05418,const
4,31ND,0.061043,0.047525,0.038994,0.030116,0.053377,0.021555,0.022902,0.014419,0.012587,...,0.019848,0.04677,0.028131,0.009783,0.034952,0.02682,0.028873,0.023711,0.039001,nondur


In [14]:
dfWage_growth = pd.DataFrame(data=np.array(dfLab_cost_growth.iloc[:,1:-1]) 
                                   - np.array(dfLab_input_growth.iloc[:,1:-1]),
                                   index = dfLab_cost_growth['NAICS Code'],
                                   columns = range(1988,2022)) 
dfWage_growth = dfWage_growth.merge(dfCodes,on='NAICS Code')
dfWage_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,0.096419,0.021179,0.232434,-0.117625,0.046713,0.086901,-0.040293,-0.134848,0.127489,...,0.091769,-0.084728,-0.097983,0.007578,0.044471,-0.015895,-0.002377,0.107024,0.025567,nondur
1,21,0.001622,-0.003268,0.036726,0.024749,0.065746,0.023401,-0.033408,0.045691,0.048648,...,0.012806,0.007854,0.00284,-0.006589,0.026573,0.012639,1.8e-05,0.031972,-0.035613,mining
2,22,0.013182,0.011009,0.041066,0.06751,0.063368,0.011663,0.027554,0.014061,0.046924,...,0.021463,0.016625,0.019247,0.044289,0.007801,0.037409,0.054738,0.068342,0.004407,trans
3,23,0.038665,0.02983,0.027029,0.015574,0.038236,0.001078,0.037904,0.026464,0.025679,...,0.003159,0.023711,0.031215,0.015514,0.039341,0.001188,0.037549,0.048699,0.017131,const
4,31ND,0.043711,0.031623,0.049545,0.039805,0.036644,-0.0003,0.004364,0.012466,0.019202,...,0.008394,0.039049,0.007057,-0.002143,0.013886,0.009855,0.027157,0.042978,0.012807,nondur


In [15]:
dfLab_prod_growth = pd.DataFrame(data=np.log(np.array(dfLab_prod.iloc[:,4:-1]))
                                    -np.log(np.array(dfLab_prod.iloc[:,3:-2])),
                                   index = dfOutput_nom_growth['NAICS Code'],
                                   columns = range(1988,2022))
dfLab_prod_growth = dfLab_prod_growth.merge(dfCodes,on='NAICS Code')
dfLab_prod_growth.head()

Unnamed: 0,NAICS Code,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,short_names
0,11,-0.041493,0.044062,0.094335,0.01205,0.060054,0.002654,-0.045915,-0.071367,0.045955,...,0.128771,-0.066266,-0.066393,0.036771,0.011736,0.000277,-0.030225,0.103573,-0.051344,nondur
1,21,0.043636,-0.019462,-0.000444,0.019173,0.070873,0.02977,0.014502,0.029253,-0.009913,...,0.097644,0.060112,0.055604,0.063431,0.093248,0.043461,0.058315,0.073563,-0.000603,mining
2,22,0.05823,0.000325,-0.010728,0.030252,0.01675,0.094389,0.09191,0.103158,0.108306,...,-0.005867,0.005647,-0.006956,0.002093,-0.004988,0.054757,0.005931,-0.009254,0.016812,trans
3,23,-0.046219,-0.032585,-0.013479,0.000718,0.067238,-0.025331,-0.015129,-0.014223,0.017282,...,0.005206,-0.003298,0.036649,0.022369,-0.007423,-0.042375,-0.013912,0.09404,-0.038579,const
4,31ND,0.010754,0.001874,0.026341,0.014863,0.023229,0.010169,0.016701,0.0182,0.020511,...,0.016474,-0.000856,-0.012522,0.003652,-0.023731,0.00124,-0.007669,-0.001567,0.012218,nondur


In [16]:
dfLab_cost_shares = dfLab_cost.merge(dfLab_cost.groupby('short_names').sum(), on='short_names', suffixes=['','_share'])
dfLab_cost_shares.iloc[:,39:] = np.array(dfLab_cost_shares.iloc[:,3:38])/np.array(dfLab_cost_shares.iloc[:,39:])
dfLab_cost_shares.head()

Unnamed: 0,NAICS Code,Variable,Series ID,1987,1988,1989,1990,1991,1992,1993,...,2012_share,2013_share,2014_share,2015_share,2016_share,2017_share,2018_share,2019_share,2020_share,2021_share
0,11,"Labor costs, billions of current dollars",MPU0011631,28.456,31.415,32.298,38.654,34.4,35.813,38.236,...,0.15721,0.161209,0.151137,0.144883,0.147088,0.146778,0.140495,0.138282,0.147963,0.14454
1,31ND,"Labor costs, billions of current dollars",MPU9910631,197.794,210.244,220.477,229.244,236.253,249.206,254.636,...,0.84279,0.838791,0.848863,0.855117,0.852912,0.853222,0.859505,0.861718,0.852037,0.85546
2,21,"Labor costs, billions of current dollars",MPU0021631,30.351,30.323,30.528,32.536,32.405,31.821,31.859,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,22,"Labor costs, billions of current dollars",MPU0022631,31.056,31.365,32.876,34.88,37.143,39.461,40.086,...,0.180641,0.181731,0.179003,0.17328,0.176194,0.168197,0.162777,0.158362,0.166407,0.15373
4,48TW,"Labor costs, billions of current dollars",MPU0048631,112.505,117.684,124.145,130.907,135.882,145.125,150.421,...,0.819359,0.818269,0.820997,0.82672,0.823806,0.831803,0.837223,0.841638,0.833593,0.84627


In [17]:
dfWage_growth.iloc[:,1:35] = np.array(dfWage_growth.iloc[:,1:35]) * np.array(dfLab_cost_shares.iloc[:,39:73])
dfLab_prod_growth.iloc[:,1:35] = np.array(dfLab_prod_growth.iloc[:,1:35]) * np.array(dfLab_cost_shares.iloc[:,39:73])
dfWage_growth = dfWage_growth.groupby('short_names').sum()
dfLab_prod_growth = dfLab_prod_growth.groupby('short_names').sum()

dfWage_growth.head()

Unnamed: 0_level_0,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
short_names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
accom,0.088085,0.032343,0.08216,0.040113,0.025158,-0.001335,-0.00855,0.018583,0.028256,0.045765,...,0.052808,0.000808,0.037882,0.028042,0.040581,0.027526,0.034829,0.041731,0.004576,0.157675
const,0.008364,0.006277,0.005659,0.003277,0.008208,0.00023,0.007976,0.005358,0.005051,0.007221,...,0.005949,0.000571,0.004309,0.005588,0.002688,0.006932,0.0002,0.006112,0.007712,0.002851
dur,0.032102,0.025736,0.043633,0.040263,0.048265,0.00578,0.009148,0.023064,0.019931,0.007333,...,0.013131,0.000955,0.022552,0.029455,0.010727,0.036588,0.026107,0.008918,0.060294,0.031542
edhealth,-0.007933,0.04402,0.068224,0.052257,0.011395,0.021646,-0.014785,-0.023673,0.003614,-0.001597,...,-0.004365,0.015332,0.017357,0.015079,0.011588,0.009848,0.027927,0.019604,0.026821,0.039519
fin,0.298737,-0.41174,0.173658,0.010565,0.160431,-0.085897,-0.014534,-0.020676,0.093649,0.08101,...,0.051506,-0.000223,0.025951,0.041744,0.027935,0.04206,0.026062,0.038137,0.04446,0.067218


Now we have what we need to estimate, in a reduced form way, how wages respond to productivity changes in their own and in all other industries. In particular we want to estimate the parameter matrix of 
\begin{align*}
    d\log\bm{w} - \bm{\mathcal{L}} d\log\bm{p} = \bm{\Lambda}_A d\log \bm{A} + \bm{\Lambda}_H d\log \bm{H}
\end{align*}
We still need to think about whether there is some way of estimating $\bm{\Lambda}_H$. 

In [18]:
def Lambda_estimation(dfX, dfY):
    Lam = np.zeros((dfY.shape[1],dfY.shape[1]+1))
    X = np.ones((dfX.shape[0],dfX.shape[1]+1))
    X[:,:-1] = np.array(dfX)
    for i in range(dfY.shape[1]):
        Y = np.array(dfY.iloc[:,i]).reshape((dfY.shape[0],1))
        est = np.linalg.inv(X.T @ X) @ (X.T @ Y)
        Lam[i,:] = est.flatten()
    return Lam

In [23]:
dfY = (dfWage_growth - dfOutput_price_growth).transpose()
dfX = dfLab_prod_growth.transpose()
dfLam = pd.DataFrame(data = Lambda_estimation(dfX, dfY),
                                              index = dfTFP_growth.index.values,
                                              columns = np.append(dfTFP_growth.index.values,  ['constant']))
dfLam

Unnamed: 0,accom,const,dur,edhealth,fin,info,mining,nondur,other,profserv,trade,trans,constant
accom,0.521333,-0.812447,0.077107,0.606734,0.002824,-0.24407,0.138996,-0.003579,0.115802,0.091532,0.024244,0.040026,0.005557
const,-0.022882,0.764191,0.093689,0.131358,-0.04396,-0.083018,0.065316,-0.083298,-0.049322,-0.061007,-0.002853,0.025654,-0.004533
dur,-0.183231,0.839326,0.690819,0.474346,0.285294,-0.246221,0.119402,-0.003325,0.229678,0.013316,-0.24361,-0.090359,0.017734
edhealth,0.157837,-0.046392,-0.172963,0.427644,0.188441,-0.164521,-0.043148,0.114864,-0.083884,-0.356544,0.162724,-0.182947,0.003398
fin,1.243607,2.502166,-1.809102,1.481563,0.112794,1.376283,0.639746,1.641521,0.646544,-0.981724,0.091925,1.403783,-0.08756
info,0.355798,-2.104546,-0.211034,-0.357863,1.011364,0.175999,0.451643,0.37312,0.002459,-0.039213,0.313735,-0.507781,-0.007977
mining,-1.4439,3.6414,0.661194,0.621598,-1.384571,1.085701,1.055758,-0.423774,0.873035,-1.483028,-1.392196,0.250345,0.045643
nondur,-0.355009,1.433253,-0.168686,0.252649,-0.056177,-0.178465,0.43762,0.925873,0.36315,-0.090378,-0.220377,3.6e-05,0.01462
other,0.131644,-0.284027,-0.152494,-0.53354,0.663474,0.496225,0.174065,0.030544,0.38984,-0.052818,0.094742,0.017683,-0.044542
profserv,0.063907,0.457346,-0.025622,0.243939,-0.061734,-0.281929,-0.070275,-0.0707,0.007934,0.841226,0.093121,-0.101902,0.011835


In [24]:
dfY = (dfWage_growth - dfOutput_price_growth).transpose()
dfX = dfTFP_growth.transpose()
dfLam = pd.DataFrame(data = Lambda_estimation(dfX, dfY),
                                              index = dfTFP_growth.index.values,
                                              columns = np.append(dfTFP_growth.index.values,  ['constant']))
dfLam

Unnamed: 0,accom,const,dur,edhealth,fin,info,mining,nondur,other,profserv,trade,trans,constant
accom,0.856567,0.85751,0.094555,0.226095,-0.01163,-0.182716,0.216949,-0.338524,-0.169322,0.170908,0.182,-0.071416,0.004812
const,-0.023623,1.137538,-0.001045,0.209321,0.061516,-0.096508,0.083566,-0.050329,-0.117785,-0.005112,0.118581,0.040352,-0.006907
dur,-0.690016,1.735466,0.713542,0.995768,0.283687,-0.262295,0.272062,0.244345,0.185737,-0.304408,0.27721,-0.076303,0.017229
edhealth,0.043057,-0.052475,-0.174034,1.260578,0.022537,0.045341,-0.021623,0.177672,-0.022519,-0.065179,0.094291,-0.02909,-0.000558
fin,0.474314,6.180547,1.687433,-0.907813,1.529712,-0.421905,0.850427,1.452619,-0.37913,-0.224519,-0.560924,-0.534674,-0.00311
info,-0.505274,1.555926,0.885359,0.440917,0.768481,0.188809,0.217391,1.627097,0.041023,0.15011,-0.780495,-0.361335,0.039243
mining,-2.182134,5.178549,-0.687713,2.605763,0.788799,-0.183512,2.247851,-2.031333,0.872869,-1.520239,1.415452,0.384897,0.006244
nondur,-0.952671,2.999726,-0.150279,1.036733,0.351312,-0.200315,0.536412,0.769294,0.238442,-0.392922,0.494672,0.041232,0.014684
other,-0.601297,1.013208,0.325923,0.822539,0.200974,-0.036984,0.240207,-0.112059,1.638527,-0.011812,0.158697,0.094839,0.005868
profserv,0.008065,0.33261,0.020362,-0.141923,-0.0196,-0.089711,0.084465,0.072105,-0.041693,1.152305,0.03186,-0.044607,0.007219


In [20]:
dfLam.to_csv('../data/clean/wage_response_estimates.csv')