In [1]:
import numpy as np
import pandas as pd

In [2]:
data_dir = '../../data/clean/'

# tau table, ordered by the `short_names` column.
dfTau = pd.read_csv(data_dir + 'tau_2021.csv')
dfTau = dfTau.sort_values(by=['short_names'])

# epsN table: the first CSV column is used as the row index and the
# remaining columns are put in sorted order.
dfepsN = pd.read_csv(data_dir + 'epsN_2021.csv', index_col=0)
dfepsN = dfepsN.sort_index(axis=1)

# Long format: one row per (short_names, variable) pair, weight in `value`.
dfepsN_weights = pd.melt(
    dfepsN.rename_axis('short_names').reset_index(),
    id_vars='short_names',
)

First, we load the $\varepsilon^f_N$ weights that we use to impute the occupation-level labor market variables.

In [3]:
# Total weight within each `short_names` group.
dfepsN_weights['value'].groupby(dfepsN_weights['short_names']).sum()

short_names
accom       1.0
const       1.0
dur         1.0
edhealth    1.0
fin         1.0
gov         1.0
info        1.0
mining      1.0
nondur      1.0
other       1.0
profserv    1.0
trade       1.0
trans       1.0
Name: value, dtype: float64

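We first impute the number of vacancies and unemployed workers in each occupation $o$ as weighted sums of the sector-level series:
\begin{align*}
    V_o &= \sum_i \frac{\varepsilon^f_{N_{io}}}{\varepsilon^f_{N_i}} V_{i}, \\
    U_o &= \sum_i \frac{\varepsilon^f_{N_{io}}}{\varepsilon^f_{N_i}} U_{i},
\end{align*}
where $V_i$ and $U_i$ are vacancies and unemployment in sector $i$, and the weights $\varepsilon^f_{N_{io}} / \varepsilon^f_{N_i}$ sum to one across occupations within each sector (as checked above). Hires are imputed in the same way.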

In [5]:
# Load L_2021.csv from the clean-data directory.
L = pd.read_csv(f'{data_dir}L_2021.csv')

In [16]:
# Row-wise total of every non-title column of L, one row per OCC_TITLE,
# stored as column `L` and keyed by `variable`; all rows are tagged Year 2021.
occu_L = (
    L.set_index('OCC_TITLE')
    .sum(axis=1)
    .rename('L')
    .rename_axis('variable')
    .reset_index()
    .assign(Year=2021)
)
occu_L

Unnamed: 0,variable,L,Year
0,Eng,2407010.0,2021
1,Arts,1808320.0,2021
2,Clean,4103390.0,2021
3,Bus Ops,8911530.0,2021
4,Soc S,2213790.0,2021
5,Math,4617830.0,2021
6,Cons,5848120.0,2021
7,Educ,8190330.0,2021
8,Agg,156180.0,2021
9,Food S,11195800.0,2021


In [4]:
# Load the uvh series, order by date and short_names, and drop every row
# that has a missing value.
uvh = (
    pd.read_csv(data_dir + 'uvh_updated.csv')
    .sort_values(by=['Date', 'short_names'])
    .dropna()
)

# Attach the weights (one row per short_names/variable pair) and scale
# each series by its weight.
uvhOccu = uvh.merge(dfepsN_weights, on=['short_names'])
uvhOccu = uvhOccu.assign(
    **{
        series: uvhOccu['value'] * uvhOccu[series]
        for series in ('Unemployment', 'Vacancy', 'Hires')
    }
)

# Sum the weighted series within each (variable, Date) cell, then add
# tightness = vacancies / unemployment.
uvhOccu = (
    uvhOccu.groupby(['variable', 'Date'])[['Unemployment', 'Vacancy', 'Hires']]
    .sum()
    .assign(Tightness=lambda d: d['Vacancy'] / d['Unemployment'])
    .reset_index()
)
uvhOccu.to_csv("../../data/clean/uvh_updated_occu.csv", index=False)
uvhOccu

Unnamed: 0,variable,Date,Unemployment,Vacancy,Hires,Tightness
0,Admin,2000-12-01,401.652763,472.240013,486.385873,1.175742
1,Admin,2001-01-01,515.175272,484.882378,513.372481,0.941199
2,Admin,2001-02-01,505.853061,474.463734,476.860111,0.937948
3,Admin,2001-03-01,498.533067,437.249131,490.399002,0.877071
4,Admin,2001-04-01,468.959882,426.244377,460.446630,0.908914
...,...,...,...,...,...,...
5869,Trans,2022-10-01,367.740983,674.910704,478.614977,1.835288
5870,Trans,2022-11-01,413.172366,674.182095,487.533580,1.631721
5871,Trans,2022-12-01,401.700062,742.819491,486.529378,1.849189
5872,Trans,2023-01-01,467.079602,782.853482,493.697452,1.676060


In [28]:
# Calendar year of each observation (kept as int64).
uvhOccu['Year'] = pd.to_datetime(uvhOccu['Date']).dt.year.astype('int64')

# Annual means per (variable, Year); tightness is recomputed from the annual
# means. The merge with occu_L keeps only the (variable, Year) pairs present
# in both tables.
uvhOccu_annual = (
    uvhOccu.dropna()
    .groupby(['variable', 'Year'])[['Unemployment', 'Vacancy', 'Hires']]
    .mean()
    .reset_index()
    .assign(Tightness=lambda d: d['Vacancy'] / d['Unemployment'])
    .merge(occu_L, on=['variable', 'Year'])
    .assign(
        # NOTE(review): L / 1000 presumably converts L from persons to
        # thousands so it matches the units of Unemployment -- confirm.
        H=lambda d: d['Unemployment'] + d['L'] / 1000,
        u=lambda d: d['Unemployment'] / d['H'],
    )
)
uvhOccu_annual.to_csv('../../data/clean/uvh_annual_updated_occu.csv', index=False)
uvhOccu_annual

Unnamed: 0,variable,Year,Unemployment,Vacancy,Hires,Tightness,L,H,u
0,Admin,2021,687.700141,935.566078,576.794062,1.360427,18279490.0,18967.190141,0.036257
1,Agg,2021,6.139996,7.88284,5.211838,1.283851,156180.0,162.319996,0.037826
2,Arts,2021,110.770178,152.503404,102.510535,1.376755,1808320.0,1919.090178,0.05772
3,Bus Ops,2021,560.159346,825.985556,505.670021,1.474555,8911530.0,9471.689346,0.05914
4,Care,2021,137.13134,166.108155,112.158117,1.211307,2559430.0,2696.56134,0.050854
5,Clean,2021,133.274325,198.635436,134.294468,1.490425,4103390.0,4236.664325,0.031457
6,Cons,2021,434.006635,287.337148,254.045229,0.662057,5848120.0,6282.126635,0.069086
7,Educ,2021,231.138931,465.321682,207.414198,2.013169,8190330.0,8421.468931,0.027446
8,Eng,2021,178.419212,261.181153,158.08059,1.463862,2407010.0,2585.429212,0.06901
9,Food S,2021,856.538251,949.552445,782.396213,1.108593,11195800.0,12052.338251,0.071068


In [26]:
# Sanity check: H-weighted average of the occupation-level rate u.
# For reference, unemployment was high in the first half of 2021:
# about 6% in the first half versus 4% in the second half.
np.average(uvhOccu_annual['u'], weights=uvhOccu_annual['H'])

0.050183899208013395

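We then estimate the matching parameters according to:

\begin{align*}
    \log H_{o,t} = \log \phi_o  + \eta_o \log U_{o,t} + (1-\eta_o) \log V_{o,t} + \epsilon_{o,t}
\end{align*}
In practice, we subtract $\log V_{o,t}$ from both sides and, separately for each occupation, run OLS of $\log (H_{o,t} / V_{o,t})$ on a constant and $\log (U_{o,t} / V_{o,t})$; the exponentiated intercept is $\phi_o$ and the slope is $\eta_o$.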

In [7]:
def matching_estimation2(df):
    """Estimate matching-function parameters by OLS.

    Fits ``log(H / V) = log(phi) + eta * log(U / V)``, i.e. the matching
    equation ``log H = log phi + eta log U + (1 - eta) log V`` with
    ``log V`` subtracted from both sides.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Hires', 'Unemployment' and 'Vacancy'.

    Returns
    -------
    numpy.ndarray
        One-dimensional array ``[phi, eta]``: the exponentiated intercept
        (matching efficiency) and the slope (unemployment elasticity).

    Raises
    ------
    numpy.linalg.LinAlgError
        If fewer than two usable rows remain or the regressor has no
        variation, so the two parameters are not identified.

    Notes
    -----
    A row is used only if the logs of all three series are finite. Rows with
    a missing, zero or negative value in any of the columns are dropped;
    otherwise a single such row would turn both estimates into NaN.
    """
    # log(0) = -inf and log(NaN / negative) = NaN are filtered out below, so
    # the corresponding floating-point warnings are just noise here.
    with np.errstate(divide='ignore', invalid='ignore'):
        log_H = np.log(df['Hires'].to_numpy(dtype=float))
        log_U = np.log(df['Unemployment'].to_numpy(dtype=float))
        log_V = np.log(df['Vacancy'].to_numpy(dtype=float))

    usable = np.isfinite(log_H) & np.isfinite(log_U) & np.isfinite(log_V)
    n_obs = int(usable.sum())
    if n_obs < 2:
        raise np.linalg.LinAlgError(
            "matching_estimation2 needs at least two rows with finite logs"
        )

    y = log_H[usable] - log_V[usable]
    X = np.column_stack([np.ones(n_obs), log_U[usable] - log_V[usable]])

    # Least squares via lstsq rather than an explicit inverse of X'X, which
    # is numerically more stable.
    theta, _, rank, _ = np.linalg.lstsq(X, y, rcond=None)
    if rank < X.shape[1]:
        raise np.linalg.LinAlgError(
            "matching_estimation2: log(U/V) has no variation; parameters are not identified"
        )

    theta[0] = np.exp(theta[0])  # intercept is log(phi); report phi in levels
    return theta

# One [matching efficiency, unemployment elasticity] estimate per `variable`.
matching_param2 = uvhOccu.groupby("variable").apply(matching_estimation2)
matching_efficiency = [estimate[0] for estimate in matching_param2]
unemployment_elasticity = [estimate[1] for estimate in matching_param2]

# Tidy table of the estimates, written to the clean-data folder.
df_matching_param = pd.DataFrame(
    {
        'occupation': matching_param2.index.values,
        'matching_efficiency': matching_efficiency,
        'unemployment_elasticity': unemployment_elasticity,
    }
)
df_matching_param.to_csv('../../data/clean/matching_param_estimates_occu.csv', index=False)
df_matching_param.head()

Unnamed: 0,occupation,matching_efficiency,unemployment_elasticity
0,Admin,0.882588,0.368118
1,Agg,0.906748,0.389782
2,Arts,0.939268,0.351735
3,Bus Ops,0.878515,0.355697
4,Care,0.978076,0.377744


In [8]:
# LaTeX table of the estimates, rounded to three decimals.
print(df_matching_param.round(3).set_index('occupation').to_latex())

\begin{tabular}{lrr}
\toprule
{} &  matching\_efficiency &  unemployment\_elasticity \\
occupation &                      &                          \\
\midrule
Admin      &                0.883 &                    0.368 \\
Agg        &                0.907 &                    0.390 \\
Arts       &                0.939 &                    0.352 \\
Bus Ops    &                0.879 &                    0.356 \\
Care       &                0.978 &                    0.378 \\
Clean      &                1.009 &                    0.372 \\
Cons       &                1.054 &                    0.461 \\
Educ       &                0.713 &                    0.338 \\
Eng        &                0.871 &                    0.357 \\
Food S     &                1.163 &                    0.398 \\
Health P   &                0.749 &                    0.346 \\
Health S   &                0.731 &                    0.343 \\
Legal      &                0.923 &                    0.354 \\
Manag  

  print(df_matching_param.set_index('occupation').round(3).to_latex())


We compute the occupation-level recruiter-producer ratio by summing the recruiters assigned to each occupation across sectors and dividing that by the number of producers (employment net of recruiters) in each occupation. We impute the number of recruiters for occupation $o$ by:
\begin{align*}
    R_o &= \sum_i \frac{\varepsilon^f_{N_{io}}}{\varepsilon^f_{N_i}} R_{i},
\end{align*}
where $R_{i}$ is the number of recruiters in sector $i$. This implicitly assumes that recruiting costs are the same across occupations.

In [9]:
# Weight TOT_EMP_HR by `value` for every (short_names, variable) pair, then
# total the weighted amounts per `variable`.
dfOccuTau = dfTau.merge(dfepsN_weights, on=['short_names'])
dfOccuTau['TOT_EMP_HR'] = dfOccuTau['value'] * dfOccuTau['TOT_EMP_HR']
dfOccuTau = dfOccuTau.groupby('variable', as_index=False)['TOT_EMP_HR'].sum()
dfOccuTau.head()

Unnamed: 0,variable,TOT_EMP_HR
0,Admin,104065.650761
1,Agg,735.273165
2,Arts,19461.245237
3,Bus Ops,125385.402737
4,Care,14811.692624


In [10]:
# Load the OES occupation-by-sector table and keep the rows whose O_GROUP is
# 'major', restricted to the columns used below.
occ_by_ind = pd.read_excel("../../data/raw/OES/natsector_M2021_dl.xlsx")
# Selecting rows and columns in a single .loc call and converting TOT_EMP with
# .assign produces a fresh frame; assigning into a filtered slice instead can
# raise SettingWithCopyWarning and may leave the column non-numeric.
occ_by_ind = occ_by_ind.loc[
    occ_by_ind['O_GROUP'] == 'major',
    ['NAICS', 'NAICS_TITLE', "OCC_CODE", "OCC_TITLE", "TOT_EMP", "A_MEAN"],
].assign(
    # Non-numeric TOT_EMP entries are coerced to NaN and then counted as 0.
    TOT_EMP=lambda d: pd.to_numeric(d['TOT_EMP'], errors='coerce').fillna(0)
)

# Long occupation titles -> short labels used in the other tables.
occ_map = {'Architecture and Engineering Occupations': 'Eng', 
           'Arts, Design, Entertainment, Sports, and Media Occupations': 'Arts',
           'Building and Grounds Cleaning and Maintenance Occupations': 'Clean',
           'Business and Financial Operations Occupations': 'Bus Ops',
           'Community and Social Service Occupations': 'Soc S',
           'Computer and Mathematical Occupations': 'Math',
           'Construction and Extraction Occupations': 'Cons',
           'Educational Instruction and Library Occupations': 'Educ',
           'Farming, Fishing, and Forestry Occupations': 'Agg',
           'Food Preparation and Serving Related Occupations': 'Food S',
           'Healthcare Practitioners and Technical Occupations': 'Health P',
           'Healthcare Support Occupations': 'Health S', 
           'Installation, Maintenance, and Repair Occupations': 'Repair', 
           'Legal Occupations': 'Legal', 'Life, Physical, and Social Science Occupations':'Science',
           'Management Occupations': 'Manag', 'Office and Administrative Support Occupations': 'Admin',
           'Personal Care and Service Occupations': 'Care', 'Production Occupations': 'Prod',
           'Protective Service Occupations': 'Prot S', 'Sales and Related Occupations': 'Sales', 
           'Transportation and Material Moving Occupations': 'Trans'}

# Total employment per occupation across all NAICS rows, relabelled with the
# short names (the OCC_TITLE column then holds the short label).
occ_emp = occ_by_ind.groupby('OCC_TITLE')['TOT_EMP'].sum().rename(index=occ_map).reset_index()
occ_emp.head()

Unnamed: 0,OCC_TITLE,TOT_EMP
0,Eng,2407010.0
1,Arts,1808320.0
2,Clean,4108790.0
3,Bus Ops,8911520.0
4,Soc S,2213830.0


In [11]:
# Attach total employment per occupation, then derive
#   Workers = TOT_EMP - TOT_EMP_HR   and   Tau = TOT_EMP_HR / Workers.
dfOccuTau = (
    dfOccuTau.merge(occ_emp, left_on='variable', right_on='OCC_TITLE')
    .drop(columns='OCC_TITLE')
    .assign(
        Workers=lambda d: d['TOT_EMP'] - d['TOT_EMP_HR'],
        Tau=lambda d: d['TOT_EMP_HR'] / d['Workers'],
    )
)
dfOccuTau.to_csv('../../data/clean/tau_2021_occu.csv', index=False)
dfOccuTau.head()

Unnamed: 0,variable,TOT_EMP_HR,TOT_EMP,Workers,Tau
0,Admin,104065.650761,18299400.0,18195330.0,0.005719
1,Agg,735.273165,447120.0,446384.7,0.001647
2,Arts,19461.245237,1808320.0,1788859.0,0.010879
3,Bus Ops,125385.402737,8911520.0,8786135.0,0.014271
4,Care,14811.692624,2566440.0,2551628.0,0.005805


In [12]:
# LaTeX table of Tau by occupation, rounded to three decimals.
print(dfOccuTau.set_index('variable')[['Tau']].round(3).to_latex())

\begin{tabular}{lr}
\toprule
{} &    Tau \\
variable &        \\
\midrule
Admin    &  0.006 \\
Agg      &  0.002 \\
Arts     &  0.011 \\
Bus Ops  &  0.014 \\
Care     &  0.006 \\
Clean    &  0.006 \\
Cons     &  0.004 \\
Educ     &  0.005 \\
Eng      &  0.017 \\
Food S   &  0.002 \\
Health P &  0.009 \\
Health S &  0.003 \\
Legal    &  0.027 \\
Manag    &  0.019 \\
Math     &  0.020 \\
Prod     &  0.006 \\
Prot S   &  0.009 \\
Repair   &  0.007 \\
Sales    &  0.005 \\
Science  &  0.014 \\
Soc S    &  0.006 \\
Trans    &  0.003 \\
\bottomrule
\end{tabular}



  print(dfOccuTau[['variable', 'Tau']].set_index('variable').round(3).to_latex())
