In [1]:
import numpy as np
import pandas as pd

In [2]:
data_dir = '../../data/clean/'
dfTau    = pd.read_csv(data_dir + 'tau_2021.csv').sort_values(by=['short_names'])
dfepsN   = pd.read_csv(data_dir + 'epsN_2021.csv', index_col=0).sort_index(axis=1)
dfcurlyL = pd.read_csv(data_dir + 'curlyL_2021.csv', index_col=0).sort_index()
dfLshare = pd.read_csv(data_dir + 'labor_tab.csv')
occupation_names = list(dfcurlyL.index)
dfepsN_weights = dfepsN.rename_axis('short_names').reset_index().melt(id_vars='short_names')

In [3]:
dfepsN_weights.groupby('short_names')['value'].sum()

short_names
accom       1.0
const       1.0
dur         1.0
edhealth    1.0
fin         1.0
gov         1.0
info        1.0
mining      1.0
nondur      1.0
other       1.0
profserv    1.0
trade       1.0
trans       1.0
Name: value, dtype: float64

We compute occupation-level recruiter-producer ratio by computing an average of recruiter-producer ratios for industries that employ this occupation, weighted by their wage expenditure 

In [4]:
dfTau.head()

Unnamed: 0,Industry,TOT_EMP,TOT_EMP_HR,Workers,Tau,short_names
6,Leisure and hospitality,13558650.0,21710.0,13536940.0,0.001604,accom
0,Construction,7368800.0,18050.0,7350750.0,0.002456,const
1,Durable goods manufacturing,7511780.0,51040.0,7460740.0,0.006841,dur
2,Education and health services,33322000.0,163750.0,33158250.0,0.004938,edhealth
3,Financial activities,8198430.0,65380.0,8133050.0,0.008039,fin


In [5]:
dfOccuTau = pd.merge(dfTau, dfepsN_weights, on=['short_names']) 
dfOccuTau.loc[:, "OccuTau"] = dfOccuTau['value'] * dfOccuTau['Tau']
dfOccuTau = dfOccuTau.groupby('variable').aggregate({'OccuTau': 'sum'})
dfOccuTau.head()

Unnamed: 0_level_0,OccuTau
variable,Unnamed: 1_level_1
Admin,0.009965
Agg,7.5e-05
Arts,0.002922
Bus Ops,0.012017
Care,0.002922


In [6]:
uvh= pd.read_csv(data_dir + 'uvh_updated.csv')
uvh = uvh.sort_values(by=['Date', 'short_names']).dropna()
uvh = uvh.dropna(axis=0)
uvhOccu = pd.merge(uvh, dfepsN_weights, on=['short_names'])
uvhOccu.loc[:, "Unemployment"] = uvhOccu['value'] * uvhOccu['Unemployment']
uvhOccu.loc[:, "Vacancy"] = uvhOccu['value'] * uvhOccu['Vacancy']
uvhOccu.loc[:, "Hires"] = uvhOccu['value'] * uvhOccu['Hires']
uvhOccu = uvhOccu.groupby(['variable', 'Date']).aggregate({'Unemployment': 'sum', 'Vacancy': 'sum', 'Hires': 'sum'})
uvhOccu.loc[:, 'Tightness'] = uvhOccu['Vacancy'] / uvhOccu['Unemployment']
uvhOccu = uvhOccu.reset_index()
uvhOccu.to_csv("../../data/clean/uvh_updated_occu.csv", index=False)
uvhOccu

Unnamed: 0,variable,Date,Unemployment,Vacancy,Hires,Tightness
0,Admin,2000-12-01,401.652763,472.240013,486.385873,1.175742
1,Admin,2001-01-01,515.175272,484.882378,513.372481,0.941199
2,Admin,2001-02-01,505.853061,474.463734,476.860111,0.937948
3,Admin,2001-03-01,498.533067,437.249131,490.399002,0.877071
4,Admin,2001-04-01,468.959882,426.244377,460.446630,0.908914
...,...,...,...,...,...,...
5869,Trans,2022-10-01,367.740983,674.910704,478.614977,1.835288
5870,Trans,2022-11-01,413.172366,674.182095,487.533580,1.631721
5871,Trans,2022-12-01,401.700062,742.819491,486.529378,1.849189
5872,Trans,2023-01-01,467.079602,782.853482,493.697452,1.676060


In [7]:
uvhOccu.loc[:, 'Year'] = pd.to_datetime(uvhOccu['Date']).apply(lambda x: x.year)
uvhOccu_annual = uvhOccu.dropna().groupby(['variable', 'Year']).aggregate({'Unemployment': sum, 'Vacancy': sum, 'Hires':sum}).reset_index()
uvhOccu_annual.loc[:, 'Tightness'] = uvhOccu_annual['Vacancy'] / uvhOccu_annual['Unemployment']
uvhOccu_annual.to_csv('../../data/clean/uvh_annual_updated_occu.csv', index=False)
uvhOccu_annual.head()

Unnamed: 0,variable,Year,Unemployment,Vacancy,Hires,Tightness
0,Admin,2000,401.652763,472.240013,486.385873,1.175742
1,Admin,2001,6367.115602,4880.02025,5595.085446,0.766441
2,Admin,2002,7954.439156,3917.998302,5235.329648,0.492555
3,Admin,2003,8255.590229,3694.111197,5098.045391,0.447468
4,Admin,2004,7566.42734,4127.036589,5454.298855,0.545441


In [8]:
def matching_estimation2(df):
    log_H = np.log(np.array(df[['Hires']]))
    log_U = np.log(np.array(df[['Unemployment']]))
    log_U = log_U[~np.isnan(log_H)]
    log_V = np.log(np.array(df[['Vacancy']]))
    log_V = log_V[~np.isnan(log_H)]
    log_H = log_H[~np.isnan(log_H)]
    Y = np.ones((log_H.shape[0],1))
    Y[:,0] = log_H - log_V
    X = np.ones((log_H.shape[0],2))
    X[:,1] = log_U - log_V
    theta = np.linalg.inv(X.T @ X) @ (X.T @ Y)
    theta[0] = np.exp(theta[0])
    return theta.flatten()

matching_param2 = uvhOccu.groupby("variable").apply(matching_estimation2) 
matching_efficiency = []
unemployment_elasticity = []
for i in range(matching_param2.size):
    matching_efficiency.append(matching_param2.iloc[i][0])
    unemployment_elasticity.append(matching_param2.iloc[i][1])
df_matching_param = pd.DataFrame({'occupation':matching_param2.index.values,'matching_efficiency':matching_efficiency,
                                   'unemployment_elasticity':unemployment_elasticity})
df_matching_param.to_csv('../../data/clean/matching_param_estimates_occu.csv', index=False)
df_matching_param.head()

Unnamed: 0,occupation,matching_efficiency,unemployment_elasticity
0,Admin,0.882588,0.368118
1,Agg,0.906748,0.389782
2,Arts,0.939268,0.351735
3,Bus Ops,0.878515,0.355697
4,Care,0.978076,0.377744
