# Industry-occupation crosswalk
Felix Zaussinger | 10.06.2021

## Core Analysis Goal(s)
1. Create a crosswalk that links the US industry-occupation matrix with EXIOBASE
industry-level forecasts to obtain granular occupational changes

## Key Insight(s)
1.

In [2]:
import os
import sys
import logging
from pathlib import Path

import numpy as np
import scipy as sp
import statsmodels.api as sm
from statsmodels.formula.api import ols

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Configure seaborn in one call. `sns.set()` re-applies its own default
# context ("notebook") every time it runs, so calling it *after*
# `sns.set_context("poster")` silently discarded the poster scaling.
# Passing context, style and rc together keeps all three settings in effect.
sns.set(context="poster", style="ticks", rc={'figure.figsize': (16, 9.)})

import pandas as pd
# let pandas show up to 120 rows / 120 columns before truncating a display
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, 120)

# send log records of level INFO and above to stdout
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

Define directory structure

In [3]:
# project directory: the parent of the current working directory
abspath = os.path.abspath('')
project_dir = str(Path(abspath).parents[0])

# sub-directories (kept as plain strings)
data_raw, data_interim, data_processed = (
    os.path.join(project_dir, "data", stage)
    for stage in ("raw", "interim", "processed")
)
figure_dir = os.path.join(project_dir, "reports", "figures")

### Step 1: Industry-specific occupational employment (US)

In [5]:
# path of the OEWS industry-occupation workbook inside the raw-data folder
fpath_oews = os.path.join(
    data_raw, "labour_market_data",
    "us_oews_survey_industry_occupations_matrix_2020.xlsx",
)

# read the sheet "All May 2020 data" into a DataFrame
ind_occ_matrix = pd.read_excel(io=fpath_oews, sheet_name="All May 2020 data")

In [12]:
# keep only the rows whose AREA_TITLE equals 'U.S.'
ind_occ_matrix = ind_occ_matrix.loc[ind_occ_matrix["AREA_TITLE"] == "U.S."]

Unnamed: 0,AREA,AREA_TITLE,AREA_TYPE,PRIM_STATE,NAICS,NAICS_TITLE,I_GROUP,OWN_CODE,OCC_CODE,OCC_TITLE,O_GROUP,TOT_EMP,EMP_PRSE,JOBS_1000,LOC_QUOTIENT,PCT_TOTAL,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,H_MEDIAN,H_PCT75,H_PCT90,A_PCT10,A_PCT25,A_MEDIAN,A_PCT75,A_PCT90,ANNUAL,HOURLY
0,99,U.S.,1,US,000000,Cross-industry,cross-industry,1235,00-0000,All Occupations,total,139099570,0.1,,,,27.07,56310,0.1,10.97,13.95,20.17,32.41,50.99,22810,29020,41950,67410,106050,,
1,99,U.S.,1,US,000000,Cross-industry,cross-industry,1235,11-0000,Management Occupations,major,7947300,0.2,,,,60.81,126480,0.2,24.84,35.7,52.77,76.71,#,51670,74250,109760,159550,#,,
2,99,U.S.,1,US,000000,Cross-industry,cross-industry,1235,11-1000,Top Executives,minor,2601070,0.4,,,,62.46,129920,0.2,21.41,32.57,51.05,80.73,#,44530,67740,106180,167930,#,,
3,99,U.S.,1,US,000000,Cross-industry,cross-industry,1235,11-1010,Chief Executives,broad,202360,1,,,,95.12,197840,0.5,30.18,55.06,89.4,#,#,62780,114530,185950,#,#,,
4,99,U.S.,1,US,000000,Cross-industry,cross-industry,1235,11-1011,Chief Executives,detailed,202360,1,,,,95.12,197840,0.5,30.18,55.06,89.4,#,#,62780,114530,185950,#,#,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169256,99,U.S.,1,US,5320A1,"Rental and Leasing Services (5322, 5323, and 5...",4-digit,5,53-7065,Stockers and Order Fillers,detailed,3360,8.8,,,1.08,15.69,32630,2.2,10.28,12.15,14.71,18.79,23.24,21380,25280,30590,39070,48330,,
169257,99,U.S.,1,US,5320A1,"Rental and Leasing Services (5322, 5323, and 5...",4-digit,5,53-7070,Pumping Station Operators,broad,1200,14.8,,,0.38,22.46,46720,8.4,14.08,15.96,20.27,25.65,36.21,29290,33200,42150,53350,75330,,
169258,99,U.S.,1,US,5320A1,"Rental and Leasing Services (5322, 5323, and 5...",4-digit,5,53-7072,"Pump Operators, Except Wellhead Pumpers",detailed,1190,14.8,,,0.38,22.46,46710,8.4,14.08,15.96,20.26,25.64,36.22,29280,33200,42130,53330,75340,,
169259,99,U.S.,1,US,5320A1,"Rental and Leasing Services (5322, 5323, and 5...",4-digit,5,53-7190,Miscellaneous Material Moving Workers,broad,50,34.6,,,0.02,18.67,38840,7.3,12.94,14.43,17.7,22.7,26.18,26920,30020,36820,47210,54450,,


In [16]:
# number of non-null OCC_CODE entries recorded for each NAICS code
ind_occ_matrix[["NAICS", "OCC_CODE"]].groupby("NAICS")["OCC_CODE"].count()

NAICS
000000    1329
000001    1310
11         304
113000     108
113300     108
          ... 
999101     684
999200     867
999201    1024
999300    1053
999301    1090
Name: OCC_CODE, Length: 420, dtype: int64

In [22]:
# distinct NAICS codes, in order of first appearance
ind_occ_matrix["NAICS"].unique()

array(['000000', '000001', '11', '21', '22', '23', '42', '51', '52', '53',
       '54', '55', '56', '61', '62', '71', '72', '81', '999200', '999300',
       '113000', '113300', '115000', '115100', '115200', '211000',
       '211100', '212000', '212100', '212200', '212300', '213000',
       '213100', '221000', '221100', '221111', '221112', '221113',
       '221114', '221115', '221116', '221117', '221118', '221200',
       '221300', '236000', '236100', '236200', '237000', '237100',
       '237130', '237200', '237300', '237900', '238000', '238100',
       '238110', '238140', '238160', '238200', '238210', '238220',
       '238290', '238300', '238310', '238320', '238900', '311000',
       '311100', '311200', '311300', '311400', '311500', '311600',
       '311700', '311800', '311900', '312000', '312100', '312200',
       '313000', '313100', '313200', '313300', '314000', '314100',
       '314900', '315000', '315100', '315200', '315900', '316000',
       '316100', '316200', '316900', '321000',

In [21]:
# NAICS codes that match power generation:
#   '221111', '221112', '221113', '221114', '221115', '221116', '221117', '221118'
# show the rows for one of them, '221113'
ind_occ_matrix[ind_occ_matrix["NAICS"] == "221113"]

Unnamed: 0,AREA,AREA_TITLE,AREA_TYPE,PRIM_STATE,NAICS,NAICS_TITLE,I_GROUP,OWN_CODE,OCC_CODE,OCC_TITLE,O_GROUP,TOT_EMP,EMP_PRSE,JOBS_1000,LOC_QUOTIENT,PCT_TOTAL,H_MEAN,A_MEAN,MEAN_PRSE,H_PCT10,H_PCT25,H_MEDIAN,H_PCT75,H_PCT90,A_PCT10,A_PCT25,A_MEDIAN,A_PCT75,A_PCT90,ANNUAL,HOURLY
20505,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,00-0000,All Occupations,total,40080,5.8,,,100,48.63,101150,1.8,23.61,34.5,47.52,60.33,74.69,49100,71750,98840,125500,155360,,
20506,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,11-0000,Management Occupations,major,3560,7.3,,,8.89,74.05,154030,2.7,43.82,56.66,71.07,87.32,#,91140,117860,147830,181630,#,,
20507,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,11-1000,Top Executives,minor,300,9.7,,,0.76,91.25,189800,4.6,37.73,64.84,83.86,#,#,78470,134860,174420,#,#,,
20508,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,11-1020,General and Operations Managers,broad,290,10.1,,,0.71,90,187210,4.8,38.42,64.77,82.42,#,#,79920,134720,171430,#,#,,
20509,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,11-1021,General and Operations Managers,detailed,290,10.1,,,0.71,90,187210,4.8,38.42,64.77,82.42,#,#,79920,134720,171430,#,#,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20719,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,53-0000,Transportation and Material Moving Occupations,major,350,4.4,,,0.86,35.13,73060,3,15.43,29.56,37.3,42.75,49.25,32100,61490,77580,88910,102450,,
20720,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,53-7000,Material Moving Workers,minor,320,4.7,,,0.79,33.99,70690,3,14.86,28.16,36.7,41.52,47.42,30900,58580,76330,86360,98630,,
20721,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,53-7060,Laborers and Material Movers,broad,250,5.2,,,0.61,33.66,70010,3.7,14.01,26.38,36.55,41.54,47.37,29150,54870,76020,86400,98540,,
20722,99,U.S.,1,US,221113,Nuclear Electric Power Generation,6-digit,5,53-7062,"Laborers and Freight, Stock, and Material Move...",detailed,120,6.4,,,0.29,35.36,73550,5.1,20.93,26.24,38.17,43.93,48.23,43530,54570,79400,91370,100310,,
