# Classification of brown jobs
Felix Zaussinger | 05.08.2021

## Core Analysis Goal(s)
1. Match the SOC-coded brown occupations from Vona et al. (2018) to ESCO occupations.

## Key Insight(s)
1. _TODO: summarise the key findings once the matching has been reviewed._

In [1]:
import os
import sys
import logging
from pathlib import Path

import numpy as np
import scipy as sp
import statsmodels.api as sm
from statsmodels.formula.api import ols

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
# Global seaborn styling for any figures produced in this notebook.
sns.set_context("poster")
# NOTE(review): per the seaborn docs, sns.set() is an alias of set_theme(),
# whose default context is "notebook"; calling it after set_context("poster")
# presumably overrides the poster context -- confirm the intended call order.
sns.set(rc={'figure.figsize': (16, 9.)})
sns.set_style("ticks")

import pandas as pd
# Raise the display limits so DataFrames show up to 120 rows and 120 columns.
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

# Configure the root logger: INFO level and above, written to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

Define the directory structure (all paths are resolved relative to the parent of the notebook's working directory).

In [2]:
# Project root: one level above the directory the notebook is executed from.
abspath = os.path.abspath('')
project_root = Path(abspath).parents[0]
project_dir = str(project_root)

# Data folders, one per processing stage.
data_raw, data_interim, data_processed = (
    os.path.join(project_dir, "data", stage)
    for stage in ("raw", "interim", "processed")
)

# Output folder for generated figures.
figure_dir = os.path.join(project_dir, "reports", "figures")

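Load the Vona et al. (2018) brown occupations and the ESCO–O*NET crosswalk, merge them on the O*NET code, and export the matched occupations.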

In [7]:
# Brown occupations from Vona et al. (2018), identified by SOC code.
vona_path = os.path.join(data_raw, "onet", "brown_occupations_vona2018.csv")
vona_occs = pd.read_csv(vona_path)

# Append '.00' so the codes take the same form as the crosswalk's onet_code
# values (e.g. '17-2041' -> '17-2041.00'); this padded code is the merge key.
vona_occs = vona_occs.assign(soc_code_padded=vona_occs["soc_code"] + '.00')
vona_occs

Unnamed: 0,soc_code,occupation,soc_code_padded
0,17-2041,Chemical Engineers,17-2041.00
1,17-2151,"Mining and Geological Engineers, Including Min...",17-2151.00
2,17-2171,Petroleum Engineers,17-2171.00
3,19-1012,Food Scientists and Technologists,19-1012.00
4,19-2031,Chemists,19-2031.00
5,19-4031,Chemical Technicians,19-4031.00
6,43-5041,"Meter Readers, Utilities",43-5041.00
7,45-4023,Log Graders and Scalers,45-4023.00
8,47-4071,Septic Tank Servicers and Sewer Pipe Cleaners,47-4071.00
9,47-5011,"Derrick Operators, Oil and Gas",47-5011.00


In [8]:
# ESCO <-> O*NET crosswalk; the first CSV column is used as the row index.
crosswalk_path = os.path.join(
    data_raw, "mcc_data", "processed", "ESCO_ONET_xwalk_full.csv"
)
crosswalk = pd.read_csv(crosswalk_path, index_col=0)

crosswalk

Unnamed: 0_level_0,concept_uri,preferred_label,isco_level_4,onet_code,onet_occupation
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,http://data.europa.eu/esco/occupation/00030d09...,technical director,2166,27-1011.00,art directors
1,http://data.europa.eu/esco/occupation/000e93a3...,metal drawing machine operator,8121,51-4021.00,"extruding and drawing machine setters, operato..."
2,http://data.europa.eu/esco/occupation/0019b951...,precision device inspector,7543,51-9061.00,"inspectors, testers, sorters, samplers, and we..."
3,http://data.europa.eu/esco/occupation/0022f466...,air traffic safety technician,3155,17-3023.01,electronics engineering technicians
4,http://data.europa.eu/esco/occupation/002da35b...,hospitality revenue manager,2431,13-1161.00,market research analysts and marketing special...
...,...,...,...,...,...
2937,http://data.europa.eu/esco/occupation/ff656b3a...,demographer,2120,15-2041.00,statisticians
2938,http://data.europa.eu/esco/occupation/ff8d4065...,sorter labourer,9612,51-9199.01,recycling and reclamation workers
2939,http://data.europa.eu/esco/occupation/ffa4dd5d...,armoured car guard,5414,33-9032.00,security guards
2940,http://data.europa.eu/esco/occupation/ffade2f4...,civil service administrative officer,2422,11-3011.00,administrative services managers


In [14]:
# Left-join the Vona occupations onto the SOC-ESCO crosswalk so that every
# brown occupation is retained at first, whether or not an ESCO match exists.
# NOTE(review): the join key always ends in '.00', while the crosswalk also
# holds codes with other suffixes (e.g. '17-3023.01'); such rows can never
# match here -- confirm that this is intended.
df_joined = pd.merge(
    vona_occs,
    crosswalk,
    left_on="soc_code_padded",
    right_on="onet_code",
    how="left"
)

# Report which brown occupations have no ESCO counterpart before they are
# discarded, so the coverage of the match is visible rather than lost silently.
is_unmatched = df_joined["concept_uri"].isna()
unmatched_codes = (
    df_joined.loc[is_unmatched, "soc_code"].dropna().unique().tolist()
)
logging.info(
    "%d of %d Vona SOC codes have no ESCO match: %s",
    len(unmatched_codes),
    vona_occs["soc_code"].nunique(),
    unmatched_codes,
)

# Keep only the rows for which an ESCO occupation was found.
df_merged = df_joined.dropna(subset=["concept_uri"])

In [15]:
# Persist the matched Vona-ESCO occupations as an interim dataset
# (the DataFrame index is written as the first CSV column).
out_path = os.path.join(data_interim, "occupations_brown_vona_esco.csv")
df_merged.to_csv(out_path)