In [1]:
# Widen the notebook container to 90% of the page so wide DataFrame previews fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML('<style>.container { width:90% !important; }</style>'))

# BLS Employment Projection 2016

* In this notebook, we combine three data sources from the BLS (Bureau of Labor Statistics) Employment Projections:
    1. Table 1.2 Employment by detailed occupation, 2016 and projected 2026
    2. Table 1.7 Occupational projections, 2016–26, and worker characteristics, 2016
    3. Table 1.10 Occupational separations and openings, projected 2016–26
---


* The aim is to build a single, complete dataset that brings together the related parts of the BLS Employment Projections.
* The tables (updated 2019) can be downloaded from the [BLS occupational data page](https://www.bls.gov/emp/data/occupational-data.htm).


    
    

In [2]:
import pandas as pd
import numpy as np

## Employment by Detailed Occupation

* Read the Employment by Detailed Occupation table (Table 1.2).
* The BLS detailed occupations are the main source of the O*NET structure, so it is important to have the employment projections for those titles.

In [3]:
# Load sheet "Table 1.2" (employment by detailed occupation) from the workbook.
# The path is relative to the notebook's working directory.
df_employment = pd.read_excel(
    io='csv_files/occupation.xlsx',
    sheet_name='Table 1.2',
)
df_employment.head()

FileNotFoundError: [Errno 2] No such file or directory: 'csv_files/occupation.xlsx'

In [5]:
# Short snake_case names for the ten columns of Table 1.2, listed in sheet order.
# Assigning to `.columns` raises if the count does not match the frame.
columns = [
    'bls_title',
    'onetsoccode',
    'type',
    'emp_number_2016',
    'emp_number_2026',
    'per_distr_2016',
    'per_distr_2026',
    'chng_number',
    'chng_perc',
    'open_average',
]
df_employment.columns = columns

In [6]:
# Preview the first seven rows to see where the real data starts: the leading
# rows still hold the spreadsheet's multi-row header.
df_employment.head(7)

Unnamed: 0,bls_title,onetsoccode,type,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average
0,2016 National Employment Matrix title and code,,Occupation type,Employment,,,,"Change, 2016-26",,"Occupational openings, 2016-26 annual average"
1,,,,Number,,Percent distribution,,,,
2,,,,2016,2026.0,2016,2026.0,Number,Percent,
3,"Total, all occupations",00-0000,Summary,156064,167582.3,100,100.0,11518.6,7.4,18742
4,Management occupations,11-0000,Summary,9533.1,10340.4,6.1,6.2,807.3,8.5,841.5
5,Top executives,11-1000,Summary,2627.5,2824.5,1.7,1.7,197,7.5,235
6,Chief executives,11-1011,Line item,308.9,296.8,0.2,0.2,-12.1,-3.9,20


In [7]:
# Skip the first four rows (the leftover spreadsheet header rows and the
# "Total, all occupations" row shown in the preview above).
# `.copy()` makes the result an independent frame, so the in-place clean-up
# steps that follow do not mutate a slice of the originally loaded frame
# (which would emit SettingWithCopyWarning).
df_employment = df_employment.iloc[4:].copy()
df_employment.head()

Unnamed: 0,bls_title,onetsoccode,type,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average
4,Management occupations,11-0000,Summary,9533.1,10340.4,6.1,6.2,807.3,8.5,841.5
5,Top executives,11-1000,Summary,2627.5,2824.5,1.7,1.7,197.0,7.5,235.0
6,Chief executives,11-1011,Line item,308.9,296.8,0.2,0.2,-12.1,-3.9,20.0
7,General and operations managers,11-1021,Line item,2263.1,2468.3,1.5,1.5,205.2,9.1,210.7
8,Legislators,11-1031,Line item,55.5,59.4,0.0,0.0,3.9,7.1,4.4


In [8]:
# Keep only the rows that are not aggregate "Summary" rows, then discard the
# `type` column, which is no longer needed once the summaries are gone.
is_summary = df_employment['type'] == 'Summary'
df_employment = df_employment.loc[~is_summary].drop(columns='type')
df_employment.head(3)

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average
6,Chief executives,11-1011,308.9,296.8,0.2,0.2,-12.1,-3.9,20.0
7,General and operations managers,11-1021,2263.1,2468.3,1.5,1.5,205.2,9.1,210.7
8,Legislators,11-1031,55.5,59.4,0.0,0.0,3.9,7.1,4.4


In [9]:
# Turn every missing cell into a single space, then keep only the rows whose
# occupation code is not that blank placeholder.
df_employment = df_employment.fillna(' ')
df_employment = df_employment[df_employment['onetsoccode'] != ' ']

In [10]:
# Renumber the rows from 0 now that header, summary and empty rows are gone.
df_employment = df_employment.reset_index(drop=True)
df_employment.head()

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average
0,Chief executives,11-1011,308.9,296.8,0.2,0.2,-12.1,-3.9,20.0
1,General and operations managers,11-1021,2263.1,2468.3,1.5,1.5,205.2,9.1,210.7
2,Legislators,11-1031,55.5,59.4,0.0,0.0,3.9,7.1,4.4
3,Advertising and promotions managers,11-2011,31.3,33.0,0.0,0.0,1.7,5.5,3.4
4,Marketing managers,11-2021,218.3,240.4,0.1,0.1,22.1,10.1,21.3


## Occupational Projections 2016 - 2026

In [11]:
# Load sheet "Table 1.7" (occupational projections and worker characteristics)
# and give its thirteen columns short snake_case names, in sheet order.
df_projection = pd.read_excel(
    io='csv_files/occupation.xlsx',
    sheet_name='Table 1.7',
)
columns = [
    'bls_title',
    'onetsoccode',
    'type',
    'emp_number_2016',
    'emp_number_2026',
    'chng_number',
    'chng_perc',
    'self_employed',
    'open_average',
    'median_wage',
    'education',
    'experience',
    'training',
]
df_projection.columns = columns
df_projection.head()

Unnamed: 0,bls_title,onetsoccode,type,emp_number_2016,emp_number_2026,chng_number,chng_perc,self_employed,open_average,median_wage,education,experience,training
0,2016 National Employment Matrix title and code,,Occupation\ntype,Employment,,"Employment Change, 2016-26",,"Percent self employed, 2016","Occupational openings, 2016-26 annual average","Median annual wage, 2017(1)",Typical education needed for entry,Work experience in a related occupation,Typical on-the-job training needed to attain c...
1,,,,2016,2026.0,Number,Percent,,,,,,
2,"Total, all occupations",00-0000,Summary,156064,167582.3,11518.6,7.4,6.1,18742,37690,—,—,—
3,Management occupations,11-0000,Summary,9533.1,10340.4,807.3,8.5,19.8,841.5,102590,—,—,—
4,Top executives,11-1000,Summary,2627.5,2824.5,197,7.5,3.2,235,103120,—,—,—


In [12]:
# Skip the first three rows (two header rows plus the all-occupations total)
# and replace every missing cell with a single space.
df_projection = df_projection.iloc[3:].fillna(' ')
# Keep detailed occupations only: not a "Summary" row and with a non-blank code.
is_detail_row = (df_projection['type'] != 'Summary') & (df_projection['onetsoccode'] != ' ')
df_projection = df_projection.loc[is_detail_row].drop(columns='type')
df_projection.head()

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,chng_number,chng_perc,self_employed,open_average,median_wage,education,experience,training
5,Chief executives,11-1011,308.9,296.8,-12.1,-3.9,22.8,20.0,183270,Bachelor's degree,5 years or more,
6,General and operations managers,11-1021,2263.1,2468.3,205.2,9.1,0.6,210.7,100410,Bachelor's degree,5 years or more,
7,Legislators,11-1031,55.5,59.4,3.9,7.1,—,4.4,25630,Bachelor's degree,Less than 5 years,
9,Advertising and promotions managers,11-2011,31.3,33.0,1.7,5.5,5.2,3.4,106130,Bachelor's degree,Less than 5 years,
11,Marketing managers,11-2021,218.3,240.4,22.1,10.1,3.5,21.3,132230,Bachelor's degree,5 years or more,


In [13]:
# Explicit label -> numeric code table for the `education` column.
# The codes are tied to the labels themselves, not to the position of a label
# in the sorted list of unique values, so a missing or additional label can
# no longer shift every code by one without anyone noticing.
# NOTE(review): the numeric values are kept exactly as before; the resulting
# order places "Postsecondary nondegree award" above "Bachelor's degree" --
# confirm this ranking is intended.
education_codes = {
    "No formal educational credential": 1,
    "High school diploma or equivalent": 2,
    "Associate's degree": 3,
    "Some college, no degree": 4,
    "Bachelor's degree": 5,
    "Postsecondary nondegree award": 6,
    "Master's degree": 7,
    "Doctoral or professional degree": 8,
}
education_unique = sorted(df_projection.education.unique())
# Fail loudly on a label that has no code instead of mis-mapping it.
unknown_education = [val for val in education_unique if val not in education_codes]
if unknown_education:
    raise ValueError('Unexpected education labels: ' + repr(unknown_education))
education_numeric = [education_codes[val] for val in education_unique]
for val, num in zip(education_unique, education_numeric):
    print(val + '--> ' + str(num))

Associate's degree--> 3
Bachelor's degree--> 5
Doctoral or professional degree--> 8
High school diploma or equivalent--> 2
Master's degree--> 7
No formal educational credential--> 1
Postsecondary nondegree award--> 6
Some college, no degree--> 4


In [14]:
# Replace each education label with its numeric code.
# The result is assigned back to the frame rather than calling
# `replace(..., inplace=True)` on the column accessor: that form mutates an
# intermediate Series and does not update the frame under pandas Copy-on-Write.
df_projection = df_projection.assign(
    education=df_projection['education'].replace(
        to_replace=education_unique,
        value=education_numeric,
    )
)

In [15]:
# Explicit label -> numeric code table for the `experience` column.
# Codes are looked up by label rather than paired by sorted position, so an
# unexpected set of labels cannot silently shift the codes.
experience_codes = {
    'None': 1,
    'Less than 5 years': 2,
    '5 years or more': 3,
}
experience_unique = sorted(df_projection.experience.unique())
# Fail loudly on a label that has no code instead of mis-mapping it.
unknown_experience = [val for val in experience_unique if val not in experience_codes]
if unknown_experience:
    raise ValueError('Unexpected experience labels: ' + repr(unknown_experience))
experience_numeric = [experience_codes[val] for val in experience_unique]
for val, num in zip(experience_unique, experience_numeric):
    print(val + '--> ' + str(num))

5 years or more--> 3
Less than 5 years--> 2
None--> 1


In [16]:
# Replace each experience label with its numeric code.
# The result is assigned back to the frame rather than calling
# `replace(..., inplace=True)` on the column accessor: that form mutates an
# intermediate Series and does not update the frame under pandas Copy-on-Write.
df_projection = df_projection.assign(
    experience=df_projection['experience'].replace(
        to_replace=experience_unique,
        value=experience_numeric,
    )
)

In [17]:
# Explicit label -> numeric code table for the `training` column.
# Codes are looked up by label rather than paired by sorted position, so an
# unexpected set of labels cannot silently shift the codes.
training_codes = {
    'None': 1,
    'Apprenticeship': 2,
    'Internship/residency': 3,
    'Short-term on-the-job training': 4,
    'Moderate-term on-the-job training': 5,
    'Long-term on-the-job training': 6,
}
training_unique = sorted(df_projection.training.unique())
# Fail loudly on a label that has no code instead of mis-mapping it.
unknown_training = [val for val in training_unique if val not in training_codes]
if unknown_training:
    raise ValueError('Unexpected training labels: ' + repr(unknown_training))
training_numeric = [training_codes[val] for val in training_unique]
for val, num in zip(training_unique, training_numeric):
    print(val + '--> ' + str(num))

Apprenticeship--> 2
Internship/residency--> 3
Long-term on-the-job training--> 6
Moderate-term on-the-job training--> 5
None--> 1
Short-term on-the-job training--> 4


In [18]:
# Replace each training label with its numeric code.
# The result is assigned back to the frame rather than calling
# `replace(..., inplace=True)` on the column accessor: that form mutates an
# intermediate Series and does not update the frame under pandas Copy-on-Write.
df_projection = df_projection.assign(
    training=df_projection['training'].replace(
        to_replace=training_unique,
        value=training_numeric,
    )
)

In [19]:
# Check that education, experience and training now hold numeric codes.
df_projection.head()

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,chng_number,chng_perc,self_employed,open_average,median_wage,education,experience,training
5,Chief executives,11-1011,308.9,296.8,-12.1,-3.9,22.8,20.0,183270,5,3,1
6,General and operations managers,11-1021,2263.1,2468.3,205.2,9.1,0.6,210.7,100410,5,3,1
7,Legislators,11-1031,55.5,59.4,3.9,7.1,—,4.4,25630,5,2,1
9,Advertising and promotions managers,11-2011,31.3,33.0,1.7,5.5,5.2,3.4,106130,5,2,1
11,Marketing managers,11-2021,218.3,240.4,22.1,10.1,3.5,21.3,132230,5,3,1


## Occupational Separations and Openings

In [20]:
# Load sheet "Table 1.10" (occupational separations and openings) and give its
# fourteen columns short snake_case names, in sheet order.
df_openings = pd.read_excel(
    io='csv_files/occupation.xlsx',
    sheet_name='Table 1.10',
)
columns = [
    'bls_title',
    'onetsoccode',
    'type',
    'emp_number_2016',
    'emp_number_2026',
    'chng_number',
    'chng_perc',
    'lab_force_exit_rate',
    'occ_transfer_rate',
    'total_rate',
    'lab_force_exit_number',
    'occ_transfer_number',
    'total_number',
    'open_average',
]
df_openings.columns = columns
df_openings.head()

Unnamed: 0,bls_title,onetsoccode,type,emp_number_2016,emp_number_2026,chng_number,chng_perc,lab_force_exit_rate,occ_transfer_rate,total_rate,lab_force_exit_number,occ_transfer_number,total_number,open_average
0,2016 National Employment Matrix title and code,,Occupation\ntype,Employment,,"Employment Change, 2016-26",,"Occupational separations rate, 2016-26 annual ...",,,"Occupational separations, 2016-26 annual average",,,"Occupational openings, 2016-26 annual average"
1,,,,2016,2026.0,Number,Percent,Labor force exits,Occupational transfers,Total,Labor force exits,Occupational transfers,Total,
2,"Total, all occupations",00-0000,Summary,156064,167582.3,11518.6,7.4,4.7,6.2,10.9,7548.6,10041.5,17590.2,18742
3,Management occupations,11-0000,Summary,9533.1,10340.4,807.3,8.5,2.8,4.8,7.7,281.6,479.1,760.8,841.5
4,Top executives,11-1000,Summary,2627.5,2824.5,197,7.5,2.2,5.7,7.9,59.3,156,215.3,235


In [21]:
# Skip the first three rows (two header rows plus the all-occupations total)
# and replace every missing cell with a single space.
df_openings = df_openings.iloc[3:].fillna(' ')
# Keep detailed occupations only: not a "Summary" row and with a non-blank code.
is_detail_row = (df_openings['type'] != 'Summary') & (df_openings['onetsoccode'] != ' ')
df_openings = (
    df_openings.loc[is_detail_row]
    .drop(columns='type')
    .reset_index(drop=True)
)
df_openings.head()

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,chng_number,chng_perc,lab_force_exit_rate,occ_transfer_rate,total_rate,lab_force_exit_number,occ_transfer_number,total_number,open_average
0,Chief executives,11-1011,308.9,296.8,-12.1,-3.9,2.9,4.1,7.0,8.7,12.4,21.2,20.0
1,General and operations managers,11-1021,2263.1,2468.3,205.2,9.1,2.1,6.0,8.0,49.0,141.1,190.2,210.7
2,Legislators,11-1031,55.5,59.4,3.9,7.1,2.7,4.2,6.9,1.6,2.4,4.0,4.4
3,Advertising and promotions managers,11-2011,31.3,33.0,1.7,5.5,2.6,7.5,10.1,0.8,2.4,3.2,3.4
4,Marketing managers,11-2021,218.3,240.4,22.1,10.1,2.3,6.1,8.3,5.2,13.9,19.1,21.3


## Merge all files

In [22]:
# Join the employment and projection tables on every column name they share.
# The default (inner) join keeps only rows whose shared columns all match, and
# `validate` raises if the join keys are not unique on both sides.
emp_proj = set(df_employment.columns) & set(df_projection.columns)
df_final = pd.merge(
    df_employment,
    df_projection,
    on=list(emp_proj),
    validate='one_to_one',
)

In [23]:
# Display the merged employment + projection frame.
df_final

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average,self_employed,median_wage,education,experience,training
0,Chief executives,11-1011,308.9,296.8,0.2,0.2,-12.1,-3.9,20,22.8,183270,5,3,1
1,General and operations managers,11-1021,2263.1,2468.3,1.5,1.5,205.2,9.1,210.7,0.6,100410,5,3,1
2,Legislators,11-1031,55.5,59.4,0,0,3.9,7.1,4.4,—,25630,5,2,1
3,Advertising and promotions managers,11-2011,31.3,33,0,0,1.7,5.5,3.4,5.2,106130,5,2,1
4,Marketing managers,11-2021,218.3,240.4,0.1,0.1,22.1,10.1,21.3,3.5,132230,5,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
814,Wellhead pumpers,53-7073,11.5,14,0,0,2.5,21.7,1.8,0.5,52260,2,2,5
815,Refuse and recyclable material collectors,53-7081,136,153.9,0.1,0.1,17.9,13.2,18.9,14.1,36160,1,1,4
816,Mine shuttle car operators,53-7111,1.5,1.2,0,0,-0.3,-21.9,0.1,—,56890,1,1,4
817,"Tank car, truck, and ship loaders",53-7121,10.8,11.4,0,0,0.6,5.2,1.4,—,36860,1,1,4


In [24]:
# Add the separations/openings columns, again joining on every shared column
# name and requiring the join keys to be unique on both sides.
final_open = set(df_final.columns) & set(df_openings.columns)
df_final = pd.merge(
    df_final,
    df_openings,
    on=list(final_open),
    validate='one_to_one',
)

In [25]:
# Lift the column display limit so every column of the wide frame is shown.
pd.options.display.max_columns = None
df_final.head()

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average,self_employed,median_wage,education,experience,training,lab_force_exit_rate,occ_transfer_rate,total_rate,lab_force_exit_number,occ_transfer_number,total_number
0,Chief executives,11-1011,308.9,296.8,0.2,0.2,-12.1,-3.9,20.0,22.8,183270,5,3,1,2.9,4.1,7.0,8.7,12.4,21.2
1,General and operations managers,11-1021,2263.1,2468.3,1.5,1.5,205.2,9.1,210.7,0.6,100410,5,3,1,2.1,6.0,8.0,49.0,141.1,190.2
2,Legislators,11-1031,55.5,59.4,0.0,0.0,3.9,7.1,4.4,—,25630,5,2,1,2.7,4.2,6.9,1.6,2.4,4.0
3,Advertising and promotions managers,11-2011,31.3,33.0,0.0,0.0,1.7,5.5,3.4,5.2,106130,5,2,1,2.6,7.5,10.1,0.8,2.4,3.2
4,Marketing managers,11-2021,218.3,240.4,0.1,0.1,22.1,10.1,21.3,3.5,132230,5,3,1,2.3,6.1,8.3,5.2,13.9,19.1


In [26]:
# Inspect column dtypes and non-null counts before the numeric conversion below.
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 819 entries, 0 to 818
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   bls_title              819 non-null    object
 1   onetsoccode            819 non-null    object
 2   emp_number_2016        819 non-null    object
 3   emp_number_2026        819 non-null    object
 4   per_distr_2016         819 non-null    object
 5   per_distr_2026         819 non-null    object
 6   chng_number            819 non-null    object
 7   chng_perc              819 non-null    object
 8   open_average           819 non-null    object
 9   self_employed          819 non-null    object
 10  median_wage            819 non-null    object
 11  education              819 non-null    int64 
 12  experience             819 non-null    int64 
 13  training               819 non-null    int64 
 14  lab_force_exit_rate    819 non-null    object
 15  occ_transfer_rate      

In [27]:
# Substitute 0 for every cell that holds the em-dash placeholder.
# NOTE(review): this makes the placeholder indistinguishable from a real zero;
# confirm that is acceptable for downstream use.
df_final = df_final.replace(to_replace='—', value=0)

In [28]:
# Every column except the two text identifiers is converted to a numeric dtype.
columns = df_final.columns.drop(['bls_title', 'onetsoccode'])
# The wage marker '>=$208,000' cannot be parsed as a number; store 208000 instead.
is_wage_marker = df_final['median_wage'] == '>=$208,000'
df_final['median_wage'] = df_final['median_wage'].mask(is_wage_marker, 208000)
df_final[columns] = df_final[columns].apply(pd.to_numeric)

In [29]:
def add_00(s):
    """Return the occupation code `s` with a '.00' suffix appended.

    A code that already ends in '.00' is returned unchanged, so running the
    conversion cell more than once does not produce '11-1011.00.00'.

    Parameters
    ----------
    s : str
        Occupation code, e.g. '11-1011'.

    Returns
    -------
    str
        The code with a '.00' suffix, e.g. '11-1011.00'.
    """
    if s.endswith('.00'):
        return s
    return s + '.00'
# Append the '.00' suffix to every occupation code.
df_final['onetsoccode'] = df_final['onetsoccode'].map(add_00)

In [30]:
# Final check of the combined frame: suffixed codes and numeric columns.
df_final.head()

Unnamed: 0,bls_title,onetsoccode,emp_number_2016,emp_number_2026,per_distr_2016,per_distr_2026,chng_number,chng_perc,open_average,self_employed,median_wage,education,experience,training,lab_force_exit_rate,occ_transfer_rate,total_rate,lab_force_exit_number,occ_transfer_number,total_number
0,Chief executives,11-1011.00,308.9,296.8,0.2,0.2,-12.1,-3.9,20.0,22.8,183270,5,3,1,2.9,4.1,7.0,8.7,12.4,21.2
1,General and operations managers,11-1021.00,2263.1,2468.3,1.5,1.5,205.2,9.1,210.7,0.6,100410,5,3,1,2.1,6.0,8.0,49.0,141.1,190.2
2,Legislators,11-1031.00,55.5,59.4,0.0,0.0,3.9,7.1,4.4,0.0,25630,5,2,1,2.7,4.2,6.9,1.6,2.4,4.0
3,Advertising and promotions managers,11-2011.00,31.3,33.0,0.0,0.0,1.7,5.5,3.4,5.2,106130,5,2,1,2.6,7.5,10.1,0.8,2.4,3.2
4,Marketing managers,11-2021.00,218.3,240.4,0.1,0.1,22.1,10.1,21.3,3.5,132230,5,3,1,2.3,6.1,8.3,5.2,13.9,19.1


In [31]:
# Write the combined table to `bls_projection.csv` in the working directory.
# `to_csv` writes the row index as the first (unnamed) column by default.
df_final.to_csv('bls_projection.csv')