# Compute transition numbers for IEA scenario occupations
Felix Zaussinger | 20.08.2021

## Core Analysis Goal(s)
1. Map transition pathways based on IEA scenario

## Key Insight(s)
1.

In [1]:
import os
import sys
import logging
from pathlib import Path

import numpy as np
import scipy as sp
import statsmodels.api as sm
from statsmodels.formula.api import ols

%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set_context("paper")
#sns.set(rc={'figure.figsize': (16, 9.)})
sns.set_style("ticks")

import pandas as pd
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

from tqdm import tqdm
from scipy.spatial.distance import pdist, squareform

logging.basicConfig(level=logging.INFO, stream=sys.stdout)

Define directory structure

In [2]:
# project directory
abspath = os.path.abspath('')
project_dir = str(Path(abspath).parents[0])

# sub-directories
data_raw = os.path.join(project_dir, "data", "raw")
data_interim = os.path.join(project_dir, "data", "interim")
data_processed = os.path.join(project_dir, "data", "processed")
figure_dir = os.path.join(project_dir, "reports", "figures")

ESCO data set components

In [3]:
occ = pd.read_csv(os.path.join(data_raw, "esco", "v1.0.3", "occupations_en.csv"))
skill_groups = pd.read_csv(os.path.join(data_raw, "esco", "v1.0.3", "skillGroups_en.csv"))
skills = pd.read_csv(os.path.join(data_raw, "esco", "v1.0.3", "skills_en.csv"))
occ_skills_mapping = pd.read_csv(os.path.join(data_raw, "esco", "v1.0.3", "occupationSkillRelations.csv"))

In [29]:
# metadata
df_metadata = pd.read_csv(
    os.path.join(data_interim, "ESCO_ONET_METADATA_gbn.csv"),
    index_col=0
)

df_metadata = df_metadata.set_index("id")

Unnamed: 0_level_0,concept_uri,preferred_label,isco_level_4,onet_code,onet_occupation,isco_level_1,isco_level_2,isco_level_3,occupation_type,n_new_green_tasks,n_existing_green_tasks,n_non_green_tasks,greenness_vona_2018,greenness_vona_2018_v2,total_paid_hours,annual_earnings,employment_share,employment_count,job_zone,education_level,related_work_experience,on_the_job_training,skill_level_ilo,new_jobs_absorbing_ilo_2030_es,new_jobs_net_ilo_2030_es,jobs_lost_net_ilo_2030_es,jobs_lost_reallocated_ilo_2030_es,net_change_ilo_2030_es,new_jobs_absorbing_ilo_2030_ce,new_jobs_net_ilo_2030_ce,jobs_lost_net_ilo_2030_ce,jobs_lost_reallocated_ilo_2030_ce,net_change_ilo_2030_ce,is_brown,is_green,is_neutral
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
0,http://data.europa.eu/esco/occupation/00030d09...,technical director,2166,27-1011.00,art directors,2,21,216,Non Green,,,,,0.0000,36.670202,37555.308176,,,4.0,6.2477,8.0309,4.3843,H,500000.0,263000.0,-253000.0,-500000.0,10000.0,1379000.0,1274000.0,-397000.0,-1379000.0,877000.0,False,False,True
1,http://data.europa.eu/esco/occupation/000e93a3...,metal drawing machine operator,8121,51-4021.00,"extruding and drawing machine setters, operato...",8,81,812,Non Green,,,,,0.0000,39.904110,22031.963636,0.000020,645.0,2.0,2.1163,2.4383,3.6486,M,339000.0,712000.0,-34000.0,-339000.0,678000.0,2913000.0,500.0,-5589000.0,-2913000.0,-5588500.0,True,False,False
2,http://data.europa.eu/esco/occupation/0019b951...,precision device inspector,7543,51-9061.00,"inspectors, testers, sorters, samplers, and we...",7,75,754,Green Enhanced Skills,2.0,0.0,30.0,0.066667,0.0625,38.709386,22430.367647,,,2.0,2.5397,4.8863,4.0288,M,48000.0,316000.0,-500.0,-48000.0,315500.0,1315000.0,408000.0,-440000.0,-1315000.0,-32000.0,False,True,False
3,http://data.europa.eu/esco/occupation/0022f466...,air traffic safety technician,3155,17-3023.01,electronics engineering technicians,3,31,315,Non Green,,,,,0.0000,42.036716,32754.000000,0.000087,2797.0,3.0,4.3848,5.4808,4.0782,H,748000.0,528000.0,-206000.0,-748000.0,322000.0,3042000.0,1225000.0,-965000.0,-3042000.0,260000.0,False,False,True
4,http://data.europa.eu/esco/occupation/002da35b...,hospitality revenue manager,2431,13-1161.00,market research analysts and marketing special...,2,24,243,Non Green,,,,,0.0000,36.354676,38839.816377,0.000648,20834.0,4.0,6.8261,6.3481,3.8265,H,218000.0,147000.0,-75000.0,-218000.0,72000.0,823000.0,837000.0,-53000.0,-823000.0,784000.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2937,http://data.europa.eu/esco/occupation/ff656b3a...,demographer,2120,15-2041.00,statisticians,2,21,212,Non Green,,,,,0.0000,35.494487,36135.290249,0.000286,9184.0,5.0,8.3000,5.5000,4.6000,H,500000.0,263000.0,-253000.0,-500000.0,10000.0,1379000.0,1274000.0,-397000.0,-1379000.0,877000.0,False,False,True
2938,http://data.europa.eu/esco/occupation/ff8d4065...,sorter labourer,9612,51-9199.01,recycling and reclamation workers,9,96,961,New Green N&E,18.0,0.0,0.0,1.000000,1.0000,39.443972,21357.054545,0.000417,13414.0,2.0,3.1532,3.3559,2.8314,L,153000.0,305000.0,-17000.0,-153000.0,288000.0,860000.0,1152000.0,-111000.0,-860000.0,1041000.0,False,True,False
2939,http://data.europa.eu/esco/occupation/ffa4dd5d...,armoured car guard,5414,33-9032.00,security guards,5,54,541,Non Green,,,,,0.0000,37.021429,24315.540984,,,2.0,2.2215,3.2642,2.0230,M,141000.0,179000.0,-27000.0,-141000.0,152000.0,744000.0,579000.0,-497000.0,-744000.0,82000.0,False,False,True
2940,http://data.europa.eu/esco/occupation/ffade2f4...,civil service administrative officer,2422,11-3011.00,administrative services managers,2,24,242,Non Green,,,,,0.0000,35.240566,33292.986333,0.001128,36261.0,3.0,3.9636,6.9711,3.4248,H,218000.0,147000.0,-75000.0,-218000.0,72000.0,823000.0,837000.0,-53000.0,-823000.0,784000.0,False,False,True


In [30]:
df_iea = pd.read_excel(
    os.path.join(data_raw, "iea_nze_2050", "NZE2021_AnnexA_occupation_matching_merged.xlsx"),
    sheet_name="Occupations_v2",
    header=1,
    #dtype={"id": np.int16, "isco_level_4": np.int16}
)

Unnamed: 0,id,concept_uri,preferred_label,isco_level_4,nze_category_1,nze_category_2,nze_category_3,nze_category_4,nze_category_5,nze_category_6,nze_category_7,nze_category_8,nze_category_9,nze_category_10,nze_category_11,nze_category_12,tes_share_2019,tes_share_2020,tes_share_2030,tes_share_2040,tes_share_2050,tes_2019,tes_2020,tes_2030,tes_2040,tes_2050,tes_pc_2019_2020,tes_pc_2019_2030,tes_pc_2019_2040,tes_pc_2019_2050
0,794,http://data.europa.eu/esco/occupation/42dbb769...,solar energy engineer,2149,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25
1,2914,http://data.europa.eu/esco/occupation/fd4b90ed...,solar energy sales consultant,2433,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25
2,1305,http://data.europa.eu/esco/occupation/6d823dbb...,solar power plant operator,3131,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25
3,1407,http://data.europa.eu/esco/occupation/75b63949...,solar energy technician,7411,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25
4,340,http://data.europa.eu/esco/occupation/1b99cad3...,wind energy engineer,2149,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8
5,1817,http://data.europa.eu/esco/occupation/98562ce3...,wind turbine technician,7412,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8
6,2312,http://data.europa.eu/esco/occupation/c3959398...,geothermal power plant operator,3131,Other renewables,,,,,,,,,,,,0.006536,0.008518,0.023766,0.04486,0.058932,4,5,13,24,32,0.25,2.25,5.0,7.0
7,181,http://data.europa.eu/esco/occupation/0e99c929...,renewable energy engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231
8,215,http://data.europa.eu/esco/occupation/114e1eff...,renewable energy consultant,2433,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231
9,2932,http://data.europa.eu/esco/occupation/ff3a164d...,renewable energy sales representative,3322,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231


In [34]:
# left-join
df_iea_merged = pd.merge(
    df_iea,
    df_metadata[["concept_uri", "annual_earnings", "job_zone"]],
    on="concept_uri",
    how="left"
)

In [35]:
df_iea_merged_falling = df_iea_merged.loc[df_iea["tes_pc_2019_2030"] < 0]
df_iea_merged_falling

Unnamed: 0,id,concept_uri,preferred_label,isco_level_4,nze_category_1,nze_category_2,nze_category_3,nze_category_4,nze_category_5,nze_category_6,nze_category_7,nze_category_8,nze_category_9,nze_category_10,nze_category_11,nze_category_12,tes_share_2019,tes_share_2020,tes_share_2030,tes_share_2040,tes_share_2050,tes_2019,tes_2020,tes_2030,tes_2040,tes_2050,tes_pc_2019_2020,tes_pc_2019_2030,tes_pc_2019_2040,tes_pc_2019_2050,annual_earnings,job_zone
25,2108,http://data.europa.eu/esco/occupation/b2cede50...,energy engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,35911.871608,4.0
26,2598,http://data.europa.eu/esco/occupation/de74eb4e...,energy consultant,3112,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,31261.073394,3.0
27,1361,http://data.europa.eu/esco/occupation/72381086...,power production plant operator,3131,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,21750.916667,2.0
28,390,http://data.europa.eu/esco/occupation/1ff61522...,energy systems engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,35911.871608,4.0
29,2412,http://data.europa.eu/esco/occupation/cbde1a3a...,energy analyst,3112,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,31261.073394,3.0
30,2404,http://data.europa.eu/esco/occupation/cb446db8...,power plant manager,1219,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,34973.799762,4.0
31,612,http://data.europa.eu/esco/occupation/3288a2fe...,utilities inspector,3119,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,27329.903418,3.0
32,1894,http://data.europa.eu/esco/occupation/a005f8a2...,energy manager,1349,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,42935.325666,4.0
33,2792,http://data.europa.eu/esco/occupation/f33a70bd...,power plant control room operator,3131,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,21750.916667,2.0
34,846,http://data.europa.eu/esco/occupation/47dd863b...,liquid fuel engineer,2146,Oil,Unabated natural gas,Natural gas with CCUS,Modern bioenergy,,,,,,,,,0.598039,0.591141,0.616088,0.469159,0.375691,366,347,337,251,204,-0.051913,-0.079235,-0.314208,-0.442623,36554.033333,4.0


In [24]:
# nesta report
sim_kanders = np.load(
    os.path.join(data_raw, "mcc_data", "processed", "sim_matrices", "OccupationSimilarity_Combined.npy")
)

np.fill_diagonal(sim_kanders, 0)

In [54]:
x, y = np.meshgrid(df_iea_merged.id.values, df_iea_merged.id.values)
sim_kanders_sub = sim_kanders[x, y]

Unnamed: 0,id,concept_uri,preferred_label,isco_level_4,nze_category_1,nze_category_2,nze_category_3,nze_category_4,nze_category_5,nze_category_6,nze_category_7,nze_category_8,nze_category_9,nze_category_10,nze_category_11,nze_category_12,tes_share_2019,tes_share_2020,tes_share_2030,tes_share_2040,tes_share_2050,tes_2019,tes_2020,tes_2030,tes_2040,tes_2050,tes_pc_2019_2020,tes_pc_2019_2030,tes_pc_2019_2040,tes_pc_2019_2050,annual_earnings,job_zone
0,794,http://data.europa.eu/esco/occupation/42dbb769...,solar energy engineer,2149,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,35911.871608,4.0
1,2914,http://data.europa.eu/esco/occupation/fd4b90ed...,solar energy sales consultant,2433,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,41242.61354,3.0
2,1305,http://data.europa.eu/esco/occupation/6d823dbb...,solar power plant operator,3131,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,21750.916667,2.0
3,1407,http://data.europa.eu/esco/occupation/75b63949...,solar energy technician,7411,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,32805.0,3.0
4,340,http://data.europa.eu/esco/occupation/1b99cad3...,wind energy engineer,2149,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8,35911.871608,4.0
5,1817,http://data.europa.eu/esco/occupation/98562ce3...,wind turbine technician,7412,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8,32003.736923,3.0
6,2312,http://data.europa.eu/esco/occupation/c3959398...,geothermal power plant operator,3131,Other renewables,,,,,,,,,,,,0.006536,0.008518,0.023766,0.04486,0.058932,4,5,13,24,32,0.25,2.25,5.0,7.0,21750.916667,2.0
7,181,http://data.europa.eu/esco/occupation/0e99c929...,renewable energy engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,35911.871608,4.0
8,215,http://data.europa.eu/esco/occupation/114e1eff...,renewable energy consultant,2433,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,41242.61354,4.0
9,2932,http://data.europa.eu/esco/occupation/ff3a164d...,renewable energy sales representative,3322,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,33113.643212,4.0


## Determine closest matches based on skill similarity

In [55]:
# source: https://github.com/nestauk/mapping-career-causeways/blob/main/mapping_career_causeways/compare_nodes_utils.py
def find_closest(i, similarity_matrix, df):
    """
    Method for reporting the closest neighbours to a node i given a similarity matrix;
    useful during exploratory data analysis.
    Parameters
    ----------
    i (int OR None):
        Determines for which node where are assessing the closest neighbours;
        if i==None, a random node is chosen.
    similarity_matrix (numpy.ndarray):
        Similarity matrix determining the closeness between each pair of nodes.
    df (pandas.DataFrame):
        Dataframe to be used for reporting the closest neighbours; must have then
        same number of rows as the similarity matrix
    Returns
    -------
    df (pandas.DataFrame):
        The same input dataframe with an added column for similarity values
        between node i and the rest of the nodes, ordered in a descending order
        of similarity.
    """
    if type(i) == type(None):
        i = np.random.randint(similarity_matrix.shape[0])

    most_similar = np.flip(np.argsort(similarity_matrix[i,:]))
    similarity = np.flip(np.sort(similarity_matrix[i,:]))

    df = df.copy().loc[most_similar]
    df['similarity'] = similarity
    return df

## Iterate over energy sector occupations and identify transition options

In [56]:
VIABLE = 0.3
HIGHLY_VIABLE = 0.4
EARNINGS_RATIO = 0.75
JOB_ZONE_DELTA = 1

In [64]:
df_iea_merged

Unnamed: 0,id,concept_uri,preferred_label,isco_level_4,nze_category_1,nze_category_2,nze_category_3,nze_category_4,nze_category_5,nze_category_6,nze_category_7,nze_category_8,nze_category_9,nze_category_10,nze_category_11,nze_category_12,tes_share_2019,tes_share_2020,tes_share_2030,tes_share_2040,tes_share_2050,tes_2019,tes_2020,tes_2030,tes_2040,tes_2050,tes_pc_2019_2020,tes_pc_2019_2030,tes_pc_2019_2040,tes_pc_2019_2050,annual_earnings,job_zone
0,794,http://data.europa.eu/esco/occupation/42dbb769...,solar energy engineer,2149,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,35911.871608,4.0
1,2914,http://data.europa.eu/esco/occupation/fd4b90ed...,solar energy sales consultant,2433,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,41242.61354,3.0
2,1305,http://data.europa.eu/esco/occupation/6d823dbb...,solar power plant operator,3131,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,21750.916667,2.0
3,1407,http://data.europa.eu/esco/occupation/75b63949...,solar energy technician,7411,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,32805.0,3.0
4,340,http://data.europa.eu/esco/occupation/1b99cad3...,wind energy engineer,2149,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8,35911.871608,4.0
5,1817,http://data.europa.eu/esco/occupation/98562ce3...,wind turbine technician,7412,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8,32003.736923,3.0
6,2312,http://data.europa.eu/esco/occupation/c3959398...,geothermal power plant operator,3131,Other renewables,,,,,,,,,,,,0.006536,0.008518,0.023766,0.04486,0.058932,4,5,13,24,32,0.25,2.25,5.0,7.0,21750.916667,2.0
7,181,http://data.europa.eu/esco/occupation/0e99c929...,renewable energy engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,35911.871608,4.0
8,215,http://data.europa.eu/esco/occupation/114e1eff...,renewable energy consultant,2433,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,41242.61354,4.0
9,2932,http://data.europa.eu/esco/occupation/ff3a164d...,renewable energy sales representative,3322,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,33113.643212,4.0


In [65]:
df_occ = pd.DataFrame(occ.conceptUri.values[df_iea_merged.id.values], columns=["conceptUri"])

# find closest occupations
find_closest(
    i=df_iea_merged_falling.index.values[0],
    similarity_matrix=sim_kanders_sub,
    df=df_iea_merged
)

Unnamed: 0,id,concept_uri,preferred_label,isco_level_4,nze_category_1,nze_category_2,nze_category_3,nze_category_4,nze_category_5,nze_category_6,nze_category_7,nze_category_8,nze_category_9,nze_category_10,nze_category_11,nze_category_12,tes_share_2019,tes_share_2020,tes_share_2030,tes_share_2040,tes_share_2050,tes_2019,tes_2020,tes_2030,tes_2040,tes_2050,tes_pc_2019_2020,tes_pc_2019_2030,tes_pc_2019_2040,tes_pc_2019_2050,annual_earnings,job_zone,similarity
28,390,http://data.europa.eu/esco/occupation/1ff61522...,energy systems engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,Traditional use of biomass,Nuclear,Unabated natural gas,Natural gas with CCUS,Oil,Unabated coal,Coal with CCUS,0.995098,1.0,0.996344,0.996262,0.998158,609,587,545,533,542,-0.036125,-0.10509,-0.124795,-0.110016,35911.871608,4.0,0.902372
0,794,http://data.europa.eu/esco/occupation/42dbb769...,solar energy engineer,2149,Solar,,,,,,,,,,,,0.006536,0.008518,0.058501,0.145794,0.200737,4,5,32,78,109,0.25,7.0,18.5,26.25,35911.871608,4.0,0.876538
35,1482,http://data.europa.eu/esco/occupation/7bc02fa7...,gas distribution engineer,2145,Unabated natural gas,Natural gas with CCUS,,,,,,,,,,,0.227124,0.23339,0.235832,0.140187,0.110497,139,137,129,75,60,-0.014388,-0.071942,-0.460432,-0.568345,35709.341373,4.0,0.834921
13,1060,http://data.europa.eu/esco/occupation/58db3ac6...,electric power generation engineer,2151,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,36489.627397,4.0,0.798391
7,181,http://data.europa.eu/esco/occupation/0e99c929...,renewable energy engineer,2149,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,35911.871608,4.0,0.773122
36,1645,http://data.europa.eu/esco/occupation/89860c2a...,gas production engineer,2145,Unabated natural gas,Natural gas with CCUS,,,,,,,,,,,0.227124,0.23339,0.235832,0.140187,0.110497,139,137,129,75,60,-0.014388,-0.071942,-0.460432,-0.568345,35709.341373,4.0,0.764218
4,340,http://data.europa.eu/esco/occupation/1b99cad3...,wind energy engineer,2149,Wind,,,,,,,,,,,,0.00817,0.010221,0.053016,0.125234,0.163904,5,6,29,67,89,0.2,4.8,12.4,16.8,35911.871608,4.0,0.754853
15,2035,http://data.europa.eu/esco/occupation/ac37627c...,power distribution engineer,2151,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,36489.627397,4.0,0.717608
17,2774,http://data.europa.eu/esco/occupation/f14194e2...,nuclear engineer,2149,Nuclear,,,,,,,,,,,,0.04902,0.049404,0.074954,0.100935,0.112339,30,29,41,54,61,-0.033333,0.366667,0.8,1.033333,35911.871608,4.0,0.710175
14,1613,http://data.europa.eu/esco/occupation/86ca306c...,electrical engineer,2151,Solar,Wind,Hydro,Modern bioenergy,Other renewables,,,,,,,,0.106209,0.117547,0.303473,0.547664,0.666667,65,69,166,293,362,0.061538,1.553846,3.507692,4.569231,36489.627397,4.0,0.681365


In [None]:
transition_numbers_all = {}
df_occ = pd.DataFrame(occ.conceptUri.values, columns=["conceptUri"])

# iterate over mining occupations
for i in tqdm(df_metadata.index.values):

    # find closest occupations
    sim_ranking = find_closest(
        i=i,
        similarity_matrix=sim_kanders,
        df=df_occ
    )

    # print source occupation
    source_occ = df_metadata.iloc[i, :]
    #print("Source occupation: {}".format(source_occ.preferred_label))

    # merge metadata
    result = pd.merge(
        sim_ranking,
        df_metadata[[
            "concept_uri",
            "preferred_label",
            "annual_earnings",
            "is_brown",
            "is_green",
            "is_neutral",
            "job_zone"
        ]],
        left_on="conceptUri",
        right_on="concept_uri",
        how="left"
    ).drop(columns=["concept_uri"], axis=1)

    # add transitions metadata
    result["job_zone_delta"] = np.abs(source_occ.job_zone - result.job_zone)
    result["earnings_ratio"] = result.annual_earnings / source_occ.annual_earnings

    result["is_viable"] = (result.similarity >= VIABLE) & (result.job_zone_delta <= JOB_ZONE_DELTA)
    result["is_highly_viable"] = (result.similarity >= HIGHLY_VIABLE)  & (result.job_zone_delta <= JOB_ZONE_DELTA)
    result["is_desirable"] = result.is_viable & (result.earnings_ratio >= EARNINGS_RATIO)
    result["is_highly_desirable"] = result.is_highly_viable & (result.earnings_ratio >= EARNINGS_RATIO)
    #result["is_green"] = result.greenness_vona_2018_v2 > 0
    #result["is_neutral"] = (result.greenness_vona_2018_v2 == 0) & (result.is_brown == False)

    # viable transitions by gbn
    result["is_viable_brown"] = result.is_viable & (result.is_brown == True)
    result["is_viable_neutral"] = result.is_viable & (result.is_neutral == True)
    result["is_viable_green"] = result.is_viable & (result.is_green == True)

    # highly viable transitions by gbn
    result["is_highly_viable_brown"] = result.is_highly_viable & (result.is_brown == True)
    result["is_highly_viable_neutral"] = result.is_highly_viable & (result.is_neutral == True)
    result["is_highly_viable_green"] = result.is_highly_viable & (result.is_green == True)

    # desirable transitions by gbn
    result["is_desirable_brown"] = result.is_desirable & (result.is_brown == True)
    result["is_desirable_neutral"] = result.is_desirable & (result.is_neutral == True)
    result["is_desirable_green"] = result.is_desirable & (result.is_green == True)

    # highly desirable transitions by gbn
    result["is_highly_desirable_brown"] = result.is_highly_desirable & (result.is_brown == True)
    result["is_highly_desirable_neutral"] = result.is_highly_desirable & (result.is_neutral == True)
    result["is_highly_desirable_green"] = result.is_highly_desirable & (result.is_green == True)

    # subset to viable transitions
    result_subset = result[result.is_viable == True]

    # subset to boolean cols
    boolean_cols = [
        'is_viable', 'is_highly_viable', 'is_desirable', 'is_highly_desirable',
        'is_viable_brown', 'is_viable_neutral', 'is_viable_green',
        'is_highly_viable_brown', 'is_highly_viable_neutral', 'is_highly_viable_green',
        'is_desirable_brown', 'is_desirable_neutral', 'is_desirable_green',
        'is_highly_desirable_brown', 'is_highly_desirable_neutral', 'is_highly_desirable_green'
    ]

    # subset to viable transitions only
    result_subset_bools = result_subset[boolean_cols]

    # count True's per boolean column
    s_sum = result_subset_bools.sum()

    transition_numbers_all[i] = s_sum

In [None]:
# to df
df_transition_numbers_all = pd.DataFrame.from_dict(transition_numbers_all).transpose()

# merge transition numbers to all the other metadata, export csv
df_metadata_transitions = pd.concat(
    [df_metadata, df_transition_numbers_all], axis=1
)

df_metadata_transitions.to_csv(
    os.path.join(data_interim, "esco_onet_metadata_transition_numbers_v2.csv")
)