In [None]:
# dependencies
import os
import re
import numpy as np
import pandas as pd
from google.colab import drive, userdata

# file management
# Mount Google Drive into the Colab filesystem; WORK_DIR is the project folder
# on that drive from which the paths used below are built.
DRIVE_ROOT = '/content/drive'
drive.mount(DRIVE_ROOT)
WORK_DIR = f"{DRIVE_ROOT}/MyDrive/Projects/Master's Thesis"

# work dir shortcut function
def work_dir(*args):
    """Return a path inside WORK_DIR.

    Args:
        *args: Path components appended to WORK_DIR, in order.

    Returns:
        The joined path as a string; WORK_DIR itself when no components are given.
    """
    path_parts = (WORK_DIR, *args)
    return os.path.join(*path_parts)

Mounted at /content/drive


In [None]:
# load skills and skill-to-occupation relations
# Both tables come from the same ESCO v1.1.2 English CSV export folder.
esco_dir = work_dir('ESCO', 'ESCO dataset - v1.1.2 - classification - en - csv')
skills = pd.read_csv(os.path.join(esco_dir, 'skills_en.csv'))
relations = pd.read_csv(os.path.join(esco_dir, 'occupationSkillRelations_en.csv'))

# quick look at the relations table: its shape, then the first rows
print(relations.shape)
relations.head()

(123855, 4)


Unnamed: 0,occupationUri,relationType,skillType,skillUri
0,http://data.europa.eu/esco/occupation/00030d09...,essential,knowledge,http://data.europa.eu/esco/skill/fed5b267-73fa...
1,http://data.europa.eu/esco/occupation/00030d09...,essential,skill/competence,http://data.europa.eu/esco/skill/05bc7677-5a64...
2,http://data.europa.eu/esco/occupation/00030d09...,essential,skill/competence,http://data.europa.eu/esco/skill/271a36a0-bc7a...
3,http://data.europa.eu/esco/occupation/00030d09...,essential,skill/competence,http://data.europa.eu/esco/skill/47ed1d37-971b...
4,http://data.europa.eu/esco/occupation/00030d09...,essential,skill/competence,http://data.europa.eu/esco/skill/591dd514-735b...


In [None]:
# calculate co-occurrences of skills
# Occupation x skill count table: one row per occupationUri, one column per skillUri.
cross_freq = pd.crosstab(relations['occupationUri'], relations['skillUri'])

# The skill x skill co-occurrence matrix is cached as a CSV in the work dir.
# Load it when present; otherwise build it from cross_freq and write the cache,
# so the notebook also runs from scratch without un-commenting code.
co_occur_path = work_dir('Data', 'co_occurrences.csv')
if os.path.exists(co_occur_path):
    co_occur = pd.read_csv(co_occur_path, index_col=0)
else:
    co_occur = cross_freq.T @ cross_freq
    co_occur.to_csv(co_occur_path)
    # The CSV round trip does not keep the column axis name; drop it here as well
    # so that both branches produce the same frame. Index.rename returns a new
    # Index, so cross_freq's own columns are left untouched.
    co_occur.columns = co_occur.columns.rename(None)

In [None]:
# calculate idf-adjusted related skill frequency scores (i.e. co-occurrences of skills within occupations per skill)
# idf per skill = log(number of occupations / number of relations listing that skill).
# Multiplying the frame by the Series aligns on column labels, so column j of
# co_occur is weighted by the idf of skill j.
idf = np.log(len(cross_freq) / cross_freq.sum())
scores = co_occur * idf

# set diagonal to -inf for not selecting skill itself as related
# Work on an explicitly owned float array instead of mutating `scores.values`:
# `.values` can be a read-only view (pandas Copy-on-Write) or a silent copy
# (mixed dtypes), in which case the in-place fill either raises or is lost.
# Like the original, this relies on rows and columns being in the same skill order.
score_matrix = scores.to_numpy(dtype=float, copy=True)
np.fill_diagonal(score_matrix, -np.inf)
scores = pd.DataFrame(score_matrix, index=scores.index, columns=scores.columns, copy=False)

# get top k related per skill
# Sorting the negated scores ascending yields descending score order per row.
# NOTE(review): rows with fewer than k non-zero scores appear to be padded with
# zero-score skills in whatever order argsort leaves ties - confirm this is acceptable.
k = 10
top_idx = np.argsort(-score_matrix, axis=1)[:, :k]
related = pd.DataFrame(
    scores.columns.to_numpy()[top_idx],  # positional indices -> skill URIs in one vectorised lookup
    index=scores.index,
    columns=[f'related_{i+1}' for i in range(k)]
)

In [None]:
# check related distribution
# Count how often each skill URI appears anywhere in `related`. Every column of
# `related` is a related_* column, so stack the whole frame instead of a
# hard-coded 1..10 column list that would go stale if k is changed.
related.stack().value_counts()

Unnamed: 0,count
http://data.europa.eu/esco/skill/5ba1d5b9-3f8d-46ca-bf9f-2e9ca65d1350,158
http://data.europa.eu/esco/skill/bf7a45c4-0d72-4e6e-9da7-127b2b6656b7,155
http://data.europa.eu/esco/skill/bc250150-3372-42c9-834a-a1b65f89ca0b,152
http://data.europa.eu/esco/skill/efc1d46f-39ab-4c73-ade6-0a99a90dbeba,113
http://data.europa.eu/esco/skill/f66b2fc3-2952-41e5-89bf-a9157ea29420,96
...,...
http://data.europa.eu/esco/skill/c5fdbd81-db10-4fc3-8cee-8b1a7169c472,1
http://data.europa.eu/esco/skill/c2f06797-190e-4a74-b8c2-a988c3cc8186,1
http://data.europa.eu/esco/skill/91571a67-f9e1-4c5b-80c5-43f3d14a3747,1
http://data.europa.eu/esco/skill/b7c00b85-e5b5-4c97-8bef-f0d7bd9db8d7,1


In [None]:
# test skill by idx
# Spot check: pick one row of the skills table and show the labels of its related skills.
idx = 10000
skill_row = skills.loc[idx]
print('Skills related to:', skill_row['preferredLabel'])

# lookup table from skill URI to human-readable label
label_by_uri = skills.set_index('conceptUri')['preferredLabel']
related_uris = pd.Series(related.loc[skill_row['conceptUri']])
related_uris.map(label_by_uri)

Skills related to: deal with departures in accommodation


Unnamed: 0,http://data.europa.eu/esco/skill/b83669f2-bf53-47ed-af9f-397c5f2dfd4d
related_1,deal with arrivals in accommodation
related_2,carry out end of day accounts
related_3,process reservations
related_4,assess cleanliness of areas
related_5,maintain customer records
related_6,greet guests
related_7,process booking
related_8,provide tourism related information
related_9,comply with food safety and hygiene
related_10,handle customer complaints


In [None]:
# save to csv
related_path = work_dir('Data', 'related_skills.csv')
related.to_csv(related_path)

# sanity check
# read the file straight back, using the first column as the index again
pd.read_csv(related_path, index_col=0)

Unnamed: 0_level_0,related_1,related_2,related_3,related_4,related_5,related_6,related_7,related_8,related_9,related_10
skillUri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
http://data.europa.eu/esco/skill/0005c151-5b5a-4a66-8aac-60e734beb1ab,http://data.europa.eu/esco/skill/4109c79f-0332...,http://data.europa.eu/esco/skill/339f165c-0002...,http://data.europa.eu/esco/skill/aa755e1d-81cd...,http://data.europa.eu/esco/skill/3e2d3720-84e3...,http://data.europa.eu/esco/skill/fbb9ceec-26c2...,http://data.europa.eu/esco/skill/d11e4683-d2fe...,http://data.europa.eu/esco/skill/e3840f20-1928...,http://data.europa.eu/esco/skill/d205b276-7f2b...,http://data.europa.eu/esco/skill/1e3e7f8b-6416...,http://data.europa.eu/esco/skill/52eb7ab6-269f...
http://data.europa.eu/esco/skill/00064735-8fad-454b-90c7-ed858cc993f2,http://data.europa.eu/esco/skill/8012b58f-58a3...,http://data.europa.eu/esco/skill/70165e8b-322c...,http://data.europa.eu/esco/skill/d4a0744a-508b...,http://data.europa.eu/esco/skill/3aeab77e-daa2...,http://data.europa.eu/esco/skill/af43a17b-8ac2...,http://data.europa.eu/esco/skill/03f922b0-d518...,http://data.europa.eu/esco/skill/6833e58b-eceb...,http://data.europa.eu/esco/skill/42645b84-0699...,http://data.europa.eu/esco/skill/7837ef5c-2c81...,http://data.europa.eu/esco/skill/f0153530-9ff3...
http://data.europa.eu/esco/skill/000709ed-2be5-4193-b056-45a97698d828,http://data.europa.eu/esco/skill/758a4e08-3133...,http://data.europa.eu/esco/skill/2ffc7a73-7e1d...,http://data.europa.eu/esco/skill/c3da8380-7049...,http://data.europa.eu/esco/skill/486df8bc-3498...,http://data.europa.eu/esco/skill/e55e230e-c308...,http://data.europa.eu/esco/skill/ee4a78d4-af84...,http://data.europa.eu/esco/skill/72cde4b2-b2ee...,http://data.europa.eu/esco/skill/0d9c959a-cdd4...,http://data.europa.eu/esco/skill/4211283b-617e...,http://data.europa.eu/esco/skill/2e62b29d-3aa8...
http://data.europa.eu/esco/skill/0007bdc2-dd15-4824-b7d6-416522c46f35,http://data.europa.eu/esco/skill/af0bbc31-b54d...,http://data.europa.eu/esco/skill/2bbdea06-a265...,http://data.europa.eu/esco/skill/bbd57056-80e0...,http://data.europa.eu/esco/skill/72a74f69-5cf1...,http://data.europa.eu/esco/skill/c5c51528-ccfc...,http://data.europa.eu/esco/skill/209a5498-3449...,http://data.europa.eu/esco/skill/b07daddc-8625...,http://data.europa.eu/esco/skill/74479992-60e3...,http://data.europa.eu/esco/skill/68b6ef08-d823...,http://data.europa.eu/esco/skill/efa141df-f382...
http://data.europa.eu/esco/skill/00090cc1-1f27-439e-a4e0-19a87a501bfc,http://data.europa.eu/esco/skill/44639cff-84b4...,http://data.europa.eu/esco/skill/9f0e78ef-bbe4...,http://data.europa.eu/esco/skill/e83d8e27-542c...,http://data.europa.eu/esco/skill/695c3249-18e7...,http://data.europa.eu/esco/skill/50b631f8-1ed7...,http://data.europa.eu/esco/skill/03f922b0-d518...,http://data.europa.eu/esco/skill/b88fde8f-1b98...,http://data.europa.eu/esco/skill/415f816c-07ae...,http://data.europa.eu/esco/skill/a4fd17bf-6309...,http://data.europa.eu/esco/skill/66d47084-9437...
...,...,...,...,...,...,...,...,...,...,...
http://data.europa.eu/esco/skill/ffef5eb3-a15e-47f5-8c2f-490516c16f6f,http://data.europa.eu/esco/skill/30a4d870-d6fc...,http://data.europa.eu/esco/skill/84a67aa9-1d83...,http://data.europa.eu/esco/skill/a10aabfa-beb1...,http://data.europa.eu/esco/skill/8460496d-aa54...,http://data.europa.eu/esco/skill/50ed4d18-60a8...,http://data.europa.eu/esco/skill/3e20dad4-2b69...,http://data.europa.eu/esco/skill/9646b1e4-7440...,http://data.europa.eu/esco/skill/cd2efd98-b3c7...,http://data.europa.eu/esco/skill/a271a40e-ec1e...,http://data.europa.eu/esco/skill/af5e7f45-325f...
http://data.europa.eu/esco/skill/fff0b074-5a76-4acd-a33b-ea464086f159,http://data.europa.eu/esco/skill/cafc2723-c308...,http://data.europa.eu/esco/skill/cffc3e97-e942...,http://data.europa.eu/esco/skill/6a152c3a-5d86...,http://data.europa.eu/esco/skill/5bf7f85a-b080...,http://data.europa.eu/esco/skill/dcc96a49-0bac...,http://data.europa.eu/esco/skill/68b6ef08-d823...,http://data.europa.eu/esco/skill/5133ed7b-0785...,http://data.europa.eu/esco/skill/69bb4be4-8b8e...,http://data.europa.eu/esco/skill/c8ac1986-38fa...,http://data.europa.eu/esco/skill/7d895bdf-bc85...
http://data.europa.eu/esco/skill/fff0e2cd-d0bd-4b02-9daf-158b79d9688a,http://data.europa.eu/esco/skill/7482a123-e801...,http://data.europa.eu/esco/skill/2618f336-8e71...,http://data.europa.eu/esco/skill/fda917eb-0d6a...,http://data.europa.eu/esco/skill/e465a154-93f7...,http://data.europa.eu/esco/skill/a9bf8565-8a09...,http://data.europa.eu/esco/skill/d9e5349e-8791...,http://data.europa.eu/esco/skill/1bba98a7-92b9...,http://data.europa.eu/esco/skill/32a2c63d-2d13...,http://data.europa.eu/esco/skill/a056851d-4a8b...,http://data.europa.eu/esco/skill/2b24cbb7-f94e...
http://data.europa.eu/esco/skill/fff5bc45-b506-4466-8977-4869079c1cb2,http://data.europa.eu/esco/skill/6181f475-110c...,http://data.europa.eu/esco/skill/5518db9d-4a2f...,http://data.europa.eu/esco/skill/86df7af2-f9f3...,http://data.europa.eu/esco/skill/4fa2d6ba-4fff...,http://data.europa.eu/esco/skill/c643e59b-d74c...,http://data.europa.eu/esco/skill/7c05199f-7b03...,http://data.europa.eu/esco/skill/be7c80ee-0b7a...,http://data.europa.eu/esco/skill/50f2d23c-b3fb...,http://data.europa.eu/esco/skill/a3e0224a-c952...,http://data.europa.eu/esco/skill/85aa1871-db96...
