In [2]:
import json
import requests
import sys
import pandas as pd

In [22]:
url_root = 'https://ec.europa.eu/esco/api/resource/'

esco_links = ['http://data.europa.eu/esco/occupation/1c5a45b9-440e-4726-b565-16a952abd341',
 'http://data.europa.eu/esco/occupation/1c5a896a-e010-4217-a29a-c44db26e25da',
 'http://data.europa.eu/esco/occupation/2079755f-d809-49e6-8037-4de6180e54c0',
 'http://data.europa.eu/esco/occupation/24135b84-cbdd-4d42-9ed2-02fd982d15b2',
 'http://data.europa.eu/esco/occupation/2fb96c6c-8d0b-4ef0-b1ee-3e493305e4eb',
 'http://data.europa.eu/esco/occupation/349ee6f6-c295-4c38-9b98-48765b55280e',
 'http://data.europa.eu/esco/occupation/3b1ea27c-781c-41eb-821f-214285260dd2',
 'http://data.europa.eu/esco/occupation/6ce5f7e3-a534-4aa2-ab23-255408ddb53a',
 'http://data.europa.eu/esco/occupation/781a6350-e686-45b9-b075-e4c8d5a05ff7',
 'http://data.europa.eu/esco/occupation/78faf623-2543-43a5-acb2-3c43a22d36e4',
 'http://data.europa.eu/esco/occupation/cc325a8f-702e-4bf0-893f-5b5d456475ad',
 'http://data.europa.eu/esco/occupation/cc867bee-ab5c-427f-9244-f7a204d9574b']

# Headers and parameters for API request
language = 'en'
header = {
    'Content-Type': 'application/json',
    'charset': 'UTF-8'
}
failed_links = []

# List to store all occupation data
occupation_data_list = []

# Loop through ESCO links
for link in esco_links:
    url = url_root + 'occupation'
    params = {
        'uri': link,
        'language': language
    }

    try:
        # API Request
        resp = requests.get(url, headers=header, params=params)
        resp.raise_for_status()  # Raise an error for failed requests
        esco_data = resp.json()

        # Extract skills by category
        essential_skills = {"Skill": [], "Knowledge": []}
        optional_skills = {"Skill": [], "Knowledge": []}

        # Process essential skills
        if "_links" in esco_data and "hasEssentialSkill" in esco_data["_links"]:
            for skill in esco_data["_links"]["hasEssentialSkill"]:
                if "title" in skill and "skillType" in skill:
                    skill_title = skill["title"]
                    skill_type = "Skill" if "skill-type/skill" in skill["skillType"] else "Knowledge"
                    essential_skills[skill_type].append(skill_title)

        # Process optional skills
        if "_links" in esco_data and "hasOptionalSkill" in esco_data["_links"]:
            for skill in esco_data["_links"]["hasOptionalSkill"]:
                if "title" in skill and "skillType" in skill:
                    skill_title = skill["title"]
                    skill_type = "Skill" if "skill-type/skill" in skill["skillType"] else "Knowledge"
                    optional_skills[skill_type].append(skill_title)

        # Extract broader occupations
        broader_occupations = []
        if "_links" in esco_data and "broaderOccupation" in esco_data["_links"]:
            for occupation in esco_data["_links"]["broaderOccupation"]:
                if "title" in occupation:
                    broader_occupations.append(occupation["title"])

        # Store occupation data in dictionary
        occupation_data = {
            "Occupation Title": esco_data.get("title", "N/A"),
            "ESCO Code": esco_data.get("code", "N/A"),
            "Description": esco_data.get("description", {}).get("en", {}).get("literal", "No description available"),
            "Alternative Labels": ", ".join(esco_data.get("alternativeLabel", {}).get("en", [])),
            "Essential Skills (Skill)": ", ".join(essential_skills["Skill"]) if essential_skills["Skill"] else "None",
            "Essential Skills (Knowledge)": ", ".join(essential_skills["Knowledge"]) if essential_skills["Knowledge"] else "None",
            "Optional Skills (Skill)": ", ".join(optional_skills["Skill"]) if optional_skills["Skill"] else "None",
            "Optional Skills (Knowledge)": ", ".join(optional_skills["Knowledge"]) if optional_skills["Knowledge"] else "None",
            "Broader Occupation": ", ".join(broader_occupations) if broader_occupations else "None",
        }

        # Append the occupation data to the list
        occupation_data_list.append(occupation_data)

    except requests.exceptions.RequestException as e:
        print(f"Request failed for {link}: {e}")
        failed_links.append(link)
# Convert the list to a Pandas DataFrame
df_esco_occupations = pd.DataFrame(occupation_data_list)


Request failed for http://data.europa.eu/esco/occupation/1c5a45b9-440e-4726-b565-16a952abd341: 404 Client Error:  for url: https://ec.europa.eu/esco/api/resource/occupation?uri=http%3A%2F%2Fdata.europa.eu%2Fesco%2Foccupation%2F1c5a45b9-440e-4726-b565-16a952abd341&language=en
Request failed for http://data.europa.eu/esco/occupation/1c5a896a-e010-4217-a29a-c44db26e25da: 404 Client Error:  for url: https://ec.europa.eu/esco/api/resource/occupation?uri=http%3A%2F%2Fdata.europa.eu%2Fesco%2Foccupation%2F1c5a896a-e010-4217-a29a-c44db26e25da&language=en
Request failed for http://data.europa.eu/esco/occupation/2079755f-d809-49e6-8037-4de6180e54c0: 404 Client Error:  for url: https://ec.europa.eu/esco/api/resource/occupation?uri=http%3A%2F%2Fdata.europa.eu%2Fesco%2Foccupation%2F2079755f-d809-49e6-8037-4de6180e54c0&language=en
Request failed for http://data.europa.eu/esco/occupation/24135b84-cbdd-4d42-9ed2-02fd982d15b2: 404 Client Error:  for url: https://ec.europa.eu/esco/api/resource/occupation

In [43]:
urls = [
  "https://esco.ec.europa.eu/sites/default/files/blockchain%20architect.json"
]

# List to store extracted data
occupation_data_list = []
failed_links = []

for url in urls:
    try:
        print(f"Fetching ESCO data from: {url}")
        resp = requests.get(url)
        if resp.status_code != 200:
            print(f"Failed to fetch data for {url}. Status Code: {resp.status_code}")
            failed_links.append(url)
            continue
        
        esco_data = resp.json()
        
        # Extract skills directly from "essentialSkills" and "optionalSkills"
        essential_skills = esco_data.get("essentialSkills", {})
        optional_skills = esco_data.get("optionalSkills", {})
        
        # Parse skills from essential and optional skills
        essential_skill_titles = [skill["title"] for skill in essential_skills.get("skill", [])]
        essential_knowledge_titles = [skill["title"] for skill in essential_skills.get("knowledge", [])]
        
        optional_skill_titles = [skill["title"] for skill in optional_skills.get("skill", [])]
        optional_knowledge_titles = [skill["title"] for skill in optional_skills.get("knowledge", [])]

        # Extract broader occupations
        broader_occupations = []
        if "_links" in esco_data and "broaderOccupation" in esco_data["_links"]:
            for occupation in esco_data["_links"]["broaderOccupation"]:
                if "title" in occupation:
                    broader_occupations.append(occupation["title"])
        
        # Handle alternative labels properly
        alternative_labels_raw = esco_data.get("alternativeLabel", {}).get("en", {})
        if isinstance(alternative_labels_raw, dict):
            alternative_labels = list(alternative_labels_raw.values())  # Extract values from numbered keys
        elif isinstance(alternative_labels_raw, list):
            alternative_labels = alternative_labels_raw  # Already a list
        else:
            alternative_labels = []

        # Store occupation data in dictionary
        occupation_data = {
            "Occupation Title": esco_data.get("title", "N/A"),
            "ESCO Code": esco_data.get("code", "N/A"),
            "Description": esco_data.get("description", {}).get("en", {}).get("literal", "No description available"),
            "Alternative Labels": ", ".join(alternative_labels),
            "Essential Skills (Skill)": ", ".join(essential_skill_titles) if essential_skill_titles else "None",
            "Essential Skills (Knowledge)": ", ".join(essential_knowledge_titles) if essential_knowledge_titles else "None",
            "Optional Skills (Skill)": ", ".join(optional_skill_titles) if optional_skill_titles else "None",
            "Optional Skills (Knowledge)": ", ".join(optional_knowledge_titles) if optional_knowledge_titles else "None",
            "Broader Occupation": ", ".join(broader_occupations) if broader_occupations else "None",
        }

        # Append the occupation data to the list
        occupation_data_list.append(occupation_data)
        
        # Pause between requests to avoid rate limits
        time.sleep(1)

    except requests.exceptions.RequestException as e:
        print(f"Request failed for {url}: {e}")
        failed_links.append(url)

# Convert the list to a Pandas DataFrame
df_esco_occupations = pd.DataFrame(occupation_data_list)

# Display the DataFrame
df_esco_occupations


Fetching ESCO data from: https://esco.ec.europa.eu/sites/default/files/blockchain%20architect.json


Unnamed: 0,Occupation Title,ESCO Code,Description,Alternative Labels,Essential Skills (Skill),Essential Skills (Knowledge),Optional Skills (Skill),Optional Skills (Knowledge),Broader Occupation
0,blockchain architect,2511.14.1,Blockchain architects are ICT system architect...,"DLT specialist, blockchain technology speciali...","define technical requirements, create business...","principles of distributed ledger technology, b...","develop software prototype, design cloud archi...","data analytics, cloud technologies, software c...",


In [31]:
df_esco_occupations

Unnamed: 0,Occupation Title,ESCO Code,Description,Alternative Labels,Essential Skills (Skill),Essential Skills (Knowledge),Optional Skills (Skill),Optional Skills (Knowledge),Broader Occupation
0,computer vision engineer,2511.2,"Computer vision engineers research, design, de...","2, 4, 1, 0, 3",,,,,


In [17]:
failed_links

['http://data.europa.eu/esco/occupation/1c5a45b9-440e-4726-b565-16a952abd341',
 'http://data.europa.eu/esco/occupation/1c5a896a-e010-4217-a29a-c44db26e25da',
 'http://data.europa.eu/esco/occupation/2079755f-d809-49e6-8037-4de6180e54c0',
 'http://data.europa.eu/esco/occupation/24135b84-cbdd-4d42-9ed2-02fd982d15b2',
 'http://data.europa.eu/esco/occupation/2fb96c6c-8d0b-4ef0-b1ee-3e493305e4eb',
 'http://data.europa.eu/esco/occupation/349ee6f6-c295-4c38-9b98-48765b55280e',
 'http://data.europa.eu/esco/occupation/3b1ea27c-781c-41eb-821f-214285260dd2',
 'http://data.europa.eu/esco/occupation/6ce5f7e3-a534-4aa2-ab23-255408ddb53a',
 'http://data.europa.eu/esco/occupation/781a6350-e686-45b9-b075-e4c8d5a05ff7',
 'http://data.europa.eu/esco/occupation/78faf623-2543-43a5-acb2-3c43a22d36e4',
 'http://data.europa.eu/esco/occupation/cc325a8f-702e-4bf0-893f-5b5d456475ad',
 'http://data.europa.eu/esco/occupation/cc867bee-ab5c-427f-9244-f7a204d9574b']

In [44]:
df_esco_occupations.to_csv('esco_data.csv')

In [11]:
df_esco_occupations