In [1]:
# %%
import pandas as pd
import json
import os
from deepdiff import DeepDiff

os.getcwd()

# %%
# read in json from different folder
# os.chdir("../results")
with open("../results/job_gpt-3.5-turbo_2sent_n100_jBd_V4_detailed.json", "r") as f:
    jobs_v4 = json.load(f)

with open("../results/job_gpt-3.5-turbo_2sent_n100_jBd_esco_detailed.json", "r") as f:
    jobs_esco = json.load(f)

with open("../results/course_gpt-3.5-turbo_2sent_n100_jBd_V4_detailed.json", "r") as f:
    courses_v4 = json.load(f)

with open(
    "../results/course_gpt-3.5-turbo_2sent_n100_jBd_esco_detailed.json", "r"
) as f:
    courses_esco = json.load(f)
# %%
# read in taxonomy from different folder
os.chdir("../../data/taxonomy")
taxonomy = pd.read_csv("taxonomy_V4.csv")
keep_cols = [
    "unique_id",
    "Type Level 1",
    "Type Level 2",
    "Type Level 3",
    "Type Level 4",
]
taxonomy = taxonomy[keep_cols]

In [2]:
# get list of job_ids which is the key in the json
job_ids = list(jobs_v4.keys())
assert job_ids == list(jobs_esco.keys())

course_ids = list(courses_v4.keys())
assert course_ids == list(courses_esco.keys())

In [39]:
job_matched_esco = {}
job_matched_v4 = {}

course_matched_esco = {}
course_matched_v4 = {}

for job_id in job_ids:
    job_matched_esco[job_id] = []
    job_matched_v4[job_id] = []
    for sentence in jobs_v4[job_id]:
        job_matched_v4[job_id].append(sentence["matched_skills"])
    for sentence in jobs_esco[job_id]:
        job_matched_esco[job_id].append(sentence["matched_skills"])

for course_id in course_ids:
    course_matched_v4[course_id] = {}
    course_matched_esco[course_id] = {}
    for type_skill in courses_v4[course_id]:
        course_matched_v4[course_id][type_skill] = []
        course_matched_esco[course_id][type_skill] = []
        for sentence in courses_v4[course_id][type_skill]:
            course_matched_v4[course_id][type_skill].append(sentence["matched_skills"])
        for sentence in courses_esco[course_id][type_skill]:
            course_matched_esco[course_id][type_skill].append(
                sentence["matched_skills"]
            )

In [40]:
# get how many skills are matched in each
job_matched_v4_count = {}
job_matched_esco_count = {}
job_matched_v4_count["total"] = 0
job_matched_esco_count["total"] = 0

course_matched_v4_count = {}
course_matched_esco_count = {}
course_matched_v4_count["total"] = 0
course_matched_esco_count["total"] = 0

for job_id in job_ids:
    job_matched_v4_count[job_id] = 0
    job_matched_esco_count[job_id] = 0
    for sentence in job_matched_v4[job_id]:
        job_matched_v4_count[job_id] += len(sentence)
        job_matched_v4_count["total"] += len(sentence)
    for sentence in job_matched_esco[job_id]:
        job_matched_esco_count[job_id] += len(sentence)
        job_matched_esco_count["total"] += len(sentence)

for course_id in course_ids:
    course_matched_v4_count[course_id] = {}
    course_matched_esco_count[course_id] = {}
    for type_skill in courses_v4[course_id]:
        course_matched_v4_count[course_id][type_skill] = 0
        course_matched_esco_count[course_id][type_skill] = 0
        for sentence in course_matched_v4[course_id][type_skill]:
            course_matched_v4_count[course_id][type_skill] += len(sentence)
            course_matched_v4_count["total"] += len(sentence)
        for sentence in course_matched_esco[course_id][type_skill]:
            course_matched_esco_count[course_id][type_skill] += len(sentence)
            course_matched_esco_count["total"] += len(sentence)

print("job_matched_v4_count total", job_matched_v4_count["total"])
print("job_matched_esco_count total", job_matched_esco_count["total"])
print("course_matched_v4_count total", course_matched_v4_count["total"])
print("course_matched_esco_count total", course_matched_esco_count["total"])

job_matched_v4_count total 326
job_matched_esco_count total 255
course_matched_v4_count total 365
course_matched_esco_count total 220


In [71]:
# for each job_id or course_id, we want to see what are the skills that are matched in common between v4 and esco

job_matched_common = {}
job_matched_common["total"] = 0
course_matched_common = {}
course_matched_common["total"] = 0

for job_id in job_ids:
    job_matched_common[job_id] = []
    for v4_extracted in job_matched_v4[job_id]:
        for v4_matched_skill in v4_extracted:
            for esco_extracted in job_matched_esco[job_id]:
                for esco_matched_skill in esco_extracted:
                    if v4_matched_skill == esco_matched_skill:
                        job_matched_common[job_id].append(v4_matched_skill)
                        job_matched_common["total"] += 1

for course_id in course_ids:
    course_matched_common[course_id] = {}
    for type_skill in courses_v4[course_id]:
        course_matched_common[course_id][type_skill] = []
        for v4_extracted in course_matched_v4[course_id][type_skill]:
            for v4_matched_skill in v4_extracted:
                for esco_extracted in course_matched_esco[course_id][type_skill]:
                    for esco_matched_skill in esco_extracted:
                        if v4_matched_skill == esco_matched_skill:
                            course_matched_common[course_id][type_skill].append(
                                v4_matched_skill
                            )
                            course_matched_common["total"] += 1

print("job_matched_common total", job_matched_common["total"])
print("course_matched_common total", course_matched_common["total"])

job_matched_common total 166
course_matched_common total 167


In [78]:
# check which of the common matched jobs and courses are matched to the same items in the taxonomy (same unique id)

job_matched_common_taxonomy = {}
job_matched_common_taxonomy["total"] = 0
course_matched_common_taxonomy = {}
course_matched_common_taxonomy["total"] = 0

for job_id in job_ids:
    job_matched_common_taxonomy[job_id] = []
    for v4_extracted in job_matched_v4[job_id]:
        for v4_matched_skill in v4_extracted:
            for esco_extracted in job_matched_esco[job_id]:
                for esco_matched_skill in esco_extracted:
                    if v4_matched_skill == esco_matched_skill:
                        job_

{'service & support': {'unique_id': 107, 'name+definition': 'Service Engineering: Realisiert auf der Basis eines erarbeiteten innovativen Lösungsansatzes einen Proof of Concept. Präsentiert diesen anderen Fachpersonen in Form eines kurzen Pitch.'}}
{'unterstützung bei inbetriebnahme': {'unique_id': 5, 'name+definition': 'Application Operation: Ablösung bzw. Erstinstallation eines IT-Systems (Server, Einzelplatz-Computer, Betriebssysteme und Applikationen) gemäss Auftrag, planen, initialisieren und durchführen.'}}
{'selbständig': {'unique_id': 69, 'name+definition': 'Leistungsorientierung: Die Person ist bestrebt, persönliche Ziele zu erreichen und in der eigenen Arbeit kompetent zu sein. '}}
{'neugierig und lernst gerne dazu': {'unique_id': 114, 'name+definition': 'Beziehungsorientierung: Teamorientierung: Vorliebe für den persönlichen Kontakt und die Zusammenarbeit mit anderen, statt alleine zu arbeiten'}, 'lösungsorientiert': {'unique_id': 69, 'name+definition': 'Leistungsorientierun

Notes: in skill_candidates, we observe candidates generated from ESCO but we actually don't see any items from ESCO selected
Concerns: we do however see different items in v4 taxonomy selected

In [28]:
# get how many matched skills in esco that has unique ids over 999

job_esco_matches = {}
job_esco_matches["total"] = 0

for job_id in job_ids:
    job_esco_matches[job_id] = []
    for job_skill in job_matched_esco[job_id]:
        if job_skill["unique_id"] > 999:
            job_esco_matches[job_id].append(job_skill)
            job_esco_matches["total"] += 1

{}
{'service & support': {'unique_id': 107, 'name+definition': 'Service Engineering: Realisiert auf der Basis eines erarbeiteten innovativen Lösungsansatzes einen Proof of Concept. Präsentiert diesen anderen Fachpersonen in Form eines kurzen Pitch.'}}
{}
{'weiterentwicklung und kontinuierliche verbesserung der m365-dienste einschließlich der support- und wartungsprozesse': {'unique_id': 71, 'name+definition': 'Management der ICT: Analysiert das Umfeld eines Veränderungsvorhabens, moderiert die Entwicklung der Vision und Ziele für die Veränderung und unterstützt die Organisation bei der Umsetzung der Veränderung.'}}
{}
{'krankenhausinformationssystem': {'unique_id': 18, 'name+definition': 'Data Engineering/Data Management: Datenbanksystem in einer produktiven Umgebung betreiben. User, Rollen und Rechte nach Vorgabe einpflegen, Extrakte erzeugen und Datenbankberichte auswerten.'}}
{}
{}
{}
{'plattformentwicklung': {'unique_id': 80, 'name+definition': 'Network Operation: Nimmt gemäss Vorg

In [26]:
job_esco_matches["total"]

508