In [None]:
%load_ext autoreload

%autoreload 1

In [None]:
import random
import numpy as np
import pandas as pd
from collections import Counter

import sys

sys.path.append('../protosp03/recommendation/')
%aimport matchings

In [None]:
# Location of the raw taxonomy CSV, relative to the notebook's working directory.
path = '../data/raw/evrlearn/taxonomy_V4.csv'

In [None]:
# Load the raw taxonomy and drop every row without an 'ElementID'.
taxonomy = pd.read_csv(path)
taxonomy = taxonomy[taxonomy['ElementID'].notna()]

keep = ['ElementID', 'Dimension', 'Type Level 1', 'Type Level 1 E',
       'Type Level 2', 'Type Level 2 E', 'Type Level 3', 'Type Level 4']

# Keep only the columns listed in 'keep'. The explicit copy makes the frame
# independent of the filtered view, so the column assignments below do not
# trigger SettingWithCopyWarning.
taxonomy = taxonomy[keep].copy()
names = ['Type Level 1', 'Type Level 2', 'Type Level 3', 'Type Level 4']

# Depth of an element = number of dots in its dotted ID ('2.1.8' -> 2).
# Counting separators instead of characters stays correct when a component
# has more than one digit (e.g. '5.10.12').
taxonomy['last_name'] = taxonomy['ElementID'].str.count(r'\.')
# Translate the depth into the name of the deepest 'Type Level N' column ...
taxonomy['last_name'] = taxonomy['last_name'].map(lambda depth: names[depth])
# ... and read the element's own name from that column.
taxonomy['last_name'] = taxonomy.apply(lambda row: row[row['last_name']], axis=1)

In [None]:
# Display the cleaned taxonomy, including the derived 'last_name' column.
taxonomy

# Mastery Levels


In [None]:
# Mastery scale plus a sampling distribution over it: the level at 1-based
# position k gets weight 1 / ln(k + 1), so earlier levels are drawn more often.
mastery_levels = [1, 2, 3, 4]
nb_mastery_levels = len(mastery_levels)
mastery_levels_probabilities = [
    1 / np.log(position) for position in range(2, nb_mastery_levels + 2)
]
# Rescale the weights so that they sum to 1.
mastery_levels_normalized_probabilities = (
    np.array(mastery_levels_probabilities) / sum(mastery_levels_probabilities)
)

# Skills

In [None]:
# Map each skill name (column 'last_name') to its 'ElementID', then parse the
# dotted ID into a list of ints, one per '.'-separated component.
levels_dict = taxonomy.set_index('last_name')['ElementID'].to_dict()
levels_dict = {key:[int(level) for level in value.split('.')] for key, value in levels_dict.items()}
# Map the first component of every 'ElementID' to that row's 'Type Level 1' value.
groups_dict = taxonomy.set_index('ElementID')['Type Level 1'].to_dict()
groups_dict = {int(key.split('.')[0]):value for key, value in groups_dict.items()}

# Skill names in shuffled order; the skill at 1-based position k then gets
# weight 1 / ln(k + 1), and the weights are normalised to sum to 1.
skills = list(levels_dict.keys())
random.shuffle(skills)
nb_skills = len(skills)
skills_probabilities = [1/np.log(i+1) for i in range(1, nb_skills + 1)]
skills_normalized_probabilities = np.array(skills_probabilities) / sum(skills_probabilities)

# Years

In [None]:
# Years from 2023 down to 2018; the year at 1-based position k gets weight
# 1 / ln(k + 1), so the most recent years are the most likely.
years = list(range(2023, 2017, -1))
years_probabilities = [1 / np.log(position) for position in range(2, len(years) + 2)]
# Rescale the weights so that they sum to 1.
years_normalized_probabilities = (
    np.array(years_probabilities) / sum(years_probabilities)
)

# Learners

In [None]:
def get_random_learner(skills, mastery_levels, years, min_n_skills=5, max_n_skills=10):
    """Draw one synthetic learner.

    The number of skills is drawn uniformly from [min_n_skills, max_n_skills].
    Skills are sampled without replacement, mastery levels with replacement,
    and a single year is sampled as well. The sampling weights are NOT
    parameters: they are read from the notebook-level arrays
    `skills_normalized_probabilities`, `mastery_levels_normalized_probabilities`
    and `years_normalized_probabilities`.

    Returns:
        dict with keys 'possessed_skills' (skill -> level) and 'year'.
    """
    n_skills = random.randint(min_n_skills, max_n_skills)
    drawn_skills = np.random.choice(skills, n_skills, p=skills_normalized_probabilities, replace=False)
    drawn_levels = np.random.choice(mastery_levels, n_skills, p=mastery_levels_normalized_probabilities, replace=True)
    possessed = dict(zip(drawn_skills, drawn_levels))
    year = np.random.choice(years, 1, p=years_normalized_probabilities)[0]
    return {'possessed_skills': possessed, 'year': year}

def get_all_learners(skills, mastery_levels, years, min_n_skills=5, max_n_skills=10, n_learners=100):
    """Return a list of `n_learners` learners, each drawn by get_random_learner."""
    population = []
    for _ in range(n_learners):
        population.append(
            get_random_learner(skills, mastery_levels, years, min_n_skills, max_n_skills)
        )
    return population

# Generate the synthetic learner population and show the first learner.
learners = get_all_learners(skills, mastery_levels, years, min_n_skills=5, max_n_skills=10, n_learners=1000)

learners[0]

# Skill Supply

In [None]:
def get_skill_supply(learners, years):
    """Count, per year, how many learners hold each (skill, level) pair.

    Args:
        learners: iterable of dicts with keys 'possessed_skills' (skill -> level)
            and 'year'.
        years: the years to create a counter for; a learner with skills whose
            year is not listed raises KeyError.

    Returns:
        dict mapping each year to a Counter keyed by (skill, level) tuples.
    """
    supply_by_year = {}
    for year in years:
        supply_by_year[year] = Counter()

    for learner in learners:
        for pair in learner['possessed_skills'].items():
            supply_by_year[learner['year']][pair] += 1

    return supply_by_year

# Aggregate the supply and show the ten most common (skill, level) pairs of 2023.
skill_supply = get_skill_supply(learners, years)
skill_supply[2023].most_common(10)

# Jobs

In [None]:
def get_random_job(skills, skills_normalized_probabilities, mastery_levels, mastery_levels_normalized_probabilities, years, years_normalized_probabilities, min_n_skills=2, max_n_skills=5):
    """Draw one synthetic job posting.

    The number of required skills is uniform in [min_n_skills, max_n_skills].
    Skills are sampled without replacement and mastery levels with replacement,
    each using the matching `*_normalized_probabilities` argument as weights;
    one year is sampled the same way.

    Returns:
        dict with keys 'required_skills' (skill -> level) and 'year'.
    """
    n_skills = random.randint(min_n_skills, max_n_skills)
    drawn_skills = np.random.choice(skills, n_skills, p=skills_normalized_probabilities, replace=False)
    drawn_levels = np.random.choice(mastery_levels, n_skills, p=mastery_levels_normalized_probabilities, replace=True)
    required = dict(zip(drawn_skills, drawn_levels))
    year = np.random.choice(years, 1, p=years_normalized_probabilities)[0]
    return {'required_skills': required, 'year': year}

def get_all_jobs(skills, skills_normalized_probabilities, mastery_levels, mastery_levels_normalized_probabilities, years, years_normalized_probabilities, min_n_skills=2, max_n_skills=5, n_jobs=1000):
    """Return a list of `n_jobs` jobs, each drawn by get_random_job with the same arguments."""
    postings = []
    for _ in range(n_jobs):
        postings.append(
            get_random_job(
                skills,
                skills_normalized_probabilities,
                mastery_levels,
                mastery_levels_normalized_probabilities,
                years,
                years_normalized_probabilities,
                min_n_skills,
                max_n_skills,
            )
        )
    return postings

# Generate the synthetic job postings and show the first one.
jobs = get_all_jobs(skills, skills_normalized_probabilities, mastery_levels, mastery_levels_normalized_probabilities, years, years_normalized_probabilities, min_n_skills=2, max_n_skills=5, n_jobs=1000)

jobs[0]

# Skill Demand

In [None]:
def get_skill_demand(jobs, years):
    """Count, per year, how many jobs require each (skill, level) pair.

    Args:
        jobs: iterable of dicts with keys 'required_skills' (skill -> level)
            and 'year'.
        years: the years to create a counter for; a job with required skills
            whose year is not listed raises KeyError.

    Returns:
        dict mapping each year to a Counter keyed by (skill, level) tuples.
    """
    demand_by_year = {year: Counter() for year in years}
    for job in jobs:
        required = job['required_skills']
        # The year is only looked up for jobs that actually require something.
        if required:
            demand_by_year[job['year']].update(required.items())
    return demand_by_year

# Aggregate the demand and show the ten most common (skill, level) pairs of 2023.
skill_demand = get_skill_demand(jobs, years)
skill_demand[2023].most_common(10)

# Skill Attractiveness

In [None]:
def skill_attractiveness(skill, years, skill_supply, skill_demand):
    """Position-weighted demand/supply ratio of a skill.

    The year at 1-based position k in `years` contributes
    demand / supply with weight 1 / k, so years listed first count most.
    Years in which the skill has no supply contribute 0 but still add their
    weight to the normalisation.

    Args:
        skill: key looked up in the per-year counters (the notebook uses
            (skill, level) tuples).
        years: ordered sequence of years.
        skill_supply: mapping year -> mapping of skill -> supply count.
        skill_demand: mapping year -> mapping of skill -> demand count.

    Returns:
        The weighted ratio divided by the sum of the weights; 0.0 when `years`
        is empty.
    """
    weighted_ratio = 0.0
    normalization_factor = 0.0
    for rank, year in enumerate(years, start=1):
        supply_counts = skill_supply[year]
        # A Counter may hold an explicit zero count; treat it like a missing
        # skill instead of dividing by zero.
        if skill in supply_counts and supply_counts[skill] > 0:
            weighted_ratio += skill_demand[year][skill] / (supply_counts[skill] * rank)
        normalization_factor += 1 / rank

    # No years means no weights: report a neutral 0.0 rather than raising.
    if normalization_factor == 0:
        return 0.0
    return weighted_ratio / normalization_factor

In [None]:
# Spot-check one (skill, level) pair: raw demand and supply for the two most
# recent years, followed by the resulting attractiveness score.
print(f"Skill demand in 2023: {skill_demand[2023][('Konzentrationsfähigkeit', 1)]}")
print(f"Skill supply in 2023: {skill_supply[2023][('Konzentrationsfähigkeit', 1)]}")
print(f"Skill demand in 2022: {skill_demand[2022][('Konzentrationsfähigkeit', 1)]}")
print(f"Skill supply in 2022: {skill_supply[2022][('Konzentrationsfähigkeit', 1)]}")
print(f"Skill attractiveness: {skill_attractiveness(('Konzentrationsfähigkeit', 1), years, skill_supply, skill_demand):.2f}")

# Learner-Job Matching

In [None]:
# Reference only (commented out): an in-notebook learner/job matching. The loop
# below calls matchings.learner_job_matching instead — presumably the same logic
# moved into the module; verify against the module source.
# def learner_job_matching(learner, job):
#     matching = 0
#     for skill in job['required_skills']:
#         if skill in learner['possessed_skills']:
#             sim = min(learner['possessed_skills'][skill], job['required_skills'][skill]) / job['required_skills'][skill]
#             matching += sim
#     matching = 100 * matching / len(job['required_skills'])
#     return matching


# Pair learners and jobs positionally and print the first three pairs whose
# matching is above 50. After the loop, `learner` and `job` stay bound to the
# last pair that was examined.
nb_matching = 0
for learner, job in zip(learners, jobs):
    matching = matchings.learner_job_matching(learner, job)
    if matching > 50:
        print(f"Matching: {matching:.2f}")
        print(f"Number of skills in common: {len(set(learner['possessed_skills'].keys()).intersection(set(job['required_skills'].keys())))}")
        print(f"Job: {job['required_skills']}")
        print(f"Learner: {learner['possessed_skills']}")
        print()
        nb_matching += 1
    # Stop once three matches have been printed.
    if nb_matching > 2:
        break

In [None]:
# Show the learner left over from the matching loop above.
learner

In [None]:
# NOTE(review): this cell repeats the previous one (displays `learner` again).
learner

In [None]:
# Keep only the entries for skills of the current `learner` / `job` pair.
# NOTE(review): this overwrites levels_dict in place; the full mapping is only
# restored by re-running the cell that builds it from the taxonomy.
levels_dict = {key:value for key, value in levels_dict.items() if key in learner['possessed_skills'] or key in job['required_skills']}

In [None]:
# Show the mapping from first ElementID component to 'Type Level 1' name.
groups_dict

In [None]:
# Small hand-written example (one job, one learner, and the matching slices of
# groups_dict / levels_dict) used to exercise the group-level matching.
job = {
    'required_skills': {'Fokuswechsel': 1, 'Organisation': 2},
    'year': 2022,
}

learner = {
    'possessed_skills': {'Organisation': 2, 'Hand/Finger-Geschwindigkeit': 4},
    'year': 2019,
}

# First ElementID component -> group name.
groups_dict = {2: 'Fertigkeit', 5: 'Fachkompetenz'}

# Skill name -> ElementID components.
levels_dict = {
    'Fokuswechsel': [2, 1, 8, 2],
    'Hand/Finger-Geschwindigkeit': [2, 3, 3, 2],
    'Organisation': [5, 10],
}


def shortest_distance(node1, node2):
    """Number of tree edges between two nodes given as root-to-node paths.

    Each node is a sequence of path components starting at the root level.
    The distance is the number of steps from each node up to their deepest
    shared prefix, added together.

    Returns:
        0 for identical paths, the summed step count otherwise, and
        sys.maxsize when the paths do not even share their first component
        (including when either path is empty).
    """
    # Length of the common prefix of the two paths.
    shared_depth = 0
    for left, right in zip(node1, node2):
        if left == right:
            shared_depth += 1
        else:
            break

    # Different roots: the nodes are treated as unreachable from one another.
    if shared_depth == 0:
        return sys.maxsize

    steps_from_node1 = len(node1) - shared_depth
    steps_from_node2 = len(node2) - shared_depth
    return steps_from_node1 + steps_from_node2




def learner_job_group_matching(learner, job, groups_dict, levels_dict):
    """Per-group matching percentage between a learner and a job.

    For every group in `groups_dict`, the job's required skills whose first
    level component equals the group id are scored:

    * a skill the learner possesses scores
      matchings.skill_skill_similarity(learner_level, required_level);
    * a skill the learner lacks is scored against the learner's closest skill
      (smallest shortest_distance), with the similarity divided by
      (distance + 1);
    * when the learner has no skill under the same root, the skill scores 0.

    The group total is expressed as a percentage of ALL required skills of the
    job, not only of the ones belonging to the group.

    Args:
        learner: dict with key 'possessed_skills' (skill -> level).
        job: dict with key 'required_skills' (skill -> level).
        groups_dict: group id -> group name.
        levels_dict: skill -> list of level components; must contain every
            skill of the learner and of the job.

    Returns:
        Counter mapping group name -> matching percentage (0.0 for every group
        when the job requires no skills).
    """
    required = job["required_skills"]
    possessed = learner["possessed_skills"]
    n_required = len(required)

    group_matchings = Counter()

    for group_id, group_name in groups_dict.items():
        matching = 0
        for job_skill, required_level in required.items():
            if levels_dict[job_skill][0] != group_id:
                continue

            if job_skill in possessed:
                # Direct hit: compare the mastery levels.
                matching += matchings.skill_skill_similarity(
                    possessed[job_skill], required_level
                )
                continue

            # Otherwise fall back to the learner's closest skill in the taxonomy.
            min_distance = sys.maxsize
            closest_skill = None
            for learner_skill in possessed:
                distance = shortest_distance(levels_dict[job_skill], levels_dict[learner_skill])
                if distance < min_distance:
                    min_distance = distance
                    closest_skill = learner_skill

            # No learner skill shares a root with the job skill (or the learner
            # has no skills at all): nothing to credit, and indexing with None
            # would raise KeyError.
            if closest_skill is None:
                continue

            matching += matchings.skill_skill_similarity(
                possessed[closest_skill], required_level
            ) / (min_distance + 1)

        # Convert total similarity into percentage form; a job without required
        # skills cannot be matched, so avoid dividing by zero.
        group_matchings[group_name] = 100 * matching / n_required if n_required else 0.0
    return group_matchings


# NOTE(review): this calls the implementation in the `matchings` module, not the
# learner_job_group_matching defined earlier in this cell — confirm which one
# is meant to be exercised here.
print(matchings.learner_job_group_matching(learner, job, groups_dict, levels_dict).most_common())

In [None]:
# Hand-written example for the module-level group matching.
job = {
         'required_skills': {
             'Fokuswechsel': 1,
             'Organisation': 2
         },
         'year': 2022}

learner = {
         'possessed_skills': {
             'Organisation': 2,
             'Hand/Finger-Geschwindigkeit': 4
         },
         'year': 2019}

groups_dict = {
         2: 'Fertigkeit',
         5: 'Fachkompetenz'}


levels_dict = {
         'Fokuswechsel': [2, 1, 8, 2],
         'Hand/Finger-Geschwindigkeit': [2, 3, 3, 2],
         'Organisation': [5, 10]}

# Store the result under its own name. Assigning it to `matchings` would replace
# the imported module with a Counter, so this cell could not be re-run and every
# other `matchings.<function>` call would fail afterwards.
group_matchings = matchings.learner_job_group_matching(learner, job, groups_dict, levels_dict)

# Display the groups ordered from best to worst match.
group_matchings.most_common()

# Courses

In [None]:
def get_random_provided_skills(skills, mastery_levels, required_skills, n_provided_skills):
    """Randomly pick the skills (and levels) a course provides.

    Candidates (skill, level) are drawn uniformly and accepted when either
    * the skill is neither required nor already provided, or
    * the skill is required and the drawn level is strictly above the
      required level (a skill that is already provided is overwritten).

    Args:
        skills: sequence of candidate skills.
        mastery_levels: sequence of candidate levels.
        required_skills: mapping skill -> level the course requires.
        n_provided_skills: number of distinct skills to return.

    Returns:
        dict skill -> provided level with `n_provided_skills` entries.

    Raises:
        ValueError: if fewer than `n_provided_skills` distinct skills can ever
            be accepted; without this check the sampling loop would never end.
    """
    def _can_be_provided(skill):
        # A required skill is only acceptable at a level above the requirement.
        if skill not in required_skills:
            return True
        return any(level > required_skills[skill] for level in mastery_levels)

    n_feasible = sum(1 for skill in set(skills) if _can_be_provided(skill))
    if n_provided_skills > n_feasible:
        raise ValueError(
            f"cannot provide {n_provided_skills} skills: only {n_feasible} skills are eligible"
        )

    provided_skills = dict()
    while len(provided_skills) < n_provided_skills:
        candidate_skill = random.choice(skills)
        candidate_level = random.choice(mastery_levels)
        if (
            candidate_skill not in required_skills
            and candidate_skill not in provided_skills
        ):
            provided_skills[candidate_skill] = candidate_level
        elif (
            candidate_skill in required_skills
            and candidate_level > required_skills[candidate_skill]
        ):
            provided_skills[candidate_skill] = candidate_level

    return provided_skills


def get_random_course(skills, mastery_levels, min_n_required_skills=1, max_n_required_skills=5, min_n_provided_skills=1, max_n_provided_skills=2):
    """Draw one synthetic course.

    Required skills are sampled uniformly without replacement, their levels
    uniformly with replacement. The provided skills are then chosen by
    get_random_provided_skills, given the required ones.

    Returns:
        dict with keys 'required_skills' and 'provided_skills', each mapping
        skill -> level.
    """
    n_required_skills = random.randint(min_n_required_skills, max_n_required_skills)
    prerequisite_names = np.random.choice(skills, n_required_skills, replace=False)
    prerequisite_levels = np.random.choice(mastery_levels, n_required_skills, replace=True)
    required = dict(zip(prerequisite_names, prerequisite_levels))

    n_provided_skills = random.randint(min_n_provided_skills, max_n_provided_skills)
    return {
        'required_skills': required,
        'provided_skills': get_random_provided_skills(skills, mastery_levels, required, n_provided_skills),
    }

def get_all_courses(skills, mastery_levels, min_n_required_skills=1, max_n_required_skills=5, min_n_provided_skills=1, max_n_provided_skills=2, n_courses=1000):
    """Return a list of `n_courses` courses, each drawn by get_random_course.

    The min/max bounds are forwarded to get_random_course, so the caller's
    values — not the defaults — control the size of every course.
    """
    return [
        get_random_course(
            skills,
            mastery_levels,
            min_n_required_skills=min_n_required_skills,
            max_n_required_skills=max_n_required_skills,
            min_n_provided_skills=min_n_provided_skills,
            max_n_provided_skills=max_n_provided_skills,
        )
        for _ in range(n_courses)
    ]

# Generate the synthetic course catalogue and show the first course.
courses = get_all_courses(skills, mastery_levels, min_n_required_skills=1, max_n_required_skills=5, min_n_provided_skills=1, max_n_provided_skills=2, n_courses=1000)

courses[0]

# Learner-Course Matching

In [None]:
def learner_course_required_matching(learner, course):
    """Fraction of the course's prerequisites the learner satisfies.

    Each required skill the learner possesses contributes
    min(learner_level, required_level) / required_level; missing skills
    contribute 0. The sum is divided by the number of required skills, so the
    result lies in [0, 1] for positive levels.
    """
    owned = learner['possessed_skills']
    needed = course['required_skills']
    covered = sum(
        min(owned[skill], level) / level
        for skill, level in needed.items()
        if skill in owned
    )
    return covered / len(needed)

def learner_course_provided_matching(learner, course):
    """Fraction of the course's outcomes the learner already holds.

    Each provided skill the learner possesses contributes
    min(learner_level, provided_level) / provided_level; missing skills
    contribute 0. The sum is divided by the number of provided skills.
    """
    owned = learner['possessed_skills']
    taught = course['provided_skills']
    already_held = sum(
        min(owned[skill], level) / level
        for skill, level in taught.items()
        if skill in owned
    )
    return already_held / len(taught)

def learner_course_matching(learner, course):
    """Score how well a course suits a learner.

    Returns 0 when the learner already fully holds everything the course
    provides (provided matching >= 1.0); otherwise the required matching
    divided by (provided matching + 1).
    """
    required_score = learner_course_required_matching(learner, course)
    provided_score = learner_course_provided_matching(learner, course)

    nothing_to_gain = provided_score >= 1.0
    return 0 if nothing_to_gain else required_score / (provided_score + 1)

In [None]:
# Pair learners and courses positionally and print the pairs whose matching is
# at least 0.1, stopping after six of them have been printed.
nb_matching = 0
for learner, course in zip(learners, courses):
    matching = learner_course_matching(learner, course)
    if matching >= 0.1:
        print(f"Matching: {matching:.2f}")
        print(f"Course required: {course['required_skills']}")
        print(f"Course provided: {course['provided_skills']}")
        print(f"Learner: {learner['possessed_skills']}")
        nb_matching += 1
    if nb_matching > 5:
        break

# Learner-Skill Achievability

In [None]:
def learner_skill_achievability(learner, skill, mastery_level, courses):
    """Sum the learner's prerequisite matching over courses teaching a skill.

    Only courses that provide `skill` at exactly `mastery_level` are considered.

    Returns:
        (achievability, nb_courses): the summed
        learner_course_required_matching over those courses, and how many
        courses there were. (0, 0) when no course qualifies.
    """
    teaching_courses = [
        course
        for course in courses
        if skill in course['provided_skills']
        and mastery_level == course['provided_skills'][skill]
    ]
    achievability = sum(
        learner_course_required_matching(learner, course) for course in teaching_courses
    )
    return achievability, len(teaching_courses)

In [None]:
# Show the first generated learner.
learners[0]

In [None]:
# Show the first generated job.
jobs[0]

In [None]:
# Print the learners for whom ('Motivieren', level 1) has a positive
# achievability, stopping after six of them have been printed.
nb_achievable = 0

for learner in learners: 
    achievability, nb_courses = learner_skill_achievability(learner, 'Motivieren', 1, courses)
    if achievability > 0:
        nb_achievable += 1
        print(f"Achievability: {achievability:.2f}")
        print(f"Number of courses: {nb_courses}")
        print(f"Learner: {learner['possessed_skills']}")
        print(f"Skill: Motivieren")
        if nb_achievable > 5:
            break