In [1]:
# Load IPython's autoreload extension so edits to the project modules are
# picked up without restarting the kernel.
%load_ext autoreload

# Mode 1: before each cell runs, reload only the modules that were registered
# with %aimport.
%autoreload 1

In [2]:
import random
import numpy as np
import pandas as pd
from collections import Counter
from copy import deepcopy
import sys

sys.path.append('../protosp03/recommendation/')
%aimport matchings
%aimport market
%aimport upskillings

In [3]:
# Relative path of the taxonomy CSV that the next cell reads with pd.read_csv.
path = '../data/raw/evrlearn/taxonomy_V4.csv'

In [4]:
# Load the raw taxonomy table.
taxonomy = pd.read_csv(path)

# Columns kept for the rest of the notebook.
keep = ['ElementID', 'Dimension', 'Type Level 1', 'Type Level 1 E',
        'Type Level 2', 'Type Level 2 E', 'Type Level 3', 'Type Level 4']

# Drop rows without an 'ElementID' and select the kept columns in a single
# .loc call, then take an explicit copy: the column assignments below must
# write to an independent frame, not to a slice of the raw table (this is what
# used to trigger pandas' SettingWithCopyWarning).
taxonomy = taxonomy.loc[taxonomy['ElementID'].notna(), keep].copy()

# Level-name columns ordered by depth: names[d] is the column holding the
# element's own name when its ElementID contains d dots.
names = ['Type Level 1', 'Type Level 2', 'Type Level 3', 'Type Level 4']

# Depth of an element = number of '.' separators in its dotted ElementID
# ('3' -> 0, '3.1.2' -> 2). Counting separators rather than deriving the depth
# from the string length keeps IDs with multi-digit components (e.g. '10.12')
# on the correct level. str.count takes a regex, hence the escaped dot.
taxonomy['last_name'] = taxonomy['ElementID'].str.count(r'\.')
# Translate the depth into the name of the column to read ...
taxonomy['last_name'] = taxonomy['last_name'].map(lambda x: names[x])
# ... and finally replace it, row by row, with the value found in that column.
taxonomy['last_name'] = taxonomy.apply(lambda x: x[x['last_name']], axis=1)

# Mastery Levels


In [5]:
# Mastery levels that learners, jobs and courses can refer to.
mastery_levels = [1, 2, 3, 4]
nb_mastery_levels = len(mastery_levels)

# Unnormalised sampling weight of the k-th level (k = 1..n) is 1 / ln(k + 1),
# so the weights shrink with k and the first level is drawn most often.
mastery_levels_probabilities = [
    1 / np.log(shifted_rank) for shifted_rank in range(2, nb_mastery_levels + 2)
]

# Rescale the weights so that they sum to 1.
mastery_levels_normalized_probabilities = (
    np.array(mastery_levels_probabilities) / sum(mastery_levels_probabilities)
)

# Skills

In [6]:
# levels_dict: skill name ('last_name') -> its dotted 'ElementID' split into a
# list of integers, e.g. '1.2.3' -> [1, 2, 3].
levels_dict = {
    skill_name: [int(part) for part in element_id.split('.')]
    for skill_name, element_id in taxonomy.set_index('last_name')['ElementID'].to_dict().items()
}

# groups_dict: leading integer of an 'ElementID' -> the 'Type Level 1' value of
# a row carrying that leading integer (later rows overwrite earlier ones).
groups_dict = {
    int(element_id.split('.')[0]): group_name
    for element_id, group_name in taxonomy.set_index('ElementID')['Type Level 1'].to_dict().items()
}

# Every skill name, in random order; the position in this shuffled list decides
# which sampling weight a skill receives below.
skills = list(levels_dict)
random.shuffle(skills)
nb_skills = len(skills)

# Unnormalised weight of the k-th skill (k = 1..n) is 1 / ln(k + 1); the
# normalised vector sums to 1.
skills_probabilities = [
    1 / np.log(shifted_rank) for shifted_rank in range(2, nb_skills + 2)
]
skills_normalized_probabilities = (
    np.array(skills_probabilities) / sum(skills_probabilities)
)

# Years

In [7]:
# Years covered by the simulation, most recent first (2023 down to 2018).
years = list(range(2023, 2017, -1))

# Unnormalised weight of the k-th year (k = 1..n) is 1 / ln(k + 1), so the
# first (most recent) year is drawn most often.
years_probabilities = [
    1 / np.log(shifted_rank) for shifted_rank in range(2, len(years) + 2)
]

# Rescale the weights so that they sum to 1.
years_normalized_probabilities = (
    np.array(years_probabilities) / sum(years_probabilities)
)

# Learners

In [8]:
def get_random_learner(skills, mastery_levels, years, min_n_skills=5, max_n_skills=10,
                       skills_p=None, mastery_levels_p=None, years_p=None):
    """Draw one synthetic learner.

    Args:
        skills: Pool of skill names to draw from (without replacement).
        mastery_levels: Pool of mastery levels to draw from (with replacement).
        years: Pool of years to draw the learner's year from.
        min_n_skills: Smallest number of skills the learner may possess.
        max_n_skills: Largest number of skills the learner may possess
            (inclusive).
        skills_p: Sampling probabilities aligned with ``skills``. ``None``
            falls back to the notebook-level ``skills_normalized_probabilities``.
        mastery_levels_p: Sampling probabilities aligned with
            ``mastery_levels``. ``None`` falls back to the notebook-level
            ``mastery_levels_normalized_probabilities``.
        years_p: Sampling probabilities aligned with ``years``. ``None`` falls
            back to the notebook-level ``years_normalized_probabilities``.

    Returns:
        A dict ``{'possessed_skills': {skill: level, ...}, 'year': year}``.

    Raises:
        ValueError: Propagated from ``np.random.choice``, e.g. when a
            probability vector does not match the size of its pool or when
            more skills are requested than the pool can supply.
    """
    # The probability vectors used to be read from notebook globals only, which
    # made the function unusable with any pool other than the global one.
    # Explicit arguments win; the globals remain the default for old callers.
    if skills_p is None:
        skills_p = skills_normalized_probabilities
    if mastery_levels_p is None:
        mastery_levels_p = mastery_levels_normalized_probabilities
    if years_p is None:
        years_p = years_normalized_probabilities

    n_skills = random.randint(min_n_skills, max_n_skills)
    # Distinct skills, each paired with an independently drawn level.
    drawn_skills = np.random.choice(skills, n_skills, p=skills_p, replace=False)
    drawn_levels = np.random.choice(mastery_levels, n_skills, p=mastery_levels_p, replace=True)
    possessed = dict(zip(drawn_skills, drawn_levels))
    year = np.random.choice(years, 1, p=years_p)[0]
    return {'possessed_skills': possessed, 'year': year}

def get_all_learners(skills, mastery_levels, years, min_n_skills=5, max_n_skills=10, n_learners=100):
    """Build a list of ``n_learners`` independently drawn learners.

    Every learner comes from ``get_random_learner`` called with the same pools
    and the same skill-count bounds.
    """
    population = []
    for _ in range(n_learners):
        population.append(
            get_random_learner(skills, mastery_levels, years, min_n_skills, max_n_skills)
        )
    return population

# Simulate 1000 learners holding between 5 and 10 skills each.
learners = get_all_learners(skills, mastery_levels, years, min_n_skills=5, max_n_skills=10, n_learners=1000)

# Show the first simulated learner as a sanity check.
learners[0]

{'possessed_skills': {'Kooperieren': 3,
  'Überzeugen': 2,
  'Dynamik': 2,
  'Kommunikation in Feedbacksituationen': 3,
  'Zusammenarbeit in virtuellen Teams': 3,
  'Hand/Finger-Geschwindigkeit': 2},
 'year': 2021}

# Skill Supply

In [9]:
def get_skill_supply(learners, years):
    """Count, per year, how many learners hold each (skill, level) pair.

    Args:
        learners: Iterable of dicts with a 'possessed_skills' mapping
            (skill -> level) and a 'year'.
        years: Years to create a counter for; a learner whose year is not
            listed raises ``KeyError``.

    Returns:
        A dict ``year -> Counter`` whose counter keys are ``(skill, level)``
        tuples.
    """
    supply_by_year = dict((year, Counter()) for year in years)
    for person in learners:
        # dict.items() already yields the (skill, level) tuples used as keys.
        for skill_and_level in person['possessed_skills'].items():
            supply_by_year[person['year']][skill_and_level] += 1
    return supply_by_year

# Per-year supply counters for the simulated learners.
skill_supply = get_skill_supply(learners, years)
# Ten most frequent (skill, level) pairs among learners assigned to 2023.
skill_supply[2023].most_common(10)

[(('Dynamik', 1), 30),
 (('Dynamik', 2), 27),
 (('Dynamik', 3), 20),
 (('Gliedmaßenkoordination', 1), 20),
 (('Kommunikation in Ambiguitätssituationen', 1), 18),
 (('Konzentrationsfähigkeit', 1), 16),
 (('Dynamik', 4), 16),
 (('Application Operation', 1), 15),
 (('Beziehungsorientierung', 1), 14),
 (('ICT Service Operation', 1), 14)]

# Jobs

In [10]:
def get_random_job(skills, skills_normalized_probabilities, mastery_levels, mastery_levels_normalized_probabilities, years, years_normalized_probabilities, min_n_skills=2, max_n_skills=5):
    """Draw one synthetic job posting.

    The number of required skills is uniform in ``[min_n_skills, max_n_skills]``.
    Skills are drawn without replacement and levels with replacement, each with
    the supplied probability vector; the year is a single weighted draw.

    Returns:
        A dict ``{'required_skills': {skill: level, ...}, 'year': year}``.
    """
    n_required = random.randint(min_n_skills, max_n_skills)
    picked_skills = np.random.choice(
        skills, n_required, p=skills_normalized_probabilities, replace=False
    )
    picked_levels = np.random.choice(
        mastery_levels, n_required, p=mastery_levels_normalized_probabilities, replace=True
    )
    posting = {'required_skills': dict(zip(picked_skills, picked_levels))}
    posting['year'] = np.random.choice(years, 1, p=years_normalized_probabilities)[0]
    return posting

def get_all_jobs(skills, skills_normalized_probabilities, mastery_levels, mastery_levels_normalized_probabilities, years, years_normalized_probabilities, min_n_skills=2, max_n_skills=5, n_jobs=1000):
    """Build a list of ``n_jobs`` independently drawn job postings.

    Every posting comes from ``get_random_job`` called with the same pools,
    probability vectors and skill-count bounds.
    """
    postings = []
    for _ in range(n_jobs):
        postings.append(
            get_random_job(
                skills,
                skills_normalized_probabilities,
                mastery_levels,
                mastery_levels_normalized_probabilities,
                years,
                years_normalized_probabilities,
                min_n_skills,
                max_n_skills,
            )
        )
    return postings

# Simulate 1000 job postings requiring between 2 and 5 skills each.
jobs = get_all_jobs(skills, skills_normalized_probabilities, mastery_levels, mastery_levels_normalized_probabilities, years, years_normalized_probabilities, min_n_skills=2, max_n_skills=5, n_jobs=1000)

# Show the first simulated job as a sanity check.
jobs[0]

{'required_skills': {'Psychische Fertigkeiten': 1,
  'Kommunikation in interkulturellen Situationen': 4,
  'Technical Software Engineering': 4},
 'year': 2022}

# Skill Demand

In [11]:
def get_skill_demand(jobs, years):
    """Count, per year, how many jobs require each (skill, level) pair.

    Args:
        jobs: Iterable of dicts with a 'required_skills' mapping
            (skill -> level) and a 'year'.
        years: Years to create a counter for; a job whose year is not listed
            raises ``KeyError``.

    Returns:
        A dict ``year -> Counter`` whose counter keys are ``(skill, level)``
        tuples.
    """
    demand_by_year = dict((year, Counter()) for year in years)
    for posting in jobs:
        # dict.items() already yields the (skill, level) tuples used as keys.
        for skill_and_level in posting['required_skills'].items():
            demand_by_year[posting['year']][skill_and_level] += 1
    return demand_by_year

# Per-year demand counters for the simulated jobs.
skill_demand = get_skill_demand(jobs, years)
# Ten most frequent (skill, level) pairs among jobs assigned to 2023.
skill_demand[2023].most_common(10)

[(('Dynamik', 1), 13),
 (('Dynamik', 2), 11),
 (('Gliedmaßenkoordination', 1), 10),
 (('Dynamik', 4), 8),
 (('Zusammenarbeit in stabilen Teams', 1), 8),
 (('Gliedmaßenkoordination', 2), 8),
 (('Hardware Engineering', 1), 7),
 (('Network Engineering', 1), 7),
 (('Stresstoleranz', 1), 7),
 (('Beweglichkeit, Gleichgewicht und Koordination', 1), 7)]

# Skill Trend

In [12]:
def get_skill_trend(skill_demand, skill, years):
    """Percentage change in demand between ``years[1]`` and ``years[0]``.

    Args:
        skill_demand: Mapping ``year -> {key: count}``.
        skill: Key looked up in the per-year mappings.
        years: Sequence whose first entry is treated as the current year and
            whose second entry is treated as the previous year.

    Returns:
        ``100 * (current - previous) / previous``, or ``None`` (after printing
        a message) when the previous year's demand is 0.
    """
    newest_year, previous_year = years[0], years[1]
    demand_now = skill_demand[newest_year][skill]
    demand_before = skill_demand[previous_year][skill]
    if demand_before != 0:
        return 100 * (demand_now - demand_before) / demand_before
    # A relative change is undefined without a non-zero baseline.
    print('last year demand is 0')
    return None

# Skill Attractiveness

In [13]:
def get_skill_attractiveness(skill, years, skill_supply, skill_demand):
    """Weighted average of the demand/supply ratio of ``skill`` over ``years``.

    The k-th year in ``years`` (k = 1, 2, ...) carries weight ``1 / k``. A year
    in which ``skill`` is not present in the supply mapping contributes a ratio
    of 0 but still counts towards the normalisation.

    Args:
        skill: Key looked up in the per-year supply and demand mappings.
        years: Years to aggregate over, in weighting order.
        skill_supply: Mapping ``year -> {key: count}``.
        skill_demand: Mapping ``year -> {key: count}``.

    Returns:
        The weighted ratio sum divided by the sum of the weights.
    """
    weighted_ratio_sum = 0
    weight_sum = 0
    for rank, year in enumerate(years, start=1):
        supply_that_year = skill_supply[year]
        if skill in supply_that_year:
            weighted_ratio_sum += skill_demand[year][skill] / (supply_that_year[skill] * rank)
        weight_sum += 1 / rank
    return weighted_ratio_sum / weight_sum

In [14]:
def get_learner_attractiveness(learner, years, skill_supply, skill_demand):
    """Attractiveness of every skill the learner possesses.

    Returns:
        A dict ``skill -> attractiveness`` where each value is
        ``get_skill_attractiveness`` evaluated for the learner's
        ``(skill, level)`` pair.
    """
    return {
        skill: get_skill_attractiveness((skill, level), years, skill_supply, skill_demand)
        for skill, level in learner["possessed_skills"].items()
    }

# Upskilling Advice

In [15]:

# Greedy upskilling of the first learner towards the first job: apply one piece
# of advice at a time and stop as soon as the matching score stops improving.
# `learner` and `job` are bound here so the cell runs on a fresh kernel.
learner, job = learners[0], jobs[0]

# None of the inputs of this table are modified inside the loop, so it is
# computed once instead of on every iteration.
skills_attractiveness = market.get_all_skills_attractiveness(
    skills, mastery_levels, years, skill_supply, skill_demand
)

matching = matchings.learner_job_matching(learner, job)
while True:
    # NOTE(review): assumes up_skilling_advice returns a (skill, level) pair and
    # that learner_job_matching is bounded, so the loop terminates - confirm
    # against the upskillings / matchings modules.
    upskilling_advice = upskillings.up_skilling_advice(learner, job, skills_attractiveness)
    # Mutates learners[0] in place: the learner acquires the advised skill.
    learner['possessed_skills'][upskilling_advice[0]] = upskilling_advice[1]

    # Re-score after the change; the comparison value was previously never
    # refreshed, so the loop could not terminate.
    new_matching = matchings.learner_job_matching(learner, job)
    if new_matching <= matching:
        break
    matching = new_matching


('Psychische Fertigkeiten', 1)

In [19]:
jobs[0]

{'required_skills': {'Business- und Requirements-Engineerin': 4,
  'Anleiten und Begleiten': 4,
  'Mitfühlen': 2,
  'Psychische Fertigkeiten': 2},
 'year': 2019}

In [None]:
# Hand-written example learner (three skills, year 2023) for manual experiments.
# Note: this rebinds the notebook-level name `learner`.
learner = {'possessed_skills': {'Business- und Requirements-Engineerin': 1,
  'Mitfühlen': 3,
  'Psychische Fertigkeiten': 3},
 'year': 2023}

In [30]:
# Attractiveness table as computed by the project's `market` module.
# NOTE(review): presumably keyed by (skill, level) tuples, as the lookup in the
# following cell suggests - confirm against market.get_all_skills_attractiveness.
skills_attractiveness = market.get_all_skills_attractiveness(
    skills, mastery_levels, years, skill_supply, skill_demand
)

In [32]:
skills_attractiveness[('Business- und Requirements-Engineerin', 2)], skills_attractiveness[('Anleiten und Begleiten', 2)]

(0.5464852607709751, 1.1916099773242632)

In [None]:
# Build the (skill, level) -> attractiveness table with the notebook's own
# get_skill_attractiveness, one entry per skill and mastery level.
skills_attractiveness = dict()
for skill in skills:
    for level in mastery_levels:
        skills_attractiveness[(skill, level)] = get_skill_attractiveness((skill, level), years, skill_supply, skill_demand)

In [None]:
skills_attractiveness = market.get_all_skills_attractiveness(skills, mastery_levels, years, skill_supply, skill_demand)

In [None]:
upskillings.up_skilling_advice(learners[0], jobs[0], skills_attractiveness)

In [None]:
learners[0], jobs[0]

# Courses

In [None]:
def get_random_provided_skills(skills, mastery_levels, required_skills, n_provided_skills):
    """Draw the skills a course teaches, by rejection sampling.

    A candidate ``(skill, level)`` pair is accepted when the skill is neither
    required nor already provided, or when the skill is required and the
    candidate level is strictly above the required level (in that case an
    already provided level for the same skill is overwritten).

    Args:
        skills: Sequence of skill names to draw candidates from.
        mastery_levels: Sequence of mastery levels to draw candidates from.
        required_skills: Mapping ``skill -> level`` the course requires.
        n_provided_skills: Number of distinct skills to return.

    Returns:
        A dict ``skill -> level`` with ``n_provided_skills`` entries (empty
        when ``n_provided_skills`` is 0 or negative).

    Raises:
        ValueError: If fewer than ``n_provided_skills`` distinct skills can
            ever be accepted; without this check the sampling loop would never
            terminate.
    """
    # Skills the loop below is able to accept at all: anything not required,
    # plus required skills for which some level exceeds the required one.
    eligible = {
        skill
        for skill in skills
        if skill not in required_skills
        or any(level > required_skills[skill] for level in mastery_levels)
    }
    if len(eligible) < n_provided_skills:
        raise ValueError(
            f"cannot provide {n_provided_skills} skills: only {len(eligible)} eligible"
        )

    provided_skills = dict()
    while len(provided_skills) < n_provided_skills:
        candidate_skill = random.choice(skills)
        candidate_level = random.choice(mastery_levels)
        if (
            candidate_skill not in required_skills
            and candidate_skill not in provided_skills
        ):
            provided_skills[candidate_skill] = candidate_level
        elif (
            candidate_skill in required_skills
            and candidate_level > required_skills[candidate_skill]
        ):
            # A course may teach a required skill only at a higher level.
            provided_skills[candidate_skill] = candidate_level

    return provided_skills


def get_random_course(skills, mastery_levels, min_n_required_skills=1, max_n_required_skills=5, min_n_provided_skills=1, max_n_provided_skills=2):
    """Draw one synthetic course.

    Required skills are drawn uniformly without replacement and paired with
    uniformly drawn levels; the provided skills come from
    ``get_random_provided_skills`` given those requirements.

    Returns:
        A dict ``{'required_skills': {...}, 'provided_skills': {...}}``.
    """
    n_required_skills = random.randint(min_n_required_skills, max_n_required_skills)
    prerequisite_names = np.random.choice(skills, n_required_skills, replace=False)
    prerequisite_levels = np.random.choice(mastery_levels, n_required_skills, replace=True)
    required = dict(zip(prerequisite_names, prerequisite_levels))

    n_provided_skills = random.randint(min_n_provided_skills, max_n_provided_skills)
    return {
        'required_skills': required,
        'provided_skills': get_random_provided_skills(
            skills, mastery_levels, required, n_provided_skills
        ),
    }

def get_all_courses(skills, mastery_levels, min_n_required_skills=1, max_n_required_skills=5, min_n_provided_skills=1, max_n_provided_skills=2, n_courses=1000):
    """Build a list of ``n_courses`` independently drawn courses.

    Args:
        skills: Pool of skill names.
        mastery_levels: Pool of mastery levels.
        min_n_required_skills: Lower bound on required skills per course.
        max_n_required_skills: Upper bound on required skills per course.
        min_n_provided_skills: Lower bound on provided skills per course.
        max_n_provided_skills: Upper bound on provided skills per course.
        n_courses: Number of courses to generate.

    Returns:
        A list of course dicts as produced by ``get_random_course``.
    """
    # Forward the caller's bounds; they used to be replaced by hard-coded
    # literals, so non-default arguments were silently ignored.
    return [
        get_random_course(
            skills,
            mastery_levels,
            min_n_required_skills=min_n_required_skills,
            max_n_required_skills=max_n_required_skills,
            min_n_provided_skills=min_n_provided_skills,
            max_n_provided_skills=max_n_provided_skills,
        )
        for _ in range(n_courses)
    ]

# Simulate 1000 courses with 1 to 5 required and 1 to 2 provided skills each.
courses = get_all_courses(skills, mastery_levels, min_n_required_skills=1, max_n_required_skills=5, min_n_provided_skills=1, max_n_provided_skills=2, n_courses=1000)

# Show the first simulated course as a sanity check.
courses[0]

# Learner-Course Matching

In [None]:
def learner_course_required_matching(learner, course):
    """Average degree to which the learner meets the course's prerequisites.

    Each required skill contributes ``min(possessed, required) / required``
    when the learner has it and 0 otherwise; the contributions are averaged
    over all required skills.
    """
    total = 0
    for skill, needed_level in course['required_skills'].items():
        if skill not in learner['possessed_skills']:
            continue
        # Exceeding the required level is capped at a contribution of 1.
        total += min(learner['possessed_skills'][skill], needed_level) / needed_level
    return total / len(course['required_skills'])

def learner_course_provided_matching(learner, course):
    """Average degree to which the learner already has what the course teaches.

    Each provided skill contributes ``min(possessed, provided) / provided``
    when the learner has it and 0 otherwise; the contributions are averaged
    over all provided skills.
    """
    total = 0
    for skill, taught_level in course['provided_skills'].items():
        if skill not in learner['possessed_skills']:
            continue
        # Already exceeding the taught level is capped at a contribution of 1.
        total += min(learner['possessed_skills'][skill], taught_level) / taught_level
    return total / len(course['provided_skills'])

def learner_course_matching(learner, course):
    """Score how well a course suits a learner.

    Returns 0 when the learner already fully covers everything the course
    provides; otherwise the prerequisite score divided by
    ``1 + provided score``.
    """
    required_matching = learner_course_required_matching(learner, course)
    provided_matching = learner_course_provided_matching(learner, course)
    return 0 if provided_matching >= 1.0 else required_matching / (provided_matching + 1)

In [None]:
# Pair learner i with course i and print the pairs whose matching score
# reaches 0.1; the loop stops once six such pairs have been printed.
nb_matching = 0
for learner, course in zip(learners, courses):
    matching = learner_course_matching(learner, course)
    if not matching >= 0.1:
        continue
    print("Matching: {:.2f}".format(matching))
    print("Course required: {}".format(course['required_skills']))
    print("Course provided: {}".format(course['provided_skills']))
    print("Learner: {}".format(learner['possessed_skills']))
    nb_matching += 1
    if nb_matching > 5:
        break

# Learner-skill Achievability

In [None]:
def learner_skill_achievability(learner, skill, mastery_level, courses):
    """Sum the learner's prerequisite scores over courses teaching a skill.

    Only courses that provide ``skill`` at exactly ``mastery_level`` count.

    Returns:
        A tuple ``(achievability, nb_courses)``: the summed
        ``learner_course_required_matching`` scores and the number of courses
        that were counted.
    """
    achievability, nb_courses = 0, 0
    for course in courses:
        offered = course['provided_skills']
        if skill not in offered or not mastery_level == offered[skill]:
            continue
        achievability += learner_course_required_matching(learner, course)
        nb_courses += 1
    return achievability, nb_courses

In [None]:
learners[0]

In [None]:
jobs[0]

In [None]:
# Print the learners for whom 'Motivieren' at level 1 has a positive
# achievability; the loop stops once six such learners have been printed.
nb_achievable = 0

for learner in learners:
    achievability, nb_courses = learner_skill_achievability(learner, 'Motivieren', 1, courses)
    if not achievability > 0:
        continue
    nb_achievable += 1
    print("Achievability: {:.2f}".format(achievability))
    print("Number of courses: {}".format(nb_courses))
    print("Learner: {}".format(learner['possessed_skills']))
    print("Skill: Motivieren")
    if nb_achievable > 5:
        break

In [None]:
def max(a, *args, **kwargs):
    """Return the largest item, with the same contract as the built-in ``max``.

    This definition shadows the built-in name for the rest of the notebook, so
    it delegates to ``builtins.max`` instead of re-implementing a one-argument
    subset: ``max(iterable)``, ``max(x, y, ...)``, ``key=`` and ``default=``
    all keep working after this cell has run.

    Args:
        a: An iterable of items, or the first of several items to compare.
        *args: Further items to compare (multi-argument form).
        **kwargs: ``key`` and/or ``default``, as accepted by the built-in.

    Returns:
        The largest item (the first one encountered in case of ties).

    Raises:
        ValueError: If ``a`` is an empty iterable and no ``default`` is given
            (the hand-rolled version raised ``IndexError`` here).
    """
    # Local import: inside this module the bare name `max` is this function.
    import builtins

    return builtins.max(a, *args, **kwargs)

max([7, 8, 3, 4, 5])