In [1]:
import json
from collections import Counter

In [None]:
# Locations of the synthetic dataset files (relative paths, resolved against
# the directory the notebook is run from).
# Pre-built inverted indexes for the provided courses and for the jobs.
inverted_index_course_path = "../data/inverted_index/synthetic/courses_provided_inverted_index.json"
inverted_index_job_path = "../data/inverted_index/synthetic/jobs_inverted_index.json"
# Raw JSON files for resumes, skills and jobs.
resume_path = "../data/raw/synthetic/resumes.json"
skill_path = "../data/raw/synthetic/skills.json"
job_path = "../data/raw/synthetic/jobs.json"

In [None]:
def set_decoder(dct):
    """JSON ``object_hook`` that restores integer keys and set values.

    JSON can only store string keys and has no set type, so this hook
    post-processes every decoded JSON object: keys made up solely of decimal
    digits become ``int`` and list values become ``set``. Everything else is
    passed through unchanged.

    Args:
        dct (dict): a JSON object as decoded by the ``json`` module
            (string keys).

    Returns:
        dict: a new dictionary with converted keys and values.
    """
    decoded = {}
    for key, value in dct.items():
        # str.isdecimal() accepts exactly the characters int() can parse.
        # str.isdigit() also accepts e.g. superscripts ("²"), for which int()
        # raises ValueError, so such keys must be kept as strings.
        new_key = int(key) if key.isdecimal() else key

        # Lists become sets; any other value is kept as-is.
        new_value = set(value) if isinstance(value, list) else value

        decoded[new_key] = new_value
    return decoded

In [None]:
def _load_json(path, object_hook=None):
    """Parse the JSON file at ``path``, closing the file handle afterwards."""
    with open(path, "r") as handle:
        return json.load(handle, object_hook=object_hook)


# Inverted indexes are decoded with set_decoder (integer keys, set values).
course_inverted_index = _load_json(inverted_index_course_path, object_hook=set_decoder)
job_inverted_index = _load_json(inverted_index_job_path, object_hook=set_decoder)

# `skills` is used below as a lookup table from the skill entries found in the
# raw files to the values stored in the skill sets.
# NOTE(review): presumably skill name -> skill id; confirm against skills.json.
skills = _load_json(skill_path)

# Replace each profile's / job's skill entries by their `skills` lookup value,
# collected into a set.
profiles = {key: {skills[skill] for skill in value} for key, value in _load_json(resume_path).items()}
jobs = {key: {skills[skill] for skill in value} for key, value in _load_json(job_path).items()}

In [None]:
def profile_job_match(profile, job):
    """Compute the percentage of a job's required skills that the profile possesses.

    Args:
        profile (set): set of skills that the profile has
        job (set): set of skills required for the job

    Returns:
        float: percentage (0-100) of the required skills that the profile
            possesses; 0.0 when the job lists no required skills
    """
    # A job without required skills would divide by zero below; score it 0
    # (nothing matched) rather than raising.
    if not job:
        return 0.0
    possessed_skills = profile.intersection(job)
    return 100 * len(possessed_skills) / len(job)

In [None]:
def profile_alljobs_match(profile, jobs, job_inverted_index):
    """Score every job by the percentage of its required skills the profile possesses.

    Only jobs reachable through the inverted index from at least one of the
    profile's skills are scored; since the result is a ``Counter``, looking up
    any other job yields 0.

    Args:
        profile (set): set of skills that the profile has
        jobs (dict): dictionary of all jobs and the skills that they require
        job_inverted_index (dict): inverted index mapping a skill to the jobs
            that require it, for an efficient search

    Returns:
        collections.Counter: for each matched job, the percentage (0-100) of
            its required skills that the profile possesses
    """
    # Number of the profile's skills that each job requires.
    matched_counts = Counter()
    for skill in profile:
        # A skill that no job requires is absent from the index: skip it
        # instead of raising KeyError.
        matched_counts.update(job_inverted_index.get(skill, ()))

    # Turn the raw hit counts into percentages of each job's requirements.
    return Counter(
        {job: 100 * hits / len(jobs[job]) for job, hits in matched_counts.items()}
    )

In [None]:
# Score all jobs against the skills of the 'resume_0' profile via the inverted index.
ranked_jobs = profile_alljobs_match(profiles['resume_0'], jobs, job_inverted_index)

In [None]:
# Display the job scores held in `ranked_jobs`.
ranked_jobs

In [None]:
# Brute-force pass: score 'resume_0' against every job one by one with
# profile_job_match and print each job that matches at least one skill.
for job, skillset in jobs.items():
    matching = profile_job_match(profiles['resume_0'], skillset)
    if not matching > 0:
        continue
    print(job, skillset, profiles['resume_0'], matching)

In [2]:
# Write a function that takes a profile and a job and returns a matching score
# NOTE(review): a function with this same name is already defined in an earlier
# cell of this notebook; this later definition shadows it. Keep only one.
def profile_job_match(profile, job):
    """Compute the percentage of a job's required skills that the profile possesses.

    Args:
        profile (set): set of skills that the profile has
        job (set): set of skills required for the job

    Returns:
        float: percentage (0-100) of the required skills that the profile
            possesses; 0.0 when the job lists no required skills
    """
    # A job without required skills would divide by zero below; score it 0
    # (nothing matched) rather than raising.
    if not job:
        return 0.0
    possessed_skills = profile.intersection(job)
    return 100 * len(possessed_skills) / len(job)
