In [3]:
import pickle
import json
import importlib
from collections import Counter

In [4]:
# Locations of the synthetic dataset, relative to this notebook's directory.
# Pickled inverted indexes (presumably skill -> items lookups; the job index is
# queried by skill in profile_alljobs_match).
inverted_index_course_path = "../data/inverted_index/synthetic/courses_provided_inverted_index.pkl"
inverted_index_job_path = "../data/inverted_index/synthetic/jobs_inverted_index.pkl"
# Raw JSON inputs: resumes (profiles), the skill lookup table, and jobs.
resume_path = "../data/raw/synthetic/resumes.json"
skill_path = "../data/raw/synthetic/skills.json"
job_path = "../data/raw/synthetic/jobs.json"

In [5]:
# Load every input inside a `with` block so the file handles are closed
# deterministically instead of being left to the garbage collector.

# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load index files that were produced by a trusted pipeline.
with open(inverted_index_course_path, "rb") as course_index_file:
    course_inverted_index = pickle.load(course_index_file)
with open(inverted_index_job_path, "rb") as job_index_file:
    job_inverted_index = pickle.load(job_index_file)

# `skills` is used below as a lookup table: every skill listed in a resume or
# a job is replaced by `skills[skill]`.
with open(skill_path, 'r') as skill_file:
    skills = json.load(skill_file)

# Resumes: {profile key: collection of skills} -> {profile key: set of looked-up skills}.
with open(resume_path, 'r') as resume_file:
    profiles = json.load(resume_file)
profiles = {key:{skills[skill] for skill in value} for key, value in profiles.items()}

# Jobs get the same treatment: {job key: set of looked-up required skills}.
with open(job_path, 'r') as job_file:
    jobs = json.load(job_file)
jobs = {key:{skills[skill] for skill in value} for key, value in jobs.items()}

In [6]:
def profile_job_match(profile, job):
    """Computes the percentage of a job's required skills that the profile possesses.

    Args:
        profile (set): set of skills that the profile has
        job (set): set of skills required for the job

    Returns:
        float: percentage (0 to 100) of the required skills that the profile
            possesses; 0.0 when the job lists no required skills
    """
    # A job without required skills has nothing to match against; return 0.0
    # rather than raising ZeroDivisionError.
    if not job:
        return 0.0
    possessed_skills = profile.intersection(job)
    matching = 100*len(possessed_skills)/len(job)
    return matching

In [7]:
def profile_alljobs_match(profile, jobs, job_inverted_index):
    """Computes a matching for all jobs based on the percentage of skills that the user possesses.

    Only jobs that share at least one skill with the profile appear in the
    result; because the result is a Counter, looking up any other job yields 0.

    Args:
        profile (set): set of skills that the profile has
        jobs (dict): dictionary of all jobs and the skills that they require
        job_inverted_index (dict): inverted index mapping each skill to the jobs
            that require it, for an efficient search

    Returns:
        collections.Counter: for each matched job, the percentage (0 to 100) of
            its required skills that the profile possesses
    """
    ranked_jobs = Counter()
    for skill in profile:
        # .get() skips skills that no job requires instead of raising KeyError,
        # and does not insert new keys if the index is a defaultdict.
        for job in job_inverted_index.get(skill, ()):
            ranked_jobs[job] += 1
    # Turn the shared-skill counts into percentages (same formula as
    # profile_job_match). Only values change, so iterating the keys is safe.
    for job in ranked_jobs:
        ranked_jobs[job] = 100 * ranked_jobs[job] / len(jobs[job])
    return ranked_jobs

In [8]:
# Score every job that shares at least one skill with the first resume.
ranked_jobs = profile_alljobs_match(
    profile=profiles['resume_0'],
    jobs=jobs,
    job_inverted_index=job_inverted_index,
)

In [9]:
# Show the matches as (job, score) pairs ordered from best to worst score.
ranked_jobs.most_common()

[('jobs_201', 100.0),
 ('jobs_447', 100.0),
 ('jobs_295', 100.0),
 ('jobs_89', 100.0),
 ('jobs_408', 100.0),
 ('jobs_59', 100.0),
 ('jobs_123', 100.0),
 ('jobs_325', 100.0),
 ('jobs_375', 100.0),
 ('jobs_328', 100.0),
 ('jobs_432', 100.0),
 ('jobs_13', 100.0),
 ('jobs_47', 100.0),
 ('jobs_235', 100.0),
 ('jobs_67', 66.66666666666667),
 ('jobs_272', 66.66666666666667),
 ('jobs_285', 66.66666666666667),
 ('jobs_182', 50.0),
 ('jobs_183', 50.0),
 ('jobs_309', 50.0),
 ('jobs_305', 50.0),
 ('jobs_78', 50.0),
 ('jobs_91', 50.0),
 ('jobs_270', 50.0),
 ('jobs_181', 50.0),
 ('jobs_414', 50.0),
 ('jobs_327', 50.0),
 ('jobs_430', 50.0),
 ('jobs_306', 50.0),
 ('jobs_79', 50.0),
 ('jobs_282', 50.0),
 ('jobs_346', 50.0),
 ('jobs_173', 50.0),
 ('jobs_37', 50.0),
 ('jobs_41', 40.0),
 ('jobs_419', 40.0),
 ('jobs_69', 40.0),
 ('jobs_497', 40.0),
 ('jobs_388', 40.0),
 ('jobs_487', 40.0),
 ('jobs_35', 40.0),
 ('jobs_456', 40.0),
 ('jobs_266', 33.333333333333336),
 ('jobs_230', 33.333333333333336),
 ('jobs

In [46]:
# Cross-check: score resume_0 against every job, one job at a time, and print
# the job, its required skills, the resume's skills and the resulting score.
for job, skillset in jobs.items():
    matching = profile_job_match(profiles['resume_0'], skillset)
    print(job, skillset, profiles['resume_0'], matching)

jobs_0 {17, 11, 4} {2, 6} 0.0
jobs_1 {5} {2, 6} 0.0
jobs_2 {1, 8, 16, 17, 19} {2, 6} 0.0
jobs_3 {3, 15} {2, 6} 0.0
jobs_4 {10, 13, 15, 16, 19} {2, 6} 0.0
jobs_5 {9, 11} {2, 6} 0.0
jobs_6 {16, 18, 3, 4} {2, 6} 0.0
jobs_7 {3} {2, 6} 0.0
jobs_8 {3, 4, 5, 12, 16} {2, 6} 0.0
jobs_9 {16, 4} {2, 6} 0.0
jobs_10 {3, 13} {2, 6} 0.0
jobs_11 {2, 3, 12, 7} {2, 6} 25.0
jobs_12 {4, 5, 10, 11, 13} {2, 6} 0.0
jobs_13 {6} {2, 6} 100.0
jobs_14 {5} {2, 6} 0.0
jobs_15 {3, 5, 9, 11, 18} {2, 6} 0.0
jobs_16 {0, 4, 9, 10, 14} {2, 6} 0.0
jobs_17 {17, 1, 7} {2, 6} 0.0
jobs_18 {11, 4} {2, 6} 0.0
jobs_19 {8, 10, 12, 15} {2, 6} 0.0
jobs_20 {5, 9, 10, 14, 16} {2, 6} 0.0
jobs_21 {8, 0, 2} {2, 6} 33.333333333333336
jobs_22 {15, 14, 7} {2, 6} 0.0
jobs_23 {0, 18, 19} {2, 6} 0.0
jobs_24 {9, 5, 1} {2, 6} 0.0
jobs_25 {8, 4} {2, 6} 0.0
jobs_26 {16, 2, 3, 14} {2, 6} 25.0
jobs_27 {13, 7} {2, 6} 0.0
jobs_28 {13} {2, 6} 0.0
jobs_29 {8} {2, 6} 0.0
jobs_30 {8} {2, 6} 0.0
jobs_31 {17, 12, 9, 7} {2, 6} 0.0
jobs_32 {17, 11, 13, 7} {

In [43]:
# Display the full mapping of each job to its set of required skills.
jobs

{'jobs_0': {4, 11, 17},
 'jobs_1': {5},
 'jobs_2': {1, 8, 16, 17, 19},
 'jobs_3': {3, 15},
 'jobs_4': {10, 13, 15, 16, 19},
 'jobs_5': {9, 11},
 'jobs_6': {3, 4, 16, 18},
 'jobs_7': {3},
 'jobs_8': {3, 4, 5, 12, 16},
 'jobs_9': {4, 16},
 'jobs_10': {3, 13},
 'jobs_11': {2, 3, 7, 12},
 'jobs_12': {4, 5, 10, 11, 13},
 'jobs_13': {6},
 'jobs_14': {5},
 'jobs_15': {3, 5, 9, 11, 18},
 'jobs_16': {0, 4, 9, 10, 14},
 'jobs_17': {1, 7, 17},
 'jobs_18': {4, 11},
 'jobs_19': {8, 10, 12, 15},
 'jobs_20': {5, 9, 10, 14, 16},
 'jobs_21': {0, 2, 8},
 'jobs_22': {7, 14, 15},
 'jobs_23': {0, 18, 19},
 'jobs_24': {1, 5, 9},
 'jobs_25': {4, 8},
 'jobs_26': {2, 3, 14, 16},
 'jobs_27': {7, 13},
 'jobs_28': {13},
 'jobs_29': {8},
 'jobs_30': {8},
 'jobs_31': {7, 9, 12, 17},
 'jobs_32': {7, 11, 13, 17},
 'jobs_33': {2, 7, 10, 11, 18},
 'jobs_34': {7, 14, 17, 19},
 'jobs_35': {2, 6, 8, 17, 18},
 'jobs_36': {9, 12, 19},
 'jobs_37': {6, 15},
 'jobs_38': {7, 10, 11, 13},
 'jobs_39': {2, 3, 4, 10, 12},
 'jobs_40