# Set up elasticsearch

In [None]:
import json
from elasticsearch import Elasticsearch

In [2]:
# Read the connection settings from a JSON file kept outside the notebook,
# so that no secret is hardcoded in a cell.
with open('../credentials.json') as f:
    credentials = json.load(f)

# Pull out the three settings needed to build the Elasticsearch client.
bonsai_url, access_key, access_secret = (
    credentials[setting]
    for setting in ('bonsai_url', 'access_key', 'access_secret')
)


In [3]:
# Build the Elasticsearch client for the cluster at `bonsai_url`, authenticating
# with the key/secret pair read from the credentials file.
es = Elasticsearch(
    [bonsai_url],
    basic_auth=(access_key, access_secret),
    headers={'Content-Type': 'application/json'}
)
# Fetch the cluster info and read the server version number from it.
info = es.info()
version_number = info['version']['number']

print(f"Elasticsearch version: {version_number}")

Elasticsearch version: 7.2.0


### Defining a fake user and a fake job
1 means beginner

2 means advanced

3 means expert

In [180]:
# Mastery levels used throughout: 1 = beginner, 2 = advanced, 3 = expert.

# Skills held by the example user, mapped to the user's mastery level.
fake_user = {
    'Leadership': 1,
    'Change management': 3,
    'Organisation + Planning': 3,
}

# Skills required by the example job, mapped to the required mastery level.
fake_job = {
    'Leadership': 3,
    'Change management': 3,
    'Organisation + Planning': 3,
    'Customer focus': 2,
}

### Basic user/job matching function that does not use the levels

This basic matching simply calculates the proportion of skills required for the job that the user has.

In our example, the user has 3 skills out of the 4 that are required for the job.

So the score is 75%

In [182]:
def profile_job_matching(profile, job):
    """Computes a profile/job matching score from the share of required skills the profile has.

    Mastery levels are ignored: a required skill counts as soon as it appears
    in the profile, whatever its level.

    Args:
        profile (dict): dictionary of skills that the profile has
        job (dict): dictionary of skills required for the job

    Returns:
        float: matching score as a percentage between 0 and 100. A job that
            lists no skills yields 0.0 (there is nothing to match against).
    """
    if not job:
        # Guard against dividing by zero when the job lists no skills.
        return 0.0
    matched_skills = sum(1 for skill in job if skill in profile)
    return 100 * matched_skills / len(job)


# Score the example user against the example job, ignoring mastery levels.
basic_matching_score = profile_job_matching(fake_user, fake_job)
print(f"Matching score: {basic_matching_score}%")

Matching score: 75.0%


### User/job matching function that uses the levels

This matching function is similar to the previous one: it is based on the proportion of required skills that the user has.

However, it takes into account the mastery levels of skills. For each skill:
- If the user has a lower mastery level than what is required for the job, the matching will decrease. The lower the mastery level, the bigger the decrease.
- If the user has a similar or higher mastery level than what is required for the job, the matching is not affected.

In [184]:

def profile_job_matching_with_level(profile, job):
    """Computes a profile/job matching score that takes skill mastery levels into account.

    Each required skill that the profile has contributes
    min(profile level, required level) / required level, i.e. 1 when the
    profile meets or exceeds the requirement and a fraction otherwise.
    Required skills missing from the profile contribute 0. The contributions
    are averaged over all required skills and expressed as a percentage.

    Args:
        profile (dict): dictionary mapping each skill of the profile to its mastery level
        job (dict): dictionary mapping each skill required for the job to the required level

    Returns:
        float: matching score as a percentage between 0 and 100. A job that
            lists no skills yields 0.0 (there is nothing to match against).
    """
    if not job:
        # Guard against dividing by zero when the job lists no skills.
        return 0.0
    matching = 0
    for skill, required_level in job.items():
        if skill not in profile:
            continue
        if required_level <= 0:
            # No mastery is required for this skill, so having it at all is a
            # full match; this also avoids dividing by a zero level.
            matching += 1
            continue
        # Cap the profile level at the requirement: exceeding it earns no bonus.
        matching += min(profile[skill], required_level) / required_level
    return 100 * matching / len(job)

# Score the example user against the example job, this time using mastery levels.
level_aware_matching_score = profile_job_matching_with_level(fake_user, fake_job)
print(f"Matching score: {level_aware_matching_score}%")

Matching score: 58.33333333333333%


### Finding the skills required for the job that the user does not have (or has at a lower level)

In [215]:
# A required skill is "missing" when the user lacks it entirely or holds it
# below the level the job asks for. The job's required level is recorded.
missing_skills = {
    skill: required_level
    for skill, required_level in fake_job.items()
    if skill not in fake_user or fake_user[skill] < required_level
}
print(f"Missing skills: {missing_skills}")

Missing skills: {'Leadership': 3, 'Customer focus': 2}


# Use elasticsearch to find all the learning opportunities that can provide the missing skills

### In addition, we rank the learning opportunities with a score such that the learning opportunities that teach all the missing skills (and only them) have the highest score.

### If a learning opportunity teaches irrelevant skills or not all the missing skills, it will be penalized.

### If a learning opportunity does not teach any of the missing skills, it will not be considered.


In [219]:
# Names of the skills the user is missing; the levels are not used in the search.
skills_to_search = list(missing_skills.keys())

index_name = "learning_opportunities_production"
# Retrieve the learning opportunities whose `en_skills` field contains at least
# one of the searched skills, scored by the script below.
query = {
    "query": {
        "function_score": {
            # Filter: keep only documents sharing at least one skill with the search.
            "query": {
                "terms": {
                    "en_skills": skills_to_search
                }
            },
            # Script value: (number of searched skills found in the document)
            # divided by max(number of document skills, number of searched skills).
            # It reaches 1 only when the document teaches exactly the searched
            # skills; extra or absent skills lower it.
            "script_score": {
                "script": {
                    "source": """
                        double count = 0;
                        double total_skills = Math.max(doc['skill_ids'].length, params.skills_to_search.length);
                        for (String skill : params.skills_to_search) {
                            if (doc['en_skills'].contains(skill)) {
                                count++;
                            }
                        }
                        return total_skills > 0 ? count / total_skills : 0;
                    """,
                    "params": {
                        "skills_to_search": skills_to_search
                    }
                }
            }
        }
    }
}

# Ask for up to 1000 hits in a single response.
response = es.search(
    index=index_name, 
    body=query,
    size=1000)

# Document fields kept when the hits are simplified in the next cell.
keywords = ['course', 'en_skills', 'skill_ids', 'learning_opportunity_skills']

# Get documents from the query response
learning_opportunities = [hit["_source"] for hit in response["hits"]["hits"]]
# Relevance scores, in the same order as `learning_opportunities`.
scores = [hit["_score"] for hit in response["hits"]["hits"]]

print(f"Number of retireved learning opportunities: {len(learning_opportunities)}")

Number of retireved learning opportunities: 786


### Some processing to make learning opportunities into a simple dict 

In [220]:
# Keep only the fields listed in `keywords` for each learning opportunity.
learning_opportunities = [
    {keyword: learning_opportunity[keyword] for keyword in keywords}
    for learning_opportunity in learning_opportunities
]

for learning_opportunity in learning_opportunities:
    # Translate each skill tag (e.g. '14_expert') into a numeric level:
    # 1 = beginner, 3 = expert, anything else is treated as 2 (advanced).
    skill_levels = []
    for skill_tag in learning_opportunity['learning_opportunity_skills']:
        if "beginner" in skill_tag:
            skill_levels.append(1)
        elif "expert" in skill_tag:
            skill_levels.append(3)
        else:
            skill_levels.append(2)
    # Turn both skill lists into {skill: level} dicts. This relies on the skill
    # lists and the tag list being in the same order.
    for field in ('skill_ids', 'en_skills'):
        learning_opportunity[field] = dict(zip(learning_opportunity[field], skill_levels))

### Here we have a look at all the learning opportunities found by elasticsearch.
### We want to find the learning opportunity that (if taken) maximises users/job matching 
### To do so, we have a naive approach: 
1. We go through all the learning opportunities
2. Update the user profile assuming that they will complete the learning opportunity
3. Compute the matching between the job and the updated profile
4. Keep the learning opportunity that gives the highest matching (if there is a tie, we prioritize the course with the smallest number of skills)

In [218]:
# Baseline: the matching the user already has with the job, before any course.
matching = profile_job_matching_with_level(fake_user, fake_job)
best_matching = matching
best_learning_opportunity = None
for learning_opportunity in learning_opportunities:
    # Simulate completing the course. A course can only raise a skill level:
    # a level the user already holds is never replaced by a lower one.
    updated_user = fake_user.copy()
    for skill, level in learning_opportunity['en_skills'].items():
        updated_user[skill] = max(updated_user.get(skill, level), level)
    updated_matching = profile_job_matching_with_level(updated_user, fake_job)
    if updated_matching > best_matching:
        best_matching = updated_matching
        best_learning_opportunity = learning_opportunity
    elif (
        # Only break ties between courses that improved on the baseline; while
        # no course has been selected there is nothing to compare against.
        best_learning_opportunity is not None
        and updated_matching == best_matching
        and len(learning_opportunity['en_skills']) < len(best_learning_opportunity['en_skills'])
    ):
        # Same matching with fewer skills taught: prefer the more focused course.
        best_learning_opportunity = learning_opportunity
print(f"If the learner takes the learning opportunity below, the matching with the job will be: {best_matching}%")
best_learning_opportunity

If the learner takes the learning opportunity below, the matching with the job will be: 100.0%


{'course': 'PMI-ACP Certification',
 'en_skills': {'Project management': 3,
  'Creativity + Innovation ': 3,
  'Analysis + Conception': 3,
  'Critical thinking': 3,
  'Management': 3,
  'Leadership': 3,
  'Organisation + Planning': 3,
  'Initiative + Result orientation': 3,
  'Customer focus': 3},
 'skill_ids': {14: 3, 12: 3, 9: 3, 5: 3, 11: 3, 7: 3, 13: 3, 1: 3, 6: 3},
 'learning_opportunity_skills': ['14_expert',
  '12_expert',
  '9_expert',
  '5_expert',
  '11_expert',
  '7_expert',
  '13_expert',
  '1_expert',
  '6_expert']}