## Recommender System for Matching HealthCare Professionals with Jobs Using Cosine Similarity

### Importing the relevant packages

In [1]:
import pandas as pd
import numpy as np
import random #for generating random numbers

### Generating a synthetic data.
In this data, we will use syntheric data with 2000 professionals and 500 different jobs, using the random package

In [2]:
# Defining the parameters to randomize the synthetic data
num_professionals = 2000
num_jobs = 500

In [3]:
# Defining the variables 
skills = ['nursing', 'physician', 'radiology', 'pharmacy', 'lab']
locations = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Philadelphia']
certifications = ['BLS', 'ACLS', 'PALS', 'CPR', 'NRP']
education = ['Associate', 'Bachelor', 'Master', 'Doctorate']
work_preferences = ['part-time', 'full-time', 'day shift', 'night shift']
skill_levels = ['entry-level', 'intermediate', 'advanced']
max_experience_years = 30

In [20]:
# Generating synnthetic data for the job healthcare professionals 
professionals_df = pd.DataFrame(columns=['id', 'skills', 'location', 'certification', 'education', 'skill_level', 'work_preference', 'experience' ])
for i in range(num_professionals):
    id = i + 1,
    skills = random.choice(skills),
    location = random.choice(locations),
    certification = random.sample(certifications, random.randint(1, len(certifications))),
    education = random.choice(education),
    work_preference = random.choice(work_preferences),
    experience = random.randint(1, max_experience_years),
    skill_level = random.choice(skill_levels),
    professionals_df.loc[i] = [id, skills, location, certification, education, skill_level, work_preference, experience]
professionals_df.sample(5)
    
    

Unnamed: 0,id,skills,location,certification,education,skill_level,work_preference,experience
1553,"(1554,)","(pharmacy,)","(Chicago,)","([BLS, CPR, PALS],)","(Master,)","(entry-level,)","(day shift,)","(20,)"
1694,"(1695,)","(pharmacy,)","(Chicago,)","([BLS, PALS],)","(Master,)","(entry-level,)","(day shift,)","(3,)"
1805,"(1806,)","(pharmacy,)","(Houston,)","([NRP],)","(Master,)","(intermediate,)","(day shift,)","(12,)"
1522,"(1523,)","(pharmacy,)","(New York,)","([ACLS],)","(Master,)","(entry-level,)","(day shift,)","(26,)"
467,"(468,)","(pharmacy,)","(Los Angeles,)","([CPR, ACLS],)","(Master,)","(advanced,)","(day shift,)","(4,)"


The dataset looks ugly with the commas and the parentheses. Let us create a function to remove them

In [23]:
def clean_tuple(t):
    return str(t).replace("(", "").replace(",", "").replace(")", "").replace("'", "").replace("[", "").replace("]", "")
professionals_df = professionals_df.applymap(clean_tuple)
professionals_df.sample(5)

Unnamed: 0,id,skills,location,certification,education,skill_level,work_preference,experience
430,431,pharmacy,Houston,CPR ACLS,Master,entry-level,day shift,27
1961,1962,pharmacy,Philadelphia,PALS NRP CPR,Master,intermediate,day shift,10
265,266,pharmacy,Philadelphia,BLS,Master,entry-level,day shift,27
283,284,pharmacy,Houston,NRP PALS CPR BLS,Master,entry-level,day shift,13
1455,1456,pharmacy,Los Angeles,BLS,Master,entry-level,day shift,17


The dataset looks cleaner, so let us to the same for the jobs dataframe

In [26]:
jobs_df = pd.DataFrame(columns=['id', 'skills', 'location', 'certification', 'education', 'skill_level', 'work_preference', 'experience' ])
for i in range(num_jobs):
    id = i + 1,
    skills = random.choice(skills),
    location = random.choice(locations),
    certification = random.sample(certifications, random.randint(1, len(certifications))),
    education = random.choice(education),
    work_preference = random.choice(work_preferences),
    experience = random.randint(1, max_experience_years),
    skill_level = random.choice(skill_levels),
    jobs_df.loc[i] = [id, skills, location, certification, education, skill_level, work_preference, experience]
    
def clean_tuple(t):
    return str(t).replace("(", "").replace(",", "").replace(")", "").replace("'", "").replace("[", "").replace("]", "")
jobs_df = jobs_df.applymap(clean_tuple)
jobs_df.sample(5)
    

Unnamed: 0,id,skills,location,certification,education,skill_level,work_preference,experience
211,212,pharmacy,Philadelphia,NRP CPR BLS ACLS PALS,Master,intermediate,day shift,23
410,411,pharmacy,Los Angeles,PALS BLS NRP,Master,intermediate,day shift,6
319,320,pharmacy,Philadelphia,PALS NRP,Master,advanced,day shift,7
254,255,pharmacy,Los Angeles,CPR ACLS BLS NRP PALS,Master,advanced,day shift,30
13,14,pharmacy,Philadelphia,ACLS,Master,entry-level,day shift,23
