In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import mean_absolute_error
from datetime import date 

In [2]:
def load_clean_final_df(path='../data/intermediate/final_df.csv'):
    """Load the job/skill matrix from CSV and strip unusable rows and columns.

    The CSV column that pandas names ``'Unnamed: 0'`` is renamed to
    ``job_title`` and becomes the index.  Columns whose name consists only of
    digits are dropped, and so are rows whose values sum to zero.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file.  Defaults to the path this function has
        always read from, so existing no-argument calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``job_title``.
    """
    raw = pd.read_csv(path)
    indexed = raw.rename(columns={'Unnamed: 0': 'job_title'}).set_index('job_title')

    digit_cols = [col for col in indexed.columns if col.isdigit()]
    skills = indexed.drop(columns=digit_cols)

    # Filter with a boolean mask rather than adding and later dropping a
    # helper column: an in-place drop on a filtered frame may raise
    # SettingWithCopyWarning, and the mask keeps every step non-mutating.
    row_totals = skills.sum(axis=1)
    return skills.loc[row_totals != 0].copy()

In [17]:
def get_rec_str(top_10, list_skill, skillsDict, finalDF, num_recs):
    """Build the recommendation text for the selected jobs.

    Parameters
    ----------
    top_10 : iterable of str
        Job titles to report; each must be an index label of ``finalDF``.
    list_skill : iterable of str
        Skills the user already has.
    skillsDict : dict
        Maps skill name -> column position in ``finalDF``.
    finalDF : pandas.DataFrame
        One row per job; a value of 1 in a column marks that skill as
        belonging to the job.
    num_recs : int
        Number quoted in the header line.

    Returns
    -------
    str
        A header line followed by one line per job, each ending in a newline.
    """
    # Invert the mapping once; the lookup table is the same for every row.
    skill_at = {position: skill for skill, position in skillsDict.items()}
    known = set(list_skill)

    parts = ["The top {} recommendations are below: \n".format(num_recs)]
    for job in top_10:
        row = finalDF.loc[job].values
        # Walk the row in column order so the extra skills are listed in a
        # reproducible order (a set difference has no guaranteed ordering).
        req_skills = [
            skill_at[position]
            for position, flag in enumerate(row)
            if flag == 1 and skill_at[position] not in known
        ]

        if not req_skills:
            parts.append("For the job of: " + job + ", You don't need any extra skills!\n")
        else:
            parts.append(
                "For the job of: " + job
                + ", You need the following extra skills: " + str(req_skills) + '\n'
            )

    return "".join(parts)

In [18]:
# Finds best jobs/roles and additional skills required for the role
def findJobs(ll, skills_list, finalDF, skills_ndx_dict, num_recs=10):
    """Rank jobs by mean absolute error against the user's vector and report them.

    Parameters
    ----------
    ll : sequence of numbers
        The user's vector; must have one entry per column of ``finalDF``.
    skills_list : iterable of str
        Skill names the user has; passed through to ``get_rec_str``.
    finalDF : pandas.DataFrame
        One row per job, one column per skill.
    skills_ndx_dict : dict
        Maps skill name -> column position; passed through to ``get_rec_str``.
    num_recs : int, optional
        How many of the lowest-error jobs to report.

    Returns
    -------
    str
        The text produced by ``get_rec_str`` for the selected jobs.

    Raises
    ------
    ValueError
        If ``ll`` does not match the number of columns, or if either input
        contains NaN/inf (the same inputs the per-row sklearn call rejected).
    """
    user_vec = np.asarray(ll, dtype=float)
    job_matrix = finalDF.to_numpy(dtype=float)

    if user_vec.ndim != 1 or user_vec.shape[0] != job_matrix.shape[1]:
        raise ValueError(
            "Length of input not correct: expected {} values, got shape {}".format(
                job_matrix.shape[1], user_vec.shape
            )
        )
    if not (np.isfinite(user_vec).all() and np.isfinite(job_matrix).all()):
        raise ValueError("Input contains NaN or infinity")

    # Mean absolute error of every row against the user vector in one
    # vectorised pass instead of one sklearn call per row.
    errors = np.abs(job_matrix - user_vec).mean(axis=1)
    scores = pd.Series(errors, index=finalDF.index)
    # A repeated job title keeps only its last score, as the previous
    # dict-based accumulation did.
    scores = scores[~scores.index.duplicated(keep='last')]

    best_titles = list(scores.sort_values(ascending=True).iloc[:num_recs].keys())
    return get_rec_str(best_titles, skills_list, skills_ndx_dict, finalDF, num_recs)

In [19]:
def createSkillDict(finalDF):
    """Map each column label of ``finalDF`` to its zero-based position.

    Returns a dict ``{column_label: position}`` built in column order; if a
    label is repeated, the position of its last occurrence is kept.
    """
    return {column: position for position, column in enumerate(finalDF.columns)}

In [21]:
def create_map(list_skills, skills_ndx_dict=None, num_skills=None):
    """Encode a list of skill names as a 0/1 vector.

    Parameters
    ----------
    list_skills : iterable of str
        Skill names to switch on in the vector.
    skills_ndx_dict : dict, optional
        Maps skill name -> position in the vector.  When omitted, the
        notebook-level ``skillsDict`` is used, as before.
    num_skills : int, optional
        Length of the vector.  When omitted it is ``finalDF.shape[1]`` on the
        legacy global path, otherwise one more than the largest position in
        ``skills_ndx_dict``.

    Returns
    -------
    list of int
        Zeros everywhere except a 1 at the position of each listed skill.

    Raises
    ------
    KeyError
        If a skill is not present in the mapping.
    """
    if skills_ndx_dict is None:
        # Legacy path: depends on `skillsDict` / `finalDF` existing as
        # notebook-level globals.  Pass the mapping explicitly to avoid
        # relying on hidden kernel state.
        skills_ndx_dict = skillsDict
        if num_skills is None:
            num_skills = finalDF.shape[1]
    elif num_skills is None:
        num_skills = max(skills_ndx_dict.values(), default=-1) + 1

    list_vals = [0] * num_skills
    for each in list_skills:
        list_vals[skills_ndx_dict[each]] = 1
    return list_vals

In [31]:
def generate_recs(list_skills, num_recs=10):
    """Load the job/skill matrix and recommend jobs for the given skills.

    Parameters
    ----------
    list_skills : list of str
        Skill names the user has; each must be a column of the loaded frame.
    num_recs : int, optional
        Number of jobs to recommend.

    Returns
    -------
    str
        The recommendation text returned by ``findJobs``.

    Raises
    ------
    KeyError
        If any entry of ``list_skills`` is not a column of the loaded frame.
    """
    finalDF = load_clean_final_df()
    skillsDict = createSkillDict(finalDF)

    unknown = [skill for skill in list_skills if skill not in skillsDict]
    if unknown:
        raise KeyError("Unknown skill(s): {}".format(unknown))

    # Build the 0/1 vector from the frame and mapping loaded above, not from
    # notebook-level `finalDF` / `skillsDict` globals, which do not exist on
    # a freshly restarted kernel.
    skill_vector = [0] * finalDF.shape[1]
    for skill in list_skills:
        skill_vector[skillsDict[skill]] = 1

    return findJobs(skill_vector, list_skills, finalDF, skillsDict, num_recs)

In [33]:
# Example run: ask for 10 job recommendations given the skills Python and Java.
# The returned text is displayed as the cell output.
generate_recs(['Python', 'Java'], num_recs=10)

"The top 10 recommendations are below: \nFor the job of: Software Development Engineer, Ice Team (Systems Intelligence), You don't need any extra skills!\nFor the job of: Software QA Engineer - FW, You don't need any extra skills!\nFor the job of: Principal Computer Vision Scientist, You don't need any extra skills!\nFor the job of: Software Dev Engineer - Test,  Alexa, You don't need any extra skills!\nFor the job of: Senior Web Developer - EC2 Spot, You don't need any extra skills!\nFor the job of: Software Development Engineer, AWS, You don't need any extra skills!\nFor the job of: Software Development Engineer - Amazon Web Services - New Initiative, You need the following extra skills: ['C++']\nFor the job of: Research Engineer, Alexa, You don't need any extra skills!\nFor the job of: Rendering Engineer, You don't need any extra skills!\nFor the job of: Software Development Engineer - BIOS, You don't need any extra skills!\n"

In [None]:
# Preview the cleaned job/skill matrix. It is loaded here because no cell
# defines a notebook-level `finalDF`, and `.head()` keeps the output bounded.
load_clean_final_df().head()