In [7]:
import numpy as np
import pandas as pd
import re
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer

In [8]:
# Load the raw job postings; `dff` is kept untouched so that recommendations
# can later be displayed with their original (unprocessed) text.
dff = pd.read_csv("analyst.csv", sep=",")

In [9]:
# Preview the first rows of the raw dataset.
dff.head()

Unnamed: 0,title,location,employment_type,level,category,salary_min,salary_max,salary_type,description,requirements
0,Credit Risk Analyst – Leading Renewable Energy...,Central,Full Time,Professional,"Risk Management, Banking and Finance",,,,Roles & Responsibilities\n• 6 years of relevan...,Requirements\n**Apply here**\nhttps://www.blue...
1,"Financial Analyst, Google Customer Solutions -...",South,Full Time,Executive,Banking and Finance,"$6,600","to$13,200",Monthly,Roles & Responsibilities\nCompany overview:\nG...,Requirements\nMinimum qualifications:\n- BA/BS...
2,Senior Business Analyst,Central,Permanent,Professional,Information Technology,,,,Roles & Responsibilities\nUnique Requirements\...,"Requirements\nDegree in Information Systems, B..."
3,Senior Financial Analyst (Pharmaceutical),Islandwide,Permanent,Senior Executive,Accounting / Auditing / Taxation,,,,Roles & Responsibilities\nWork for a Fortune 5...,Requirements\nThe successful candidate will be...
4,System Analyst (Java) (JD#4804),East,"Contract, Full Time","Executive, Senior Executive",Entertainment,"$4,000","to$5,000",Monthly,Roles & Responsibilities\nJob Summary\nAn exci...,Requirements\nMandatory Skill-set\nDegree in C...


In [10]:
# Working frame restricted to the text columns that get normalised below;
# the salary columns and the unnamed index column of `dff` are left out.
df = dff.loc[
    :,
    [
        "title",
        "location",
        "employment_type",
        "level",
        "category",
        "description",
        "requirements",
    ],
]

In [11]:
def text_preprocessing(x):
    """Normalise a raw text value into a lowercase, lemmatised string.

    Every character that is not an ASCII letter is replaced by a space, the
    text is lower-cased and split on whitespace, each token is passed through
    NLTK's WordNet lemmatizer, and the tokens are re-joined with single spaces.

    Parameters
    ----------
    x : str or scalar
        Raw value to clean. ``None`` and float NaN are treated as empty text;
        any other non-string value is converted with ``str()`` first.

    Returns
    -------
    str
        The cleaned text (``''`` when there is nothing left to keep).
    """
    # Missing cells reach this function as None / NaN when it is applied
    # element-wise to a DataFrame; re.sub would raise TypeError on them.
    if x is None or (isinstance(x, float) and x != x):
        return ''
    if not isinstance(x, str):
        x = str(x)

    words = re.sub('[^a-zA-Z]', ' ', x).lower().split()
    if not words:
        return ''

    # Documented public location of the lemmatizer (nltk.stem), rather than
    # the `nltk.wordnet` alias that only exists through star-imports.
    lemma = nltk.stem.WordNetLemmatizer()
    return ' '.join(lemma.lemmatize(word) for word in words)

In [12]:
# Normalise every cell of the selected text columns.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map;
# use the new name when this pandas version provides it, else fall back.
if hasattr(pd.DataFrame, "map"):
    df = df.map(text_preprocessing)
else:
    df = df.applymap(text_preprocessing)

In [13]:
# Remove the section sub-titles that open every job description and
# requirements text ("Roles & Responsibilities" / "Requirements", which read
# "role responsibility" / "requirement" after preprocessing).
# The patterns are anchored to the start of the text so that only the heading
# is dropped; an un-anchored replace would also delete every later occurrence
# of these words inside the body. `regex=True` is explicit because the default
# of Series.str.replace differs between pandas versions.
df['description'] = df['description'].str.replace(r'^role responsibility\b\s*', '', regex=True)
df['requirements'] = df['requirements'].str.replace(r'^requirement\b\s*', '', regex=True)
df.head()

Unnamed: 0,title,location,employment_type,level,category,description,requirements
0,credit risk analyst leading renewable energy t...,central,full time,professional,risk management banking and finance,year of relevant experience renewable energy...,apply here http www bluechipcareers asia com...
1,financial analyst google customer solution sin...,south,full time,executive,banking and finance,company overview google is not a conventiona...,minimum qualification ba b degree or equival...
2,senior business analyst,central,permanent,professional,information technology,unique requirement strong understanding of p...,degree in information system business admini...
3,senior financial analyst pharmaceutical,islandwide,permanent,senior executive,accounting auditing taxation,work for a fortune u mnc financial planning ...,the successful candidate will be professiona...
4,system analyst java jd,east,contract full time,executive senior executive,entertainment,job summary an exciting opportunity for java...,mandatory skill set degree in computer scien...


In [14]:
# TF-IDF over unigrams, bigrams and trigrams.
vectorizer = TfidfVectorizer(ngram_range=(1, 3))

In [15]:
# Fit the vocabulary on the cleaned requirements and keep the resulting
# document-term matrix (one row per job posting) for similarity queries.
tfidf = vectorizer.fit_transform(df["requirements"])

In [16]:
def job_rec(skills, top_n=5):
    """Recommend the job postings whose requirements best match `skills`.

    The query goes through the same preprocessing as the corpus, is projected
    with the fitted TF-IDF `vectorizer`, and is compared to every row of
    `tfidf` with cosine similarity.

    Parameters
    ----------
    skills : str
        Free-text list of skills, e.g. ``"python java"``.
    top_n : int, default 5
        Maximum number of postings to return.

    Returns
    -------
    pandas.DataFrame
        Rows of the raw frame `dff`, ordered from most to least similar.
        Postings with zero similarity are never returned, so the result may
        hold fewer than `top_n` rows (or be empty when nothing matches).
    """
    skills = text_preprocessing(skills)

    query_vec = vectorizer.transform([skills])

    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # Rank by decreasing similarity. A stable sort on the negated scores keeps
    # the original row order among ties, and slicing (unlike argpartition with
    # a fixed k) also works when the corpus has fewer than `top_n` rows.
    ranked = np.argsort(-similarity, kind="stable")[:max(top_n, 0)]

    # A zero score means the query shares no term with the posting: drop those
    # rows instead of recommending arbitrary jobs.
    ranked = ranked[similarity[ranked] > 0]

    # Positions in `tfidf` line up with row positions in `dff`.
    return dff.iloc[ranked]

In [20]:
# Example query: postings whose requirements mention Python and/or Java.
job_rec("python java")

Unnamed: 0,title,location,employment_type,level,category,salary_min,salary_max,salary_type,description,requirements
131,Data Analyst,Central,"Permanent, Full Time","Fresh/entry level, Professional","Engineering, Information Technology","$4,000","to$5,200",Monthly,Roles & Responsibilities\nPropertyGuru Group i...,Requirements\nBachelor’s degree in IT or relev...
509,System Analyst,Islandwide,Full Time,Senior Executive,Information Technology,"$5,600","to$7,000",Monthly,Roles & Responsibilities\nDevelopment of Web-b...,Requirements\nAt Least 3 Years web-based enter...
550,Technology Analyst - Python,Central,Full Time,Senior Executive,Information Technology,,,,"Roles & Responsibilities\nDesign, develop, tes...",Requirements\nStrong in Python or any classic ...
1003,"Lead Systems Analyst, Mobile Application",West,Full Time,"Executive, Senior Executive","Information Technology, Others, Telecommunicat...","$3,500","to$7,000",Monthly,Roles & Responsibilities\nLead the mobile appl...,Requirements\nDegree in in Computer Science wi...
1133,ANALYST PROGRAMMER,"South, Central","Contract, Full Time",Executive,Information Technology,"$4,700","to$7,000",Monthly,"Roles & Responsibilities\nSystem Development, ...",Requirements\nStrong technical skills in JAVA ...


In [18]:
# Build a small interactive interface: a text box for the skills and an
# output area that shows the matching jobs as the user types.
import ipywidgets as widgets
from IPython.display import display

# Queries of this many characters or fewer are ignored, so that no
# recommendation is computed for the first few keystrokes.
MIN_QUERY_LENGTH = 5

job_input = widgets.Text(
    value="",
    # The prompt is a placeholder (greyed hint) rather than the initial value,
    # so the user does not have to delete it before typing.
    placeholder="Entrez vos compétences",
    description="Job Recom",
    disabled=False,  # the trait is `disabled`; `disable` is not a Text trait
)


job_list = widgets.Output()


def on_type(data):
    """Refresh the recommendations when the text box content changes.

    `data` is the change dictionary passed by `observe`; its ``"new"`` entry
    holds the current content of the text box.
    """
    with job_list:
        job_list.clear_output()
        skills = data["new"]
        if len(skills) > MIN_QUERY_LENGTH:
            display(job_rec(skills))


job_input.observe(on_type, names="value")
display(job_input, job_list)

Text(value='Entrez vos compétences', description='Job Recom')

Output()