Import Requirements

In [2]:
import pyodbc
import re

import numpy as np
import pandas as pd

from sqlalchemy import create_engine
from datetime import datetime, timedelta

from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

from transform import remove_html, maintain_alpha, remove_single, remove_morespace, remove_enumerate, clean_text, remove_insideparentheses, remove_standalonesymbols, stopwords_remover

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\eats\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


##### Code

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 50)

Connection

In [4]:
user = 'huda'
password = 'Vancha12'
host = '127.0.0.1'
port = 1433
database = 'HRSystemDB'


def get_connection():         
    return create_engine(
        url=f"mssql+pyodbc://{user}:{password}@{host}:{port}/{database}?driver=SQL Server",
    )

engine = get_connection()
conn = engine.connect()

In [5]:
tables = engine.table_names()

  tables = engine.table_names()


In [6]:
df_applicant = pd.DataFrame(engine.execute('SELECT ApplicantID, DiseaseHistory, Dob, ExpectedSalary, IsAbleToWorkRemote, CityID, ProvinceID, DriverLicenseType, Gender, IsUsingGlasses, Height, MaritalStatus, Nationality, Strengthness, Weaknesses, TypeOfVehicle FROM Applicant'))
df_applicant_education = pd.DataFrame(engine.execute('SELECT ApplicantEducationID, ApplicantID, DateStart, DateEnd, EducationInstituteName, Score, EducationLevelID, MajorID, Degree FROM ApplicantEducation'))
df_applicant_experience = pd.DataFrame(engine.execute('SELECT ApplicantExperienceID, ApplicantID, DateFrom, DateTo, Industry, CompanyName, JobDescription, Position, Salary FROM ApplicantExperience'))
df_applicant_document = pd.DataFrame(engine.execute('SELECT ApplicantID, DocumentName FROM ApplicantDocument'))
df_applicant_certificate = pd.DataFrame(engine.execute('SELECT ApplicantID, Description FROM ApplicantCertificate'))

df_pipeline = pd.DataFrame(engine.execute('SELECT PipelineID, ApplicantID, JobID, StageID FROM Pipeline'))
df_stage = pd.DataFrame(engine.execute('SELECT StageID, Label FROM Stage'))

df_job = pd.DataFrame(engine.execute('SELECT * FROM Job'))
df_function_position = pd.DataFrame(engine.execute('SELECT FunctionPositionID, FunctionPositionName FROM FunctionPosition'))
df_department = pd.DataFrame(engine.execute('SELECT DepartmentID, Name AS DepartmentName FROM Department'))
df_city = pd.DataFrame(engine.execute('SELECT CityID, Name AS CityName FROM City'))
df_province = pd.DataFrame(engine.execute('SELECT ProvinceID, Name AS ProvinceName FROM Province'))
df_major = pd.DataFrame(engine.execute('SELECT MajorID, MajorName FROM Major'))
df_education_level = pd.DataFrame(engine.execute('SELECT EducationLevelID, EducationLevelName FROM EducationLevel'))
df_company = pd.DataFrame(engine.execute('SELECT CompanyID, Name AS CompanyName FROM Company'))

# df_job = df_job[['JobID', 'Description', 'EducationLevelID', 'FunctionPositionID', 'DepartmentID', 'JobTitle', 'Requirement', 'CityID', 'ProvinceID', 'MajorID']]

In [7]:
df_job.head(3)

Unnamed: 0,JobID,Description,EducationLevelID,FunctionPositionID,JobStatus,JobTitle,PostedDate,Requirement,SalaryMax,SalaryMin,UserId,HiredQuota,ClosedDate,DepartmentID,CityID,CompanyID,ProvinceID,SkipTest,MajorID,DbName,UsiaMax,UsingGlasses,IQMin,IsSpecificEducationLevel,DriverLicenseType,IsSpesificDriverLicense,Gender,IsSpecificAge,IsSpecificGender,IsSpecificIQ,IsSpecificMarital,MaritalStatus,PublishDate,AdsStatisticID
0,1,Under Asst. Front Office Manager at Gunawangsa...,4,1,Close,NIGHT AUDIT,2018-04-10 00:00:00.0000000,"<p>\r\n\r\n</p><div style=""language:en-US;marg...",3600000.0,3300000.0,124839f6-8e58-48e8-8959-968da693a5b1,1,2018-05-19 08:54:53.3799529,1,1,7,1,True,1,WarnaWarni,0.0,False,0,False,,False,,False,False,False,False,,0001-01-01 00:00:00.0000000,
1,2,BECOMING BOOK KEEPER FOR GUNAWANGSA HOTEL MERR...,2,2,Close,BOOK KEEPER,2018-04-10 00:00:00.0000000,<p>Bachelor Degree of Accounting.<br>With 2 ye...,4250000.0,3500000.0,124839f6-8e58-48e8-8959-968da693a5b1,1,2018-05-15 15:34:57.8385703,2,1,7,1,True,2,WarnaWarni,0.0,False,0,False,,False,,False,False,False,False,,0001-01-01 00:00:00.0000000,
2,3,"<ul style=""padding: 0px 40px; color: rgb(51, 5...",2,3,Close,IT ANDROID PROGRAMMER,2018-04-10 00:00:00.0000000,"<p></p><p><ul></ul><span style=""font-size:12.0...",5000000.0,4000000.0,aab981ee-3ee0-464e-8cd4-8a033918050a,2,2018-12-10 09:01:00.9154229,8,1,1,1,False,4,WarnaWarni,0.0,False,0,False,,False,,False,False,False,False,,0001-01-01 00:00:00.0000000,


In [8]:
df_job.Description.fillna('', inplace=True)
df_job.Requirement.fillna('', inplace=True)

In [9]:
job_merged = pd.merge(df_job, df_education_level, on=['EducationLevelID'])
job_merged = pd.merge(job_merged, df_city, on=['CityID'])
job_merged = pd.merge(job_merged, df_province, on=['ProvinceID'])
job_merged = pd.merge(job_merged, df_function_position, on=['FunctionPositionID'])
job_merged = pd.merge(job_merged, df_department, on=['DepartmentID'])
job_merged = pd.merge(job_merged, df_major, on=['MajorID'])

job_merged.drop(columns=['EducationLevelID', 'CityID', 'ProvinceID', 'FunctionPositionID', 'DepartmentID', 'MajorID'], inplace=True)

In [10]:
job_merged.drop(
    index=job_merged.index[job_merged.JobTitle.map(str.lower).str.contains('test|123')].values, inplace=True
)

In [11]:
job_merged.head(2)

Unnamed: 0,JobID,Description,JobStatus,JobTitle,PostedDate,Requirement,SalaryMax,SalaryMin,UserId,HiredQuota,ClosedDate,CompanyID,SkipTest,DbName,UsiaMax,UsingGlasses,IQMin,IsSpecificEducationLevel,DriverLicenseType,IsSpesificDriverLicense,Gender,IsSpecificAge,IsSpecificGender,IsSpecificIQ,IsSpecificMarital,MaritalStatus,PublishDate,AdsStatisticID,EducationLevelName,CityName,ProvinceName,FunctionPositionName,DepartmentName,MajorName
0,1,Under Asst. Front Office Manager at Gunawangsa...,Close,NIGHT AUDIT,2018-04-10 00:00:00.0000000,"<p>\r\n\r\n</p><div style=""language:en-US;marg...",3600000.0,3300000.0,124839f6-8e58-48e8-8959-968da693a5b1,1,2018-05-19 08:54:53.3799529,7,True,WarnaWarni,0.0,False,0,False,,False,,False,False,False,False,,0001-01-01 00:00:00.0000000,,D3,SURABAYA,JAWA TIMUR,NIGHT AUDIT,FRONT OFFICE GUNAWANGSA HOTEL MERR,PERHOTELAN
2,48,<p>Becoming Executive Housekeeping For Gunawan...,Close,EXECUTIVE HOUSEKEEPING,2018-07-03 00:00:00.0000000,<p>Male<br>Experience in hospitality industry ...,6000000.0,3587000.0,124839f6-8e58-48e8-8959-968da693a5b1,1,2018-09-12 11:35:14.7251826,7,False,WarnaWarni,0.0,False,0,False,,False,,False,False,False,False,,0001-01-01 00:00:00.0000000,,D3,SURABAYA,JAWA TIMUR,EXECUTIVE HOUSEKEEPING,FRONT OFFICE GUNAWANGSA HOTEL MERR,PERHOTELAN


In [12]:
job_merged = job_merged[['JobID', 'JobTitle', 'FunctionPositionName', 'EducationLevelName', 'CityName', 'ProvinceName', 'Description', 'Requirement', 'MajorName']]

In [13]:
job_merged.Description = job_merged.Description.map(clean_text)

job_merged.Requirement = job_merged.Requirement.map(remove_html).map(remove_enumerate).map(maintain_alpha).map(remove_single).map(remove_morespace).map(str.strip).map(str.lower)

job_merged.JobTitle = job_merged.JobTitle.map(str.lower).map(remove_insideparentheses).map(remove_standalonesymbols).map(remove_morespace)

job_merged.EducationLevelName = job_merged.EducationLevelName.replace('None', '').map(str.lower)

job_merged.CityName = job_merged.CityName.map(str.lower)
job_merged.ProvinceName = job_merged.ProvinceName.map(str.lower)

job_merged.FunctionPositionName = job_merged.FunctionPositionName.map(remove_standalonesymbols).apply(lambda x: re.sub('[\(\)0-9]', '', x)).map(remove_morespace).map(str.strip).map(str.lower)

job_merged.MajorName = job_merged.MajorName.map(str.lower)

In [14]:
job_merged.head(2)

Unnamed: 0,JobID,JobTitle,FunctionPositionName,EducationLevelName,CityName,ProvinceName,Description,Requirement,MajorName
0,1,night audit,night audit,d3,surabaya,jawa timur,under asst front office manager at gunawangsa ...,with 1 year minimum experiences in the same po...,perhotelan
2,48,executive housekeeping,executive housekeeping,d3,surabaya,jawa timur,becoming executive housekeeping for gunawangsa...,male experience in hospitality industry and ex...,perhotelan


Clean Description & Requirement

In [15]:
def remove_stopwords(stopwords, text):
    list_text = text.split(' ')

    for text in list_text:
        if text in stopwords:
            list_text.remove(text)
            # print(text)
    return ' '.join(list_text)

In [16]:
sastrawi_stopwords = StopWordRemoverFactory().get_stop_words()
nltk_stopwords_in =  stopwords.words('indonesian')
nltk_stopwords_en =  stopwords.words('english')
user_stopwords = ['perusahaan', 'sesuai', 'become', 'becoming', 'gunawangsa', 'hotel', 'merr']

stopwords_in = list(set(sastrawi_stopwords + nltk_stopwords_in + nltk_stopwords_en + user_stopwords))

Description

In [17]:
'''cara 1'''
job_merged.Description = job_merged.Description.apply(lambda x: re.sub('\s+', '   ', '   ' + x + '   ')).apply(lambda x: re.sub('(' + ' | '.join(stopwords_in) + ')', ' ', x)).map(remove_morespace).map(str.strip)

'''cara 2'''
# job_merged.Description.apply(lambda x: remove_stopwords(stopwords_in, x))

'cara 2'

Requirement

In [18]:
job_merged.Requirement = job_merged.Requirement.apply(lambda x: re.sub('\s+', '   ', '   ' + x + '   ')).apply(lambda x: re.sub('(' + ' | '.join(stopwords_in) + ')', ' ', x)).map(remove_morespace).map(str.strip)

In [19]:
job_merged.head(2)

Unnamed: 0,JobID,JobTitle,FunctionPositionName,EducationLevelName,CityName,ProvinceName,Description,Requirement,MajorName
0,1,night audit,night audit,d3,surabaya,jawa timur,asst front office manager surabaya,1 year minimum experiences position experience...,perhotelan
2,48,executive housekeeping,executive housekeeping,d3,surabaya,jawa timur,executive housekeeping,male experience hospitality industry executive...,perhotelan


In [20]:
'''mengecek kata-kata yang belum bermakna'''
# teks = ''
# for sentence in job_merged.Description.values: 
#     teks = teks + ' ' + sentence
# teks = teks.strip()
# teks = teks.split(' ')
# teks = pd.Series(teks)

'mengecek kata-kata yang belum bermakna'

In [21]:
# job_merged = job_merged.set_index(['JobID'])

job_train = pd.DataFrame([], index=job_merged.index)

job_train['Text'] = job_merged.JobTitle.str.cat(
    job_merged.FunctionPositionName.str.cat(
        job_merged.EducationLevelName.str.cat(
            job_merged.MajorName.str.cat(
                job_merged.CityName.str.cat(
                    job_merged.ProvinceName.str.cat(
                        job_merged.Description.str.cat(
                            job_merged.Requirement
                        , sep=' ')
                    , sep=' ')
                , sep=' ')
            , sep=' ')
        , sep=' ')
    , sep=' ')
, sep=' ')

In [22]:
from eris import ErisRecommender

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_distances, cosine_similarity

In [23]:
encoder = TfidfVectorizer()
bank = encoder.fit_transform(job_train.Text)

# code = encoder.transform(job_train.Text)
# dist = cosine_similarity(code, bank)[0]*100

In [24]:
eris = ErisRecommender(job_merged, job_train.copy(), 'Text')
eris.fit()
eris.recommend('android')

Index yang ada 226


Unnamed: 0,JobTitle,Description,Requirement,Similarity
404,sosial media,maintain akun sosmed official store updating p...,wanita pria usia min 18 sd 25 min lulusan sma ...,97


In [25]:
df_job[df_job.JobID == 1661]

Unnamed: 0,JobID,Description,EducationLevelID,FunctionPositionID,JobStatus,JobTitle,PostedDate,Requirement,SalaryMax,SalaryMin,UserId,HiredQuota,ClosedDate,DepartmentID,CityID,CompanyID,ProvinceID,SkipTest,MajorID,DbName,UsiaMax,UsingGlasses,IQMin,IsSpecificEducationLevel,DriverLicenseType,IsSpesificDriverLicense,Gender,IsSpecificAge,IsSpecificGender,IsSpecificIQ,IsSpecificMarital,MaritalStatus,PublishDate,AdsStatisticID
634,1661,"<ul type=""disc""><li class=""MsoNormal"" style=""c...",2,36,Close,TRANSLATOR STAFF,2021-05-10 00:00:00.0000000,"<p>\r\n\r\n</p><br>\r\n\r\n<ul type=""disc""><li...",0.0,0.0,aab981ee-3ee0-464e-8cd4-8a033918050a,1,2021-06-17 13:42:35.4541978,16,1,1,1,True,20,WarnaWarni,0.0,False,0,False,,False,,False,False,False,False,,0001-01-01 00:00:00.0000000,


Applicant

In [26]:
df_applicant = pd.read_csv('data/cdf_applicant.csv')
df_applicant_experience = pd.read_csv('data/cdf_applicant_experience.csv')
df_applicant_experience.fillna('', inplace=True)
df_applicant_education = pd.read_csv('data/cdf_applicant_education.csv')

df_city = pd.read_csv('data/df_city.csv')
df_province = pd.read_csv('data/df_province.csv')

In [27]:
df_applicant_education = pd.merge(df_applicant_education, df_education_level, on=['EducationLevelID'])
df_applicant_education = pd.merge(df_applicant_education, df_major, on=['MajorID'])
df_applicant_education = df_applicant_education[['ApplicantID', 'EducationLevelName', 'MajorName']]

In [28]:
df_applicant_education.head(2)

Unnamed: 0,ApplicantID,EducationLevelName,MajorName
0,1,SMA,SMA SEGALA JURUSAN
1,620,SMA,SMA SEGALA JURUSAN


In [29]:
df_applicant = df_applicant[['ApplicantID', 'Age', 'CityID', 'ProvinceID', 'Strengthness', 'Weaknesses']]
df_applicant_experience = df_applicant_experience[['ApplicantID', 'Industry', 'JobDescription', 'Position', 'YearsOfExperience']]

In [30]:
app_merged = pd.merge(df_applicant, df_applicant_experience, on=['ApplicantID'])
app_merged = pd.merge(app_merged, df_city, on=['CityID'])
app_merged = pd.merge(app_merged, df_province, on=['ProvinceID'])
app_merged = pd.merge(app_merged, df_applicant_education, on=['ApplicantID'])
app_merged.drop(columns=['CityID', 'ProvinceID'], inplace=True)

In [31]:
app_merged.head(2)

Unnamed: 0,ApplicantID,Age,Strengthness,Weaknesses,Industry,JobDescription,Position,YearsOfExperience,CityName,ProvinceName,EducationLevelName,MajorName
0,3,29,"pekerja keras,teliti, ambisius","pelupa,tidak cepat tanggap",telekomunikasi retail service,maintenance dealer cek stock dealer apakah mas...,sales executive whole sales credit marketing o...,1,surabaya,jawa timur,S1,SEMUA JURUSAN
1,13,0,swot,swot,rudi,,it,0,surabaya,jawa timur,D4,PSIKOLOGI


In [32]:
app_merged.Age = app_merged.Age.apply(lambda x: 'usia ' + str(x) + ' tahun' if x != 0 else '')
app_merged.YearsOfExperience = app_merged.YearsOfExperience.apply(lambda x: 'pengalaman ' + str(x) + ' tahun' if x != 0 else '')
app_merged.EducationLevelName = app_merged.EducationLevelName.apply(lambda x: 'lulusan ' + x.lower())
app_merged.MajorName = app_merged.MajorName.map(str.lower)

app_merged.Strengthness = app_merged.Strengthness.map(clean_text)
app_merged.Weaknesses = app_merged.Weaknesses.map(clean_text)

app_merged.Strengthness = app_merged.Strengthness.apply(lambda x: re.sub('\s+', '   ', '   ' + x + '   ')).apply(lambda x: re.sub('(' + ' | '.join(stopwords_in) + ')', ' ', x)).map(remove_morespace).map(str.strip)
app_merged.Weaknesses = app_merged.Weaknesses.apply(lambda x: re.sub('\s+', '   ', '   ' + x + '   ')).apply(lambda x: re.sub('(' + ' | '.join(stopwords_in) + ')', ' ', x)).map(remove_morespace).map(str.strip)
app_merged.JobDescription = app_merged.JobDescription.apply(lambda x: re.sub('\s+', '   ', '   ' + x + '   ')).apply(lambda x: re.sub('(' + ' | '.join(stopwords_in) + ')', ' ', x)).map(remove_morespace).map(str.strip)



In [33]:
app_merged.head(1)

Unnamed: 0,ApplicantID,Age,Strengthness,Weaknesses,Industry,JobDescription,Position,YearsOfExperience,CityName,ProvinceName,EducationLevelName,MajorName
0,3,usia 29 tahun,pekerja keras teliti ambisius,pelupa cepat tanggap,telekomunikasi retail service,maintenance dealer cek stock dealer tersedia a...,sales executive whole sales credit marketing o...,pengalaman 1 tahun,surabaya,jawa timur,lulusan s1,semua jurusan


In [34]:
app_train = pd.DataFrame([], index=app_merged.index)

app_train['Text'] = app_merged.Position.str.cat(
    app_merged.EducationLevelName.str.cat(
        app_merged.MajorName.str.cat(
            app_merged.CityName.str.cat(
                app_merged.ProvinceName.str.cat(
                    app_merged.Age.str.cat(
                        app_merged.YearsOfExperience.str.cat(
                            app_merged.Strengthness.str.cat(
                                app_merged.Weaknesses.str.cat(
                                    app_merged.JobDescription
                                , sep=' ')
                            , sep=' ')
                        , sep=' ')
                    , sep=' ')
                , sep=' ')
            , sep=' ')
        , sep=' ')
    , sep=' ')
, sep=' ')

In [35]:
app_train

Unnamed: 0,Text
0,sales executive whole sales credit marketing o...
1,it lulusan d4 psikologi surabaya jawa timur ...
2,java web developer android developer native lu...
3,asisten dosen supervisor lulusan s1 semua juru...
4,junior progammer progammer and analyst lulusan...
...,...
4317,admin produksi lulusan s1 manajemen banggai su...
4318,asisten advokat paralegal staff legal lulusan ...
4319,legal staff lulusan s2 hukum sorong papua bara...
4320,quantity surveyor dan quality control quantity...


In [36]:
app_encoder = TfidfVectorizer()
app_bank = app_encoder.fit_transform(app_train.Text)

job_encoder = TfidfVectorizer()
job_bank = job_encoder.fit_transform(job_train.Text)

In [54]:
for i in job_encoder.get_feature_names_out():
    print(i)

00
07
10
11
15
155
16
165
17
18
1tahun
20
2000
2018
2019
2020
21
22
23
24
25
25th
26
27
28
29
30
30th
32
35
35tahun
36
38
3d
3ds
3dsmax
40
45
4d
50
9001
abilities
ability
able
absensi
ac
acara
acc
accommodation
accordance
account
accounting
accounts
accrued
accumatica
accurate
accustomed
achieve
achieves
across
act
action
active
activities
activity
actractive
acurate
ad
adaministrasi
address
adertising
adjustment
admin
administrasi
administratif
administration
administrative
adobe
ads
advantage
advantageous
advertise
advertisement
advertisements
advertising
advice
advidor
advise
advokasi
advokat
adwords
ae
affair
affairs
age
agencies
agency
agenda
agent
agents
aging
agreement
agronomi
agustus
ahir
ahli
ai
air
akad
akomodasi
akses
aktif
aktifitas
aktifitasnya
aktiva
aktivitas
aktuaria
akuisisi
akun
akunnya
akuntansi
akuntasi
akunting
akurat
akutansi
ala
alat
aldo
aliansi
alphard
also
alterations
alternatif
alur
always
aman
amn
ampu
anak
analisa
analisis
analitis
analysing
analysis
analy

In [53]:
job_bank.shape

(1003, 3418)

In [49]:
job_bank.todense()

matrix([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.03560514, 0.07214623, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.03604784, 0.07304327, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ]])

In [37]:
app_bank.todense().shape, job_bank.todense().shape

((4322, 23397), (1003, 3418))

In [48]:
app_train

Unnamed: 0,Text
0,sales executive whole sales credit marketing o...
1,it lulusan d4 psikologi surabaya jawa timur ...
2,java web developer android developer native lu...
3,asisten dosen supervisor lulusan s1 semua juru...
4,junior progammer progammer and analyst lulusan...
...,...
4317,admin produksi lulusan s1 manajemen banggai su...
4318,asisten advokat paralegal staff legal lulusan ...
4319,legal staff lulusan s2 hukum sorong papua bara...
4320,quantity surveyor dan quality control quantity...


In [39]:
codes = job_encoder.transform([app_train.Text[0]])

In [40]:
dist = cosine_similarity(codes, job_bank).T

In [44]:
dist.shape

(1003, 1)

In [42]:
codes.shape, job_bank.shape

((1, 3418), (1003, 3418))

In [43]:
job_merged['Similarity'] = dist

Kemiripan applicant dan job harus ditingkatkan dengan nilai similarity. Pada hasil tersebut, nilai similaritynya 0.2 atau 20% meskipun track recordnya sangat sesuai. Normalnya bisa mencapai 75% ke atas. Untuk mendapatkan hal demikian diperlukan cleansing yang lebih akurat.

In [45]:
app_merged.head(1)

Unnamed: 0,ApplicantID,Age,Strengthness,Weaknesses,Industry,JobDescription,Position,YearsOfExperience,CityName,ProvinceName,EducationLevelName,MajorName
0,3,usia 29 tahun,pekerja keras teliti ambisius,pelupa cepat tanggap,telekomunikasi retail service,maintenance dealer cek stock dealer tersedia a...,sales executive whole sales credit marketing o...,pengalaman 1 tahun,surabaya,jawa timur,lulusan s1,semua jurusan


In [46]:
job_merged.sort_values(by='Similarity', ascending=False)

Unnamed: 0,JobID,JobTitle,FunctionPositionName,EducationLevelName,CityName,ProvinceName,Description,Requirement,MajorName,Similarity
35,103,sales executive,sales executive,sma,surabaya,jawa timur,bertanggung penjualan budget maintance custome...,usia max 35 pendidikan min smk perhotelan peng...,perhotelan,0.241175
34,1228,sales executive,sales executive,d3,surabaya,jawa timur,kemampuan meningkatkan revenue budget mengadak...,pria wanita usia maksimal 35 penampilan menari...,perhotelan,0.189796
36,149,sales executive,sales executive,smk,surabaya,jawa timur,bertanggung penjualan budget maintance custome...,perempuan usia max 35 pengalaman dibidang sale...,perhotelan,0.183249
186,153,sales executive,sales executive,d3,surabaya,jawa timur,sales executive,female experience hospitality industry minimum...,semua jurusan 2,0.179756
384,3091,marketing staff,marketing executive,,surabaya,jawa timur,,,semua jurusan,0.165997
...,...,...,...,...,...,...,...,...,...,...
936,1599,staff legal kontrak,legal,s1,jakarta,dki jakarta,prepare draft legal agreement contract provide...,candidate must possess least bachelor degree l...,hukum,0.000957
1005,1415,teacher,teacher,s1,jakarta,dki jakarta,good teamwork communicate colleagues staff pro...,english communication oral written must minimu...,bahasa inggris,0.000840
1004,1413,assistant teacher,teacher,s1,jakarta,dki jakarta,good teamwork communicate colleagues staff pro...,english communication oral written must minimu...,bahasa inggris,0.000830
779,29,admin it,it,d3,jakarta,dki jakarta,administrasi web update terkait perbaruan perb...,minimal d3 management informatika berpengalama...,teknik informatika,0.000000


In [47]:
# TODO: Done, tinggal menambahkan improvement!