In [302]:
import pyodbc
import re

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_recommenders as tfrs

from sqlalchemy import create_engine
from datetime import datetime, timedelta
from sklearn.metrics.pairwise import cosine_similarity

from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

from googletrans import Translator

from transform_copy import *

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\eats\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [303]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 50)

In [304]:
user = 'huda'
password = 'Vancha12'
host = '127.0.0.1'
port = 1433
database = 'HRSystemDB'


def get_connection():         
    return create_engine(
        url=f"mssql+pyodbc://{user}:{password}@{host}:{port}/{database}?driver=SQL Server",
    )

engine = get_connection()
conn = engine.connect()

In [305]:
'''
JOB>>>MATCH<<<APPLICANT

> numerical
UsiaMax == Dob
SalaryMean == ExpectedSalary

> categorical
CityName == CurrentCityName
ProvinceName == CurrentProvinceName
EducationLevelName
MajorName
DriverLicenseType
IsUsingGlasses
Gender
MaritalStatus

> textual
JobTile, FunctionPositionName == Position
Description, Requirement == JobDescription, Strengthness
'''

'\nJOB>>>MATCH<<<APPLICANT\n\n> numerical\nUsiaMax == Dob\nSalaryMean == ExpectedSalary\n\n> categorical\nCityName == CurrentCityName\nProvinceName == CurrentProvinceName\nEducationLevelName\nMajorName\nDriverLicenseType\nIsUsingGlasses\nGender\nMaritalStatus\n\n> textual\nJobTile, FunctionPositionName == Position\nDescription, Requirement == JobDescription, Strengthness\n'

In [306]:
dict_function = {
    'r&d': 'research development',
    'asst.': 'assistant',
    'hrd': 'human resources development',
    'spv.': 'supervisor',
    'and': '',
    '&': '',
    '-': '',
}

def function_replacement(dictionary, text):
    list_text = text.split(' ')

    for i in range(len(list_text)):
        if list_text[i] in dictionary:
            list_text[i] = dictionary[list_text[i]]
    
    return ' '.join(list_text)

In [307]:
applicant_id = 31790

df_job = pd.DataFrame(engine.execute(
    """
    SELECT Job.JobID, Job.UsiaMax, Job.SalaryMin, Job.SalaryMax, City.Name AS CityName, Province.Name AS ProvinceName, EducationLevel.EducationLevelName, Major.MajorName, Job.DriverLicenseType, Job.UsingGlasses, Job.Gender, Job.MaritalStatus, Job.JobTitle, FunctionPosition.FunctionPositionName, Job.Description, Job.Requirement
    FROM (((((Job
    RIGHT JOIN FunctionPosition ON Job.FunctionPositionID = FunctionPosition.FunctionPositionID)
    RIGHT JOIN EducationLevel ON Job.EducationLevelID = EducationLevel.EducationLevelID)
    RIGHT JOIN City ON Job.CityID = City.CityID)
    RIGHT JOIN Province ON Job.ProvinceID = Province.ProvinceID)
    RIGHT JOIN Major ON Job.MajorID = Major.MajorID)
    WHERE JobStatus='Publish'
    """
))

df_function = pd.DataFrame(engine.execute(
    """
    SELECT FunctionPositionID, FunctionPositionName
    FROM FunctionPosition
    """
))

df_education = pd.DataFrame(engine.execute(
    """
    SELECT EducationLevelID, EducationLevelName
    FROM EducationLevel
    """
))

df_city = pd.DataFrame(engine.execute(
    """
    SELECT CityID, Name AS CityName
    FROM City
    """
))

df_province = pd.DataFrame(engine.execute(
    """
    SELECT ProvinceID, Name AS ProvinceName
    FROM Province
    """
))

df_major = pd.DataFrame(engine.execute(
    """
    SELECT MajorID, MajorName
    FROM Major
    """
))

df_applicant = pd.DataFrame(engine.execute(
    f"""
    SELECT Applicant.ApplicantID, Applicant.Dob, Applicant.ExpectedSalary, City.Name AS CityName, Province.Name AS ProvinceName, Applicant.DriverLicenseType, Applicant.IsUsingGlasses, Applicant.Gender, Applicant.MaritalStatus, Applicant.Strengthness
    FROM ((Applicant
    RIGHT JOIN City ON Applicant.CurrentAddressCityID = City.CityID)
    RIGHT JOIN Province ON Applicant.CurrentAddressProvinceID = Province.ProvinceID)
    WHERE ApplicantID={applicant_id}
    """
))

df_applicant_education = pd.DataFrame(engine.execute(
    f"""
    SELECT ApplicantEducation.ApplicantID, ApplicantEducation.DateStart, ApplicantEducation.DateEnd, EducationLevel.EducationLevelName, Major.MajorName
    FROM ((ApplicantEducation
    RIGHT JOIN EducationLevel ON ApplicantEducation.EducationLevelID = EducationLevel.EducationLevelID)
    RIGHT JOIN Major ON ApplicantEducation.MajorID = Major.MajorID)
    WHERE ApplicantID={applicant_id}
    """
))

df_applicant_experience = pd.DataFrame(engine.execute(
    f"""
    SELECT ApplicantExperience.ApplicantID, ApplicantExperience.DateFrom, ApplicantExperience.DateTo, ApplicantExperience.Position, ApplicantExperience.JobDescription
    FROM ApplicantExperience
    WHERE ApplicantID={applicant_id}
    """
))



"""PRE-PROCESSING APPLICANT"""
df_applicant = df_applicant.drop_duplicates()
df_applicant = df_applicant.fillna('')
df_applicant_education = df_applicant_education.fillna('')
df_applicant_experience = df_applicant_experience.fillna('')

'''applicant'''
df_applicant['Age'] = pd.to_datetime(
    df_applicant.Dob.map(pick_date).apply(lambda x: filter_date(x, 1958, 2006))
).map(get_age)

df_applicant.drop(columns=['Dob'], inplace=True)

df_applicant.Age = df_applicant.Age.fillna(0).astype(int)

'''education'''
df_applicant_education.DateStart = pd.to_datetime(
    df_applicant_education.DateStart.map(pick_date).apply(lambda x: filter_date(x, 1980, 2023))
)

df_applicant_education.DateEnd = pd.to_datetime(
    df_applicant_education.DateEnd.map(pick_date).apply(lambda x: filter_date(x, 1980, 2023))
)

df_applicant_education = df_applicant_education[~(df_applicant_education.DateStart.isna()) & ~(df_applicant_education.DateEnd.isna())]
df_applicant_education = df_applicant_education.sort_values('DateStart').groupby(['ApplicantID']).agg('last')

df_applicant_education.drop(columns=['DateStart', 'DateEnd'], inplace=True)

'''experience'''
df_applicant_experience.DateFrom = pd.to_datetime(
    df_applicant_experience.DateFrom.map(pick_date).apply(lambda x: filter_date(x, 1980, 2023))
)

df_applicant_experience.DateTo = pd.to_datetime(
    df_applicant_experience.DateTo.map(pick_date).apply(lambda x: filter_date(x, 1980, 2023))
)

df_applicant_experience = df_applicant_experience[~(df_applicant_experience.DateFrom.isna()) & ~(df_applicant_experience.DateTo.isna())]

# add YearsOfExperience column
df_applicant_experience['YearsOfExperience'] = substract_months(
    df_applicant_experience.DateFrom, df_applicant_experience.DateTo
)

df_applicant_experience = df_applicant_experience.sort_values('DateFrom').groupby(['ApplicantID']).agg({
    'DateFrom': 'last',
    'DateTo': 'last',
    'JobDescription': ' '.join,
    'Position': ' '.join,
    'YearsOfExperience': 'sum',
})

df_applicant_experience.drop(columns=['DateFrom', 'DateTo'], inplace=True)

'''merge'''
df_applicant = pd.merge(df_applicant, df_applicant_experience, on=['ApplicantID'])
df_applicant = pd.merge(df_applicant, df_applicant_education, on=['ApplicantID'])

'''preprocessing'''
df_applicant.set_index(['ApplicantID'], inplace=True)

df_applicant[df_applicant.select_dtypes(object).columns] = df_applicant[df_applicant.select_dtypes(object).columns].applymap(str.lower)

for col in ['Strengthness', 'JobDescription', 'Position']:
    df_applicant[col] = df_applicant[col].map(clean_text)

'''translate'''
translator = Translator(service_urls=['translate.googleapis.com'])
for col in ['Strengthness', 'JobDescription', 'Position']:
    df_applicant[col] = df_applicant[col].apply(lambda x: translator.translate(x, dest='id').text)

df_applicant = df_applicant[[
    'Age', 'ExpectedSalary', 'CityName', 'ProvinceName', 'EducationLevelName', 'MajorName', 'DriverLicenseType', 'IsUsingGlasses', 'Gender', 'MaritalStatus', 'Position', 'JobDescription', 'Strengthness'
]]

'''casting'''
df_applicant.IsUsingGlasses = df_applicant.IsUsingGlasses.astype(str).map(str.lower)

'''concat, rename, and drop columns to be features together'''
df_applicant.JobDescription = df_applicant.JobDescription.str.cat(
    df_applicant.Strengthness, sep=' '
)
df_applicant.rename(columns={'JobDescription': 'DescriptionStrengthness'}, inplace=True)
df_applicant.drop(columns=['Strengthness'], inplace=True)




"""PRE-PROCESSING JOB"""
str_cols = ['CityName', 'ProvinceName', 'EducationLevelName', 'MajorName', 'DriverLicenseType', 'Gender', 'MaritalStatus', 'JobTitle', 'FunctionPositionName', 'Description', 'Requirement']
num_cols = ['UsiaMax', 'SalaryMin', 'SalaryMax']
bol_cols = ['UsingGlasses']

'''general'''
df_job.set_index(['JobID'], inplace=True)
df_job.fillna('', inplace=True)

df_job[str_cols] = df_job[str_cols].applymap(str.lower)
df_job.replace('none', '', inplace=True)

df_job[num_cols] = df_job[num_cols].replace('', 0)
df_job[num_cols] = df_job[num_cols].astype(int)

'''translate'''
translator = Translator(service_urls=['translate.googleapis.com'])

df_job.JobTitle = df_job.JobTitle.apply(lambda x: translator.translate(x, dest='id').text.lower())
df_job.FunctionPositionName = df_job.FunctionPositionName.apply(lambda x: translator.translate(x, dest='id').text.lower())
df_job.MajorName = df_job.MajorName.apply(lambda x: translator.translate(x, dest='id').text.lower())

df_job.Description = df_job.Description.map(clean_text).apply(lambda x: translator.translate(x, dest='id').text.lower())
df_job.Requirement = df_job.Requirement.map(clean_text).apply(lambda x: translator.translate(x, dest='id').text.lower())

'''casting'''
df_job.UsingGlasses = df_job.UsingGlasses.astype(str).map(str.lower)

'''concat, rename, and drop columns to be features together'''
# get mean from SalaryMin and SalaryMax
df_job.SalaryMin = (df_job.SalaryMax + df_job.SalaryMin) // 2
df_job.rename(columns={'SalaryMin': 'SalaryMean'}, inplace=True)
df_job.SalaryMean = df_job.SalaryMean.apply(lambda x: 0 if x < 1_000_000 else x)
df_job.drop(columns=['SalaryMax'], inplace=True)
# concatenate JobTitle and FunctionPositionName to be textual feature together
df_job.JobTitle = df_job.JobTitle.str.cat(
    df_job.FunctionPositionName, sep=' '
)
df_job.rename(columns={'JobTitle': 'JobTitlePosition'}, inplace=True)
df_job.drop(columns=['FunctionPositionName'], inplace=True)
# concatenate Description and Requirement to be textual feature together
df_job.Description = df_job.Description.str.cat(
    df_job.Requirement, sep=' '
)
df_job.rename(columns={'Description': 'DescriptionRequirement'}, inplace=True)
df_job.drop(columns=['Requirement'], inplace=True)

'''additional tables'''
# load table for vocabulary
df_function.FunctionPositionName = df_function.FunctionPositionName.map(str.lower).apply(lambda x: function_replacement(dict_function, x)).map(remove_insideparentheses).map(remove_morespace).map(str.strip)
df_education.EducationLevelName = df_education.EducationLevelName.map(str.lower)
df_city.CityName = df_city.CityName.map(str.lower)
df_province.ProvinceName = df_province.ProvinceName.map(str.lower)
df_major.MajorName = df_major.MajorName.map(str.lower)

In [511]:
stopwords = [str(i[0]) for i in pd.read_csv('FlaskApp/data/stopwords.csv').values] + ['maksimal', 'minimal', 'usia', 'memiliki', 'pengalaman']

In [512]:
len(stopwords)

789

In [513]:
def clean_stopwords(stopwords, text: str):
    for i in stopwords:
        text = text.replace(i, '')
    return text    

In [515]:
df_job.DescriptionRequirement.map(clean_text).apply(lambda x: clean_stopwords(stopwords, x)).values

array([' aktivitas kretartan aan mengup lann pel agenda gtan reksi  masu  reksi  aspek hukum  berkaitan  oasionalisasi  pengembangan a aan  data ba  menyimpan dumen asli aan mengkomunikasi bij aan   internal aan mengkomunikasi bij aan   eksternal aan mengkoornasi bahan bahan laporan  rt komisaris  rt  pemegang saham menyp laporan  luruan  gtan krestaris aan      us  35 n pen mmal s1 jurusan ntansi komunikasi hukum pengan mmal 2 n  krestaris reksi splin tanggung  jujur stif komunikatif  memi prentation skill   detail orientied  penampilan  menarik  proposional  berbahasa mann men nilai  penan  suraba empuan  meni',
       ' desain  menarik  butuhan aan  perjaan   dtah  an us max 30 n pen mmal s1 desain komunikasi visual menguasai aplikasi dasar desain corel photshop pengan min 1 n terbsa meng komputer  internet pengan big    sukai terbsa berja ba teka  berja  target bertanggung  jujur kreatif',
       ' greeting  customer  menerima orderan cust customer memsp orderan  customer menerima 

In [481]:
tesval = df_job.DescriptionRequirement.map(clean_text).apply(lambda x: clean_stopwords(stopwords, x)).values

In [482]:
tesval = ' '.join(tesval)

In [483]:
tesval = tesval.split(' ')

In [484]:
for i in pd.Series(tesval).value_counts().index:
    print(i)

memiliki
pengalaman
keuangan
laporan
menguasai
pendidikan
yang
pekerjaan
maksimal
dengan
kerja
s1
legal
3
perusahaan
administrasi
hukum
komunikasi
dan
disiplin
mengawasi
1
akuntansi
pembayaran
35
perjanjian
terkait
kantor
laki
2
bekerja
dokumen
jujur
mengelola
baik
surat
kebutuhan
klien
bidang
mengatur
bank
membuat
proses
khusus
manajemen
melakukan
pajak
kemampuan
hubungan
komunikatif
jurusan
media
untuk
30
tekanan
manager
audit
bertanggung
operasional
oriented
pelaksanaan
skill
tugas
inisiatif
data
accounting
di
bersedia
sistem
kegiatan
transaksi
maintenance
prosedur
disukai
sama
office
anak
departemen
memonitor
inggris
detail
aset
pph
gedung
min
customer
dibawah
microsoft
diutamakan
jadwal
pengetahuan
hal
service
ditugaskan
memastikan
pemeliharaan
kerjasama
bahasa
komputer
bulanan
tanggung
sebagai
kuat
ga
mengevaluasi
mampu
bahan
acara
perbaikan
software
memahami
gelar
dalam
menyusun
rencana
lift
membantu
supervisor
vendor
kandidat
teknik
building
akan
sewa
analisa
menarik
desain
bis

In [447]:
df_job.DescriptionRequirement.apply(lambda x: clean_stopwords(x)).values

array(['melakukan aktivitas kesekretariatan perusahaan mengupayakan kelancaran pelaksanaan agenda kegiatan direksi memberikan masukan kepada direksi dari aspek hukum yang berkaitan dengan operasionalisasi dan pengembangan usaha perusahaan membuat data base dan menyimpan dokumen asli perusahaan mengkomunikasikan kebijakan perusahaan kepada pihak internal perusahaan mengkomunikasikan kebijakan perusahaan kepada pihak eksternal perusahaan mengkoordinasikan bahan bahan laporan untuk rapat komisaris dan rapat umum pemegang saham menyiapkan laporan secara keseluruan mengenai kegiatan sekrestaris perusahaan secara benar dan tepat waktu usia maksimal 35 tahun pendidikan minimal s1 jurusan akuntansi komunikasi hukum pengalaman minimal 2 tahun sebagai sekrestaris direksi displin tanggung jawab jujur inisiatif komunikatif dan mempunyai presentation skill tegas dan detail orientied memiliki penampilan yang menarik dan proposional bisa berbahasa mandarin menjadi nilai lebih penempatan di surabaya p

In [308]:
df_applicant.head(1)

Unnamed: 0_level_0,Age,ExpectedSalary,CityName,ProvinceName,EducationLevelName,MajorName,DriverLicenseType,IsUsingGlasses,Gender,MaritalStatus,Position,DescriptionStrengthness
ApplicantID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
31790,25,4500000.0,surabaya,jawa timur,s1,akuntansi,c,False,female,single,guru les privat mahasiswa magang,les privat siswa sd smp bagian jasa pelayanan ...


In [309]:
df_job.head(1)

Unnamed: 0_level_0,UsiaMax,SalaryMean,CityName,ProvinceName,EducationLevelName,MajorName,DriverLicenseType,UsingGlasses,Gender,MaritalStatus,JobTitlePosition,DescriptionRequirement
JobID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2775,0,0,surabaya,jawa timur,,sekretaris,,False,,,sekretaris direksi sekretaris,melakukan aktivitas kesekretariatan perusahaan...


In [372]:
province = ["Aceh", "Sumatera Utara", "Sumatera Barat", "Riau", "Kepulauan Riau", "Jambi", "Bengkulu", "Sumatera Selatan", "Bangka Belitung", "Lampung", "DKI Jakarta", "Banten", "Jawa Barat", "Jawa Tengah", "Yogyakarta", "Jawa Timur", "Bali", "Nusa Tenggara Barat", "Nusa Tenggara Timur", "Kalimantan Barat", "Kalimantan Tengah", "Kalimantan Selatan", "Kalimantan Timur", "Kalimantan Utara", "Sulawesi Utara", "Gorontalo", "Sulawesi Tengah", "Sulawesi Barat", "Sulawesi Selatan", "Sulawesi Tenggara", "Maluku", "Maluku Utara", "Papua Barat", "Papua"]

In [437]:
class JobModel:
    def __init__(self, df_job, df_function, df_education, df_city, df_province, df_major):
        self.max_tokens = 10_000

        '''tables'''
        # load main table
        self.df_job = df_job
        # load reference table
        self.df_function = df_function
        self.df_education = df_education
        self.df_city = df_city
        self.df_province = df_province
        self.df_major = df_major

        '''numerical'''
        # adapt from Age in Job Table
        self.age_normalizer = tf.keras.layers.Normalization(axis=None)
        self.age_normalizer.adapt(self.df_job.UsiaMax)
        # adapt from SalaryMean in Job Table
        self.salary_normalizer = tf.keras.layers.Normalization(axis=None)
        self.salary_normalizer.adapt(self.df_job.SalaryMean)

        '''categorical'''
        # adapt from Job Table
        self.city_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=self.df_city.CityName.values)
        self.province_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=self.df_province.ProvinceName.values)
        self.education_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=self.df_education.EducationLevelName.values)
        self.major_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=self.df_major.MajorName.values)
        self.license_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=['a', 'c', 'd', 'b1', 'b2'])
        self.glasses_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=['true', 'false'])
        self.gender_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=['male', 'female'])
        self.status_encoder = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=['single', 'married', 'divorced', 'widowed'])

        '''textual'''
        # adapt from JobTitle in Job Table
        self.title_vectorizer = tf.keras.layers.TextVectorization(max_tokens=self.max_tokens, output_mode='multi_hot')
        self.title_vectorizer.adapt(self.df_job.JobTitlePosition)
        # adapt from Description in Job Table
        self.description_vectorizer = tf.keras.layers.TextVectorization(max_tokens=self.max_tokens, output_mode='tf_idf')
        self.description_vectorizer.adapt(self.df_job.DescriptionRequirement)

        self.job_vector = self.get_vector([
            # self.get_age(self.df_job.UsiaMax.values),
            # self.get_salary(self.df_job.SalaryMean.values),
            # self.get_city(self.df_job.CityName.values),
            # self.get_province(self.df_job.ProvinceName.values),
            # self.get_education(self.df_job.EducationLevelName.values),
            # self.get_major(self.df_job.MajorName.values),
            # self.get_license(self.df_job.DriverLicenseType.values),
            # self.get_glasses(self.df_job.UsingGlasses.values),
            # self.get_gender(self.df_job.Gender.values),
            # self.get_status(self.df_job.MaritalStatus.values),
            # self.get_title(self.df_job.JobTitlePosition.values),
            self.get_description(self.df_job.DescriptionRequirement.values),
        ])

    '''helper function'''
    def get_vector(self, features_vector: list):
        return np.concatenate(features_vector, axis=1)
    
    def areshape(self, vector):
        return np.array(vector).reshape(-1, 1)
    
    def aloop(self, function, values):
        return np.array([function(i) for i in values])

    '''encoder functions'''
    def get_age(self, values):
        return self.areshape(self.age_normalizer(values))

    def get_salary(self, values):
        return self.areshape(self.salary_normalizer(values))

    def get_city(self, values):
        return self.aloop(self.city_encoder, values)

    def get_province(self, values):
        return self.aloop(self.province_encoder, values)
    
    def get_education(self, values):
        return self.aloop(self.education_encoder, values)

    def get_major(self, values):
        return self.aloop(self.major_encoder, values)
    
    def get_license(self, values):
        return self.aloop(self.license_encoder, values)
    
    def get_glasses(self, values):
        return self.aloop(self.glasses_encoder, values)
    
    def get_gender(self, values):
        return self.aloop(self.gender_encoder, values)
    
    def get_status(self, values):
        return self.aloop(self.status_encoder, values)

    def get_title(self, values):
        return self.title_vectorizer(values)

    def get_description(self, values):
        return self.description_vectorizer(values)
    
    '''additional functions'''
    def get_vector_shape(self):
        return self.job_vector.shape
    


class AppModel:
    def __init__(self, job_model:JobModel, df_applicant):
        self.job = job_model
        self.df_app = df_applicant

        self.app_vector = np.concatenate([
            # self.job.get_age(self.df_app.Age.values),
            # self.job.get_salary(self.df_app.ExpectedSalary.values),
            # self.job.get_city(self.df_app.CityName.values),
            # self.job.get_province(self.df_app.ProvinceName.values),
            # self.job.get_education(self.df_app.EducationLevelName.values),
            # self.job.get_major(self.df_app.MajorName.values),
            # self.job.get_license(self.df_app.DriverLicenseType.values),
            # self.job.get_glasses(self.df_app.IsUsingGlasses.values),
            # self.job.get_gender(self.df_app.Gender.values),
            # self.job.get_status(self.df_app.MaritalStatus.values),
            # self.job.get_title(self.df_app.Position.values),
            self.job.get_description(self.df_app.DescriptionStrengthness.values)
        ], axis=1)

In [438]:
job_model = JobModel(df_job, df_function, df_education, df_city, df_province, df_major)
app_model = AppModel(job_model, df_applicant)

In [439]:
app_model.app_vector.shape, job_model.job_vector.shape

((1, 1195), (42, 1195))

In [440]:
cosim = [cosine_similarity(app_model.app_vector, i.reshape(1, -1)) for i in job_model.job_vector]

In [441]:
cosine_similarity(app_model.app_vector[0].reshape(1, -1), job_model.job_vector[0].reshape(1, -1))

array([[0.01622981]], dtype=float32)

In [442]:
df_tes = df_job.copy()
df_tes['similarity'] = [i[0][0] for i in cosim]

In [443]:
df_applicant

Unnamed: 0_level_0,Age,ExpectedSalary,CityName,ProvinceName,EducationLevelName,MajorName,DriverLicenseType,IsUsingGlasses,Gender,MaritalStatus,Position,DescriptionStrengthness
ApplicantID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
31790,25,4500000.0,surabaya,jawa timur,s1,akuntansi,c,False,female,single,guru les privat mahasiswa magang,les privat siswa sd smp bagian jasa pelayanan ...


In [444]:
df_tes[df_tes.similarity != 0].sort_values('similarity', ascending=False)

Unnamed: 0_level_0,UsiaMax,SalaryMean,CityName,ProvinceName,EducationLevelName,MajorName,DriverLicenseType,UsingGlasses,Gender,MaritalStatus,JobTitlePosition,DescriptionRequirement,similarity
JobID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2879,0,0,jakarta,dki jakarta,sma,sma segala jurusan,,False,,,outlet manajer awak kapal,mengelola dan memantau operasional pelayanan s...,0.036468
3035,0,0,jakarta,dki jakarta,s1,semua jurusan,,False,,,guru bahasa mandarin (laoshi) guru,mampu bekerja anak usia 2 6 tahun membuat lapo...,0.034946
2963,0,0,jakarta,dki jakarta,smk,teknik sipil,,False,,,qc lapangan operasi umum,melakukan qc terhadap hasil pekerjaan subkon m...,0.028075
3093,0,4650000,surabaya,jawa timur,,semua jurusan 2,,False,,,urusan umum (ga) urusan umum,mengerti dan memahami peraturan perundangan ya...,0.027746
3069,0,5250000,surabaya,jawa timur,s1,akuntansi,,False,,,kepala akuntansi kepala akuntansi,usia maksimal 38 tahun pendidikan minimal s1 a...,0.027153
3045,0,4750000,surabaya,jawa timur,,akuntansi,,False,,,pengawas keuangan & akuntansi akuntansi,membuat jalannya sistem dan prosedur akuntansi...,0.026929
2991,0,0,jakarta,dki jakarta,s1,akuntansi,,False,,,staf keuangan keuangan & akuntansi,membuat laporan keuangan bertanggung jawab ata...,0.026174
3083,0,0,surabaya,jawa timur,,teknik informatika,,,,,itu mendukung dia,bertanggungjawab pada kesiapan dan ketersediaa...,0.024868
3034,0,0,jakarta,dki jakarta,s1,semua jurusan,,False,,,marketing communication (marcomm) komunikasi p...,menerapkan rencana promosi mengatur acara prom...,0.02244
2984,0,0,jakarta,dki jakarta,d3,semua jurusan,,False,,,urusan umum staf urusan umum,melakukan administrasi berkas berkas tidak mel...,0.022159


In [180]:
city = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=df_city.CityName.values)
province = tf.keras.layers.StringLookup(output_mode='multi_hot', vocabulary=df_province.ProvinceName.values)

job_city = np.array([city(i) for i in df_job.CityName.values])
app_city = np.array([city(df_applicant.CityName.values)])

job_province = np.array([province(i) for i in df_job.ProvinceName.values])
app_province = np.array([province(df_applicant.ProvinceName.values)])

In [182]:
job_city.shape, app_city.shape, job_province.shape, app_province.shape

((42, 492), (1, 492), (42, 35), (1, 35))

In [190]:
job_vector = np.concatenate([job_city, job_province], axis=1)
app_vector = np.concatenate([app_city, app_province], axis=1)

In [191]:
cosine_similarity(job_vector[0].reshape(1, -1), app_vector)

array([[0.99999994]], dtype=float32)