In [1]:
import os
import re
from datetime import datetime, timedelta
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pyodbc
import tensorflow as tf
import tensorflow_recommenders as tfrs
from googletrans import Translator
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn.metrics.pairwise import cosine_similarity
from sqlalchemy import create_engine

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

from transform_copy import *

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\eats\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Display settings: never hide columns of a wide frame, and truncate the text
# of a single cell after 50 characters.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 50

In [3]:
# Connection settings are read from environment variables so that the notebook
# itself carries no credential. The non-secret settings fall back to the local
# development values; the password deliberately has no fallback.
user = os.environ.get('HRSYSTEM_DB_USER', 'huda')
password = os.environ.get('HRSYSTEM_DB_PASSWORD')
host = os.environ.get('HRSYSTEM_DB_HOST', '127.0.0.1')
port = int(os.environ.get('HRSYSTEM_DB_PORT', '1433'))
database = os.environ.get('HRSYSTEM_DB_NAME', 'HRSystemDB')

if not password:
    # Fail here with a clear message instead of with a login error later on.
    raise RuntimeError(
        'Set the HRSYSTEM_DB_PASSWORD environment variable before running this notebook.'
    )


def get_connection():
    """Build the SQLAlchemy engine for the SQL Server database.

    The engine is configured from the module-level ``user``, ``password``,
    ``host``, ``port`` and ``database`` values and uses the ``mssql+pyodbc``
    dialect with the ``SQL Server`` ODBC driver.

    Returns:
        The object returned by ``create_engine``.
    """
    # User name and password are URL-escaped so that characters such as
    # '@', ':' or '/' in a credential cannot corrupt the connection URL.
    return create_engine(
        url=(
            f"mssql+pyodbc://{quote_plus(user)}:{quote_plus(password)}"
            f"@{host}:{port}/{database}?driver=SQL Server"
        ),
    )

engine = get_connection()
conn = engine.connect()

In [4]:
'''
JOB>>>MATCH<<<APPLICANT

> numerical
UsiaMax == Dob
SalaryMean == ExpectedSalary

> categorical
CityName == CurrentCityName
ProvinceName == CurrentProvinceName
EducationLevelName
MajorName
DriverLicenseType
IsUsingGlasses
Gender
MaritalStatus

> textual
JobTitle, FunctionPositionName == Position
Description, Requirement == JobDescription, Strengthness
'''

'\nJOB>>>MATCH<<<APPLICANT\n\n> numerical\nDob == UsiaMax\nSalaryMean == ExpectedSalary\n\n> categorical\nCityName == CurrentCityName\nProvinceName == CurrentProvinceName\nDriverLicenseType\nEducationLevelID\nGender\nMaritalStatus\nMajorName\nIsUsingGlasses\n\n> textual\nJobTile, FunctionPositionName == Position\nDescription, Requirement == JobDescription, Strengthness\n'

In [9]:
# Applicant whose profile is matched against the published jobs.
applicant_id = 31790

# The applicant id is passed as a bound parameter ('?' is the pyodbc
# placeholder) instead of being formatted into the SQL text.
applicant_params = (applicant_id,)

# pd.read_sql keeps the column names even when a query returns no rows, so the
# column accesses in the following cells do not fail on an empty result.
# coerce_float=False leaves the values as the driver returns them, which is
# what building the frame with pd.DataFrame(...) from the result rows did.
#
# NOTE(review): every lookup table below is attached with RIGHT JOIN while the
# WHERE clause filters on what looks like a column of the left-hand table. That
# combination discards a Job / Applicant / ApplicantEducation row whose foreign
# key has no match (e.g. is NULL). LEFT JOIN may be what was intended — confirm
# against the schema before changing the SQL.

df_job = pd.read_sql(
    """
    SELECT Job.JobID, Job.UsiaMax, Job.SalaryMin, Job.SalaryMax, City.Name AS CityName, Province.Name AS ProvinceName, EducationLevel.EducationLevelName, Major.MajorName, Job.DriverLicenseType, Job.UsingGlasses, Job.Gender, Job.MaritalStatus, Job.JobTitle, FunctionPosition.FunctionPositionName, Job.Description, Job.Requirement
    FROM (((((Job
    RIGHT JOIN FunctionPosition ON Job.FunctionPositionID = FunctionPosition.FunctionPositionID)
    RIGHT JOIN EducationLevel ON Job.EducationLevelID = EducationLevel.EducationLevelID)
    RIGHT JOIN City ON Job.CityID = City.CityID)
    RIGHT JOIN Province ON Job.ProvinceID = Province.ProvinceID)
    RIGHT JOIN Major ON Job.MajorID = Major.MajorID)
    WHERE JobStatus='Publish'
    """,
    engine,
    coerce_float=False,
)

df_applicant = pd.read_sql(
    """
    SELECT Applicant.ApplicantID, Applicant.Dob, Applicant.Strengthness, Applicant.Weaknesses, City.Name AS CityName, Province.Name AS ProvinceName
    FROM ((Applicant
    RIGHT JOIN City ON Applicant.CurrentAddressCityID = City.CityID)
    RIGHT JOIN Province ON Applicant.CurrentAddressProvinceID = Province.ProvinceID)
    WHERE ApplicantID=?
    """,
    engine,
    params=applicant_params,
    coerce_float=False,
)

df_applicant_education = pd.read_sql(
    """
    SELECT ApplicantEducation.ApplicantID, ApplicantEducation.DateStart, ApplicantEducation.DateEnd, EducationLevel.EducationLevelName, Major.MajorName
    FROM ((ApplicantEducation
    RIGHT JOIN EducationLevel ON ApplicantEducation.EducationLevelID = EducationLevel.EducationLevelID)
    RIGHT JOIN Major ON ApplicantEducation.MajorID = Major.MajorID)
    WHERE ApplicantID=?
    """,
    engine,
    params=applicant_params,
    coerce_float=False,
)

df_applicant_experience = pd.read_sql(
    """
    SELECT ApplicantExperience.ApplicantID, ApplicantExperience.DateFrom, ApplicantExperience.DateTo, ApplicantExperience.Industry, ApplicantExperience.JobDescription, ApplicantExperience.Position
    FROM ApplicantExperience
    WHERE ApplicantID=?
    """,
    engine,
    params=applicant_params,
    coerce_float=False,
)

In [6]:
# Builds the single-row applicant feature frame from the three query results.
# pick_date, filter_date, get_age, substract_months and clean_text are helpers
# provided by the star import of transform_copy; their exact behaviour is not
# visible in this notebook.


def _parse_dates(raw_dates, min_year, max_year):
    """Convert a column of raw date values to pandas datetimes.

    Every value is passed through ``pick_date`` and then through
    ``filter_date(value, min_year, max_year)`` before ``pd.to_datetime`` is
    applied to the whole column.

    Args:
        raw_dates: Series holding the raw date values.
        min_year: Second argument forwarded to ``filter_date``.
        max_year: Third argument forwarded to ``filter_date``.

    Returns:
        The Series produced by ``pd.to_datetime``.
    """
    return pd.to_datetime(
        raw_dates.map(pick_date).apply(lambda value: filter_date(value, min_year, max_year))
    )


df_applicant = df_applicant.drop_duplicates().fillna('')
df_applicant_education = df_applicant_education.fillna('')
df_applicant_experience = df_applicant_experience.fillna('')

# --- applicant: replace the date of birth with an age ---
# NOTE(review): 1958/2006 and 1980/2023 below look like plausible-year bounds
# for filter_date; they are hard-coded and will go stale — confirm and consider
# deriving them from the current year.
df_applicant['Age'] = _parse_dates(df_applicant['Dob'], 1958, 2006).map(get_age)
df_applicant = df_applicant.drop(columns=['Dob'])
# Rows without a usable date of birth get age 0.
df_applicant['Age'] = df_applicant['Age'].fillna(0).astype(int)

# --- education: keep the entry with the latest start date ---
for date_col in ('DateStart', 'DateEnd'):
    df_applicant_education[date_col] = _parse_dates(df_applicant_education[date_col], 1980, 2023)

df_applicant_education = (
    df_applicant_education
    .dropna(subset=['DateStart', 'DateEnd'])
    .sort_values('DateStart')
    .groupby(['ApplicantID'])
    .agg('last')
    .drop(columns=['DateStart', 'DateEnd'])
)

# --- experience: one row per applicant ---
for date_col in ('DateFrom', 'DateTo'):
    df_applicant_experience[date_col] = _parse_dates(df_applicant_experience[date_col], 1980, 2023)

# .copy() makes the filtered frame independent, so adding a column below does
# not write into a slice of the unfiltered frame (SettingWithCopyWarning).
df_applicant_experience = df_applicant_experience.dropna(subset=['DateFrom', 'DateTo']).copy()

# NOTE(review): the column is called YearsOfExperience but is computed by
# substract_months — confirm which unit the helper returns.
df_applicant_experience['YearsOfExperience'] = substract_months(
    df_applicant_experience['DateFrom'], df_applicant_experience['DateTo']
)

# Text fields of all jobs are concatenated in chronological order and the
# durations are summed. DateFrom/DateTo are only needed up to this point.
df_applicant_experience = df_applicant_experience.sort_values('DateFrom').groupby(['ApplicantID']).agg({
    'Industry': ' '.join,
    'JobDescription': ' '.join,
    'Position': ' '.join,
    'YearsOfExperience': 'sum',
})

# --- merge ---
# Left joins keep the applicant even when no experience or education row
# survived the date filtering; an inner join would return an empty frame.
df_applicant = (
    df_applicant
    .merge(df_applicant_experience, on=['ApplicantID'], how='left')
    .merge(df_applicant_education, on=['ApplicantID'], how='left')
)
df_applicant = df_applicant.fillna({
    'Industry': '',
    'JobDescription': '',
    'Position': '',
    'YearsOfExperience': 0,
    'EducationLevelName': '',
    'MajorName': '',
})

# --- remove weaknesses ---
df_applicant = df_applicant.drop(columns=['Weaknesses'])

# --- preprocessing ---
df_applicant = df_applicant.set_index(['ApplicantID'])

for col in df_applicant.select_dtypes(object).columns:
    df_applicant[col] = df_applicant[col].map(str.lower)

for col in ['Strengthness', 'JobDescription', 'Industry', 'Position']:
    df_applicant[col] = df_applicant[col].map(clean_text)

# --- translate the free-text columns to Indonesian (dest='id') ---
translator = Translator(service_urls=['translate.googleapis.com'])
for col in ['Strengthness', 'Industry', 'JobDescription', 'Position']:
    # Blank strings are passed through unchanged: there is nothing to translate
    # and it saves a network request per empty value.
    df_applicant[col] = df_applicant[col].apply(
        lambda value: value
        if isinstance(value, str) and not value.strip()
        else translator.translate(value, dest='id').text
    )


In [7]:
# Normalises the job frame: index by JobID, blank out missing values,
# lower-case the text and translate the textual columns to Indonesian.
# clean_text is provided by the star import of transform_copy.


def _lower_if_str(value):
    """Return ``value`` lower-cased when it is a string, unchanged otherwise."""
    return value.lower() if isinstance(value, str) else value


def _translate_series_to_indonesian(values, translator):
    """Translate every entry of ``values`` to Indonesian and lower-case it.

    Each distinct value is sent to the translator only once and blank strings
    are returned unchanged, which keeps the number of network requests down.

    Args:
        values: Series of texts to translate.
        translator: Object exposing ``translate(text, dest=...)`` whose result
            has a ``text`` attribute (here a googletrans ``Translator``).

    Returns:
        A Series of the same length holding the translated, lower-cased texts.
    """
    cache = {}

    def translate(value):
        if isinstance(value, str) and not value.strip():
            return value
        if value not in cache:
            cache[value] = translator.translate(value, dest='id').text.lower()
        return cache[value]

    return values.apply(translate)


df_job = df_job.set_index(['JobID']).fillna('')

# Only strings are lower-cased; applying str.lower to every cell raises a
# TypeError as soon as a column holds a non-string value.
df_job = df_job.apply(lambda column: column.map(_lower_if_str))
df_job['EducationLevelName'] = df_job['EducationLevelName'].replace('none', '')

translator = Translator(service_urls=['translate.googleapis.com'])

for col in ['JobTitle', 'FunctionPositionName', 'MajorName']:
    df_job[col] = _translate_series_to_indonesian(df_job[col], translator)

# The long free-text columns are cleaned before they are translated.
for col in ['Description', 'Requirement']:
    df_job[col] = _translate_series_to_indonesian(df_job[col].map(clean_text), translator)