# **Importing Library**

In [39]:
from google.colab import drive
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# **Importing Dataset**

In [40]:
!git clone https://github.com/anglingkusuma/JobRecommender.git
%cd JobRecommender

dataset = pd.read_excel('Job_Description_and_Salary_in_Indonesia.xlsx')
dataset.head(5)

Cloning into 'JobRecommender'...
remote: Enumerating objects: 13, done.[K
remote: Counting objects:   7% (1/13)[Kremote: Counting objects:  15% (2/13)[Kremote: Counting objects:  23% (3/13)[Kremote: Counting objects:  30% (4/13)[Kremote: Counting objects:  38% (5/13)[Kremote: Counting objects:  46% (6/13)[Kremote: Counting objects:  53% (7/13)[Kremote: Counting objects:  61% (8/13)[Kremote: Counting objects:  69% (9/13)[Kremote: Counting objects:  76% (10/13)[Kremote: Counting objects:  84% (11/13)[Kremote: Counting objects:  92% (12/13)[Kremote: Counting objects: 100% (13/13)[Kremote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 13 (delta 4), reused 11 (delta 2), pack-reused 0 (from 0)[K
Receiving objects: 100% (13/13), 14.38 MiB | 37.27 MiB/s, done.
Resolving deltas: 100% (4/4), done.
/content/JobRecommender/JobRecommender/JobRecommender/JobRecommender/JobRecommender


Unnamed: 0,id,job_title,location,salary_currency,career_level,experience_level,education_level,employment_type,job_function,job_benefits,company_process_time,company_size,company_industry,job_description,salary
0,1,Facility Maintenance & Smart Warehouse Manager,Bandung,IDR,Manajer/Asisten Manajer,5 tahun,"Sertifikat Professional, D3 (Diploma), D4 (Dip...",Penuh Waktu,"Manufaktur,Pemeliharaan",,,,,Deskripsi PekerjaanRequirements :D3/SI from re...,
1,2,Procurement Department Head,Jakarta Raya,IDR,Manajer/Asisten Manajer,5 tahun,"Sarjana (S1), Diploma Pascasarjana, Gelar Prof...",Penuh Waktu,"Manufaktur,Pembelian/Manajemen Material",,25 days,51 - 200 pekerja,Manajemen/Konsulting HR,Job Role: 1. Responsible for material availabi...,
2,3,SALES ADMIN,Jakarta Barat,IDR,Supervisor/Koordinator,4 tahun,Sarjana (S1),Penuh Waktu,"Penjualan / Pemasaran,Penjualan Ritel","Waktu regular, Senin - Jumat;Bisnis (contoh: K...",30 days,51 - 200 pekerja,Umum & Grosir,Internal Sales & AdminJob Description :We are ...,
3,4,City Operation Lead Shopee Express (Cirebon),Cirebon,IDR,Supervisor/Koordinator,5 tahun,"Sarjana (S1), Diploma Pascasarjana, Gelar Prof...",Penuh Waktu,"Pelayanan,Logistik/Rantai Pasokan","Tip;Waktu regular, Senin - Jumat;Kasual (conto...",21 days,2001 - 5000 pekerja,Retail/Merchandise,Job Description:Responsible for HSE implementa...,
4,5,Japanese Interpreter,Bekasi,IDR,Pegawai (non-manajemen & non-supervisor),2 tahun,"Sertifikat Professional, D3 (Diploma), D4 (Dip...",Penuh Waktu,"Lainnya,Jurnalis/Editor",,23 days,201 - 500 pekerja,Manajemen/Konsulting HR,Overview: Our clients is manufacture for autom...,


# **Data Preparation**

Removing unused column

In [41]:
chosen_columns = ['id','job_title','location','career_level','experience_level','education_level','employment_type','job_function','company_size']
df = dataset[chosen_columns]
df.head(5)
print('Number of rows in the dataset:', df.shape[0])

Number of rows in the dataset: 34746


Standardize text data

In [42]:
df = df.map(lambda x: x.lower() if isinstance(x, str) else x)
df.head(5)
print('Number of rows in the dataset:', df.shape[0])

Number of rows in the dataset: 34746


Identify missing value

In [43]:
df.isnull().sum()

Unnamed: 0,0
id,0
job_title,0
location,0
career_level,0
experience_level,4541
education_level,0
employment_type,1344
job_function,0
company_size,5643


Drop rows with missing value

In [44]:
df.dropna(inplace=True)
df.isnull().sum()
print('Number of rows in the dataset:', df.shape[0])

Number of rows in the dataset: 25936


Check and remove duplicate row

In [45]:
df.duplicated().sum()
df.drop_duplicates(inplace=True)
print('Number of rows in the dataset:', df.shape[0])

Number of rows in the dataset: 25936


Check unique value

Feature Engineering

In [46]:
#Convert experience_level data type to int
df['experience_level'] = df['experience_level'].str.replace('lebih dari ', '')
df['experience_level'] = df['experience_level'].str.replace(' tahun', '')
df['experience_level'] = df['experience_level'].replace(np.nan, 0)
df['experience_level'] = df['experience_level'].astype(int)

df['education_level'] = df['education_level'].astype('category')
df['location'] = df['location'].astype('category')
df['employment_type'] = df['employment_type'].astype('category')

print(df.dtypes)

id                     int64
job_title             object
location            category
career_level          object
experience_level       int64
education_level     category
employment_type     category
job_function          object
company_size          object
dtype: object


Job Recommendation

In [47]:
def recommend_jobs(experience, education, location=None, employment_type=None):
    filtered_df = df[
        (df['experience_level'] <= experience) &
        (df['education_level'].str.contains(education, case=False))
    ]

    if location:
        filtered_df = filtered_df[filtered_df['location'].str.contains(location, case=False)]

    if employment_type:
        filtered_df = filtered_df[filtered_df['employment_type'].str.contains(employment_type, case=False)]

    recommended_jobs = filtered_df.sort_values(by='experience_level', ascending=True)

    return recommended_jobs[['job_title', 'location', 'experience_level', 'education_level', 'employment_type']]

recommended_jobs = recommend_jobs(1, #experience_level
                                  's1', #education_level
                                  location='jawa tengah',
                                  employment_type='penuh waktu')

output_df = pd.DataFrame({
    'Job Title': recommended_jobs['job_title'],
    'Location': recommended_jobs['location'],
    'Experience Level': recommended_jobs['experience_level'],
    'Education Level': recommended_jobs['education_level'],
    'Employment Type': recommended_jobs['employment_type']
})

output_df

Unnamed: 0,Job Title,Location,Experience Level,Education Level,Employment Type
745,sales taking order - solo,jawa tengah,1,"sma, smu/smk/stm, sertifikat professional, d3 ...",penuh waktu
22654,teller cipta jawa tengah dan diy,jawa tengah,1,"sertifikat professional, d3 (diploma), d4 (dip...",penuh waktu
23353,salesman,jawa tengah,1,"sertifikat professional, d3 (diploma), d4 (dip...",penuh waktu
23792,business development (central java & yogyakarta),jawa tengah,1,"sma, smu/smk/stm, sarjana (s1), diploma pascas...",penuh waktu
23933,bancassurance consultant (pekalongan),jawa tengah,1,"sertifikat professional, d3 (diploma), d4 (dip...",penuh waktu
...,...,...,...,...,...
16702,kabag marketing (sukoharjo),jawa tengah,1,"sertifikat professional, d3 (diploma), d4 (dip...",penuh waktu
14665,public relation,jawa tengah,1,sarjana (s1),penuh waktu
14202,"account officer (kebumen, gombong dan petanahan)",jawa tengah,1,"sma, smu/smk/stm, sertifikat professional, d3 ...",penuh waktu
17971,"account officer (bantul, gunung kidul kulon pr...",jawa tengah,1,"sma, smu/smk/stm, sertifikat professional, d3 ...",penuh waktu
