# **Importing Libraries**

In [51]:
from pathlib import Path

import numpy as np
import pandas as pd
from google.colab import drive

# **Importing Dataset**

In [52]:
!git clone https://github.com/anglingkusuma/JobRecommender.git
%cd JobRecommender

dataset = pd.read_excel('Job_Description_and_Salary_in_Indonesia.xlsx')
dataset.head(5)

Cloning into 'JobRecommender'...
remote: Enumerating objects: 7, done.[K
remote: Counting objects:  14% (1/7)[Kremote: Counting objects:  28% (2/7)[Kremote: Counting objects:  42% (3/7)[Kremote: Counting objects:  57% (4/7)[Kremote: Counting objects:  71% (5/7)[Kremote: Counting objects:  85% (6/7)[Kremote: Counting objects: 100% (7/7)[Kremote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 7 (delta 0), reused 7 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (7/7), 14.37 MiB | 34.63 MiB/s, done.
/content/JobRecommender/JobRecommender/JobRecommender/JobRecommender/JobRecommender


Unnamed: 0,id,job_title,location,salary_currency,career_level,experience_level,education_level,employment_type,job_function,job_benefits,company_process_time,company_size,company_industry,job_description,salary
0,1,Facility Maintenance & Smart Warehouse Manager,Bandung,IDR,Manajer/Asisten Manajer,5 tahun,"Sertifikat Professional, D3 (Diploma), D4 (Dip...",Penuh Waktu,"Manufaktur,Pemeliharaan",,,,,Deskripsi PekerjaanRequirements :D3/SI from re...,
1,2,Procurement Department Head,Jakarta Raya,IDR,Manajer/Asisten Manajer,5 tahun,"Sarjana (S1), Diploma Pascasarjana, Gelar Prof...",Penuh Waktu,"Manufaktur,Pembelian/Manajemen Material",,25 days,51 - 200 pekerja,Manajemen/Konsulting HR,Job Role: 1. Responsible for material availabi...,
2,3,SALES ADMIN,Jakarta Barat,IDR,Supervisor/Koordinator,4 tahun,Sarjana (S1),Penuh Waktu,"Penjualan / Pemasaran,Penjualan Ritel","Waktu regular, Senin - Jumat;Bisnis (contoh: K...",30 days,51 - 200 pekerja,Umum & Grosir,Internal Sales & AdminJob Description :We are ...,
3,4,City Operation Lead Shopee Express (Cirebon),Cirebon,IDR,Supervisor/Koordinator,5 tahun,"Sarjana (S1), Diploma Pascasarjana, Gelar Prof...",Penuh Waktu,"Pelayanan,Logistik/Rantai Pasokan","Tip;Waktu regular, Senin - Jumat;Kasual (conto...",21 days,2001 - 5000 pekerja,Retail/Merchandise,Job Description:Responsible for HSE implementa...,
4,5,Japanese Interpreter,Bekasi,IDR,Pegawai (non-manajemen & non-supervisor),2 tahun,"Sertifikat Professional, D3 (Diploma), D4 (Dip...",Penuh Waktu,"Lainnya,Jurnalis/Editor",,23 days,201 - 500 pekerja,Manajemen/Konsulting HR,Overview: Our clients is manufacture for autom...,


# **Data Preparation**

Remove unused columns

In [53]:
# Columns kept for the recommender; every other column of `dataset` is left out.
chosen_columns = ['id','job_title','location','career_level','experience_level','education_level','employment_type','job_function','company_size']

# Take an explicit copy: `df` is modified in later cells, and modifying a
# column selection of `dataset` directly can raise SettingWithCopyWarning.
df = dataset[chosen_columns].copy()

print('Number of rows in the dataset:', df.shape[0])
# Preview goes last so the notebook actually renders it (a bare expression in
# the middle of a cell is evaluated and discarded).
df.head(5)

Number of rows in the dataset: 34746


Standardize text data

In [54]:
# Lower-case every string cell so later text matching is case-consistent;
# non-string values (numbers, NaN) are passed through unchanged.
df = df.map(lambda x: x.lower() if isinstance(x, str) else x)

print('Number of rows in the dataset:', df.shape[0])
# Preview goes last so the notebook actually renders it (a bare expression in
# the middle of a cell is evaluated and discarded).
df.head(5)

Number of rows in the dataset: 34746


Identify missing values

In [55]:
# Number of missing (NaN/None) entries in each column of `df`.
df.isna().sum()

Unnamed: 0,0
id,0
job_title,0
location,0
career_level,0
experience_level,4541
education_level,0
employment_type,1344
job_function,0
company_size,5643


Drop rows with missing values

In [56]:
# Remove every row that has at least one missing value.
# Rebinding `df` (instead of `inplace=True`) avoids mutating a frame derived
# from `dataset`, which can raise SettingWithCopyWarning.
df = df.dropna()

print('Number of rows in the dataset:', df.shape[0])
# Shown last so the per-column null counts are actually rendered; all of them
# should now be zero.
df.isnull().sum()

Number of rows in the dataset: 25936


Check for and remove duplicate rows

In [57]:
# Count fully identical rows and report the number explicitly; a bare
# expression that is not the last line of a cell is never displayed.
# NOTE(review): `id` is part of `df` and appears to be unique per row, so two
# rows can never be fully identical and this check will always report 0.
# Consider `subset=` without `id` if duplicate postings should be removed.
duplicate_count = df.duplicated().sum()
print('Number of duplicate rows:', duplicate_count)

# Rebind instead of `inplace=True` so the cell does not mutate a derived frame.
df = df.drop_duplicates()
print('Number of rows in the dataset:', df.shape[0])

0

Check the number of unique values per column

In [59]:
# Distinct non-null values per column (column-wise, NaN excluded).
df.nunique(axis=0, dropna=True)

Unnamed: 0,0
id,25936
job_title,15324
location,193
career_level,5
experience_level,18
education_level,19
employment_type,7
job_function,68
company_size,7


In [60]:
# Convert the experience text (e.g. '5 tahun', 'lebih dari 10 tahun') into a
# whole number of years.
# The first run of digits is extracted rather than stripping fixed words, so:
#   * the cell can be re-run safely: an already-numeric column is cast to text
#     first, whereas calling `.str` on an int column raises AttributeError;
#   * values with no digits at all (including missing values) become 0.
experience_years = (
    df['experience_level']
    .astype(str)
    .str.extract(r'(\d+)', expand=False)
)

# `assign` builds a new frame, avoiding chained-assignment warnings on `df`.
df = df.assign(
    experience_level=pd.to_numeric(experience_years, errors='coerce').fillna(0).astype(int)
)

In [61]:
# Store the repetitive text columns as pandas categoricals, then show the
# resulting dtype of every column.
categorical_columns = ('education_level', 'location', 'employment_type')
for column_name in categorical_columns:
    df[column_name] = df[column_name].astype('category')

print(df.dtypes)

id                     int64
job_title             object
location            category
career_level          object
experience_level       int64
education_level     category
employment_type     category
job_function          object
company_size          object
dtype: object


In [62]:
def recommend_jobs(experience, education, location=None, employment_type=None, jobs=None):
    """Return job postings that fit a candidate's profile.

    A posting is kept when its required experience does not exceed
    ``experience`` and its ``education_level`` text contains ``education``.
    ``location`` and ``employment_type`` narrow the result further when given.
    All text filters are case-insensitive *literal* substring matches, so
    labels containing regex metacharacters such as ``'sarjana (s1)'`` match
    as written instead of being interpreted as a pattern.

    Parameters
    ----------
    experience : int
        Candidate's years of experience; postings requiring more are excluded.
    education : str
        Text that must appear in the posting's ``education_level``.
    location : str, optional
        Text that must appear in the posting's ``location``. Skipped if falsy.
    employment_type : str, optional
        Text that must appear in the posting's ``employment_type``. Skipped if
        falsy.
    jobs : pandas.DataFrame, optional
        Postings to search. Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Matching rows with the columns ``id``, ``location``,
        ``experience_level``, ``education_level`` and ``employment_type``,
        ordered by ``experience_level`` ascending.
    """
    if jobs is None:
        # Fall back to the prepared notebook frame, as the original did.
        jobs = df

    def _contains(column, text):
        # regex=False: treat the user's text literally.
        # na=False: a missing cell is simply "no match" instead of NaN in the mask.
        return jobs[column].str.contains(text, case=False, regex=False, na=False)

    mask = (jobs['experience_level'] <= experience) & _contains('education_level', education)

    if location:
        mask &= _contains('location', location)

    if employment_type:
        mask &= _contains('employment_type', employment_type)

    # Stable sort keeps the original row order among postings that require the
    # same experience, so results are deterministic.
    matches = jobs.loc[mask].sort_values(by='experience_level', ascending=True, kind='stable')

    # NOTE(review): `job_title` is not part of the returned columns; confirm
    # whether callers would benefit from it before changing the output shape.
    return matches[['id', 'location', 'experience_level', 'education_level', 'employment_type']]

# Example query: full-time ('penuh waktu') postings in Bandung whose education
# text mentions 's3', for a candidate with 5 years of experience.
recommended_jobs = recommend_jobs(
    experience=5,
    education='s3',
    location='bandung',
    employment_type='penuh waktu',
)
print(recommended_jobs)

          id location  experience_level  \
20037  20038  bandung                 1   
5940    5941  bandung                 2   
13758  13759  bandung                 2   
29381  29382  bandung                 2   
8380    8381  bandung                 5   

                                         education_level employment_type  
20037                                        doktor (s3)     penuh waktu  
5940   sarjana (s1), diploma pascasarjana, gelar prof...     penuh waktu  
13758  sarjana (s1), diploma pascasarjana, gelar prof...     penuh waktu  
29381  diploma pascasarjana, gelar professional, magi...     penuh waktu  
8380   sarjana (s1), diploma pascasarjana, gelar prof...     penuh waktu  
