### 1. Load the LinkedIn dataset

In [None]:
import pandas as pd
import json
import re
import folium
from folium.plugins import MarkerCluster

# Read the raw LinkedIn job-search export, then discard the two unnamed
# columns when they exist (errors='ignore' makes the drop a no-op otherwise).
# NOTE(review): presumably these are empty artefacts of the export — confirm.
dataset = pd.read_csv("Data/LinkedIn_Job_Search.csv").drop(
    columns=["Unnamed: 15", "Unnamed: 16"],
    errors='ignore',
)
dataset.head()

Unnamed: 0,Job Title,Company Name,Location,URL,Company Location,company size,Industry_stats,Job Qualifications,Job Skills,Job Skills.1,Job Skills.2,Profile,Skills,Type,Work Hours
0,Python Machine Learning Developer,Demandlane,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Indonesia,11-50 employees,Legal Services 11-50 employees 25 on LinkedIn,"AWS Lambda, Amazon Redshift, Amazon Web Servic...",1 school alum works here,5+ years of work experience with Machine Learning,"Skills: Pandas (Software), TensorFlow, +8 more",Demandlane,5+ years of work experience with Machine Learning,Remote,Full-time
1,Python Machine Learning Developer,Demandlane,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Indonesia,11-50 employees,Legal Services 11-50 employees 25 on LinkedIn,"AWS Lambda, Amazon Redshift, Amazon Web Servic...",1 school alum works here,5+ years of work experience with Machine Learning,"Skills: Pandas (Software), TensorFlow, +8 more",Demandlane,5+ years of work experience with Machine Learning,Remote,Full-time
2,Computer Aided Design Engineer (U.S.-Based Cli...,Eterna Indonesia,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Indonesia,51-200 employees,Outsourcing and Offshoring Consulting 51-200 ...,"3D Modeling, AutoCAD, Computer-Aided Design (C...","Skills: SketchUp, AutoCAD, +6 more","3D Modeling, AutoCAD, Computer-Aided Design (C...",Try Premium for IDR0,Eterna Indonesia,"3D Modeling, AutoCAD, Computer-Aided Design (C...",Remote,Full-time
3,"Software Engineer Full-time (AI, Video and Aud...",Videfly,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,"Jakarta, Indonesia",2-10 employees,Software Development 2-10 employees 7 on Li...,"Amazon Web Services (AWS), Back-End Web Develo...","Skills: TypeScript, OpenGL, +8 more",1+ years of work experience with JavaScript,Try Premium for IDR0,Videfly,1+ years of work experience with JavaScript,Remote,Full-time
4,"Artificial Intelligence Engineer, 2 Hour Learn...",Crossover,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,"Surabaya, East Java, Indonesia","5,001-10,000 employees","IT Services and IT Consulting 5,001-10,000 em...","Angular, Artificial Intelligence (AI), Back-En...","Skills: Svelte, Flask, +8 more","Angular, Artificial Intelligence (AI), Back-En...",Try Premium for IDR0,Crossover,"Angular, Artificial Intelligence (AI), Back-En...",Remote,Full-time


In [35]:
# ========================
# 2. Simplify Company Locations
# ========================
# Known place names, checked in this order: the first entry that occurs
# anywhere inside a location string wins (so "Surabaya, East Java, Indonesia"
# resolves to "Surabaya", not "East Java").
kota_list = [
    'Jakarta', 'Surabaya', 'Bandung', 'Cikarang', 'Denpasar', 'Batang',
    'Yogyakarta', 'Malang', 'Pontianak', 'Kendal', 'Muarajawa', 'Tangerang',
    'Pekanbaru', 'Medan', 'Lampung', 'Kuta', 'Ubud', 'Bali', 'Bekasi',
    "East Java",
]


def ganti_kota(lokasi):
    """Collapse a free-text location to the first known place name it contains.

    Args:
        lokasi: A location value; may be a string or any other object
            (e.g. a missing-value marker).

    Returns:
        The first entry of ``kota_list`` that is a substring of ``lokasi``.
        If nothing matches, or ``lokasi`` is not a string, ``lokasi`` is
        returned unchanged.
    """
    if not isinstance(lokasi, str):
        return lokasi
    return next((nama for nama in kota_list if nama in lokasi), lokasi)

# Work on a copy so the raw `dataset` frame stays untouched.
df = dataset.copy()

# After reducing each location to a known place name, rename two of them.
# NOTE(review): presumably these are the spellings used by the coordinate
# lookup table — confirm against Data/location.json.
location_renames = {
    "East Java": "Jawa Timur",
    "Lampung": "Bandar Lampung",
}
df['Company Location'] = (
    df['Company Location']
    .apply(ganti_kota)
    .replace(location_renames)
)

In [36]:
# ========================
# 3. Load City Coordinate Data
# ========================
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding (JSON text is exchanged as UTF-8).
with open("Data/location.json", 'r', encoding="utf-8") as f:
    location_data = json.load(f)

# Tabular form of the parsed JSON, used below as the lookup table for the merge.
location_df = pd.DataFrame(location_data)

In [37]:
# ========================
# 4. Merge the Job Data with the City Coordinates
# ========================
# Left-join on the simplified location, then discard every row that found no
# partner in the coordinate table (its 'Kota' value is missing after the join).
df_jobs = pd.merge(
    df,
    location_df,
    how='left',
    left_on='Company Location',
    right_on='Kota',
).dropna(subset=["Kota"])

In [38]:
# ========================
# 5. Extract Coordinates from Georeferenced
# ========================
def extract_coords(geo_str):
    """Parse text of the form ``"POINT (<lng> <lat>)"`` into two floats.

    Args:
        geo_str: The point text. Non-string values (e.g. ``None`` or a float
            NaN) are tolerated.

    Returns:
        ``(longitude, latitude)`` as floats, or ``(None, None)`` when the
        value is not a string, does not contain exactly two
        whitespace-separated tokens, or a token is not a valid float.
    """
    try:
        lng, lat = geo_str.replace("POINT (", "").replace(")", "").split()
        return float(lng), float(lat)
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: value is not a string.
        # ValueError: wrong token count or a non-numeric token.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return None, None

# Split each 'Georeferenced' point into separate numeric columns, then drop
# the rows where either coordinate is missing.
coordinate_columns = ['Longitude', 'Latitude']
parsed_points = df_jobs['Georeferenced'].apply(
    lambda point: pd.Series(extract_coords(point))
)
df_jobs[coordinate_columns] = parsed_points
df_jobs = df_jobs.dropna(subset=coordinate_columns)
df_jobs.head()

Unnamed: 0,Job Title,Company Name,Location,URL,Company Location,company size,Industry_stats,Job Qualifications,Job Skills,Job Skills.1,Job Skills.2,Profile,Skills,Type,Work Hours,Kota,Georeferenced,Longitude,Latitude
0,Python Machine Learning Developer,Demandlane,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Indonesia,11-50 employees,Legal Services 11-50 employees 25 on LinkedIn,"AWS Lambda, Amazon Redshift, Amazon Web Servic...",1 school alum works here,5+ years of work experience with Machine Learning,"Skills: Pandas (Software), TensorFlow, +8 more",Demandlane,5+ years of work experience with Machine Learning,Remote,Full-time,Indonesia,POINT (117.343188 -0.878596),117.343188,-0.878596
1,Python Machine Learning Developer,Demandlane,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Indonesia,11-50 employees,Legal Services 11-50 employees 25 on LinkedIn,"AWS Lambda, Amazon Redshift, Amazon Web Servic...",1 school alum works here,5+ years of work experience with Machine Learning,"Skills: Pandas (Software), TensorFlow, +8 more",Demandlane,5+ years of work experience with Machine Learning,Remote,Full-time,Indonesia,POINT (117.343188 -0.878596),117.343188,-0.878596
2,Computer Aided Design Engineer (U.S.-Based Cli...,Eterna Indonesia,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Indonesia,51-200 employees,Outsourcing and Offshoring Consulting 51-200 ...,"3D Modeling, AutoCAD, Computer-Aided Design (C...","Skills: SketchUp, AutoCAD, +6 more","3D Modeling, AutoCAD, Computer-Aided Design (C...",Try Premium for IDR0,Eterna Indonesia,"3D Modeling, AutoCAD, Computer-Aided Design (C...",Remote,Full-time,Indonesia,POINT (117.343188 -0.878596),117.343188,-0.878596
3,"Software Engineer Full-time (AI, Video and Aud...",Videfly,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Jakarta,2-10 employees,Software Development 2-10 employees 7 on Li...,"Amazon Web Services (AWS), Back-End Web Develo...","Skills: TypeScript, OpenGL, +8 more",1+ years of work experience with JavaScript,Try Premium for IDR0,Videfly,1+ years of work experience with JavaScript,Remote,Full-time,Jakarta,POINT (106.84634081531794 -6.196639809431919),106.846341,-6.19664
4,"Artificial Intelligence Engineer, 2 Hour Learn...",Crossover,13 applicants,https://www.linkedin.com/jobs/search/?currentJ...,Surabaya,"5,001-10,000 employees","IT Services and IT Consulting 5,001-10,000 em...","Angular, Artificial Intelligence (AI), Back-En...","Skills: Svelte, Flask, +8 more","Angular, Artificial Intelligence (AI), Back-En...",Try Premium for IDR0,Crossover,"Angular, Artificial Intelligence (AI), Back-En...",Remote,Full-time,Surabaya,POINT (112.740515 -7.244582),112.740515,-7.244582


In [39]:
# ========================
# 6. Clean Up Job Titles
# ========================
def extract_job_title(title):
    """Reduce a raw job title to its core role name.

    Steps, in order: remove parenthesised asides, keep only the text before
    the first comma, delete a fixed set of seniority/role/company/filler
    words (case-insensitive, whole words), delete hyphens, and finally
    collapse every run of whitespace to a single space.

    Args:
        title: The raw title. It is passed through ``str()``, so a
            non-string value is cleaned as its string form (a float NaN
            becomes the text ``"nan"``).

    Returns:
        The cleaned title, with no leading, trailing or repeated whitespace.
    """
    # Drop parenthesised asides such as "(U.S.-Based Client)".
    title = re.sub(r'\(.*?\)', '', str(title))
    # Anything after the first comma is treated as a qualifier and discarded.
    title = title.split(',')[0]
    title = re.sub(
        r'\b(Senior|Junior|Lead|Head|Manager|Specialist|Full-time|Consultant|Developer|Instructor|Lecturer|GoTo Financial|Gojek|dyeing|in|MarTech|Indonesia|Intern|Experimentation)\b',
        '', title, flags=re.IGNORECASE
    )
    title = title.replace('-', '')
    # Removing a word from the middle of a title leaves two adjacent spaces;
    # split/join collapses those and trims both ends in one pass.
    return ' '.join(title.split())

# Overwrite the raw titles with their cleaned form.
raw_titles = df_jobs['Job Title']
df_jobs['Job Title'] = raw_titles.apply(extract_job_title)

In [40]:
# ========================
# 7. Filter Data-Related Jobs
# ========================
# Keyword stems (English and Indonesian); a title is kept when it contains
# at least one of them, ignoring case.
keywords = ['Data', 'Analyst', 'Analys', 'Analis', 'Analisa', 'Analisis', 
            'Analitik', 'Sains', 'Science', 'Scientist', 'Sciences', 
            'Engineer', 'Machine', 'Learning', 'Mesin', 'AI', 'Artificial', 'Intelligence']

# 'AI' must match as a whole word only: as a case-insensitive substring it
# also hits unrelated titles such as "Retail", "Maintenance" or "Trainer".
# The remaining keywords stay substring matches because several are stems.
whole_word_keywords = {'AI'}
pattern = '|'.join(
    (r'\b' + kw + r'\b') if kw in whole_word_keywords else kw
    for kw in keywords
)

# na=False treats a missing title as "no match" instead of propagating NaN
# into the boolean mask.
is_data_job = df_jobs['Job Title'].str.contains(pattern, flags=re.IGNORECASE, na=False)
df_jobs = df_jobs[is_data_job].reset_index(drop=True)

In [41]:
# ========================
# 8. Select the Columns to Keep
# ========================
output_columns = ['Job Title', 'Company Name', 'Job Qualifications', 'URL', 'Type',
                  'Work Hours', 'Kota', 'Georeferenced', 'Longitude', 'Latitude']
# .copy() makes the subset an independent frame, so the column assignments
# below do not raise SettingWithCopyWarning.
df_jobs = df_jobs[output_columns].copy()

# Standardise text values: trim surrounding whitespace and use Title Case.
# Missing entries are restored afterwards; without that, astype(str) would
# turn them into the literal text "Nan" in the exported file.
for text_column in ("Type", "Work Hours"):
    raw_values = df_jobs[text_column]
    cleaned_values = raw_values.astype(str).str.strip().str.title()
    df_jobs[text_column] = cleaned_values.where(raw_values.notna())

# ========================
# 9. Save to a CSV File (Optional)
# ========================
df_jobs.to_csv("Data_Clean.csv", index=False)