In [12]:
import pandas as pd
import numpy as np
import geopandas as gpd
import time

import altair as alt
import plotly.graph_objects as go
import plotly.express as px
import matplotlib as plt
import seaborn as sns

from urllib.request import urlopen
import json

In [13]:
# Input locations, given relative to the notebook's working directory:
# the GeoJSON boundary file first, then the long-format CSV.
filepath_gdf, filepath_df = (
    "../datasets/geojson/pulau-jawa.geojson",
    "../datasets/processed-long-format.csv",
)

# geopandas reads the GeoJSON; pandas reads the CSV.
gdf = gpd.read_file(filepath_gdf)
df = pd.read_csv(filepath_df)

In [14]:
# Preview the first rows of both inputs (geo frame first, then the CSV frame).
display(gdf.head(), df.head())

Unnamed: 0,kabupaten/kota,provinsi,geometry
0,Bangkalan,Jawa Timur,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
1,Banyuwangi,Jawa Timur,"POLYGON ((114.42380 -7.93550, 114.42272 -7.960..."
2,Blitar,Jawa Timur,"POLYGON ((112.35896 -8.34845, 112.34630 -8.349..."
3,Bojonegoro,Jawa Timur,"POLYGON ((112.15454 -7.11012, 112.16409 -7.115..."
4,Bondowoso,Jawa Timur,"POLYGON ((114.22434 -7.98666, 114.24071 -8.013..."


Unnamed: 0,provinsi,kabupaten/kota,tahun,upahminimum,kenaikanupah,persentasekenaikan,upahsebelumnya
0,Jawa Barat,Bandung,2013,1388333,,,
1,Jawa Barat,Bandung,2014,1735473,347140.0,25.0,1388333.0
2,Jawa Barat,Bandung,2015,2001195,265722.0,15.31,1735473.0
3,Jawa Barat,Bandung,2016,2275715,274520.0,13.72,2001195.0
4,Jawa Barat,Bandung,2017,2463461,187746.0,8.25,2275715.0


In [15]:
# List the distinct "provinsi" labels in each frame so that spelling or
# casing differences between the two sources can be spotted by eye.
display(gdf["provinsi"].unique(), df["provinsi"].unique())

array(['Jawa Timur', 'Jawa Tengah', 'Jawa Barat', 'Dki Jakarta',
       'Daerah Istimewa Yogyakarta', 'Banten'], dtype=object)

array(['Jawa Barat', 'Jawa Timur', 'Jawa Tengah',
       'Daerah Istimewa Yogyakarta', 'Banten', 'DKI Jakarta'],
      dtype=object)

In [16]:
# Count the distinct "kabupaten/kota" names in each frame; a difference
# between the two numbers means some names exist in only one source.
display(gdf["kabupaten/kota"].nunique(), df["kabupaten/kota"].nunique())

122

118

In [17]:
# Work on copies so the frames loaded from disk (`df`, `gdf`) stay untouched.
wage_df, boundaries = df.copy(), gdf.copy()

In [18]:
# Join the wage rows onto the boundary rows by "kabupaten/kota".
# how="right" keeps every boundary row even when it has no wage record, and
# indicator=True adds a "_merge" column recording where each row came from.
merged_df = pd.merge(
    wage_df,
    boundaries,
    on="kabupaten/kota",
    how="right",
    indicator=True,
)
merged_df

Unnamed: 0,provinsi_x,kabupaten/kota,tahun,upahminimum,kenaikanupah,persentasekenaikan,upahsebelumnya,provinsi_y,geometry,_merge
0,Jawa Timur,Bangkalan,2013.0,983800.0,,,,Jawa Timur,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906...",both
1,Jawa Timur,Bangkalan,2014.0,1102000.0,118200.0,12.01,983800.0,Jawa Timur,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906...",both
2,Jawa Timur,Bangkalan,2015.0,1267300.0,165300.0,15.00,1102000.0,Jawa Timur,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906...",both
3,Jawa Timur,Bangkalan,2016.0,1414000.0,146700.0,11.58,1267300.0,Jawa Timur,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906...",both
4,Jawa Timur,Bangkalan,2017.0,1530660.0,116660.0,8.25,1414000.0,Jawa Timur,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906...",both
...,...,...,...,...,...,...,...,...,...,...
1296,Banten,Tangerang,2019.0,3841368.0,285533.0,8.03,3555835.0,Banten,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095...",both
1297,Banten,Tangerang,2020.0,4168268.0,326900.0,8.51,3841368.0,Banten,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095...",both
1298,Banten,Tangerang,2021.0,4230793.0,62525.0,1.50,4168268.0,Banten,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095...",both
1299,Banten,Tangerang,2022.0,4230792.0,-1.0,-0.00,4230793.0,Banten,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095...",both


In [19]:
# Rows flagged "right_only" exist in the boundary frame but found no
# matching wage row during the merge; collect them for inspection.
right = merged_df[merged_df["_merge"].eq("right_only")].reset_index(drop=True)

right

Unnamed: 0,provinsi_x,kabupaten/kota,tahun,upahminimum,kenaikanupah,persentasekenaikan,upahsebelumnya,provinsi_y,geometry,_merge
0,,Hutan,,,,,,Jawa Tengah,"POLYGON ((110.34881 -7.15143, 110.35282 -7.158...",right_only
1,,Wadung Kedungombo,,,,,,Jawa Tengah,"POLYGON ((110.82794 -7.24749, 110.84517 -7.267...",right_only
2,,Waduk Cirata,,,,,,Jawa Barat,"POLYGON ((107.36784 -6.70070, 107.34255 -6.699...",right_only
3,,Kepulauan Seribu,,,,,,Dki Jakarta,"POLYGON ((106.62987 -5.85381, 106.61048 -5.863...",right_only


In [20]:
# Keep only the rows matched in both frames, renumber them from 0, drop the
# merge bookkeeping columns, and restore the plain "provinsi" column name
# (taken from the wage side of the merge).
merged_df = (
    merged_df[merged_df["_merge"].eq("both")]
    .reset_index(drop=True)
    .drop(columns=["provinsi_y", "_merge"])
    .rename(columns={"provinsi_x": "provinsi"})
)
merged_df

Unnamed: 0,provinsi,kabupaten/kota,tahun,upahminimum,kenaikanupah,persentasekenaikan,upahsebelumnya,geometry
0,Jawa Timur,Bangkalan,2013.0,983800.0,,,,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
1,Jawa Timur,Bangkalan,2014.0,1102000.0,118200.0,12.01,983800.0,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
2,Jawa Timur,Bangkalan,2015.0,1267300.0,165300.0,15.00,1102000.0,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
3,Jawa Timur,Bangkalan,2016.0,1414000.0,146700.0,11.58,1267300.0,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
4,Jawa Timur,Bangkalan,2017.0,1530660.0,116660.0,8.25,1414000.0,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
...,...,...,...,...,...,...,...,...
1292,Banten,Tangerang,2019.0,3841368.0,285533.0,8.03,3555835.0,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."
1293,Banten,Tangerang,2020.0,4168268.0,326900.0,8.51,3841368.0,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."
1294,Banten,Tangerang,2021.0,4230793.0,62525.0,1.50,4168268.0,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."
1295,Banten,Tangerang,2022.0,4230792.0,-1.0,-0.00,4230793.0,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."


In [21]:
# Floor the wage-increase columns at zero: every non-positive value becomes 0.
# The test is `<= 0` rather than `< 0` on purpose: a negative zero (-0.0) is
# not "less than 0", so a strict comparison would leave it in place and it
# would keep rendering as "-0.00". Missing values compare False and are left
# untouched.
merged_df.loc[merged_df["kenaikanupah"] <= 0, "kenaikanupah"] = 0
merged_df.loc[merged_df["persentasekenaikan"] <= 0, "persentasekenaikan"] = 0

# Cast the year and currency columns to pandas' nullable integer dtype so they
# display without a trailing ".0" while still allowing missing values.
# NOTE: this is a dtype cast, not a rounding step; it relies on the stored
# values already being whole numbers.
int_columns = ["tahun", "upahminimum", "kenaikanupah", "upahsebelumnya"]
merged_df[int_columns] = merged_df[int_columns].astype("Int64")

In [22]:
# Display the merged frame to check the result of the preceding clean-up
# (non-positive increases set to 0, integer columns cast to Int64).
merged_df

Unnamed: 0,provinsi,kabupaten/kota,tahun,upahminimum,kenaikanupah,persentasekenaikan,upahsebelumnya,geometry
0,Jawa Timur,Bangkalan,2013,983800,,,,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
1,Jawa Timur,Bangkalan,2014,1102000,118200,12.01,983800,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
2,Jawa Timur,Bangkalan,2015,1267300,165300,15.00,1102000,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
3,Jawa Timur,Bangkalan,2016,1414000,146700,11.58,1267300,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
4,Jawa Timur,Bangkalan,2017,1530660,116660,8.25,1414000,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
...,...,...,...,...,...,...,...,...
1292,Banten,Tangerang,2019,3841368,285533,8.03,3555835,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."
1293,Banten,Tangerang,2020,4168268,326900,8.51,3841368,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."
1294,Banten,Tangerang,2021,4230793,62525,1.50,4168268,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."
1295,Banten,Tangerang,2022,4230792,0,-0.00,4230793,"POLYGON ((106.72466 -6.08918, 106.71138 -6.095..."


In [25]:
# Keep only the rows whose "tahun" (year) is 2023 and renumber them from 0.
umk_2023 = merged_df.loc[merged_df["tahun"] == 2023].reset_index(drop=True)

umk_2023

Unnamed: 0,provinsi,kabupaten/kota,tahun,upahminimum,kenaikanupah,persentasekenaikan,upahsebelumnya,geometry
0,Jawa Timur,Bangkalan,2023,2152450,195677,10.00,1956773,"POLYGON ((113.12451 -6.89289, 113.12664 -6.906..."
1,Jawa Timur,Banyuwangi,2023,2528899,200000,8.59,2328899,"POLYGON ((114.42380 -7.93550, 114.42272 -7.960..."
2,Jawa Timur,Blitar,2023,2215071,176047,8.63,2039024,"POLYGON ((112.35896 -8.34845, 112.34630 -8.349..."
3,Jawa Timur,Bojonegoro,2023,2279568,200000,9.62,2079568,"POLYGON ((112.15454 -7.11012, 112.16409 -7.115..."
4,Jawa Timur,Bondowoso,2023,2154504,195864,10.00,1958640,"POLYGON ((114.22434 -7.98666, 114.24071 -8.013..."
...,...,...,...,...,...,...,...,...
113,Banten,Kota Tangerang Selatan,2023,4551451,271237,6.34,4280214,"POLYGON ((106.74815 -6.24961, 106.75652 -6.262..."
114,Banten,Lebak,2023,2944665,171075,6.17,2773590,"POLYGON ((106.39973 -6.33239, 106.42021 -6.349..."
115,Banten,Pandeglang,2023,2980351,180059,6.43,2800292,"MULTIPOLYGON (((105.54977 -6.99566, 105.56973 ..."
116,Banten,Serang,2023,4492961,277781,6.59,4215180,"MULTIPOLYGON (((105.85639 -5.93773, 105.86610 ..."
