### Import Library

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

### Data Ingestion

In [3]:
# Load the two raw CSV sources: economic indicators and the political survey.
df_eco = pd.read_csv(filepath_or_buffer="data/ekonomi_bps.csv")
df_pol = pd.read_csv(filepath_or_buffer="data/political_survey.csv")

# Preview the economic data (last expression, so the notebook renders it).
df_eco.head()

Unnamed: 0,tahun,provinsi,gdp_per_capita,tingkat_pengangguran,inflasi
0,2023,DKI Jakarta,250000000,6.5,3.1
1,2023,Jawa Barat,65000000,8.2,3.5
2,2023,Jawa Tengah,60000000,5.7,3.0
3,2023,Jawa Timur,70000000,5.9,3.2
4,2023,Banten,68000000,7.1,3.4


In [4]:
# Preview the first rows of the political survey data.
df_pol.head()

Unnamed: 0,tahun,provinsi,trust_government,political_participation_index
0,2023,DKI Jakarta,72,65
1,2023,Jawa Barat,68,60
2,2023,Jawa Tengah,75,62
3,2023,Jawa Timur,70,64
4,2023,Banten,66,58


In [31]:
# Dump both frames as plain text, separated by a 20-character "=" rule.
print(df_eco, "=" * 20, df_pol, sep="\n")

   tahun     provinsi  gdp_per_capita  tingkat_pengangguran  inflasi
0   2023  DKI Jakarta       250000000                   6.5      3.1
1   2023   Jawa Barat        65000000                   8.2      3.5
2   2023  Jawa Tengah        60000000                   5.7      3.0
3   2023   Jawa Timur        70000000                   5.9      3.2
4   2023       Banten        68000000                   7.1      3.4
5   2024  DKI Jakarta       260000000                   6.2      2.9
6   2024   Jawa Barat        67000000                   7.9      3.2
7   2024  Jawa Tengah        62000000                   5.5      2.8
8   2024   Jawa Timur        72000000                   5.7      3.0
9   2024       Banten        70000000                   6.8      3.1
   tahun     provinsi  trust_government  political_participation_index
0   2023  DKI Jakarta                72                             65
1   2023   Jawa Barat                68                             60
2   2023  Jawa Tengah       

### Cleaning Data

In [17]:
def missing_value():
    """Print the per-column count of null values for df_eco and df_pol.

    Reads the notebook-level frames ``df_eco`` and ``df_pol``; returns None.
    """
    def _report(header, frame):
        # isnull().sum() yields one null count per column.
        print(header, frame.isnull().sum())

    _report("Missing Value df_eco:\n", df_eco)
    _report("Missing Value df_pol:\n", df_pol)

missing_value()

Missing Value df_eco:
 tahun                   0
provinsi                0
gdp_per_capita          0
tingkat_pengangguran    0
inflasi                 0
dtype: int64
Missing Value df_pol:
 tahun                            0
provinsi                         0
trust_government                 0
political_participation_index    0
dtype: int64


In [12]:
def duplicates():
    """Print how many fully duplicated rows df_eco and df_pol contain.

    Reads the notebook-level frames ``df_eco`` and ``df_pol``; returns None.
    """
    def _report(header, frame):
        # duplicated() flags rows that repeat an earlier row; sum() counts them.
        print(header, frame.duplicated().sum())

    _report("Duplicated Data df_eco:", df_eco)
    _report("Duplicated Data df_pol:", df_pol)

duplicates()

Duplicated Data df_eco: 0
Duplicated Data df_pol: 0


In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the economic data.
df_eco.describe()

Unnamed: 0,tahun,gdp_per_capita,tingkat_pengangguran,inflasi
count,10.0,10.0,10.0,10.0
mean,2023.5,104400000.0,6.55,3.12
std,0.527046,79493120.0,0.943104,0.214994
min,2023.0,60000000.0,5.5,2.8
25%,2023.0,65500000.0,5.75,3.0
50%,2023.5,69000000.0,6.35,3.1
75%,2024.0,71500000.0,7.025,3.2
max,2024.0,260000000.0,8.2,3.5


In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the political survey data.
df_pol.describe()

Unnamed: 0,tahun,trust_government,political_participation_index
count,10.0,10.0,10.0
mean,2023.5,71.2,62.9
std,0.527046,3.457681,2.960856
min,2023.0,66.0,58.0
25%,2023.0,68.5,60.5
50%,2023.5,71.0,63.0
75%,2024.0,73.5,65.0
max,2024.0,77.0,67.0


In [23]:
# Report (rows, columns) of each source frame before merging.
for label, frame in (("Shape df_eco:", df_eco), ("Shape df_pol:", df_pol)):
    print(label, frame.shape)

Shape df_eco: (10, 5)
Shape df_pol: (10, 4)


**Merge dua file csv menjadi 1**

In [35]:
# Join keys shared by both sources, and the economic columns to bring over.
keys_cols = ["tahun", "provinsi"]
cols_from_eco = ["gdp_per_capita", "tingkat_pengangguran", "inflasi"]

# Keep only the keys plus the wanted economic indicators.
df_eco_subset = df_eco[[*keys_cols, *cols_from_eco]]

# Left join: every survey row is kept; economic values are attached where
# the (tahun, provinsi) pair matches.
df_merge = df_pol.merge(df_eco_subset, how="left", on=keys_cols)

# Persist the combined table next to the notebook.
df_merge.to_csv("gabungan.csv", index=False)
print("Selesai, tersimpan sebagai gabungan.csv")

Selesai, tersimpan sebagai gabungan.csv


In [36]:
# Preview the first rows of the merged table to confirm the join result.
df_merge.head()

Unnamed: 0,tahun,provinsi,trust_government,political_participation_index,gdp_per_capita,tingkat_pengangguran,inflasi
0,2023,DKI Jakarta,72,65,250000000,6.5,3.1
1,2023,Jawa Barat,68,60,65000000,8.2,3.5
2,2023,Jawa Tengah,75,62,60000000,5.7,3.0
3,2023,Jawa Timur,70,64,70000000,5.9,3.2
4,2023,Banten,66,58,68000000,7.1,3.4


In [37]:
# Take a copy of the merged frame as the working DataFrame for the cells below.
df = df_merge.copy()

### EDA + Visualization

GDP per Capita per Provinsi

In [38]:
# The merged data has one row per province for each year (2023 and 2024).
# With only x="provinsi", px.bar stacks both rows into a single bar whose
# height is the sum over years. Colouring by year and grouping the bars keeps
# each yearly value readable on its own.
fig_gdp = px.bar(
    # Year as string so plotly uses discrete colours instead of a colour scale.
    df.assign(tahun=df["tahun"].astype(str)),
    x="provinsi",
    y="gdp_per_capita",
    color="tahun",
    barmode="group",
    title="GDP per Capita per Provinsi",
    text="gdp_per_capita",
)
fig_gdp.update_layout(xaxis_tickangle=-20)
fig_gdp.show()

Tingkat Pengangguran per Provinsi

In [39]:
# The merged data has one row per province for each year (2023 and 2024).
# With only x="provinsi", px.bar stacks both rows into a single bar, which
# would show the sum of two unemployment rates. Colouring by year and grouping
# the bars shows each year's rate separately.
fig_unemploy = px.bar(
    # Year as string so plotly uses discrete colours instead of a colour scale.
    df.assign(tahun=df["tahun"].astype(str)),
    x="provinsi",
    y="tingkat_pengangguran",
    color="tahun",
    barmode="group",
    title="Tingkat Pengangguran per Provinsi",
    text="tingkat_pengangguran",
)
fig_unemploy.update_layout(xaxis_tickangle=-20)
fig_unemploy.show()

Inflasi per Provinsi

In [40]:
# The merged data has one row per province for each year (2023 and 2024).
# With only x="provinsi", px.bar stacks both rows into a single bar, which
# would show the sum of two inflation rates. Colouring by year and grouping
# the bars shows each year's rate separately.
fig_inflasi = px.bar(
    # Year as string so plotly uses discrete colours instead of a colour scale.
    df.assign(tahun=df["tahun"].astype(str)),
    x="provinsi",
    y="inflasi",
    color="tahun",
    barmode="group",
    # Title spelling corrected ("Inflansi" -> "Inflasi") to match the column name.
    title="Tingkat Inflasi per Provinsi",
    text="inflasi",
)
fig_inflasi.update_layout(xaxis_tickangle=-20)
fig_inflasi.show()

Trust Government & Political Participation per Provinsi

In [47]:
# DataFrame.melt reshapes the frame from wide to long format: the two
# indicator columns become rows, labelled by "indikator", with their values
# in "count".
graph = df.melt(
    id_vars=["tahun", "provinsi"],
    value_vars=["trust_government", "political_participation_index"],
    var_name="indikator",
    value_name="count",
)

# Each province has a 2023 and a 2024 row, so without a split by year both
# values of an indicator land on the same x position. One facet per year
# keeps them apart.
fig = px.bar(
    graph,
    x="provinsi",
    y="count",
    color="indikator",
    barmode="group",
    facet_col="tahun",
    title="Trust Government & Political Participation per Provinsi",
)

# update_xaxes applies the tick angle to every facet's x axis, not only the first.
fig.update_xaxes(tickangle=-20)
fig.show()

In [22]:

# Political participation index by year, one point per province and year.
# The original call had `size=""` without a trailing comma (a SyntaxError),
# and an empty string names no column, so the argument is dropped. Colouring
# by province makes the points attributable.
fig = px.scatter(
    df,
    x="tahun",
    y="political_participation_index",
    color="provinsi",
    title="Political Index dari tahun ke tahun",
)
fig.show()