# **Additional Data**

```
🚨 Jumlah Penduduk 🚨
🚨 Jumlah Kendaraan 🚨

📌https://www.bps.go.id/indicator/17/57/1/jumlah-kendaraan-bermotor.html

📌https://www.bps.go.id/indikator/indikator/view_data_pub/0000/api_pub/V2w4dFkwdFNLNU5mSE95Und2UDRMQT09/da_10/1

📌https://www.bps.go.id/indicator/12/1886/1/jumlah-penduduk-hasil-proyeksi-menurut-provinsi-dan-jenis-kelamin.html

```

# **Import Library**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno
!pip install squarify
import squarify as sq

import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as pyoff
import warnings # Ignores any warning
warnings.filterwarnings("ignore")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting squarify
  Downloading squarify-0.4.3-py3-none-any.whl (4.3 kB)
Installing collected packages: squarify
Successfully installed squarify-0.4.3


In [None]:
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*" and
# later releases dropped the old "seaborn" alias, so pick whichever name the
# installed Matplotlib actually provides instead of failing on newer versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Apply seaborn's default theme and make every figure 15x8 inches by default.
sns.set(rc={'figure.figsize':(15, 8)})

In [None]:
# Do not truncate columns when a DataFrame is rendered (None = no column limit).
pd.set_option('display.max_columns', None)

# **Jumlah Kendaraan Keseluruhan (per Tahun)**

In [None]:
# jumlah_kendaraan_bermotor.csv
# Read the motor-vehicle counts table from a Google Drive direct-download link;
# the file is semicolon-delimited, hence sep=";".
df_kendaraan = pd.read_csv("https://drive.google.com/uc?id=1kSguqLIcFnTgqs2r67W0qLUVi0QWyHvh", sep=";")

In [None]:
# Display the loaded table (one row per year, one column per vehicle type plus the total).
df_kendaraan

Unnamed: 0,Year,Mobil Penumpang,Mobil Bis,Mobil Barang,Sepeda motor,Jumlah
0,2015,12304221,196309,4145857,88656931,105303318
1,2016,13142958,204512,4326731,94531510,112205711
2,2017,13968202,213359,4540902,100200245,118922708
3,2018,14830698,222872,4797254,106657952,126508776
4,2019,15592419,231569,5021888,112771136,133617012
5,2020,15797746,233261,5083405,115023039,136137451


In [None]:
def human_format(num):
    """Format a number compactly with one decimal and a magnitude suffix.

    The value is divided by 1000 until its absolute value drops below 1000
    (or the largest supported suffix is reached) and is then rendered as
    ``'<value with 1 decimal><suffix>'``, e.g. ``12304221 -> '12.3M'``.

    Args:
        num: A real number (int, float or NumPy scalar).

    Returns:
        str: The formatted number using the suffixes '', 'K', 'M', 'B', 'T', 'P'.
        Values beyond the 'P' range keep the 'P' suffix (e.g. ``'1000.0P'``)
        instead of raising an IndexError.
    """
    # add more suffixes if you need them
    suffixes = ['', 'K', 'M', 'B', 'T', 'P']
    top = len(suffixes) - 1

    magnitude = 0
    # Stop at the last suffix so very large inputs cannot overrun the list.
    while abs(num) >= 1000 and magnitude < top:
        magnitude += 1
        num /= 1000.0

    # Rounding to one decimal can push e.g. 999.999 up to 1000.0; promote such
    # values to the next suffix so 999999 reads '1.0M' rather than '1000.0K'.
    if magnitude < top and abs(round(num, 1)) >= 1000:
        magnitude += 1
        num /= 1000.0

    return '%.1f%s' % (num, suffixes[magnitude])

In [None]:
# Reshape the yearly vehicle table from wide (one column per vehicle type) to
# long form (Year, Jenis, Jumlah) so plotly can draw one line per type.
# melt() already returns a new DataFrame, so no defensive copy is needed.
df_kendaraan2 = df_kendaraan.melt(
    id_vars='Year',
    value_vars=["Mobil Penumpang", "Mobil Bis", "Mobil Barang", "Sepeda motor", "Jumlah"],
    var_name='Jenis',
    value_name='Jumlah',
)

# Compact labels (e.g. "12.3M") shown next to each marker.
df_kendaraan2["text"] = df_kendaraan2["Jumlah"].map(human_format)
# The source column "Jumlah" holds the all-types total; label it "Total" in the legend.
df_kendaraan2["Jenis"] = df_kendaraan2["Jenis"].replace("Jumlah", "Total")

fig2 = px.line(
    df_kendaraan2,
    x='Year',
    y='Jumlah',
    color='Jenis',
    symbol='Jenis',
    text="text",
    title="Number of Vehicles in Indonesia",
)

# "light grey" (with a space) is not a CSS colour name, so the browser-side
# renderer would fall back to the default background; use the valid "lightgrey".
fig2.update_layout(plot_bgcolor="lightgrey")
fig2.update_traces(textposition="bottom right")
# The y axis is hidden because every point already carries its value as text.
fig2.update_yaxes(visible=False, showticklabels=False)
fig2.show()

# **Jumlah Kendaraan (per Provinsi)**

In [None]:
# jumlah_kendaraan_bermotor_provinsi_jenis.csv
# Read the per-province vehicle counts from a Google Drive direct-download link;
# the file is semicolon-delimited, hence sep=";".
df_kendaraan_prov = pd.read_csv("https://drive.google.com/uc?id=1qFTbI3xHlvNMxdYDQMs34KG50n7Xrl5Y", sep=";")

In [None]:
# Preview the first rows to confirm the columns were parsed as expected.
df_kendaraan_prov.head()

Unnamed: 0,Year,Province,Mobil Penumpang,Bus,Truk,Sepeda Motor,Jumlah
0,2021,Aceh,166570,1103,67817,2089319,2324809
1,2021,Sumatera Utara,690543,5893,271352,6062939,7030727
2,2021,Sumatera Barat,278705,4239,135086,2118305,2536335
3,2021,Riau,367515,6060,207390,3485246,4066211
4,2021,Jambi,296892,35646,182830,2520112,3035480


In [None]:
# Build the ranking table for the province chart: keep only the year, province
# and total columns, order by total (largest first), drop the national
# "Indonesia" aggregate row, keep 2021 only, and attach a compact text label.
df_kendaraan_prov2 = (
    df_kendaraan_prov[["Year", "Province", "Jumlah"]]
    .sort_values("Jumlah", ascending=False)
    .loc[lambda d: (d["Province"] != "Indonesia") & (d["Year"] == 2021)]
    .reset_index(drop=True)
    .assign(text=lambda d: d["Jumlah"].map(human_format))
)

In [None]:
# Bar chart of the ten provinces with the most vehicles; bars are coloured by
# their total and annotated with the pre-formatted "text" label.
fig = px.bar(
    data_frame=df_kendaraan_prov2.iloc[:10],
    x='Province',
    y='Jumlah',
    color='Jumlah',
    text='text',
    labels={'Province': 'Province'},
    title="Indonesia Vehicles by Province",
)

# plot background white
fig.update_layout(plot_bgcolor="white")
fig.show()

# **Jumlah Penduduk (per Provinsi dan Jenis Kelamin)**

In [None]:
# jumlah_penduduk_provinsi_jk_all.csv
# Read the per-province population table (both sexes, judging by the "_all" file name)
# from a Google Drive direct-download link; the file is semicolon-delimited.
df_penduduk_all = pd.read_csv("https://drive.google.com/uc?id=1NZZlMpsApa_VSO75TQfpe4qbQs0CXQeu", sep=";")

In [None]:
# Preview the first rows: one row per province, one column per year.
df_penduduk_all.head()

Unnamed: 0,Provinsi,2018,2019,2020
0,ACEH,5243.4,5316.3,5388.1
1,SUMATERA UTARA,14476.0,14639.4,14798.4
2,SUMATERA BARAT,5411.8,5479.5,5545.7
3,RIAU,6717.6,6835.1,6951.2
4,JAMBI,3527.1,3566.2,3604.2


In [None]:
# Build the ranking table for the population chart: order provinces by their
# 2020 figure (largest first) and drop the national "INDONESIA" aggregate row.
df_penduduk_all_bar = (
    df_penduduk_all
    .sort_values("2020", ascending=False)
    .loc[lambda d: d["Provinsi"] != "INDONESIA"]
    .reset_index(drop=True)
)

# NOTE(review): the table values look like thousands of people (e.g. ACEH =
# 5388.1, i.e. about 5.4 million) - confirm against the BPS source table.
# Formatting the raw value would label ~5.4 million people as "5.4K", so scale
# to persons before building the label. The plotted column itself is left as is.
df_penduduk_all_bar["text"] = (df_penduduk_all_bar["2020"] * 1000).map(human_format)

In [None]:
# Plotly Express bar chart of the ten most populous provinces (2020 column);
# bars are coloured by value and annotated with the pre-formatted "text" label.
fig = px.bar(
    data_frame=df_penduduk_all_bar.iloc[:10],
    x='Provinsi',
    y='2020',
    color='2020',
    text='text',
    labels={'Provinsi': 'Provinsi'},
    title="Indonesia Total Population by Provinsi",
)

# plot background white
fig.update_layout(plot_bgcolor="white")
fig.show()

In [None]:
# jumlah_penduduk_provinsi_jk_lk.csv
# Read the per-province population table for the "lk" subset
# (presumably "laki-laki" = male - confirm against the source) from Google Drive;
# the file is semicolon-delimited.
df_penduduk_lk = pd.read_csv("https://drive.google.com/uc?id=1_z13W8cLJByUUMraMU1APpZQydFAalx7", sep=";")

In [None]:
# Preview the first rows to confirm the columns were parsed as expected.
df_penduduk_lk.head()

Unnamed: 0,Provinsi,2018,2019,2020
0,ACEH,2619.9,2656.1,2691.8
1,SUMATERA UTARA,7229.4,7312.2,7392.7
2,SUMATERA BARAT,2692.6,2727.0,2760.6
3,RIAU,3440.0,3497.0,3553.2
4,JAMBI,1795.2,1813.5,1831.3


In [None]:
# jumlah_penduduk_provinsi_jk_pr.csv
# Read the per-province population table for the "pr" subset
# (presumably "perempuan" = female - confirm against the source) from Google Drive;
# the file is semicolon-delimited.
df_penduduk_pr = pd.read_csv("https://drive.google.com/uc?id=1x5KE9Z0AKmyLsSHwdMpXqZupH0ghODzc", sep=";")

In [None]:
# Preview the first rows to confirm the columns were parsed as expected.
df_penduduk_pr.head()

Unnamed: 0,Provinsi,2018,2019,2020
0,ACEH,2623.5,2660.2,2696.3
1,SUMATERA UTARA,7246.5,7327.2,7405.7
2,SUMATERA BARAT,2719.3,2752.5,2785.1
3,RIAU,3277.6,3338.1,3398.0
4,JAMBI,1731.9,1752.7,1772.9
