In [None]:
# Extract periode, tahun, provinsi filter values

import requests
from bs4 import BeautifulSoup

URL = "https://djpk.kemenkeu.go.id/portal/data/apbd"

# Fetch the APBD portal page and parse it so the filter dropdowns can be read.
# The timeout keeps the cell from hanging indefinitely if the server stalls,
# and raise_for_status() stops on an HTTP error instead of parsing an error page.
r = requests.get(URL, timeout=60)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

# HTML ids of the three filter <select> elements on the page
periode_dropdownid = "sel_periode"
tahun_dropdownid = "sel_tahun"
provinsi_dropdownid = "sel_provinsi"

def get_dropdown_values(soup, select_id):
    """Collect the options of the ``<select>`` element whose id is ``select_id``.

    Args:
        soup: Parsed page exposing ``find("select", {...})``.
        select_id: Value of the ``id`` attribute of the dropdown to read.

    Returns:
        A list of ``(value, label)`` tuples, one per ``<option>`` that has a
        non-empty ``value`` attribute. The label is the option text with
        surrounding whitespace stripped. An empty list is returned when no
        matching ``<select>`` is found.
    """
    select_tag = soup.find("select", {"id": select_id})
    if not select_tag:
        return []

    pairs = []
    for option in select_tag.find_all("option"):
        # Options without a (non-empty) value attribute are skipped.
        if not option.get("value"):
            continue
        pairs.append((option["value"], option.get_text(strip=True)))
    return pairs

# Read the (value, label) options of each filter dropdown from the parsed page.
provinsi_list, periode_list, tahun_list = (
    get_dropdown_values(soup, dropdown_id)
    for dropdown_id in (provinsi_dropdownid, periode_dropdownid, tahun_dropdownid)
)

# Quick look at what was extracted; only the first five provinsi entries are shown.
for _label, _options in (
    ("Provinsi:", provinsi_list[:5]),
    ("Periode:", periode_list),
    ("Tahun:", tahun_list),
):
    print(_label, _options)


Provinsi: [('--', 'Semua Provinsi'), ('01', 'Provinsi Aceh'), ('02', 'Provinsi Sumatera Utara'), ('03', 'Provinsi Sumatera Barat'), ('04', 'Provinsi Riau')]
Periode: [('1', 'Januari'), ('2', 'Februari'), ('3', 'Maret'), ('4', 'April'), ('5', 'Mei'), ('6', 'Juni'), ('7', 'Juli'), ('8', 'Agustus'), ('9', 'September'), ('10', 'Oktober'), ('11', 'November'), ('12', 'Desember')]
Tahun: [('2026', '2026'), ('2025', '2025'), ('2024', '2024'), ('2023', '2023'), ('2022', '2022'), ('2021', '2021'), ('2020', '2020'), ('2019', '2019'), ('2018', '2018'), ('2017', '2017'), ('2016', '2016'), ('2015', '2015'), ('2014', '2014'), ('2013', '2013'), ('2012', '2012'), ('2011', '2011')]


In [None]:
# Drop the entry labelled "Semua Provinsi" so that only the remaining
# provinsi options are kept.
provinsi_list = list(
    filter(lambda prov: prov[1] != "Semua Provinsi", provinsi_list)
)

In [None]:
# Drop 2026 and keep only years after 2017
# (starting from 2018 shortens the scraping loop).
tahun_list = [
    year for year in tahun_list
    if year[0] != '2026' and int(year[0]) > 2017
]


In [None]:
# Scrape the APBD data from each provinsi, periode, tahun combination
import os
import requests
import pandas as pd
from io import BytesIO, StringIO
import csv
from tqdm import tqdm
import itertools

# Base download URL (you need to confirm exact endpoint from Inspect > Network)
BASE_URL = "https://djpk.kemenkeu.go.id/portal/csv_apbd"


# Create output folder
os.makedirs("apbd_excels", exist_ok=True)

# Periods treated as "future" and therefore skipped. These are compared with
# the option *values* (strings): every item of tahun_list / periode_list is a
# (value, label) tuple, so comparing the tuple itself to '2025' or to an int
# never matches. Update these constants when newer periods become available.
FUTURE_TAHUN = "2025"
FUTURE_PERIODE = {"9", "10", "11", "12"}

# Collect one dataframe per successful download and concatenate once at the
# end; calling pd.concat inside the loop re-copies every row on each iteration.
frames = []

# Loop over combinations
loop_length = len(provinsi_list) * len(periode_list) * len(tahun_list)
i = 0

for provinsi, periode, tahun in itertools.product(provinsi_list, periode_list, tahun_list):
    i = i + 1
    print(f"Progress: {i/loop_length:.3%}")

    # Skip future periods
    if tahun[0] == FUTURE_TAHUN and periode[0] in FUTURE_PERIODE:
        continue

    # Query parameters use the option values; "--" requests all pemda.
    params_value = {
        "type": "apbd",
        "provinsi": provinsi[0],
        "periode": periode[0],
        "tahun": tahun[0],
        "pemda": "--",
    }

    print(provinsi[1], periode[1], tahun[1])

    # Name used in the log line only; no per-request file is written.
    filename = f"apbd_{provinsi[1]}_{periode[1]}_{tahun[1]}.xlsx"

    try:
        print(f"Downloading {filename}...")
        r = requests.get(BASE_URL, params=params_value, timeout=60)
        # Fail on HTTP errors rather than parsing an error page as CSV.
        r.raise_for_status()

        raw = r.content.decode("utf-8", errors="ignore")
        rows = list(csv.reader(StringIO(raw), delimiter=",", quotechar='"'))
        if not rows:
            # Nothing to use as a header row; report it explicitly.
            print(f"⚠️ Empty response for {provinsi}-{periode}-{tahun}, skipped")
            continue

        df = pd.DataFrame(rows[1:], columns=rows[0])  # first row as header

        # Add metadata columns
        df["provinsi"] = provinsi[1]
        df["periode"] = periode[1]
        df["tahun"] = tahun[1]
        df["pemda"] = "Semua Pemda"

        frames.append(df)

        print(f"✅ Appended {provinsi}-{periode}-{tahun}, rows: {len(df)}")

    except requests.RequestException as e:
        print(f"❌ Error downloading {provinsi}-{periode}-{tahun}: {e}")
    except Exception as e:
        print(f"❌ Error parsing CSV for {provinsi}-{periode}-{tahun}: {e}")

# Build and save the final dataframe once
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
all_data.to_csv("apbd_all.csv", index=False)
print(f"🎉 Final dataframe shape: {all_data.shape}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Provinsi Maluku Desember 2019
Downloading apbd_Provinsi Maluku_Desember_2019.xlsx...
✅ Appended ('25', 'Provinsi Maluku')-('12', 'Desember')-('2019', '2019'), rows: 36
Progress: 65.789%
Provinsi Maluku Desember 2018
Downloading apbd_Provinsi Maluku_Desember_2018.xlsx...
✅ Appended ('25', 'Provinsi Maluku')-('12', 'Desember')-('2018', '2018'), rows: 36
Progress: 65.817%
Provinsi Papua Januari 2025
Downloading apbd_Provinsi Papua_Januari_2025.xlsx...
✅ Appended ('26', 'Provinsi Papua')-('1', 'Januari')-('2025', '2025'), rows: 40
Progress: 65.844%
Provinsi Papua Januari 2024
Downloading apbd_Provinsi Papua_Januari_2024.xlsx...
✅ Appended ('26', 'Provinsi Papua')-('1', 'Januari')-('2024', '2024'), rows: 37
Progress: 65.872%
Provinsi Papua Januari 2023
Downloading apbd_Provinsi Papua_Januari_2023.xlsx...
✅ Appended ('26', 'Provinsi Papua')-('1', 'Januari')-('2023', '2023'), rows: 41
Progress: 65.899%
Provinsi Papua Januari 202

In [None]:
# Show the combined dataframe as the cell output.
all_data

Unnamed: 0,akun,anggaran,realisasi,persentase,provinsi,periode,tahun,pemda
0,Pendapatan Daerah,"40.039,55 M","2.789,22 M",6.97,Provinsi Aceh,Januari,2025,Semua Pemda
1,PAD,"6.455,14 M","374,43 M",5.80,Provinsi Aceh,Januari,2025,Semua Pemda
2,Pajak Daerah,"2.716,15 M","169,46 M",6.24,Provinsi Aceh,Januari,2025,Semua Pemda
3,Retribusi Daerah,"1.084,92 M","85,98 M",7.92,Provinsi Aceh,Januari,2025,Semua Pemda
4,Hasil Pengelolaan Kekayaan Daerah yang Dipisa...,"371,97 M","0,00 M",0.00,Provinsi Aceh,Januari,2025,Semua Pemda
...,...,...,...,...,...,...,...,...
129056,Penerimaan Pembiayaan Daerah,"183,19 M","446,66 M",243.82,Provinsi Papua Barat Daya,Desember,2023,Semua Pemda
129057,Sisa Lebih Perhitungan Anggaran Tahun Sebelumnya,"183,19 M","446,66 M",243.82,Provinsi Papua Barat Daya,Desember,2023,Semua Pemda
129058,Pengeluaran Pembiayaan Daerah,"172,13 M","141,67 M",82.30,Provinsi Papua Barat Daya,Desember,2023,Semua Pemda
129059,Penyertaan Modal Daerah,"10,26 M","8,41 M",82.01,Provinsi Papua Barat Daya,Desember,2023,Semua Pemda


# Scrape pemda-level APBD in Bali

In [None]:
import requests
from bs4 import BeautifulSoup

# APBD portal page with the Bali filters (provinsi=22) pre-selected in the query string.
url_bali = "https://djpk.kemenkeu.go.id/portal/data/apbd?periode=9&tahun=2025&provinsi=22&pemda=00" # Bali
# The timeout avoids hanging forever on a stalled connection; raise_for_status()
# stops on an HTTP error instead of parsing an error page.
r = requests.get(url_bali, timeout=60)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")

def get_dropdown_values(soup, select_id):
    """Return ``(value, label)`` tuples for the options of one dropdown.

    Looks up the ``<select>`` element with id ``select_id`` in ``soup`` and
    keeps every ``<option>`` that has a non-empty ``value`` attribute; the
    label is the option text with surrounding whitespace stripped. Returns an
    empty list when the dropdown is not found.

    NOTE(review): same logic as the ``get_dropdown_values`` defined earlier in
    this notebook; consider keeping a single definition.
    """
    select_tag = soup.find("select", {"id": select_id})
    options = select_tag.find_all("option") if select_tag else []
    return [
        (option["value"], option.get_text(strip=True))
        for option in options
        if option.get("value")
    ]

# Read the periode, tahun and pemda options from the parsed Bali page.
periode_list, tahun_list, bali_pemda_list = (
    get_dropdown_values(soup, dropdown_id)
    for dropdown_id in ("sel_periode", "sel_tahun", "sel_pemda")
)

print(bali_pemda_list)

[('00', 'Provinsi Bali'), ('01', 'Kab. Badung'), ('02', 'Kab. Bangli'), ('03', 'Kab. Buleleng'), ('04', 'Kab. Gianyar'), ('05', 'Kab. Jembrana'), ('06', 'Kab. Karangasem'), ('07', 'Kab. Klungkung'), ('08', 'Kab. Tabanan'), ('09', 'Kota Denpasar'), ('--', 'Semua pemda')]


In [None]:
# Keep only the individual kabupaten/kota entries: drop the province-level
# entry ("Provinsi Bali", value "00") and the aggregate entry ("Semua pemda",
# value "--"). The filter uses the option values because the earlier label
# comparison looked for "Semua Pemda" while the portal label is "Semua pemda",
# so the aggregate entry was never removed.
bali_pemda_list = [p for p in bali_pemda_list if p[0] not in ("00", "--")]

In [None]:
# Drop 2026 from the year options
tahun_list = list(filter(lambda year: year[0] != '2026', tahun_list))

# The "start from 2018" restriction is commented out here, so all remaining
# years are kept:
# tahun_list = [t for t in tahun_list if int(t[0])>2017 ]

In [None]:
# Show the current periode options as the cell output.
periode_list

[('1', 'Januari'),
 ('2', 'Februari'),
 ('3', 'Maret'),
 ('4', 'April'),
 ('5', 'Mei'),
 ('6', 'Juni'),
 ('7', 'Juli'),
 ('8', 'Agustus'),
 ('9', 'September'),
 ('10', 'Oktober'),
 ('11', 'November'),
 ('12', 'Desember')]

In [None]:
# Scrape the APBD data for each Bali pemda and tahun combination (periode is fixed to '1')
import os
import requests
import pandas as pd
from io import BytesIO, StringIO
import csv
from tqdm import tqdm
import itertools

# Base download URL (you need to confirm exact endpoint from Inspect > Network)
BASE_URL = "https://djpk.kemenkeu.go.id/portal/csv_apbd"

# Create output folder
os.makedirs("apbd_excels_bali", exist_ok=True)

# Collect one dataframe per successful download and concatenate once at the
# end; calling pd.concat inside the loop re-copies every row on each iteration.
frames = []

# Loop over combinations
loop_length = len(bali_pemda_list) * len(tahun_list)
i = 0

for pemda, tahun in itertools.product(bali_pemda_list, tahun_list):
    i = i + 1
    print(f"Progress: {i/loop_length:.3%}")

    # Query parameters use the option values of the (value, label) tuples.
    params_value = {
        "type": "apbd",
        "provinsi": '22', # Bali
        "periode": '1', # set static month
        "tahun": tahun[0],
        "pemda": pemda[0],
    }

    print(pemda[1], tahun[1])

    # Name used in the log line only; no per-request file is written.
    filename = f"apbd_{pemda[1]}_{tahun[1]}.xlsx"

    try:
        print(f"Downloading {filename}...")
        r = requests.get(BASE_URL, params=params_value, timeout=60)
        # Fail on HTTP errors rather than parsing an error page as CSV.
        r.raise_for_status()

        raw = r.content.decode("utf-8", errors="ignore")
        rows = list(csv.reader(StringIO(raw), delimiter=",", quotechar='"'))
        if not rows:
            # Nothing to use as a header row; report it explicitly.
            print(f"⚠️ Empty response for {pemda}-{tahun}, skipped")
            continue

        df = pd.DataFrame(rows[1:], columns=rows[0])  # first row as header

        # Add metadata columns
        # NOTE(review): the request is made with periode '1' while the column
        # is filled with "-"; confirm this is the intended label.
        df["pemda"] = pemda[1]
        df["periode"] = "-"
        df["tahun"] = tahun[1]
        df["provinsi"] = "Bali"

        frames.append(df)

        print(f"✅ Appended {pemda}-{tahun}, rows: {len(df)}")

    except requests.RequestException as e:
        print(f"❌ Error downloading {pemda}-{tahun}: {e}")
    except Exception as e:
        print(f"❌ Error parsing CSV for {pemda}-{tahun}: {e}")

# Build and save the final dataframe once. A dedicated file name is used so
# this run does not overwrite "apbd_all.csv" written by the province-level scrape.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
all_data.to_csv("apbd_all_bali.csv", index=False)
print(f"🎉 Final dataframe shape: {all_data.shape}")

Progress: 0.606%
Provinsi Bali 2025
Downloading apbd_Provinsi Bali_2025.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2025', '2025'), rows: 31
Progress: 1.212%
Provinsi Bali 2024
Downloading apbd_Provinsi Bali_2024.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2024', '2024'), rows: 33
Progress: 1.818%
Provinsi Bali 2023
Downloading apbd_Provinsi Bali_2023.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2023', '2023'), rows: 32
Progress: 2.424%
Provinsi Bali 2022
Downloading apbd_Provinsi Bali_2022.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2022', '2022'), rows: 31
Progress: 3.030%
Provinsi Bali 2021
Downloading apbd_Provinsi Bali_2021.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2021', '2021'), rows: 31
Progress: 3.636%
Provinsi Bali 2020
Downloading apbd_Provinsi Bali_2020.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2020', '2020'), rows: 43
Progress: 4.242%
Provinsi Bali 2019
Downloading apbd_Provinsi Bali_2019.xlsx...
✅ Appended ('00', 'Provinsi Bali')-('2019', '2019'), rows: 28
Progre

In [None]:
-