# Scraping Wilayah Kerja Statistik 2020

In [216]:
import requests
import json
import pandas as pd
import numpy as np
import time
from io import StringIO

## Import Data Provinsi

In [16]:
prov = pd.read_csv('prov.csv')
prov.head()

Unnamed: 0,kode,nama
0,11,ACEH
1,12,SUMATERA UTARA
2,13,SUMATERA BARAT
3,14,RIAU
4,15,JAMBI


## Ambil Data Semua BPS Kab/Kota Masing-Masing Provinsi

In [165]:
# base dataframe
df = pd.DataFrame({
    "kode_prov": [],
    "kode_bps": [],
    "nama_bps": [],
    "nama_dagri": []
})

for k in prov['kode']:
    url = 'https://sig.bps.go.id/rest-bridging/getwilayah?level=kabupaten&parent={}'.format(k)
    r = requests.get(url)
    data = r.json()

    df_temp = pd.DataFrame(data)
    df_temp['kode_prov'] = k

    df = pd.concat([df, df_temp], ignore_index=True)

df["kode_prov"] = df["kode_prov"].astype(int)
df.head()


Unnamed: 0,kode_prov,kode_bps,nama_bps,nama_dagri,kode_dagri
0,11,1101,SIMEULUE,KAB. SIMEULUE,11.09
1,11,1102,ACEH SINGKIL,KAB. ACEH SINGKIL,11.10
2,11,1103,ACEH SELATAN,KAB. ACEH SELATAN,11.01
3,11,1104,ACEH TENGGARA,KAB. ACEH TENGGARA,11.02
4,11,1105,ACEH TIMUR,KAB. ACEH TIMUR,11.03
...,...,...,...,...,...
480,91,9109,TAMBRAUW,KAB. TAMBRAUW,92.09
481,91,9110,MAYBRAT,KAB. MAYBRAT,92.10
482,91,9111,MANOKWARI SELATAN,KAB. MANOKWARI SELATAN,92.11
483,91,9112,PEGUNUNGAN ARFAK,KAB. PEGUNUNGAN ARFAK,92.12


## Export Data BPS Kab/Kota

In [167]:
df.to_csv('kabupaten.csv', index=False)
df.to_json('kabupaten.json', orient='records')

In [191]:
from selenium import webdriver
driver = webdriver.Edge()

# input data
kabkota = pd.read_csv('kabupaten.csv')
kabkota.head()

Unnamed: 0,kode_prov,kode_bps,nama_bps,nama_dagri,kode_dagri
0,11,1101,SIMEULUE,KAB. SIMEULUE,11.09
1,11,1102,ACEH SINGKIL,KAB. ACEH SINGKIL,11.1
2,11,1103,ACEH SELATAN,KAB. ACEH SELATAN,11.01
3,11,1104,ACEH TENGGARA,KAB. ACEH TENGGARA,11.02
4,11,1105,ACEH TIMUR,KAB. ACEH TIMUR,11.03


In [222]:
# split data into 5 chunks
kabkota_split = np.array_split(kabkota, 5)

Unnamed: 0,kode_prov,kode_bps,nama_bps,nama_dagri,kode_dagri
388,72,7204,POSO,KAB. POSO,72.02
389,72,7205,DONGGALA,KAB. DONGGALA,72.03
390,72,7206,TOLI-TOLI,KAB. TOLI TOLI,72.04
391,72,7207,BUOL,KAB. BUOL,72.05
392,72,7208,PARIGI MOUTONG,KAB. PARIGI MOUTONG,72.08
...,...,...,...,...,...
480,91,9109,TAMBRAUW,KAB. TAMBRAUW,92.09
481,91,9110,MAYBRAT,KAB. MAYBRAT,92.10
482,91,9111,MANOKWARI SELATAN,KAB. MANOKWARI SELATAN,92.11
483,91,9112,PEGUNUNGAN ARFAK,KAB. PEGUNUNGAN ARFAK,92.12


In [295]:
latlong = pd.DataFrame({
    "kode_bps": [],
    "latitude": [],
    "longitude": []
})
new_data_list = []
latlong

Unnamed: 0,kode_bps,latitude,longitude


In [297]:
def iterateScrape(chunk):
    for i, row in chunk.iterrows():
        url = 'https://www.google.com/maps/search/Badan Pusat Statistik {}, {}'.format(row['nama_bps'], row['nama_dagri'])
        driver.get(url)
        time.sleep(3.5)
        latlong = driver.current_url.split("@")[1].split("/")[0].split(",")
        new_data = {
            "kode_bps": row['kode_bps'],
            "latitude": latlong[0],
            "longitude": latlong[1]
        }

        print("process {} of {}".format(i, len(chunk)))
        new_data_list.append(new_data)

In [298]:
# iterateScrape(kabkota_split[0])
for chunk in kabkota_split:
    iterateScrape(chunk)
latlong = pd.concat([latlong, pd.DataFrame(new_data_list)], ignore_index=True)


process 0 of 97


KeyboardInterrupt: 

In [256]:
latlong = pd.concat([latlong, pd.DataFrame(new_data_list)], ignore_index=True)

In [293]:
temp = pd.merge(kabkota_split[0], latlong, on='kode_bps', how='inner')
# kabkota_split[0]
# get info variable
# latlong
# kabkota_split[0]

In [294]:
temp

Unnamed: 0,kode_prov,kode_bps,nama_bps,nama_dagri,kode_dagri,latitude,longitude
0,11,1101,SIMEULUE,KAB. SIMEULUE,11.09,2,9
1,11,1102,ACEH SINGKIL,KAB. ACEH SINGKIL,11.10,2,9
2,11,1103,ACEH SELATAN,KAB. ACEH SELATAN,11.01,3,9
3,11,1104,ACEH TENGGARA,KAB. ACEH TENGGARA,11.02,3,9
4,11,1105,ACEH TIMUR,KAB. ACEH TIMUR,11.03,4,9
...,...,...,...,...,...,...,...
92,15,1506,TANJUNG JABUNG TIMUR,KAB. TANJUNG JABUNG TIMUR,15.07,-,1
93,15,1507,TANJUNG JABUNG BARAT,KAB. TANJUNG JABUNG BARAT,15.06,-,1
94,15,1508,TEBO,KAB. TEBO,15.09,-,1
95,15,1509,BUNGO,KAB. BUNGO,15.08,-,1
