## BDSP Scraper

The main tasks of this project are as follows:
1. Extract all indicator values such as _Subsektor_, _Komoditas_, _Level_, _Provinsi_, _Kabupaten_, _Tahun Awal_, and _Tahun Akhir_ from [BDSP dynamic website](https://bdsp2.pertanian.go.id/bdsp/id/indikator) using Selenium.
2. Extract a table based on specific indicators using Selenium and transform it into tabular format.

## PyMongo

### Example

In [1]:
import datetime

import pymongo
from pymongo import MongoClient

# Connect to the local MongoDB instance.
# NOTE(review): credentials are hard-coded; acceptable only for a local sandbox.
client = MongoClient(
    "mongodb://localhost:27017",
    username="root",
    password="rootpassword"
)

# Databases and collections are created lazily, on the first write.
db = client["test-database"]
display("db: ", db)

collection = db["test-collection"]
display("collection: ", collection)

post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    # Explicit UTC-aware timestamp: datetime.utcnow() is deprecated and returns
    # a naive value that is easy to misread as local time.
    "date": datetime.datetime.now(tz=datetime.timezone.utc)
}
posts = db.posts
post_id = posts.insert_one(post).inserted_id
display(post_id)

'db: '

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'test-database')

'collection: '

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'test-database'), 'test-collection')

ObjectId('647ac137a8247e15384a881d')

In [2]:
db.list_collection_names()

['posts']

In [3]:
import pprint

pprint.pprint(posts.find_one())

{'_id': ObjectId('647abc85a282918c2b9d5d26'),
 'author': 'Mike',
 'date': datetime.datetime(2023, 6, 3, 4, 7, 33, 367000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [6]:
pprint.pprint(posts.find_one({'tags': 'python'}))

{'_id': ObjectId('647abc85a282918c2b9d5d26'),
 'author': 'Mike',
 'date': datetime.datetime(2023, 6, 3, 4, 7, 33, 367000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [22]:
posts.find_one({"author": "Eliot"})

In [26]:
db = client["bdsp"]
collection = db["indicator"]

In [27]:
db.list_collection_names()

[]

### Indicator

In [66]:
import random
import time
import os
import dill

from typing import Dict

class BinaryFileHandler:
    """Save and load dill-serialized objects in a folder under the user's home.

    The working folder is ``<home>/<folder_path>``; it is created when the
    handler is constructed if it does not exist yet.
    """

    def __init__(self, folder_path: str):
        """Resolve the home directory and make sure the target folder exists.

        Args:
            folder_path: Folder path relative to the user's home directory.
        """
        self.root_path = os.path.expanduser("~")
        self.folder_path = folder_path
        self.create_folder_if_not_exist()

    def create_folder_if_not_exist(self) -> None:
        """Create the target folder (and parents) and store it in ``abs_path``."""
        abs_path = f"{self.root_path}/{self.folder_path}"
        if not os.path.exists(abs_path):
            # os.makedirs instead of shelling out to `mkdir -p`: the path is
            # never interpreted by a shell, so spaces or shell metacharacters
            # in it are safe, and it works on platforms without `mkdir -p`.
            os.makedirs(abs_path, exist_ok=True)

        self.abs_path = abs_path

    def save(self, dict_to_save: Dict, filename: str) -> None:
        """Serialize ``dict_to_save`` with dill into ``<abs_path>/<filename>``.

        Args:
            dict_to_save: Object to serialize.
            filename: File name inside the handler's folder; overwritten if present.
        """
        with open(f"{self.abs_path}/{filename}", "wb") as f:
            dill.dump(dict_to_save, f)

    def load(self, filename: str):
        """Deserialize and return the object stored in ``<abs_path>/<filename>``.

        Args:
            filename: File name inside the handler's folder.

        Returns:
            Whatever object was stored in the file.
        """
        # CAUTION: dill (like pickle) can execute arbitrary code while loading;
        # only load files that this project produced itself.
        with open(f"{self.abs_path}/{filename}", "rb") as f:
            return dill.load(f)

import dill
import pprint

import pymongo
from pymongo import MongoClient

# From HTML element scraping
file_handler = BinaryFileHandler("data/binary")
indicator_dict = file_handler.load("indicator_input_values.bin")
indicator_dict

# Initialize the Client
client = MongoClient(
    "mongodb://localhost:27017",
    username="root",
    password="rootpassword"
)

# Create new database
db = client["bdsp"]

# TEMP: drop every collection in the database so the ingestion below starts clean
try:
    for name in db.list_collection_names():
        db[name].drop()
except pymongo.errors.PyMongoError as exc:
    # Best-effort cleanup: carry on, but report the failure instead of hiding
    # it (a bare `except` would also swallow KeyboardInterrupt and real bugs).
    print(f"Could not drop existing collections: {exc}")

def _insert_if_missing(target_collection, new_document):
    """Insert ``new_document`` unless a document with the same fields already exists."""
    if not target_collection.find_one(new_document):
        target_collection.insert_one(new_document)


# Ingest subsektor
subsector_collection = db["subsektor"]
for position, subsector_name in enumerate(indicator_dict["subsektor"]):
    _insert_if_missing(
        subsector_collection,
        {
            "subsektor_idx": position,
            "subsektor_name": subsector_name,
        },
    )

# Ingest komoditas (the index restarts at 0 for every subsektor)
commodity_collection = db["komoditas"]
for subsector_name, commodity_names in indicator_dict["commodities"].items():
    for position, commodity_name in enumerate(commodity_names):
        _insert_if_missing(
            commodity_collection,
            {
                "subsektor_name": subsector_name,
                "komoditas_idx": position,
                "komoditas_name": commodity_name,
            },
        )

# Ingest level
level_collection = db["level"]
for position, level_name in enumerate(indicator_dict["level"]):
    _insert_if_missing(
        level_collection,
        {
            "level_idx": position,
            "level_name": level_name,
        },
    )

# Ingest provinsi
prov_collection = db["prov"]
for position, prov_name in enumerate(indicator_dict["prov"]):
    _insert_if_missing(
        prov_collection,
        {
            "level_name": "Provinsi",
            "prov_idx": position,
            "prov_name": prov_name,
        },
    )

# Ingest kabupaten (the index restarts at 0 for every provinsi)
regency_collection = db["kab"]
for prov_name, regency_names in indicator_dict["regency"].items():
    for position, regency_name in enumerate(regency_names):
        _insert_if_missing(
            regency_collection,
            {
                "level_name": "Kabupaten",
                "prov_name": prov_name,
                "kab_idx": position,
                "kab_name": regency_name,
            },
        )

# Ingest tahunAwal (years are stored as integers)
tahunAwal_collection = db["tahunAwal"]
for position, year_text in enumerate(indicator_dict["tahunAwal"]):
    _insert_if_missing(
        tahunAwal_collection,
        {
            "tahunAwal_idx": position,
            "tahunAwal_name": int(year_text),
        },
    )

# Ingest tahunAkhir (years are stored as integers)
tahunAkhir_collection = db["tahunAkhir"]
for position, year_text in enumerate(indicator_dict["tahunAkhir"]):
    _insert_if_missing(
        tahunAkhir_collection,
        {
            "tahunAkhir_idx": position,
            "tahunAkhir_name": int(year_text),
        },
    )

# Input validation
def validate(input_dict: Dict, indicator: str) -> Dict:
    """Look up the stored index of one indicator value from ``input_dict``.

    The collection named ``indicator`` is searched for a document whose
    ``<indicator>_name`` equals ``input_dict[indicator]``.

    Args:
        input_dict: Mapping of indicator name to the requested value.
        indicator: Key of ``input_dict`` (and name of the collection) to check.

    Returns:
        ``{indicator: idx}`` when a matching document exists, otherwise ``None``.

    Raises:
        KeyError: If ``indicator`` is not a key of ``input_dict``.
    """
    query = {f"{indicator}_name": input_dict[indicator]}

    # komoditas and kab indexes are numbered per parent (subsektor / provinsi)
    # at ingestion time, so the same name under two parents can carry different
    # indexes. Scope the lookup by the parent whenever the input supplies one.
    parent_links = {
        "komoditas": ("subsektor", "subsektor_name"),
        "kab": ("prov", "prov_name"),
    }
    if indicator in parent_links:
        parent_key, parent_field = parent_links[indicator]
        parent_value = input_dict.get(parent_key)
        if parent_value:
            query[parent_field] = parent_value

    searched = db[indicator].find_one(query)
    if not searched:
        return None

    return {indicator: searched[f"{indicator}_idx"]}

def validate_all(input_dict: Dict) -> Dict:
    """Resolve every indicator in ``input_dict`` to its stored index.

    Indicators for which ``validate`` does not return a dict are left out
    of the result.

    Args:
        input_dict: Mapping of indicator name to the requested value.

    Returns:
        Mapping of indicator name to index for the indicators that were found.
    """
    resolved = {}
    for indicator in input_dict:
        found = validate(input_dict, indicator)
        if not isinstance(found, dict):
            continue
        resolved.update(found)
    return resolved

# Input for scraping a table
input_dict = {
    "subsektor": "Tanaman Pangan",
    "komoditas": "JAGUNG",
    "level": "Kabupaten",
    "prov": "Aceh",
    "kab": "Kab. Simeulue",
    "tahunAwal": 1970,
    "tahunAkhir": 2023,
}
display(validate_all(input_dict))


input_dict = {
    "subsektor": "Hortikultura",
    "komoditas": "AGLAOENEMA",
    "level": "Provinsi",
    "prov": "Aceh",
    "kab": "",
    "tahunAwal": 1970,
    "tahunAkhir": 1980,
}
display(validate_all(input_dict))


input_dict =  {
    "subsektor": "Peternakan",
    "komoditas": "SAPI POTONG",
    "level": "Nasional",
    "prov": "",
    "kab": "",
    "tahunAwal": 2002,
    "tahunAkhir": 2023,
}
display(validate_all(input_dict))

{'subsektor': 0,
 'komoditas': 0,
 'level': 2,
 'prov': 0,
 'kab': 0,
 'tahunAwal': 0,
 'tahunAkhir': 53}

{'subsektor': 3,
 'komoditas': 1,
 'level': 1,
 'prov': 0,
 'tahunAwal': 0,
 'tahunAkhir': 10}

{'subsektor': 2,
 'komoditas': 28,
 'level': 0,
 'tahunAwal': 32,
 'tahunAkhir': 53}

### Dependencies

In [1]:
import os
import time

import pandas as pd

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

HOME_PATH = os.path.expanduser('~')
CHROMEDRIVER_PATH = f"{HOME_PATH}/web-scraping/chromedriver"

URL = 'https://bdsp2.pertanian.go.id/bdsp/id/indikator'

In [2]:
def get_all_indicator_values(driver, indicator_id):
    """Collect the option texts of every element whose id is ``indicator_id``.

    The first ``<option>`` of each matched element is skipped
    (presumably a placeholder entry; confirm against the page markup).

    Args:
        driver: Selenium WebDriver with the page already loaded.
        indicator_id: HTML ``id`` of the element(s) to read.

    Returns:
        ``{indicator_id: [option text, ...]}``.
    """
    option_texts = [
        option.text
        for select in driver.find_elements(By.ID, indicator_id)
        for option in select.find_elements(By.TAG_NAME, 'option')[1:]
    ]
    return {indicator_id: option_texts}

### 1. Scrape all indicators

In [3]:
# set up the Chrome driver
service = Service(CHROMEDRIVER_PATH)
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# run Chrome in headless mode to avoid opening a visible window
driver = webdriver.Chrome(service=service, options=options)

# load the page
driver.get(URL)

Incompatible release of chromedriver (version 107.0.5304.62) detected in PATH: /usr/local/bin/chromedriver


In [4]:
subsektor = driver.find_element(By.XPATH, '//*[@id="subsektor"]/option[2]')
subsektor.click()

print(f"Selected: {subsektor.text}")
get_all_indicator_values(driver, 'subsektor')

Selected: Tanaman Pangan


{'subsektor': ['Tanaman Pangan', 'Perkebunan', 'Peternakan', 'Hortikultura']}

In [5]:
komoditas = driver.find_element(By.XPATH, '//*[@id="komoditas"]/option[2]')
komoditas.click()

print(komoditas.text)
get_all_indicator_values(driver, 'komoditas')

JAGUNG


{'komoditas': ['JAGUNG',
  'KACANG HIJAU',
  'KACANG TANAH',
  'KEDELAI',
  'PADI',
  'PADI LADANG',
  'PADI SAWAH',
  'UBIJALAR',
  'UBIKAYU / KETELA POHON']}

In [6]:
level = driver.find_element(By.XPATH, '//*[@id="level"]/option[4]')
level.click()

print(level.text)
get_all_indicator_values(driver, 'level')

Kabupaten


{'level': ['Nasional', 'Provinsi', 'Kabupaten']}

In [7]:
if level.text != 'Nasional':
    print(f'Level: {level.text}')
    prov = driver.find_element(By.XPATH, '//*[@id="prov"]/option[9]')
    prov.click()

    print(prov.text)
    print(get_all_indicator_values(driver, 'prov'))
else:
    print(f'Level: {level.text}\nNo need to input prov')
    

Level: Kabupaten
Lampung
{'prov': ['Aceh', 'Sumatera Utara', 'Sumatera Barat', 'Riau', 'Jambi', 'Sumatera Selatan', 'Bengkulu', 'Lampung', 'Kepulauan Bangka Belitung', 'Kepulauan Riau', 'Daerah Khusus Ibukota Jakarta', 'Jawa Barat', 'Jawa Tengah', 'Daerah Istimewa Yogyakarta', 'Jawa Timur', 'Banten', 'Bali', 'Nusa Tenggara Barat', 'Nusa Tenggara Timur', 'Kalimantan Barat', 'Kalimantan Tengah', 'Kalimantan Selatan', 'Kalimantan Timur', 'Kalimantan Utara', 'Sulawesi Utara', 'Sulawesi Tengah', 'Sulawesi Selatan', 'Sulawesi Tenggara', 'Gorontalo', 'Sulawesi Barat', 'Maluku', 'Maluku Utara', 'Papua Barat', 'Papua']}


In [8]:
if level.text == 'Kabupaten':
    kab = driver.find_element(By.XPATH, '//*[@id="kab"]/option[14]')
    kab.click()
    print(kab.text)
    print(get_all_indicator_values(driver, 'kab'))

else:
    print('No need to input kab')

Kab. Pesisir Barat
{'kab': ['Kab. Lampung Barat', 'Kab. Tanggamus', 'Kab. Lampung Selatan', 'Kab. Lampung Timur', 'Kab. Lampung Tengah', 'Kab. Lampung Utara', 'Kab. Way Kanan', 'Kab. Tulang Bawang', 'Kab. Pesawaran', 'Kab. Pringsewu', 'Kab. Mesuji', 'Kab. Tulang Bawang Barat', 'Kab. Pesisir Barat', 'Kota Bandar Lampung', 'Kota Metro']}


In [9]:
tahunAwal = driver.find_element(By.XPATH, '//*[@id="tahunAwal"]/option[2]')
tahunAwal.click()

print(tahunAwal.text)
print(get_all_indicator_values(driver, 'tahunAwal'))

1970
{'tahunAwal': ['1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']}


In [10]:
tahunAkhir = driver.find_element(By.XPATH, '//*[@id="tahunAkhir"]/option[55]')
tahunAkhir.click()

print(tahunAkhir.text)
print(get_all_indicator_values(driver, 'tahunAkhir'))

2023
{'tahunAkhir': ['1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']}


In [11]:
indicator_name_list = [
    'subsektor',
    'komoditas',
    'level',
    'prov',
    'kab',
    'tahunAwal',
    'tahunAkhir',
]

dict_indicator = {}
for indicator in indicator_name_list:
    dict_indicator.update(get_all_indicator_values(driver, indicator))
    

print(dict_indicator)

{'subsektor': ['Tanaman Pangan', 'Perkebunan', 'Peternakan', 'Hortikultura'], 'komoditas': ['JAGUNG', 'KACANG HIJAU', 'KACANG TANAH', 'KEDELAI', 'PADI', 'PADI LADANG', 'PADI SAWAH', 'UBIJALAR', 'UBIKAYU / KETELA POHON'], 'level': ['Nasional', 'Provinsi', 'Kabupaten'], 'prov': ['Aceh', 'Sumatera Utara', 'Sumatera Barat', 'Riau', 'Jambi', 'Sumatera Selatan', 'Bengkulu', 'Lampung', 'Kepulauan Bangka Belitung', 'Kepulauan Riau', 'Daerah Khusus Ibukota Jakarta', 'Jawa Barat', 'Jawa Tengah', 'Daerah Istimewa Yogyakarta', 'Jawa Timur', 'Banten', 'Bali', 'Nusa Tenggara Barat', 'Nusa Tenggara Timur', 'Kalimantan Barat', 'Kalimantan Tengah', 'Kalimantan Selatan', 'Kalimantan Timur', 'Kalimantan Utara', 'Sulawesi Utara', 'Sulawesi Tengah', 'Sulawesi Selatan', 'Sulawesi Tenggara', 'Gorontalo', 'Sulawesi Barat', 'Maluku', 'Maluku Utara', 'Papua Barat', 'Papua'], 'kab': ['Kab. Lampung Barat', 'Kab. Tanggamus', 'Kab. Lampung Selatan', 'Kab. Lampung Timur', 'Kab. Lampung Tengah', 'Kab. Lampung Utara',

### 2. Scrape a table

In [12]:
searchButton = driver.find_element(By.XPATH, '//*[@id="search"]')
searchButton.click()
time.sleep(1)


# wait for the table to load
table_element = WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CLASS_NAME, 'table-responsive')))

print(table_element.text)

Search:
No Indikator Satuan 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023
1 LUAS PANEN Ha 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 4804,00 3254,00 4774,00 6146,00 6050,60 0,00 0,00 0,00 0,00 0,00 0,00
2 PRODUKSI Ton 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 20048,00 13488,00 20214,00 32005,00 32668,49 0,00 0,00 0,00 0,00 0,00 0,00
3 PRODUKTIVITAS Kuintal/Ha 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00 0,00

In [13]:
header_elements = table_element.find_elements(By.TAG_NAME, 'thead')
headers = [header.text.split(" ") for header in header_elements][0]

print(headers)


['No', 'Indikator', 'Satuan', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']


In [14]:
data = []
row_elements = table_element.find_elements(By.TAG_NAME, 'tr')
for row in row_elements[1:]:
    cell_elements = row.find_elements(By.TAG_NAME, 'td')
    row_data = [cell.text.strip() for cell in cell_elements]
    data.append(row_data)
    
for row in data:
    print(row)

['1', 'LUAS PANEN', 'Ha', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '4804,00', '3254,00', '4774,00', '6146,00', '6050,60', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00']
['2', 'PRODUKSI', 'Ton', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00', '20048,00', '13488,00', '20214,00', '32005,00', '32668,49', '0,00', '0,00', '0,00', '0,00', '0,00', '0,00']
['3', 'PRODUKTIVITAS', 'Kuintal/Ha', '0,00', '0,00

In [28]:
df = pd.DataFrame(data, columns=headers)
df = df[['No', 'Indikator', 'Satuan', '2020', '2021', '2022', '2023']].copy()
df.to_csv('./sample.csv', index=False)

In [16]:
df2 = df.drop(columns=['No'])

df2 = df2.melt(
    id_vars=['Indikator', 'Satuan'],
    var_name="Tahun",
    value_name="Jumlah"
)

print(df2.shape)
df2.sample(5)

(162, 4)


Unnamed: 0,Indikator,Satuan,Tahun,Jumlah
102,LUAS PANEN,Ha,2004,0
124,PRODUKSI,Ton,2011,0
12,LUAS PANEN,Ha,1974,0
33,LUAS PANEN,Ha,1981,0
123,LUAS PANEN,Ha,2011,0


In [17]:
df2['Subsektor'] = subsektor.text
df2['Komoditi'] = komoditas.text
df2['Provinsi'] = prov.text
df2['Kabupaten'] = kab.text

df2

Unnamed: 0,Indikator,Satuan,Tahun,Jumlah,Subsektor,Komoditi,Provinsi,Kabupaten
0,LUAS PANEN,Ha,1970,000,,,,
1,PRODUKSI,Ton,1970,000,,,,
2,PRODUKTIVITAS,Kuintal/Ha,1970,000,,,,
3,LUAS PANEN,Ha,1971,000,,,,
4,PRODUKSI,Ton,1971,000,,,,
...,...,...,...,...,...,...,...,...
157,PRODUKSI,Ton,2022,000,,,,
158,PRODUKTIVITAS,Kuintal/Ha,2022,000,,,,
159,LUAS PANEN,Ha,2023,000,,,,
160,PRODUKSI,Ton,2023,000,,,,


In [18]:
df2.to_csv('../data/DataKeluaranIndikator.csv')

In [19]:
# close the browser
driver.quit()

In [20]:
a_dict = {
    'level': ['Nasional', 'Provinsi', 'Kabupaten'],
    'provinsi': {'Nasional': [], 'Provinsi': ['Aceh', 'Sumatera Utara']}
}

a_dict['provinsi'].keys()

for level in a_dict['level']:
    print(level)

Nasional
Provinsi
Kabupaten


In [21]:
def test(**kwargs):
    """Print each keyword argument as ``name value`` on its own line."""
    for pair in kwargs.items():
        # Unpacking the (name, value) pair prints both, separated by a space.
        print(*pair)
        
test(**a_dict)

level ['Nasional', 'Provinsi', 'Kabupaten']
provinsi {'Nasional': [], 'Provinsi': ['Aceh', 'Sumatera Utara']}


In [22]:
a = {'subsektor': {'Tanaman Pangan': 2}, 'level': {'Kabupaten': 4}, 'tahunAwal': {'1970': 2}, 'tahunAkhir': {'2023': 55}, 'kab': {'Kab. Simeulue': 2}, 'komoditas': {'JAGUNG': 2}}


In [23]:
# Print every index stored in the nested {indicator_id: {name: index}} mapping.
for name_to_idx in a.values():
    for indicator_idx in name_to_idx.values():
        print(indicator_idx)

2
4
2
55
2
2


In [24]:
import numpy as np
import pandas as pd

shape = (162, 4)
df_info = pd.DataFrame(
    np.eye(shape[0], shape[1]),
    columns=['Subsektor', 'Komoditi', 'Provinsi', 'Kabupaten']
)
df_info['Subsektor'] = 'Tanaman Pangan'
df_info

Unnamed: 0,Subsektor,Komoditi,Provinsi,Kabupaten
0,Tanaman Pangan,0.0,0.0,0.0
1,Tanaman Pangan,1.0,0.0,0.0
2,Tanaman Pangan,0.0,1.0,0.0
3,Tanaman Pangan,0.0,0.0,1.0
4,Tanaman Pangan,0.0,0.0,0.0
...,...,...,...,...
157,Tanaman Pangan,0.0,0.0,0.0
158,Tanaman Pangan,0.0,0.0,0.0
159,Tanaman Pangan,0.0,0.0,0.0
160,Tanaman Pangan,0.0,0.0,0.0


In [25]:
import dill

def read_binary_file(path):
    """Deserialize and return the object stored in the dill file at ``path``."""
    with open(path, 'rb') as handle:
        loaded = dill.load(handle)
    return loaded
    
a_dict = read_binary_file(path="../data/indicator_input_dict.bin")

a_dict

{'subsektor': ['Tanaman Pangan', 'Perkebunan', 'Peternakan', 'Hortikultura'],
 'level': ['Nasional', 'Provinsi', 'Kabupaten'],
 'tahunAwal': ['1970',
  '1971',
  '1972',
  '1973',
  '1974',
  '1975',
  '1976',
  '1977',
  '1978',
  '1979',
  '1980',
  '1981',
  '1982',
  '1983',
  '1984',
  '1985',
  '1986',
  '1987',
  '1988',
  '1989',
  '1990',
  '1991',
  '1992',
  '1993',
  '1994',
  '1995',
  '1996',
  '1997',
  '1998',
  '1999',
  '2000',
  '2001',
  '2002',
  '2003',
  '2004',
  '2005',
  '2006',
  '2007',
  '2008',
  '2009',
  '2010',
  '2011',
  '2012',
  '2013',
  '2014',
  '2015',
  '2016',
  '2017',
  '2018',
  '2019',
  '2020',
  '2021',
  '2022',
  '2023'],
 'tahunAkhir': ['1970',
  '1971',
  '1972',
  '1973',
  '1974',
  '1975',
  '1976',
  '1977',
  '1978',
  '1979',
  '1980',
  '1981',
  '1982',
  '1983',
  '1984',
  '1985',
  '1986',
  '1987',
  '1988',
  '1989',
  '1990',
  '1991',
  '1992',
  '1993',
  '1994',
  '1995',
  '1996',
  '1997',
  '1998',
  '1999',
  '20

In [26]:
df_info

Unnamed: 0,Subsektor,Komoditi,Provinsi,Kabupaten
0,Tanaman Pangan,0.0,0.0,0.0
1,Tanaman Pangan,1.0,0.0,0.0
2,Tanaman Pangan,0.0,1.0,0.0
3,Tanaman Pangan,0.0,0.0,1.0
4,Tanaman Pangan,0.0,0.0,0.0
...,...,...,...,...
157,Tanaman Pangan,0.0,0.0,0.0
158,Tanaman Pangan,0.0,0.0,0.0
159,Tanaman Pangan,0.0,0.0,0.0
160,Tanaman Pangan,0.0,0.0,0.0
