In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import pyarrow

def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 4, 24)
end_date = datetime(2023, 5, 24)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_24april-24mei.parquet"
all_data.to_parquet(output_file)


Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-04-24 s/d 2023-05-24
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-04-24                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        12.733         12.733   
4                         - Beras Medium     kg        10.150         10.150   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2724             - Pupuk KCL Non Subsidi     Kg         4.833          4.833   
2725             - Pupuk NPK Non Subsidi     Kg         7.000          7.000   
2726           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [8]:
def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 5, 25)
end_date = datetime(2023, 6, 25)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_25mei-25juni.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-05-25 s/d 2023-06-25
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-05-25                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        12.733         12.733   
4                         - Beras Medium     kg         9.983          9.983   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2812             - Pupuk KCL Non Subsidi     Kg         4.833          4.833   
2813             - Pupuk NPK Non Subsidi     Kg         7.000          7.000   
2814           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [5]:
def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 6, 26)
end_date = datetime(2023, 7, 26)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_26juni-26juli.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-06-26 s/d 2023-07-26
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-06-26                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        12.733         12.733   
4                         - Beras Medium     kg        10.067         10.067   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2724             - Pupuk KCL Non Subsidi     Kg         4.833          4.833   
2725             - Pupuk NPK Non Subsidi     Kg         7.000          7.000   
2726           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [6]:
def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 7, 27)
end_date = datetime(2023, 8, 27)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_27juli-27agustus.parquet"
all_data.to_parquet(output_file)


Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-07-27 s/d 2023-08-27
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-07-27                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        12.733         12.733   
4                         - Beras Medium     kg        10.067         10.067   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2812             - Pupuk KCL Non Subsidi     Kg         4.833          4.833   
2813             - Pupuk NPK Non Subsidi     Kg         7.000          7.000   
2814           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [7]:
def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 8, 28)
end_date = datetime(2023, 9, 28)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_28agustus-28september.parquet"
all_data.to_parquet(output_file)


Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-08-28 s/d 2023-09-28
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-08-28                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        13.167         13.167   
4                         - Beras Medium     kg        11.267         11.333   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2812             - Pupuk KCL Non Subsidi     Kg         4.833          4.833   
2813             - Pupuk NPK Non Subsidi     Kg         7.000          7.000   
2814           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [9]:
def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 9, 29)
end_date = datetime(2023, 10, 29)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_29september-29oktober.parquet"
all_data.to_parquet(output_file)


Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-09-29 s/d 2023-10-29
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-09-29                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        13.500         13.500   
4                         - Beras Medium     kg        11.567         11.567   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2724             - Pupuk KCL Non Subsidi     Kg         4.833          4.833   
2725             - Pupuk NPK Non Subsidi     Kg         7.000          7.000   
2726           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [10]:
def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 10, 30)
end_date = datetime(2023, 11, 30)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_30oktober-30november.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-10-30 s/d 2023-11-30
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-10-30                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        13.567         13.567   
4                         - Beras Medium     kg        11.000         11.000   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2812             - Pupuk KCL Non Subsidi     Kg         5.000          5.000   
2813             - Pupuk NPK Non Subsidi     Kg         7.333          7.333   
2814           - Pupuk SP 35 Non Subsidi     Kg         4.1

In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor

def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2023, 12, 1)
end_date = datetime(2023, 12, 31)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_1desember-31desember.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2023-12-01 s/d 2023-12-31
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2023-12-01                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        13.400         13.400   
4                         - Beras Medium     kg        10.933         10.933   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2724             - Pupuk KCL Non Subsidi     Kg         5.667          5.667   
2725             - Pupuk NPK Non Subsidi     Kg         7.833          7.833   
2726           - Pupuk SP 35 Non Subsidi     Kg         4.6

In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor

def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Download and parse the price table for a single day.

    Args:
        date: Object with ``strftime``; formatted as ``YYYY-MM-DD`` for the
            request and passed on to ``parse_data``.

    Returns:
        The DataFrame built from that day's table, or ``None`` when
        ``get_data`` returned nothing (``None`` or empty content).
    """
    raw_html = get_data(date.strftime("%Y-%m-%d"))
    if not raw_html:
        # Nothing downloaded for this day.
        return None
    column_names, table_rows = parse_data(raw_html, date)
    return create_dataframe(column_names, table_rows)

# Inclusive date window scraped by this cell.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 1, 31)
dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]

# Download the days concurrently; executor.map yields results in input order,
# so the combined table stays chronological.
with ThreadPoolExecutor() as executor:
    frames = [df for df in executor.map(process_date, dates) if df is not None]

# Concatenate once rather than re-copying a growing frame for every day.
# pd.concat raises on an empty list, so keep an empty frame if every day failed.
all_data = pd.concat(frames) if frames else pd.DataFrame()

# Renumber rows 1..N and label the index "No".
all_data = all_data.reset_index(drop=True)
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table for this window.
output_file = "hasil_pasuruan_1januari24-31januari24.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2024-01-01 s/d 2024-01-31
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2024-01-01                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        13.400         13.400   
4                         - Beras Medium     kg        10.900         10.900   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2724             - Pupuk KCL Non Subsidi     Kg         5.667          5.667   
2725             - Pupuk NPK Non Subsidi     Kg         7.833          7.833   
2726           - Pupuk SP 35 Non Subsidi     Kg         4.6

In [15]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor

def get_data(tanggal, timeout=30):
    """Fetch the raw HTML price table for one date.

    Sends a form-encoded POST to the ``tabel.nodesign`` endpoint with
    ``kabkota`` fixed to ``"pasuruankab"`` and ``pasar`` sent empty.

    Args:
        tanggal: Value for the ``tanggal`` form field; callers in this
            notebook pass a ``YYYY-MM-DD`` string.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so without one a stalled connection would
            block its worker thread indefinitely.

    Returns:
        The response body as ``bytes`` on HTTP 200, otherwise ``None``
        (non-200 status, timeout, or connection error).
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status: report and skip the
        # date instead of aborting the whole multi-day download.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for date: {tanggal}")
        return None
    return response.content

def parse_data(html_content, date):
    """Extract header labels and data rows from one day's price-table HTML.

    Args:
        html_content: HTML markup; only the first ``<table>`` is read.
        date: Object with ``strftime``; used to label this day's rows.

    Returns:
        ``(headers, rows)``. ``headers`` is the stripped text of every
        ``<th>`` in the table. ``rows`` starts with two synthetic rows (a
        ``"Tanggal"`` marker carrying the date, then ``"01" / "BERAS"``),
        followed by the stripped ``<td>`` texts of each ``<tr>`` after the
        first.

    Raises:
        ValueError: If the markup contains no ``<table>``.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_label = date.strftime("%Y-%m-%d")
    if table is None:
        # Fail with a message naming the day instead of an opaque
        # AttributeError on None.
        raise ValueError(f"No <table> found in response for {date_label}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the real column count (7 cells when the table
    # has 7 headers, as before) so they keep matching if the layout changes.
    padding = [""] * max(len(headers) - 2, 0)

    # Insert a date row ahead of the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is hard-coded; presumably the [1:]
    # slice below drops it from the parsed rows - confirm against live markup.
    rows = [
        ["Tanggal", date_label] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from parsed table rows.

    Args:
        headers: Column labels.
        rows: Sequence of row value lists.

    Returns:
        A ``pandas.DataFrame`` with ``rows`` as data and ``headers`` as
        column labels.
    """
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Fetch, parse and tabulate the data for a single date.

    Returns the DataFrame built by ``create_dataframe``, or ``None`` when
    ``get_data`` returned nothing usable (``None`` or an empty body).
    """
    day_html = get_data(date.strftime("%Y-%m-%d"))
    if not day_html:
        return None

    parsed_headers, parsed_rows = parse_data(day_html, date)
    return create_dataframe(parsed_headers, parsed_rows)

# Inclusive date range to download.
start_date = datetime(2024, 2, 1)
end_date = datetime(2024, 2, 29)

# One request per calendar day; both endpoints are included.
day_count = (end_date - start_date).days + 1

# Collect the per-day frames and concatenate once at the end; calling
# pd.concat inside the loop re-copies the accumulated frame every iteration.
frames = []
with ThreadPoolExecutor() as executor:
    futures = []
    for current_date in (start_date + timedelta(days=i) for i in range(day_count)):
        futures.append(executor.submit(process_date, current_date))

    # Read results in submission order so the rows stay chronological.
    for future in futures:
        df = future.result()
        if df is not None:
            frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# Number rows from 1 and label the index "No".
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table as Parquet.
output_file = "hasil_pasuruan_1februari24-29februari24.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2024-02-01 s/d 2024-02-29
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2024-02-01                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        13.400         13.400   
4                         - Beras Medium     kg        10.900         10.900   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2548             - Pupuk KCL Non Subsidi     Kg         6.667          6.667   
2549             - Pupuk NPK Non Subsidi     Kg         8.767          8.767   
2550           - Pupuk SP 35 Non Subsidi     Kg         5.5

In [16]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor

def get_data(tanggal, timeout=30):
    """Fetch the raw price-table HTML for one date.

    Sends a form-encoded POST for the ``pasuruankab`` region with an empty
    ``pasar`` field.

    Args:
        tanggal: Date string placed in the ``tanggal`` form field; callers in
            this notebook pass ``YYYY-MM-DD``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may wait indefinitely on a stalled
            connection and block the worker thread.

    Returns:
        The response body as ``bytes`` when the server answers with HTTP 200,
        otherwise ``None``. A message is printed for every failure.
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Report network errors/timeouts the same way as a non-200 status so
        # one bad day does not abort the whole date range.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code == 200:
        return response.content

    print(f"Failed to fetch data for date: {tanggal}")
    return None

def parse_data(html_content, date):
    """Extract column headers and data rows from one day's HTML table.

    Args:
        html_content: HTML document (bytes or str) containing a ``<table>``.
        date: Object with ``strftime``; its ``YYYY-MM-DD`` form is written
            into the leading "Tanggal" marker row.

    Returns:
        ``(headers, rows)`` where ``headers`` is the stripped text of every
        ``<th>`` and ``rows`` is a list of lists: a "Tanggal" marker row, a
        hard-coded "01 BERAS" row, then the stripped ``<td>`` texts of every
        ``<tr>`` after the first.

    Raises:
        ValueError: If the document contains no ``<table>`` element.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_text = date.strftime("%Y-%m-%d")
    if table is None:
        # find() returns None when nothing matches; fail with a message that
        # names the date instead of an opaque AttributeError further down.
        raise ValueError(f"No <table> found in the response for {date_text}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the header width rather than a fixed 7 cells.
    padding = [""] * (len(headers) - 2)

    # Add a date row before the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is re-added by hand, presumably because
    # skipping the first <tr> below also drops it — confirm against live HTML.
    rows = [
        ["Tanggal", date_text] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from ``rows`` using ``headers`` as the column labels."""
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Fetch, parse and tabulate the data for a single date.

    Returns the DataFrame built by ``create_dataframe``, or ``None`` when
    ``get_data`` returned nothing usable (``None`` or an empty body).
    """
    day_html = get_data(date.strftime("%Y-%m-%d"))
    if not day_html:
        return None

    parsed_headers, parsed_rows = parse_data(day_html, date)
    return create_dataframe(parsed_headers, parsed_rows)

# Inclusive date range to download.
start_date = datetime(2024, 3, 1)
end_date = datetime(2024, 3, 31)

# One request per calendar day; both endpoints are included.
day_count = (end_date - start_date).days + 1

# Collect the per-day frames and concatenate once at the end; calling
# pd.concat inside the loop re-copies the accumulated frame every iteration.
frames = []
with ThreadPoolExecutor() as executor:
    futures = []
    for current_date in (start_date + timedelta(days=i) for i in range(day_count)):
        futures.append(executor.submit(process_date, current_date))

    # Read results in submission order so the rows stay chronological.
    for future in futures:
        df = future.result()
        if df is not None:
            frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# Number rows from 1 and label the index "No".
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table as Parquet.
output_file = "hasil_pasuruan_1maret24-31maret24.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2024-03-01 s/d 2024-03-31
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2024-03-01                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        14.533         14.533   
4                         - Beras Medium     kg        11.267         11.267   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2724             - Pupuk KCL Non Subsidi     Kg         6.667          6.667   
2725             - Pupuk NPK Non Subsidi     Kg         8.767          8.767   
2726           - Pupuk SP 35 Non Subsidi     Kg         5.5

In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor

def get_data(tanggal, timeout=30):
    """Fetch the raw price-table HTML for one date.

    Sends a form-encoded POST for the ``pasuruankab`` region with an empty
    ``pasar`` field.

    Args:
        tanggal: Date string placed in the ``tanggal`` form field; callers in
            this notebook pass ``YYYY-MM-DD``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may wait indefinitely on a stalled
            connection and block the worker thread.

    Returns:
        The response body as ``bytes`` when the server answers with HTTP 200,
        otherwise ``None``. A message is printed for every failure.
    """
    url = "https://siskaperbapo.jatimprov.go.id/harga/tabel.nodesign/"
    payload = {
        "tanggal": tanggal,
        "kabkota": "pasuruankab",
        "pasar": ""
    }

    try:
        response = requests.post(url, data=payload, timeout=timeout)
    except requests.RequestException as exc:
        # Report network errors/timeouts the same way as a non-200 status so
        # one bad day does not abort the whole date range.
        print(f"Failed to fetch data for date: {tanggal} ({exc})")
        return None

    if response.status_code == 200:
        return response.content

    print(f"Failed to fetch data for date: {tanggal}")
    return None

def parse_data(html_content, date):
    """Extract column headers and data rows from one day's HTML table.

    Args:
        html_content: HTML document (bytes or str) containing a ``<table>``.
        date: Object with ``strftime``; its ``YYYY-MM-DD`` form is written
            into the leading "Tanggal" marker row.

    Returns:
        ``(headers, rows)`` where ``headers`` is the stripped text of every
        ``<th>`` and ``rows`` is a list of lists: a "Tanggal" marker row, a
        hard-coded "01 BERAS" row, then the stripped ``<td>`` texts of every
        ``<tr>`` after the first.

    Raises:
        ValueError: If the document contains no ``<table>`` element.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table')
    date_text = date.strftime("%Y-%m-%d")
    if table is None:
        # find() returns None when nothing matches; fail with a message that
        # names the date instead of an opaque AttributeError further down.
        raise ValueError(f"No <table> found in the response for {date_text}")

    headers = [th.text.strip() for th in table.find_all('th')]

    # Pad the synthetic rows to the header width rather than a fixed 7 cells.
    padding = [""] * (len(headers) - 2)

    # Add a date row before the "01 BERAS" row.
    # NOTE(review): the "01 BERAS" row is re-added by hand, presumably because
    # skipping the first <tr> below also drops it — confirm against live HTML.
    rows = [
        ["Tanggal", date_text] + padding,
        ["01", "BERAS"] + padding,
    ]

    for tr in table.find_all('tr')[1:]:
        rows.append([td.text.strip() for td in tr.find_all('td')])

    return headers, rows

def create_dataframe(headers, rows):
    """Build a DataFrame from ``rows`` using ``headers`` as the column labels."""
    return pd.DataFrame(data=rows, columns=headers)

def process_date(date):
    """Fetch, parse and tabulate the data for a single date.

    Returns the DataFrame built by ``create_dataframe``, or ``None`` when
    ``get_data`` returned nothing usable (``None`` or an empty body).
    """
    day_html = get_data(date.strftime("%Y-%m-%d"))
    if not day_html:
        return None

    parsed_headers, parsed_rows = parse_data(day_html, date)
    return create_dataframe(parsed_headers, parsed_rows)

# Inclusive date range to download.
start_date = datetime(2024, 4, 1)
end_date = datetime(2024, 4, 24)

# One request per calendar day; both endpoints are included.
day_count = (end_date - start_date).days + 1

# Collect the per-day frames and concatenate once at the end; calling
# pd.concat inside the loop re-copies the accumulated frame every iteration.
frames = []
with ThreadPoolExecutor() as executor:
    futures = []
    for current_date in (start_date + timedelta(days=i) for i in range(day_count)):
        futures.append(executor.submit(process_date, current_date))

    # Read results in submission order so the rows stay chronological.
    for future in futures:
        df = future.result()
        if df is not None:
            frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# Number rows from 1 and label the index "No".
all_data.index += 1
all_data.index.name = "No"

start_date_formatted = start_date.strftime('%Y-%m-%d')
end_date_formatted = end_date.strftime('%Y-%m-%d')
print(f"Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal {start_date_formatted} s/d {end_date_formatted}")
print("Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan")
print()
print(all_data)

# Persist the combined table as Parquet.
output_file = "hasil_pasuruan_1april24-24april24.parquet"
all_data.to_parquet(output_file)

Harga Rata-Rata Kabupaten Pasuruan di Tingkat Konsumen Tanggal 2024-04-01 s/d 2024-04-24
Pasar: Pasar Sukorejo, Pasar Bangil, Pasar Pandaan

           NO           NAMA BAHAN POKOK SATUAN HARGA KEMARIN HARGA SEKARANG  \
No                                                                             
1     Tanggal                 2024-04-01                                       
2          01                      BERAS                                       
3                        - Beras Premium     kg        14.500         14.500   
4                         - Beras Medium     kg        11.767         11.767   
5          02                       GULA                                       
...       ...                        ...    ...           ...            ...   
2108             - Pupuk KCL Non Subsidi     Kg         6.667          6.667   
2109             - Pupuk NPK Non Subsidi     Kg         8.767          8.767   
2110           - Pupuk SP 35 Non Subsidi     Kg         5.5