In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import matplotlib.pyplot as plt
import os
from datetime import datetime, timedelta

def setup_client():
    """
    Build an Open-Meteo API client backed by an HTTP cache and automatic retries.

    The underlying session caches responses in a store named '.cache' for
    3600 seconds (one hour) and is wrapped so that failed requests are retried
    up to 5 times with a backoff factor of 0.2. The aim is a stable connection
    that does not spend API quota on repeated identical requests.

    Returns:
        An `openmeteo_requests.Client` bound to the cached, retrying session.
    """
    session = retry(
        requests_cache.CachedSession('.cache', expire_after=3600),
        retries=5,
        backoff_factor=0.2,
    )
    return openmeteo_requests.Client(session=session)

In [4]:
# ==========================================
# 2. FETCH DATA PER CHUNK
# ==========================================
def fetch_chunk(client, lat, lon, start_date, end_date):
    """
    Download one date range of hourly archive data and turn it into a DataFrame.

    Args:
        client: Object exposing `weather_api(url, params=...)`.
        lat: Value sent as the "latitude" request parameter.
        lon: Value sent as the "longitude" request parameter.
        start_date: Value sent as the "start_date" request parameter.
        end_date: Value sent as the "end_date" request parameter.

    Returns:
        Whatever `process_data` returns for the first response, or None when
        the request or the processing raises (the error is printed, not raised).
    """
    # The order of these fields matters: process_data reads them back by position.
    hourly_fields = [
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
        "rain",
        "wind_speed_10m",
        "wind_direction_10m",
        "surface_pressure",
        "weather_code",
    ]
    query = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_fields,
        "timezone": "Asia/Jakarta",
    }
    endpoint = "https://archive-api.open-meteo.com/v1/archive"

    print(f"   ‚è≥ Mengambil: {start_date} s.d {end_date}...")
    try:
        first_response = client.weather_api(endpoint, params=query)[0]
        # Convert straight to a DataFrame so callers never see the raw response.
        return process_data(first_response)
    except Exception as err:
        # Best effort: report the failure and let the caller skip this chunk.
        print(f"   ‚ùå Gagal pada chunk ini: {err}")
        return None

# ==========================================
# 3. PROCESS DATA (SAME AS BEFORE)
# ==========================================
def process_data(response):
    """
    Convert one API response into a time-indexed DataFrame of hourly values.

    Args:
        response: Object whose `Hourly()` result exposes `Time()`, `TimeEnd()`,
            `Interval()` and `Variables(i).ValuesAsNumpy()`.

    Returns:
        DataFrame indexed by 'date' (converted to the Asia/Jakarta timezone)
        with the columns temperature, humidity, dewpoint, rain_mm, wind_speed,
        wind_direction, pressure and weather_code, taken from hourly
        variables 0 through 7 in that order.
    """
    hourly = response.Hourly()

    # Column i is filled from hourly variable i, so this order must match
    # the order in which the variables were requested.
    column_names = (
        "temperature",
        "humidity",
        "dewpoint",
        "rain_mm",
        "wind_speed",
        "wind_direction",
        "pressure",
        "weather_code",
    )

    # Timestamps run from Time() up to, but not including, TimeEnd().
    step = pd.Timedelta(seconds=hourly.Interval())
    first_stamp = pd.to_datetime(hourly.Time(), unit="s", utc=True)
    stop_stamp = pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True)
    timestamps = pd.date_range(start=first_stamp, end=stop_stamp, freq=step, inclusive="left")

    table = {"date": timestamps}
    for position, column in enumerate(column_names):
        table[column] = hourly.Variables(position).ValuesAsNumpy()

    frame = pd.DataFrame(data=table).set_index('date')
    frame.index = frame.index.tz_convert('Asia/Jakarta')
    return frame

# ==========================================
# 4. MAIN LOGIC: CHUNKING & MERGING
# ==========================================
def _add_years(moment, years):
    """Return `moment` moved forward by `years` calendar years, clamping Feb 29 to Feb 28."""
    try:
        return moment.replace(year=moment.year + years)
    except ValueError:
        # Feb 29 has no counterpart in a non-leap target year.
        return moment.replace(year=moment.year + years, day=28)


def fetch_long_period_data(client, lat, lon, start_str, end_str, folder_tujuan, chunk_years=10):
    """
    Download a long date range in multi-year chunks and merge them into one DataFrame.

    Each chunk is saved as `temp_<start>_<end>.csv` inside `folder_tujuan`.
    A chunk whose file already exists is not downloaded again, so an
    interrupted run can be resumed. Chunks that fail to download are reported
    and skipped, which leaves a gap in the merged result.

    Args:
        client: API client passed through to `fetch_chunk`.
        lat: Latitude passed through to `fetch_chunk`.
        lon: Longitude passed through to `fetch_chunk`.
        start_str: First day of the range, formatted "YYYY-MM-DD".
        end_str: Last day of the range (inclusive), formatted "YYYY-MM-DD".
        folder_tujuan: Destination folder for the chunk files; created if missing.
        chunk_years: Length of each chunk in calendar years (at least 1).

    Returns:
        The merged DataFrame, sorted by time with duplicate timestamps removed
        and a timezone-aware 'date' index in Asia/Jakarta, or None when no
        chunk file is available.

    Raises:
        ValueError: If `chunk_years` is smaller than 1, or a date string does
            not match "YYYY-MM-DD".
    """
    # A zero or negative chunk length would never advance the loop below.
    if chunk_years < 1:
        raise ValueError("chunk_years must be at least 1")

    os.makedirs(folder_tujuan, exist_ok=True)

    start_date = datetime.strptime(start_str, "%Y-%m-%d")
    final_end_date = datetime.strptime(end_str, "%Y-%m-%d")

    all_files = []
    current_start = start_date

    print(f"üöÄ Memulai Misi Pengambilan Data 75 Tahun ({chunk_years} tahunan)...")

    # `<=` (not `<`): the end date is inclusive, so a single-day range, or a
    # final chunk that starts exactly on the end date, must still be fetched.
    while current_start <= final_end_date:
        # E.g. start 1950 + 10 years -> 1960, minus one day so chunks do not
        # overlap; never run past the requested end date.
        current_end = min(
            _add_years(current_start, chunk_years) - timedelta(days=1),
            final_end_date,
        )

        # Date strings in the format the API expects.
        s_str = current_start.strftime("%Y-%m-%d")
        e_str = current_end.strftime("%Y-%m-%d")

        chunk_filename = os.path.join(folder_tujuan, f"temp_{s_str}_{e_str}.csv")

        if os.path.exists(chunk_filename):
            # Resume capability: reuse a chunk saved by an earlier run.
            print(f"‚è© Skip: {s_str} - {e_str} (Sudah ada)")
            all_files.append(chunk_filename)
        else:
            df_chunk = fetch_chunk(client, lat, lon, s_str, e_str)
            if df_chunk is not None:
                df_chunk.to_csv(chunk_filename)
                print(f"   ‚úÖ Tersimpan: {chunk_filename}")
                all_files.append(chunk_filename)
            else:
                print("   ‚ö†Ô∏è Chunk ini dilewati karena error.")

        # Continue with the day after this chunk.
        current_start = current_end + timedelta(days=1)

    print("\nüîó Menggabungkan semua pecahan data...")

    if not all_files:
        return None

    df_list = []
    for chunk_path in all_files:
        df = pd.read_csv(chunk_path, index_col='date')
        # The stored timestamps can carry different UTC offsets across the
        # decades; parsing via UTC always yields one tz-aware DatetimeIndex,
        # which is then shown in the timezone the chunks were written in.
        df.index = pd.to_datetime(df.index, utc=True).tz_convert('Asia/Jakarta')
        df_list.append(df)

    df_final = pd.concat(df_list).sort_index()
    # Drop repeated timestamps in case chunk files overlap.
    df_final = df_final[~df_final.index.duplicated(keep='first')]

    print(f"üéâ SUKSES BESAR! Total Data: {len(df_final)} baris.")
    print(f"   Mulai: {df_final.index.min()}")
    print(f"   Akhir: {df_final.index.max()}")
    return df_final

In [7]:
# ==========================================
# EXECUTION
# ==========================================
if __name__ == "__main__":
    # Configuration: target coordinates and the inclusive date range to download.
    # LAT must be a plain float; a trailing comma after the value would turn
    # it into a 1-tuple.
    LAT = -7.736436737566032
    LON = 109.6460550796716
    MULAI = "1950-01-01"
    AKHIR = "2025-12-03"

    # Folder for the per-chunk CSVs and the name of the merged output file.
    FOLDER = "open_meteo_climate"
    FILE_FINAL = "kebumen_75tahun_lengkap.csv"

    client = setup_client()

    # Download in stages (10 years per chunk), then merge.
    df_lengkap = fetch_long_period_data(client, LAT, LON, MULAI, AKHIR, FOLDER, chunk_years=10)

    if df_lengkap is not None:
        # Save the merged result next to the chunk files.
        path_final = os.path.join(FOLDER, FILE_FINAL)
        df_lengkap.to_csv(path_final)
        print(f"üíæ File Gabungan Tersimpan: {path_final}")

        # Optional: delete the temporary chunk files to save disk space.
        # import glob
        # for f in glob.glob(f"{FOLDER}/temp_*.csv"):
        #    os.remove(f)

üöÄ Memulai Misi Pengambilan Data 75 Tahun (10 tahunan)...
‚è© Skip: 1950-01-01 - 1959-12-31 (Sudah ada)
‚è© Skip: 1960-01-01 - 1969-12-31 (Sudah ada)
‚è© Skip: 1970-01-01 - 1979-12-31 (Sudah ada)
‚è© Skip: 1980-01-01 - 1989-12-31 (Sudah ada)
‚è© Skip: 1990-01-01 - 1999-12-31 (Sudah ada)
‚è© Skip: 2000-01-01 - 2009-12-31 (Sudah ada)
‚è© Skip: 2010-01-01 - 2019-12-31 (Sudah ada)
   ‚è≥ Mengambil: 2020-01-01 s.d 2025-12-03...
   ‚úÖ Tersimpan: open_meteo_climate/temp_2020-01-01_2025-12-03.csv

üîó Menggabungkan semua pecahan data...
üéâ SUKSES BESAR! Total Data: 665544 baris.
   Mulai: 1950-01-01 01:00:00+08:00
   Akhir: 2025-12-03 23:00:00+07:00
üíæ File Gabungan Tersimpan: open_meteo_climate/kebumen_75tahun_lengkap.csv


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=672c3bf7-b636-46e8-b7c0-4f6feec29313' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>