In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# ================= KONFIGURASI =================
FILE_SENSOR = "D:/Github/Catatan_Notebook/Analisis_Meteorologi/analisis/meteo_data_id-05.csv"
FILE_REF    = "D:/Github/Catatan_Notebook/Analisis_Meteorologi/open_meteo_data/data_open_meteo.csv"
VARIABEL    = "temperature"
# ===============================================

def hitung_kalibrasi():
    print("üìÇ Sedang membaca file...")
    
    # 1. Load Data
    # Kita baca biasa dulu tanpa parse_dates biar aman
    df_sensor = pd.read_csv(FILE_SENSOR, index_col=0)
    df_ref    = pd.read_csv(FILE_REF, index_col=0)

    # --- PERBAIKAN DI SINI (FORCE TO DATETIME) ---
    # Kita paksa index menjadi format Waktu. 
    # 'coerce' artinya kalau ada data error/sampah, ubah jadi NaT (Not a Time) biar gak crash.
    df_sensor.index = pd.to_datetime(df_sensor.index, utc=True, errors='coerce')
    df_ref.index    = pd.to_datetime(df_ref.index, utc=True, errors='coerce')

    # Hapus baris yang gagal dikonversi jadi waktu (NaT)
    df_sensor = df_sensor.dropna(how='all') 
    df_ref = df_ref.dropna(how='all')

    # Pastikan Zona Waktu Sama (Convert ke WIB / Jakarta)
    # Ini penting biar jam 7 pagi sensor ketemu jam 7 pagi referensi
    if df_sensor.index.tz is not None:
        df_sensor.index = df_sensor.index.tz_convert('Asia/Jakarta')
    if df_ref.index.tz is not None:
        df_ref.index = df_ref.index.tz_convert('Asia/Jakarta')
    
    print("‚úÖ Konversi waktu sukses. Melakukan resampling...")

    # 2. Sinkronisasi Data (Resampling)
    try:
        # Sekarang pasti aman karena indexnya sudah DatetimeIndex
        df_sensor_hourly = df_sensor.resample('1h').mean()
    except TypeError as e:
        print(f"‚ùå Masih Error Resample: {e}")
        return

    # Gabungkan (Inner Join)
    df_merged = pd.concat([df_sensor_hourly[VARIABEL], df_ref[VARIABEL]], axis=1, join='inner')
    df_merged.columns = ['Raw_Sensor', 'Reference']
    
    # Hapus data kosong
    df_merged = df_merged.dropna()

    if len(df_merged) < 10:
        print(f"‚ùå Data terlalu sedikit ({len(df_merged)}) untuk kalibrasi! Butuh minimal 10 jam yang cocok.")
        print("üí° Tips: Cek apakah rentang tanggal kedua file CSV tersebut beririsan?")
        return

    # 3. Hitung Korelasi & Error
    korelasi = df_merged['Raw_Sensor'].corr(df_merged['Reference'])
    mae_raw = np.mean(np.abs(df_merged['Raw_Sensor'] - df_merged['Reference']))
    
    print(f"\nüìä Analisis Awal ({VARIABEL}):")
    print(f"   - Jumlah Data Match : {len(df_merged)} jam")
    print(f"   - Korelasi (r)      : {korelasi:.4f}")
    print(f"   - Rata-rata Error   : {mae_raw:.4f}")

    # 4. Regresi Linear
    X = df_merged['Raw_Sensor'].values.reshape(-1, 1)
    y = df_merged['Reference'].values.reshape(-1, 1)

    model = LinearRegression()
    model.fit(X, y)

    slope = model.coef_[0][0]
    intercept = model.intercept_[0]

    print("\n‚úÖ RUMUS KALIBRASI DITEMUKAN!")
    print("------------------------------------------------")
    print(f"   Slope (m)     : {slope:.5f}")
    print(f"   Intercept (c) : {intercept:.5f}")
    print("------------------------------------------------")
    
    if abs(slope - 1.0) < 0.05:
        print(f"üëâ Rumus: {VARIABEL}_cal = {VARIABEL}_raw + ({intercept:.2f})")
    else:
        print(f"üëâ Rumus: {VARIABEL}_cal = ({VARIABEL}_raw * {slope:.4f}) + {intercept:.4f}")

    # 5. Visualisasi
    df_merged['Calibrated'] = (df_merged['Raw_Sensor'] * slope) + intercept
    
    plt.figure(figsize=(10, 5))
    plt.scatter(df_merged['Raw_Sensor'], df_merged['Reference'], color='gray', alpha=0.5, label='Data Mentah')
    plt.plot(df_merged['Raw_Sensor'], df_merged['Calibrated'], color='red', linewidth=2, label='Garis Kalibrasi')
    plt.xlabel(f"Nilai Sensor Asli ({VARIABEL})")
    plt.ylabel(f"Nilai Reference ({VARIABEL})")
    plt.title(f"Kurva Kalibrasi {VARIABEL}")
    plt.legend()
    plt.grid(True, linestyle='--')
    plt.show()

# Jalankan
if __name__ == "__main__":
    hitung_kalibrasi()

üìÇ Sedang membaca file...
‚úÖ Konversi waktu sukses. Melakukan resampling...
‚ùå Data terlalu sedikit (0) untuk kalibrasi! Butuh minimal 10 jam yang cocok.
üí° Tips: Cek apakah rentang tanggal kedua file CSV tersebut beririsan?
