# -*- coding: utf-8 -*-
"""
================================================================================
                      SKRIP PERAMALAN SPASIAL-TEMPORAL V3.1 - TAHAP 4

Versi Skrip: 3.1 - Tahap 4 (Optimalisasi Parameter & Struktur Hyperparameter ML)

Deskripsi Umum:
Skrip ini dirancang untuk melakukan peramalan time series spasial pada data NetCDF.
Tujuannya adalah untuk memprediksi nilai variabel target (misalnya, jumlah kejadian)
di setiap piksel grid untuk beberapa langkah waktu ke depan.

Fitur Utama:
1.  Model Peramalan:
    - ARIMA: Menggunakan `pmdarima.auto_arima` untuk pemilihan order (p,d,q) otomatis.
    - LSTM (Long Short-Term Memory): Jaringan Neural Rekuren.
    - ANN (Artificial Neural Network): Jaringan Neural Tiruan standar (Feedforward).
    - RF (Random Forest): Model ensemble berbasis Pohon Keputusan.
2.  Input Data:
    - Data utama dari file NetCDF (.nc).
    - Fitur tambahan dari data titik longsor (Shapefile .shp), dikonversi ke grid.
3.  Fitur Tambahan yang Dihasilkan:
    - Jumlah Kejadian Longsor per Piksel per Bulan.
    - Rata-rata Spasial Tetangga (Spatial Average Lag-1): Nilai rata-rata dari 8 piksel
      tetangga pada langkah waktu sebelumnya (t-1) untuk variabel target.
4.  Evaluasi Model:
    - Dilakukan pada periode data historis terakhir.
    - Metrik: RMSE (Root Mean Squared Error), R2-score.
    - Uji Diagnostik Residual:
        - ARIMA: Uji Ljung-Box pada residual in-sample.
        - LSTM, ANN, RF: Uji Ljung-Box pada residual in-sample (data yang di-scaled).
5.  Peramalan Masa Depan:
    - Menghasilkan peramalan untuk periode masa depan setelah model dilatih
      pada seluruh data historis.
6.  Output:
    - Peta metrik evaluasi (RMSE, R2, Ljung-Box p-value) dalam format GeoTIFF dan PNG.
    - Plot time series sampel untuk perbandingan observasi vs. prediksi.
    - Animasi GIF dari peta peramalan (evaluasi dan masa depan).
    - Data peramalan (evaluasi dan masa depan) dalam format GeoTIFF per langkah waktu.
    - Data peramalan dan metrik evaluasi dalam format NetCDF.
    - Peta orde differencing 'd' yang digunakan oleh ARIMA (dari auto_arima).
7.  Penanganan Spasial:
    - Pengaturan Coordinate Reference System (CRS) target (EPSG:32749 - WGS 84 / UTM Zone 49S).
    - Reprojeksi data longsor ke CRS target.
    - Output geospasial disimpan dengan informasi CRS yang sesuai.
8.  Struktur Hyperparameter (Tahap 4):
    - Fungsi peramalan untuk model ML (LSTM, ANN, RF) dimodifikasi untuk menerima
      kamus hyperparameter, memungkinkan fleksibilitas dalam eksperimen.
    - Set hyperparameter awal didefinisikan dalam konfigurasi.
9.  Lain-lain:
    - Output prediksi dipastikan tidak negatif.
    - Menggunakan pemrosesan paralel (joblib) untuk mempercepat perhitungan per piksel.
    - Logging untuk melacak proses eksekusi.
"""

In [None]:
# --- IMPORTS ---
import os
import logging
import warnings
import optuna
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio import features
from affine import Affine
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from typing import List, Tuple, Optional, Callable, Dict, Any
from joblib import Parallel, delayed
from tqdm import tqdm
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA as StatsmodelsARIMA # Alias
from statsmodels.tsa.stattools import adfuller
from statsmodels.stats.diagnostic import acorr_ljungbox
from scipy.ndimage import convolve
import tensorflow as tf
import rioxarray
from pandas.tseries.offsets import DateOffset
import pmdarima as pm

In [None]:
# --- CONFIGURATION ---
# Project root. The working directory is switched to it below, so every
# relative path in this section is resolved against it.
BASE_DIR = "D:\\DataPenelitian\\Longsor"  # REPLACE WITH YOUR DIRECTORY
# isdir rather than exists: a regular file at this path cannot be chdir'd into.
if not os.path.isdir(BASE_DIR):
    logging.warning(f"BASE_DIR '{BASE_DIR}' tidak ditemukan. Menggunakan direktori kerja saat ini.")
    BASE_DIR = "."
try:
    os.chdir(BASE_DIR)
    logging.info(f"Direktori kerja diubah ke: {os.getcwd()}")
except OSError:
    # OSError covers FileNotFoundError as well as NotADirectoryError and
    # PermissionError. The failure is deliberately non-fatal: the script keeps
    # running from whatever the current working directory is.
    logging.error(f"Tidak dapat mengubah direktori ke {BASE_DIR}. Pastikan path benar.")

# Name of the dataset variable that is forecast; also read by the GeoTIFF/NetCDF
# writers to look up the target's units.
TARGET_VARIABLE = 'COUNT'
# Forecast horizon in time steps (recorded as "forecast_steps" in NetCDF output).
FORECAST_STEPS = 3
# Number of lagged steps (recorded as "n_lag_used" in NetCDF output).
N_LAG = 24
# Sample pixels for time-series plots.
# NOTE(review): presumably (y, x) index pairs as expected by plot_sample_timeseries - confirm.
SAMPLE_COORDS = [(0, 1), (35, 14), (0, 0), (27, 6), (35, 11)]
# NOTE(review): these two look like the arguments for get_stationarity
# (p_threshold / max_diff) - confirm against the caller.
ADF_P_THRESHOLD = 0.05
MAX_DIFFERENCING = 2
# NOTE(review): presumably the lag count for the Ljung-Box residual test - confirm.
LJUNG_BOX_LAGS_CONFIG = 10
# Input NetCDF file (its basename is written to the "source_data" attribute).
NC_FILE = "nc_20250426_1M.nc"
# Landslide point shapefile and the name of its event-date column.
LANDSLIDE_SHP_FILE = "data_fix/TitikLongsor_Magelang_2025.shp"
LANDSLIDE_DATE_COLUMN = "Date"
# Fallback CRS used when the NetCDF template carries no CRS of its own.
TARGET_CRS = "EPSG:32749"

# Output directories, relative to the working directory.
OUTPUT_BASE_DIR_EVAL = "1_Month/forecast_evaluation_1M_spatial_v3.1_tests_tahap4_optim"
OUTPUT_BASE_DIR_FUTURE = "1_Month/forecast_future_1M_spatial_v3.1_tests_tahap4_optim"
OUTPUT_DIR_ACTUAL_AGG = "1_Month/actual_aggregation_1M"

# Names of the engineered feature variables.
LANDSLIDE_FEATURE_NAME = "LANDSLIDE_COUNT"
SPATIAL_FEATURE_NAME = f"{TARGET_VARIABLE}_SPATIAL_AVG_LAG1"

# Initial hyperparameter sets, one dictionary per ML model.
HYPERPARAMS_LSTM = {
    "lstm_units": 32, "lstm_activation": 'relu', "learning_rate": 0.01,
    "epochs": 50, "batch_size_val": None
}
HYPERPARAMS_ANN = {
    "ann_layers_units": [64, 32], "ann_activation": 'relu', "learning_rate": 0.01,
    "epochs": 50, "batch_size_val": None
}
HYPERPARAMS_RF = {
    "n_estimators": 100, "rf_random_state": 42, "max_depth": None,
    "min_samples_split": 2, "min_samples_leaf": 1
}

In [None]:
# --- FUNGSI PEMROSESAN DATA ---
def _resolve_template_crs(ds_template: xr.Dataset):
    """Return the CRS to rasterize in: the template's own CRS, else TARGET_CRS, else None."""
    try:
        if hasattr(ds_template, 'rio') and ds_template.rio.crs:
            template_crs = ds_template.rio.crs
            logging.info(f"Template NetCDF CRS: {template_crs}")
            return template_crs
        logging.warning("Template NetCDF does not have CRS information via rio accessor. Landslide data will not be reprojected unless TARGET_CRS is used.")
        if TARGET_CRS:
            template_crs = rasterio.crs.CRS.from_string(TARGET_CRS)
            logging.info(f"Using globally defined TARGET_CRS for template: {template_crs}")
            return template_crs
    except Exception as crs_read_err:
        logging.warning(f"Could not read CRS from NetCDF template: {crs_read_err}. Landslide data will not be reprojected unless TARGET_CRS is used.")
        if TARGET_CRS:
            template_crs = rasterio.crs.CRS.from_string(TARGET_CRS)
            logging.info(f"Using globally defined TARGET_CRS for template due to error: {template_crs}")
            return template_crs
    return None


def _fallback_transform_from_coords(ds_template: xr.Dataset, nx: int, ny: int):
    """Derive an affine transform from the x/y coordinate vectors, or None if y resolution is ~0."""
    x_values = ds_template['x'].values
    y_values = ds_template['y'].values
    # (last - first) / (n - 1) already carries the right sign for both
    # ascending and descending axes, so no separate sign handling is needed.
    x_res = (x_values[-1] - x_values[0]) / (nx - 1) if nx > 1 else 1
    y_res = (y_values[-1] - y_values[0]) / (ny - 1) if ny > 1 else 1
    if abs(y_res) <= 1e-9:
        return None
    # NOTE(review): the origin is the first coordinate value itself. If the
    # coordinates are cell centres (rather than cell corners / pixel indices)
    # the origin should be shifted by half a cell - confirm the convention.
    return Affine(x_res, 0.0, x_values[0], 0.0, y_res, y_values[0])


def _landslide_count_array(
    counts: np.ndarray, ds_template: xr.Dataset, feature_name: str, shp_path: str
) -> xr.DataArray:
    """Wrap a (time, y, x) count grid in a DataArray that shares the template's coordinates."""
    return xr.DataArray(
        counts,
        coords={'time': ds_template['time'], 'y': ds_template['y'], 'x': ds_template['x']},
        dims=['time', 'y', 'x'], name=feature_name,
        attrs={'long_name': 'Monthly landslide event count', 'units': 'count', 'source_shapefile': os.path.basename(shp_path)})


def process_landslide_data(
    shp_path: str, date_column: str, ds_template: xr.Dataset, feature_name: str = "LANDSLIDE_COUNT"
) -> Optional[xr.DataArray]:
    """Rasterize landslide points into a per-month event-count grid.

    Points are read from ``shp_path``, rows whose ``date_column`` cannot be
    parsed are dropped, the points are reprojected to the template CRS when the
    two CRSs are known and differ, and each calendar month present on the
    template's ``time`` axis is rasterized by adding 1 per point to the cell it
    falls in.

    Args:
        shp_path: Path of the point shapefile.
        date_column: Name of the column that holds the event date.
        ds_template: Dataset that provides the ``time``, ``y`` and ``x`` axes,
            and (through the ``rio`` accessor) the CRS and transform.
        feature_name: Name given to the returned DataArray.

    Returns:
        An int32 DataArray with dims ``(time, y, x)``, all zeros when no point
        has a valid date, or ``None`` when the data could not be processed
        (the reason is logged).
    """
    logging.info(f"--- Processing Landslide Data from: {shp_path} ---")
    try:
        landslide_gdf = gpd.read_file(shp_path)
        logging.info(f"Loaded {len(landslide_gdf)} landslide points from {os.path.basename(shp_path)}.")
        if date_column not in landslide_gdf.columns:
            logging.error(f"❌ Date column '{date_column}' not found in Shapefile.")
            return None

        # Dataset.sizes is the supported name -> length mapping; indexing
        # Dataset.dims like a mapping is deprecated in recent xarray.
        ny, nx = ds_template.sizes['y'], ds_template.sizes['x']
        grid_shape = (ny, nx)
        time_coords = ds_template['time']

        # Only the parsing itself is guarded here, so unrelated failures are
        # not reported as date errors.
        try:
            landslide_gdf[date_column] = pd.to_datetime(landslide_gdf[date_column], errors='coerce')
        except Exception as date_err:
            logging.error(f"❌ Error parsing date column '{date_column}': {date_err}")
            return None
        original_count = len(landslide_gdf)
        landslide_gdf = landslide_gdf.dropna(subset=[date_column])
        if len(landslide_gdf) < original_count:
            logging.warning(f"Removed {original_count - len(landslide_gdf)} points with invalid dates.")
        if landslide_gdf.empty:
            logging.warning("No valid landslide points remaining after date processing. Returning zero array.")
            return _landslide_count_array(
                np.zeros((len(time_coords), ny, nx), dtype=np.int32), ds_template, feature_name, shp_path)

        ds_template_crs = _resolve_template_crs(ds_template)
        shp_crs = landslide_gdf.crs
        logging.info(f"Landslide Shapefile CRS: {shp_crs}")
        if ds_template_crs and shp_crs and shp_crs != ds_template_crs:
            logging.info(f"Reprojecting landslide data from {shp_crs} to {ds_template_crs}...")
            try:
                landslide_gdf = landslide_gdf.to_crs(ds_template_crs)
                logging.info(f"Landslide data reprojected successfully to {landslide_gdf.crs}.")
            except Exception as crs_err:
                logging.error(f"❌ Failed to reproject landslide data: {crs_err}. Proceeding without reprojection.")
        elif not ds_template_crs:
            logging.info("Skipping landslide reprojection as NetCDF CRS is not set/available.")
        elif not shp_crs:
            logging.info("Skipping landslide reprojection as Shapefile CRS is unknown.")
        elif shp_crs == ds_template_crs:
            logging.info("Landslide data CRS matches template CRS. No reprojection needed.")

        transform = ds_template.rio.transform()
        if transform.is_identity and ds_template_crs:
            logging.warning("NetCDF transform is identity despite having CRS. Rasterization might be incorrect if coordinates are not pixel indices.")
            if 'x' in ds_template.coords and 'y' in ds_template.coords:
                try:
                    fallback_transform = _fallback_transform_from_coords(ds_template, nx, ny)
                except Exception as e_trans_calc:
                    logging.warning(f"Could not calculate fallback transform: {e_trans_calc}")
                else:
                    if fallback_transform is not None:
                        logging.info(f"Attempting to use calculated transform: {fallback_transform}")
                        transform = fallback_transform
                    else:
                        logging.warning("Calculated y_res is zero or too small, cannot form a valid fallback transform.")

        landslide_grid_monthly = np.zeros((len(time_coords), ny, nx), dtype=np.int32)
        logging.info("Rasterizing landslide points to monthly grid...")
        landslide_gdf['YearMonth'] = landslide_gdf[date_column].dt.to_period('M')
        for t_idx, timestamp in enumerate(tqdm(time_coords.values, desc="Rasterizing Months")):
            current_month = pd.Timestamp(timestamp).to_period('M')
            monthly_points = landslide_gdf[landslide_gdf['YearMonth'] == current_month]
            if monthly_points.empty:
                continue
            shapes = [(geom, 1) for geom in monthly_points.geometry]
            try:
                # MergeAlg.add accumulates the burn value, so a cell hit by
                # several points ends up holding the number of points.
                landslide_grid_monthly[t_idx, :, :] = features.rasterize(
                    shapes=shapes, out_shape=grid_shape, transform=transform,
                    fill=0, merge_alg=rasterio.enums.MergeAlg.add, dtype=np.int32)
            except Exception as raster_err:
                # A failed month is left as zeros; the other months are still processed.
                logging.error(f"❌ Error during rasterization for month {current_month}: {raster_err}")

        landslide_da = _landslide_count_array(landslide_grid_monthly, ds_template, feature_name, shp_path)
        logging.info(f"--- Landslide Data Processing Complete ---")
        return landslide_da
    except ImportError as imp_err:
        logging.error(f"❌ Missing library for landslide processing: {imp_err}. Please install geopandas and rasterio.")
        return None
    except FileNotFoundError:
        logging.error(f"❌ Landslide Shapefile not found at: {shp_path}")
        return None
    except Exception as e:
        logging.error(f"❌ An unexpected error occurred during landslide data processing: {e}", exc_info=True)
        return None

# --- FUNGSI FITUR SPASIAL ---
def calculate_spatial_avg_lag1(
    data_array: xr.DataArray, feature_name: str = "SPATIAL_AVG_LAG1"
) -> xr.DataArray:
    """Compute, for every cell, the mean of its 8 neighbours at the previous time step.

    The array is indexed as ``values[t, :, :]``, i.e. the first axis is treated
    as time and the remaining two as the spatial grid. NaN neighbours and
    neighbours outside the grid are excluded from the mean. The first time
    step has no predecessor and therefore stays NaN, as does any cell without
    a single valid neighbour.

    Args:
        data_array: 3-D input whose coordinates, dims, name and ``units``
            attribute are reused for the result.
        feature_name: Name given to the returned DataArray.

    Returns:
        A float32 DataArray with the same shape and coordinates as the input.
    """
    logging.info(f"Calculating spatial feature: {feature_name}")
    # Queen-contiguity kernel: the 8 surrounding cells, centre excluded.
    kernel = np.array([[1, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=np.float32)
    values = data_array.values.astype(np.float32)
    spatial_avg_lag1 = np.full_like(values, np.nan)
    for t in range(1, values.shape[0]):
        previous_step = values[t - 1, :, :]
        # The validity mask must be numeric: scipy.ndimage.convolve returns an
        # array of the input's dtype, so a boolean mask would yield a boolean
        # "count" (True/False) and the division below would return the
        # neighbour sum instead of the neighbour mean.
        valid_cells = (~np.isnan(previous_step)).astype(np.float32)
        neighbor_counts = convolve(valid_cells, kernel, mode='constant', cval=0.0)
        neighbor_sum = convolve(np.nan_to_num(previous_step, nan=0.0), kernel, mode='constant', cval=0.0)
        # Counts are whole numbers; 0.5 is a rounding-safe "at least one" test.
        has_valid_neighbor = neighbor_counts > 0.5
        avg_values = np.full_like(neighbor_sum, np.nan)
        avg_values[has_valid_neighbor] = neighbor_sum[has_valid_neighbor] / neighbor_counts[has_valid_neighbor]
        spatial_avg_lag1[t, :, :] = avg_values
    spatial_da = xr.DataArray(
        spatial_avg_lag1, coords=data_array.coords, dims=data_array.dims, name=feature_name,
        attrs={
            'long_name': f'Average of 8 spatial neighbors at lag 1 for {data_array.name}',
            'units': data_array.attrs.get('units', 'unknown'),
            'calculation': 'Convolution with 3x3 queen kernel (center 0), NaN handled by averaging only valid neighbors'
        }
    )
    logging.info(f"Spatial feature calculation complete.")
    return spatial_da


In [None]:
# --- FUNGSI UTILS VISUALISASI & PENYIMPANAN ---
def plot_accuracy_maps(metrics: dict, outdir: str):
    """Render each metric grid in *metrics* to ``<outdir>/<name>_map.png``.

    Entries that are ``None`` or entirely NaN are skipped with a warning.
    The colour map and colour limits depend on the metric name: RMSE uses the
    data range, R2 is clamped to [max(0, min), 1], Ljung-Box p-values use
    [0, 1], and anything else uses the data range.
    """
    os.makedirs(outdir, exist_ok=True)
    for name, data in metrics.items():
        unusable = data is None or np.isnan(data).all()
        if unusable:
            logging.warning(f"Skipping map for metric '{name}' as data is missing or all NaN.")
            continue

        plt.figure(figsize=(8, 6))
        key = name.lower()
        if 'rmse' in key:
            cmap = 'hot'
            vmin = np.nanmin(data)
            vmax = np.nanmax(data)
        elif 'r2' in key:
            cmap = 'viridis'
            if np.isnan(np.nanmin(data)):
                vmin = 0
            else:
                vmin = max(0, np.nanmin(data))
            vmax = 1.0
        elif 'ljung_box_pvalue' in key:
            cmap = 'viridis_r'
            vmin = 0.0
            vmax = 1.0
        else:
            cmap = 'viridis'
            vmin = np.nanmin(data)
            vmax = np.nanmax(data)

        image = plt.imshow(data, origin='upper', cmap=cmap, vmin=vmin, vmax=vmax)
        plt.title(f"{name.replace('_', ' ').title()}")
        plt.colorbar(image, label=f"{name} Value")
        plt.xlabel("X Coordinate Index")
        plt.ylabel("Y Coordinate Index")
        plt.tight_layout()

        map_path = os.path.join(outdir, f"{name.lower()}_map.png")
        plt.savefig(map_path)
        plt.close()
        logging.info(f"✅ {name} map saved to: {map_path}")

def plot_sample_timeseries(ds: xr.Dataset, forecast_stack: np.ndarray, time_index: pd.DatetimeIndex, time_test: pd.DatetimeIndex, n_lag: int, forecast_steps: int, sample_coords: List[Tuple[int, int]], outdir: str, target_var: str = 'COUNT'):
    """Plot observed versus forecast values for a handful of sample pixels.

    For every ``(y, x)`` index pair in *sample_coords* that lies inside the
    grid, one PNG is written to *outdir*. It shows up to ``2 * n_lag`` training
    steps before the test period, the observed values of the last
    *forecast_steps* steps, and the forecast for those steps. Pixels whose
    observed and forecast series are both entirely NaN are skipped silently,
    and out-of-bounds pairs are logged as warnings.
    """
    os.makedirs(outdir, exist_ok=True)
    observed_cube = ds[target_var].values
    # The test period is the final `forecast_steps` entries of the time axis.
    first_test_pos = len(time_index) - forecast_steps
    history_start = max(0, first_test_pos - n_lag * 2)

    for i, j in sample_coords:
        inside_grid = 0 <= i < observed_cube.shape[1] and 0 <= j < observed_cube.shape[2]
        if not inside_grid:
            logging.warning(f"Sample coordinate (y={i}, x={j}) is outside the data bounds.")
            continue

        pixel_observed = observed_cube[:, i, j]
        pixel_forecast = forecast_stack[:, i, j]
        if np.isnan(pixel_observed).all() and np.isnan(pixel_forecast).all():
            continue

        full_series = pd.Series(pixel_observed, index=time_index)
        test_series = full_series.iloc[first_test_pos:]
        history_series = full_series.iloc[history_start:first_test_pos]
        forecast_series = pd.Series(pixel_forecast, index=time_test)

        plt.figure(figsize=(12, 5))
        if not history_series.empty:
            history_series.plot(label=f'Train ({target_var}, Last {len(history_series)} steps)', color='gray', marker='.', linestyle='-')
        if not test_series.empty:
            test_series.plot(label=f'Observed ({target_var}, Test Period)', color='blue', marker='o', linestyle='-')
        if not forecast_series.empty and not forecast_series.isna().all():
            forecast_series.plot(label=f'Forecast ({target_var})', color='red', linestyle='--', marker='x')
        plt.title(f"Evaluation: Forecast vs Observation ({target_var}) at Pixel (y={i}, x={j})")
        plt.xlabel("Time")
        plt.ylabel(f"{target_var} Value")
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        plot_filename = os.path.join(outdir, f"eval_timeseries_{target_var}_y{i}_x{j}.png")
        plt.savefig(plot_filename)
        plt.close()
        logging.info(f"✅ Evaluation time series plot for {target_var} saved for (y={i}, x={j}) to: {plot_filename}")

def save_animation(forecast_stack: np.ndarray, dates: pd.DatetimeIndex, outdir: str, name: str, var_name: str = "Forecast", prefix: str = ""):
    """Save the forecast frames as an animated GIF in *outdir*.

    Frames that are entirely NaN are left out, and if no frame remains nothing
    is written. One colour scale is shared by all frames: its lower limit is
    the smallest valid value clamped to be non-negative, and the upper limit
    is raised to ``lower + 1`` when the data range is (almost) zero.

    Args:
        forecast_stack: Array indexed as ``[frame, row, column]``.
        dates: Labels for the frames, used in the frame titles.
        outdir: Output directory (created when missing).
        name: Model name used in the title and the file name.
        var_name: Variable name used in the title and the file name.
        prefix: Optional label (e.g. evaluation/future) for title and file name.
    """
    os.makedirs(outdir, exist_ok=True)
    valid_indices = np.where(~np.isnan(forecast_stack).all(axis=(1, 2)))[0]
    if len(valid_indices) == 0:
        logging.warning(f"Skipping animation for {name} ({var_name}) as all forecast frames contain only NaN.")
        return

    first_valid_frame_idx = valid_indices[0]
    valid_frames = forecast_stack[valid_indices]
    # Every retained frame holds at least one non-NaN value, so nanmin/nanmax
    # cannot return NaN here. The previous `is not np.nan` identity tests never
    # detected NaN anyway and only guarded unreachable branches.
    vmin = max(0, np.nanmin(valid_frames))
    vmax = np.nanmax(valid_frames)
    if vmax - vmin < 1e-6:
        vmax = vmin + 1.0

    title_prefix = f"{prefix.capitalize()}: " if prefix else ""
    filename_prefix = f"{prefix}_" if prefix else ""
    gif_path = os.path.join(outdir, f"{filename_prefix}{name.lower()}_{var_name.lower()}_animation.gif")

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        im = ax.imshow(forecast_stack[first_valid_frame_idx], origin='upper', cmap='viridis', animated=True, vmin=vmin, vmax=vmax)
        fig.colorbar(im, ax=ax, label=f"{var_name} Value")
        title = ax.set_title(f"{title_prefix}{name} {var_name} – {dates[first_valid_frame_idx].strftime('%B %Y')}")
        ax.set_xlabel("X Coordinate Index")
        ax.set_ylabel("Y Coordinate Index")

        def update(frame_num):
            # frame_num counts valid frames only; map it back to the stack index.
            actual_frame_index = valid_indices[frame_num]
            im.set_array(forecast_stack[actual_frame_index])
            title.set_text(f"{title_prefix}{name} {var_name} – {dates[actual_frame_index].strftime('%B %Y')}")
            return [im, title]

        ani = animation.FuncAnimation(fig, update, frames=len(valid_indices), blit=True, interval=500)
        try:
            ani.save(gif_path, writer='pillow', fps=2)
            logging.info(f"✅ {prefix.capitalize()} animation for {name} ({var_name}) saved to: {gif_path}")
        except Exception as e:
            logging.error(f"❌ Failed to save {prefix} animation for {name} ({var_name}) to {gif_path}: {e}")
    finally:
        # Close the figure even when plotting raises, so figures do not pile up.
        plt.close(fig)

def save_evaluation_metrics_geotiff(metrics: dict, ds_ref: xr.Dataset, outdir: str):
    """Write each usable metric grid in *metrics* to ``<outdir>/<name>_map.tif``.

    The ``y``/``x`` coordinates, CRS and transform are taken from *ds_ref*.
    Metrics that are ``None`` or entirely NaN are skipped. A failure while
    writing one metric is logged and does not stop the remaining metrics; any
    other failure is logged with a traceback. Nothing is raised to the caller.
    """
    os.makedirs(outdir, exist_ok=True)
    wrote_any = False
    try:
        if "y" not in ds_ref.coords or "x" not in ds_ref.coords:
            raise ValueError("Coords 'y' or 'x' missing in ds_ref")
        grid_coords = {"y": ds_ref["y"], "x": ds_ref["x"]}

        # Georeferencing is best effort: missing pieces only produce warnings.
        ref_crs = None
        ref_transform = None
        try:
            if hasattr(ds_ref, 'rio') and ds_ref.rio.crs:
                ref_crs = ds_ref.rio.crs
            else:
                logging.warning("CRS not found in ds_ref. GeoTIFFs will lack CRS.")
            if hasattr(ds_ref, 'rio') and ds_ref.rio.transform():
                ref_transform = ds_ref.rio.transform()
            if not ref_transform or ref_transform.is_identity:
                logging.warning("Transform not found or is identity in ds_ref. GeoTIFFs may be incorrectly georeferenced if coordinates are not geographic/projected.")
        except Exception as rio_err:
            logging.warning(f"Could not extract CRS/Transform from ds_ref: {rio_err}.")

        for name, data in metrics.items():
            if data is None or np.isnan(data).all():
                continue
            try:
                lowered = name.lower()
                if 'rmse' in lowered:
                    units = ds_ref[TARGET_VARIABLE].attrs.get('units', 'count')
                elif 'r2' in lowered:
                    units = "1"
                elif 'ljung_box_pvalue' in lowered:
                    units = "p-value"
                else:
                    units = "unknown"

                metric_da = xr.DataArray(
                    data,
                    dims=("y", "x"),
                    coords=grid_coords,
                    name=name,
                    attrs={"long_name": name.replace('_', ' ').title(), "units": units},
                )
                if ref_crs:
                    metric_da = metric_da.rio.write_crs(ref_crs, inplace=True)
                if ref_transform and not ref_transform.is_identity:
                    metric_da = metric_da.rio.write_transform(ref_transform, inplace=True)
                metric_da = metric_da.rio.set_spatial_dims(x_dim='x', y_dim='y', inplace=True)

                filepath = os.path.join(outdir, f"{name.lower()}_map.tif")
                metric_da.rio.to_raster(filepath, tiled=True, compress='LZW', num_threads='ALL_CPUS', nodata=np.nan)
                wrote_any = True
            except Exception as e_metric:
                logging.error(f"❌ Failed to save metric GeoTIFF for {name}: {e_metric}")

        if wrote_any:
            logging.info(f"✅ Evaluation metric GeoTIFFs saved to directory: {outdir}")
        else:
            logging.warning("No valid evaluation metrics found to save as GeoTIFF.")
    except Exception as e_general:
        logging.error(f"❌ General error in save_evaluation_metrics_geotiff: {e_general}", exc_info=True)

def save_forecast_geotiff(forecast_stack: np.ndarray, ds_ref: xr.Dataset, time_labels: pd.DatetimeIndex, outdir: str, var_name: str = "Forecast", prefix: str = ""):
    """Write one GeoTIFF per forecast time step into *outdir*.

    Slice ``t`` of *forecast_stack* is stored as
    ``<prefix>_<var_name>_<YYYYMMDD>.tif`` using ``time_labels[t]`` for the
    date part. Slices that are entirely NaN are skipped. Coordinates, CRS and
    transform come from *ds_ref*, and the units attribute is copied from its
    target variable. Failures are logged and never raised to the caller.
    """
    os.makedirs(outdir, exist_ok=True)
    wrote_any = False
    try:
        if "y" not in ds_ref.coords or "x" not in ds_ref.coords:
            raise ValueError("Coords 'y' or 'x' missing in ds_ref")
        grid_coords = {"y": ds_ref["y"], "x": ds_ref["x"]}

        # Georeferencing is best effort: missing pieces only produce warnings.
        ref_crs = None
        ref_transform = None
        try:
            if hasattr(ds_ref, 'rio') and ds_ref.rio.crs:
                ref_crs = ds_ref.rio.crs
            else:
                logging.warning(f"CRS not found in ds_ref for {prefix} {var_name}. GeoTIFFs will lack CRS.")
            if hasattr(ds_ref, 'rio') and ds_ref.rio.transform():
                ref_transform = ds_ref.rio.transform()
            if not ref_transform or ref_transform.is_identity:
                logging.warning(f"Transform not found or is identity in ds_ref for {prefix} {var_name}.")
        except Exception as rio_err:
            logging.warning(f"Could not extract CRS/Transform for {prefix} {var_name} from ds_ref: {rio_err}.")

        filename_prefix = f"{prefix}_" if prefix else ""
        target_units = ds_ref[TARGET_VARIABLE].attrs.get('units', 'unknown')

        for t in range(forecast_stack.shape[0]):
            step_grid = forecast_stack[t]
            if np.isnan(step_grid).all():
                continue
            try:
                timestamp_str = time_labels[t].strftime("%Y%m%d")
                step_da = xr.DataArray(
                    step_grid,
                    dims=("y", "x"),
                    coords=grid_coords,
                    name=f"{var_name}_{timestamp_str}",
                    attrs={"long_name": f"{prefix.capitalize()} {var_name} for {timestamp_str}", "units": target_units},
                )
                if ref_crs:
                    step_da = step_da.rio.write_crs(ref_crs, inplace=True)
                if ref_transform and not ref_transform.is_identity:
                    step_da = step_da.rio.write_transform(ref_transform, inplace=True)
                step_da = step_da.rio.set_spatial_dims(x_dim='x', y_dim='y', inplace=True)

                filepath = os.path.join(outdir, f"{filename_prefix}{var_name.lower()}_{timestamp_str}.tif")
                step_da.rio.to_raster(filepath, tiled=True, compress='LZW', num_threads='ALL_CPUS', nodata=np.nan)
                wrote_any = True
            except Exception as e_slice:
                logging.error(f"❌ Failed to save {prefix} forecast GeoTIFF for {time_labels[t].strftime('%Y%m%d')} ({var_name}): {e_slice}")

        if wrote_any:
            logging.info(f"✅ {prefix.capitalize()} forecast GeoTIFFs for {var_name} saved to directory: {outdir}")
        else:
            logging.warning(f"No valid forecast slices found to save as GeoTIFF for {prefix} {var_name}.")
    except Exception as e_general:
        logging.error(f"❌ General error in save_forecast_geotiff ({prefix} {var_name}): {e_general}", exc_info=True)

def save_aggregated_geotiff(aggregated_data: np.ndarray, ds_ref: xr.Dataset, time_period: pd.DatetimeIndex, outdir: str, var_name: str = "Aggregated", prefix: str = "agg"):
    """Write a single aggregated grid to a GeoTIFF named after its time span.

    The file name is ``<prefix>_<var_name>_<start>_to_<end>.tif``, with the
    dates taken from the first and last entries of *time_period*. Nothing is
    written when the grid is entirely NaN. Coordinates, CRS and transform come
    from *ds_ref*. Failures are logged and never raised to the caller.
    """
    os.makedirs(outdir, exist_ok=True)
    if np.isnan(aggregated_data).all():
        logging.warning(f"Skipping saving aggregated GeoTIFF for {prefix} {var_name} as data contains only NaN.")
        return

    try:
        if "y" not in ds_ref.coords or "x" not in ds_ref.coords:
            raise ValueError("Coords 'y' or 'x' missing in ds_ref")
        grid_coords = {"y": ds_ref["y"], "x": ds_ref["x"]}

        # Georeferencing is best effort: missing pieces only produce warnings.
        ref_crs = None
        ref_transform = None
        try:
            if hasattr(ds_ref, 'rio') and ds_ref.rio.crs:
                ref_crs = ds_ref.rio.crs
            else:
                logging.warning(f"CRS not found in ds_ref for {prefix} {var_name}. GeoTIFF will lack CRS.")
            if hasattr(ds_ref, 'rio') and ds_ref.rio.transform():
                ref_transform = ds_ref.rio.transform()
            if not ref_transform or ref_transform.is_identity:
                logging.warning(f"Transform not found or is identity in ds_ref for {prefix} {var_name}.")
        except Exception as rio_err:
            logging.warning(f"Could not extract CRS/Transform for {prefix} {var_name} from ds_ref: {rio_err}.")

        filename_prefix = f"{prefix}_" if prefix else ""
        target_units = ds_ref[TARGET_VARIABLE].attrs.get('units', 'unknown')
        start_date_str = time_period[0].strftime("%Y%m%d")
        end_date_str = time_period[-1].strftime("%Y%m%d")
        period_str = f"{start_date_str}_to_{end_date_str}"

        aggregate_da = xr.DataArray(
            aggregated_data,
            dims=("y", "x"),
            coords=grid_coords,
            name=f"{var_name}_{period_str}",
            attrs={"long_name": f"{prefix.capitalize()} {var_name} from {start_date_str} to {end_date_str}", "units": target_units},
        )
        if ref_crs:
            aggregate_da = aggregate_da.rio.write_crs(ref_crs, inplace=True)
        if ref_transform and not ref_transform.is_identity:
            aggregate_da = aggregate_da.rio.write_transform(ref_transform, inplace=True)
        aggregate_da = aggregate_da.rio.set_spatial_dims(x_dim='x', y_dim='y', inplace=True)

        filepath = os.path.join(outdir, f"{filename_prefix}{var_name.lower()}_{period_str}.tif")
        aggregate_da.rio.to_raster(filepath, tiled=True, compress='LZW', num_threads='ALL_CPUS', nodata=np.nan)
        logging.info(f"✅ Aggregated GeoTIFF for {var_name} ({period_str}) saved to: {filepath}")
    except Exception as e_general:
        logging.error(f"❌ Failed to save aggregated GeoTIFF ({prefix} {var_name}): {e_general}", exc_info=True)

def _drop_grid_mapping_refs(ds_out: xr.Dataset) -> None:
    """Remove ``grid_mapping`` attributes and the ``spatial_ref`` variable from *ds_out*."""
    for data_var_name in list(ds_out.data_vars):
        ds_out[data_var_name].attrs.pop('grid_mapping', None)
    if "spatial_ref" in ds_out:
        del ds_out["spatial_ref"]


def save_forecast_netcdf(
    forecast_stack: np.ndarray, metrics: Optional[dict], ds_ref: xr.Dataset,
    time_labels: pd.DatetimeIndex, out_path: str, target_var: str = 'COUNT', is_future: bool = False
):
    """Save a forecast stack, and for evaluation runs its metric grids, to NetCDF.

    Nothing is written when *forecast_stack* is entirely NaN. For evaluation
    runs (``is_future=False``) every metric that is neither ``None`` nor
    entirely NaN is stored as a ``(y, x)`` variable next to the
    ``(time, y, x)`` ``forecast`` variable; metrics are ignored for future
    runs. The CRS of *ds_ref* is written when available, otherwise the
    ``grid_mapping`` references are removed. Failures are logged and never
    raised to the caller.

    Args:
        forecast_stack: Forecast values indexed as ``[time, y, x]``.
        metrics: Mapping of metric name to 2-D grid, or ``None``.
        ds_ref: Reference dataset providing ``y``/``x`` coordinates, the
            target variable's units and the CRS.
        time_labels: Time stamps of the forecast steps.
        out_path: Destination file. The name of its parent directory is used
            to derive the model name written to the global attributes.
        target_var: Name of the target variable in *ds_ref*.
        is_future: ``True`` for future forecasts, ``False`` for evaluation.
    """
    if np.isnan(forecast_stack).all():
        logging.warning(f"Skipping saving NetCDF to {out_path} as forecast stack contains only NaN.")
        return

    run_kind = 'future' if is_future else 'evaluation'
    metrics_valid = False
    if not is_future and metrics:
        metrics_valid = any(data is not None and not np.isnan(data).all() for data in metrics.values())
    # The forecast is known to hold data at this point, so the only remaining
    # degraded case for an evaluation run is "no usable metrics".
    if not is_future and not metrics_valid:
        logging.warning(f"Metrics for evaluation NetCDF {out_path} are missing or all NaN. Saving forecast data only.")

    ds_out = None
    try:
        y_coord = ds_ref["y"] if "y" in ds_ref else xr.DataArray(np.arange(forecast_stack.shape[1]), dims="y", name="y")
        x_coord = ds_ref["x"] if "x" in ds_ref else xr.DataArray(np.arange(forecast_stack.shape[2]), dims="x", name="x")
        time_coord = xr.DataArray(time_labels, dims="time", name="time", attrs={"standard_name": "time"})
        coords_forecast = {"time": time_coord, "y": y_coord, "x": x_coord}
        coords_metrics = {"y": y_coord, "x": x_coord}
        target_units = ds_ref[target_var].attrs.get('units', 'unknown')
        fill_value = np.nan

        # Model name = last "_"-separated token of the output directory name
        # (the whole name when it contains no underscore).
        model_name = os.path.basename(os.path.dirname(out_path)).split('_')[-1].upper()
        title_prefix = "Future" if is_future else "Evaluation"
        global_attrs = {
            "Conventions": "CF-1.8", "title": f"{title_prefix} Spatial-Temporal Forecast Output ({model_name} Model)",
            "institution": "Generated by Python Script", "source_data": os.path.basename(NC_FILE),
            "history": f"Created on {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')} using {model_name} model.",
            "forecast_steps": FORECAST_STEPS, "n_lag_used": N_LAG, "target_variable": target_var,
            "time_coverage_start": time_labels[0].strftime('%Y-%m-%d %H:%M:%S'), "time_coverage_end": time_labels[-1].strftime('%Y-%m-%d %H:%M:%S'),
        }
        if hasattr(ds_ref.rio, 'crs') and ds_ref.rio.crs:
            global_attrs["geospatial_crs"] = str(ds_ref.rio.crs)
        if SPATIAL_FEATURE_NAME in ds_ref.variables:
            global_attrs["spatial_feature_used"] = SPATIAL_FEATURE_NAME
        if LANDSLIDE_FEATURE_NAME in ds_ref.variables:
            global_attrs["landslide_data_source"] = os.path.basename(LANDSLIDE_SHP_FILE)
            global_attrs["landslide_feature_name"] = LANDSLIDE_FEATURE_NAME

        forecast_attrs = {"long_name": f"{title_prefix} Forecasted {target_var}", "units": target_units, "coordinates": "time y x", "grid_mapping": "spatial_ref"}
        forecast_da = xr.DataArray(forecast_stack, coords=coords_forecast, dims=("time", "y", "x"), name="forecast", attrs=forecast_attrs)
        ds_out_dict = {"forecast": forecast_da}

        if metrics_valid:
            metric_names = []
            for name, data in metrics.items():
                if data is None or np.isnan(data).all():
                    continue
                metric_attrs = {"long_name": name.replace('_', ' ').title(), "coordinates": "y x", "grid_mapping": "spatial_ref"}
                if 'rmse' in name.lower():
                    metric_attrs["units"] = target_units
                elif 'r2' in name.lower():
                    metric_attrs.update({"units": "1", "valid_range": [-np.inf, 1.0]})
                elif 'ljung_box_pvalue' in name.lower():
                    metric_attrs.update({"units": "p-value", "valid_range": [0.0, 1.0]})
                ds_out_dict[name] = xr.DataArray(data, coords=coords_metrics, dims=("y", "x"), name=name, attrs=metric_attrs)
                metric_names.append(name)
            if metric_names:
                global_attrs["evaluation_metrics"] = ", ".join(metric_names)

        ds_out = xr.Dataset(ds_out_dict, attrs=global_attrs)
        try:
            if hasattr(ds_ref, 'rio') and ds_ref.rio.crs:
                ds_out.rio.write_crs(ds_ref.rio.crs, inplace=True)
                ds_out.rio.write_coordinate_system(inplace=True)
                logging.info("CRS and grid mapping variable added to output NetCDF.")
            else:
                logging.warning("CRS not found in ds_ref. Output NetCDF will lack full georeferencing information.")
                # Without a CRS the grid_mapping references would point at nothing.
                _drop_grid_mapping_refs(ds_out)
        except Exception as crs_err:
            logging.warning(f"Could not write CRS info to NetCDF: {crs_err}")
            _drop_grid_mapping_refs(ds_out)

        encoding = {}
        for var_name_enc in ds_out.data_vars:
            encoding[var_name_enc] = {'_FillValue': fill_value, 'zlib': True, 'complevel': 4}
        for coord_name_enc in ds_out.coords:
            coord_kind = ds_out[coord_name_enc].dtype.kind
            if coord_kind in 'ifc':
                encoding[coord_name_enc] = {'_FillValue': None}
            elif coord_kind == 'M':
                encoding[coord_name_enc] = {'_FillValue': None, 'dtype': 'double', 'units': "days since 1970-01-01"}
            # Object-typed coordinates are left to xarray's default encoding.
        if "spatial_ref" in ds_out and isinstance(ds_out["spatial_ref"], xr.DataArray):
            encoding["spatial_ref"] = {'_FillValue': None}

        # dirname is '' for a bare file name and os.makedirs('') raises, so the
        # directory is only created when the path actually has one.
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        ds_out.to_netcdf(out_path, encoding=encoding, format='NETCDF4')
        logging.info(f"✅ Saved {run_kind} forecast data to NetCDF: {out_path}")
    except Exception as e:
        logging.error(f"❌ Failed to save {run_kind} NetCDF file {out_path}: {e}", exc_info=True)
    finally:
        if ds_out is not None:
            ds_out.close()

In [None]:
# --- FUNGSI MODEL FORECASTERS (EVALUASI) ---
def get_stationarity(timeseries: np.ndarray, p_threshold: float = 0.05, max_diff: int = 2) -> Tuple[np.ndarray, int]:
    """
    Difference a series until the Augmented Dickey-Fuller (ADF) test reports stationarity.

    The series is trimmed to the span between its first and last non-NaN value,
    gaps inside that span are linearly interpolated (anything still missing
    becomes 0), and the result is differenced while the ADF p-value stays above
    `p_threshold`, at most `max_diff` times.

    Args:
        timeseries: Array of observations; may contain NaN.
        p_threshold: ADF p-value above which another difference is applied.
        max_diff: Maximum number of differencing passes.

    Returns:
        Tuple `(series, d)` with the processed series and the number of
        differences applied. Inputs with fewer than 10 samples or fewer than
        10 non-NaN values come back as an untouched copy with `d = 0`.
    """
    min_samples = 10      # below this length the ADF test is not attempted
    flat_tol = 1e-9       # a (near-)constant series is returned without testing

    series = timeseries.copy()
    diff_order = 0
    if len(series) < min_samples:
        return series, diff_order

    observed = np.nonzero(~np.isnan(series))[0]
    if len(observed) < min_samples:
        return series, diff_order

    # Drop leading/trailing NaN padding, then fill the interior gaps.
    series = series[observed[0]:observed[-1] + 1]
    if len(series) < min_samples:
        return series, diff_order
    filled = pd.Series(series).interpolate(method='linear', limit_direction='both')
    series = filled.fillna(0).values
    if np.nanstd(series) < flat_tol:
        return series, diff_order

    try:
        p_value = adfuller(series, autolag='AIC')[1]
    except Exception:
        # A failed test is treated as "not stationary" so differencing is tried.
        p_value = 1.0

    while diff_order < max_diff and p_value > p_threshold:
        series = np.diff(series)
        diff_order += 1
        if len(series) < min_samples or np.nanstd(series) < flat_tol:
            break
        try:
            p_value = adfuller(series, autolag='AIC')[1]
        except Exception:
            # Stop differencing as soon as the test can no longer be evaluated.
            break
    return series, diff_order

def forecast_arima(ts_train: np.ndarray, ts_test_obs: np.ndarray, forecast_steps: int, ljung_box_lags_config: int) -> Tuple[np.ndarray, float, float, float, Optional[Tuple[int,int,int]]]:
    """
    Fit a non-seasonal ARIMA model with `pmdarima.auto_arima` and score it on a hold-out window.

    Args:
        ts_train: Training observations of the target. Flattened if multi-dimensional;
            NaN entries are dropped before fitting.
        ts_test_obs: Observed values for the forecast window. Flattened if
            multi-dimensional and compared element-wise with the forecast, so it
            is expected to hold `forecast_steps` values.
        forecast_steps: Number of steps to forecast past the end of the training data.
        ljung_box_lags_config: Upper bound on the lag used for the Ljung-Box test.

    Returns:
        Tuple `(y_pred, rmse, r2, ljung_box_p_value, order)`:
            y_pred: NumPy array of `forecast_steps` forecasts clipped at 0; all NaN
                when fewer than 20 valid training values exist or fitting failed.
            rmse, r2: Scores over positions where both forecast and observation
                are non-NaN; NaN when they cannot be computed.
            ljung_box_p_value: Ljung-Box p-value of the in-sample residuals, NaN
                when the test was skipped or failed.
            order: The `(p, d, q)` order chosen by auto_arima, or None.
    """
    y_pred = np.full(forecast_steps, np.nan)
    rmse, r2 = np.nan, np.nan
    ljung_box_p_value_insample = np.nan
    order_used = None

    if ts_train.ndim > 1:
        ts_train = ts_train.flatten()
    if ts_test_obs.ndim > 1:
        ts_test_obs = ts_test_obs.flatten()

    ts_train_valid = ts_train[~np.isnan(ts_train)]
    if len(ts_train_valid) < 20:  # auto_arima works better with more data
        return y_pred, rmse, r2, ljung_box_p_value_insample, order_used

    # Safety-net imputation. `.bfill().ffill()` replaces the deprecated
    # `fillna(method=...)` form and matches the ML forecasters in this file.
    series_train = pd.Series(ts_train_valid).interpolate(method='linear', limit_direction='both').bfill().ffill()
    if series_train.isnull().any() or len(series_train) < 20:
        return y_pred, rmse, r2, ljung_box_p_value_insample, order_used

    try:
        # MAX_DIFFERENCING is a module-level constant defined outside this function.
        auto_model = pm.auto_arima(series_train,
                                   start_p=1, start_q=1,
                                   max_p=3, max_q=3,
                                   d=None,  # Let auto_arima determine d
                                   max_d=MAX_DIFFERENCING,
                                   seasonal=False,  # Set to True and specify m if seasonality is expected
                                   stepwise=True,
                                   suppress_warnings=True,
                                   error_action='ignore',  # Skip models that fail to fit
                                   trace=False  # Set to True to see search process
                                   )
        order_used = auto_model.order  # (p, d, q)

        # predict() can hand back a pandas Series when the model was fitted on one;
        # copy into a plain float array so the declared ndarray return type holds
        # and the in-place masking below never hits a read-only view.
        y_pred = np.array(auto_model.predict(n_periods=forecast_steps), dtype=float)
        y_pred[~np.isfinite(y_pred)] = np.nan

        insample_residuals = np.asarray(auto_model.resid(), dtype=float)
        valid_insample_residuals = insample_residuals[~np.isnan(insample_residuals)]

        p, _, q = order_used  # d is part of the model, not counted in model_df
        num_params = p + q    # Number of AR and MA parameters

        actual_lags_for_lb = min(ljung_box_lags_config, len(valid_insample_residuals) - 1)
        # The test needs more lags than estimated parameters and non-constant residuals.
        if actual_lags_for_lb > num_params and np.std(valid_insample_residuals) > 1e-6:
            try:
                lb_test_df = acorr_ljungbox(valid_insample_residuals,
                                            lags=[actual_lags_for_lb],
                                            model_df=num_params,
                                            return_df=True)
                if not lb_test_df.empty:
                    ljung_box_p_value_insample = lb_test_df['lb_pvalue'].iloc[0]
            except Exception as lb_err:
                logging.debug("Ljung-Box test on ARIMA residuals failed: %s", lb_err)
    except Exception as fit_err:
        # Best effort per series: keep whatever was computed so far, but leave a trace.
        logging.debug("ARIMA fitting or diagnostics failed: %s", fit_err)

    y_pred = np.maximum(0, y_pred)
    common_valid_indices = ~np.isnan(y_pred) & ~np.isnan(ts_test_obs)
    if np.any(common_valid_indices):
        y_true_valid = ts_test_obs[common_valid_indices]
        y_pred_valid = y_pred[common_valid_indices]
        try:
            rmse = np.sqrt(mean_squared_error(y_true_valid, y_pred_valid))
            # R2 is undefined for a single point or a constant observation window.
            if len(y_true_valid) > 1 and np.nanstd(y_true_valid) > 1e-6:
                r2 = r2_score(y_true_valid, y_pred_valid)
        except Exception as metric_err:
            logging.debug("ARIMA evaluation metrics could not be computed: %s", metric_err)
    return y_pred, rmse, r2, ljung_box_p_value_insample, order_used

def forecast_lstm(
    ts_input: np.ndarray, n_lag: int, forecast_steps: int, n_features: int,
    ljung_box_lags_config: int, hyperparams: Dict[str, Any]
) -> Tuple[np.ndarray, float, float, float, Optional[MinMaxScaler]]:
    """
    Train a single-layer LSTM on all but the last `forecast_steps` rows and score it on those rows.

    Column 0 of `ts_input` is the target; every column is fed to the model as an
    input feature. Forecasting is recursive: each prediction becomes the target
    of the next input row, while the other features repeat their last known value.

    Args:
        ts_input: 2-D array `(time, n_features)`; may contain NaN.
        n_lag: Number of past time steps in each input window.
        forecast_steps: Length of the hold-out window at the end of `ts_input`.
        n_features: Expected number of columns in `ts_input`.
        ljung_box_lags_config: Upper bound on the lag used for the Ljung-Box test.
        hyperparams: Optional keys "lstm_units", "lstm_activation",
            "learning_rate", "epochs" and "batch_size_val".

    Returns:
        Tuple `(y_pred, rmse, r2, ljung_box_p_value, scaler)`. `y_pred` holds the
        hold-out forecasts in original units, clipped at 0 (NaN on failure); the
        Ljung-Box p-value is computed on scaled in-sample residuals; `scaler` is
        the fitted MinMaxScaler, or None when fitting was never reached.
    """
    y_pred_unscaled = np.full(forecast_steps, np.nan)
    rmse, r2 = np.nan, np.nan
    ljung_box_p_value_insample = np.nan
    scaler = None

    units = hyperparams.get("lstm_units", 32)
    activation = hyperparams.get("lstm_activation", 'relu')
    step_size = hyperparams.get("learning_rate", 0.01)
    n_epochs = hyperparams.get("epochs", 50)
    requested_batch = hyperparams.get("batch_size_val", None)

    if ts_input.ndim != 2 or ts_input.shape[1] != n_features or ts_input.shape[0] < n_lag + forecast_steps:
        return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

    holdout_target = ts_input[-forecast_steps:, 0]
    history = ts_input[:-forecast_steps, :]
    # The scaler only sees training rows that are complete in every feature.
    complete_rows = history[~np.isnan(history).any(axis=1)]
    if complete_rows.shape[0] < 2:
        return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

    try:
        scaler = MinMaxScaler()
        scaler.fit(complete_rows)
        scaled_all = pd.DataFrame(scaler.transform(ts_input)).ffill().bfill().fillna(0.0).values

        # Last n_lag training rows: the seed window for the recursive forecast.
        seed_window = scaled_all[-(n_lag + forecast_steps):-forecast_steps, :]
        if seed_window.shape[0] != n_lag:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

        scaled_history = scaled_all[:-forecast_steps, :]
        if len(scaled_history) < n_lag + 1:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

        n_samples = len(scaled_history) - n_lag
        windows = [scaled_history[i:i + n_lag, :] for i in range(n_samples)]
        targets = [scaled_history[i + n_lag, 0] for i in range(n_samples)]
        if not windows:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler
        X_train_scaled = np.array(windows)
        y_train_scaled = np.array(targets)

        tf.keras.backend.clear_session()
        network_layers = [
            tf.keras.layers.LSTM(units, activation=activation, input_shape=(n_lag, n_features)),
            tf.keras.layers.Dense(1),
        ]
        model = tf.keras.Sequential(network_layers)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=step_size), loss='mse')

        if requested_batch is None:
            batch = min(16, len(X_train_scaled))
        else:
            batch = requested_batch
        if batch == 0 and len(X_train_scaled) > 0:
            batch = 1
        model.fit(X_train_scaled, y_train_scaled, epochs=n_epochs, batch_size=batch, verbose=0)

        # Residual diagnostics on the (scaled) training fit.
        fitted_scaled = model.predict(X_train_scaled, verbose=0).reshape(-1)
        residuals = y_train_scaled - fitted_scaled
        residuals = residuals[~np.isnan(residuals)]
        lb_lag = min(ljung_box_lags_config, len(residuals) - 1)
        if lb_lag > 0 and np.std(residuals) > 1e-6:
            try:
                lb_df = acorr_ljungbox(residuals, lags=[lb_lag], return_df=True, model_df=0)
                if not lb_df.empty:
                    ljung_box_p_value_insample = lb_df['lb_pvalue'].iloc[0]
            except Exception:
                pass

        # Recursive multi-step forecast.
        scaled_steps = []
        window = seed_window.reshape(1, n_lag, n_features)
        for _ in range(forecast_steps):
            window = np.nan_to_num(window, nan=0.0)
            step_pred = model.predict(window, verbose=0)[0, 0]
            scaled_steps.append(step_pred)
            # New row: predicted target plus the previous row's other features.
            next_row = window[0, -1, :].copy()
            next_row[0] = step_pred
            shifted = np.concatenate((window[0, 1:, :], next_row[np.newaxis, :]), axis=0)
            window = shifted.reshape(1, n_lag, n_features)

        # The scaler expects all columns, so pad the non-target ones before inverting.
        padded = np.zeros((forecast_steps, n_features))
        padded[:, 0] = np.array(scaled_steps)
        for col in range(1, n_features):
            col_vals = seed_window[:, col]
            padded[:, col] = 0.0 if np.isnan(col_vals).all() else np.nanmean(col_vals)
        y_pred_unscaled = scaler.inverse_transform(padded)[:, 0]
        y_pred_unscaled[~np.isfinite(y_pred_unscaled)] = np.nan
    except Exception:
        y_pred_unscaled.fill(np.nan)

    y_pred_unscaled = np.maximum(0, y_pred_unscaled)
    both_valid = ~np.isnan(y_pred_unscaled) & ~np.isnan(holdout_target)
    if both_valid.any():
        observed = holdout_target[both_valid]
        predicted = y_pred_unscaled[both_valid]
        try:
            rmse = np.sqrt(mean_squared_error(observed, predicted))
            if len(observed) > 1 and np.nanstd(observed) > 1e-6:
                r2 = r2_score(observed, predicted)
        except Exception:
            pass
    return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

def forecast_ann(
    ts_input: np.ndarray, n_lag: int, forecast_steps: int, n_features: int,
    ljung_box_lags_config: int, hyperparams: Dict[str, Any]
) -> Tuple[np.ndarray, float, float, float, Optional[MinMaxScaler]]:
    """
    Train a feed-forward network on all but the last `forecast_steps` rows and score it on those rows.

    Each training sample is a flattened window of `n_lag` rows (all features);
    the label is the target (column 0) of the following row. Forecasting is
    recursive: each prediction becomes the target of the next input row, while
    the other features repeat their last known value.

    Args:
        ts_input: 2-D array `(time, n_features)`; may contain NaN.
        n_lag: Number of past time steps in each input window.
        forecast_steps: Length of the hold-out window at the end of `ts_input`.
        n_features: Expected number of columns in `ts_input`.
        ljung_box_lags_config: Upper bound on the lag used for the Ljung-Box test.
        hyperparams: Optional keys "ann_layers_units", "ann_activation",
            "learning_rate", "epochs" and "batch_size_val".

    Returns:
        Tuple `(y_pred, rmse, r2, ljung_box_p_value, scaler)`. `y_pred` holds the
        hold-out forecasts in original units, clipped at 0 (NaN on failure); the
        Ljung-Box p-value is computed on scaled in-sample residuals; `scaler` is
        the fitted MinMaxScaler, or None when fitting was never reached.
    """
    y_pred_unscaled = np.full(forecast_steps, np.nan)
    rmse, r2 = np.nan, np.nan
    ljung_box_p_value_insample = np.nan
    scaler = None

    hidden_units = hyperparams.get("ann_layers_units", [64, 32])
    hidden_activation = hyperparams.get("ann_activation", 'relu')
    step_size = hyperparams.get("learning_rate", 0.01)
    n_epochs = hyperparams.get("epochs", 50)
    requested_batch = hyperparams.get("batch_size_val", None)

    if ts_input.ndim != 2 or ts_input.shape[1] != n_features or ts_input.shape[0] < n_lag + forecast_steps:
        return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

    holdout_target = ts_input[-forecast_steps:, 0]
    history = ts_input[:-forecast_steps, :]
    # The scaler only sees training rows that are complete in every feature.
    complete_rows = history[~np.isnan(history).any(axis=1)]
    if complete_rows.shape[0] < 2:
        return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

    try:
        scaler = MinMaxScaler()
        scaler.fit(complete_rows)
        scaled_all = pd.DataFrame(scaler.transform(ts_input)).ffill().bfill().fillna(0.0).values

        # Last n_lag training rows: the seed window for the recursive forecast.
        seed_window = scaled_all[-(n_lag + forecast_steps):-forecast_steps, :]
        if seed_window.shape[0] != n_lag:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

        scaled_history = scaled_all[:-forecast_steps, :]
        if len(scaled_history) < n_lag + 1:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

        n_samples = len(scaled_history) - n_lag
        flat_windows = [scaled_history[i:i + n_lag, :].flatten() for i in range(n_samples)]
        targets = [scaled_history[i + n_lag, 0] for i in range(n_samples)]
        if not flat_windows:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler
        X_train_flat_scaled = np.array(flat_windows)
        y_train_scaled = np.array(targets)

        tf.keras.backend.clear_session()
        network_layers = [
            tf.keras.layers.Dense(hidden_units[0], activation=hidden_activation, input_shape=(n_lag * n_features,))
        ]
        network_layers += [
            tf.keras.layers.Dense(width, activation=hidden_activation) for width in hidden_units[1:]
        ]
        network_layers.append(tf.keras.layers.Dense(1))
        model = tf.keras.Sequential(network_layers)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=step_size), loss='mse')

        if requested_batch is None:
            batch = min(16, len(X_train_flat_scaled))
        else:
            batch = requested_batch
        if batch == 0 and len(X_train_flat_scaled) > 0:
            batch = 1
        model.fit(X_train_flat_scaled, y_train_scaled, epochs=n_epochs, batch_size=batch, verbose=0)

        # Residual diagnostics on the (scaled) training fit.
        fitted_scaled = model.predict(X_train_flat_scaled, verbose=0).reshape(-1)
        residuals = y_train_scaled - fitted_scaled
        residuals = residuals[~np.isnan(residuals)]
        lb_lag = min(ljung_box_lags_config, len(residuals) - 1)
        if lb_lag > 0 and np.std(residuals) > 1e-6:
            try:
                lb_df = acorr_ljungbox(residuals, lags=[lb_lag], return_df=True, model_df=0)
                if not lb_df.empty:
                    ljung_box_p_value_insample = lb_df['lb_pvalue'].iloc[0]
            except Exception:
                pass

        # Recursive multi-step forecast on a sliding 2-D window.
        scaled_steps = []
        window = seed_window
        for _ in range(forecast_steps):
            window = np.nan_to_num(window, nan=0.0)
            step_pred = model.predict(window.reshape(1, -1), verbose=0)[0, 0]
            scaled_steps.append(step_pred)
            # New row: predicted target plus the previous row's other features.
            next_row = window[-1, :].copy()
            next_row[0] = step_pred
            window = np.vstack((window[1:, :], next_row))

        # The scaler expects all columns, so pad the non-target ones before inverting.
        padded = np.zeros((forecast_steps, n_features))
        padded[:, 0] = np.array(scaled_steps)
        for col in range(1, n_features):
            col_vals = seed_window[:, col]
            padded[:, col] = 0.0 if np.isnan(col_vals).all() else np.nanmean(col_vals)
        y_pred_unscaled = scaler.inverse_transform(padded)[:, 0]
        y_pred_unscaled[~np.isfinite(y_pred_unscaled)] = np.nan
    except Exception:
        y_pred_unscaled.fill(np.nan)

    y_pred_unscaled = np.maximum(0, y_pred_unscaled)
    both_valid = ~np.isnan(y_pred_unscaled) & ~np.isnan(holdout_target)
    if both_valid.any():
        observed = holdout_target[both_valid]
        predicted = y_pred_unscaled[both_valid]
        try:
            rmse = np.sqrt(mean_squared_error(observed, predicted))
            if len(observed) > 1 and np.nanstd(observed) > 1e-6:
                r2 = r2_score(observed, predicted)
        except Exception:
            pass
    return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

def forecast_rf(
    ts_input: np.ndarray, n_lag: int, forecast_steps: int, n_features: int,
    ljung_box_lags_config: int, hyperparams: Dict[str, Any]
) -> Tuple[np.ndarray, float, float, float, Optional[MinMaxScaler]]:
    """
    Train a random forest on all but the last `forecast_steps` rows and score it on those rows.

    Each training sample is a flattened window of `n_lag` rows (all features);
    the label is the target (column 0) of the following row. Forecasting is
    recursive: each prediction becomes the target of the next input row, while
    the other features repeat their last known value.

    Args:
        ts_input: 2-D array `(time, n_features)`; may contain NaN.
        n_lag: Number of past time steps in each input window.
        forecast_steps: Length of the hold-out window at the end of `ts_input`.
        n_features: Expected number of columns in `ts_input`.
        ljung_box_lags_config: Upper bound on the lag used for the Ljung-Box test.
        hyperparams: Optional keys "n_estimators", "rf_random_state",
            "max_depth", "min_samples_split" and "min_samples_leaf".

    Returns:
        Tuple `(y_pred, rmse, r2, ljung_box_p_value, scaler)`. `y_pred` holds the
        hold-out forecasts in original units, clipped at 0 (NaN on failure); the
        Ljung-Box p-value is computed on scaled in-sample residuals; `scaler` is
        the fitted MinMaxScaler, or None when fitting was never reached.
    """
    y_pred_unscaled = np.full(forecast_steps, np.nan)
    rmse, r2 = np.nan, np.nan
    ljung_box_p_value_insample = np.nan
    scaler = None

    tree_count = hyperparams.get("n_estimators", 100)
    seed = hyperparams.get("rf_random_state", 42)
    depth_limit = hyperparams.get("max_depth", None)
    split_min = hyperparams.get("min_samples_split", 2)
    leaf_min = hyperparams.get("min_samples_leaf", 1)

    if ts_input.ndim != 2 or ts_input.shape[1] != n_features or ts_input.shape[0] < n_lag + forecast_steps:
        return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

    holdout_target = ts_input[-forecast_steps:, 0]
    history = ts_input[:-forecast_steps, :]
    # Rows complete in every feature are a subset of rows with a valid target,
    # so this single check also covers "fewer than two valid target values".
    complete_rows = history[~np.isnan(history).any(axis=1)]
    if complete_rows.shape[0] < 2:
        return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

    try:
        scaler = MinMaxScaler()
        scaler.fit(complete_rows)
        scaled_all = pd.DataFrame(scaler.transform(ts_input)).ffill().bfill().fillna(0.0).values

        # Last n_lag training rows: the seed window for the recursive forecast.
        seed_window = scaled_all[-(n_lag + forecast_steps):-forecast_steps, :]
        if seed_window.shape[0] != n_lag:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

        scaled_history = scaled_all[:-forecast_steps, :]
        if len(scaled_history) < n_lag + 1:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

        n_samples = len(scaled_history) - n_lag
        flat_windows = [scaled_history[i:i + n_lag, :].flatten() for i in range(n_samples)]
        targets = [scaled_history[i + n_lag, 0] for i in range(n_samples)]
        if not flat_windows:
            return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler
        X_train_flat_scaled = np.array(flat_windows)
        y_train_scaled = np.array(targets)

        model = RandomForestRegressor(
            n_estimators=tree_count,
            random_state=seed,
            max_depth=depth_limit,
            min_samples_split=split_min,
            min_samples_leaf=leaf_min,
            n_jobs=1,
        )
        model.fit(X_train_flat_scaled, y_train_scaled)

        # Residual diagnostics on the (scaled) training fit.
        fitted_scaled = model.predict(X_train_flat_scaled).reshape(-1)
        residuals = y_train_scaled - fitted_scaled
        residuals = residuals[~np.isnan(residuals)]
        lb_lag = min(ljung_box_lags_config, len(residuals) - 1)
        if lb_lag > 0 and np.std(residuals) > 1e-6:
            try:
                lb_df = acorr_ljungbox(residuals, lags=[lb_lag], return_df=True, model_df=0)
                if not lb_df.empty:
                    ljung_box_p_value_insample = lb_df['lb_pvalue'].iloc[0]
            except Exception:
                pass

        # Recursive multi-step forecast on a sliding 2-D window.
        scaled_steps = []
        window = seed_window
        for _ in range(forecast_steps):
            window = np.nan_to_num(window, nan=0.0)
            step_pred = model.predict(window.reshape(1, -1))[0]
            scaled_steps.append(step_pred)
            # New row: predicted target plus the previous row's other features.
            next_row = window[-1, :].copy()
            next_row[0] = step_pred
            window = np.vstack((window[1:, :], next_row))

        # The scaler expects all columns, so pad the non-target ones before inverting.
        padded = np.zeros((forecast_steps, n_features))
        padded[:, 0] = np.array(scaled_steps)
        for col in range(1, n_features):
            col_vals = seed_window[:, col]
            padded[:, col] = 0.0 if np.isnan(col_vals).all() else np.nanmean(col_vals)
        y_pred_unscaled = scaler.inverse_transform(padded)[:, 0]
        y_pred_unscaled[~np.isfinite(y_pred_unscaled)] = np.nan
    except Exception:
        y_pred_unscaled.fill(np.nan)

    y_pred_unscaled = np.maximum(0, y_pred_unscaled)
    both_valid = ~np.isnan(y_pred_unscaled) & ~np.isnan(holdout_target)
    if both_valid.any():
        observed = holdout_target[both_valid]
        predicted = y_pred_unscaled[both_valid]
        try:
            rmse = np.sqrt(mean_squared_error(observed, predicted))
            if len(observed) > 1 and np.nanstd(observed) > 1e-6:
                r2 = r2_score(observed, predicted)
        except Exception:
            pass
    return y_pred_unscaled, rmse, r2, ljung_box_p_value_insample, scaler

In [None]:
# --- FUNGSI MODEL FORECASTERS (FUTURE FORECAST) ---
def forecast_arima_future(ts_full_history: np.ndarray, n_lag: int, forecast_steps: int, n_features: int) -> np.ndarray:
    """
    Forecast `forecast_steps` values beyond the full history with a non-seasonal auto-ARIMA model.

    Args:
        ts_full_history: Historical data. For a 2-D array column 0 is used as
            the target; anything else is flattened. NaN entries are dropped.
        n_lag: Unused; kept so all `*_future` forecasters share a call pattern.
        forecast_steps: Number of future steps to forecast.
        n_features: Unused; kept for the same reason as `n_lag`.

    Returns:
        NumPy array of `forecast_steps` forecasts clipped at 0. All NaN when
        fewer than 20 valid observations exist or the model could not be fitted.
    """
    y_pred_future = np.full(forecast_steps, np.nan)
    if ts_full_history.ndim > 1 and ts_full_history.shape[1] > 0:
        ts_target_history = ts_full_history[:, 0]
    else:
        ts_target_history = ts_full_history.flatten()

    ts_target_history_valid = ts_target_history[~np.isnan(ts_target_history)]
    if len(ts_target_history_valid) < 20:
        return y_pred_future

    # Safety-net imputation. `.bfill().ffill()` replaces the deprecated
    # `fillna(method=...)` form and matches the ML forecasters in this file.
    series_history = pd.Series(ts_target_history_valid).interpolate(method='linear', limit_direction='both').bfill().ffill()
    if series_history.isnull().any() or len(series_history) < 20:
        return y_pred_future

    try:
        # MAX_DIFFERENCING is a module-level constant defined outside this function.
        auto_model_future = pm.auto_arima(series_history, start_p=1, start_q=1, max_p=3, max_q=3,
                                          d=None, max_d=MAX_DIFFERENCING, seasonal=False,
                                          stepwise=True, suppress_warnings=True, error_action='ignore', trace=False)
        # predict() can hand back a pandas Series when the model was fitted on one;
        # copy into a plain float array so the declared ndarray return type holds
        # and the in-place masking below never hits a read-only view.
        y_pred_future = np.array(auto_model_future.predict(n_periods=forecast_steps), dtype=float)
        y_pred_future[~np.isfinite(y_pred_future)] = np.nan
    except Exception as fit_err:
        # Best effort per series: fall back to the NaN forecast, but leave a trace.
        logging.debug("Future ARIMA forecast failed: %s", fit_err)

    y_pred_future = np.maximum(0, y_pred_future)
    return y_pred_future

def forecast_lstm_future(
    ts_full_history: np.ndarray, n_lag: int, forecast_steps: int, n_features: int, hyperparams: Dict[str, Any]
) -> np.ndarray:
    """
    Train a single-layer LSTM on the full history and forecast `forecast_steps` values beyond it.

    Column 0 of `ts_full_history` is the target; every column is an input
    feature. Forecasting is recursive: each prediction becomes the target of the
    next input row, while the other features repeat their last known value.

    Args:
        ts_full_history: 2-D array `(time, n_features)`; may contain NaN.
        n_lag: Number of past time steps in each input window.
        forecast_steps: Number of future steps to forecast.
        n_features: Expected number of columns in `ts_full_history`.
        hyperparams: Optional keys "lstm_units", "lstm_activation",
            "learning_rate", "epochs" and "batch_size_val".

    Returns:
        Array of `forecast_steps` forecasts in original units, clipped at 0.
        All NaN when the input shape is wrong, fewer than `n_lag + 1` complete
        rows exist, or training/prediction raised.
    """
    y_pred_unscaled_future = np.full(forecast_steps, np.nan)
    scaler = None

    units = hyperparams.get("lstm_units", 32)
    activation = hyperparams.get("lstm_activation", 'relu')
    step_size = hyperparams.get("learning_rate", 0.01)
    n_epochs = hyperparams.get("epochs", 50)
    requested_batch = hyperparams.get("batch_size_val", None)

    if ts_full_history.ndim != 2 or ts_full_history.shape[1] != n_features:
        return y_pred_unscaled_future
    # The scaler only sees rows that are complete in every feature.
    complete_rows = ts_full_history[~np.isnan(ts_full_history).any(axis=1)]
    if complete_rows.shape[0] < n_lag + 1:
        return y_pred_unscaled_future

    try:
        scaler = MinMaxScaler()
        scaler.fit(complete_rows)
        scaled_all = pd.DataFrame(scaler.transform(ts_full_history)).ffill().bfill().fillna(0.0).values
        if len(scaled_all) < n_lag + 1:
            return y_pred_unscaled_future

        n_samples = len(scaled_all) - n_lag
        windows = [scaled_all[i:i + n_lag, :] for i in range(n_samples)]
        targets = [scaled_all[i + n_lag, 0] for i in range(n_samples)]
        if not windows:
            return y_pred_unscaled_future
        X_train_scaled = np.array(windows)
        y_train_scaled = np.array(targets)

        tf.keras.backend.clear_session()
        network_layers = [
            tf.keras.layers.LSTM(units, activation=activation, input_shape=(n_lag, n_features)),
            tf.keras.layers.Dense(1),
        ]
        model = tf.keras.Sequential(network_layers)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=step_size), loss='mse')

        if requested_batch is None:
            batch = min(16, len(X_train_scaled))
        else:
            batch = requested_batch
        if batch == 0 and len(X_train_scaled) > 0:
            batch = 1
        model.fit(X_train_scaled, y_train_scaled, epochs=n_epochs, batch_size=batch, verbose=0)

        # Seed the recursion with the most recent n_lag rows of history.
        seed_window = scaled_all[-n_lag:, :]
        scaled_steps = []
        window = seed_window.reshape(1, n_lag, n_features)
        for _ in range(forecast_steps):
            window = np.nan_to_num(window, nan=0.0)
            step_pred = model.predict(window, verbose=0)[0, 0]
            scaled_steps.append(step_pred)
            # New row: predicted target plus the previous row's other features.
            next_row = window[0, -1, :].copy()
            next_row[0] = step_pred
            shifted = np.concatenate((window[0, 1:, :], next_row[np.newaxis, :]), axis=0)
            window = shifted.reshape(1, n_lag, n_features)

        # The scaler expects all columns, so pad the non-target ones before inverting.
        padded = np.zeros((forecast_steps, n_features))
        padded[:, 0] = np.array(scaled_steps)
        for col in range(1, n_features):
            col_vals = seed_window[:, col]
            padded[:, col] = 0.0 if np.isnan(col_vals).all() else np.nanmean(col_vals)
        y_pred_unscaled_future = scaler.inverse_transform(padded)[:, 0]
        y_pred_unscaled_future[~np.isfinite(y_pred_unscaled_future)] = np.nan
    except Exception:
        pass

    y_pred_unscaled_future = np.maximum(0, y_pred_unscaled_future)
    return y_pred_unscaled_future

def forecast_ann_future(
    ts_full_history: np.ndarray, n_lag: int, forecast_steps: int, n_features: int, hyperparams: Dict[str, Any]
) -> np.ndarray:
    """
    Train a feed-forward network on the full history and forecast `forecast_steps` values beyond it.

    Each training sample is a flattened window of `n_lag` rows (all features);
    the label is the target (column 0) of the following row. Forecasting is
    recursive: each prediction becomes the target of the next input row, while
    the other features repeat their last known value.

    Args:
        ts_full_history: 2-D array `(time, n_features)`; may contain NaN.
        n_lag: Number of past time steps in each input window.
        forecast_steps: Number of future steps to forecast.
        n_features: Expected number of columns in `ts_full_history`.
        hyperparams: Optional keys "ann_layers_units", "ann_activation",
            "learning_rate", "epochs" and "batch_size_val".

    Returns:
        Array of `forecast_steps` forecasts in original units, clipped at 0.
        All NaN when the input shape is wrong, fewer than `n_lag + 1` complete
        rows exist, or training/prediction raised.
    """
    y_pred_unscaled_future = np.full(forecast_steps, np.nan)
    scaler = None

    hidden_units = hyperparams.get("ann_layers_units", [64, 32])
    hidden_activation = hyperparams.get("ann_activation", 'relu')
    step_size = hyperparams.get("learning_rate", 0.01)
    n_epochs = hyperparams.get("epochs", 50)
    requested_batch = hyperparams.get("batch_size_val", None)

    if ts_full_history.ndim != 2 or ts_full_history.shape[1] != n_features:
        return y_pred_unscaled_future
    # The scaler only sees rows that are complete in every feature.
    complete_rows = ts_full_history[~np.isnan(ts_full_history).any(axis=1)]
    if complete_rows.shape[0] < n_lag + 1:
        return y_pred_unscaled_future

    try:
        scaler = MinMaxScaler()
        scaler.fit(complete_rows)
        scaled_all = pd.DataFrame(scaler.transform(ts_full_history)).ffill().bfill().fillna(0.0).values
        if len(scaled_all) < n_lag + 1:
            return y_pred_unscaled_future

        n_samples = len(scaled_all) - n_lag
        flat_windows = [scaled_all[i:i + n_lag, :].flatten() for i in range(n_samples)]
        targets = [scaled_all[i + n_lag, 0] for i in range(n_samples)]
        if not flat_windows:
            return y_pred_unscaled_future
        X_train_flat_scaled = np.array(flat_windows)
        y_train_scaled = np.array(targets)

        tf.keras.backend.clear_session()
        network_layers = [
            tf.keras.layers.Dense(hidden_units[0], activation=hidden_activation, input_shape=(n_lag * n_features,))
        ]
        network_layers += [
            tf.keras.layers.Dense(width, activation=hidden_activation) for width in hidden_units[1:]
        ]
        network_layers.append(tf.keras.layers.Dense(1))
        model = tf.keras.Sequential(network_layers)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=step_size), loss='mse')

        if requested_batch is None:
            batch = min(16, len(X_train_flat_scaled))
        else:
            batch = requested_batch
        if batch == 0 and len(X_train_flat_scaled) > 0:
            batch = 1
        model.fit(X_train_flat_scaled, y_train_scaled, epochs=n_epochs, batch_size=batch, verbose=0)

        # Seed the recursion with the most recent n_lag rows of history.
        seed_window = scaled_all[-n_lag:, :]
        scaled_steps = []
        window = seed_window
        for _ in range(forecast_steps):
            window = np.nan_to_num(window, nan=0.0)
            step_pred = model.predict(window.reshape(1, -1), verbose=0)[0, 0]
            scaled_steps.append(step_pred)
            # New row: predicted target plus the previous row's other features.
            next_row = window[-1, :].copy()
            next_row[0] = step_pred
            window = np.vstack((window[1:, :], next_row))

        # The scaler expects all columns, so pad the non-target ones before inverting.
        padded = np.zeros((forecast_steps, n_features))
        padded[:, 0] = np.array(scaled_steps)
        for col in range(1, n_features):
            col_vals = seed_window[:, col]
            padded[:, col] = 0.0 if np.isnan(col_vals).all() else np.nanmean(col_vals)
        y_pred_unscaled_future = scaler.inverse_transform(padded)[:, 0]
        y_pred_unscaled_future[~np.isfinite(y_pred_unscaled_future)] = np.nan
    except Exception:
        pass

    y_pred_unscaled_future = np.maximum(0, y_pred_unscaled_future)
    return y_pred_unscaled_future

def forecast_rf_future(
    ts_full_history: np.ndarray, n_lag: int, forecast_steps: int, n_features: int, hyperparams: Dict[str, Any]
) -> np.ndarray:
    """Fit a Random Forest on one pixel's full history and forecast recursively.

    The history is min-max scaled (scaler fitted on rows without any NaN),
    gaps are filled with ffill/bfill/0.0, and every window of ``n_lag``
    consecutive rows (flattened) is used to predict the target (column 0)
    of the following row.  Forecasting is recursive: each prediction becomes
    the target of the next input row, while the non-target features are
    held at their last known (scaled) value.

    Args:
        ts_full_history: 2-D array ``(time, n_features)``; column 0 is the target.
        n_lag: Number of past time steps in each input window.
        forecast_steps: Number of future steps to predict.
        n_features: Expected number of columns in ``ts_full_history``.
        hyperparams: Optional overrides for ``n_estimators``, ``rf_random_state``,
            ``max_depth``, ``min_samples_split`` and ``min_samples_leaf``.
            ``None`` is treated as an empty dict.

    Returns:
        1-D array of length ``forecast_steps`` with un-scaled predictions
        clipped at 0.  The array is all-NaN when the input is unusable or
        when fitting/prediction fails.
    """
    y_pred_unscaled_future = np.full(forecast_steps, np.nan)

    # Tolerate a missing hyperparameter dict instead of raising AttributeError.
    hyperparams = hyperparams or {}
    n_estimators = hyperparams.get("n_estimators", 100)
    rf_random_state = hyperparams.get("rf_random_state", 42)
    max_depth = hyperparams.get("max_depth", None)
    min_samples_split = hyperparams.get("min_samples_split", 2)
    min_samples_leaf = hyperparams.get("min_samples_leaf", 1)

    if ts_full_history.ndim != 2 or ts_full_history.shape[1] != n_features:
        return y_pred_unscaled_future
    # Only fully observed rows are used to fit the scaler.
    complete_rows = ts_full_history[~np.isnan(ts_full_history).any(axis=1)]
    if complete_rows.shape[0] < n_lag + 1:
        return y_pred_unscaled_future

    try:
        scaler = MinMaxScaler()
        scaler.fit(complete_rows)
        ts_scaled = scaler.transform(ts_full_history)
        # After ffill/bfill/fillna the array holds no NaN, so no further
        # per-window imputation is needed.
        ts_scaled_imputed = pd.DataFrame(ts_scaled).ffill().bfill().fillna(0.0).values

        # The full history has at least n_lag + 1 rows (checked above), so
        # there is always at least one training window.
        n_windows = len(ts_scaled_imputed) - n_lag
        X_train_flat_scaled = np.array(
            [ts_scaled_imputed[k:k + n_lag, :].ravel() for k in range(n_windows)]
        )
        y_train_scaled = ts_scaled_imputed[n_lag:, 0]

        model = RandomForestRegressor(
            n_estimators=n_estimators, random_state=rf_random_state, max_depth=max_depth,
            min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, n_jobs=1
        )
        model.fit(X_train_flat_scaled, y_train_scaled)

        # Recursive multi-step forecast over a sliding window of the last n_lag rows.
        window = ts_scaled_imputed[-n_lag:, :].copy()
        y_pred_scaled_future_list = []
        for _ in range(forecast_steps):
            next_pred_target_scaled = model.predict(window.reshape(1, -1))[0]
            y_pred_scaled_future_list.append(next_pred_target_scaled)
            # New row: predicted target, other features carried forward unchanged.
            next_row = window[-1].copy()
            next_row[0] = next_pred_target_scaled
            window = np.vstack([window[1:], next_row])

        # MinMaxScaler inverts each column independently, so the non-target
        # columns of this helper array do not influence column 0.
        scaled_block = np.zeros((forecast_steps, n_features))
        scaled_block[:, 0] = y_pred_scaled_future_list
        y_pred_unscaled_future = scaler.inverse_transform(scaled_block)[:, 0]
        y_pred_unscaled_future[~np.isfinite(y_pred_unscaled_future)] = np.nan
    except Exception as exc:
        # Best effort per pixel: keep whatever forecast we have (NaN by
        # default) but leave a trace instead of failing silently.
        logging.debug("forecast_rf_future failed; returning fallback forecast: %s", exc)

    # Negative values are clipped to 0; NaN entries stay NaN.
    return np.maximum(0, y_pred_unscaled_future)

# In [None]:
# --- MAIN PER-MODEL FUNCTIONS (EVALUATION & FUTURE) ---
# run_forecast_per_pixel_eval is adapted to handle the new output of forecast_arima
# and to pass the hyperparameters on to the ML models
def run_forecast_per_pixel_eval(args):
    """
    Run the evaluation forecast for a single pixel.

    ``args`` is a 10-tuple:
    (i, j, model_func, ts_data_input, n_lag, forecast_steps, ts_test_obs_target,
    n_features, ljung_box_lags_config, model_hyperparams).

    Returns the 7-tuple (i, j, y_pred, rmse, r2, ljung_box_p_value, arima_order).
    ``arima_order`` is whatever forecast_arima returns as its fifth value and is
    None for the ML models.  When the model function is unknown or anything
    raises, the forecast is all-NaN, the metrics are NaN and the order is None.
    """
    (row, col, model_func, series, n_lag, forecast_steps, observed_target,
     n_features, ljung_box_lags_cfg, model_hyperparams_dict) = args

    # Result handed back for unknown model functions and for any failure.
    nan_result = (row, col, np.full(forecast_steps, np.nan), np.nan, np.nan, np.nan, None)

    try:
        if model_func == forecast_arima:
            # forecast_arima also reports the order it selected.
            y_pred, rmse, r2, ljung_box_p_val, order_used = model_func(
                series, observed_target, forecast_steps, ljung_box_lags_cfg
            )
            return row, col, y_pred, rmse, r2, ljung_box_p_val, order_used

        if model_func in [forecast_lstm, forecast_ann, forecast_rf]:
            # The ML forecasters receive the hyperparameter dict; their fifth
            # return value (the scaler) is not needed here.
            y_pred, rmse, r2, ljung_box_p_val, _ = model_func(
                series, n_lag, forecast_steps, n_features, ljung_box_lags_cfg, model_hyperparams_dict
            )
            return row, col, y_pred, rmse, r2, ljung_box_p_val, None

        return nan_result
    except Exception:
        return nan_result

def run_generic_forecast_eval(
    model_func: Callable, model_name: str, ds: xr.Dataset, feature_list: List[str], target_var: str,
    n_lag: int, forecast_steps: int, time_index: pd.DatetimeIndex, time_test: pd.DatetimeIndex,
    ny: int, nx: int, sample_coords: List[Tuple[int, int]], output_base_dir: str, ds_ref: xr.Dataset,
    # model_hyperparams is no longer a parameter here; it is read from module globals instead
    use_parallel: bool = True, n_jobs: int = -1
):
    """
    Run the evaluation forecast of one model over every pixel and save the outputs.

    The features in ``feature_list`` are stacked along a trailing axis and each
    pixel series is taken as ``[:, i, j, :]``.  The last ``forecast_steps`` time
    steps form the test period.  One task per eligible pixel is handed to
    ``run_forecast_per_pixel_eval`` (via joblib when ``use_parallel`` is true,
    otherwise sequentially), and the results are gathered into a forecast stack
    and RMSE / R2 / Ljung-Box p-value maps that are passed to the plotting and
    saving helpers.  For ARIMA the selected orders are also written to a CSV
    file and the differencing order to a raster.

    Hyperparameters for "LSTM", "ANN" and "RF" come from the module globals
    HYPERPARAMS_LSTM, HYPERPARAMS_ANN and HYPERPARAMS_RF; any other model name
    gets ``None``.

    Returns None.  The function logs an error and returns early when
    ``target_var`` is not in ``feature_list`` or when stacking the feature data
    fails.
    """
    outdir = os.path.join(output_base_dir, f"evaluation_{model_name}"); os.makedirs(outdir, exist_ok=True)
    logging.info(f"--- Starting {model_name} EVALUATION (Target: {target_var}, Features: {feature_list}) ---")

    # Pick the hyperparameters to use based on the model name
    current_model_hyperparams = None # Default for ARIMA
    if model_name == "LSTM":
        current_model_hyperparams = HYPERPARAMS_LSTM
        logging.info(f"Using Hyperparameters for LSTM: {current_model_hyperparams}")
    elif model_name == "ANN":
        current_model_hyperparams = HYPERPARAMS_ANN
        logging.info(f"Using Hyperparameters for ANN: {current_model_hyperparams}")
    elif model_name == "RF":
        current_model_hyperparams = HYPERPARAMS_RF
        logging.info(f"Using Hyperparameters for RF: {current_model_hyperparams}")

    if target_var not in feature_list: logging.error(f"Target var '{target_var}' not in features: {feature_list}"); return
    target_idx = feature_list.index(target_var); n_features = len(feature_list)
    # Output containers, NaN wherever no result is produced
    forecast_stack = np.full((forecast_steps, ny, nx), np.nan)
    metrics_maps = {'rmse': np.full((ny, nx), np.nan), 'r2': np.full((ny, nx), np.nan), 'ljung_box_pvalue': np.full((ny, nx), np.nan)}
    arima_d_map = np.full((ny, nx), np.nan) if model_name == "ARIMA" else None

    try:
        # Features are stacked on a new last axis; pixels are indexed [:, i, j, :] below
        all_feature_data = np.stack([ds[feat].values for feat in feature_list], axis=-1)
        logging.info(f"Stacked data shape for evaluation: {all_feature_data.shape}")
    except KeyError as ke: logging.error(f"Feature key error: {ke}"); return
    except Exception as stack_err: logging.error(f"Error stacking data: {stack_err}"); return

    # The last forecast_steps time steps are the test period; everything before is training
    n_time_total = all_feature_data.shape[0]; test_start_idx = n_time_total - forecast_steps; train_end_idx = test_start_idx
    logging.info(f"Preparing evaluation tasks for {ny*nx} pixels..."); tasks = []
    for i in range(ny):
        for j in range(nx):
            ts_multi_feature_pixel = all_feature_data[:, i, j, :]
            # Skip pixels whose target is NaN at every time step
            if np.isnan(ts_multi_feature_pixel[:, target_idx]).all(): continue
            ts_test_obs_target = ts_multi_feature_pixel[test_start_idx:, target_idx]

            if model_func == forecast_arima:
                ts_target_pixel_for_arima = ts_multi_feature_pixel[:, target_idx]
                if train_end_idx > 0: # Make sure there is training data
                    ts_train_arima = ts_target_pixel_for_arima[:train_end_idx]
                    # auto_arima needs more data, e.g. at least 20 points
                    if len(ts_train_arima[~np.isnan(ts_train_arima)]) >= 20 and not np.isnan(ts_test_obs_target).all():
                         # The 10th argument is model_hyperparams, None for ARIMA
                         tasks.append((i, j, model_func, ts_train_arima, n_lag, forecast_steps, ts_test_obs_target, 1, LJUNG_BOX_LAGS_CONFIG, None))
            elif model_func in [forecast_lstm, forecast_ann, forecast_rf]:
                input_start_idx = 0; # Use the whole history as ML input
                input_end_idx = train_end_idx + forecast_steps # All data seen by the model (train + test)
                # NOTE(review): the ML input includes the test period; presumably the
                # forecast_* functions split off the last forecast_steps rows themselves — confirm.
                ts_input_ml = ts_multi_feature_pixel[input_start_idx:input_end_idx, :]
                if ts_input_ml.shape[0] >= n_lag + forecast_steps: # Enough data to build sequences
                     train_part_target_for_ml = ts_input_ml[:-forecast_steps, target_idx]
                     if np.sum(~np.isnan(train_part_target_for_ml)) >= n_lag +1 : # Enough non-NaN data for training
                        spatial_feat_valid = True
                        if SPATIAL_FEATURE_NAME in feature_list:
                            spatial_feat_idx_in_ml = feature_list.index(SPATIAL_FEATURE_NAME)
                            spatial_train_part_ml = ts_input_ml[:-forecast_steps, spatial_feat_idx_in_ml]
                            # A spatial feature that is NaN over the whole training part disqualifies the pixel
                            if np.isnan(spatial_train_part_ml).all(): spatial_feat_valid = False
                        if spatial_feat_valid and not np.isnan(ts_test_obs_target).all():
                            # Pass on the current_model_hyperparams selected above
                            tasks.append((i, j, model_func, ts_input_ml, n_lag, forecast_steps, ts_test_obs_target, n_features, LJUNG_BOX_LAGS_CONFIG, current_model_hyperparams))
    results = []
    if len(tasks) > 0 :
        if use_parallel:
            logging.info(f"Running {model_name} evaluation forecast in parallel (n_jobs={n_jobs}) for {len(tasks)} pixels...")
            # -1 means "all CPUs"; any other value is capped at the CPU count
            if n_jobs == -1: actual_n_jobs = os.cpu_count() or 1
            else: actual_n_jobs = min(n_jobs, os.cpu_count() or 1)
            results = Parallel(n_jobs=actual_n_jobs, backend='loky', verbose=10)(delayed(run_forecast_per_pixel_eval)(task) for task in tasks)
        else:
            logging.info(f"Running {model_name} evaluation forecast sequentially for {len(tasks)} pixels...")
            results = [run_forecast_per_pixel_eval(task) for task in tqdm(tasks, desc=f"{model_name} Eval Pixels")]
    else: logging.warning(f"No valid evaluation tasks generated for {model_name}.")

    logging.info(f"Collecting evaluation results for {model_name}..."); valid_results_count = 0
    all_arima_orders_collected = [] # Collects every ARIMA order for the CSV written below
    for res in results:
        if res:
            i_res, j_res, y_pred_res, rmse_res, r2_res, ljung_box_pvalue_res, arima_order_or_d = res
            # Only forecasts of the expected length are stored; anything else leaves the pixel NaN
            if y_pred_res is not None and len(y_pred_res) == forecast_steps:
                forecast_stack[:, i_res, j_res] = y_pred_res
                if not np.isnan(y_pred_res).all(): valid_results_count += 1
            else: forecast_stack[:, i_res, j_res] = np.nan
            metrics_maps['rmse'][i_res, j_res] = rmse_res; metrics_maps['r2'][i_res, j_res] = r2_res
            metrics_maps['ljung_box_pvalue'][i_res, j_res] = ljung_box_pvalue_res
            if model_name == "ARIMA" and arima_order_or_d is not None:
                if isinstance(arima_order_or_d, tuple) and len(arima_order_or_d) == 3: # (p,d,q)
                    arima_d_map[i_res, j_res] = arima_order_or_d[1] # Store 'd'
                    all_arima_orders_collected.append({'y': i_res, 'x': j_res, 'order_p': arima_order_or_d[0], 'order_d': arima_order_or_d[1], 'order_q': arima_order_or_d[2]})
                # elif isinstance(arima_order_or_d, int) : # If only 'd' is returned (old version)
                #      arima_d_map[i_res, j_res] = arima_order_or_d

    logging.info(f"Finished collecting results. Got valid predictions for {valid_results_count} out of {len(tasks)} processed pixels.")

    if model_name == "ARIMA" and all_arima_orders_collected:
        try:
            orders_df = pd.DataFrame(all_arima_orders_collected)
            orders_csv_path = os.path.join(outdir, "arima_orders_used.csv")
            orders_df.to_csv(orders_csv_path, index=False)
            logging.info(f"✅ ARIMA orders (p,d,q) saved to: {orders_csv_path}")
        except Exception as e_csv:
            logging.error(f"❌ Failed to save ARIMA orders to CSV: {e_csv}")

    logging.info(f"--- {model_name} Evaluation Forecasting Complete ---"); logging.info(f"--- Saving {model_name} Evaluation Outputs to {outdir} ---")
    plot_accuracy_maps(metrics_maps, outdir); save_evaluation_metrics_geotiff(metrics_maps, ds_ref, outdir)
    plot_sample_timeseries(ds, forecast_stack, time_index, time_test, n_lag, forecast_steps, sample_coords, outdir, target_var=target_var)
    save_animation(forecast_stack, time_test, outdir, model_name, var_name=f"Forecast_{target_var}", prefix="eval")
    save_forecast_geotiff(forecast_stack, ds_ref, time_test, outdir, var_name=f"Forecast_{target_var}", prefix="eval")
    save_forecast_netcdf(forecast_stack, metrics_maps, ds_ref, time_test, os.path.join(outdir, f"forecast_eval_{model_name}.nc"), target_var=target_var, is_future=False)
    # The differencing-order map only exists for ARIMA and is skipped when it is entirely NaN
    if arima_d_map is not None and not np.isnan(arima_d_map).all():
        try:
            d_map_da = xr.DataArray(arima_d_map, coords={"y": ds_ref["y"], "x": ds_ref["x"]}, dims=("y", "x"), name="arima_d_order_auto", attrs={'long_name': 'ARIMA Differencing Order Used (auto_arima)'})
            if hasattr(ds_ref, 'rio') and ds_ref.rio.crs: d_map_da = d_map_da.rio.write_crs(ds_ref.rio.crs)
            if hasattr(ds_ref, 'rio') and ds_ref.rio.transform(): d_map_da = d_map_da.rio.write_transform(ds_ref.rio.transform())
            d_map_da = d_map_da.rio.set_spatial_dims(x_dim='x', y_dim='y')
            d_map_path = os.path.join(outdir, "arima_d_order_auto_map.tif")
            # NOTE(review): arima_d_map is a float array that uses NaN for missing pixels, yet it
            # is written with dtype='int8' and nodata=np.nan — confirm that rioxarray/rasterio
            # handle this combination as intended (an integer raster cannot store NaN).
            d_map_da.rio.to_raster(d_map_path, tiled=True, compress='LZW', num_threads='ALL_CPUS', nodata=np.nan, dtype='int8')
            logging.info(f"✅ ARIMA differencing order (d) map saved to: {d_map_path}")
        except Exception as e_dmap: logging.error(f"❌ Failed to save ARIMA d order map: {e_dmap}")
    logging.info(f"✅ {model_name} evaluation process completed and outputs saved to: {outdir}")

def run_forecast_per_pixel_future(args):
    """
    Run the future forecast for a single pixel.

    ``args`` is the 8-tuple
    (i, j, model_func_future, ts_full_history, n_lag, forecast_steps,
    n_features, model_hyperparams).

    Returns (i, j, forecast).  The forecast is an all-NaN array of length
    ``forecast_steps`` when the model function raises.
    """
    row, col, model_func_future, history, n_lag, forecast_steps, n_features, model_hyperparams_dict = args
    try:
        call_args = [history, n_lag, forecast_steps, n_features]
        # Only the ML forecasters take the hyperparameter dict; the ARIMA
        # future forecaster is called without it.
        if model_func_future != forecast_arima_future:
            call_args.append(model_hyperparams_dict)
        return row, col, model_func_future(*call_args)
    except Exception:
        return row, col, np.full(forecast_steps, np.nan)

def run_generic_future_forecast(
    model_func_future: Callable, model_name: str, ds: xr.Dataset, feature_list: List[str], target_var: str,
    n_lag: int, forecast_steps: int, time_future: pd.DatetimeIndex, ny: int, nx: int,
    output_base_dir: str, ds_ref: xr.Dataset,
    # model_hyperparams is not a parameter here; it is read from module globals
    use_parallel: bool = True, n_jobs: int = -1
):
    """
    Produce the future forecast of one model for every pixel and save the outputs.

    The features in ``feature_list`` are stacked along a trailing axis; each
    eligible pixel's full history becomes one task for
    ``run_forecast_per_pixel_future`` (run through joblib when ``use_parallel``
    is true, otherwise sequentially).  The per-pixel forecasts are gathered
    into a ``(forecast_steps, ny, nx)`` stack that is handed to the animation,
    GeoTIFF and NetCDF writers.

    Hyperparameters for "LSTM", "ANN" and "RF" come from the module globals
    HYPERPARAMS_LSTM, HYPERPARAMS_ANN and HYPERPARAMS_RF; other model names
    get ``None``.

    Returns None.  The function logs an error and returns early when
    ``target_var`` is missing from ``feature_list`` or when stacking fails.
    """
    outdir = os.path.join(output_base_dir, f"future_{model_name}")
    os.makedirs(outdir, exist_ok=True)
    logging.info(f"--- Starting {model_name} FUTURE FORECAST (Target: {target_var}, Features: {feature_list}) ---")

    # Hyperparameters depend on the model name; ARIMA (and anything else) gets None.
    current_model_hyperparams = None
    if model_name == "LSTM":
        current_model_hyperparams = HYPERPARAMS_LSTM
        logging.info(f"Using Hyperparameters for LSTM Future: {current_model_hyperparams}")
    elif model_name == "ANN":
        current_model_hyperparams = HYPERPARAMS_ANN
        logging.info(f"Using Hyperparameters for ANN Future: {current_model_hyperparams}")
    elif model_name == "RF":
        current_model_hyperparams = HYPERPARAMS_RF
        logging.info(f"Using Hyperparameters for RF Future: {current_model_hyperparams}")

    if target_var not in feature_list:
        logging.error(f"Target var '{target_var}' not in features: {feature_list}")
        return
    target_idx = feature_list.index(target_var)
    n_features = len(feature_list)
    future_forecast_stack = np.full((forecast_steps, ny, nx), np.nan)

    try:
        all_feature_data = np.stack([ds[feat].values for feat in feature_list], axis=-1)
        logging.info(f"Stacked data shape for future forecast: {all_feature_data.shape}")
    except KeyError as ke:
        logging.error(f"Feature key error: {ke}")
        return
    except Exception as stack_err:
        logging.error(f"Error stacking data: {stack_err}")
        return

    logging.info(f"Preparing future forecast tasks for {ny*nx} pixels...")
    # Per-run constants: auto_arima needs a longer series than the ML models.
    min_len_required = n_lag + 1 if model_func_future != forecast_arima_future else 20
    spatial_feat_idx = feature_list.index(SPATIAL_FEATURE_NAME) if SPATIAL_FEATURE_NAME in feature_list else -1
    tasks = []
    for i, j in np.ndindex(ny, nx):
        pixel_history = all_feature_data[:, i, j, :]
        target_missing = np.isnan(pixel_history[:, target_idx])
        if target_missing.all():
            continue
        if np.sum(~target_missing) < min_len_required:
            continue
        # A spatial feature that is NaN at every time step disqualifies the pixel.
        if spatial_feat_idx != -1 and np.isnan(pixel_history[:, spatial_feat_idx]).all():
            continue
        tasks.append((i, j, model_func_future, pixel_history, n_lag, forecast_steps, n_features, current_model_hyperparams))

    results = []
    if not tasks:
        logging.warning(f"No valid future forecast tasks generated for {model_name}.")
    elif use_parallel:
        logging.info(f"Running {model_name} future forecast in parallel (n_jobs={n_jobs}) for {len(tasks)} pixels...")
        # -1 means "all CPUs"; any other value is capped at the CPU count.
        cpu_total = os.cpu_count() or 1
        actual_n_jobs = cpu_total if n_jobs == -1 else min(n_jobs, cpu_total)
        results = Parallel(n_jobs=actual_n_jobs, backend='loky', verbose=10)(delayed(run_forecast_per_pixel_future)(task) for task in tasks)
    else:
        logging.info(f"Running {model_name} future forecast sequentially for {len(tasks)} pixels...")
        results = [run_forecast_per_pixel_future(task) for task in tqdm(tasks, desc=f"{model_name} Future Pixels")]

    logging.info(f"Collecting future forecast results for {model_name}...")
    valid_results_count = 0
    for res in results:
        if not res:
            continue
        i_res, j_res, y_pred_future_res = res
        has_expected_length = y_pred_future_res is not None and len(y_pred_future_res) == forecast_steps
        if not has_expected_length:
            future_forecast_stack[:, i_res, j_res] = np.nan
            continue
        future_forecast_stack[:, i_res, j_res] = y_pred_future_res
        if not np.isnan(y_pred_future_res).all():
            valid_results_count += 1
    logging.info(f"Finished collecting results. Got valid future predictions for {valid_results_count} out of {len(tasks)} processed pixels.")

    logging.info(f"--- {model_name} Future Forecasting Complete ---")
    logging.info(f"--- Saving {model_name} Future Forecast Outputs to {outdir} ---")
    save_animation(future_forecast_stack, time_future, outdir, model_name, var_name=f"Forecast_{target_var}", prefix="future")
    save_forecast_geotiff(future_forecast_stack, ds_ref, time_future, outdir, var_name=f"Forecast_{target_var}", prefix="future")
    save_forecast_netcdf(future_forecast_stack, None, ds_ref, time_future, os.path.join(outdir, f"forecast_future_{model_name}.nc"), target_var=target_var, is_future=True)
    logging.info(f"✅ {model_name} future forecast process completed and outputs saved to: {outdir}")

# In [None]:
# --- MAIN EXECUTION BLOCK ---
def main():
    """Run the whole pipeline: load data, build features, evaluate, forecast.

    Steps, all driven by module-level configuration constants:

    1. Open the NetCDF file, load it into memory and attach the target CRS
       and spatial dimensions through the ``rio`` accessor.
    2. Validate the target variable, the required dimensions and the length
       of the time axis against ``FORECAST_STEPS``.
    3. Derive the evaluation period (last ``FORECAST_STEPS`` time steps) and
       the future period (next ``FORECAST_STEPS`` steps at the inferred
       frequency, falling back to 'MS').
    4. Add the landslide-count feature (when the shapefile exists) and the
       spatial-average lag-1 feature.
    5. Either aggregate the actual test-period data only, or run the model
       evaluation and/or the future forecast for each enabled model.

    Expected errors are logged rather than propagated; the dataset is closed
    and the TensorFlow backend session cleared in all cases.
    """
    ds: Optional[xr.Dataset] = None
    try:
        logging.info(f"--- Memulai Proses Peramalan Spasial-Temporal (v3.1 - Tahap 4: Optimasi Hyperparameter) ---")
        logging.info(f"Base Directory: {os.path.abspath(BASE_DIR)}")
        logging.info(f"NetCDF Input: {NC_FILE}")
        logging.info(f"Target Variable: {TARGET_VARIABLE}, Target CRS: {TARGET_CRS}")
        logging.info(f"Forecast Steps: {FORECAST_STEPS}, N Lag (for ML): {N_LAG}")
        logging.info(f"ADF p-value Threshold: {ADF_P_THRESHOLD}, Max Differencing: {MAX_DIFFERENCING}")
        logging.info(f"Ljung-Box Lags Config: {LJUNG_BOX_LAGS_CONFIG}")
        logging.info(f"Hyperparameters LSTM: {HYPERPARAMS_LSTM}")
        logging.info(f"Hyperparameters ANN: {HYPERPARAMS_ANN}")
        logging.info(f"Hyperparameters RF: {HYPERPARAMS_RF}")

        # ------------------------------------------------------------------
        # 1. Load the target data
        # ------------------------------------------------------------------
        logging.info("--- Memuat Data Target (NetCDF) ---")
        nc_path = os.path.join(BASE_DIR, NC_FILE)
        try:
            try:
                ds = xr.open_dataset(nc_path)
            except ValueError:
                # The default engine could not open the file; retry with h5netcdf.
                logging.warning("Gagal engine default saat membuka NetCDF, mencoba h5netcdf...")
                try:
                    ds = xr.open_dataset(nc_path, engine='h5netcdf')
                except ImportError:
                    raise ImportError("Engine 'h5netcdf' tidak ditemukan. Harap install.")
            ds.load()
            logging.info(f"Berhasil memuat NetCDF '{NC_FILE}'.")

            if hasattr(ds, 'rio'):
                try:
                    # write_crs replaces the deprecated rio.set_crs.
                    ds = ds.rio.write_crs(TARGET_CRS, inplace=True)
                    if 'x' in ds.coords and 'y' in ds.coords:
                        current_x_dim = ds.rio.x_dim
                        current_y_dim = ds.rio.y_dim
                        if current_x_dim not in ds.dims or current_y_dim not in ds.dims:
                            ds = ds.rio.set_spatial_dims(x_dim='x', y_dim='y', inplace=True)
                            logging.info(f"Dimensi spasial diatur ke x='{ds.rio.x_dim}', y='{ds.rio.y_dim}'.")
                    logging.info(f"CRS dataset diatur ke: {ds.rio.crs}")
                    if ds.rio.crs and 'x' in ds.coords and 'y' in ds.coords:
                        try:
                            # Only (re)write the transform when none is set yet.
                            if ds.rio.transform().is_identity or not ds.rio.transform():
                                ds = ds.rio.write_transform(inplace=True)
                                logging.info(f"Transform dataset dihitung ulang/ditulis: {ds.rio.transform()}")
                        except Exception as e_trans:
                            logging.warning(f"Tidak dapat menulis/menghitung transform secara otomatis: {e_trans}")
                except Exception as e_crs:
                    logging.error(f"❌ Gagal mengatur CRS '{TARGET_CRS}' pada dataset: {e_crs}")
            else:
                logging.warning("❌ Atribut 'rio' tidak ditemukan pada dataset. Tidak dapat mengatur CRS.")
        except FileNotFoundError:
            raise FileNotFoundError(f"File NetCDF tidak ditemukan: {nc_path}")
        except ImportError:
            # Let the missing-engine error reach the ImportError handler below
            # instead of being re-wrapped as an IOError.
            raise
        except Exception as e_nc:
            raise IOError(f"Tidak dapat membuka atau memproses NetCDF '{NC_FILE}': {e_nc}")

        # ------------------------------------------------------------------
        # 2. Validate the dataset against the configuration
        # ------------------------------------------------------------------
        if TARGET_VARIABLE not in ds.variables:
            raise ValueError(f"Variabel target '{TARGET_VARIABLE}' tidak ditemukan.")
        required_dims = ['time', 'y', 'x']
        if not all(d in ds.dims for d in required_dims):
            raise ValueError(f"Dimensi {required_dims} harus ada.")
        ny, nx = ds.sizes['y'], ds.sizes['x']
        n_time = ds.sizes['time']
        logging.info(f"Data target dimuat: Var '{TARGET_VARIABLE}' shape (time={n_time}, y={ny}, x={nx}).")
        if FORECAST_STEPS <= 0:
            raise ValueError("FORECAST_STEPS > 0.")
        if n_time <= FORECAST_STEPS:
            raise ValueError(f"n_time ({n_time}) > FORECAST_STEPS ({FORECAST_STEPS}).")
        if n_time < 20:
            logging.warning(f"Data waktu ({n_time}) sangat pendek, hasil auto_arima mungkin tidak andal. Model ML juga mungkin terpengaruh.")

        # ------------------------------------------------------------------
        # 3. Time axes: history, evaluation period, future period
        # ------------------------------------------------------------------
        try:
            time_index = pd.to_datetime(ds['time'].values)
        except Exception as time_err:
            raise ValueError(f"Gagal parse 'time': {time_err}")
        logging.info(f"Periode data historis: {time_index[0].strftime('%Y-%m-%d')} hingga {time_index[-1].strftime('%Y-%m-%d')}")
        test_start_index = n_time - FORECAST_STEPS
        time_test = time_index[test_start_index:]
        logging.info(f"Periode evaluasi: {time_test[0].strftime('%Y-%m-%d')} hingga {time_test[-1].strftime('%Y-%m-%d')}")

        # time_future stays None when it cannot be derived; the future
        # forecast is then skipped further down.
        time_future = None
        try:
            inferred_freq = pd.infer_freq(time_index)
            freq = inferred_freq if inferred_freq else 'MS'
            logging.info(f"Frekuensi waktu digunakan untuk masa depan: {freq}")
            last_historical_date = time_index[-1]
            future_start_date = last_historical_date + pd.tseries.frequencies.to_offset(freq)
            time_future = pd.date_range(start=future_start_date, periods=FORECAST_STEPS, freq=freq)
            logging.info(f"Periode forecast masa depan: {time_future[0].strftime('%Y-%m-%d')} hingga {time_future[-1].strftime('%Y-%m-%d')}")
        except Exception as future_time_err:
            logging.warning(f"Gagal menentukan periode waktu masa depan: {future_time_err}. Forecast masa depan akan dilewati.")

        # ------------------------------------------------------------------
        # 4. Additional features for the ML models
        # ------------------------------------------------------------------
        features_to_use = [TARGET_VARIABLE]
        landslide_data_path = os.path.join(BASE_DIR, LANDSLIDE_SHP_FILE)
        if os.path.exists(landslide_data_path):
            landslide_da = process_landslide_data(landslide_data_path, LANDSLIDE_DATE_COLUMN, ds, LANDSLIDE_FEATURE_NAME)
            if landslide_da is not None:
                ds[LANDSLIDE_FEATURE_NAME] = landslide_da
                features_to_use.append(LANDSLIDE_FEATURE_NAME)
                logging.info(f"Fitur longsor '{LANDSLIDE_FEATURE_NAME}' ditambahkan.")
            else:
                logging.warning("Gagal memproses data longsor.")
        else:
            logging.warning(f"File data longsor tidak ditemukan di {landslide_data_path}.")
        try:
            spatial_feature_da = calculate_spatial_avg_lag1(ds[TARGET_VARIABLE], SPATIAL_FEATURE_NAME)
            ds[SPATIAL_FEATURE_NAME] = spatial_feature_da
            features_to_use.append(SPATIAL_FEATURE_NAME)
            logging.info(f"Fitur spasial '{SPATIAL_FEATURE_NAME}' ditambahkan.")
        except Exception as e_spatial:
            logging.error(f"Gagal menghitung fitur spasial: {e_spatial}. Melanjutkan tanpa fitur spasial.")
            if SPATIAL_FEATURE_NAME in features_to_use:
                features_to_use.remove(SPATIAL_FEATURE_NAME)
        logging.info(f"Fitur akhir yang digunakan untuk model ML: {features_to_use}")

        # ------------------------------------------------------------------
        # 5. Run configuration (edit these switches to change what is run)
        # ------------------------------------------------------------------
        run_arima_flag = True
        run_lstm_flag = True
        run_ann_flag = True
        run_rf_flag = True
        if n_time < N_LAG + 1 + FORECAST_STEPS and any([run_lstm_flag, run_ann_flag, run_rf_flag]):
            logging.warning(f"Jumlah data waktu ({n_time}) mungkin tidak cukup untuk N_LAG ({N_LAG}) + FORECAST_STEPS ({FORECAST_STEPS}) + 1 untuk melatih model ML dengan benar.")
        run_actual_aggregation_only = False
        run_evaluation = True
        run_future = True
        parallel_processing = True
        num_jobs = 10  # Adjust to the number of CPUs available
        ds_ref = ds  # The processed main dataset (CRS included) doubles as the spatial reference

        if run_actual_aggregation_only:
            logging.info("===== MEMULAI MODE: HANYA AGREGRASI DATA AKTUAL =====")
            target_data = ds[TARGET_VARIABLE].values
            actual_test_data = target_data[test_start_index:, :, :]
            if actual_test_data.size > 0:
                actual_agg_outdir = os.path.join(BASE_DIR, OUTPUT_DIR_ACTUAL_AGG)
                os.makedirs(actual_agg_outdir, exist_ok=True)
                logging.info(f"Melakukan agregasi (sum) data aktual '{TARGET_VARIABLE}'...")
                aggregated_actual_data = np.nansum(actual_test_data, axis=0)
                save_aggregated_geotiff(aggregated_actual_data, ds_ref, time_test, actual_agg_outdir, var_name=f"Actual_{TARGET_VARIABLE}", prefix="agg")
            else:
                logging.warning("Tidak ada data aktual pada periode tes untuk diagregasi.")
            logging.info("===== MODE AGREGRASI DATA AKTUAL SELESAI =====")
        else:
            if run_evaluation:
                logging.info("===== MEMULAI EVALUASI MODEL (Tahap 4) =====")
                eval_output_dir = os.path.join(BASE_DIR, OUTPUT_BASE_DIR_EVAL)
                # (enabled, model function, model name, features); ARIMA is
                # univariate and only receives the target variable.
                eval_runs = [
                    (run_arima_flag, forecast_arima, "ARIMA", [TARGET_VARIABLE]),
                    (run_lstm_flag, forecast_lstm, "LSTM", features_to_use),
                    (run_ann_flag, forecast_ann, "ANN", features_to_use),
                    (run_rf_flag, forecast_rf, "RF", features_to_use),
                ]
                for enabled, model_func, model_name, model_features in eval_runs:
                    if enabled:
                        run_generic_forecast_eval(
                            model_func, model_name, ds, model_features, TARGET_VARIABLE, N_LAG, FORECAST_STEPS,
                            time_index, time_test, ny, nx, SAMPLE_COORDS, eval_output_dir, ds_ref,
                            use_parallel=parallel_processing, n_jobs=num_jobs
                        )
                logging.info("===== EVALUASI MODEL SELESAI =====")
            else:
                logging.info("Melewati evaluasi model.")

            if run_future:
                if time_future is not None:
                    logging.info("===== MEMULAI FORECAST MASA DEPAN (Tahap 4) =====")
                    future_output_dir = os.path.join(BASE_DIR, OUTPUT_BASE_DIR_FUTURE)
                    future_runs = [
                        (run_arima_flag, forecast_arima_future, "ARIMA", [TARGET_VARIABLE]),
                        (run_lstm_flag, forecast_lstm_future, "LSTM", features_to_use),
                        (run_ann_flag, forecast_ann_future, "ANN", features_to_use),
                        (run_rf_flag, forecast_rf_future, "RF", features_to_use),
                    ]
                    for enabled, model_func_future, model_name, model_features in future_runs:
                        if enabled:
                            run_generic_future_forecast(
                                model_func_future, model_name, ds, model_features, TARGET_VARIABLE, N_LAG,
                                FORECAST_STEPS, time_future, ny, nx, future_output_dir, ds_ref,
                                use_parallel=parallel_processing, n_jobs=num_jobs
                            )
                    logging.info("===== FORECAST MASA DEPAN SELESAI =====")
                else:
                    logging.warning("Melewati forecast masa depan karena gagal menentukan periode waktu masa depan.")
            else:
                logging.info("Melewati forecast masa depan.")

            if not run_evaluation and not run_future:
                logging.warning("Evaluasi dan forecast masa depan keduanya dinonaktifkan.")
        logging.info("--- Semua Proses yang Dipilih Selesai ---")
    except FileNotFoundError as fnf_err:
        logging.error(f"❌ File Tidak Ditemukan: {fnf_err}")
    except ValueError as ve:
        logging.error(f"❌ Error Konfigurasi/Data: {ve}")
    except ImportError as imp_err:
        logging.error(f"❌ Error Impor Pustaka: {imp_err}.")
    except IOError as io_err:
        logging.error(f"❌ Error IO: {io_err}")
    except Exception as e:
        logging.critical(f"❌ Error kritis tidak terduga: {e}", exc_info=True)
    finally:
        if ds is not None:
            ds.close()
            logging.info("Dataset NetCDF ditutup.")
        # Clearing the Keras session is best effort.
        try:
            tf.keras.backend.clear_session()
            logging.info("Sesi backend TensorFlow dibersihkan.")
        except Exception:
            pass

# --- Execution entry point ---
# Call main() only when this file is run directly as a script; importing the
# module must not start the forecasting workflow as a side effect.
if __name__ == "__main__":
    main()

# NOTE(review): the lines below are console output pasted from a previous run,
# not code. They are kept verbatim but commented out so they are not parsed as
# Python statements.
#   ds = ds.rio.set_crs(TARGET_CRS, inplace=True)
#   ny, nx = ds_template.dims['y'], ds_template.dims['x']
# Rasterizing Months: 100%|███████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 468.94it/s]
# [Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
# [Parallel(n_jobs=10)]: Done   5 tasks      | elapsed:    9.0s
# [Parallel(n_jobs=10)]: Done  12 tasks      | elapsed:    9.1s
# [Parallel(n_jobs=10)]: Done  21 tasks      | elapsed:    9.6s
# [Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    9.8s
# [Parallel(n_jobs=10)]: Done  41 tasks      | elapsed:   10.0s
# [Parallel(n_jobs=10)]: Done  52 tasks      | elapsed:   10.2s
# [Parallel(n_jobs=10)]: Done  65 tasks      | elapsed:   10.5s
# [Parallel(n_jobs=10)]: Done  78 tasks      | elapsed:   10.8s
# [Parallel(n_jobs=10)]: Done  93 tasks      | elapsed:   11.0s
# [Parallel(n_jobs=10)]: Done 108 tasks      | elapsed:   11.3s
# [Parallel(n_jobs=10)]: Done 125 tasks      | elapsed:   11.6s
[Parallel(n_jobs=