In [82]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter
from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.stattools import grangercausalitytests

import warnings
warnings.filterwarnings('ignore')

# Features

In [83]:
era5_features = [
        # Temperature variables
        'temperature_2m',                       # Air temperature
        'temperature_2m_min',                   # Daily minimum air temperature
        'temperature_2m_max',                   # Daily maximum air temperature
        'soil_temperature_level_1',             # Topsoil temperature (0-7 cm)
        'soil_temperature_level_2',             # Soil temperature (7-28 cm)
        
        # Moisture variables
        'volumetric_soil_water_layer_1',        # Topsoil moisture content
        'volumetric_soil_water_layer_2',        # Soil moisture (7-28 cm)
        'volumetric_soil_water_layer_3',        # Soil moisture (28-100 cm)
        'total_precipitation_sum',              # Total rainfall and snow
        'dewpoint_temperature_2m',              # Air humidity indicator
        
        # Radiation and energy variables
        'surface_solar_radiation_downwards_sum', # Solar radiation at surface
        'surface_net_solar_radiation_sum',       # Net solar radiation at surface
        
        # Evaporation and water cycle
        'total_evaporation_sum',                 # Actual evaporation
        
        # Wind variables
        'u_component_of_wind_10m',               # East-west wind component
        'v_component_of_wind_10m'                # North-south wind component
    ]

openweather_features = [
        'temp',         # Current temperature (C or K depending on units)
        'feels_like',   # Perceived temperature considering humidity and wind (C or K)
        'temp_min',     # Minimum temperature at the moment (C or K)
        'temp_max',     # Maximum temperature at the moment (C or K)
        'pressure',     # Atmospheric pressure at sea level (hPa)
        'humidity',     # Humidity percentage (%)
        'wind_speed',   # Wind speed (meter/sec)
        'wind_deg',     # Wind direction in degrees (0–360)
        'rain_1h',      # Rain volume for the last 1 hour (mm)
        'rain_3h',      # Rain volume for the last 3 hours (mm)
        'clouds_all'    # Cloudiness percentage (%)
    ]

chirts_features = [
        'heat_index',
        'maximum_temperature',
        'minimum_temperature',
        'relative_humidity',
        'saturation_vapor_pressure',
        'vapor_pressure_deficit',
    ]

power_features = [
        'T2M',           # MERRA-2 Temperature at 2 Meters (C)
        'T2MDEW',        # MERRA-2 Dew/Frost Point at 2 Meters (C)
        'T2MWET',        # MERRA-2 Wet Bulb Temperature at 2 Meters (C)
        'TS',            # MERRA-2 Earth Skin Temperature (C)
        'T2M_RANGE',     # MERRA-2 Temperature at 2 Meters Range (C)
        'T2M_MAX',       # MERRA-2 Temperature at 2 Meters Maximum (C)
        'T2M_MIN',       # MERRA-2 Temperature at 2 Meters Minimum (C)
        'PS',            # MERRA-2 Surface Pressure (kPa)
        'WS2M',          # MERRA-2 Wind Speed at 2 Meters (m/s)
        'WS2M_MAX',      # MERRA-2 Wind Speed at 2 Meters Maximum (m/s)
        'WS2M_MIN',      # MERRA-2 Wind Speed at 2 Meters Minimum (m/s)
        'GWETTOP',       # MERRA-2 Surface Soil Wetness (1)
        'GWETROOT'      # MERRA-2 Root Zone Soil Wetness (1)
    ]

selected_features = era5_features

# Config

In [84]:
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

climate_dir = "climate_timeseries/cleaned/"
vegetation_dir = "pandanwangi_timeseries/"
viz_dir = "visualization/"

# konfigurasi parameter
start_year = 2020
end_year = 2025
evi = True
kecamatan_names = ['warungkondang']
planting_month_group = 1

era5_datasets = ['era5_warungkondang.csv']
chirts_datasets = ['chirts_warungkondang.csv']
ow_datasets = ['OpenWeather_warungkondang.csv']
power_datasets = ['power_warungkondang.csv']

selected_dataset = era5_datasets

available_vegetation_datasets = ['warungkondang.csv']

selected_kec = kecamatan_names[0]

if evi:
    available_vegetation_datasets = ['evi_' + dataset for dataset in available_vegetation_datasets]

extract_statistics = ['mean', 'min', 'max', 'std']
statistic_corr = ['mean', 'min', 'max', 'std']

temporal_resolution = '5D'

# Cleaning Weather

In [85]:
# load seluruh data climate
def load_climate_data(climate_dir, datasets, kecamatan_names):
    climate_df = {}
    for i, dataset in enumerate(selected_dataset):
        try:
            curr = pd.read_csv(f"{climate_dir}/{dataset}")
            kec = kecamatan_names[min(i, len(kecamatan_names)-1)]
            climate_df[kec] = curr
        except Exception as e:
            print(f"File kecamatan {kec} tidak dapat dimuat: {e}")
    return climate_df

# process data cuaca
def climate_df_processing(df):
    df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d')
    df.set_index('datetime', inplace=True)
    
    return df[(df.index.year >= start_year) & (df.index.year <= end_year)]

# Cleaning Vegetation

In [105]:
start_year_grouping = 2022
end_year_grouping = 2023
outliers_threshold = 1.5

# load seluruh data vegetasi
def load_vegetation_data(vegetation_dir, datasets, kecamatan_names):
    vegetation_df = {}
    for i, dataset in enumerate(datasets):
        try:
            curr = pd.read_csv(f"{vegetation_dir}/{dataset}")
            kec = kecamatan_names[min(i, len(kecamatan_names)-1)]
            vegetation_df[kec] = curr
        except Exception as e:
            print(f"File vegetasi {kec} tidak dapat dimuat: {e}")
    return vegetation_df

def moving_average(array, window_size=6):
    return pd.Series(array).rolling(window=window_size, center=True, min_periods=1).mean().to_numpy()

def remove_outliers(array):
    array = np.array(array, dtype=float)
    q1 = np.percentile(array, 25)
    q3 = np.percentile(array, 75)
    iqr = q3 - q1
    lower = q1 - outliers_threshold * iqr
    upper = q3 + outliers_threshold * iqr

    mask = (array >= lower) & (array <= upper)
    cleaned = np.where(mask, array, np.nan)
    series = pd.Series(cleaned).interpolate(method='linear').ffill().bfill()
    
    return series.to_numpy()

def process_row(row):
    cleaned_row = remove_outliers(row.values)
    return pd.Series(cleaned_row, index=row.index)
    
def detect_planting_month(row, time_cols):
    evi_values = row[time_cols].values
    cleaned_evi = remove_outliers(evi_values)
    smoothed_evi = moving_average(cleaned_evi)
    dates = pd.to_datetime(time_cols, format='%Y%m%d')
    
    min_index = smoothed_evi.argmin()
    planting_date = dates[min_index]
    planting_month = planting_date.month
    
    if(planting_month > 6):
        return (planting_month % 6) + 1
    return planting_month

def vegetation_planttime_preprocessing(df, label='pandanwangi'):
    df = df.iloc[:, 2:]
    df = df[df.label == label]
    cluster_id = df['cluster_id']
    df = df.set_index('cluster_id')
    df = df.iloc[:, :-1]
    df = df.T
    indexes = df.index
    new_indexes = []
    for i in indexes:
        date = pd.to_datetime(i, format='%Y%m%d')
        new_indexes.append(date)
    df['datetime'] = new_indexes
    df = df.set_index('datetime')
    df = df.asfreq(temporal_resolution, method='nearest')
    
    df = df.interpolate()
    df = df.ffill()
    df = df.bfill()
    df = df[(df.index.year >= start_year_grouping) & (df.index.year <= end_year_grouping)]
    df.index = df.index.strftime('%Y%m%d')
    print(df)
    df = df.T
    df['cluster_id'] = cluster_id.values
    df = df.reset_index(drop=True)
    time_cols = [col for col in df.columns if col != 'cluster_id']
    df['planting_month'] = df.apply(lambda row: detect_planting_month(row, time_cols=time_cols), axis=1)
    return df[(df.index.year >= start_year_grouping) & (df.index.year <= end_year_grouping)]    

In [101]:
def vegetation_preprocessing(df, plant_time, month=1, grouping=True, label='pandanwangi'):
    df = df.iloc[:, 2:]
    df.iloc[:,:-2] = df.iloc[:,:-2].interpolate(method='linear', axis=1).ffill(axis=1).bfill(axis=1)
    df = df[df.label == label]
    if grouping:
        df = df.merge(plant_time, how='left', on='cluster_id')
        df = df[df['planting_month'] == month]
    df = df.set_index('cluster_id')
    if grouping:
        df = df.iloc[:, :-2]
    else:
        df = df.iloc[:, :-1]
    df = df.apply(lambda row: process_row(row), axis=1)

    # print(df)
    df = df.describe()
    df = df.T
    indexes = df.index
    new_indexes = []
    for i in indexes:
        date = pd.to_datetime(i, format='%Y%m%d')
        new_indexes.append(date)
    df['datetime'] = new_indexes
    df = df.set_index('datetime')
    # df = df.asfreq(temporal_resolution, method='nearest')
    df = df.resample(temporal_resolution).mean()

    df = df.interpolate(method='akima')
    df = df.ffill()
    df = df.bfill()
    return df

In [88]:
# load seluruh data vegetasi
def load_vegetation_data(vegetation_dir, datasets, kecamatan_names):
    vegetation_df = {}
    for i, dataset in enumerate(datasets):
        try:
            curr = pd.read_csv(f"{vegetation_dir}/{dataset}")
            kec = kecamatan_names[min(i, len(kecamatan_names)-1)]
            vegetation_df[kec] = curr
        except Exception as e:
            print(f"File vegetasi {kec} tidak dapat dimuat: {e}")
    return vegetation_df



# process data vegetasi
outliers_threshold = 1.5
def remove_outliers(array):
    array = np.array(array, dtype=float)
    q1 = np.percentile(array, 25)
    q3 = np.percentile(array, 75)
    iqr = q3 - q1
    lower = q1 - outliers_threshold * iqr
    upper = q3 + outliers_threshold * iqr
    # print(f'lower bound:{lower}')
    # print(f'upper bound:{upper}')
    mask = (array >= lower) & (array <= upper)

    cleaned = np.where(mask, array, np.nan)
    
    series = pd.Series(cleaned).interpolate(method='linear').ffill().bfill()
    
    return series.to_numpy()

def process_row(row):
    cleaned_row = remove_outliers(row.values)
    return pd.Series(cleaned_row, index=row.index)
    
def vegetation_preprocessing(df):
    df = df.iloc[:, 2:]
    df.iloc[:,:-2] = df.iloc[:,:-2].interpolate(method='linear', axis=1).ffill(axis=1).bfill(axis=1)
    df = df[df.label == label]
    df = df.set_index('cluster_id')
    df = df.apply(lambda row: process_row(row), axis=1)

    df = df.describe()
    df = df.T
    df = df[extract_statistics]
    indexes = df.index
    new_indexes = []
    for i in indexes:
        date = pd.to_datetime(i, format='%Y%m%d')
        new_indexes.append(date)
    df['datetime'] = new_indexes
    df = df.set_index('datetime')
    df = df.resample(temporal_resolution).mean()
        
    df = df.interpolate(method='akima')
    df = df.ffill()
    df = df.bfill()
    return df[(df.index.year >= start_year) & (df.index.year <= end_year)]
    
# def vegetation_preprocessing(df):
#     df = df.iloc[:,2:]
#     df = df[df.label == 'pandanwangi']
#     df = df.set_index('cluster_id') 
#     df = df.describe()
#     df = df.T # transpose urutan waktu menjadi baris
#     df = df[extract_statistics] # ambil fitur statistik yang diinginkan dari .describe()
    
#     indexes = df.index
#     new_indexes = []
#     print(indexes)
#     for i in indexes:
#         date = pd.to_datetime(i, format='%Y%m%d')
#         new_indexes.append(date)
    
#     df['datetime'] = new_indexes
#     df = df.set_index('datetime')
#     df = df.asfreq(temporal_resolution, method='nearest') # resample data, dengan space temporal menjadi per 5 hari dengan metode nilai terdekat
    
#     return df[(df.index.year >= start_year) & (df.index.year <= end_year)]




# Viz and Corr

In [102]:
# Visualize time series data
def plot_time_series(climate_df, vegetation_df, feature, stat):
    fig, ax1 = plt.subplots(figsize=(14, 6))
    
    # Plot climate data
    color = 'tab:blue'
    ax1.set_xlabel('Date')
    ax1.set_ylabel(feature, color=color)
    climate_series = climate_df[feature]
    ax1.plot(climate_series.index, climate_series.values, color=color, label=feature)
    ax1.tick_params(axis='y', labelcolor=color)
    
    # Create second y-axis for vegetation data
    ax2 = ax1.twinx()
    color = 'tab:red'
    ax2.set_ylabel(f'Vegetation {stat}', color=color)
    veg_series = vegetation_df[stat]
    ax2.plot(veg_series.index, veg_series.values, color=color, label=f'Vegetation {stat}')
    ax2.tick_params(axis='y', labelcolor=color)
    
    # Add title and legend
    plt.title(f'Time Series: {feature} vs Vegetation {stat}')
    fig.tight_layout()
    fig.legend(loc="upper left", bbox_to_anchor=(0.1, 0.9))
    plt.savefig(f'{viz_dir}time_series_{feature}_{stat}.png')
    plt.close()

# Visualize lag correlation
def plot_lag_correlation(lag_corr_df, vegetation_stat, climate_feature):
    subset = lag_corr_df[(lag_corr_df['statistic'] == vegetation_stat) & 
                         (lag_corr_df['climate_feature'] == climate_feature)]
    
    plt.figure(figsize=(10, 5))
    plt.plot(subset['lag'], subset['correlation'], marker='o')
    plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
    
    # Add significance threshold lines (assuming p<0.05)
    significant = subset[subset['p_value'] < 0.05]
    plt.scatter(significant['lag'], significant['correlation'], color='red', 
                s=80, label='p < 0.05', zorder=3)
    
    plt.title(f'Lag Correlation: {climate_feature} vs Vegetation {vegetation_stat}')
    plt.xlabel('Lag (Negative: Climate leads, Positive: Vegetation leads)')
    plt.ylabel('Correlation Coefficient')
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.tight_layout()
    plt.savefig(f'{viz_dir}lag_correlation_{climate_feature}_{vegetation_stat}.png')
    plt.close()

# Visualize correlation heatmap
def plot_correlation_heatmap(lag_corr_df, lag=0):
    # Filter for specific lag
    lag_subset = lag_corr_df[lag_corr_df['lag'] == lag]
    
    # Prepare data for heatmap
    pivot_data = lag_subset.pivot_table(
        index='climate_feature', 
        columns='statistic', 
        values='correlation'
    )
    
    plt.figure(figsize=(12, 8))
    
    # Create heatmap
    sns.heatmap(pivot_data, annot=True, cmap='coolwarm', center=0, 
                vmin=-1, vmax=1, fmt='.2f')
    
    plt.title(f'Correlation Heatmap (Lag = {lag})')
    plt.tight_layout()
    plt.savefig(f'{viz_dir}correlation_heatmap_lag_{lag}.png')
    plt.close()
    
    # Also create p-value heatmap
    p_value_pivot = lag_subset.pivot_table(
        index='climate_feature', 
        columns='statistic', 
        values='p_value'
    )
    
    plt.figure(figsize=(12, 8))
    sns.heatmap(p_value_pivot < 0.05, annot=p_value_pivot, cmap='viridis', 
                fmt='.3f', cbar_kws={'label': 'p-value'})
    
    plt.title(f'P-values Heatmap (Lag = {lag})')
    plt.tight_layout()
    plt.savefig(f'{viz_dir}pvalue_heatmap_lag_{lag}.png')
    plt.close()


In [103]:
# function untuk mem-filter dataframe dengan metode savitzky-golay 
def smoothing_sg(df, window_length=16, polyorder=3):
    for col in ['mean', 'min', 'max']:
        filtered = savgol_filter(df[col], window_length=window_length, polyorder=polyorder)
        df[f'smoothed_{col}'] = filtered
    return df


# melakukan differencing pada data time series
def difference_df(df):
    return df.diff()
    
# mengecek stasioneritas pada data dengan Dicky-Fuller test dengan alpha 5%
def test_stationarity(df):
    results = {}
    for col in df.columns:
        print(df[col])
        adf_result = sm.tsa.stattools.adfuller(df[col].dropna())
        results[col] = {
            'ADF Statistic': adf_result[0],
            'p-value': adf_result[1],
            'Stationary': adf_result[1] < 0.05
        }
    return pd.DataFrame(results).T

# hitung korelasi dengan lag
def calculate_lagged_correlation(climate_df, vegetation_df, max_lag=30):
    vegetation_stats = statistic_corr
    
    lag_correlation = {
        'statistic': [],
        'climate_feature': [],
        'lag': [],
        'correlation': [],
        'p_value': []
    }
    
    for stat in vegetation_stats:
        if stat in vegetation_df.columns:
            veg_series = vegetation_df[stat]
            
            for feature in climate_df.columns:
                climate_series = climate_df[feature]
                
                intersecting_idx = veg_series.index.intersection(climate_series.index) # kedua dataset yang dibandingkan mempunyai index yang sama (time range yang sama)
                veg_adjusted = veg_series.loc[intersecting_idx]
                climate_adjusted = climate_series.loc[intersecting_idx]
                
                # hitung cross-correlation untuk lag 0 hingga max_lag (pada time series)
                for lag in range(max_lag+1):
                    x = climate_adjusted.iloc[:-lag].values if lag != 0 else climate_adjusted.values
                    y = veg_adjusted.iloc[lag:].values if lag != 0 else veg_adjusted.values
                    
                    corr, p_value = stats.pearsonr(x, y)
                    
                    lag_correlation['statistic'].append(stat)
                    lag_correlation['climate_feature'].append(feature)
                    lag_correlation['lag'].append(lag)
                    lag_correlation['correlation'].append(corr)
                    lag_correlation['p_value'].append(p_value)
    
    return pd.DataFrame(lag_correlation)

# granger causality
def granger_causality_test(climate_df, vegetation_df, max_lag=10):
    
    results = {
        'climate_feature': [],
        'vegetation_stat': [],
        'max_lag': [],
        'min_p_value': [],
        'causality_direction': []
    }
    
    for climate_feature in climate_df.columns:
        for veg_stat in vegetation_df.columns:
            if veg_stat in statistic_corr:
                
                intersecting_idx = climate_df.index.intersection(vegetation_df.index)

                climate_series = climate_df[climate_feature].loc[intersecting_idx]
                veg_series = vegetation_df[veg_stat].loc[intersecting_idx]
                
                # data yang akan dilakukan granger test
                data = pd.DataFrame({
                    'climate': climate_series,
                    'vegetation': veg_series
                })
                
                # Test if climate Granger-causes vegetation
                try:
                    climate_to_veg = grangercausalitytests(data[['vegetation', 'climate']], 
                                                          maxlag=max_lag, verbose=False)

                    # print("grangertest:")
                    # print(climate_to_veg)

                    min_p_climate_to_veg = min([climate_to_veg[lag][0]['ssr_chi2test'][1] 
                                             for lag in range(1, max_lag+1)])
                    
                    # Determine causality direction
                    if min_p_climate_to_veg < 0.05:
                        direction = 'granger-causal'
                        min_p = min_p_climate_to_veg
                    else:
                        direction = 'none'
                        min_p = min(min_p_climate_to_veg, min_p_veg_to_climate)
                    
                    results['climate_feature'].append(climate_feature)
                    results['vegetation_stat'].append(veg_stat)
                    results['max_lag'].append(max_lag)
                    results['min_p_value'].append(min_p)
                    results['causality_direction'].append(direction)
                
                except Exception as e:
                    print(f"Error in Granger test: {e}")
    
    return pd.DataFrame(results)


# Start Analysis

In [104]:
climate_df = load_climate_data(climate_dir, era5_datasets, kecamatan_names)
vegetation_df = load_vegetation_data(vegetation_dir, available_vegetation_datasets, kecamatan_names)

preprocessed_climate = {}
preprocessed_vegetation = {}

# lakukan preprocess pada setiap kecamatan
for kec in kecamatan_names:
    plant_time_df = vegetation_planttime_preprocessing(vegetation_df[kec])
    plant_time_df = plant_time_df[['cluster_id', 'planting_month']]
    
    preprocessed_climate[kec] = vegetation_preprocessing(vegetation_df[kec], plant_time_df_pw,1, grouping=True)
    preprocessed_vegetation[kec] = vegetation_preprocessing(vegetation_df[kec])
    preprocessed_vegetation[kec] = smoothing_sg(preprocessed_vegetation[kec])

# print(preprocessed_vegetation['warungkondang'].value_counts())

# uji stasioneritas
for kec in kecamatan_names:
    print(f"\nStationarity Test for Climate Data ({kec}):")
    climate_stationarity = test_stationarity(preprocessed_climate[kec])
    print(climate_stationarity)
    
    print(f"\nStationarity Test for Vegetation Data ({kec}):")
    # print(preprocessed_vegetation[kec])
    vegetation_stationarity = test_stationarity(preprocessed_vegetation[kec])
    print(vegetation_stationarity)

# differencing data
differenced_climate = {}
differenced_vegetation = {}

for kec in kecamatan_names:
    climate_stationary = test_stationarity(preprocessed_climate[kec])
    veg_stationary = test_stationarity(preprocessed_vegetation[kec])

    differenced_climate[kec] = difference_df(preprocessed_climate[kec])
    print(f"Applied differencing to climate data for {kec}")
    differenced_vegetation[kec] = difference_df(preprocessed_vegetation[kec])
    print(f"Applied differencing to vegetation data for {kec}")

# adjust data agar mempunyai time series yang sama
to_adjust = [differenced_climate, differenced_vegetation]
# to_adjust = [preprocessed_climate, preprocessed_vegetation]
adjusted_data = {}
for kec in kecamatan_names:
    intersecting_idx = to_adjust[0][kec].index.intersection(to_adjust[1][kec].index)
    adjusted_data[kec] = {
        'climate': to_adjust[0][kec].loc[intersecting_idx].interpolate().bfill().ffill(),
        'vegetation': to_adjust[1][kec].loc[intersecting_idx].interpolate().bfill().ffill()
    }

AttributeError: 'Index' object has no attribute 'year'

In [48]:
print("adjusted_data", adjusted_data)
# hitung korelasi dengan faktor lag hingga max_corr_lag
max_corr_lag = 365
results = {}
for kec in kecamatan_names:
    print(f"\nLagged correlations for {kec}...")
    lag_corr_df = calculate_lagged_correlation(
        adjusted_data[kec]['climate'], 
        adjusted_data[kec]['vegetation'], 
        max_lag=max_corr_lag
    )
    results[kec] = lag_corr_df

print("hasil result:")
print(results)
print(selected_kec)
# cari korelasi terkuat
for kec in kecamatan_names:
    lag_corr = results[kec]
    # ambil korelasi signifikan dan terkuat
    significant = lag_corr[lag_corr['p_value'] < 0.05]
    strongest = significant.loc[significant['correlation'].abs().sort_values(ascending=False).index]
    
    print(f"\nTop 20 strongest correlations for {kec}:")
    print(strongest.head(15))


adjusted_data {'warungkondang': {'climate':             temperature_2m  temperature_2m_min  temperature_2m_max  \
datetime                                                             
2020-01-05       -0.253285           -0.290606           -0.481770   
2020-01-10       -0.894642           -0.621436           -2.082504   
2020-01-15       -0.088300           -0.223561            0.441169   
2020-01-20        0.758240            0.786157            0.408493   
2020-01-25       -0.991496           -1.242667           -2.607253   
...                    ...                 ...                 ...   
2024-12-09       -0.278502           -0.246974           -0.065834   
2024-12-14       -1.216818           -0.485587           -1.814174   
2024-12-19        0.282064            0.870295           -1.167842   
2024-12-24       -0.549871           -0.603832           -1.739697   
2024-12-29       -0.197736            0.826865           -2.052348   

            soil_temperature_level_1  soil_te

ValueError: `x` and `y` must have length at least 2.

In [None]:
# Granger causality test
max_granger_lag = 30
for kec in kecamatan_names:
    print(f"\nGranger causality test for {kec}...")
    granger_results = granger_causality_test(
        adjusted_data[kec]['climate'],
        adjusted_data[kec]['vegetation'],
        max_lag=max_granger_lag,
    )
    
    # Filter for significant causal relationships
    significant_causality = granger_results[granger_results['min_p_value'] < 0.05]
    
    print(f"\nSignificant causal relationships for {kec}:")
    print(significant_causality)

In [None]:
# 1. Time series plots for top correlations
if len(significant) > 0:
    top_correlation = strongest.iloc[0]
    feature = top_correlation['climate_feature']
    stat = top_correlation['statistic']

    plot_time_series(
        adjusted_data[selected_kec]['climate'],
        adjusted_data[selected_kec]['vegetation'],
        feature, stat
    )

# 2. Lag correlation plots
for feature in chirts_features[:3]:  # Plot first 3 features for example
    for stat in ['mean', 'smoothed_mean']:
        plot_lag_correlation(results[selected_kec], stat, feature)

# 3. Correlation heatmaps at different lags
# for lag in [-10, -5, 0, 5, 10]:
#     plot_correlation_heatmap(results[selected_kec], lag)

print("\nAnalysis complete! Visualization files have been saved.")