In [68]:
#Trend analysis for each ecoregion of Nepal based on different SUs
#Input: 1. Trend Raster (all fitted and significant only)
#        2. DEM Raster (elevation, slope (degrees), aspect (degrees))

# Workflow steps : 0. SUs stratified by ecoregions > elevation > aspect > slope
#                 1. no of trend fitted pixels for each SU (>10 for valid)
#                 2. percentage of significant pixels for each SU (> %5 for valid)
#                 3. no of pixels with postive trend and negative trend
#                 4. Trend Assymetry Ratio: #n/#p (> 2 or <0.5 for valid)
#                 5. SUs that pass all criteria 
#                 6. Ecoregion trend (areal% and slope in both direction) derived from valid SUs

#starting with a simple code only considering sos  

In [69]:
# ECO_ID     ECO_NAME
# 81003     Eastern Himalayan alpine shrub and meadows
# 40115     Himalayan subtropical broadleaf forests
# 40301     Himalayan subtropical pine forests
# 40403     Western Himalayan broadleaf forests
# -9999     Rock and Ice
# 40401     Eastern Himalayan broadleaf forests
# 40166     Upper Gangetic Plains moist deciduous forests - assign value null at last for all variables
# 81021     Western Himalayan alpine shrub and Meadows
# 40701     Terai-Duar savanna and grasslands
# 40501     Eastern Himalayan subalpine conifer forests
# 40502     Western Himalayan subalpine conifer forests
# 40120     Lower Gangetic Plains moist deciduous forests - assign value null at last for all variables

In [70]:
#load libraries and data
import numpy as np  
import pandas as pd 
import matplotlib.pyplot as plt
import xarray as xr 
import rasterio as rio  
import rioxarray as rxr 
import os
import geopandas as gpd

ecoregion_rxr = rxr.open_rasterio(r"..\Data\Ecoregion_raster\ecoregions_raster.tif")

elev_rxr = rxr.open_rasterio(r"..\Data\DEM_Rasters\elevation.tif")
aspect_rxr = rxr.open_rasterio(r"..\Data\DEM_Rasters\aspect.tif")
slope_rxr = rxr.open_rasterio(r"..\Data\DEM_Rasters\slope.tif")

roi_gdf = gpd.read_file(r"../Data/roi_nepal/nepal_actual_roi.shp")
roi = roi_gdf.to_crs("EPSG:4326")

output_dir = r"../Data/Processed/Ecoregion_trends/"



In [71]:
# preprocess dem rasters, actual values to classes
# divide elevation, slope, aspect into classes

# Elevation classes: <1000: 1, 1000-2000: 2, 2000-3000: 3, 3000-4000: 4, >4000: 5
elev_class = xr.where(elev_rxr < 1000, 1, 
                      xr.where(elev_rxr < 2000, 2,
                               xr.where(elev_rxr < 3000, 3,
                                        xr.where(elev_rxr < 4000, 4, 5))))

# Slope classes: 0-2: 1, 2-15: 2, 15-30: 3, >30: 4              Reference: FAO(2006)
slope_class = xr.where(slope_rxr < 2, 1,
                       xr.where(slope_rxr < 15, 2,
                                xr.where(slope_rxr < 30, 3, 4)))

# Aspect classes: Northern (270-360, 0-90): 1, Southern (90-270): 2
aspect_class = xr.where((aspect_rxr >= 0) & (aspect_rxr <= 90), 1,
                        xr.where((aspect_rxr >= 270) & (aspect_rxr <= 360), 1,
                                 xr.where((aspect_rxr > 90) & (aspect_rxr < 270), 2, np.nan)))

In [72]:
lsp_metrics = ['sos', 'eos', 'los', 'pos']

# Initialize list to store pixel statistics for each metric
pixel_stats_list = []

for metric in lsp_metrics:
    trend_rxr = rxr.open_rasterio(r"..\Data\Trend_Rasters\mod_"+metric+"_mk_raw.tif")
    sig_trend_rxr = rxr.open_rasterio(r"..\Data\Trend_Rasters\mod_"+metric+"_mk_significant.tif")
    final_outdir = os.path.join(output_dir, f"{metric}")
    os.makedirs(final_outdir, exist_ok=True)
    
    
    #stack rasters to create a pd dataframe
    ref = sig_trend_rxr.rio.clip(roi.geometry, roi.crs, drop=True)

    raster_dict = {
        "ecoregion": ecoregion_rxr,
        "elevation": elev_class,
        "slope": slope_class,
        "aspect": aspect_class,
        "trend": trend_rxr,
        "sig_trend": sig_trend_rxr
    }

    aligned_rasters = []

    for name, raster in raster_dict.items():
        raster = raster.rio.write_crs("EPSG: 4326")
        reproj = raster.rio.reproject_match(ref)
        reproj.name = name
        reproj = reproj.squeeze('band', drop = True)
        aligned_rasters.append(reproj)

    stacked_xr = xr.merge(aligned_rasters)
    stacked_df = stacked_xr.to_dataframe().reset_index()
    
    #filter for select ecoregions
    select_ecoregions = [81003, 40115, 40301, 40403, 40401, 40166, 81021, 40701, 40501, 40502, 40120]
    stacked_df = stacked_df[stacked_df['ecoregion'].isin(select_ecoregions)]
    stacked_df1 = stacked_df[~((stacked_df['trend'] == -999) )]
    stacked_df2 = stacked_df[~((stacked_df['trend'] == -999) | (stacked_df['sig_trend'] == -999))]

    # Calculate statistics
    total_pixels = stacked_df.shape[0]
    total_trend_fitted = stacked_df1.shape[0]
    total_significant = stacked_df2.shape[0]
    
    trend_fitted_pct = (total_trend_fitted / total_pixels) * 100
    sig_from_fitted_pct = (total_significant / total_trend_fitted) * 100
    sig_from_total_pct = (total_significant / total_pixels) * 100
    
    # Print statistics (original behavior)
    print(f"\n{'='*50}")
    print(f"Statistics for {metric.upper()}")
    print(f"{'='*50}")
    print("total_pixels:", total_pixels)
    print("total trend fitted pixels:", total_trend_fitted)
    print("total significant pixels:", total_significant)
    print(f"trend fitted / total  %: {trend_fitted_pct:.2f}")
    print(f"significant / trend fitted  %: {sig_from_fitted_pct:.2f}")
    print(f"significant / total  %: {sig_from_total_pct:.2f}")
    
    # Store statistics for CSV
    pixel_stats_list.append({
        'lsp_metric': metric,
        'total_pixels': total_pixels,
        'trend_fitted_pixels': total_trend_fitted,
        'significant_pixels': total_significant,
        'trend_fitted_pct': round(trend_fitted_pct, 2),
        'sig_from_fitted_pct': round(sig_from_fitted_pct, 2),
        'sig_from_total_pct': round(sig_from_total_pct, 2)
    })

    stacked_df['trend'] = stacked_df['trend'].fillna(-999)

    su_stats = stacked_df.groupby(['ecoregion', 'elevation', 'slope', 'aspect']).agg(
        
        # 1. Trend Pixels
        n_trend_unfitted_count=('trend', lambda x: (x == -999).sum()),
        n_trend_fitted_count=('trend', lambda x: (x != -999).sum()),
        
        # 2. Significance Pixels
        n_insig_trend_count=('sig_trend', lambda x: (x == -999).sum()),
        n_sig_trend_count=('sig_trend', lambda x: ((x != -999) & (~x.isna())).sum()),
        
        # Positive and negative trends (for significant pixels only)
        positive_sig_trend_count=('sig_trend', lambda x: ((x > 0) & (x != -999)).sum()),
        negative_sig_trend_count=('sig_trend', lambda x: ((x < 0) & (x != -999)).sum()),
        
        # Mean values (excluding -999 and NaN)
        positive_sig_trend_mean=('sig_trend', lambda x: x[(x > 0) & (x != -999)].mean() if len(x[(x > 0) & (x != -999)]) > 0 else np.nan),
        negative_sig_trend_mean=('sig_trend', lambda x: x[(x < 0) & (x != -999)].mean() if len(x[(x < 0) & (x != -999)]) > 0 else np.nan),
        all_sig_trend_mean=('sig_trend', lambda x: x[(x != -999) & (~x.isna())].mean())
    ).reset_index()

    # Add derived metrics based on your workflow
    su_stats['percent_sig_pixels'] = (su_stats['n_sig_trend_count'] / su_stats['n_trend_fitted_count']) * 100
    su_stats['trend_asymmetry_ratio'] = su_stats['positive_sig_trend_count'] / su_stats['negative_sig_trend_count']


    #based on the set criteria we divide SUs into either 'valid' or 'invalid' category in lsp_change
    su_stats['lsp_change'] = np.where(
        (su_stats['n_trend_fitted_count'] > 10) &  # At least 10 fitted pixels
        (su_stats['percent_sig_pixels'] > 5) &      # At least 5% significant
        ((su_stats['trend_asymmetry_ratio'] > 2) | (su_stats['trend_asymmetry_ratio'] < 0.5)),  # Strong asymmetry
        1,                  #means yes or valid
        0                   #means no or invalid
    )

    su_stats.to_csv(os.path.join(final_outdir, "All_SU_Stats_"+metric+".csv"), index=False)

    #from this step we drop all SUs with invalid lsp_change
    # all % are calculated based on the ecoregions area (total pixels of that ecoregions)

    ecr_count = su_stats.groupby('ecoregion')[['n_trend_fitted_count', 'n_trend_unfitted_count']].sum().reset_index()
    ecr_count['total_pixels'] = ecr_count['n_trend_fitted_count'] + ecr_count['n_trend_unfitted_count']
    ecr_count = ecr_count.drop(columns=['n_trend_fitted_count', 'n_trend_unfitted_count'])

    su_filtered = (su_stats[su_stats['lsp_change'] == 1]).drop(columns=['lsp_change'])
    ecr_stats = su_filtered.groupby('ecoregion').agg(
        n_trend_fitted_count = ('n_trend_fitted_count', 'sum'),
        n_sig_trend_count=('n_sig_trend_count', 'sum'),
        positive_sig_trend_count=('positive_sig_trend_count', 'sum'),
        negative_sig_trend_count=('negative_sig_trend_count', 'sum'),
        positive_sig_trend_mean=('positive_sig_trend_mean', 'mean'),
        negative_sig_trend_mean=('negative_sig_trend_mean', 'mean'),
        all_sig_trend_mean=('all_sig_trend_mean', 'mean')
    )

    ecr_stats = pd.merge(ecr_stats, ecr_count, on='ecoregion', how = 'inner')    

    ecr_stats['percent_trend_fit_px'] = (ecr_stats['n_trend_fitted_count'] / ecr_stats['total_pixels']) * 100
    ecr_stats['percent_significant_valid_px'] = (ecr_stats['n_sig_trend_count'] / ecr_stats['total_pixels']) * 100
    ecr_stats['percent_positive_valid_px'] = (ecr_stats['positive_sig_trend_count'] / ecr_stats['total_pixels']) * 100
    ecr_stats['percent_negative_valid_px'] = (ecr_stats['negative_sig_trend_count'] / ecr_stats['total_pixels']) * 100
    ecr_stats['trend_asymmetry_ratio'] = ecr_stats['positive_sig_trend_count'] / ecr_stats['negative_sig_trend_count']

    #calculate net area % and net trend mean for ecoregions with notably asymmetric trend
    ecr_stats['net_area_percent'] = np.where(ecr_stats['trend_asymmetry_ratio'] > 2,
        ecr_stats['percent_positive_valid_px'] - ecr_stats['percent_negative_valid_px'],
        np.where(ecr_stats['trend_asymmetry_ratio'] < 0.5,
            ecr_stats['percent_positive_valid_px'] - ecr_stats['percent_negative_valid_px'],
            0
        )
    )
    
    ecr_stats = ecr_stats[['ecoregion','percent_trend_fit_px','percent_significant_valid_px','percent_positive_valid_px','percent_negative_valid_px','trend_asymmetry_ratio',
                'positive_sig_trend_mean','negative_sig_trend_mean','all_sig_trend_mean','net_area_percent']]
    ecr_stats.to_csv(os.path.join(final_outdir, "ecoregion_stats.csv"), index=False)

# Convert to DataFrame and save as CSV
pixel_stats_df = pd.DataFrame(pixel_stats_list)
pixel_stats_df.to_csv(os.path.join(output_dir, "pixel_statistics_summary.csv"), index=False)


  stacked_xr = xr.merge(aligned_rasters)



Statistics for SOS
total_pixels: 2569062
total trend fitted pixels: 2047814
total significant pixels: 159978
trend fitted / total  %: 79.71
significant / trend fitted  %: 7.81
significant / total  %: 6.23


  stacked_xr = xr.merge(aligned_rasters)



Statistics for EOS
total_pixels: 2569062
total trend fitted pixels: 2048897
total significant pixels: 155358
trend fitted / total  %: 79.75
significant / trend fitted  %: 7.58
significant / total  %: 6.05


  stacked_xr = xr.merge(aligned_rasters)



Statistics for LOS
total_pixels: 2569062
total trend fitted pixels: 2048897
total significant pixels: 191566
trend fitted / total  %: 79.75
significant / trend fitted  %: 9.35
significant / total  %: 7.46


  stacked_xr = xr.merge(aligned_rasters)



Statistics for POS
total_pixels: 2569062
total trend fitted pixels: 2048897
total significant pixels: 514601
trend fitted / total  %: 79.75
significant / trend fitted  %: 25.12
significant / total  %: 20.03


In [73]:
# # Topographic Driver Analysis - Which factors control LSP change direction?
# # For each ecoregion, test if Elevation, Slope, or Aspect significantly influence trend asymmetry

# import numpy as np
# import pandas as pd
# from scipy.stats import spearmanr, mannwhitneyu

# # Step 1: Create normalized asymmetry index (avoids division by zero)
# # Range: -1 (all positive trends) to +1 (all negative trends)
# su_stats['asymmetry_index'] = (
#     (su_stats['positive_sig_trend_count'] - su_stats['negative_sig_trend_count']) /
#     (su_stats['positive_sig_trend_count'] + su_stats['negative_sig_trend_count'])
# )

# # Step 2: Filter for valid SUs only
# valid_sus = su_stats[su_stats['lsp_change'] == 1].dropna(subset=['asymmetry_index'])

# # Step 3: Test each ecoregion
# driver_results = []

# for eco_id, group in valid_sus.groupby('ecoregion'):
#     if len(group) < 5:  # Skip if too few SUs
#         continue
    
#     # --- ELEVATION: Spearman correlation ---
#     rho_elev, p_elev = spearmanr(group['elevation'], group['asymmetry_index'])
#     is_elev_driver = (p_elev < 0.05) and (abs(rho_elev) > 0.1)
    
#     # --- SLOPE: Spearman correlation ---
#     rho_slope, p_slope = spearmanr(group['slope'], group['asymmetry_index'])
#     is_slope_driver = (p_slope < 0.05) and (abs(rho_slope) > 0.1)
    
#     # --- ASPECT: Mann-Whitney with directional effect size ---
#     north = group[group['aspect'] == 1]['asymmetry_index']
#     south = group[group['aspect'] == 2]['asymmetry_index']
    
#     if len(north) > 5 and len(south) > 5:
#         u_stat, p_aspect = mannwhitneyu(north, south)
        
#         # Calculate effect size r = |Z| / sqrt(N)
#         n1, n2 = len(north), len(south)
#         mu = n1 * n2 / 2
#         sigma = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12)
#         z_score = (u_stat - mu) / sigma
#         r_magnitude = abs(z_score) / np.sqrt(n1 + n2)
        
#         # Assign direction: + if North has higher asymmetry, - if South higher
#         r_aspect = r_magnitude if north.median() > south.median() else -r_magnitude
        
#         is_aspect_driver = (p_aspect < 0.05) and (abs(r_aspect) > 0.1)
#     else:
#         r_aspect, p_aspect, is_aspect_driver = 0, 1.0, False
    
#     # Store results
#     driver_results.append({
#         'ecoregion': eco_id,
#         'elev_r': round(rho_elev, 3),
#         'slope_r': round(rho_slope, 3),
#         'aspect_r': round(r_aspect, 3),
#         'elev_p': round(p_elev, 4),
#         'slope_p': round(p_slope, 4),
#         'aspect_p': round(p_aspect, 4),
#         'is_elev_driver': 'YES' if is_elev_driver else 'NO',
#         'is_slope_driver': 'YES' if is_slope_driver else 'NO',
#         'is_aspect_driver': 'YES' if is_aspect_driver else 'NO'
#     })

# # Step 4: Create results table
# drivers_df = pd.DataFrame(driver_results)
# print("\n=== Topographic Drivers of LSP Trend Direction ===")
# print("Criteria: p < 0.05 AND |r| > 0.1")
# print("\nInterpretation:")
# print("  + value = Higher class/North aspect → more positive trends")
# print("  - value = Higher class/South aspect → more negative trends\n")
# display(drivers_df)

# # Optional: Save results
# #drivers_df.to_csv("Topographic_Drivers.csv", index=False)