In [1]:
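# Validate LSP metrics derived from MODIS EVI (MOD13Q1.061) against the standard LSP product (MCD12Q2).
# The MODIS-EVI-derived metrics remain the preferred dataset (MCD12Q2 is used only for validation)
# because of their finer spatial resolution (250 m vs 500 m).
# r2 is calculated on the annual rasters; MAE and pBias are calculated on the trend rasters (Sen's slope).
# NOTE: the validation cell below reports Spearman's rho (not r2), computed on the trend rasters.
# Filter scaled rasters to between 0 and 365 DOY (day of year).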


import os
import numpy as np
import matplotlib.pyplot as plt
import rasterio as rio
from rasterio.enums import Resampling
import rioxarray as rxr
import xarray as xr
from scipy.stats import spearmanr
import xskillscore as xs
import geopandas as gpd

In [3]:
# --- Configuration -----------------------------------------------------------
# Folder holding the per-metric trend rasters (mod_* = MOD13Q1-derived, mcd_* = MCD12Q2).
base_path = r"../Data/Trend_Rasters"
lsp_metrics = ['sos', 'eos', 'pos']
roi_gdf = gpd.read_file(r"../Data/roi_nepal/nepal_actual_roi.shp")
roi = roi_gdf.to_crs("EPSG:4326")


def _paired_valid_pixels(mod_values, mcd_values):
    """Flatten two co-registered grids and keep only pixels that are valid in both.

    Parameters
    ----------
    mod_values, mcd_values : array-like
        Pixel values of the two rasters on the same grid; NaN marks missing data.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mod_valid, mcd_valid)``: 1-D arrays of equal length holding the values
        at positions where neither input is NaN, in flattened order.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of pixels, in which
        case a pixel-by-pixel comparison would be meaningless.
    """
    mod_flat = np.asarray(mod_values).ravel()
    mcd_flat = np.asarray(mcd_values).ravel()
    if mod_flat.shape != mcd_flat.shape:
        raise ValueError(
            f"Rasters are not on the same grid: {mod_flat.size} vs {mcd_flat.size} pixels"
        )
    # A pixel is dropped from BOTH arrays as soon as either raster is NaN there,
    # so the remaining values stay aligned pixel-for-pixel.
    valid = ~(np.isnan(mod_flat) | np.isnan(mcd_flat))
    return mod_flat[valid], mcd_flat[valid]


# --- Per-metric comparison ---------------------------------------------------
for metric in lsp_metrics:
    mod_path = os.path.join(base_path, f"mod_{metric}_mk_significant.tif")
    mcd_path = os.path.join(base_path, f"mcd_{metric}_mk_significant.tif")

    # Open both rasters as context managers so the file handles are released
    # at the end of each iteration instead of accumulating across the loop.
    with rxr.open_rasterio(mod_path, masked=True) as mod_src, \
            rxr.open_rasterio(mcd_path, masked=True) as mcd_src:
        # Resample the MOD raster onto the MCD raster's grid (bilinear).
        mod_raster = mod_src.rio.reproject_match(mcd_src, resampling=Resampling.bilinear)

        # Clip both rasters to the ROI; the geometry CRS is passed explicitly.
        mod_raster = mod_raster.rio.clip(roi.geometry, roi.crs, drop=True)
        mcd_raster = mcd_src.rio.clip(roi.geometry, roi.crs, drop=True)

        # Materialise the pixel values while the source files are still open.
        mcd_np = mcd_raster.values.flatten()
        mod_np = mod_raster.values.flatten()

    # arr1_obs = MOD-derived values, arr2_standard = MCD12Q2 values.
    arr1_obs, arr2_standard = _paired_valid_pixels(mod_np, mcd_np)

    if arr1_obs.size == 0:
        # Without any jointly valid pixel every statistic below would be NaN.
        print(f"No overlapping valid pixels ({metric}); metrics skipped.")
        print("\n")
        continue

    spearman_rho, p_value = spearmanr(arr1_obs, arr2_standard)
    mae = np.mean(np.abs(arr1_obs - arr2_standard))
    # pBias is reported as a fraction: sum(MOD - MCD) / sum(MOD).
    # NOTE(review): it is normalised by the MOD sum and not multiplied by 100;
    # confirm this is the intended pBias definition.
    sum_diff = np.sum(arr1_obs - arr2_standard)
    sum_observed = np.sum(arr1_obs)

    print(f"Spearman r ({metric}):", spearman_rho.round(3))
    print(f"MAE ({metric}):", mae.round(3))
    print(f"pBias ({metric}):", (sum_diff / sum_observed).round(3))
    print("\n")

Spearman r (sos): 0.571
MAE (sos): 0.954
pBias (sos): 0.44


Spearman r (eos): 0.686
MAE (eos): 0.788
pBias (eos): 0.258


Spearman r (pos): 0.67
MAE (pos): 0.002
pBias (pos): 0.168




In [None]:
# import seaborn as sns
# import matplotlib.pyplot as plt
# from scipy.stats import gaussian_kde
# # Set up the aesthetic style
# sns.set_style("whitegrid")
# sns.set_context("notebook", font_scale=1.2)
# # Create figure with subplots
# fig = plt.figure(figsize=(14, 6))
# gs = fig.add_gridspec(1, 2, width_ratios=[2, 1], hspace=0.05, wspace=0.3)
# # Main scatter plot with density coloring
# ax1 = fig.add_subplot(gs[0])
# # Calculate point density for color mapping
# xy = np.vstack([clean_arr1, clean_arr2])
# z = gaussian_kde(xy)(xy)
# # Create scatter plot with density-based coloring
# scatter = ax1.scatter(clean_arr1, clean_arr2, c=z, s=20, alpha=0.6, 
#                      cmap='viridis', edgecolors='none', rasterized=True)
# # Add 1:1 reference line
# min_val = min(clean_arr1.min(), clean_arr2.min())
# max_val = max(clean_arr1.max(), clean_arr2.max())
# ax1.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2, 
#          alpha=0.7, label='1:1 Line', zorder=5)
# # Add regression line
# from scipy.stats import linregress
# slope, intercept, r_value, p_value_reg, std_err = linregress(clean_arr1, clean_arr2)
# line_x = np.array([clean_arr1.min(), clean_arr1.max()])
# line_y = slope * line_x + intercept
# ax1.plot(line_x, line_y, 'b-', linewidth=2.5, alpha=0.8, 
#          label=f'Regression Line (y={slope:.2f}x+{intercept:.2f})', zorder=4)
# # Add colorbar
# cbar = plt.colorbar(scatter, ax=ax1, pad=0.02)
# cbar.set_label('Point Density', rotation=270, labelpad=20, fontsize=11)
# # Labels and title
# ax1.set_xlabel('MCD12Q2 Standard Product\nSOS Trend (days/year)', fontsize=12, fontweight='bold')
# ax1.set_ylabel('MOD13Q1 EVI Product\nSOS Trend (days/year)', fontsize=12, fontweight='bold')
# ax1.set_title('Spearman Correlation: Trend Values Comparison', fontsize=14, fontweight='bold', pad=15)
# # Add statistics text box
# stats_text = f'''Statistics:
# Spearman ρ = {spearman_rho:.4f}
# p-value < 0.001
# MAE = {np.mean(diff_value):.4f} days/yr
# R_sdev = {r_sdev:.4f}
# n = {len(clean_arr1):,} pixels'''
# ax1.text(0.05, 0.95, stats_text, transform=ax1.transAxes,
#          fontsize=10, verticalalignment='top',
#          bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor='gray'))
# ax1.legend(loc='lower right', framealpha=0.9, fontsize=9)
# ax1.grid(True, alpha=0.3, linestyle='--')
# # Residual plot
# ax2 = fig.add_subplot(gs[1])
# residuals = clean_arr2 - clean_arr1
# sns.kdeplot(y=residuals, ax=ax2, fill=True, color='steelblue', alpha=0.6)
# ax2.axhline(y=0, color='red', linestyle='--', linewidth=2, alpha=0.7, label='Zero Residual')
# ax2.axhline(y=residuals.mean(), color='green', linestyle=':', linewidth=2, 
#             alpha=0.7, label=f'Mean: {residuals.mean():.3f}')
# ax2.set_ylabel('Residuals (EVI - Standard)\n(days/year)', fontsize=11, fontweight='bold')
# ax2.set_xlabel('Density', fontsize=11, fontweight='bold')
# ax2.set_title('Residual Distribution', fontsize=12, fontweight='bold', pad=10)
# ax2.legend(loc='best', framealpha=0.9, fontsize=8)
# ax2.grid(True, alpha=0.3, axis='y', linestyle='--')
# # Add residual statistics
# residual_stats = f'''Residuals:
# Mean: {residuals.mean():.4f}
# Std: {residuals.std():.4f}
# Median: {np.median(residuals):.4f}'''
# ax2.text(0.05, 0.05, residual_stats, transform=ax2.transAxes,
#          fontsize=8, verticalalignment='bottom',
#          bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor='gray'))
# plt.suptitle('Validation: MODIS-EVI (MOD13Q1) vs Standard LSP (MCD12Q2)', 
#              fontsize=15, fontweight='bold', y=0.98)
# plt.tight_layout()
# plt.show()