# Variance Map Quicklook

Load the variance diagnostics generated by the land-sweeper pipeline and draw global maps (percent change, absolute variance, and DA/OL variance ratio) using `geospatial_plotting.plot_region`, followed by summary statistics of the daily variance fields. Adjust `BASE_DIR` in the configuration cell to point at your variance NetCDF directory.

In [None]:
from pathlib import Path
import numpy as np
import xarray as xr

from geospatial_plotting import plot_region, REGION_BOUNDS

%matplotlib inline

In [None]:
# --- Configuration ---
# Directory that holds the variance NetCDF files. The default is a
# machine-specific absolute path; edit it to match your own layout.
BASE_DIR = Path('/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/cat')
# NOTE(review): OUTPUT_DIR is not referenced by any cell visible here (every
# plot_region call passes saveflag=False) — confirm whether it is still needed.
OUTPUT_DIR = Path('./plots')

# One NetCDF file per variance product, all resolved relative to BASE_DIR.
DAILY_FILE = BASE_DIR / 'variance_daily_fullperiod.nc'
WITHIN_MONTH_FILE = BASE_DIR / 'variance_withinmonth_daily.nc'
MONTHLY_MEAN_FILE = BASE_DIR / 'variance_monthlymean_IAV.nc'
ANNUAL_MEAN_FILE = BASE_DIR / 'variance_annualmean_IAV.nc'


In [None]:
def load_variance_products():
    """Load the four variance NetCDF products with ``xr.load_dataset``.

    Returns:
        tuple: ``(daily, within_month, monthly_mean, annual_mean)`` datasets
        read from ``DAILY_FILE``, ``WITHIN_MONTH_FILE``, ``MONTHLY_MEAN_FILE``
        and ``ANNUAL_MEAN_FILE``, in that order.
    """
    product_paths = (
        DAILY_FILE,
        WITHIN_MONTH_FILE,
        MONTHLY_MEAN_FILE,
        ANNUAL_MEAN_FILE,
    )
    # Files are opened one after another in the order they are returned.
    return tuple(xr.load_dataset(path) for path in product_paths)


EPS = 1e-10


def mask_small_baseline(ds, base_name, pct_name, eps=EPS):
    """Blank out percent-change values whose baseline is too small to trust.

    Entries of ``ds[pct_name]`` are kept only where ``ds[base_name] > eps``;
    everywhere else (including where the baseline is NaN, since the comparison
    is then false) they are replaced by NaN.

    Args:
        ds: Dataset-like container holding both variables. It is modified in
            place.
        base_name: Name of the baseline variable used as the mask criterion.
        pct_name: Name of the percent-change variable to mask.
        eps: Baseline values must be strictly greater than this to be kept.

    Returns:
        The same ``ds`` object, with ``ds[pct_name]`` replaced by its masked
        version.
    """
    keep = ds[base_name] > eps
    # The ``.where`` method keeps the variable's own attributes and dimension
    # order, which the free function ``xr.where`` does not guarantee.
    ds[pct_name] = ds[pct_name].where(keep)
    return ds


def summarise_percent_change(ds, pct_name):
    """Return the NaN-skipping mean and median of one percent-change field.

    Args:
        ds: Container indexed by variable name.
        pct_name: Name of the field to summarise.

    Returns:
        dict: ``{'mean': float, 'median': float}``.
    """
    pct_field = ds[pct_name]
    return {
        stat: float(getattr(pct_field, stat)(skipna=True))
        for stat in ('mean', 'median')
    }


In [None]:
def da_to_plot_with_limits(da, percentile=95, symmetric=True):
    """Flatten a field into plot rows and derive percentile colour limits.

    Args:
        da: Object exposing ``.values`` plus ``da['lon']`` / ``da['lat']``
            whose ``.values`` have the same shape as the data (the stacking
            step requires matching shapes).
        percentile: Percentile (0-100) used to clip the colour range.
        symmetric: If true, limits are ``-b, +b`` with ``b`` the requested
            percentile of the absolute finite values. Otherwise they are the
            ``100 - percentile`` and ``percentile`` percentiles of the finite
            values.

    Returns:
        tuple: ``(plot_array, cmin, cmax)``. ``plot_array`` has one
        ``[value, lon, lat]`` row per element; ``cmin`` and ``cmax`` are
        ``None`` when the field has no finite value.
    """
    data = da.values.astype(float)
    columns = [data] + [da[name].values.astype(float) for name in ('lon', 'lat')]
    rows = np.stack(columns, axis=-1).reshape(-1, 3)

    finite_values = data[np.isfinite(data)]
    if finite_values.size == 0:
        return rows, None, None

    if not symmetric:
        low = float(np.nanpercentile(finite_values, 100 - percentile))
        high = float(np.nanpercentile(finite_values, percentile))
        return rows, low, high

    bound = float(np.nanpercentile(np.abs(finite_values), percentile))
    return rows, -bound, bound


In [None]:
def da_ratio_plot_array(da_num, da_den, percentile=95):
    """Build plot rows of ``da_num / da_den`` with limits centred on 1.

    The ratio is computed only where both inputs are finite and the
    denominator is non-zero; every other element is NaN.

    Args:
        da_num: Numerator field; also supplies ``lon`` and ``lat``.
        da_den: Denominator field.
        percentile: Percentile (0-100) of ``|ratio - 1|`` that sets the
            half-width of the colour range.

    Returns:
        tuple: ``(plot_array, lower, upper)``. ``plot_array`` has one
        ``[ratio, lon, lat]`` row per element; ``lower`` and ``upper`` are
        ``1 - b`` and ``1 + b``, or both ``None`` when no ratio is valid.
    """
    num = da_num.values.astype(float)
    den = da_den.values.astype(float)
    lon = da_num['lon'].values.astype(float)
    lat = da_num['lat'].values.astype(float)

    usable = np.isfinite(num) & np.isfinite(den) & (den != 0)
    ratio = np.full_like(num, np.nan)
    ratio[usable] = num[usable] / den[usable]

    rows = np.stack([ratio, lon, lat], axis=-1).reshape(-1, 3)

    if not np.any(usable):
        return rows, None, None

    half_width = float(np.nanpercentile(np.abs(ratio[usable] - 1.0), percentile))
    return rows, 1.0 - half_width, 1.0 + half_width


In [None]:
# Read all four variance products into notebook-level names used by every
# cell below.
daily, within_month, monthly_mean, annual_mean = load_variance_products()

# Mask percent-change fields against their OL baselines
# (a percent change is kept only where the OL variance exceeds EPS).
# NOTE(review): within_month is not masked here although its percent-change
# fields are plotted below — confirm whether that is intentional.
for var in ['SFMC', 'RZMC']:
    daily = mask_small_baseline(daily, f'{var}_daily_var_OL', f'{var}_daily_var_pct')
    monthly_mean = mask_small_baseline(monthly_mean, f'{var}_monthlymean_var_OL', f'{var}_monthlymean_var_pct')
    annual_mean = mask_small_baseline(annual_mean, f'{var}_annualmean_var_OL', f'{var}_annualmean_var_pct')


In [None]:
# SFMC daily variance percent change. With the helper's defaults the colour
# limits are symmetric about zero at the 95th percentile of |values|.
sfmc_daily_pct, daily_cmin, daily_cmax = da_to_plot_with_limits(daily['SFMC_daily_var_pct'])
fig, ax = plot_region(
    sfmc_daily_pct,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=True,
    plot_title='SFMC Daily Variance %Δ (DA - OL)',
    units='%',
    cmin=daily_cmin,
    cmax=daily_cmax
)


In [None]:
# SFMC within-month January percent change
# (month=1 is selected from the within-month product; limits are symmetric
# about zero at the 95th percentile of |values|).
sfmc_within_jan, jan_cmin, jan_cmax = da_to_plot_with_limits(within_month['SFMC_month_dailyvar_pct'].sel(month=1))
fig, ax = plot_region(
    sfmc_within_jan,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='SFMC Within-Month Var %Δ (January)',
    units='%',
    cmin=jan_cmin,
    cmax=jan_cmax
)


In [None]:
# SFMC annual-mean interannual variability percent change
# (limits are symmetric about zero at the 95th percentile of |values|).
sfmc_annual_pct, annual_cmin, annual_cmax = da_to_plot_with_limits(annual_mean['SFMC_annualmean_var_pct'])
fig, ax = plot_region(
    sfmc_annual_pct,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='SFMC Annual-Mean Variance %Δ (DA - OL)',
    units='%',
    cmin=annual_cmin,
    cmax=annual_cmax
)


In [None]:
# SFMC monthly-mean variance percent change (DA - OL)
# NOTE: despite the "*_delta" variable names, this cell plots the percent-change
# field ('SFMC_monthlymean_var_pct'), not the absolute difference.
sfmc_monthly_delta, monthly_delta_cmin, monthly_delta_cmax = da_to_plot_with_limits(
    monthly_mean['SFMC_monthlymean_var_pct']
)
fig, ax = plot_region(
    sfmc_monthly_delta,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='SFMC Monthly-Mean Variance %Δ (DA - OL)',
    units='%',
    cmin=monthly_delta_cmin,
    cmax=monthly_delta_cmax
)


In [None]:
# SFMC daily variance for OL
# symmetric=False: colour limits are the 5th and 95th percentiles of the
# finite values rather than a range centred on zero.
# NOTE(review): confirm that the 'cm$^4$' units label matches the units of
# the variance fields in the input files.
sfmc_daily_ol, daily_ol_cmin, daily_ol_cmax = da_to_plot_with_limits(
    daily['SFMC_daily_var_OL'],
    symmetric=False
)
fig, ax = plot_region(
    sfmc_daily_ol,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='SFMC Daily Variance (OL)',
    units='cm$^4$',
    cmin=daily_ol_cmin,
    cmax=daily_ol_cmax
)



In [None]:
# SFMC daily variance for DA
# symmetric=False: colour limits are the 5th and 95th percentiles of the
# finite values rather than a range centred on zero.
sfmc_daily_da, daily_da_cmin, daily_da_cmax = da_to_plot_with_limits(
    daily['SFMC_daily_var_DA'],
    symmetric=False
)
fig, ax = plot_region(
    sfmc_daily_da,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='SFMC Daily Variance (DA)',
    units='cm$^4$',
    cmin=daily_da_cmin,
    cmax=daily_da_cmax
)


## Root-Zone Soil Moisture Variance Maps

In [None]:
# RZMC daily variance percent change. With the helper's defaults the colour
# limits are symmetric about zero at the 95th percentile of |values|.
rzmc_daily_pct, rz_daily_cmin, rz_daily_cmax = da_to_plot_with_limits(daily['RZMC_daily_var_pct'])
fig, ax = plot_region(
    rzmc_daily_pct,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=True,
    plot_title='RZMC Daily Variance %Δ (DA vs OL)',
    units='%',
    cmin=rz_daily_cmin,
    cmax=rz_daily_cmax
)
fig


In [None]:
# RZMC within-month January percent change
# (month=1 is selected from the within-month product; limits are symmetric
# about zero at the 95th percentile of |values|).
rzmc_within_jan, rz_jan_cmin, rz_jan_cmax = da_to_plot_with_limits(within_month['RZMC_month_dailyvar_pct'].sel(month=1))
fig, ax = plot_region(
    rzmc_within_jan,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='RZMC Within-Month Var %Δ (January)',
    units='%',
    cmin=rz_jan_cmin,
    cmax=rz_jan_cmax
)
fig


In [None]:
# RZMC annual-mean interannual variability percent change
# (limits are symmetric about zero at the 95th percentile of |values|).
rzmc_annual_pct, rz_annual_cmin, rz_annual_cmax = da_to_plot_with_limits(annual_mean['RZMC_annualmean_var_pct'])
fig, ax = plot_region(
    rzmc_annual_pct,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='RZMC Annual-Mean Variance %Δ',
    units='%',
    cmin=rz_annual_cmin,
    cmax=rz_annual_cmax
)
fig


In [None]:
# RZMC monthly-mean variance difference (DA - OL), plotted as an absolute
# difference with limits symmetric about zero.
# NOTE(review): the SFMC monthly-mean cell plots the '*_var_pct' field with
# units '%', whereas this cell plots '*_var_delta' — confirm which is intended.
# NOTE(review): confirm that the 'cm$^4$' units label matches the units of
# the variance fields in the input files.
rzmc_monthly_delta, rz_monthly_delta_cmin, rz_monthly_delta_cmax = da_to_plot_with_limits(
    monthly_mean['RZMC_monthlymean_var_delta']
)
fig, ax = plot_region(
    rzmc_monthly_delta,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='RZMC Monthly-Mean Variance Δ (DA - OL)',
    units='cm$^4$',
    cmin=rz_monthly_delta_cmin,
    cmax=rz_monthly_delta_cmax
)
fig


In [None]:
# RZMC daily variance for OL
# symmetric=False: colour limits are the 5th and 95th percentiles of the
# finite values rather than a range centred on zero.
rzmc_daily_ol, rz_daily_ol_cmin, rz_daily_ol_cmax = da_to_plot_with_limits(
    daily['RZMC_daily_var_OL'],
    symmetric=False
)
fig, ax = plot_region(
    rzmc_daily_ol,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='RZMC Daily Variance (OL)',
    units='cm$^4$',
    cmin=rz_daily_ol_cmin,
    cmax=rz_daily_ol_cmax
)
fig


In [None]:
# RZMC daily variance for DA
# symmetric=False: colour limits are the 5th and 95th percentiles of the
# finite values rather than a range centred on zero.
rzmc_daily_da, rz_daily_da_cmin, rz_daily_da_cmax = da_to_plot_with_limits(
    daily['RZMC_daily_var_DA'],
    symmetric=False
)
fig, ax = plot_region(
    rzmc_daily_da,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='RZMC Daily Variance (DA)',
    units='cm$^4$',
    cmin=rz_daily_da_cmin,
    cmax=rz_daily_da_cmax
)
fig


In [None]:
import pandas as pd

import numpy as np

EPS_VAR = 1e-6     # ignore OL cells with negligible variance
DELTA_ABS_MIN = 1e-5  # require absolute change above noise floor


def _expand_coord(coord, shape, leading):
    """Broadcast a coordinate array to ``shape``.

    A coordinate that already has ``shape`` is returned unchanged. Otherwise
    it is aligned with the leading axes (``leading=True``) or the trailing
    axes (``leading=False``) before broadcasting.
    """
    coord = np.asarray(coord, dtype=float)
    if coord.shape == tuple(shape):
        return coord
    if leading:
        coord = coord.reshape(coord.shape + (1,) * (len(shape) - coord.ndim))
    return np.broadcast_to(coord, shape)


def _basic_stats(arr, mask, weights=None):
    """Summarise ``arr[mask]``: mean (optionally weighted), median, percentiles."""
    data = arr[mask]
    if data.size == 0:
        return dict(mean=np.nan, median=np.nan, p25=np.nan, p75=np.nan, p5=np.nan, p95=np.nan)
    if weights is None:
        mean = float(np.nanmean(data))
    else:
        mean = float(np.average(data, weights=weights[mask]))
    return dict(
        mean=mean,
        median=float(np.nanmedian(data)),
        p25=float(np.nanpercentile(data, 25)),
        p75=float(np.nanpercentile(data, 75)),
        p5=float(np.nanpercentile(data, 5)),
        p95=float(np.nanpercentile(data, 95)),
    )


def summarize_variance(ds, var_prefix, *, eps_var=None, delta_abs_min=None, n_hotspots=10):
    """Summarise the daily variance fields of one variable.

    Reads ``{var_prefix}_daily_var_OL``, ``_DA``, ``_delta`` and ``_pct`` plus
    ``lat`` and ``lon`` from ``ds`` (each accessed through ``.values``).

    Latitude and longitude may either have the same shape as the fields or be
    1-D axes of a gridded field. In the gridded case latitude is taken to vary
    along the first axis and longitude along the last — TODO confirm against
    the dataset's dimension order.

    Args:
        ds: Container indexed by variable name.
        var_prefix: Variable prefix, e.g. ``'SFMC'``.
        eps_var: OL variance must exceed this for percent, ratio and
            reduction statistics. Defaults to ``EPS_VAR``.
        delta_abs_min: ``|delta|`` must exceed this for the reduction
            fraction and the hotspot list. Defaults to ``DELTA_ABS_MIN``.
        n_hotspots: Maximum number of hotspot rows to return.

    Returns:
        dict with keys:
            ``variance_stats``: unweighted stats of OL, DA and Delta over
                their finite values.
            ``percent_stats``: stats of the percent change where it is finite
                and the OL variance exceeds ``eps_var``; only the mean is
                weighted by ``cos(lat)``.
            ``reduction_frac`` / ``amplification_frac``: fraction of retained
                cells with negative / non-negative delta (NaN if none).
            ``ratio_stats``: mean and median of DA / OL.
            ``hotspots``: DataFrame with columns ``rank``, ``value_pct``,
                ``lat``, ``lon`` listing the largest ``|pct|`` cells.
    """
    eps_var = EPS_VAR if eps_var is None else eps_var
    delta_abs_min = DELTA_ABS_MIN if delta_abs_min is None else delta_abs_min

    ol = ds[f"{var_prefix}_daily_var_OL"].values
    da = ds[f"{var_prefix}_daily_var_DA"].values
    delta = ds[f"{var_prefix}_daily_var_delta"].values
    pct = ds[f"{var_prefix}_daily_var_pct"].values

    # Full-shape coordinates so a boolean mask of the field shape can index
    # the weights and the hotspot locations directly.
    lat_grid = _expand_coord(ds['lat'].values, ol.shape, leading=True)
    lon_grid = _expand_coord(ds['lon'].values, ol.shape, leading=False)
    weights = np.cos(np.deg2rad(lat_grid))  # area weights

    ol_ok = np.isfinite(ol)
    da_ok = np.isfinite(da)
    delta_ok = np.isfinite(delta)
    pct_ok = np.isfinite(pct)

    # robust mask for % and ratios
    denom_ok = ol > eps_var
    meaningful = np.abs(delta) > delta_abs_min

    # vanilla (for absolute variances)
    variance_stats = {
        'OL': _basic_stats(ol, ol_ok),
        'DA': _basic_stats(da, da_ok),
        'Delta': _basic_stats(delta, delta_ok),
    }

    # robust percent stats
    pct_mask = pct_ok & denom_ok
    percent_stats = _basic_stats(pct, pct_mask, weights=weights)

    # reduction/amplification based on robust mask (+ absolute delta threshold)
    delta_data = delta[delta_ok & denom_ok & meaningful]
    if delta_data.size:
        reduction_frac = float(np.count_nonzero(delta_data < 0) / delta_data.size)
        amplification_frac = 1.0 - reduction_frac
    else:
        reduction_frac = amplification_frac = np.nan

    # robust ratios
    ratio_mask = ol_ok & da_ok & denom_ok
    ratio_stats = {'ratio_mean': np.nan, 'ratio_median': np.nan}
    if np.any(ratio_mask):
        ratios = da[ratio_mask] / ol[ratio_mask]
        ratio_stats = {
            'ratio_mean': float(np.nanmean(ratios)),
            'ratio_median': float(np.nanmedian(ratios)),
        }

    # hotspots after robust filtering
    hotspot_df = pd.DataFrame(columns=['rank', 'value_pct', 'lat', 'lon'])
    hot_mask = pct_mask & meaningful
    if np.any(hot_mask):
        hot_values = pct[hot_mask]
        # Stable sort keeps ties in their original order, so the listing is
        # reproducible between runs.
        order = np.argsort(-np.abs(hot_values), kind='stable')[:n_hotspots]
        hotspot_df = pd.DataFrame({
            'rank': np.arange(1, order.size + 1),
            'value_pct': hot_values[order],
            # Same mask and order as the values, so rows stay aligned.
            'lat': lat_grid[hot_mask][order],
            'lon': lon_grid[hot_mask][order],
        })

    return {
        'variance_stats': variance_stats,
        'percent_stats': percent_stats,
        'reduction_frac': reduction_frac,
        'amplification_frac': amplification_frac,
        'ratio_stats': ratio_stats,
        'hotspots': hotspot_df
    }



def stats_dict_to_df(stats_dict):
    """Convert ``{name: {stat: value}}`` into a DataFrame, one row per name.

    Each row carries the name in a ``metric`` column followed by that entry's
    statistics as further columns.
    """
    return pd.DataFrame([
        {'metric': name, **values}
        for name, values in stats_dict.items()
    ])

# Compute summaries
# (both variables are summarised from the same `daily` dataset).
sfmc_summary = summarize_variance(daily, 'SFMC')
rzmc_summary = summarize_variance(daily, 'RZMC')

# Report SFMC: variance stats as a table, the remaining entries as plain text.
# `display` is not imported in this cell — presumably the notebook builtin.
print('--- SFMC Daily Variance Summary ---')
display(stats_dict_to_df(sfmc_summary['variance_stats']))
print('Percent change stats (SFMC):', sfmc_summary['percent_stats'])
print('Reduction fraction (SFMC):', sfmc_summary['reduction_frac'])
print('Amplification fraction (SFMC):', sfmc_summary['amplification_frac'])
print('DA/OL ratio stats (SFMC):', sfmc_summary['ratio_stats'])
print('Hotspots (SFMC):')
display(sfmc_summary['hotspots'])

# Same report for RZMC.
print('--- RZMC Daily Variance Summary ---')
display(stats_dict_to_df(rzmc_summary['variance_stats']))
print('Percent change stats (RZMC):', rzmc_summary['percent_stats'])
print('Reduction fraction (RZMC):', rzmc_summary['reduction_frac'])
print('Amplification fraction (RZMC):', rzmc_summary['amplification_frac'])
print('DA/OL ratio stats (RZMC):', rzmc_summary['ratio_stats'])
print('Hotspots (RZMC):')
display(rzmc_summary['hotspots'])


## Daily Variance Ratios (DA / OL)

In [None]:
# SFMC daily variance ratio (DA / OL). The helper returns colour limits
# centred on 1, with half-width set by the 95th percentile of |ratio - 1|.
sfmc_ratio_array, sfmc_ratio_min, sfmc_ratio_max = da_ratio_plot_array(
    daily['SFMC_daily_var_DA'],
    daily['SFMC_daily_var_OL']
)
fig, ax = plot_region(
    sfmc_ratio_array,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='SFMC Daily Variance Ratio (DA / OL)',
    units='ratio',
    cmin=sfmc_ratio_min,
    cmax=sfmc_ratio_max,
    cmap='RdBu_r'
)
fig


In [None]:
# RZMC daily variance ratio (DA / OL). The helper returns colour limits
# centred on 1, with half-width set by the 95th percentile of |ratio - 1|.
rzmc_ratio_array, rzmc_ratio_min, rzmc_ratio_max = da_ratio_plot_array(
    daily['RZMC_daily_var_DA'],
    daily['RZMC_daily_var_OL']
)
fig, ax = plot_region(
    rzmc_ratio_array,
    REGION_BOUNDS['global'],
    saveflag=False,
    meanflag=False,
    plot_title='RZMC Daily Variance Ratio (DA / OL)',
    units='ratio',
    cmin=rzmc_ratio_min,
    cmax=rzmc_ratio_max,
    cmap='RdBu_r'
)
fig
