# Glambie analysis


## Setup


In [None]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
import glob
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
import statsmodels.api as sm
from scipy import stats
from scipy.stats import pearsonr, spearmanr
import pymannkendall as mk
import warnings
import geopandas as gpd
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Suppress FutureWarning messages for the whole session.
warnings.filterwarnings("ignore", category=FutureWarning)

# Paths (all relative to the current working directory)
input_dir = Path("input")
output_dir = Path("output")
input_rel = input_dir / "glambie_reference"
input_maps = input_dir / "maps"
input_datasets = input_dir / "input_datasets"
glambie_runs = input_dir / "glambie_runs"
output_sensitivity = output_dir / "sensitivity"
output_rel = output_dir / "relative_change"
output_dir_datasets = output_dir / "datasets"

# Figures and tables are written into these folders further down. Neither
# plt.savefig nor DataFrame.to_csv creates missing parent directories, so make
# sure the folders exist before anything is saved.
for _out_path in (output_sensitivity, output_rel, output_dir_datasets):
    _out_path.mkdir(parents=True, exist_ok=True)

# Set colors
colors_list = sns.color_palette(palette='colorblind', n_colors=18, desat=0.9)
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=colors_list)
# two colors (first two entries of the palette), used for single-series plots
blue, orange = colors_list[:2]

## Relative change calculations


In [None]:
# Locations of the reference inputs
calendar_years_path = input_rel / "calendar_years"
glacier_mass_2000_path = input_rel / "glacier_mass_2000.csv"

# Semicolon-separated table; the columns File, Region, Mass and Error are read below
mass_2000 = pd.read_csv(str(glacier_mass_2000_path), sep=';')

# Lookup: CSV file name -> region label, mass and mass error
file_to_mass_data = {
    record['File']: {
        'region': record['Region'],
        'mass': record['Mass'],
        'error': record['Error']
    }
    for _, record in mass_2000.iterrows()
}

# Every CSV in the calendar_years folder, keyed by its bare file name
annual_changes = {
    Path(csv_path).name: pd.read_csv(csv_path)
    for csv_path in sorted(glob.glob(str(calendar_years_path / "*.csv")))
}

In [None]:
# Calculate annual relative change with uncertainty propagation
#
# For every region file the mass is rolled forward year by year, starting from
# the mass listed for that file in file_to_mass_data. Each year contributes one
# row to `results` holding the mass at the start of the year, the annual change
# and the change relative to that mass (as a fraction and in percent).
results = []

for filename, df_changes in sorted(annual_changes.items()):
    # Raises KeyError if a calendar-year file has no entry in the mass table
    matched_mass_data = file_to_mass_data[filename]
    current_mass = matched_mass_data['mass']
    current_mass_error = matched_mass_data['error']

    # Chronological order is required because the mass is accumulated below
    df_changes_sorted = df_changes.sort_values('start_dates').reset_index(drop=True)

    for _, row in df_changes_sorted.iterrows():
        annual_change = row['combined_gt']
        annual_change_error = row['combined_gt_errors']

        relative_change = annual_change / current_mass

        # Error propagation for the ratio r = a / m (a: annual change, m: mass),
        # treating both errors as independent:
        #   sigma_r = sqrt((sigma_a / m)^2 + (r * sigma_m / m)^2)
        # This equals |r| * sqrt((sigma_a / a)^2 + (sigma_m / m)^2) but does not
        # divide by the annual change, so a year with zero change still gets a
        # finite error instead of NaN.
        relative_change_error_term1 = (annual_change_error / current_mass) ** 2
        relative_change_error_term2 = (relative_change * current_mass_error / current_mass) ** 2
        relative_change_error = np.sqrt(relative_change_error_term1 + relative_change_error_term2)

        results.append({
            'region': matched_mass_data['region'],
            'year_start': int(row['start_dates']),
            'year_end': int(row['end_dates']),
            'glacier_mass_at_start': current_mass,
            'glacier_mass_error_at_start': current_mass_error,
            'annual_change_gt': annual_change,
            'annual_change_error': annual_change_error,
            'relative_change': relative_change,
            'relative_change_error': relative_change_error,
            'relative_change_pct': relative_change * 100,
            'relative_change_error_pct': relative_change_error * 100
        })

        # Roll the mass forward; the errors are added in quadrature
        current_mass = current_mass + annual_change
        current_mass_error = np.sqrt(current_mass_error**2 + annual_change_error**2)

results_df = pd.DataFrame(results)


### Combined relative change and mass evolution

In [None]:
# Two panels: the global series on the left, all other regions on the right
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
ax1, ax2 = axes[0], axes[1]

all_regions = sorted(results_df['region'].unique())
global_regions = [name for name in all_regions if name.lower() == 'global']
other_regions = [name for name in all_regions if name.lower() != 'global']

# Left panel: fixed colour for the global series
for region in global_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    years = region_data['year_start']
    centre = region_data['relative_change_pct']
    spread = region_data['relative_change_error_pct']
    ax1.plot(years, centre, marker='o', label=region, alpha=0.8, linewidth=2, color=blue)
    ax1.fill_between(years, centre - spread, centre + spread, alpha=0.2, color=blue)

# Right panel: colours come from the axes property cycle
for region in other_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    years = region_data['year_start']
    centre = region_data['relative_change_pct']
    spread = region_data['relative_change_error_pct']
    ax2.plot(years, centre, marker='o', label=region, alpha=0.8, linewidth=2)
    ax2.fill_between(years, centre - spread, centre + spread, alpha=0.15)

# Formatting shared by both panels
for axis, panel_title in (
    (ax1, 'Global Annual Relative Glacier Mass Change'),
    (ax2, 'Regional Annual Relative Glacier Mass Change'),
):
    axis.set_xlabel('Year Start', fontsize=12)
    axis.set_ylabel('Relative Change (%)', fontsize=12)
    axis.set_title(panel_title, fontsize=14)
    axis.grid(True, alpha=0.3)
    axis.axhline(y=0, color='k', linestyle='--', alpha=0.5)

ax1.legend(fontsize=10, loc='lower left')
# The regional legend is anchored outside the right edge of the axes
ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8)

plt.tight_layout()
plt.savefig(output_rel / 'relative_change_plots.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Glacier mass at the start of each year: global (left) and regional (right)
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
ax3, ax4 = axes[0], axes[1]

for region in global_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    years = region_data['year_start']
    mass = region_data['glacier_mass_at_start']
    mass_err = region_data['glacier_mass_error_at_start']
    ax3.plot(years, mass, marker='s', label=region, alpha=0.8, linewidth=2, color=blue)
    ax3.fill_between(years, mass - mass_err, mass + mass_err, alpha=0.2, color=blue)

for region in other_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    years = region_data['year_start']
    mass = region_data['glacier_mass_at_start']
    mass_err = region_data['glacier_mass_error_at_start']
    ax4.plot(years, mass, marker='s', label=region, alpha=0.8, linewidth=2)
    ax4.fill_between(years, mass - mass_err, mass + mass_err, alpha=0.15)

# Formatting shared by both panels; the y-axis is pinned to start at zero
for axis, panel_title in ((ax3, 'Global Glacier Mass'), (ax4, 'Regional Glacier Mass')):
    axis.set_xlabel('Year Start', fontsize=12)
    axis.set_ylabel('Glacier Mass (Gt)', fontsize=12)
    axis.set_title(panel_title, fontsize=14)
    axis.grid(True, alpha=0.3)
    axis.set_ylim(bottom=0)

ax3.legend(fontsize=10)
ax4.legend(bbox_to_anchor=(1, 0.8), loc='upper left', fontsize=9)

plt.tight_layout()
plt.savefig(output_rel / 'glacier_mass_evolution.png', dpi=300, bbox_inches='tight')
plt.show()

### Relative vs absolute change with trend test

In [None]:
# Stand-alone figure of the global relative change with its error band
fig, ax = plt.subplots(figsize=(10, 6))
all_regions = sorted(results_df['region'].unique())
global_regions = [name for name in all_regions if name.lower() == 'global']

for region in global_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    years = region_data['year_start']
    centre = region_data['relative_change_pct']
    spread = region_data['relative_change_error_pct']
    ax.plot(years, centre, marker='o', label=region, alpha=0.8, linewidth=2, color=blue)
    ax.fill_between(years, centre - spread, centre + spread, alpha=0.2, color=blue)

ax.set_title('Global Annual Relative Glacier Mass Change', fontsize=14)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Relative Mass Change (%)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.axhline(y=0, color='k', linestyle='--', alpha=0.5)  # zero reference line

plt.tight_layout()
plt.savefig(output_rel / 'global_relative_change_plot.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Mann-Kendall trend test on the relative change (%) series of every region.
# NOTE: the result is stored in `absolute_result` although it refers to the
# relative series; the name is kept as in the sibling cell.
for region in all_regions:
    is_region = results_df['region'] == region
    region_data = results_df.loc[is_region].sort_values('year_start')
    relative_series = region_data['relative_change_pct']
    absolute_result = mk.original_test(relative_series)
    print(f"{region}: {absolute_result}")


In [None]:
# Absolute change for comparison
# Same layout as the relative-change figure, but in Gt instead of percent.
fig, ax = plt.subplots(figsize=(10, 6))
all_regions = sorted(results_df['region'].unique())
global_regions = [name for name in all_regions if name.lower() == 'global']

for region in global_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    years = region_data['year_start']
    centre = region_data['annual_change_gt']
    spread = region_data['annual_change_error']
    ax.plot(years, centre, marker='o', label=region, alpha=0.8, linewidth=2, color=blue)
    ax.fill_between(years, centre - spread, centre + spread, alpha=0.2, color=blue)

ax.set_title('Global Annual Absolute Glacier Mass Change', fontsize=14)
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Mass Change (Gt)', fontsize=12)
ax.grid(True, alpha=0.3)
ax.axhline(y=0, color='k', linestyle='--', alpha=0.5)  # zero reference line

plt.tight_layout()
plt.savefig(output_rel / 'global_absolute_change_plot.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Mann-Kendall trend test on the absolute change (Gt) series of every region
for region in all_regions:
    is_region = results_df['region'] == region
    region_data = results_df.loc[is_region].sort_values('year_start')
    absolute_series = region_data['annual_change_gt']
    absolute_result = mk.original_test(absolute_series)
    print(f"{region}: {absolute_result}")


In [None]:
# Scatter of absolute (Gt) against relative (%) annual change for the global series
fig, ax = plt.subplots(figsize=(10, 6))

for region in global_regions:
    region_data = results_df[results_df['region'] == region].sort_values('year_start')
    absolute_gt = region_data['annual_change_gt']
    relative_pct = region_data['relative_change_pct']
    ax.scatter(absolute_gt, relative_pct, marker='o', label=region, alpha=0.8, s=50, color=blue)

ax.set_title('Absolute vs Relative Change (Global)', fontsize=14)
ax.set_xlabel('Absolute Change (Gt)', fontsize=12)
ax.set_ylabel('Relative Change (%)', fontsize=12)
ax.grid(True, alpha=0.3)

# Dashed reference lines through the origin
ax.axhline(y=0, color='k', linestyle='--', alpha=0.5)
ax.axvline(x=0, color='k', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.savefig(output_rel / 'absolute_vs_relative_change.png', dpi=300, bbox_inches='tight')
plt.show()


### Combined relative change and mass evolution

In [None]:
# One two-panel figure per region, saved as combined_<region>.png in output_rel:
#   top    - annual relative change (%) with the absolute change (Gt) on a twin axis
#   bottom - glacier mass at the start of each year (Gt)
for region in sorted(results_df['region'].unique()):
    region_data = results_df[results_df['region'] == region].sort_values('year_start')

    # Glacier Mass Data
    x_mass = region_data['year_start'].values
    y_mass = region_data['glacier_mass_at_start'].values
    y_err_mass = region_data['glacier_mass_error_at_start'].values

    # Relative Change Data
    x_rel = region_data['year_start'].values
    y_rel = region_data['relative_change_pct'].values
    y_err_rel = region_data['relative_change_error_pct'].values

    # Compute relative change from start to end for glacier mass
    # NOTE(review): end_mass is the mass at the START of the last year, so the
    # change during the last year is not part of this number - confirm intended.
    start_mass = region_data.iloc[0]['glacier_mass_at_start']
    end_mass = region_data.iloc[-1]['glacier_mass_at_start']
    relative_change = ((end_mass - start_mass) / start_mass) * 100

    # Create a figure with two subplots (top = changes, bottom = glacier mass)
    fig, (ax_top, ax_bottom) = plt.subplots(nrows=2, figsize=(10, 8), sharex=True)

    # Upper subplot: Relative change (%) + twin for Absolute change (Gt)
    ax_top.plot(x_rel, y_rel, marker='o', label='Relative Change', alpha=0.8, linewidth=2, color=blue)
    ax_top.fill_between(x_rel, y_rel - y_err_rel, y_rel + y_err_rel, alpha=0.1, color=blue)
    ax_top.set_ylabel('Relative Mass Change (%)', fontsize=12)
    ax_top.set_title(f'{region}')
    ax_top.grid(True, alpha=0.3)

    # Twin axis for absolute change (Gt) # DELETE IF PLOTTING ONLY RELATIVE CHANGES
    # NOTE(review): both lines carry a label, but no legend is drawn in this cell.
    ax_top_twin = ax_top.twinx()
    ax_top_twin.plot(region_data['year_start'], region_data['annual_change_gt'],
                     marker='s', label='Absolute Change', alpha=0.6, linewidth=2, color=orange)
    ax_top_twin.fill_between(region_data['year_start'],
                             region_data['annual_change_gt'] - region_data['annual_change_error'],
                             region_data['annual_change_gt'] + region_data['annual_change_error'],
                             alpha=0.1, color=orange)
    ax_top_twin.set_ylabel('Mass Change (Gt)', fontsize=12)

    # y-limits and zero lines for upper panel: both axes are scaled by the same
    # factors of their largest absolute value, so the two zero lines coincide.
    # The limits are not symmetric (1.55x below zero, 1.4x above) and are based
    # on the central values only, not on the error bands.
    max_rel = max(abs(region_data['relative_change_pct'].min()), abs(region_data['relative_change_pct'].max()))
    max_abs = max(abs(region_data['annual_change_gt'].min()), abs(region_data['annual_change_gt'].max()))
    ax_top.set_ylim(-max_rel * 1.55, max_rel * 1.4)
    ax_top_twin.set_ylim(-max_abs * 1.55, max_abs * 1.4)
    ax_top.axhline(y=0, color='k', linestyle='--', alpha=0.5)
    ax_top_twin.axhline(y=0, color='k', linestyle='--', alpha=0.5)

    # Lower subplot: Glacier Mass (Gt)
    ax_bottom.plot(x_mass, y_mass, marker='s', linestyle='-', alpha=0.8, linewidth=2, color=blue)
    ax_bottom.fill_between(x_mass, y_mass - y_err_mass, y_mass + y_err_mass, alpha=0.15, color=blue)
    # y-range covers the error band plus 5 % padding (fixed padding of 1.0 if the band is flat)
    y_lower_mass = np.nanmin(y_mass - y_err_mass)
    y_upper_mass = np.nanmax(y_mass + y_err_mass)
    pad_mass = 0.05 * (y_upper_mass - y_lower_mass) if y_upper_mass > y_lower_mass else 1.0
    ax_bottom.set_ylim(y_lower_mass - pad_mass, y_upper_mass + pad_mass)
    # Text box in axes coordinates (upper right) showing the start-to-end change
    ax_bottom.text(0.78, 0.95, f"Relative Change: {relative_change:.1f}%",
                   transform=ax_bottom.transAxes, fontsize=10, verticalalignment='top',
                   bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
    ax_bottom.set_ylabel('Glacier Mass (Gt)')
    ax_bottom.set_xlabel('Year')
    ax_bottom.grid(alpha=0.3)

    # File name: lower-cased region with spaces and slashes replaced by underscores
    fname = f"combined_{region.replace(' ', '_').replace('/', '_').lower()}.png"
    plt.tight_layout()
    plt.savefig(output_rel / fname, dpi=300, bbox_inches='tight')
    plt.show()

## Visualization of glambie runs


Global


In [None]:
# Global consensus CSVs of the "including most" run and of every default run
csv_files = sorted([
    *glambie_runs.glob("datasets_including_most/0_*/consensus/csvs/global_gt.csv"),
    *glambie_runs.glob("datasets_default*/0_*/consensus/csvs/global_gt.csv"),
])

fig, ax = plt.subplots(figsize=(14, 8))
# NOTE: this rebinds colors_list (previously the seaborn palette) to tab10
colors_list = list(plt.colormaps["tab10"].colors)

for file_idx, csv_file in enumerate(csv_files):
    df = pd.read_csv(str(csv_file))
    # Colours wrap around when there are more files than palette entries
    c = colors_list[file_idx % len(colors_list)]
    # Run folder name (four levels above the CSV) without its first "_" token
    label = csv_file.parents[3].name.split("_", 1)[1]

    lower_band = df["changes"] - df["errors"]
    upper_band = df["changes"] + df["errors"]
    ax.fill_between(df["start_dates"], lower_band, upper_band, color=c, alpha=0.25)
    ax.plot(df["start_dates"], df["changes"], color=c, linewidth=2, alpha=0.9, label=label)

ax.set_xlabel("Year", fontsize=14)
ax.set_ylabel("Mass Change (Gt)", fontsize=14)
ax.grid(True, alpha=0.8)
ax.legend(bbox_to_anchor=(0.78, 1), loc="upper left", fontsize=16)
fig.tight_layout()
plt.show()

Loop


In [None]:
# One figure per region showing the annual consensus mass change of every
# "datasets_including*" run. The region folders are numbered 1-19 (see the
# keys of regions_bar: "1_alaska" ... "19_antarctic_and_subantarctic"), so the
# range has to end at 20 to include the last region.
for region_idx in range(1, 20):
    csv_files = sorted(glambie_runs.glob(
        f"datasets_including*/{region_idx}_*/consensus/csvs/consensus_calendar_year_gt*.csv"
    ))

    # The figure title is taken from the region folder of a default run
    region_matches = sorted(glambie_runs.glob(f"datasets_default*/{region_idx}_*"))
    if not region_matches:
        # Report the gap instead of failing with an IndexError on [0]
        print(f"Region {region_idx}: no folder found in a datasets_default* run, skipping")
        continue
    region_name = region_matches[0].name.split("_", 1)[1].replace('_', ' ').title()

    fig, ax = plt.subplots(figsize=(14, 8))

    ax.set_prop_cycle(color=plt.colormaps["tab20"].colors)  # 20-color cycle

    for csv_file in csv_files:
        df = pd.read_csv(csv_file)
        # Run folder name without its first "_" token and without "including_"
        label = csv_file.parents[3].name.split("_", 1)[1].replace('including_', '')

        if "errors" in df.columns:
            ax.errorbar(df["start_dates"], df["changes"], yerr=df["errors"],
                        fmt="o-", capsize=5, alpha=0.6, linewidth=2, label=label)
        else:
            # No error column available: draw the plain series
            ax.plot(df["start_dates"], df["changes"], linewidth=2, alpha=0.6, label=label)

    ax.set_title(region_name)
    ax.set_xlabel("Date", fontsize=12)
    ax.set_ylabel("Mass Change (Gt)", fontsize=12)
    ax.legend(bbox_to_anchor=(0.78, 1), loc="upper left")
    ax.grid(True, linestyle="--", alpha=0.6)
    fig.tight_layout()
    plt.show()

## Barcharts

Comparison of different datasets in the algorithm. 
- Relative difference in overall mass change
- Absolute difference in overall mass change
- Spearman correlation

In [None]:
# Paths and configuration
# The two run folders compared in this cell (run1 is the baseline of the metrics).
# NOTE(review): these folder names do not follow the "datasets_*" naming that
# the other cells glob for - confirm the folders exist.
run1 = glambie_runs / "Reanalysis_RGI_6_default"
run2 = glambie_runs / "Reanalysis_RGI_6_including_most"

# Region name -> mass, read from the same semicolon-separated table as above
glacier_mass_file = input_rel / "glacier_mass_2000.csv"
glacier_mass_df = pd.read_csv(str(glacier_mass_file), sep=";")
glacier_mass_dict = dict(zip(glacier_mass_df["Region"], glacier_mass_df["Mass"]))

# Region mappings
# Run sub-folder name -> short label used on the bar-chart axis
regions_bar = {
    "1_alaska": "Alaska",
    "2_western_canada_us": "W. Canada & US",
    "3_arctic_canada_north": "Arctic Canada N.",
    "4_arctic_canada_south": "Arctic Canada S.",
    "5_greenland_periphery": "Greenland Per.",
    "6_iceland": "Iceland",
    "7_svalbard": "Svalbard",
    "8_scandinavia": "Scandinavia",
    "9_russian_arctic": "Russian Arctic",
    "10_north_asia": "North Asia",
    "11_central_europe": "Central Europe",
    "12_caucasus_middle_east": "Caucasus & M.E.",
    "13_central_asia": "Central Asia",
    "14_south_asia_west": "South Asia W.",
    "15_south_asia_east": "South Asia E.",
    "16_low_latitudes": "Low Latitudes",
    "17_southern_andes": "Southern Andes",
    "18_new_zealand": "New Zealand",
    "19_antarctic_and_subantarctic": "Antarctic & Sub.",
}

# Short label -> key used to look the region up in glacier_mass_dict
# (presumably the spelling of the "Region" column in glacier_mass_2000.csv - verify)
mass_lookup = {
    "Alaska": "Alaska",
    "W. Canada & US": "Western Canada and USA",
    "Arctic Canada N.": "Arctic Canada north",
    "Arctic Canada S.": "Arctic Canada south",
    "Greenland Per.": "Greenland periphery",
    "Iceland": "Iceland",
    "Svalbard": "Svalbard and Jan Mayen",
    "Scandinavia": "Scandinavia",
    "Russian Arctic": "Russian Arctic",
    "North Asia": "North Asia",
    "Central Europe": "Central Europe",
    "Caucasus & M.E.": "Caucasus and Middle East",
    "Central Asia": "Central Asia",
    "South Asia W.": "South Asia west",
    "South Asia E.": "South Asia east",
    "Low Latitudes": "Low latitudes",
    "Southern Andes": "Southern Andes",
    "New Zealand": "New Zealand",
    "Antarctic & Sub.": "Antarctic and subantarctic islands",
    "Global": "Global",
}

def compute_all_metrics(df1, df2, glacier_mass):
    """Compare the annual mass-change series of two runs.

    Both frames need the columns ``start_dates`` and ``changes``. They are
    joined on ``start_dates`` and only years present in both runs are used,
    so the cumulative sums and the correlation refer to the same period.

    Args:
        df1: Series of the baseline run (run 1).
        df2: Series of the run compared against the baseline (run 2).
        glacier_mass: Accepted for compatibility with existing callers; it
            does not influence any of the returned values.

    Returns:
        Tuple ``(rel_diff, abs_diff, corr)``:
        ``abs_diff`` is the cumulative change of run 2 minus that of run 1,
        ``rel_diff`` is ``abs_diff`` as a percentage of the absolute
        cumulative change of run 1 (0.0 when that is 0.01 or smaller), and
        ``corr`` is the Spearman correlation of the two series (0.0 when it
        cannot be computed).
    """
    merged = pd.merge(df1, df2, on="start_dates", suffixes=("_run1", "_run2"), how="outer")
    # The outer join leaves NaN where a year exists in one run only. Summing
    # over such rows would compare different periods and would turn the
    # correlation into NaN, so restrict everything to the common years.
    merged = merged.dropna(subset=["changes_run1", "changes_run2"])

    cumul_run1 = merged["changes_run1"].sum()
    cumul_run2 = merged["changes_run2"].sum()
    abs_diff = cumul_run2 - cumul_run1
    # Avoid dividing by a (near-)zero baseline
    if abs(cumul_run1) > 0.01:
        rel_diff = abs_diff / abs(cumul_run1) * 100.0
    else:
        rel_diff = 0.0

    corr = 0.0
    if len(merged) >= 2:
        try:
            rho, _ = spearmanr(merged["changes_run1"], merged["changes_run2"])
        except ValueError:
            rho = np.nan
        # A constant series yields NaN; report it as "no correlation"
        if not np.isnan(rho):
            corr = float(rho)
    return rel_diff, abs_diff, corr

# Initialize lists to store metrics (one entry per region, "Global" last).
# compute_all_metrics returns (rel_diff, abs_diff, corr) only, so no RMSE
# values are collected here.
region_names = []
rel_diff_values = []
abs_diff_values = []
corr_values = []

for region_dir, display_name in regions_bar.items():
    # "12_caucasus_middle_east" -> "caucasus_middle_east"
    region_key = "_".join(region_dir.split("_")[1:])
    csv_name = f"consensus_calendar_year_gt_{region_key}.csv"
    csv1 = run1 / region_dir / "consensus" / "csvs" / csv_name
    csv2 = run2 / region_dir / "consensus" / "csvs" / csv_name

    df1 = pd.read_csv(str(csv1))
    df2 = pd.read_csv(str(csv2))

    # None when the region is missing from the mass table
    mass_key = mass_lookup.get(display_name, display_name)
    glacier_mass = glacier_mass_dict.get(mass_key, None)

    rel_diff, abs_diff, corr = compute_all_metrics(df1, df2, glacier_mass)

    region_names.append(display_name)
    rel_diff_values.append(rel_diff)
    abs_diff_values.append(abs_diff)
    corr_values.append(corr)

# Compute metrics for the global data
csv1_global = run1 / "0_global" / "consensus" / "csvs" / "global_gt.csv"
csv2_global = run2 / "0_global" / "consensus" / "csvs" / "global_gt.csv"
df1 = pd.read_csv(str(csv1_global))
df2 = pd.read_csv(str(csv2_global))
rel_diff, abs_diff, corr = compute_all_metrics(df1, df2, glacier_mass_dict.get("Global", None))
region_names.append("Global")
rel_diff_values.append(rel_diff)
abs_diff_values.append(abs_diff)
corr_values.append(corr)

# Plotting: three horizontal bar panels that share the region axis
fig, (ax3, ax4, ax5) = plt.subplots(1, 3, figsize=(18, 8), sharey=True)

# Relative difference
bars3 = ax3.barh(range(len(region_names)), rel_diff_values)
ax3.set_yticks(range(len(region_names)))
ax3.set_yticklabels(region_names)
ax3.invert_yaxis()  # first region at the top
ax3.set_xlabel("Relative Change (%)")
ax3.set_title("Relative Mass Difference (negative = run shows more mass loss)")
ax3.axvline(0, color="black", linewidth=0.8)
# "or 1" keeps label offsets and axis padding non-zero when all values are 0
max_abs_rel = max(abs(v) for v in rel_diff_values) or 1
for i, val in enumerate(rel_diff_values):
    # Value label just outside the bar end, on the side the bar points to
    if val >= 0:
        ax3.text(val + 0.02 * max_abs_rel, i, f"+{val:.1f}%", va="center", fontsize=8)
    else:
        ax3.text(val - 0.02 * max_abs_rel, i, f"{val:.1f}%", va="center", ha="right", fontsize=8)
pad = max_abs_rel * 0.15
ax3.set_xlim(min(rel_diff_values) - pad, max(rel_diff_values) + pad)

# Absolute difference
bars4 = ax4.barh(range(len(region_names)), abs_diff_values)
ax4.set_xlabel("Absolute Difference (Gt)")
ax4.set_title("Absolute Mass Difference")
ax4.axvline(0, color="black", linewidth=0.8)
max_abs_diff = max(abs(v) for v in abs_diff_values) or 1
for i, val in enumerate(abs_diff_values):
    if val >= 0:
        ax4.text(val + 0.02 * max_abs_diff, i, f"+{val:.2f}", va="center", fontsize=8)
    else:
        ax4.text(val - 0.02 * max_abs_diff, i, f"{val:.2f}", va="center", ha="right", fontsize=8)
pad = max_abs_diff * 0.30
ax4.set_xlim(min(abs_diff_values) - pad, max(abs_diff_values) + pad)

# Correlation
bars5 = ax5.barh(range(len(region_names)), corr_values)
ax5.set_xlabel("Correlation")
ax5.set_title("Spearman Correlation")
ax5.set_xlim(-0.1, 1.1)
for i, val in enumerate(corr_values):
    if val >= 0:
        ax5.text(val + 0.02, i, f"{val:.2f}", va="center", fontsize=8)
    else:
        ax5.text(val + 0.08, i, f"{val:.2f}", va="center", ha="right", fontsize=8)

fig.tight_layout()
path_combined = output_sensitivity / "sensitivity_metrics_combined.png"
fig.savefig(str(path_combined), dpi=200, bbox_inches="tight")
plt.show()

In [None]:
# Prepare input data and region mapping
# This cell defines the same mass table and region mappings as the barchart
# cell above, so it can be run without it.
# Region name -> mass, read from the semicolon-separated reference table
glacier_mass_file = input_rel / "glacier_mass_2000.csv"
glacier_mass_df = pd.read_csv(str(glacier_mass_file), sep=";")
glacier_mass_dict = dict(zip(glacier_mass_df["Region"], glacier_mass_df["Mass"]))

# Region mappings
# Run sub-folder name -> short label used on the bar-chart axis
regions_bar = {
    "1_alaska": "Alaska",
    "2_western_canada_us": "W. Canada & US",
    "3_arctic_canada_north": "Arctic Canada N.",
    "4_arctic_canada_south": "Arctic Canada S.",
    "5_greenland_periphery": "Greenland Per.",
    "6_iceland": "Iceland",
    "7_svalbard": "Svalbard",
    "8_scandinavia": "Scandinavia",
    "9_russian_arctic": "Russian Arctic",
    "10_north_asia": "North Asia",
    "11_central_europe": "Central Europe",
    "12_caucasus_middle_east": "Caucasus & M.E.",
    "13_central_asia": "Central Asia",
    "14_south_asia_west": "South Asia W.",
    "15_south_asia_east": "South Asia E.",
    "16_low_latitudes": "Low Latitudes",
    "17_southern_andes": "Southern Andes",
    "18_new_zealand": "New Zealand",
    "19_antarctic_and_subantarctic": "Antarctic & Sub.",
}

# Short label -> key used to look the region up in glacier_mass_dict
# (presumably the spelling of the "Region" column in glacier_mass_2000.csv - verify)
mass_lookup = {
    "Alaska": "Alaska",
    "W. Canada & US": "Western Canada and USA",
    "Arctic Canada N.": "Arctic Canada north",
    "Arctic Canada S.": "Arctic Canada south",
    "Greenland Per.": "Greenland periphery",
    "Iceland": "Iceland",
    "Svalbard": "Svalbard and Jan Mayen",
    "Scandinavia": "Scandinavia",
    "Russian Arctic": "Russian Arctic",
    "North Asia": "North Asia",
    "Central Europe": "Central Europe",
    "Caucasus & M.E.": "Caucasus and Middle East",
    "Central Asia": "Central Asia",
    "South Asia W.": "South Asia west",
    "South Asia E.": "South Asia east",
    "Low Latitudes": "Low latitudes",
    "Southern Andes": "Southern Andes",
    "New Zealand": "New Zealand",
    "Antarctic & Sub.": "Antarctic and subantarctic islands",
    "Global": "Global",
}

def compute_all_metrics(df1, df2, glacier_mass, start_year=None, end_year=None):
    """Compare the annual mass-change series of two runs over a common period.

    Both frames need the columns ``start_dates`` and ``changes``. They are
    joined on ``start_dates``, optionally limited to a year window, and only
    years with a value in both runs are used.

    Args:
        df1: Series of the reference run (run 1).
        df2: Series of the run compared against the reference (run 2).
        glacier_mass: Accepted for compatibility with existing callers; it
            does not influence any of the returned values.
        start_year: If given, drop rows with ``start_dates`` below this value.
        end_year: If given, drop rows with ``start_dates`` above this value.

    Returns:
        Tuple ``(rel_diff, abs_diff, corr)``:
        ``abs_diff`` is the cumulative change of run 2 minus that of run 1,
        ``rel_diff`` is ``abs_diff`` as a percentage of the absolute
        cumulative change of run 1 (0.0 when that is 0.01 or smaller), and
        ``corr`` is the Spearman correlation of the two series, NaN when
        fewer than two common years remain.
    """
    merged = pd.merge(df1, df2, on="start_dates", suffixes=("_run1", "_run2"), how="outer")
    if start_year is not None:
        merged = merged[merged["start_dates"] >= start_year]
    if end_year is not None:
        merged = merged[merged["start_dates"] <= end_year]
    # The outer join leaves NaN for years present in one run only
    merged = merged.dropna(subset=["changes_run1", "changes_run2"])

    cumul_run1 = merged["changes_run1"].sum()
    cumul_run2 = merged["changes_run2"].sum()
    abs_diff = cumul_run2 - cumul_run1
    # Avoid dividing by a (near-)zero reference
    if abs(cumul_run1) > 0.01:
        rel_diff = abs_diff / abs(cumul_run1) * 100.0
    else:
        rel_diff = 0.0

    # A rank correlation needs at least two points; state the undefined case
    # explicitly instead of depending on how SciPy treats tiny samples.
    if len(merged) < 2:
        corr = np.nan
    else:
        corr, _ = spearmanr(merged["changes_run1"], merged["changes_run2"])
    return rel_diff, abs_diff, corr

### Compare all runs

In [None]:
# Discover runs: every "datasets_*" folder inside glambie_runs
all_runs = sorted(
    p for p in glambie_runs.iterdir()
    if p.is_dir() and p.name.startswith("datasets_")
)
if not all_runs:
    raise FileNotFoundError(f"No 'datasets_*' run folders found in {glambie_runs}")

# The first run with "default" in its name is the reference; otherwise the first run
reference_run = next((r for r in all_runs if "default" in r.name), all_runs[0])
print(f"Reference run: {reference_run.name}")

summary_rows = []

for run in all_runs:
    if run == reference_run:
        continue

    start_year = None
    end_year = None
    # Year bounds: ETH from 2001, DUS-combined end at 2020, else end at 2019
    # if "ETH" in run.name:
    #     start_year, end_year = 2001, 2020
    # elif "DUS-combined" in run.name:
    #     start_year, end_year = None, 2020
    # else:
    #     start_year, end_year = None, 2019

    region_names = []
    rel_diff_values = []
    abs_diff_values = []
    corr_values = []

    for region_dir, display_name in regions_bar.items():
        # "12_caucasus_middle_east" -> "caucasus_middle_east"
        region_key = "_".join(region_dir.split("_")[1:])
        csv_name = f"consensus_calendar_year_gt_{region_key}.csv"
        csv1 = reference_run / region_dir / "consensus" / "csvs" / csv_name
        csv2 = run / region_dir / "consensus" / "csvs" / csv_name

        # Regions missing from either run are left out of the comparison
        if not csv1.exists() or not csv2.exists():
            continue

        df1 = pd.read_csv(str(csv1))
        df2 = pd.read_csv(str(csv2))
        mass_key = mass_lookup.get(display_name, display_name)
        glacier_mass = glacier_mass_dict.get(mass_key, None)
        rel_diff, abs_diff, corr = compute_all_metrics(
            df1, df2, glacier_mass, start_year=start_year, end_year=end_year
        )

        region_names.append(display_name)
        rel_diff_values.append(rel_diff)
        abs_diff_values.append(abs_diff)
        corr_values.append(corr)

    # Global (appended last, only when both runs provide the file)
    has_global = False
    csv1_global = reference_run / "0_global" / "consensus" / "csvs" / "global_gt.csv"
    csv2_global = run / "0_global" / "consensus" / "csvs" / "global_gt.csv"
    if csv1_global.exists() and csv2_global.exists():
        df1 = pd.read_csv(str(csv1_global))
        df2 = pd.read_csv(str(csv2_global))
        rel_diff, abs_diff, corr = compute_all_metrics(
            df1, df2, glacier_mass_dict.get("Global", None),
            start_year=start_year, end_year=end_year
        )
        region_names.append("Global")
        rel_diff_values.append(rel_diff)
        abs_diff_values.append(abs_diff)
        corr_values.append(corr)
        has_global = True

    if not region_names:
        continue

    # Summary row. The last list entry is the global result only if the global
    # files were found; otherwise it is a region and must not be reported as
    # global.
    if has_global:
        global_rel = rel_diff_values[-1]
        global_abs = abs_diff_values[-1]
        global_corr = corr_values[-1]
        regional_rel = rel_diff_values[:-1]
        regional_abs = abs_diff_values[:-1]
    else:
        global_rel = global_abs = global_corr = np.nan
        regional_rel = list(rel_diff_values)
        regional_abs = list(abs_diff_values)

    run_label = run.name.split("_", 1)[1] if "_" in run.name else run.name

    summary_rows.append({
        "Run": run_label,
        "Global Rel Diff (%)": global_rel,
        "Global Abs Diff (Gt)": global_abs,
        "Global Correlation": global_corr,
        "Mean |Regional Rel Diff| (%)": np.mean(np.abs(regional_rel)) if regional_rel else np.nan,
        "Sum |Regional Abs Diff| (Gt)": np.sum(np.abs(regional_abs)) if regional_abs else np.nan,
    })

    # Combined plot
    fig, (ax3, ax4, ax5) = plt.subplots(1, 3, figsize=(18, 8), sharey=True)

    # Panel 1: Relative difference
    ax3.barh(range(len(region_names)), rel_diff_values)
    ax3.set_yticks(range(len(region_names)))
    ax3.set_yticklabels(region_names)
    ax3.invert_yaxis()  # first region at the top
    ax3.set_xlabel("Relative Change (%)")
    # ax3.set_title("Relative Mass Difference (positive = more loss)")
    ax3.axvline(0, color="black", linewidth=0.8)
    # "or 1" keeps label offsets and axis padding non-zero when all values are 0
    max_abs_rel = max(abs(v) for v in rel_diff_values) or 1
    for i, val in enumerate(rel_diff_values):
        if val >= 0:
            ax3.text(val + 0.02 * max_abs_rel, i, f"+{val:.1f}%", va="center", fontsize=8)
        else:
            ax3.text(val - 0.02 * max_abs_rel, i, f"{val:.1f}%", va="center", ha="right", fontsize=8)
    pad = max_abs_rel * 0.15
    ax3.set_xlim(min(rel_diff_values) - pad, max(rel_diff_values) + pad)

    # Panel 2: Absolute difference
    ax4.barh(range(len(region_names)), abs_diff_values)
    ax4.set_xlabel("Absolute Difference (Gt)")
    ax4.axvline(0, color="black", linewidth=0.8)
    max_abs_diff = max(abs(v) for v in abs_diff_values) or 1
    for i, val in enumerate(abs_diff_values):
        if val >= 0:
            ax4.text(val + 0.02 * max_abs_diff, i, f"+{val:.2f}", va="center", fontsize=8)
        else:
            ax4.text(val - 0.02 * max_abs_diff, i, f"{val:.2f}", va="center", ha="right", fontsize=8)
    pad = max_abs_diff * 0.30
    ax4.set_xlim(min(abs_diff_values) - pad, max(abs_diff_values) + pad)

    # Panel 3: Correlation
    ax5.barh(range(len(region_names)), corr_values)
    ax5.set_xlabel("Correlation")
    # ax5.set_title("Spearman Correlation")
    ax5.set_xlim(-0.1, 1.1)
    for i, val in enumerate(corr_values):
        if val >= 0:
            ax5.text(val + 0.02, i, f"{val:.2f}", va="center", fontsize=8)
        else:
            ax5.text(val + 0.08, i, f"{val:.2f}", va="center", ha="right", fontsize=8)

    fig.suptitle(f"{reference_run.name} vs {run_label}", fontsize=12)
    fig.tight_layout()
    safe_name = run.name.replace(" ", "_").replace("/", "-")
    path_combined = output_sensitivity / f"sensitivity_{safe_name}.png"
    fig.savefig(str(path_combined), dpi=200, bbox_inches="tight")
    plt.show()

# Summary: runs ordered by the size of their global relative difference
summary_df = pd.DataFrame(summary_rows)
if not summary_df.empty:
    # An empty frame has no columns, so only sort when there is something to sort
    summary_df = summary_df.reindex(
        summary_df["Global Rel Diff (%)"].abs().sort_values(ascending=False).index
    ).reset_index(drop=True)

summary_df.to_csv(output_sensitivity / "run_influence_summary.csv", index=False)
summary_df

### Run metrics as barchart

In [None]:
# Hand-picked subset of the discovered runs
selected_runs = [all_runs[i] for i in (0, 4, 5, 6, 7, 8, 9, 10, 11)]  # all_runs[0:4]:

# (row label, path of the consensus CSV relative to a run folder):
# all regions first, "Global" last
comparison_targets = [
    (
        display_name,
        Path(region_dir) / "consensus" / "csvs"
        / f"consensus_calendar_year_gt_{region_dir.partition('_')[2]}.csv",
    )
    for region_dir, display_name in regions_bar.items()
]
comparison_targets.append(
    ("Global", Path("0_global") / "consensus" / "csvs" / "global_gt.csv")
)

# One row per (run, region) with the metrics against the reference run
region_rows = []
for run in selected_runs:
    if run == reference_run:
        continue

    run_label = run.name.split("_", 1)[1] if "_" in run.name else run.name

    for target_name, relative_csv in comparison_targets:
        csv1 = reference_run / relative_csv
        csv2 = run / relative_csv
        # Only compare targets that exist in both runs
        if not (csv1.exists() and csv2.exists()):
            continue

        rel_diff, abs_diff, corr = compute_all_metrics(
            pd.read_csv(csv1), pd.read_csv(csv2), glacier_mass=None
        )
        region_rows.append({
            "Run": run_label,
            "Region": target_name,
            "RelDiffPct": rel_diff,
            "AbsDiffGt": abs_diff,
            "Corr": corr
        })

metrics_df = pd.DataFrame(region_rows)
metrics_df["AbsRelDiffPct"] = metrics_df["RelDiffPct"].abs()
metrics_df["AbsAbsDiffGt"] = metrics_df["AbsDiffGt"].abs()

# Per-region statistics across runs: output column -> (source column, function)
aggregations = {
    "n_runs": ("Run", "nunique"),
    "mean_abs_rel": ("AbsRelDiffPct", "mean"),
    "std_abs_rel": ("AbsRelDiffPct", "std"),
    "mean_rel": ("RelDiffPct", "mean"),
    "std_rel": ("RelDiffPct", "std"),
    "mean_abs_abs": ("AbsAbsDiffGt", "mean"),
    "std_abs_abs": ("AbsAbsDiffGt", "std"),
    "mean_abs": ("AbsDiffGt", "mean"),
    "std_abs": ("AbsDiffGt", "std"),
    "mean_corr": ("Corr", "mean"),
    "min_corr": ("Corr", "min"),
}
grouped = metrics_df.groupby("Region").agg(**aggregations)
region_variability = grouped.sort_values(["Region"], ascending=False).reset_index()

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=True)

# color by direction: sign of the mean signed relative difference
sign_source = region_variability["mean_rel"]
bar_colors = [blue if v >= 0 else orange for v in sign_source]

# (axis, mean column, std column, x label) for the relative and absolute panel
panels = (
    (ax1, "mean_abs_rel", "std_abs_rel", "Mean |relative difference| (%)"),
    (ax2, "mean_abs_abs", "std_abs_abs", "Mean |absolute difference| (Gt)"),
)
for axis, mean_col, std_col, x_label in panels:
    axis.barh(region_variability["Region"], region_variability[mean_col],
              xerr=region_variability[std_col],
              color=bar_colors)
    axis.set_xlabel(x_label)

# Legend
from matplotlib.patches import Patch
handles = [
    Patch(facecolor=blue, label="Positive"),
    Patch(facecolor=orange, label="Negative"),
]

fig.legend(handles=handles, loc="lower center", ncol=2, frameon=False, title='Direction of change')
# Leave room at the bottom of the figure for the legend
plt.tight_layout(rect=[0, 0.07, 1, 1])

**Notes, results and discussion**

About those datasets
- DUS-combined: combined at *annual* resolution available in 19 regions / never excluded
- ETH: demdiff at *multi*-annual resolution available in 19 regions / never excluded
- WGMS-beta: glaciological at *annual* resolution available in 19 regions / never excluded
- Harig_Group: gravimetry at *sub*-annual resolution available in 10 regions / excluded in 4 regions
- Jacob_2012_dmdt: gravimetry at *multi*-annual resolution available in 17 regions / excluded in 17 regions
- Sasgen_AWIarc_RL01: gravimetry at *sub*-annual resolution available in 16 regions / excluded in 9 regions
- Wouters: gravimetry at *sub*-annual resolution available in 17 regions / excluded in 10 regions
- Treichler_ICESat: altimetry at *multi*-annual resolution available in 3 regions / excluded in 3 regions

Results
- DUS-combined causes a relative difference of 11.6% or 756 Gt, and the global change does not match the regional ones
    - This should not be the case: "Global estimates were computed [...] by simple sums for global mass changes" (Zemp et al., 2025)
    - This seems to be because results for some regions are missing for the final years
- Other datasets such as Harig_Group cause almost no difference
- Regions can shift a lot while the global result is not affected much
    - Sasgen_AWIarc_RL01_2: 1.8% global change vs 57.6% mean regional relative difference
    - Jacob_2012_dmdt: 7.1% global vs. 52.1% mean regional.

Which datasets are still excluded from the "including most" datasets run?
- antarctic_and_subantarctic: Gardner2013_icesat
- arctic_canada_north: Khan
- arctic_canada_south: Khan
- central_asia: Treichler_ICESat, Treichler_snowfall
- greenland_periphery: Bolch_2013, Gardner2013_icesat
- south_asia_east: Treichler_ICESat, Treichler_snowfall
- south_asia_west: Treichler_ICESat, Treichler_snowfall

In [None]:
import pandas as pd
from pathlib import Path
import glob
from scipy.stats import spearmanr
import matplotlib.pyplot as plt
import numpy as np

# Directory layout (same definitions as in the Setup cell)
input_dir, output_dir = Path("input"), Path("output")

input_rel = input_dir / "glambie_reference"
input_maps = input_dir / "maps"
input_datasets = input_dir / "input_datasets"
glambie_runs = input_dir / "glambie_runs"

output_sensitivity = output_dir / "sensitivity"
output_rel = output_dir / "relative_change"
output_dir_datasets = output_dir / "datasets"

# Initial glacier mass per region, read from a semicolon-separated table
# with (at least) the columns "Region" and "Mass".
glacier_mass_file = input_rel / "glacier_mass_2000.csv"
glacier_mass_df = pd.read_csv(glacier_mass_file, sep=";")

# Region name -> mass lookup
glacier_mass_dict = {
    region: mass
    for region, mass in zip(glacier_mass_df["Region"], glacier_mass_df["Mass"])
}


def compute_all_metrics(df1, df2, glacier_mass, start_year=None, end_year=None):
    """Compare the mass-change series of two runs.

    Both frames must provide the columns ``start_dates`` (join key) and
    ``changes``. The frames are outer-joined on ``start_dates`` so that periods
    present in only one run still count towards that run's cumulative change.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Series of the reference run ("run1").
    df2 : pandas.DataFrame
        Series of the run to compare against the reference ("run2").
    glacier_mass : float or None
        Not used by the calculation; kept so existing callers keep working.
    start_year, end_year : number or None, optional
        Optional comparison window. A period is kept when
        ``start_year <= start_dates < end_year + 1``; ``None`` leaves that side
        open. NOTE(review): this assumes ``start_dates`` holds numeric
        (decimal) years and that ``end_year`` is meant inclusively -- confirm
        against the input CSVs and the intended window.

    Returns
    -------
    rel_diff : float
        ``(sum(run2) - sum(run1)) / |sum(run1)| * 100``; ``0.0`` when
        ``|sum(run1)|`` is not larger than 0.01.
    abs_diff : float
        ``sum(run2) - sum(run1)`` in the units of ``changes``.
    corr : float
        Spearman rank correlation over the periods present in both runs;
        ``nan`` when fewer than two such periods exist.
    """
    merged = pd.merge(df1, df2, on="start_dates", suffixes=("_run1", "_run2"), how="outer")

    # Optional restriction to a common comparison window.
    if start_year is not None:
        merged = merged[merged["start_dates"] >= start_year]
    if end_year is not None:
        merged = merged[merged["start_dates"] < end_year + 1]

    # Series.sum() skips the NaNs introduced by the outer join.
    cumul_run1 = merged["changes_run1"].sum()
    cumul_run2 = merged["changes_run2"].sum()

    abs_diff = cumul_run2 - cumul_run1
    # Guard against dividing by a (near-)zero reference change.
    if abs(cumul_run1) > 0.01:
        rel_diff = abs_diff / abs(cumul_run1) * 100.0
    else:
        rel_diff = 0.0

    # spearmanr propagates NaN by default, so a single period missing in one
    # run would turn the whole correlation into NaN. Correlate paired rows only.
    paired = merged[["changes_run1", "changes_run2"]].dropna()
    if len(paired) >= 2:
        corr, _ = spearmanr(paired["changes_run1"], paired["changes_run2"])
    else:
        corr = float("nan")

    return rel_diff, abs_diff, corr


# Global metrics only, for all runs starting with "datasets_"
runs = sorted(
    p for p in glambie_runs.iterdir()
    if p.is_dir() and p.name.startswith("datasets_")
)

# Reference run: the first run whose name contains "default", otherwise the
# first run. next() evaluates its default argument eagerly, so runs[0] has to
# be guarded; with no run directories reference_run is None and the loop below
# simply does not execute.
reference_run = next(
    (r for r in runs if "default" in r.name),
    runs[0] if runs else None,
)

# Parallel lists with one entry per compared (non-reference) run.
run_labels = []
global_rel = []
global_abs = []
global_corr = []

for run in runs:
    if run == reference_run:
        continue
    csv_ref = reference_run / "0_global" / "consensus" / "csvs" / "global_gt.csv"
    csv_run = run / "0_global" / "consensus" / "csvs" / "global_gt.csv"
    # Skip runs for which either global consensus file is missing.
    if not csv_ref.exists() or not csv_run.exists():
        continue
    df1 = pd.read_csv(str(csv_ref))
    df2 = pd.read_csv(str(csv_run))
    rel_diff, abs_diff, corr = compute_all_metrics(
        df1, df2, glacier_mass_dict.get("Global", None))
    # Label = run directory name without its first "_"-separated prefix.
    label = run.name.split("_", 1)[1] if "_" in run.name else run.name
    run_labels.append(label)
    global_rel.append(rel_diff)
    global_abs.append(abs_diff)
    global_corr.append(corr)

def _label_signed_bars(ax, values, offset, template):
    """Annotate horizontal bars: right of non-negative bars (with a leading
    "+"), left of negative bars."""
    for row, value in enumerate(values):
        text = template.format(value)
        if value >= 0:
            ax.text(value + offset, row, "+" + text, va="center", fontsize=8)
        else:
            ax.text(value - offset, row, text, va="center", ha="right", fontsize=8)


if run_labels:
    # One row per compared run; the figure height grows with the number of runs.
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, max(6, len(run_labels) * 0.6)), sharey=True)

    n = len(run_labels)
    y_pos = np.arange(n)

    # Panel 1: Global relative difference (%)
    ax1.barh(y_pos, global_rel)
    ax1.set_yticks(y_pos)
    ax1.set_yticklabels(run_labels, fontsize=9)
    ax1.invert_yaxis()
    ax1.set_xlabel("Relative Change (%)")
    ax1.axvline(0, color="black", linewidth=0.8)
    # "or 1" keeps offsets and padding non-zero when every value is 0.
    max_abs_rel = max(abs(v) for v in global_rel) or 1
    _label_signed_bars(ax1, global_rel, 0.02 * max_abs_rel, "{:.1f}%")
    pad = max_abs_rel * 0.20
    ax1.set_xlim(min(global_rel) - pad, max(global_rel) + pad)

    # Panel 2: Global absolute difference (Gt)
    ax2.barh(y_pos, global_abs)
    ax2.set_xlabel("Absolute Difference (Gt)")
    ax2.axvline(0, color="black", linewidth=0.8)
    max_abs_diff = max(abs(v) for v in global_abs) or 1
    _label_signed_bars(ax2, global_abs, 0.02 * max_abs_diff, "{:.2f}")
    pad = max_abs_diff * 0.30
    ax2.set_xlim(min(global_abs) - pad, max(global_abs) + pad)

    # Panel 3: Global correlation (fixed axis range, unsigned labels)
    ax3.barh(y_pos, global_corr)
    ax3.set_xlabel("Correlation")
    ax3.set_xlim(-0.1, 1.2)
    for row, corr_value in enumerate(global_corr):
        corr_text = f"{corr_value:.2f}"
        if corr_value >= 0:
            ax3.text(corr_value + 0.02, row, corr_text, va="center", fontsize=8)
        else:
            ax3.text(corr_value + 0.08, row, corr_text, va="center", ha="right", fontsize=8)

    fig.suptitle(f"Global metrics vs {reference_run.name}", fontsize=12)
    fig.tight_layout()
    fig.set_dpi(300)
    plt.show()
else:
    print("No runs to plot (or only reference run found).")

In [None]:
# Combined: global relative difference + mean regional variation per run
runs = sorted(
    p for p in glambie_runs.iterdir()
    if p.is_dir() and p.name.startswith("datasets_")
)

# Reference run: the first run whose name contains "default", otherwise the
# first run. next() evaluates its default argument eagerly, so runs[0] has to
# be guarded; with no run directories reference_run is None and the loop below
# simply does not execute.
reference_run = next(
    (r for r in runs if "default" in r.name),
    runs[0] if runs else None,
)

# Parallel lists with one entry per compared (non-reference) run.
run_labels = []
global_rel = []
mean_regional_rel = []

start_year = None
end_year = None

for run in runs:
    if run == reference_run:
        continue

    # Comparison window per run, selected by run name (None = open-ended).
    if "ETH" in run.name:
        start_year, end_year = 2001, 2020
    elif "DUS-combined" in run.name:
        start_year, end_year = None, 2020
    else:
        start_year, end_year = None, 2019

    # Global metric
    csv_ref_global = reference_run / "0_global" / "consensus" / "csvs" / "global_gt.csv"
    csv_run_global = run / "0_global" / "consensus" / "csvs" / "global_gt.csv"
    if not csv_ref_global.exists() or not csv_run_global.exists():
        continue
    df1_g = pd.read_csv(str(csv_ref_global))
    df2_g = pd.read_csv(str(csv_run_global))
    rel_global, _, _ = compute_all_metrics(
        df1_g, df2_g, glacier_mass_dict.get("Global", None),
        start_year=start_year, end_year=end_year
    )

    # Regional metrics (mean |rel diff|)
    rel_diffs = []
    for region_dir, display_name in regions_bar.items():
        # "<number>_<region key>" -> "<region key>", as used in the CSV file name.
        region_key = "_".join(region_dir.split("_")[1:])
        csv_name = f"consensus_calendar_year_gt_{region_key}.csv"
        csv1 = reference_run / region_dir / "consensus" / "csvs" / csv_name
        csv2 = run / region_dir / "consensus" / "csvs" / csv_name
        # Regions missing in either run are left out of the regional mean.
        if not csv1.exists() or not csv2.exists():
            continue
        df1 = pd.read_csv(str(csv1))
        df2 = pd.read_csv(str(csv2))
        mass_key = mass_lookup.get(display_name, display_name)
        glacier_mass = glacier_mass_dict.get(mass_key, None)
        rel_diff, _, _ = compute_all_metrics(
            df1, df2, glacier_mass, start_year=start_year, end_year=end_year
        )
        rel_diffs.append(rel_diff)

    # A run without any comparable region is not plotted at all.
    if not rel_diffs:
        continue
    label = run.name.split("_", 1)[1] if "_" in run.name else run.name
    run_labels.append(label)
    global_rel.append(rel_global)
    mean_regional_rel.append(np.mean(np.abs(rel_diffs)))

if not run_labels:
    print("No runs to plot.")
else:
    n = len(run_labels)
    y_pos = np.arange(n)
    # One row per compared run; the figure height grows with the number of runs.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, max(5, n * 0.6)), sharey=True)

    # Panel 1: Global relative difference (%)
    ax1.barh(y_pos, global_rel)
    ax1.set_yticks(y_pos)
    ax1.set_yticklabels(run_labels, fontsize=9)
    ax1.invert_yaxis()
    ax1.set_xlabel("Global relative difference (%)")
    ax1.axvline(0, color="black", linewidth=0.8)
    # Signed value labels: right of non-negative bars, left of negative bars,
    # at a fixed offset of 0.5 percentage points.
    for i, val in enumerate(global_rel):
        if val >= 0:
            ax1.text(val + 0.5, i, f"{val:+.1f}%", va="center", ha="left", fontsize=8)
        else:
            ax1.text(val - 0.5, i, f"{val:+.1f}%", va="center", ha="right", fontsize=8)
    # "or 1" keeps the padding non-zero when all values are identical.
    pad = (max(global_rel) - min(global_rel)) * 0.2 or 1
    ax1.set_xlim(min(global_rel) - pad, max(global_rel) + pad)

    # Panel 2: Mean |regional relative difference| (%)
    ax2.barh(y_pos, mean_regional_rel)
    ax2.set_xlabel("Mean |Regional relative difference| (%)")
    ax2.axvline(0, color="black", linewidth=0.8)
    for i, val in enumerate(mean_regional_rel):
        ax2.text(val + 0.1, i, f"{val:.1f}%", va="center", fontsize=8)
    pad = max(mean_regional_rel) * 0.15 or 0.5
    ax2.set_xlim(0, max(mean_regional_rel) + pad)

    fig.suptitle("Adjusted datasets vs reference run", fontsize=12)
    fig.tight_layout()
    plt.show()

## Map visualization

Create maps which show differences between dataset inclusions per region. 
- Root mean squared difference
- Mean absolute difference
- Relative difference in overall mass change
- Absolute difference in overall mass change

### Default vs all datasets

In [None]:
# Map plots
# Region outlines drawn as dashed polygons on every map below.
glacreg_path = input_maps / "GlacReg_2023" / "GTN-G_202307_o1regions.shp"
glacier_regions = gpd.read_file(str(glacreg_path))

# Positioning
# Run directory name -> (display name, marker latitude, marker longitude, region number).
# The insertion order matters: the i-th entry is paired with the i-th value of
# the metric lists when the per-region table is built.
regions_map = {
    "1_alaska":                      ("Alaska",            63.0, -150.0, 1),
    "2_western_canada_us":           ("W. Canada & US",    50.0, -122.0, 2),
    "3_arctic_canada_north":         ("Arctic Canada N.",  77.0,  -82.0, 3),
    "4_arctic_canada_south":         ("Arctic Canada S.",  66.0,  -70.0, 4),
    "5_greenland_periphery":         ("Greenland Per.",    72.0,  -42.0, 5),
    "6_iceland":                     ("Iceland",           65.0,  -19.0, 6),
    "7_svalbard":                    ("Svalbard",          78.0,   17.0, 7),
    "8_scandinavia":                 ("Scandinavia",       67.0,   15.0, 8),
    "9_russian_arctic":              ("Russian Arctic",    77.0,   60.0, 9),
    "10_north_asia":                 ("North Asia",        50.0,   90.0, 10),
    "11_central_europe":             ("Central Europe",    47.0,   11.0, 11),
    "12_caucasus_middle_east":       ("Caucasus & M.E.",   42.0,   44.0, 12),
    "13_central_asia":               ("Central Asia",      40.0,   75.0, 13),
    "14_south_asia_west":            ("South Asia W.",     35.0,   74.0, 14),
    "15_south_asia_east":            ("South Asia E.",     30.0,   90.0, 15),
    "16_low_latitudes":              ("Low Latitudes",     -1.0,  -78.0, 16),
    "17_southern_andes":             ("Southern Andes",   -47.0,  -73.0, 17),
    "18_new_zealand":                ("New Zealand",      -44.0,  170.0, 18),
    "19_antarctic_and_subantarctic": ("Antarctic & Sub.", -70.0,    0.0, 19),
}

# Display name -> (longitude, latitude) of the text label. Note the order is
# (lon, lat) here, whereas regions_map stores (lat, lon). Labels are offset
# from the markers and connected to them by a leader line.
label_positions = {
    "Alaska":            (-170.0, 55.0),
    "W. Canada & US":    (-140.0, 42.0),
    "Arctic Canada N.":  (-100.0, 82.0),
    "Arctic Canada S.":  (-85.0, 72.0),
    "Greenland Per.":    (-50.0, 78.0),
    "Iceland":           (-30.0, 68.0),
    "Svalbard":          (10.0, 82.0),
    "Scandinavia":       (5.0, 72.0),
    "Russian Arctic":    (70.0, 82.0),
    "North Asia":        (110.0, 55.0),
    "Central Europe":    (0.0, 50.0),
    "Caucasus & M.E.":   (55.0, 48.0),
    "Central Asia":      (85.0, 45.0),
    "South Asia W.":     (65.0, 30.0),
    "South Asia E.":     (100.0, 25.0),
    "Low Latitudes":     (-90.0, -8.0),
    "Southern Andes":    (-80.0, -52.0),
    "New Zealand":       (175.0, -50.0),
    "Antarctic & Sub.":  (-10.0, -75.0),
}

# One record per region. The metric lists are positional: element idx belongs
# to the idx-th entry of regions_map.
data = [
    {
        "name": display_name,
        "lat": lat,
        "lon": lon,
        "region_num": region_num,
        "rel_diff": rel_diff_values[idx],
        "abs_diff": abs_diff_values[idx],
        "corr": corr_values[idx],
    }
    for idx, (display_name, lat, lon, region_num) in enumerate(regions_map.values())
]

df = pd.DataFrame(data)

# Global is the last element in the barchart lists
global_metrics = dict(
    rel_diff=rel_diff_values[-1],
    abs_diff=abs_diff_values[-1],
    corr=corr_values[-1],
)

# One entry per map to render. Keys as consumed by the plotting loop:
#   metric    - column of df (and key of global_metrics) that is plotted
#   title     - axes title
#   cmap      - matplotlib colormap name
#   label     - colorbar label
#   diverging - True: colour scale symmetric around 0; False: scale starts at 0
#   output    - file name, written below output_sensitivity
#   fmt       - format spec used for the value in the text labels
#   unit      - suffix appended to the formatted value
map_configs = [
    {
        "metric": "rel_diff",
        "title": "Relative Difference in Total Mass Change\n(default vs. all datasets)",
        "cmap": "RdBu_r",
        "label": "Relative Difference (%)",
        "diverging": True,
        "output": "map_relative_difference.png",
        "fmt": "+.1f",
        "unit": "%",
    },
    {
        "metric": "abs_diff",
        "title": "Absolute Mass Change Difference Between Glambie Runs\n(default vs. all datasets)",
        "cmap": "RdBu_r",
        "label": "Absolute Difference (Gt)",
        "diverging": True,
        "output": "map_absolute_difference.png",
        "fmt": "+.2f",
        "unit": " Gt",
    },
    {
        "metric": "corr",
        "title": "Correlation Between Glambie Runs\n(default vs. all datasets)",
        "cmap": "RdYlGn",
        "label": "Correlation",
        "diverging": False,
        "output": "map_correlation.png",
        "fmt": ".2f",
        "unit": "",
    },
]

# Render one world map per entry of map_configs: each region is a circle whose
# area scales with |value| and whose colour encodes the value; the global value
# is drawn as an extra, larger marker.
for cfg in map_configs:
    metric = cfg["metric"]
    title = cfg["title"]
    cmap = cfg["cmap"]
    label = cfg["label"]
    diverging = cfg["diverging"]
    output_path = output_sensitivity / cfg["output"]
    fmt = cfg["fmt"]
    unit = cfg["unit"]

    values = df[metric].values
    global_val = global_metrics.get(metric, None)

    fig = plt.figure(figsize=(20, 11))
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())

    # Background layers
    ax.add_feature(cfeature.OCEAN, facecolor='white', zorder=0)
    ax.add_feature(cfeature.LAND, facecolor='#e0e0e0', edgecolor='#aaaaaa', linewidth=0.3, zorder=1)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.4, edgecolor='#666666', zorder=2)

    glaciers = cfeature.NaturalEarthFeature(
        'physical', 'glaciated_areas', '50m',
        edgecolor='#2e5f7f', facecolor='#4a8fc4', linewidth=0.2
    )
    ax.add_feature(glaciers, zorder=3, alpha=0.7)

    # Dashed outlines of the glacier regions
    if glacier_regions is not None:
        ax.add_geometries(
            glacier_regions.geometry,
            crs=ccrs.PlateCarree(),
            facecolor='none',
            edgecolor='#5a5a5a',
            linewidth=1.0,
            linestyle='--',
            zorder=4,
            alpha=0.6
        )
    ax.set_global()

    vals = np.array(values)
    abs_vals = np.abs(vals)

    # Marker area is proportional to |value|, normalised by the largest
    # magnitude (including the global value) so that it maps to 5000.
    if global_val is not None:
        size_scale = max(abs_vals.max(), abs(global_val))
    else:
        size_scale = abs_vals.max()
    # An all-zero metric (e.g. a run identical to the reference) would
    # otherwise produce 0/0 marker sizes.
    if size_scale == 0:
        size_scale = 1.0

    sizes = (abs_vals / size_scale) * 5000

    # Colour scale covers the regional values and the global value.
    all_vals = list(vals) + ([global_val] if global_val is not None else [])
    if diverging:
        vmax_abs = max(abs(min(all_vals)), abs(max(all_vals)))
        # TwoSlopeNorm requires vmin < vcenter < vmax; fall back to a unit
        # range when every value is zero.
        if vmax_abs == 0:
            vmax_abs = 1.0
        vmin, vmax = -vmax_abs, vmax_abs
        norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=0, vmax=vmax)
    else:
        vmin = 0
        vmax = max(all_vals)
        norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

    sc = ax.scatter(
        df["lon"], df["lat"],
        c=vals, s=sizes,
        cmap=cmap, norm=norm,
        edgecolors="0.2", linewidths=1.0,
        zorder=8, alpha=0.75,
        transform=ccrs.PlateCarree()
    )

    # Text label plus leader line for every region that has a label position.
    for _, row in df.iterrows():
        region_name = row["name"]
        val = row[metric]
        val_str = f"{val:{fmt}}{unit}"

        if region_name in label_positions:
            label_lon, label_lat = label_positions[region_name]

            ax.text(
                label_lon, label_lat,
                f"{region_name}\n{val_str}",
                fontsize=7, ha="center", va='center',
                color="0.1",
                bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="0.4", alpha=0.85, linewidth=0.5),
                transform=ccrs.PlateCarree(),
                zorder=12
            )

            ax.plot(
                [label_lon, row["lon"]], [label_lat, row["lat"]],
                color='0.5', linewidth=0.4, linestyle='-', alpha=0.5,
                transform=ccrs.PlateCarree(),
                zorder=7
            )

    # Global marker at a fixed position, drawn 1.5x larger than its value
    # alone would give.
    if global_val is not None:
        global_lon, global_lat = 0.0, -20.0
        global_size = (abs(global_val) / size_scale) * 5000

        ax.scatter(
            [global_lon], [global_lat],
            c=[global_val], s=[global_size * 1.5],
            cmap=cmap, norm=norm,
            edgecolors="0.1", linewidths=1.0,
            zorder=11, alpha=0.9,
            marker="o",
            transform=ccrs.PlateCarree()
        )

        val_str = f"{global_val:{fmt}}{unit}"
        ax.text(
            global_lon, global_lat - 8,
            f"Global\n{val_str}",
            fontsize=9, fontweight="bold", ha="center", va='top',
            color="0.05",
            bbox=dict(boxstyle="round,pad=0.35", fc="white", ec="0.3", alpha=0.95, linewidth=1),
            transform=ccrs.PlateCarree(),
            zorder=12
        )

    cb = plt.colorbar(sc, ax=ax, shrink=0.5, pad=0.03, aspect=20, orientation='vertical')
    cb.set_label(label, fontsize=11)
    cb.ax.tick_params(labelsize=9)

    ax.set_title(title, fontsize=13, pad=15)

    fig.tight_layout()
    fig.savefig(str(output_path), dpi=200)
    plt.show()

### Aggregate dataset differences between regions

In [None]:
# Display name -> (lon, lat) of the marker; regions_map stores (lat, lon).
name_to_lonlat = {
    display_name: (lon, lat)
    for display_name, lat, lon, _region_num in regions_map.values()
}
name_to_lonlat["Global"] = (0.0, -20.0)

# DataFrame from region_variability: keep only regions with a known map
# position and attach their coordinates.
rows = [
    {
        "name": rec["Region"],
        "lat": name_to_lonlat[rec["Region"]][1],
        "lon": name_to_lonlat[rec["Region"]][0],
        "mean_abs_rel": rec["mean_abs_rel"],
        "mean_abs_abs": rec["mean_abs_abs"],
    }
    for _, rec in region_variability.iterrows()
    if rec["Region"] in name_to_lonlat
]
df = pd.DataFrame(rows)

# The Global row is plotted separately from the regional rows.
is_global = df["name"] == "Global"
df_regions = df[df["name"] != "Global"]
global_row = df[is_global]
if len(global_row):
    global_metrics = global_row.iloc[0].to_dict()
else:
    global_metrics = {}

# Single map: mean_abs_rel = color, mean_abs_abs = size
metric_color, metric_size = "mean_abs_rel", "mean_abs_abs"
cmap = "RdBu_r"
label_color = "Mean relative change (%)"
output_path = output_sensitivity / "map_region_variability.png"

# Base map in Robinson projection: ocean, land, coastline, glaciated areas and
# dashed glacier-region outlines.
fig = plt.figure(figsize=(20, 11))
ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson())

background_layers = (
    (cfeature.OCEAN, dict(facecolor='white', zorder=0)),
    (cfeature.LAND, dict(facecolor='#e0e0e0', edgecolor='#aaaaaa', linewidth=0.3, zorder=1)),
    (cfeature.COASTLINE, dict(linewidth=0.4, edgecolor='#666666', zorder=2)),
)
for layer, layer_style in background_layers:
    ax.add_feature(layer, **layer_style)

glacier_style = dict(edgecolor='#2e5f7f', facecolor='#4a8fc4', linewidth=0.2)
glaciers = cfeature.NaturalEarthFeature('physical', 'glaciated_areas', '50m', **glacier_style)
ax.add_feature(glaciers, zorder=3, alpha=0.7)

if glacier_regions is not None:
    outline_style = dict(
        facecolor='none',
        edgecolor='#5a5a5a',
        linewidth=1.0,
        linestyle='--',
        zorder=4,
        alpha=0.6,
    )
    ax.add_geometries(glacier_regions.geometry, crs=ccrs.PlateCarree(), **outline_style)

ax.set_global()

# Marker size from metric_size (mean |absolute difference|): normalised by the
# largest value, including Global when present, so that it maps to 5000.
vals_size = df_regions[metric_size].values
size_scale = vals_size.max()
if global_metrics and metric_size in global_metrics:
    size_scale = max(size_scale, global_metrics[metric_size])
# Avoid 0/0 marker sizes when every difference is zero.
if size_scale == 0:
    size_scale = 1.0
sizes = (vals_size / size_scale) * 5000

# Colour from metric_color (mean |relative difference|, so values are >= 0).
vals_color = df_regions[metric_color].values
all_color = list(vals_color)
vmin, vmax = min(all_color), max(all_color)
# Centre the colormap at 50 % when the data straddle that value. TwoSlopeNorm
# raises unless vmin < vcenter < vmax, so fall back to a linear scale otherwise.
color_center = 50
if vmin < color_center < vmax:
    norm = mcolors.TwoSlopeNorm(vmin=vmin, vcenter=color_center, vmax=vmax)
else:
    norm = mcolors.Normalize(vmin=vmin, vmax=vmax)

sc = ax.scatter(
    df_regions["lon"], df_regions["lat"],
    c=vals_color, s=sizes,
    cmap=cmap, norm=norm,
    edgecolors="0.2", linewidths=1.0,
    zorder=8, alpha=0.75,
    transform=ccrs.PlateCarree()
)

# Text box and leader line for every region that has a label position.
label_box = dict(boxstyle="round,pad=0.3", fc="white", ec="0.4", alpha=0.85, linewidth=0.5)

for _, row in df_regions.iterrows():
    region_name = row["name"]
    if region_name not in label_positions:
        continue

    val = row[metric_color]
    # First line: colour metric, second line: size metric.
    val_str = f"Ø {val:+.2f}%\n({row[metric_size]:.1f} Gt)"
    label_lon, label_lat = label_positions[region_name]

    ax.text(
        label_lon,
        label_lat,
        f"{region_name}\n{val_str}",
        fontsize=7,
        ha="center",
        va='center',
        color="0.1",
        bbox=label_box,
        transform=ccrs.PlateCarree(),
        zorder=12,
    )
    # Leader line from the label to the marker position.
    ax.plot(
        [label_lon, row["lon"]],
        [label_lat, row["lat"]],
        color='0.5',
        linewidth=0.4,
        linestyle='-',
        alpha=0.5,
        transform=ccrs.PlateCarree(),
        zorder=7,
    )

# Global point: drawn only when region_variability contained a "Global" row.
# Indexing global_row.iloc[0] on an empty frame would raise IndexError.
if len(global_row):
    g = global_row.iloc[0]
    global_lon, global_lat = 0.0, -20.0
    # Same size scaling as the regional markers, enlarged by a factor of 1.5.
    global_size = (g[metric_size] / size_scale) * 5000 * 1.5
    ax.scatter(
        [global_lon], [global_lat],
        c=[g[metric_color]], s=[global_size],
        cmap=cmap, norm=norm,
        edgecolors="0.1", linewidths=1.0,
        zorder=11, alpha=0.9,
        marker="o",
        transform=ccrs.PlateCarree()
    )
    ax.text(
        global_lon, global_lat - 8,
        f"Global\nØ {g[metric_color]:+.2f}%\n({g[metric_size]:.1f} Gt)",
        fontsize=9, fontweight="bold", ha="center", va='top',
        color="0.05",
        bbox=dict(boxstyle="round,pad=0.35", fc="white", ec="0.3", alpha=0.95, linewidth=1),
        transform=ccrs.PlateCarree(),
        zorder=12
    )

# Size legend: four reference circles with their values (Gt).
size_legend_vals = [10, 40, 70, 100]   # 4 circles
# Use the same normalisation as the map markers (size_scale includes the
# Global value); a separate regional-only scale would draw legend circles that
# do not match the plotted markers.
size_scale_leg = size_scale if size_scale > 0 else 1.0
legend_ax = fig.add_axes([0.02, 0.36, 0.18, 0.25])
legend_ax.set_facecolor("white")
legend_ax.axis("off")
legend_ax.set_xlim(-2, 4)
legend_ax.set_ylim(-2, 4)
legend_ax.set_aspect("equal")
for i, v in enumerate(size_legend_vals):
    s = (v / size_scale_leg) * 5000
    s = max(s, 5)  # keep very small reference circles visible
    legend_ax.scatter(0, 2.5 - i * 1, s=s, c="grey", alpha=0.8, edgecolors="0.5")
    legend_ax.text(1.5, 2.5 - i * 1, f"{v:.0f}", fontsize=10, va="center")
legend_ax.text(1.4, -1.8, "Mean absolute\nchange (Gt)", fontsize=10, style="italic")

# Colour bar for the regional markers, then write the figure to disk.
cb = plt.colorbar(sc, ax=ax, shrink=0.5, pad=0.03, aspect=20, orientation='vertical')
cb.set_label(label_color, fontsize=11)
cb.ax.tick_params(labelsize=9)
fig.tight_layout()
fig.savefig(str(output_path), dpi=200)
plt.show()