In [1]:
import numpy as np
import pandas as pd
import scipy.stats
import xarray as xr

In [2]:
# Load the annual-mean metrics dataset that every table in this notebook is derived from.
metrics = xr.open_dataset("annual_mean_metrics.nc")

In [3]:
def _ttest_ind_wrapper(a, b, axis=0, equal_var=True, nan_policy="propagate"):
    result = scipy.stats.ttest_ind(
        a,
        b,
        axis=axis,
        equal_var=equal_var,
        nan_policy=nan_policy
    )
    return result.statistic, result.pvalue


def xarray_t_test(
    a,
    b,
    dim,
    equal_var=True,
    nan_policy="propagate",
):
    """Apply an independent two-sample t-test to xarray objects along ``dim``.

    Parameters
    ----------
    a, b : xarray objects
        The two samples; ``dim`` is consumed by the test and does not appear
        in the outputs.
    dim : str
        Name of the sample dimension in both ``a`` and ``b``.
    equal_var : bool
        Forwarded to ``scipy.stats.ttest_ind``.
    nan_policy : str
        Forwarded to ``scipy.stats.ttest_ind``.

    Returns
    -------
    tuple
        ``(statistic, pvalue)``, each without the ``dim`` dimension.
    """
    return xr.apply_ufunc(
        _ttest_ind_wrapper,
        a,
        b,
        # ``dim`` is a core dimension of both inputs and of neither output.
        input_core_dims=[[dim], [dim]],
        output_core_dims=[[], []],
        # Excluding ``dim`` from alignment lets the two samples differ along it.
        exclude_dims={dim},
        # The test is taken along the last axis of the arrays handed to the wrapper.
        kwargs=dict(axis=-1, equal_var=equal_var, nan_policy=nan_policy),
    )

In [4]:
# Data variables reported in the tables, in display order.
VARIABLES = [
    "total_precipitation_rate",
    "surface_temperature",
    "net_surface_radiative_flux",
]
# Values of the "region" coordinate reported in the tables, in display order.
REGIONS = [
    "land",
    "ocean/sea-ice",
]


def _metrics_to_table(ds):
    """Flatten a metrics dataset into a wide DataFrame for the REGIONS of interest."""
    return (
        ds.sel(region=REGIONS)
        .to_array()
        .rename("metric")
        .to_dataframe(dim_order=["variable", "metric", "region", "configuration", "climate"])
        # Move "climate", then "configuration", from the row index to the columns.
        .unstack(level=-1)
        .unstack(level=-1)
    )


def metrics_table(metrics, configuration, validation_year, years=range(2018, 2023)):
    """Compare a configuration against the baseline for one validation year.

    Parameters
    ----------
    metrics : xr.Dataset
        Metrics with (at least) "validation_year", "configuration", "time",
        "region", "metric" and "climate" dimensions/coordinates.
    configuration : str
        Label of the configuration to compare against "Baseline".
    validation_year : label
        Value of the "validation_year" coordinate to select.
    years : iterable of int, optional
        Calendar years whose samples enter the comparison. Defaults to
        2018 through 2022 inclusive.

    Returns
    -------
    tuple
        ``(mean_metrics, significant, unmasked_table, masked_table)`` where

        - ``mean_metrics`` holds the sample means for "Baseline" and
          ``configuration`` along a new "configuration" dimension,
        - ``significant`` is a boolean dataset (restricted to VARIABLES) that
          is True where the t-test p-value is below 0.05,
        - ``unmasked_table`` is the wide DataFrame of mean metrics, and
        - ``masked_table`` is the same table with non-significant entries
          replaced by NaN.
    """
    selected = metrics.sel(validation_year=validation_year).drop("validation_year")
    # NOTE(review): "samples" is presumably a variable/coordinate of the input
    # dataset; it is distinct from the "sample" dimension created by the stack.
    stacked_metrics = selected.stack(sample=("configuration", "time")).drop("samples")
    stacked_metrics = stacked_metrics.dropna("sample")

    in_window = stacked_metrics.time.dt.year.isin(years)
    baseline_samples = (stacked_metrics.configuration == "Baseline") & in_window
    configuration_samples = (stacked_metrics.configuration == configuration) & in_window

    # Select each group once and reuse it for both the test and the means.
    baseline = stacked_metrics.isel(sample=baseline_samples)
    candidate = stacked_metrics.isel(sample=configuration_samples)

    # equal_var=False requests the unequal-variance form of the t-test; only
    # the p-values are needed downstream.
    _, p_values = xarray_t_test(
        candidate,
        baseline,
        "sample",
        equal_var=False,
        nan_policy="omit",
    )
    mean_metrics = xr.concat(
        [baseline.mean("sample"), candidate.mean("sample")],
        dim=pd.Index(["Baseline", configuration], name="configuration"),
    )
    significant = p_values < 0.05

    reported = mean_metrics[VARIABLES]
    unmasked_metrics = _metrics_to_table(reported)
    masked_metrics = _metrics_to_table(reported.where(significant[VARIABLES]))
    return mean_metrics, significant[VARIABLES], unmasked_metrics, masked_metrics

In [5]:
# Build the comparison tables twice: once validated against the first year of
# the reference data and once against the second.
(
    mean_metrics_year_one,
    significant_year_one,
    unmasked_year_one,
    masked_year_one,
) = metrics_table(metrics, "ML-corrected seed 2", "Year one")
(
    mean_metrics_year_two,
    significant_year_two,
    unmasked_year_two,
    masked_year_two,
) = metrics_table(metrics, "ML-corrected seed 2", "Year two")

### Source of data in Table 2 of the manuscript

In [6]:
# Year-two validation: mean metrics for every entry, rounded to one decimal place.
unmasked_year_two.round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,metric,metric,metric,metric,metric,metric,metric
Unnamed: 0_level_1,Unnamed: 1_level_1,climate,Minus 4 K,Minus 4 K,Unperturbed,Unperturbed,Plus 4 K,Plus 4 K,Plus 8 K,Plus 8 K
Unnamed: 0_level_2,Unnamed: 1_level_2,configuration,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2
variable,metric,region,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
total_precipitation_rate,mean,land,-0.8,-0.2,-0.8,-0.2,-0.7,-0.0,-0.7,-0.0
total_precipitation_rate,mean,ocean/sea-ice,0.1,-0.1,0.1,-0.1,0.1,-0.3,0.2,-0.4
total_precipitation_rate,rmse,land,1.7,1.2,1.9,1.4,1.8,1.4,1.7,1.5
total_precipitation_rate,rmse,ocean/sea-ice,1.2,1.2,1.6,1.6,1.8,2.0,2.3,2.2
total_precipitation_rate,mae,land,1.1,0.7,1.1,0.8,1.1,0.9,1.1,1.0
total_precipitation_rate,mae,ocean/sea-ice,0.7,0.7,0.9,1.0,1.1,1.2,1.4,1.4
surface_temperature,mean,land,0.3,0.2,0.4,-0.1,0.1,-0.5,-0.4,-0.9
surface_temperature,mean,ocean/sea-ice,0.0,0.1,-0.0,0.1,-0.0,0.0,-0.0,0.0
surface_temperature,rmse,land,3.6,2.8,3.5,2.7,3.6,2.8,3.4,2.7
surface_temperature,rmse,ocean/sea-ice,0.7,0.6,0.7,0.5,0.6,0.5,0.6,0.5


In [7]:
# Year-two validation: same table with entries that fail the p < 0.05 test shown as NaN.
masked_year_two.round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,metric,metric,metric,metric,metric,metric,metric
Unnamed: 0_level_1,Unnamed: 1_level_1,climate,Minus 4 K,Minus 4 K,Unperturbed,Unperturbed,Plus 4 K,Plus 4 K,Plus 8 K,Plus 8 K
Unnamed: 0_level_2,Unnamed: 1_level_2,configuration,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2
variable,metric,region,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
total_precipitation_rate,mean,land,-0.8,-0.2,-0.8,-0.2,-0.7,-0.0,-0.7,-0.0
total_precipitation_rate,mean,ocean/sea-ice,0.1,-0.1,0.1,-0.1,0.1,-0.3,0.2,-0.4
total_precipitation_rate,rmse,land,1.7,1.2,1.9,1.4,1.8,1.4,1.7,1.5
total_precipitation_rate,rmse,ocean/sea-ice,,,,,1.8,2.0,,
total_precipitation_rate,mae,land,1.1,0.7,1.1,0.8,1.1,0.9,1.1,1.0
total_precipitation_rate,mae,ocean/sea-ice,,,,,,,,
surface_temperature,mean,land,,,0.4,-0.1,0.1,-0.5,-0.4,-0.9
surface_temperature,mean,ocean/sea-ice,0.0,0.1,-0.0,0.1,-0.0,0.0,-0.0,0.0
surface_temperature,rmse,land,3.6,2.8,3.5,2.7,3.6,2.8,3.4,2.7
surface_temperature,rmse,ocean/sea-ice,0.7,0.6,0.7,0.5,0.6,0.5,0.6,0.5


In [8]:
def identical_sign(ds, dim):
    """Return True where every value along ``dim`` shares one strict sign.

    A position is True only if all values along ``dim`` are strictly positive
    or all are strictly negative; a zero or NaN anywhere along ``dim`` makes
    it False.
    """
    all_positive = (ds > 0).all(dim)
    all_negative = (ds < 0).all(dim)
    return all_positive | all_negative

### To measure improvement of the RMSE we compute minus the percent deviation from the baseline

$$-100 \cdot \frac{M_{ML} - M_{baseline}}{M_{baseline}}$$

In the manuscript we only report cases where the percent improvement (or degradation) was statistically significant and of a consistent sign in all climates.

In [9]:
def compute_rmse_percent_improvement(metrics, significant, configuration):
    """Tabulate the min/max (across climates) RMSE percent improvement over the baseline.

    The improvement is minus the percent deviation of ``configuration`` from
    "Baseline". Entries are kept only where the RMSE difference is significant
    in all climates and the min and max bounds have the same strict sign;
    everything else becomes NaN.

    Parameters
    ----------
    metrics : xr.Dataset
        Mean metrics with "configuration", "climate", "metric" and "region"
        dimensions.
    significant : xr.Dataset
        Boolean significance flags with "climate" and "metric" dimensions.
    configuration : str
        Label of the configuration compared against "Baseline".

    Returns
    -------
    pandas.DataFrame
        Rows indexed by (variable, region); columns hold the "min" and "max"
        bounds.
    """
    candidate = metrics.sel(configuration=configuration)
    baseline = metrics.sel(configuration="Baseline")
    percent_deviation = 100 * (candidate - baseline) / baseline
    improvement = -percent_deviation.drop("configuration")

    bounds = xr.concat(
        [improvement.min("climate"), improvement.max("climate")],
        dim=pd.Index(["min", "max"], name="bound"),
    )
    ds = bounds.sel(metric="rmse").drop("metric")

    significant_everywhere = significant.sel(metric="rmse").drop("metric").all("climate")
    ds = ds.where(significant_everywhere & identical_sign(ds, "bound"))

    table = (
        ds[VARIABLES]
        .sel(region=REGIONS)
        .to_array()
        .rename("metric")
        .to_dataframe(dim_order=["variable", "region", "bound"])
    )
    return table.unstack(level=-1)

In [10]:
# Year-two validation: min/max across climates of the RMSE percent improvement over the baseline.
compute_rmse_percent_improvement(mean_metrics_year_two, significant_year_two, "ML-corrected seed 2").round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,metric
Unnamed: 0_level_1,bound,min,max
variable,region,Unnamed: 2_level_2,Unnamed: 3_level_2
total_precipitation_rate,land,16.6,29.6
total_precipitation_rate,ocean/sea-ice,,
surface_temperature,land,20.4,22.6
surface_temperature,ocean/sea-ice,16.3,20.7
net_surface_radiative_flux,land,29.8,51.1
net_surface_radiative_flux,ocean/sea-ice,,


### To measure improvement of the bias we compute minus the percent deviation from the baseline for the absolute values

$$-100 \cdot \frac{|M_{ML}| - |M_{baseline}|}{|M_{baseline}|}$$

In the manuscript we only report cases where the percent improvement (or degradation) was statistically significant and of a consistent sign in all climates.

In [11]:
def compute_bias_percent_improvement(metrics, significant, configuration):
    """Tabulate the min/max (across climates) bias percent improvement over the baseline.

    The improvement is minus the percent deviation of the absolute value of
    ``configuration`` from the absolute value of "Baseline", evaluated on the
    "mean" metric. Entries are kept only where the difference is significant
    in all climates and the min and max bounds have the same strict sign;
    everything else becomes NaN.

    Parameters
    ----------
    metrics : xr.Dataset
        Mean metrics with "configuration", "climate", "metric" and "region"
        dimensions.
    significant : xr.Dataset
        Boolean significance flags with "climate" and "metric" dimensions.
    configuration : str
        Label of the configuration compared against "Baseline".

    Returns
    -------
    pandas.DataFrame
        Rows indexed by (variable, region); columns hold the "min" and "max"
        bounds.
    """
    candidate_magnitude = np.abs(metrics.sel(configuration=configuration))
    baseline_magnitude = np.abs(metrics.sel(configuration="Baseline"))
    percent_deviation = 100 * (candidate_magnitude - baseline_magnitude) / baseline_magnitude
    improvement = -percent_deviation.drop("configuration")

    bounds = xr.concat(
        [improvement.min("climate"), improvement.max("climate")],
        dim=pd.Index(["min", "max"], name="bound"),
    )
    ds = bounds.sel(metric="mean").drop("metric")

    significant_everywhere = significant.sel(metric="mean").drop("metric").all("climate")
    ds = ds.where(significant_everywhere & identical_sign(ds, "bound"))

    table = (
        ds[VARIABLES]
        .sel(region=REGIONS)
        .to_array()
        .rename("metric")
        .to_dataframe(dim_order=["variable", "region", "bound"])
    )
    return table.unstack(level=-1)

In [12]:
# Year-two validation: min/max across climates of the bias percent improvement over the baseline.
compute_bias_percent_improvement(mean_metrics_year_two, significant_year_two, "ML-corrected seed 2").round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,metric
Unnamed: 0_level_1,bound,min,max
variable,region,Unnamed: 2_level_2,Unnamed: 3_level_2
total_precipitation_rate,land,76.4,99.9
total_precipitation_rate,ocean/sea-ice,,
surface_temperature,land,,
surface_temperature,ocean/sea-ice,-491.0,-54.5
net_surface_radiative_flux,land,41.0,87.3
net_surface_radiative_flux,ocean/sea-ice,,


### Impact of using the first year of the fine-resolution run as validation data

In [13]:
# Year-one validation: mean metrics for every entry, rounded to one decimal place.
unmasked_year_one.round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,metric,metric,metric,metric,metric,metric,metric
Unnamed: 0_level_1,Unnamed: 1_level_1,climate,Minus 4 K,Minus 4 K,Unperturbed,Unperturbed,Plus 4 K,Plus 4 K,Plus 8 K,Plus 8 K
Unnamed: 0_level_2,Unnamed: 1_level_2,configuration,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2
variable,metric,region,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
total_precipitation_rate,mean,land,-0.7,-0.1,-0.8,-0.1,-0.7,-0.0,-0.7,0.1
total_precipitation_rate,mean,ocean/sea-ice,0.1,-0.1,0.1,-0.2,0.1,-0.4,0.3,-0.3
total_precipitation_rate,rmse,land,1.7,1.2,1.7,1.3,1.7,1.5,1.7,1.5
total_precipitation_rate,rmse,ocean/sea-ice,1.3,1.1,1.5,1.8,1.8,2.1,2.2,1.9
total_precipitation_rate,mae,land,1.0,0.7,1.1,0.8,1.1,0.9,1.0,1.0
total_precipitation_rate,mae,ocean/sea-ice,0.8,0.7,0.9,1.0,1.1,1.2,1.4,1.2
surface_temperature,mean,land,0.2,0.1,0.3,-0.2,-0.0,-0.6,1.3,0.8
surface_temperature,mean,ocean/sea-ice,0.0,0.1,-0.0,0.0,-0.0,0.0,1.0,1.0
surface_temperature,rmse,land,3.6,2.9,3.4,2.7,3.4,2.7,3.9,2.8
surface_temperature,rmse,ocean/sea-ice,0.7,0.6,0.7,0.5,0.6,0.5,1.1,1.1


In [14]:
# Year-one validation: same table with entries that fail the p < 0.05 test shown as NaN.
masked_year_one.round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,metric,metric,metric,metric,metric,metric,metric,metric
Unnamed: 0_level_1,Unnamed: 1_level_1,climate,Minus 4 K,Minus 4 K,Unperturbed,Unperturbed,Plus 4 K,Plus 4 K,Plus 8 K,Plus 8 K
Unnamed: 0_level_2,Unnamed: 1_level_2,configuration,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2,Baseline,ML-corrected seed 2
variable,metric,region,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
total_precipitation_rate,mean,land,-0.7,-0.1,-0.8,-0.1,-0.7,-0.0,-0.7,0.1
total_precipitation_rate,mean,ocean/sea-ice,0.1,-0.1,0.1,-0.2,0.1,-0.4,0.3,-0.3
total_precipitation_rate,rmse,land,1.7,1.2,1.7,1.3,1.7,1.5,1.7,1.5
total_precipitation_rate,rmse,ocean/sea-ice,1.3,1.1,1.5,1.8,1.8,2.1,,
total_precipitation_rate,mae,land,1.0,0.7,1.1,0.8,1.1,0.9,,
total_precipitation_rate,mae,ocean/sea-ice,0.8,0.7,0.9,1.0,1.1,1.2,,
surface_temperature,mean,land,,,0.3,-0.2,-0.0,-0.6,1.3,0.8
surface_temperature,mean,ocean/sea-ice,0.0,0.1,-0.0,0.0,-0.0,0.0,1.0,1.0
surface_temperature,rmse,land,3.6,2.9,3.4,2.7,3.4,2.7,3.9,2.8
surface_temperature,rmse,ocean/sea-ice,0.7,0.6,0.7,0.5,0.6,0.5,,


In [15]:
# Year-one validation: min/max across climates of the RMSE percent improvement over the baseline.
compute_rmse_percent_improvement(mean_metrics_year_one, significant_year_one, "ML-corrected seed 2").round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,metric
Unnamed: 0_level_1,bound,min,max
variable,region,Unnamed: 2_level_2,Unnamed: 3_level_2
total_precipitation_rate,land,9.2,26.4
total_precipitation_rate,ocean/sea-ice,,
surface_temperature,land,19.5,28.6
surface_temperature,ocean/sea-ice,,
net_surface_radiative_flux,land,26.8,47.0
net_surface_radiative_flux,ocean/sea-ice,,


In [16]:
# Year-one validation: min/max across climates of the bias percent improvement over the baseline.
compute_bias_percent_improvement(mean_metrics_year_one, significant_year_one, "ML-corrected seed 2").round(decimals=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,metric,metric
Unnamed: 0_level_1,bound,min,max
variable,region,Unnamed: 2_level_2,Unnamed: 3_level_2
total_precipitation_rate,land,82.7,99.2
total_precipitation_rate,ocean/sea-ice,,
surface_temperature,land,,
surface_temperature,ocean/sea-ice,-3362.8,-4.4
net_surface_radiative_flux,land,43.3,95.7
net_surface_radiative_flux,ocean/sea-ice,,
