# Figures for [Doore et al. (2025)]()

This is the code to replicate the figures in [Doore et al. (2025)]().
Additionally, it creates the LaTeX tables and prints out the quoted statistics.
To run, it requires the whole workflow to have been run first, as it depends on some of the workflow's output files.

In [None]:
import xarray as xr
from xarray_einstats import linalg, stats
import numpy as np
import geopandas as gpd
import pandas as pd
from scipy.stats import norm
from shapely import unary_union, box
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import matplotlib.lines as mlines
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import cartopy
from cartopy import crs as ccrs, feature as cfeature
from pyproj import CRS

Read in and group the [regridded data sets](../workflow/1_regrid.ipynb) for easy loop style plotting.

In [None]:
# Open every AET product. GLEAM is read from its own file; the other five
# are read from their "*_regridded.nc" files.
ssebop = xr.open_dataset('../Data/ssebop/ssebop_aet_regridded.nc', engine='netcdf4')
gleam = xr.open_dataset('../Data/gleam/gleam_aet.nc', engine='netcdf4')
era5 = xr.open_dataset('../Data/era5/era5_aet_regridded.nc', engine='netcdf4')
nldas = xr.open_dataset('../Data/nldas/nldas_aet_regridded.nc', engine='netcdf4')
terra = xr.open_dataset('../Data/terraclimate/terraclimate_aet_regridded.nc')
wbet = xr.open_dataset('../Data/wbet/wbet_aet_regridded.nc', engine='netcdf4')

# Labels of the new `dataset_name` dimension, in the same order as the list
# of opened files passed to `xr.concat`
dataset_index = pd.Index(
    ['SSEBop', 'GLEAM', 'ERA5', 'NLDAS', 'TerraClimate', 'WBET'],
    name='dataset_name',
)

# Stack the products along `dataset_name` and load the result into memory
datasets = xr.concat(
    [ssebop, gleam, era5, nldas, terra, wbet],
    dim=dataset_index,
).compute()
datasets

As we will be plotting CONUS, let's make a mask of CONUS for all data sets. Since WBET is limited to CONUS, we can just use its data to create the mask.

In [None]:
# The mask is built from the mean summer (JJA) WBET field, as the summer mean
# ensures no NaNs in CONUS
is_summer = datasets.time.dt.season == 'JJA'
wbet_summer_mean = datasets.sel(dataset_name='WBET', time=is_summer).mean(dim='time')
conus_mask = wbet_summer_mean.drop_vars('dataset_name').aet

# Dividing the field by itself gives 1 wherever it holds a finite non-zero
# value; NaN cells stay NaN (and a cell that is exactly 0 also becomes NaN).
conus_mask = conus_mask / conus_mask
conus_mask.name = 'conus_mask'
conus_mask

## Figure 1

The graphical depiction of the TC affine error model. The notebook that creates it is in the `TC/TC_diagram/` folder for use in the TC description, so let's just run that notebook.

In [None]:
# IPython magic: execute the TC diagram notebook, which draws Figure 1
%run ../TC/TC_diagram/TC_figure.ipynb

## Figure 2

Map the mean summer ET of each data set at GLEAM resolution.

In [None]:
projPC = ccrs.PlateCarree()
# projPC = ccrs.epsg(5070)
letter = 'abcdef'

# Mean summer (JJA) ET of every data set, multiplied by the CONUS mask
is_summer = datasets.time.dt.season == 'JJA'
data = datasets.sel(time=is_summer).mean(dim='time') * conus_mask

# 2 x 3 grid of `GeoAxes`, flattened so each panel is addressed by one index
fig, ax = plt.subplots(nrows=2, ncols=3,
                       subplot_kw={'projection': projPC},
                       figsize=(15, 5))
ax = ax.flatten()

# Tick locations are identical on every panel
xticks = range(-120, -60, 10)
yticks = range(25, 50, 5)

# One map per data set
for panel, dataset_name in enumerate(data.dataset_name.data):
    geo_ax = ax[panel]
    geo_ax.set_extent([-126, -66, 24, 52], crs=projPC)

    # Draw the field without a per-panel colorbar; a shared one is added below
    data_plt = data.aet.sel(dataset_name=dataset_name)
    cs = data_plt.plot(ax=geo_ax, transform=projPC,
                       add_colorbar=False, vmin=0, vmax=160,
                       rasterized=True, cmap='plasma')

    # Panel title: "(letter) data set name"
    geo_ax.set_title(f'({letter[panel]}) {dataset_name}',
                     fontdict={'fontsize': 15})

    # Coastlines and state borders
    geo_ax.coastlines(resolution='110m', rasterized=True)
    geo_ax.add_feature(cfeature.STATES, rasterized=True)

    # Gridliner labels only on the bottom row and on the left column
    draw_labels = {}
    if panel > 2:
        draw_labels['bottom'] = 'x'
    if panel in (0, 3):
        draw_labels['left'] = 'y'
    geo_ax.gridlines(draw_labels=draw_labels, alpha=0,
                     xlabel_style={'size': 14}, ylabel_style={'size': 14})

    # Inward tick marks with empty text, since the gridliner writes the labels
    geo_ax.set_xticks(xticks, crs=projPC)
    geo_ax.set_xticklabels([''] * len(xticks))
    geo_ax.set_xlabel('')
    geo_ax.tick_params(axis="x", direction="in", bottom=True, top=True)

    geo_ax.set_yticks(yticks, crs=projPC)
    geo_ax.set_yticklabels([''] * len(yticks))
    geo_ax.set_ylabel('')
    geo_ax.tick_params(axis="y", direction="in", right=True, left=True)


# Shared colorbar in its own axis on the right-hand side; `cs` is the
# mappable of the last panel (all panels use the same vmin/vmax/cmap)
cbar_ax = fig.add_axes([0.925, 0.11, 0.01, 0.82])
cbar_ax.tick_params(axis="y", direction="in")

cbar = fig.colorbar(cs, cax=cbar_ax, extend='max')
cbar.set_label('Evapotranspiration\n[mm month$^{-1}$]', fontsize=15)
cbar.ax.tick_params(labelsize=14)

fig.supxlabel('Longitude', y=0.005, fontsize=15)
fig.supylabel('Latitude', x=0.005, fontsize=15)
# Leave room on the right for the colorbar
fig.subplots_adjust(left=0.07, right=0.91, bottom=0.1, top=0.95, wspace=0.02, hspace=0.15)

fig.savefig('mean_summer_ET.pdf')

## Figure 3

Plot the regionally aggregated time series for the three regions. Add an inset of the location of each region in the US in the upper right.

In [None]:
# First we need to create the weight map to do the spatial aggregation
# See the Regional Analysis notebook for more details on this code block
def grid_to_poly_boxes(grid, lat_coord='latitude', lon_coord='longitude'):
    """Build one rectangular shapely polygon per cell of a regular lat/lon grid.

    Each cell is centred on a (lat, lon) coordinate pair and extends half a
    grid spacing in every direction.

    Parameters
    ----------
    grid : xarray.Dataset
        Object holding the 1-D coordinates named ``lat_coord`` and
        ``lon_coord``. It is not modified; the bounds are added to a copy.
    lat_coord, lon_coord : str
        Names of the latitude and longitude coordinates in ``grid``.

    Returns
    -------
    xarray.DataArray
        Object-dtype array of shapely boxes along the stacked ``point``
        dimension (a stack of ``lat_coord`` and ``lon_coord``).

    Raises
    ------
    ValueError
        If a coordinate has fewer than two values or is not evenly spaced.
    """
    def cell_spacing(name):
        # Absolute step between neighbouring coordinate values. The median is
        # used so that floating-point noise in the steps cannot produce several
        # "unique" spacings, which would not broadcast against the coordinate.
        steps = np.abs(np.diff(np.asarray(grid[name])))
        if steps.size == 0:
            raise ValueError(
                f"Coordinate '{name}' needs at least two values to infer the grid spacing."
            )
        spacing = np.median(steps)
        if not np.allclose(steps, spacing, rtol=1e-2, atol=0):
            raise ValueError(f"Coordinate '{name}' is not evenly spaced.")
        return spacing

    def cell_bounds(name):
        # Column 0 is the centre plus half a cell, column 1 the centre minus it
        centres = np.asarray(grid[name])
        half_cell = cell_spacing(name) / 2
        return np.stack((centres + half_cell, centres - half_cell), axis=-1)

    # Shallow copy so the caller's object does not gain the *_bounds variables
    grid = grid.copy()
    grid[lat_coord+'_bounds'] = ((lat_coord, 'bound'), cell_bounds(lat_coord))
    grid[lon_coord+'_bounds'] = ((lon_coord, 'bound'), cell_bounds(lon_coord))

    points = grid.stack(point=(lat_coord, lon_coord))

    def bounds_to_poly(lat_bounds, lon_bounds):
        # Shift cells expressed on a 0-360 longitude axis to -180..180
        if lon_bounds[0] >= 180:
            lon_bounds = lon_bounds - 360
        return box(lon_bounds[0],
                   lat_bounds[0],
                   lon_bounds[1],
                   lat_bounds[1])

    # Apply the polygon builder to every stacked point; `bound` is consumed
    boxes = xr.apply_ufunc(
        bounds_to_poly,
        points[lat_coord+'_bounds'],
        points[lon_coord+'_bounds'],
        input_core_dims=[('bound',),  ('bound',)],
        output_dtypes=[np.dtype('O')],
        vectorize=True
    )

    return boxes

# Take only the lat/lon coordinates of the combined data sets and turn every
# grid cell into a polygon
grid = datasets[['lat', 'lon']]
boxes = grid_to_poly_boxes(grid, lat_coord='lat', lon_coord='lon')

# One row per grid cell: its polygon and its lat/lon, indexed by the stacked
# `point` index and declared to be in EPSG:4326
grid_df= gpd.GeoDataFrame(
    data={'geometry': boxes.data, 'lat': boxes.lat, 'lon': boxes.lon},
    index=boxes.indexes['point'],
    crs='EPSG:4326'
)

# High Plains aquifer: keep the rows whose AQUIFER field is 'High Plains aquifer',
# merge them into a single geometry, then reproject to EPSG:4326 (via EPSG:4269)
high_plns_aqfr = gpd.read_file('../Data/regions/High_Plains_aquifer.zip')
high_plns_aqfr = high_plns_aqfr[high_plns_aqfr['AQUIFER'] == 'High Plains aquifer']
high_plns_aqfr = gpd.GeoDataFrame(geometry=[unary_union(high_plns_aqfr.geometry.values)],
                                  crs=high_plns_aqfr.crs)
high_plns_aqfr = high_plns_aqfr.to_crs('EPSG:4269').to_crs('EPSG:4326')
high_plns_aqfr['region_name'] = 'High Plains Aquifer'

# Central Valley: buffer every geometry by 0.001 (in the units of the file's
# CRS) before merging into a single geometry
# NOTE(review): the buffer presumably closes small gaps between parts - confirm
cntrl_valley = gpd.read_file('../Data/regions/Central_Valley.zip')
cntrl_valley = gpd.GeoDataFrame(geometry=[unary_union(cntrl_valley.geometry.buffer(0.001).values)],
                                crs=cntrl_valley.crs)
cntrl_valley = cntrl_valley.to_crs('EPSG:4269').to_crs('EPSG:4326')
cntrl_valley['region_name'] = 'Central Valley'

# Upper Colorado River Basin: drop the unused attribute columns and reproject
ucrb = gpd.read_file('../Data/regions/Upper_Colorado_River_Basin.zip')
ucrb = ucrb.drop(columns=['EXT_ID', 'EXT_TYP_ID', 'NAME'])
ucrb = ucrb.to_crs('EPSG:4326')
ucrb['region_name'] = 'Upper Colorado River Basin'

# All three regions in one table, one row per region
regions = pd.concat([high_plns_aqfr, cntrl_valley, ucrb], ignore_index=True)

# Overlay the grid cells with the regions after reprojecting both to EPSG:5070;
# the areas below are therefore measured in that CRS
overlay = grid_df.to_crs('EPSG:5070').overlay(regions.to_crs('EPSG:5070'))

# Weight of each overlay piece: its area divided by the summed area of all
# pieces with the same region_name, so the weights of a region sum to 1
grid_cell_fraction = (
    overlay.geometry.area.groupby(overlay['region_name'])
    .transform(lambda x: x / x.sum())
)

# Reshape the weights to a (lat, lon, region_name) array, with 0 for every
# lat/lon/region combination that has no overlay piece
multi_index = overlay.set_index(['lat', 'lon', 'region_name']).index
df_weights = pd.DataFrame({'weights': grid_cell_fraction.values}, index=multi_index)
da_weights = xr.Dataset(df_weights).unstack(fill_value=0).weights
# Extend the weights to the full grid of `datasets` (outer join on the shared
# coordinates other than time and dataset_name), filling new cells with 0
da_weights, _ = xr.align(da_weights, datasets, join='outer',
                         exclude=['time', 'dataset_name'],
                         fill_value=0)

Now plot the time series.

In [None]:
# Weighted regional mean ET of every data set, cut to 2003-01 through 2018-09
et_weighted = datasets.weighted(da_weights)
ds_et_regional = (
    et_weighted
    .mean(dim=['lat', 'lon'], skipna=True, keep_attrs=True)
    .sel(time=slice('2003-01', '2018-09'))
)

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 4), sharey=True)

# Proxy legend entries, one per data set, coloured 'tab:<colour>' in list order
colors = ['blue', 'orange', 'green', 'red', 'purple', 'brown']
handles = [
    mlines.Line2D([], [], label=dataset_name, color=f'tab:{colors[n]}')
    for n, dataset_name in enumerate(ds_et_regional.dataset_name.data)
]

# One panel per region
for panel, region_name in enumerate(ds_et_regional.region_name.data):
    panel_ax = ax[panel]

    regional_aet = ds_et_regional.aet.sel(region_name=region_name)
    regional_aet.plot.line(x='time', ax=panel_ax, ylim=(-5, 180), add_legend=False)

    # Year ticks every third year
    panel_ax.set_xlabel('Year', fontsize=15)
    panel_ax.xaxis.set_major_locator(mdates.YearLocator(base=3))
    panel_ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

    # Only the first panel carries the y-axis label
    if panel == 0:
        panel_ax.set_ylabel('Evapotranspiration\n[mm month$^{-1}$]', fontsize=15)
    else:
        panel_ax.set_ylabel(None)

    panel_ax.set_title(f'({letter[panel]}) {region_name}', fontsize=15)
    panel_ax.tick_params(axis="both", direction="in", right=True, top=True, labelsize=14)

    # The legend goes on the third panel only
    if panel == 2:
        panel_ax.legend(handles=handles, loc='upper left', fontsize=14)

    # Final y-range; this replaces the ylim given to the plot call above
    panel_ax.set_ylim(-10, 220)

    # Inset map in the upper right showing where the region lies
    ax_in = inset_axes(panel_ax, width='40%', height='40%', loc="upper right",
                       axes_class=cartopy.mpl.geoaxes.GeoAxes,
                       axes_kwargs=dict(projection=projPC))
    ax_in.set_extent([-126, -66, 24, 52], crs=projPC)

    # Coastlines and state borders of the inset
    ax_in.coastlines(resolution='110m', linewidth=0.4, rasterized=True)
    ax_in.add_feature(cfeature.STATES, linewidth=0.4, rasterized=True)

    # Fill the current region in red
    region_rows = regions[regions['region_name'] == region_name]
    cs = region_rows.plot(ax=ax_in, transform=projPC, color='tab:red', rasterized=True)

plt.tight_layout()
fig.savefig('regional_time_series.pdf')

## Figure 4

Plot the error cross-correlation estimates of the data sets in a lower triangular corner plot.

In [None]:
ec_errs = xr.open_dataset('../Data/EC_errs.nc')

projPC = ccrs.PlateCarree()
# NOTE: `letter` is assigned here but is not referenced again in this cell
letter = np.array([['a', 'b', 'c'],
                   ['d', 'e', 'f'],
                   ['g', 'h', 'i'],
                   ['j', 'k', 'l'],
                   ['m', 'n', 'o']]).T.flatten()

# 5 x 5 grid of `GeoAxes`; only the lower triangle (diagonal included) is used
fig, ax = plt.subplots(nrows=5, ncols=5,
                       subplot_kw={'projection': projPC},
                       figsize=(22, 11))

# (row, column) of the k-th pair: the lower triangle is filled column by
# column, each column starting on the diagonal and running down to row 4
panel_positions = [(row, col) for col in range(5) for row in range(col, 5)]

# Tick locations are identical on every panel
xticks = range(-120, -60, 10)
yticks = range(25, 50, 5)

# Median over the estimates, computed once for all pairs
median_rho = ec_errs.rho.median(dim='est_idx')

# One map per data set pair
for k, covar_pair in enumerate(ec_errs.covar_pair.data):
    i, j = panel_positions[k]
    panel = ax[i, j]

    panel.set_extent([-126, -66, 24, 52], crs=projPC)

    # Non-seasonal median rho of this pair, multiplied by the CONUS mask
    data = median_rho.sel(covar_pair=covar_pair, season='All') * conus_mask

    cs = data.plot(ax=panel, transform=projPC,
                   add_colorbar=False, vmin=-1, vmax=1,
                   rasterized=True, cmap='PuOr')

    # Diagonal panels are titled with the first name of the pair
    if i == j:
        panel.set_title(covar_pair.split()[0], fontdict={'fontsize': 20})
    else:
        panel.set_title(None)

    # Coastlines and state borders
    panel.coastlines(resolution='110m', rasterized=True)
    panel.add_feature(cfeature.STATES, rasterized=True)

    # Gridliner labels only on the bottom row and on the first column
    draw_labels = {}
    if i == 4:
        draw_labels['bottom'] = 'x'
    if k < 5:
        draw_labels['left'] = 'y'
    panel.gridlines(draw_labels=draw_labels, alpha=0,
                    xlabel_style={'size': 14}, ylabel_style={'size': 14})

    # Inward tick marks with empty text, since the gridliner writes the labels
    panel.set_xticks(xticks, crs=projPC)
    panel.set_xticklabels([''] * len(xticks))
    panel.set_xlabel('')
    panel.tick_params(axis="x", direction="in", bottom=True, top=True)

    panel.set_yticks(yticks, crs=projPC)
    panel.set_yticklabels([''] * len(yticks))
    panel.set_ylabel('')
    panel.tick_params(axis="y", direction="in", right=True, left=True)

    # First-column panels are labelled with the second name of the pair
    if k < 5:
        panel.set_ylabel(f"{covar_pair.split()[1]}\n\n", fontsize=20)

# Hide the unused panels above the diagonal
for i in range(0, 4):
    for j in range(i+1, 5):
        ax[i, j].axis('off')

# Shared colorbar in its own axis on the right-hand side
cbar_ax = fig.add_axes([0.93, 0.3, 0.01, 0.63])
cbar_ax.tick_params(axis="y", direction="in")

cbar = fig.colorbar(cs, cax=cbar_ax)
cbar.set_label(r'Error Cross-Correlation ($\rho_{\varepsilon_i, \varepsilon_j}$)', fontsize=20)
cbar.ax.tick_params(labelsize=18)

# fig.supxlabel('Longitude', y=0.005, fontsize=15)
# fig.supylabel('Latitude', x=0.005, fontsize=15)
# Set the margins and spacing of the panel grid
fig.subplots_adjust(left=0.055, right=0.99, bottom=0.05, top=0.95, wspace=0.02, hspace=0.02)

fig.savefig('median_rho.pdf')

## Figure 5

Create a bar chart for the regional error cross-correlations. Include the 16th and 84th percentile ranges as error bars.

In [None]:
# Generate the data
errs = ec_errs.rename({'est_pair': 'est_pair_cov', 'est_idx': 'est_idx_cov'})
weighted_results = errs.weighted(da_weights)

# Use mean rather than sum as it accounts for NaNs
# The are equivalent since the sum of weights is 1
regional_errs = weighted_results.mean(dim=['lat', 'lon'], skipna=True, keep_attrs=True)

# Do non-NaN area weighting
regional_weights = weighted_results.sum_of_weights(dim=['lat', 'lon'], keep_attrs=True)
regional_weights /= regional_weights.sum(dim='est_idx_cov')

regional_errs

In [None]:
# Bar chart of the regional error cross-correlation of every data set pair.
# Bar heights are the sum over the estimates of regional_errs * regional_weights;
# the error bars run to the 16th and 84th percentiles of regional_errs over
# the estimates.
for season in ['All']:#regional_errs.season.data:
    data = (regional_errs * regional_weights).sel(season=season)
    data_err = regional_errs.sel(season=season)

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.2, 4))

    x = np.arange(3)  # the label locations
    width = 1/(3 + len(data.covar_pair))  # the width of the bars
    multiplier = 2

    # Reductions over the estimates, done once instead of once per bar
    rho_regional = data.rho.sum(dim='est_idx_cov')
    rho_bounds = data_err.rho.quantile([0.16, 0.84], dim='est_idx_cov')

    colors = plt.cm.tab20(np.linspace(0,1,15))
    for color, covar_pair in zip(colors, data.covar_pair.data):
        offset = width * multiplier
        heights = rho_regional.sel(covar_pair=covar_pair).data
        bounds = rho_bounds.sel(covar_pair=covar_pair).data
        ax.bar(x + offset, heights,
               width, label=covar_pair.replace(' ', '-'), color=color, edgecolor='black')
        # Distance from the bar height to the lower and upper percentile
        ax.errorbar(x + offset, heights,
                    yerr=np.abs(bounds - heights),
                    linestyle='', color='black', capsize=3)
        multiplier += 1

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.plot(ax.get_xlim(), [0, 0], color='black', linestyle='dashed', linewidth=1)
    ax.set_ylabel(r'Error Cross-Correlation ($\rho_{\varepsilon_i, \varepsilon_j}$)', fontsize=14)
    # Build the tick labels as new strings. Assigning into the array returned
    # by `region_name.data` can change the shared region_name coordinate itself.
    xticks = [str(name).replace('Upper Colorado River Basin', 'Upper Colorado\nRiver Basin')
              for name in data.region_name.data]
    ax.set_xticks(x + 9 * width, xticks)
    ax.set_xlim(0, 3)
    ax.legend(loc='upper left', ncols=3, columnspacing=1)
    # The y-range extends above the highest tick (1); the legend sits in that space
    ax.set_ylim(-1, 2.5)
    ax.tick_params(axis="both", direction="in", right=True, top=True, labelsize=12)
    ax.set_yticks(np.linspace(-1, 1, 5), np.linspace(-1, 1, 5))

    plt.tight_layout()
    if season == 'All':
        fig.savefig('regional_rho.pdf')
    else:
        fig.savefig(f'regional_rho_{season}.pdf')

    # plt.close()

## Figure 6 (and Appendix Figures)

Plot the agreement probability of the data set pairs using the median covariance matrices.

In [None]:
# Combine the error statistics (with `covar_pair` renamed to `dataset_pair`)
# and the bias file into one data set
rel_bias = xr.open_dataset('../Data/avg_bias.nc')
ds_results = xr.merge([ec_errs.rename({'covar_pair': 'dataset_pair'}),
                       rel_bias])

# sigma_bias = sqrt(sum of the diagonal of `covar` - 2 * its [0, 1] element)
matrix_dims = ['covar_pair_idx_1', 'covar_pair_idx_2']
covar = ds_results['covar']
variances = covar.linalg.diagonal(dims=matrix_dims, offset=0)
covariances = covar.sel(covar_pair_idx_1=0, covar_pair_idx_2=1).squeeze()
ds_results['sigma_bias'] = np.sqrt(variances.sum(dim='covar_pair_idx_1') - 2 * covariances)

# Agreement probability: the CDF at 0 of a normal distribution centred on
# |median_bias| with standard deviation sigma_bias
norm_dist = stats.XrContinuousRV(norm,
                                 loc=np.abs(ds_results['median_bias']),
                                 scale=ds_results['sigma_bias'])
# Set and name it as a DataArray as the attributes of the coordinates are not kept
agreement_probability = norm_dist.cdf(0)
agreement_probability.name = 'agreement_probability'

# Merge to preserve coordinate attributes
ds_results = xr.merge([ds_results, agreement_probability])
ds_results

In [None]:
# (row, column) of the k-th pair: the lower triangle is filled column by
# column, each column starting on the diagonal and running down to row 4
panel_positions = [(row, col) for col in range(5) for row in range(col, 5)]

# Tick locations are identical on every panel
xticks = range(-120, -60, 10)
yticks = range(25, 50, 5)

# Median over the estimates, computed once for all seasons and pairs
median_agreement = ds_results['agreement_probability'].median(dim='est_idx')

# One figure per season
for season in ds_results.season.data:
    # 5 x 5 grid of `GeoAxes`; only the lower triangle (diagonal included) is used
    fig, ax = plt.subplots(nrows=5, ncols=5,
                           subplot_kw={'projection': projPC},
                           figsize=(22, 11))

    # One map per data set pair
    for k, dataset_pair in enumerate(ds_results.dataset_pair.data):
        i, j = panel_positions[k]
        panel = ax[i, j]

        panel.set_extent([-126, -66, 24, 52], crs=projPC)

        # Median agreement probability of this pair and season, multiplied by
        # the CONUS mask
        data = median_agreement.sel(dataset_pair=dataset_pair, season=season) * conus_mask

        cs = data.plot(ax=panel, transform=projPC,
                       add_colorbar=False, vmin=0, vmax=0.5,
                       rasterized=True, cmap='plasma')

        # Diagonal panels are titled with the first name of the pair
        if i == j:
            panel.set_title(dataset_pair.split()[0], fontdict={'fontsize': 20})
        else:
            panel.set_title(None)

        # Coastlines and state borders
        panel.coastlines(resolution='110m', rasterized=True)
        panel.add_feature(cfeature.STATES, rasterized=True)

        # Gridliner labels only on the bottom row and on the first column
        draw_labels = {}
        if i == 4:
            draw_labels['bottom'] = 'x'
        if k < 5:
            draw_labels['left'] = 'y'
        panel.gridlines(draw_labels=draw_labels, alpha=0,
                        xlabel_style={'size': 14}, ylabel_style={'size': 14})

        # Inward tick marks with empty text, since the gridliner writes the labels
        panel.set_xticks(xticks, crs=projPC)
        panel.set_xticklabels([''] * len(xticks))
        panel.set_xlabel('')
        panel.tick_params(axis="x", direction="in", bottom=True, top=True)

        panel.set_yticks(yticks, crs=projPC)
        panel.set_yticklabels([''] * len(yticks))
        panel.set_ylabel('')
        panel.tick_params(axis="y", direction="in", right=True, left=True)

        # First-column panels are labelled with the second name of the pair
        if k < 5:
            panel.set_ylabel(f"{dataset_pair.split()[1]}\n\n", fontsize=20)

    # Hide the unused panels above the diagonal
    for i in range(0, 4):
        for j in range(i+1, 5):
            ax[i, j].axis('off')

    # Shared colorbar in its own axis on the right-hand side
    cbar_ax = fig.add_axes([0.93, 0.3, 0.01, 0.63])
    cbar_ax.tick_params(axis="y", direction="in")

    cbar = fig.colorbar(cs, cax=cbar_ax)
    cbar.set_label(r'Agreement Probability', fontsize=20)
    cbar.ax.tick_params(labelsize=18)

    # fig.supxlabel('Longitude', y=0.005, fontsize=15)
    # fig.supylabel('Latitude', x=0.005, fontsize=15)
    # Set the margins and spacing of the panel grid
    fig.subplots_adjust(left=0.055, right=0.99, bottom=0.05, top=0.95, wspace=0.02, hspace=0.02)

    # The non-seasonal figure has no season suffix in its file name
    if season == 'All':
        fig.savefig('agreement.pdf')
    else:
        fig.savefig(f'agreement_{season}.pdf')

    plt.close()

## Table 2

Make Table 2 using the agreement probability data.

In [None]:
# LaTeX header of Table 2: one row per data set pair, three significance
# levels for the non-seasonal case and for each of the four seasons
table = [r'\begin{table*}[t]']
table.append(r'    \caption{The percentage of grid cells across CONUS with agreement probabilities '
             r'above the given significance levels for each of the data set pairs.}')
table.append(r'    \label{tab:PercCellSigLevel}')
table.append(r'    \footnotesize')
table.append(r'    \begin{tabular}{lrrrrrrrrrrrrrrr}')
table.append(r'        \tophline')
table.append(r'        Data Set Pairs & \multicolumn{3}{c}{Non-seasonal} & '
             r'\multicolumn{3}{c}{Winter} & \multicolumn{3}{c}{Spring} & '
             r'\multicolumn{3}{c}{Summer} & \multicolumn{3}{c}{Fall}\\')
table.append(r'        & \multicolumn{3}{c}{Significance Levels} & \multicolumn{3}{c}{Significance Levels} & '
             r'\multicolumn{3}{c}{Significance Levels} & \multicolumn{3}{c}{Significance Levels} & '
             r'\multicolumn{3}{c}{Significance Levels} \\')
table.append(r'        & 0.16 & 0.05 & 0.01 & 0.16 & 0.05 & 0.01 & 0.16 & 0.05 & 0.01 & 0.16 & 0.05 & 0.01 & 0.16 & 0.05 & 0.01\\')
table.append(r'        \middlehline')

# Loop invariants: the number of cells in the CONUS mask (the denominator of
# every fraction) and the masked median agreement probability over the estimates
n_conus_cells = float(conus_mask.sum())
median_agreement = ds_results['agreement_probability'].median(dim='est_idx') * conus_mask

for dataset_pair in ds_results.dataset_pair.data:
    line = f"        {dataset_pair.replace(' ', '-')} & "
    for season in ['All', 'DJF', 'MAM', 'JJA', 'SON']:
        conus_agree = median_agreement.sel(season=season, dataset_pair=dataset_pair)
        # Fraction of CONUS cells above each significance level, as plain
        # floats so that the format specs below apply to ordinary numbers
        conus_frac16 = float((conus_agree > 0.16).sum(dim=['lat', 'lon'])) / n_conus_cells
        conus_frac05 = float((conus_agree > 0.05).sum(dim=['lat', 'lon'])) / n_conus_cells
        conus_frac01 = float((conus_agree > 0.01).sum(dim=['lat', 'lon'])) / n_conus_cells
        line += f"{conus_frac16*100:.1f} & {conus_frac05*100:.1f} & {conus_frac01*100:.1f} "
        # The last season ends the LaTeX row; the others are followed by a cell separator
        if season == 'SON':
            line += r'\\'
        else:
            line += '& '

        # Print the non-seasonal fractions that are quoted in the text
        if season == 'All':
            print(f'Fraction of {dataset_pair} > 0.16 (0.05): {conus_frac16:0.3f} ({conus_frac05:0.3f})')

    table.append(line)

table.append(r'        \bottomhline')
table.append(r'    \end{tabular}')
table.append(r'\end{table*}')

# Write the table, one entry per line
with open('table_PercCellSigLevel.tex', 'w', encoding='utf-8') as outfile:
    outfile.write('\n'.join(table) + '\n')

Print quoted stats for the seasonal data.

In [None]:
# Denominator shared by all seasons: mask cell count times the number of pairs
n_cells_all_pairs = conus_mask.sum() * len(ds_results.dataset_pair)
count_dims = ['lat', 'lon', 'dataset_pair']

# Fraction of (cell, pair) combinations whose median agreement probability
# exceeds 0.16 and 0.05, per season
for season in ['DJF', 'MAM', 'JJA', 'SON']:
    seasonal_prob = ds_results['agreement_probability'].sel(season=season)
    conus_prob = seasonal_prob.median(dim='est_idx') * conus_mask
    conus_frac16 = (conus_prob > 0.16).sum(dim=count_dims) / n_cells_all_pairs
    conus_frac05 = (conus_prob > 0.05).sum(dim=count_dims) / n_cells_all_pairs
    print(f'Fraction in {season} with p > 0.16 (0.05): {conus_frac16.data:0.3f} ({conus_frac05.data:0.3f})')

## Figure 7

Make a bar plot of the agreement across regions.

In [None]:
# Weighted regional mean of the bias fields
weighted_bias = rel_bias.weighted(da_weights)

# Use mean rather than sum as it accounts for NaNs
regional_bias = weighted_bias.mean(dim=['lat', 'lon'], skipna=True, keep_attrs=True)

# Rename the regional error statistics to the names used for the gridded
# results, then merge them with the regional bias
name_map = {'covar_pair': 'dataset_pair',
            'est_idx_cov': 'est_idx',
            'est_pair_cov': 'est_pair'}
regional_results = xr.merge([regional_errs.rename(name_map), regional_bias])

# sigma_bias = sqrt(sum of the diagonal of `covar` - 2 * its first off-diagonal)
matrix_dims = ['covar_pair_idx_1', 'covar_pair_idx_2']
covar = regional_results['covar']
variances = covar.linalg.diagonal(dims=matrix_dims, offset=0)
covariances = covar.linalg.diagonal(dims=matrix_dims, offset=1).squeeze()
regional_results['sigma_bias'] = np.sqrt(variances.sum(dim='covar_pair_idx_1') - 2 * covariances)

# Agreement probability: the CDF at 0 of a normal distribution centred on
# |median_bias| with standard deviation sigma_bias
norm_dist = stats.XrContinuousRV(norm,
                                 loc=np.abs(regional_results['median_bias']),
                                 scale=regional_results['sigma_bias'])
# Set and name it as a DataArray as the attributes of the coordinates are not kept
agreement_probability = norm_dist.cdf(0)
agreement_probability.name = 'agreement_probability'

# Merge to preserve coordinate attributes
regional_results = xr.merge([regional_results, agreement_probability])
regional_results

In [None]:
# Bar chart of the non-seasonal regional agreement probability of every data
# set pair: bars are the median over the estimates, error bars run to the
# 16th and 84th percentiles.
data = regional_results.agreement_probability.sel(season='All')

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.2, 3.5))

x = np.arange(3)  # the label locations
width = 1/(3 + len(regional_results.dataset_pair))  # the width of the bars
multiplier = 2

# Reductions over the estimates, done once instead of once per bar
median_prob = data.median(dim='est_idx')
prob_bounds = data.quantile([0.16, 0.84], dim='est_idx')

colors = plt.cm.tab20(np.linspace(0,1,15))
for color, dataset_pair in zip(colors, regional_results.dataset_pair.data):
    offset = width * multiplier
    heights = median_prob.sel(dataset_pair=dataset_pair).data
    bounds = prob_bounds.sel(dataset_pair=dataset_pair).data
    ax.bar(x + offset, heights,
           width, label=dataset_pair.replace(' ', '-'), color=color, edgecolor='black')
    # Distance from the median to the lower and upper percentile
    ax.errorbar(x + offset, heights,
                yerr=np.abs(bounds - heights),
                linestyle='', color='black', capsize=3)
    multiplier += 1

# Add some text for labels, title and custom x-axis tick labels, etc.
# Dashed reference lines at the 0.05 and 0.16 probability levels
ax.plot(ax.get_xlim(), [0.05, 0.05], color='black', linestyle='dashed', linewidth=1)
ax.plot(ax.get_xlim(), [0.16, 0.16], color='black', linestyle='dashed', linewidth=1)
ax.set_ylabel('Agreement Probability', fontsize=14)
# Build the tick labels as new strings. Assigning into the array returned by
# `region_name.data` can change the shared region_name coordinate itself.
xticks = [str(name).replace('Upper Colorado River Basin', 'Upper Colorado\nRiver Basin')
          for name in regional_results.region_name.data]
ax.set_xticks(x + 9 * width, xticks)
ax.legend(loc='upper left', ncols=3, columnspacing=1)
# The y-range extends above the highest tick (0.5); the legend sits in that space
ax.set_ylim(0, 1)
ax.tick_params(axis="both", direction="in", right=True, top=True, labelsize=12)
ax.set_yticks(np.linspace(0, 0.5, 3), np.linspace(0, 0.5, 3))
ax.set_xlim(0, 3)

plt.tight_layout()
fig.savefig('regional_agreement.pdf')

## Figure 8

Make bar plot of the agreement across regions separated out by season.

In [None]:
# Four stacked bar charts (one per season) of the regional agreement
# probability. The first panel is taller because it also holds the legend.
fig, ax = plt.subplots(nrows=4, ncols=1, figsize=(7, 9), sharex=True, height_ratios=[0.4, 0.2, 0.2, 0.2])

ax = ax.flatten()

letter = 'abcd'
seasons = {'DJF': 'winter', 'MAM': 'spring', 'JJA': 'summer', 'SON': 'fall'}

# Quantities that are the same for every season
x = np.arange(3)  # the label locations
width = 1/(3 + len(regional_results.dataset_pair))  # the width of the bars
colors = plt.cm.tab20(np.linspace(0, 1, 15))
# Build the tick labels as new strings. Assigning into the array returned by
# `region_name.data` can change the shared region_name coordinate itself.
xticks = [str(name).replace('Upper Colorado River Basin', 'Upper Colorado\nRiver Basin')
          for name in regional_results.region_name.data]

for i, (season, season_name) in enumerate(seasons.items()):
    data = regional_results.agreement_probability.sel(season=season)

    # Reductions over the estimates, done once per season
    median_prob = data.median(dim='est_idx')
    prob_bounds = data.quantile([0.16, 0.84], dim='est_idx')

    multiplier = 2
    for color, dataset_pair in zip(colors, regional_results.dataset_pair.data):
        offset = width * multiplier
        heights = median_prob.sel(dataset_pair=dataset_pair).data
        bounds = prob_bounds.sel(dataset_pair=dataset_pair).data
        ax[i].bar(x + offset, heights,
                  width, label=dataset_pair.replace(' ', '-'), color=color, edgecolor='black')
        # Distance from the median to the lower and upper percentile
        ax[i].errorbar(x + offset, heights,
                       yerr=np.abs(bounds - heights),
                       linestyle='', color='black', capsize=3)
        multiplier += 1

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax[i].set_title(f'({letter[i]}) ' + season_name,
                    fontdict={'fontsize': 14})
    # Dashed reference lines at the 0.05 and 0.16 probability levels
    ax[i].plot(ax[i].get_xlim(), [0.05, 0.05], color='black', linestyle='dashed', linewidth=1)
    ax[i].plot(ax[i].get_xlim(), [0.16, 0.16], color='black', linestyle='dashed', linewidth=1)

    # The x axis is shared, so the region labels are set on the last panel only
    if i == 3:
        ax[i].set_xticks(x + 9 * width, xticks)
    # The first panel gets the legend and a taller y-range to make room for it
    if i == 0:
        ax[i].legend(loc='upper left', ncols=3, columnspacing=1, fontsize=10)
        ax[i].set_ylim(0, 1)
    else:
        ax[i].set_ylim(0, 0.5)
    ax[i].set_yticks(np.linspace(0, 0.5, 3), np.linspace(0, 0.5, 3))
    ax[i].tick_params(axis="both", direction="in", right=True, top=True, labelsize=12)
    ax[i].set_xlim(0, 3)

fig.supylabel('Agreement Probability', y=0.43, fontsize=16)
plt.subplots_adjust(left=0.12, right=0.98, top=0.96, bottom=0.05, hspace=0.17)
fig.savefig('regional_agreement_seasonal.pdf')

Print more quoted stats.

In [None]:
# Quoted statement being checked:
# "Starting with the region with the highest overall seasonal agreement, the
# High Plains, the summer and fall have all data set pairs with probability
# values >0.1, indicating reasonable agreement."
median_agreement = regional_results['agreement_probability'].median(dim='est_idx')
high_plains_summer_fall = median_agreement.sel(region_name='High Plains Aquifer', season=['JJA', 'SON'])
high_plains_summer_fall > 0.1

In [None]:
# Quoted statement being checked:
# "Finally, the Central Valley shows similar trends in probabilities in the
# spring and fall as the UCRB, with 1–3 data set pairs having values below
# 0.05. Yet, when looking at the winter and summer, only a few data sets have
# probabilities >0.16, with the majority of data set pairs having extremely
# low agreement (<0.015). This is especially true in the summer, where 8 pairs
# (SSEBop or WBET paired with GLEAM, ERA5, NLDAS, or TerraClimate) have median
# probabilities <0.005."
central_valley_median = (
    regional_results['agreement_probability']
    .median(dim='est_idx')
    .sel(region_name='Central Valley')
)
# Number of pairs below 0.015 in winter, then below 0.005 in summer
print((central_valley_median.sel(season=['DJF']) < 0.015).sum())
print((central_valley_median.sel(season=['JJA']) < 0.005).sum())