This notebook reproduces Figure 2 from the manuscript. The figure caption reads:

Historical trends in Southern Ocean freshwater forcing, 1990-2021. (a,b) show 1990-2021 trends in $P$-$E$ ($(P-E)_{hist}$) for the CMIP6 multi-model mean (a) and ERA5 reanalysis (b). Stippling denotes grid cells where trends are not statistically significant at the 95$\%$ level according to a two-sided t-test, and green circles are shown as in Fig. 1a. (c) compares the Southern Ocean-integrated $P$-$E$ trends for CMIP6 models (blue) and observations (black), shown as anomalies relative to a 1979-1989 baseline average, with dashed lines showing linear trend fits for each dataset. (d) compares the $F_{(P-E)}$ (blue) and $F_{melt}$ (green) forcing time series and their associated uncertainties (shading), where $F_{(P-E)}$ is defined as the difference between simulated (multi-model mean) and observed linear trends in (c). Labels in (d) list the cumulative 1990-2021 freshwater input from each source.

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import netCDF4
from scipy.stats import linregress
import os 
import dask
import cftime
import xesmf as xe

from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.ticker as mticker
import matplotlib.path as mpath


import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter

import intake
from xmip.preprocessing import rename_cmip6
from xmip.preprocessing import broadcast_lonlat
from xmip.preprocessing import combined_preprocessing
from xmip.utils import google_cmip_col
from xmip.postprocessing import match_metrics
from xmip.postprocessing import merge_variables
from xmip.postprocessing import interpolate_grid_label
from xmip.postprocessing import concat_experiments
from xmip.postprocessing import pick_first_member

%matplotlib inline
import warnings 
warnings.filterwarnings("ignore")

First, obtain 1979-2021 Southern Ocean trends in the precipitation-evaporation (P-E) balance for 14 CMIP6 models from Google Cloud. Then splice the historical and projected (ssp585 emissions scenario) simulations, using monthly diagnostic output for precipitation (pr) and evaporation (evspsbl) from the atmosphere component (Amon).

In [2]:
# Open the Pangeo CMIP6 catalogue hosted on Google Cloud Storage.
url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
col = intake.open_esm_datastore(url)

# Base query: monthly atmosphere-table (Amon) precipitation (pr) and
# evaporation (evspsbl) from the historical and ssp585 experiments,
# limited to the candidate models and ensemble members listed below.
df_base = col.search(
    activity_id=["CMIP", "ScenarioMIP"],
    table_id=["Amon"],
    variable_id=["pr", "evspsbl"],
    experiment_id=["historical", "ssp585"],
    source_id=[
        "ACCESS-CM2",
        "ACCESS-ESM1-5",
        "AWI-ESM-1-REcoM",
        "CanESM5",
        "CAS-ESM2-0",
        "CESM2",
        "FGOALS-g3",
        "GFDL-CM4",
        "GFDL-ESM2M",
        "GFDL-ESM4",
        "GISS-E2-1-G",
        "HadGEM3-GC31-LL",
        "MIROC6",
        "MPI-ESM1-2-HR",
        "MPI-ESM1-2-LR",
        "MRI-ESM2-0",
    ],
    member_id=["r1i1p1f1", "r1i1p1f3", "r1i1p5f1", "r10i1p1f1"],
)

# Options forwarded to to_dataset_dict(). The same `kwargs` dict is reused
# by the later fx-table (areacella / sftlf) queries in this notebook.
kwargs = dict(
    # zarr store options: consolidated metadata, cftime-based time decoding
    zarr_kwargs=dict(consolidated=True, use_cftime=True),
    # ask intake-esm not to aggregate catalogue entries
    aggregate=False,
    # xmip preprocessing function handed to intake-esm for each dataset
    preprocess=combined_preprocessing,
)

# Build the dictionary of xarray datasets for the query above.
with dask.config.set({"array.slicing.split_large_chunks": True}):
    combined_dict = df_base.to_dataset_dict(**kwargs)



--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


In [7]:
# Grid-cell area (areacella), needed for spatially integrated quantities.
# Every dataset is eventually regridded to a common grid, so most of the
# *_id selections in this query are arbitrary. Unique areacella files are
# also not always available for each model grid, which is another reason
# regridding is required.
df_area = col.search(
    table_id=["fx"],
    variable_id=["areacella"],
    grid_label=["gn", "gr1"],
    experiment_id=["piControl"],
    source_id=[
        "ACCESS-CM2",
        "ACCESS-ESM1-5",
        "AWI-ESM-1-REcoM",
        "CanESM5",
        "CAS-ESM2-0",
        "CESM2",
        "FGOALS-g3",
        "GFDL-CM4",
        "GFDL-ESM2M",
        "GFDL-ESM4",
        "GISS-E2-1-G",
        "HadGEM3-GC31-LL",
        "MIROC6",
        "MPI-ESM1-2-HR",
        "MPI-ESM1-2-LR",
        "MRI-ESM2-0",
    ],
)

# Load the matching datasets, then reduce them with xmip's pick_first_member.
with dask.config.set({"array.slicing.split_large_chunks": True}):
    areacella_dict = df_area.to_dataset_dict(**kwargs)
    areacella_dict = pick_first_member(areacella_dict)

# Report how many area datasets were found and display their keys.
print("Number of items in the combined dictionary:", len(areacella_dict))
list(areacella_dict)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


Number of items in the combined dictionary: 12


['ACCESS-CM2.gn.piControl.fx.areacella',
 'GISS-E2-1-G.gn.piControl.fx.areacella',
 'HadGEM3-GC31-LL.gn.piControl.fx.areacella',
 'CanESM5.gn.piControl.fx.areacella',
 'ACCESS-ESM1-5.gn.piControl.fx.areacella',
 'CESM2.gn.piControl.fx.areacella',
 'GFDL-ESM4.gr1.piControl.fx.areacella',
 'GFDL-CM4.gr1.piControl.fx.areacella',
 'MPI-ESM1-2-LR.gn.piControl.fx.areacella',
 'MRI-ESM2-0.gn.piControl.fx.areacella',
 'MIROC6.gn.piControl.fx.areacella',
 'MPI-ESM1-2-HR.gn.piControl.fx.areacella']

In [8]:
# Repeat the fx-table query for the land fraction (sftlf).
# NOTE: the search result reuses the name `df_area`, overwriting the
# areacella query object from the previous cell.
df_area = col.search(
    table_id=["fx"],
    variable_id=["sftlf"],
    grid_label=["gn", "gr1"],
    experiment_id=["piControl"],
    source_id=[
        "ACCESS-CM2",
        "ACCESS-ESM1-5",
        "AWI-ESM-1-REcoM",
        "CanESM5",
        "CAS-ESM2-0",
        "CESM2",
        "FGOALS-g3",
        "GFDL-CM4",
        "GFDL-ESM2M",
        "GFDL-ESM4",
        "GISS-E2-1-G",
        "HadGEM3-GC31-LL",
        "MIROC6",
        "MPI-ESM1-2-HR",
        "MPI-ESM1-2-LR",
        "MRI-ESM2-0",
    ],
)

# Load the matching datasets, then reduce them with xmip's pick_first_member.
with dask.config.set({"array.slicing.split_large_chunks": True}):
    sftlf_dict = df_area.to_dataset_dict(**kwargs)
    sftlf_dict = pick_first_member(sftlf_dict)

# Report how many land-fraction datasets were found and display their keys.
print("Number of items in the combined dictionary:", len(sftlf_dict))
list(sftlf_dict)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.zstore.dcpp_init_year.version'


Number of items in the combined dictionary: 12


['MPI-ESM1-2-LR.gn.piControl.fx.sftlf',
 'ACCESS-CM2.gn.piControl.fx.sftlf',
 'ACCESS-ESM1-5.gn.piControl.fx.sftlf',
 'MRI-ESM2-0.gn.piControl.fx.sftlf',
 'CanESM5.gn.piControl.fx.sftlf',
 'HadGEM3-GC31-LL.gn.piControl.fx.sftlf',
 'CESM2.gn.piControl.fx.sftlf',
 'GFDL-CM4.gr1.piControl.fx.sftlf',
 'MPI-ESM1-2-HR.gn.piControl.fx.sftlf',
 'FGOALS-g3.gn.piControl.fx.sftlf',
 'GISS-E2-1-G.gn.piControl.fx.sftlf',
 'MIROC6.gn.piControl.fx.sftlf']

In [9]:
# Merge the pr and evspsbl variables together in each dataset.
combined_dict_merged = merge_variables(combined_dict)

# Time windows kept from each experiment before the two are spliced:
# historical covers 1979-2014 and the ssp585 projection covers 2015-2021.
# NOTE(review): the end dates use day 30 rather than 31, presumably so they
# remain valid for models on a 360-day calendar -- confirm before changing.
historical_start = '1979-01-01'
historical_end = '2014-12-30'
ssp585_start = '2015-01-01'
ssp585_end = '2021-12-30'

# Backward-compatible aliases. These names were misleading (the scenario
# used throughout this notebook is ssp585, not ssp245) but are kept in case
# other cells still refer to them.
ssp245_start = ssp585_start
ssp245_end = ssp585_end

# Subset every dataset to the window of its experiment. Only values of
# existing keys are replaced, so updating the dict while iterating is safe.
for key, value in combined_dict_merged.items():
    if 'historical' in key:
        # historical experiment: keep 1979-2014
        combined_dict_merged[key] = value.sel(time=slice(historical_start, historical_end))
    elif 'ssp585' in key:
        # ssp585 experiment: keep 2015-2021
        combined_dict_merged[key] = value.sel(time=slice(ssp585_start, ssp585_end))

# Attach the cell-area and land-fraction metrics to each dataset (matched on
# model and grid label), then splice the experiments together.
PE_dict_CMIP6 = match_metrics(
    combined_dict_merged, areacella_dict, ['areacella'],
    match_attrs=['source_id', 'grid_label'],
)
PE_dict_CMIP6 = match_metrics(
    PE_dict_CMIP6, sftlf_dict, ['sftlf'],
    match_attrs=['source_id', 'grid_label'],
)
PE_dict_CMIP6 = concat_experiments(PE_dict_CMIP6)

# Get rid of unneeded model/member combinations so that 14 entries remain.
# A missing key raises KeyError, which flags a change in the catalogue
# contents instead of silently altering the ensemble.
for unwanted_key in (
    'GISS-E2-1-G.gn.Amon.r1i1p1f1',
    'CESM2.gn.Amon.r1i1p1f1',
    'GISS-E2-1-G.gn.Amon.r1i1p1f3',
    'GISS-E2-1-G.gn.Amon.r10i1p1f1',
    'ACCESS-ESM1-5.gn.Amon.r10i1p1f1',
    'MPI-ESM1-2-LR.gn.Amon.r10i1p1f1',
    'MIROC6.gn.Amon.r10i1p1f1',
    'CanESM5.gn.Amon.r10i1p1f1',
    'MPI-ESM1-2-HR.gn.Amon.r10i1p1f1',
    'MRI-ESM2-0.gn.Amon.r10i1p1f1',
):
    del PE_dict_CMIP6[unwanted_key]

# Verify the ensemble size (expected: 14) and display the remaining keys.
print("Number of items in the combined dictionary:", len(PE_dict_CMIP6))
list(PE_dict_CMIP6.keys())


Number of items in the combined dictionary: 14


['MPI-ESM1-2-HR.gn.Amon.r1i1p1f1',
 'CESM2.gn.Amon.r10i1p1f1',
 'MPI-ESM1-2-LR.gn.Amon.r1i1p1f1',
 'GFDL-ESM4.gr1.Amon.r1i1p1f1',
 'CAS-ESM2-0.gn.Amon.r1i1p1f1',
 'MRI-ESM2-0.gn.Amon.r1i1p1f1',
 'FGOALS-g3.gn.Amon.r1i1p1f1',
 'ACCESS-CM2.gn.Amon.r1i1p1f1',
 'ACCESS-ESM1-5.gn.Amon.r1i1p1f1',
 'HadGEM3-GC31-LL.gn.Amon.r1i1p1f3',
 'GFDL-CM4.gr1.Amon.r1i1p1f1',
 'MIROC6.gn.Amon.r1i1p1f1',
 'CanESM5.gn.Amon.r1i1p1f1',
 'GISS-E2-1-G.gn.Amon.r1i1p5f1']

In [10]:
# to be continued... 