# Import modules

In [1]:
%%time
%load_ext memory_profiler

import datetime
import os, glob, sys, gc
import warnings
# warnings.filterwarnings('ignore', '.*invalid value encountered in true_divide.*', )

from dask.diagnostics import ProgressBar
pbar = ProgressBar(minimum=10)
pbar.register()
#pbar.unregister()

import numpy as np
import xarray as xr
xr.set_options(keep_attrs=True)

import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.colors import ListedColormap

import cartopy.crs       as ccrs
import cartopy.feature as cfeature
import cartopy

import cmcrameri

mpl.rcParams['savefig.dpi'] = 300

CPU times: user 1.11 s, sys: 584 ms, total: 1.69 s
Wall time: 1.14 s


# Starters

In [2]:
%%time
%%memit -c
print(datetime.datetime.now())

dirout = '24-06-10-compute-aou/'
if not os.path.isdir(dirout) : os.mkdir(dirout)

netcdfdir = dirout+'netcdf_files/'
if not os.path.isdir(netcdfdir) : os.mkdir(netcdfdir)

sys.stdout.echo = open(dirout+'stdout.txt', 'w')
sys.stderr.echo = open(dirout+'stderr.txt', 'w')

2024-10-18 18:13:14.177716
peak memory: 315.50 MiB, increment: 123.31 MiB
CPU times: user 59.8 ms, sys: 40.1 ms, total: 99.9 ms
Wall time: 197 ms


# Main parameters

In [3]:
%%time
%%memit -c

print(datetime.datetime.now())

kwopends=dict(use_cftime=True, decode_times=None,
              decode_cf=True, decode_coords=True)
kwopenmfds = dict(combine='by_coords', parallel=True, 
                  use_cftime=True, decode_times=None,
                  decode_cf=True, decode_coords=True)


rename_dict = {
    "x": "i",
    "y": "j",
    "lat": "latitude", 
    "lon": "longitude",
    "nav_lat": "latitude", 
    "nav_lon": "longitude",
    'lev': 'depth', 
    'deptht': 'depth', 
    'olevel': 'depth', 
    "Depth":"depth"
}


2024-10-18 18:13:14.363748
peak memory: 315.80 MiB, increment: 133.02 MiB
CPU times: user 57.6 ms, sys: 14.9 ms, total: 72.5 ms
Wall time: 178 ms


# Define some functions

## Preparation of data 

In [4]:
def shift_180_lon(zwda, verbose=False): 
    if verbose: print("func: shift_180_lon")
    
    try: 
        if not np.nanmin(zwda['longitude']) < -150: 
            zwda['longitude'] = (zwda['longitude'] + 180) % 360 - 180
            addtxt=str(datetime.datetime.now())+' shift_180_lon to get longitude from -180 to 180'
            try: zwda.attrs['history'] =  addtxt + ' ; '+zwda.attrs['history']
            except: zwda.attrs['history'] =  addtxt             
        #
    except: print('WARNING! longitude likely not shifted')
    return zwda
#

def rename_vars_dims_coords(ds, rename_dict, verbose=False):
    """
    Renames variables, dimensions, and coordinates in an xarray Dataset according to the provided rename dictionary.

    Parameters:
    -----------
    ds : xr.Dataset
        The xarray Dataset to be renamed.
    rename_dict : Dict[str, str]
        Dictionary containing the variable, dimension, or coordinate names to be renamed. 
        The keys represent the original names, and the values represent the new names.
    verbose : bool, optional
        If True, prints the function name at the start and end of execution (default is False).
    
    Returns:
    --------
    xr.Dataset
        A new xarray Dataset with variables, dimensions, and coordinates renamed according to the rename dictionary.
    
    Example:
    --------
    import xarray as xr
    data = {'temp': ([], [0]), 'sali': ([], [1])}
    coords = {'time': [0]}
    ds = xr.Dataset(data, coords)
    renamed_ds = rename_vars_dims_coords(ds, {'temp': 'temperature', 'sali': 'salinity'})

    Dependencies:
    -------------
    xarray
    """
    if verbose: print('func: rename_vars_dims_coords')
    for old_name, new_name in rename_dict.items():
        if (old_name in ds.variables) | (old_name in ds.dims) | (old_name in ds.coords): 
            ds = ds.rename({old_name: new_name})
        #
    if verbose: print('endfunc')
    return ds
#

def split_coords_dimensions(ds, verbose=False):
    """
    Splits the latitude, longitude, and depth dimensions and coordinates of an xarray dataset into separate variables,
    updates their names, and assigns them back to the dataset.

    Parameters:
    -----------
    ds : xr.Dataset
        The xarray Dataset to be updated.
    verbose : bool, optional
        If True, prints the function name at the start and end of execution (default is False).
    
    Returns:
    --------
    xr.Dataset
        A new xarray Dataset with the latitude, longitude, and depth dimensions and coordinates split into separate variables
        and reassigned to the original dataset.
    
    Example:
    --------
    import xarray as xr
    data = {'temp': ([0, 1, 2], [0, 1]), 'sali': ([0, 1, 2], [0, 1])}
    coords = {'latitude': [0, 1, 2], 'longitude': [0, 1], 'depth': [0, 1, 2]}
    ds = xr.Dataset(data, coords)
    updated_ds = split_coords_dimensions(ds)

    Dependencies:
    -------------
    xarray
    """
    if verbose: print('func: split_coords_dimensions')
    new_coords = {}
    new_coords2 = {}
    new_dims = {}
    dim_name_dict = dict(latitude='j', longitude='i', depth='k')
    dimschanged = []
    for name, coord in ds.coords.items():
        if name in ds.dims and name in ["latitude", "longitude", "depth"]:
            new_coords[name + "_coord"] = coord
            new_dims[name] = dim_name_dict[name]
            new_coords2[name + "_coord"] = name
            dimschanged.append(name)
    if verbose: print('endfunc')
    for name in ['k', 'j', 'i']: 
        if name in ds.coords: dimschanged.append(name)
    #
    return ds.assign_coords(new_coords).rename_dims(new_dims).drop_vars(dimschanged).rename(new_coords2)
#


## Others

In [5]:
def get_esgf_dataset_filepaths(variable, sourceID, experimentID, 
                               freq='mon', grid='g*', version='latest', 
                               variant='r1i1p1f1',
                               mipera = 'CMIP6', diresgf='/mnt/reef-ns1002k-esgf/', verbose=False, **kwargs): 
    """
    Returns the filepaths of the remote netCDF files corresponding to the specified dataset of the Earth System
    Grid Federation (ESGF) data portal on NIRD.

    Parameters:
    -----------
    variable : str
        Variable to search for on ESGF data portal.
    sourceID : str
        Name of the data source on the ESGF data portal.
    experimentID : str
        Name of the experiment on the ESGF data portal.
    freq : str, optional
        Frequency of the data (default is 'mon').
    grid : str, optional
        Type of grid (default is 'g*').
    version : str, optional
        Version of the data being queried (default is 'latest').
    variant : str, optional
        Label for the variant of the data being queried (default is 'r1i1p1f1').
    mipera : str, optional
        Name of the CMIP era being queried (default is 'CMIP6').
    diresgf : str, optional
        Absolute path to the directory where the data is stored (default is '/mnt/reef-ns1002k-esgf/').
    verbose : bool, optional
        If True, prints the function name at the start and end of execution (default is False).
    **kwargs : dict, optional
        Other key-value arguments to be passed in the function.

    Returns:
    --------
    List[str]
        A list of filepaths corresponding to the specified dataset on the ESGF data portal.

    Example:
    --------
    fp_list = get_esgf_dataset_filepaths('tas', 'CanESM5', 'historical', freq='mon')

    Dependencies:
    -------------
    glob, sys
    """
    import glob, sys
    
    if verbose: print('func: get_esgf_dataset_filepaths')
    
    if experimentID in ['1pctCO2', 'piControl', 'historical', 'abrupt-4xCO2']: zwActivity='CMIP'
    elif experimentID in ['ssp126', 'ssp245', 'ssp585']: zwActivity='ScenarioMIP'
    else: sys.exit('Check experimentID, case not implemented')
    
    if sourceID in ['CESM2', 'CESM2-WACCM']: zwInstitutionID = 'NCAR'
    elif sourceID in ['ACCESS-ESM1-5']: zwInstitutionID = 'CSIRO'
    elif sourceID in ['CNRM-ESM2-1']: zwInstitutionID = 'CNRM-CERFACS'
    elif sourceID in ['CanESM5', 'CanESM5-CanOE']: zwInstitutionID = 'CCCma'
    elif sourceID in ['UKESM1-0-LL']: zwInstitutionID = 'MOHC'
    elif sourceID in ['GFDL-CM4', 'GFDL-ESM4']: zwInstitutionID = 'NOAA-GFDL'
    elif sourceID in ['IPSL-CM6A-LR', 'IPSL-CM6A-LR-INCA']: zwInstitutionID = 'IPSL'
    elif sourceID in ['MIROC-ES2L']: zwInstitutionID = 'MIROC'
    elif sourceID in ['MPI-ESM1-2-LR', 'ICON-ESM-LR']: zwInstitutionID = 'MPI-M'
    elif sourceID in ['NorESM2-LM']: zwInstitutionID = 'NCC'
    else: sys.exit('Check sourceID, case not implemented')
    
    ocean_list = ['fgco2', 'intpp', 'o2', 'thetao', 'so', 'agessc', 'po4', 'no3']
    if variable in ocean_list: zwTableID = 'O'+freq
    elif variable in ['areacello']: zwTableID='Ofx'
    elif variable in ['psl']: zwTableID='A'+freq
    else: sys.exit('!!! WARNING !!! Check variable, case not implemented')
        
    zwdname = diresgf + mipera +'/'+ zwActivity +'/'+ \
        zwInstitutionID +'/'+ sourceID +'/'+ \
        experimentID  +'/'+ variant +'/'+ zwTableID +'/'+ \
        variable+'/'+ grid +'/'+ version +'/'
    zwfname = variable +'_'+ zwTableID +'_'+ sourceID +'_'+ \
        experimentID +'_'+ variant +'_'+ grid +'*.nc' 

    if verbose: print('endfunc')
    return glob.glob(zwdname + zwfname)
#
def nan_helper(y):
    """Helper to handle indices and logical indices of NaNs.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, logical indices of NaNs
        - index, a function, with signature indices= index(logical_indices),
          to convert logical indices of NaNs to 'equivalent' indices
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x= nan_helper(y)
        >>> y[nans]= np.interp(x(nans), x(~nans), y[~nans])
        nb: y[~nans] values of y that are not nans
            x(~nans) indexes of y that are not nans
    """

    return np.isnan(y), lambda z: z.nonzero()[0]
#


## O2SAT and AOU

In [6]:
def get_o2sat_garcia(temp, saln, verbose=False):
    """
    Calculates the dissolved oxygen concentration at saturation using the Garcia and Gordon (1992) equation.

    Parameters:
    -----------
    temp : xr.DataArray
        DataArray containing the temperature values in degrees Celsius.
    saln : xr.DataArray
        DataArray containing the salinity values in practical salinity units.
    verbose : bool
        Print verbose output if True.

    Returns:
    --------
    xr.DataArray
        DataArray containing the dissolved oxygen concentration at saturation in mol/m3.

    Example:
    --------
    o2sat_data = get_o2sat_garcia(temp_data, sal_data)

    Dependencies:
    -------------
    numpy
    """
    
    import numpy as np
    
    if verbose: print('func: get_o2sat_garcia')
    #Garcia, H. E., & Gordon, L. I. (1992). Oxygen solubility in seawater: Better fitting equations. Limnology and Oceanography, 37(6), 1307–1312.
    #micromol/kg
    A0=5.80818
    A1=3.20684
    A2=4.11890
    A3=4.93845
    A4=1.01567
    A5=1.41575
    B0=-7.01211e-3
    B1=-7.25958e-3
    B2=-7.93334e-3
    B3=-5.54491e-3
    C0=-1.32412e-7
    
    Ts = np.log((298.15-temp)/(273.15+temp))
    OXY_nor = A0 + A1*Ts + A2*Ts**2 + A3*Ts**2 + A3*Ts**3 + A4*Ts**4 + A5*Ts**5 + saln*(B0 + B1*Ts + B2*Ts**2 + B3*Ts**3) + C0*saln**2
    o2sat = np.exp(OXY_nor)*1.024e-3 # micromol/kg=> mol/m3
        
    o2sat.attrs['units']='mol m-3'
    o2sat.attrs['longname']='Dissolved Oxygen Concentration at Saturation'
    o2sat.attrs['description']='Dissolved Oxygen Concentration at Saturation computed following Garcia, H. E., & \
    Gordon, L. I. (1992). Oxygen solubility in seawater: Better fitting equations. Limnology and Oceanography, 37(6), 1307–1312.\
    Computed from temperature and salinity.'
    o2sat = o2sat.rename('o2sat')
    if verbose: print('endfunc')
    
    return o2sat #mol O2/m3
#

# Compute AOU for SSP585

## Check data avaibility

In [11]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for SSP585')

esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2', 
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

version_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20190903',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20191021', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190726'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20190903',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20191021', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190726'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20190903',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20191021', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190726'
    }
}

grid_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }
}

simu='ssp585'

for esm in esm_list: 
        
    print('------------')
    print(esm)
    print('------------')
    print('')

    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('>>> THETAO <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    
    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('')
    print('>>> SO <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    
    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('')
    print('>>> O2 <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    print('')
#





2024-10-18 15:03:07.863180
# Compute AOU for SSP585
------------
CNRM-ESM2-1
------------

>>> THETAO <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

>>> SO <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

>>> O2 <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

------------
GFDL-ESM4
------------

>>> THETAO <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

>>> SO <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

>>> O2 <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

------------
UKESM1-0-LL
------------

>>> THETAO <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

>>> SO <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

>>> O2 <<<
GOOD
2015 to 2100, 86.0 years (1032 months)
------------

peak memory: 761.52 MiB, increment: 329.61 MiB
CPU times: user 925 ms, sys: 512 ms, total: 1.44 s
Wall time: 5.76 s


## Compute

In [None]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for SSP585')

esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2', 
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

version_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20190903',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20191021', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190726'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20190903',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20191021', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190726'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20190903',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20191021', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190726'
    }
}

grid_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }
}

simu='ssp585'

year_list = ['%04d' %yyy for yyy in np.arange(2015, 2099.5)]

for esm in esm_list: 
        
    print(f'Computing AOU for {esm}...')

    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_temp = shift_180_lon(zwds2)

    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_sali = shift_180_lon(zwds2)

    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_oxyg = shift_180_lon(zwds2)
    
    # save for later
    zwds_attrs=zwds.attrs
    
    del zwds, zwds2
    gc.collect()
    
    # Loop on years
    for year in year_list: 
        
        temp = zwds_temp.sel(time=year)['thetao'].load()
        sali = zwds_sali.sel(time=year)['so'].load()
        oxyg = zwds_oxyg.sel(time=year)['o2'].load()
        
        # Compute o2sat
        o2sat = get_o2sat_garcia(temp, sali, verbose=False).compute()
        
        # Clean
        del sali, temp
        gc.collect()
        
        # Compute aou
        aou = xr.zeros_like(o2sat)
        aou.values = o2sat.values - oxyg.values
        aou.attrs={}
        aou.attrs['units']='mol m-3'
        aou.attrs['long_name']='Apparent Oxygen Utilization'
        aou.attrs['description']='AOU computed as O2sat - O2 with O2sat computed following Garcia, H. E., & \
        Gordon, L. I. (1992). Oxygen solubility in seawater: Better fitting equations. Limnology and Oceanography, 37(6), 1307–1312.'
        aou = aou.rename('aou')

        # Clean
        del oxyg
        gc.collect()
    
        # Temporal mean
        o2sat_tavg = o2sat.groupby('time.year').mean(dim='time')
        aou_tavg = aou.groupby('time.year').mean(dim='time')
    
        # Clean
        del o2sat, aou
        gc.collect()
    
        # create dataset
        o2sat_ds = o2sat_tavg.to_dataset() 
        o2sat_ds.attrs = zwds_attrs
        aou_ds = aou_tavg.to_dataset() 
        aou_ds.attrs = zwds_attrs

        # Save in netcdf
        #---------------    
        # print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_o2sat_'+year+'.nc'
        o2sat_ds.to_netcdf(ncname)
        # print('File saved: %s'%ncname)
        #---------------    
        # print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_aou_'+year+'.nc'
        aou_ds.to_netcdf(ncname)
        # print('File saved: %s'%ncname)
    #
    print(f'Done with {esm}')
#





2024-10-18 15:05:16.375266
# Compute AOU for SSP585
Computing AOU for CNRM-ESM2-1...


# Compute AOU for historical

## Check data avaibility

In [7]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for hitsorical')

esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

version_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20180803',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181206', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20190726',
        'UKESM1-0-LL'  : 'v20190627'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20180803',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181206', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20190726',
        'UKESM1-0-LL'  : 'v20190627'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20180803',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181206', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20190726',
        'UKESM1-0-LL'  : 'v20190627'
    }
}    

grid_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }
}    

simu='historical'

for esm in esm_list: 
        
    print('------------')
    print(esm)
    print('------------')
    print('')

    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('>>> THETAO <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    
    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('')
    print('>>> SO <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    
    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('')
    print('>>> O2 <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    print('')
#





2024-10-18 16:01:22.568355
# Compute AOU for hitsorical
------------
CNRM-ESM2-1
------------

[########################################] | 100% Completed | 20.10 s
>>> THETAO <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

>>> SO <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

>>> O2 <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

------------
GFDL-ESM4
------------

>>> THETAO <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

>>> SO <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

>>> O2 <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

------------
UKESM1-0-LL
------------

>>> THETAO <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

>>> SO <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

>>> O2 <<<
GOOD
1850 to 2014, 165.0 years (1980 months)
------------

peak memory: 548.12 MiB, increment: 365.59 MiB
CPU times: user 2.11 s, sys: 1.2 s, total: 3.31 s
Wall time: 30.9 s


## Compute

In [24]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for historical')


esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

version_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20180803',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181206', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20190726',
        'UKESM1-0-LL'  : 'v20190627'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20180803',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181206', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20190726',
        'UKESM1-0-LL'  : 'v20190627'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191115',
        'IPSL-CM6A-LR' : 'v20180803',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181206', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20190726',
        'UKESM1-0-LL'  : 'v20190627'
    }
}    

grid_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }
}    

simu='historical'
year_list = ['%04d' %yyy for yyy in np.arange(1850, 2014.5)]

for esm in esm_list: 
        
    print(f'Computing AOU for {esm}...')

    print('    Load data...')
    
    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_temp = shift_180_lon(zwds2)

    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_sali = shift_180_lon(zwds2)

    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    zwds = xr.open_mfdataset(fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_oxyg = shift_180_lon(zwds2)
    
    # save for later
    zwds_attrs=zwds.attrs
    
    del zwds, zwds2
    gc.collect()
    
    # Loop on years
    print('    Loop on years...')
    for year in year_list: 
        
        modulo = 15
        if ( (int(year) - int(year_list[0]))%modulo )==0: 
            print(f'        From years {int(year)} to {int(year)+modulo-1}...')
        #
        
        temp = zwds_temp.sel(time=year)['thetao'].load()
        sali = zwds_sali.sel(time=year)['so'].load()
        oxyg = zwds_oxyg.sel(time=year)['o2'].load()
        
        # Compute o2sat
        o2sat = get_o2sat_garcia(temp, sali, verbose=False).compute()
        
        # Clean
        del sali, temp
        gc.collect()
        
        # Compute aou
        aou = xr.zeros_like(o2sat)
        aou.values = o2sat.values - oxyg.values
        aou.attrs={}
        aou.attrs['units']='mol m-3'
        aou.attrs['long_name']='Apparent Oxygen Utilization'
        aou.attrs['description']='AOU computed as O2sat - O2 with O2sat computed following Garcia, H. E., & \
        Gordon, L. I. (1992). Oxygen solubility in seawater: Better fitting equations. Limnology and Oceanography, 37(6), 1307–1312.'
        aou = aou.rename('aou')

        # Clean
        del oxyg
        gc.collect()
    
        # Temporal mean
        o2sat_tavg = o2sat.groupby('time.year').mean(dim='time')
        aou_tavg = aou.groupby('time.year').mean(dim='time')
    
        # Clean
        del o2sat, aou
        gc.collect()
    
        # create dataset
        o2sat_ds = o2sat_tavg.to_dataset() 
        o2sat_ds.attrs = zwds_attrs
        aou_ds = aou_tavg.to_dataset() 
        aou_ds.attrs = zwds_attrs

        # Save in netcdf
        #---------------    
        # print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_o2sat_'+year+'.nc'
        o2sat_ds.to_netcdf(ncname)
        # print('File saved: %s'%ncname)
        #---------------    
        # print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_aou_'+year+'.nc'
        aou_ds.to_netcdf(ncname)
        # print('File saved: %s'%ncname)
    #
    print(f'Done with {esm}')
#





2024-10-18 16:12:34.597410
# Compute AOU for historical
Computing AOU for CNRM-ESM2-1...
    Load data...
    Loop on years...
        From years 1850 to 1864...
        From years 1865 to 1879...
        From years 1880 to 1894...
        From years 1940 to 1954...
        From years 1955 to 1969...
        From years 1970 to 1984...
        From years 1985 to 1999...
        From years 2000 to 2014...
Done with CNRM-ESM2-1
Computing AOU for GFDL-ESM4...
    Load data...
    Loop on years...
        From years 1850 to 1864...
        From years 1865 to 1879...
        From years 1880 to 1894...
        From years 1895 to 1909...
        From years 1910 to 1924...
        From years 1925 to 1939...
        From years 1940 to 1954...
        From years 1955 to 1969...
        From years 2000 to 2014...
Done with GFDL-ESM4
Computing AOU for UKESM1-0-LL...
    Load data...
    Loop on years...
        From years 1850 to 1864...
        From years 1865 to 1879...
        From years 1880 to

# Compute AOU for piControl

## def shorten_fname_list(fname_list):

In [43]:
def shorten_fname_list(fname_list):
    fname_list.sort()
    new_fname_list = []
    for fname in fname_list: 
        year1_of_fname = fname.split('/')[-1].split('_')[-1].split('-')[0][:4]
        for search in np.arange(1850, 1950): 
            if '%04d'%search==year1_of_fname:
                if not (fname in new_fname_list): new_fname_list.append(fname)
                #
            #
        #
    #
    if len(new_fname_list)==0: 
        zwmax = int(fname_list[-1][-9:-5])
        zwmin = zwmax-99
        for fname in fname_list: 
            year1_of_fname = fname.split('/')[-1].split('_')[-1].split('-')[0][:4]
            for search in np.arange(zwmin, zwmax): 
                if '%04d'%search==year1_of_fname:
                    if not (fname in new_fname_list): new_fname_list.append(fname)
                    #
                #
            #
        #
    #
    return new_fname_list
#


## Check data avaibility

In [47]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for piControl')
print('## Check data avaibility')

esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
# esm_list = ['ACCESS-ESM1-5']
variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
}
version_dict = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191214',
    'IPSL-CM6A-LR' : 'v20200326',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}


simu='piControl'

for esm in esm_list: 
        
    print('------------')
    print(esm)
    print('------------')
    print('')

    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[esm], 
                                            variant=variant_dict[esm], grid='gn')
    new_fname_list = shorten_fname_list(fname_list)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('>>> THETAO <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    
    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[esm], 
                                            variant=variant_dict[esm], grid='gn')
    new_fname_list = shorten_fname_list(fname_list)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('')
    print('>>> SO <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    
    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[esm], 
                                            variant=variant_dict[esm], grid='gn')
    new_fname_list = shorten_fname_list(fname_list)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good = tstep/12 == ymax-ymin+1
    print('')
    print('>>> O2 <<<')
    if good: 
        print('GOOD')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('Years missing')
        for fname in fname_list: print(fname)
    #
    print('')
#





2024-06-14 15:09:22.413309
# Compute AOU for hitsorical
------------
MPI-ESM1-2-LR
------------

>>> THETAO <<<
GOOD
1850 to 1949, 100.0 years (1200 months)
------------

>>> SO <<<
GOOD
1850 to 1949, 100.0 years (1200 months)
------------

>>> O2 <<<
GOOD
1850 to 1949, 100.0 years (1200 months)
------------

------------
ACCESS-ESM1-5
------------

>>> THETAO <<<
GOOD
901 to 1000, 100.0 years (1200 months)
------------

>>> SO <<<
GOOD
901 to 1000, 100.0 years (1200 months)
------------

>>> O2 <<<
GOOD
901 to 1000, 100.0 years (1200 months)
------------

------------
IPSL-CM6A-LR
------------

[########################################] | 100% Completed | 6.14 s
>>> THETAO <<<
GOOD
1850 to 1949, 100.0 years (1200 months)
------------
[########################################] | 100% Completed | 5.34 s

>>> SO <<<
GOOD
1850 to 1949, 100.0 years (1200 months)
------------
[########################################] | 100% Completed | 6.14 s

>>> O2 <<<
GOOD
1850 to 1949, 100.0 years (120

## Compute

In [56]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for piControl')
print('## Compute')


# esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CanESM5']
variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
}
version_dict = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191214',
    'IPSL-CM6A-LR' : 'v20200326',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}






simu='piControl'
# year_list = ['%04d' %yyy for yyy in np.arange(1850, 2014.5)]

for esm in esm_list: 
        
    print(f'Computing AOU for {esm}...')

    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, version=version_dict[esm], variant=variant_dict[esm], grid='gn')
    new_fname_list = shorten_fname_list(fname_list)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_temp = shift_180_lon(zwds2)

    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, version=version_dict[esm], variant=variant_dict[esm], grid='gn')
    new_fname_list = shorten_fname_list(fname_list)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_sali = shift_180_lon(zwds2)

    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, version=version_dict[esm], variant=variant_dict[esm], grid='gn')
    new_fname_list = shorten_fname_list(fname_list)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_oxyg = shift_180_lon(zwds2)
    
    # save for later
    zwds_attrs=zwds.attrs
    ymin = int(np.min(zwds['time.year']))
    year_list = ['%04d' %yyy for yyy in np.arange(ymin, ymin+99)]
    year_list = ['6200']

    del zwds, zwds2
    gc.collect()
    
    # Loop on years
    for year in year_list: 
        
        temp = zwds_temp.sel(time=year)['thetao'].load()
        sali = zwds_sali.sel(time=year)['so'].load()
        oxyg = zwds_oxyg.sel(time=year)['o2'].load()
        
        # Compute o2sat
        o2sat = get_o2sat_garcia(temp, sali, verbose=False).compute()
        
        # Clean
        del sali, temp
        gc.collect()
        
        # Compute aou
        aou = xr.zeros_like(o2sat)
        aou.values = o2sat.values - oxyg.values
        aou.attrs={}
        aou.attrs['units']='mol m-3'
        aou.attrs['long_name']='Apparent Oxygen Utilization'
        aou.attrs['description']='AOU computed as O2sat - O2 with O2sat computed following Garcia, H. E., & \
        Gordon, L. I. (1992). Oxygen solubility in seawater: Better fitting equations. Limnology and Oceanography, 37(6), 1307–1312.'
        aou = aou.rename('aou')

        # Clean
        del oxyg
        gc.collect()
    
        # Temporal mean
        o2sat_tavg = o2sat.groupby('time.year').mean(dim='time')
        aou_tavg = aou.groupby('time.year').mean(dim='time')
    
        # Clean
        del o2sat, aou
        gc.collect()
    
        # create dataset
        o2sat_ds = o2sat_tavg.to_dataset() 
        o2sat_ds.attrs = zwds_attrs
        aou_ds = aou_tavg.to_dataset() 
        aou_ds.attrs = zwds_attrs

        # Save in netcdf
        #---------------    
        print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_o2sat_'+year+'.nc'
        o2sat_ds.to_netcdf(ncname)
        print('File saved: %s'%ncname)
        #---------------    
        print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_aou_'+year+'.nc'
        aou_ds.to_netcdf(ncname)
        print('File saved: %s'%ncname)
    #
    print(f'Done with {esm}')
#





2024-06-16 15:06:06.226409
# Compute AOU for piControl
## Compute
Computing AOU for CanESM5...
[########################################] | 100% Completed | 1.31 s
[########################################] | 100% Completed | 1.30 s
[########################################] | 100% Completed | 1.30 s
Save in netcdf...
File saved: 24-06-10-compute-aou/netcdf_files/CanESM5_piControl_o2sat_6200.nc
Save in netcdf...
File saved: 24-06-10-compute-aou/netcdf_files/CanESM5_piControl_aou_6200.nc
Done with CanESM5
peak memory: 5645.54 MiB, increment: 3495.71 MiB
CPU times: user 7.68 s, sys: 1.61 s, total: 9.29 s
Wall time: 16.9 s


## Compute piControl drift

In [16]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for piControl')
print('## Compute piControl drift')

esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR']
# esm_list = ['IPSL-CM6A-LR']
simu = 'piControl'

#----------------
# Compute budget
#----------------

data2plot = {}

for iesm, vesm in enumerate(esm_list): 
    
    print(vesm)
    
    #________________
    # Get data
    var='aou'
    simu = 'piControl'
    fname = netcdfdir+vesm+"_"+simu+"_"+var+"_[0-9][0-9][0-9][0-9].nc"
    zwds = xr.open_mfdataset(fname, **kwopenmfds) # in mol.m-3
    zwda = zwds[var] # in mol.m-3

    #________________
    # Compute linear regression
    coefs = zwda.polyfit('year', 1)['polyfit_coefficients'].compute()
    for coords in ['latitude', 'longitude', 'depth']: 
        coefs[coords] = zwda[coords]
    #
    coefs = coefs.drop(['i', 'j', 'k'])

    ymin = int(np.min(zwda['year']))
    ymax = int(np.max(zwda['year']))
    
    coefs.attrs['description']= 'Coefficients from the least squares polynomial fit (degree 1)\
    computed with xr.polyfit. The fit is computed over the years %04d to %04d.\n\
    To compute the trend over some years: trd = zw2.mean(dim="year") + (coefs.sel(degree=1))\
    *(zw2.year - zw2.year.mean(dim="year"))' %(ymin, ymax)

    #________________
    # Create dataset
    coefs_ds = coefs.to_dataset() 
    coefs_ds.attrs = zwds.attrs

    # Save in netcdf
    #---------------    
    print('Save in netcdf...')
    ncname = netcdfdir+vesm+'_'+simu+'_aou_drift_coefficient.nc'
    coefs_ds.to_netcdf(ncname)
    print('File saved: %s'%ncname)
#




2024-06-16 16:19:58.952770
# Compute AOU for piControl
## Compute piControl drift
MPI-ESM1-2-LR
[########################################] | 100% Completed | 1.21 s
[########################################] | 100% Completed | 59.91 s
Save in netcdf...
File saved: 24-06-10-compute-aou/netcdf_files/MPI-ESM1-2-LR_piControl_aou_drift_coefficient.nc
ACCESS-ESM1-5
[########################################] | 100% Completed | 1.41 s
[########################################] | 100% Completed | 142.76 s
Save in netcdf...
File saved: 24-06-10-compute-aou/netcdf_files/ACCESS-ESM1-5_piControl_aou_drift_coefficient.nc
IPSL-CM6A-LR
[########################################] | 100% Completed | 1.61 s
[########################################] | 100% Completed | 218.05 s
Save in netcdf...
File saved: 24-06-10-compute-aou/netcdf_files/IPSL-CM6A-LR_piControl_aou_drift_coefficient.nc
peak memory: 8664.74 MiB, increment: 7786.35 MiB
CPU times: user 7min 33s, sys: 29.7 s, total: 8min 3s
Wall time: 8min 4

# Compute AOU for piControl with specific years depending on models

## def shorten_fname_list(fname_list, startyear, endyear):

In [7]:
def shorten_fname_list(fname_list, startyear, endyear):
    fname_list.sort()
    new_fname_list = []
    for fname in fname_list: 
        year1_of_fname = int(fname.split('/')[-1].split('_')[-1].split('-')[0][:4])
        year2_of_fname = int(fname.split('/')[-1].split('_')[-1].split('-')[1][:4])
        startyear_in_between = ((startyear>=year1_of_fname) & (startyear<=year2_of_fname))
        endyear_in_between   = ((endyear>=year1_of_fname) & (endyear<=year2_of_fname))
        year1_in_between = ((year1_of_fname>=startyear) & (year1_of_fname<=endyear))
        year2_in_between = ((year2_of_fname>=startyear) & (year2_of_fname<=endyear))
        if startyear_in_between | endyear_in_between | year1_in_between | year2_in_between: 
            if not (fname in new_fname_list): 
                new_fname_list.append(fname)
        #
    #
    #
    # if len(new_fname_list)==0: 
    #     zwmax = int(fname_list[-1][-9:-5])
    #     zwmin = zwmax-99
    #     for fname in fname_list: 
    #         year1_of_fname = fname.split('/')[-1].split('_')[-1].split('-')[0][:4]
    #         for search in np.arange(zwmin, zwmax): 
    #             if '%04d'%search==year1_of_fname:
    #                 if not (fname in new_fname_list): new_fname_list.append(fname)
    #                 #
    #             #
    #         #
    #     #
    # #
    if len(new_fname_list)==0: new_fname_list=fname_list
    return new_fname_list
#


## Check data avaibility

In [8]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for piControl with specific years depending on models')
print('## Check data avaibility')

esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

refyear_dict = {
    'MPI-ESM1-2-LR': 1850,
    'ACCESS-ESM1-5':  161,
    'IPSL-CM6A-LR' : 1910,
    'CanESM5'      : 5201,
    'MIROC-ES2L'   : 1850,
    'CNRM-ESM2-1'  : 1850, 
    'GFDL-CM4'     :  101,
    'GFDL-ESM4'    :  101,
    'UKESM1-0-LL'  : 2250
}

variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

version_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191112',
        'IPSL-CM6A-LR' : 'v20200326',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823' ,
        'CNRM-ESM2-1'  : 'v20181115', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190827'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191112',
        'IPSL-CM6A-LR' : 'v20200326',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181115', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190827'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191112',
        'IPSL-CM6A-LR' : 'v20200326',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181115', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20200219'
    }
}    

grid_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }
}    

simu='piControl'

for esm in esm_list: 
        
    print('------------')
    print(esm)
    print('------------')
    print('')

    startyear, endyear = refyear_dict[esm]+(1850-1850), refyear_dict[esm]+(2099-1850)
    print('piControl targeted time period: %04d-%04d'%(startyear, endyear))    
    
    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    print('------------')
    good1 = tstep/12 == ymax-ymin+1
    good2 = (startyear>=ymin) & (endyear<=ymax)
    if good1 & good2: 
        print('>>> THETAO: GOOD, time period complete and match target')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
    elif good1 and (not good2): 
        print('!!! THETAO: time period complete BUT do not match target')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months), file list:')
        for fname in fname_list: print(fname)
    else: 
        print('!!! THETAO: some years are missing. Here is the file list: ')
        for fname in fname_list: print(fname)
    #
    print('------------')
    
    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good1 = tstep/12 == ymax-ymin+1
    good2 = (startyear>=ymin) & (endyear<=ymax)
    if good1 & good2: 
        print('>>> SO: GOOD, time period complete and match target')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
    elif good1 and (not good2): 
        print('!!! SO: time period complete BUT do not match target')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months), file list:')
        for fname in fname_list: print(fname)
    else: 
        print('!!! SO: some years are missing. Here is the file list: ')
        for fname in fname_list: print(fname)
    #
    print('------------')
    
    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    ymin = int(np.min(zwds['time.year']))
    ymax = int(np.max(zwds['time.year']))
    tstep = zwds[var].shape[0]
    good1 = tstep/12 == ymax-ymin+1
    good2 = (startyear>=ymin) & (endyear<=ymax)
    if good1 & good2: 
        print('>>> O2: GOOD, time period complete and match target')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
    elif good1 and (not good2): 
        print('!!! O2: time period complete BUT do not match target')
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months), file list:')
        for fname in fname_list: print(fname)
    else: 
        print('!!! O2: some years are missing. Here is the file list: ')
        for fname in fname_list: print(fname)
    #
    print('------------')
        
    print('')
#





2024-10-18 18:13:26.906628
# Compute AOU for piControl with specific years depending on models
## Check data avaibility
------------
CNRM-ESM2-1
------------

piControl targeted time period: 1850-2099
[########################################] | 100% Completed | 29.68 s
------------
>>> THETAO: GOOD, time period complete and match target
1850 to 2099, 250.0 years (3000 months)
------------
[########################################] | 100% Completed | 24.36 s
>>> SO: GOOD, time period complete and match target
1850 to 2099, 250.0 years (3000 months)
------------
[########################################] | 100% Completed | 23.63 s
>>> O2: GOOD, time period complete and match target
1850 to 2099, 250.0 years (3000 months)
------------

------------
GFDL-ESM4
------------

piControl targeted time period: 0101-0350
------------
>>> THETAO: GOOD, time period complete and match target
101 to 360, 260.0 years (3120 months)
------------
>>> SO: GOOD, time period complete and match target
101 t

## Compute

In [None]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute AOU for piControl with specific years depending on models')
print('## Compute')


esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

refyear_dict = {
    'MPI-ESM1-2-LR': 1850,
    'ACCESS-ESM1-5':  161,
    'IPSL-CM6A-LR' : 1910,
    'CanESM5'      : 5201,
    'MIROC-ES2L'   : 1850,
    'CNRM-ESM2-1'  : 1850, 
    'GFDL-CM4'     :  101,
    'GFDL-ESM4'    :  101,
    'UKESM1-0-LL'  : 2250
}

variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

version_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191112',
        'IPSL-CM6A-LR' : 'v20200326',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823' ,
        'CNRM-ESM2-1'  : 'v20181115', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190827'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191112',
        'IPSL-CM6A-LR' : 'v20200326',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181115', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20190827'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'v20190710',
        'ACCESS-ESM1-5': 'v20191112',
        'IPSL-CM6A-LR' : 'v20200326',
        'CanESM5'      : 'v20190429',
        'MIROC-ES2L'   : 'v20190823',
        'CNRM-ESM2-1'  : 'v20181115', 
        'GFDL-CM4'     : 'v20180701',
        'GFDL-ESM4'    : 'v20180701',
        'UKESM1-0-LL'  : 'v20200219'
    }
}    

grid_dict = {
    'thetao':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'so':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }, 
    'o2':{
        'MPI-ESM1-2-LR': 'gn',
        'ACCESS-ESM1-5': 'gn',
        'IPSL-CM6A-LR' : 'gn',
        'CanESM5'      : 'gn',
        'MIROC-ES2L'   : 'gn',
        'CNRM-ESM2-1'  : 'gn', 
        'GFDL-CM4'     : 'gr',
        'GFDL-ESM4'    : 'gr',
        'UKESM1-0-LL'  : 'gn'
    }
}    

simu='piControl'

for esm in esm_list: 
        
    print(f'Computing AOU for {esm}...')

    startyear, endyear = refyear_dict[esm]+(1850-1850), refyear_dict[esm]+(2099-1850)

    print('    Load data...')
    # Load temperature
    var = 'thetao'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_temp = shift_180_lon(zwds2)

    # Load salinity
    var = 'so'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_sali = shift_180_lon(zwds2)

    # Load oxygen
    var = 'o2'
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            version=version_dict[var][esm], 
                                            variant=variant_dict[esm], 
                                            grid=grid_dict[var][esm])
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)
    zwds2 = zwds[var].to_dataset()
    zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
    zwds2 = split_coords_dimensions(zwds2)
    zwds_oxyg = shift_180_lon(zwds2)
    
    # save for later
    zwds_attrs=zwds.attrs
    year_list = ['%04d' %yyy for yyy in np.arange(startyear, endyear+.5)]

    del zwds, zwds2
    gc.collect()
    
    # Loop on years
    print('    Loop on year...')
    for year in year_list: 
        
        modulo = 15
        if ( (int(year) - int(year_list[0]))%modulo )==0: 
            print(f'        From years {int(year)} to {int(year)+modulo-1}...')
        #
        
        temp = zwds_temp.sel(time=year)['thetao'].load()
        sali = zwds_sali.sel(time=year)['so'].load()
        oxyg = zwds_oxyg.sel(time=year)['o2'].load()
        
        # Compute o2sat
        o2sat = get_o2sat_garcia(temp, sali, verbose=False).compute()
        
        # Clean
        del sali, temp
        gc.collect()
        
        # Compute aou
        aou = xr.zeros_like(o2sat)
        aou.values = o2sat.values - oxyg.values
        aou.attrs={}
        aou.attrs['units']='mol m-3'
        aou.attrs['long_name']='Apparent Oxygen Utilization'
        aou.attrs['description']='AOU computed as O2sat - O2 with O2sat computed following Garcia, H. E., & \
        Gordon, L. I. (1992). Oxygen solubility in seawater: Better fitting equations. Limnology and Oceanography, 37(6), 1307–1312.'
        aou = aou.rename('aou')

        # Clean
        del oxyg
        gc.collect()
    
        # Temporal mean
        o2sat_tavg = o2sat.groupby('time.year').mean(dim='time')
        aou_tavg = aou.groupby('time.year').mean(dim='time')
    
        # Clean
        del o2sat, aou
        gc.collect()
    
        # create dataset
        o2sat_ds = o2sat_tavg.to_dataset() 
        o2sat_ds.attrs = zwds_attrs
        aou_ds = aou_tavg.to_dataset() 
        aou_ds.attrs = zwds_attrs

        # Save in netcdf
        #---------------    
        # print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_o2sat_'+year+'.nc'
        o2sat_ds.to_netcdf(ncname)
        # print('File saved: %s'%ncname)
        #---------------    
        # print('Save in netcdf...')
        ncname = netcdfdir+esm+'_'+simu+'_aou_'+year+'.nc'
        aou_ds.to_netcdf(ncname)
        # print('File saved: %s'%ncname)
    #
    print(f'Done with {esm}')
#





2024-10-18 18:15:38.827565
# Compute AOU for piControl with specific years depending on models
## Compute
Computing AOU for CNRM-ESM2-1...
    Load data...
[########################################] | 100% Completed | 20.14 s
[########################################] | 100% Completed | 14.66 s
[########################################] | 100% Completed | 17.89 s
    Loop on year...
        From years 1850 to 1864...
        From years 1865 to 1879...
        From years 1880 to 1894...
        From years 1895 to 1909...
        From years 1910 to 1924...
        From years 1925 to 1939...
        From years 1940 to 1954...
        From years 1955 to 1969...
