# Import modules

In [1]:
%%time
%load_ext memory_profiler

# Standard library
import datetime
import os, glob, sys, gc
import warnings
# Optional: uncomment to silence numpy's "invalid value encountered in true_divide" warnings.
# warnings.filterwarnings('ignore', '.*invalid value encountered in true_divide.*', )

# Register a dask progress bar globally so dask computations started in later
# cells report their progress.
# NOTE(review): per the dask documentation `minimum` is the number of seconds a
# computation must last before the bar is displayed -- confirm for the installed version.
from dask.diagnostics import ProgressBar
pbar = ProgressBar(minimum=10)
pbar.register()
# Uncomment to switch the progress bar off again.
#pbar.unregister()

# Third-party numerical stack
import numpy as np
import xarray as xr
# Global xarray option: keep attributes on the results of operations.
xr.set_options(keep_attrs=True)



CPU times: user 634 ms, sys: 543 ms, total: 1.18 s
Wall time: 627 ms


<xarray.core.options.set_options at 0x7fd42c593730>

# Starters

In [2]:
%%time
%%memit -c
print(datetime.datetime.now())

# Output directories, relative to the notebook's working directory.
# makedirs(exist_ok=True) replaces the racy "isdir, then mkdir" pair and is a
# no-op when the cell is re-run.
dirout = '24-06-20-compute-yearly-averages/'
os.makedirs(dirout, exist_ok=True)

netcdfdir = dirout+'netcdf_files/'
os.makedirs(netcdfdir, exist_ok=True)


def _echo_to_logfile(stream, logpath):
    """Attach a freshly truncated text file at `logpath` as `stream.echo`.

    If `stream.echo` is already a file that was opened on the same `logpath`
    (i.e. this cell is being re-run), that earlier handle is closed once the
    new one is in place, so re-running the cell does not leak file handles.
    Any other pre-existing `echo` object is left untouched.

    NOTE(review): this presumably relies on the notebook kernel's output
    streams copying what they write to their `echo` attribute -- confirm for
    the kernel in use.
    """
    previous = getattr(stream, 'echo', None)
    stream.echo = open(logpath, 'w')
    if previous is not None and getattr(previous, 'name', None) == logpath:
        previous.close()


_echo_to_logfile(sys.stdout, dirout+'stdout.txt')
_echo_to_logfile(sys.stderr, dirout+'stderr.txt')

2024-10-17 17:39:19.501014
peak memory: 258.17 MiB, increment: 106.28 MiB
CPU times: user 45.5 ms, sys: 7.28 ms, total: 52.8 ms
Wall time: 154 ms


# Main parameters

In [3]:
%%time
%%memit -c

print(datetime.datetime.now())

# Decoding options shared by every dataset opening.
# NOTE(review): kwopends is presumably meant for xr.open_dataset -- it is not
# used in the cells visible here.
kwopends = {
    'use_cftime': True,
    'decode_times': None,
    'decode_cf': True,
    'decode_coords': True,
}
# Same decoding options plus the multi-file settings; passed to
# xr.open_mfdataset in the cells below.
kwopenmfds = dict(combine='by_coords', parallel=True, **kwopends)

# Model-specific names -> common names, applied by rename_vars_dims_coords.
rename_dict = dict(
    # horizontal index dimensions
    x='i',
    y='j',
    # geographical coordinates
    lat='latitude',
    lon='longitude',
    nav_lat='latitude',
    nav_lon='longitude',
    # vertical coordinate
    lev='depth',
    deptht='depth',
    olevel='depth',
    Depth='depth',
)


2024-10-17 17:39:19.684365
peak memory: 258.68 MiB, increment: 108.12 MiB
CPU times: user 52.7 ms, sys: 13.2 ms, total: 65.9 ms
Wall time: 170 ms


# Define some functions

## Preparation of data 

In [4]:
def shift_180_lon(zwda, verbose=False): 
    """
    Wrap the 'longitude' values of `zwda` to the -180..180 range when needed.

    If the smallest (non-NaN) longitude is not below -150, the longitudes are
    replaced by ``(longitude + 180) % 360 - 180`` and a timestamped note is
    prepended to ``zwda.attrs['history']``. Otherwise nothing is changed.

    Parameters:
    -----------
    zwda : xarray object (or any object supporting ``obj['longitude']``
        get/set and an ``attrs`` mapping)
        The object whose longitudes are shifted. It is modified IN PLACE.
    verbose : bool, optional
        If True, prints the function name at the start of execution (default is False).

    Returns:
    --------
    The same `zwda` object that was passed in.

    Notes:
    ------
    Best effort: any ordinary error (e.g. no 'longitude' entry) is reported
    with a warning and `zwda` is returned as it is. KeyboardInterrupt and
    SystemExit are no longer swallowed.

    Dependencies:
    -------------
    numpy, datetime
    """
    if verbose: print("func: shift_180_lon")
    
    try: 
        if not np.nanmin(zwda['longitude']) < -150: 
            zwda['longitude'] = (zwda['longitude'] + 180) % 360 - 180
            addtxt=str(datetime.datetime.now())+' shift_180_lon to get longitude from -180 to 180'
            try:
                zwda.attrs['history'] =  addtxt + ' ; '+zwda.attrs['history']
            except (KeyError, TypeError):
                # No previous history, or one that cannot be concatenated to a string.
                zwda.attrs['history'] =  addtxt
        #
    except Exception as err:
        # Keep the best-effort behaviour, but say why the shift was skipped.
        print('WARNING! longitude likely not shifted (%s: %s)' % (type(err).__name__, err))
    return zwda
#

def rename_vars_dims_coords(ds, rename_dict, verbose=False):
    """
    Apply the entries of `rename_dict` that match a name present in `ds`.

    The entries are processed one at a time, in dictionary order. An entry is
    applied with ``ds.rename`` only if its key currently is a variable, a
    dimension or a coordinate of the dataset; other entries are ignored.
    Because each rename returns a new dataset, later entries are checked
    against the already-renamed dataset.

    Parameters:
    -----------
    ds : xr.Dataset
        Dataset to rename.
    rename_dict : Dict[str, str]
        Mapping of original name -> new name.
    verbose : bool, optional
        If True, prints the function name at the start and 'endfunc' at the
        end of execution (default is False).

    Returns:
    --------
    xr.Dataset
        The renamed dataset (the input object itself if no entry matched).

    Example:
    --------
    import xarray as xr
    data = {'temp': ([], 0), 'sali': ([], 1)}
    ds = xr.Dataset(data)
    renamed_ds = rename_vars_dims_coords(ds, {'temp': 'temperature', 'sali': 'salinity'})

    Dependencies:
    -------------
    xarray
    """
    if verbose:
        print('func: rename_vars_dims_coords')

    for old_name in rename_dict:
        is_present = (
            old_name in ds.variables
            or old_name in ds.dims
            or old_name in ds.coords
        )
        if not is_present:
            continue
        ds = ds.rename({old_name: rename_dict[old_name]})

    if verbose:
        print('endfunc')
    return ds
#

def split_coords_dimensions(ds, verbose=False):
    """
    Detach the 'latitude', 'longitude' and 'depth' coordinates from the
    dimensions that carry the same name.

    For every coordinate called 'latitude', 'longitude' or 'depth' that is
    also a dimension of `ds`, the following is chained on the dataset:

    1. the coordinate is assigned again under the name '<name>_coord';
    2. the dimension is renamed ('latitude' -> 'j', 'longitude' -> 'i',
       'depth' -> 'k');
    3. the variable still stored under the original name is dropped, as are
       any coordinates already called 'k', 'j' or 'i' in the input;
    4. '<name>_coord' is renamed back to '<name>'.

    Parameters:
    -----------
    ds : xr.Dataset
        The xarray Dataset to be updated.
    verbose : bool, optional
        If True, prints the function name at the start and 'endfunc' before
        returning (default is False).

    Returns:
    --------
    xr.Dataset
        The result of the assign_coords / rename_dims / drop_vars / rename
        chain described above.

    Dependencies:
    -------------
    xarray
    """
    if verbose:
        print('func: split_coords_dimensions')

    index_dim_for = {'latitude': 'j', 'longitude': 'i', 'depth': 'k'}

    # Coordinates that double as a dimension and are among the three targets.
    targets = [
        (name, coord)
        for name, coord in ds.coords.items()
        if name in ds.dims and name in index_dim_for
    ]

    staged_coords = {name + "_coord": coord for name, coord in targets}
    dim_renames = {name: index_dim_for[name] for name, _ in targets}
    restored_names = {name + "_coord": name for name, _ in targets}
    to_drop = [name for name, _ in targets]

    if verbose:
        print('endfunc')

    # Coordinates already named like the target index dimensions are dropped too.
    to_drop.extend(name for name in ('k', 'j', 'i') if name in ds.coords)

    staged = ds.assign_coords(staged_coords)
    reindexed = staged.rename_dims(dim_renames)
    cleaned = reindexed.drop_vars(to_drop)
    return cleaned.rename(restored_names)
#


## Others

In [5]:
def get_esgf_dataset_filepaths(variable, sourceID, experimentID, 
                               freq='mon', grid='g*', version='latest', 
                               variant='r1i1p1f1',
                               mipera = 'CMIP6', diresgf='/mnt/reef-ns1002k-esgf/', verbose=False, **kwargs): 
    """
    List the netCDF files of one ESGF dataset found under `diresgf`.

    The search pattern is built as
    ``<diresgf><mipera>/<activity>/<institution>/<sourceID>/<experimentID>/
    <variant>/<table>/<variable>/<grid>/<version>/
    <variable>_<table>_<sourceID>_<experimentID>_<variant>_<grid>*.nc``
    and expanded with ``glob.glob``. Activity, institution and table are
    derived from `experimentID`, `sourceID` and `variable` (+ `freq`) using
    the tables hard-coded below.

    Parameters:
    -----------
    variable : str
        Variable name (e.g. 'thetao'); decides the table ('O'+freq, 'Ofx' or 'A'+freq).
    sourceID : str
        Model name (e.g. 'CanESM5'); decides the institution directory.
    experimentID : str
        Experiment name (e.g. 'historical'); decides the activity directory.
    freq : str, optional
        Frequency suffix of the table name (default is 'mon').
    grid : str, optional
        Grid label, may contain glob wildcards (default is 'g*').
    version : str, optional
        Version directory (default is 'latest').
    variant : str, optional
        Variant label (default is 'r1i1p1f1').
    mipera : str, optional
        CMIP era directory (default is 'CMIP6').
    diresgf : str, optional
        Root directory of the archive, with trailing slash
        (default is '/mnt/reef-ns1002k-esgf/').
    verbose : bool, optional
        If True, prints the function name at the start and 'endfunc' at the
        end of execution (default is False).
    **kwargs : dict, optional
        Accepted and ignored.

    Returns:
    --------
    List[str]
        The paths matching the pattern, in the order returned by glob
        (empty if nothing matches).

    Raises:
    -------
    SystemExit
        Via ``sys.exit`` when `experimentID`, `sourceID` or `variable`
        (checked in that order) is not in the hard-coded tables.

    Example:
    --------
    fp_list = get_esgf_dataset_filepaths('thetao', 'CanESM5', 'historical', freq='mon')

    Dependencies:
    -------------
    glob, sys
    """
    import glob, sys

    if verbose:
        print('func: get_esgf_dataset_filepaths')

    def _lookup(key, table, error_message):
        # Value of the first (members, value) pair whose members contain key;
        # terminates through sys.exit when there is none.
        for members, value in table:
            if key in members:
                return value
        sys.exit(error_message)

    zwActivity = _lookup(
        experimentID,
        (
            (['1pctCO2', 'piControl', 'historical', 'abrupt-4xCO2'], 'CMIP'),
            (['ssp126', 'ssp245', 'ssp585'], 'ScenarioMIP'),
        ),
        'Check experimentID, case not implemented',
    )

    zwInstitutionID = _lookup(
        sourceID,
        (
            (['CESM2', 'CESM2-WACCM'], 'NCAR'),
            (['ACCESS-ESM1-5'], 'CSIRO'),
            (['CNRM-ESM2-1'], 'CNRM-CERFACS'),
            (['CanESM5', 'CanESM5-CanOE'], 'CCCma'),
            (['UKESM1-0-LL'], 'MOHC'),
            (['GFDL-CM4', 'GFDL-ESM4'], 'NOAA-GFDL'),
            (['IPSL-CM6A-LR', 'IPSL-CM6A-LR-INCA'], 'IPSL'),
            (['MIROC-ES2L'], 'MIROC'),
            (['MPI-ESM1-2-LR', 'ICON-ESM-LR'], 'MPI-M'),
            (['NorESM2-LM'], 'NCC'),
        ),
        'Check sourceID, case not implemented',
    )

    # Table name: ocean and atmosphere tables carry the frequency, the
    # fixed-field ocean table does not.
    ocean_list = ['fgco2', 'intpp', 'o2', 'thetao', 'so', 'agessc', 'po4', 'no3', 'dissic', 'talk']
    if variable in ocean_list:
        zwTableID = 'O' + freq
    elif variable in ('areacello',):
        zwTableID = 'Ofx'
    elif variable in ('psl',):
        zwTableID = 'A' + freq
    else:
        sys.exit('!!! WARNING !!! Check variable, case not implemented')

    directory_parts = [mipera, zwActivity, zwInstitutionID, sourceID,
                       experimentID, variant, zwTableID, variable, grid, version]
    zwdname = diresgf + '/'.join(directory_parts) + '/'

    filename_parts = [variable, zwTableID, sourceID, experimentID, variant, grid]
    zwfname = '_'.join(filename_parts) + '*.nc'

    if verbose:
        print('endfunc')
    return glob.glob(zwdname + zwfname)
#
def nan_helper(y):
    """Return a NaN mask of `y` and a converter from masks to indices.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, boolean array, True where y is NaN
        - index, a function with signature indices = index(logical_indices)
          returning the positions at which a boolean array is True
    Example:
        >>> # linear interpolation of NaNs
        >>> nans, x = nan_helper(y)
        >>> y[nans] = np.interp(x(nans), x(~nans), y[~nans])
        nb: y[~nans] are the values of y that are not NaN
            x(~nans) are the indices of y that are not NaN
    """

    def to_indices(mask):
        # First (and, for 1d input, only) axis of the non-zero positions.
        return mask.nonzero()[0]

    nan_mask = np.isnan(y)
    return nan_mask, to_indices
#


# Compute yearly mean for SSP585

## Check data availability

In [16]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for SSP585')
print('## Check data avaibility')

# Models processed in this pass; the first batch of models is kept for reference.
# esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-CM4', 'GFDL-ESM4', 'UKESM1-0-LL']

# Variant label of each model.
variant_dict = dict([
    ('MPI-ESM1-2-LR', 'r1i1p1f1'),
    ('ACCESS-ESM1-5', 'r1i1p1f1'),
    ('IPSL-CM6A-LR',  'r1i1p1f1'),
    ('CanESM5',       'r1i1p1f1'),
    ('MIROC-ES2L',    'r1i1p1f2'),
    ('CNRM-ESM2-1',   'r1i1p1f2'),
    ('GFDL-CM4',      'r1i1p1f1'),
    ('GFDL-ESM4',     'r1i1p1f1'),
    ('UKESM1-0-LL',   'r1i1p1f2'),
])

# thetao/so/o2 are tabulated for all nine models, the other variables only
# for the first batch of five.
_vars_all_models = ('thetao', 'so', 'o2')
_vars_first_batch = ('agessc', 'po4', 'talk', 'dissic')

# Default ssp585 dataset version per model.
_version_first_batch = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191115',
    'IPSL-CM6A-LR' : 'v20190903',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}
_version_second_batch = {
    'CNRM-ESM2-1'  : 'v20191021',
    'GFDL-CM4'     : 'v20180701',
    'GFDL-ESM4'    : 'v20180701',
    'UKESM1-0-LL'  : 'v20190726',
}
# Grid label per model.
_grid_first_batch = dict.fromkeys(_version_first_batch, 'gn')
_grid_second_batch = {
    'CNRM-ESM2-1'  : 'gn',
    'GFDL-CM4'     : 'gr',
    'GFDL-ESM4'    : 'gr',
    'UKESM1-0-LL'  : 'gn',
}

# version_dict[variable][model] and grid_dict[variable][model]
version_dict = {}
grid_dict = {}
for _var in _vars_all_models:
    version_dict[_var] = {**_version_first_batch, **_version_second_batch}
    grid_dict[_var] = {**_grid_first_batch, **_grid_second_batch}
for _var in _vars_first_batch:
    version_dict[_var] = dict(_version_first_batch)
    grid_dict[_var] = dict(_grid_first_batch)
# Versions that differ from the per-model default.
version_dict['agessc']['MIROC-ES2L'] = 'v20220314'
version_dict['dissic']['IPSL-CM6A-LR'] = 'v20190119'

# Keep the notebook namespace limited to the names defined by the original cell.
del _var, _vars_all_models, _vars_first_batch
del _version_first_batch, _version_second_batch, _grid_first_batch, _grid_second_batch

simu = 'ssp585'

var_list = ['o2', 'thetao', 'so']

# For every (variable, model) pair: list the dimensions and coordinates of the
# files found on disk and check that the number of monthly time steps matches
# the span of years they cover.
for var in var_list:
    
    print('============')
    print(var.upper())
    print('============')
    print('')

    for esm in esm_list: 

        # Locate the files
        fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                                version=version_dict[var][esm], 
                                                variant=variant_dict[esm], 
                                                grid=grid_dict[var][esm])
        if len(fname_list) == 0:
            # xr.open_mfdataset raises on an empty list; for an availability
            # check, report the missing dataset and move on to the next model.
            print('------------')
            print(f'!!! WARNING no file found for {var}, {esm}, {simu}')
            print('')
            continue
        #
        # The context manager closes the files once the metadata has been
        # read, so file handles do not pile up over the loop.
        with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
            print('--- Dims ---')
            for dim_name in zwds.dims: print(dim_name)
            print('--- Coords ---')
            for coord_name in zwds.coords: print(coord_name)
            ymin = int(np.min(zwds['time.year']))
            ymax = int(np.max(zwds['time.year']))
            # Look the time axis up by name rather than assuming it comes first.
            tstep = zwds[var].sizes['time']
        #
        # Monthly data: complete iff there are 12 time steps per covered year.
        good = tstep/12 == ymax-ymin+1
        print('------------')
        if good: 
            print('%s GOOD'%esm)
            print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
            print('------------')
        else:
            print(f'!!! WARNING {esm}: some years are missing '
                  f'({tstep} months for {ymin} to {ymax}), '
                  'here is the list of available files: ')
            for fname in fname_list: print(fname)
        #
        print('')
    #





2024-10-17 14:20:01.480952
# Compute yearly mean for SSP585
## Check data avaibility
O2

--- Dims ---
y
x
time
nvertex
lev
axis_nbounds
--- Coords ---
lat
lon
lev
time
------------
CNRM-ESM2-1 GOOD
2015 to 2100, 86.0 years (1032 months)
------------

--- Dims ---
lat
time
bnds
lon
lev
--- Coords ---
lat
lon
time
lev
------------
GFDL-CM4 GOOD
2015 to 2100, 86.0 years (1032 months)
------------

--- Dims ---
lat
time
bnds
lon
lev
--- Coords ---
lat
lon
time
lev
------------
GFDL-ESM4 GOOD
2015 to 2100, 86.0 years (1032 months)
------------

--- Dims ---
time
bnds
lev
j
i
vertices
--- Coords ---
time
lev
j
i
latitude
longitude
------------
UKESM1-0-LL GOOD
2015 to 2100, 86.0 years (1032 months)
------------

THETAO

--- Dims ---
y
x
time
nvertex
lev
axis_nbounds
--- Coords ---
lat
lon
lev
time
------------
CNRM-ESM2-1 GOOD
2015 to 2100, 86.0 years (1032 months)
------------

--- Dims ---
lat
time
bnds
lon
lev
--- Coords ---
lat
lon
time
lev
------------
GFDL-CM4 GOOD
2015 to 2100, 86.0 y

## Compute

In [17]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for SSP585')
print('## Compute')

# Models processed in this pass.
esm_list = ['CNRM-ESM2-1', 'GFDL-CM4', 'GFDL-ESM4', 'UKESM1-0-LL']

# Variant label of each model.
variant_dict = dict([
    ('MPI-ESM1-2-LR', 'r1i1p1f1'),
    ('ACCESS-ESM1-5', 'r1i1p1f1'),
    ('IPSL-CM6A-LR',  'r1i1p1f1'),
    ('CanESM5',       'r1i1p1f1'),
    ('MIROC-ES2L',    'r1i1p1f2'),
    ('CNRM-ESM2-1',   'r1i1p1f2'),
    ('GFDL-CM4',      'r1i1p1f1'),
    ('GFDL-ESM4',     'r1i1p1f1'),
    ('UKESM1-0-LL',   'r1i1p1f2'),
])

# thetao/so/o2 are tabulated for all nine models, the other variables only
# for the first batch of five.
_vars_all_models = ('thetao', 'so', 'o2')
_vars_first_batch = ('agessc', 'po4', 'talk', 'dissic')

# Default ssp585 dataset version per model.
_version_first_batch = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191115',
    'IPSL-CM6A-LR' : 'v20190903',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}
_version_second_batch = {
    'CNRM-ESM2-1'  : 'v20191021',
    'GFDL-CM4'     : 'v20180701',
    'GFDL-ESM4'    : 'v20180701',
    'UKESM1-0-LL'  : 'v20190726',
}
# Grid label per model.
_grid_first_batch = dict.fromkeys(_version_first_batch, 'gn')
_grid_second_batch = {
    'CNRM-ESM2-1'  : 'gn',
    'GFDL-CM4'     : 'gr',
    'GFDL-ESM4'    : 'gr',
    'UKESM1-0-LL'  : 'gn',
}

# version_dict[variable][model] and grid_dict[variable][model]
version_dict = {}
grid_dict = {}
for _var in _vars_all_models:
    version_dict[_var] = {**_version_first_batch, **_version_second_batch}
    grid_dict[_var] = {**_grid_first_batch, **_grid_second_batch}
for _var in _vars_first_batch:
    version_dict[_var] = dict(_version_first_batch)
    grid_dict[_var] = dict(_grid_first_batch)
# Versions that differ from the per-model default.
version_dict['agessc']['MIROC-ES2L'] = 'v20220314'
version_dict['dissic']['IPSL-CM6A-LR'] = 'v20190119'

# Keep the notebook namespace limited to the names defined by the original cell.
del _var, _vars_all_models, _vars_first_batch
del _version_first_batch, _version_second_batch, _grid_first_batch, _grid_second_batch

simu = 'ssp585'

var_list = ['o2', 'thetao', 'so']

# Years to process, as 4-digit strings (used both to select the time steps of
# a year and in the output file names). Same years as before, 2015..2099,
# built from integers instead of a float np.arange.
# NOTE(review): the availability check above reports data for 2015 to 2100, so
# 2100 is not processed here -- confirm this is intended (range(2015, 2101) would include it).
year_list = ['%04d' %yyy for yyy in range(2015, 2100)]

for var in var_list:
    for esm in esm_list: 

        print(f'Computing yearly mean for {var}, {esm}...')

        # Locate the files
        fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                                version=version_dict[var][esm], 
                                                variant=variant_dict[esm], 
                                                grid=grid_dict[var][esm])
        # The context manager closes the source files once every year of this
        # (variable, model) pair has been written; `del zwds` alone left them open.
        with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
            # Global attributes of the source files, copied to every output file
            zwds_attrs = dict(zwds.attrs)

            # Keep only the variable of interest and harmonise names,
            # dimensions and longitudes (data stay lazy at this point).
            zwds2 = zwds[var].to_dataset()
            zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
            zwds2 = split_coords_dimensions(zwds2)
            zwds2 = shift_180_lon(zwds2)

            # Loop on years: only one year of monthly data is in memory at a time
            for year in year_list: 

                zwda = zwds2.sel(time=year)[var].load()
                zwda_tavg = zwda.groupby('time.year').mean(dim='time')
                # Clean
                del zwda
                gc.collect()
                # create dataset
                zwda_ds = zwda_tavg.to_dataset() 
                zwda_ds.attrs = zwds_attrs
                # Save in netcdf, one file per model, experiment, variable and year
                ncname = netcdfdir+esm+'_'+simu+'_'+var+'_'+year+'.nc'
                zwda_ds.to_netcdf(ncname)
            #
        #
        print(f'Done with {var}, {esm}')
    #
#





2024-10-17 14:23:50.785692
# Compute yearly mean for SSP585
## Compute
Computing yearly mean for o2, CNRM-ESM2-1...
Done with o2, CNRM-ESM2-1
Computing yearly mean for o2, GFDL-CM4...
Done with o2, GFDL-CM4
Computing yearly mean for o2, GFDL-ESM4...
Done with o2, GFDL-ESM4
Computing yearly mean for o2, UKESM1-0-LL...
Done with o2, UKESM1-0-LL
Computing yearly mean for thetao, CNRM-ESM2-1...
Done with thetao, CNRM-ESM2-1
Computing yearly mean for thetao, GFDL-CM4...
Done with thetao, GFDL-CM4
Computing yearly mean for thetao, GFDL-ESM4...
Done with thetao, GFDL-ESM4
Computing yearly mean for thetao, UKESM1-0-LL...
Done with thetao, UKESM1-0-LL
Computing yearly mean for so, CNRM-ESM2-1...
Done with so, CNRM-ESM2-1
Computing yearly mean for so, GFDL-CM4...
Done with so, GFDL-CM4
Computing yearly mean for so, GFDL-ESM4...
Done with so, GFDL-ESM4
Computing yearly mean for so, UKESM1-0-LL...
Done with so, UKESM1-0-LL
peak memory: 2787.75 MiB, increment: 2354.29 MiB
CPU times: user 25min 59s,

# Compute yearly mean for historical

## Check data availability

In [15]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for historical')
print('## Check data avaibility')

# Models processed in this pass; the first batch of models is kept for reference.
# esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-CM4', 'GFDL-ESM4', 'UKESM1-0-LL']

# Variant label of each model.
variant_dict = dict([
    ('MPI-ESM1-2-LR', 'r1i1p1f1'),
    ('ACCESS-ESM1-5', 'r1i1p1f1'),
    ('IPSL-CM6A-LR',  'r1i1p1f1'),
    ('CanESM5',       'r1i1p1f1'),
    ('MIROC-ES2L',    'r1i1p1f2'),
    ('CNRM-ESM2-1',   'r1i1p1f2'),
    ('GFDL-CM4',      'r1i1p1f1'),
    ('GFDL-ESM4',     'r1i1p1f1'),
    ('UKESM1-0-LL',   'r1i1p1f2'),
])

# Historical dataset version per model.
_version_first_batch = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191115',
    'IPSL-CM6A-LR' : 'v20180803',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}
_version_second_batch = {
    'CNRM-ESM2-1'  : 'v20181206',
    'GFDL-CM4'     : 'v20180701',
    'GFDL-ESM4'    : 'v20190726',
    'UKESM1-0-LL'  : 'v20190627',
}
# Grid label per model.
_grid_first_batch = dict.fromkeys(_version_first_batch, 'gn')
_grid_second_batch = {
    'CNRM-ESM2-1'  : 'gn',
    'GFDL-CM4'     : 'gr',
    'GFDL-ESM4'    : 'gr',
    'UKESM1-0-LL'  : 'gn',
}

# version_dict[variable][model] and grid_dict[variable][model]:
# thetao/so/o2 are tabulated for all nine models, the other variables only
# for the first batch of five.
version_dict = {}
grid_dict = {}
for _var in ('thetao', 'so', 'o2'):
    version_dict[_var] = {**_version_first_batch, **_version_second_batch}
    grid_dict[_var] = {**_grid_first_batch, **_grid_second_batch}
for _var in ('agessc', 'po4', 'dissic', 'talk'):
    version_dict[_var] = dict(_version_first_batch)
for _var in ('agessc', 'po4', 'talk', 'dissic'):
    grid_dict[_var] = dict(_grid_first_batch)

# Keep the notebook namespace limited to the names defined by the original cell.
del _var
del _version_first_batch, _version_second_batch, _grid_first_batch, _grid_second_batch

simu = 'historical'

var_list = ['o2', 'thetao', 'so']

# For every (variable, model) pair: list the dimensions and coordinates of the
# files found on disk and check that the number of monthly time steps matches
# the span of years they cover.
for var in var_list:
    
    print('============')
    print(var.upper())
    print('============')
    print('')

    for esm in esm_list: 

        # Locate the files
        fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                                version=version_dict[var][esm], 
                                                variant=variant_dict[esm], 
                                                grid=grid_dict[var][esm])
        if len(fname_list) == 0:
            # xr.open_mfdataset raises on an empty list; for an availability
            # check, report the missing dataset and move on to the next model.
            print('------------')
            print(f'!!! WARNING no file found for {var}, {esm}, {simu}')
            print('')
            continue
        #
        # The context manager closes the files once the metadata has been
        # read, so file handles do not pile up over the loop.
        with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
            print('--- Dims ---')
            for dim_name in zwds.dims: print(dim_name)
            print('--- Coords ---')
            for coord_name in zwds.coords: print(coord_name)
            ymin = int(np.min(zwds['time.year']))
            ymax = int(np.max(zwds['time.year']))
            # Look the time axis up by name rather than assuming it comes first.
            tstep = zwds[var].sizes['time']
        #
        # Monthly data: complete iff there are 12 time steps per covered year.
        good = tstep/12 == ymax-ymin+1
        print('------------')
        if good: 
            print('%s GOOD'%esm)
            print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
            print('------------')
        else:
            print(f'!!! WARNING {esm}: some years are missing '
                  f'({tstep} months for {ymin} to {ymax}), '
                  'here is the list of available files: ')
            for fname in fname_list: print(fname)
        #
        print('')
    #





2024-10-17 14:19:34.755349
# Compute yearly mean for historical
## Check data avaibility
O2

--- Dims ---
y
x
time
nvertex
lev
axis_nbounds
--- Coords ---
lat
lon
lev
time
------------
CNRM-ESM2-1 GOOD
1850 to 2014, 165.0 years (1980 months)
------------

--- Dims ---
lat
time
bnds
lon
lev
--- Coords ---
lat
lon
time
lev
------------
GFDL-CM4 GOOD
1850 to 2014, 165.0 years (1980 months)
------------

--- Dims ---
lat
time
bnds
lon
lev
--- Coords ---
lat
lon
time
lev
------------
GFDL-ESM4 GOOD
1850 to 2014, 165.0 years (1980 months)
------------

--- Dims ---
time
bnds
lev
j
i
vertices
--- Coords ---
time
lev
j
i
latitude
longitude
------------
UKESM1-0-LL GOOD
1850 to 2014, 165.0 years (1980 months)
------------

THETAO

--- Dims ---
y
x
time
nvertex
lev
axis_nbounds
--- Coords ---
lat
lon
lev
time
------------
CNRM-ESM2-1 GOOD
1850 to 2014, 165.0 years (1980 months)
------------

--- Dims ---
lat
time
bnds
lon
lev
--- Coords ---
lat
lon
time
lev
------------
GFDL-CM4 GOOD
1850 to 201

## Compute

In [18]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for historical')
print('## Compute')

# Models processed in this pass; the first batch of models is kept for reference.
# esm_list = ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
esm_list = ['CNRM-ESM2-1', 'GFDL-CM4', 'GFDL-ESM4', 'UKESM1-0-LL']

# Variant label of each model.
variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

# Historical dataset version per model.
_version_first_batch = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191115',
    'IPSL-CM6A-LR' : 'v20180803',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}
_version_second_batch = {
    'CNRM-ESM2-1'  : 'v20181206',
    'GFDL-CM4'     : 'v20180701',
    'GFDL-ESM4'    : 'v20190726',
    'UKESM1-0-LL'  : 'v20190627',
}
# Grid label per model. The loop of this cell reads grid_dict, which the cell
# did not define itself (it relied on whichever earlier cell ran last); it is
# now defined here with the same content as in the historical check cell.
_grid_first_batch = dict.fromkeys(_version_first_batch, 'gn')
_grid_second_batch = {
    'CNRM-ESM2-1'  : 'gn',
    'GFDL-CM4'     : 'gr',
    'GFDL-ESM4'    : 'gr',
    'UKESM1-0-LL'  : 'gn',
}

# version_dict[variable][model] and grid_dict[variable][model]:
# thetao/so/o2 are tabulated for all nine models, the other variables only
# for the first batch of five.
version_dict = {}
grid_dict = {}
for _var in ('thetao', 'so', 'o2'):
    version_dict[_var] = {**_version_first_batch, **_version_second_batch}
    grid_dict[_var] = {**_grid_first_batch, **_grid_second_batch}
for _var in ('agessc', 'po4', 'dissic', 'talk'):
    version_dict[_var] = dict(_version_first_batch)
for _var in ('agessc', 'po4', 'talk', 'dissic'):
    grid_dict[_var] = dict(_grid_first_batch)

# Do not leave the construction helpers in the notebook namespace.
del _var
del _version_first_batch, _version_second_batch, _grid_first_batch, _grid_second_batch

simu='historical'

var_list = ['o2', 'thetao', 'so']

# Years to process, as 4-digit strings (used both to select the time steps of
# a year and in the output file names). Same years as before, 1850..2014,
# built from integers instead of a float np.arange.
year_list = ['%04d' %yyy for yyy in range(1850, 2015)]

for var in var_list:
    for esm in esm_list: 

        print(f'Computing yearly mean for {var}, {esm}...')

        # Locate the files
        fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                                version=version_dict[var][esm], 
                                                variant=variant_dict[esm], 
                                                grid=grid_dict[var][esm])
        # The context manager closes the source files once every year of this
        # (variable, model) pair has been written; `del zwds` alone left them open.
        with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
            # Global attributes of the source files, copied to every output file
            zwds_attrs = dict(zwds.attrs)

            # Keep only the variable of interest and harmonise names,
            # dimensions and longitudes (data stay lazy at this point).
            zwds2 = zwds[var].to_dataset()
            zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
            zwds2 = split_coords_dimensions(zwds2)
            zwds2 = shift_180_lon(zwds2)

            # Loop on years: only one year of monthly data is in memory at a time
            for year in year_list: 

                zwda = zwds2.sel(time=year)[var].load()
                zwda_tavg = zwda.groupby('time.year').mean(dim='time')
                # Clean
                del zwda
                gc.collect()
                # create dataset
                zwda_ds = zwda_tavg.to_dataset() 
                zwda_ds.attrs = zwds_attrs
                # Save in netcdf, one file per model, experiment, variable and year
                ncname = netcdfdir+esm+'_'+simu+'_'+var+'_'+year+'.nc'
                zwda_ds.to_netcdf(ncname)
            #
        #
        print(f'Done with {var}, {esm}')
    #
#





2024-10-17 14:56:55.592029
# Compute yearly mean for historical
## Compute
Computing yearly mean for o2, CNRM-ESM2-1...
Done with o2, CNRM-ESM2-1
Computing yearly mean for o2, GFDL-CM4...
Done with o2, GFDL-CM4
Computing yearly mean for o2, GFDL-ESM4...
Done with o2, GFDL-ESM4
Computing yearly mean for o2, UKESM1-0-LL...
Done with thetao, CNRM-ESM2-1
Computing yearly mean for thetao, GFDL-CM4...
Done with thetao, GFDL-CM4
Computing yearly mean for thetao, GFDL-ESM4...
Done with thetao, GFDL-ESM4
Computing yearly mean for thetao, UKESM1-0-LL...
Done with thetao, UKESM1-0-LL
Computing yearly mean for so, CNRM-ESM2-1...
Done with so, CNRM-ESM2-1
Computing yearly mean for so, GFDL-CM4...
Done with so, GFDL-CM4
Computing yearly mean for so, GFDL-ESM4...
Done with so, GFDL-ESM4
Computing yearly mean for so, UKESM1-0-LL...
Done with so, UKESM1-0-LL
peak memory: 3908.04 MiB, increment: 2630.03 MiB
CPU times: user 49min 48s, sys: 10min 20s, total: 1h 8s
Wall time: 1h 3min 46s


# Compute yearly mean for piControl

## def shorten_fname_list(fname_list, startyear, endyear):

In [6]:
def shorten_fname_list(fname_list, startyear, endyear):
    """Keep only the files whose year range overlaps [startyear, endyear].

    The years covered by a file are parsed from the last '_'-separated field
    of its base name, which must look like ``<start>-<end>...`` (for example
    ``015101-017012.nc``); only the first four characters on each side of the
    '-' are read as the year.

    Parameters
    ----------
    fname_list : list of str
        File paths using '/' as separator. The list is NOT modified.
    startyear, endyear : int
        First and last year (inclusive) of the targeted period.

    Returns
    -------
    list of str
        Matching paths, sorted and without duplicates. When no file matches,
        the complete sorted input is returned instead.

    Raises
    ------
    IndexError, ValueError
        If a base name does not end with a ``<start>-<end>`` field whose
        first four characters are digits.
    """
    # Work on a sorted copy so the caller's list keeps its original order.
    ordered = sorted(fname_list)

    selected = []
    seen = set()
    for fname in ordered:
        span = fname.split('/')[-1].split('_')[-1].split('-')
        year1_of_fname = int(span[0][:4])
        year2_of_fname = int(span[1][:4])

        # A bound of the target period falls inside the file's range ...
        target_bound_in_file = (
            year1_of_fname <= startyear <= year2_of_fname
            or year1_of_fname <= endyear <= year2_of_fname
        )
        # ... or a bound of the file's range falls inside the target period.
        file_bound_in_target = (
            startyear <= year1_of_fname <= endyear
            or startyear <= year2_of_fname <= endyear
        )

        if (target_bound_in_file or file_bound_in_target) and fname not in seen:
            seen.add(fname)
            selected.append(fname)
        #
    #
    # Nothing overlaps the target: fall back to every available file.
    if len(selected) == 0:
        return ordered
    return selected
#


## Check data availability

In [7]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for piControl')
print('## Check data avaibility')

# Models checked in this cell.
# The other models that have entries in the lookup tables are
#   ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
# (use that list here instead to check them).
esm_list = ['CNRM-ESM2-1', 'GFDL-CM4', 'GFDL-ESM4', 'UKESM1-0-LL']

# First year of the targeted piControl period for each model; the loop at the
# end of this cell targets refyear .. refyear + (2099-1850).
refyear_dict = {
    'MPI-ESM1-2-LR': 1850,
    'ACCESS-ESM1-5':  161,
    'IPSL-CM6A-LR' : 1910,
    'CanESM5'      : 5201,
    'MIROC-ES2L'   : 1850,
    'CNRM-ESM2-1'  : 1850, 
    'GFDL-CM4'     :  101,
    'GFDL-ESM4'    :  101,
    'UKESM1-0-LL'  : 2250
}

# Variant label of each model, passed as `variant=` to
# get_esgf_dataset_filepaths.
variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

# Dataset version tag for every (variable, model) pair, passed as `version=`
# to get_esgf_dataset_filepaths. Built from two shared base tables so that
# the only per-variable exception (o2 for UKESM1-0-LL) is explicit.
_versions_first_models = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191112',
    'IPSL-CM6A-LR' : 'v20200326',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}
_versions_all_models = {
    **_versions_first_models,
    'CNRM-ESM2-1'  : 'v20181115',
    'GFDL-CM4'     : 'v20180701',
    'GFDL-ESM4'    : 'v20180701',
    'UKESM1-0-LL'  : 'v20190827',
}

# Each variable gets its own copy so the inner dicts stay independent.
version_dict = {
    'thetao': dict(_versions_all_models),
    'so'    : dict(_versions_all_models),
    'o2'    : {**_versions_all_models, 'UKESM1-0-LL': 'v20200219'},
}
version_dict.update(
    {name: dict(_versions_first_models)
     for name in ('po4', 'dissic', 'talk', 'agessc')}
)

# Grid label ('gn' or 'gr') for every (variable, model) pair, passed as
# `grid=` to get_esgf_dataset_filepaths.
_grid_first_models = dict.fromkeys(
    ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L'],
    'gn',
)
_grid_all_models = {
    **_grid_first_models,
    'CNRM-ESM2-1'  : 'gn',
    'GFDL-CM4'     : 'gr',
    'GFDL-ESM4'    : 'gr',
    'UKESM1-0-LL'  : 'gn',
}

# Each variable gets its own copy so the inner dicts stay independent.
grid_dict = {name: dict(_grid_all_models) for name in ('thetao', 'so', 'o2')}
grid_dict.update(
    {name: dict(_grid_first_models)
     for name in ('agessc', 'po4', 'talk', 'dissic')}
)


simu = 'piControl'

var_list = ['o2', 'thetao', 'so']

# For every (variable, model) pair, open the piControl files that overlap the
# targeted period and report whether the time axis is gap-free and whether it
# covers the whole target.
for var in var_list:

    print('============')
    print(var.upper())
    print('============')
    print('')

    for esm in esm_list:

        # Target window: starts at the model's reference year and spans the
        # same number of years as 1850-2099.
        startyear = refyear_dict[esm] + (1850 - 1850)
        endyear = refyear_dict[esm] + (2099 - 1850)
        print('piControl targeted time period: %04d-%04d'%(startyear, endyear))

        # Locate the files and keep those overlapping the target window
        fname_list = get_esgf_dataset_filepaths(var, esm, simu,
                                                version=version_dict[var][esm],
                                                variant=variant_dict[esm],
                                                grid=grid_dict[var][esm])
        new_fname_list = shorten_fname_list(fname_list, startyear, endyear)

        # Only scalars are needed from the dataset, so close it right away
        # instead of keeping the files open for the rest of the loop.
        with xr.open_mfdataset(new_fname_list, **kwopenmfds) as zwds:
            ymin = int(np.min(zwds['time.year']))
            ymax = int(np.max(zwds['time.year']))
            # assumes time is the leading dimension of the variable -- TODO confirm
            tstep = zwds[var].shape[0]

        # good1: as many time steps as 12 per year between ymin and ymax
        # good2: the available years enclose the targeted period
        good1 = tstep/12 == ymax-ymin+1
        good2 = (startyear >= ymin) and (endyear <= ymax)
        if good1 and good2:
            print('%s GOOD, time period complete and match target'%esm)
            print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        elif good1:
            print('!!! WARNING !!! %s, time period complete BUT do not match target'%esm)
            # Adjacent literals instead of a backslash continuation, which
            # leaked the source indentation into the printed message.
            print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months), '
                  'here is the list of the files:')
            for fname in new_fname_list: print(fname)
        else:
            print('!!! WARNING !!! %s, some years are missing, '
                  'here is the list of available files: '%esm)
            for fname in new_fname_list: print(fname)
        #
        print('------------')
        print('')
    #
#




2024-10-17 17:39:25.904010
# Compute yearly mean for piControl
## Check data avaibility
O2

piControl targeted time period: 1850-2099
[########################################] | 100% Completed | 21.34 s
CNRM-ESM2-1 GOOD, time period complete and match target
1850 to 2099, 250.0 years (3000 months)
------------

piControl targeted time period: 0101-0350
151 to 350, 200.0 years (2400 months),             here is the list of the files:
/mnt/reef-ns1002k-esgf/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/piControl/r1i1p1f1/Omon/o2/gr/v20180701/o2_Omon_GFDL-CM4_piControl_r1i1p1f1_gr_015101-017012.nc
/mnt/reef-ns1002k-esgf/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/piControl/r1i1p1f1/Omon/o2/gr/v20180701/o2_Omon_GFDL-CM4_piControl_r1i1p1f1_gr_017101-019012.nc
/mnt/reef-ns1002k-esgf/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/piControl/r1i1p1f1/Omon/o2/gr/v20180701/o2_Omon_GFDL-CM4_piControl_r1i1p1f1_gr_019101-021012.nc
/mnt/reef-ns1002k-esgf/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/piControl/r1i1p1f1/Omon/o2/gr/v20180701/o2_Omon_GFDL-CM4_piCon

## Compute

In [None]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for piControl')
print('## Compute')

# Models processed in this cell.
# The other models that have entries in the lookup tables are
#   ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L']
# (use that list here instead to process them).
esm_list = ['CNRM-ESM2-1', 'GFDL-ESM4', 'UKESM1-0-LL']

# First year of the targeted piControl period for each model; the loop at the
# end of this cell processes refyear .. refyear + (2099-1850).
refyear_dict = {
    'MPI-ESM1-2-LR': 1850,
    'ACCESS-ESM1-5':  161,
    'IPSL-CM6A-LR' : 1910,
    'CanESM5'      : 5201,
    'MIROC-ES2L'   : 1850,
    'CNRM-ESM2-1'  : 1850, 
    'GFDL-CM4'     :  101,
    'GFDL-ESM4'    :  101,
    'UKESM1-0-LL'  : 2250
}

# Variant label of each model, passed as `variant=` to
# get_esgf_dataset_filepaths.
variant_dict = {
    'MPI-ESM1-2-LR': 'r1i1p1f1',
    'ACCESS-ESM1-5': 'r1i1p1f1',
    'IPSL-CM6A-LR' : 'r1i1p1f1',
    'CanESM5'      : 'r1i1p1f1',
    'MIROC-ES2L'   : 'r1i1p1f2',
    'CNRM-ESM2-1'  : 'r1i1p1f2', 
    'GFDL-CM4'     : 'r1i1p1f1',
    'GFDL-ESM4'    : 'r1i1p1f1',
    'UKESM1-0-LL'  : 'r1i1p1f2'
}

# Dataset version tag for every (variable, model) pair, passed as `version=`
# to get_esgf_dataset_filepaths. Built from two shared base tables so that
# the only per-variable exception (o2 for UKESM1-0-LL) is explicit.
_versions_first_models = {
    'MPI-ESM1-2-LR': 'v20190710',
    'ACCESS-ESM1-5': 'v20191112',
    'IPSL-CM6A-LR' : 'v20200326',
    'CanESM5'      : 'v20190429',
    'MIROC-ES2L'   : 'v20190823',
}
_versions_all_models = {
    **_versions_first_models,
    'CNRM-ESM2-1'  : 'v20181115',
    'GFDL-CM4'     : 'v20180701',
    'GFDL-ESM4'    : 'v20180701',
    'UKESM1-0-LL'  : 'v20190827',
}

# Each variable gets its own copy so the inner dicts stay independent.
version_dict = {
    'thetao': dict(_versions_all_models),
    'so'    : dict(_versions_all_models),
    'o2'    : {**_versions_all_models, 'UKESM1-0-LL': 'v20200219'},
}
version_dict.update(
    {name: dict(_versions_first_models)
     for name in ('po4', 'dissic', 'talk', 'agessc')}
)

# Grid label ('gn' or 'gr') for every (variable, model) pair, passed as
# `grid=` to get_esgf_dataset_filepaths.
_grid_first_models = dict.fromkeys(
    ['MPI-ESM1-2-LR', 'ACCESS-ESM1-5', 'IPSL-CM6A-LR', 'CanESM5', 'MIROC-ES2L'],
    'gn',
)
_grid_all_models = {
    **_grid_first_models,
    'CNRM-ESM2-1'  : 'gn',
    'GFDL-CM4'     : 'gr',
    'GFDL-ESM4'    : 'gr',
    'UKESM1-0-LL'  : 'gn',
}

# Each variable gets its own copy so the inner dicts stay independent.
grid_dict = {name: dict(_grid_all_models) for name in ('thetao', 'so', 'o2')}
grid_dict.update(
    {name: dict(_grid_first_models)
     for name in ('agessc', 'po4', 'talk', 'dissic')}
)


simu = 'piControl'

var_list = ['o2', 'thetao', 'so']

# For every (variable, model) pair, write one NetCDF file per targeted year
# holding the time mean of that year's data.
for var in var_list:
    for esm in esm_list:

        print(f'Computing yearly mean for {var}, {esm}...')

        # Target window: starts at the model's reference year and spans the
        # same number of years as 1850-2099.
        startyear = refyear_dict[esm] + (1850 - 1850)
        endyear = refyear_dict[esm] + (2099 - 1850)
        print('piControl targeted time period: %04d-%04d'%(startyear, endyear))

        # Zero-padded 4-digit labels, used both to select the year on the
        # time axis and in the output file name (endyear included).
        year_list = ['%04d' % yyy for yyy in range(startyear, endyear + 1)]

        # Load data
        fname_list = get_esgf_dataset_filepaths(var, esm, simu,
                                                version=version_dict[var][esm],
                                                variant=variant_dict[esm],
                                                grid=grid_dict[var][esm])
        new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
        zwds = xr.open_mfdataset(new_fname_list, **kwopenmfds)

        try:
            # save for later
            zwds_attrs = zwds.attrs

            zwds2 = zwds[var].to_dataset()
            zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
            zwds2 = split_coords_dimensions(zwds2)
            zwds2 = shift_180_lon(zwds2)

            # Loop on years: only one year is loaded in memory at a time
            for year in year_list:

                zwda = zwds2.sel(time=year)[var].load()
                zwda_tavg = zwda.groupby('time.year').mean(dim='time')
                # Clean
                del zwda
                gc.collect()
                # create dataset
                zwda_ds = zwda_tavg.to_dataset()
                zwda_ds.attrs = zwds_attrs
                # Save in netcdf
                ncname = f'{netcdfdir}{esm}_{simu}_{var}_{year}.nc'
                zwda_ds.to_netcdf(ncname)
            #
        finally:
            # Release the input files even if a year fails; `del` alone does
            # not close them.
            zwds.close()
        #
        gc.collect()
        print(f'Done with {var}, {esm}')
    #
#





2024-10-17 17:41:42.938992
# Compute yearly mean for piControl
## Compute
Computing yearly mean for o2, CNRM-ESM2-1...
piControl targeted time period: 1850-2099
[########################################] | 100% Completed | 17.03 s
Done with o2, CNRM-ESM2-1
Computing yearly mean for o2, GFDL-ESM4...
piControl targeted time period: 0101-0350
Done with o2, GFDL-ESM4
Computing yearly mean for o2, UKESM1-0-LL...
piControl targeted time period: 2250-2499
