# Import modules

In [5]:
%%time
%load_ext memory_profiler

import datetime
import os, glob, sys, gc
import warnings
# warnings.filterwarnings('ignore', '.*invalid value encountered in true_divide.*', )

# Progress bar for dask computations.
# NOTE(review): `minimum=10` is presumably a time threshold in seconds below
# which no bar is displayed — confirm against the dask documentation.
from dask.diagnostics import ProgressBar
pbar = ProgressBar(minimum=10)
# Activate the progress bar; the commented-out line below is its counterpart.
pbar.register()
#pbar.unregister()

import numpy as np
import xarray as xr
# Ask xarray to keep attributes on the objects returned by its operations.
xr.set_options(keep_attrs=True)



CPU times: user 283 ms, sys: 69.2 ms, total: 352 ms
Wall time: 1.55 s


<xarray.core.options.set_options at 0x7f7e44f13670>

# Starters

In [6]:
%%time
%%memit -c
print(datetime.datetime.now())

# Output tree: <dirout> receives the text logs, <dirout>netcdf_files/ receives
# the yearly-mean netCDF files written by the "Compute" cells.
# Both names keep their trailing '/' because later cells build paths by plain
# string concatenation (e.g. netcdfdir+esm+...).
dirout = '24-08-20-compute-yearly-averages-noresm/'
netcdfdir = dirout+'netcdf_files/'

# A single makedirs call creates both levels and is a no-op when they already
# exist, which avoids the check-then-create race of isdir() + mkdir().
os.makedirs(netcdfdir, exist_ok=True)

# Attach log files to the `echo` attribute of the standard streams.
# NOTE(review): presumably the Jupyter kernel streams copy everything they
# receive to `echo`, so cell output also lands in these files — confirm.
# The files are opened in 'w' mode, so re-running this cell truncates them.
sys.stdout.echo = open(dirout+'stdout.txt', 'w')
sys.stderr.echo = open(dirout+'stderr.txt', 'w')

2024-08-21 16:07:24.689520
peak memory: 278.66 MiB, increment: 113.70 MiB
CPU times: user 36 ms, sys: 25.5 ms, total: 61.5 ms
Wall time: 149 ms


# Main parameters

In [7]:
%%time
%%memit -c

print(datetime.datetime.now())

# Decoding options shared by every xarray "open" call of this notebook.
kwopends = {
    'use_cftime': True,
    'decode_times': None,
    'decode_cf': True,
    'decode_coords': True,
}

# Multi-file variant: the multi-file options first, then the same decoding
# options as above.
kwopenmfds = {'combine': 'by_coords', 'parallel': True, **kwopends}

# Mapping "name found in the files" -> "name used in this notebook".
# The insertion order is the order in which the renames are attempted.
rename_dict = dict([
    # horizontal index dimensions
    ('x', 'i'),
    ('y', 'j'),
    # geographical coordinates
    ('lat', 'latitude'),
    ('lon', 'longitude'),
    ('nav_lat', 'latitude'),
    ('nav_lon', 'longitude'),
    # vertical axis
    ('lev', 'depth'),
    ('deptht', 'depth'),
    ('olevel', 'depth'),
    ('Depth', 'depth'),
])


2024-08-21 16:07:25.019082
peak memory: 278.82 MiB, increment: 118.22 MiB
CPU times: user 34.3 ms, sys: 10.1 ms, total: 44.4 ms
Wall time: 148 ms


# Define some functions

## Preparation of data 

In [8]:
def shift_180_lon(zwda, verbose=False): 
    """
    Wrap the 'longitude' values of an object into the [-180, 180) range.

    The object is modified in place and also returned. When the smallest
    longitude is already below -150 the values are considered to be in the
    -180..180 convention and nothing is changed. Otherwise the values are
    wrapped and a timestamped note is prepended to ``attrs['history']``.

    Parameters:
    -----------
    zwda : xr.Dataset or xr.DataArray (any object supporting ``obj['longitude']``
        item access and an ``attrs`` dictionary)
        Object whose 'longitude' entry is to be wrapped.
    verbose : bool, optional
        If True, prints the function name at the start of execution (default is False).

    Returns:
    --------
    Same object as `zwda`.
        Failures (e.g. no 'longitude' entry) are reported with a printed
        warning instead of being raised; the object is then returned as is.

    Dependencies:
    -------------
    numpy, datetime
    """
    if verbose: print("func: shift_180_lon")
    
    try: 
        if not np.nanmin(zwda['longitude']) < -150: 
            zwda['longitude'] = (zwda['longitude'] + 180) % 360 - 180
            addtxt=str(datetime.datetime.now())+' shift_180_lon to get longitude from -180 to 180'
            # Prepend to an existing textual history, otherwise start one.
            previous = zwda.attrs.get('history')
            if isinstance(previous, str): 
                zwda.attrs['history'] = addtxt + ' ; ' + previous
            else: 
                zwda.attrs['history'] = addtxt
        #
    except Exception as err: 
        # Best effort on purpose: report and carry on. Only Exception is
        # caught so that KeyboardInterrupt / SystemExit still propagate.
        print('WARNING! longitude likely not shifted (%s: %s)' % (type(err).__name__, err))
    return zwda
#

def rename_vars_dims_coords(ds, rename_dict, verbose=False):
    """
    Apply the renames of `rename_dict` that are relevant for `ds`.

    The entries of `rename_dict` are processed one at a time, in order. An
    entry is applied only when its key is currently a variable, a dimension
    or a coordinate of the dataset; other entries are ignored.

    Parameters:
    -----------
    ds : xr.Dataset
        Dataset to rename.
    rename_dict : Dict[str, str]
        Mapping from the current name to the wanted name.
    verbose : bool, optional
        If True, prints a message when entering and leaving the function
        (default is False).

    Returns:
    --------
    xr.Dataset
        The dataset obtained after the successive ``ds.rename`` calls
        (the input object itself when no entry applies).

    Example:
    --------
    import xarray as xr
    ds = xr.Dataset({'temp': ('time', [0.]), 'sali': ('time', [1.])},
                    coords={'time': [0]})
    renamed_ds = rename_vars_dims_coords(ds, {'temp': 'temperature', 'sali': 'salinity'})

    Dependencies:
    -------------
    xarray
    """
    if verbose:
        print('func: rename_vars_dims_coords')

    for current, target in rename_dict.items():
        # Look the name up in the three namespaces of the dataset.
        namespaces = (ds.variables, ds.dims, ds.coords)
        is_known = any(current in names for names in namespaces)
        if is_known:
            ds = ds.rename({current: target})

    if verbose:
        print('endfunc')
    return ds
#

def split_coords_dimensions(ds, verbose=False):
    """
    Decouple the 'latitude', 'longitude' and 'depth' coordinates from the
    dimensions of the same name.

    For each of these three names that is both a coordinate and a dimension
    of `ds`, the dimension is renamed to an index name ('j' for latitude,
    'i' for longitude, 'k' for depth) while the coordinate keeps its original
    name. Coordinates called 'k', 'j' or 'i' that already exist in `ds` are
    dropped.

    Parameters:
    -----------
    ds : xr.Dataset
        Dataset to update.
    verbose : bool, optional
        If True, prints a message at the start of the function and once the
        names to process have been collected (default is False).

    Returns:
    --------
    xr.Dataset
        Result of the chain ``assign_coords`` -> ``rename_dims`` ->
        ``drop_vars`` -> ``rename`` applied to `ds`.

    Example:
    --------
    import xarray as xr
    ds = xr.Dataset({'temp': (('depth', 'latitude', 'longitude'), [[[0., 1.]]])},
                    coords={'depth': [5.], 'latitude': [0.], 'longitude': [0., 1.]})
    updated_ds = split_coords_dimensions(ds)

    Dependencies:
    -------------
    xarray
    """
    if verbose: print('func: split_coords_dimensions')

    index_name = dict(latitude='j', longitude='i', depth='k')
    handled = ("latitude", "longitude", "depth")

    # Coordinates that are also dimensions and belong to the handled names,
    # in the iteration order of ds.coords.
    selected = [(cname, cvalue) for cname, cvalue in ds.coords.items()
                if cname in ds.dims and cname in handled]

    # 1) copy of each coordinate under a temporary "<name>_coord" name
    temporary_coords = {cname + "_coord": cvalue for cname, cvalue in selected}
    # 2) dimension renames (latitude -> j, ...)
    dim_renames = {cname: index_name[cname] for cname, _ in selected}
    # 3) names to drop: the original coordinates ...
    to_drop = [cname for cname, _ in selected]
    # 4) renames that give the temporary copies their original name back
    restore_names = {cname + "_coord": cname for cname, _ in selected}

    if verbose: print('endfunc')

    # ... plus any pre-existing coordinate named like an index dimension
    to_drop.extend(idx for idx in ('k', 'j', 'i') if idx in ds.coords)

    with_copies = ds.assign_coords(temporary_coords)
    with_new_dims = with_copies.rename_dims(dim_renames)
    without_old = with_new_dims.drop_vars(to_drop)
    return without_old.rename(restore_names)
#


## Others

In [9]:
def get_esgf_dataset_filepaths(variable, sourceID, experimentID, 
                               freq='mon', grid='g*', version='latest', 
                               variant='r1i1p1f1',
                               mipera = 'CMIP6', diresgf='/mnt/reef-ns1002k-esgf/', verbose=False, **kwargs): 
    """
    Returns the sorted filepaths of the netCDF files of one dataset stored in a local
    directory tree organised like the Earth System Grid Federation (ESGF) archive:

        <diresgf>/<mipera>/<activity>/<institution>/<sourceID>/<experimentID>/
            <variant>/<table>/<variable>/<grid>/<version>/
            <variable>_<table>_<sourceID>_<experimentID>_<variant>_<grid>*.nc

    The activity, institution and table are deduced from `experimentID`, `sourceID`
    and `variable` (+ `freq`) respectively, for the cases listed in the function body.

    Parameters:
    -----------
    variable : str
        Name of the variable (e.g. 'o2', 'thetao', 'areacello', 'psl').
    sourceID : str
        Name of the model (e.g. 'NorESM2-LM').
    experimentID : str
        Name of the experiment (e.g. 'historical', 'ssp585', 'piControl').
    freq : str, optional
        Frequency of the data, appended to 'O' or 'A' to build the table name
        (default is 'mon').
    grid : str, optional
        Grid label; may contain glob wildcards (default is 'g*').
    version : str, optional
        Name of the version directory (default is 'latest').
    variant : str, optional
        Variant label (default is 'r1i1p1f1').
    mipera : str, optional
        Name of the CMIP era directory (default is 'CMIP6').
    diresgf : str, optional
        Root directory of the archive, with or without a trailing '/'
        (default is '/mnt/reef-ns1002k-esgf/').
    verbose : bool, optional
        If True, prints the function name at the start and end of execution (default is False).
    **kwargs : dict, optional
        Accepted for call-site convenience and ignored.

    Returns:
    --------
    List[str]
        Sorted list of the matching filepaths (empty when nothing matches).

    Raises:
    -------
    SystemExit
        When `experimentID`, `sourceID` or `variable` is not one of the
        implemented cases (checked in that order).

    Example:
    --------
    fp_list = get_esgf_dataset_filepaths('thetao', 'CanESM5', 'historical', freq='mon')

    Dependencies:
    -------------
    glob, os, sys
    """
    import glob, os, sys
    
    if verbose: print('func: get_esgf_dataset_filepaths')
    
    # experiment -> activity
    if experimentID in ['1pctCO2', 'piControl', 'historical', 'abrupt-4xCO2']: zwActivity='CMIP'
    elif experimentID in ['ssp126', 'ssp245', 'ssp585']: zwActivity='ScenarioMIP'
    else: sys.exit('Check experimentID, case not implemented')
    
    # model -> institution
    if sourceID in ['CESM2', 'CESM2-WACCM']: zwInstitutionID = 'NCAR'
    elif sourceID in ['ACCESS-ESM1-5']: zwInstitutionID = 'CSIRO'
    elif sourceID in ['CNRM-ESM2-1']: zwInstitutionID = 'CNRM-CERFACS'
    elif sourceID in ['CanESM5', 'CanESM5-CanOE']: zwInstitutionID = 'CCCma'
    elif sourceID in ['UKESM1-0-LL']: zwInstitutionID = 'NIMS-KMA'
    elif sourceID in ['GFDL-CM4', 'GFDL-ESM4']: zwInstitutionID = 'NOAA-GFDL'
    elif sourceID in ['IPSL-CM6A-LR', 'IPSL-CM6A-LR-INCA']: zwInstitutionID = 'IPSL'
    elif sourceID in ['MIROC-ES2L']: zwInstitutionID = 'MIROC'
    elif sourceID in ['MPI-ESM1-2-LR', 'ICON-ESM-LR']: zwInstitutionID = 'MPI-M'
    elif sourceID in ['NorESM2-LM']: zwInstitutionID = 'NCC'
    else: sys.exit('Check sourceID, case not implemented')
    
    # variable (+ frequency) -> table
    ocean_list = ['fgco2', 'intpp', 'o2', 'thetao', 'so', 'agessc', 'po4', 'no3', 'dissic', 'talk']
    if variable in ocean_list: zwTableID = 'O'+freq
    elif variable in ['areacello']: zwTableID='Ofx'
    elif variable in ['psl']: zwTableID='A'+freq
    else: sys.exit('!!! WARNING !!! Check variable, case not implemented')
        
    zwfname = variable +'_'+ zwTableID +'_'+ sourceID +'_'+ \
        experimentID +'_'+ variant +'_'+ grid +'*.nc' 
    # os.path.join inserts the separators itself, so the result is the same
    # whether or not `diresgf` ends with '/'.
    zwpattern = os.path.join(diresgf, mipera, zwActivity, zwInstitutionID, sourceID,
                             experimentID, variant, zwTableID, variable, grid, version,
                             zwfname)

    if verbose: print('endfunc')
    # glob returns matches in arbitrary order; sort for a reproducible result.
    return sorted(glob.glob(zwpattern))
#
def nan_helper(y):
    """Locate the NaNs of an array and provide a mask-to-index converter.

    Input:
        - y, 1d numpy array with possible NaNs
    Output:
        - nans, boolean mask, True where y is NaN
        - index, a function taking a boolean mask and returning the integer
          positions of its True entries
    Example:
        >>> # fill the NaNs of y by linear interpolation
        >>> nans, x = nan_helper(y)
        >>> y[nans] = np.interp(x(nans), x(~nans), y[~nans])
        nb: x(nans)   positions of the NaNs
            x(~nans)  positions of the valid values
            y[~nans]  the valid values themselves
    """

    def index(logical_indices):
        # first (and, for 1d input, only) axis of the non-zero positions
        positions = logical_indices.nonzero()
        return positions[0]

    nans = np.isnan(y)
    return nans, index
#


# Compute yearly mean for SSP585

## Check data availability

In [11]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for SSP585')
print('## Check data avaibility')

simu = 'ssp585'
esm  = 'NorESM2-LM' 

# For each variable: open all the monthly files and check that the number of
# time steps matches 12 x (number of calendar years spanned).
var_list = ['o2', 'thetao', 'so', 'agessc', 'dissic', 'talk', 'po4']
for var in var_list:
    
    print('============')
    print(var.upper())
    print('============')
    print('')

    # Locate the files
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            diresgf='/mnt/reef-ns1002k-ns9034k/', 
                                            version='v20191108', 
                                            variant='r1i1p1f1', grid='gr')
    if not fname_list: 
        # Report and move on to the next variable instead of failing
        # inside open_mfdataset.
        print('!!! WARNING no file found for %s'%var)
        print('')
        continue
    #
    # Only metadata is needed; the context manager closes the files as soon
    # as the three numbers have been extracted.
    with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
        ymin = int(np.min(zwds['time.year']))
        ymax = int(np.max(zwds['time.year']))
        tstep = zwds[var].shape[0]  # assumes time is the first dimension — TODO confirm
    #
    good = tstep/12 == ymax-ymin+1
    if good: 
        print('%s GOOD'%esm)
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('!!! WARNING some year are missing, '
              'here is the list of available files: ')
        for fname in fname_list: print(fname)
    #
    print('')
    #





2024-08-21 16:08:57.850687
# Compute yearly mean for SSP585
## Check data avaibility
O2

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

THETAO

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

SO

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

AGESSC

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

DISSIC

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

TALK

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

PO4

%s GOOD
2015 to 2100, 86.0 years (1032 months)
------------

peak memory: 585.15 MiB, increment: 400.53 MiB
CPU times: user 2.35 s, sys: 1.19 s, total: 3.54 s
Wall time: 19.9 s


## Compute

In [24]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for SSP585')
print('## Compute')


simu='ssp585'
esm  = 'NorESM2-LM' 
# Years to process, as zero-padded strings used to select along time.
# NOTE(review): the list stops at 2099 although the availability check
# reports data up to 2100 — presumably intentional; confirm.
year_list = ['%04d' %yyy for yyy in np.arange(2015, 2099.5)]

var_list = ['o2', 'thetao', 'so', 'agessc', 'dissic', 'talk', 'po4']

for var in var_list:

    print(f'Computing yearly mean for {var}...')

    # Locate the files
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            diresgf='/mnt/reef-ns1002k-ns9034k/', 
                                            version='v20191108', 
                                            variant='r1i1p1f1', grid='gr')
    # The context manager closes the input files once the variable is done;
    # deleting the name alone does not, because zwds2 still refers to them.
    with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
        # global attributes, copied onto every output file
        zwds_attrs=zwds.attrs

        # Keep only the variable of interest and normalise names / longitudes
        zwds2 = zwds[var].to_dataset()
        zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
        zwds2 = split_coords_dimensions(zwds2)
        zwds2 = shift_180_lon(zwds2)

        # Loop on years: only one year of data is held in memory at a time
        for year in year_list: 

            zwda = zwds2.sel(time=year)[var].load()
            # plain mean over the time steps of the year (no weighting)
            zwda_tavg = zwda.groupby('time.year').mean(dim='time')
            # Clean
            del zwda
            gc.collect()
            # create dataset
            zwda_ds = zwda_tavg.to_dataset() 
            zwda_ds.attrs = zwds_attrs
            # Save in netcdf, one file per variable and year
            ncname = netcdfdir+esm+'_'+simu+'_'+var+'_'+year+'.nc'
            zwda_ds.to_netcdf(ncname)
        #
    #
    del zwds2
    gc.collect()
    print(f'Done with {var}')
#





2024-08-21 16:16:41.939714
# Compute yearly mean for SSP585
## Compute
Computing yearly mean for o2...
Done with o2
Computing yearly mean for thetao...
Done with thetao
Computing yearly mean for so...
Done with so
Computing yearly mean for agessc...
Done with agessc
Computing yearly mean for dissic...
Done with dissic
Computing yearly mean for talk...
Done with talk
Computing yearly mean for po4...
Done with po4
peak memory: 3019.61 MiB, increment: 2521.24 MiB
CPU times: user 20min 36s, sys: 2min 6s, total: 22min 42s
Wall time: 24min 1s


# Compute yearly mean for historical

## Check data availability

In [27]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for historical')
print('## Check data avaibility')


simu='historical'
esm  = 'NorESM2-LM' 

var_list = ['o2', 'thetao', 'so', 'agessc', 'dissic', 'talk', 'po4']

# For each variable: open all the monthly files and check that the number of
# time steps matches 12 x (number of calendar years spanned).
for var in var_list:
    
    print('============')
    print(var.upper())
    print('============')
    print('')

    # Locate the files
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            diresgf='/mnt/reef-ns1002k-ns9034k/', 
                                            version='v20190815', 
                                            variant='r1i1p1f1', grid='gr')
    if not fname_list: 
        # Report and move on to the next variable instead of failing
        # inside open_mfdataset.
        print('!!! WARNING no file found for %s'%var)
        print('')
        continue
    #
    # Only metadata is needed; the context manager closes the files as soon
    # as the three numbers have been extracted.
    with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
        ymin = int(np.min(zwds['time.year']))
        ymax = int(np.max(zwds['time.year']))
        tstep = zwds[var].shape[0]  # assumes time is the first dimension — TODO confirm
    #
    good = tstep/12 == ymax-ymin+1
    if good: 
        print('%s GOOD'%esm)
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
        print('------------')
    else:
        print('!!! WARNING some year are missing, '
              'here is the list of available files: ')
        for fname in fname_list: print(fname)
    #
    print('')
#





2024-08-21 16:43:18.360385
# Compute yearly mean for historical
## Check data avaibility
O2

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

THETAO

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

SO

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

AGESSC

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

DISSIC

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

TALK

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

PO4

NorESM2-LM GOOD
1850 to 2014, 165.0 years (1980 months)
------------

peak memory: 2791.36 MiB, increment: 1345.91 MiB
CPU times: user 3.3 s, sys: 891 ms, total: 4.19 s
Wall time: 31.8 s


## Compute

In [30]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for historical')
print('## Compute')


simu='historical'
esm  = 'NorESM2-LM' 

# Years to process (1850 to 2014 included), as zero-padded strings used to
# select along time.
year_list = ['%04d' %yyy for yyy in np.arange(1850, 2014.5)]

var_list = ['o2', 'thetao', 'so', 'agessc', 'dissic', 'talk', 'po4']
for var in var_list:

    print(f'Computing yearly mean for {var}...')

    # Locate the files
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            diresgf='/mnt/reef-ns1002k-ns9034k/', 
                                            version='v20190815', 
                                            variant='r1i1p1f1', grid='gr')
    # The context manager closes the input files once the variable is done;
    # deleting the name alone does not, because zwds2 still refers to them.
    with xr.open_mfdataset(fname_list, **kwopenmfds) as zwds:
        # global attributes, copied onto every output file
        zwds_attrs=zwds.attrs

        # Keep only the variable of interest and normalise names / longitudes
        zwds2 = zwds[var].to_dataset()
        zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
        zwds2 = split_coords_dimensions(zwds2)
        zwds2 = shift_180_lon(zwds2)

        # Loop on years: only one year of data is held in memory at a time
        for year in year_list: 

            zwda = zwds2.sel(time=year)[var].load()
            # plain mean over the time steps of the year (no weighting)
            zwda_tavg = zwda.groupby('time.year').mean(dim='time')
            # Clean
            del zwda
            gc.collect()
            # create dataset
            zwda_ds = zwda_tavg.to_dataset() 
            zwda_ds.attrs = zwds_attrs
            # Save in netcdf, one file per variable and year
            ncname = netcdfdir+esm+'_'+simu+'_'+var+'_'+year+'.nc'
            zwda_ds.to_netcdf(ncname)
        #
    #
    del zwds2
    gc.collect()
    print(f'Done with {var}')
#





2024-08-21 16:45:17.785117
# Compute yearly mean for historical
## Compute
Computing yearly mean for o2...
Done with o2
Computing yearly mean for thetao...
Done with thetao
Computing yearly mean for so...
Done with so
Computing yearly mean for agessc...
Done with agessc
Computing yearly mean for dissic...
Done with dissic
Computing yearly mean for talk...
Done with talk
Computing yearly mean for po4...
Done with po4
peak memory: 4431.55 MiB, increment: 3361.83 MiB
CPU times: user 40min 10s, sys: 4min 12s, total: 44min 23s
Wall time: 47min 8s


# Compute yearly mean for piControl

## def shorten_fname_list(fname_list, startyear, endyear):

In [28]:
def shorten_fname_list(fname_list, startyear, endyear):
    """
    Keep the files whose year range overlaps the period startyear-endyear.

    The years covered by a file are read from the end of its name, which is
    expected to look like '..._YYYYMM-YYYYMM.nc' (only the first four
    characters of each date are used).

    Parameters:
    -----------
    fname_list : List[str]
        Filepaths to filter. The list is left untouched.
    startyear : int
        First year of the targeted period.
    endyear : int
        Last year of the targeted period.

    Returns:
    --------
    List[str]
        New list, sorted by name and without duplicates, of the files that
        overlap the period. When no file overlaps, the whole sorted list is
        returned instead.

    Raises:
    -------
    IndexError, ValueError
        When the end of a file name does not follow the pattern above.

    Example:
    --------
    shorten_fname_list(['o2_Omon_X_piControl_r1i1p1f1_gr_160001-160912.nc',
                        'o2_Omon_X_piControl_r1i1p1f1_gr_161001-161912.nc'], 1605, 1608)
    # -> ['o2_Omon_X_piControl_r1i1p1f1_gr_160001-160912.nc']
    """
    # sorted() works on a copy, so the caller's list keeps its order
    ordered_fnames = sorted(fname_list)
    new_fname_list = []
    for fname in ordered_fnames: 
        period = fname.split('/')[-1].split('_')[-1].split('-')
        year1_of_fname = int(period[0][:4])
        year2_of_fname = int(period[1][:4])
        # The file is kept as soon as one bound of one range falls inside
        # the other range.
        overlaps = (year1_of_fname <= startyear <= year2_of_fname
                    or year1_of_fname <= endyear <= year2_of_fname
                    or startyear <= year1_of_fname <= endyear
                    or startyear <= year2_of_fname <= endyear)
        if overlaps and fname not in new_fname_list: 
            new_fname_list.append(fname)
        #
    #
    # Nothing matched: fall back on every file rather than on an empty list
    if len(new_fname_list)==0: new_fname_list=ordered_fnames
    return new_fname_list
#


## Check data availability

In [29]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for piControl')
print('## Check data avaibility')

# First piControl year to use, per model
refyear_dict = {
    'MPI-ESM1-2-LR': 1850,
    'ACCESS-ESM1-5':  161,
    'IPSL-CM6A-LR' : 1910,
    'CanESM5'      : 5201,
    'MIROC-ES2L'   : 1850,
    'NorESM2-LM'   : 1600
}

simu='piControl'
esm  = 'NorESM2-LM' 

var_list = ['o2', 'thetao', 'so', 'agessc', 'dissic', 'talk', 'po4']

# Targeted period: 250 years starting at the reference year.
# NOTE(review): 165+84 presumably stands for the 165 historical years
# (1850-2014) followed by the 85 scenario years (2015-2099) — confirm.
# startyear, endyear = refyear_dict[esm], refyear_dict[esm]+164
startyear, endyear = refyear_dict[esm], refyear_dict[esm]+165+84

for var in var_list:
    
    print('============')
    print(var.upper())
    print('============')
    print('')

    print('piControl targeted time period: %04d-%04d'%(startyear, endyear))

    # Locate the files and keep those overlapping the targeted period
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            diresgf='/mnt/reef-ns1002k-ns9034k/', 
                                            version='v20210118', 
                                            variant='r1i1p1f1', grid='gr')
    if not fname_list: 
        # Report and move on to the next variable instead of failing
        # inside open_mfdataset.
        print('!!! WARNING no file found for %s'%var)
        print('------------')
        print('')
        continue
    #
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    # Only metadata is needed; the context manager closes the files as soon
    # as the three numbers have been extracted.
    with xr.open_mfdataset(new_fname_list, **kwopenmfds) as zwds:
        ymin = int(np.min(zwds['time.year']))
        ymax = int(np.max(zwds['time.year']))
        tstep = zwds[var].shape[0]  # assumes time is the first dimension — TODO confirm
    #
    # good1: no gap in the opened files; good2: they cover the targeted period
    good1 = tstep/12 == ymax-ymin+1
    good2 = (startyear>=ymin) & (endyear<=ymax)
    if good1 & good2: 
        print('%s GOOD, time period complete and match target'%esm)
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months)')
    elif good1 and (not good2): 
        print('!!! WARNING !!! %s, time period complete BUT do not match target'%esm)
        print(f'{ymin} to {ymax}, {tstep/12} years ({tstep} months), '
              'here is the list of the files:')
        for fname in new_fname_list: print(fname)
    else: 
        print('!!! WARNING !!! %s, some years are missing, '
              'here is the list of available files: '%esm)
        for fname in new_fname_list: print(fname)
    #
    print('------------')
    print('')
#



2024-08-21 16:44:20.993939
# Compute yearly mean for piControl
## Check data avaibility
O2

piControl targeted time period: 1600-1849
NorESM2-LM GOOD, time period complete and match target
1600 to 1850, 251.0 years (3012 months)
------------

THETAO

piControl targeted time period: 1600-1849
NorESM2-LM GOOD, time period complete and match target
1600 to 1850, 251.0 years (3012 months)
------------

SO

piControl targeted time period: 1600-1849
NorESM2-LM GOOD, time period complete and match target
1600 to 1850, 251.0 years (3012 months)
------------

AGESSC

piControl targeted time period: 1600-1849
NorESM2-LM GOOD, time period complete and match target
1600 to 1850, 251.0 years (3012 months)
------------

DISSIC

piControl targeted time period: 1600-1849
NorESM2-LM GOOD, time period complete and match target
1600 to 1850, 251.0 years (3012 months)
------------

TALK

piControl targeted time period: 1600-1849
NorESM2-LM GOOD, time period complete and match target
1600 to 1850, 251.0 ye

## Compute

In [None]:
%%time
%%memit -c
print(datetime.datetime.now())
print('# Compute yearly mean for piControl')
print('## Compute')

# First piControl year to use, per model
refyear_dict = {
    'MPI-ESM1-2-LR': 1850,
    'ACCESS-ESM1-5':  161,
    'IPSL-CM6A-LR' : 1910,
    'CanESM5'      : 5201,
    'MIROC-ES2L'   : 1850,
    'NorESM2-LM'   : 1600
}

simu='piControl'
esm  = 'NorESM2-LM' 

var_list = ['o2', 'thetao', 'so', 'agessc', 'dissic', 'talk', 'po4']

# Targeted period: 250 years starting at the reference year.
# NOTE(review): 165+84 presumably stands for the 165 historical years
# (1850-2014) followed by the 85 scenario years (2015-2099) — confirm.
# startyear, endyear = refyear_dict[esm], refyear_dict[esm]+164
startyear, endyear = refyear_dict[esm], refyear_dict[esm]+165+84
# Years to process (endyear included), as zero-padded strings used to select
# along time.
year_list = ['%04d' %yyy for yyy in np.arange(startyear, endyear+.5)]

for var in var_list:

    print(f'Computing yearly mean for {var}...')

    # Locate the files and keep those overlapping the targeted period
    fname_list = get_esgf_dataset_filepaths(var, esm, simu, 
                                            diresgf='/mnt/reef-ns1002k-ns9034k/', 
                                            version='v20210118', 
                                            variant='r1i1p1f1', grid='gr')
    new_fname_list = shorten_fname_list(fname_list, startyear, endyear)
    # The context manager closes the input files once the variable is done;
    # deleting the name alone does not, because zwds2 still refers to them.
    with xr.open_mfdataset(new_fname_list, **kwopenmfds) as zwds:
        # global attributes, copied onto every output file
        zwds_attrs=zwds.attrs

        # Keep only the variable of interest and normalise names / longitudes
        zwds2 = zwds[var].to_dataset()
        zwds2 = rename_vars_dims_coords(zwds2, rename_dict)
        zwds2 = split_coords_dimensions(zwds2)
        zwds2 = shift_180_lon(zwds2)

        # Loop on years: only one year of data is held in memory at a time
        for year in year_list: 

            zwda = zwds2.sel(time=year)[var].load()
            # plain mean over the time steps of the year (no weighting)
            zwda_tavg = zwda.groupby('time.year').mean(dim='time')
            # Clean
            del zwda
            gc.collect()
            # create dataset
            zwda_ds = zwda_tavg.to_dataset() 
            zwda_ds.attrs = zwds_attrs
            # Save in netcdf, one file per variable and year
            ncname = netcdfdir+esm+'_'+simu+'_'+var+'_'+year+'.nc'
            zwda_ds.to_netcdf(ncname)
        #
    #
    del zwds2
    gc.collect()
    print(f'Done with {var}')
#





2024-08-21 17:32:26.042909
# Compute yearly mean for piControl
## Compute
Computing yearly mean for o2...
Done with o2
Computing yearly mean for thetao...
Done with thetao
Computing yearly mean for so...
Done with so
Computing yearly mean for agessc...
Done with agessc
Computing yearly mean for dissic...
Done with dissic
Computing yearly mean for talk...
Done with talk
Computing yearly mean for po4...
Done with po4
peak memory: 6125.88 MiB, increment: 3866.18 MiB
CPU times: user 1h 52s, sys: 6min 16s, total: 1h 7min 8s
Wall time: 1h 11min 24s
