In [1]:
import numpy as np
import xarray as xr
import pandas as pd

import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cmip6.csv.gz')
df

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,dcpp_init_year,version,time_range,path
0,AerChemMIP,BCC,BCC-ESM1,ssp370-lowNTCF,r1i1p1f1,day,rsds,gn,,v20190624,20150101-20551231,/glade/collections/cmip/CMIP6/AerChemMIP/BCC/B...
1,AerChemMIP,BCC,BCC-ESM1,ssp370-lowNTCF,r1i1p1f1,day,tasmax,gn,,v20190624,20150101-20551231,/glade/collections/cmip/CMIP6/AerChemMIP/BCC/B...
2,AerChemMIP,BCC,BCC-ESM1,ssp370-lowNTCF,r2i1p1f1,day,rsds,gn,,v20190624,20150101-20551231,/glade/collections/cmip/CMIP6/AerChemMIP/BCC/B...
3,AerChemMIP,BCC,BCC-ESM1,ssp370-lowNTCF,r2i1p1f1,day,tasmax,gn,,v20190624,20150101-20551231,/glade/collections/cmip/CMIP6/AerChemMIP/BCC/B...
4,AerChemMIP,BCC,BCC-ESM1,ssp370-lowNTCF,r3i1p1f1,day,rsds,gn,,v20190624,20150101-20551231,/glade/collections/cmip/CMIP6/AerChemMIP/BCC/B...
...,...,...,...,...,...,...,...,...,...,...,...,...
1850252,ScenarioMIP,MOHC,UKESM1-0-LL,ssp585,r1i1p1f2,6hrLev,va,gn,,v20200602,209901010600-210001010000,/glade/collections/cmip/CMIP6/gcm/ScenarioMIP/...
1850253,ScenarioMIP,MOHC,UKESM1-0-LL,ssp585,r1i1p1f2,6hrLev,va,gn,,v20200602,210001010600-210101010000,/glade/collections/cmip/CMIP6/gcm/ScenarioMIP/...
1850254,ScenarioMIP,MOHC,UKESM1-0-LL,ssp585,r1i1p1f2,Oday,tos,gn,,v20190726,20150101-20491230,/glade/collections/cmip/CMIP6/gcm/ScenarioMIP/...
1850255,ScenarioMIP,MOHC,UKESM1-0-LL,ssp585,r1i1p1f2,Oday,tos,gn,,v20190726,20500101-20991230,/glade/collections/cmip/CMIP6/gcm/ScenarioMIP/...


In [3]:
mols_to_Tmolmon = 1e-12 * 86400. * 365. / 12.

def compute_mon_fgo2(source_id, nmax_members=None):
    """compute monthly climatology for fgo2 """
    print('='*40)
    print(source_id)
    
    df_sub = df.loc[
        (df.source_id==source_id) & (df.variable_id=='areacello')
    ]   
    if len(df_sub) == 0:
        return
    
    grid = xr.open_dataset(df_sub.iloc[0].path)
    
    variable_id = 'fgo2'
    df_sub = df.loc[
        (df.source_id==source_id) 
        & (df.variable_id==variable_id) 
        & (df.experiment_id=='historical') 
    ]   
    if len(df_sub) == 0: 
        return
        
    member_ids = sorted(df_sub.member_id.unique().tolist())
    print(f'\tfound {len(member_ids)} ensemble members')
    if nmax_members is not None:
        if len(member_ids) > nmax_members:
            member_ids = member_ids[:nmax_members]
        
    ds_list = []
    for member_id in member_ids:
        paths = sorted(list(
            df_sub.loc[(df.member_id == member_id)].path
        ))
        print(f'\treading {path}')
        ds_list.append(
            xr.open_mfdataset(paths).sel(time=slice('1980', '2010'))
        )    

    ds = xr.concat(
        ds_list, 
        dim=xr.DataArray(member_ids, dims=('member_id'), name='member_id')
    )
    ds['areacello'] = grid.areacello
    assert (ds.fgo2.attrs['units'] == 'mol m-2 s-1')
    
    for lat_varname in ['latitude', 'lat', 'nav_lat']:
        if lat_varname in grid:
            break
    print(f'lat varname: {lat_varname}')
    
    rmasks = dict(
        NH=grid.areacello.where(grid[lat_varname] >= 20.).fillna(0.),
        SH=grid.areacello.where(grid[lat_varname] <= -20.).fillna(0.),
    )
    da_list = []
    regions = []
    
    dims_lateral = tuple(d for d in ds.fgo2.dims if d not in ['time', 'member_id'])
    print(f'\tlateral dims: {dims_lateral}')
    for key, rmask in rmasks.items():
        plt.figure()
        rmask.plot()
        fgo2 = ((-1.0) * ds.fgo2 * rmask).sum(dims_lateral) * mols_to_Tmolmon
        fgo2.attrs['units'] = 'Tmol O$_2$ month$^{-1}$'
        fgo2.attrs['note'] = 'flux sign convention := positive upward'        
        
        with xr.set_options(keep_attrs=True):
            da_list.append(fgo2.groupby('time.month').mean().mean('member_id'))
        regions.append(key)    
  
    print()
    var =  xr.concat(
        da_list, 
        dim=xr.DataArray(regions, dims=('region'), name='region'),
    )
    var.name = variable_id
    return var

source_id = 'IPSL-CM6A-LR' #'MPI-M.MPI-ESM1-2-HR' #''MPI-ESM-1-2-HAM' #'CNRM-ESM2-1' #'CanESM5'

da = compute_mon_fgo2(source_id, nmax_members=2)
da

IPSL-CM6A-LR
	found 32 ensemble members


NameError: name 'path' is not defined

In [None]:
models  = [
    'CanESM5', 'CanESM5-CanOE', 'CNRM-ESM2-1', 'ACCESS-ESM1-5',
    'MPI-ESM-1-2-HAM','IPSL-CM6A-LR','MPI-M.MPI-ESM1-2-HR'
]

da_list = []
model_list = []
for source_id in models:
    da = compute_mon_fgo2(source_id, nmax_members=4)
    if da is not None:
        da_list.append(da)
        model_list.append(source_id)
    
ds = xr.concat(da_list, 
               dim=xr.DataArray(model_list, dims=('model'), name='model')
              ).to_dataset().drop(['depth']).compute()
ds

In [None]:
ds

In [None]:
fig = plt.figure(facecolor='w')

monlabs = np.array(["J", "F", "M", "A", "M", "J", "J", "A", "S", "O", "N", "D"])
plt.axhline(0., color='k', lw=1.)
plt.title(source_id)

for m in ds.model.values:
    plt.plot(ds.month-0.5, ds.fgo2.sel(model=m, region='NH'), '.-', label=m)

ax = plt.gca()
ax.set_xticks(np.arange(13))
ax.set_xticklabels([f'        {m}' for m in monlabs]+[''])
ax.set_ylabel(f"O$_2$ flux [{fgo2_mon.attrs['units']}]")
ax.set_title('NH');
ax.legend();


In [None]:
dp = xr.open_dataset('cmip6_fgo2_N20N_mol_s_1_sc_ds_v4.nc')
for m in range(len(dp.models)):
    (dp.__xarray_dataarray_variable__ * mols_to_Tmolmon).isel(models=m).plot()

In [None]:
ds.to_netcdf('cmip6_fgo2_nh_sh.nc')

In [None]:
ds.info()