# produce_cmip6_summary_figs.ipynb
Plot summary figures using data produced by process_cmip6_file.ipynb.

B. S. Grandey, 2022

In [1]:
! date

Thu Feb 10 19:15:51 +08 2022


In [2]:
from cdo import Cdo, CDOException
import matplotlib.pyplot as plt
import pathlib
import xarray as xr

%matplotlib inline

In [3]:
# Create the CDO interface used by the plotting function below.
# NOTE(review): tempdir='temp' presumably directs cdo.py's temporary output files to ./temp
# (relative to the working directory) - confirm against the cdo.py documentation.
cdo = Cdo(tempdir='temp')

# Record tool and library versions alongside the output, for provenance
print(f'CDO version: {cdo.version()}')
print(f'cdo.py bindings version: {cdo.__version__()}')

print(f'xarray version: {xr.__version__}')

CDO version: 2.0.3
cdo.py bindings version: 1.5.4
xarray version: 0.21.0


In [4]:
# Reduce resolution of inline images (to reduce size of this notebook).
# 48 dpi is chosen for a small notebook file, not for display quality.
plt.rcParams['figure.dpi'] = 48.0

In [5]:
# Input base directory containing files produced by process_cmip6_file.ipynb
in_base = pathlib.Path('~/Data/p22c/CMIP6/').expanduser()
# Output base directory in which to save figures
out_base = pathlib.Path('~/Data/p22c/CMIP6_figs/').expanduser()
# parents=True: also create missing parent directories, so this cell does not
# fail with FileNotFoundError when the parent of out_base does not exist yet
out_base.mkdir(parents=True, exist_ok=True)

## Function to produce timeseries of fldmeans for all source-member pairs (rows) and experiments (columns) of interest for a given variable
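These timeseries figures provide a quick summary of available data coverage, especially when `all_source_member_pairs=True`: rows are then drawn for every source-member pair that has data for any variable, so missing data for the plotted variable show up as empty panels.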

In [6]:
def fig_timeseries(variable='zostoga',
                   in_group='regrid_yearmean',  # directory within in_base
                   experiments=['piControl', 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585'],
                   all_source_member_pairs=True,  # include source-member pairs that have data for any variable?
                  ):
    """Plot field-mean timeseries of one variable for every source-member pair and experiment.

    The figure has one row per source-member pair and one column per experiment.
    Panels without input data are left empty, so the grid also shows data coverage.
    The figure is saved as a PDF in ``out_base/in_group``.

    Relies on the notebook-level names ``cdo``, ``in_base`` and ``out_base``.

    Parameters
    ----------
    variable : str
        Variable name; used as the directory name below ``in_group`` and as the
        name of the data variable read from the CDO output.
    in_group : str
        Directory within ``in_base`` that holds the input data.
    experiments : sequence of str
        Experiment names, one per column. Not modified by this function.
    all_source_member_pairs : bool
        If True, rows cover every source-member pair found for any variable in
        ``in_group``; if False, only pairs found for ``variable``.

    Returns
    -------
    matplotlib.figure.Figure
        The figure, left open; the caller decides whether to show or close it.

    Raises
    ------
    ValueError
        If no source-member pairs are found or ``experiments`` is empty.
    """
    print(f'---- {in_group} {variable} ----')
    # Output directory (parents=True so a missing out_base tree does not cause a failure)
    out_dir = out_base.joinpath(in_group)
    out_dir.mkdir(parents=True, exist_ok=True)
    # Identify available source-member pairs
    if all_source_member_pairs:  # include source-member pairs that have data for any variable?
        pair_pattern = f'{in_group}/*/[!.]*_*'
    else:  # or only include source-member pairs that have data for this specific variable?
        pair_pattern = f'{in_group}/{variable}/[!.]*_*'
    # The set removes duplicates (the same pair appears once per variable directory)
    source_member_pairs = sorted({d.name for d in in_base.glob(pair_pattern)})
    # Configure subplots
    nrows = len(source_member_pairs)
    ncols = len(experiments)
    if nrows == 0 or ncols == 0:
        # Fail with an explicit message rather than an opaque error from plt.subplots
        raise ValueError(f'Nothing to plot for {in_group} {variable}: '
                         f'{nrows} source-member pairs, {ncols} experiments')
    # squeeze=False keeps axs two-dimensional even for a single row or column,
    # so axs[r, c] indexing below is always valid
    fig, axs = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False,
                            figsize=(ncols*3, nrows*3), constrained_layout=True)
    fig.suptitle(variable, fontsize='xx-large')  # main title
    # Label every row up front, so rows lacking data in the first column are still labelled
    for r, source_member in enumerate(source_member_pairs):
        axs[r, 0].set_ylabel(f'{source_member}\n{variable}', fontsize='x-large')
    # Loop over cols and rows
    for c, exp in enumerate(experiments):
        ncover = 0  # counter for number of source-member pairs with data for this experiment
        for r, source_member in enumerate(source_member_pairs):
            # Input data directory
            in_dirs = sorted(in_base.glob(f'{in_group}/{variable}/{source_member}/*.{exp}.*'))
            if not in_dirs:
                continue  # no data for this pair and experiment: leave the panel empty
            if len(in_dirs) > 1:
                print(f'Warning: {len(in_dirs)} possible input directories found for {source_member} {exp}')
            in_dir = in_dirs[-1]  # if several match, use the last one in sorted order
            # Input files
            in_fns = sorted(str(f) for f in in_dir.glob('*.nc'))
            if not in_fns:
                continue  # directory exists but holds no NetCDF files
            # Use CDO to merge files (if necessary), calculate fldmean, and use an absolute time axis
            temp_fn = cdo.fldmean(input=f'-mergetime {" ".join(in_fns)}',
                                  options='-a -f nc', env={"SKIP_SAME_TIME": "1"})
            # Read data using xarray; the context manager closes the file once plotted
            with xr.open_dataset(temp_fn) as ds:
                # NOTE(review): with the absolute time axis, time is presumably YYYYMMDD-like,
                # so /1e4 gives approximate years; [:,0,0] takes the first index of the
                # second and third dimensions of the fldmean output - confirm.
                axs[r, c].plot(ds['time']/1e4, ds[variable][:,0,0])
            # Increase counter
            ncover += 1
        # Label column
        axs[0, c].set_title(f'{exp} (n={ncover})', fontsize='x-large')
    # Save
    out_fn = out_dir.joinpath(f'timeseries_{variable}_{nrows}x{ncols}.pdf')
    fig.savefig(out_fn)
    print(f'Written {in_group}/{out_fn.name}')
    return fig

In [7]:
%%time
# Loop over variables of interest and plot timeseries
for variable in ['zostoga', 'thetaoga',  # 1D ocean
                 'zos', 'hfds',  # 2D ocean
                 'rlut', 'rsdt', 'rsut', 'tas'  # 2D atmos
                ]:
    fig = fig_timeseries(variable=variable)
    # Close this specific figure (rather than pyplot's implicit "current" figure):
    # it has already been saved, and closing it avoids inline display
    plt.close(fig)

---- regrid_yearmean zostoga ----
Written regrid_yearmean/timeseries_zostoga_51x6.pdf
---- regrid_yearmean thetaoga ----
Written regrid_yearmean/timeseries_thetaoga_51x6.pdf
---- regrid_yearmean zos ----
Written regrid_yearmean/timeseries_zos_51x6.pdf
---- regrid_yearmean hfds ----
Written regrid_yearmean/timeseries_hfds_51x6.pdf
---- regrid_yearmean rlut ----
Written regrid_yearmean/timeseries_rlut_51x6.pdf
---- regrid_yearmean rsdt ----
Written regrid_yearmean/timeseries_rsdt_51x6.pdf
---- regrid_yearmean rsut ----
Written regrid_yearmean/timeseries_rsut_51x6.pdf
---- regrid_yearmean tas ----
Written regrid_yearmean/timeseries_tas_51x6.pdf
CPU times: user 2min 18s, sys: 1min 22s, total: 3min 40s
Wall time: 16min 1s


In [8]:
! date

Thu Feb 10 19:31:54 +08 2022
