# produce_cmip6_summary_figs.ipynb
Plot summary figures using data produced by process_cmip6_file.ipynb.

B. S. Grandey, 2022

In [1]:
# Print the current date and time via a shell escape (timestamp for this run of the notebook)
! date

Wed Nov  2 18:28:35 +08 2022


In [2]:
import cartopy.crs as ccrs
from cdo import Cdo, CDOException
import matplotlib.pyplot as plt
import pathlib
import xarray as xr

In [3]:
# Instantiate the CDO Python wrapper; tempdir='temp' is passed as the location for its temporary files
cdo = Cdo(tempdir='temp')

# Report the versions of the key packages/tools used by this notebook (one line each)
print('\n'.join([
    f'CDO version: {cdo.version()}',
    f'cdo.py bindings version: {cdo.__version__()}',
    f'cartopy version: {ccrs.cartopy.__version__}',
    f'xarray version: {xr.__version__}',
]))

CDO version: 2.0.3
cdo.py bindings version: 1.5.4
cartopy version: 0.20.2
xarray version: 0.21.1


In [4]:
# Use a low DPI for inline figures so that the saved notebook stays small
plt.rc('figure', dpi=48.0)

In [5]:
# Input base directory containing files produced by process_cmip6_file.ipynb
in_base = pathlib.Path('~/Data/p22c/CMIP6/').expanduser()
# Output base directory in which to save figures
out_base = pathlib.Path('~/Data/p22c/CMIP6_figs/').expanduser()
# parents=True: also create any missing parent directories, rather than failing with FileNotFoundError
out_base.mkdir(parents=True, exist_ok=True)

## Function to produce timeseries of fldmeans for all source-member pairs (rows) and experiments (columns) of interest for a given variable
These timeseries figures provide a quick summary of available data coverage, especially when all_source_member_pairs=True.

In [6]:
def fig_timeseries(variable='zostoga',
                   in_group_list=['regrid_missto0_yearmean_fldmean',],
                   experiments=['piControl', '1pctCO2', 'abrupt-4xCO2',
                                'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585'],
                   all_source_member_pairs=True,  # include source-member pairs that have data for any variable?
                  ):
    """Plot field-mean timeseries for one variable: source-member pairs (rows) x experiments (columns).

    For each source-member pair, experiment, and input group, the NetCDF files found under
    in_base/<in_group>/<variable>/<source_member>/*.<experiment>.*/ are merged in time using CDO,
    reduced using fldmean, and drawn as a line on the corresponding subplot.
    The figure is saved as a PDF in out_base/fig_timeseries/.

    Args:
        variable: Name of the variable to plot (also the name of the data variable in the files).
        in_group_list: Directories within in_base to read from. Each group is drawn as a separate
            line. Only the first item is used to identify the available source-member pairs.
            (The default list is never modified by this function.)
        experiments: Experiments to show, one per column.
        all_source_member_pairs: If True, include a row for every source-member pair that has
            data for any variable; if False, only pairs that have data for this variable.

    Returns:
        The matplotlib Figure, or None if there are no source-member pairs or no experiments
        (in which case nothing is plotted or saved).
    """
    print(f'---- {in_group_list} {variable} ----')
    # Output directory
    out_dir = out_base.joinpath('fig_timeseries')
    out_dir.mkdir(exist_ok=True)
    # Identify available source-member pairs - using only first item in in_group_list
    if all_source_member_pairs:  # include source-member pairs that have data for any variable?
        pair_pattern = f'{in_group_list[0]}/*/[!.]*_*'
    else:  # or only include source-member pairs that have data for this specific variable?
        pair_pattern = f'{in_group_list[0]}/{variable}/[!.]*_*'
    source_member_pairs = sorted({d.name for d in in_base.glob(pair_pattern)})  # set removes duplicates
    # Configure subplots
    nrows = len(source_member_pairs)
    ncols = len(experiments)
    if nrows == 0 or ncols == 0:
        # plt.subplots cannot create an empty grid, so skip this variable instead of raising
        print(f'No source-member pairs or experiments found for {variable}; nothing to plot')
        return None
    # squeeze=False: axs is always 2D, so axs[r,c] works even with a single row or column
    fig, axs = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False,
                            figsize=(ncols*3, nrows*3), constrained_layout=True)
    fig.suptitle(variable, fontsize='xx-large')  # main title
    # Loop over cols and rows
    for c, exp in enumerate(experiments):
        print(f'Column {c}: {exp}')
        ncover = 0  # counter for number of source-member pairs with data coverage for this experiment
        for r, source_member in enumerate(source_member_pairs):
            ax = axs[r,c]  # select axis
            has_data = False  # has at least one line been drawn on this axis?
            # Loop over in_groups
            for in_group in in_group_list:
                # Input data directory
                in_dirs = sorted(in_base.glob(f'{in_group}/{variable}/{source_member}/*.{exp}.*'))
                if len(in_dirs) == 0:
                    continue  # no data for this in_group; try the next in_group
                elif len(in_dirs) > 1:
                    print(f'Warning: {len(in_dirs)} possible input dirs for {source_member} {exp} {in_group}')
                in_dir = in_dirs[-1]  # if several candidates, use the last one in sorted order
                # Input files
                in_fns = sorted(str(f) for f in in_dir.glob('*.nc'))
                if len(in_fns) == 0:
                    continue  # directory exists but contains no NetCDF files
                # Use CDO to merge files (if necessary), calculate fldmean, and use an absolute time axis
                # (SKIP_SAME_TIME is a CDO environment variable affecting mergetime's handling of duplicate timesteps)
                temp_fn = cdo.fldmean(input=f'-mergetime {" ".join(in_fns)}',
                                      options='-a -f nc', env={"SKIP_SAME_TIME": "1"})
                # Read data using xarray; the context manager closes the temporary file after plotting
                with xr.open_dataset(temp_fn) as ds:
                    # Lineplot (time/1e4 presumably converts the absolute time axis to years - TODO confirm)
                    ax.plot(ds['time']/1e4, ds[variable][:,0,0], alpha=0.5,
                            label=f'{in_group} {ds[variable].mean().data:0.2e}')
                has_data = True
            if has_data:
                # Show mean via legend (only when there is at least one line to describe)
                ax.legend(fontsize='xx-small')
                # Increase counter (once per source-member pair, regardless of number of in_groups)
                ncover += 1
            # Label row
            if c == 0:
                ax.set_ylabel(f'{source_member}\n{variable}', fontsize='x-large')
        # Label column
        axs[0,c].set_title(f'{exp} (n={ncover})', fontsize='x-large')
    # Save
    out_fn = out_dir.joinpath(f'timeseries_{variable}_{nrows}x{ncols}.pdf')
    print(f'Writing {out_fn.name}')
    fig.savefig(out_fn)
    print(f'Written {out_fn.name}')
    return fig

In [7]:
%%time
# Loop over variables of interest and plot a timeseries figure for each
for variable in ['zostoga', 'thetaoga',  # 1D ocean
                 'zos', 'hfds',  'hfgeou', 'hfcorr',  # 2D ocean
                 'rlut', 'rsdt', 'rsut', 'tas'  # 2D atmos
                ]:
    # Plot timeseries (the figure is saved to disk inside the function)
    fig = fig_timeseries(variable=variable)
    # Close this specific figure (don't show it inline) instead of relying on pyplot's "current figure"
    plt.close(fig)

---- ['regrid_missto0_yearmean_fldmean'] zostoga ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp370
Column 7: ssp585
Writing timeseries_zostoga_73x8.pdf
Written timeseries_zostoga_73x8.pdf
---- ['regrid_missto0_yearmean_fldmean'] thetaoga ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp370
Column 7: ssp585
Writing timeseries_thetaoga_73x8.pdf
Written timeseries_thetaoga_73x8.pdf
---- ['regrid_missto0_yearmean_fldmean'] zos ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp370
Column 7: ssp585
Writing timeseries_zos_73x8.pdf
Written timeseries_zos_73x8.pdf
---- ['regrid_missto0_yearmean_fldmean'] hfds ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp3

## Function to produce maps of timmeans for all source-member pairs (rows) and experiments (columns) of interest for a given variable
These time-mean maps provide a quick summary of available data coverage, especially when all_source_member_pairs=True, and also provide a sanity check that the regridding has produced sensible results.

In [8]:
def fig_timmean_map(variable='zos',
                    in_group='regrid_missto0_yearmean',  # directory within in_base
                    experiments=['piControl', '1pctCO2', 'abrupt-4xCO2',
                                 'historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585'],
                    all_source_member_pairs=True,  # include source-member pairs that have data for any variable?
                   ):
    """Plot time-mean maps for one variable: source-member pairs (rows) x experiments (columns).

    For each source-member pair and experiment, the NetCDF files found under
    in_base/<in_group>/<variable>/<source_member>/*.<experiment>.*/ are merged in time using CDO,
    reduced using timmean, and drawn as a map. Subplots without data remain empty (light grey).
    The figure is saved as a PNG in out_base/<in_group>/.

    Args:
        variable: Name of the variable to plot (also the name of the data variable in the files).
        in_group: Directory within in_base to read from; also the name of the output directory
            within out_base.
        experiments: Experiments to show, one per column.
            (The default list is never modified by this function.)
        all_source_member_pairs: If True, include a row for every source-member pair that has
            data for any variable; if False, only pairs that have data for this variable.

    Returns:
        The matplotlib Figure, or None if there are no source-member pairs or no experiments
        (in which case nothing is plotted or saved).
    """
    print(f'---- {in_group} {variable} ----')
    # Output directory
    out_dir = out_base.joinpath(in_group)
    out_dir.mkdir(exist_ok=True)
    # Identify available source-member pairs
    if all_source_member_pairs:  # include source-member pairs that have data for any variable?
        pair_pattern = f'{in_group}/*/[!.]*_*'
    else:  # or only include source-member pairs that have data for this specific variable?
        pair_pattern = f'{in_group}/{variable}/[!.]*_*'
    source_member_pairs = sorted({d.name for d in in_base.glob(pair_pattern)})  # set removes duplicates
    # Configure subplots
    nrows = len(source_member_pairs)
    ncols = len(experiments)
    if nrows == 0 or ncols == 0:
        # plt.subplots cannot create an empty grid, so skip this variable instead of raising
        print(f'No source-member pairs or experiments found for {variable}; nothing to plot')
        return None
    # squeeze=False: axs is always 2D, so axs[r,c] works even with a single row or column
    fig, axs = plt.subplots(ncols=ncols, nrows=nrows, squeeze=False,
                            figsize=(ncols*3, nrows*1.5), constrained_layout=True,
                            subplot_kw={'projection': ccrs.PlateCarree(central_longitude=180.0),
                                        'facecolor': 'lightgrey'})
    fig.suptitle(variable, fontsize='xx-large')  # main title
    # Loop over cols and rows
    for c, exp in enumerate(experiments):
        print(f'Column {c}: {exp}')
        ncover = 0  # counter for number of source-member pairs with data coverage for this experiment
        for r, source_member in enumerate(source_member_pairs):
            ax = axs[r,c]  # select axis
            # Label row - done before looking for data, so that rows without data are still labelled
            if c == 0:
                ax.text(-190, 0, source_member, rotation=90, ha='center', va='center', fontsize='x-small')
            # Input data directory
            in_dirs = sorted(in_base.glob(f'{in_group}/{variable}/{source_member}/*.{exp}.*'))
            if len(in_dirs) == 0:
                continue  # no data for this source-member pair; move on to next row
            elif len(in_dirs) > 1:
                print(f'Warning: {len(in_dirs)} possible input directories found for {source_member} {exp}')
            in_dir = in_dirs[-1]  # if several candidates, use the last one in sorted order
            # Input files
            in_fns = sorted(str(f) for f in in_dir.glob('*.nc'))
            if len(in_fns) == 0:
                continue  # directory exists but contains no NetCDF files
            # Use CDO to merge files (if necessary) and calculate timmean
            # (SKIP_SAME_TIME is a CDO environment variable affecting mergetime's handling of duplicate timesteps)
            temp_fn = cdo.timmean(input=f'-mergetime {" ".join(in_fns)}',
                                  options='-a -f nc', env={"SKIP_SAME_TIME": "1"})
            # Read data using xarray; the context manager closes the temporary file after plotting
            with xr.open_dataset(temp_fn) as ds:
                # Plot map
                ds[variable].isel(time=0).plot(robust=True,
                                               cbar_kwargs={'label': None},
                                               ax=ax, transform=ccrs.PlateCarree())
            ax.coastlines()
            # Remove the subplot title added by the plot call (the top row is titled below)
            ax.set_title('')
            # Increase counter
            ncover += 1
        # Label column - after all rows have been processed, so that n is the total for the column
        axs[0,c].set_title(f'{exp} (n={ncover})', fontsize='x-large')
    # Save
    out_fn = out_dir.joinpath(f'timmean_map_{variable}_{nrows}x{ncols}.png')
    print(f'Writing {in_group}/{out_fn.name}')
    fig.savefig(out_fn, transparent=False)
    print(f'Written {in_group}/{out_fn.name}')
    return fig

In [9]:
%%time
# Loop over variables of interest and plot timmean maps
for variable in ['zos', 'hfds', 'hfgeou', 'hfcorr',  # 2D ocean
                 'rlut', 'rsdt', 'rsut', 'tas'  # 2D atmos
                ]:
    # Plot maps (the figure is saved to disk inside the function)
    fig = fig_timmean_map(variable=variable)
    # Close this specific figure (don't show it inline) instead of relying on pyplot's "current figure"
    plt.close(fig)

---- regrid_missto0_yearmean zos ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp370
Column 7: ssp585
Writing regrid_missto0_yearmean/timmean_map_zos_73x8.png
Written regrid_missto0_yearmean/timmean_map_zos_73x8.png
---- regrid_missto0_yearmean hfds ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp370
Column 7: ssp585
Writing regrid_missto0_yearmean/timmean_map_hfds_73x8.png
Written regrid_missto0_yearmean/timmean_map_hfds_73x8.png
---- regrid_missto0_yearmean hfgeou ----
Column 0: piControl
Column 1: 1pctCO2
Column 2: abrupt-4xCO2
Column 3: historical
Column 4: ssp126
Column 5: ssp245
Column 6: ssp370
Column 7: ssp585
Writing regrid_missto0_yearmean/timmean_map_hfgeou_73x8.png
Written regrid_missto0_yearmean/timmean_map_hfgeou_73x8.png
---- regrid_missto0_yearmean hfcorr ----
Column 0: piControl
Column 1: 1pctCO2
Col

In [10]:
# Print the current date and time via a shell escape (timestamp after the cells above have run)
! date

Wed Nov  2 19:24:27 +08 2022
