In [1]:
import glob

from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_history, parse_cesm_timeseries

Let's see which cases are available for the new model configuration, WACCM6-MA-1deg.

In [2]:
# List the entries directly under the WACCM6-MA-1deg root (one per case in the recorded output).
glob.glob('/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/*/')

['/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWma1850.f09_g17.release-cesm2.1.3.c20200918/',
 '/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWmaHIST.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.003/',
 '/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWmaCO2x4.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.001/',
 '/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWmaHIST.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.002/',
 '/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWmaHIST.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.001/',
 '/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWSSP245.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.001/']

Build a catalog of the 1-degree WACCM6-MA chemistry timeseries output for all of these cases.

In [31]:
# Root directory that holds every WACCM6-MA-1deg case.
# Alternative (single case, monthly timeseries only):
# esm_dir = "/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWma1850.f09_g17.release-cesm2.1.3.c20200918/atm/proc/tseries/month_1/"
esm_dir = "/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/"

b = Builder(
    # Directory with the output (the root above, not a single case).
    esm_dir,
    # Depth of 5: starting from the root, the timeseries files sit five
    # directories down (<case>/atm/proc/tseries/<frequency>/), as in the
    # single-case path shown above.
    # NOTE(review): confirm how Builder counts `depth` in the installed ecgtools.
    depth=5,
    # Skip the non-atmosphere components, logs, and restart directories,
    # plus atm/h6, atm/hist, and the atm/proc entries matching "h*" and
    # "*_RESTOM.nc", so that the atm/proc timeseries remain.
    exclude_patterns=[
        "*/cpl/*",
        "*/esp/*",
        "*/glc/*",
        "*/ice/*",
        "*/lnd/*",
        "*/logs/*",
        "*/ocn/*",
        "*/rest/*",
        "*/rof/*",
        "*/wav/*",
        "*/atm/h6/*",
        "*/atm/hist/*",
        "*/atm/proc/h*",
        "*/atm/proc/*_RESTOM.nc",
    ],
    # Number of parallel jobs; should match the number of threads available.
    njobs=1,
)

In [32]:
# Run the build, using parse_cesm_timeseries as the parsing function.
# The result is bound back to `b`, which the following cells rely on.
b = b.build(parsing_func=parse_cesm_timeseries)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 1710 out of 1710 | elapsed:  2.2min finished


Check for invalid assets.

In [33]:
# Display the builder's `invalid_assets` attribute.
# NOTE(review): presumably the files the parser could not handle — confirm against the ecgtools docs.
b.invalid_assets

Save the catalog. This creates a CSV file and a JSON file.

In [34]:
# Directory that receives the catalog files (note the trailing slash,
# which the path construction below depends on).
catalog_dir = "/glade/work/marsh/intake-esm-catalogs/"

b.save(
    # Catalog file path: ".csv" for an uncompressed csv, ".csv.gz" for a compressed one.
    f"{catalog_dir}WACCM6-MA-1deg.csv",
    # Name of the column that holds the file path.
    path_column_name="path",
    # Name of the column that holds the variables.
    variable_column_name="variable",
    # Format of the data files: netcdf or zarr (netcdf here).
    data_format="netcdf",
    # Attributes intake-esm groups by when reading in variables.
    groupby_attrs=["component", "stream", "case"],
    # Aggregations handed to xarray when the data is read through intake.
    aggregations=[
        {"type": "union", "attribute_name": "variable"},
        {
            "type": "join_existing",
            "attribute_name": "time_range",
            "options": {"dim": "time", "coords": "minimal", "compat": "override"},
        },
    ],
)

Saved catalog location: /glade/work/marsh/intake-esm-catalogs/WACCM6-MA-1deg.json and /glade/work/marsh/intake-esm-catalogs/WACCM6-MA-1deg.csv


In [35]:
# List everything in the catalog directory to confirm the new files exist.
glob.glob(f"{catalog_dir}*")

['/glade/work/marsh/intake-esm-catalogs/waccm_1deg_MA_BWma1850.json',
 '/glade/work/marsh/intake-esm-catalogs/waccm_1deg_MA_BWma1850.csv',
 '/glade/work/marsh/intake-esm-catalogs/WACCM6-MA-1deg.csv',
 '/glade/work/marsh/intake-esm-catalogs/WACCM6-MA-1deg.json']

In [36]:
# IPython introspection (`?`) of the builder's file list; interactive-only syntax.
# NOTE(review): leftover exploration — consider replacing with `len(b.filelist)` before sharing.
b.filelist?

Type:        list
String form: [PosixPath('/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWmaCO2x4.f09_g17.release- <...> /b.e21.BWmaCO2x4.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.001.cam.h6.VTHzm.00110101-00201231.nc')]
Length:      1710
Docstring:
Built-in mutable sequence.

If no argument is given, the constructor creates a new empty list.
The argument must be an iterable if specified.
