In [2]:
import glob

from ecgtools import Builder
from ecgtools.parsers.cesm import parse_cesm_history, parse_cesm_timeseries

Let's see what is available for
/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/b.e21.BW.f09_g17.SSP245-TSMLT-GAUSS-DEFAULT.005/atm/proc/tseries/:
day_1  day_5  hour_1  hour_3  month_1

In [4]:
# List every entry directly under the SAI1 experiment directory (one
# directory per case). glob returns them in no particular order.
glob.glob('/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/*')

['/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/b.e21.BW.f09_g17.SSP245-TSMLT-GAUSS-DEFAULT.004',
 '/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/b.e21.BW.f09_g17.SSP245-TSMLT-GAUSS-DEFAULT.003',
 '/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/b.e21.BW.f09_g17.SSP245-TSMLT-GAUSS-DEFAULT.005',
 '/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/b.e21.BW.f09_g17.SSP245-TSMLT-GAUSS-DEFAULT.001',
 '/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/b.e21.BW.f09_g17.SSP245-TSMLT-GAUSS-DEFAULT.002']

Build a catalog of the 1-degree WACCM6 TSMLT chemistry timeseries output for all of these cases

In [5]:
# Experiment root: contains one sub-directory per SAI1 case (see listing above)
esm_dir = "/glade/campaign/cesm/development/wawg/WACCM6-TSMLT-GEO/SAI1/"

# Directories the builder must skip: all non-atmosphere components plus the
# logs, restart (rest) and controller directories.
# NOTE(review): "hist" is not listed here and no pattern filters on output
# frequency, so every tseries frequency reached at the chosen depth
# (including hour_1 / hour_3 where present) ends up in the catalog.
excluded_dirs = [
    "*/cpl/*",
    "*/esp/*",
    "*/glc/*",
    "*/ice/*",
    "*/lnd/*",
    "*/logs/*",
    "*/ocn/*",
    "*/rest/*",
    "*/rof/*",
    "*/wav/*",
    "*/controller/*",
]

b = Builder(
    # Directory with the output
    esm_dir,
    # Depth of 4: the root is the experiment directory holding all cases
    # (not a single case's output directory), so the crawl has to descend
    # further to reach <case>/atm/proc/tseries/<frequency>.
    # NOTE(review): confirm the exact depth semantics in the ecgtools docs.
    depth=4,
    # Glob-style patterns for paths to leave out of the catalog
    exclude_patterns=excluded_dirs,
    # Number of jobs to execute - should be equal to # threads you are using
    njobs=1,
)

In [6]:
# Run the catalog build, handing each file to the CESM timeseries parser.
# The result is rebound to `b`, which the later cells use
# (`b.invalid_assets`, `b.save`).
# The recorded output shows two passes: 88 items, then 8634 items; the second
# pass took 12.5 min with njobs=1.
# NOTE(review): presumably directory discovery followed by per-file parsing.
b = b.build(parsing_func=parse_cesm_timeseries)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  88 out of  88 | elapsed:    0.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 8634 out of 8634 | elapsed: 12.5min finished


Check for invalid assets

In [7]:
# Show the raw values of the builder's `invalid_assets` table; the recorded
# output is an empty array.
# NOTE(review): presumably this holds files the parser could not handle —
# confirm in the ecgtools docs.
b.invalid_assets.values

array([], shape=(0, 0), dtype=float64)

Save the catalog - this creates a CSV file and a JSON file

In [8]:
# Where the catalog files are written
catalog_dir = "/glade/work/marsh/intake-esm-catalogs/"

# Target file - could be .csv (uncompressed csv) or .csv.gz (compressed csv);
# the recorded output shows a .json file written alongside it.
catalog_csv = catalog_dir + "WACCM6-TSMLT-GEO-SAI1.csv"

# Aggregations which are fed into xarray when reading in data using intake:
# merge the different variables ...
union_over_variables = {'type': 'union', 'attribute_name': 'variable'}
# ... and concatenate the time-range chunks along the time dimension.
join_along_time = {
    'type': 'join_existing',
    'attribute_name': 'time_range',
    'options': {'dim': 'time', 'coords': 'minimal', 'compat': 'override'},
}

b.save(
    catalog_csv,
    # Column holding the file path of each asset
    path_column_name='path',
    # Column holding the variable name(s) of each asset
    variable_column_name='variable',
    # Data file format - could be netcdf or zarr (in this case, netcdf)
    data_format="netcdf",
    # Which attributes to groupby when reading in variables using intake-esm
    groupby_attrs=["component", "stream", "case"],
    aggregations=[union_over_variables, join_along_time],
)

Saved catalog location: /glade/work/marsh/intake-esm-catalogs/WACCM6-TSMLT-GEO-SAI1.json and /glade/work/marsh/intake-esm-catalogs/WACCM6-TSMLT-GEO-SAI1.csv


In [None]:
# List everything in the catalog directory, e.g. to confirm the .csv/.json
# files exist. Relies on `catalog_dir` defined in the save cell.
glob.glob(catalog_dir+'*')

In [None]:
# IPython help syntax (trailing `?`): shows introspection info for
# `b.filelist`. Interactive exploration only - not valid plain Python, so
# drop this cell before running the notebook as a script.
b.filelist?

In [7]:
# Print the installed ecgtools version so the catalog build can be tied to a
# specific release of the library.
import ecgtools
print(ecgtools.__version__)

2021.6.21.post2


In [None]:
from ecgtools.parsers.cesm import parse_cesm_timeseries

# Spot-check the timeseries parser on one monthly cam.h0 file. Note that this
# file belongs to a different experiment (WACCM6-MA-1deg) than the SAI1 cases
# cataloged above.
path = (
    "/glade/campaign/cesm/development/wawg/WACCM6-MA-1deg/b.e21.BWSSP245.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.001/atm/proc/tseries/month_1/b.e21.BWSSP245.f09_g17.release-cesm2.1.3.WACCM-MA-1deg.001.cam.h0.ACTREL.206501-209912.nc"
)

# Display what the parser extracts for this single file
parse_cesm_timeseries(path)