In [21]:
import xarray as xr
import numpy as np
import glob
import sys
import dask

from CASutils import averaging_utils as avg
from CASutils import calendar_utils as cal

from functools import partial

# Directory that receives the annual-mean netCDF output files (one per variable)
pathout="/glade/scratch/islas/python/singleforcing/DATA_SORT/cesm1_aaer/"
# Root directory under which the per-member monthly time-series files are globbed
topdir="/glade/collections/cdg/timeseries-cmip6/"

# Variables to process; each is read, annually averaged and written separately
varnames=['SALT','PD','TEMP']

In [22]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client

# Memory per worker. The same figure feeds both dask's worker memory limit and
# the PBS resource request: if dask's limit is larger than what PBS grants, the
# worker's spill/pause thresholds sit above the real allocation and the job can
# be killed by the batch system before dask reacts.
worker_memory = '20GB'

cluster = PBSCluster(
    cores = 1,
    memory = worker_memory,
    processes = 1,
    queue = 'casper',
    local_directory = '$TMPDIR',
    resource_spec = 'select=1:ncpus=1:mem=' + worker_memory,
    project='P04010022',
    walltime='01:00:00',
    interface='ib0')

# scale up: request 20 single-core workers
cluster.scale(20)

# change your urls to the dask dashboard so that you can see it
dask.config.set({'distributed.dashboard.link':'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'})

# Setup your client
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41756 instead
  http_address["port"], self.http_server.port


In [23]:
# Display the client summary (scheduler address, dashboard link, worker/core/memory totals)
client

0,1
Client  Scheduler: tcp://10.12.206.39:37801  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/islas/proxy/41756/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [24]:
# Number of ensemble members to process; member labels are built as 001..nmems
nmems=3

In [25]:
def preprocessor(ds):
    """Reduce one opened file to annual means at z_t=500.

    Intended to be passed as the ``preprocess`` callback of
    ``xr.open_mfdataset`` so that every file is reduced before combining.

    Parameters
    ----------
    ds : dataset-like
        Must support ``sel(z_t=...)``, expose ``time_bound`` and allow
        assignment to ``'time'``.
        NOTE(review): assumes ``time_bound`` is shaped (time, 2) -- confirm.

    Returns
    -------
    The result of grouping by ``time.year`` and averaging over ``time``.
    """
    level = ds.sel(z_t=500)

    # Work on the bounds as integer seconds so the two columns can be averaged,
    # then convert the per-row mean back to a timestamp.
    bounds_sec = np.array(level.time_bound, dtype='datetime64[s]').view('i8')
    midpoints = bounds_sec.mean(axis=1).astype('datetime64[s]')

    # Replace the time axis with the mean of each row of time_bound before
    # grouping by calendar year.
    # NOTE(review): presumably done so each month lands in the correct year -- confirm.
    level['time'] = midpoints

    return level.groupby('time.year').mean('time')

In [26]:
# Zero-padded ensemble member labels ('001', '002', ...). They do not depend on
# the variable, so they are built once rather than on every loop iteration.
memstr = [ str(i).zfill(3) for i in range(1, nmems+1) ]

for varname in varnames:
    print(varname)

    # One file list per member: files under the "aaer.<member>" case followed by
    # files under the "aaer.RCP85.<member>" case, each sorted by name.
    filelist = [ sorted(glob.glob(topdir+"b.e11.B1850LENS.f09_g16.aaer."+imem+"/ocn/proc/tseries/month_1/*."+varname+".*.nc"))+
                 sorted(glob.glob(topdir+"b.e11.B1850LENS.f09_g16.aaer.RCP85."+imem+"/ocn/proc/tseries/month_1/*."+varname+".*.nc")) for imem in memstr ]

    # Fail early, naming the members whose glob patterns matched nothing.
    missing = [ imem for imem, files in zip(memstr, filelist) if not files ]
    if missing:
        raise FileNotFoundError("no "+varname+" files found under "+topdir+" for members "+", ".join(missing))

    # preprocessor() turns each file into annual means indexed by 'year', so
    # there is no 'time' dimension left to concatenate along; the per-file
    # results are combined by their coordinate values instead.
    dat = [ xr.open_mfdataset(files, combine='by_coords',
                              preprocess = preprocessor)[varname] for files in filelist ]

    # Stack the members along a new 'M' dimension and write one file per variable.
    dat = xr.concat(dat, dim='M')
    dat.load().to_netcdf(pathout+"AAER_"+varname+"_am.nc")

SALT
PD
TEMP


In [27]:
# Disconnect the client first so it is not left attached to (and trying to
# reach) a scheduler that has already been shut down, then release the workers.
client.close()
cluster.close()

In [10]:
# Debug check: print the monthly ocean time-series directory for member 001
print(topdir+"b.e11.B1850LENS.f09_g16.aaer.001/ocn/proc/tseries/month_1/")

/glade/collections/cdg/timeseries-cmip6/b.e11.B1850LENS.f09_g16.aaer.001/ocn/proc/tseries/month_1/


In [13]:
# Debug check of the member labels.
# NOTE(review): this cell was executed out of order (In [13]); its recorded
# output does not match the labels produced by the current nmems / memstr code.
print(memstr)

['000', '001', '002', '003']
