In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import dask
import pop_tools
from functools import partial

# Turn on dask's 'array.slicing.split_large_chunks' option for the whole session.
dask.config.set({'array.slicing.split_large_chunks': True})

<dask.config.set at 0x2b6d81f02940>

In [2]:
# Directory searched below for the monthly "*.RHO.*.nc" files
# (per the path: ocean component, monthly time series, CMIP6 piControl case).
topdir = "/glade/campaign/collections/cmip/CMIP6/timeseries-cmip6/b.e21.B1850.f09_g17.CMIP6-piControl.001/ocn/proc/tseries/month_1/"

In [3]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client

# Memory for each single-core worker job. The same figure feeds both the Dask
# worker memory setting (`memory`) and the PBS request (`resource_spec`):
# previously Dask was told 50GB while PBS was only asked for 20GB, so workers
# could plan for more memory than the scheduler had actually granted them.
worker_memory = '20GB'

cluster = PBSCluster(
    cores=1,
    memory=worker_memory,
    processes=1,
    queue='casper',
    local_directory='$TMPDIR',
    resource_spec='select=1:ncpus=1:mem=' + worker_memory,
    project='P04010022',
    walltime='02:00:00',
    interface='ib0',
)

# Ask for 40 worker jobs; they join the cluster as the queue starts them.
cluster.scale(40)

# Point the dashboard link at the JupyterHub proxy so it is reachable from the browser.
dask.config.set({'distributed.dashboard.link': 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'})

# Connect this notebook session to the cluster.
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 40868 instead
  http_address["port"], self.http_server.port


In [5]:
# Show the client summary (scheduler address, dashboard link, workers currently connected).
client

0,1
Client  Scheduler: tcp://10.12.206.60:35742  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/islas/proxy/40868/status,Cluster  Workers: 32  Cores: 32  Memory: 687.19 GB


In [6]:
# Open every RHO file under topdir as a single dask-backed dataset.
# combine='by_coords' orders and joins the files using their own coordinate
# values, so no concat_dim is given: xarray only accepts concat_dim together
# with combine='nested' (older releases ignored it, newer ones raise ValueError).
dat = xr.open_mfdataset(
    topdir + "*.RHO.*.nc",
    chunks={"time": 300, "z_t": 60, "nlat": 30, "nlon": 30},
    combine='by_coords',
    coords='minimal',
)

In [7]:
# Month to keep and number of vertical levels (counted from the first z_t index) to keep.
target_month = 3
n_levels = 20

# Pull the needed variables out of the dataset before `dat` is rebound to RHO.
timebnds = dat.time_bound
tarea = dat.TAREA
dz = dat.dz
z_t = dat.z_t
dat = dat.RHO

# Re-label every time step with the midpoint of its time_bound interval.
# NOTE(review): presumably the stored time stamps sit at the end of each
# averaging interval, which would put a monthly mean in the following month;
# confirm against the files.
interval_start = np.array(timebnds.isel(d2=0))
interval_end = np.array(timebnds.isel(d2=1))
newtime = interval_start + (interval_end - interval_start) / 2.

dat['time'] = newtime
dz['time'] = newtime
tarea['time'] = newtime

# Keep only the target month. The time dimension is named explicitly so the
# selection does not depend on time being the first axis of each array.
dat = dat.isel(time=(dat.time.dt.month == target_month))
dz = dz.isel(time=(dz.time.dt.month == target_month))

# Keep only the first n_levels levels along z_t.
upper_levels = slice(0, n_levels)
dat = dat.isel(z_t=upper_levels)
z_t = z_t.isel(z_t=upper_levels)
dz = dz.isel(z_t=upper_levels)

In [8]:
# Re-chunk the subset arrays; each one lists only the dimensions it is chunked along.
dat = dat.chunk({"time": 300, "z_t": 60, "nlat": 30, "nlon": 30})
dz = dz.chunk({"time": 300, "z_t": 60})
tarea = tarea.chunk({"time": 300, "nlat": 30, "nlon": 30})

In [9]:
# Thickness-weighted vertical mean of RHO over the retained levels.
#
# Only levels where RHO is present may contribute to the denominator, and a
# column with no valid level must stay NaN:
#   * sum() skips NaN by default, so without min_count=1 an all-NaN column
#     would sum to 0 and yield a mean of 0 instead of missing;
#   * dividing by the full dz.sum('z_t') would bias low every column that has
#     fewer valid levels than were retained.
valid_dz = dz.where(dat.notnull())
rho = (dat * dz).sum('z_t', min_count=1) / valid_dz.sum('z_t', min_count=1)

In [10]:
# Area weights restricted to the box 300 < TLONG < 325, 50 < TLAT < 65;
# every cell outside the box gets weight 0.
in_lon_band = (tarea.TLONG > 300) & (tarea.TLONG < 325)
in_lat_band = (tarea.TLAT > 50) & (tarea.TLAT < 65)
wgts = xr.where(in_lon_band & in_lat_band, tarea, 0)

In [11]:
# Weighted mean of rho over both horizontal dimensions.
# Missing weights are replaced by 0 before being handed to weighted().
box_weights = wgts.fillna(0)
rho_w = rho.weighted(box_weights)
rhom = rho_w.mean(("nlon", "nlat"))

In [12]:
# Run the deferred computation and hold the result in memory.
rhom = rhom.compute()

In [14]:
# Name the series so it is written out as variable 'RHO'.
rhom.name = 'RHO'

In [15]:
# Write the series to a netCDF file.
outfile = "/glade/scratch/islas/python/singleforcing/DATA_SORT/cesm2_picontrol/cesm2_picontrol_rho_top203_60to35W_50to65N.nc"
rhom.to_netcdf(outfile)

In [16]:
# Disconnect the client before tearing down the cluster, so it is not left
# holding a connection to a scheduler that no longer exists.
client.close()
cluster.close()