In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import glob
import matplotlib
import matplotlib.pyplot as plt
import cftime
import dask

In [2]:
# Setup your PBSCluster
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
# Resources requested for each PBS job
ncores = 1
nmem = '25GB'

cluster = PBSCluster(
    cores=ncores,                 # number of cores per job
    memory=nmem,                  # amount of memory per job
    processes=1,                  # number of processes per job
    queue='casper',               # queue to submit to (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR',    # local directory handed to the workers
    resource_spec=f'select=1:ncpus={ncores}:mem={nmem}',  # PBS resource request
    project='P93300641',          # project ID to charge; replace with your own
    walltime='01:00:00',          # wall-time limit per job
    interface='ib0',              # network interface to use
)

# Scale the cluster up to 30
cluster.scale(30)

# Attach a client to the cluster
client = Client(cluster)

In [3]:
# Collect every h0 history file of the CTL2010 ensemble, in sorted order
files = glob.glob('/glade/campaign/asp/djk2120/PPEn11/CTL2010/hist/*h0*')
files.sort()

In [9]:
# Variables to keep from each history file
datavars = [
    'GPP',
    'EFLX_LH_TOT',
    'FSA',
    'TV',
    'TSOI_10CM',
    'SOILWATER_10CM',
]


def preprocess(ds):
    """Return ``ds`` restricted to the variables listed in ``datavars``.

    Passed as the ``preprocess`` hook when the files are opened, so each file
    is subset before the files are combined.
    """
    subset = ds[datavars]
    return subset

In [56]:
# Open all history files as one dataset: each file is passed through
# preprocess() and the files are concatenated along the 'ens' dimension.
# Times are left undecoded.
ds = xr.open_mfdataset(
    files,
    combine='nested',
    concat_dim='ens',
    parallel=True,
    decode_times=False,
    preprocess=preprocess,
)

In [57]:
# Number of ensemble members
nens = len(ds['ens'])

# Number of datapoints per member: the product of the lengths of every
# dimension except 'ens' (e.g. nlat*nlon*nt)
nx = 1
for name in (d for d in ds.dims if d != 'ens'):
    nx *= len(ds[name])

In [58]:
dims = np.array(list(ds.dims))
is_ens = dims == 'ens'

# Repetition counts for np.tile: nens along 'ens', 1 everywhere else
# (used below to expand the default run's NaN mask across the ensemble)
tiler = np.where(is_ens, float(nens), 1.0)

# Axis positions of every dimension except 'ens'; these are the axes summed
# over below, so that the 'ens' axis is kept
summer = tuple(np.flatnonzero(~is_ens))

In [64]:

# Flag the ensemble members that are bit-for-bit (bfb) identical to the default
# run (ens=0). Every member starts at 1 and drops to 0 as soon as any variable
# in datavars differs from the default.
bfb_allvars = np.ones(nens)

for v in datavars:  # loop through datavars
    da = ds[v]
    x0 = da.isel(ens=0)  # default run

    # Reduce over this variable's own dimensions (everything except 'ens'),
    # selected by name, so the result does not depend on the Dataset-level
    # dimension order matching the variable's axis order.
    other_dims = [d for d in da.dims if d != 'ens']

    # A point matches when the values are equal, or when BOTH the member and
    # the default are NaN (NaN == NaN is False, so it needs its own test).
    # A member holding a real value where the default is NaN is a difference.
    same = (da == x0) | (da.isnull() & x0.isnull())

    # Reducing in xarray before calling .values means only one boolean per
    # ensemble member is brought into local memory, instead of the full grid.
    bfbx = same.all(dim=other_dims).values  # which members are bfb for this datavar
    bfb_allvars = bfbx * bfb_allvars  # which members are bfb for all datavars


In [None]:
## Apart from the default run itself, none of my simulations are bit-for-bit (bfb) identical to the default...

In [65]:
# One entry per ensemble member: 1 means the member matched the default run
# (ens=0) for every variable in datavars, 0 means at least one variable differed.
bfb_allvars

array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.