In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import glob
import matplotlib
import matplotlib.pyplot as plt
import cftime
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
import statsmodels.api as sm

In [24]:
def get_ensemble(files,data_vars,p=True,htape='h0'):
    """Open several files as one dataset stacked along a new 'ens' dimension.

    Parameters
    ----------
    files : list of str
        Paths of the files to open. They are concatenated, in the order
        given, along the new 'ens' dimension.
    data_vars : list of str
        Names of the variables kept from each file; everything else is
        dropped in the preprocess step before concatenation.
    p : bool, default True
        Forwarded to ``xr.open_mfdataset`` as ``parallel``.
    htape : {'h0', 'h1'}, default 'h0'
        Selects which extra variables are copied from the first file:
        the ``grid1d_*`` variables for 'h0', the ``pfts1d_*`` variables
        for 'h1'.

    Returns
    -------
    xarray.Dataset
        The concatenated dataset with the extra variables attached.

    Raises
    ------
    ValueError
        If ``htape`` is not one of the supported values.
    """
    extras_by_htape = {
        'h0': ['grid1d_lat','grid1d_lon'],
        'h1': ['pfts1d_lat','pfts1d_lon','pfts1d_wtgcell','pfts1d_itype_veg'],
    }
    # validate before touching any file so a typo fails fast and clearly
    if htape not in extras_by_htape:
        raise ValueError("htape must be one of %s, got %r"
                         % (sorted(extras_by_htape), htape))
    extras = extras_by_htape[htape]

    def preprocess(ds):
        # keep only the requested variables from each file
        return ds[data_vars]

    #read in the dataset
    ds = xr.open_mfdataset(files,combine='nested',concat_dim='ens',
                           parallel=p,preprocess=preprocess)

    #time-axis fix, currently disabled
    #if htape=='h0' or htape=='h1':
    #    ds['time'] = xr.cftime_range(str(2005),periods=len(ds.time),freq='MS') #fix time bug

    #add in some extra variables, taken from the first file only;
    #load them eagerly so the file handle can be closed right away
    with xr.open_dataset(files[0]) as ds0:
        for extra in extras:
            ds[extra] = ds0[extra].load()

    return ds

In [10]:
# Dask-on-PBS setup: every job gets `ncores` cores and `nmem` memory
ncores=1
nmem='25GB'
cluster = PBSCluster(
    cores=ncores,                                         # cores per job
    memory=nmem,                                          # memory per job
    processes=1,                                          # processes per job
    queue='casper',                                       # queue to submit to (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR',                            # local directory for the workers
    resource_spec=f'select=1:ncpus={ncores}:mem={nmem}',  # PBS resource request, kept in sync with ncores/nmem
    project='P93300641',                                  # project ID the jobs are charged to
    walltime='03:00:00',                                  # wall time per job
    interface='ib0',                                      # network interface to use
)

# Scale the cluster to 20 workers
cluster.scale(20)

# Connect a client to the cluster
client = Client(cluster)

In [3]:
# Parameter table: read from CSV; its `key` and `param` columns are used below
csv = '/glade/scratch/djk2120/PPEn11/SP_bfb_test.csv'
paramkey = pd.read_csv(csv)

# Land area on the sparse grid (original note gives the units as km2)
la_file = '/glade/scratch/djk2120/PPEn08/sparsegrid_landarea.nc'
la = xr.open_dataset(la_file)['landarea']

In [79]:
# Build parallel lists (keys / params / files) for every run directory that
# contains both a '*.clm2.r.*.nc' file and at least one '*.clm2.h0.*.nc' file.
# NOTE(review): the '.r.' file is presumably the restart file marking a
# finished run -- confirm.
kdir = '/glade/scratch/oleson/'
keys  = []; params = []
files = []
for key,param in zip(paramkey.key,paramkey.param):
    thisdir = kdir+'PPEn11_CTL2010SP_'+key+'/run/'
    rfile   = glob.glob(thisdir+'*.clm2.r.*.nc')
    # glob returns matches in arbitrary order; sort so that the file picked
    # with [0] is the same on every run
    h0      = sorted(glob.glob(thisdir+'*.clm2.h0.*.nc'))
    # skip runs without an h0 file instead of failing on h0[0]
    if rfile and h0:
        keys.append(key)
        params.append(param)
        files.append(h0[0])
    

In [25]:
# Variables compared across the ensemble
datavars = [
    'FPSN',
    'EFLX_LH_TOT',
    'FSA',
    'TV',
    'TSOI_10CM',
    'SOILWATER_10CM',
]
ds = get_ensemble(files, datavars)

In [74]:
# Flag ensemble members that are bit-for-bit (BFB) identical to member 0.
# bfb     : boolean mask over 'ens' for the variable checked last
# bfb_all : boolean mask over 'ens', True only if every variable is BFB
nens = len(ds.ens)
bfb_all = np.ones(nens, dtype=bool)
for v in datavars:

    da = ds[v]
    x0 = da.sel(ens=0)
    # a point counts as matching if it equals member 0, or if member 0 is NaN there
    same = (da == x0) | x0.isnull()
    # every point along all non-ensemble dimensions must match; reducing by
    # dimension name avoids hard-coding the number of times and gridcells
    bfb = same.all(dim=[d for d in same.dims if d != 'ens']).values
    print(v,bfb.sum())

    bfb_all = bfb_all & bfb

FPSN 75
EFLX_LH_TOT 75
FSA 75
TV 75
TSOI_10CM 75
SOILWATER_10CM 75


In [80]:
# parameters whose runs are BFB with member 0 for every checked variable
# (uses the accumulated mask, not the leftover mask of the last loop variable)
np.array(params)[bfb_all.astype(bool)]

array(['default', 'grperc', 'br_mr', 'lmr_intercept_atkin',
       'FUN_fracfixers', 'fun_cn_flex_a', 'fun_cn_flex_b',
       'fun_cn_flex_c', 'kc_nonmyc', 'kn_nonmyc', 'akc_active',
       'akn_active', 'ekc_active', 'ekn_active', 'stem_leaf',
       'croot_stem', 'flivewd', 'frootcn', 'leaf_long', 'lwtop_ann',
       'ndays_off', 'ndays_on', 'tau_cwd', 'tau_l1', 'tau_l2_l3',
       'tau_s1', 'tau_s2', 'tau_s3', 'q10_mr', 'minpsi_hr', 'maxpsi_hr',
       'rf_l1s1_bgc', 'rf_l2s1_bgc', 'rf_l3s2_bgc', 'rf_s2s1_bgc',
       'rf_s2s3_bgc', 'rf_s3s1_bgc', 'cn_s3_bgc', 'decomp_depth_efolding',
       'max_altdepth_cryoturbation', 'max_altmultiplier_cryoturb',
       'cryoturb_diffusion_k', 'som_diffus', 'k_nitr_max_perday',
       'denitrif_respiration_coefficient',
       'denitrif_respiration_exponent',
       'denitrif_nitrateconc_coefficient',
       'denitrif_nitrateconc_exponent', 'r_mort', 'fsr_pft', 'fd_pft',
       'prh30', 'ignition_efficiency', 'cc_dstem', 'cc_leaf', 'cc_lstem',
 

In [81]:
# parameters whose runs differ from member 0 in at least one checked variable
# (uses the accumulated mask, not the leftover mask of the last loop variable)
np.array(params)[~bfb_all.astype(bool)]

array(['taulnir', 'taulvis', 'tausnir', 'tausvis', 'rholnir', 'rholvis',
       'rhosnir', 'rhosvis', 'xl', 'displar', 'dleaf', 'z0mr', 'csoilc',
       'cv', 'a_coef', 'a_exp', 'zlnd', 'zsno', 'd_max',
       'frac_sat_soil_dsl_init', 'lai_dl', 'z_dl', 'zetamaxstable',
       'wind_min', 'tkd_sand', 'tkd_clay', 'tkd_om', 'tkm_om', 'pd',
       'csol_om', 'csol_sand', 'csol_clay', 'bsw_sf', 'hksat_sf',
       'sucsat_sf', 'watsat_sf', 'baseflow_scalar',
       'maximum_leaf_wetted_fraction', 'interception_fraction',
       'aq_sp_yield_min', 'fff', 'liq_canopy_storage_scalar',
       'snow_canopy_storage_scalar', 'e_ice', 'n_baseflow', 'n_melt_coef',
       'accum_factor', 'eta0_vionnet', 'drift_gs', 'ssi', 'wimp',
       'upplim_destruct_metamorph', 'wind_snowcompact_fact', 'rho_max',
       'tau_ref', 'snowcan_unload_wind_fact', 'snowcan_unload_temp_fact',
       'snw_rds_refrz', 'scvng_fct_mlt_sf', 'ceta', 'medlynslope',
       'medlynintercept', 'fnps', 'theta_psii', 'theta_ip', 't

In [82]:
# number of ensemble members that are not BFB across all checked variables
(~bfb_all.astype(bool)).sum()

114