# CLM5-BGC OAAT parameter ensemble
- generates paramfiles and namelist_mods for the OAAT segment of the CLM5PPE
- leverages ppe-tools to write out the files
- written with community-derived ensemble ranges:
    -https://docs.google.com/spreadsheets/d/1OtkaO_uAmafWKR9kgtRC2Ge6d6fkhymngSpben5SJ_Q/
- Katie Dagon (kdagon@ucar.edu) and Daniel Kennedy (djk2120@ucar.edu)

In [8]:
import numpy as np
from ppe_tools import Ensemble,Member,ParamInfo
from ppe_tools.utils import get_default, parse_val
import xarray as xr
import os
import pandas as pd
import glob

In [9]:
#data_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQs413GtLXtHVDCqEPgAwn4BbDjoWmV7uFqOAWH4mgpxXoVfN6ijnJdhyRgLkV-n2eU-sSQush4CzYU/pub?output=csv'
#cmd = 'curl -L '+data_url+' > oaat_jun2020.csv' # need to add -L option to force redirects
#os.system(cmd)

0

In [3]:
# Load the parameter-range spreadsheet (exported as csv) into a dataframe.
csvfile = 'oaat_jun2020.csv'
# The header spans two rows in the file, so row 1 is skipped on read.
data = pd.read_csv(csvfile, header=0, skiprows=[1])
# Keep only the rows whose 'final' flag equals 1.
included = data['final'].eq(1)
params_full = data.loc[included][['name', 'location', 'min', 'max', 'flag', 'pft_mins', 'pft_maxs']]
# Renumber the rows from zero, dropping the original spreadsheet row index.
params = params_full.reset_index(drop=True)

In [11]:
# Pull the keys, parameter names and min/max labels of the previous ensemble.
# NOTE: this rebinds `data`, which earlier held the spreadsheet dataframe.
keyfile = '/glade/scratch/djk2120/PPEn08/OAAT_apr2020.csv'
data = pd.read_csv(keyfile, header=None)
keys, pnames, minmaxs = (data[col].values for col in (0, 1, 2))
# Largest integer found in the last four characters of any key (0 if no keys).
maxkey = max([0] + [int(key[-4:]) for key in keys])
nextnum = maxkey

In [16]:
# Build the min/max dict for parameters that are new relative to the last ensemble.
oaats = {}
names = params['name']
flags = params['flag']
for name, flag in zip(names, flags):
    # skip anything already listed in the previous ensemble, or carrying a flag
    if name in pnames or pd.notnull(flag):
        continue

    ix = params['name'] == name
    minval = params.loc[ix, 'min'].values[0]
    maxval = params.loc[ix, 'max'].values[0]
    pftmin = params.loc[ix, 'pft_mins'].values[0]
    pftmax = params.loc[ix, 'pft_maxs'].values[0]
    thisloc = params.loc[ix, 'location'].values[0]

    # lower bound: comma-separated values from pft_mins, a 'percent' string
    # kept verbatim, or a plain number wrapped in a 0-d array
    needs_pft = minval == 'pft'
    if needs_pft:
        thismin = np.fromstring(pftmin, dtype='float', sep=',')
    else:
        thismin = minval if 'percent' in minval else np.array(float(minval))

    # upper bound: same three cases, read from max / pft_maxs
    needs_pft = maxval == 'pft'
    if needs_pft:
        thismax = np.fromstring(pftmax, dtype='float', sep=',')
    else:
        thismax = maxval if 'percent' in maxval else np.array(float(maxval))

    oaats[name] = {'min': thismin, 'max': thismax, 'loc': thisloc}


In [18]:
# display the dict of new parameters and their min/max/loc entries
oaats

{'om_frac_sf': {'min': '20percent', 'max': '20percent', 'loc': 'P'},
 'slopebeta': {'min': array(-5.), 'max': array(-1.), 'loc': 'P'},
 'slopemax': {'min': array(0.01), 'max': array(1.), 'loc': 'P'},
 'pc': {'min': array(0.4), 'max': array(0.69), 'loc': 'P'},
 'mu': {'min': array(0.131), 'max': array(0.24), 'loc': 'P'},
 'C2_liq_Brun89': {'min': '20percent', 'max': '20percent', 'loc': 'P'}}

In [4]:
#instantiate the Ensemble object
# basefile: the netcdf parameter file handed to Ensemble as its base
basefile = '/glade/p/cesm/cseg/inputdata/lnd/clm2/paramdata/ctsm51_params.c210507.nc'
# pdir / ndir: paramfile and namelist_mods directories for PPEn11
# (presumably where Ensemble writes its output -- confirm in ppe_tools)
pdir = '/glade/scratch/djk2120/PPEn11/paramfiles/'
ndir = '/glade/scratch/djk2120/PPEn11/namelist_mods/'
x    = Ensemble(basefile,pdir,ndir)

In [20]:
#add the new OAATS
# nextnum comes from the largest key number found in the previous ensemble's csv
prefix = 'OAAT'
x.add_oaats(oaats,prefix,nextnum,skipBFB=True)

pc-min looks BFB.... skipping


In [65]:
# write out the ensemble, passing the path of the csv file for this ensemble
# NOTE: rebinds `csvfile`, which earlier named the input spreadsheet
csvfile='/glade/scratch/djk2120/PPEn11/OAAT_jun2020.csv'
x.write(csvfile=csvfile)

In [12]:
#here I will replace (not append) paramfiles
#  therefore I need to diagnose the correct key
fixmes = ['medlynslope']
loc = 'P'
ds  = xr.open_dataset(basefile)
lndin = ''
for fixme in fixmes:
    ix = params['name']==fixme
    for minmax in ['min','max']:
        thisval = params[minmax][ix].values[0]

        # translate the spreadsheet entry into a perturbation value:
        #   'pft'       -> comma-separated values from the pft_mins/pft_maxs column
        #   '..percent' -> the string is passed through unchanged
        #   otherwise   -> a plain number wrapped in a 0-d array
        needs_pft = (thisval=='pft')
        if needs_pft:
            pftval = params['pft_'+minmax+'s'][ix].values[0]
            newval = np.fromstring(pftval, dtype='float', sep=',')
        elif 'percent' in thisval:
            # test the value being processed; the previous code checked `minval`,
            # a stale variable left over from an earlier cell
            newval = thisval
        else:
            newval = np.array(float(thisval))

        # look up the key this parameter/minmax pair had in the previous ensemble
        ix1 = pnames==fixme
        ix2 = minmaxs==minmax
        ix3 = np.logical_and(ix1,ix2)
        pname = keys[ix3][0]

        # build a single-parameter member under that existing key
        defval    = get_default(fixme, loc, ds, lndin)
        value     = parse_val(loc,defval,newval)
        paraminfo = ParamInfo(fixme, loc, defval, value)
        paramdict = {fixme:paraminfo}
        member = Member(pname,paramdict,basefile,minmax)

        x.add_member(member)
    


### add in sand_pf and clay_pf

In [4]:
# Build the OAAT dict for sand_pf and clay_pf: both get min -20 and max 20 at loc 'P'.
names = ['sand_pf', 'clay_pf']
oaats = {}

for name in names:
    minval, maxval = -20, 20
    # fresh 0-d float arrays on each pass, so entries never share an array object
    thismin = np.array(float(minval))
    thismax = np.array(float(maxval))
    oaats[name] = dict(min=thismin, max=thismax, loc='P')

In [6]:
#instantiate the Ensemble object
# a fresh Ensemble for the sand_pf/clay_pf members, using the same base file
# and PPEn11 directories as the earlier cell
basefile = '/glade/p/cesm/cseg/inputdata/lnd/clm2/paramdata/ctsm51_params.c210507.nc'
pdir     = '/glade/scratch/djk2120/PPEn11/paramfiles/'
ndir     = '/glade/scratch/djk2120/PPEn11/namelist_mods/'
x    = Ensemble(basefile,pdir,ndir)

In [10]:
# NOTE(review): `nextk` is not assigned in any cell visible in this notebook --
# confirm it is defined (or should be `nextnum`) before re-running
x.add_oaats(oaats,'OAAT',nextk)

In [12]:
# write out the sand_pf/clay_pf ensemble with its own csv file
x.write(csvfile='/glade/scratch/djk2120/PPEn11/sand_clay.csv')

### create a single paramfile to test cn_s1_bgc

In [66]:
# new Ensemble for the single test paramfile; note the directories are PPEn10 here
basefile = '/glade/p/cesm/cseg/inputdata/lnd/clm2/paramdata/ctsm51_params.c210507.nc'
pdir = '/glade/scratch/djk2120/PPEn10/paramfiles/'
ndir = '/glade/scratch/djk2120/PPEn10/namelist_mods/'
x = Ensemble(basefile,pdir,ndir)

In [67]:
# read the value of the test parameter straight out of the base paramfile
thisp = 'cn_s1_bgc'
ds = xr.open_dataset(basefile)
defval = ds[thisp]
# display it
defval

In [68]:
# Work out the perturbed ('max') value for thisp from the spreadsheet dataframe.
prtb  = 'max'
# substring match: selects every row whose name contains thisp; the first hit is used
ix = np.array([thisp in p for p in params['name']])
val = params[prtb][ix].values[0]
thisval = np.array(float(val))
loc     = params['location'][ix].values[0]
# parse_val is imported directly from ppe_tools.utils; the bare name `utils`
# is never imported in this notebook, so `utils.parse_val` raised NameError
value   = parse_val(loc,defval,thisval)

In [69]:
# Assemble one 'max' member for thisp under a hand-picked key number and register it.
ct = 267
pname = f'OAAT{ct:04d}'  # key: prefix plus the number zero-padded to four digits
paraminfo = ParamInfo(thisp, loc, defval, value)
paramdict = {thisp: paraminfo}
member = Member(pname, paramdict, basefile, 'max')
x.add_member(member)

In [70]:
# write out the single-member ensemble (no csv file argument this time)
x.write()

### create a dictionary of all the oaat perturbations
 - each parameter matched with its min and max values

In [9]:
# Collect min/max/loc for every parameter that is unflagged or flagged 'KCN'.
oaats = {}
names = params['name']
flags = params['flag']

for name, flag in zip(names, flags):
    # flag ends up True only for a non-null flag other than 'KCN'; those rows are skipped
    flag = bool(pd.notnull(flag)) and flag != 'KCN'
    if flag:
        continue

    ix = params['name'] == name
    minval = params.loc[ix, 'min'].values[0]
    maxval = params.loc[ix, 'max'].values[0]
    pftmin = params.loc[ix, 'pft_mins'].values[0]
    pftmax = params.loc[ix, 'pft_maxs'].values[0]
    thisloc = params.loc[ix, 'location'].values[0]

    # lower bound: comma-separated values from pft_mins, a 'percent' string
    # kept verbatim, or a plain number wrapped in a 0-d array
    needs_pft = minval == 'pft'
    if needs_pft:
        thismin = np.fromstring(pftmin, dtype='float', sep=',')
    else:
        thismin = minval if 'percent' in minval else np.array(float(minval))

    # upper bound: same three cases, read from max / pft_maxs
    needs_pft = maxval == 'pft'
    if needs_pft:
        thismax = np.fromstring(pftmax, dtype='float', sep=',')
    else:
        thismax = maxval if 'percent' in maxval else np.array(float(maxval))

    oaats[name] = {'min': thismin, 'max': thismax, 'loc': thisloc}

In [22]:
# Scratch check of shape truthiness: an empty 1-d array has shape (0,), a
# non-empty tuple, so it is truthy and nothing is printed here.
if not np.array([]).shape:
    print('0d array')

In [17]:
# shape of the values selected by the last `ix` mask (one matching row gives (1,))
params['min'][ix].values.shape

(1,)

### create the ensemble object and populate it with the oaats

In [23]:
# Ensemble for the full OAAT set; uses a different base paramfile and the
# ppetest directories under the home directory
basefile = '/glade/p/cgd/tss/people/oleson/modify_param/ctsm51_params.c210217_kwo.c210222.nc'
pdir = '/glade/u/home/djk2120/ppetest/paramfiles/'
ndir = '/glade/u/home/djk2120/ppetest/namelist_mods/'
x = Ensemble(basefile,pdir,ndir)

In [30]:
# first parameter name in the oaats dict (insertion order)
[*oaats.keys()][0]

'taulnir'

In [10]:
# add every entry of oaats to the ensemble, with numbering argument 1
# (rebinds `nextnum`, replacing the value derived from the previous ensemble's keys)
prefix = 'OAAT'
nextnum = 1
x.add_oaats(oaats,prefix,nextnum,skipBFB=True)

a_coef-max looks BFB.... skipping
a_exp-min looks BFB.... skipping
lai_dl-min looks BFB.... skipping
interception_fraction-max looks BFB.... skipping
aq_sp_yield_min-max looks BFB.... skipping
n_baseflow-min looks BFB.... skipping
accum_factor-min looks BFB.... skipping
wind_snowcompact_fact-min looks BFB.... skipping
theta_ip-min looks BFB.... skipping
fnr-max looks BFB.... skipping
cn_s1_bgc-min looks BFB.... skipping
decomp_depth_efolding-max looks BFB.... skipping


In [11]:
# display the ensemble's nmemb attribute after add_oaats
x.nmemb

378

### add in the special cases

In [7]:
# Parameters sharing a flag move in unison: one add_mf call per (flag, min/max) pair.
flags = params['flag']
ix = pd.notnull(params['flag'])
uflags = flags[ix].unique()
sgns = {'min': '-', 'max': ''}
for uflag in uflags:
    names = params['name'][flags == uflag]
    for minmax in ('min', 'max'):
        mf = {}
        for name in names:
            ix = params['name'] == name

            thisval = params.loc[ix, minmax].values[0]
            pftval = params.loc[ix, 'pft_' + minmax + 's'].values[0]
            thisloc = params.loc[ix, 'location'].values[0]

            needs_pft = thisval == 'pft'
            if needs_pft:
                val = np.fromstring(pftval, dtype='float', sep=',')
            elif 'percent' not in thisval:
                val = np.array(float(thisval))
            else:
                # percent strings get a leading '-' for min and no prefix for max
                val = sgns[minmax] + thisval

            mf[name] = dict(value=val, loc=thisloc, minmax=minmax, flag=uflag)

        x.add_mf(mf, 'OAAT')


In [8]:
# display the ensemble's nmemb attribute after the add_mf calls
x.nmemb

388

In [25]:
# write out the full ensemble, passing a key for the default case and the csv path
csvfile = '/glade/u/home/djk2120/ppetest/OAAT_apr2020.csv'
default_key='OAAT0000'
x.write(default_key,csvfile)

### spot check a few variables?

In [20]:
#should only have a min
# Print every member that perturbs `thisvar`, along with the values stored in
# that member's paramfile.
thisvar='a_coef'
for member in x.members:
    if thisvar in member.paramdict:
        f = pdir+member.name+'.nc'
        # separate name so the `params` spreadsheet DataFrame is not overwritten
        member_params = [*member.paramdict]
        if len(member_params)==1:
            param=member_params[0]
        else:
            # several parameters in one member: label it by its flag instead
            param=member.flag
        print(member.name,param,member.minmax)
        # context manager closes the paramfile once its variables are printed
        with xr.open_dataset(f) as p:
            for var in member.paramdict:
                print(p[var])

OAAT0029 a_coef min
<xarray.DataArray 'a_coef' ()>
array(0.1)
Attributes:
    long_name:  Drag coeff. under less dense canopy
    units:      unitless


In [16]:
#handling namelist?
thisvar='interception_fraction'
for key in ['OAAT'+str(i).zfill(4) for i in range(389)]:
    nlfile = ndir+key+'.txt'
    with open(nlfile,"r") as file:
        ff = file.read()
    if thisvar in ff:
        print(ff)

! user_nl_clm namelist options written by generate_params:
interception_fraction=0.5



In [17]:
#handling PFT?
thisvar='dleaf'
for member in x.members:
    if thisvar in member.paramdict:
        f = pdir+member.name+'.nc'
        params = [*member.paramdict]
        if len(params)==1:
            param=params[0]
        else:
            param=member.flag
        print(member.name,param,member.minmax)
        p = xr.open_dataset(f)
        for var in member.paramdict:
            print(p[var])

OAAT0021 dleaf min
<xarray.DataArray 'dleaf' (pft: 79)>
array([0.      , 0.000216, 0.000216, 0.00072 , 0.0081  , 0.0081  , 0.0081  ,
       0.0081  , 0.0081  , 0.0081  , 0.000405, 0.000162, 0.000144, 0.000144,
       0.000144, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162])
Coordinates:
    pftname  (pft) |S40 ...
Dimensions without coordinat

In [18]:
#handling PFTxsegment?
thisvar='ck'
for member in x.members:
    if thisvar in member.paramdict:
        f = pdir+member.name+'.nc'
        params = [*member.paramdict]
        if len(params)==1:
            param=params[0]
        else:
            param=member.flag
        print(member.name,param,member.minmax)
        p = xr.open_dataset(f)
        for var in member.paramdict:
            print(p[var])

OAAT0165 ck min
<xarray.DataArray 'ck' (segment: 4, pft: 79)>
array([[3., 3., 3., ..., 3., 3., 3.],
       [3., 3., 3., ..., 3., 3., 3.],
       [3., 3., 3., ..., 3., 3., 3.],
       [3., 3., 3., ..., 3., 3., 3.]])
Coordinates:
    pftname  (pft) |S40 ...
  * segment  (segment) |S40 b'sunlit                                  ' ... b...
Dimensions without coordinates: pft
Attributes:
    units:      unitless
    long_name:  weibull curve shape parameter
OAAT0166 ck max
<xarray.DataArray 'ck' (segment: 4, pft: 79)>
array([[5.15, 5.15, 5.15, ..., 5.15, 5.15, 5.15],
       [5.15, 5.15, 5.15, ..., 5.15, 5.15, 5.15],
       [5.15, 5.15, 5.15, ..., 5.15, 5.15, 5.15],
       [5.15, 5.15, 5.15, ..., 5.15, 5.15, 5.15]])
Coordinates:
    pftname  (pft) |S40 ...
  * segment  (segment) |S40 b'sunlit                                  ' ... b...
Dimensions without coordinates: pft
Attributes:
    units:      unitless
    long_name:  weibull curve shape parameter


In [22]:
#should have kc_nonmyc min/max AND KCN min/max
thisvar='kc_nonmyc'
for member in x.members:
    if thisvar in member.paramdict:
        f = pdir+member.name+'.nc'
        params = [*member.paramdict]
        if len(params)==1:
            param=params[0]
        else:
            param=member.flag
        print(member.name,param,member.minmax)
        p = xr.open_dataset(f)
        for var in member.paramdict:
            print(p[var])

OAAT0193 kc_nonmyc min
<xarray.DataArray 'kc_nonmyc' (pft: 79)>
array([0.    , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.0072, 0.072 , 0.072 ,
       0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.72  , 0.072 , 0.072 , 0.72  ,
       0.72  , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 ,
       0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 ,
       0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 ,
       0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 ,
       0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 , 0.072 ,
       0.072 , 0.072 , 0.072 , 0.072 , 0.72  , 0.72  , 0.072 , 0.072 , 0.072 ,
       0.072 , 0.72  , 0.72  , 0.72  , 0.72  , 0.072 , 0.072 ])
Coordinates:
    pftname  (pft) |S40 ...
Dimensions without coordinates: pft
Attributes:
    long_name:  Constant relating root C to non-mycorrhizal root active uptak...
    units:      gC/m3
OAAT0194 kc_nonmyc max
<xarray.DataArray 'kc_non