In [1]:
import pandas as pd
import os
from pyDOE import *
from scipy.io import netcdf as nc
import xarray as xr

## Download latest version of params file from google drive
* requires 'publishing' the google drive spreadsheet
* file > publish to web
* then it can be set up to continuously publish the spreadsheet to a stable url (with some latency, maybe 1-2 minutes)
* note that the first tab must be the sheet where the relevant information is located

In [2]:
data_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQs413GtLXtHVDCqEPgAwn4BbDjoWmV7uFqOAWH4mgpxXoVfN6ijnJdhyRgLkV-n2eU-sSQush4CzYU/pub?output=csv'
#cmd = 'curl '+data_url+' > params.csv'
cmd = 'curl -L '+data_url+' > params.csv' # need to add -L option to force redirects
os.system(cmd)

0

## Read in csv data, filtering by the "include" column

In [3]:
#data     = pd.read_csv('params.csv')
data     = pd.read_csv('params.csv',header=0,skiprows=[1]) # modify read_csv to account for header spanning 2 rows
included = data['include']==1
params_full   = data.loc[included,['name','location','min','max','pft_mins','pft_maxs']]

# reset indexing and get rid of excel row number
params = params_full.reset_index(drop=True)

params

Unnamed: 0,name,location,min,max,pft_mins,pft_maxs
0,dleaf,P,pft,pft,"0,0.000216,0.000216,0.00072,0.0081,0.0081,0.00...","0,0.00108,0.00108,0.0036,0.0567,0.0567,0.243,0..."
1,baseflow_scalar,N,0.0005,0.1,,
2,maximum_leaf_wetted_fraction,N,0.01,0.5,,
3,fff,P,0.02,5,,
4,medlynslope,P,pft,pft,"9,1.29,1.29,1.29,1.63,1.63,3.19,3.19,3.19,2.25...","9,4.7,4.7,4.7,4.59,4.59,5.11,5.11,5.11,9.27,9...."
5,jmaxb1,N,0.05,0.25,,
6,kmax,P,pft,pft,"0,2.00E-09,2.00E-09,2.00E-09,5.00E-09,5.00E-09...","0,3.00E-08,3.00E-08,3.00E-08,3.00E-08,3.00E-08..."
7,FUN_fracfixers,P,0,1,,
8,froot_leaf,P,20percent,20percent,,
9,leafcn,P,30percent,30percent,,


Example of how to read pft-specific values as a numpy array

In [4]:
pftfirstind = params.index[params['min']=='pft'][0]
np.fromstring(params['pft_mins'][pftfirstind],dtype='float',sep=',')

array([0.      , 0.000216, 0.000216, 0.00072 , 0.0081  , 0.0081  ,
       0.0081  , 0.0081  , 0.0081  , 0.0081  , 0.000405, 0.000162,
       0.000144, 0.000144, 0.000144, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162])

Testing out how to retrieve the pft-dependent parameter names

In [5]:
params.loc[params['min']=='pft']['name']

0          dleaf
4    medlynslope
6           kmax
Name: name, dtype: object

Testing out how to retrieve the pft-dependent parameter indices as a numpy array

In [6]:
params.index[params['min']=='pft'].values

array([0, 4, 6])

Example of how to parse "XXpercent" for perturbations relative to default values

In [7]:
mystring = params['min'][8]
mystring

'20percent'

In [8]:
# logic for detection
"percent" in mystring

True

In [9]:
# extracting the numerical value
float(mystring.split("percent")[0])

20.0

## Defining a class for organizing parameter information

In [69]:
class ParamInfo(object):

    def __init__(self, name, minval=None, maxval=None, defval=None):
        self._name = name
        self._min = minval
        self._max = maxval
        self._default = defval

    @property
    def name(self):
        return self._name
	
    @property
    def min(self):
        return self._min

    @property
    def max(self):
        return self._max
    
    @property
    def default(self):
        return self._default
    
    @name.setter
    def name(self, new_name):
        self._name = new_name
    
    @min.setter
    def min(self, new_min):
        self._min = new_min
        
    @max.setter
    def max(self, new_max):
        self._max = new_max
         
    @default.setter
    def default(self, new_def):
        self._default = new_def

    def __repr__(self):
        return "%s:\n\tdefault = %s\n\tmin = %s\n\tmax = %s\n" % (self.name, self.default, self.min, self.max)

In [70]:
# testing out the class/dictionary functionality
test_dict = {"P1": ParamInfo("P1", minval=0.0, maxval=1.0, defval=2.0),
			  "P2": ParamInfo("P2", minval=[0,0,0,0,0], maxval=[100,100,100,100,100], defval=[0,1,2,3,4]),
			  "P3": ParamInfo("P3", minval="min", maxval="max", defval="value"),
              "P4": ParamInfo("P4")
			  }

# adding a new parameter
test_dict["new_param"] = ParamInfo("new_param")

# setting the max value
#test_dict["P4"].set_max(100)
test_dict["P4"].max = 200

In [71]:
# look at the test dictionary
for key in test_dict:
	print(test_dict[key])

P1:
	default = 2.0
	min = 0.0
	max = 1.0

P2:
	default = [0, 1, 2, 3, 4]
	min = [0, 0, 0, 0, 0]
	max = [100, 100, 100, 100, 100]

P3:
	default = value
	min = min
	max = max

P4:
	default = None
	min = None
	max = 200

new_param:
	default = None
	min = None
	max = None



## Read in default parameter values
* to use defaults to scale/set parameter perturbations
* and to record/keep track of defaults for each parameter and save that information for each simulation

### First, get the default values from the params netcdf file

In [72]:
# assign the basepftfile
basepftfile = "../basecase/clm5_params.c200519.nc"

# read in default file
def_params = xr.open_dataset(basepftfile)

# declare a dictionary to store parameter information
params_dict={}

# loop over parameters grabbing name and location
for name,loc in zip(params['name'],params['location']):      
    # select parameters located in the params file only
    if loc=='P':
        # getting parameter dims (i.e., checking for segment variation)
        dims = len(def_params[name].values.shape)
        if dims<2:
            # no segment variation
            x = def_params[name].values
            params_dict[name] = ParamInfo(name, defval=x)
        else:
            # segment variation: kmax,ck,psi50,rootprof_beta
            # assumes same values applied across segments
            # TO DO: check this assumption, appears not true for rootprof_beta
            x = def_params[name][0,:].values
            params_dict[name] = ParamInfo(name, defval=x)

In [73]:
# check out the dictionary of default values so far
params_dict
#params_dict.keys() # get the keys (parameter names)
#params_dict.values() # get the values all strung together
#params_dict['dleaf'].default # look at the default values for dleaf

{'dleaf': dleaf:
 	default = [0.   0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04]
 	min = None
 	max = None, 'fff': fff:
 	default = 0.5
 	min = None
 	max = None, 'medlynslope': medlynslope:
 	default = [9.         2.3499999  2.3499999  2.3499999  4.11999989 4.11999989
  4.44999981 4.44999981 4.44999981 4.69999981 4.69999981 4.69999981
  2.22000003 5.25       1.62       5.78999996 5.78999996 1.78999996
  1.78999996 5.78999996 5.78999996 5.78999996 5.78999996 5.78999996
  5.78999996 5.78999996 5.78999996 5.78999996 5.78999996 5.78999996
  5.78999996 5.78999996 5.78999996 5.78999996 5.78999996 5.78999996
  5.78999996 5.78999996 5.78999996 

### Second, get the namelist default values

In [74]:
# NOTE: here using an example lnd_in file to pull in default namelist values
# Could also parse the namelist defaults file, see: https://github.com/ESCOMP/CTSM/blob/e2b9745d81ed5cb7cd7f5d6098edf506a4956335/bld/namelist_files/namelist_defaults_ctsm.xml
thedir = '/glade/work/djk2120/ctsm_hardcode_co/cime/scripts/clm50c6_ctsmhardcodep_2deg_GSWP3V1_Sparse250_2000/CaseDocs/'
thefil = 'lnd_in'
lndin = thedir+thefil

# loop over parameters grabbing name and location
for name,loc in zip(params['name'],params['location']):      
    # select parameters located in the namelist only
    if loc=='N':
        # build a command to search for the parameter by name and put output in a tmp file
        cmd = 'grep '+name+' '+lndin+' > tmp.txt'
        ret = os.system(cmd)
        # checking for nonzero return code (exit status?), meaning parameter is not found
        if ret != 0:
            # TO DO: will need to address these special cases somehow...
            print(name+' not found')
        else:
            f = open('tmp.txt', 'r')
            # parse the value from the parameter name
            tmp = f.read().split()[2]
            f.close()
            # cases where scientific notation(?) is specified by a "d"
            # TO DO: there may be other special cases as well (scientific notation as an "e"?)
            if 'd' in tmp:
                tmp = tmp.split('d')
                x = float(tmp[0])*10**float(tmp[1])
                params_dict[name] = ParamInfo(name, defval=x)
            else:
                x = float(tmp)
                params_dict[name] = ParamInfo(name, defval=x)

In [75]:
# check out the dictionary of default values so far
params_dict

{'dleaf': dleaf:
 	default = [0.   0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04
  0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04 0.04]
 	min = None
 	max = None, 'fff': fff:
 	default = 0.5
 	min = None
 	max = None, 'medlynslope': medlynslope:
 	default = [9.         2.3499999  2.3499999  2.3499999  4.11999989 4.11999989
  4.44999981 4.44999981 4.44999981 4.69999981 4.69999981 4.69999981
  2.22000003 5.25       1.62       5.78999996 5.78999996 1.78999996
  1.78999996 5.78999996 5.78999996 5.78999996 5.78999996 5.78999996
  5.78999996 5.78999996 5.78999996 5.78999996 5.78999996 5.78999996
  5.78999996 5.78999996 5.78999996 5.78999996 5.78999996 5.78999996
  5.78999996 5.78999996 5.78999996 

## Set sampling option
* ### option available for latin hypercube (LHC) or one-at-a-time (OAAT)

In [10]:
sampling_protocol = 'OAAT'
#sampling_protocol = 'LHC'
prefix = sampling_protocol
nparam = len(params['name'])  #number of parameters

## Generate parameter sampling
 * ### careful, each time you run LHC you get a new random draw

In [11]:
# NOTE: LHC code is not updated to use dictionaries
if sampling_protocol == 'LHC':
    # define sample size (number of ensemble members)
    nsamp = 10

    # Generate the latin hypercube sample
    lhd = lhs(nparam, samples=int(nsamp))
    # lhd is a 2D array indexed by ensemble member x parameter
    

    # figure out how many pft-dependent params there are in this sample
    npftparam = sum(params['min']=='pft')
    
    if npftparam>0:
        # get dataframe index of first pft param
        pftfirstind = params.index[params['min']=='pft'][0]
        
        # get number of pfts
        npft = len(np.fromstring(params['pft_mins'][pftfirstind],dtype='float',sep=','))
        
        # set up numpy array to store pft-specific values
        pft_array = np.nan*np.ones([npftparam,npft,nsamp])
        
        for j in range(npftparam):
            # get the index for the current pft param
            pftind = params.index[params['min']=='pft'][j]
            
            # get min values
            min_pft_array = np.fromstring(params['pft_mins'][pftind],dtype='float',sep=',')
            # max values
            max_pft_array = np.fromstring(params['pft_maxs'][pftind],dtype='float',sep=',')
            
            # loop over samples and calculate parameter values for each pft
            for i in range(nsamp):
                pft_array[j,:,i] = (max_pft_array - min_pft_array)*lhd[i,pftind] + min_pft_array
                # can't store pft_array as a pandas dataframe because it's 3D
                # unless there is some alternate way to store this data?
    
    # initialize min/max arrays - for params without pft-variation
    min_array = np.nan*np.ones(nparam)
    max_array = np.nan*np.ones(nparam)
    
    # generate arrays with min and max values
    for i in range(nparam):
        if params['min'].values[i]=='pft':
            # TO DO: what's a good placeholder, to denote need to reference pft_array?
            # numpy doesn't like assigning a string to an existing array of floats
            # for now, just print a message
            print('skipping '+params['name'].values[i]+'...this parameter varies with PFT')
            
            # Numpy doesn't like assigning an array to a single index in an existing array
            # The problem is still that I'm declaring min_array before trying to assign values
            # If I could build it all at once, numpy would allow for nested arrays
            #min_array[i] = np.fromstring(params['pft_mins'].values[i],dtype='float',sep=',')
            #max_array[i] = np.fromstring(params['pft_maxs'].values[i],dtype='float',sep=',')
        else:
            # assign min/max values
            min_array[i] = float(params['min'].values[i])
            max_array[i] = float(params['max'].values[i])
            
    # calculate parameter values; skip pft params (NaNs in min/max arrays)
    param_array = (max_array - min_array)*lhd + min_array

elif sampling_protocol == 'OAAT':
    # number of samples is twice the number of parameters (min and max perturbations)
    nsamp = 2*nparam
    
    # set up parameter array
    # NaN is code for keep the default value
    param_array = np.nan*np.ones([nsamp,nparam])
    
    # get the min and max indices (even/odd rows)
    mins_index = (np.arange(0,nsamp,2),np.arange(0,nparam,1))
    maxs_index = (np.arange(1,nsamp,2),np.arange(0,nparam,1))
    
    # figure out how many pft-dependent params there are in this sample
    npftparam = sum(params['min']=='pft')
    
    # set up numpy array to store pft-specific values
    if npftparam>0:
        # get dataframe index of first pft param
        pftfirstind = params.index[params['min']=='pft'][0]
        
        # get number of pfts
        npft = len(np.fromstring(params['pft_mins'][pftfirstind],dtype='float',sep=','))
        
        # third dimension accounts for min/max values
        pft_array = np.nan*np.ones([npftparam,npft,2])
        
        for j in range(npftparam):
            # get the index for the current pft param
            pftind = params.index[params['min']=='pft'][j]
            
            # assign the values for min and max
            pft_array[j,:,0]=np.fromstring(params['pft_mins'][pftind],dtype='float',sep=',')
            pft_array[j,:,1]=np.fromstring(params['pft_maxs'][pftind],dtype='float',sep=',')
            # can't store pft_array as a pandas dataframe because it's 3D
            # unless there is some alternate way to store this data?
        
    # assign values to the parameter array
    for i in range(nparam):
        # check for pft variation
        if params['min'].values[i]=='pft':
            # TO DO: what's a good placeholder, to denote need to reference pft_array?
            # e.g., param_array[mins_index[0][i]][i] = float('pft')
            # but numpy doesn't like assigning a string to an existing array of floats
            # for now, just print a message
            print('skipping '+params['name'].values[i]+'...this parameter varies with PFT')
            #params_dict[params['name'].values[i]].min = np.fromstring(params['pft_mins'][i],dtype='float',sep=',')
            #params_dict[params['name'].values[i]].max = np.fromstring(params['pft_maxs'][i],dtype='float',sep=',')
        # check for "XXpercent" perturb from default
        elif "percent" in params['min'].values[i]:
            print('skipping '+params['name'].values[i]+'...for now, need default values')
            #percent_perturb = float(params['min'].values[i].split("percent")[0])
            #percent_min_values = params_dict[params['name'].values[i]].default*(1 - percent_perturb/100)
            #percent_max_values = params_dict[params['name'].values[i]].default*(1 + percent_perturb/100)            
            #params_dict[params['name'].values[i]].min = percent_min_values
            #params_dict[params['name'].values[i]].max = percent_max_values        
        else:
            # assign min/max values directly
            param_array[mins_index[0][i]][i]=params['min'].values[i]
            param_array[maxs_index[0][i]][i]=params['max'].values[i]
            #params_dict[params['name'].values[i]].min = params['min'].values[i]
            #params_dict[params['name'].values[i]].max = params['max'].values[i]

# store psets in a pandas dataframe
psets = pd.DataFrame(data=param_array, index=None, columns=params['name'])
psets

# params dictionary
#params_dict

skipping dleaf...this parameter varies with PFT
skipping medlynslope...this parameter varies with PFT
skipping kmax...this parameter varies with PFT
skipping froot_leaf...for now, need default values
skipping leafcn...for now, need default values
skipping leaf_long...for now, need default values


name,dleaf,baseflow_scalar,maximum_leaf_wetted_fraction,fff,medlynslope,jmaxb1,kmax,FUN_fracfixers,froot_leaf,leafcn,leaf_long,decomp_depth_efolding
0,,,,,,,,,,,,
1,,,,,,,,,,,,
2,,0.0005,,,,,,,,,,
3,,0.1,,,,,,,,,,
4,,,0.01,,,,,,,,,
5,,,0.5,,,,,,,,,
6,,,,0.02,,,,,,,,
7,,,,5.0,,,,,,,,
8,,,,,,,,,,,,
9,,,,,,,,,,,,


# NOTE: starting here, this code below is not fully updated to use dictionary of parameter info or reading default values

### Modify psets dataframe to include pft flag

In [12]:
if sampling_protocol == 'LHC':
    for ind,name in enumerate(params['name']):
        # check for NaNs in the whole column (denotes PFT-specific param)
        if np.isnan(psets[name]).all():
            print('adding pft flag for '+name)
            psets[name] = 'pft'

# NOTE: this bit of code generates a pandas warning, but still executes as it should
# Could come back to this if we figure out how to put some pft flag in the preceding code
elif sampling_protocol == 'OAAT':    
    for ind,name in enumerate(params['name']):
        # check for NaNs in the whole column (denotes PFT-specific param AND/OR needs default values)
        if np.isnan(psets[name]).all():
            print('adding pft flag for '+name)
            psets[name][mins_index[0][ind]] = 'pft'
            psets[name][maxs_index[0][ind]] = 'pft'

psets

adding pft flag for dleaf
adding pft flag for medlynslope
adding pft flag for kmax
adding pft flag for froot_leaf
adding pft flag for leafcn
adding pft flag for leaf_long


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


name,dleaf,baseflow_scalar,maximum_leaf_wetted_fraction,fff,medlynslope,jmaxb1,kmax,FUN_fracfixers,froot_leaf,leafcn,leaf_long,decomp_depth_efolding
0,pft,,,,,,,,,,,
1,pft,,,,,,,,,,,
2,,0.0005,,,,,,,,,,
3,,0.1,,,,,,,,,,
4,,,0.01,,,,,,,,,
5,,,0.5,,,,,,,,,
6,,,,0.02,,,,,,,,
7,,,,5.0,,,,,,,,
8,,,,,pft,,,,,,,
9,,,,,pft,,,,,,,


### Check out pft_array, the numpy array that stores pft-specific values

In [13]:
pft_array.shape 
# OAAT dims are (npftparam, npft, 2) where last dim represents min/max perturbations
# LHC dims are (npftparam, npft, nsamp)

(3, 79, 2)

## Generate parameter files
* ### this will overwrite parameter files!!
* ### proceed with caution

In [52]:
# assign the basepftfile
basepftfile = "../basecase/clm5_params.c200519.nc"

if sampling_protocol == 'OAAT':
    # initialize npftparam counter
    npftparam = 0
    # number of samples is twice the number of parameters (min and max perturbations)
    #nsamp = 2*nparam

# loop over nsamp and modify the parameter values accordingly
for i in range(nsamp):
    if sampling_protocol == 'OAAT':
        # open the default file (twice for OAAT)
        #tmp_min = xr.open_dataset(basepftfile)
        #tmp_max = xr.open_dataset(basepftfile)
                
        # generate name for this param file
        #min_pftfile = "../paramfiles/"+prefix+str(2*i+1).zfill(4)+".nc"
        #max_pftfile = "../paramfiles"+prefix+str(2*i+2).zfill(4)+".nc"
        #print('working on '+min_pftfile+' and '+max_pftfile)
        
    # open the default file
    tmp = xr.open_dataset(basepftfile)
    
    # generate name for this param file
    pftfile = "../paramfiles/"+prefix+str(i+1).zfill(4)+".nc"
    print('working on '+pftfile)
    
    if sampling_protocol == 'LHC':
        # reset npftparam counter for each sample
        npftparam = 0
    
    # loop over parameters
    for name,loc in zip(params['name'],params['location']):
        
        # select parameters located in the params file only
        if loc=='P':

            if sampling_protocol == 'LHC':
                print(name+' modified')
                var = tmp[name]
                
                # check to see if there is pft variation
                if psets[name][i]=='pft':
                    
                    # check which npftparam we are on
                    print('npftparam='+str(npftparam))
                    
                    # modify values
                    tmp[name][:] = pft_array[npftparam,:,i]
                    
                    # increment npftparam counter; only do this once per parameter
                    npftparam += 1
                    
                else: # no pft variation, assign the same number across all PFTs (as applicable)
                    
                    # check for indexing by pft
                    # NOTE: this logic might get tripped up by froz_q10 and q10_mr which are currently indexed by a placeholder dim "allpfts" (should be removed soon)
                    if var.shape:
                        
                        # check for indexing by segment or variants, which will be the first dimension
                        # skip the first index, don't want to overwrite non-vegetated values
                        if var.shape[0] != npft: 
                            tmp[name][:,1:] = psets[name][i]
                        else: # indexed by pft only
                            tmp[name][1:] = psets[name][i]
                        
                    else: # single value, no indexing by pft
                        tmp[name] = psets[name][i]
            
            elif sampling_protocol == 'OAAT':          
                # check to see if this parameter should be modified
                # logic is checking for psets that are NOT NaNs
                if pd.isna(psets[name][i])==False:
                    print(name+' modified')
                    var = tmp[name]
                    #print(var.shape)

                    # check to see if there is pft variation
                    # NOTE: may want to use only first 16 indices for this ensemble (no crop), in which case indexing changes 
                    if psets[name][i]=='pft':
                    
                        # check which npftparam we are on
                        print('npftparam='+str(npftparam))
                    
                        # check if this is a min or max perturbation
                        if i%2==0:
                            tmp[name][:] = pft_array[npftparam,:,0] # min values
                        else:
                            tmp[name][:] = pft_array[npftparam,:,1] # max values

                            # increment npftparam counter; only do this once per parameter
                            npftparam += 1 
                
                    else: # no pft variation, assign the same number across all PFTs (as applicable)
                    
                        # check for indexing by pft
                        # NOTE: this logic might get tripped up by froz_q10 and q10_mr which are currently indexed by a placeholder dim "allpfts" (should be removed soon)
                        if var.shape:
                        
                            # check for indexing by segment or variants, which will be the first dimension
                            # skip the first index, don't want to overwrite non-vegetated values
                            if var.shape[0] != npft: 
                                tmp[name][:,1:] = psets[name][i]
                                #tmp_min[name][:,1:] = params_dict[name].min
                            else: # indexed by pft only
                                tmp[name][1:] = psets[name][i]
                                #tmp_min[name][1:] = params_dict[name].min
                    
                        else: # single value, no indexing by pft
                            tmp[name] = psets[name][i]
                            #tmp_min[name] = params_dict[name].min

    # write changes (if any) to file
    tmp_min.to_netcdf(pftfile,'w')

working on ../paramfiles/OAAT0001.nc
displar modified
working on ../paramfiles/OAAT0002.nc
displar modified
working on ../paramfiles/OAAT0003.nc
dleaf modified
npftparam=0
working on ../paramfiles/OAAT0004.nc
dleaf modified
npftparam=0
working on ../paramfiles/OAAT0005.nc
working on ../paramfiles/OAAT0006.nc
working on ../paramfiles/OAAT0007.nc
working on ../paramfiles/OAAT0008.nc
working on ../paramfiles/OAAT0009.nc
fff modified
working on ../paramfiles/OAAT0010.nc
fff modified
working on ../paramfiles/OAAT0011.nc
medlynslope modified
npftparam=1
working on ../paramfiles/OAAT0012.nc
medlynslope modified
npftparam=1
working on ../paramfiles/OAAT0013.nc
kmax modified
npftparam=2
working on ../paramfiles/OAAT0014.nc
kmax modified
npftparam=2


## Generate namelist files

Bash script will generate the namelist mod for pointing to the right params file

In [53]:
# create the namelist mod files
for i in range(nsamp):
    nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt" 
    with open(nlfile,"w") as file:
        output = "! user_nl_clm namelist options written by generate_params:\n"
        file.write(output)

# populate with mods
for name,loc in zip(params['name'],params['location']):
    if loc=='N':
        # don't have to worry about pft-variation here because namelist params won't have that
        for i in range(nsamp):
            
            if sampling_protocol == 'LHC':
                nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt"
                print('working on '+nlfile)
                with open(nlfile,"a") as file: # key is using "a" for append option
                    print(name+' modified')
                    output = "%s=%s\n" % (name, psets[name][i]) #round??
                    file.write(output) 
            
            elif sampling_protocol == 'OAAT': 
                # check to see if this parameter should be modified
                # logic is checking for psets that are NOT NaNs
                if ~np.isnan(psets[name][i]):
                    nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt"
                    print('working on '+nlfile)
                    with open(nlfile,"a") as file: # key is using "a" for append option
                        print(name+' modified')
                        output = "%s=%s\n" % (name, psets[name][i]) #round??
                        file.write(output) 

working on ../namelist_mods/OAAT0005.txt
baseflow_scalar modified
working on ../namelist_mods/OAAT0006.txt
baseflow_scalar modified
working on ../namelist_mods/OAAT0007.txt
maximum_leaf_wetted_fraction modified
working on ../namelist_mods/OAAT0008.txt
maximum_leaf_wetted_fraction modified


## Save off the parameter sets

In [54]:
# create a name for this particular ensemble
ensemble_name = "test0001"
# build the file name with the prefix (ensemble type)
psetsfile = "../parameter_sets/"+prefix+"_"+ensemble_name+".csv"
#print(psetsfile)

# first, save the psets dataframe to csv
psets.to_csv(psetsfile)

# second, save the pft array (if applicable)
pftarrayfile = "../parameter_sets/"+prefix+"_"+ensemble_name+"_pftvals"
#print(pftarrayfile)
# save as a numpy array (for now, easiest solution for 3D array?)
np.save(pftarrayfile, pft_array)
# example of how to load it back in
#test = np.load(pftarrayfile+".npy")