In [1]:
import pandas as pd
import os
from pyDOE import *
from scipy.io import netcdf as nc
import xarray as xr

### Download latest version of params file from google drive
* requires 'publishing' the google drive spreadsheet
* file > publish to web
* then it can be set up to continuously publish the spreadsheet to a stable url (with some latency, maybe 1-2 minutes)
* note that the first tab must be the sheet where the relevant information is located

In [6]:
data_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQs413GtLXtHVDCqEPgAwn4BbDjoWmV7uFqOAWH4mgpxXoVfN6ijnJdhyRgLkV-n2eU-sSQush4CzYU/pub?output=csv'
cmd = 'curl '+data_url+' > params.csv'
os.system(cmd)

0

### Read in csv data, filtering by the "include" column

In [7]:
#data     = pd.read_csv('params.csv')
data     = pd.read_csv('params.csv',header=0,skiprows=[1]) # modify read_csv to account for header spanning 2 rows
included = data['include']==1
params   = data.loc[included,['name','location','min','max','pft_mins','pft_maxs']]

params

Unnamed: 0,name,location,min,max,pft_mins,pft_maxs
12,displar,P,0.4,0.95,,
13,dleaf,P,pft,pft,"0,0.000216,0.000216,0.00072,0.0081,0.0081,0.00...","0,0.00108,0.00108,0.0036,0.0567,0.0567,0.243,0..."
41,baseflow_scalar,N,0.0005,0.1,,
42,maximum_leaf_wetted_fraction,N,0.01,0.5,,
64,medlynslope,P,pft,pft,"9,1.29,1.29,1.29,1.63,1.63,3.19,3.19,3.19,2.25...","9,4.7,4.7,4.7,4.59,4.59,5.11,5.11,5.11,9.27,9...."
82,kmax,P,1.00E-08,3.00E-08,,


In [13]:
# example of how to read pft-specific values as a numpy array
pftfirstind = params.index[params['min']=='pft'][0]
np.fromstring(params['pft_mins'][pftfirstind],dtype='float',sep=',')

array([0.      , 0.000216, 0.000216, 0.00072 , 0.0081  , 0.0081  ,
       0.0081  , 0.0081  , 0.0081  , 0.0081  , 0.000405, 0.000162,
       0.000144, 0.000144, 0.000144, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162, 0.000162, 0.000162, 0.000162, 0.000162, 0.000162,
       0.000162])

In [16]:
# testing out how to retrieve the pft-dependent parameter names
params.loc[params['min']=='pft']['name']

13          dleaf
64    medlynslope
Name: name, dtype: object

In [18]:
params['name'].values[i]

'displar'

### Generate parameter sampling
 * option available for latin hypercube (LHC) or one-at-a-time (OAAT)
 * careful, each time you run LHC you get a new random draw

In [22]:
sampling_protocol = 'OAAT'
prefix = sampling_protocol
nparam = len(params['name'])  #number of parameters

if sampling_protocol == 'LHC':
    # define sample size (number of ensemble members)
    nsamp = 10

    # Generate the latin hypercube sample
    lhd = lhs(nparam, samples=int(nsamp))

    # scale according to parameter range
    param_array = ((params['max'] - params['min']).values)*lhd + params['min'].values

elif sampling_protocol == 'OAAT':
    # number of samples is twice the number of parameters (min and max perturbations)
    nsamp = 2*nparam
    
    # set up parameter array
    # NaN is code for keep the default value
    param_array = np.nan*np.ones([nsamp,nparam])
    
    # get the min and max indices (even/odd rows)
    mins_index = (np.arange(0,nsamp,2),np.arange(0,nparam,1))
    maxs_index = (np.arange(1,nsamp,2),np.arange(0,nparam,1))
    
    # figure out how many pft-dependent params there are in this sample
    npftparam = sum(params['min']=='pft')
    
    # set up numpy array to store pft-specific values (if there are any)
    if npftparam>0:
        # get dataframe index (excel row number) of first pft param
        pftfirstind = params.index[params['min']=='pft'][0]
        
        # get number of pfts
        npft = len(np.fromstring(params['pft_mins'][pftfirstind],dtype='float',sep=','))
        
        # third dimension accounts for min/max values
        pft_array = np.nan*np.ones([npftparam,npft,2])
        
        for j in range(npftparam):
            # get the index for the current pft param
            pftind = params.index[params['min']=='pft'][j]
            
            # assign the values for min and max
            pft_array[j,:,0]=np.fromstring(params['pft_mins'][pftind],dtype='float',sep=',')
            pft_array[j,:,1]=np.fromstring(params['pft_maxs'][pftind],dtype='float',sep=',')
            # can't store pft_array as a pandas dataframe because it's 3D
            # unless there is some alternate way to store this data
        
    # assign values to the parameter array
    for i in range(nparam):
        if params['min'].values[i]=='pft':
            # TO DO: what's a good placeholder, to denote need to reference pft_array?
            # e.g., param_array[mins_index[0][i]][i] = float('pft')
            # but numpy doesn't like assigning a string to an existing array of floats
            # for now, just print a message
            print('skipping '+params['name'].values[i]+'...this parameter varies with PFT')
        else:
            # assign min/max values
            param_array[mins_index[0][i]][i]=params['min'].values[i]
            param_array[maxs_index[0][i]][i]=params['max'].values[i]

# store psets in a pandas dataframe
psets = pd.DataFrame(data=param_array, index=None, columns=params['name'])
psets

skipping dleaf...this parameter varies with PFT
skipping medlynslope...this parameter varies with PFT


name,displar,dleaf,baseflow_scalar,maximum_leaf_wetted_fraction,medlynslope,kmax
0,0.4,,,,,
1,0.95,,,,,
2,,,,,,
3,,,,,,
4,,,0.0005,,,
5,,,0.1,,,
6,,,,0.01,,
7,,,,0.5,,
8,,,,,,
9,,,,,,


### Modify psets dataframe to include pft flag

In [23]:
# NOTE: this bit of code generates a pandas warning, but still executes as it should
# Could come back to this if we figure out how to put some pft flag in the preceding code
for ind,name in enumerate(params['name']):
    #print(name)
    # check for NaNs in the whole column (denotes PFT-specific param)
    if np.isnan(psets[name]).all():
        #print(name)
        #print(ind)
        print('adding pft flag for '+name)
        psets[name][mins_index[0][ind]] = 'pft'
        psets[name][maxs_index[0][ind]] = 'pft'

psets

adding pft flag for dleaf
adding pft flag for medlynslope


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


name,displar,dleaf,baseflow_scalar,maximum_leaf_wetted_fraction,medlynslope,kmax
0,0.4,,,,,
1,0.95,,,,,
2,,pft,,,,
3,,pft,,,,
4,,,0.0005,,,
5,,,0.1,,,
6,,,,0.01,,
7,,,,0.5,,
8,,,,,pft,
9,,,,,pft,


### Check out pft_array, the numpy array that stores pft-specific values

In [27]:
pft_array.shape # dims are (npftparam, npft, 2)
#pft_array

(2, 79, 2)

## Generate parameter files
* ### this will overwrite parameter files!!
* ### proceed with caution

In [67]:
# assign the basepftfile
basepftfile = "../basecase/clm5_params.c171117.nc"

# initialize npftparam counter
npftparam = 0

# loop over nsamp and modify the parameter values accordingly
for i in range(nsamp):
    # open the default file
    tmp = xr.open_dataset(basepftfile)
    
    # generate name for this param file
    pftfile = "../paramfiles/"+prefix+str(i+1).zfill(4)+".nc"
    print('working on '+pftfile)
    
    # loop over parameters
    for name,loc in zip(params['name'],params['location']):
        
        # select parameters located in the params file only
        if loc=='P':

            # check to see if this parameter should be modified
            # logic is checking for psets that are NOT NaNs
            if pd.isna(psets[name][i])==False:
                print(name+' modified')
                var = tmp[name]
                #print(var.shape)

                # check to see if there is pft variation
                if psets[name][i]=='pft':
                    
                    # check which npftparam we are on
                    print('npftparam='+str(npftparam))
                    
                    # check if this is a min or max perturbation
                    if i%2==0:
                        # NOTE: will have to be careful with this indexing if we have params indexed by more than 1 dim (e.g., kmax)
                        # Currently, that logic is checked only if there isn't pft variation (which is the current situation for kmax)
                        tmp[name][:] = pft_array[npftparam,:,0] # min values
                    else:
                        tmp[name][:] = pft_array[npftparam,:,1] # max values

                        # increment npftparam counter; only do this once per parameter
                        npftparam += 1 
                
                else: # no pft variation, assign the same number across all PFTs (as applicable)
                    
                    # check for indexing by pft
                    if var.shape:
                        
                        # check for indexing by segment which will be the first dimension (IS THIS TRUE FOR ALL?)
                        if var.shape[0] != npft: 
                            tmp[name][:,1:] = psets[name][i]
                        else: # indexed by pft only
                            # skip the first index, don't want to overwrite non-vegetated landunit
                            tmp[name][1:] = psets[name][i]
                    
                    else: # single value, no indexing by pft
                        tmp[name] = psets[name][i]

    # write changes (if any) to file
    tmp.to_netcdf(pftfile,'w')

working on ../paramfiles/OAAT0001.nc
displar modified
working on ../paramfiles/OAAT0002.nc
displar modified
working on ../paramfiles/OAAT0003.nc
dleaf modified
npftparam=0
working on ../paramfiles/OAAT0004.nc
dleaf modified
npftparam=0
working on ../paramfiles/OAAT0005.nc
working on ../paramfiles/OAAT0006.nc
working on ../paramfiles/OAAT0007.nc
working on ../paramfiles/OAAT0008.nc
working on ../paramfiles/OAAT0009.nc
medlynslope modified
npftparam=1
working on ../paramfiles/OAAT0010.nc
medlynslope modified
npftparam=1
working on ../paramfiles/OAAT0011.nc
kmax modified
working on ../paramfiles/OAAT0012.nc
kmax modified


## Generate namelist files

In [70]:
# create the namelist mod files
for i in range(nsamp):
    nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt" 
    with open(nlfile,"w") as file:
        output = "! user_nl_clm namelist options written by generate_params:\n"
        file.write(output)

# populate with mods
for name,loc in zip(params['name'],params['location']):
    if loc=='N':
        # don't have to worry about pft-variation here because namelist params won't have that
        for i in range(nsamp):
            # check to see if this parameter should be modified
            # logic is checking for psets that are NOT NaNs
            if ~np.isnan(psets[name][i]):
                nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt"
                print('working on '+nlfile)
                with open(nlfile,"a") as file: # key is using "a" for append option
                    print(name+' modified')
                    output = "%s=%s\n" % (name, psets[name][i]) #round??
                    file.write(output) 

working on ../namelist_mods/OAAT0005.txt
baseflow_scalar modified
working on ../namelist_mods/OAAT0006.txt
baseflow_scalar modified
working on ../namelist_mods/OAAT0007.txt
maximum_leaf_wetted_fraction modified
working on ../namelist_mods/OAAT0008.txt
maximum_leaf_wetted_fraction modified
