In [1]:
import os
import subprocess

import numpy as np
import pandas as pd
from pyDOE import *
from scipy.io import netcdf as nc
import xarray as xr

### Download latest version of params file from google drive
* This requires "publishing" the Google Drive spreadsheet:
* File > Publish to web
* The spreadsheet can then be set up to republish continuously to a stable URL (with some latency, maybe 1-2 minutes)
* Note that the first tab must be the sheet that contains the relevant information

In [2]:
# CSV export of the published parameter spreadsheet.
data_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQs413GtLXtHVDCqEPgAwn4BbDjoWmV7uFqOAWH4mgpxXoVfN6ijnJdhyRgLkV-n2eU-sSQush4CzYU/pub?output=csv'

# Fetch the sheet into params.csv.
#  * an argument list (no shell) keeps the '?' in the URL from being globbed
#  * --fail makes curl exit non-zero on an HTTP error instead of saving the
#    error page as params.csv
#  * check=True raises CalledProcessError on any curl failure, so the next
#    cell can never silently read a stale or broken params.csv
cmd = ['curl', '--fail', '--location', '--silent', '--show-error',
       '--output', 'params.csv', data_url]
subprocess.run(cmd, check=True).returncode

0

### Read in csv data, filtering by the "include" column

In [3]:
# The sheet's header spans two rows: row 0 supplies the column names and
# row 1 is skipped so that it is not parsed as data.
data = pd.read_csv('params.csv', header=0, skiprows=[1])

# Keep only the rows flagged include == 1, restricted to the columns that the
# sampling and file-generation cells actually use.
included = data['include'].eq(1)
wanted_columns = ['name', 'location', 'min', 'max']
params = data.loc[included, wanted_columns]

params

Unnamed: 0,name,location,min,max
12,displar,P,0.4,0.95
41,baseflow_scalar,N,0.0005,0.1
42,maximum_leaf_wetted_fraction,N,0.01,0.5
82,kmax,P,1e-08,3e-08


### Generate parameter sampling
 * option available for latin hypercube (LHC) or one-at-a-time (OAAT)
 * careful, each time you run LHC you get a new random draw

In [5]:
# Sampling configuration
#   sampling_protocol : 'LHC' (latin hypercube) or 'OAAT' (one-at-a-time)
#   prefix            : stem of the file names written by the later cells
#   lhc_seed          : set to an int for a repeatable LHC draw; None keeps a
#                       fresh random draw on every run
sampling_protocol = 'OAAT'
prefix = sampling_protocol
lhc_seed = None
nparam = len(params['name'])  # number of parameters

# Pull the bounds out once as plain float arrays so both protocols work on
# positional values rather than on the filtered spreadsheet index.
param_min = np.asarray(params['min'], dtype=float)
param_max = np.asarray(params['max'], dtype=float)

if sampling_protocol == 'LHC':
    # define sample size (number of ensemble members)
    nsamp = 10

    # NOTE(review): this assumes pyDOE's lhs draws from numpy's global random
    # state, in which case seeding it here makes the draw repeatable -- confirm.
    if lhc_seed is not None:
        np.random.seed(lhc_seed)

    # Generate the latin hypercube sample, one row per ensemble member
    lhd = lhs(nparam, samples=nsamp)

    # scale each column according to its parameter range
    param_array = (param_max - param_min)*lhd + param_min

elif sampling_protocol == 'OAAT':
    # Two ensemble members per parameter: row 2k holds parameter k's minimum,
    # row 2k+1 its maximum. NaN everywhere else is code for "keep the default".
    nsamp = 2*nparam
    param_array = np.full([nsamp, nparam], np.nan)
    mins_index = (np.arange(0, nsamp, 2), np.arange(0, nparam, 1))
    maxs_index = (np.arange(1, nsamp, 2), np.arange(0, nparam, 1))
    param_array[mins_index] = param_min
    param_array[maxs_index] = param_max

else:
    # Without this branch a typo would either raise a NameError below or, on a
    # re-run, silently reuse the param_array left over from a previous run.
    raise ValueError("unknown sampling_protocol %r; expected 'LHC' or 'OAAT'"
                     % (sampling_protocol,))

# store in a pandas dataframe: one row per ensemble member, one column per parameter
psets = pd.DataFrame(data=param_array, index=None, columns=params['name'])
psets

name,displar,baseflow_scalar,maximum_leaf_wetted_fraction,kmax
0,0.4,,,
1,0.95,,,
2,,0.0005,,
3,,0.1,,
4,,,0.01,
5,,,0.5,
6,,,,1e-08
7,,,,3e-08


## Generate parameter files
* ### this will overwrite parameter files!!
* ### proceed with caution

In [12]:
# assign the basepftfile
basepftfile = "../basecase/clm5_params.c171117.nc"

# make sure the output directory exists before anything is written into it
os.makedirs("../paramfiles", exist_ok=True)

# names of the parameters located in the params file (location == 'P')
pft_param_names = [name for name, loc in zip(params['name'], params['location'])
                   if loc == 'P']

# loop over nsamp and modify the parameter values accordingly
for i in range(nsamp):
    # generate name for this param file
    pftfile = "../paramfiles/"+prefix+str(i+1).zfill(4)+".nc"
    print('working on '+pftfile)

    # Open the default file afresh for every member. The context manager closes
    # it again once the modified copy has been written, so open file handles do
    # not accumulate over the loop.
    with xr.open_dataset(basepftfile) as ds:

        for name in pft_param_names:
            value = psets[name][i]

            # NaN is code for "keep the default value"
            if pd.isna(value):
                continue

            print(name+' modified')
            var = ds[name]

            if len(var.shape) > 1:
                # More than one dimension: every entry along the first axis is
                # set, and index 0 of the second axis is left untouched.
                # TODO(review): segment-by-pft is the only layout the original
                # author had in mind here -- confirm there are no others.
                ds[name][:, 1:] = value
            elif var.shape:
                # indexed by pft only: skip the first index, don't want to
                # overwrite the non-vegetated landunit
                ds[name][1:] = value
            else:
                # Single value, no indexing by pft. Assigning a bare scalar
                # replaces the variable with a new one that has no attributes,
                # so carry the original attributes across explicitly.
                attrs = dict(var.attrs)
                ds[name] = value
                ds[name].attrs.update(attrs)

        # write changes (if any) to file, while the source dataset is still open
        ds.to_netcdf(pftfile, 'w')

working on ../paramfiles/OAAT0001.nc
displar modified
working on ../paramfiles/OAAT0002.nc
displar modified
working on ../paramfiles/OAAT0003.nc
working on ../paramfiles/OAAT0004.nc
working on ../paramfiles/OAAT0005.nc
working on ../paramfiles/OAAT0006.nc
working on ../paramfiles/OAAT0007.nc
kmax modified
working on ../paramfiles/OAAT0008.nc
kmax modified


## Generate namelist files

In [13]:
# make sure the output directory exists before anything is written into it
os.makedirs("../namelist_mods", exist_ok=True)

# names of the parameters that are set through the namelist (location == 'N')
namelist_names = [name for name, loc in zip(params['name'], params['location'])
                  if loc == 'N']

# Create one namelist mod file per ensemble member and fill it in a single
# pass: the header line first, then one "name=value" line for every namelist
# parameter this member modifies, in the order the parameters appear in params.
for i in range(nsamp):
    nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt"
    with open(nlfile, "w") as nl:
        nl.write("! user_nl_clm namelist options written by generate_params:\n")

        for name in namelist_names:
            value = psets[name][i]

            # NaN is code for "keep the default value". pd.notna returns a
            # proper boolean, unlike `~` applied to the result of an isnan call.
            if pd.notna(value):
                print('working on '+nlfile)
                print(name+' modified')
                nl.write("%s=%s\n" % (name, value)) #round??

working on ../namelist_mods/OAAT0003.txt
baseflow_scalar modified
working on ../namelist_mods/OAAT0004.txt
baseflow_scalar modified
working on ../namelist_mods/OAAT0005.txt
maximum_leaf_wetted_fraction modified
working on ../namelist_mods/OAAT0006.txt
maximum_leaf_wetted_fraction modified
