In [1]:
import os
import shutil
import subprocess

# np is used throughout the notebook; import it explicitly instead of relying
# on whatever names the star import below happens to bring in
import numpy as np
import pandas as pd
from pyDOE import *
from scipy.io import netcdf as nc

### Download latest version of params file from Google Drive
* requires 'publishing' the Google Drive spreadsheet
* File > Publish to web
* the spreadsheet can then be set up to publish continuously to a stable URL (with some latency, maybe 1-2 minutes)

In [125]:
# Published (CSV export) URL of the parameter spreadsheet
data_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQs413GtLXtHVDCqEPgAwn4BbDjoWmV7uFqOAWH4mgpxXoVfN6ijnJdhyRgLkV-n2eU-sSQush4CzYU/pub?output=csv'
# Argument-list form: the URL never passes through a shell, so its '?' (or any
# '&' added later) cannot be glob-expanded or treated as a control operator.
# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as params.csv; --location follows redirects.
cmd = ['curl', '--fail', '--location', '--output', 'params.csv', data_url]
# check=True raises CalledProcessError if the download fails, so the next cell
# never parses a stale or bogus file; the cell still displays the exit status
subprocess.run(cmd, check=True).returncode

0

### Read in csv data, filtering by the "include" column

In [126]:
data     = pd.read_csv('params.csv')
# pandas may read the 'include' column as numbers or as strings depending on
# what else the column contains (e.g. secondary header rows), so compare
# numerically: this matches 1, 1.0 and '1' alike, and anything non-numeric
# becomes NaN and is excluded
included = pd.to_numeric(data['include'], errors='coerce') == 1
params   = data.loc[included,['name','location','min','max','pft_mins','pft_maxs']]

params

Unnamed: 0,name,location,min,max,pft_mins,pft_maxs
13,displar,P,0.4,0.95,,
14,dleaf,P,pft,pft,"0,0.000216,0.000216,0.00072,0.0081,0.0081,0.00...","0,0.00108,0.00108,0.0036,0.0567,0.0567,0.243,0..."
48,baseflow_scalar,N,0.0005,0.1,,
49,maximum_leaf_wetted_fraction,N,0.01,0.5,,
64,medlynslope,P,pft,pft,"9,1.29,1.29,1.29,1.63,1.63,3.19,3.19,3.19,2.25...","9,4.7,4.7,4.7,4.59,4.59,5.11,5.11,5.11,9.27,9...."
80,kmax,P,1.00E-08,3.00E-08,,


In [147]:
# names of the parameters whose 'min' cell carries the 'pft' marker
is_pft_param = params['min'] == 'pft'
params.loc[is_pft_param, 'name']

14          dleaf
64    medlynslope
Name: name, dtype: object

### Generate parameter sampling
 * option available for latin hypercube (LHC) or one-at-a-time (OAAT)
 * careful, each time you run LHC you get a new random draw

In [142]:
sampling_protocol = 'OAAT'
prefix = sampling_protocol
nparam = len(params['name'])  # number of parameters

# Rows whose 'min' cell is the literal marker 'pft' take their ranges from the
# comma-separated 'pft_mins'/'pft_maxs' columns instead of 'min'/'max'
is_pft = (params['min'] == 'pft').values
# A single 'pft' marker makes the whole min/max columns come back as strings,
# so convert them once here. pft rows are masked to NaN first; any other
# non-numeric cell still raises instead of being silently dropped.
param_mins = pd.to_numeric(params['min'].mask(is_pft)).values.astype(float)
param_maxs = pd.to_numeric(params['max'].mask(is_pft)).values.astype(float)

if sampling_protocol == 'LHC':
    # define sample size (number of ensemble members)
    nsamp = 10

    # Generate the latin hypercube sample (a new random draw on every run)
    lhd = lhs(nparam, samples=int(nsamp))

    # scale according to parameter range; columns of pft-dependent parameters
    # come out as NaN, i.e. "keep the default value"
    param_array = (param_maxs - param_mins)*lhd + param_mins

elif sampling_protocol == 'OAAT':
    # number of samples is twice the number of parameters (min and max perturbations)
    nsamp = 2*nparam
    # set up parameter array
    # nan is code for keep the default value
    param_array = np.full([nsamp,nparam], np.nan)
    # get the min and max indices (even/odd rows)
    mins_index = (np.arange(0,nsamp,2),np.arange(0,nparam,1))
    maxs_index = (np.arange(1,nsamp,2),np.arange(0,nparam,1))
    # dataframe labels (spreadsheet rows) of the pft-dependent params in this sample
    pft_labels = params.index[is_pft]
    npftparam = len(pft_labels)
    # set up array to store pft-specific values (if any)
    if npftparam>0:
        # get dataframe index of first pft param
        pftfirstind = pft_labels[0]
        # get number of pfts:
        npft = len(np.fromstring(params.loc[pftfirstind,'pft_mins'],dtype='float',sep=','))
        # third dimension accounts for min/max values
        # (kept as a plain array: a 3D structure does not fit a pandas dataframe)
        pft_array = np.full([npftparam,npft,2], np.nan)
        for j,pftind in enumerate(pft_labels):
            # assign the values for min and max
            pft_array[j,:,0]=np.fromstring(params.loc[pftind,'pft_mins'],dtype='float',sep=',')
            pft_array[j,:,1]=np.fromstring(params.loc[pftind,'pft_maxs'],dtype='float',sep=',')

    for i in range(nparam):
        if is_pft[i]:
            # TODO: choose a placeholder that points at pft_array; for now the
            # two rows of a pft-dependent parameter stay NaN
            print('pft param')
        else:
            # parameter i owns rows 2*i (min) and 2*i+1 (max)
            param_array[mins_index[0][i], i]=param_mins[i]
            param_array[maxs_index[0][i], i]=param_maxs[i]

else:
    # fail here with a clear message rather than with a NameError on param_array below
    raise ValueError("unknown sampling_protocol %r (expected 'LHC' or 'OAAT')" % sampling_protocol)

# store in a pandas dataframe
psets = pd.DataFrame(data=param_array, index=None, columns=params['name'])
psets

pft param
pft param


name,displar,dleaf,baseflow_scalar,maximum_leaf_wetted_fraction,medlynslope,kmax
0,0.4,,,,,
1,0.95,,,,,
2,,,,,,
3,,,,,,
4,,,0.0005,,,
5,,,0.1,,,
6,,,,0.01,,
7,,,,0.5,,
8,,,,,,
9,,,,,,


In [150]:
# pft-specific ranges, indexed as [pft parameter, pft, 0=min / 1=max]
pft_array

array([[[0.000e+00, 0.000e+00],
        [2.160e-04, 1.080e-03],
        [2.160e-04, 1.080e-03],
        [7.200e-04, 3.600e-03],
        [8.100e-03, 5.670e-02],
        [8.100e-03, 5.670e-02],
        [8.100e-03, 2.430e-01],
        [8.100e-03, 2.430e-01],
        [8.100e-03, 2.430e-01],
        [8.100e-03, 8.100e-02],
        [4.050e-04, 1.215e-01],
        [1.620e-04, 4.860e-02],
        [1.440e-04, 1.800e-02],
        [1.440e-04, 1.800e-02],
        [1.440e-04, 1.800e-02],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        [1.620e-04, 1.215e-01],
        

In [None]:
## legacy version of the sampling cell (no PFT variation)
## note: running it rebinds nsamp, param_array and psets
sampling_protocol = 'OAAT'
prefix = sampling_protocol
nparam = len(params['name'])  # one column per parameter

if sampling_protocol == 'OAAT':
    # two ensemble members per parameter; NaN means "keep the default value"
    nsamp = 2*nparam
    param_array = np.full([nsamp,nparam], np.nan)
    # even rows receive the minima, odd rows the maxima, one parameter per row pair
    param_cols = np.arange(nparam)
    mins_index = (np.arange(0,nsamp,2), param_cols)
    maxs_index = (np.arange(1,nsamp,2), param_cols)
    print(param_array[mins_index])
    print(params['min'])
    param_array[mins_index] = params['min']
    param_array[maxs_index] = params['max']

elif sampling_protocol == 'LHC':
    # sample size (number of ensemble members)
    nsamp = 10

    # draw the latin hypercube sample
    lhd = lhs(nparam, samples=int(nsamp))

    # stretch the sample onto [min, max] for every parameter
    lower = params['min'].values
    span = (params['max'] - params['min']).values
    param_array = span*lhd + lower

# wrap the samples in a dataframe with one column per parameter name
psets = pd.DataFrame(data=param_array, index=None, columns=params['name'])
psets

## Generate parameter files
* ### this will overwrite parameter files!!
* ### proceed with caution

In [32]:
# create all the new parameter files, copied from basepftfile
# 0th case is the default parameter set
basepftfile = "../basecase/clm5_params.c171117.nc"
for i in range(nsamp+1):
    targetpftfile = "../paramfiles/"+prefix+str(i).zfill(4)+".nc"
    print(targetpftfile)
    # raises if the base file or the target directory is missing, instead of
    # continuing silently as an unchecked shell `cp` would
    shutil.copyfile(basepftfile, targetpftfile)

# modify the parameter values ('P' = parameter lives in the params file)
for name,loc in zip(params['name'],params['location']):
    if loc != 'P':
        continue
    print(name)
    for i in range(nsamp):
        value = psets[name][i]
        # NaN is code for "keep the default value"
        if np.isnan(value):
            continue
        # row i of psets belongs to ensemble member i+1 (member 0 is the default)
        pftfile = "../paramfiles/"+prefix+str(i+1).zfill(4)+".nc"
        # could probably update this to use xarray
        # the with-block closes the file even if the variable lookup or assignment fails
        with nc.netcdf_file(pftfile, 'a') as f:
            var = f.variables[name]
            var[:] = value
            # TODO: pft variations

../paramfiles/OAAT0000.nc
../paramfiles/OAAT0001.nc
../paramfiles/OAAT0002.nc
../paramfiles/OAAT0003.nc
../paramfiles/OAAT0004.nc
../paramfiles/OAAT0005.nc
../paramfiles/OAAT0006.nc
../paramfiles/OAAT0007.nc
../paramfiles/OAAT0008.nc
../paramfiles/OAAT0009.nc
../paramfiles/OAAT0010.nc
displar
dleaf
kmax


### Generate namelist files

In [33]:
# start every namelist mod file (member 0 = default case) with the header line;
# mode "w" truncates whatever was there before
for i in range(nsamp+1):
    nlfile = "../namelist_mods/"+prefix+str(i).zfill(4)+".txt"
    with open(nlfile,"w") as nl:
        nl.write("! user_nl_clm namelist options written by generate_params:\n")

# append one "name=value" line for each perturbed namelist ('N') parameter
for name,loc in zip(params['name'],params['location']):
    if loc != 'N':
        continue
    for i in range(nsamp):
        value = psets[name][i]
        # NaN entries keep the default, so nothing is written for them
        if np.isnan(value):
            continue
        # row i of psets belongs to ensemble member i+1
        nlfile = "../namelist_mods/"+prefix+str(i+1).zfill(4)+".txt"
        with open(nlfile,"a") as nl:  # "a" appends below the header
            nl.write("%s=%s\n" % (name, value))  # round??