In [1]:
import os
import shutil
import subprocess

import numpy as np
import pandas as pd

### Download the latest version of the params file from Google Drive

In [2]:
# Run the update script through the shell, as before. check=True raises
# CalledProcessError on a non-zero exit status, so a failed refresh stops the
# notebook instead of letting later cells read a stale or missing params.csv.
# The trailing .returncode keeps the cell's displayed value (0 on success).
subprocess.run('./update_csv.sh', shell=True, check=True).returncode

0

### Read in csv data, filtering by the "include" column

In [3]:
# Load the full parameter table, then keep only the rows whose "include" flag
# equals 1 and the four columns used by the rest of the notebook.
# The filtered frame keeps the original row labels from the csv.
wanted_cols = ['name', 'min', 'max', 'location']
data = pd.read_csv('params.csv')
included = data['include'].eq(1)
params = data.loc[included, wanted_cols]
params

Unnamed: 0,name,min,max,location
1,displar,0.4,0.95,P
2,dleaf,0.0081,0.243,P
11,baseflow_scalar,0.0005,0.1,N
12,maximum_leaf_wetted_fraction,0.01,0.5,N


### Demonstrating how to iterate through the params

In [4]:
# Walk the selected parameters row by row; each row unpacks into the parameter
# name, its lower and upper sampling bounds, and its location code.
columns = ['name', 'min', 'max', 'location']
for name, minval, maxval, loc in params[columns].itertuples(index=False, name=None):
    print(name, minval, maxval, loc)

displar 0.4 0.95 P
dleaf 0.0081 0.243 P
baseflow_scalar 0.0005 0.1 N
maximum_leaf_wetted_fraction 0.01 0.5 N


### Generate Latin Hypercube sampling

In [6]:
# Import only the Latin hypercube sampler; a star import hides where names
# come from and can silently shadow notebook variables.
from pyDOE import lhs

Set sample size (number of ensemble members)

In [15]:
# Ensemble size: number of Latin hypercube samples to draw (one parameter set
# per ensemble member).
nsamp = 10

Number of parameters

In [23]:
# One hypercube dimension per selected parameter (i.e. per row of params).
nparam = len(params)

Generate the latin hypercube

In [16]:
# Seed NumPy's global random state before sampling so that re-running this cell
# (or Restart & Run All) regenerates the same design instead of a new one.
# NOTE(review): this relies on lhs drawing from np.random's global state, as
# classic pyDOE 0.3.x does -- confirm for the installed pyDOE version.
# We might still want to save this sampling out to disk alongside the ensemble.
LHS_SEED = 0
np.random.seed(LHS_SEED)
lhd = lhs(nparam, samples=int(nsamp))

In [54]:
# lhd is a numpy array with nsamp rows and nparam columns
# rows are ensemble members
# columns are parameters (same order as params['name'])
lhd

array([[0.73557401, 0.59224427, 0.6475084 , 0.11221791],
       [0.69463579, 0.1767889 , 0.09913828, 0.31459653],
       [0.93392253, 0.94205474, 0.98224486, 0.60563941],
       [0.51014985, 0.77882368, 0.48264039, 0.8957714 ],
       [0.32698981, 0.84235968, 0.29597634, 0.71963151],
       [0.40896752, 0.01395879, 0.17372989, 0.58211021],
       [0.26123003, 0.65358836, 0.50909018, 0.44180956],
       [0.8804445 , 0.43047588, 0.80768877, 0.28257231],
       [0.07585396, 0.26131156, 0.79833674, 0.06085381],
       [0.18193349, 0.33568572, 0.39242211, 0.99354429]])

Generate the parameter sets

In [56]:
# to get numpy arrays out of pandas dataframe and do the arithmetic, need to use .values
# (.values returns the column's data as a plain numpy array, without the pandas index)
# there could be a better way to do this...I'm relatively new to pandas
params['min'].values

array([0.4   , 0.0081, 0.0005, 0.01  ])

In [52]:
# Rescale each column of lhd onto its parameter's [min, max] range:
# value = (max - min) * sample + min, broadcast across the ensemble rows.
lower = params['min'].values
span = (params['max'] - params['min']).values
param_sets = span*lhd + lower

In [53]:
# Same layout as lhd (rows = ensemble members, columns = parameters), with each
# column rescaled to that parameter's min/max range.
param_sets

array([[0.8045657 , 0.14721818, 0.06492709, 0.06498677],
       [0.78204969, 0.04962771, 0.01036426, 0.1641523 ],
       [0.91365739, 0.22938866, 0.09823336, 0.30676331],
       [0.68058242, 0.19104568, 0.04852272, 0.44892799],
       [0.57984439, 0.20597029, 0.02994965, 0.36261944],
       [0.62493214, 0.01137892, 0.01778612, 0.295234  ],
       [0.54367652, 0.16162791, 0.05115447, 0.22648668],
       [0.88424447, 0.10921878, 0.08086503, 0.14846043],
       [0.44171968, 0.06948208, 0.07993451, 0.03981837],
       [0.50006342, 0.08695258, 0.039546  , 0.4968367 ]])

Convert parameter sets back to pandas 

In [62]:
# Wrap the sampled array in a DataFrame: one column per parameter name, with the
# default integer index (0, 1, ...) identifying the ensemble member.
psets = pd.DataFrame(param_sets, columns=params['name'])

In [116]:
# One row per ensemble member, one column per parameter name.
psets

name,displar,dleaf,baseflow_scalar,maximum_leaf_wetted_fraction
0,0.804566,0.147218,0.064927,0.064987
1,0.78205,0.049628,0.010364,0.164152
2,0.913657,0.229389,0.098233,0.306763
3,0.680582,0.191046,0.048523,0.448928
4,0.579844,0.20597,0.02995,0.362619
5,0.624932,0.011379,0.017786,0.295234
6,0.543677,0.161628,0.051154,0.226487
7,0.884244,0.109219,0.080865,0.14846
8,0.44172,0.069482,0.079935,0.039818
9,0.500063,0.086953,0.039546,0.496837


In [102]:
# Example lookup: row label = ensemble member, column label = parameter name
psets.loc[0, 'baseflow_scalar']

0.06492708598089841

### Generate parameter files

In [66]:
from scipy.io import netcdf as nc

In [79]:
# Base parameter file; each ensemble member's file is created as a copy of it.
basepftfile = "../basecase/clm5_params.c171117.nc"

In [89]:
# Echo the base file path as a quick sanity check.
print(basepftfile)

../basecase/clm5_params.c171117.nc


Copy the basepftfile `nsamp` times, giving each copy a unique filename

In [94]:
# Give every ensemble member its own copy of the base parameter file.
# shutil.copy raises if the source file or the target directory is missing,
# whereas os.system('cp ...') only returned a non-zero status that was ignored.
# NOTE(review): the fixed "e00" prefix is not zero-padding -- member 10 becomes
# "e0010.nc". The name is kept as-is because the other cells build the same path.
for i in range(nsamp):
    print(i+1)
    targetpftfile = "../paramfiles/e00"+str(i+1)+".nc"
    print(targetpftfile)
    shutil.copy(basepftfile, targetpftfile)

1
../paramfiles/e001.nc
2
../paramfiles/e002.nc
3
../paramfiles/e003.nc
4
../paramfiles/e004.nc
5
../paramfiles/e005.nc
6
../paramfiles/e006.nc
7
../paramfiles/e007.nc
8
../paramfiles/e008.nc
9
../paramfiles/e009.nc
10
../paramfiles/e0010.nc


### Modify the params files with the appropriate values

In [100]:
# For every parameter whose location is 'P', overwrite that variable in each
# ensemble member's parameter file with the member's sampled value.
for name,loc in zip(params['name'],params['location']):
    if loc=='P':
        print(name)
        for i in range(nsamp):
            print(i+1)
            pftfile = "../paramfiles/e00"+str(i+1)+".nc"
            # The context manager closes the file even if the variable lookup or
            # the assignment raises, so no handle is left open in append mode.
            # (could probably update this to use xarray)
            with nc.netcdf_file(pftfile, 'a') as f:
                var = f.variables[name]
                # .loc[member, parameter] avoids chained indexing on the DataFrame.
                var[:] = psets.loc[i, name] # NOTE: we will have issues with this type of indexing when we get to things like kmax (indexed by pft and segment)
                # Also, we might want to round these values taken from psets, otherwise it puts a lot of decimal places in the params file
                # Do we want to have any variation among PFTs? For dleaf it doesn't really make sense to use the same min/max for all pfts

displar
1
2
3
4
5
6
7
8
9
10
dleaf
1
2
3
4
5
6
7
8
9
10


### Generate namelist files

First create files and specify the parameter file

In [117]:
# Create (or truncate) one namelist file per ensemble member and write the
# paramfile entry as its first line.
for i in range(nsamp):
    member = str(i+1)
    print(i+1)
    nlfile = "../namelist_mods/e00" + member + ".txt"
    with open(nlfile, "w") as file:
        # Placeholder for now: the namelist needs an exact path to the params
        # file, not just the bare filename.
        pftfile = "e00" + member + ".nc"
        output = "paramfile='%s'\n" % (pftfile)
        print(output)
        file.write(output)

1
paramfile='e001.nc'

2
paramfile='e002.nc'

3
paramfile='e003.nc'

4
paramfile='e004.nc'

5
paramfile='e005.nc'

6
paramfile='e006.nc'

7
paramfile='e007.nc'

8
paramfile='e008.nc'

9
paramfile='e009.nc'

10
paramfile='e0010.nc'



Then populate them with parameter values\
I couldn't do this in the same loop as creating them because opening the files in write mode on each pass over the parameters overwrote the earlier content, but there may be a better way

In [118]:
# Append one "name=value" line per namelist parameter (location 'N') to each
# ensemble member's namelist file.
for name, loc in zip(params['name'], params['location']):
    if loc != 'N':
        continue  # parameters with any other location are not written here
    print(name)
    for i in range(nsamp):
        print(i+1)
        nlfile = "../namelist_mods/e00" + str(i+1) + ".txt"
        # "a" (append) keeps the lines already written to the file
        with open(nlfile, "a") as file:
            value = psets.loc[i, name]  # could be rounded if that matters
            output = "%s=%s\n" % (name, value)
            print(output)
            file.write(output)

baseflow_scalar
1
baseflow_scalar=0.06492708598089841

2
baseflow_scalar=0.010364258751763182

3
baseflow_scalar=0.098233363381138

4
baseflow_scalar=0.04852271929780149

5
baseflow_scalar=0.02994964534013261

6
baseflow_scalar=0.01778612448033447

7
baseflow_scalar=0.051154473279627154

8
baseflow_scalar=0.08086503289277693

9
baseflow_scalar=0.07993450573965355

10
baseflow_scalar=0.03954599970630231

maximum_leaf_wetted_fraction
1
maximum_leaf_wetted_fraction=0.06498677492688287

2
maximum_leaf_wetted_fraction=0.1641523018303713

3
maximum_leaf_wetted_fraction=0.3067633129646689

4
maximum_leaf_wetted_fraction=0.44892798607756335

5
maximum_leaf_wetted_fraction=0.362619440378379

6
maximum_leaf_wetted_fraction=0.29523400055145715

7
maximum_leaf_wetted_fraction=0.22648668378423226

8
maximum_leaf_wetted_fraction=0.14846043324743308

9
maximum_leaf_wetted_fraction=0.039818367753108264

10
maximum_leaf_wetted_fraction=0.4968366997098795

