### Script to generate FATES parameters using LHS

In [1]:
from scipy.stats import qmc
import numpy as np
import csv
import pandas as pd
import os
import netCDF4 as nc4
import sys
import shutil
from tempfile import TemporaryFile                                                                                                                                 
import argparse                                                                                                                                                                                                                                                                                                       
import tempfile 
import random
import re
import modp as mp
from matplotlib import pyplot as plt
from scipy.io import netcdf as nc
import importlib

<module 'modp' from '/home/adam/cloud/gdrive/postdoc/california-fates/ensemble_tools/modp.py'>

In [26]:
pd.set_option('display.max_rows', 500) 

### Read in min and max values for each parameter and pft 
- Adapted from work by Rachel Ward (parameter ranges and inequalities). 
- Where there are inequalities between PFTx and PFTy, sample PFTx parameter and then use a scalar to generate PFTy parameter. -- If parameter ranges are the same ignore the inequality
- Add some noise to the inequalities so that inter-PFT trait correlations are not too strict

### Pre-process param ranges file

In [110]:
path_to_ca_fates = '/home/adam/cloud/gdrive/postdoc/california-fates'
path_to_base_param_files_root = '/home/adam/cloud/gdrive/postdoc/parameters/'
path_to_ensemble_param_files_root = '/home/adam/cloud/gdrive/postdoc/parameters/ensemble_params_test'
pft_names = ['pine','cedar','fir','shrub','oak']

#path_to_ensemble_param_files_root = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles'

param_ranges_full = pd.read_csv(os.path.join(path_to_ca_fates,'parameter_ranges/param_ranges_091923.csv'))
param_ranges_full = param_ranges_full[['pft_ineq','param', 'value_min', 'value_max', 'pft', 'organ']]

# Filter to params that we are going to sample from the LHS
param_ranges = param_ranges_full.loc[param_ranges_full['pft_ineq'] == 'FALSE']

# Make sure min and max are floats
convert_dict = {'value_min': float,
                'value_max': float
                }
param_ranges = param_ranges.astype(convert_dict)

# number of parameters
n_params = len(param_ranges)
print("Number of params:",n_params)

# number of PFTs - some are global so subtract one
n_pfts = max(len(pd.unique(param_ranges['pft'])) - 1, 1)
print("Number of pfts:", n_pfts)

param_names = list(param_ranges.param)
print("Param names:",param_names)
pfts = list(param_ranges.pft)
organs = list(param_ranges.organ)

param_ranges.info()

Number of params: 61
Number of pfts: 5
Param names: ['fates_recruit_seed_alloc_mature', 'fates_recruit_seed_alloc_mature', 'fates_recruit_seed_alloc_mature', 'fates_fire_alpha_SH', 'fates_fire_alpha_SH', 'fates_fire_alpha_SH', 'fates_fire_bark_scaler', 'fates_fire_bark_scaler', 'fates_fire_bark_scaler', 'fates_fire_crown_kill', 'fates_fire_crown_kill', 'fates_fire_crown_kill', 'fates_fire_drying_ratio', 'fates_fire_nignitions', 'fates_fire_threshold', 'fates_mort_bmort', 'fates_mort_bmort', 'fates_mort_bmort', 'fates_leaf_slatop', 'fates_leaf_slatop', 'fates_leaf_slatop', 'fates_leaf_vcmax25top', 'fates_leaf_vcmax25top', 'fates_leaf_vcmax25top', 'fates_mort_scalar_cstarvation', 'fates_mort_scalar_cstarvation', 'fates_mort_scalar_cstarvation', 'fates_fire_frac_resprout', 'fates_fire_frac_resprout', 'fates_frag_seed_decay_rate', 'fates_frag_seed_decay_rate', 'fates_frag_seed_decay_rate', 'fates_recruit_seed_germination_rate', 'fates_recruit_seed_germination_rate', 'fates_recruit_seed_ger

In [89]:
param_ranges_inequalities = param_ranges_full.loc[param_ranges_full['pft_ineq'] != 'FALSE']

True

### Sample the parameter space using a Latin Hypercube approach

In [91]:
n_inst = 5

sampler = qmc.LatinHypercube(d=n_params, seed=31)
sample = sampler.random(n=n_inst)

# scale to parameter ranges
l_bounds = param_ranges['value_min']
u_bounds = param_ranges['value_max']

scaled_sample = qmc.scale(sample, l_bounds, u_bounds)

print("ensemble dimensions:",scaled_sample.shape)

#Create a dataframe of the LHS ensemble
col_names = [v + "_" + str(p) for v,p in zip(param_names,pfts)]
lhs_df = pd.DataFrame(data=scaled_sample,columns=col_names)

ensemble dimensions: (5, 61)


### Visualize distribution of parameter ranges sampled by the LHS

In [33]:
# if lhs_df.shape[1] < 50:
#    lhs_df.hist(bins=10, figsize=(20,15))
#    plt.show()

### Some parameters are held equal for all conifers

There are some parameters that we keep equal among the conifers. We want the value pulled for pine from the Latin hypercube to apply to all conifers.

In [None]:
#equal_among_conifers = ["fates_recruit_seed_alloc_mature"]

In [44]:
scaled_sample.shape

(36, 61)

### Construct parameter files from the LHS samples

In [100]:
importlib.reload(mp)

<module 'modp' from '/home/adam/cloud/gdrive/postdoc/california-fates/ensemble_tools/modp.py'>

In [125]:
base_param_file_name = 'ca_ahb_5pfts_090123.nc'
param_files_subdir = 'testing_091923'

In [101]:
## Read in FATES file with values that will be used for all non-varying parameters

# This parameter file has many changes associated with it compared to the default FATES file
# It also has new parameter added as part of the development required for this experiment.
input_fname = os.path.join(path_to_base_param_files_root,base_param_file_name)

print(input_fname)

# for each sample 
for i in range(0,n_inst) :
    
    param_file_end = str(i+1).rjust(4, '0')
    
    print(param_file_end)
    
    # final parameter file name
    new_file_name = 'ca_5pfts_091923_{0}.nc'.format(param_file_end)
    fout = os.path.join(path_to_ensemble_param_files_root,param_files_subdir,new_file_name)
    
    shutil.copyfile(input_fname, fout)                                                                                                                             
   
    # loop through each parameter and apply either to the correct pft or globally
    for j in range(0, n_params) : 
        
        var = param_names[j]
        pft = pfts[j]
        organ = organs[j]
        
        val = scaled_sample[i, j]
    
        mp.main(var = var, pft = pft, fin = fout, val = val, 
                    fout = fout, O = 1, organ = organ)

/home/adam/cloud/gdrive/postdoc/parameters/ca_ahb_5pfts_090123.nc
0001
0002
0003
0004
0005


### Add values that require a pft-inequality

In [138]:
def get_param_value_from_ineq(ref_pft_value,min_val,max_val,pft_ineq):
    '''returns the parameter value for the pft that depends on a reference pft value'''
    if ">" in pft_ineq:
        min_val = ref_pft_value
    if "<" in pft_ineq:
        max_val = ref_pft_value
    
    return np.random.uniform(min_val,max_val)

In [135]:
np.random.uniform(3.1,4.5)

3.507320078423806

In [113]:
pft_names.index("cedar")

1

In [131]:
def get_ref_pft_index(ineq):  
    for p in pft_names:
        if p in ineq:
            return pft_names.index(p)

1

In [123]:
def find_files_with_substring(directory, substring):
    """
    Returns a list of filenames in the given directory that contain the given substring.
    
    :param directory: The path to the directory to search in.
    :param substring: The substring to search for in filenames.
    :return: A list of filenames containing the substring.
    """

    # List all files in the directory
    all_files = os.listdir(directory)

    # Filter the ones that contain the substring
    matching_files = [f for f in all_files if substring in f]

    return matching_files

In [130]:
param_ranges_inequalities

Unnamed: 0,pft_ineq,param,value_min,value_max,pft,organ
1,<pine,fates_recruit_seed_alloc_mature,0.04,0.39286,2,
2,<pine,fates_recruit_seed_alloc_mature,0.04,0.39286,3,
6,>pine,fates_fire_alpha_SH,0.1,0.9,2,
7,>pine,fates_fire_alpha_SH,0.1,0.9,3,
11,<pine,fates_fire_bark_scaler,0.02,0.08,2,
12,<cedar,fates_fire_bark_scaler,0.02,0.08,3,
16,>pine,fates_fire_crown_kill,0.2,1.0,2,
17,>pine,fates_fire_crown_kill,0.2,1.0,3,
24,pine,fates_mort_bmort,0.002,0.008,2,
25,pine,fates_mort_bmort,0.002,0.008,3,


In [121]:
for inst in range(0,n_inst) :
    
    param_file_end = str(i+1).rjust(4, '0')
    
    #get param file with inst tag
    ref_nc_file = find_files_with_substring(os.path.join(path_to_ensemble_param_files_root,param_files_subdir), param_file_end)
    ref_nc_file_full_path = os.path.join(path_to_ensemble_param_files_root,param_files_subdir,ref_nc_file)
    print(ref_nc_file_full_path)

    for i in param_ranges_inequalities.index[:15]:

        # reference file
        

        # input data from the inequalities df
        d = param_ranges_inequalities.loc[i]

        if "fates" in d['pft_ineq']:
            output_val = extract_variable_from_netcdf(ref_nc_file_full_path, d['pft_ineq'],None)
        
        elif:
            #fixed
        
        else:
            ref_pft_index = get_ref_pft_index(d['pft_ineq'])
            print(ref_pft_index)
            ref_pft_value = extract_variable_from_netcdf(ref_nc_file_full_path, d['param'],ref_pft_index)
            
    
    

1
1
1
1
1
2
1
1
1
1
1
2
None
1
2


### Check the variable values in the parameter files

In [133]:
def extract_variable_from_netcdf(file_path, variable_name,pft_index):
    """
    Extract a variable from a NetCDF file.

    Parameters:
    - file_path: The path to the NetCDF file.
    - variable_name: The name of the variable to extract.

    Returns:
    - The extracted variable data.
    """
    with nc4.Dataset(file_path, 'r') as dataset:
        # Check if the variable exists in the dataset
        if variable_name in dataset.variables:
            variable_data = dataset.variables[variable_name][:]
            if pft_index == None:
                return variable_data
            else:
                return variable_data[pft_index]
        else:
            raise ValueError(f"'{variable_name}' not found in the NetCDF file.")

# Usage
# file_path = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/conifer_allom_083023/conifer_allom_083023_0006.nc'
# variable_name = 'fates_leaf_vcmax25top'
# #variable_name = 'fates_allom_d2ca_coefficient_max'
# data = extract_variable_from_netcdf(file_path, variable_name)
# #dim: (organ,pft)
# print(data[0,:])
# print(len(data.shape))