# Synthesize land model output from perturbed parameter ensemble

This script evaluates model output from a set of ensemble members in a perturbed parameter experiment. It identifies the best-performing ensemble members.

## Import modules

In [2]:
import sys
#Path to the esm_tools.py script
sys.path.append('/glade/u/home/adamhb/Earth-System-Model-Tools/process_output')
import os
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import functools
import netCDF4 as nc4
import importlib
import esm_tools
import esm_viz
importlib.reload(esm_tools)
importlib.reload(esm_viz)
import math
pd.set_option('display.max_rows', 500)

## User-defined parameters

In [3]:
# True: benchmark only `case_name`. False: loop over every case in `case_tags`.
apply_to_one_case = False

# Case used when apply_to_one_case is True
case_name = 'ca_5pfts_20cases_4320inst_101223_02_-17e2acb6a_FATES-031f28ff'

# For multicase: each case name is prefix + two-digit tag + suffix
case_name_prefix = 'CZ2_equilibrium_101723_'
case_name_suffix = '_-17e2acb6a_FATES-8a054a12'
case_numbers = [1,2,3]
case_tags = [str(case_num).zfill(2) for case_num in case_numbers]
print(case_tags)

# Short list of ensemble members to process. The two lists are parallel:
# inst_short_list[k] is an instance number within the case tagged
# case_tags_short_list[k].
case_tags_short_list = ['01', '01', '01', '03', '02', '01', '03', '01', '01', '01', '02', '03', '03', '01', '02']
inst_short_list = [21, 22, 19, 8, 7, 3, 12, 23, 14, 11, 31, 7, 23, 24, 15]
# Instance tags are zero-padded to four characters (e.g. '0021')
inst_short_list = [str(inst).zfill(4) for inst in inst_short_list]

# Lookup table (columns 'inst' and 'case_tag') used to decide which
# instances of each case are processed
inst_check = pd.DataFrame({'inst': inst_short_list,
                           'case_tag': case_tags_short_list})

# Subdirectory where the parameter file for each ensemble member is stored
param_sub_dir="ca_5pfts_20cases_4320inst_101223_02"
# For multicase: the case tag is appended to this prefix
param_sub_dir_prefix = "CZ2_equilibrium_101723_"


# File name of the file that stores the parameter ranges for the ensemble
param_range_file_name = 'param_ranges_100223.csv'

# How many years of data to average over for the structure variables
last_n_years=1

# Just testing script? (If True, only the first three instances are used)
test=False

# For tree stem density
dbh_min = 10

# Calculate variables that require loading much more data (e.g. fire regime?)
decadal_scale_metrics=True

# How many years of data to load for the decadal-scale metrics
decadal_n_years=30

# Produce the multi-panel plots in the "Visualize" cell?
visualize = False

# Optional
case_path = None
manual_case_path = None

['01', '02', '03']


In [4]:
inst_check

Unnamed: 0,inst,case_tag
0,21,1
1,22,1
2,19,1
3,8,3
4,7,2
5,3,1
6,12,3
7,23,1
8,14,1
9,11,1


## Define paths and script parameters

In [5]:
# Plant functional type names. Order matters: get_benchmarks pairs
# pft_names[i] with the i-th entry of the PFT-level basal area.
pft_names = ["pine", "cedar", "fir", "shrub", "oak"]

# Benchmarking metrics that are always calculated
my_metrics = [
    "BA",
    "AGB",
    "TreeStemD",
    "ResproutD_oak",
    "ResproutD_shrub",
    "ShannonE",
    "NPP",
    "FailedPFTs",
    "Pct_shrub_cover_canopy",
    "Pct_shrub_cover",
    "Combustible_fuel",
]

# Metrics that are only calculated when the decadal-scale data are loaded
if decadal_scale_metrics == True:
    my_metrics += [
        "Burned_area",
        "Pct_high_severity_1700",
        "Pct_high_severity_3500",
    ]

# Root directory of the archived case output
case_output_root = "/glade/scratch/adamhb/archive"

# Root directory of the ensemble parameter files
params_root = "/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles"

# Root directory for anything this notebook writes
processed_output_root = "/glade/scratch/adamhb/processed_output"

# Root directory of the parameter range files
param_range_root = "/glade/u/home/adamhb/california-fates/parameter_ranges/param_range_archive"

# Per-case output directory, created through the esm_tools helper
output_path_for_case = os.path.join(processed_output_root, case_name)
esm_tools.create_directory(output_path_for_case)

print("Calculating the following variables:",my_metrics)

Directory '/glade/scratch/adamhb/processed_output/ca_5pfts_20cases_4320inst_101223_02_-17e2acb6a_FATES-031f28ff' already exists!
Calculating the following variables: ['BA', 'AGB', 'TreeStemD', 'ResproutD_oak', 'ResproutD_shrub', 'ShannonE', 'NPP', 'FailedPFTs', 'Pct_shrub_cover_canopy', 'Pct_shrub_cover', 'Combustible_fuel', 'Burned_area', 'Pct_high_severity_1700', 'Pct_high_severity_3500']


## Variables to import

In [6]:
# History variables to load from the model output.
# Keep first two no matter what. They are needed to unravel multi-plexed dimensions
# Each variable is listed exactly once.
fields = [
    'FATES_SEED_PROD_USTORY_SZ',
    'FATES_VEGC_AP',
    'FATES_BURNFRAC',
    'FATES_NPLANT_PF',
    'FATES_NPLANT_SZPF',
    'FATES_NPLANT_RESPROUT_PF',
    'FATES_FIRE_INTENSITY_BURNFRAC',
    'FATES_IGNITIONS',
    'FATES_MORTALITY_FIRE_SZPF',
    'FATES_BASALAREA_SZPF',
    'FATES_CANOPYCROWNAREA_APPF',
    'FATES_CANOPYCROWNAREA_PF',
    'FATES_CROWNAREA_PF',
    'FATES_CROWNAREA_APPF',
    'FATES_FUEL_AMOUNT_APFC',
    'FATES_PATCHAREA_AP',
    'FATES_VEGC_ABOVEGROUND',
    'FATES_NPP_PF',
]

## Benchmarking functions

In [9]:
inst_check

Unnamed: 0,inst,case_tag
0,21,1
1,22,1
2,19,1
3,8,3
4,7,2
5,3,1
6,12,3
7,23,1
8,14,1
9,11,1


In [16]:
# Sanity check: is instance "0021" on the short list for case tag "01"?
# skip_inst(inst_check, "01", "0021"), defined in a later cell, returns the
# negation of this expression.
q = inst_check.loc[inst_check['case_tag'] == "01"]['inst']
"0021" in list(q)

True

In [17]:
def setup_benchmarking_data_structure(metrics,parameters,pft_names):
    """Create the empty container that collects benchmarking results.

    The keys are, in order: every entry of ``metrics``, one ``"BA_<pft>"``
    key per entry of ``pft_names``, the key ``"inst"``, and every entry of
    ``parameters``. Each key maps to its own new empty list.

    ``metrics`` is copied first, so the caller's list is not modified.
    """
    column_names = metrics.copy()

    # PFT-specific basal area columns
    column_names.extend("BA_" + pft for pft in pft_names)

    # Ensemble member (instance) tag, then the perturbed parameters
    column_names.append("inst")
    column_names.extend(parameters)

    return {name: [] for name in column_names}

def skip_inst(inst_check,case_tag,inst):
    """Return True when ``inst`` should be skipped for the case ``case_tag``.

    ``inst_check`` is a DataFrame with the columns ``'case_tag'`` and
    ``'inst'``. An instance is kept (False is returned) only if a row with
    this ``case_tag`` lists it; otherwise True is returned.
    """
    rows_for_case = inst_check.loc[inst_check['case_tag'] == case_tag]
    return inst not in list(rows_for_case['inst'])


def get_benchmarks(case_name,metrics,last_n_years,param_sub_dir,param_range_file_name,
                   test = False, pft_names = np.array(["pine","cedar","fir","shrub","oak"]),
                   pft_colors = ['gold','darkorange','darkolivegreen','brown','springgreen'],
                   param_range_root = param_range_root,
                   params_root = params_root,
                   manual_case_path = None, decadal_scale_metrics = False, decadal_n_years = 50, inst_check = inst_check,
                   case_tag = None):
    """Collect parameter values and benchmark metrics for one case.

    For every ensemble member (instance) that is processed, one value is
    appended to each list of the returned dictionary, so the result can be
    passed directly to ``pd.DataFrame``.

    Parameters
    ----------
    case_name : str
        Case to process. Its output directory is looked up with
        ``esm_tools.get_path_to_sim`` unless ``manual_case_path`` is given.
    metrics : list of str
        Names of the metrics to calculate (e.g. "BA", "AGB", "NPP").
    last_n_years : int
        Passed to ``esm_tools.get_files_of_inst`` to select the files used
        for the non-decadal metrics.
    param_sub_dir : str
        Subdirectory of ``params_root`` holding the instance parameter files.
    param_range_file_name : str
        CSV file in ``param_range_root``; its 'param', 'pft' and 'organ'
        columns define the perturbed parameters.
    test : bool
        If true, only the first three instances of the case are considered.
    pft_names : sequence of str
        PFT names; ``pft_names[i]`` labels the i-th PFT-level basal area.
    pft_colors : list of str
        Not used by this function; kept for interface compatibility.
    param_range_root, params_root : str
        Root directories of the parameter range file and parameter files.
    manual_case_path : str or None
        If not None, used as the case output path instead of looking it up.
    decadal_scale_metrics : bool
        If true, also load ``decadal_n_years`` of files and calculate
        "Burned_area" and the "Pct_high_severity_*" metrics.
    decadal_n_years : int
        Passed to ``esm_tools.get_files_of_inst`` for the decadal metrics.
    inst_check : pandas.DataFrame or None
        Short list with the columns 'inst' and 'case_tag' (see ``skip_inst``).
    case_tag : str or None
        Tag of this case in ``inst_check``. If None, a notebook-level
        variable named ``case_tag`` is used when one exists (the previous
        behaviour). If there is none either, no instance is skipped.

    Returns
    -------
    dict
        Maps each metric, "BA_<pft>", "inst" and perturbed-parameter name to
        a list with one entry per processed instance.

    Notes
    -----
    Reads the notebook-level names ``case_output_root``, ``fields`` and
    ``dbh_min``.
    """

    # Earlier versions read `case_tag` as an undeclared global, which raised
    # NameError whenever the multi-case loop had not defined it. Keep the
    # global as a fallback so existing calls behave as before.
    if case_tag is None:
        case_tag = globals().get("case_tag")
    filter_by_short_list = (case_tag is not None) and (inst_check is not None)

    print("casetag",case_tag)
    print("Case:",case_name)

    # 1. Get info about the case
    if manual_case_path is not None:
        full_case_path = manual_case_path
    else:
        full_case_path = esm_tools.get_path_to_sim(case_name,case_output_root)

    inst_tags = esm_tools.get_unique_inst_tags(full_case_path)

    print(inst_tags)
    if test:
        inst_tags = inst_tags[:3]

    n_inst = len(inst_tags)
    print("ninst:",n_inst)

    # 2. Set up the benchmarking data structure
    perturbed_params_df = pd.read_csv(os.path.join(param_range_root,param_range_file_name))
    # One column per row of the range file, named "<param>_<pft>"
    perturbed_params = [
        param + "_" + str(pft)
        for param, pft in zip(perturbed_params_df['param'], perturbed_params_df['pft'])
    ]

    bench_dict = setup_benchmarking_data_structure(metrics,perturbed_params,pft_names)

    # 3. Add param values to the data structure
    # The instances kept here are reused in step 4 so that parameter values
    # and metrics stay aligned row by row.
    selected_inst_tags = []
    for inst in inst_tags:
        print("casetag",case_tag)
        print("inst",inst)
        skip = filter_by_short_list and skip_inst(inst_check,case_tag,inst)
        print("skip",skip)

        if skip:
            continue

        selected_inst_tags.append(inst)

        param_file_path = esm_tools.get_parameter_file_of_inst(params_root,param_sub_dir,inst)
        for column_name, (_, d) in zip(perturbed_params, perturbed_params_df.iterrows()):

            param = d['param']
            pft_index = max(0,int(d['pft'] - 1))
            organ = d['organ']

            # Only the first organ entry of fates_frag_maxdecomp is recorded
            if (param == "fates_frag_maxdecomp") and (organ > 1):
                continue

            # A missing organ (NaN) means no organ index is specified
            organ_index = None if math.isnan(organ) else int(organ - 1)

            bench_dict[column_name].append(esm_tools.extract_variable_from_netcdf_specify_organ(
                                               param_file_path,param,pft_index,organ_index))

    # 4. Add the model output to the data structure
    for inst in selected_inst_tags:

        print("Working on ensemble memeber",inst,"of",len(inst_tags),"members")

        # Import the model output data for one ensemble member
        inst_files_last_n_years = esm_tools.get_files_of_inst(full_case_path,
                                                 inst,
                                                 last_n_years)

        ds = esm_tools.multiple_netcdf_to_xarray(inst_files_last_n_years,fields)

        bench_dict['inst'].append(inst)

        ## Basal area [m2 ha-1] ##
        # The BA_<pft> columns are always created by
        # setup_benchmarking_data_structure, so they are always filled;
        # otherwise the columns would end up with different lengths.
        pft_level_ba = esm_tools.get_pft_level_basal_area(ds)

        for i, pft_name in enumerate(pft_names):
            bench_dict['BA_' + pft_name].append(pft_level_ba[i])

        ## Shannon equitability index (wrt BA) ##
        if "ShannonE" in bench_dict:
            bench_dict['ShannonE'].append(esm_tools.shannon_equitability(pft_level_ba))

        ## Number of failed pfts ##
        if "FailedPFTs" in bench_dict:
            bench_dict['FailedPFTs'].append(esm_tools.get_n_failed_pfts(pft_level_ba,ba_thresh=0.1))

        ## Total BA
        if "BA" in bench_dict:
            bench_dict['BA'].append(pft_level_ba.sum())

        ## Stem density [N ha-1] ##
        if "TreeStemD" in bench_dict:
            ## Total tree stem density
            bench_dict["TreeStemD"].append(esm_tools.get_total_stem_den(ds,trees_only=True,dbh_min=dbh_min))

        # PFT indices 4 and 3 are the positions of oak and shrub in the
        # default pft_names
        if "ResproutD_oak" in bench_dict:
            bench_dict["ResproutD_oak"].append(esm_tools.get_resprout_stem_den(ds,4))

        if "ResproutD_shrub" in bench_dict:
            bench_dict["ResproutD_shrub"].append(esm_tools.get_resprout_stem_den(ds,3))

        ## AGB [kg C m-2]
        if "AGB" in bench_dict:
            bench_dict["AGB"].append(esm_tools.get_AGB(ds))

        ## Total NPP [kg C m-2]
        if "NPP" in bench_dict:
            bench_dict["NPP"].append(esm_tools.get_total_npp(ds))

        ## Shrub canopy layer cover [m2 m-2]
        if "Pct_shrub_cover_canopy" in bench_dict:
            bench_dict["Pct_shrub_cover_canopy"].append(esm_tools.get_pft_level_crown_area(ds,pft_index = 3))

        if "Pct_shrub_cover" in bench_dict:
            bench_dict["Pct_shrub_cover"].append(esm_tools.get_pft_level_crown_area(ds,pft_index = 3,canopy_area_only = False))

        ## Fuel Load
        if "Combustible_fuel" in bench_dict:
            bench_dict["Combustible_fuel"].append(esm_tools.get_combustible_fuel(ds))

        if decadal_scale_metrics:
            # These metrics use a separate, longer selection of files
            inst_files_decadal = esm_tools.get_files_of_inst(full_case_path,
                                                 inst,
                                                 decadal_n_years)
            ds_decadal = esm_tools.multiple_netcdf_to_xarray(inst_files_decadal,fields)

            if "Burned_area" in bench_dict:
                bench_dict["Burned_area"].append(esm_tools.get_mean_annual_burn_frac(ds_decadal))

            if "Pct_high_severity_1700" in bench_dict:
                bench_dict["Pct_high_severity_1700"].append(esm_tools.get_PHS_FLI_thresh(ds_decadal,1700))

            if "Pct_high_severity_3500" in bench_dict:
                bench_dict["Pct_high_severity_3500"].append(esm_tools.get_PHS_FLI_thresh(ds_decadal,3500))

    return bench_dict

## Apply to a single case

In [14]:
if apply_to_one_case == True:
    # Benchmark the single case named in the user-defined parameters.
    # manual_case_path = manual_case_path could also be passed here to
    # bypass the case path lookup.
    output_dict = get_benchmarks(
        case_name=case_name,
        metrics=my_metrics,
        last_n_years=last_n_years,
        param_sub_dir=param_sub_dir,
        param_range_file_name=param_range_file_name,
        pft_names=pft_names,
        test=test,
        decadal_n_years=decadal_n_years,
        decadal_scale_metrics=decadal_scale_metrics,
    )
    df = pd.DataFrame(output_dict)

    # The output file name records which set of metrics was calculated
    file_name = (
        case_name + "_decadal_metrics"
        if decadal_scale_metrics == True
        else case_name + "final_years_" + str(last_n_years)
    )
    esm_tools.store_output_csv(case_name, file_name, df,
                               processed_output_root=processed_output_root)

    # First 17 columns, ordered by number of failed PFTs.
    # NOTE(review): the result is not assigned, and a notebook presumably
    # does not display an expression nested in an `if` - confirm intent.
    cols = list(df.columns)[:17]
    df.sort_values("FailedPFTs", ascending=True)[cols]

### Apply to multiple cases

In [18]:
if apply_to_one_case == False:
    for case_tag in case_tags:

        # Full case name for this tag
        case_name = "".join((case_name_prefix, case_tag, case_name_suffix))

        # Subdirectory where the parameter file for each ensemble member is stored
        param_sub_dir = param_sub_dir_prefix + case_tag

        # manual_case_path = manual_case_path could also be passed here to
        # bypass the case path lookup.
        output_dict = get_benchmarks(
            case_name=case_name,
            metrics=my_metrics,
            last_n_years=last_n_years,
            param_sub_dir=param_sub_dir,
            param_range_file_name=param_range_file_name,
            pft_names=pft_names,
            test=test,
            decadal_n_years=decadal_n_years,
            decadal_scale_metrics=decadal_scale_metrics,
        )

        # The output file name records which set of metrics was calculated
        file_name = (
            case_name + "_decadal_metrics"
            if decadal_scale_metrics == True
            else case_name + "final_years_" + str(last_n_years)
        )

        # One CSV per case
        df = pd.DataFrame(output_dict)
        esm_tools.store_output_csv(case_name, file_name, df,
                                   processed_output_root=processed_output_root)

casetag 01
Case: CZ2_equilibrium_101723_01_-17e2acb6a_FATES-8a054a12
['0001' '0002' '0003' '0004' '0005' '0006' '0007' '0008' '0009' '0010'
 '0011' '0012' '0013' '0014' '0015' '0016' '0017' '0018' '0019' '0020'
 '0021' '0022' '0023' '0024' '0025' '0026' '0027' '0028' '0029' '0030'
 '0031' '0032' '0033' '0034' '0035' '0036']
ninst: 36
casetag 01
inst 0001
skip True
casetag 01
inst 0002
skip True
casetag 01
inst 0003
skip False
casetag 01
inst 0004
skip True
casetag 01
inst 0005
skip True
casetag 01
inst 0006
skip True
casetag 01
inst 0007
skip True
casetag 01
inst 0008
skip True
casetag 01
inst 0009
skip True
casetag 01
inst 0010
skip True
casetag 01
inst 0011
skip False
casetag 01
inst 0012
skip True
casetag 01
inst 0013
skip True
casetag 01
inst 0014
skip False
casetag 01
inst 0015
skip True
casetag 01
inst 0016
skip True
casetag 01
inst 0017
skip True
casetag 01
inst 0018
skip True
casetag 01
inst 0019
skip False
casetag 01
inst 0020
skip True
casetag 01
inst 0021
skip False
casetag 

  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0011 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0014 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0019 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0021 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0022 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0023 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0024 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Directory '/glade/scratch/adamhb/processed_output/CZ2_equilibrium_101723_01_-17e2acb6a_FATES-8a054a12' already exists!
casetag 02
Case: CZ2_equilibrium_101723_02_-17e2acb6a_FATES-8a054a12
['0001' '0002' '0003' '0004' '0005' '0006' '0007' '0008' '0009' '0010'
 '0011' '0012' '0013' '0014' '0015' '0016' '0017' '0018' '0019' '0020'
 '0021' '0022' '0023' '0024' '0025' '0026' '0027' '0028' '0029' '0030'
 '0031' '0032' '0033' '0034' '0035' '0036']
ninst: 36
casetag 02
inst 0001
skip True
casetag 02
inst 0002
skip True
casetag 02
inst 0003
skip True
casetag 02
inst 0004
skip True
casetag 02
inst 0005
skip True
casetag 02
inst 0006
skip True
casetag 02
inst 0007
skip False
casetag 02
inst 0008
skip True
casetag 02
inst 0009
skip True
casetag 02
inst 0010
skip True
casetag 02
inst 0011
skip True
casetag 02
inst 0012
skip True
casetag 02
inst 0013
skip True
casetag 02
inst 0014
skip True
casetag 02
inst 0015
skip False
casetag 02
inst 0016
skip True
casetag 02
inst 0017
skip True
casetag 02
inst 

  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0015 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0031 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Directory '/glade/scratch/adamhb/processed_output/CZ2_equilibrium_101723_02_-17e2acb6a_FATES-8a054a12' already exists!
casetag 03
Case: CZ2_equilibrium_101723_03_-17e2acb6a_FATES-8a054a12
['0001' '0002' '0003' '0004' '0005' '0006' '0007' '0008' '0009' '0010'
 '0011' '0012' '0013' '0014' '0015' '0016' '0017' '0018' '0019' '0020'
 '0021' '0022' '0023' '0024' '0025' '0026' '0027' '0028' '0029' '0030'
 '0031' '0032' '0033' '0034' '0035' '0036']
ninst: 36
casetag 03
inst 0001
skip True
casetag 03
inst 0002
skip True
casetag 03
inst 0003
skip True
casetag 03
inst 0004
skip True
casetag 03
inst 0005
skip True
casetag 03
inst 0006
skip True
casetag 03
inst 0007
skip False
casetag 03
inst 0008
skip False
casetag 03
inst 0009
skip True
casetag 03
inst 0010
skip True
casetag 03
inst 0011
skip True
casetag 03
inst 0012
skip False
casetag 03
inst 0013
skip True
casetag 03
inst 0014
skip True
casetag 03
inst 0015
skip True
casetag 03
inst 0016
skip True
casetag 03
inst 0017
skip True
casetag 03
inst

  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0008 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0012 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Working on ensemble memeber 0023 of 36 members


  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))
  return func(*(_execute_task(a, cache) for a in args))


Directory '/glade/scratch/adamhb/processed_output/CZ2_equilibrium_101723_03_-17e2acb6a_FATES-8a054a12' already exists!


## Visualize

In [None]:
if visualize == True:
    # Perturbed-parameter columns are the ones with "fates" in their name
    col_selector = ["fates" in i for i in df.columns]
    perturbed_params = df.columns[col_selector]

    # One saved multi-panel figure per parameter: the parameter is the
    # x column and the benchmarking metrics are the y columns
    for p in perturbed_params:
        esm_viz.plot_multi_panel(
            df=df,
            x_col=p,
            y_cols=my_metrics,
            figsize=(12, 16),
            save_fig=True,
            output_path_for_case=output_path_for_case,
        )