# Evaluate ensemble members

The goal of this script is to identify ensemble members that meet specific ecological criteria.

In [97]:
import pandas as pd
import numpy as np
import os
import esm_tools
pd.set_option('display.max_rows', 500) 
import shutil

In [98]:
# Switches selecting which steps of the notebook are executed
write_csv_of_passing_members = True
setup_new_simulation = True
check_CZ2_100_yrs = True
check_CZ2_PEAS = False

# The output CSV location is only defined when passing members are written out
if write_csv_of_passing_members:
    output_file_path = "/glade/scratch/adamhb/processed_output/CZ2_100_years_passing/passing_CZ2_100_yrs_ca_5pfts_ml_assisted_720_100123_XX_-17e2acb6a_FATES-8a054a12_20231103104816.csv"

# CSV holding the ensemble data to be evaluated
path_to_ensemble_data = '/glade/scratch/adamhb/processed_output/ca_5pfts_ml_assisted_720_100123_XX_-17e2acb6a_FATES-8a054a12_20231103104816/ensemble_output_ca_5pfts_ml_assisted_720_100123_XX_-17e2acb6a_FATES-8a054a12_20231103104816_structure_metrics_last3_years.csv'

# Root directory under which ensemble parameter files are stored
path_to_ensemble_param_files_root = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/'

# Names of the key metric columns
key_metrics = [
    'inst',
    'BA',
    'ShannonE',
    'AGB',
    'BA_pine',
    'BA_cedar',
    'BA_fir',
    'BA_shrub',
    'BA_oak',
    'FailedPFTs',
    'Pct_shrub_cover_canopy',
    'Pct_shrub_cover',
    'TreeStemD',
    'ResproutD_oak',
    'ResproutD_shrub',
    'NPP',
    'Combustible_fuel',
    'inst_id',
    'param_file_path',
]

## Functions

In [99]:
def delete_all_files_in_directory(directory_path):
    '''
    Remove every regular file located directly inside directory_path.

    Entries that are not regular files (e.g. subdirectories) are left untouched.
    Any exception raised along the way is caught and reported with print()
    instead of being propagated; the function always returns None.
    '''
    try:
        for entry_name in os.listdir(directory_path):
            candidate = os.path.join(directory_path, entry_name)

            # Skip anything that is not a plain file
            if not os.path.isfile(candidate):
                continue

            os.remove(candidate)

        print(f"All files in {directory_path} have been deleted.")
    except Exception as e:
        print(f"An error occurred: {e}")


def get_most_recent_file_in_dir(folder_path):
    '''
    Return the name (not the full path) of the regular file in folder_path
    that has the largest os.path.getctime value, printing the result.

    Subdirectories are ignored. When folder_path contains no regular files a
    message is printed and None is returned.
    '''

    def ctime_of(name):
        return os.path.getctime(os.path.join(folder_path, name))

    # Keep only regular files (subdirectories are excluded)
    candidates = [
        name
        for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    ]

    if not candidates:
        print("The folder is empty.")
        return None

    most_recent_file = max(candidates, key=ctime_of)
    print(f"The most recently created file is: {most_recent_file}")
    return most_recent_file
        
        
def aggregate_passing_paramsets(successfull_param_files,n_new_cases,n_inst_per_case,new_subdirs_prefix,
                                path_to_ensemble_param_files_root,new_param_file_base_name):
    
    '''
    This function relabels and prepares parameter sets that passed criteria for a new group of cases to be run

    Steps:
      1. Every file in successfull_param_files is copied into a local "tmp" directory
         (emptied first) and renamed <new_param_file_base_name>_NNNN.nc, numbered
         0001, 0002, ... in input order.
      2. One subdirectory per new case is created at
         <path_to_ensemble_param_files_root>/<new_subdirs_prefix>_CC (CC = 01, 02, ...).
      3. The first n_new_cases * n_inst_per_case staged files are distributed in order,
         n_inst_per_case per case, and renumbered 0001 ... within each case.

    Parameters:
      successfull_param_files: paths of the parameter files that passed the criteria
      n_new_cases: number of new cases (subdirectories) to create
      n_inst_per_case: number of parameter files placed in each new case
      new_subdirs_prefix: name prefix of the new case subdirectories (without trailing underscore)
      path_to_ensemble_param_files_root: directory under which the case subdirectories are created
      new_param_file_base_name: base name of the relabelled files (without trailing underscore)

    Raises:
      ValueError: if fewer than n_new_cases * n_inst_per_case parameter files are supplied.
                  This is checked before anything is written or deleted.
    '''
    
    successfull_param_files = list(successfull_param_files)
    
    # Fail early, before touching the file system, if the cases cannot all be filled
    n_required = n_new_cases * n_inst_per_case
    if len(successfull_param_files) < n_required:
        raise ValueError(
            f"Need {n_required} parameter files ({n_new_cases} cases x {n_inst_per_case} instances) "
            f"but only {len(successfull_param_files)} were supplied."
        )
    
    os.makedirs("tmp",exist_ok=True)
    delete_all_files_in_directory("tmp")
    
    # Generate the case tags once, independent of how many param files there are
    new_case_tags = [str(case_idx + 1).rjust(2, '0') for case_idx in range(n_new_cases)]
    
    # Make new subdirectories for the parameter files for the new cases
    new_param_subdirs = [os.path.join(path_to_ensemble_param_files_root,new_subdirs_prefix) + "_" + new_case_tag
                         for new_case_tag in new_case_tags]
    for new_param_subdir in new_param_subdirs:
        os.makedirs(new_param_subdir,exist_ok=True)
    
    # Put all successful param files in a temp directory, numbered 0001 ... n
    staged_files = []
    for tmp_inst,reference_param_file in enumerate(successfull_param_files, start=1):
        
        print("originl_file:",reference_param_file)
        
        new_tag = str(tmp_inst).rjust(4, '0')
        new_name = new_param_file_base_name + "_" + new_tag + ".nc"
        dst_file = os.path.join("tmp",new_name)
        print("tmp file:",dst_file)
        shutil.copy(reference_param_file,dst_file)
        
        # Remember the exact staged path so it never has to be searched for by tag substring,
        # which could also match digits in the base name
        staged_files.append(dst_file)
    
    # Hand out the staged files to the new cases, n_inst_per_case at a time
    for i,new_param_subdir in enumerate(new_param_subdirs):
        
        print("\n")
        
        case_files = staged_files[i * n_inst_per_case:(i + 1) * n_inst_per_case]
        
        for j,staged_file in enumerate(case_files, start=1):
            
            print("tmp_file:",staged_file)
            
            # Instance numbering restarts at 0001 within every case
            new_tag = str(j).rjust(4, '0')
            new_name = new_param_file_base_name + "_" + new_tag + ".nc"
            
            # new_param_subdir already includes the root; joining the root again would
            # duplicate it whenever the root is a relative path
            dst_file = os.path.join(new_param_subdir,new_name)
            
            shutil.copy(staged_file,dst_file)
            
            print("destination:",dst_file)

## Load ensemble data

In [100]:
# Read the ensemble table and discard its first column
# (presumably an index column written by an earlier to_csv call; confirm against the CSV)
df = pd.read_csv(path_to_ensemble_data)
df = df.drop(columns=df.columns[0])

## Ecological expectations and preliminary filters

### Check on coexistence and extreme shrub dominance

In [101]:
if check_CZ2_100_yrs:

    # A member passes when no PFT failed, shrub cover exceeds 0.01,
    # and shrub canopy cover stays below 0.5
    filter_at_100_yrs = (
        (df['FailedPFTs'] == 0)
        & (df['Pct_shrub_cover'] > 0.01)
        & (df['Pct_shrub_cover_canopy'] < 0.5)
    )

    df_passing_CZ2_100_yrs = df[filter_at_100_yrs]
    number_passing = len(df_passing_CZ2_100_yrs)

    print(number_passing, "pass check on coexistence and shrub dominance")
    print(number_passing / len(df) * 100,"percent success")

    # Optionally save the passing members
    if write_csv_of_passing_members:
        df_passing_CZ2_100_yrs.to_csv(output_file_path)
        print("Wrote file:",output_file_path)

    # Optionally stage the passing parameter files for a new group of cases
    if setup_new_simulation:
        aggregate_passing_paramsets(
            successfull_param_files = list(df_passing_CZ2_100_yrs['param_file_path']),
            n_new_cases = 4,
            n_inst_per_case = 36,
            new_subdirs_prefix = "CZ2_equilibrium_110323", # Without underscore
            path_to_ensemble_param_files_root = path_to_ensemble_param_files_root,
            new_param_file_base_name = 'CZ2_equilibrium_110323', # Without underscore
        )

151 pass check on coexistence and shrub dominance
22.076023391812864 percent success
Wrote file: /glade/scratch/adamhb/processed_output/CZ2_100_years_passing/passing_CZ2_100_yrs_ca_5pfts_ml_assisted_720_100123_XX_-17e2acb6a_FATES-8a054a12_20231103104816.csv
All files in tmp have been deleted.
originl_file: /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/ca_5pfts_ml_assisted_720_100123_01/ca_5pfts_100523_0005.nc
tmp file: tmp/CZ2_equilibrium_110323_0001.nc
originl_file: /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/ca_5pfts_ml_assisted_720_100123_01/ca_5pfts_100523_0010.nc
tmp file: tmp/CZ2_equilibrium_110323_0002.nc
originl_file: /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/ca_5pfts_ml_assisted_720_100123_01/ca_5pfts_100523_0011.nc
tmp file: tmp/CZ2_equilibrium_110323_0003.nc
originl_file: /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/ca_5pfts_ml_assisted_720_100123_01/ca_5pfts_100523_0014.nc
tmp file: tmp/CZ2_equilibrium_110323_0004.nc
originl_file: 

In [102]:
#df_passing_CZ2_100_yrs[key_metrics].sort_values("ShannonE",ascending = False)

### CZ2, Pre-Euro-American Management, equilibrium

In [103]:
if check_CZ2_PEAS:

    # Burned area: both bounds must apply to the burned-area column.
    # (The upper bound was previously tested against Pct_high_severity_1700.)
    ba_dry = (df['Burned_area'] > 0.0294) & (df['Burned_area'] < 0.0909) # Williams et al., 2023
    print(ba_dry.sum(),"are within range of observations for burned area")

    # Percent high severity, reported separately for the two intensity thresholds
    phs_dry_1700 = (df['Pct_high_severity_1700'] > 1) & (df['Pct_high_severity_1700'] < 6)
    print(phs_dry_1700.sum(),"are within obs for PHS 1700 kW m-1")
    phs_dry = (df['Pct_high_severity_3500'] > 1) & (df['Pct_high_severity_3500'] < 6)
    print(phs_dry.sum(),"are within obs for PHS 3500 kW m-1")

    # Shrub cover
    shrub_cov = (df['Pct_shrub_cover'] > 0.14) & (df['Pct_shrub_cover'] < 0.32)
    print(shrub_cov.sum(),"are within range of obs for shrub cover")

    # Pine significant: pine share of total basal area (adds a 'pine_frac' column to df)
    df["pine_frac"] = df["BA_pine"] / df["BA"]

    pine_frac_thresh = 0.10
    pine_sig = df['pine_frac'] > pine_frac_thresh
    print(pine_sig.sum(),'have pine basal area >', pine_frac_thresh)

    # All metrics
    # NOTE(review): only the 3500 kW m-1 PHS mask (phs_dry) enters the combined filter,
    # matching the previous effective behaviour where the 1700 mask was overwritten.
    # Confirm whether phs_dry_1700 should be required as well.
    all_metrics = len(df.loc[ba_dry & phs_dry & shrub_cov & pine_sig])
    print(all_metrics,'meet all expectations for CZ2 PEAS')