## Send parameter files to new case

This script sends a subset of parameter files from an existing case, or cases, to a new case or cases.

It sends only the parameter sets that are flagged as "promising".

In [23]:
import pandas as pd
import glob
import os
import netCDF4 as nc4
import sys
sys.path.append('/glade/u/home/adamhb/Earth-System-Model-Tools')
import esm_tools
import re
import math
import shutil
import numpy as np
from matplotlib import pyplot as plt
pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)
np.set_printoptions(threshold=1000)
import seaborn as sns

In [9]:
#path_to_aggreagated_data_3rd_ensemble = '/glade/work/adamhb/processed_output/ml_supported_ensemble_050224_XX_-17e2acb6a_FATES-1449c787/ml_supported_ensemble_2560_050224_metrics_and_params.csv'
#path_to_aggreagated_data_1st_and_2nd_ensemble = '/glade/work/adamhb/processed_output/CZ2_equilibrium_700yrs_042524_01_-17e2acb6a_FATES-1449c787/equilibrium_700yrs_042524_metrics_and_params.csv'

## Functions

In [24]:
def copy_and_rename_param_files(param_file_paths,inst_per_new_case, new_param_subdir_base_name,
                                param_subdir_root = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles',
                                path_to_ensemble_prov = '/glade/u/home/adamhb/ensemble_provenance_logs'):
    '''
    Copy existing parameter files into newly numbered parameter sub-directories.

    The source files are split, in order, into groups of `inst_per_new_case`.
    Group k is copied into `<param_subdir_root>/<new_param_subdir_base_name>_<kk>/`
    and its files are renamed `ca_5pfts_100523_0001.nc`, `ca_5pfts_100523_0002.nc`, ...
    A CSV mapping every source path to its destination path is written to
    `<path_to_ensemble_prov>/<new_param_subdir_base_name>_provenence.csv`.

    Parameters
    ----------
    param_file_paths : sequence of path-like
        Source parameter files, in the order they should be numbered.
    inst_per_new_case : int
        Number of files per new sub-directory. Capped at len(param_file_paths).
    new_param_subdir_base_name : str
        Base name of the new sub-directories (a two-digit case number is appended).
    param_subdir_root : str
        Directory under which the new sub-directories are created.
    path_to_ensemble_prov : str
        Directory that receives the provenance CSV.

    Returns
    -------
    None. If there is nothing to copy, or the number of source files is not a
    multiple of `inst_per_new_case`, a message is printed and nothing is
    created or copied.
    '''
    src_file_paths = list(param_file_paths)
    n_src = len(src_file_paths)

    # Guard against the division by zero that an empty list (or a
    # non-positive group size) would otherwise trigger below.
    if n_src == 0 or inst_per_new_case < 1:
        print("Nothing to copy: no parameter files given or inst_per_new_case < 1")
        return

    inst_per_new_case = min(inst_per_new_case, n_src)
    n_cases = math.ceil(n_src / inst_per_new_case)
    case_numbers = [str(i).zfill(2) for i in range(1, n_cases + 1)]
    inst_tags = [str(i).zfill(4) for i in range(1, inst_per_new_case + 1)]

    new_param_subdir_paths = [
        os.path.join(param_subdir_root, f'{new_param_subdir_base_name}_{case_number}')
        for case_number in case_numbers
    ]
    dst_file_paths = [
        os.path.join(subdir_path, f'ca_5pfts_100523_{inst_tag}.nc')
        for subdir_path in new_param_subdir_paths
        for inst_tag in inst_tags
    ]

    # Validate before touching the file system so an aborted run leaves no
    # empty directories behind.
    if len(dst_file_paths) != n_src:
        print("srcn",n_src)
        print("dstn",len(dst_file_paths))
        print("Error in number of dst files")
        return

    for subdir_path in new_param_subdir_paths:
        os.makedirs(subdir_path, exist_ok=True)
    os.makedirs(path_to_ensemble_prov, exist_ok=True)

    provenance_csv = os.path.join(path_to_ensemble_prov, f'{new_param_subdir_base_name}_provenence.csv')
    pd.DataFrame({'src':src_file_paths,'dst':dst_file_paths}).to_csv(provenance_csv)

    for src_path, dst_path in zip(src_file_paths, dst_file_paths):
        print("Copying",src_path,"to",dst_path)
        shutil.copy(src_path,dst_path)

In [93]:
#df = pd.read_csv(path_to_aggreagated_data_3rd_ensemble)
#df2 = pd.read_csv(path_to_aggreagated_data_1st_and_2nd_ensemble)

# Additional criteria for 3 ensemble (oak)
#some_oak = df['BA_oak'] > 0.01
# Get param from 3rd simulation
#params_for_next_sim_3rd_ensemble = df.loc[(df['promising'] == 1) & (some_oak)].sort_values('ShannonE',ascending = False)['param_file_path']

#params_for_next_sim_1st_and_2nd_ensemble = df2.loc[df2['promising'] == True]['param_file_path']
#params_for_next_sim = list(params_for_next_sim_1st_and_2nd_ensemble) + list(params_for_next_sim_3rd_ensemble)

#params_for_next_sim = esm_tools.inst_to_tag(np.array(df.loc[df['promising'] == True]['param_file_path']))
#print(params_for_next_sim)

# Collect every entry in the source ensemble directory, sorted by name.
params_for_next_sim = sorted(glob.glob(os.path.join('/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01','*')))

# Drop two entries by position. The second `del` runs after the list has
# already shrunk by one, so index 60 here refers to the element that was at
# index 61 of the sorted glob result.
# NOTE(review): the reason these two members are excluded is not recorded here.
del params_for_next_sim[6]
del params_for_next_sim[60]

#params_for_next_sim[60]
# Copy the remaining files into new sub-directories, up to 80 files each.
copy_and_rename_param_files(params_for_next_sim,80, "equilibrium_700yrs_050924_01_inst_80",
                               param_subdir_root = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles')

Copying /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01/ca_5pfts_100523_0001.nc to /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01_inst_80_01/ca_5pfts_100523_0001.nc
Copying /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01/ca_5pfts_100523_0002.nc to /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01_inst_80_01/ca_5pfts_100523_0002.nc
Copying /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01/ca_5pfts_100523_0003.nc to /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01_inst_80_01/ca_5pfts_100523_0003.nc
Copying /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01/ca_5pfts_100523_0004.nc to /glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/equilibrium_700yrs_050924_01_inst_80_01/ca_5pfts_100523_0004.nc
Copying /glade/u/home/adamhb/ahb_params/fates_ap

## Transfer history files when you need to delete one

In [None]:
# def transfer_history_delete_one(src_dir,dst_dir,inst_int_to_remove,debug = True):
#     if inst_tag == None:
#         file_names = [f"{case_name}.clm2.h0.{str(year)}-{str(month).rjust(2, '0')}.nc"
#                   for year in years for month in months]
#     else:
#         file_names = [f"{case_name}.clm2_{inst_tag}.h0.{str(year)}-{str(month).rjust(2, '0')}.nc"
#                   for year in years for month in months]
    
#     full_paths = [os.path.join(model_output_root, case_name, 'lnd/hist', fname) for fname in file_names]

## Transfer restarts when you need to delete one

In [94]:
import re

# Function to adjust the number in the string
def adjust_number(s,inst_int_to_remove):
    '''
    Shift the four-digit tag in `s` down by one if it is above the removed instance.

    The tag is the first run of exactly four digits that directly follows an
    underscore and is followed by a non-digit character or the end of the string.

    Parameters
    ----------
    s : str
        String (e.g. a file name) that may contain a four-digit tag.
    inst_int_to_remove : int
        Tags strictly greater than this number are decremented by one.

    Returns
    -------
    str
        `s` with the tag rewritten as a zero-padded four-digit number, or `s`
        unchanged if no tag is found.
    '''
    match = re.search(r'(?<=_)\d{4}(?=[^0-9]|$)', s)
    if match is None:
        return s

    num = int(match.group())
    if num > inst_int_to_remove:
        num -= 1

    # Splice at the matched position only. str.replace() would also rewrite
    # any other occurrence of the same four digits elsewhere in the string.
    return f'{s[:match.start()]}{num:04}{s[match.end():]}'

def remove_inst_from_rest_files_and_copy(src_dir,dst_dir,inst_int_to_remove,debug = True):
    '''
    Copy restart/rpointer files to `dst_dir`, dropping one instance and renumbering the rest.

    Files whose instance tag equals the removed instance are skipped. Every
    remaining file is copied under the name returned by `adjust_number`, so
    instances above the removed one shift down by one.

    Parameters
    ----------
    src_dir : str
        Directory holding the source files. The list of files comes from
        `esm_tools.find_nc_and_rpointer_files(src_dir)`; only the base names are
        used and they are re-joined onto `src_dir`.
    dst_dir : str
        Directory that receives the copies. Created if missing (non-debug runs only).
    inst_int_to_remove : int
        Instance number to leave out.
    debug : bool
        If truthy, only print what would be copied; nothing is written.
    '''
    tag_to_remove = esm_tools.inst_to_tag([inst_int_to_remove])[0]
    print("Tag to remove",tag_to_remove)
    src_paths = esm_tools.find_nc_and_rpointer_files(src_dir)
    src_files = [os.path.basename(src_path) for src_path in src_paths]
    print("Length of src files",len(src_files))

    # Identify the instance tag with the same pattern adjust_number uses.
    # A plain substring test would also drop files whose case name or date
    # happens to contain the same four digits.
    inst_tag_pattern = re.compile(r'(?<=_)\d{4}(?=[^0-9]|$)')

    def _instance_tag(file_name):
        found = inst_tag_pattern.search(file_name)
        return found.group() if found else None

    filtered_src_files = sorted(item for item in src_files if _instance_tag(item) != tag_to_remove)
    filtered_src_paths = [os.path.join(src_dir,file) for file in filtered_src_files]
    print("Length of filtered src_files",len(filtered_src_files))

    dst_files = [adjust_number(item,inst_int_to_remove) for item in filtered_src_files]
    print("Length of dst files",len(dst_files))
    dst_paths = [os.path.join(dst_dir,file) for file in dst_files]

    if not debug:
        os.makedirs(dst_dir, exist_ok=True)

    for src_path, dst_path in zip(filtered_src_paths, dst_paths):
        if debug:
            print("Would copy",src_path,"to",dst_path)
        else:
            print("Copying",src_path,"to",dst_path)
            shutil.copy(src_path,dst_path)


# Directory the restart files are read from.
src_dir = '/glade/derecho/scratch/adamhb/supIg_1.25_051424-1951-2020_-17e2acb6a_FATES-1449c787/rest/2015-01-01-00000'
# Directory that receives the renumbered copies.
dst_dir = '/glade/work/adamhb/f2015_rest_files/untreated_inst80'


# Drop instance 61 and renumber the rest. debug = False performs the actual
# copy; leave debug at its default (True) to only print the planned copies.
remove_inst_from_rest_files_and_copy(src_dir,dst_dir,61,debug = False)

Tag to remove 0061
Length of src files 648
Length of filtered src_files 640
Length of dst files 640
Copying /glade/derecho/scratch/adamhb/supIg_1.25_051424-1951-2020_-17e2acb6a_FATES-1449c787/rest/2015-01-01-00000/rpointer.atm_0001 to /glade/work/adamhb/f2015_rest_files/untreated_inst80/rpointer.atm_0001
Copying /glade/derecho/scratch/adamhb/supIg_1.25_051424-1951-2020_-17e2acb6a_FATES-1449c787/rest/2015-01-01-00000/rpointer.atm_0002 to /glade/work/adamhb/f2015_rest_files/untreated_inst80/rpointer.atm_0002
Copying /glade/derecho/scratch/adamhb/supIg_1.25_051424-1951-2020_-17e2acb6a_FATES-1449c787/rest/2015-01-01-00000/rpointer.atm_0003 to /glade/work/adamhb/f2015_rest_files/untreated_inst80/rpointer.atm_0003
Copying /glade/derecho/scratch/adamhb/supIg_1.25_051424-1951-2020_-17e2acb6a_FATES-1449c787/rest/2015-01-01-00000/rpointer.atm_0004 to /glade/work/adamhb/f2015_rest_files/untreated_inst80/rpointer.atm_0004
Copying /glade/derecho/scratch/adamhb/supIg_1.25_051424-1951-2020_-17e2acb6a

In [72]:
#df = pd.read_csv(path_to_aggreagated_data_3rd_ensemble)
#df2 = pd.read_csv(path_to_aggreagated_data_1st_and_2nd_ensemble)

# Additional criteria for 3 ensemble (oak)
#some_oak = df['BA_oak'] > 0.01
# Get param from 3rd simulation
#params_for_next_sim_3rd_ensemble = df.loc[(df['promising'] == 1) & (some_oak)].sort_values('ShannonE',ascending = False)['param_file_path']

#params_for_next_sim_1st_and_2nd_ensemble = df2.loc[df2['promising'] == True]['param_file_path']
#params_for_next_sim = list(params_for_next_sim_1st_and_2nd_ensemble) + list(params_for_next_sim_3rd_ensemble)

#params_for_next_sim = esm_tools.inst_to_tag(np.array(df.loc[df['promising'] == True]['param_file_path']))
#print(params_for_next_sim)


# NOTE(review): `params_for_next_sim` is not defined in this cell; it must
# already exist in the kernel from another cell, so this call fails on a fresh
# kernel unless that cell has been run first.
copy_and_rename_param_files(params_for_next_sim,81, "supIg_1.25_051424",
                               param_subdir_root = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles')

In [None]:
# NOTE(review): this cell has never been executed. `filtered_list`,
# `dst_file_paths` and `param_file_paths` are not assigned anywhere at notebook
# level in the visible source, so this raises NameError as written. It also
# prints `filtered_list[i]` but copies `param_file_paths[i]` -- confirm which
# list was intended, or delete the cell.
for i in range(len(filtered_list)):
        print("Copying",filtered_list[i],"to",dst_file_paths[i])
        shutil.copy(param_file_paths[i],dst_file_paths[i])