# Create new parameter files and restart files

This notebook is designed to create new parameter files and restart files for a new case.

It is useful when you just need to make one-off parameter tweaks to existing param files for a new case.

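It also copies restart files to make new ones if you want to increase the number of ensemble members in a hybrid case. Note that the run cell only adds extra members when the new ensemble is exactly twice the size of the reference ensemble.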

In [11]:
import sys
import os
import netCDF4 as nc4
import glob
import shutil
import re
import numpy as np
import fnmatch

# Import esm tools
sys.path.append('/glade/u/home/adamhb/Earth-System-Model-Tools/')
import esm_tools

In [2]:
def extract_variable_from_netcdf(file_path, variable_name):
    """
    Read every value of one variable out of a NetCDF file.

    Parameters:
    - file_path: Path of the NetCDF file; it is opened read-only.
    - variable_name: Name of the variable to read.

    Returns:
    - The `.data` attribute of the variable's full `[:]` slice.

    Raises:
    - ValueError: if the file has no variable called `variable_name`.
    """
    with nc4.Dataset(file_path, 'r') as dataset:
        # Bail out early when the requested variable is absent
        if variable_name not in dataset.variables:
            raise ValueError(f"'{variable_name}' not found in the NetCDF file.")

        # Slice the whole variable and hand back only its underlying data
        return dataset.variables[variable_name][:].data


def assign_variable_to_netcdf(file_path, variable_name, new_value):
    """
    Overwrite a variable in an existing NetCDF file with a new value.

    Parameters:
    - file_path: Path of the NetCDF file; it is opened in 'r+' mode and
      modified in place.
    - variable_name: Name of the variable to overwrite.
    - new_value: Value assigned to the whole variable through `[...]`.

    Returns:
    - None. A message is printed either way; a missing variable is reported
      with a message rather than an exception, and the file is left untouched.
    """
    with nc4.Dataset(file_path, 'r+') as dataset:
        if variable_name not in dataset.variables:
            print(f"Variable {variable_name} not found in the dataset.")
            return

        # The ellipsis index targets the entire variable, whatever its rank
        dataset.variables[variable_name][...] = new_value
        print(f"Value {new_value} assigned to {variable_name}.")

def list_files_matching_pattern(directory_path, pattern):
    """
    List the paths inside `directory_path` that match a glob `pattern`.

    Parameters:
    - directory_path: Directory to search (not recursive).
    - pattern: Glob pattern, e.g. '*.nc'.

    Returns:
    - A new list of matching paths in ascending sorted order.
    """
    search_expression = os.path.join(directory_path, pattern)
    found_paths = glob.glob(search_expression)
    found_paths.sort()
    return found_paths


def replace_four_digit_number_between_underscore_and_period(input_string, new_number):
    """
    Swap the first '_NNNN.' four-digit number in a string for `new_number`.

    Parameters:
    - input_string: Text to search, typically a file name or path.
    - new_number: Replacement text (a string) inserted in place of the digits.

    Returns:
    - The string with the first four-digit run that sits between an
      underscore and a period replaced; the original string when no such run
      exists.
    """
    # A callable replacement keeps `new_number` literal (no backslash escapes)
    # while the surrounding underscore and period are written back unchanged.
    return re.sub(r'_(\d{4})\.',
                  lambda found: '_' + new_number + '.',
                  input_string,
                  count=1)

## User-defined params

In [13]:
# Reference dir where reference (template) param files are stored
ref_param_dir = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/CZ2_trans_110923_01'

# New dir for parameter files for the new case
new_param_dir = '/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/CZ2_trans_012524'

# Total number of parameter files for the new case.
# The run cell only creates an extra batch of files when this is exactly
# twice the number of reference param files found in ref_param_dir.
n_param_files_new_case = 108

# Parameter changes to apply to the new param files. The three lists are
# parallel: entry k sets variable var_name[k] to var_value[k] in the files
# whose instance number is in inst_range[k][0]. Each range is wrapped in a
# one-element list because the run cell reads it as ['inst_range'][k][0].
# The run cell applies entry 0 to the first batch of files (same names as the
# reference) and entry 1 to the second batch (the renumbered copies).
param_change_dict = {'var_name':['fates_fire_nignitions','fates_fire_nignitions'],
               'var_value':[0.0825,0.11],
               'inst_range':[[np.arange(1,55)],[np.arange(55,109)]]}

# Reference dir for the restart files
ref_rest_path = '/glade/derecho/scratch/adamhb/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69/rest/1870-01-01-00000'
# New dir for the restart files. This will be in the run directory of the new case.
new_rest_path = '/glade/derecho/scratch/adamhb/CZ2_trans_1870_1951_012524_-17e2acb6a_FATES-5b076b69/run'
# Total number of restart files for the new case
# NOTE(review): this value is not read by any of the visible cells.
n_rest_files_new_case = 108

## Create new restart files

In [9]:
def extract_substring(s: str) -> str:
    """
    Return the path component that directly follows 'ensembles/' in `s`.

    Parameters:
    - s: Text to search, typically the path of a param file.

    Returns:
    - The characters between 'ensembles/' and the next '/' (or the end of the
      string), or the literal string "No match found" when `s` does not
      contain 'ensembles/'.
    """
    found = re.search(r'ensembles/([^/]*)', s)
    # Group 1 holds everything up to, but not including, the next slash
    return found.group(1) if found else "No match found"

def get_case_from_reference_param_file(reference_param_file,
                                       case_suffix = '_-17e2acb6a_FATES-8a054a12'):
    """
    Build a case name from the path of a reference param file.

    Parameters:
    - reference_param_file: Path of a param file stored under an
      'ensembles/<name>/' directory; <name> becomes the case-name prefix.
    - case_suffix: Text appended to the prefix. It used to be hard-coded;
      the default keeps the previous value so existing calls are unchanged,
      while cases built from other model versions can pass their own suffix.

    Returns:
    - The prefix returned by extract_substring() followed by `case_suffix`.
      When the path has no 'ensembles/' component, extract_substring()
      returns "No match found", and that text becomes the prefix.
    """
    prefix = extract_substring(reference_param_file)
    return prefix + case_suffix

def find_nc_and_rpointer_files(directory):
    """
    Recursively collect '*.nc' files and files with 'rpointer' in their name.

    Parameters:
    - directory: Root directory to walk.

    Returns:
    - A list of full paths. For each directory visited, the '*.nc' files come
      first, followed by the files whose name contains "rpointer". A file that
      satisfies both conditions is therefore listed twice.
    """
    found = []
    for folder, _subdirs, names in os.walk(directory):
        found.extend(os.path.join(folder, name)
                     for name in fnmatch.filter(names, '*.nc'))
        found.extend(os.path.join(folder, name)
                     for name in names if "rpointer" in name)
    return found

def extract_number_from_filename(filename):
    """
    Return the four digits that immediately precede a trailing '.nc'.

    Parameters:
    - filename: File name or path to inspect.

    Returns:
    - The four-digit string, or None when the name does not end with four
      digits followed by '.nc'.
    """
    found = re.search(r'(\d{4})\.nc$', filename)
    return found.group(1) if found else None

def replace_number_in_filename(filename, new_number):
    """
    Replace the first '_NNNN' tag in `filename` with `new_number`.

    A tag is a run of four digits that follows an underscore and is followed
    by a period or by the end of the string.

    Parameters:
    - filename: File name or path to rewrite.
    - new_number: Replacement tag; must be a string of four digits.

    Returns:
    - The string with its first tag replaced (unchanged when it has no tag).

    Raises:
    - ValueError: if `new_number` is not four digits, or if `filename`
      contains more than two tags.
    """
    if re.match(r'^\d{4}$', new_number) is None:
        raise ValueError("The new number must be a four-digit string.")

    tag_regex = r'(?<=_)(\d{4})(?=\.|$)'

    # Refuse names where it is unclear which tag is the one to replace
    if len(re.findall(tag_regex, filename)) > 2:
        raise ValueError("The filename contains more than two four-digit numbers, cannot proceed.")

    # Only the first tag is rewritten
    return re.sub(tag_regex, new_number, filename, count=1)

def replace_two_digit_number(filename):
    """
    Set every two-digit number enclosed by underscores to "01".

    Parameters:
    - filename: File name or path to rewrite.

    Returns:
    - The string with each '_NN_' number replaced by "01"; the underscores
      themselves are kept.
    """
    return re.sub(r'(?<=_)\d{2}(?=_)', "01", filename)

def replace_before_dot_with_string(original_string, replacement, keyword="FATES"):
    """
    Replace everything before the first '.' with `replacement`.

    The substitution only happens when `keyword` occurs somewhere in
    `original_string` and the string contains at least one period.

    Parameters:
    - original_string: Text to rewrite, typically a restart file name.
    - replacement: Text that takes the place of the part before the first dot.
    - keyword: Marker that must be present for the rewrite to happen.

    Returns:
    - `replacement` + '.' + the text after the first dot, or the original
      string when the keyword or the dot is missing.
    """
    if keyword not in original_string:
        return original_string

    _head, dot, tail = original_string.partition('.')
    if not dot:
        # No period to split on
        return original_string

    return replacement + '.' + tail



def transfer_restart_files_to_new_case(ref_param_file,
                                       harmonized_reference_case_name = None, # The prefix in the reference restart files
                                       destination_run_dir = None,
                                       destination_inst_tag = None,
                                       manual_case_name = None,
                                       manual_rundir = None): # The name of the reference case to find the reference run dir
    """
    Copy the restart and rpointer files of one ensemble member into a new
    run directory, renumbering them with a new instance tag.

    This function used to be defined twice in a row, so the second
    definition silently replaced the first. The two are merged here: the
    file-name prefix is only rewritten when `harmonized_reference_case_name`
    is given, which matches the second definition when it is None and the
    first definition when it is a string.

    Parameters:
    - ref_param_file: Path of the reference param file. The four digits
      before its '.nc' extension identify the source instance.
    - harmonized_reference_case_name: Optional. When given, the part of each
      copied file name before the first '.' is replaced by this string (see
      replace_before_dot_with_string). When None, names keep their prefix.
    - destination_run_dir: Directory that receives the copies.
    - destination_inst_tag: Four-digit string used as the new instance tag.
    - manual_case_name: Optional case name used to locate the source run
      directory; derived from `ref_param_file` when None.
    - manual_rundir: Optional directory to search for the source files. When
      given, it is used as-is and the case name is not needed.

    Returns:
    - None. Each copy is reported with a printed message.

    Raises:
    - ValueError: if no instance tag can be read from `ref_param_file`, or
      (from replace_number_in_filename) if `destination_inst_tag` is not a
      four-digit string or a file name holds more than two tags.
    """
    if manual_rundir is not None:
        case_rundir = manual_rundir
    else:
        if manual_case_name is not None:
            case_name = manual_case_name
        else:
            case_name = get_case_from_reference_param_file(ref_param_file)
        case_rundir = os.path.join('/glade/scratch/adamhb/archive',case_name,'run')

    ref_inst_tag = extract_number_from_filename(ref_param_file)
    if ref_inst_tag is None:
        raise ValueError(f"Could not read a four-digit instance tag from '{ref_param_file}'.")
    print(ref_param_file)
    print(ref_inst_tag)

    # Match the tag only where replace_number_in_filename() looks for it:
    # after an underscore and before a period or the end of the file name.
    # A plain substring test against the full path would also select files
    # whose date stamp or directory name happens to contain the same digits,
    # and those would then be renumbered and copied over the correct files.
    tag_pattern = re.compile(r'(?<=_)' + re.escape(ref_inst_tag) + r'(?=\.|$)')

    for source_path in find_nc_and_rpointer_files(case_rundir):
        # Work on the bare file name so directory components are never
        # matched or rewritten.
        source_name = os.path.basename(source_path)
        if tag_pattern.search(source_name) is None:
            continue

        new_file_name = replace_number_in_filename(source_name,destination_inst_tag)
        if harmonized_reference_case_name is not None:
            new_file_name = replace_before_dot_with_string(new_file_name,harmonized_reference_case_name)

        new_full_file_path = os.path.join(destination_run_dir,new_file_name)
        print("Copying",source_path,"to",new_full_file_path)
        shutil.copy(source_path,new_full_file_path)
    

def transfer_all_restart_files_to_new_case(ref_param_files,
                                           harmonized_reference_case_name,
                                           destination_run_dir = None,
                                           manual_case_name = None,
                                           manual_rundir = None):
    """
    Transfer the restart files for a whole list of reference param files.

    The files are numbered in list order: the first reference gets instance
    tag '0001', the second '0002', and so on.

    Parameters:
    - ref_param_files: Sequence of reference param file paths.
    - harmonized_reference_case_name: Forwarded unchanged to
      transfer_restart_files_to_new_case.
    - destination_run_dir: Directory that receives the copies.
    - manual_case_name: Forwarded unchanged.
    - manual_rundir: Forwarded unchanged.

    Returns:
    - None. The number of reference files is printed before copying starts.
    """
    print(len(ref_param_files))
    for inst_num, ref in enumerate(ref_param_files, start=1):
        transfer_restart_files_to_new_case(ref,
                                           harmonized_reference_case_name,
                                           destination_run_dir = destination_run_dir,
                                           destination_inst_tag = f"{inst_num:04d}",
                                           manual_case_name = manual_case_name,
                                           manual_rundir = manual_rundir)

## Run script

In [15]:
# Copy reference param files to new folder
ref_files = list_files_matching_pattern(ref_param_dir, '*.nc')

# Make sure the destination exists so the first copy cannot fail on a missing directory
os.makedirs(new_param_dir, exist_ok=True)

# Only "same size" and "exactly double" ensembles are handled below. Say so
# up front instead of silently producing fewer param files than requested.
if n_param_files_new_case not in (len(ref_files), 2 * len(ref_files)):
    print("WARNING: n_param_files_new_case =", n_param_files_new_case,
          "is neither equal to nor double the", len(ref_files),
          "reference param files; only the first batch will be created.")


def _create_ensemble_member(ref_file, dst_file):
    """Copy one reference param file to dst_file, apply the parameter changes
    that target its instance number, and copy the matching restart files."""
    shutil.copy(ref_file,dst_file)

    # presumably extract_digits returns the instance digits of the file name;
    # verify against esm_tools
    inst_num = int(esm_tools.extract_digits(dst_file))
    new_tag = str(inst_num).zfill(4)

    # Apply every requested change whose instance range contains this member,
    # rather than a hard-coded entry per batch.
    for var_name, var_value, inst_range in zip(param_change_dict['var_name'],
                                               param_change_dict['var_value'],
                                               param_change_dict['inst_range']):
        # Each range is stored wrapped in a one-element list
        if inst_num in inst_range[0]:
            assign_variable_to_netcdf(dst_file,var_name,var_value)

    ## Transfer restart files
    transfer_restart_files_to_new_case(ref_file, #Reference param file
                                       harmonized_reference_case_name = None, # The prefix in the reference restart files
                                       destination_run_dir = new_rest_path, #Path to the destination run dir
                                       destination_inst_tag = new_tag,
                                       manual_case_name = None,
                                       manual_rundir = ref_rest_path) # Path to the reference restart files


# Create first batch of param files that will have the same names as the reference
for ref_file in ref_files:
    _create_ensemble_member(ref_file, os.path.join(new_param_dir,os.path.basename(ref_file)))

# If new number of param files is double the reference
if n_param_files_new_case == 2 * len(ref_files):

    # Create new tags to complete the param files: one per reference file,
    # continuing the numbering right after the last reference instance
    starting_inst_num = len(ref_files) + 1
    new_tags = [str(inst_num).zfill(4)
                for inst_num in range(starting_inst_num, starting_inst_num + len(ref_files))]

    # Create new param files
    for ref_file, new_tag in zip(ref_files, new_tags):
        # Renumber the bare file name so a directory component can never be
        # rewritten by mistake
        new_name = replace_four_digit_number_between_underscore_and_period(os.path.basename(ref_file), new_tag)
        _create_ensemble_member(ref_file, os.path.join(new_param_dir,new_name))

print(len(os.listdir(new_param_dir)),"files in new directory")

Value 0.0825 assigned to fates_fire_nignitions.
/glade/u/home/adamhb/ahb_params/fates_api_25/ensembles/CZ2_trans_110923_01/ca_5pfts_100523_0001.nc
0001
Copying /glade/derecho/scratch/adamhb/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69/rest/1870-01-01-00000/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69.cpl_0001.r.1870-01-01-00000.nc to /glade/derecho/scratch/adamhb/CZ2_trans_1870_1951_012524_-17e2acb6a_FATES-5b076b69/run/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69.cpl_0001.r.1870-01-01-00000.nc
Copying /glade/derecho/scratch/adamhb/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69/rest/1870-01-01-00000/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69.clm2_0001.rh0.1870-01-01-00000.nc to /glade/derecho/scratch/adamhb/CZ2_trans_1870_1951_012524_-17e2acb6a_FATES-5b076b69/run/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69.clm2_0001.rh0.1870-01-01-00000.nc
Copying /glade/derecho/scratch/adamhb/CZ2_equilibrium_011824_-17e2acb6a_FATES-5b076b69/rest/1870-01-01-00000/CZ2_equilibrium