## Modify restart files

The script modifies all restart files in a directory. It applies the same modification to all restart files in the direcotry.

In [88]:
import netCDF4 as nc4
import xarray as xr
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
sys.path.append('/glade/u/home/adamhb/Earth-System-Model-Tools/')
import esm_tools

In [1]:
# Path to the restart files you want to modify
path_to_rest_files = '/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp'
test_file_path = '/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0001.r.2015-01-01-00000.nc'

In [105]:
rest_file.fates_leaf_fines_vec_001

In [77]:
data_var_names = np.array(list(rest_file.data_vars))

In [None]:
def assign_multi_dim_variable_to_netcdf(file_path, variable_name, new_value):
    with nc4.Dataset(file_path, 'r+') as dataset:
        if variable_name in dataset.variables:
            # Access the variable
            variable = dataset.variables[variable_name]

            # Assign a value
            # The way you assign depends on the shape and dimensions of the variable
            # For a single-value variable:

            variable[...] = new_value  # Replace new_value with the value you want to assign
            print("Changed {} to {}".format(variable_name,new_value))
            # For a multi-dimensional variable, specify indices or slices
            # Example for a 2D variable (like temperature at a specific time and place):
            # variable[time_index, place_index] = new_value

            #print(f"Value {new_value} assigned to {variable_name}.")
        else:
            print(f"Variable {variable_name} not found in the dataset.")

In [129]:
def treat_forest(path_to_rest_file,pfts_to_treat,max_size_to_treat,cwd_scalar):

    rest_file = xr.open_dataset(path_to_rest_file,decode_times=False)
    df = pd.DataFrame({'pft':rest_file.fates_pft.values,'n':rest_file.fates_nplant.values,
                       'dbh':rest_file.fates_dbh,'cwd':rest_file.fates_cwdagin_vec_001,'leaf_litt':rest_file.fates_leaf_fines_vec_001})
    
    
    # Remove trees
    not_shrub = df['pft'].isin(pfts_to_treat)
    small_trees = df['dbh'] < ,max_size_to_treat
    df['nplant_treated'] = np.where((not_shrub) & (small_trees), 0, df['n'])

    # Remove CWD and leaf litter
    df['cwd'] = df['cwd'] * cwd_scalar
    df['leaf_litt'] = df['leaf_litt'] * cwd_scalar
    
    #print(len(df.loc[(not_shrub) & (small_trees)]),'cohorts remain')


    #Reassign
    assign_multi_dim_variable_to_netcdf(path_to_rest_file,"fates_nplant",  df['nplant_treated'].values)
    assign_multi_dim_variable_to_netcdf(path_to_rest_file,"fates_cwdagin_vec_001",  df['cwd'].values)
    assign_multi_dim_variable_to_netcdf(path_to_rest_file,"fates_leaf_fines_vec_001",  df['leaf_litt'].values)

    
    return df

#treated_df = treat_forest(path_to_rest_file = test_file_path,pfts_to_treat=[1.0,2.0,3.0,5.0],max_size_to_treat = 40.0,cwd_scalar = 0)

In [127]:
import os

def find_matching_files(directory):
    """
    Finds and returns a list of file names in the specified directory that contain
    both 'clm2_' and '.r.' in their file names.

    :param directory: String, the path to the directory where the files are located
    :return: List of strings, the file names that match the criteria
    """
    matching_files = []
    for filename in os.listdir(directory):
        if "clm2_" in filename and ".r." in filename:
            matching_files.append(filename)
    return matching_files

In [128]:
files_to_treat = find_matching_files(path_to_rest_files)
sorted(files_to_treat)

['supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0001.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0002.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0003.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0004.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0005.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0006.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0007.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0008.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0009.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0010.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0011.r.2015-01-01-00000.nc',
 'supIg105_020224-1951-2020_-17e

In [137]:
paths_files_to_treat = [os.path.join(path_to_rest_files,f) for f in files_to_treat]

for file in paths_files_to_treat:
    print(file)
    treat_forest(path_to_rest_file = file,pfts_to_treat=[1.0,2.0,3.0,5.0],max_size_to_treat = 40.0,cwd_scalar = 0)

/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0023.r.2015-01-01-00000.nc
Changed fates_nplant to [0.         0.         0.00957963 ... 0.         0.         0.        ]
Changed fates_cwdagin_vec_001 to [0. 0. 0. ... 0. 0. 0.]
Changed fates_leaf_fines_vec_001 to [0. 0. 0. ... 0. 0. 0.]
/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0040.r.2015-01-01-00000.nc
Changed fates_nplant to [0. 0. 0. ... 0. 0. 0.]
Changed fates_cwdagin_vec_001 to [0. 0. 0. ... 0. 0. 0.]
Changed fates_leaf_fines_vec_001 to [0. 0. 0. ... 0. 0. 0.]
/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0003.r.2015-01-01-00000.nc
Changed fates_nplant to [0. 

In [132]:
ds = xr.open_dataset('/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0023.r.2015-01-01-00000.nc')

In [134]:
ds_with_netcdf4 = nc4.Dataset('/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0023.r.2015-01-01-00000.nc')

In [136]:
ds_with_netcdf4.variables['fates_nplant']

<class 'netCDF4._netCDF4.Variable'>
float64 fates_nplant(cohort)
    long_name: ed cohort - number of plants in the cohort
    units: /patch
    interpinic_flag: 1
    interpinic_flag_meanings: 1=nearest neighbor, 2=copy directly, 3=skip, 4=area
    varnames_on_old_files: fates_nplant
    _FillValue: 1e+36
    missing_value: 1e+36
unlimited dimensions: 
current shape = (1500,)
filling on

In [118]:
test_file_path = '/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/test_supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0001.r.2015-01-01-00000.nc'

assign_multi_dim_variable_to_netcdf(file_path= test_file_path,variable_name='fates_nplant',new_value=treated_df['nplant_treated'].values)

Changed fates_nplant to [0.00000000e+00 1.00531165e-05 0.00000000e+00 ... 0.00000000e+00
 0.00000000e+00 0.00000000e+00]


In [113]:
treated_df['nplant_treated'].values

array([0.00000000e+00, 1.00531165e-05, 0.00000000e+00, ...,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00])

In [104]:
rest_file.fates_cwdagin_vec_001
rest_file.fates_leaf_fines_vec_001

In [97]:
filter = ['cwd' in i for i in data_var_names]
data_var_names[filter]

array(['fates_ag_cwd_vec_001', 'fates_bg_cwd_vec_001',
       'fates_ag_cwd_frag_vec_001', 'fates_bg_cwd_frag_vec_001',
       'fates_cwdagin_vec_001', 'fates_cwdbgin_vec_001'], dtype='<U32')

In [89]:
hist_file = xr.open_dataset('/glade/derecho/scratch/adamhb/supIg105_treated_041124-2015-2098-2015-2098_-17e2acb6a_FATES-1449c787/run/rest_temp/supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69.clm2_0001.h0.2014-12.nc')
esm_tools.get_combustible_fuel(hist_file,all_fuel=True)

array([8.222789], dtype=float32)

In [92]:
age_by_fuel = esm_tools.agefuel_to_age_by_fuel(hist_file.FATES_FUEL_AMOUNT_APFC,hist_file)

In [94]:
age_by_fuel.sum(dim = "fates_levage")

In [95]:
rest_file.fates_ag_cwd_vec_001

In [85]:
list(rest_file.fates_ag_cwd_vec_001.values)

[0.1162195317156473,
 0.25303050605153155,
 1.0123526592263383,
 20.900777796356284,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.10282716960148507,
 0.2251337089017067,
 0.7714652875866648,
 15.289310866725911,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0

In [None]:
fates_termcarea_canopy

In [7]:
nc4.Dataset(test_file_path)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF3_64BIT_OFFSET data model, file format NETCDF3):
    Conventions: CF-1.0
    history: created on 02/03/24 20:10:05
    username: adamhb
    host: derecho
    version: unknown
    source: Community Terrestrial Systems Model
    revision_id: $Id: restFileMod.F90 41292 2012-10-26 13:51:45Z erik $
    case_title: supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69
    case_id: supIg105_020224-1951-2020_-17e2acb6a_FATES-5b076b69
    surface_dataset: /glade/work/adamhb/input_data/CZ2_wrf_1950_2020/surfdata_my_point_hist_16pfts_Irrig_CMIP6_ahb_simyr2000_c230301.nc
    flanduse_timeseries: 
    title: CLM Restart information
    create_crop_landunit: false
    irrigate: false
    created_glacier_mec_landunits: true
    ipft_not_vegetated: 0
    ipft_needleleaf_evergreen_temperate_tree: 1
    ipft_needleleaf_evergreen_boreal_tree: 2
    ipft_needleleaf_deciduous_boreal_tree: 3
    ipft_broadleaf_evergreen_tropical_tree: 4
    ipft_broadleaf_e

In [6]:
def open_nc_file(file_path):
    with nc4.Dataset(file_path, 'r+') as dataset:
        return dataset

data = open_nc_file(test_file_path)
data

RuntimeError: NetCDF: Not a valid ID

In [None]:
with nc4.Dataset(test_file_path, 'r+') as dataset:
        if variable_name in dataset.variables:
            # Access the variable
            variable = dataset.variables[variable_name]

            # Assign a value
            # The way you assign depends on the shape and dimensions of the variable
            # For a single-value variable:

            if (new_value == -999) | (new_value == "-999"):
                print("Not changing variable value")
                return

            variable[...] = new_value  # Replace new_value with the value you want to assign
            print("Changed {} to {}".format(variable_name,new_value))
            # For a multi-dimensional variable, specify indices or slices
            # Example for a 2D variable (like temperature at a specific time and place):
            # variable[time_index, place_index] = new_value

            #print(f"Value {new_value} assigned to {variable_name}.")
        else:
            print(f"Variable {variable_name} not found in the dataset.")
