In [1]:
import os
import shutil
from glob import glob
from datetime import datetime, timedelta
import xarray as xr
from dateutil.relativedelta import relativedelta

In [2]:
# Experiment whose output is read. Other candidate values for expt_name:
#   'DAv7_M36_ASCAT_type_13_no_catdef_fp'
#   'DAv7_M36_ASCAT_type_2_fp_precip'
#   'DAv7_M36_ASCAT_type_13_test_catdef'
expt_name = 'DAv7_M36_MULTI_type_13_comb_fp_scaled'
# Short label that replaces expt_name in the written/copied file names and
# names the sub-folder of the output tree.
expt_name_short = 'MLT_DA'

# First and last date of the export. The processing cell steps through this
# range one month at a time and visits every day of each month it reaches.
start_date = datetime(year=2020, month=7, day=1)
end_date = datetime(year=2020, month=7, day=1)

# Input tree holding the 'cat/ens_avg' and 'ana/ens_avg' folders.
# The /discover path is assigned first and then immediately overridden by the
# local copy on the next line; only the second value is in effect.
root_directory = f'/discover/nobackup/amfox/Experiments/{expt_name}/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/'
root_directory = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/'

# Destination tree; Y<year>/M<month>/D<day> folders are created beneath it.
output_directory = f'/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/data/{expt_name_short}/'

In [3]:


def copy_obs_fcst_ana_files(ana_directory, destination_directory, day, source_name, short_name):
    """Copy one day's ObsFcstAna binaries into a folder, shortening their names.

    Files are matched with the glob pattern
    ``<source_name>.ens_avg.ldas_ObsFcstAna.<YYYYMMDD>_??00z.bin`` inside
    ``ana_directory``, where ``<YYYYMMDD>`` is taken from ``day``.

    Args:
        ana_directory: Folder searched for the ``.bin`` files.
        destination_directory: Existing folder the copies are written to.
        day: ``datetime`` whose date selects which files are copied.
        source_name: Experiment name as it appears in the source file names.
        short_name: Text substituted for ``source_name`` in the copied names.

    Returns:
        Sorted list of the destination paths that were written (empty when
        nothing matched the pattern).
    """
    pattern = os.path.join(
        ana_directory,
        f"{source_name}.ens_avg.ldas_ObsFcstAna.{day.strftime('%Y%m%d')}_??00z.bin",
    )

    copied_files = []
    for source_file in sorted(glob(pattern)):
        # Keep the base name but swap the long experiment name for the short one.
        new_filename = os.path.basename(source_file).replace(source_name, short_name)
        destination_file = os.path.join(destination_directory, new_filename)
        # copy2 also carries over file metadata such as timestamps.
        shutil.copy2(source_file, destination_file)
        copied_files.append(destination_file)
    return copied_files


# Inside a notebook kernel __name__ is "__main__", so the export runs whenever
# this cell is executed. The guard only keeps it from firing if this code is
# ever imported as a module.
if __name__ == "__main__":
    current_date = start_date

    # NOTE: start_date/end_date are stepped through month by month, and every
    # day of each month reached is visited, regardless of their day-of-month.
    while current_date <= end_date:
        year_month_cat_directory = os.path.join(root_directory,
                                                'cat/ens_avg',
                                                f"Y{current_date.year}",
                                                f"M{current_date.month:02d}")
        year_month_ana_directory = os.path.join(root_directory,
                                                'ana/ens_avg',
                                                f"Y{current_date.year}",
                                                f"M{current_date.month:02d}")

        print(year_month_cat_directory)

        # Walk from the first to the last calendar day of the current month and
        # look for the daily .nc4 files whose name ends with e.g. 20200724.nc4.
        curr_day = current_date.replace(day=1)
        last_day = curr_day + relativedelta(months=1) - timedelta(days=1)

        while curr_day <= last_day:
            day_stamp = curr_day.strftime("%Y%m%d")

            inst3_1d_lndfcstana_Nt_file_path = os.path.join(year_month_cat_directory, f'{expt_name}.inst3_1d_lndfcstana_Nt.{day_stamp}.nc4')
            SMAP_L4_SM_gph_file_path = os.path.join(year_month_cat_directory, f'{expt_name}.SMAP_L4_SM_gph.{day_stamp}.nc4')
            catch_progn_incr_file_path = os.path.join(year_month_cat_directory, f'{expt_name}.catch_progn_incr.{day_stamp}.nc4')

            # A day is exported only when all three of its input files are present.
            if os.path.exists(inst3_1d_lndfcstana_Nt_file_path) and os.path.exists(SMAP_L4_SM_gph_file_path) and os.path.exists(catch_progn_incr_file_path):
                print(inst3_1d_lndfcstana_Nt_file_path)
                print(SMAP_L4_SM_gph_file_path)
                print(catch_progn_incr_file_path)

                year_month_day_directory = os.path.join(output_directory,
                                                        f"Y{current_date.year}",
                                                        f"M{current_date.month:02d}",
                                                        f"D{curr_day.day:02d}")

                # Ensure the per-day output directory exists
                os.makedirs(year_month_day_directory, exist_ok=True)

                # Output names: <expt_name_short>.<collection>.subsetted.<YYYYMMDD>.nc4
                inst3_1d_lndfcstana_Nt_output_file_path = os.path.join(year_month_day_directory, f"{expt_name_short}.inst3_1d_lndfcstana_Nt.subsetted.{day_stamp}.nc4")
                SMAP_L4_SM_gph_output_file_path = os.path.join(year_month_day_directory, f"{expt_name_short}.SMAP_L4_SM_gph.subsetted.{day_stamp}.nc4")
                catch_progn_incr_output_file_path = os.path.join(year_month_day_directory, f"{expt_name_short}.catch_progn_incr.subsetted.{day_stamp}.nc4")

                # Each dataset is opened in a `with` block so it is closed even
                # if selecting the variables or writing the subset fails.
                with xr.open_dataset(inst3_1d_lndfcstana_Nt_file_path) as inst3_1d_lndfcstana_Nt_ds:
                    inst3_1d_lndfcstana_Nt_ds[['time_stamp', 'lon', 'lat', 'SFMC_ANA', 'SFMC_FCST', 'RZMC_ANA', 'RZMC_FCST']].to_netcdf(inst3_1d_lndfcstana_Nt_output_file_path)

                with xr.open_dataset(SMAP_L4_SM_gph_file_path) as SMAP_L4_SM_gph_ds:
                    SMAP_L4_SM_gph_ds[['time_stamp', 'lon', 'lat', 'sm_surface', 'sm_rootzone']].to_netcdf(SMAP_L4_SM_gph_output_file_path)

                with xr.open_dataset(catch_progn_incr_file_path) as catch_progn_incr_ds:
                    catch_progn_incr_ds[['time_stamp', 'lon', 'lat', 'SRFEXC_INCR', 'RZEXC_INCR']].to_netcdf(catch_progn_incr_output_file_path)

                # Copy the ObsFcstAna binaries of *this* day (curr_day), not of
                # the month cursor, so each D<day> folder gets its own files.
                copy_obs_fcst_ana_files(year_month_ana_directory,
                                        year_month_day_directory,
                                        curr_day,
                                        expt_name,
                                        expt_name_short)

            # Advance on every iteration; a day with missing input files is
            # skipped rather than stalling the loop.
            curr_day += timedelta(days=1)

        current_date += relativedelta(months=1)  # Move to the next month



/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2020/M07
/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2020/M07/DAv7_M36_MULTI_type_13_comb_fp_scaled.inst3_1d_lndfcstana_Nt.20200701.nc4
/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2020/M07/DAv7_M36_MULTI_type_13_comb_fp_scaled.SMAP_L4_SM_gph.20200701.nc4
/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2020/M07/DAv7_M36_MULTI_type_13_comb_fp_scaled.catch_progn_incr.20200701.nc4
/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2020/M07/DAv7_M36_MULTI_type_13_comb_fp_scaled.inst3_1d_lndfcstana_Nt.20200702.nc4
/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/fp_scaled/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2020/M07/DAv7_M36_MULTI_type_13_comb_fp_sca