In [1]:
import os
import glob
import xarray as xr
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [2]:

# Experiment identifier; it is interpolated into the input path below.
expt_name = '1e_LS_DAv8_M36_qc6'

# Analysis window, plus the YYYYMMDD string form of each bound.
start_date, end_date = datetime(2002, 10, 1), datetime(2003, 10, 1)
start_date_str, end_date_str = (
    bound.strftime('%Y%m%d') for bound in (start_date, end_date)
)

# Directory holding the experiment output that is read by the cells below.
# Alternative location (local test data, ens_avg), kept for reference:
# root_directory = f'/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'
root_directory = f'/discover/nobackup/projects/land_da/snow_qc_expts/1e_LS_DAv8_M36_0/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens0000'

In [3]:
%%time

# Build one summary file per calendar month.
#
# For every month from start_date's month through end_date's month (the end
# month is included because the loop test is `<=`), this reads all
# *catch_progn_incr*.nc4 files under <root_directory>/Y<yyyy>/M<mm>, adds the
# three WESNN*_INCR variables together, and writes two fields to
# <root_directory>/<expt_name>.monthly_snow_increments.<yyyymm>.nc:
#   TOTAL_INCREMENT_SUM          - sum of the total increment over the month
#   TOTAL_INCREMENT_EVENT_COUNT  - number of time steps with |total| > threshold
#
# Raises FileNotFoundError (an OSError subclass) if a month has no input files.

# Loop-invariant settings, hoisted so they are evaluated once.
threshold = 0.001  # magnitude a total increment must exceed to count as an event
output_directory = root_directory
os.makedirs(output_directory, exist_ok=True)

current_date = start_date
while current_date <= end_date:
    year_month_directory = os.path.join(root_directory,
                                        f"Y{current_date.year}",
                                        f"M{current_date.month:02d}")

    # Find files. glob returns them in arbitrary order, so sort to make the
    # order in which they are concatenated along "time" deterministic.
    files = sorted(glob.glob(f"{year_month_directory}/*catch_progn_incr*.nc4"))
    if not files:
        # Fail with the offending directory in the message instead of
        # xarray's generic "no files to open".
        raise FileNotFoundError(
            f"No '*catch_progn_incr*.nc4' files found in {year_month_directory}"
        )

    # Load data. The context manager closes the underlying files when the
    # month is done, so handles do not accumulate across iterations.
    with xr.open_mfdataset(files, combine='nested', concat_dim="time") as data:
        # Compute total increment at each time step
        # (presumably [time, lat, lon], per the original note - TODO confirm)
        total_increment = (
            data['WESNN1_INCR'] +
            data['WESNN2_INCR'] +
            data['WESNN3_INCR']
        )

        # Sum of total increment over the month.
        # NOTE(review): xarray's sum skips NaN by default, so a cell that is
        # NaN at every time step would come out as 0 here - confirm whether
        # that is intended or whether min_count=1 is wanted.
        total_increment_sum = total_increment.sum(dim='time')
        total_increment_sum.name = 'TOTAL_INCREMENT_SUM'

        # Count of significant increment events (|increment| > threshold)
        increment_event_mask = abs(total_increment) > threshold
        increment_event_count = increment_event_mask.sum(dim='time')
        increment_event_count.name = 'TOTAL_INCREMENT_EVENT_COUNT'

        # Add a length-1 time dimension stamped with the first of the month
        total_increment_sum = total_increment_sum.expand_dims(time=[current_date])
        increment_event_count = increment_event_count.expand_dims(time=[current_date])

        # Merge, and pull the (small) result into memory while the input
        # files are still open so it stays usable after they are closed.
        combined = xr.merge([total_increment_sum, increment_event_count]).load()

    # Save
    output_filename = os.path.join(
        output_directory,
        f"{expt_name}.monthly_snow_increments.{current_date.strftime('%Y%m')}.nc"
    )
    combined.to_netcdf(output_filename)

    # Advance to next month
    current_date += relativedelta(months=1)

CPU times: user 17.2 s, sys: 1.73 s, total: 18.9 s
Wall time: 2min 20s
