In [4]:
import os
import glob
import xarray as xr
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [5]:

# --- Run configuration -------------------------------------------------------
# Name of the experiment; it selects the experiment sub-directory below.
expt_name = 'LS_DAv8_M36'

# Date range of interest.
start_date = datetime(2020, 1, 1)
end_date = datetime(2020, 1, 6)

# The same two dates rendered as compact YYYYMMDD strings.
start_date_str = format(start_date, '%Y%m%d')
end_date_str = format(end_date, '%Y%m%d')

# Directory that holds the per-year/per-month (Yyyyy/Mmm) sub-directories read
# by the processing loop, and that also receives the monthly summary files.
root_directory = (
    '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/'
    + expt_name
    + '/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'
)


In [7]:
# Monthly snow-increment summary.
#
# For every calendar month touched by [start_date, end_date]:
#   1. open all "*catch_progn_incr*.nc4" files found in
#      <root_directory>/Y<yyyy>/M<mm>,
#   2. sum the three WESNN*_INCR variables into a single snow increment,
#   3. discard increments whose magnitude does not exceed the threshold,
#   4. compute, along "time", the number of remaining increments and their mean,
#   5. write both fields to
#      <root_directory>/snow_incrs_<yyyymm>.compressed.nc4.
#
# Raises FileNotFoundError when a month directory contains no matching files.

# Increments with |value| <= this threshold are ignored (same units as the
# WESNN*_INCR variables).
SNOW_INCR_THRESHOLD = 0.001

# Anchor the iteration on the first day of the month. Stepping month-by-month
# from an arbitrary day (e.g. the 15th) can overshoot end_date (e.g. the 10th)
# and silently skip the final month.
current_date = datetime(start_date.year, start_date.month, 1)
while current_date <= end_date:
    year_month_directory = os.path.join(root_directory,
                                        f"Y{current_date.year}",
                                        f"M{current_date.month:02d}")

    # glob returns paths in arbitrary order; sort them so the order in which
    # files are concatenated along "time" is reproducible.
    files = sorted(glob.glob(f"{year_month_directory}/*catch_progn_incr*.nc4"))
    if not files:
        # Fail with the offending directory in the message instead of xarray's
        # generic "no files to open" error.
        raise FileNotFoundError(
            f"No '*catch_progn_incr*.nc4' files found in {year_month_directory}"
        )

    # Open the month's files as one dataset. The context manager closes the
    # underlying files once the month has been written, so handles do not
    # accumulate across iterations. Everything that needs the data (including
    # the write, which triggers the actual computation) stays inside the block.
    with xr.open_mfdataset(files, combine='nested', concat_dim="time") as data:
        wesnn1_incr = data['WESNN1_INCR']
        wesnn2_incr = data['WESNN2_INCR']
        wesnn3_incr = data['WESNN3_INCR']

        # Total snow increment: sum of the three WESNN*_INCR variables.
        snow_incr = wesnn1_incr + wesnn2_incr + wesnn3_incr

        # Keep only increments whose magnitude exceeds the threshold; all
        # other entries become NaN and drop out of the statistics below.
        snow_incr = snow_incr.where(np.abs(snow_incr) > SNOW_INCR_THRESHOLD)

        # Number of retained (non-NaN) increments and their mean along "time".
        snow_incr_count = snow_incr.count(dim='time')
        snow_incr_mean = snow_incr.mean(dim='time', skipna=True)

        # Re-attach a length-1 "time" dimension stamped with the month being
        # processed, so the monthly files can later be concatenated in time.
        snow_incr_count = snow_incr_count.expand_dims(time=[current_date])
        snow_incr_mean = snow_incr_mean.expand_dims(time=[current_date])

        # Combine variables into a single dataset
        output_data = xr.Dataset({
            'snow_incr_count': snow_incr_count,
            'snow_incr_mean': snow_incr_mean
        })

        # Save the month's statistics to a single zlib-compressed file
        output_filename = f"{root_directory}/snow_incrs_{current_date.strftime('%Y%m')}.compressed.nc4"
        encoding = {var: {'zlib': True, 'complevel': 5} for var in output_data.data_vars}
        output_data.to_netcdf(output_filename, encoding=encoding)

    # Advance to the first day of the next month
    current_date += relativedelta(months=1)
