In [1]:
import os
import glob
import xarray as xr
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [2]:

expt_name = 'LS_DAv8_M36'

start_date = datetime(2020, 1, 1)
end_date = datetime(2020, 1, 6)

start_date_str = start_date.strftime('%Y%m%d')
end_date_str = end_date.strftime('%Y%m%d')

# Define the path directory
root_directory = f'/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'


In [3]:
current_date = start_date
while current_date <= end_date:
    year_month_directory = os.path.join(root_directory, 
                                        f"Y{current_date.year}", 
                                        f"M{current_date.month:02d}")

    # Find the files
    files = glob.glob(f"{year_month_directory}/*catch_progn_incr*.nc4")

    # Load the data
    data = xr.open_mfdataset(files, combine='nested', concat_dim="time")

    wesnn1_incr = data['WESNN1_INCR']
    wesnn2_incr = data['WESNN2_INCR']
    wesnn3_incr = data['WESNN3_INCR']

    snow_incr = wesnn1_incr + wesnn2_incr + wesnn3_incr
    # Ensure snow_incr is properly defined
    if not isinstance(snow_incr, xr.DataArray):
        raise ValueError("snow_incr is not a valid xarray.DataArray")

    # Find snow increments that are larger than abs(0.001)
    snow_incr = snow_incr.where(np.abs(snow_incr) > 0.001)

    if not isinstance(snow_incr, xr.DataArray):
        raise ValueError("snow_incr is not a valid xarray.DataArray")
    print(snow_incr.dims)  # Check dimensions
    print(snow_incr.coords)  # Check coordinates
    print(snow_incr.attrs)  # Check attributes

    # Count and find the mean of the snow increments, ignoring NaNs
    snow_incr_count = snow_incr.count(dim='time')
    snow_incr_mean = snow_incr.mean(dim='time', skipna=True)  

    # .expand_dims(time=[current_date])
    snow_incr_count = snow_incr_count.expand_dims(time=[current_date])
    snow_incr_mean = snow_incr_mean.expand_dims(time=[current_date])

    if not isinstance(snow_incr_count, xr.DataArray):
        raise ValueError("snow_incr is not a valid xarray.DataArray")
    print(snow_incr_count.dims)  # Check dimensions
    print(snow_incr_count.coords)  # Check coordinates
    print(snow_incr_count.attrs)  # Check attributes  

    # Print the max and min values for incr_count and incr_mean
    print(f"Max snow_incr_count: {snow_incr_count.values.max()}, Min snow_incr_count: {snow_incr_count.values.min()}")
    print(f"Max snow_incr_mean: {snow_incr_mean.values.max()}, Min snow_incr_mean: {snow_incr_mean.values.min()}")

  # Combine variables into a single dataset
    output_data = xr.Dataset({
        'snow_incr_count': snow_incr_count,
        'snow_incr_mean': snow_incr_mean
    })

    # Save the data to a single file
    output_filename = f"{root_directory}/snow_incrs_{current_date.strftime('%Y%m')}.nc4"
    output_data.to_netcdf(output_filename)

    # Increment the date
    current_date += relativedelta(months=1)


('time', 'tile')
Coordinates:
    *empty*
{}
('time', 'tile')
Coordinates:
  * time     (time) datetime64[ns] 8B 2020-01-01
{}
Max snow_incr_count: 6, Min snow_incr_count: 0
Max snow_incr_mean: nan, Min snow_incr_mean: nan


In [None]:

    # Define the variables to be extracted
    variables = {
    'sm_surface': 'SFMC',
    'sm_rootzone': 'RZMC',
    'sm_profile': 'PRMC',
    'precipitation_total_surface_flux': 'PRECTOTCORRLAND',
    'vegetation_greenness_fraction': 'GRN',
    'leaf_area_index': 'LAI',
    'snow_mass': 'SNOMASLAND',
    'surface_temperature_of_land_incl_snow': 'TSURFLAND',
    'soil_temperature_layer_1': 'TSOIL1',
    'snowfall_land': 'PRECSNOCORRLAND',
    'snow_depth_within_snow_covered_area_fraction_on_land': 'SNODPLAND',
    'snowpack_evaporation_latent_heat_flux_on_land': 'LHLANDSBLN',
    'overland_runoff_including_throughflow': 'RUNSURFLAND',
    'baseflow_flux_land': 'BASEFLOWLAND',
    'snowmelt_flux_land': 'SMLAND',
    'total_evaporation_land': 'EVLAND',
    'net_shortwave_flux_land': 'SWLAND',
    'total_water_storage_land': 'TWLAND',
    'fractional_area_of_snow_on_land': 'FRLANDSNO'  # New variable added
    }

    # Extract the variables and calculate the mean along the time dimension
    data_extracted = data[list(variables.values())].mean(dim='time')

    # Add a time dimension to the extracted data
    data_extracted = data_extracted.expand_dims(time=[current_date])

    # Save the data
    output_directory = f'/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'
    os.makedirs(output_directory, exist_ok=True)
    output_filename = os.path.join(output_directory, f"{expt_name}.tavg24_1d_lnd_Nt.{current_date.strftime('%Y%m')}.nc")
    data_extracted.to_netcdf(output_filename)

    # Increment the date
    current_date += relativedelta(months=1)