In [None]:
import numpy as np
import struct
import os
import glob
import xarray as xr
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from mapper_functions import plot_global_tight_pcm, plot_NA_tight_pcm, plot_region

from helper import read_ObsFcstAna, read_tilecoord, get_tile_species_obs_values

In [None]:
# Names of the two experiments being compared (used in paths and plot titles).
expt_name_1 = 'LS_DAv8_M36'
expt_name_2 = 'LS_DAv8_M36_t10'

# Inclusive analysis window; the monthly loops keep a year/month when its
# first day falls inside [start_date, end_date].
start_date, end_date = datetime(2002, 10, 1), datetime(2006, 10, 1)

# The same window formatted for plot titles.
start_date_str, end_date_str = (d.strftime('%Y/%m/%d') for d in (start_date, end_date))

# Root directories of the ens_avg "ana" (ObsFcstAna) output of each experiment.
ana_directory_1 = (
    '/discover/nobackup/projects/land_da/Experiment_archive/M21C_land_sweeper_DAv8_M36/'
    f'{expt_name_1}/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg'
)
ana_directory_2 = (
    '/discover/nobackup/projects/land_da/snow_qc_expts/LS_DAv8_M36_t10/'
    f'{expt_name_2}/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg'
)

In [None]:
def _load_assimilated_obs(ana_directory, month, first_date, last_date):
    """Collect the assimilated observations of one calendar month over all in-range years.

    Scans ``ana_directory/Y*/M<month>``, keeps the year directories whose first
    day of the month lies in ``[first_date, last_date]`` (inclusive), reads every
    ``*.ldas_ObsFcstAna.*.bin`` file found there and keeps only the entries
    whose ``obs_assim`` equals 1.

    Returns a dict of concatenated arrays keyed by 'obs_tilenum',
    'obs_species', 'obs_lat', 'obs_lon' and 'obs_obs', or ``None`` when no file
    was found (``np.concatenate`` cannot be called on an empty list).
    """
    ofa_list = []
    for month_dir in sorted(glob.glob(os.path.join(ana_directory, 'Y*', f'M{month:02d}'))):
        # The year is the four characters following the last '/Y' of the path.
        try:
            year = int(month_dir.split('/Y')[-1][:4])
        except ValueError:
            print(f"Warning: Skipping directory with unexpected format: {month_dir}")
            continue

        if not (first_date <= datetime(year, month, 1) <= last_date):
            continue

        for fname in sorted(glob.glob(os.path.join(month_dir, '*.ldas_ObsFcstAna.*.bin'))):
            ofa_list.append(read_ObsFcstAna(fname))

    if not ofa_list:
        return None

    masks = [ofa['obs_assim'] == 1 for ofa in ofa_list]
    keys = ('obs_tilenum', 'obs_species', 'obs_lat', 'obs_lon', 'obs_obs')
    return {
        key: np.concatenate([ofa[key][mask] for ofa, mask in zip(ofa_list, masks)])
        for key in keys
    }


def _print_and_plot_obs_stats(stats, expt_name, month_name, aqua_count_flag):
    """Print a short summary of ``stats`` and draw the six per-experiment maps.

    ``stats`` is the result of ``get_tile_species_obs_values``; species 12 is
    plotted as MYD10C1 and species 13 as MOD10C1. ``aqua_count_flag`` is the
    second positional argument handed to ``plot_global_tight_pcm`` for the two
    MYD10C1 count maps.
    NOTE(review): the original cell passed False there for experiment 1 and True
    for experiment 2; that asymmetry is preserved by the callers - confirm it is
    intended (the flag's meaning is defined in mapper_functions).
    """
    print(f"Number of unique tiles: {len(stats['tiles'])}")
    print(f"Number of species: {len(stats['max_values'])}")
    for species, max_vals in stats['max_values'].items():
        print(f"Species {species}: max value = {np.max(max_vals)}")

    # Columns: 0 = value to plot, 1 = longitude, 2 = latitude.
    panel_array = np.full((stats['lon'].shape[0], 3), np.nan)
    panel_array[:, 1] = stats['lon']
    panel_array[:, 2] = stats['lat']

    title_prefix = f"{expt_name} {start_date_str} - {end_date_str}:\n "
    panels = (
        # (stats field, species, flag, title suffix, units, colour-scale max)
        ('max_values', 12, True, f"Max MYD10C1 obs: {month_name}", 'fraction', 1),
        ('max_values', 13, False, f"Max MOD10C1 obs {month_name}", 'fraction', 1),
        ('num_obs', 12, aqua_count_flag, f"MYD10C1 N obs: {month_name}", '-', 120),
        ('num_obs', 13, False, f"MOD10C1 N obs {month_name}", '-', 120),
        ('num_obs_gt_0.9', 12, aqua_count_flag, f"MYD10C1 N obs > 0.9: {month_name}", '-', 40),
        ('num_obs_gt_0.9', 13, False, f"MOD10C1 N obs > 0.9 {month_name}", '-', 40),
    )
    for field, species, flag, label, units, cmax in panels:
        panel_array[:, 0] = stats[field][species]
        plot_global_tight_pcm(panel_array, flag, False, title_prefix + label, units, 0, cmax)


def _first_index_by_tile(tiles):
    """Map each tile id to the index of its first occurrence in ``tiles``."""
    first_index = {}
    for position, tile in enumerate(tiles):
        first_index.setdefault(tile, position)
    return first_index


def _difference_on_base_tiles(values_1, values_2, index_1, index_2):
    """Return experiment 2 minus experiment 1 on the tiles of experiment 1.

    ``index_2`` holds -1 where a tile is absent from experiment 2; such tiles
    contribute 0 for experiment 2, as in the original per-tile loop.
    """
    present = index_2 >= 0
    aligned_2 = np.zeros(index_1.shape[0])
    aligned_2[present] = np.asarray(values_2, dtype=float)[index_2[present]]
    return aligned_2 - np.asarray(values_1, dtype=float)[index_1]


# Process each month (1-12)
for month in range(1, 13):

    # Month name only; the year 2000 is arbitrary.
    month_name = datetime(2000, month, 1).strftime("%B")

    # ---- Experiment 1 ------------------------------------------------------
    filtered_data = _load_assimilated_obs(ana_directory_1, month, start_date, end_date)
    if filtered_data is None:
        print(f"Warning: no ObsFcstAna files for {expt_name_1} in {month_name}; skipping month")
        continue
    stats_1 = get_tile_species_obs_values(filtered_data)
    _print_and_plot_obs_stats(stats_1, expt_name_1, month_name, aqua_count_flag=False)

    # ---- Experiment 2 ------------------------------------------------------
    filtered_data = _load_assimilated_obs(ana_directory_2, month, start_date, end_date)
    if filtered_data is None:
        print(f"Warning: no ObsFcstAna files for {expt_name_2} in {month_name}; skipping month")
        continue
    stats_2 = get_tile_species_obs_values(filtered_data)
    _print_and_plot_obs_stats(stats_2, expt_name_2, month_name, aqua_count_flag=True)

    # ---- Experiment 2 minus experiment 1, on every tile of experiment 1 ------
    # Dictionary look-ups replace the per-tile np.where scans, which were
    # quadratic in the number of tiles.
    base_tiles = np.asarray(stats_1['tiles']).tolist()
    first_index_1 = _first_index_by_tile(base_tiles)
    first_index_2 = _first_index_by_tile(np.asarray(stats_2['tiles']).tolist())
    index_1 = np.array([first_index_1[tile] for tile in base_tiles], dtype=int)
    index_2 = np.array([first_index_2.get(tile, -1) for tile in base_tiles], dtype=int)

    diff_num_obs_aq = _difference_on_base_tiles(stats_1['num_obs'][12], stats_2['num_obs'][12], index_1, index_2)
    diff_num_obs_te = _difference_on_base_tiles(stats_1['num_obs'][13], stats_2['num_obs'][13], index_1, index_2)
    diff_max_values_aq = _difference_on_base_tiles(stats_1['max_values'][12], stats_2['max_values'][12], index_1, index_2)
    diff_max_values_te = _difference_on_base_tiles(stats_1['max_values'][13], stats_2['max_values'][13], index_1, index_2)
    diff_num_obs_aq_gt_0_9 = _difference_on_base_tiles(stats_1['num_obs_gt_0.9'][12], stats_2['num_obs_gt_0.9'][12], index_1, index_2)
    diff_num_obs_te_gt_0_9 = _difference_on_base_tiles(stats_1['num_obs_gt_0.9'][13], stats_2['num_obs_gt_0.9'][13], index_1, index_2)

    # One summary line instead of one debug line per affected tile.
    n_nan = int(np.isnan(diff_num_obs_aq).sum())
    if n_nan:
        print(f"Found NaN in {n_nan} MYD10C1 N obs differences for {month_name}")

    matching_lats = np.asarray(stats_1['lat'])[index_1]
    matching_lons = np.asarray(stats_1['lon'])[index_1]

    # Columns: 0 = value to plot, 1 = longitude, 2 = latitude.
    map_array = np.full((len(matching_lats), 3), np.nan)
    map_array[:, 1] = matching_lons
    map_array[:, 2] = matching_lats

    diff_prefix = f"{expt_name_2} - {expt_name_1} :\n "
    diff_panels = (
        # (values, flag, title suffix, units, colour-scale min, colour-scale max)
        (diff_num_obs_aq, True, f"Difference MYD10C1 N obs: {month_name}", '-', -120, 120),
        (diff_num_obs_te, False, f"MOD10C1 N obs diff {month_name}", '-', -120, 120),
        (diff_max_values_aq, True, f"Difference MYD10C1 max obs: {month_name}", 'fraction', -1., 1.),
        (diff_max_values_te, False, f"MOD10C1 max obs diff {month_name}", 'fraction', -1., 1.),
        (diff_num_obs_aq_gt_0_9, True, f"Difference MYD10C1 N obs > 0.9: {month_name}", '-', -50, 50),
        (diff_num_obs_te_gt_0_9, False, f"MOD10C1 N obs > 0.9 diff {month_name}", '-', -50, 50),
    )
    for values, flag, label, units, cmin, cmax in diff_panels:
        map_array[:, 0] = values
        plot_global_tight_pcm(map_array, flag, False, diff_prefix + label, units, cmin, cmax)



In [None]:
# Root directories of the ens_avg "cat" (catchment) output of each experiment.
cat_directory_1 = (
    '/discover/nobackup/projects/land_da/Experiment_archive/M21C_land_sweeper_DAv8_M36/'
    f'{expt_name_1}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'
)
cat_directory_2 = (
    '/discover/nobackup/projects/land_da/snow_qc_expts/LS_DAv8_M36_t10/'
    f'{expt_name_2}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg'
)

# Reference file used only for its per-tile lon/lat on the M36 grid.
# The path is relative, so the file has to sit in the working directory.
ds_latlon = xr.open_dataset('DAv7_M36.inst3_1d_lndfcstana_Nt.20150901.nc4')
lon, lat = ds_latlon['lon'], ds_latlon['lat']

# One row per tile.
n_tile = len(lat)

# Plotting array of shape [n_tile, 3], NaN until filled:
#   column 0 - value to plot (written by the plotting cells)
#   column 1 - longitude
#   column 2 - latitude
map_array = np.full([n_tile, 3], np.nan)
map_array[:, 1] = lon
map_array[:, 2] = lat

In [None]:
# End the window on 2006-09-30 so that, with the inclusive first-of-month test
# used below, October 2006 is not counted as a fifth October.
end_date = datetime(2006, 9, 30)
# Keep the title string in step with the window that is actually used.
end_date_str = end_date.strftime('%Y/%m/%d')

# Variables to extract: descriptive name -> name in the tavg24_1d_lnd_Nt files.
# Commented entries are kept as a menu of further candidates.
variables = {
'sm_surface': 'SFMC',
#'sm_rootzone': 'RZMC',
#'sm_profile': 'PRMC',
#'precipitation_total_surface_flux': 'PRECTOTCORRLAND',
#'vegetation_greenness_fraction': 'GRN',
#'leaf_area_index': 'LAI',
'snow_mass': 'SNOMASLAND',
#'surface_temperature_of_land_incl_snow': 'TSURFLAND',
'soil_temperature_layer_1': 'TSOIL1',
#'snowfall_land': 'PRECSNOCORRLAND',
'snow_depth_within_snow_covered_area_fraction_on_land': 'SNODPLAND',
'snowpack_evaporation_latent_heat_flux_on_land': 'LHLANDSBLN',
#'overland_runoff_including_throughflow': 'RUNSURFLAND',
#'baseflow_flux_land': 'BASEFLOWLAND',
'snowmelt_flux_land': 'SMLAND',
#'total_evaporation_land': 'EVLAND',
#'net_shortwave_flux_land': 'SWLAND',
#'total_water_storage_land': 'TWLAND',
'fractional_area_of_snow_on_land': 'FRLANDSNO'
}


def _monthly_cat_stats(cat_directory, month, var_names, first_date, last_date):
    """Time-mean and time-max of ``var_names`` for one calendar month over all in-range years.

    Gathers the ``*tavg24_1d_lnd_Nt*.nc4`` files of every
    ``cat_directory/Y*/M<month>`` directory whose first day of the month lies
    in ``[first_date, last_date]`` (inclusive), concatenates them along
    ``time`` and reduces over that dimension.

    Returns ``(mean, max)`` as in-memory datasets, or ``None`` when no file was
    found. The previous code reduced each year separately and overwrote the
    result, so only the last in-range year reached the plots.
    """
    files = []
    for month_dir in sorted(glob.glob(os.path.join(cat_directory, 'Y*', f'M{month:02d}'))):
        # The year is the four characters following the last '/Y' of the path.
        try:
            year = int(month_dir.split('/Y')[-1][:4])
        except ValueError:
            print(f"Warning: Skipping directory with unexpected format: {month_dir}")
            continue

        if first_date <= datetime(year, month, 1) <= last_date:
            files.extend(sorted(glob.glob(os.path.join(month_dir, "*tavg24_1d_lnd_Nt*.nc4"))))

    if not files:
        return None

    # Load the reductions into memory before the files are closed.
    with xr.open_mfdataset(files, combine='nested', concat_dim="time") as data:
        subset = data[var_names]
        return subset.mean(dim='time').compute(), subset.max(dim='time').compute()


# Process each month (1-12)
for month in range(1, 13):

    # Month name only; the year 2000 is arbitrary.
    month_name = datetime(2000, month, 1).strftime("%B")

    # Seconds in the month, used to turn a mean flux into a monthly total.
    # NOTE(review): 2000 is a leap year, so February is taken as 29 days - confirm this is acceptable.
    num_seconds_in_month = (datetime(2000, month, 1) + relativedelta(months=1) - datetime(2000, month, 1)).total_seconds()

    var_names = list(variables.values())
    cat_stats_1 = _monthly_cat_stats(cat_directory_1, month, var_names, start_date, end_date)
    cat_stats_2 = _monthly_cat_stats(cat_directory_2, month, var_names, start_date, end_date)
    if cat_stats_1 is None or cat_stats_2 is None:
        # Skip rather than silently plot the previous month's data.
        print(f"Warning: no tavg24_1d_lnd_Nt files found for {month_name}; skipping month")
        continue
    data_mean_1, data_max_1 = cat_stats_1
    data_mean_2, data_max_2 = cat_stats_2

    # map_array comes from the lat/lon cell above.
    # NOTE(review): assumes these files share that tile order - confirm.
    snow_fraction_1 = data_mean_1['FRLANDSNO'].values
    snow_fraction_2 = data_mean_2['FRLANDSNO'].values
    snow_melt_1 = data_mean_1['SMLAND'].values * num_seconds_in_month
    snow_melt_2 = data_mean_2['SMLAND'].values * num_seconds_in_month

    period = f"{start_date_str} - {end_date_str}"

    # Mean FRLANDSNO and SMLAND for each experiment.
    map_array[:, 0] = snow_fraction_1
    plot_global_tight_pcm(map_array, True, False, f"{expt_name_1} {period}:\n Mean daily snow cover fraction: {month_name}", 'fraction', 0, 1)
    map_array[:, 0] = snow_melt_1
    plot_global_tight_pcm(map_array, True, False, f"{expt_name_1} {period}:\n Mean monthly snow melt: {month_name}", 'kg m-2', 0, 400)

    map_array[:, 0] = snow_fraction_2
    plot_global_tight_pcm(map_array, True, False, f"{expt_name_2} {period}:\n Mean daily snow cover fraction: {month_name}", 'fraction', 0, 1)
    map_array[:, 0] = snow_melt_2
    plot_global_tight_pcm(map_array, True, False, f"{expt_name_2} {period}:\n Mean monthly snow melt: {month_name}", 'kg m-2', 0, 400)

    # Experiment 2 minus experiment 1.
    map_array[:, 0] = snow_fraction_2 - snow_fraction_1
    plot_global_tight_pcm(map_array, True, False, f"{expt_name_2} - {expt_name_1} {period}:\n Diff in mean daily snow cover fraction: {month_name}", 'fraction', -0.2, 0.2)
    map_array[:, 0] = snow_melt_2 - snow_melt_1
    plot_global_tight_pcm(map_array, True, False, f"{expt_name_2} - {expt_name_1} {period}:\n Diff in mean monthly snow melt: {month_name}", 'kg m-2', -200, 200)
    

In [None]:
# Inspect the FRLANDSNO field of whatever data_mean_2 currently holds
# (it is assigned inside the monthly catchment loop for experiment 2).
print(data_mean_2['FRLANDSNO'])

In [None]:
# Quick look at the experiment-2 snow cover fraction currently held in
# data_mean_2 / data_max_2 (month_name is whatever the monthly loop set last).
# The titles used to be copied from the MOD10C1 observation-difference plots
# and did not describe the data shown.
map_array[:, 0] = data_mean_2['FRLANDSNO'].values
plot_global_tight_pcm(map_array, False, False, f"{expt_name_2}:\n Mean snow cover fraction {month_name}", 'fraction', 0, 1)

map_array[:, 0] = data_max_2['FRLANDSNO'].values
plot_global_tight_pcm(map_array, False, False, f"{expt_name_2}:\n Max snow cover fraction {month_name}", 'fraction', 0, 1)


In [None]:
# Experiment shown by this single-experiment, month-by-month view.
# NOTE(review): `expt_name` and `ana_directory` were not defined anywhere in the
# visible notebook (NameError on a fresh kernel); experiment 1 is assumed - confirm.
expt_name = expt_name_1
ana_directory = ana_directory_1

obs_keys = ('obs_tilenum', 'obs_species', 'obs_lat', 'obs_lon', 'obs_obs')

current_date = start_date
while current_date <= end_date:
    # Advance first, so the `continue` below can never stall the loop.
    month_start = current_date
    current_date += relativedelta(months=1)
    month_label = month_start.strftime('%B %Y')

    year_month_directory = os.path.join(ana_directory,
                                        f"Y{month_start.year}",
                                        f"M{month_start.month:02d}")

    # A missing month directory is treated like an empty one.
    candidates = []
    if os.path.isdir(year_month_directory):
        candidates = sorted(
            os.path.join(year_month_directory, f)
            for f in os.listdir(year_month_directory)
            if '.ldas_ObsFcstAna.' in f and f.endswith('.bin')
        )
    OFA_list = [read_ObsFcstAna(fname) for fname in candidates if os.path.isfile(fname)]

    if not OFA_list:
        # np.concatenate cannot be called on an empty list, so skip the month.
        print(f"OFA_list is empty for {month_label}; skipping.")
        continue
    print(OFA_list[0].keys())

    # Keep only the observations flagged as assimilated (obs_assim == 1).
    masks = [ofa['obs_assim'] == 1 for ofa in OFA_list]
    filtered_data = {
        key: np.concatenate([ofa[key][mask] for ofa, mask in zip(OFA_list, masks)])
        for key in obs_keys
    }

    # Per-tile, per-species statistics.
    stats = get_tile_species_obs_values(filtered_data)

    # Print summary
    print(f"Number of unique tiles: {len(stats['tiles'])}")
    print(f"Number of species: {len(stats['max_values'])}")
    for species, max_vals in stats['max_values'].items():
        print(f"Species {species}: max value = {np.max(max_vals)}")

    # Columns: 0 = value to plot, 1 = longitude, 2 = latitude.
    map_array = np.full((stats['lon'].shape[0], 3), np.nan)
    map_array[:, 1] = stats['lon']
    map_array[:, 2] = stats['lat']

    title_prefix = f"{expt_name} {start_date_str} - {end_date_str}:\n "
    panels = (
        # (stats field, species, title suffix, units, colour-scale max)
        ('max_values', 12, f"Max MYD10C1 obs {month_label}", 'fraction', 1),
        ('max_values', 13, f"Max MOD10C1 obs {month_label}", 'fraction', 1),
        ('num_obs', 12, f"MYD10C1 N obs {month_label}", '-', 30),
        ('num_obs', 13, f"MOD10C1 N obs {month_label}", '-', 30),
        ('num_obs_gt_0.9', 12, f"MYD10C1 N obs > 0.9 {month_label}", '-', 10),
        ('num_obs_gt_0.9', 13, f"MOD10C1 N obs > 0.9 {month_label}", '-', 10),
    )
    for field, species, label, units, cmax in panels:
        map_array[:, 0] = stats[field][species]
        plot_global_tight_pcm(map_array, False, False, title_prefix + label, units, 0, cmax)

In [None]:

# Bounding box used for the CONUS view.
lon_min, lon_max = -125.0, -66.0
lat_min, lat_max = 24.0, 50.0

# Draw whatever map_array currently holds inside that box.
# NOTE(review): `expt_name` must have been defined by an earlier cell - confirm.
plot_region(
    map_array,
    lon_min, lon_max,
    lat_min, lat_max,
    meanflag=False,
    saveflag=False,
    units='SCF',
    plot_title=f'{expt_name} {start_date_str} - {end_date_str}:\n MODIS Snow Cover Fraction',
    cmin=0,
    cmax=1,
    cmap='viridis',
)


In [None]:
# Directory with the July 2005 ens_avg catchment output of the snow-QC test experiment.
root_directory = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/snow_qc_expts/LS_DAv8_M36_snow_qc/test_LS_DAv8_M36_snow_qc/output/SMAP_EASEv2_M36_GLOBAL/cat/ens_avg/Y2005/M07/'

# All inst3_1d_lndfcstana_Nt files of that month; glob order is arbitrary.
files = glob.glob(os.path.join(root_directory, 'test_LS_DAv8_M36_snow_qc.inst3_1d_lndfcstana_Nt.*.nc4'))

# Stack the files along "time", then order the result by its time coordinate.
ds_lndfcstana = xr.open_mfdataset(files, combine='nested', concat_dim="time").sortby('time')

# Time coordinate and its raw values; the values are passed to create_date_array below.
time_var = ds_lndfcstana['time']
time_values = time_var.values

def create_date_array(time_values, start_date, step_hours=3):
    """Convert integer time-step counts into datetime objects.

    Parameters
    ----------
    time_values : iterable of int-like
        Number of steps elapsed since ``start_date`` for each entry; every
        value is passed through ``int()``.
    start_date : datetime
        Date and time corresponding to a step count of 0.
    step_hours : int or float, optional
        Length of one step in hours. Defaults to 3, the increment that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        ``start_date + step_hours * t`` hours for every ``t`` in ``time_values``.
    """
    step = timedelta(hours=step_hours)
    return np.array([start_date + int(t) * step for t in time_values])

# Date of the first time step. A dedicated name is used so the notebook-wide
# `start_date` (the analysis window used by the monthly loops) is not overwritten.
first_timestep = datetime(2005, 7, 1, 3)
dates = create_date_array(time_values, first_timestep)

# Verify conversion
print(f"First date: {dates[0]}")
print(f"Last date: {dates[-1]}")
print(f"Total timesteps: {len(dates)}")

# Dimensions and variables available in the dataset
print(ds_lndfcstana.dims)
print(ds_lndfcstana.data_vars)

TSOIL1_FCST = ds_lndfcstana['TSOIL1_FCST']

# Per-tile extremes over the whole month, with 273.15 subtracted (Kelvin -> Celsius)
tile_min = TSOIL1_FCST.min(dim='time') - 273.15
tile_max = TSOIL1_FCST.max(dim='time') - 273.15

# Evaluate the lazy (dask-backed) reductions once
min_vals = tile_min.compute()
max_vals = tile_max.compute()

temperature_threshold = 10.0
t_thres_str = f'{temperature_threshold:.1f}C'
threshold_units = f'{t_thres_str} threshold exceeded'

# 1 where the value exceeds the threshold, 0 elsewhere, as plain numpy arrays
min_vals_thres = xr.where(min_vals > temperature_threshold, 1, 0).values
max_vals_thres = xr.where(max_vals > temperature_threshold, 1, 0).values

# Summary statistics; the values are in Celsius after the subtraction above
# (they used to be labelled "K").
print(f"Global min: {float(min_vals.min()):.2f}°C")
print(f"Global max: {float(max_vals.max()):.2f}°C")
print(f"Number of tiles: {len(min_vals)}")

# Replace the time coordinate with real datetimes so resample() can group by day
TSOIL1_FCST = TSOIL1_FCST.assign_coords(time=dates)

# Lowest daily maximum per tile
daily_max = TSOIL1_FCST.resample(time='D').max()
min_of_daily_max = daily_max.min(dim='time') - 273.15
min_of_daily_max_thres = xr.where(min_of_daily_max > temperature_threshold, 1, 0)

# Lowest daily mean per tile
daily_mean = TSOIL1_FCST.resample(time='D').mean()
min_of_daily_mean = daily_mean.min(dim='time') - 273.15
min_of_daily_mean_thres = xr.where(min_of_daily_mean > temperature_threshold, 1, 0)

print(f"Shape of min_of_daily_max: {min_of_daily_max.shape}")
print(f"Global min of daily max: {min_of_daily_max.min().values:.2f}°C")
print(f"Global max of daily max: {min_of_daily_max.max().values:.2f}°C")

# Tile coordinates, taken from the first time slice
lons = ds_lndfcstana['lon'].isel(time=0).values
lats = ds_lndfcstana['lat'].isel(time=0).values

# Columns: 0 = value to plot, 1 = longitude, 2 = latitude
map_array = np.full((len(lons), 3), np.nan)
map_array[:, 1] = lons
map_array[:, 2] = lats

# NOTE(review): `expt_name` is not defined in this cell and must come from an
# earlier cell; the title window (start_date_str - end_date_str) is the
# notebook-wide one, not July 2005 - confirm both are intended.
title_prefix = f'{expt_name} {start_date_str} - {end_date_str}:\n '
panels = (
    # (values, title suffix, units, colour-scale max)
    (min_vals, 'Min TSOIL1_FCST 2005-07', 'C', 30),
    (min_vals_thres, 'Min TSOIL1_FCST 2005-07', threshold_units, 1),
    (max_vals, 'Max TSOIL1_FCST 2005-07', 'C', 30),
    (max_vals_thres, 'Max TSOIL1_FCST 2005-07', threshold_units, 1),
    (min_of_daily_max, 'Min of daily max TSOIL1_FCST 2005-07', 'C', 30),
    (min_of_daily_max_thres, 'Min of daily max TSOIL1_FCST 2005-07', threshold_units, 1),
    (min_of_daily_mean, 'Min of daily mean TSOIL1_FCST 2005-07', 'C', 30),
    (min_of_daily_mean_thres, 'Min of daily mean TSOIL1_FCST 2005-07', threshold_units, 1),
)
# Each quantity is drawn twice: globally, then over North America.
for values, label, units, cmax in panels:
    map_array[:, 0] = values
    plot_global_tight_pcm(map_array, False, False, title_prefix + label, units, 0, cmax)
    plot_NA_tight_pcm(map_array, False, False, title_prefix + label, units, 0, cmax)

# Close the dataset
ds_lndfcstana.close()
