In [None]:
from pyhdf.SD import SD, SDC
import numpy as np
import os
from mapper_functions import plot_global_tight_pcm, plot_NA_tight_pcm


In [None]:


def inspect_hdf4_data(filepath):
    """Print a summary of the ``Day_CMG_Snow_Cover`` dataset of an HDF4 file.

    Prints the dataset shape and data type (elements 2 and 3 of the tuple
    returned by ``info()``), every dataset attribute, and the min / max / mean
    of the full data array.

    Parameters
    ----------
    filepath : str
        Path of the HDF4 file, opened read-only.

    Returns
    -------
    data
        The value returned by ``get()`` on the dataset (the full array).
    attrs
        The mapping returned by ``attributes()`` on the dataset.
    """
    hdf = SD(filepath, SDC.READ)
    try:
        snow_cover = hdf.select('Day_CMG_Snow_Cover')
        attrs = snow_cover.attributes()
        info = snow_cover.info()

        print("\nDataset Information:")
        print(f"Shape: {info[2]}")
        print(f"Data type: {info[3]}")

        print("\nAttributes:")
        for key, value in attrs.items():
            print(f"{key}: {value}")

        data = snow_cover.get()

        print("\nData Statistics:")
        print(f"Min value: {np.min(data)}")
        print(f"Max value: {np.max(data)}")
        print(f"Mean value: {np.mean(data)}")
    finally:
        # Release the file handle even when a dataset is missing or a read fails.
        hdf.end()

    return data, attrs

# Usage: inspect one MYD10C1 daily file.
# NOTE: the path below is an absolute path on the author's machine.
filepath = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/MYD10C1.A2005196.061.hdf"
snow_cover_data, metadata = inspect_hdf4_data(filepath)

In [None]:
def read_modis_scf_hdf(fname, lon_min, lon_max, lat_min, lat_max, clear_index=20, snow_spatial=2):
    """Read QC-filtered MODIS snow cover fraction (SCF) for a lat/lon box.

    The box is converted to row/column indices on a 0.05-degree, 3600 x 7200
    grid whose first row is at 90N and first column at 180W. Index ranges are
    clamped to the grid, so a box that touches or exceeds the grid edge returns
    only the cells that exist instead of failing.

    A cell is kept when all of the following hold:

    * ``Day_CMG_Snow_Cover <= 100``
    * ``Day_CMG_Clear_Index > clear_index`` (and non-zero, since it is the divisor)
    * ``Snow_Spatial_QA <= snow_spatial``

    and its SCF is ``Day_CMG_Snow_Cover / Day_CMG_Clear_Index``.

    Parameters
    ----------
    fname : str
        Path of the HDF4 file.
    lon_min, lon_max, lat_min, lat_max : float
        Bounding box in degrees.
    clear_index : int or float, optional
        Exclusive lower bound applied to ``Day_CMG_Clear_Index``.
    snow_spatial : int, optional
        Inclusive upper bound applied to ``Snow_Spatial_QA``.

    Returns
    -------
    lon, lat, scf : np.ndarray
        Three 1-D arrays of equal length holding cell-centre longitude,
        cell-centre latitude and SCF for every kept cell. Cells are ordered by
        column (west to east) and, within a column, by row (north to south).
        All three are empty when the box does not intersect the grid.
    """
    # Grid geometry
    CMG_N_lon = 7200
    CMG_N_lat = 3600
    CMG_ll_lon = -180.0
    CMG_ur_lat = 90.0
    CMG_dlon = 0.05
    CMG_dlat = 0.05

    # QC thresholds
    qc_snow_cover_max = 100
    qc_clear_index_min = clear_index
    qc_snow_spatial_max = snow_spatial

    # Row 0 is the northernmost row, so lat_max maps to the start row.
    start_lon = int((lon_min - CMG_ll_lon) / CMG_dlon)
    start_lat = int((CMG_ur_lat - lat_max) / CMG_dlat)
    end_lon = int((lon_max - CMG_ll_lon) / CMG_dlon)
    end_lat = int((CMG_ur_lat - lat_min) / CMG_dlat)

    # Clamp to the grid: a negative start would wrap around when slicing, and an
    # end index equal to the grid size (lon_max = 180 or lat_min = -90) would
    # address a cell that does not exist.
    start_lon = max(start_lon, 0)
    start_lat = max(start_lat, 0)
    end_lon = min(end_lon, CMG_N_lon - 1)
    end_lat = min(end_lat, CMG_N_lat - 1)

    if start_lon > end_lon or start_lat > end_lat:
        # Box lies entirely outside the grid (or is inverted): nothing to read.
        return np.array([]), np.array([]), np.array([])

    N_lon = end_lon - start_lon + 1
    N_lat = end_lat - start_lat + 1

    hdf = SD(fname, SDC.READ)
    try:
        snow_cover = np.asarray(
            hdf.select('Day_CMG_Snow_Cover')[start_lat:end_lat + 1, start_lon:end_lon + 1])
        clear = np.asarray(
            hdf.select('Day_CMG_Clear_Index')[start_lat:end_lat + 1, start_lon:end_lon + 1])
        snow_spatial_qa = np.asarray(
            hdf.select('Snow_Spatial_QA')[start_lat:end_lat + 1, start_lon:end_lon + 1])
    finally:
        # Close the file even if a dataset is missing or a read fails.
        hdf.end()

    # Cell-centre coordinates of the selected columns / rows
    lon_c = CMG_ll_lon + 0.5 * CMG_dlon + (start_lon + np.arange(N_lon)) * CMG_dlon
    lat_c = CMG_ur_lat - 0.5 * CMG_dlat - (start_lat + np.arange(N_lat)) * CMG_dlat

    valid_mask = ((snow_cover <= qc_snow_cover_max) &
                  (clear > qc_clear_index_min) &
                  (clear > 0) &  # divisor must be non-zero even for a negative threshold
                  (snow_spatial_qa <= qc_snow_spatial_max))

    # nonzero() on the transposed mask walks columns first, then rows, which is
    # the order of the nested lon/lat loop this replaces.
    lon_idx, lat_idx = np.nonzero(valid_mask.T)
    scf = (snow_cover[lat_idx, lon_idx].astype(np.float64) /
           clear[lat_idx, lon_idx].astype(np.float64))

    return lon_c[lon_idx], lat_c[lat_idx], scf

# Usage: read one daily file over a lon/lat box and map it.
# NOTE(review): these bounds span 125W-66E and 20S-50N, which is much wider than
# CONUS (other cells use lon_max = -66.0, lat_min = 24.0) — confirm the intended region.
lon_min = -125.0
lon_max = 66.0
lat_min = -20.0
lat_max = 50.0
fname = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/MYD10C1.A2005196.061.hdf"
lon_out, lat_out, scf_out = read_modis_scf_hdf(fname, lon_min, lon_max, lat_min, lat_max)

# Print the first 10 values
print("Longitude:", lon_out[:10])
print("Latitude:", lat_out[:10])
print("SCF:", scf_out[:10])

# Columns: SCF value, longitude, latitude
map_array = np.zeros((len(lon_out), 3))
map_array[:, 0] = scf_out
map_array[:, 1] = lon_out
map_array[:, 2] = lat_out

# Plot the data; the title is derived from the file actually read so the two
# cannot drift apart.
plot_global_tight_pcm(map_array, False, False, f'{os.path.basename(fname)}:\n MODIS Snow Cover Fraction', 'scf', 0, 1)

In [None]:
from datetime import datetime, timedelta

def parse_modis_filename(filename):
    """Return the date encoded in a MODIS file name.

    The date is taken from the ``.AYYYYDDD.`` field of the file name (four-digit
    year followed by three-digit day of year), e.g.
    ``MOD10C1.A2005090.061.hdf`` -> 2005-03-31. Any leading directory part is
    ignored, so full paths are accepted.

    Parameters
    ----------
    filename : str
        File name or path.

    Returns
    -------
    datetime.datetime
        Midnight of the encoded day.

    Raises
    ------
    ValueError
        If the name has no ``.A`` field, the field is not exactly seven digits,
        or the digits cannot be parsed as year + day of year.
    """
    # Only look at the file name: a directory such as "run.A1/" must not be
    # mistaken for the date field.
    name = os.path.basename(filename)

    parts = name.split('.A', 1)
    if len(parts) != 2:
        raise ValueError(f"No '.AYYYYDDD' date field in MODIS file name: {filename!r}")

    date_str = parts[1].split('.')[0]
    if len(date_str) != 7 or not date_str.isdigit():
        raise ValueError(f"Expected 7-digit YYYYDDD date field, got {date_str!r} in {filename!r}")

    # %j is the zero-padded day of the year.
    return datetime.strptime(date_str, '%Y%j')

# Example usage: the date field "2005090" is year 2005, day-of-year 090.
filename = "MOD10C1.A2005090.061.hdf"
date = parse_modis_filename(filename)
print(f"Date: {date.strftime('%Y-%m-%d')}")

In [None]:
# CONUS bounding box (degrees)
lon_min, lon_max = -125.0, -66.0
lat_min, lat_max = 24.0, 50.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# Days 190-199 of 2005: the loop index supplies the last digit of the day of year.
for i in range(10):
    fname = f"MYD10C1.A200519{i}.061.hdf"
    date = parse_modis_filename(fname)
    path_fname = os.path.join(path, fname)

    lon_out, lat_out, scf_out = read_modis_scf_hdf(path_fname, lon_min, lon_max, lat_min, lat_max)

    # Columns: SCF value, longitude, latitude
    map_array = np.column_stack((scf_out, lon_out, lat_out))

    # Plotting is currently disabled:
    # plot_NA_tight_pcm(map_array, True, False, f"{fname}:\n MODIS SCF {date.strftime('%Y-%m-%d')}", 'scf', 0, 1)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import xarray as xr
import re
from pathlib import Path

def load_ease_grid(ease_path):
    """Read the EASE2 M36 latitude and longitude arrays from ``ease_path``.

    Both files are read as raw float64 values and reshaped to (406, 964).

    Returns
    -------
    lats, lons : np.ndarray
    """
    grid_shape = (406, 964)
    lat_file = f'{ease_path}/EASE2_M36km.lats.964x406x1.double'
    lon_file = f'{ease_path}/EASE2_M36km.lons.964x406x1.double'

    lat_values = np.fromfile(lat_file, dtype=np.float64)
    lats = lat_values.reshape(grid_shape)

    lon_values = np.fromfile(lon_file, dtype=np.float64)
    lons = lon_values.reshape(grid_shape)

    return lats, lons

def load_grid(grid_type='ease', ease_path=None):
    """
    Build the 2-D latitude / longitude arrays of a named grid.

    Parameters
    ----------
    grid_type : str
        'ease' reads the (406, 964) EASE2 M36 arrays from files under
        ``ease_path``; 'modis' generates a 3600 x 7200 grid of 0.05-degree
        cell centres running from 90 downwards in latitude and from -180
        upwards in longitude.
    ease_path : str
        Directory holding the EASE grid files (required if grid_type='ease')

    Returns
    -------
    lat_2d, lon_2d : np.ndarray
        2D arrays of latitude and longitude

    Raises
    ------
    ValueError
        If ``grid_type`` is not recognised, or 'ease' is requested without
        ``ease_path``.
    """
    if grid_type == 'modis':
        res = 0.05
        nlat, nlon = 3600, 7200
        lon_array = -180 + res * (np.arange(nlon) + 0.5)
        lat_array = 90 - res * (np.arange(nlat) + 0.5)
        lon_2d, lat_2d = np.meshgrid(lon_array, lat_array)
        return lat_2d, lon_2d

    if grid_type != 'ease':
        raise ValueError(f"Unrecognized grid type: {grid_type}")

    if ease_path is None:
        raise ValueError("ease_path must be provided for EASE grid")

    ease_shape = (406, 964)
    lat_values = np.fromfile(f'{ease_path}/EASE2_M36km.lats.964x406x1.double',
                             dtype=np.float64)
    lon_values = np.fromfile(f'{ease_path}/EASE2_M36km.lons.964x406x1.double',
                             dtype=np.float64)
    return lat_values.reshape(ease_shape), lon_values.reshape(ease_shape)

def create_grid_mapping(array, lat_2d, lon_2d):
    """Place (val, lon, lat) records on the grid cell closest to each point.

    The closest row is found against the first column of ``lat_2d`` and the
    closest column against the first row of ``lon_2d``. Records whose value is
    NaN are skipped; cells that receive no record keep the fill value -9998.
    """
    out = np.full(lat_2d.shape, -9998., dtype=np.float64)
    n_records = len(array)
    for k in range(n_records):
        value, point_lon, point_lat = array[k]
        if np.isnan(value):
            continue
        nearest_row = np.abs(lat_2d[:, 0] - point_lat).argmin()
        nearest_col = np.abs(lon_2d[0, :] - point_lon).argmin()
        if nearest_row >= out.shape[0] or nearest_col >= out.shape[1]:
            continue
        out[nearest_row, nearest_col] = value
    return out

def _nearest_axis_index(axis, values):
    """Return, for each value, the index of the closest entry of a monotonic 1-D axis.

    ``axis`` may be ascending or descending. Values beyond either end map to
    the end entry. On an exact tie the lower index is chosen.
    """
    axis = np.asarray(axis, dtype=np.float64)
    values = np.asarray(values, dtype=np.float64)

    if axis.size == 1:
        return np.zeros(values.shape, dtype=np.intp)

    if axis[0] > axis[-1]:
        # Negating both turns a descending axis into an ascending one without
        # changing which entry is closest or the index it lives at.
        axis = -axis
        values = -values

    # searchsorted gives the insertion point, i.e. the first entry >= value;
    # the closest entry is either that one or the one just before it.
    upper = np.clip(np.searchsorted(axis, values, side='left'), 1, axis.size - 1)
    lower = upper - 1
    take_lower = (values - axis[lower]) <= (axis[upper] - values)
    return np.where(take_lower, lower, upper)


def create_grid_mapping_vectorized(array, lat_2d, lon_2d):
    """Map (val, lon, lat) records onto a grid using vectorised nearest-neighbour lookup.

    The row is chosen against the first column of ``lat_2d`` and the column
    against the first row of ``lon_2d``; both axes must be monotonic.

    Parameters
    ----------
    array : np.ndarray
        Array of shape (n, 3) with columns value, longitude, latitude.
    lat_2d, lon_2d : np.ndarray
        2-D latitude / longitude arrays of the target grid.

    Returns
    -------
    np.ndarray
        float64 array shaped like ``lat_2d``. Cells that receive no record hold
        -9998. Records whose value is NaN are ignored.
    """
    # 1-D axes of the target grid
    lat_axis = lat_2d[:, 0]
    lon_axis = lon_2d[0, :]

    vals = array[:, 0]
    lons = array[:, 1]
    lats = array[:, 2]

    # Drop NaN values before computing any indices
    valid_mask = ~np.isnan(vals)
    vals = vals[valid_mask]
    lons = lons[valid_mask]
    lats = lats[valid_mask]

    row_idx = _nearest_axis_index(lat_axis, lats)
    col_idx = _nearest_axis_index(lon_axis, lons)

    grid = np.full(lat_2d.shape, -9998., dtype=np.float64)
    grid[row_idx, col_idx] = vals

    return grid

def _resolve_color_scale(array, cmin, cmax, cmap):
    """Return ``(cmin, cmax, colormap)``, deriving whatever the caller left unset."""
    if cmin is None or cmax is None:
        auto_cmin, auto_cmax, auto_cmap = colorbar_info(array)
        # Fill in only the missing pieces so an explicitly supplied limit or
        # colormap is not discarded.
        if cmin is None:
            cmin = auto_cmin
        if cmax is None:
            cmax = auto_cmax
        if cmap is None:
            cmap = auto_cmap

    if cmap is None:
        colormap = plt.get_cmap('RdBu_r' if cmin < 0 else 'viridis', 20).copy()
    else:
        # Copy so that set_under() below does not modify the caller's colormap.
        colormap = plt.get_cmap(cmap).copy()
    colormap.set_under('lightgrey')
    return cmin, cmax, colormap


def _new_region_axes(figsize, lon_min, lon_max, lat_min, lat_max):
    """Create a PlateCarree map axes limited to the given extent, with map features."""
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree(central_longitude=0))
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    setup_map_features(ax)
    return ax


def _overlay_ease_grid(ax, ease_path):
    """Draw every row and column of the EASE grid as thin grey lines."""
    ease_lat, ease_lon = load_grid(grid_type='ease', ease_path=ease_path)

    for i in range(ease_lat.shape[0]):
        ax.plot(ease_lon[i, :], ease_lat[i, :], color='grey', linewidth=0.5, alpha=0.6,
                transform=ccrs.PlateCarree())

    for j in range(ease_lon.shape[1]):
        ax.plot(ease_lon[:, j], ease_lat[:, j], color='grey', linewidth=0.5, alpha=0.6,
                transform=ccrs.PlateCarree())


def _finish_region_plot(ax, sc, textstr, cmin, cmax, units, plot_title,
                        meanflag, saveflag, output_dir, save_fmt, save_dpi,
                        star_lon, star_lat, grid_type, ease_path):
    """Add marker, colorbar, title and statistics text; optionally save; overlay; show."""
    if star_lon is not None and star_lat is not None:
        ax.plot(star_lon, star_lat, 'r*', markersize=10, transform=ccrs.PlateCarree())

    setup_colorbar(sc, ax, cmin, cmax, units)
    plt.title(plot_title, fontsize=18)

    if meanflag:
        ax.text(0.38, 0.05, textstr, fontsize=14, transform=ax.transAxes, ha='left')

    if saveflag:
        # Forward the caller's save settings instead of relying on save_plot's defaults.
        # NOTE: the figure is saved before the EASE overlay below is drawn, so
        # the saved file does not contain the overlay.
        save_plot(plot_title, output_dir=output_dir, fmt=save_fmt, dpi=save_dpi)

    if grid_type == 'modis':
        _overlay_ease_grid(ax, ease_path)

    plt.show()


def plot_region(array, lon_min, lon_max, lat_min, lat_max,
                grid_type='ease',
                ease_path='../test_data',
                saveflag=False,
                meanflag=False,
                plot_title='regional_plot',
                units='na',
                cmin=None,
                cmax=None,
                cmap=None,
                output_dir='./plots',
                save_fmt='png',
                save_dpi=600,
                star_lon=None,
                star_lat=None):
    """
    Grid the data onto an EASE or MODIS grid and plot it as a pcolormesh.

    Parameters
    ----------
    array : np.ndarray
        Array of shape (n,3) with values, lons, lats
    lon_min, lon_max, lat_min, lat_max : float
        Map extent.
    grid_type : str
        Either 'ease' or 'modis'. With 'modis' the EASE grid lines are also
        drawn on top of the data.
    ease_path : str
        Directory holding the EASE grid files.
    saveflag : bool
        Save the figure through ``save_plot`` using ``output_dir``,
        ``save_fmt`` and ``save_dpi``.
    meanflag : bool
        Write the mean and standard deviation of the values on the plot.
    plot_title : str
        Figure title; also used for the saved file name. If it contains
        'Number' or 'Percent', empty grid cells are set to 0 instead of the
        fill value.
    units : str
        Colorbar label.
    cmin, cmax : float, optional
        Colour limits; any that is None is derived from the data.
    cmap : str or Colormap, optional
        Colormap; chosen automatically when None.
    star_lon, star_lat : float, optional
        Position of a red star marker, drawn only when both are given.

    Returns
    -------
    np.ndarray
        The gridded data that was plotted.
    """
    lat_2d, lon_2d = load_grid(grid_type=grid_type, ease_path=ease_path)
    grid = create_grid_mapping_vectorized(array, lat_2d, lon_2d)

    # Counts / percentages: an empty cell means zero rather than missing
    if 'Number' in plot_title or 'Percent' in plot_title:
        grid[grid == -9998] = 0

    mean = np.nanmean(array[:, 0])
    std = np.nanstd(array[:, 0])
    textstr = format_stats(mean, std, units, plot_title)

    cmin, cmax, colormap = _resolve_color_scale(array, cmin, cmax, cmap)

    ax = _new_region_axes((10, 6), lon_min, lon_max, lat_min, lat_max)

    sc = ax.pcolormesh(lon_2d, lat_2d, grid,
                       transform=ccrs.PlateCarree(),
                       cmap=colormap,
                       vmin=cmin,
                       vmax=cmax)

    _finish_region_plot(ax, sc, textstr, cmin, cmax, units, plot_title,
                        meanflag, saveflag, output_dir, save_fmt, save_dpi,
                        star_lon, star_lat, grid_type, ease_path)

    return grid


def plot_region_scatter(array, lon_min, lon_max, lat_min, lat_max,
                grid_type='ease',
                ease_path='../test_data',
                saveflag=False,
                meanflag=False,
                plot_title='regional_plot',
                units='na',
                cmin=None,
                cmax=None,
                cmap=None,
                output_dir='./plots',
                save_fmt='png',
                save_dpi=600,
                star_lon=None,
                star_lat=None,
                point_size=6):
    """
    Plot the data as coloured points at their own lon/lat positions.

    The values are not regridded. ``grid_type`` only controls whether the EASE
    grid lines are drawn on top of the points (they are when it is 'modis').

    Parameters
    ----------
    array : np.ndarray
        Array of shape (n,3) with values, lons, lats
    lon_min, lon_max, lat_min, lat_max : float
        Map extent.
    grid_type : str
        'modis' adds the EASE grid-line overlay; any other value draws none.
    ease_path : str
        Directory holding the EASE grid files (used for the overlay).
    saveflag : bool
        Save the figure through ``save_plot`` using ``output_dir``,
        ``save_fmt`` and ``save_dpi``.
    meanflag : bool
        Write the mean and standard deviation of the values on the plot.
    plot_title : str
        Figure title; also used for the saved file name.
    units : str
        Colorbar label.
    cmin, cmax : float, optional
        Colour limits; any that is None is derived from the data.
    cmap : str or Colormap, optional
        Colormap; chosen automatically when None.
    star_lon, star_lat : float, optional
        Position of a red star marker, drawn only when both are given.
    point_size : float
        Marker size passed to ``scatter``.
    """
    mean = np.nanmean(array[:, 0])
    std = np.nanstd(array[:, 0])
    textstr = format_stats(mean, std, units, plot_title)

    vals = array[:, 0]
    lons = array[:, 1]
    lats = array[:, 2]

    cmin, cmax, colormap = _resolve_color_scale(array, cmin, cmax, cmap)

    ax = _new_region_axes((15, 9), lon_min, lon_max, lat_min, lat_max)

    sc = ax.scatter(lons, lats, c=vals, s=point_size, cmap=colormap,
                    vmin=cmin, vmax=cmax, edgecolor='none',
                    transform=ccrs.PlateCarree())

    _finish_region_plot(ax, sc, textstr, cmin, cmax, units, plot_title,
                        meanflag, saveflag, output_dir, save_fmt, save_dpi,
                        star_lon, star_lat, grid_type, ease_path)

def format_stats(mean, std, units, plot_title):
    """Build the "Mean = m±s units" annotation string.

    Titles containing ``Relative $\\Delta$ StdDev`` use one decimal place for
    both numbers. Otherwise each number is formatted by magnitude: four
    decimals below 0.01, two decimals below 1.0, and three significant digits
    from 1.0 upwards.
    """
    # Raw string: "\D" is not a valid escape sequence in a normal literal.
    if r'Relative $\Delta$ StdDev' in plot_title:
        return f'Mean = {mean:.1f}±{std:.1f} {units}'

    def format_number(num):
        if abs(num) < 0.01:
            return f'{num:.4f}'
        elif abs(num) < 1.0:
            return f'{num:.2f}'
        return f'{num:.3g}'

    return f'Mean = {format_number(mean)}±{format_number(std)} {units}'

def setup_map_features(ax):
    """Hide all tick labels and add land, coastline and border features to ``ax``.

    No gridlines are drawn.
    """
    label_sides = ('labelbottom', 'labeltop', 'labelleft', 'labelright')
    ax.tick_params(**dict.fromkeys(label_sides, False))

    ax.add_feature(cfeature.LAND, facecolor='lightgrey')
    for outline in (cfeature.COASTLINE, cfeature.BORDERS):
        ax.add_feature(outline)

def setup_colorbar(sc, ax, cmin, cmax, units):
    """Attach a horizontal colorbar for ``sc`` below ``ax``.

    Five evenly spaced ticks are placed from ``cmin`` to ``cmax`` inclusive; if
    the two limits are equal a single tick is used. The label is ``(units)``.
    """
    cbar = plt.colorbar(sc, ax=ax, orientation="horizontal",
                        pad=.05, fraction=0.04)
    if cmin == cmax:
        # A zero-width range has no tick spacing to compute.
        ticks = np.array([cmin])
    else:
        # linspace always includes both end points, independent of rounding.
        ticks = np.linspace(cmin, cmax, 5)
    cbar.set_ticks(ticks)
    cbar.ax.tick_params(labelsize=10)
    cbar.set_label(f'({units})', fontsize=12)
 

def save_plot(plot_title, output_dir='./plots', fmt='png', dpi=600):
    """
    Write the current matplotlib figure to ``output_dir``.

    The file name is the plot title with every run of characters other than
    ASCII letters and digits replaced by one underscore, followed by
    ``.<fmt>``. The directory (including parents) is created when missing, and
    the destination is printed before saving.

    Parameters
    ----------
    plot_title : str
        Title of plot used for filename
    output_dir : str or Path
        Directory to save plots
    fmt : str
        File format (png, pdf, jpg)
    dpi : int
        Resolution for raster formats
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    safe_stem = re.sub('[^0-9a-zA-Z]+', '_', plot_title)
    savename = target_dir / f"{safe_stem}.{fmt}"

    print(f"Saving figure as {savename}")
    plt.savefig(savename, dpi=dpi, bbox_inches='tight', format=fmt)

def colorbar_info(array):
    """Derive colour limits and a colormap name from the first column of ``array``.

    NaN values are ignored.

    Returns
    -------
    cmin, cmax : float
        If any value is negative, limits symmetric about zero
        (``-max|v|``, ``+max|v|``); otherwise the data minimum and maximum.
    cmap : str
        'RdBu' for the symmetric case, 'viridis' otherwise.
    """
    values = array[:, 0]
    data_min = np.nanmin(values)

    if data_min < 0:
        # Data with negative values: centre the colour scale on zero
        abs_max = np.nanmax(np.abs(values))
        return abs_max * -1, abs_max, 'RdBu'

    return data_min, np.nanmax(values), 'viridis'



In [None]:
# Maximum MYD10C1 SCF per pixel over CONUS for day-of-year 182-212 of 2005.
lon_min = -125.0
lon_max = -66.0
lat_min = 24.0
lat_max = 50.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# (lat, lon) -> highest SCF seen so far for that pixel
scf_dict = {}

for i in range(182, 213):
    fname = f"MYD10C1.A2005{i:03d}.061.hdf"
    path_fname = os.path.join(path, fname)
    date = parse_modis_filename(fname)
    
    # Read the data
    lon_out, lat_out, scf_out = read_modis_scf_hdf(path_fname, lon_min, lon_max, lat_min, lat_max)

    # Keep the larger of the stored and the new value for each pixel
    for lon, lat, scf in zip(lon_out, lat_out, scf_out):
        key = (lat, lon)
        if key not in scf_dict or scf > scf_dict[key]:
            scf_dict[key] = scf

    # (Per-day and running-maximum plot_region calls were drafted here and are disabled.)


# Convert final dictionary to an (n, 3) array with columns SCF, lon, lat
lats, lons = zip(*scf_dict.keys())
scfs = list(scf_dict.values())
scf_array = np.array([scfs, lons, lats]).T

# Plot the monthly maximum; `date` is the date of the last file read in the loop
plot_region_scatter(scf_array, 
        lon_min, lon_max,
        lat_min, lat_max,
        grid_type='modis',
        meanflag=True,
        saveflag=False,
        units='SCF',
        plot_title=f"Max MYD10C1 SCF over CONUS:\n MODIS SCF {date.strftime('%B %Y')}",
        point_size=2)

In [None]:
# Daily MOD10C1 SCF in a 2 x 2 degree box centred on the starred point
# (-76.87, 39.0), plus per-pixel statistics over day-of-year 182-212 of 2005.
# lon_value = -76.87
# lat_value = 39.0

lon_min = -77.87
lon_max = -75.87
lat_min = 38.0
lat_max = 40.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# All three dictionaries are keyed by (lat, lon)
scf_dict = {}     # highest SCF per pixel
count_dict = {}   # number of accepted observations per pixel
gt09_dict = {}    # number of observations with SCF > 0.9 per pixel


for i in range(182, 213):
    fname = f"MOD10C1.A2005{i:03d}.061.hdf"
    path_fname = os.path.join(path, fname)
    date = parse_modis_filename(fname)
    
    # Read the data
    lon_out, lat_out, scf_out = read_modis_scf_hdf(path_fname, lon_min, lon_max, lat_min, lat_max)

    # Update the per-pixel statistics with this day's observations
    for lon, lat, scf in zip(lon_out, lat_out, scf_out):
        key = (lat, lon)

        # Track max SCF per pixel
        if key not in scf_dict or scf > scf_dict[key]:
            scf_dict[key] = scf

        # Count all observations
        count_dict[key] = count_dict.get(key, 0) + 1   

        # Count SCF > 0.9
        if scf > 0.9:
            gt09_dict[key] = gt09_dict.get(key, 0) + 1

    # This day's data as columns SCF, lon, lat
    map_array = np.zeros((len(lon_out), 3))
    map_array[:, 1] = lon_out
    map_array[:, 2] = lat_out
    map_array[:, 0] = scf_out

    # One figure per day
    plot_region_scatter(map_array, 
        lon_min, lon_max,
        lat_min, lat_max,
        grid_type='modis',
        meanflag=False,
        saveflag=False,
        units='SCF',
        plot_title=f"MOD10C1 SCF over Mid-Atlantic: {date.strftime('%Y-%m-%d')}",
        star_lon=-76.87,
        star_lat=39.0,
        point_size=50,
        cmin=0,
        cmax=1)  
    
# Extract to arrays; pixels with no SCF > 0.9 observation get a count of 0
lats, lons, scfs, counts, gt09s = [], [], [], [], []
for (lat, lon), scf in scf_dict.items():
    lats.append(lat)
    lons.append(lon)
    scfs.append(scf)
    counts.append(count_dict[(lat, lon)])
    gt09s.append(gt09_dict.get((lat, lon), 0))

# (n, 3) arrays with columns value, lon, lat
scf_array = np.array([scfs, lons, lats]).T
count_array = np.array([counts, lons, lats]).T
gt09_array = np.array([gt09s, lons, lats]).T    


In [None]:

# Summary maps for the Mid-Atlantic box. Each entry is:
# (data, colorbar units, title, marker size, upper colour limit, colormap)
_mid_atlantic_panels = [
    (scf_array, 'SCF',
     f"Max Monthly MOD10C1 SCF over Mid-Atlantic:\n {date.strftime('%Y-%m')}",
     50, 1, 'Blues'),
    (scf_array, 'SCF',
     f"Max MOD10C1 over Mid-Atlantic:\n {date.strftime('%B %Y')}",
     80, 1, None),
    (count_array, 'Number',
     f"Number of MOD10C1 over Mid-Atlantic:\n {date.strftime('%B %Y')}",
     80, 28, None),
    (gt09_array, 'Number',
     f"Number of MOD10C1 > 0.9 over Mid-Atlantic:\n {date.strftime('%B %Y')}",
     80, 2, None),
]

for (_panel_data, _panel_units, _panel_title,
     _panel_size, _panel_cmax, _panel_cmap) in _mid_atlantic_panels:
    plot_region_scatter(_panel_data,
                        lon_min, lon_max,
                        lat_min, lat_max,
                        grid_type='modis',
                        meanflag=False,
                        saveflag=False,
                        units=_panel_units,
                        plot_title=_panel_title,
                        star_lon=-76.87,
                        star_lat=39.0,
                        point_size=_panel_size,
                        cmin=0,
                        cmax=_panel_cmax,
                        cmap=_panel_cmap)

In [None]:
# Daily MOD10C1 SCF over the Southeastern US box (95W-75W, 29N-41N), plus
# per-pixel statistics over day-of-year 182-212 of 2005.
lon_min = -95.0
lon_max = -75.0
lat_min = 29.0
lat_max = 41.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# All three dictionaries are keyed by (lat, lon)
scf_dict = {}     # highest SCF per pixel
count_dict = {}   # number of accepted observations per pixel
gt09_dict = {}    # number of observations with SCF > 0.9 per pixel


for i in range(182, 213):
    fname = f"MOD10C1.A2005{i:03d}.061.hdf"
    path_fname = os.path.join(path, fname)
    date = parse_modis_filename(fname)
    
    # Read the data
    lon_out, lat_out, scf_out = read_modis_scf_hdf(path_fname, lon_min, lon_max, lat_min, lat_max)

    # Update the per-pixel statistics with this day's observations
    for lon, lat, scf in zip(lon_out, lat_out, scf_out):
        key = (lat, lon)

        # Track max SCF per pixel
        if key not in scf_dict or scf > scf_dict[key]:
            scf_dict[key] = scf

        # Count all observations
        count_dict[key] = count_dict.get(key, 0) + 1   

        # Count SCF > 0.9
        if scf > 0.9:
            gt09_dict[key] = gt09_dict.get(key, 0) + 1

    # This day's data as columns SCF, lon, lat
    map_array = np.zeros((len(lon_out), 3))
    map_array[:, 1] = lon_out
    map_array[:, 2] = lat_out
    map_array[:, 0] = scf_out

    # One figure per day
    plot_region_scatter(map_array, 
        lon_min, lon_max,
        lat_min, lat_max,
        grid_type='modis',
        meanflag=False,
        saveflag=False,
        units='SCF',
        plot_title=f"MOD10C1 SCF over Southeastern US: {date.strftime('%Y-%m-%d')}",
        point_size=6,
        cmin=0,
        cmax=1)  
    
# Extract to arrays; pixels with no SCF > 0.9 observation get a count of 0
lats, lons, scfs, counts, gt09s = [], [], [], [], []
for (lat, lon), scf in scf_dict.items():
    lats.append(lat)
    lons.append(lon)
    scfs.append(scf)
    counts.append(count_dict[(lat, lon)])
    gt09s.append(gt09_dict.get((lat, lon), 0))

# (n, 3) arrays with columns value, lon, lat
scf_array = np.array([scfs, lons, lats]).T
count_array = np.array([counts, lons, lats]).T
gt09_array = np.array([gt09s, lons, lats]).T   


In [None]:
# Summary maps for the Southeastern US box. Each entry is:
# (data, colorbar units, title, upper colour limit)
_southeast_panels = [
    (scf_array, 'SCF',
     f"Max MOD10C1 over Southeastern US:\n {date.strftime('%B %Y')}", 1),
    (count_array, 'Number',
     f"Number of MOD10C1 over Southeastern US:\n {date.strftime('%B %Y')}", 28),
    (gt09_array, 'Number',
     f"Number of MOD10C1 > 0.9 over Southeastern US:\n {date.strftime('%B %Y')}", 2),
]

for _panel_data, _panel_units, _panel_title, _panel_cmax in _southeast_panels:
    plot_region_scatter(_panel_data,
                        lon_min, lon_max,
                        lat_min, lat_max,
                        grid_type='modis',
                        meanflag=False,
                        saveflag=False,
                        units=_panel_units,
                        plot_title=_panel_title,
                        point_size=6,
                        cmin=0,
                        cmax=_panel_cmax)

In [None]:
# Southeastern US box (95W-75W, 29N-41N): repeat the monthly statistics for
# several values of the clear-index QC threshold and plot each result.
lon_min = -95.0
lon_max = -75.0
lat_min = 29.0
lat_max = 41.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# Initialize storage for each clear_idx iteration
results = {}

for clear_idx in [10, 20, 50, 90]:

    # Statistics are restarted for every threshold; all are keyed by (lat, lon)
    scf_dict = {}
    count_dict = {}
    gt09_dict = {}


    for i in range(182, 213):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        date = parse_modis_filename(fname)
        
        # Read the data with this threshold as the clear-index minimum
        lon_out, lat_out, scf_out = read_modis_scf_hdf(path_fname, lon_min, lon_max, lat_min, lat_max, clear_index=clear_idx)

        # Update the per-pixel statistics with this day's observations
        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # Track max SCF per pixel
            if key not in scf_dict or scf > scf_dict[key]:
                scf_dict[key] = scf

            # Count all observations
            count_dict[key] = count_dict.get(key, 0) + 1   

            # Count SCF > 0.9
            if scf > 0.9:
                gt09_dict[key] = gt09_dict.get(key, 0) + 1
        
    # Extract to arrays; pixels with no SCF > 0.9 observation get a count of 0
    lats, lons, scfs, counts, gt09s = [], [], [], [], []
    for (lat, lon), scf in scf_dict.items():
        lats.append(lat)
        lons.append(lon)
        scfs.append(scf)
        counts.append(count_dict[(lat, lon)])
        gt09s.append(gt09_dict.get((lat, lon), 0))

    # (n, 3) arrays with columns value, lon, lat
    scf_array = np.array([scfs, lons, lats]).T
    count_array = np.array([counts, lons, lats]).T
    gt09_array = np.array([gt09s, lons, lats]).T 
   
    # Maximum SCF per pixel
    plot_region_scatter(scf_array, 
            lon_min, lon_max,
            lat_min, lat_max,
            grid_type='modis',
            meanflag=False,
            saveflag=False,
            units='SCF',
            plot_title=f"Max MOD10C1 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx}",
            point_size=6,
            cmin=0,
            cmax=1)    

    # Number of accepted observations per pixel
    plot_region_scatter(count_array, 
            lon_min, lon_max,
            lat_min, lat_max,
            grid_type='modis',
            meanflag=False,
            saveflag=False,
            units='Number',
            plot_title=f"Number of MOD10C1 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx}",
            point_size=6,
            cmin=0,
            cmax=28)  

    # Number of observations with SCF > 0.9 per pixel
    plot_region_scatter(gt09_array, 
            lon_min, lon_max,
            lat_min, lat_max,
            grid_type='modis',
            meanflag=False,
            saveflag=False,
            units='Number',
            plot_title=f"Number of MOD10C1 > 0.9 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx}",
            point_size=6,
            cmin=0,
            cmax=2) 

    # Store arrays for this clear_idx in dictionary
    # NOTE: the SCF > 0.9 counts (gt09s) are plotted above but not stored here.
    results[clear_idx] = {
        'lons': np.array(lons),
        'lats': np.array(lats),
        'scfs': np.array(scfs),
        'counts': np.array(counts)
    }

# Now results[clear_idx] holds the lons, lats, scfs and counts arrays for that threshold
# Example access: results[20]['scfs'] gets SCF values for clear_idx=20    

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Southeastern US bounding box in degrees (a regional subset, not all of CONUS)
lon_min, lon_max = -95.0, -75.0
lat_min, lat_max = 29.0, 41.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# results[clear_idx] holds the per-pixel arrays produced for that threshold
results = {}

# Sweep the clear-index QC threshold from 0 to 95 in steps of 5
for clear_idx in range(0, 96, 5):

    # Accumulators keyed by (lat, lon); rebuilt from scratch for each threshold
    scf_dict = {}     # largest SCF seen at the pixel
    count_dict = {}   # number of values returned for the pixel
    gt09_dict = {}    # number of those values above 0.9

    # Day-of-year 182 through 212 of 2005 (the range end is exclusive)
    for i in range(182, 213):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        # `date` ends up holding the value for the last file; the plotting
        # cells further down read it for their titles
        date = parse_modis_filename(fname)

        lon_out, lat_out, scf_out = read_modis_scf_hdf(
            path_fname,
            lon_min, lon_max,
            lat_min, lat_max,
            clear_index=clear_idx,
        )

        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # max() keeps its first argument unless the second is strictly
            # greater, so a stored value is only replaced by a larger SCF
            scf_dict[key] = max(scf_dict.get(key, scf), scf)

            # Every returned value counts as one observation of the pixel
            if key in count_dict:
                count_dict[key] += 1
            else:
                count_dict[key] = 1

            # Separate tally of values above 0.9
            if scf > 0.9:
                if key in gt09_dict:
                    gt09_dict[key] += 1
                else:
                    gt09_dict[key] = 1

    # Flatten the dictionaries into parallel lists, following scf_dict's order;
    # pixels that never exceeded 0.9 get a tally of 0
    lats = [pixel[0] for pixel in scf_dict]
    lons = [pixel[1] for pixel in scf_dict]
    scfs = list(scf_dict.values())
    counts = [count_dict[pixel] for pixel in scf_dict]
    gt09s = [gt09_dict.get(pixel, 0) for pixel in scf_dict]

    # Keep this threshold's arrays for the summary cells
    results[clear_idx] = {
        name: np.array(values)
        for name, values in zip(
            ('lons', 'lats', 'scfs', 'counts', 'gt09'),
            (lons, lats, scfs, counts, gt09s),
        )
    }


In [None]:
# Same Southeastern US box as the clear-index sweep (a regional subset, not all
# of CONUS), repeated with the spatial QC setting snow_spatial=1
lon_min, lon_max = -95.0, -75.0
lat_min, lat_max = 29.0, 41.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# results_spatial_qc[clear_idx] holds the per-pixel arrays for that threshold
results_spatial_qc = {}

# Sweep the clear-index QC threshold from 0 to 95 in steps of 5
for clear_idx in range(0, 96, 5):

    # Accumulators keyed by (lat, lon); rebuilt from scratch for each threshold
    scf_dict = {}     # largest SCF seen at the pixel
    count_dict = {}   # number of values returned for the pixel
    gt09_dict = {}    # number of those values above 0.9

    # Day-of-year 182 through 212 of 2005 (the range end is exclusive)
    for i in range(182, 213):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        # `date` ends up holding the value for the last file processed
        date = parse_modis_filename(fname)

        lon_out, lat_out, scf_out = read_modis_scf_hdf(
            path_fname,
            lon_min, lon_max,
            lat_min, lat_max,
            clear_index=clear_idx,
            snow_spatial=1,
        )

        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # max() keeps its first argument unless the second is strictly
            # greater, so a stored value is only replaced by a larger SCF
            scf_dict[key] = max(scf_dict.get(key, scf), scf)

            # Every returned value counts as one observation of the pixel
            if key in count_dict:
                count_dict[key] += 1
            else:
                count_dict[key] = 1

            # Separate tally of values above 0.9
            if scf > 0.9:
                if key in gt09_dict:
                    gt09_dict[key] += 1
                else:
                    gt09_dict[key] = 1

    # Flatten the dictionaries into parallel lists, following scf_dict's order;
    # pixels that never exceeded 0.9 get a tally of 0
    lats = [pixel[0] for pixel in scf_dict]
    lons = [pixel[1] for pixel in scf_dict]
    scfs = list(scf_dict.values())
    counts = [count_dict[pixel] for pixel in scf_dict]
    gt09s = [gt09_dict.get(pixel, 0) for pixel in scf_dict]

    # Keep this threshold's arrays
    results_spatial_qc[clear_idx] = {
        name: np.array(values)
        for name, values in zip(
            ('lons', 'lats', 'scfs', 'counts', 'gt09'),
            (lons, lats, scfs, counts, gt09s),
        )
    }

In [None]:

# Clear-index sensitivity summary (raw counts).
# Neither `results` (keyed by clear-index threshold) nor `date` is defined in
# this cell: both are whatever the most recently run sweep cell left in the
# kernel, so run that cell first.

# Regional totals per threshold: all observations, and observations with SCF > 0.9
clear_idx_values = sorted(results.keys())
total_obs = [np.sum(results[idx]['counts']) for idx in clear_idx_values]
total_gt09 = [np.sum(results[idx]['gt09']) for idx in clear_idx_values]

# The two series are plotted against separate y-axes that share the x-axis
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()

# SCF > 0.9 totals on the left axis, observation totals on the right axis
line1 = ax1.plot(clear_idx_values, total_gt09, 'b-o', label='Total SCF > 0.9')
line2 = ax2.plot(clear_idx_values, total_obs, 'r-s', label='Total Observations')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9', color='b')
ax2.set_ylabel('Total Observations', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

# Title spelling corrected ("Southeastern")
plt.title(f'SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over the Southeastern US')
plt.show()

In [None]:
# Clear-index sensitivity summary, normalised to percent of maximum.
# Neither `results` (keyed by clear-index threshold) nor `date` is defined in
# this cell: both are whatever the most recently run sweep cell left in the
# kernel, so run that cell first.

# Regional totals per threshold: all observations, and observations with SCF > 0.9
clear_idx_values = sorted(results.keys())
total_obs = [np.sum(results[idx]['counts']) for idx in clear_idx_values]
total_gt09 = [np.sum(results[idx]['gt09']) for idx in clear_idx_values]

# Express each series as a percentage of its own maximum.  The maxima are
# computed once, and an all-zero series (for example no SCF > 0.9 at any
# threshold) maps to 0 % instead of the NaNs that 0/0 would produce.
peak_gt09 = max(total_gt09)
peak_obs = max(total_obs)
total_gt09_normalized = [value / peak_gt09 * 100 if peak_gt09 else 0.0 for value in total_gt09]
total_obs_normalized = [value / peak_obs * 100 if peak_obs else 0.0 for value in total_obs]

# The two series are plotted against separate y-axes that share the x-axis
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

# SCF > 0.9 percentages on the left axis, observation percentages on the right
line1 = ax1.plot(clear_idx_values, total_gt09_normalized, 'b-o', label='Total SCF > 0.9 (Normalized)')
line2 = ax2.plot(clear_idx_values, total_obs_normalized, 'r-s', label='Total Observations (Normalized)')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('SCF > 0.9 (% of Max)', color='b')
ax2.set_ylabel('Total Observations (% of Max)', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

plt.title(f'Normalized SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over the Southeastern US')
plt.show()

In [None]:
# Tibetan Plateau bounding box in degrees
lon_min, lon_max = 75.0, 95.0   # alternative values noted in the original: 70.0, 105.0
lat_min, lat_max = 27.0, 39.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# Day-of-year window; end_doy is exclusive because it feeds range()
start_doy = 182   # alternative value noted in the original: 60
end_doy = 213     # alternative value noted in the original: 90

# results[clear_idx] holds the per-pixel arrays produced for that threshold
results = {}

# Sweep the clear-index QC threshold from 0 to 95 in steps of 5
for clear_idx in range(0, 96, 5):

    print(f"Processing clear index: {clear_idx}")

    # Accumulators keyed by (lat, lon); rebuilt from scratch for each threshold
    scf_dict = {}     # largest SCF seen at the pixel
    count_dict = {}   # number of values returned for the pixel
    gt09_dict = {}    # number of those values above 0.9

    for i in range(start_doy, end_doy):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        # `date` ends up holding the value for the last file; the plotting
        # cell that follows reads it for its titles
        date = parse_modis_filename(fname)

        lon_out, lat_out, scf_out = read_modis_scf_hdf(
            path_fname,
            lon_min, lon_max,
            lat_min, lat_max,
            clear_index=clear_idx,
        )

        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # max() keeps its first argument unless the second is strictly
            # greater, so a stored value is only replaced by a larger SCF
            scf_dict[key] = max(scf_dict.get(key, scf), scf)

            # Every returned value counts as one observation of the pixel
            if key in count_dict:
                count_dict[key] += 1
            else:
                count_dict[key] = 1

            # Separate tally of values above 0.9
            if scf > 0.9:
                if key in gt09_dict:
                    gt09_dict[key] += 1
                else:
                    gt09_dict[key] = 1

    # Flatten the dictionaries into parallel lists, following scf_dict's order;
    # pixels that never exceeded 0.9 get a tally of 0
    lats = [pixel[0] for pixel in scf_dict]
    lons = [pixel[1] for pixel in scf_dict]
    scfs = list(scf_dict.values())
    counts = [count_dict[pixel] for pixel in scf_dict]
    gt09s = [gt09_dict.get(pixel, 0) for pixel in scf_dict]

    # Keep this threshold's arrays for the summary cell
    results[clear_idx] = {
        name: np.array(values)
        for name, values in zip(
            ('lons', 'lats', 'scfs', 'counts', 'gt09'),
            (lons, lats, scfs, counts, gt09s),
        )
    }

In [None]:
# Tibetan Plateau diagnostics: one map plus two clear-index sensitivity plots.
# `results`, `date` and the lon/lat bounds are not defined in this cell; they
# are whatever the most recently run sweep cell left in the kernel.

# Threshold whose per-pixel SCF > 0.9 tally is mapped
clear_idx = 20
# Rows are pixels; columns are (tally, lon, lat)
gt09_array = np.array([results[clear_idx]['gt09'], results[clear_idx]['lons'], results[clear_idx]['lats']]).T


# Map of how often each pixel exceeded 0.9 at the chosen threshold
plot_region_scatter(gt09_array,
        lon_min, lon_max,
        lat_min, lat_max,
        grid_type='modis',
        meanflag=False,
        saveflag=False,
        units='Number',
        plot_title=f"Number of MOD10C1 > 0.9 over Tibetan Plateau: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx}",
        point_size=6,
        cmin=0,
        cmax=8)


# Regional totals per threshold: all observations, and observations with SCF > 0.9
clear_idx_values = sorted(results.keys())
total_obs = [np.sum(results[idx]['counts']) for idx in clear_idx_values]
total_gt09 = [np.sum(results[idx]['gt09']) for idx in clear_idx_values]

# Express each series as a percentage of its own maximum.  The maxima are
# computed once, and an all-zero series (for example no SCF > 0.9 at any
# threshold) maps to 0 % instead of the NaNs that 0/0 would produce.
peak_gt09 = max(total_gt09)
peak_obs = max(total_obs)
total_gt09_normalized = [value / peak_gt09 * 100 if peak_gt09 else 0.0 for value in total_gt09]
total_obs_normalized = [value / peak_obs * 100 if peak_obs else 0.0 for value in total_obs]

# --- Figure 1: raw totals on two y-axes sharing the x-axis ---
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()

# SCF > 0.9 totals on the left axis, observation totals on the right axis
line1 = ax1.plot(clear_idx_values, total_gt09, 'b-o', label='Total SCF > 0.9')
line2 = ax2.plot(clear_idx_values, total_obs, 'r-s', label='Total Observations')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9', color='b')
ax2.set_ylabel('Total Observations', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

plt.title(f'SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over the Tibetan Plateau')
plt.show()

# --- Figure 2: the same two series as percent of their maxima ---
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

line1 = ax1.plot(clear_idx_values, total_gt09_normalized, 'b-o', label='Total SCF > 0.9 (Normalized)')
line2 = ax2.plot(clear_idx_values, total_obs_normalized, 'r-s', label='Total Observations (Normalized)')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9 (% of Max)', color='b')
ax2.set_ylabel('Total Observations (% of Max)', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

plt.title(f'Normalized SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over the Tibetan Plateau')
plt.show()

In [None]:
# Western Europe bounding box in degrees
lon_min, lon_max = -5.0, 15.0
lat_min, lat_max = 43.0, 55.0

# Day-of-year window; end_doy is exclusive because it feeds range()
start_doy = 182   # alternative value noted in the original: 60
end_doy = 213     # alternative value noted in the original: 90

path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# results[clear_idx] holds the per-pixel arrays produced for that threshold
results = {}

# Sweep the clear-index QC threshold from 0 to 95 in steps of 5
for clear_idx in range(0, 96, 5):

    print(f"Processing clear index: {clear_idx}")

    # Accumulators keyed by (lat, lon); rebuilt from scratch for each threshold
    scf_dict = {}     # largest SCF seen at the pixel
    count_dict = {}   # number of values returned for the pixel
    gt09_dict = {}    # number of those values above 0.9

    for i in range(start_doy, end_doy):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        # `date` ends up holding the value for the last file; the plotting
        # cell that follows reads it for its titles
        date = parse_modis_filename(fname)

        lon_out, lat_out, scf_out = read_modis_scf_hdf(
            path_fname,
            lon_min, lon_max,
            lat_min, lat_max,
            clear_index=clear_idx,
        )

        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # max() keeps its first argument unless the second is strictly
            # greater, so a stored value is only replaced by a larger SCF
            scf_dict[key] = max(scf_dict.get(key, scf), scf)

            # Every returned value counts as one observation of the pixel
            if key in count_dict:
                count_dict[key] += 1
            else:
                count_dict[key] = 1

            # Separate tally of values above 0.9
            if scf > 0.9:
                if key in gt09_dict:
                    gt09_dict[key] += 1
                else:
                    gt09_dict[key] = 1

    # Flatten the dictionaries into parallel lists, following scf_dict's order;
    # pixels that never exceeded 0.9 get a tally of 0
    lats = [pixel[0] for pixel in scf_dict]
    lons = [pixel[1] for pixel in scf_dict]
    scfs = list(scf_dict.values())
    counts = [count_dict[pixel] for pixel in scf_dict]
    gt09s = [gt09_dict.get(pixel, 0) for pixel in scf_dict]

    # Keep this threshold's arrays for the summary cell
    results[clear_idx] = {
        name: np.array(values)
        for name, values in zip(
            ('lons', 'lats', 'scfs', 'counts', 'gt09'),
            (lons, lats, scfs, counts, gt09s),
        )
    }

In [None]:
# Western Europe diagnostics: one map plus two clear-index sensitivity plots.
# `results`, `date` and the lon/lat bounds are not defined in this cell; they
# are whatever the most recently run sweep cell left in the kernel.

# Threshold whose per-pixel SCF > 0.9 tally is mapped
clear_idx = 20
# Rows are pixels; columns are (tally, lon, lat)
gt09_array = np.array([results[clear_idx]['gt09'], results[clear_idx]['lons'], results[clear_idx]['lats']]).T


# Map of how often each pixel exceeded 0.9 at the chosen threshold
plot_region_scatter(gt09_array,
        lon_min, lon_max,
        lat_min, lat_max,
        grid_type='modis',
        meanflag=False,
        saveflag=False,
        units='Number',
        plot_title=f"Number of MOD10C1 > 0.9 over Western Europe: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx}",
        point_size=6,
        cmin=0,
        cmax=2)


# Regional totals per threshold: all observations, and observations with SCF > 0.9
clear_idx_values = sorted(results.keys())
total_obs = [np.sum(results[idx]['counts']) for idx in clear_idx_values]
total_gt09 = [np.sum(results[idx]['gt09']) for idx in clear_idx_values]

# Express each series as a percentage of its own maximum.  The maxima are
# computed once, and an all-zero series (for example no SCF > 0.9 at any
# threshold) maps to 0 % instead of the NaNs that 0/0 would produce.
peak_gt09 = max(total_gt09)
peak_obs = max(total_obs)
total_gt09_normalized = [value / peak_gt09 * 100 if peak_gt09 else 0.0 for value in total_gt09]
total_obs_normalized = [value / peak_obs * 100 if peak_obs else 0.0 for value in total_obs]

# --- Figure 1: raw totals on two y-axes sharing the x-axis ---
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()

# SCF > 0.9 totals on the left axis, observation totals on the right axis
line1 = ax1.plot(clear_idx_values, total_gt09, 'b-o', label='Total SCF > 0.9')
line2 = ax2.plot(clear_idx_values, total_obs, 'r-s', label='Total Observations')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9', color='b')
ax2.set_ylabel('Total Observations', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

# Title wording corrected: "over Western Europe" (was "over the Western Europe")
plt.title(f'SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over Western Europe')
plt.show()

# --- Figure 2: the same two series as percent of their maxima ---
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

line1 = ax1.plot(clear_idx_values, total_gt09_normalized, 'b-o', label='Total SCF > 0.9 (Normalized)')
line2 = ax2.plot(clear_idx_values, total_obs_normalized, 'r-s', label='Total Observations (Normalized)')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9 (% of Max)', color='b')
ax2.set_ylabel('Total Observations (% of Max)', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

# Title wording corrected: "over Western Europe" (was "over the Western Europe")
plt.title(f'Normalized SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over Western Europe')
plt.show()

In [None]:
# Southeastern US bounding box in degrees
lon_min, lon_max = -95.0, -75.0
lat_min, lat_max = 29.0, 41.0

# Day-of-year window; end_doy is exclusive because it feeds range()
start_doy = 182   # alternative value noted in the original: 60
end_doy = 213     # alternative value noted in the original: 90

path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# results[clear_idx] holds the per-pixel arrays produced for that threshold
results = {}

# Sweep the clear-index QC threshold from 0 to 95 in steps of 5
for clear_idx in range(0, 96, 5):

    print(f"Processing clear index: {clear_idx}")

    # Accumulators keyed by (lat, lon); rebuilt from scratch for each threshold
    scf_dict = {}     # largest SCF seen at the pixel
    count_dict = {}   # number of values returned for the pixel
    gt09_dict = {}    # number of those values above 0.9

    for i in range(start_doy, end_doy):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        # `date` ends up holding the value for the last file; the plotting
        # cells that follow read it for their titles
        date = parse_modis_filename(fname)

        lon_out, lat_out, scf_out = read_modis_scf_hdf(
            path_fname,
            lon_min, lon_max,
            lat_min, lat_max,
            clear_index=clear_idx,
        )

        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # max() keeps its first argument unless the second is strictly
            # greater, so a stored value is only replaced by a larger SCF
            scf_dict[key] = max(scf_dict.get(key, scf), scf)

            # Every returned value counts as one observation of the pixel
            if key in count_dict:
                count_dict[key] += 1
            else:
                count_dict[key] = 1

            # Separate tally of values above 0.9
            if scf > 0.9:
                if key in gt09_dict:
                    gt09_dict[key] += 1
                else:
                    gt09_dict[key] = 1

    # Flatten the dictionaries into parallel lists, following scf_dict's order;
    # pixels that never exceeded 0.9 get a tally of 0
    lats = [pixel[0] for pixel in scf_dict]
    lons = [pixel[1] for pixel in scf_dict]
    scfs = list(scf_dict.values())
    counts = [count_dict[pixel] for pixel in scf_dict]
    gt09s = [gt09_dict.get(pixel, 0) for pixel in scf_dict]

    # Keep this threshold's arrays for the summary cells
    results[clear_idx] = {
        name: np.array(values)
        for name, values in zip(
            ('lons', 'lats', 'scfs', 'counts', 'gt09'),
            (lons, lats, scfs, counts, gt09s),
        )
    }

In [None]:
# Map the per-pixel SCF > 0.9 tally for a single clear-index threshold.
# `results`, `date` and the lon/lat bounds come from the sweep cell run before this one.
clear_idx = 90

# Stack (tally, lon, lat) as rows, then transpose so each row is one pixel
gt09_array = np.transpose(
    np.array([results[clear_idx][field] for field in ('gt09', 'lons', 'lats')])
)

plot_region_scatter(
    gt09_array,
    lon_min, lon_max,
    lat_min, lat_max,
    plot_title=f"Number of MOD10C1 > 0.9 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx}",
    units='Number',
    grid_type='modis',
    meanflag=False,
    saveflag=False,
    point_size=6,
    cmin=0,
    cmax=2,
)


In [None]:


# Southeastern US clear-index sensitivity plots (raw and normalised).
# Neither `results` (keyed by clear-index threshold) nor `date` is defined in
# this cell: both are whatever the most recently run sweep cell left in the
# kernel, so run that cell first.

# Regional totals per threshold: all observations, and observations with SCF > 0.9
clear_idx_values = sorted(results.keys())
total_obs = [np.sum(results[idx]['counts']) for idx in clear_idx_values]
total_gt09 = [np.sum(results[idx]['gt09']) for idx in clear_idx_values]

# Express each series as a percentage of its own maximum.  The maxima are
# computed once, and an all-zero series (for example no SCF > 0.9 at any
# threshold) maps to 0 % instead of the NaNs that 0/0 would produce.
peak_gt09 = max(total_gt09)
peak_obs = max(total_obs)
total_gt09_normalized = [value / peak_gt09 * 100 if peak_gt09 else 0.0 for value in total_gt09]
total_obs_normalized = [value / peak_obs * 100 if peak_obs else 0.0 for value in total_obs]

# --- Figure 1: raw totals on two y-axes sharing the x-axis ---
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()

# SCF > 0.9 totals on the left axis, observation totals on the right axis
line1 = ax1.plot(clear_idx_values, total_gt09, 'b-o', label='Total SCF > 0.9')
line2 = ax2.plot(clear_idx_values, total_obs, 'r-s', label='Total Observations')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9', color='b')
ax2.set_ylabel('Total Observations', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

plt.title(f'SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over the Southeastern US')
plt.show()

# --- Figure 2: the same two series as percent of their maxima ---
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

line1 = ax1.plot(clear_idx_values, total_gt09_normalized, 'b-o', label='Total SCF > 0.9 (Normalized)')
line2 = ax2.plot(clear_idx_values, total_obs_normalized, 'r-s', label='Total Observations (Normalized)')

# Axis labels are coloured to match their curves
ax1.set_xlabel('Clear Index Threshold')
ax1.set_ylabel('Number of SCF > 0.9 (% of Max)', color='b')
ax2.set_ylabel('Total Observations (% of Max)', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right')

plt.title(f'Normalized SCF > 0.9 Counts and Total Observations vs Clear Index Threshold \n {date.strftime("%B %Y")} over the Southeastern US')
plt.show()

In [None]:
# Southeastern US bounding box in degrees (a regional subset, not all of CONUS)
lon_min, lon_max = -95.0, -75.0
lat_min, lat_max = 29.0, 41.0
path = "/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_OLv8_M36/output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/"

# Day-of-year window; end_doy is exclusive because it feeds range()
start_doy = 182   # alternative value noted in the original: 60
end_doy = 213     # alternative value noted in the original: 90

# results[snow_spatial] holds the per-pixel arrays for that spatial-QC setting
# (note the tally is stored under 'gt09s' here, which the next cell reads)
results = {}

# The clear-index threshold stays fixed while snow_spatial is varied
clear_idx = 20

for snow_spatial in [0, 1, 2, 237, 239, 250, 255]:

    # Accumulators keyed by (lat, lon); rebuilt from scratch for each setting
    scf_dict = {}     # largest SCF seen at the pixel
    count_dict = {}   # number of values returned for the pixel
    gt09_dict = {}    # number of those values above 0.9

    for i in range(start_doy, end_doy):
        fname = f"MOD10C1.A2005{i:03d}.061.hdf"
        path_fname = os.path.join(path, fname)
        # `date` ends up holding the value for the last file; it is used in
        # the plot titles below and in the summary cell
        date = parse_modis_filename(fname)

        lon_out, lat_out, scf_out = read_modis_scf_hdf(
            path_fname,
            lon_min, lon_max,
            lat_min, lat_max,
            clear_index=clear_idx,
            snow_spatial=snow_spatial,
        )

        for lon, lat, scf in zip(lon_out, lat_out, scf_out):
            key = (lat, lon)

            # max() keeps its first argument unless the second is strictly
            # greater, so a stored value is only replaced by a larger SCF
            scf_dict[key] = max(scf_dict.get(key, scf), scf)

            # Every returned value counts as one observation of the pixel
            if key in count_dict:
                count_dict[key] += 1
            else:
                count_dict[key] = 1

            # Separate tally of values above 0.9
            if scf > 0.9:
                if key in gt09_dict:
                    gt09_dict[key] += 1
                else:
                    gt09_dict[key] = 1

    # Flatten the dictionaries into parallel lists, following scf_dict's order;
    # pixels that never exceeded 0.9 get a tally of 0
    lats = [pixel[0] for pixel in scf_dict]
    lons = [pixel[1] for pixel in scf_dict]
    scfs = list(scf_dict.values())
    counts = [count_dict[pixel] for pixel in scf_dict]
    gt09s = [gt09_dict.get(pixel, 0) for pixel in scf_dict]

    # One row per pixel, columns (value, lon, lat)
    scf_array = np.transpose(np.array([scfs, lons, lats]))
    count_array = np.transpose(np.array([counts, lons, lats]))
    gt09_array = np.transpose(np.array([gt09s, lons, lats]))

    # Regional mean and sum of the value column for each quantity
    print("Snow_spatial:", snow_spatial)
    print("Mean of SCF array:", np.mean(scf_array[:, 0]))
    print("Mean of Count array:", np.mean(count_array[:, 0]))
    print("Mean of GT09 array:", np.mean(gt09_array[:, 0]))
    print("Sum of SCF array:", np.sum(scf_array[:, 0]))
    print("Sum of Count array:", np.sum(count_array[:, 0]))
    print("Sum of GT09 array:", np.sum(gt09_array[:, 0]))

    # Map 1: largest SCF seen at each pixel
    plot_region_scatter(
        scf_array,
        lon_min, lon_max,
        lat_min, lat_max,
        plot_title=f"Max MOD10C1 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx} QC_Snow_Spatial_Max: {snow_spatial}",
        units='SCF',
        grid_type='modis',
        meanflag=True,
        saveflag=False,
        point_size=6,
        cmin=0,
        cmax=1,
    )

    # Map 2: number of observations at each pixel
    plot_region_scatter(
        count_array,
        lon_min, lon_max,
        lat_min, lat_max,
        plot_title=f"Number of MOD10C1 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx} QC_Snow_Spatial_Max: {snow_spatial}",
        units='Number',
        grid_type='modis',
        meanflag=True,
        saveflag=False,
        point_size=6,
        cmin=0,
        cmax=28,
    )

    # Map 3: number of observations above 0.9 at each pixel
    plot_region_scatter(
        gt09_array,
        lon_min, lon_max,
        lat_min, lat_max,
        plot_title=f"Number of MOD10C1 > 0.9 over Southeastern US: {date.strftime('%B %Y')}\n  QC_Clear_Index_Min: {clear_idx} QC_Snow_Spatial_Max: {snow_spatial}",
        units='Number',
        grid_type='modis',
        meanflag=True,
        saveflag=False,
        point_size=6,
        cmin=0,
        cmax=2,
    )

    # Keep this setting's arrays for the summary cell
    results[snow_spatial] = {
        name: np.array(values)
        for name, values in zip(
            ('lons', 'lats', 'scfs', 'counts', 'gt09s'),
            (lons, lats, scfs, counts, gt09s),
        )
    }

In [None]:
# Snow-spatial sensitivity plots (raw and normalised) for the Southeastern US.
# Neither `results` (keyed by snow_spatial setting, with the SCF > 0.9 tally
# stored under 'gt09s') nor `date` is defined in this cell: both are whatever
# the snow_spatial sweep cell left in the kernel, so run that cell first.

# Regional totals per setting: all observations, and observations with SCF > 0.9
snow_spatial_values = sorted(results.keys())
total_obs = [np.sum(results[idx]['counts']) for idx in snow_spatial_values]
total_gt09 = [np.sum(results[idx]['gt09s']) for idx in snow_spatial_values]

# Express each series as a percentage of its own maximum.  The maxima are
# computed once, and an all-zero series (for example no SCF > 0.9 at any
# setting) maps to 0 % instead of the NaNs that 0/0 would produce.
peak_gt09 = max(total_gt09)
peak_obs = max(total_obs)
total_gt09_normalized = [value / peak_gt09 * 100 if peak_gt09 else 0.0 for value in total_gt09]
total_obs_normalized = [value / peak_obs * 100 if peak_obs else 0.0 for value in total_obs]

# --- Figure 1: raw totals on two y-axes sharing the x-axis ---
fig, ax1 = plt.subplots(figsize=(10,6))
ax2 = ax1.twinx()

# The settings are unevenly spaced (0, 1, 2, then values above 200), so they
# are drawn at evenly spaced positions and labelled with their real values
x_range = np.arange(len(snow_spatial_values))

line1 = ax1.plot(x_range, total_gt09, 'b-o', label='Total SCF > 0.9')
line2 = ax2.plot(x_range, total_obs, 'r-s', label='Total Observations')

# Tick labels show the actual snow_spatial settings
ax1.set_xticks(x_range)
ax1.set_xticklabels(snow_spatial_values)

# Axis labels are coloured to match their curves
ax1.set_xlabel('Snow Spatial Threshold')
ax1.set_ylabel('Number of SCF > 0.9', color='b')
ax2.set_ylabel('Total Observations', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='lower right')

plt.title(f'SCF > 0.9 Counts and Total Observations vs Snow Spatial Threshold \n {date.strftime("%B %Y")} over the Southeastern US')
plt.show()

# --- Figure 2: the same two series as percent of their maxima ---
fig, ax1 = plt.subplots(figsize=(10, 6))
ax2 = ax1.twinx()

line1 = ax1.plot(x_range, total_gt09_normalized, 'b-o', label='Total SCF > 0.9 (Normalized)')
line2 = ax2.plot(x_range, total_obs_normalized, 'r-s', label='Total Observations (Normalized)')

# Tick labels show the actual snow_spatial settings
ax1.set_xticks(x_range)
ax1.set_xticklabels(snow_spatial_values)

# Axis labels are coloured to match their curves
ax1.set_xlabel('Snow Spatial Threshold')
ax1.set_ylabel('Number of SCF > 0.9 (% of Max)', color='b')
ax2.set_ylabel('Total Observations (% of Max)', color='r')

# Grid lines follow the left (SCF > 0.9) axis
ax1.grid(visible=True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)

# One legend covering the curves from both axes
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='lower right')

plt.title(f'Normalized SCF > 0.9 Counts and Total Observations vs Snow Spatial Threshold \n {date.strftime("%B %Y")} over the Southeastern US')
plt.show()