In [None]:
import os
import glob
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta



In [None]:
# Name of the experiment whose output is inspected.
expt_name = '1e_LS_DAv8_M36_debug'

# Date window applied to the output files: start inclusive, end exclusive.
start_date, end_date = datetime(2002, 10, 1), datetime(2002, 10, 11)

# Slash-separated text forms of the two window bounds.
start_date_str, end_date_str = (
    bound.strftime('%Y/%m/%d') for bound in (start_date, end_date)
)

# Directory holding the experiment output that the loading cells glob.
directory = (
    '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/snow_qc_expts/1e_LS_DAv8_M36_0/'
    f'{expt_name}/output/SMAP_EASEv2_M36_GLOBAL/cat/ens0000/Y2002/M10'
)

In [None]:
# Tile number for which the time series are plotted.
# Alternatives previously noted on this line: 81728, 77787.
tile_number = 3438

# Same tile expressed with zero-based indexing.
tile_index = tile_number - 1

In [None]:
# Load the '*debug.catch_progn_incr*' files in `directory` whose filename date
# lies in [start_date, end_date), concatenate them along "time", and turn the
# time stamps into datetime objects. Produces `ds` and `time_objs`.
pattern = os.path.join(directory, '*debug.catch_progn_incr*')

files = glob.glob(pattern)

# Filter files based on the date range
filtered_files = []
for file in files:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files:
    raise FileNotFoundError(
        f"No files matching {pattern} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds = xr.open_mfdataset(filtered_files, combine='nested', concat_dim="time")
ds = ds.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs = ds['time_stamp'].values.astype(str)
time_objs = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs]

# Spot-check one parsed time stamp; guarded so a short record does not raise IndexError
if len(time_objs) > 11:
    print(time_objs[11])



In [None]:
# Load the '*debug2.catch_progn_incr*' files in `directory` whose filename date
# lies in [start_date, end_date), concatenate them along "time", and turn the
# time stamps into datetime objects. Produces `ds_debug2` and `time_objs_debug2`.
pattern_debug2 = os.path.join(directory, '*debug2.catch_progn_incr*')

# Find files matching the pattern
files_debug2 = glob.glob(pattern_debug2)

# Filter files based on the date range
filtered_files_debug2 = []
for file in files_debug2:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files_debug2.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files_debug2:
    raise FileNotFoundError(
        f"No files matching {pattern_debug2} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds_debug2 = xr.open_mfdataset(filtered_files_debug2, combine='nested', concat_dim="time")
ds_debug2 = ds_debug2.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs_debug2 = ds_debug2['time_stamp'].values.astype(str)
time_objs_debug2 = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs_debug2]

# Spot-check one parsed time stamp; guarded so a short record does not raise IndexError
if len(time_objs_debug2) > 11:
    print(time_objs_debug2[11])

In [None]:
# Latitude / longitude of the selected tile, taken from the loaded dataset.
# Both come back as arrays; only element 0 is used below.
lat = ds['lat'].sel(tile=tile_index).values
lon = ds['lon'].sel(tile=tile_index).values
tile_lat, tile_lon = lat[0], lon[0]

print(f"Latitude: {tile_lat}, Longitude: {tile_lon}")

import cartopy.crs as ccrs

# Global map with coastlines in a plate carree projection
map_projection = ccrs.PlateCarree()
plt.figure(figsize=(8, 4))
ax = plt.axes(projection=map_projection)
ax.coastlines()
ax.set_global()

# Mark the tile location with a red dot
ax.plot(
    tile_lon,
    tile_lat,
    marker='o',
    color='red',
    markersize=8,
    transform=ccrs.PlateCarree(),
)
ax.set_title(f"Location: lat={tile_lat}, lon={tile_lon}")

plt.show()


In [None]:
# Three-panel figure of analysis increments at the selected tile.
# Stems (vertical lines from zero plus markers) come from `ds`; the snow panel
# additionally overlays the summed snow increment from `ds_debug2` as a line.

# Select the time series for each variable using .sel
var_ts = ds['GHTCNT1_INCR'].sel(tile=tile_index)
# Snow increment is the sum over the three WESNN*_INCR variables
var_ts_snow = (
    ds['WESNN1_INCR'].sel(tile=tile_index)
    + ds['WESNN2_INCR'].sel(tile=tile_index)
    + ds['WESNN3_INCR'].sel(tile=tile_index)
)
var_ts_wilting = ds['TCFWLT_INCR'].sel(tile=tile_index)

# Same selections from the second experiment; only the snow sum is drawn below
var_ts2 = ds_debug2['GHTCNT1_INCR'].sel(tile=tile_index)
var_ts_snow2 = (
    ds_debug2['WESNN1_INCR'].sel(tile=tile_index)
    + ds_debug2['WESNN2_INCR'].sel(tile=tile_index)
    + ds_debug2['WESNN3_INCR'].sel(tile=tile_index)
)
var_ts_wilting2 = ds_debug2['TCFWLT_INCR'].sel(tile=tile_index)

fig, axs = plt.subplots(3, 1, figsize=(14, 7), sharex=True)

# 1. Ground heat content increment
axs[0].vlines(x=time_objs, ymin=0, ymax=var_ts.values, color='tab:blue', alpha=0.7, linewidth=2)
axs[0].scatter(time_objs, var_ts.values, color='tab:blue', s=10)
axs[0].set_ylabel('GHTCNT1_INCR (J m-2)')
# Title previously read "Ground Head Content"; corrected to "Heat"
axs[0].set_title(f'Ground Heat Content (1) Increment for tile {tile_number}')
axs[0].grid(True)

# 2. Snow mass increment (stems: `ds`; line: `ds_debug2`)
axs[1].vlines(x=time_objs, ymin=0, ymax=var_ts_snow.values, color='tab:blue', label='ght', alpha=0.7, linewidth=2)
axs[1].scatter(time_objs, var_ts_snow.values, color='tab:blue', s=10)
axs[1].plot(time_objs_debug2, var_ts_snow2.values, color='tab:orange', label='minSWE', linewidth=1.5)
axs[1].legend()
axs[1].set_ylabel('WESN_INCR (kg m-2)')
axs[1].set_title(f'Snow Mass Increment for tile {tile_number}')
axs[1].grid(True)

# 3. Wilting zone temperature increment
axs[2].vlines(x=time_objs, ymin=0, ymax=var_ts_wilting.values, color='tab:blue', alpha=0.7, linewidth=2)
axs[2].scatter(time_objs, var_ts_wilting.values, color='tab:blue', s=10)
axs[2].set_ylabel('TCFWLT_INCR (K)')
axs[2].set_title(f'Wilting Zone Temperature Increment for tile {tile_number}')
axs[2].set_xlabel('Time')
axs[2].grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Load the '*debug.inst3_1d_lndfcstana_Nt*' files in `directory` whose filename
# date lies in [start_date, end_date), concatenate them along "time", and turn
# the time stamps into datetime objects. Rebinds `ds` and `time_objs`.
pattern = os.path.join(directory, '*debug.inst3_1d_lndfcstana_Nt*')

files = glob.glob(pattern)

# Filter files based on the date range
filtered_files = []
for file in files:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files:
    raise FileNotFoundError(
        f"No files matching {pattern} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds = xr.open_mfdataset(filtered_files, combine='nested', concat_dim="time")
ds = ds.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs = ds['time_stamp'].values.astype(str)
time_objs = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs]

# Spot-check one parsed time stamp; guarded so a short record does not raise IndexError
if len(time_objs) > 11:
    print(time_objs[11])

In [None]:
# Load the '*OL.inst3_1d_lndfcstana_Nt*' files in `directory` whose filename
# date lies in [start_date, end_date), concatenate them along "time", and turn
# the time stamps into datetime objects. Produces `ds_open_loop` and
# `time_objs_open_loop`.
pattern = os.path.join(directory, '*OL.inst3_1d_lndfcstana_Nt*')

files = glob.glob(pattern)

# Filter files based on the date range
filtered_files = []
for file in files:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files:
    raise FileNotFoundError(
        f"No files matching {pattern} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds_open_loop = xr.open_mfdataset(filtered_files, combine='nested', concat_dim="time")
ds_open_loop = ds_open_loop.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs_open_loop = ds_open_loop['time_stamp'].values.astype(str)
time_objs_open_loop = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs_open_loop]

# Spot-check one parsed time stamp; guarded so a short record does not raise IndexError
if len(time_objs_open_loop) > 11:
    print(time_objs_open_loop[11])

In [None]:
# Load the '*debug2.inst3_1d_lndfcstana_Nt*' files in `directory` whose filename
# date lies in [start_date, end_date), concatenate them along "time", and turn
# the time stamps into datetime objects. Rebinds `ds_debug2` and
# `time_objs_debug2`.
pattern_debug2 = os.path.join(directory, '*debug2.inst3_1d_lndfcstana_Nt*')

files_debug2 = glob.glob(pattern_debug2)

# Filter files based on the date range
filtered_files_debug2 = []
for file in files_debug2:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files_debug2.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files_debug2:
    raise FileNotFoundError(
        f"No files matching {pattern_debug2} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds_debug2 = xr.open_mfdataset(filtered_files_debug2, combine='nested', concat_dim="time")
ds_debug2 = ds_debug2.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs_debug2 = ds_debug2['time_stamp'].values.astype(str)
time_objs_debug2 = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs_debug2]

# Spot-check one parsed time stamp; guarded so a short record does not raise IndexError
if len(time_objs_debug2) > 11:
    print(time_objs_debug2[11])

In [None]:
# Layer-1 soil temperature at the selected tile: forecast and analysis series
var_ts_soil_temp = ds['TSOIL1_FCST'].sel(tile=tile_index)
var_ts_soil_temp_analysis = ds['TSOIL1_ANA'].sel(tile=tile_index)

fig, ax = plt.subplots(figsize=(12, 6))

# Draw forecast first, then analysis, both against the same time axis
for series, series_label, series_color in (
    (var_ts_soil_temp, 'Forecast', 'tab:blue'),
    (var_ts_soil_temp_analysis, 'Analysis', 'tab:orange'),
):
    ax.plot(time_objs, series.values, label=series_label, color=series_color, alpha=0.7)

# Axis labels, title, legend and grid
ax.set_xlabel('Time')
ax.set_ylabel('Soil Temperature Layer 1 (K)')
ax.set_title(f'Soil Temperature Layer 1 Forecast vs Analysis Time Series for tile {tile_number}')
ax.legend()
ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Interleave forecast and analysis into one series: at each time step the
# forecast value is placed at t and the analysis value one second later, so a
# connected line jumps from forecast to analysis at every step.
new_times = []
new_values = []
one_second = timedelta(seconds=1)

for t, fcst, ana in zip(time_objs, var_ts_soil_temp.values, var_ts_soil_temp_analysis.values):
    new_times.extend((t, t + one_second))
    new_values.extend((fcst, ana))

# Layer-1 soil temperature forecast at the selected tile from each dataset
var_ts_open_loop_fcst = ds_open_loop['TSOIL1_FCST'].sel(tile=tile_index)
var_ts_exp_fcst = ds['TSOIL1_FCST'].sel(tile=tile_index)
var_ts_exp2_fcst = ds_debug2['TSOIL1_FCST'].sel(tile=tile_index)

fig, axs = plt.subplots(2, 1, figsize=(14, 7), sharex=True)
sawtooth_ax, compare_ax = axs[0], axs[1]

# Top panel: interleaved forecast/analysis series
sawtooth_ax.plot(
    new_times,
    new_values,
    label='Forecast/Analysis Sawtooth',
    color='tab:purple',
    marker='o',
    markersize=2,
    linestyle='-',
)
sawtooth_ax.set_xlabel('Time')
sawtooth_ax.set_ylabel('Soil Temperature Layer 1 (K)')
sawtooth_ax.set_title(f'Sawtooth Forecast/Analysis Soil Temperature (1) for tile {tile_number}')
sawtooth_ax.legend()
sawtooth_ax.grid(True)

# Bottom panel: forecasts from the three datasets, drawn in this fixed order
forecast_runs = (
    (time_objs_open_loop, var_ts_open_loop_fcst, 'Open Loop'),
    (time_objs_debug2, var_ts_exp2_fcst, 'minSWE'),
    (time_objs, var_ts_exp_fcst, 'ght'),
)
for run_times, run_series, run_label in forecast_runs:
    compare_ax.plot(run_times, run_series.values, label=run_label, alpha=0.7)
compare_ax.set_xlabel('Time')
compare_ax.set_ylabel('Soil Temperature Layer 1 (K)')
compare_ax.set_title(f'Soil Temperature Layer 1 Forecast for tile {tile_number}')
compare_ax.legend()
compare_ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Load the '*debug.SMAP_L4_SM_gph*' files in `directory` whose filename date
# lies in [start_date, end_date), concatenate them along "time", and turn the
# time stamps into datetime objects. Rebinds `ds` and `time_objs`.
pattern = os.path.join(directory, '*debug.SMAP_L4_SM_gph*')

files = glob.glob(pattern)

# Filter files based on the date range
filtered_files = []
for file in files:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files:
    raise FileNotFoundError(
        f"No files matching {pattern} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds = xr.open_mfdataset(filtered_files, combine='nested', concat_dim="time")
ds = ds.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs = ds['time_stamp'].values.astype(str)
time_objs = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs]

In [None]:
# Load the '*OL.SMAP_L4_SM_gph*' files in `directory` whose filename date lies
# in [start_date, end_date), concatenate them along "time", and turn the time
# stamps into datetime objects. Rebinds `ds_open_loop` and `time_objs_open_loop`.
pattern = os.path.join(directory, '*OL.SMAP_L4_SM_gph*')

files = glob.glob(pattern)

# Filter files based on the date range
filtered_files = []
for file in files:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files:
    raise FileNotFoundError(
        f"No files matching {pattern} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds_open_loop = xr.open_mfdataset(filtered_files, combine='nested', concat_dim="time")
ds_open_loop = ds_open_loop.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs_open_loop = ds_open_loop['time_stamp'].values.astype(str)
time_objs_open_loop = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs_open_loop]

In [None]:
# Load the '*debug2.SMAP_L4_SM_gph*' files in `directory` whose filename date
# lies in [start_date, end_date), concatenate them along "time", and turn the
# time stamps into datetime objects. Rebinds `ds_debug2` and `time_objs_debug2`.
pattern_debug2 = os.path.join(directory, '*debug2.SMAP_L4_SM_gph*')

files_debug2 = glob.glob(pattern_debug2)

# Filter files based on the date range
filtered_files_debug2 = []
for file in files_debug2:
    filename = os.path.basename(file)
    print(f'Processing file: {filename}')

    # The date is read from the second-to-last dot-separated field, e.g. '20021002'
    parts = filename.split('.')
    if len(parts) < 3:
        print("Filename format not recognized")
        continue  # no date field, so the file cannot be range-checked
    date_str = parts[-2]
    try:
        file_date = datetime.strptime(date_str, '%Y%m%d')
    except ValueError:
        print(f"Could not parse date from {date_str}")
        # Skip the file: falling through would test the file_date left over from
        # the previous iteration (or raise NameError on the first file).
        continue

    # Check if the file date is within the range
    if start_date <= file_date < end_date:
        filtered_files_debug2.append(file)

# Stop early with a message naming the pattern when nothing was selected
if not filtered_files_debug2:
    raise FileNotFoundError(
        f"No files matching {pattern_debug2} dated from {start_date:%Y%m%d} up to (not including) {end_date:%Y%m%d}"
    )

# Load the data
ds_debug2 = xr.open_mfdataset(filtered_files_debug2, combine='nested', concat_dim="time")
ds_debug2 = ds_debug2.sortby('time_stamp')

# Convert the time_stamp values to strings, then parse them into datetime objects
time_strs_debug2 = ds_debug2['time_stamp'].values.astype(str)
time_objs_debug2 = [datetime.strptime(ts, "%Y%m%d_%H%Mz") for ts in time_strs_debug2]

In [None]:
# Series at the selected tile, grouped by variable.
# Forcing fluxes are read from `ds` only.
var_ts_precipitation = ds['precipitation_total_surface_flux'].sel(tile=tile_index)
var_ts_snowfall = ds['snowfall_surface_flux'].sel(tile=tile_index)

# Snow melt flux from each of the three datasets
var_ts_snow_melt_flux = ds['snow_melt_flux'].sel(tile=tile_index)
var_ts_snow_melt_flux_open_loop = ds_open_loop['snow_melt_flux'].sel(tile=tile_index)
var_ts_snow_melt_flux_debug2 = ds_debug2['snow_melt_flux'].sel(tile=tile_index)

# Snow mass from each of the three datasets
var_ts_snow_mass = ds['snow_mass'].sel(tile=tile_index)
var_ts_snow_mass_open_loop = ds_open_loop['snow_mass'].sel(tile=tile_index)
var_ts_snow_mass_debug2 = ds_debug2['snow_mass'].sel(tile=tile_index)

fig, axs = plt.subplots(3, 1, figsize=(14, 7), sharex=True)
flux_ax, mass_ax, melt_ax = axs[0], axs[1], axs[2]

# --- Panel 1: precipitation and snowfall surface flux ---
flux_ax.plot(time_objs, var_ts_precipitation.values, label='Precipitation Total Surface Flux', color='tab:blue', alpha=0.7)
flux_ax.plot(time_objs, var_ts_snowfall.values, label='Snowfall Surface Flux', color='tab:orange', alpha=0.7)
flux_ax.set_ylabel('Flux (kg m-2 s-1)')
flux_ax.set_title(f'Precipitation and Snowfall Surface Flux for tile {tile_number}')
flux_ax.legend()
flux_ax.grid(True)

# --- Panel 2: snow mass, one line per dataset in a fixed draw order ---
snow_mass_runs = (
    (time_objs_open_loop, var_ts_snow_mass_open_loop, 'Open Loop'),
    (time_objs_debug2, var_ts_snow_mass_debug2, 'minSWE'),
    (time_objs, var_ts_snow_mass, 'ght'),
)
for run_times, run_series, run_label in snow_mass_runs:
    mass_ax.plot(run_times, run_series.values, label=run_label, alpha=0.7)
mass_ax.set_ylabel('Snow Mass (kg m-2)')
mass_ax.set_title(f'Snow Mass for tile {tile_number}')
mass_ax.legend()
mass_ax.grid(True)

# --- Panel 3: snow melt flux, same datasets and draw order ---
snow_melt_runs = (
    (time_objs_open_loop, var_ts_snow_melt_flux_open_loop, 'Open Loop'),
    (time_objs_debug2, var_ts_snow_melt_flux_debug2, 'minSWE'),
    (time_objs, var_ts_snow_melt_flux, 'ght'),
)
for run_times, run_series, run_label in snow_melt_runs:
    melt_ax.plot(run_times, run_series.values, label=run_label, alpha=0.7)
melt_ax.set_xlabel('Time')
melt_ax.set_ylabel('Snow Melt Flux (kg m-2 s-1)')
melt_ax.set_title(f'Snow Melt Flux for tile {tile_number}')
melt_ax.legend()
melt_ax.grid(True)

plt.tight_layout()
plt.show()
