In [None]:
import glob
import os
import struct
from datetime import datetime, timedelta

import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from dateutil.relativedelta import relativedelta

from helper import read_ObsFcstAna, read_tilecoord, get_tile_species_obs_values
from mapper_functions import plot_global_tight_pcm, plot_NA_tight_pcm, plot_region, plot_aus_tight_pcm

In [None]:
# --- Experiment configuration -------------------------------------------------
# Names of the two experiments being compared; they are used in plot titles and
# to build the input directories below.
expt_name_1 = 'OLv8_M36_Aus'
expt_name_2 = 'DAv8_M36_Aus'

# Period shown in the plot titles (the title strings use YYYY/MM/DD).
start_date = datetime(2018, 8, 1)
end_date = datetime(2018, 9, 1)

start_date_str = start_date.strftime('%Y/%m/%d')
end_date_str = end_date.strftime('%Y/%m/%d')

# Common root of all experiment output and the sub-path (below each run
# directory) that holds the ensemble-average ObsFcstAna files for 2018-08.
experiments_root = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/CYGNSS_Experiments'
ana_subdir = 'output/SMAP_EASEv2_M36_GLOBAL/ana/ens_avg/Y2018/M08'

# Layout is <root>/<run directory>/<experiment name>/<ana_subdir>.
ana_directory_1 = f'{experiments_root}/{expt_name_1}/{expt_name_1}/{ana_subdir}'

# The second experiment lives in a run directory with a "_v2" suffix.
ana_directory_2 = f'{experiments_root}/DAv8_M36_Aus_v2/{expt_name_2}/{ana_subdir}'

In [None]:
# --- Experiment 1: per-tile statistics of the selected observation species -----
# Reads every August-2018 ObsFcstAna file of experiment 1, keeps the rows with
# obs_assim == 0 for the chosen species, aggregates them per tile and maps the
# number, maximum and mean of the observations over Australia.

# Species index selected below (presumably the CYGNSS species, judging by the
# plot titles -- TODO confirm against the experiment's observation namelist).
species_number = 12

ofa_pattern_1 = os.path.join(ana_directory_1, 'OLv8_M36_Aus.ens_avg.ldas_ObsFcstAna.201808*.bin')
ofa_files_1 = sorted(glob.glob(ofa_pattern_1))
if not ofa_files_1:
    # Without this guard an empty match only surfaces later as an opaque
    # "need at least one array to concatenate" ValueError.
    raise FileNotFoundError(f"No ObsFcstAna files match {ofa_pattern_1!r}")

OFA_list = [read_ObsFcstAna(fname) for fname in ofa_files_1]

# Fields carried through to the per-tile statistics, in the order in which they
# appear in filtered_data.
obs_fields = ('obs_tilenum', 'obs_species', 'obs_lat', 'obs_lon', 'obs_obs')
selected = {field: [] for field in obs_fields}

for ofa in OFA_list:
    # Rows of this file with obs_assim == 0 that belong to the chosen species.
    keep = (ofa['obs_assim'] == 0) & (ofa['obs_species'] == species_number)
    for field in obs_fields:
        selected[field].append(ofa[field][keep])

# One flat array per field, concatenated over all files.
filtered_data = {field: np.concatenate(parts) for field, parts in selected.items()}

stats_1 = get_tile_species_obs_values(filtered_data)

# Summary of what was aggregated.
print(f"Number of unique tiles: {len(stats_1['tiles'])}")
print(f"Number of species: {len(stats_1['max_values'])}")
for species, max_vals in stats_1['max_values'].items():
    print(f"Species {species}: max value = {np.max(max_vals)}")

# map_array columns: 0 = value to plot, 1 = longitude, 2 = latitude.
map_array = np.full((stats_1['lon'].shape[0], 3), np.nan)
map_array[:, 1] = stats_1['lon']
map_array[:, 2] = stats_1['lat']

# (statistic key, title suffix, trailing plot arguments). The count map is
# drawn without explicit colour limits; the other two use 0..0.3.
ol_title_prefix = f"{expt_name_1} {start_date_str} - {end_date_str}:\n "
ol_panels = (
    ('num_obs', 'Num CYGNSS obs', ('-',)),
    ('max_values', 'Max CYGNSS obs', ('fraction', 0, 0.3)),
    ('mean_values', 'Mean CYGNSS obs', ('fraction', 0, 0.3)),
)
for stat_key, title_suffix, plot_args in ol_panels:
    map_array[:, 0] = stats_1[stat_key][species_number]
    plot_aus_tight_pcm(map_array, False, True, f"{ol_title_prefix}{title_suffix}", *plot_args)


In [None]:
# Read every ObsFcstAna file of the second experiment found in ana_directory_2,
# in sorted filename order.
da_ofa_pattern = os.path.join(ana_directory_2, 'DAv8_M36_Aus.ens_avg.ldas_ObsFcstAna.2018*.bin')
OFA_list = [read_ObsFcstAna(path) for path in sorted(glob.glob(da_ofa_pattern))]


In [None]:
# Species index selected below (same value as used for experiment 1).
species_number = 12

# Fields handed to the per-tile aggregation, in output order.
da_fields = ('obs_tilenum', 'obs_species', 'obs_lat', 'obs_lon', 'obs_obs')
da_chunks = {name: [] for name in da_fields}

for ofa in OFA_list:
    # Rows of this file with obs_assim == 1 that belong to the chosen species.
    wanted = (ofa['obs_assim'] == 1) & (ofa['obs_species'] == species_number)
    for name in da_fields:
        da_chunks[name].append(ofa[name][wanted])

# One flat array per field, concatenated over all files.
filtered_data = {name: np.concatenate(pieces) for name, pieces in da_chunks.items()}

# Per-tile statistics of the selected observations.
stats_2 = get_tile_species_obs_values(filtered_data)

# Summary of what was aggregated.
print(f"Number of unique tiles: {len(stats_2['tiles'])}")
print(f"Number of species: {len(stats_2['max_values'])}")
for species, max_vals in stats_2['max_values'].items():
    print(f"Species {species}: max value = {np.max(max_vals)}")


In [None]:

# Map the per-tile statistics of experiment 2 (stats_2) over Australia.
# map_array columns: 0 = value to plot, 1 = longitude, 2 = latitude.
map_array = np.full((stats_2['lon'].shape[0], 3), np.nan)
map_array[:, 1] = stats_2['lon']
map_array[:, 2] = stats_2['lat']

# (statistic key, title suffix, trailing plot arguments). The count map is
# drawn without explicit colour limits; the other two use 0..0.3.
da_title_prefix = f"{expt_name_2} {start_date_str} - {end_date_str}:\n "
da_panels = (
    ('num_obs', 'Num CYGNSS obs', ('-',)),
    ('max_values', 'Max CYGNSS obs', ('fraction', 0, 0.3)),
    ('mean_values', 'Mean CYGNSS obs', ('fraction', 0, 0.3)),
)
for stat_key, title_suffix, plot_args in da_panels:
    map_array[:, 0] = stats_2[stat_key][species_number]
    plot_aus_tight_pcm(map_array, False, True, f"{da_title_prefix}{title_suffix}", *plot_args)


In [None]:
# Per-tile mean observation of each experiment for the selected species.
tiles_1, mean_values_1 = stats_1['tiles'], stats_1['mean_values'][species_number]
tiles_2, mean_values_2 = stats_2['tiles'], stats_2['mean_values'][species_number]

# Tiles present in both experiments, with their positions in each tile list.
matching_tiles, idx_1, idx_2 = np.intersect1d(tiles_1, tiles_2, return_indices=True)
print(f"Number of matching tiles: {len(matching_tiles)}")

# Experiment 1 minus experiment 2 on the common tiles.
mean_diff = mean_values_1[idx_1] - mean_values_2[idx_2]

# map_array columns: 0 = difference, 1 = longitude, 2 = latitude
# (coordinates are taken from experiment 1).
map_array = np.full((len(matching_tiles), 3), np.nan)
map_array[:, 0] = mean_diff
map_array[:, 1] = stats_1['lon'][idx_1]
map_array[:, 2] = stats_1['lat'][idx_1]

diff_title = (
    f"Difference in Mean CYGNSS obs: {expt_name_1} - {expt_name_2} "
    f"{start_date_str} - {end_date_str}:\n Mean CYGNSS obs"
)
plot_aus_tight_pcm(map_array, False, True, diff_title, 'fraction', -0.3, 0.3)

In [None]:

import matplotlib.pyplot as plt

# Tile coordinates and per-tile maximum observation of experiment 2.
lon, lat = stats_2['lon'], stats_2['lat']
max_values = stats_2['max_values'][species_number]

# Plain lon/lat scatter (no map projection), coloured by the maximum value.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(lon, lat, c=max_values, cmap='viridis', s=9)
fig.colorbar(scatter, ax=ax, label='Max Values')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Scatter Plot of CYGNSS obs Max Values from DA run ObsFcstAna')
plt.show()


In [None]:
# Scaling (z-score statistics) file stored under the DAv8_M36_Aus_v2 run.
# This cell used to assign a different file first,
#   .../stats/LS_OLv8_M36_ASCAT_zscore_stats_AD_all_pentads.nc4
# and then overwrite it immediately; only the file below was ever opened.
scaling_file = (
    '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/CYGNSS_Experiments/DAv8_M36_Aus_v2/'
    'DAv8_M36_Aus/output/SMAP_EASEv2_M36_GLOBAL/stats/'
    'M36_zscore_stats_2018_doy213_2024_doy181_W_75d_Nmin_5_sp_ALL_all_pentads.nc4'
)

# The dataset is left open because later cells read from `ds`.
ds = xr.open_dataset(scaling_file)

print(ds)

# Raw arrays of the variables of interest. Note that `lon` and `lat` here are
# the file's ll_lon / ll_lat variables and get reassigned by later cells.
lon = ds['ll_lon'].values
lat = ds['ll_lat'].values

n_data = ds['n_data'].values
o_mean = ds['o_mean'].values
o_std = ds['o_std'].values


In [None]:
# Mean over all pentads of the observation mean (o_mean) from the scaling file.
# NOTE: despite its name, `n_data_sum` is neither a sum nor built from n_data;
# the name is kept because later cells plot this variable.
n_data_sum = ds.o_mean.mean(dim='pentad')

# Blank out cells whose mean is exactly zero so they are not drawn.
n_data_sum = n_data_sum.where(n_data_sum != 0, np.nan)

# Evenly spaced coordinates, one per grid index along each dimension, spanning
# -179.875..179.875 and -89.875..89.875.
# NOTE(review): presumably these are the cell centres of the scaling grid --
# confirm against ll_lon / ll_lat / d_lon / d_lat in the file.
lons = np.linspace(-179.875, 180-0.125, n_data_sum.shape[0])
lats = np.linspace(-89.875, 90-0.125, n_data_sum.shape[1])

# Zoom window in degrees. For the whole continent use
# lon 110..155 and lat -45..-10 instead.
lon_min, lon_max = 132, 138
lat_min, lat_max = -28, -22

fig = plt.figure(figsize=(12, 8))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

# The field is transposed so that its first plotted dimension pairs with `lats`.
pcm = ax.pcolormesh(lons, lats, n_data_sum.T,
                    transform=ccrs.PlateCarree(),
                    cmap='viridis')

# Map decorations.
ax.coastlines(resolution='50m')
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.gridlines(draw_labels=True)

plt.colorbar(pcm, orientation='horizontal', label='sfmc', shrink=0.5)
plt.title('SM observation mean over Australia')
plt.show()

In [None]:
# CYGNSS files whose names start with cyg.ddmi.s202006.
# A narrower pattern that was left in this cell as a bare string (it had no
# effect) is kept here for reference:
#   /Users/amfox/Desktop/GEOSldas_diagnostics/test_data/cygnss_test/data/cyg.ddmi.s20200601-030000-e202006*.nc
file_pattern = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/cygnss_test/data/cyg.ddmi.s202006*.nc'

# Sorted so that the files are concatenated in filename order.
file_list = sorted(glob.glob(file_pattern))

# Open all files lazily as one dataset, stacked along the "time" dimension.
ds_combined = xr.open_mfdataset(file_list, combine='nested', concat_dim='time')

print(ds_combined)

# Length of the "time" variable after concatenation.
print(len(ds_combined['time']))

# Mean of SM_subdaily over both the time and timeslices dimensions; .compute()
# evaluates the lazy result so later cells work with in-memory values.
temporal_mean = ds_combined.SM_subdaily.mean(dim=['time', 'timeslices']).compute()


In [None]:

# Map of the temporal-mean soil moisture on the dataset's own lon/lat
# coordinates, with no extent restriction.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection=ccrs.PlateCarree())

pcm = ax.pcolormesh(
    ds_combined.longitude,
    ds_combined.latitude,
    temporal_mean,
    transform=ccrs.PlateCarree(),
    cmap='viridis',
)

# Map decorations.
ax.coastlines(resolution='50m')
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.gridlines(draw_labels=True)

fig.colorbar(pcm, ax=ax, orientation='horizontal', label='Soil Moisture')
ax.set_title('Mean Soil Moisture')

plt.show()


In [None]:
# Same temporal-mean field as the previous map, restricted to Australia.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection=ccrs.PlateCarree())

# Australia bounds in degrees.
lon_min, lon_max = 110, 155
lat_min, lat_max = -45, -10
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

pcm = ax.pcolormesh(
    ds_combined.longitude,
    ds_combined.latitude,
    temporal_mean,
    transform=ccrs.PlateCarree(),
    cmap='viridis',
)

# Map decorations.
ax.coastlines(resolution='50m')
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.gridlines(draw_labels=True)

fig.colorbar(pcm, ax=ax, orientation='horizontal', label='Soil Moisture')
ax.set_title('Mean Soil Moisture')

plt.show()

# Flattened point list for the scatter plots in later cells: 1-D coordinate and
# value arrays containing only the entries whose mean is not NaN.
values = temporal_mean.values.flatten()
mask = ~np.isnan(values)
lon = ds_combined.longitude.values.flatten()[mask]
lat = ds_combined.latitude.values.flatten()[mask]
values = values[mask]


In [None]:

# Scatter of the non-NaN temporal-mean points inside a small zoom window.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(projection=ccrs.PlateCarree())

# Zoom window in degrees (the whole continent would be lon 110..155, lat -45..-10).
lon_min, lon_max = 134, 136
lat_min, lat_max = -27, -25
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

# `lon`, `lat` and `values` are the flattened arrays built in the previous cell.
scatter = ax.scatter(lon, lat, c=values, cmap='viridis', s=5, transform=ccrs.PlateCarree())

# Map decorations.
ax.coastlines(resolution='50m')
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.gridlines(draw_labels=True)

fig.colorbar(scatter, ax=ax, orientation='horizontal', label='Soil Moisture', shrink=0.5)
ax.set_title('Mean CYGNSS Soil Moisture, June 2020')

plt.show()

In [None]:
# Overlay, in one zoomed map: the scaling-grid field, the CYGNSS mean points,
# the tile-coordinate minimum corners and the experiment-2 observation locations.

# Alternative tile-coordinate file:
# ftc = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/LS_DAv8_M36/output/SMAP_EASEv2_M36_GLOBAL/rc_out/LS_DAv8_M36.ldas_tilecoord.bin'

ftc = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/CYGNSS_Experiments/DAv8_M36_Aus/DAv8_M36_Aus/output/SMAP_EASEv2_M36_GLOBAL/rc_out/DAv8_M36_Aus.ldas_tilecoord.bin'

tc = read_tilecoord(ftc)
print(tc)

fig = plt.figure(figsize=(12, 8))
ax = plt.axes(projection=ccrs.PlateCarree())

# Zoom window in degrees (the whole continent would be lon 110..155, lat -45..-10).
lon_min, lon_max = 133, 137
lat_min, lat_max = -27, -23
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

# Background: pentad-mean field on the scaling grid (from an earlier cell).
pcm = ax.pcolormesh(lons, lats, n_data_sum.T,
                    transform=ccrs.PlateCarree(),
                    cmap='Greens')

# CYGNSS temporal-mean points (`lon`, `lat`, `values` come from an earlier cell).
scatter1 = ax.scatter(lon, lat,
                      c=values,
                      cmap='viridis',
                      s=20,
                      transform=ccrs.PlateCarree())

# Tile-coordinate minimum corners, all drawn in one colour ('+' markers).
c_ones = np.ones(len(tc['min_lon']))

scatter2 = ax.scatter(tc['min_lon'], tc['min_lat'],
                      c=c_ones,
                      transform=ccrs.PlateCarree(),
                      cmap='terrain',
                      s=80,
                      marker='+')

# Experiment-2 observation locations coloured by their per-tile maximum ('x').
lon2 = stats_2['lon']
lat2 = stats_2['lat']
max_values = stats_2['max_values'][species_number]

scatter3 = ax.scatter(lon2, lat2,
                      c=max_values,
                      cmap='viridis',
                      s=50,
                      marker='x',
                      transform=ccrs.PlateCarree())

# Map decorations.
ax.coastlines()
ax.gridlines(draw_labels=True)

# The colour bar must describe a mappable drawn on THIS axes. The cell used to
# pass `scatter`, which is left over from an earlier cell's figure, and labelled
# it as elevation although no elevation is plotted here.
# NOTE(review): scatter1 and scatter3 both use viridis but are normalised
# independently, so this bar is only valid for the CYGNSS mean points.
plt.colorbar(scatter1, ax=ax, label='Mean CYGNSS soil moisture')
plt.title('Tile corners, CYGNSS mean soil moisture and DA obs over the scaling grid')

plt.show()
     

In [None]:
# Experiment-2 observation locations coloured by per-tile maximum, drawn on a
# PlateCarree map with the default (unrestricted) extent.
lon2, lat2 = stats_2['lon'], stats_2['lat']
max_values = stats_2['max_values'][species_number]

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(projection=ccrs.PlateCarree())

# To zoom in, uncomment:
# ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

scatter3 = ax.scatter(
    lon2, lat2,
    c=max_values,
    cmap='viridis',
    s=9,
    transform=ccrs.PlateCarree(),
)

# Map decorations.
ax.coastlines()
ax.gridlines(draw_labels=True)

fig.colorbar(scatter3, ax=ax, label='Max Values')

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Scatter Plot of CYGNSS obs Max Values from DA run ObsFcstAna')
plt.show()

In [None]:
# Scratch cell: reproduce in NumPy the scaling-grid index lookup shown in the
# Fortran excerpt below, then draw the grid cells selected for each CYGNSS
# point on top of the scaling field. Relies on `ds`, `ds_combined`,
# `temporal_mean`, `lons`, `lats` and `n_data_sum` from earlier cells.
#
# Fortran excerpt being reproduced (kept for reference):
#
# do i=1,N_catd

#     ! Check for no-data-values in observation (any neg value is no_data)

#     if (tmp_obs(i)>=0.) then

#         ! ll_lon and ll_lat refer to lower left corner of grid cell
#         ! (as opposed to the grid point in the center of the grid cell)

#         this_lon = tmp_lon(i)
#         this_lat = tmp_lat(i)

#         ! Find indices for current tile lat and lon on scaling parameter grid

#         i_ind = ceiling((this_lon - ll_lon)/dlon) 
#         j_ind = ceiling((this_lat - ll_lat)/dlat) 

#         ! Check for no-data-values in observation and fit parameters
#         ! (any negative number could be no-data-value for observations)

#         if ( sclprm_mean_obs(j_ind, i_ind)>0.   .and.        &
#             sclprm_mean_mod(j_ind, i_ind)>0.   .and.        &
#             sclprm_std_obs(j_ind, i_ind)>=0.   .and.        &
#             sclprm_std_mod(j_ind, i_ind)>=0.         ) then


# CYGNSS coordinates and the temporal-mean field computed in an earlier cell.
tmp_lon = ds_combined['longitude'].values
tmp_lat = ds_combined['latitude'].values
values1 = temporal_mean

# Distinct CYGNSS longitudes between 125 and 145 degrees (inclusive).
unique_tmp_lon = np.unique(tmp_lon[(tmp_lon >= 125) & (tmp_lon <= 145)])
print(unique_tmp_lon)

# Distinct CYGNSS longitudes that are exact whole numbers.
print(np.unique(tmp_lon[tmp_lon % 1 == 0]))

# Disabled experiment: nudge points lying exactly on 135 degrees.
# tmp_lon[tmp_lon == 135] = 135.0001

# Grid origin and spacing as stored in the scaling file.
ll_lon = ds.ll_lon.values
ll_lat = ds.ll_lat.values
dlon = ds.d_lon.values
dlat = ds.d_lat.values

# Direct translation of the Fortran ceiling() formula.
# NOTE: these two results are overwritten immediately below and never used.
i_ind = np.ceil((tmp_lon - ll_lon) / dlon).astype(int)
j_ind = np.ceil((tmp_lat - ll_lat) / dlat).astype(int)

# Alternative floor()+1 formula; this is the version used for the plot below.
# It differs from the ceil() version only where the quotient is an exact
# integer, where it yields an index one larger.
i_ind = np.floor((tmp_lon - ll_lon) / dlon).astype(int) + 1
j_ind = np.floor((tmp_lat - ll_lat) / dlat).astype(int) + 1

print(i_ind)
print(j_ind)

# Show what the ceil() formula returns at 135 degrees and just above it.
print("np.ceil((135. - ll_lon) / dlon).astype(int):", np.ceil((135. - ll_lon) / dlon).astype(int))
print("np.ceil((135.001 - ll_lon) / dlon).astype(int):", np.ceil((135.001 - ll_lon) / dlon).astype(int))
print("np.ceil((135.0000001 - ll_lon) / dlon).astype(int):", np.ceil((135.0000001 - ll_lon) / dlon).astype(int))

# NOTE(review): the *_mod arrays are filled from the same o_mean / o_std
# variables as the *_obs arrays -- confirm whether model statistics were
# intended. None of these four arrays is used again in the visible part of
# this cell.
sclprm_mean_obs = ds.o_mean.values
sclprm_mean_mod = ds.o_mean.values
sclprm_std_obs = ds.o_std.values
sclprm_std_mod = ds.o_std.values

# Create figure with map projection
fig = plt.figure(figsize=(12, 8))
ax = plt.axes(projection=ccrs.PlateCarree())

# Full Australia bounds (overridden by the zoom window right below)
lon_min, lon_max = 110, 155
lat_min, lat_max = -45, -10

# Zoom window actually used for the map extent
lon_min, lon_max = 132, 138
lat_min, lat_max = -28, -22

# Set map extent to the zoom window
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())


# Background: pentad-mean scaling field computed in an earlier cell
pcm = ax.pcolormesh(lons, lats, n_data_sum.T,
                    transform=ccrs.PlateCarree(),
                    cmap='Greens')


# CYGNSS points coloured by their temporal mean
scatter1 = ax.scatter(tmp_lon, tmp_lat, 
                      c=values1, 
                      cmap='viridis', 
                      s=15, 
                      transform=ccrs.PlateCarree())


# Disabled: tile-coordinate minimum corners
# c_ones = np.ones(len(tc['min_lon']))

# scatter2 = ax.scatter(tc['min_lon'], tc['min_lat'], 
#                       c=c_ones,
#                       transform=ccrs.PlateCarree(),
#                       cmap='terrain',
#                       s=80,
#                       marker='+')

# Coordinates of the grid cell selected for each CYGNSS point. The indices
# were built with "+ 1" above, so 1 is subtracted before indexing the
# coordinate arrays.
latt = lats[j_ind-1]
lonn = lons[i_ind-1]

# Hollow black squares mark the selected grid cells
scatter3 = ax.scatter(lonn, latt,
                      marker='s',
                      facecolors='none',
                      edgecolors='black',
                      s=180,
                      transform=ccrs.PlateCarree())

# Add map features
ax.coastlines()
ax.gridlines(draw_labels=True)