In [None]:
import numpy as np
import os
from datetime import datetime
from dateutil.relativedelta import relativedelta
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import pickle

from helper.read_GEOSldas import read_tilecoord, read_obs_param
from helper.util import make_folder, array2grid
from helper.plot import plotMap
from helper.smapeasev2 import smapeasev2_ind2latlon

import warnings; warnings.filterwarnings("ignore")
import sys 
import io

#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)

In [None]:
# Experiment configuration: land_sweeper run 'LS_DAv8_M36' on the global M36 grid.
# NOTE(review): the following cell assigns the same names again, so on a
# top-to-bottom run these values are replaced before anything reads them.
# NOTE(review): absolute, user-specific path - adjust for your machine.
domain = 'SMAP_EASEv2_M36_GLOBAL'
expid = 'LS_DAv8_M36'
expdir = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/land_sweeper/'

# Analysis period
start_time, end_time = datetime(2000, 6, 1), datetime(2024, 4, 1)

# The same two dates rendered as YYYY/MM/DD strings
start_date_str, end_date_str = (f'{t:%Y/%m/%d}' for t in (start_time, end_time))

In [None]:
# Experiment configuration: CYGNSS data-assimilation run 'DAv8_M36_Aus'.
# NOTE(review): absolute, user-specific path - adjust for your machine.
expid = 'DAv8_M36_Aus'
expdir = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/CYGNSS_Experiments/' + expid + '/'
domain = 'SMAP_EASEv2_M36_GLOBAL'

# Analysis period (used below to build the pickle and figure file names)
start_time = datetime(year=2018, month=8, day=1)
end_time = datetime(year=2024, month=7, day=1)

# The same two dates rendered as YYYY/MM/DD strings
date_fmt = '%Y/%m/%d'
start_date_str = format(start_time, date_fmt)
end_date_str = format(end_time, date_fmt)

In [None]:
# Minimum number of data points in time required for a metric to be considered
# statistically reliable.
Nmin = 20

# Base directory for the monthly files. Built from the experiment directory here,
# but it may point somewhere else.
out_path_mo = f'{expdir}{expid}/output/{domain}/ana/ens_avg/'

# Directory for diagnostic plots (also where the pickles read below are expected).
# The trailing '/' matters: file names are appended by plain string concatenation.
out_path = f'{expdir}{expid}/output/{domain}/figures/'
make_folder(out_path)

# Variables for which sums and sums of squares are computed:
# the value and its variance for obs, forecast and analysis
var_list = [f'obs_{stem}{suffix}' for stem in ('obs', 'fcst', 'ana') for suffix in ('', 'var')]

In [None]:
# Tile coordinates: read from <expdir>/<expid>/output/<domain>/rc_out/
ftc = os.path.join(expdir, expid, 'output', domain, 'rc_out', expid + '.ldas_tilecoord.bin')
tc = read_tilecoord(ftc)
n_tile = tc['N_tile']

# Observation parameters: the file lives in the Y<yyyy>/M<mm> sub-folder of rc_out
# and is stamped with the start date of the analysis period.
fop = os.path.join(
    expdir, expid, 'output', domain, 'rc_out',
    f'Y{start_time:%Y}',
    f'M{start_time:%m}',
    f'{expid}.ldas_obsparam.{start_time:%Y%m%d}_0000z.txt',
)
obs_param = read_obs_param(fop)

# One obs_param entry per observation species
n_spec = len(obs_param)

In [None]:
# Species indices that make up each sensor group (variant with MODIS as the last group).
# NOTE(review): presumably these index the species listed in obs_param - confirm.
species_groups = dict(
    SMOS=list(range(0, 4)),
    SMAP=list(range(4, 8)),
    ASCAT=list(range(8, 11)),
    MODIS=list(range(11, 13)),
)

In [None]:
# Species indices that make up each sensor group (variant with CYGNSS as the last group).
# NOTE(review): presumably these index the species listed in obs_param - confirm.
species_groups = dict(
    SMOS=list(range(0, 4)),
    SMAP=list(range(4, 8)),
    ASCAT=list(range(8, 11)),
    CYGNSS=[11],
)

In [None]:

# Both pickles live in out_path and share the "<expid>_<start>_<end>" suffix.
# NOTE: pickle.load can execute arbitrary code - only open files you produced yourself.
pkl_suffix = '{}_{:%Y%m%d}_{:%Y%m%d}.pkl'.format(expid, start_time, end_time)

# Monthly time series
with open(out_path + 'monthly_OmF_data_' + pkl_suffix, 'rb') as f:
    monthly_data = pickle.load(f)

# Unpack the four entries used by the plots below
(N_data_group_all_months,
 OmF_mean_all_months,
 OmF_stdv_all_months,
 monthly_timestamps) = (
    monthly_data[key]
    for key in ('N_data_group_all_months', 'OmF_mean_all_months',
                'OmF_stdv_all_months', 'monthly_timestamps')
)

# Per-group metrics for the map plots
with open(out_path + 'group_metrics_' + pkl_suffix, 'rb') as f:
    loaded_group_metrics = pickle.load(f)

print("loaded_group_metrics keys:", loaded_group_metrics.keys())


In [None]:
# --- Observation counts: a single figure with one line per species group ---
fig, ax = plt.subplots(figsize=(10, 6))
for group, values in N_data_group_all_months.items():
    values_in_millions = [v / 1e6 for v in values]  # counts -> millions
    ax.plot(monthly_timestamps, values_in_millions, label=group)
    print(f"{group} N_data (in millions): {values_in_millions}")
fig.autofmt_xdate()
ax.set_xlabel('Month')
ax.set_ylabel('N_data (Millions)')
ax.set_title('Total N_data by Species Group (in Millions)')
ax.legend()
ax.grid()
fig.savefig(out_path + 'N_data_group.png')  # one file for all groups
plt.show()
plt.close(fig)


# --- O-F mean, then O-F stdv: one figure (and one PNG) per species group ---
# Each row: (monthly series by group, legend suffix, axis/title name, line colour, file stem)
series_specs = (
    (OmF_mean_all_months, 'OmF Mean', 'OmF Mean', 'blue', 'OmF_Mean'),
    (OmF_stdv_all_months, 'OmF Stdv', 'OmF Standard Deviation', 'orange', 'OmF_Stdv'),
)
for series_by_group, legend_suffix, long_name, line_color, file_stem in series_specs:
    for group, values in series_by_group.items():
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(monthly_timestamps, values, label=f'{group} {legend_suffix}', color=line_color)
        fig.autofmt_xdate()
        ax.set_xlabel('Month')
        ax.set_ylabel(long_name)
        ax.set_title(f'{long_name} for {group}')
        ax.legend()
        ax.grid()
        fig.savefig(out_path + f'{file_stem}_{group}.png')
        plt.show()
        plt.close(fig)  # release the figure before creating the next one

In [None]:
# Sanity check: number of monthly timestamps, and the timestamps plotted against their index
print(len(monthly_timestamps))
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(monthly_timestamps, label='Monthly Timestamps')
fig.autofmt_xdate()
ax.set_xlabel('Index')
ax.set_ylabel('Timestamps')
ax.set_title('Monthly Timestamps Plot')
ax.legend()
ax.grid()
plt.show()

In [None]:
#loaded_group_metrics = group_metrics


# 2x2 map figure of the SMOS Tb statistics: Nobs, O-F mean, O-F stdv and
# normalized O-F stdv (the latter drawn as log10).
fig, axes = plt.subplots(2,2, figsize=(18,10))
plt.rcParams.update({'font.size':14})

smos_metrics = loaded_group_metrics['SMOS']
Nobs_data = smos_metrics['Nobs_data']
OmF_mean = smos_metrics['OmF_mean']
OmF_stdv = smos_metrics['OmF_stdv']
OmF_norm_mean = smos_metrics['OmF_norm_mean']  # loaded but not mapped below
OmF_norm_stdv = smos_metrics['OmF_norm_stdv']

# Period tags used in the titles and in the output file name
period_ym = start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
period_ymd = start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

# One row per panel:
#   (row, col, statistic kind, tile data, [cmin, cmax], (colormap, n colours), title, units)
# The statistic kind is explicit so that the branch taken below never depends on
# what the title (which embeds expid) happens to contain.
panels = [
    (0, 0, 'count', Nobs_data, [0, 5000], ('jet', 20),
     expid + ' SMOS Tb Nobs ' + period_ym, '[-]'),
    (0, 1, 'mean', OmF_mean, [-10, 10], ('bwr', 15),
     expid + ' SMOS Tb O-F mean ' + period_ym, '[K]'),
    (1, 0, 'stdv', OmF_stdv, [0, 15], ('jet', 15),
     expid + ' SMOS Tb O-F stdv ' + period_ym, '[K]'),
    # Normalized stdv is dimensionless and is mapped as log10, hence the log-scale range
    (1, 1, 'norm_stdv', OmF_norm_stdv, [-0.6, 0.45], ('jet', 15),
     expid + ' SMOS Tb normalized O-F stdv ' + period_ymd, '[-]'),
]

for i, j, kind, tile_data, crange, (cmap_name, n_colors), title_txt, units in panels:
    colormap = plt.get_cmap(cmap_name, n_colors)
    colormap.set_bad(color='0.9')  # light grey for missing cells (0 = black, 1 = white)

    # Regrid 1d tile_data to 2d grid_data for map plots
    if '_M09_' in domain:
        # Special case: scatter the tiles onto the 1624 x 3856 M09 grid, then
        # combine every 4x4 block of M09 cells into one cell of a 406 x 964 grid.
        grid_data_M09 = np.full((1624, 3856), np.nan)
        grid_data_M09[tc['j_indg'], tc['i_indg']] = tile_data
        reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)
        if kind == 'count':
            # Counts are summed; np.sum yields NaN for any block holding an unfilled cell
            grid_data = np.sum(reshaped, axis=(1, 3))
        else:
            grid_data = np.nanmean(reshaped, axis=(1, 3))
        lat_M36, lon_M36 = smapeasev2_ind2latlon(np.arange(406), np.arange(964), 'M36')
        lon_2d, lat_2d = np.meshgrid(lon_M36, lat_M36)
    else:
        grid_data, uy, ux = array2grid(tile_data, lat=tc['com_lat'], lon=tc['com_lon'])
        lon_2d, lat_2d = np.meshgrid(ux, uy)

    # Second title line: summary statistics of the gridded field (before any log10)
    if kind == 'norm_stdv':
        stats_txt = "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data-1.)))
    elif kind == 'mean':
        stats_txt = "avg=%.3f, avg(abs)=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data)))
    else:
        stats_txt = "avg=%.2f" % (np.nanmean(grid_data))
    title_txt = title_txt + '\n' + stats_txt + ' ' + units

    if kind == 'norm_stdv':
        grid_data = np.log10(grid_data)

    # Print the mean, min and max values of the grid data (as plotted)
    print('Mean of grid_data:', np.nanmean(grid_data))
    print('Min of grid_data:', np.nanmin(grid_data))
    print('Max of grid_data:', np.nanmax(grid_data))

    mm, cs = plotMap(grid_data, ax=axes[i, j], lat=lat_2d, lon=lon_2d, cRange=crange,
                     title=title_txt, cmap=colormap, bounding=[-60, 80, -180, 180])

plt.tight_layout()
# Save figure to file
fig.savefig(out_path + 'Map_SMOS_OmF_' + expid + '_' + period_ym + '.png')
plt.show()
plt.close(fig)

In [None]:
# 2x2 map figure of the SMAP Tb statistics: Nobs, O-F mean, O-F stdv and
# normalized O-F stdv (the latter drawn as log10).
fig, axes = plt.subplots(2,2, figsize=(18,10))
plt.rcParams.update({'font.size':14})

smap_metrics = loaded_group_metrics['SMAP']
Nobs_data = smap_metrics['Nobs_data']
OmF_mean = smap_metrics['OmF_mean']
OmF_stdv = smap_metrics['OmF_stdv']
OmF_norm_mean = smap_metrics['OmF_norm_mean']  # loaded but not mapped below
OmF_norm_stdv = smap_metrics['OmF_norm_stdv']

# Period tags used in the titles and in the output file name
period_ym = start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
period_ymd = start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

# One row per panel:
#   (row, col, statistic kind, tile data, [cmin, cmax], (colormap, n colours), title, units)
# The statistic kind is explicit so that the branch taken below never depends on
# what the title (which embeds expid) happens to contain.
panels = [
    (0, 0, 'count', Nobs_data, [0, 7000], ('jet', 20),
     expid + ' SMAP Tb Nobs ' + period_ym, '[-]'),
    (0, 1, 'mean', OmF_mean, [-10, 10], ('bwr', 15),
     expid + ' SMAP Tb O-F mean ' + period_ym, '[K]'),
    (1, 0, 'stdv', OmF_stdv, [0, 15], ('jet', 15),
     expid + ' SMAP Tb O-F stdv ' + period_ym, '[K]'),
    # Normalized stdv is dimensionless and is mapped as log10, hence the log-scale range
    (1, 1, 'norm_stdv', OmF_norm_stdv, [-0.6, 0.45], ('jet', 15),
     expid + ' SMAP Tb normalized O-F stdv ' + period_ymd, '[-]'),
]

for i, j, kind, tile_data, crange, (cmap_name, n_colors), title_txt, units in panels:
    colormap = plt.get_cmap(cmap_name, n_colors)
    colormap.set_bad(color='0.9')  # light grey for missing cells (0 = black, 1 = white)

    # Regrid 1d tile_data to 2d grid_data for map plots
    if '_M09_' in domain:
        # Special case: scatter the tiles onto the 1624 x 3856 M09 grid, then
        # combine every 4x4 block of M09 cells into one cell of a 406 x 964 grid.
        grid_data_M09 = np.full((1624, 3856), np.nan)
        grid_data_M09[tc['j_indg'], tc['i_indg']] = tile_data
        reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)
        if kind == 'count':
            # Counts are summed; np.sum yields NaN for any block holding an unfilled cell
            grid_data = np.sum(reshaped, axis=(1, 3))
        else:
            grid_data = np.nanmean(reshaped, axis=(1, 3))
        lat_M36, lon_M36 = smapeasev2_ind2latlon(np.arange(406), np.arange(964), 'M36')
        lon_2d, lat_2d = np.meshgrid(lon_M36, lat_M36)
    else:
        grid_data, uy, ux = array2grid(tile_data, lat=tc['com_lat'], lon=tc['com_lon'])
        lon_2d, lat_2d = np.meshgrid(ux, uy)

    # Second title line: summary statistics of the gridded field (before any log10)
    if kind == 'norm_stdv':
        stats_txt = "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data-1.)))
    elif kind == 'mean':
        stats_txt = "avg=%.3f, avg(abs)=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data)))
    else:
        stats_txt = "avg=%.2f" % (np.nanmean(grid_data))
    title_txt = title_txt + '\n' + stats_txt + ' ' + units

    if kind == 'norm_stdv':
        grid_data = np.log10(grid_data)

    # Print the mean, min and max values of the grid data (as plotted)
    print('Mean of grid_data:', np.nanmean(grid_data))
    print('Min of grid_data:', np.nanmin(grid_data))
    print('Max of grid_data:', np.nanmax(grid_data))

    mm, cs = plotMap(grid_data, ax=axes[i, j], lat=lat_2d, lon=lon_2d, cRange=crange,
                     title=title_txt, cmap=colormap, bounding=[-60, 80, -180, 180])

plt.tight_layout()
# Save figure to file
fig.savefig(out_path + 'Map_SMAP_OmF_' + expid + '_' + period_ym + '.png')
plt.show()
plt.close(fig)

In [None]:
# 2x2 map figure of the ASCAT SM statistics: Nobs, O-F mean, O-F stdv and
# normalized O-F stdv (the latter drawn as log10).
fig, axes = plt.subplots(2,2, figsize=(18,10))
plt.rcParams.update({'font.size':14})

ascat_metrics = loaded_group_metrics['ASCAT']
Nobs_data = ascat_metrics['Nobs_data']
OmF_mean = ascat_metrics['OmF_mean']
OmF_stdv = ascat_metrics['OmF_stdv']
OmF_norm_mean = ascat_metrics['OmF_norm_mean']  # loaded but not mapped below
OmF_norm_stdv = ascat_metrics['OmF_norm_stdv']

# Period tags used in the titles and in the output file name
period_ym = start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
period_ymd = start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

# One row per panel:
#   (row, col, statistic kind, tile data, [cmin, cmax], (colormap, n colours), title, units)
# The statistic kind is explicit so that the branch taken below never depends on
# what the title (which embeds expid) happens to contain.
panels = [
    (0, 0, 'count', Nobs_data, [0, 12000], ('jet', 20),
     expid + ' ASCAT SM Nobs ' + period_ym, '[-]'),
    (0, 1, 'mean', OmF_mean, [-0.05, 0.05], ('bwr', 15),
     expid + ' ASCAT SM O-F mean ' + period_ym, '[m3/m3]'),
    (1, 0, 'stdv', OmF_stdv, [0, 0.1], ('jet', 15),
     expid + ' ASCAT SM O-F stdv ' + period_ym, '[m3/m3]'),
    # Normalized stdv is dimensionless and is mapped as log10, hence the log-scale range
    (1, 1, 'norm_stdv', OmF_norm_stdv, [-0.6, 0.45], ('jet', 15),
     expid + ' ASCAT SM normalized O-F stdv ' + period_ymd, '[-]'),
]

for i, j, kind, tile_data, crange, (cmap_name, n_colors), title_txt, units in panels:
    colormap = plt.get_cmap(cmap_name, n_colors)
    colormap.set_bad(color='0.9')  # light grey for missing cells (0 = black, 1 = white)

    # Regrid 1d tile_data to 2d grid_data for map plots
    if '_M09_' in domain:
        # Special case: scatter the tiles onto the 1624 x 3856 M09 grid, then
        # combine every 4x4 block of M09 cells into one cell of a 406 x 964 grid.
        grid_data_M09 = np.full((1624, 3856), np.nan)
        grid_data_M09[tc['j_indg'], tc['i_indg']] = tile_data
        reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)
        if kind == 'count':
            # Counts are summed; np.sum yields NaN for any block holding an unfilled cell
            grid_data = np.sum(reshaped, axis=(1, 3))
        else:
            grid_data = np.nanmean(reshaped, axis=(1, 3))
        lat_M36, lon_M36 = smapeasev2_ind2latlon(np.arange(406), np.arange(964), 'M36')
        lon_2d, lat_2d = np.meshgrid(lon_M36, lat_M36)
    else:
        grid_data, uy, ux = array2grid(tile_data, lat=tc['com_lat'], lon=tc['com_lon'])
        lon_2d, lat_2d = np.meshgrid(ux, uy)

    # Second title line: summary statistics of the gridded field (before any log10)
    if kind == 'norm_stdv':
        stats_txt = "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data-1.)))
    elif kind == 'mean':
        stats_txt = "avg=%.3f, avg(abs)=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data)))
    else:
        stats_txt = "avg=%.2f" % (np.nanmean(grid_data))
    title_txt = title_txt + '\n' + stats_txt + ' ' + units

    if kind == 'norm_stdv':
        grid_data = np.log10(grid_data)

    # Print the mean, min and max values of the grid data (as plotted)
    print('Mean of grid_data:', np.nanmean(grid_data))
    print('Min of grid_data:', np.nanmin(grid_data))
    print('Max of grid_data:', np.nanmax(grid_data))

    mm, cs = plotMap(grid_data, ax=axes[i, j], lat=lat_2d, lon=lon_2d, cRange=crange,
                     title=title_txt, cmap=colormap, bounding=[-60, 80, -180, 180])

plt.tight_layout()
# Save figure to file
fig.savefig(out_path + 'Map_ASCAT_OmF_' + expid + '_' + period_ym + '.png')
plt.show()
plt.close(fig)

In [None]:
# 2x2 map figure of the MODIS group ("SCF") statistics: Nobs, O-F mean, O-F stdv
# and normalized O-F stdv (the latter drawn as log10).
fig, axes = plt.subplots(2,2, figsize=(18,10))
plt.rcParams.update({'font.size':14})

modis_metrics = loaded_group_metrics['MODIS']
Nobs_data = modis_metrics['Nobs_data']
OmF_mean = modis_metrics['OmF_mean']
OmF_stdv = modis_metrics['OmF_stdv']
OmF_norm_mean = modis_metrics['OmF_norm_mean']  # loaded but not mapped below
OmF_norm_stdv = modis_metrics['OmF_norm_stdv']

# Period tags used in the titles and in the output file name
period_ym = start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
period_ymd = start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

# One row per panel:
#   (row, col, statistic kind, tile data, [cmin, cmax], (colormap, n colours), title, units)
# The statistic kind is explicit so that the branch taken below never depends on
# what the title (which embeds expid) happens to contain.
panels = [
    (0, 0, 'count', Nobs_data, [0, 20000], ('jet', 20),
     expid + ' SCF Nobs ' + period_ym, '[-]'),
    (0, 1, 'mean', OmF_mean, [-1, 1], ('bwr', 15),
     expid + ' SCF O-F mean ' + period_ym, '[frac.]'),
    (1, 0, 'stdv', OmF_stdv, [0, 0.5], ('jet', 15),
     expid + ' SCF O-F stdv ' + period_ym, '[frac.]'),
    # Normalized stdv is dimensionless and is mapped as log10, hence the log-scale range
    (1, 1, 'norm_stdv', OmF_norm_stdv, [-0.6, 0.45], ('jet', 15),
     expid + ' SCF normalized O-F stdv ' + period_ymd, '[-]'),
]

for i, j, kind, tile_data, crange, (cmap_name, n_colors), title_txt, units in panels:
    colormap = plt.get_cmap(cmap_name, n_colors)
    colormap.set_bad(color='0.9')  # light grey for missing cells (0 = black, 1 = white)

    # Regrid 1d tile_data to 2d grid_data for map plots
    if '_M09_' in domain:
        # Special case: scatter the tiles onto the 1624 x 3856 M09 grid, then
        # combine every 4x4 block of M09 cells into one cell of a 406 x 964 grid.
        grid_data_M09 = np.full((1624, 3856), np.nan)
        grid_data_M09[tc['j_indg'], tc['i_indg']] = tile_data
        reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)
        if kind == 'count':
            # Counts are summed; np.sum yields NaN for any block holding an unfilled cell
            grid_data = np.sum(reshaped, axis=(1, 3))
        else:
            grid_data = np.nanmean(reshaped, axis=(1, 3))
        lat_M36, lon_M36 = smapeasev2_ind2latlon(np.arange(406), np.arange(964), 'M36')
        lon_2d, lat_2d = np.meshgrid(lon_M36, lat_M36)
    else:
        grid_data, uy, ux = array2grid(tile_data, lat=tc['com_lat'], lon=tc['com_lon'])
        lon_2d, lat_2d = np.meshgrid(ux, uy)

    # Second title line: summary statistics of the gridded field (before any log10)
    if kind == 'norm_stdv':
        stats_txt = "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data-1.)))
    elif kind == 'mean':
        stats_txt = "avg=%.3f, avg(abs)=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data)))
    else:
        stats_txt = "avg=%.2f" % (np.nanmean(grid_data))
    title_txt = title_txt + '\n' + stats_txt + ' ' + units

    if kind == 'norm_stdv':
        grid_data = np.log10(grid_data)

    mm, cs = plotMap(grid_data, ax=axes[i, j], lat=lat_2d, lon=lon_2d, cRange=crange,
                     title=title_txt, cmap=colormap, bounding=[-60, 80, -180, 180])

    # Print the mean, min and max values of the grid data (as plotted)
    print('Mean of grid_data:', np.nanmean(grid_data))
    print('Min of grid_data:', np.nanmin(grid_data))
    print('Max of grid_data:', np.nanmax(grid_data))

plt.tight_layout()
# Save figure to file
fig.savefig(out_path + 'Map_SCF_OmF_' + expid + '_' + period_ym + '.png')
plt.show()
plt.close(fig)

In [None]:
# 2x2 map figure of the CYGNSS statistics: Nobs, O-F mean, O-F stdv and
# normalized O-F stdv (the latter drawn as log10).
fig, axes = plt.subplots(2,2, figsize=(18,10))
plt.rcParams.update({'font.size':14})

cygnss_metrics = loaded_group_metrics['CYGNSS']
Nobs_data = cygnss_metrics['Nobs_data']
OmF_mean = cygnss_metrics['OmF_mean']
OmF_stdv = cygnss_metrics['OmF_stdv']
OmF_norm_mean = cygnss_metrics['OmF_norm_mean']  # loaded but not mapped below
OmF_norm_stdv = cygnss_metrics['OmF_norm_stdv']

# Period tags used in the titles and in the output file name
period_ym = start_time.strftime('%Y%m')+'_'+end_time.strftime('%Y%m')
period_ymd = start_time.strftime('%Y%m%d')+'_'+end_time.strftime('%Y%m%d')

# One row per panel:
#   (row, col, statistic kind, tile data, [cmin, cmax], (colormap, n colours), title, units)
# The statistic kind is explicit so that the branch taken below never depends on
# what the title (which embeds expid) happens to contain.
# NOTE(review): '[frac.]' matches the snow-cover cell this one was copied from -
# confirm it is the right unit for the CYGNSS observations.
panels = [
    (0, 0, 'count', Nobs_data, [0, 3000], ('jet', 20),
     expid + ' CYGNSS Nobs ' + period_ym, '[-]'),
    (0, 1, 'mean', OmF_mean, [-0.25, 0.25], ('bwr', 15),
     expid + ' CYGNSS O-F mean ' + period_ym, '[frac.]'),
    (1, 0, 'stdv', OmF_stdv, [0, 0.1], ('jet', 15),
     expid + ' CYGNSS O-F stdv ' + period_ym, '[frac.]'),
    # Normalized stdv is dimensionless and is mapped as log10, hence the log-scale range
    (1, 1, 'norm_stdv', OmF_norm_stdv, [-0.6, 0.45], ('jet', 15),
     expid + ' CYGNSS normalized O-F stdv ' + period_ymd, '[-]'),
]

for i, j, kind, tile_data, crange, (cmap_name, n_colors), title_txt, units in panels:
    colormap = plt.get_cmap(cmap_name, n_colors)
    colormap.set_bad(color='0.9')  # light grey for missing cells (0 = black, 1 = white)

    # Regrid 1d tile_data to 2d grid_data for map plots
    if '_M09_' in domain:
        # Special case: scatter the tiles onto the 1624 x 3856 M09 grid, then
        # combine every 4x4 block of M09 cells into one cell of a 406 x 964 grid.
        grid_data_M09 = np.full((1624, 3856), np.nan)
        grid_data_M09[tc['j_indg'], tc['i_indg']] = tile_data
        reshaped = grid_data_M09.reshape(1624//4, 4, 3856//4, 4)
        if kind == 'count':
            # Counts are summed; np.sum yields NaN for any block holding an unfilled cell
            grid_data = np.sum(reshaped, axis=(1, 3))
        else:
            grid_data = np.nanmean(reshaped, axis=(1, 3))
        lat_M36, lon_M36 = smapeasev2_ind2latlon(np.arange(406), np.arange(964), 'M36')
        lon_2d, lat_2d = np.meshgrid(lon_M36, lat_M36)
    else:
        grid_data, uy, ux = array2grid(tile_data, lat=tc['com_lat'], lon=tc['com_lon'])
        lon_2d, lat_2d = np.meshgrid(ux, uy)

    # Second title line: summary statistics of the gridded field (before any log10)
    if kind == 'norm_stdv':
        stats_txt = "avg=%.3f, avg(abs(nstdv-1))=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data-1.)))
    elif kind == 'mean':
        stats_txt = "avg=%.3f, avg(abs)=%.3f" % (np.nanmean(grid_data), np.nanmean(np.abs(grid_data)))
    else:
        stats_txt = "avg=%.2f" % (np.nanmean(grid_data))
    title_txt = title_txt + '\n' + stats_txt + ' ' + units

    if kind == 'norm_stdv':
        grid_data = np.log10(grid_data)

    mm, cs = plotMap(grid_data, ax=axes[i, j], lat=lat_2d, lon=lon_2d, cRange=crange,
                     title=title_txt, cmap=colormap, bounding=[-60, 80, -180, 180])

    # Print the mean, min and max values of the grid data (as plotted)
    print('Mean of grid_data:', np.nanmean(grid_data))
    print('Min of grid_data:', np.nanmin(grid_data))
    print('Max of grid_data:', np.nanmax(grid_data))

plt.tight_layout()
# Save figure to file. The name carries 'CYGNSS' so this figure does not
# overwrite the 'Map_SCF_OmF_' figure written for the MODIS group.
fig.savefig(out_path + 'Map_CYGNSS_OmF_' + expid + '_' + period_ym + '.png')
plt.show()
plt.close(fig)

In [None]:
# Load the monthly O-F data and the group metrics of the DA and the OL experiment.
# NOTE: expdir, expid and out_path are rebound here; after this cell they refer to
# the OL experiment, and later cells save their figures through out_path.
# NOTE: pickle.load can execute arbitrary code - only open files you produced yourself.
def _load_omf_pickles(exp_dir, exp_id):
    """Read both pickles of one experiment from its figures directory.

    Uses the notebook-level domain, start_time and end_time to build the paths.
    Returns (figures_dir, monthly_data, group_metrics).
    """
    figures_dir = exp_dir + exp_id + '/output/' + domain + '/figures/'
    period_tag = f'{exp_id}_{start_time.strftime("%Y%m%d")}_{end_time.strftime("%Y%m%d")}'
    contents = []
    for stem in ('monthly_OmF_data', 'group_metrics'):
        with open(figures_dir + f'{stem}_{period_tag}.pkl', 'rb') as fh:
            contents.append(pickle.load(fh))
    return figures_dir, contents[0], contents[1]


# Data-assimilation experiment
expdir = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/CYGNSS_Experiments/DAv8_M36_Aus/'
expid = 'DAv8_M36_Aus'
out_path, monthly_data_da, group_metrics_da = _load_omf_pickles(expdir, expid)
monthly_timestamps_da = monthly_data_da['monthly_timestamps']

# Open-loop experiment
expdir = '/Users/amfox/Desktop/GEOSldas_diagnostics/test_data/CYGNSS_Experiments/OLv8_M36_Aus/'
expid = 'OLv8_M36_Aus'
out_path, monthly_data_ol, group_metrics_ol = _load_omf_pickles(expdir, expid)
monthly_timestamps_ol = monthly_data_ol['monthly_timestamps']



In [None]:
# Plot the monthly OmF standard deviation of every entry in the open-loop data
# (monthly_data_ol). The x-axis uses the OL timestamps so that the series and the
# time axis always come from the same experiment.
plt.figure(figsize=(10, 6))
for group, values in monthly_data_ol['OmF_stdv_all_months'].items():
    # Months with a value of exactly 0 are left out of the mean shown in the legend
    mean_value = np.nanmean([v for v in values if v != 0])
    plt.plot(monthly_timestamps_ol, values, label=f'{group} (mean={mean_value:.3f})')
    print(f"{group} OmF StdDev: {values}")
plt.gcf().autofmt_xdate()
plt.xlabel('Month')
plt.ylabel('OmF Standard Deviation')
plt.title('OmF Standard Deviation for Each Species Group')
plt.legend()
plt.grid()
plt.show()

In [None]:
def group_species_data(monthly_data, species_groups):
    """Combine per-species monthly O-F statistics into per-group statistics.

    Parameters
    ----------
    monthly_data : dict
        Must provide 'N_data_group_all_months', 'OmF_mean_all_months' and
        'OmF_stdv_all_months', each indexable as ``[species][month_idx]``, and
        'monthly_timestamps', whose length sets the number of months.
    species_groups : dict
        Maps a group name to the species indices that belong to it.

    Returns
    -------
    dict
        Same four keys as ``monthly_data``. The three statistics are dicts
        keyed by group name holding one value per month; 'monthly_timestamps'
        is passed through unchanged.

    Notes
    -----
    For each group and month, N_data is the sum over the member species, and
    the mean and stdv are averages weighted by each species' N_data. Species
    with zero observations are left out of the average, so a NaN placeholder
    for an unobserved species does not turn the group value into NaN. If the
    group has no observations in a month, both mean and stdv are NaN.
    The group stdv is a weighted average of the species stdv values, not a
    pooled standard deviation.
    """
    n_by_species = monthly_data['N_data_group_all_months']
    mean_by_species = monthly_data['OmF_mean_all_months']
    stdv_by_species = monthly_data['OmF_stdv_all_months']
    timestamps = monthly_data['monthly_timestamps']

    # One (initially empty) monthly list per group
    N_data_grouped = {group: [] for group in species_groups}
    OmF_mean_grouped = {group: [] for group in species_groups}
    OmF_stdv_grouped = {group: [] for group in species_groups}

    for month_idx in range(len(timestamps)):
        for group_name, species_indices in species_groups.items():
            counts = [n_by_species[species][month_idx] for species in species_indices]
            N_data_sum = sum(counts)

            if N_data_sum > 0:
                weights = np.asarray(counts, dtype=float)
                mean_values = np.asarray(
                    [mean_by_species[species][month_idx] for species in species_indices],
                    dtype=float)
                stdv_values = np.asarray(
                    [stdv_by_species[species][month_idx] for species in species_indices],
                    dtype=float)
                # Zero-weight species contribute nothing mathematically; dropping
                # them keeps a NaN value (nan * 0 == nan) from poisoning the average.
                observed = weights > 0
                weighted_OmF_mean = np.average(mean_values[observed], weights=weights[observed])
                weighted_OmF_stdv = np.average(stdv_values[observed], weights=weights[observed])
            else:
                # No observations (or an undefined count) for this group and month
                weighted_OmF_mean = np.nan
                weighted_OmF_stdv = np.nan

            N_data_grouped[group_name].append(N_data_sum)
            OmF_mean_grouped[group_name].append(weighted_OmF_mean)
            OmF_stdv_grouped[group_name].append(weighted_OmF_stdv)

    return {
        'N_data_group_all_months': N_data_grouped,
        'OmF_mean_all_months': OmF_mean_grouped,
        'OmF_stdv_all_months': OmF_stdv_grouped,
        'monthly_timestamps': timestamps
    }


In [None]:
# Collapse the per-species monthly data of both experiments into species groups
grouped_data_da = group_species_data(monthly_data_da, species_groups)
grouped_data_ol = group_species_data(monthly_data_ol, species_groups)

# Convenience names for the plotting cells below (DA first, then OL)
N_data_group_all_months_da, N_data_group_all_months_ol = (
    grouped['N_data_group_all_months'] for grouped in (grouped_data_da, grouped_data_ol))
OmF_mean_all_months_da, OmF_mean_all_months_ol = (
    grouped['OmF_mean_all_months'] for grouped in (grouped_data_da, grouped_data_ol))
OmF_stdv_all_months_da, OmF_stdv_all_months_ol = (
    grouped['OmF_stdv_all_months'] for grouped in (grouped_data_da, grouped_data_ol))
monthly_timestamps_da, monthly_timestamps_ol = (
    grouped['monthly_timestamps'] for grouped in (grouped_data_da, grouped_data_ol))

# A monthly mean/stdv of exactly 0 is replaced by NaN (in place, in the grouped
# dictionaries) so that such months drop out of the lines and of np.nanmean.
for series_by_group in (OmF_mean_all_months_da, OmF_stdv_all_months_da,
                        OmF_mean_all_months_ol, OmF_stdv_all_months_ol):
    for group in list(series_by_group):
        series_by_group[group] = [np.nan if v == 0 else v for v in series_by_group[group]]


In [None]:

# All figures in this cell are written to the current out_path.

# --- Observation counts of every group, DA (solid) and OL (dashed), on one figure ---
fig, ax = plt.subplots(figsize=(10, 6))
for group, values in N_data_group_all_months_da.items():
    values_in_millions = [v / 1e6 for v in values]  # counts -> millions
    ax.plot(monthly_timestamps_da, values_in_millions, label=f'{group} DA')
    print(f"{group} DA N_data (in millions): {values_in_millions}")
for group, values in N_data_group_all_months_ol.items():
    values_in_millions = [v / 1e6 for v in values]  # counts -> millions
    ax.plot(monthly_timestamps_ol, values_in_millions, label=f'{group} OL', linestyle='--')
    print(f"{group} OL N_data (in millions): {values_in_millions}")
fig.autofmt_xdate()
ax.set_xlabel('Month')
ax.set_ylabel('N_data (Millions)')
ax.set_title('Total N_data by Species Group (in Millions) for DA and OL')
ax.legend()
ax.grid()
fig.savefig(out_path + 'N_data_group_OLS_OL.png')
plt.show()
plt.close(fig)

# --- O-F mean, then O-F stdv: one DA-vs-OL figure per group ---
# Each row: (DA series by group, OL series by group, name used in the printout,
#            axis/title name, file stem)
comparison_specs = (
    (OmF_mean_all_months_da, OmF_mean_all_months_ol, 'OmF Mean', 'OmF Mean', 'OmF_Mean'),
    (OmF_stdv_all_months_da, OmF_stdv_all_months_ol, 'OmF Stdv', 'OmF Standard Deviation', 'OmF_Stdv'),
)
for da_by_group, ol_by_group, print_name, long_name, file_stem in comparison_specs:
    for group, values in da_by_group.items():
        fig, ax = plt.subplots(figsize=(10, 6))
        ol_values = ol_by_group[group]
        # Time averages (ignoring NaN months) shown in the legend
        mean_da = np.nanmean(values)
        mean_ol = np.nanmean(ol_values)
        ax.plot(monthly_timestamps_da, values, label=f'{group} DA (mean={mean_da:.3f})')
        print(f"{group} DA {print_name}: {values}, Mean: {mean_da:.3f}")
        ax.plot(monthly_timestamps_ol, ol_values, label=f'{group} OL (mean={mean_ol:.3f})', linestyle='--')
        print(f"{group} OL {print_name}: {ol_values}, Mean: {mean_ol:.3f}")
        fig.autofmt_xdate()
        ax.set_xlabel('Month')
        ax.set_ylabel(long_name)
        ax.set_title(f'{long_name} for {group} (DA vs OL)')
        ax.legend()
        ax.grid()
        fig.savefig(out_path + f'{file_stem}_{group}_OLS_OL.png')
        plt.show()
        plt.close(fig)

# --- Observation counts again, now one DA-vs-OL figure per group ---
for group, values in N_data_group_all_months_da.items():
    fig, ax = plt.subplots(figsize=(10, 6))
    values_in_millions_da = [v / 1e6 for v in values]  # counts -> millions
    values_in_millions_ol = [v / 1e6 for v in N_data_group_all_months_ol[group]]
    mean_da = np.nanmean(values_in_millions_da)
    mean_ol = np.nanmean(values_in_millions_ol)
    ax.plot(monthly_timestamps_da, values_in_millions_da, label=f'{group} DA (mean={mean_da:.3f})')
    print(f"{group} DA N_data (in millions): {values_in_millions_da}, Mean: {mean_da:.3f}")
    ax.plot(monthly_timestamps_ol, values_in_millions_ol, label=f'{group} OL (mean={mean_ol:.3f})', linestyle='--')
    print(f"{group} OL N_data (in millions): {values_in_millions_ol}, Mean: {mean_ol:.3f}")
    fig.autofmt_xdate()
    ax.set_xlabel('Month')
    ax.set_ylabel('N_data (Millions)')
    ax.set_title(f'Total N_data for {group} (DA vs OL)')
    ax.legend()
    ax.grid()
    fig.savefig(out_path + f'N_data_{group}_OLS_OL.png')
    plt.show()
    plt.close(fig)


In [None]:
import sys
# Make the mapping helpers in the sibling 'Jupyter' directory importable.
sys.path.append('../Jupyter')
from mapper_functions import plot_global_tight_pcm, plot_aus_tight_pcm

# Tile coordinates taken from the tilecoord structure read earlier in the notebook.
lat = tc['com_lat']
lon = tc['com_lon']

# One row per tile.
n_tile = len(lat)

# Working array handed to the map plotting helpers, shape [n_tile, 3]:
# - Column 0: the value to be mapped (NaN here; overwritten by the plotting cells below)
# - Column 1: longitude
# - Column 2: latitude
map_array = np.full([n_tile, 3], np.nan)
map_array[:, 1] = lon
map_array[:, 2] = lat

In [None]:

group_id = 'SMOS'

# The OL OmF standard deviation is the reference. Exact zeros are replaced by NaN
# so the ratio below yields NaN instead of dividing by zero.
stdv_ol = np.array(group_metrics_ol[group_id]['OmF_stdv'])
stdv_ol[stdv_ol == 0] = np.nan
stdv_da = np.array(group_metrics_da[group_id]['OmF_stdv'])

# Percent change of DA relative to OL goes into the value column of map_array.
map_array[:, 0] = ((stdv_da - stdv_ol) / stdv_ol) * 100

# NaN-aware extremes, reported in the plot title.
maxval, minval = np.nanmax(map_array[:, 0]), np.nanmin(map_array[:, 0])

# Same title and settings for both map helpers (global first, then Australia).
title = f'DA-OL relative delta StdDev of OmF:\n {group_id} Tb (Max: {maxval:.3g} Min: {minval:.3g})'
for plot_map in (plot_global_tight_pcm, plot_aus_tight_pcm):
    plot_map(map_array, True, True, title, '%', -60, 60)


In [None]:

group_id = 'SMAP'

# The OL OmF standard deviation is the reference. Exact zeros are replaced by NaN
# so the ratio below yields NaN instead of dividing by zero.
stdv_ol = np.array(group_metrics_ol[group_id]['OmF_stdv'])
stdv_ol[stdv_ol == 0] = np.nan
stdv_da = np.array(group_metrics_da[group_id]['OmF_stdv'])

# Percent change of DA relative to OL goes into the value column of map_array.
map_array[:, 0] = ((stdv_da - stdv_ol) / stdv_ol) * 100

# NaN-aware extremes, reported in the plot title.
maxval, minval = np.nanmax(map_array[:, 0]), np.nanmin(map_array[:, 0])

# Same title and settings for both map helpers (global first, then Australia).
title = f'DA-OL relative delta StdDev of OmF:\n {group_id} Tb (Max: {maxval:.3g} Min: {minval:.3g})'
for plot_map in (plot_global_tight_pcm, plot_aus_tight_pcm):
    plot_map(map_array, True, True, title, '%', -60, 60)


In [None]:
group_id = 'ASCAT'

# The OL OmF standard deviation is the reference. Exact zeros are replaced by NaN
# so the ratio below yields NaN instead of dividing by zero.
stdv_ol = np.array(group_metrics_ol[group_id]['OmF_stdv'])
stdv_ol[stdv_ol == 0] = np.nan
stdv_da = np.array(group_metrics_da[group_id]['OmF_stdv'])

# Percent change of DA relative to OL goes into the value column of map_array.
map_array[:, 0] = ((stdv_da - stdv_ol) / stdv_ol) * 100

# NaN-aware extremes, reported in the plot title.
maxval, minval = np.nanmax(map_array[:, 0]), np.nanmin(map_array[:, 0])

# Same title and settings for both map helpers (global first, then Australia).
title = f'DA-OL relative delta StdDev of OmF:\n {group_id} SM (Max: {maxval:.3g} Min: {minval:.3g})'
for plot_map in (plot_global_tight_pcm, plot_aus_tight_pcm):
    plot_map(map_array, True, True, title, '%', -60, 60)


In [None]:
group_id = 'CYGNSS'

# The OL OmF standard deviation is the reference. Exact zeros are replaced by NaN
# so the ratio below yields NaN instead of dividing by zero.
stdv_ol = np.array(group_metrics_ol[group_id]['OmF_stdv'])
stdv_ol[stdv_ol == 0] = np.nan
stdv_da = np.array(group_metrics_da[group_id]['OmF_stdv'])

# Percent change of DA relative to OL goes into the value column of map_array.
map_array[:, 0] = ((stdv_da - stdv_ol) / stdv_ol) * 100

# NaN-aware extremes, reported in the plot title.
maxval, minval = np.nanmax(map_array[:, 0]), np.nanmin(map_array[:, 0])

# Same title and settings for both map helpers (global first, then Australia).
title = f'DA-OL relative delta StdDev of OmF:\n {group_id} SM (Max: {maxval:.3g} Min: {minval:.3g})'
for plot_map in (plot_global_tight_pcm, plot_aus_tight_pcm):
    plot_map(map_array, True, True, title, '%', -60, 60)

# Restrict the CYGNSS map to tiles where the SMAP OL OmF standard deviation is not NaN:
# the mask is 1 on those tiles and NaN elsewhere, so multiplying blanks the rest.
smap_stdv_ol = np.array(group_metrics_ol['SMAP']['OmF_stdv'])
mask = np.where(np.isnan(smap_stdv_ol), np.nan, 1)
map_array[:, 0] *= mask

# Recompute the extremes on the masked field before re-plotting the Australian map.
maxval, minval = np.nanmax(map_array[:, 0]), np.nanmin(map_array[:, 0])

plot_aus_tight_pcm(map_array, True, True, f'DA-OL relative delta StdDev of OmF:\n {group_id} SM (SMAP "mask") (Max: {maxval:.3g} Min: {minval:.3g})', '%', -60, 60)

In [None]:
group_id = 'CYGNSS'

# One entry per Australian map, drawn in this order:
#   (metrics dictionary, metric key, title label, units string, and the final two
#    arguments passed to plot_aus_tight_pcm -- presumably the colour-scale limits;
#    confirm against mapper_functions).
map_specs = [
    (group_metrics_ol, 'Nobs_data', 'Number of observations', '-', 0, 3000),
    (group_metrics_ol, 'OmF_mean', 'OL OmF Mean', 'm³/m³', -0.3, 0.3),
    (group_metrics_da, 'OmF_mean', 'DA OmF Mean', 'm³/m³', -0.3, 0.3),
    (group_metrics_ol, 'OmF_stdv', 'OL OmF StDv', 'm³/m³', 0, 0.1),
    (group_metrics_da, 'OmF_stdv', 'DA OmF StDv', 'm³/m³', 0., 0.1),
]

for metrics, metric_key, label, units, vmin, vmax in map_specs:
    # Work on a float copy: assigning NaN into an integer array raises ValueError,
    # and the dtype of these metrics (Nobs_data in particular) is not guaranteed here.
    field = np.array(metrics[group_id][metric_key], dtype=float)

    # Exact zeros are blanked out (NaN) so they are ignored by the min/max below.
    field[field == 0] = np.nan

    # Column 0 of map_array holds the values to be mapped.
    map_array[:, 0] = field

    # Calculate max and min values, ignoring NaNs
    maxval = np.nanmax(map_array[:, 0])
    minval = np.nanmin(map_array[:, 0])

    plot_aus_tight_pcm(map_array, True, True, f'{label}:\n {group_id} SM (Max: {maxval:.3g} Min: {minval:.3g})', units, vmin, vmax)



In [None]:

# Species group mapped in this cell. Set explicitly so the result does not depend on
# whichever earlier cell happened to assign group_id last.
group_id = 'CYGNSS'

# The OL OmF standard deviation is the reference. Cast to float so NaN can be stored,
# and replace exact zeros with NaN so the ratio below never divides by zero.
denominator = np.array(group_metrics_ol[group_id]['OmF_stdv'], dtype=float)
denominator[denominator == 0] = np.nan

# Relative change of DA with respect to OL, expressed as a fraction (not percent).
map_array[:, 0] = (np.array(group_metrics_da[group_id]['OmF_stdv']) - denominator) / denominator

# Calculate max and min values, ignoring NaNs
maxval = np.nanmax(map_array[:, 0])
minval = np.nanmin(map_array[:, 0])

plot_aus_tight_pcm(map_array, True, True, f'(DA - OL OmF StDv)/OL:\n {group_id} SM (Max: {maxval:.3g} Min: {minval:.3g})', '-', -1, 1)