In [None]:
"""
Sample script for computing/plotting data assimilation (DA) diagnostics from 
pre-saved monthly sums.

"""

import sys;       sys.path.append('../../shared/python/')
import warnings;  warnings.filterwarnings("ignore")
import os

import numpy             as np
import matplotlib.pyplot as plt

from datetime               import datetime, timedelta
from dateutil.relativedelta import relativedelta
from netCDF4                import Dataset, num2date
# from mpl_toolkits.basemap   import Basemap

from read_GEOSldas          import read_tilecoord, read_obs_param
from tile2grid              import tile2grid
#from plot                   import plotMap
from EASEv2                 import EASEv2_ind2latlon

from postproc_ObsFcstAna    import postproc_ObsFcstAna

# Uncomment the lines below when running the script in the background so that standard output/error are written unbuffered and can be followed while it runs
# import io
#sys.stdout = io.TextIOWrapper(open(sys.stdout.fileno(), 'wb', 0), write_through=True)
#sys.stderr = io.TextIOWrapper(open(sys.stderr.fileno(), 'wb', 0), write_through=True)

In [None]:
# Processing time range chosen by the user, given as (year, month, day)
start_time = datetime(year=2002, month=10, day=1)
end_time   = datetime(year=2006, month=10, day=1)

# -------------------------------- Experiments Information -----------------------------------------
# Supports single experiment or multiple experiments.
# All experiments must have identical tilecoords and number/order of observation species.
# If the default "species" number/order does not match across experiments, set the *optional*
#   "species_list" key of the experiment dict to get a match, i.e. the same species sequence.
# This capability is required to enable calculating OmF/OmA statistics for one experiment
#   using observations from another experiment. See note below.

# exp_main = { 'expdir' : '/discover/nobackup/projects/land_da/CYGNSS_Experiments/OLv8_M36_Aus/',
#                     'expid' : 'OLv8_M36_Aus',
#                     'exptag': 'OL', 
#                     'domain':  'SMAP_EASEv2_M36_GLOBAL',
#                     'da_t0' : 3,       # first hour of the month 
#                     'da_dt' : 10800}   # ObsFcstAna file interval in seconds

# exp_sup1 = { 'expdir' : '/discover/nobackup/projects/land_da/CYGNSS_Experiments/DAv8_M36_Aus_v3/',
#                     'expid' : 'DAv8_M36_Aus',
#                     'exptag': 'DA', 
#                     'domain':  'SMAP_EASEv2_M36_GLOBAL',
#                     'da_t0' : 3,       # first hour of the month 
#                     'da_dt' : 10800}   # ObsFcstAna file interval in seconds

# Main experiment: supplies the forecasts/analyses (first entry of exp_list below).
# NOTE(review): 'expdir' names the 1e_LS_DAv8_M36_0 directory while 'expid' is
#   1e_LS_OLv8_M36_0 -- confirm that the open-loop output really lives under that directory.
exp_main = dict(
    expdir = '/discover/nobackup/projects/land_da/snow_qc_expts/1e_LS_DAv8_M36_0/',
    expid  = '1e_LS_OLv8_M36_0',
    exptag = 'OL',
    domain = 'SMAP_EASEv2_M36_GLOBAL',
    da_t0  = 3,        # first hour of the month
    da_dt  = 10800,    # ObsFcstAna file interval in seconds
)

# Supplemental experiment: supplies the observations (see 'obs_from' below).
exp_sup1 = dict(
    expdir = '/discover/nobackup/projects/land_da/snow_qc_expts/1e_LS_DAv8_M36_0/',
    expid  = '1e_LS_DAv8_M36_0',
    exptag = 'DA',
    domain = 'SMAP_EASEv2_M36_GLOBAL',
    da_t0  = 3,        # first hour of the month
    da_dt  = 10800,    # ObsFcstAna file interval in seconds
)


# Forecasts/analyses are taken from the first experiment in the list.
# Observations are taken from the experiment at index 'obs_from'.
# The most likely use case is that _scaled_ observations from a DA experiment
#   are used to compute OmF etc. diagnostics for a corresponding open-loop experiment.

exp_list = [exp_main, exp_sup1]
obs_from = 1                            # obs is from "exp_sup1" (0-based indexing)
if not obs_from < len(exp_list):
    raise ValueError('Invalid "obs_from" value')


# Both directories below sit under the output tree of the first experiment in exp_list:
#   <expdir><expid>/output/<domain>
_main_exp        = exp_list[0]
_main_output_dir = f"{_main_exp['expdir']}{_main_exp['expid']}/output/{_main_exp['domain']}"

# User provided directory of the monthly sum files
monthly_sums_path = _main_output_dir + '/ana/ens_avg/'

# User provided output directory; it is created here if it does not exist yet
out_path = _main_output_dir + '/figures/'
os.makedirs(out_path, exist_ok=True)

In [None]:
# Attach tilecoord and obs_param information to every experiment dict
for exp in exp_list:
    expdir, expid, domain = exp['expdir'], exp['expid'], exp['domain']

    # obsparam file located via the start date of the processing period
    fop = (f"{expdir}{expid}/output/{domain}/rc_out/"
           f"Y{start_time:%Y}/M{start_time:%m}/"
           f"{expid}.ldas_obsparam.{start_time:%Y%m%d}_0000z.txt")
    obsparam = read_obs_param(fop)

    # Species to keep: the optional 'species_list' entry of the experiment dict;
    # by default every species found in obsparam
    species_list = exp.get('species_list', [int(op['species']) for op in obsparam])

    # Keep only the obsparam entries whose species is in species_list so that the
    # species match across experiments (the order within obsparam is retained)
    obsparam = [op for op in obsparam if int(op['species']) in species_list]

    ftc = f"{expdir}{expid}/output/{domain}/rc_out/{expid}.ldas_tilecoord.bin"
    tc  = read_tilecoord(ftc)

    exp['tilecoord'] = tc
    exp['obsparam']  = obsparam

In [None]:
# Name of the statistics file inside out_path. When more than one experiment is listed,
# the tag of the experiment that supplies the observations becomes part of the name.
_main_tag     = exp_list[0]['exptag']
_stats_period = f"{start_time:%Y%m%d}_{end_time:%Y%m%d}"

if len(exp_list) > 1:
    stats_file = f"{out_path}tmp_stats_{_main_tag}_obsfrom_{exp_list[obs_from]['exptag']}_{_stats_period}.nc4"
else:
    stats_file = f"{out_path}tmp_stats_{_main_tag}_{_stats_period}.nc4"

print('stats_file:', stats_file)

In [None]:
#  Postprocess raw ObsFcstAna output data into monthly sums for simpler and faster postprocessing;
#  means and variances derived from the monthly sums can be used to compute DA diagnostics directly.
#  The statistics are only computed when stats_file does not exist yet; otherwise they are read back.

if os.path.isfile(stats_file):
    print('reading stats nc4 file '+stats_file)
    with Dataset(stats_file,'r') as nc:
        # Read every variable of the file; masked entries are replaced by NaN
        stats = {name: var[:].filled(np.nan) for name, var in nc.variables.items()}
else:
    # Initialize the postprocessing object
    postproc = postproc_ObsFcstAna(exp_list, start_time, end_time, obs_from=obs_from)
    # Step 1: Compute and save monthly sums
    postproc.save_monthly_sum(monthly_sums_path)
    # Step 2: Compute statistics from monthly sums and write the result to stats_file
    stats = postproc.calculate_stats_from_sums(mo_path=monthly_sums_path, write_to_nc=True, filename=stats_file)

In [None]:
# Sample of the final computation of selected diagnostic metrics

# Minimum number of data points required to keep a metric value
Nmin = 20

# Compute metrics of O-F, O-A, etc. from the statistics obtained above
N_data = stats['N_data']
O_mean = stats['obs_mean']

# mean(x-y) = E[x] - E[y]
OmF_mean = O_mean - stats['fcst_mean']
OmA_mean = O_mean - stats['ana_mean']

# var(x-y) = var(x) + var(y) - 2cov(x,y)
# cov(x,y) = E[xy] - E[x]E[y]
cov_obs_fcst = stats['oxf_mean'] - O_mean*stats['fcst_mean']
cov_obs_ana  = stats['oxa_mean'] - O_mean*stats['ana_mean']

OmF_stdv = np.sqrt(stats['obs_variance'] + stats['fcst_variance'] - 2 * cov_obs_fcst)
OmA_stdv = np.sqrt(stats['obs_variance'] + stats['ana_variance'] - 2 * cov_obs_ana)

# Normalize O-F by the sum of the mean obs and fcst error variances.
# "fcstvar" is assumed constant here for convenience. Modify if necessary
obs_plus_fcst_var = stats['obsvar_mean'] + stats['fcstvar_mean']
OmF_norm_mean = OmF_mean / np.sqrt(obs_plus_fcst_var)
OmF_norm_stdv = np.sqrt(OmF_stdv**2 / obs_plus_fcst_var)

# Mask out data points with insufficient observations using the Nmin threshold.
# The metrics are set to NaN there. N_data is not set to NaN; its entries below Nmin
# are set to 0 instead. N_data is the same object as stats['N_data'], so that
# entry of stats is modified as well.
too_few = N_data < Nmin
for metric in (OmF_mean, OmF_stdv, OmF_norm_mean, OmF_norm_stdv, OmA_mean, OmA_stdv):
    metric[too_few] = np.nan
N_data[too_few] = 0

In [None]:
def process_obsparam(obsparam):
    """
    Group observation species numbers by sensor.

    Parameters
    ----------
    obsparam : iterable of mappings
        Each entry needs a 'descr' item, which is searched for sensor-specific
        substrings, and a 'species' item, which is converted with int().

    Returns
    -------
    dict
        Maps a sensor name ('SMOS', 'SMAP', 'ASCAT', 'MODIS') to the list of species
        numbers whose description matched that sensor, in input order. Sensors
        without any matching species are left out.
    """
    # (sensor name, substrings that identify it in 'descr'); checked in this order,
    # and the first sensor that matches gets the species
    sensor_tags = (
        ('SMOS',  ('SMOS_fit_Tb',)),           # SMOS Tb observations
        ('SMAP',  ('SMAP_L1C_Tb',)),           # SMAP Tb observations
        ('ASCAT', ('ASCAT',)),                 # ASCAT soil moisture
        ('MODIS', ('MOD10C1', 'MYD10C1')),     # MODIS snow cover
    )

    groups = {sensor: [] for sensor, _ in sensor_tags}

    for param in obsparam:
        descr = param['descr']
        for sensor, tags in sensor_tags:
            if any(tag in descr for tag in tags):
                groups[sensor].append(int(param['species']))
                break

    # Drop sensors for which no species was found
    return {sensor: species for sensor, species in groups.items() if species}

# Group the species by sensor using the obsparam stored on the experiment that supplies
# the observations. Reading it from exp_list avoids depending on the 'obsparam' variable
# left over from the last iteration of the loop that filled exp_list.
sensor_groups = process_obsparam(exp_list[obs_from]['obsparam'])
print("Grouped species by sensor:")
# The loop variable is not called 'species_list', so the global of that name is not overwritten
for sensor, species_ids in sensor_groups.items():
    print(f"{sensor}: Species {species_ids}")

In [None]:
# Report the array shape of the O-F standard deviation
print(f"OmF_stdv.shape: {OmF_stdv.shape}")