In [1]:
#!/usr/bin/env python
# coding: utf-8

# This script is used to compare two ensemble outputs (e.g., gauge-based GMET and NLDAS-based GMET)
import matplotlib
matplotlib.use('Agg')
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from   matplotlib.dates import DateFormatter
import os
import pandas as pd
import xarray as xr
import datetime

# Wall-clock time at script start.
# NOTE(review): not read anywhere else in the visible script - confirm before relying on it.
startTime = datetime.datetime.now()

def read_ens(out_forc_name_base, metric, start_yr, end_yr):
    """Read yearly ensemble summary files and join them along the time axis.

    One file is read per year, named
    ``<out_forc_name_base>.<year>.<metric>.nc``. From each file the ``time``
    coordinate and the variables ``pcp``, ``t_mean``, ``t_min``, ``t_max`` and
    ``t_range`` are loaded into memory, and the file is closed before the next
    one is opened.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix shared by all yearly files (everything before ``.<year>``).
    metric : str
        Summary tag embedded in the file name (the script uses ``'ensmean'``,
        ``'enspctl.5'`` and ``'enspctl.95'``).
    start_yr, end_yr : int
        First and last year to read, both inclusive.

    Returns
    -------
    tuple
        ``(time, pcp, t_mean, t_min, t_max, t_range)`` where ``time`` is a
        ``pandas.DatetimeIndex`` and the others are numpy arrays concatenated
        along axis 0 (presumably (time, y, x), as the callers index them).

    Raises
    ------
    ValueError
        If ``start_yr`` is greater than ``end_yr`` (nothing to read).
    """
    if start_yr > end_yr:
        raise ValueError(
            'start_yr (%s) must not be greater than end_yr (%s)' % (start_yr, end_yr))

    var_names = ('pcp', 't_mean', 't_min', 't_max', 't_range')
    time_parts = []
    var_parts = {name: [] for name in var_names}

    for yr in range(start_yr, end_yr+1):
        path = out_forc_name_base + '.' + str(yr) + '.' + metric + '.nc'
        # `.values` pulls the data into memory, so the file can be closed
        # right away instead of staying open for the rest of the run.
        with xr.open_dataset(path) as ds:
            time_parts.append(ds['time'].values)
            for name in var_names:
                var_parts[name].append(ds[name].values)

    # Concatenate once per variable rather than re-copying the growing array every year.
    time_concat = pd.DatetimeIndex(np.concatenate(time_parts, axis=0))
    pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat = (
        np.concatenate(var_parts[name], axis=0) for name in var_names)

    return time_concat, pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat

# ========================================================================================================================
# Input locations and the (inclusive) range of years to read.
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'
nldas_dir = os.path.join(root_dir,'data/nldas_daily_utc_convert')
start_yr = 2015
end_yr = 2016

gridinfo_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth.nc')

# Each sub-directory of result_dir is treated as one test case.
# Regular files (logs, hidden files, ...) are skipped so they are not
# mistaken for a test folder further down.
result_dir = os.path.join(root_dir,'test_uniform_perturb')
test_folders = sorted(
    d for d in os.listdir(result_dir)
    if os.path.isdir(os.path.join(result_dir, d))
)
subforlder = 'gmet_ens_summary'  # (sic) spelling kept: the plotting loop refers to this name
file_basename = 'ens_forc'

# Plotting window (both ends inclusive).
time_format = '%Y-%m-%d'
plot_date_start = '2015-01-01'
plot_date_end = '2016-12-31'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

# Shared plot styling.
formatter = DateFormatter('%Y/%m')
line_marker_size = 0.75
line_width = 0.5
line_alpha = 0.8
scatter_marker_size = 3
scatter_alpha = 0.8
bound_alpha = 0.8
dpi_value = 150

# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
output_dir=os.path.join(root_dir,'scripts/step11_plot_temporal_NLDAS_ens')
os.makedirs(output_dir, exist_ok=True)

#======================================================================================================
print('Read gridinfo mask')
# get xy mask from gridinfo.nc
# `.values` loads the mask into memory, so the file is closed as soon as the
# with-block ends instead of staying open for the whole run.
with xr.open_dataset(gridinfo_file) as f_gridinfo:
    mask_xy = f_gridinfo['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.
# data_mask = f_gridinfo['data_mask'].values[:] # (y, x). 1 is valid. 0 is invalid.

#======================================================================================================
# read historical nldas data
print('Read nldas data')
nldas_var_names = ('pcp', 't_mean', 't_min', 't_max', 't_range') # pcp: mm/day, temperatures: degC
nldas_parts = {name: [] for name in nldas_var_names}
nldas_time_parts = []
for yr in range(start_yr, end_yr+1):

    nldas_file = 'NLDAS_'+str(yr)+'.nc'
    nldas_path = os.path.join(nldas_dir, nldas_file)

    # `.values` loads the data, so each yearly file is closed before the next one is opened
    with xr.open_dataset(nldas_path) as f_nldas:
        for name in nldas_var_names:
            nldas_parts[name].append(f_nldas[name].values) # (time, y, x)
        nldas_time_parts.append(f_nldas['time'].values)

time = np.concatenate(nldas_time_parts, axis=0)

# get time mask from nldas data
time_obj = pd.to_datetime(time)
mask_t  = (time_obj >= plot_date_start_obj) & (time_obj <= plot_date_end_obj)
time = time_obj[mask_t]

# time series mean
# The time mask is applied to the data as well as to `time`; otherwise the
# series would be longer than `time` whenever the plot window is narrower
# than the data that was read.
# NOTE(review): this is a mean of per-row means (axis 2, then axis 1), which
# differs from a mean over all valid cells when rows hold different numbers
# of NaNs. Kept as is because the ensemble series are reduced the same way.
nldas_means = {}
for name in nldas_var_names:
    # pop() releases the yearly pieces as soon as they are merged
    data = np.concatenate(nldas_parts.pop(name), axis=0)[mask_t]
    nldas_means[name] = np.nanmean(np.nanmean(data, axis=2), axis=1) #(time)
    del data

prcp_mean = nldas_means['pcp']
tmean_mean = nldas_means['t_mean']
tmin_mean = nldas_means['t_min']
tmax_mean = nldas_means['t_max']
trange_mean = nldas_means['t_range']
del nldas_parts, nldas_time_parts, nldas_means

#======================================================================================================
print('Plot')
# manual adjustment
vmin_prcp_mean = -1.0
vmax_prcp_mean = 20

vmin_tmean_mean = -12
vmax_tmean_mean = 35

vmin_trange_mean = 4
vmax_trange_mean = 15

# Global matplotlib settings. Tick label sizes are picked up when axes are
# created, so they must be set before the first figure is built; setting them
# at the end of an iteration left the first figure styled differently.
mpl.rcParams['savefig.pad_inches'] = 0 # remove any padding from the edges of the figure when saved by savefig
plt.rc('xtick',labelsize='small')
plt.rc('ytick',labelsize='small')

# Order of the data arrays returned by read_ens (after the time index).
ens_var_names = ('pcp', 'tmean', 'tmin', 'tmax', 'trange')
# Ensemble summary files to read: mean, 5th percentile (lower bound) and
# 95th percentile (upper bound). The mean is read first because its time
# axis defines the plotting mask.
ens_metrics = ('ensmean', 'enspctl.5', 'enspctl.95')

# One row per variable: (ensemble key, NLDAS series, title, y label, (y min, y max)).
# The three temperature panels share the tmean limits.
panels = [
    ('pcp', prcp_mean, '(a) Daily Precipitation',
     'Precipitation (mm/day)', (vmin_prcp_mean, vmax_prcp_mean)),
    ('tmean', tmean_mean, '(b) Daily Mean Temperature',
     r'Temperature ($^\circ$C)', (vmin_tmean_mean, vmax_tmean_mean)),
    ('tmin', tmin_mean, '(c) Daily Min Temperature',
     r'Temperature ($^\circ$C)', (vmin_tmean_mean, vmax_tmean_mean)),
    ('tmax', tmax_mean, '(d) Daily Max Temperature',
     r'Temperature ($^\circ$C)', (vmin_tmean_mean, vmax_tmean_mean)),
    ('trange', trange_mean, '(e) Daily Temperature Range',
     r'Temperature ($^\circ$C)', (vmin_trange_mean, vmax_trange_mean)),
]


def _masked_domain_mean(field, xy_mask, t_mask):
    """Return the per-time-step mean of `field` over the valid grid cells.

    Cells where `xy_mask` equals 0 are set to NaN, the time steps selected by
    the boolean `t_mask` are kept, and the result is averaged over axis 2 and
    then axis 1 with NaNs ignored.
    """
    field = np.where(xy_mask==0, np.nan, field)
    return np.nanmean(np.nanmean(field[t_mask,:,:], axis=2), axis=1) # shape (time)


# loop through all uniform tests
for test_folder in test_folders:

    print(test_folder)
    test_dir = os.path.join(result_dir, test_folder)
    fig_title= test_folder
    output_namebase = os.path.join(test_dir,subforlder, file_basename)

    # Read each summary file and reduce it to domain-mean time series straight
    # away, so only one metric's full 3-D arrays are in memory at a time.
    ens_series = {}
    mask_ens_t = None
    time_ens = None
    for metric in ens_metrics:
        time_metric, *fields = read_ens(output_namebase, metric, start_yr, end_yr)
        if mask_ens_t is None:
            # define plot mask for nldas ensemble (from the ensemble-mean time axis)
            mask_ens_t = (time_metric>=plot_date_start_obj) & (time_metric<=plot_date_end_obj)
            time_ens = time_metric[mask_ens_t]
        ens_series[metric] = {
            name: _masked_domain_mean(field, mask_xy, mask_ens_t)
            for name, field in zip(ens_var_names, fields)
        }
        del fields

    # plot
    nrow=len(panels) # pcp, tmean, tmin, tmax, trange
    ncol=3 # time series (two columns wide) + scatter plot
    fig = plt.figure(constrained_layout=False)
    fig.set_figwidth(3.54*ncol)
    fig.set_figheight(3.54*0.75*nrow)
    fig.suptitle(fig_title, fontsize='medium', fontweight='semibold', color='g', y=1.0)

    gs = fig.add_gridspec(nrow, ncol)
    for row, (name, nldas_series, title_str, ylabel_str, ylims) in enumerate(panels):
        ax_ts = fig.add_subplot(gs[row, 0:2])
        ax_sc = fig.add_subplot(gs[row, 2])

        ens_mean = ens_series['ensmean'][name]
        ens_lb = ens_series['enspctl.5'][name]
        ens_ub = ens_series['enspctl.95'][name]

        # NLDAS vs. Ensemble time series. Axes.plot handles datetime x values,
        # so the deprecated Axes.plot_date is not needed.
        ax_ts.plot(time, nldas_series, 'b-o', linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='NLDAS')
        ax_ts.plot(time_ens, ens_mean, 'r-^', linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='Ens Mean')
        ax_ts.fill_between(time_ens, ens_lb, ens_ub, linewidth=0, facecolor='grey', alpha=bound_alpha, label='Ens 90% unc bounds')

        # NLDAS vs. Ensemble mean scatter
        ax_sc.scatter(nldas_series, ens_mean, s=scatter_marker_size, c='k', marker='o', edgecolors='None', alpha=scatter_alpha)

        # 45 degree line: use one common range on both axes so the 1:1 line is the diagonal
        left, right = ax_sc.get_xlim()
        bottom, top = ax_sc.get_ylim()
        ax_min = min(left, bottom)
        ax_max = max(right, top)
        ax_sc.set_xlim([ax_min, ax_max])
        ax_sc.set_ylim([ax_min, ax_max])
        ax_sc.plot([ax_min, ax_max],[ax_min, ax_max],color='grey',linewidth=1.0)

        # title
        ax_ts.set_title(title_str, fontsize='small', fontweight='semibold')

        # x-axis label and legend
        ax_ts.set_xlabel('Date', fontsize='small')
        ax_ts.set_xlim(left=plot_date_start_obj, right=plot_date_end_obj)
        ax_ts.xaxis.set_major_formatter(formatter)
        ax_ts.xaxis.set_tick_params(labelsize='small')#rotation=30,
        ax_ts.legend(loc='upper right', fontsize='small', framealpha=0.5)

        # y-axis label and limit
        ax_ts.set_ylabel(ylabel_str, fontsize='small')
        ax_ts.set_ylim(bottom=ylims[0], top=ylims[1])

        ax_sc.set_xlabel('NLDAS', fontsize='small')
        ax_sc.set_ylabel('Ens mean', fontsize='small')

    fig.tight_layout()

    output_filename = test_folder+'.png'
    fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value)
    plt.close(fig)

    # release this test's series before reading the next one
    del ens_series, time_ens, mask_ens_t

print('Done')


  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)


Read gridinfo mask
Read nldas data


  stack_char_dim=stack_char_dim, use_cftime=use_cftime)
  stack_char_dim=stack_char_dim, use_cftime=use_cftime)
  stack_char_dim=stack_char_dim, use_cftime=use_cftime)
  stack_char_dim=stack_char_dim, use_cftime=use_cftime)
  stack_char_dim=stack_char_dim, use_cftime=use_cftime)
  stack_char_dim=stack_char_dim, use_cftime=use_cftime)


Plot
00810grids



To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


00974grids




01225grids




01610grids




02251grids




03186grids




04951grids




08884grids




18074grids




Done
