In [4]:
#!/usr/bin/env python
# coding: utf-8

# This script is used to compare two ensemble outputs (e.g., gauge-based GMET and NLDAS-based GMET)
import matplotlib
matplotlib.use('Agg')
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from   matplotlib.dates import DateFormatter
import os
import pandas as pd
import xarray as xr
import datetime

startTime = datetime.datetime.now()

def read_ens(out_forc_name_base, metric, start_yr, end_yr):
    for yr in range(start_yr, end_yr+1):        
        
        file = os.path.join(out_forc_name_base + '.' + str(yr) + '.'+metric+'.nc')
        f=xr.open_dataset(file)
        time = f['time'][:]
        pcp = f.variables['pcp'][:]
        tmean = f.variables['t_mean'][:]
        trange = f.variables['t_range'][:]
        
        if yr == start_yr:
            time_concat = time
            pcp_concat = pcp
            tmean_concat = tmean
            trange_concat = trange
        else:
            time_concat = np.concatenate((time_concat,time), axis=0) # (time)
            pcp_concat = np.concatenate((pcp_concat, pcp), axis=0) # (time,y,x)
            tmean_concat = np.concatenate((tmean_concat, tmean), axis=0)
            trange_concat = np.concatenate((trange_concat, trange), axis=0)
            
    time_concat = pd.DatetimeIndex(time_concat)
        
    return time_concat, pcp_concat, tmean_concat, trange_concat

# ========================================================================================================================
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'   
nldas_dir = os.path.join(root_dir,'data/nldas_daily_utc')
start_yr = 2015
end_yr = 2016

gridinfo_file = os.path.join(root_dir,'scripts/conus_ens_grid_eighth_deg_v1p1.nc')

result_dir = os.path.join(root_dir,'test_uniform')
test_folders = [d for d in os.listdir(result_dir)]
test_folders = sorted(test_folders)
subforlder = 'gmet_ens_summary'
file_basename = 'ens_forc'

time_format = '%Y-%m-%d'
plot_date_start = '2015-01-01'
plot_date_end = '2016-12-31'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

formatter = DateFormatter('%Y/%m')
line_marker_size = 0.75
line_width = 0.5
line_alpha = 0.8
scatter_marker_size = 3
scatter_alpha = 0.8
bound_alpha = 0.8
dpi_value = 150

output_dir=os.path.join(root_dir,'scripts/step11_plot_temporal_NLDAS_ens')
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# #======================================================================================================
# print('Read gridinfo mask')
# # get xy mask from gridinfo.nc
# f_gridinfo = xr.open_dataset(gridinfo_file)
# mask_xy = f_gridinfo['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.
# # data_mask = f_gridinfo['data_mask'].values[:] # (y, x). 1 is valid. 0 is invalid.

# #======================================================================================================
# # read historical nldas data
# print('Read nldas data')
# for yr in range(start_yr, end_yr+1):
    
#     nldas_file = 'NLDAS_'+str(yr)+'.nc'
#     nldas_path = os.path.join(nldas_dir, nldas_file)
    
#     f_nldas = xr.open_dataset(nldas_path)
#     if yr == start_yr:
#         prcp_avg = f_nldas['prcp_avg'].values[:] # (time, y, x). unit: kg/m^2 = mm
#         tair_min = f_nldas['tair_min'].values[:] # (time, y, x). unit: K
#         tair_max = f_nldas['tair_max'].values[:]
#         time = pd.to_datetime(f_nldas['time'].values[:]).strftime(time_format)
#     else:
#         prcp_avg = np.concatenate((prcp_avg, f_nldas['prcp_avg'].values[:]), axis = 0)
#         tair_min = np.concatenate((tair_min, f_nldas['tair_min'].values[:]), axis = 0)
#         tair_max = np.concatenate((tair_max, f_nldas['tair_max'].values[:]), axis = 0)
#         time = np.concatenate((time, pd.to_datetime(f_nldas['time'].values[:]).strftime(time_format)), axis = 0)

# # get time mask from nldas data
# time_obj = np.asarray([datetime.datetime.strptime(t, time_format) for t in time])
# mask_t  = (time_obj >= plot_date_start_obj) & (time_obj <= plot_date_end_obj) 
# time = time_obj[mask_t]
  
# # convert unit and calculate mean values
# prcp_sum = np.multiply(prcp_avg[mask_t,:,:], 24.0) #mm/hr to mm/day
# tair_min = np.subtract(tair_min[mask_t,:,:], 273.15)
# tair_max = np.subtract(tair_max[mask_t,:,:], 273.15)
 
# prcp_mean = np.nanmean(np.nanmean(prcp_sum, axis=2), axis=1) #(time)
# tmean_mean = np.nanmean(np.nanmean(0.5*(tair_min+tair_max), axis=2), axis=1) 
# trange_mean = np.nanmean(np.nanmean((tair_max-tair_min), axis=2), axis=1)
# del prcp_avg,tair_min,tair_max

#======================================================================================================
print('Plot')
# manual adjustment
vmin_prcp_mean = -3.0

# loop through all uniform tests
for test_folder in test_folders:
    
    print(test_folder)
    test_dir = os.path.join(result_dir, test_folder)
    fig_title= test_folder

    # read ensemble mean    
    output_namebase = os.path.join(test_dir,subforlder, file_basename)
    metric = 'ensmean'
    time_ensmean, pcp_ensmean, tmean_ensmean, trange_ensmean = read_ens(output_namebase, metric, start_yr, end_yr)

    # read ensemble lower bound (5th percentile)  
    output_namebase = os.path.join(test_dir,subforlder, file_basename)
    metric = 'enspctl.5'
    time_enslb, pcp_enslb, tmean_enslb, trange_enslb = read_ens(output_namebase, metric, start_yr, end_yr)

    # read ensemble upper bound (5th percentile)  
    output_namebase = os.path.join(test_dir,subforlder, file_basename)
    metric = 'enspctl.95'
    time_ensub, pcp_ensub, tmean_ensub, trange_ensub = read_ens(output_namebase, metric, start_yr, end_yr)

    # convert masked values to nan
    pcp_ensmean=np.where(mask_xy==0,np.nan,pcp_ensmean)
    pcp_enslb=np.where(mask_xy==0,np.nan,pcp_enslb)
    pcp_ensub=np.where(mask_xy==0,np.nan,pcp_ensub)
    
    tmean_ensmean=np.where(mask_xy==0,np.nan,tmean_ensmean)
    tmean_enslb=np.where(mask_xy==0,np.nan,tmean_enslb)
    tmean_ensub=np.where(mask_xy==0,np.nan,tmean_ensub)
    
    trange_ensmean=np.where(mask_xy==0,np.nan,trange_ensmean)
    trange_enslb=np.where(mask_xy==0,np.nan,trange_enslb)
    trange_ensub=np.where(mask_xy==0,np.nan,trange_ensub)

    # define plot mask for nldas ensemble
    mask_ens_t = (time_ensmean>=plot_date_start_obj) & (time_ensmean<=plot_date_end_obj)
    time_ens = time_ensmean[mask_ens_t]
        
    # calculate mean and bounds
    pcp_ensmean = np.nanmean(np.nanmean(pcp_ensmean[mask_ens_t,:,:], axis=2),axis=1) # shape (time)
    pcp_enslb = np.nanmean(np.nanmean(pcp_enslb[mask_ens_t,:,:], axis=2),axis=1) # shape (time)
    pcp_ensub = np.nanmean(np.nanmean(pcp_ensub[mask_ens_t,:,:], axis=2),axis=1)

    tmean_ensmean = np.nanmean(np.nanmean(tmean_ensmean[mask_ens_t,:,:], axis=2),axis=1) 
    tmean_enslb = np.nanmean(np.nanmean(tmean_enslb[mask_ens_t,:,:], axis=2),axis=1) 
    tmean_ensub = np.nanmean(np.nanmean(tmean_ensub[mask_ens_t,:,:], axis=2),axis=1) 

    trange_ensmean = np.nanmean(np.nanmean(trange_ensmean[mask_ens_t,:,:], axis=2),axis=1) 
    trange_enslb = np.nanmean(np.nanmean(trange_enslb[mask_ens_t,:,:], axis=2),axis=1) 
    trange_ensub = np.nanmean(np.nanmean(trange_ensub[mask_ens_t,:,:], axis=2),axis=1) 
    
    # plot
    nrow=3 # pcp, tmean, trange
    ncol=3 # time series, Q-Q plot
    fig = plt.figure(constrained_layout=False)
    fig.set_figwidth(3.54*ncol) 
    fig.set_figheight(3.54*0.75*nrow)
    fig.suptitle(fig_title, fontsize='medium', fontweight='semibold', color='g')
    
    mpl.rcParams['savefig.pad_inches'] = 0 # remove any padding from the edges of the figure when saved by savefig
    #     plt.annotate('Grid ('+str(y_id)+','+str(x_id)+')',(0.05,0.95),xycoords='figure fraction',fontsize='medium',fontweight='semibold')
    
    gs = fig.add_gridspec(nrow, ncol)
    ax00 = fig.add_subplot(gs[0, 0:2])
    ax01 = fig.add_subplot(gs[0, 2])
    ax10 = fig.add_subplot(gs[1, 0:2])
    ax11 = fig.add_subplot(gs[1, 2])
    ax20 = fig.add_subplot(gs[2, 0:2])
    ax21 = fig.add_subplot(gs[2, 2])
    
    # NLDAS vs. Ensemble [PCP]
    ax00.plot_date(time, prcp_mean, 'b-o', tz=None, linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='NLDAS') 
    ax00.plot_date(time_ens, pcp_ensmean, 'r-^', tz=None, linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='Ens Mean')
    ax00.fill_between(time_ens, pcp_enslb, pcp_ensub, linewidth=0, facecolor='grey', alpha=bound_alpha, label='Ens 90% unc bounds')
    ax01.scatter(prcp_mean, pcp_ensmean, s=scatter_marker_size, c='k', marker='o', edgecolors='None', alpha=scatter_alpha)
    
    # NLDAS vs. Ensemble [TMEAN]
    ax10.plot_date(time, tmean_mean, 'b-o', tz=None, linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='NLDAS') 
    ax10.plot_date(time_ens, tmean_ensmean, 'r-^', tz=None, linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='Ens Mean')
    ax10.fill_between(time_ens, tmean_enslb, tmean_ensub, linewidth=0, facecolor='grey', alpha=bound_alpha, label='Ens 90% unc bounds')
    ax11.scatter(tmean_mean, tmean_ensmean, s=scatter_marker_size, c='k', marker='o', edgecolors='None', alpha=scatter_alpha)
    
    # NLDAS vs. Ensemble [TRANGE]
    ax20.plot_date(time, trange_mean, 'b-o', tz=None, linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='NLDAS') 
    ax20.plot_date(time_ens, trange_ensmean, 'r-^', tz=None, linewidth=line_width, markersize=line_marker_size, alpha=line_alpha, label='Ens Mean')
    ax20.fill_between(time_ens, trange_enslb, trange_ensub, linewidth=0, facecolor='grey', alpha=bound_alpha, label='Ens 90% unc bounds')
    ax21.scatter(trange_mean, trange_ensmean, s=scatter_marker_size, c='k', marker='o', edgecolors='None', alpha=scatter_alpha)
        
    # 45 degree line in Q-Qplot
    axes = [ax01, ax11, ax21]
    for ax in axes:
        left, right = ax.get_xlim()
        bottom, top = ax.get_ylim()
        ax_min = min([left, bottom])
        ax_max = max([right, top])
        ax.set_xlim([ax_min, ax_max])
        ax.set_ylim([ax_min, ax_max])
        ax.plot([ax_min, ax_max],[ax_min, ax_max],color='grey',linewidth=1.0)
    
    # title
    ax00_title_str = '(a) Daily Precipitation' 
    ax10_title_str = '(b) Mean Temperature' 
    ax20_title_str = '(c) Temperature Range'     
    axes_title_str=[ax00_title_str, ax10_title_str, ax20_title_str]
    axes=[ax00, ax10, ax20]
    for i in range(len(axes)):
        ax=axes[i]
        title_str=axes_title_str[i]
        ax.set_title(title_str, fontsize='small', fontweight='semibold')
                
    # x-axis label and legend
    axes=[ax00, ax10, ax20]
    for ax in axes:
        ax.set_xlabel('Date', fontsize='small')
        ax.set_xlim(left=plot_date_start_obj, right=plot_date_end_obj)
        ax.xaxis.set_major_formatter(formatter)
        ax.xaxis.set_tick_params(labelsize='small')#rotation=30,
        ax.legend(loc='upper right', fontsize='small', framealpha=0.5) 
    
    # y-axis label and limit
    for ax in [ax00, ax10]:
        ax.set_ylabel('Precipitation (mm/day)', fontsize='small')
    for ax in [ax00]:
        ax.set_ylim(bottom=vmin_prcp_mean)
    for ax in [ax10, ax20]:
        ax.set_ylabel('Temperature ($^\circ$C)', fontsize='small')
    
    for ax in [ax01, ax11, ax21]:
        ax.set_xlabel('NLDAS', fontsize='small')
        ax.set_ylabel('Ens mean', fontsize='small')
    
    plt.rc('xtick',labelsize='small')
    plt.rc('ytick',labelsize='small') 
    
    fig.tight_layout()
    output_filename = test_folder+'.png'
    fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value)
    plt.close(fig)
    
    del time_ensmean, pcp_ensmean, tmean_ensmean, trange_ensmean
    del time_enslb, pcp_enslb, tmean_enslb, trange_enslb
    del time_ensub, pcp_ensub, tmean_ensub, trange_ensub

print('Done')


Plot
00822grids
Done


In [9]:
time_obj

array([datetime.datetime(2015, 1, 1, 0, 0),
       datetime.datetime(2015, 1, 2, 0, 0),
       datetime.datetime(2015, 1, 3, 0, 0),
       datetime.datetime(2015, 1, 4, 0, 0),
       datetime.datetime(2015, 1, 5, 0, 0),
       datetime.datetime(2015, 1, 6, 0, 0),
       datetime.datetime(2015, 1, 7, 0, 0),
       datetime.datetime(2015, 1, 8, 0, 0),
       datetime.datetime(2015, 1, 9, 0, 0),
       datetime.datetime(2015, 1, 10, 0, 0),
       datetime.datetime(2015, 1, 11, 0, 0),
       datetime.datetime(2015, 1, 12, 0, 0),
       datetime.datetime(2015, 1, 13, 0, 0),
       datetime.datetime(2015, 1, 14, 0, 0),
       datetime.datetime(2015, 1, 15, 0, 0),
       datetime.datetime(2015, 1, 16, 0, 0),
       datetime.datetime(2015, 1, 17, 0, 0),
       datetime.datetime(2015, 1, 18, 0, 0),
       datetime.datetime(2015, 1, 19, 0, 0),
       datetime.datetime(2015, 1, 20, 0, 0),
       datetime.datetime(2015, 1, 21, 0, 0),
       datetime.datetime(2015, 1, 22, 0, 0),
       datetime.dat

In [8]:
time

array(['2015-07-01', '2015-07-02', '2015-07-03', '2015-07-04',
       '2015-07-05', '2015-07-06', '2015-07-07', '2015-07-08',
       '2015-07-09', '2015-07-10', '2015-07-11', '2015-07-12',
       '2015-07-13', '2015-07-14', '2015-07-15'], dtype=object)