In [8]:
#!/usr/bin/env python
# coding: utf-8

# This script is used to compare ensemble outputs with NLDAS data
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import xarray as xr
import datetime

def read_ens(out_forc_name_base, metric, start_yr, end_yr):
    """Read yearly ensemble NetCDF files and stack their variables along axis 0.

    For every year in ``[start_yr, end_yr]`` the file
    ``<out_forc_name_base>.<year>.<metric>.nc`` is opened and the variables
    ``time``, ``pcp``, ``t_mean``, ``t_min``, ``t_max`` and ``t_range`` are read.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix of the yearly files (everything before ``.<year>``).
    metric : str
        Metric tag that is part of the file name (e.g. ``'enspctl.5'``).
    start_yr, end_yr : int
        First and last year to read, both inclusive.

    Returns
    -------
    tuple
        ``(time, pcp, tmean, tmin, tmax, trange)``. ``time`` is a
        ``pandas.DatetimeIndex``; the other five are ``numpy.ndarray`` objects
        obtained by concatenating the yearly arrays along axis 0.

    Raises
    ------
    ValueError
        If ``start_yr > end_yr`` (there would be nothing to read).
    """
    if start_yr > end_yr:
        raise ValueError('start_yr (%s) must not be greater than end_yr (%s)'
                         % (start_yr, end_yr))

    var_names = ('time', 'pcp', 't_mean', 't_min', 't_max', 't_range')
    yearly = {name: [] for name in var_names}

    for yr in range(start_yr, end_yr+1):
        file = out_forc_name_base + '.' + str(yr) + '.'+metric+'.nc'
        # The context manager closes the file even if a variable is missing.
        # Values are pulled into memory (.values) before the file is closed,
        # because data read lazily cannot be loaded once the file is gone.
        with xr.open_dataset(file) as f:
            for name in var_names:
                yearly[name].append(f[name].values)

    # Concatenate once per variable instead of re-copying the growing array
    # on every loop iteration.
    (time_concat, pcp_concat, tmean_concat,
     tmin_concat, tmax_concat, trange_concat) = (
        np.concatenate(yearly[name], axis=0) for name in var_names)

    time_concat = pd.DatetimeIndex(time_concat)

    return time_concat, pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat

#======================================================================================================
# main script
# ---- input / output locations (all derived from root_dir) ----
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'
stn_ens_dir = os.path.join(root_dir,'data/stn_ens_summary')
nldas_dir = os.path.join(root_dir,'data/nldas_daily_utc_convert')
start_yr = 2015
end_yr = 2016

gridinfo_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth.nc')

# ---- sampling scenarios ----
result_dir = os.path.join(root_dir,'test_uniform_perturb')
# Keep directories only: a stray file in result_dir would otherwise shift the
# position-based lookup test_folders[scenarios_ids[k]] used by the loading code.
test_folders = sorted(d for d in os.listdir(result_dir)
                      if os.path.isdir(os.path.join(result_dir, d)))
scenarios_ids = range(0,9) #[0,1,5,8]
intervals =  range(10,1,-1) #[10,9,5,2]
scenario_num = len(scenarios_ids)
# intervals[k] is used as the title of scenario k, so both must have the same length.
if len(intervals) != scenario_num:
    raise ValueError('intervals has %d entries but there are %d scenarios'
                     % (len(intervals), scenario_num))

subforlder = 'gmet_ens_summary'
file_basename = 'ens_forc'

ens_num = 100
time_format = '%Y-%m-%d'

# ---- plot settings ----
dpi_value = 600
plot_date_start = '2015-01-01'
plot_date_end = '2016-12-31'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

output_dir=os.path.join(root_dir, 'scripts/step19_plot_stn_nldas_IQR_box')
# exist_ok avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)
output_filename_base = 'step19_plot_stn_nldas_IQR_box_'
   
# #======================================================================================================
# print('Read gridinfo mask')
# # get xy mask from gridinfo.nc
# f_gridinfo = xr.open_dataset(gridinfo_file)
# mask_xy = f_gridinfo['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.
# #data_mask = f_gridinfo['data_mask'].values[:] # (y, x). 1 is valid. 0 is invalid.

# #======================================================================================================
# # read historical nldas data summary
# print('Read stn ens summary')

# print(' -- read spatial ensemble')
# for yr in range(start_yr, end_yr+1):
    
#     nldas_file = 'ens_forc.sumamry.'+str(yr)+'.nc'
#     nldas_path = os.path.join(stn_ens_dir, nldas_file)
    
#     f_stn = xr.open_dataset(nldas_path)
#     if yr == start_yr:
#         pcp_lb = f_stn['pcp_lb'].values[:]
#         pcp_ub = f_stn['pcp_ub'].values[:]

#         tmean_lb = f_stn['tmean_lb'].values[:]
#         tmean_ub = f_stn['tmean_ub'].values[:]

#         tmin_lb = f_stn['tmin_lb'].values[:]
#         tmin_ub = f_stn['tmin_ub'].values[:]

#         tmax_lb = f_stn['tmax_lb'].values[:]
#         tmax_ub = f_stn['tmax_ub'].values[:]

#         trange_lb = f_stn['trange_lb'].values[:]
#         trange_ub = f_stn['trange_ub'].values[:]
#         time = f_stn['time'].values[:]
#     else:
#         pcp_lb = np.concatenate((pcp_lb, f_stn['pcp_lb'].values[:]), axis = 0)
#         pcp_ub = np.concatenate((pcp_ub, f_stn['pcp_ub'].values[:]), axis = 0)

#         tmean_lb = np.concatenate((tmean_lb, f_stn['tmean_lb'].values[:]), axis = 0)
#         tmean_ub = np.concatenate((tmean_ub, f_stn['tmean_ub'].values[:]), axis = 0)

#         tmin_lb = np.concatenate((tmin_lb, f_stn['tmin_lb'].values[:]), axis = 0)
#         tmin_ub = np.concatenate((tmin_ub, f_stn['tmin_ub'].values[:]), axis = 0)

#         tmax_lb = np.concatenate((tmax_lb, f_stn['tmax_lb'].values[:]), axis = 0)
#         tmax_ub = np.concatenate((tmax_ub, f_stn['tmax_ub'].values[:]), axis = 0)

#         trange_lb = np.concatenate((trange_lb, f_stn['trange_lb'].values[:]), axis = 0)
#         trange_ub = np.concatenate((trange_ub, f_stn['trange_ub'].values[:]), axis = 0)
#         time = np.concatenate((time, f_stn['time'].values[:]), axis = 0)

# # get time mask from nldas data
# # time_obj = np.asarray([datetime.datetime.strptime(t, time_format) for t in time])
# time_obj = pd.to_datetime(time)
# mask_t  = (time_obj >= plot_date_start_obj) & (time_obj <= plot_date_end_obj) 
# time_ens_stn = time_obj[mask_t]

# print(' -- calculate temporal mean')
# # calculate time series mean (ny,nx)
# pcp_iqr = np.nanmean(pcp_ub[mask_t,:,:]-pcp_lb[mask_t,:,:],axis=0)     
# tmean_iqr = np.nanmean(tmean_ub[mask_t,:,:]-tmean_lb[mask_t,:,:],axis=0)
# tmin_iqr = np.nanmean(tmin_ub[mask_t,:,:]-tmin_lb[mask_t,:,:],axis=0)
# tmax_iqr = np.nanmean(tmax_ub[mask_t,:,:]-tmax_lb[mask_t,:,:],axis=0)
# trange_iqr = np.nanmean(trange_ub[mask_t,:,:]-trange_lb[mask_t,:,:],axis=0)

# print(' -- extract unmasked values')
# # extract unmasked values
# pcp_iqr=pcp_iqr[mask_xy!=0]    
# tmean_iqr=tmean_iqr[mask_xy!=0] 
# tmin_iqr=tmin_iqr[mask_xy!=0]  
# tmax_iqr=tmax_iqr[mask_xy!=0]   
# trange_iqr=trange_iqr[mask_xy!=0] 

# del pcp_lb,pcp_ub,tmean_lb,tmean_ub,tmin_lb,tmin_ub
# del tmax_lb,tmax_ub,trange_lb,trange_ub

# #======================================================================================================
# # read scenario ensemble results and save to dictionary
# print('Read nldas ens bounds')

# for k in range(scenario_num):

#     test_folder = test_folders[scenarios_ids[k]]
    
#     print(test_folder)
#     test_dir = os.path.join(result_dir, test_folder)
#     fig_title= test_folder

#     print(' -- read spatial ensemble')
#     # read ensemble bounds (metrics 'enspctl.5' and 'enspctl.95')
#     output_namebase = os.path.join(test_dir,subforlder, file_basename)
#     metric = 'enspctl.5'
#     time_enslb, pcp_enslb, tmean_enslb, tmin_enslb, tmax_enslb, trange_enslb = read_ens(output_namebase, metric, start_yr, end_yr)

#     output_namebase = os.path.join(test_dir,subforlder, file_basename)
#     metric = 'enspctl.95'
#     time_ensub, pcp_ensub, tmean_ensub, tmin_ensub, tmax_ensub, trange_ensub = read_ens(output_namebase, metric, start_yr, end_yr)

#     # define plot mask for nldas ensemble
#     mask_ens_t = (time_enslb>=plot_date_start_obj) & (time_enslb<=plot_date_end_obj)
    
#     print(' -- calculate temporal mean')
#     # calculate time series mean (ny,nx)
#     pcp_ensiqr = np.nanmean(pcp_ensub[mask_ens_t,:,:]-pcp_enslb[mask_ens_t,:,:],axis=0)     
#     tmean_ensiqr = np.nanmean(tmean_ensub[mask_ens_t,:,:]-tmean_enslb[mask_ens_t,:,:],axis=0)
#     tmin_ensiqr = np.nanmean(tmin_ensub[mask_ens_t,:,:]-tmin_enslb[mask_ens_t,:,:],axis=0)
#     tmax_ensiqr = np.nanmean(tmax_ensub[mask_ens_t,:,:]-tmax_enslb[mask_ens_t,:,:],axis=0)
#     trange_ensiqr = np.nanmean(trange_ensub[mask_ens_t,:,:]-trange_enslb[mask_ens_t,:,:],axis=0)
    
#     print(' -- extract unmasked values')
#     # extract unmasked values
#     pcp_ensiqr=pcp_ensiqr[mask_xy!=0]    
#     tmean_ensiqr=tmean_ensiqr[mask_xy!=0] 
#     tmin_ensiqr=tmin_ensiqr[mask_xy!=0]  
#     tmax_ensiqr=tmax_ensiqr[mask_xy!=0]   
#     trange_ensiqr=trange_ensiqr[mask_xy!=0] 
    
#     # save to array
#     if k == 0:
#         grid_num = len(pcp_ensiqr)
#         pcp_iqr_arr = np.zeros((grid_num,scenario_num))
#         tmean_iqr_arr = np.zeros((grid_num,scenario_num)) 
#         tmin_iqr_arr = np.zeros((grid_num,scenario_num)) 
#         tmax_iqr_arr = np.zeros((grid_num,scenario_num))
#         trange_iqr_arr = np.zeros((grid_num,scenario_num))
    
#     pcp_iqr_arr[:,k] = pcp_ensiqr
#     tmean_iqr_arr[:,k] = tmean_ensiqr 
#     tmin_iqr_arr[:,k] = tmin_ensiqr
#     tmax_iqr_arr[:,k] = tmax_ensiqr
#     trange_iqr_arr[:,k] = trange_ensiqr
    
#     del pcp_ensiqr, tmean_ensiqr, tmin_ensiqr, tmax_ensiqr, trange_ensiqr
#     del pcp_enslb, tmean_enslb, tmin_enslb, tmax_enslb, trange_enslb  
#     del pcp_ensub, tmean_ensub, tmin_ensub, tmax_ensub, trange_ensub 

# #======================================================================================================    
# # create a white-blue linear colormap
# print('create colormap')

# # reference: https://stackoverflow.com/questions/25408393/getting-individual-colors-from-a-color-map-in-matplotlib
# cmap = mpl.cm.get_cmap('jet') # get the blue color of jet 
# c0 = cmap(0.0)
# top = mpl.colors.LinearSegmentedColormap.from_list("", ["white",c0])

# # combine two linear colormaps to create a new one
# # reference: https://matplotlib.org/3.1.0/tutorials/colors/colormap-manipulation.html
# bottom = mpl.cm.get_cmap('jet')
# newcolors = np.vstack((top(np.linspace(0, 1, int(256*0.1))),bottom(np.linspace(0, 1, int(256*0.9)))))
# newcmp = mpl.colors.LinearSegmentedColormap.from_list("WhiteJet", newcolors)

##======================================================================================================    
# plot
print('Plot')
var_list = ['Precp', 'Tmean', 'Tmin', 'Tmax', 'Trange']
# Raw strings: '\c' in '$^\circ$' is not a valid Python escape sequence.
var_units = ['(mm/d)', r'($^\circ$C)', r'($^\circ$C)', r'($^\circ$C)', r'($^\circ$C)']
# Name prefix of the precomputed arrays <prefix>_iqr and <prefix>_iqr_arr.
iqr_prefixes = ['pcp', 'tmean', 'tmin', 'tmax', 'trange']

# Look data and units up by variable name rather than by loop position, so
# that plotting a subset of var_list cannot pair a label with the wrong data.
unit_by_var = dict(zip(var_list, var_units))
prefix_by_var = dict(zip(var_list, iqr_prefixes))
# var_list = ['Precp']

# This section relies on arrays and a colormap that are produced by the
# sections commented out above. Check them upfront so that a fresh kernel
# gets one readable error listing everything that is missing.
required_names = ['newcmp']
for var in var_list:
    required_names += [prefix_by_var[var] + '_iqr', prefix_by_var[var] + '_iqr_arr']
missing_names = [name for name in required_names if name not in globals()]
if missing_names:
    raise NameError('Plot inputs are not defined: ' + ', '.join(missing_names)
                    + '. Run the data-loading and colormap sections'
                    + ' (currently commented out) before plotting.')

# panel grid: 3 x 3 = 9 sampling scenarios in total
nrow = 3
ncol = 3

for m, var in enumerate(var_list): # loop over the selected variables
    unit = unit_by_var[var]
    output_filename = output_filename_base+var+'.png'
    print(var)

    # data selection
    # NOTE: need to remove (-1) when fixing the stn_summary issue
    iqr = (-1)*globals()[prefix_by_var[var] + '_iqr']
    ens_iqr_arr = globals()[prefix_by_var[var] + '_iqr_arr']

    # xy axis range (taken from the station IQR only)
#     vmin_ensiqr=np.nanmin(ens_iqr_arr)
#     vmax_ensiqr=np.nanmax(ens_iqr_arr)

#     vmin = np.nanmin([vmin_ensiqr,np.nanmin(iqr)])
#     vmax = np.nanmax([vmax_ensiqr,np.nanmax(iqr)])

    vmin = np.nanmin(iqr)
    vmax = np.nanmax(iqr)

    # MAE between each scenario's IQR and the station IQR
    mae=[np.nanmean(np.absolute(ens_iqr_arr[:,j]-iqr)) for j in range(scenario_num)]

    # plot each variable separately
    fig, ax = plt.subplots(nrow, ncol, figsize=(4,4.5))

    for i in range(nrow):
        for j in range(ncol):
            k = i*ncol+j  # scenario shown in this panel
            panel = ax[i,j]

#             print('sample scenario '+str(k+1))

            # 2D histograms
            # https://python-graph-gallery.com/83-basic-2d-histograms-with-matplotlib/
            x = iqr
            y = ens_iqr_arr[:,k]
            hist = panel.hist2d(x, y, bins=(200, 200),cmap=newcmp,
                                range=[[vmin, vmax], [vmin, vmax]]) # return (counts, xedges, yedges, Image)

            # diagonal
            panel.plot([vmin, vmax],[vmin, vmax],color='grey',linewidth=0.5, alpha=0.6)

            # MAE text
            mae_str = 'MAE = '+str(round(mae[k],2))
            panel.text(0.5, 0.92,s=mae_str,fontsize='xx-small',fontstyle='italic',
                       horizontalalignment='center', verticalalignment='center', transform=panel.transAxes)

            # limit
            panel.set_xlim(vmin, vmax)
            panel.set_ylim(vmin, vmax)

            # label: x label on the bottom row only, y label on the first column only
            if i == nrow-1:
                xlabel = 'Station Ensemble IQR\nof '+var+' '+unit
                panel.set_xlabel(xlabel, fontsize='xx-small')
            if j == 0:
                ylabel = 'NLDAS Ensemble IQR\nof '+var+' '+unit
                panel.set_ylabel(ylabel, fontsize='xx-small')

            # tick: tick labels on the outer panels only (first column / bottom row)
            panel.tick_params(axis='both', direction='out',labelsize = 'xx-small',
                              length=2, width=0.5, pad=1.5)
            panel.tick_params(axis='both', labelleft=(j == 0), labelbottom=(i == nrow-1))

            # title
            title_str = 'Scenario '+str(k+1) +' (interval = '+str(intervals[k])+')'
            panel.set_title(title_str, fontsize='xx-small', fontweight='semibold')

            # change subplot border width
            for side in ['top','bottom','left','right']:
                panel.spines[side].set_linewidth(0.5)

    # colorbar (built from the histogram of the last panel drawn)
    fig.subplots_adjust(bottom=0.17, top=1, left = 0, right=1, wspace = 0.07, hspace = 0.25)
    cax = fig.add_axes([0.25, 0.05, 0.5, 0.02]) #[left, bottom, width, height]
    cbar = fig.colorbar(hist[3], cax=cax, orientation='horizontal')

    # set the colorbar ticks and tick labels
    tick1 = hist[0].max()*0.5
    tick2 = hist[0].max()
    cbar.set_ticks([0, tick1, tick2])
    cbar.set_ticklabels(['Low', 'Medium', 'High'])
    cbar.ax.tick_params(labelsize='xx-small', length=2, width=1)

    cbar.set_label(label='Number of grids per pixel',size='xx-small')

    # save plot
    fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value,
                bbox_inches = 'tight', pad_inches = 0.05)
    plt.close(fig)

print('Done')


Plot
Precp
Tmean
Tmin
Tmax
Trange
Done


In [25]:
# NOTE(review): common_left and common_right are not assigned in any cell
# visible here; this cell presumably relies on leftover kernel state from
# another cell - confirm where they come from, or remove this cell.
common_left, common_right

(-0.4068713620305062, 18.381982365250586)