In [5]:
#!/usr/bin/env python
# coding: utf-8

# This script is used to compare ensemble outputs with NLDAS data
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import xarray as xr
import datetime

def read_ens(out_forc_name_base, metric, start_yr, end_yr):
    """Read yearly ensemble summary files and join them along the time axis.

    For every year in ``[start_yr, end_yr]`` (both inclusive) the file
    ``<out_forc_name_base>.<year>.<metric>.nc`` is opened and the variables
    ``time``, ``pcp``, ``t_mean``, ``t_min``, ``t_max`` and ``t_range`` are
    loaded into memory.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix of the yearly files (directory plus file base name).
    metric : str
        Metric tag that is part of the file name (the calling script passes
        e.g. ``'enspctl.5'``).
    start_yr, end_yr : int
        First and last year to read.

    Returns
    -------
    tuple
        ``(time, pcp, t_mean, t_min, t_max, t_range)``. ``time`` is a
        ``pandas.DatetimeIndex``; the other five are numpy arrays concatenated
        along axis 0 (the original inline note gives ``pcp`` as (time, y, x)).
        Numpy arrays are returned even when only one year is read.

    Raises
    ------
    ValueError
        If ``end_yr`` is smaller than ``start_yr`` (nothing to read).
    """
    if end_yr < start_yr:
        raise ValueError(
            'end_yr (%s) must not be smaller than start_yr (%s)' % (end_yr, start_yr))

    var_names = ('time', 'pcp', 't_mean', 't_min', 't_max', 't_range')
    yearly = {name: [] for name in var_names}

    for yr in range(start_yr, end_yr + 1):
        file = out_forc_name_base + '.' + str(yr) + '.' + metric + '.nc'
        # The context manager closes the file handle; ``.values`` pulls the
        # data into memory first so the arrays stay usable afterwards.
        with xr.open_dataset(file) as f:
            for name in var_names:
                yearly[name].append(f[name].values)

    # Concatenate once per variable instead of re-copying the growing array
    # for every additional year.
    (time_concat, pcp_concat, tmean_concat,
     tmin_concat, tmax_concat, trange_concat) = (
        np.concatenate(yearly[name], axis=0) for name in var_names)

    time_concat = pd.DatetimeIndex(time_concat)

    return time_concat, pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat

def plot_basemap(llcrnrlon,llcrnrlat,urcrnrlon,urcrnrlat,ax,lat_0,lon_0,ny,nx):
    """Build a cylindrical-projection Basemap on ``ax`` and draw its decorations.

    Parameters
    ----------
    llcrnrlon, llcrnrlat : float
        Longitude / latitude of the lower-left map corner.
    urcrnrlon, urcrnrlat : float
        Longitude / latitude of the upper-right map corner.
    ax : matplotlib axes
        Axes the map is attached to.
    lat_0, lon_0, ny, nx
        Not used by the current body. ``lat_0`` / ``lon_0`` were only needed
        by a former transverse-Mercator ('tmerc') variant of the Basemap call.

    Returns
    -------
    The Basemap instance, after state, country and coast lines plus labelled
    parallels and meridians have been drawn.
    """
    # NOTE(review): ``Basemap`` is not imported anywhere in the visible
    # source; it has to be brought into scope before this function is called.
    basemap = Basemap(llcrnrlon, llcrnrlat, urcrnrlon, urcrnrlat,
                      resolution='l', projection='cyl', ax=ax)

    # Political boundaries and coastlines.
    basemap.drawstates(linewidth=0.25, linestyle='solid', color='grey')
    basemap.drawcountries(linewidth=0.25, linestyle='solid', color='k')
    basemap.drawcoastlines(linewidth=0.1, linestyle='solid', color='k')

    # Graticule: one parallel every 10 degrees, one meridian every 15 degrees.
    grid_style = dict(dashes=[1, 1], fontsize=5, linewidth=0.2, color='grey')
    parallels = np.arange(np.floor(llcrnrlat), np.ceil(urcrnrlat), 10)
    meridians = np.arange(np.floor(llcrnrlon), np.ceil(urcrnrlon), 15)

    # Label order is [left, right, top, bottom]: latitudes are labelled on
    # the left edge, longitudes on the bottom edge.
    basemap.drawparallels(parallels, labels=[True, False, False, False], **grid_style)
    basemap.drawmeridians(meridians, labels=[False, False, False, True], **grid_style)

    return basemap

# set the colormap and centre the colorbar
class MidpointNormalize(mpl.colors.Normalize):
    """Colour normalisation that pins a chosen data value to the colormap centre.

    Values are mapped piecewise-linearly so that ``vmin`` -> 0,
    ``midpoint`` -> 0.5 and ``vmax`` -> 1.

    source: http://chris35wills.github.io/matplotlib_diverging_colorbar/
    e.g. im=ax1.imshow(array, norm=MidpointNormalize(midpoint=0.,vmin=-300, vmax=1000))
    """

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        # Store the extra anchor first, then let the base class handle
        # vmin / vmax / clip.
        self.midpoint = midpoint
        super().__init__(vmin, vmax, clip)

    def __call__(self, value, clip=None):
        # ``clip`` is accepted for interface compatibility but not used.
        anchors = [self.vmin, self.midpoint, self.vmax]
        targets = [0, 0.5, 1]
        scaled = np.interp(value, anchors, targets)
        # NaN inputs are masked in the returned array.
        return np.ma.masked_array(scaled, np.isnan(value))

#======================================================================================================
# main script
# ---- input locations and analysis period ----
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'
nldas_dir = os.path.join(root_dir,'data/nldas_daily_utc_convert')
start_yr = 2015
end_yr = 2016

gridinfo_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth.nc')

# ---- sampling scenarios: one sub-folder of result_dir per scenario ----
result_dir = os.path.join(root_dir,'test_uniform_perturb')
# Keep real, non-hidden directories only: a stray file or a hidden folder
# (e.g. .ipynb_checkpoints) would otherwise shift the sorted positions that
# scenarios_ids indexes into.
test_folders = sorted(
    d for d in os.listdir(result_dir)
    if os.path.isdir(os.path.join(result_dir, d)) and not d.startswith('.')
)
scenarios_ids = range(0,9) #[0,1,5,8]
intervals =  range(10,1,-1) #[10,9,5,2]
scenario_num = len(scenarios_ids)

subforlder = 'gmet_ens_summary'
file_basename = 'ens_forc'

ens_num = 100
time_format = '%Y-%m-%d'

# ---- plot settings ----
dpi_value = 600
plot_date_start = '2015-01-01'
plot_date_end = '2016-12-31'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

# ---- output location ----
output_dir=os.path.join(root_dir, 'scripts/step18_plot_sample_iqr_box')
# exist_ok makes re-running the cell safe without a separate existence check.
os.makedirs(output_dir, exist_ok=True)
output_filename = 'step18_plot_sample_iqr_box.png'
    
#======================================================================================================
print('Read gridinfo mask')
# get xy mask from gridinfo.nc
# The context manager closes the file once the mask has been loaded;
# ``.values`` copies the data into memory, so mask_xy stays valid afterwards.
with xr.open_dataset(gridinfo_file) as f_gridinfo:
    mask_xy = f_gridinfo['mask'].values # (y, x). 1 is valid. 0 is invalid.
    #data_mask = f_gridinfo['data_mask'].values # (y, x). 1 is valid. 0 is invalid.

#======================================================================================================
# read scenario ensemble results and save to dictionary
print('Read ensemble data')

# Grid cells whose gridinfo mask value is non-zero.
valid_cells = mask_xy != 0

for k, scenario_id in enumerate(scenarios_ids):

    test_folder = test_folders[scenario_id]
    print(test_folder)
    test_dir = os.path.join(result_dir, test_folder)
    fig_title = test_folder

    print(' -- read spatial ensemble lower and upper bounds')
    # Lower / upper bound files ('enspctl.5' and 'enspctl.95', presumably the
    # 5th and 95th ensemble percentiles). read_ens returns the time index
    # followed by pcp, tmean, tmin, tmax and trange, in that order.
    output_namebase = os.path.join(test_dir, subforlder, file_basename)
    metric = 'enspctl.5'
    time_enslb, *lower_fields = read_ens(output_namebase, metric, start_yr, end_yr)
    metric = 'enspctl.95'
    time_ensub, *upper_fields = read_ens(output_namebase, metric, start_yr, end_yr)

    # time steps that fall inside the plotting window
    mask_ens_t = (time_enslb >= plot_date_start_obj) & (time_enslb <= plot_date_end_obj)

    print(' -- calculate temporal mean')
    # time-mean of (upper - lower) for every variable -> (ny, nx)
    mean_spreads = [
        np.nanmean(upper[mask_ens_t, :, :] - lower[mask_ens_t, :, :], axis=0)
        for upper, lower in zip(upper_fields, lower_fields)
    ]

    print(' -- extract unmasked values')
    # keep the valid grid cells only -> 1-D vector per variable
    pcp_ensiqr, tmean_ensiqr, tmin_ensiqr, tmax_ensiqr, trange_ensiqr = (
        spread[valid_cells] for spread in mean_spreads
    )

    # allocate the (grid cell, scenario) result tables on the first pass
    if k == 0:
        grid_num = len(pcp_ensiqr)
        (pcp_iqr_arr, tmean_iqr_arr, tmin_iqr_arr,
         tmax_iqr_arr, trange_iqr_arr) = (
            np.zeros((grid_num, scenario_num)) for _ in range(5)
        )

    # store this scenario as column k
    pcp_iqr_arr[:, k] = pcp_ensiqr
    tmean_iqr_arr[:, k] = tmean_ensiqr
    tmin_iqr_arr[:, k] = tmin_ensiqr
    tmax_iqr_arr[:, k] = tmax_ensiqr
    trange_iqr_arr[:, k] = trange_ensiqr

    # release the large per-scenario arrays before the next scenario is read
    del pcp_ensiqr, tmean_ensiqr, tmin_ensiqr, tmax_ensiqr, trange_ensiqr
    del lower_fields, upper_fields, mean_spreads

#======================================================================================================    
# plot
print('Plot')
var_list = ['Precp', 'Tmean']#, 'Tmin', 'Tmax', 'Trange']
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
var_units = ['(mm/d)', r'($^\circ$C)']#,r'($^\circ$C)',r'($^\circ$C)',r'($^\circ$C)']
# labels=[str(k+1)+' ('+str(10-k)+')' for k in range(0,9)]

# Look the data up by variable name so that enabling more entries in
# var_list always selects the matching (grid cell, scenario) table.
var_data = {
    'Precp': pcp_iqr_arr,
    'Tmean': tmean_iqr_arr,
    'Tmin': tmin_iqr_arr,
    'Tmax': tmax_iqr_arr,
    'Trange': trange_iqr_arr,
}

nrow = len(var_list) # one subplot row per variable
ncol = 1 # all scenarios share one panel per variable
# squeeze=False always returns a 2-D axes array, so indexing also works when
# only a single variable is plotted; the single column is then selected.
fig, ax = plt.subplots(nrow, ncol, figsize=(5.5,5.5), squeeze=False)#, constrained_layout=True)
ax = ax[:, 0]
# fig.suptitle(fig_title, fontsize='x-small', fontweight='semibold', color='g')

for i in range(nrow):
    print(var_list[i])

    # select data for each subplot
    data = var_data[var_list[i]]

    # boxplot: one box per scenario (column of data)
    # reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html
    bp = ax[i].boxplot(data, sym='+')#, labels=labels)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='black')
    plt.setp(bp['fliers'], color='red', marker='+',markersize=1.5)

    # Add a horizontal grid to the plot, but make it very light in color
    # so we can use it for reading data values but not be distracting
    ax[i].yaxis.grid(True, linestyle='-', which='major', color='lightgrey',alpha=0.5)
    ax[i].set_axisbelow(True)

    # y_lim: 5 % of the data range below the minimum, 5 % of the maximum
    # above it (the upper padding assumes a positive maximum)
    bottom=np.nanmin(data)-0.05*(np.nanmax(data)-np.nanmin(data))
    top=np.nanmax(data)*1.05
    ax[i].set_ylim(bottom=bottom, top=top)

    # Due to the Y-axis scale being different across samples, it can be
    # hard to compare differences in medians across the samples. Add upper
    # X-axis tick labels with the sample medians to aid in comparison
    # (just use two decimal places of precision)
    pos = np.arange(scenario_num) + 1
    medians = [median_line.get_ydata()[0] for median_line in bp['medians'][:scenario_num]]
    upper_labels = [str(np.round(s, 2)) for s in medians]
    for x_pos, upper_label in zip(pos, upper_labels):
        ax[i].text(x_pos, 1.02, upper_label,
                   transform=ax[i].get_xaxis_transform(),
                   horizontalalignment='center', size='xx-small',
                   fontstyle='italic', color='b')

    # set y-axis label
    y_lable = 'Ensemble IQR\n'+var_list[i]+' '+var_units[i]
    ax[i].set_ylabel(y_lable, fontsize='xx-small')
    if i == nrow-1:
        ax[i].set_xlabel('Sampling Scenario', fontsize='xx-small')

    ax[i].tick_params(axis='both', labelsize = 'xx-small', pad=1.5)

# save plot
# tight_layout must be *called*; the bare attribute access was a no-op.
fig.tight_layout()
fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value, bbox_inches = 'tight', pad_inches = 0.05)
plt.close(fig)

print('Done')


Read gridinfo mask
Read ensemble data
00810grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
00974grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
01225grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
01610grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
02251grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
03186grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
04951grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
08884grids
 -- read spatial ensemble lower and upper bounds
 -- calculate temporal mean
 -- extract unmasked values
18074grids
 -- read spatial ensemb

In [3]:
# Scratch check of the (time, y, x) shape of the upper-minus-lower difference.
# NOTE(review): pcp_ensub and pcp_enslb are deleted at the end of every loop
# iteration in the main cell, so this cell only runs on leftover kernel state
# and fails after a fresh Restart & Run All.
a = pcp_ensub[mask_ens_t, :, :] - pcp_enslb[mask_ens_t, :, :]
a.shape

(731, 224, 464)