In [4]:
#!/usr/bin/env python
# coding: utf-8

# This script is used to compare ensemble outputs with NLDAS data
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import xarray as xr
import datetime

def read_stn_ens_metric(out_forc_name_base, start_yr, end_yr):
    """Read yearly station-ensemble summary files and stack them along axis 0.

    For every year in ``[start_yr, end_yr]`` (inclusive) the file
    ``<out_forc_name_base>/ens_forc.sumamry.<yr>.nc`` is opened and the
    variables ``pcp_std``, ``tmean_std``, ``tmin_std``, ``tmax_std``,
    ``trange_std`` and ``time`` are read into memory.

    Parameters
    ----------
    out_forc_name_base : str
        Directory that holds the yearly summary files.
    start_yr, end_yr : int
        First and last year to read (both inclusive).

    Returns
    -------
    tuple
        ``(time_obj, pcp_std, tmean_std, tmin_std, tmax_std, trange_std)``
        where ``time_obj`` is ``pd.to_datetime`` of the concatenated ``time``
        variable and the remaining items are numpy arrays concatenated over
        the years along their first axis.

    Raises
    ------
    ValueError
        If ``start_yr > end_yr`` (there would be nothing to read).
    """
    if start_yr > end_yr:
        raise ValueError(
            'start_yr (%s) must not be greater than end_yr (%s)' % (start_yr, end_yr))

    var_names = ('pcp_std', 'tmean_std', 'tmin_std', 'tmax_std', 'trange_std', 'time')
    yearly = {name: [] for name in var_names}

    for yr in range(start_yr, end_yr + 1):
        # NOTE(review): 'sumamry' (sic) is kept as-is; presumably it matches the
        # file names on disk -- verify before correcting the spelling.
        file = os.path.join(out_forc_name_base, 'ens_forc.sumamry.' + str(yr) + '.nc')

        # The context manager closes the file handle once the values are in memory.
        with xr.open_dataset(file) as f_stn:
            for name in var_names:
                yearly[name].append(f_stn[name].values)

    # Concatenate once at the end instead of re-copying the arrays every year.
    stacked = {name: np.concatenate(yearly[name], axis=0) for name in var_names}
    time_obj = pd.to_datetime(stacked['time'])

    return (time_obj, stacked['pcp_std'], stacked['tmean_std'],
            stacked['tmin_std'], stacked['tmax_std'], stacked['trange_std'])

def read_nldas_ens_metric(out_forc_name_base, metric, start_yr, end_yr):
    """Read yearly NLDAS-ensemble metric files and stack them along axis 0.

    For every year in ``[start_yr, end_yr]`` (inclusive) the file
    ``<out_forc_name_base>.<yr>.<metric>.nc`` is opened and the variables
    ``pcp``, ``t_mean``, ``t_min``, ``t_max``, ``t_range`` and ``time`` are
    read into memory.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix of the yearly files (everything before ``.<yr>``).
    metric : str
        Metric tag that forms part of the file name (e.g. ``'ensstd'``).
    start_yr, end_yr : int
        First and last year to read (both inclusive).

    Returns
    -------
    tuple
        ``(time_obj, pcp, tmean, tmin, tmax, trange)`` where ``time_obj`` is
        ``pd.to_datetime`` of the concatenated ``time`` variable and the other
        items are numpy arrays concatenated over the years along their first
        axis (the original code comments describe them as ``(time, y, x)``).

    Raises
    ------
    ValueError
        If ``start_yr > end_yr`` (there would be nothing to read).
    """
    if start_yr > end_yr:
        raise ValueError(
            'start_yr (%s) must not be greater than end_yr (%s)' % (start_yr, end_yr))

    var_names = ('pcp', 't_mean', 't_min', 't_max', 't_range', 'time')
    yearly = {name: [] for name in var_names}

    for yr in range(start_yr, end_yr + 1):
        # The name is built by plain string concatenation; the base is a prefix,
        # not a directory, so no path joining is involved.
        file = out_forc_name_base + '.' + str(yr) + '.' + metric + '.nc'

        # The context manager closes the file handle once the values are in memory.
        with xr.open_dataset(file) as f:
            for name in var_names:
                yearly[name].append(f[name].values)

    # Concatenate once at the end instead of re-copying the arrays every year.
    stacked = {name: np.concatenate(yearly[name], axis=0) for name in var_names}
    time_obj = pd.to_datetime(stacked['time'])

    return (time_obj, stacked['pcp'], stacked['t_mean'], stacked['t_min'],
            stacked['t_max'], stacked['t_range'])

#======================================================================================================
# main script
# --- input locations and period -------------------------------------------------
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'
stn_ens_dir = os.path.join(root_dir,'data/stn_ens_summary')
start_yr = 2016
end_yr = 2016

stn_grid_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth.nc')
nldas_grid_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth_deg_v1p1.nc')

# --- sampling-scenario folders ---------------------------------------------------
result_dir = os.path.join(root_dir,'test_uniform_perturb')
# Keep sub-directories only: a stray file in result_dir would otherwise shift
# the positions that scenarios_ids indexes into.
test_folders = sorted(d for d in os.listdir(result_dir)
                      if os.path.isdir(os.path.join(result_dir, d)))
scenarios_ids = range(0,9)   # positions in test_folders of the scenarios to use
intervals =  range(10,1,-1)  # not referenced anywhere in the visible code
scenario_num = len(scenarios_ids)

subforlder = 'gmet_ens_summary'
file_basename = 'ens_forc'

# --- plotting period -------------------------------------------------------------
time_format = '%Y-%m-%d'
plot_date_start = '2016-01-01'
plot_date_end = '2016-12-31'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

# --- figure output ---------------------------------------------------------------
dpi_value = 600 #150
output_dir=os.path.join(root_dir, 'scripts/step28_plot_ens_unc_box')
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
output_filename = 'step28_plot_ens_unc_box_bias.png'
    
# #======================================================================================================
# print('Read gridinfo mask')
# # get xy mask from gridinfo.nc
# f_stn_grid = xr.open_dataset(stn_grid_file)
# stn_mask_xy = f_stn_grid['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.

# f_nldas_grid = xr.open_dataset(nldas_grid_file)
# nldas_mask_xy = f_nldas_grid['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.

# # commonly available area
# mask_xy = (stn_mask_xy!=0) & (nldas_mask_xy!=0) 

# #======================================================================================================
# # Read nldas ens summary
# print('Read nldas ens summary')

# for k in range(scenario_num):

#     test_folder = test_folders[scenarios_ids[k]]
    
#     print(test_folder)
#     test_dir = os.path.join(result_dir, test_folder)
#     fig_title= test_folder

#     print(' -- read spatial ensemble metric')
#     # read ensemble standard deviation (metric = 'ensstd')
#     output_namebase = os.path.join(test_dir,subforlder, file_basename)
#     metric = 'ensstd'
#     time, pcp_std, tmean_std, tmin_std, tmax_std, trange_std = read_nldas_ens_metric(output_namebase, metric, start_yr, end_yr)

#     # define plot mask for nldas ensemble
#     mask_ens_t = (time>=plot_date_start_obj) & (time<=plot_date_end_obj)
    
#     print(' -- calculate temporal mean')
#     # calculate time series mean (ny,nx)
#     pcp_std_nldas = np.nanmean(pcp_std[mask_ens_t,:,:],axis=0)     
#     tmean_std_nldas = np.nanmean(tmean_std[mask_ens_t,:,:],axis=0)
#     tmin_std_nldas = np.nanmean(tmin_std[mask_ens_t,:,:],axis=0)
#     tmax_std_nldas = np.nanmean(tmax_std[mask_ens_t,:,:],axis=0)
#     trange_std_nldas = np.nanmean(trange_std[mask_ens_t,:,:],axis=0)
    
#     print(' -- extract unmasked values')
#     # extract unmasked values
#     pcp_std_nldas=pcp_std_nldas[mask_xy]    
#     tmean_std_nldas=tmean_std_nldas[mask_xy] 
#     tmin_std_nldas=tmin_std_nldas[mask_xy]  
#     tmax_std_nldas=tmax_std_nldas[mask_xy]   
#     trange_std_nldas=trange_std_nldas[mask_xy] 
    
#     # save to array
#     if k == 0:
#         grid_num = len(pcp_std_nldas)
#         pcp_std_arr = np.zeros((grid_num,scenario_num+1))
#         tmean_std_arr = np.zeros((grid_num,scenario_num+1)) 
#         tmin_std_arr = np.zeros((grid_num,scenario_num+1)) 
#         tmax_std_arr = np.zeros((grid_num,scenario_num+1))
#         trange_std_arr = np.zeros((grid_num,scenario_num+1))
    
#     pcp_std_arr[:,k] = pcp_std_nldas
#     tmean_std_arr[:,k] = tmean_std_nldas 
#     tmin_std_arr[:,k] = tmin_std_nldas
#     tmax_std_arr[:,k] = tmax_std_nldas
#     trange_std_arr[:,k] = trange_std_nldas
    
#     del pcp_std_nldas, tmean_std_nldas, tmin_std_nldas, tmax_std_nldas, trange_std_nldas
#     del pcp_std, tmean_std, tmin_std, tmax_std, trange_std  

# #======================================================================================================
# # read stn ens summary
# print('Read stn ens summary')

# time,pcp_std,tmean_std,tmin_std,tmax_std,trange_std = read_stn_ens_metric(stn_ens_dir, start_yr, end_yr)

# # get time mask from nldas data
# mask_t  = (time >= plot_date_start_obj) & (time <= plot_date_end_obj) 
# time = time[mask_t]

# # calculate time-series mean
# pcp_std_stn = np.nanmean(pcp_std[mask_t,:,:], axis=0) 
# tmean_std_stn = np.nanmean(tmean_std[mask_t,:,:], axis=0)
# tmin_std_stn = np.nanmean(tmin_std[mask_t,:,:], axis=0)
# tmax_std_stn = np.nanmean(tmax_std[mask_t,:,:], axis=0)
# trange_std_stn = np.nanmean(trange_std[mask_t,:,:], axis=0)

# # extract unmasked values
# pcp_std_stn=pcp_std_stn[mask_xy]
# tmean_std_stn=tmean_std_stn[mask_xy]
# tmin_std_stn=tmin_std_stn[mask_xy]
# tmax_std_stn=tmax_std_stn[mask_xy]
# trange_std_stn=trange_std_stn[mask_xy]

# # save to array
# pcp_std_arr[:,-1] = pcp_std_stn
# tmean_std_arr[:,-1] = tmean_std_stn 
# tmin_std_arr[:,-1] = tmin_std_stn
# tmax_std_arr[:,-1] = tmax_std_stn
# trange_std_arr[:,-1] = trange_std_stn

# del pcp_std_stn, tmean_std_stn, tmin_std_stn, tmax_std_stn, trange_std_stn
# del pcp_std,tmean_std,tmin_std,tmax_std,trange_std

#======================================================================================================    
# plot
print('Plot')
var_list = ["Precp", 'Tmean', 'Tmin', 'Tmax', 'Trange']
# Raw strings: '\c' is not a valid Python escape sequence, so the non-raw form
# triggers an invalid-escape warning on recent Python versions.
unit_list = ['(mm/day)', r'($^\circ$C)', r'($^\circ$C)', r'($^\circ$C)', r'($^\circ$C)']
# Upper y-axis limit of each panel, in the same order as var_list.
top_list = [20, 6, 6, 6, 4.5]

# The *_std_arr arrays are only created by the (currently commented-out) read
# section above. Use them when they exist in the kernel; otherwise fall back to
# the per-variable text cache in output_dir so the cell also runs after a
# kernel restart.
try:
    std_arrays = [pcp_std_arr, tmean_std_arr, tmin_std_arr, tmax_std_arr, trange_std_arr]
except NameError:
    std_arrays = None

nrow = len(var_list) # prcp, tmean, tmin, tmax, trange
ncol = 1
fig, ax = plt.subplots(nrow, ncol, figsize=(6.5,5.5*1.2))#, constrained_layout=True)

for i in range(nrow):
    print(var_list[i])

    # select data for each subplot
    if std_arrays is not None:
        data = std_arrays[i]
    else:
        # Cache layout (from the original save step): one column per sampling
        # scenario, last column for stn_regr, one row per valid grid cell.
        output_filename_txt = 'ens_std_mean_'+var_list[i]+'.txt'
        cache_file = os.path.join(output_dir, output_filename_txt)
        if not os.path.isfile(cache_file):
            raise FileNotFoundError(
                'No in-memory array and no cached file for ' + var_list[i] + ': '
                + cache_file + '. Run the read section first and save its result.')
        data = np.loadtxt(cache_file, delimiter=',')
    top = top_list[i]

    # To (re)create the cache once the arrays have been computed:
    # np.savetxt(os.path.join(output_dir, 'ens_std_mean_'+var_list[i]+'.txt'), data, delimiter=',',
    #            fmt='%f',header='Col is sample scenario. Row is the time-series mean std of flatten valid grids. The last col is for stn_regr.')

    # boxplot
    # reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html
    bp = ax[i].boxplot(data, sym='o')#, labels=labels)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='black')
    plt.setp(bp['fliers'], color='red', marker='o',markersize=0.8)

    # Add a horizontal grid to the plot, but make it very light in color
    # so we can use it for reading data values but not be distracting
    ax[i].yaxis.grid(True, linestyle='-', which='major', color='lightgrey',alpha=0.5)
    ax[i].set_axisbelow(True)

    # y_lim: fixed per-variable upper bound
    ax[i].set_ylim(bottom=0, top=top)

    # Because the y-axis scale differs between panels, annotate every box with
    # its median (two decimals) to make the scenarios easier to compare.
    n_box = len(bp['medians'])  # one box per column of data
    medians = [median_line.get_ydata()[0] for median_line in bp['medians']]
    upper_labels = [str(np.round(s, 2)) for s in medians]
    for tick in range(n_box):
        # Boxes sit at x = 1..n_box; the label is placed 0.2 to the right of its box.
        ax[i].text(tick+1.2, 0.9, upper_labels[tick],
                   transform=ax[i].get_xaxis_transform(),
                   horizontalalignment='center', size='xx-small',
                   fontstyle='italic', color='b') #pos[tick], 1.02

    # set y-axis label
    y_label = 'Ens Std.dev ' + unit_list[i]
    ax[i].set_ylabel(y_label, fontsize='xx-small')
    if i == nrow-1:
        ax[i].set_xlabel('Sampling Scenario', fontsize='xx-small')

    # Scenario numbers for all boxes but the last, which is the station ensemble.
    x_ticks = [str(x) for x in range(1, n_box)]
    x_ticks.append('stn_ens')
    ax[i].set_xticklabels(x_ticks)
    ax[i].tick_params(axis='both', direction='out',labelsize = 'xx-small',
                      length=1.5, width=0.5, pad=1.5)
    # title: "(a) Precp", "(b) Tmean", ...
    alpha = chr(ord('a') + i)
    ax[i].set_title('('+alpha+') '+var_list[i], pad=4,
                    fontsize='xx-small', fontweight='semibold') #pad=9

# save plot
fig.tight_layout(pad=0.1, h_pad=0.5) #h_pad=0.7
fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value, bbox_inches = 'tight', pad_inches = 0.05)
plt.close(fig)

print('Done')


Plot
Precp
Tmean
Tmin
Tmax
Trange
Done


In [4]:
# NOTE(review): leftover debugging cell. `pcp_std_stn` is only assigned inside
# the commented-out read section of the cell above (which also `del`s it), so
# this line raises NameError, as the recorded output shows. Remove the cell or
# point it at an array that actually exists.
np.shape(pcp_std_stn)

NameError: name 'pcp_std_stn' is not defined