In [15]:
#!/usr/bin/env python
# coding: utf-8

# This script reads the regression error fields of each sampling scenario, averages them over the plot period, and compares the scenarios with box plots
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import xarray as xr
import datetime

def read_regr(file):
    """Read the time axis and the six regression error fields from a NetCDF file.

    Parameters
    ----------
    file : str or path-like
        Path of the NetCDF file to open with ``xarray.open_dataset``.

    Returns
    -------
    tuple
        ``(time, pcp_error, pcp_error_2, tmean_error, tmean_error_2,
        trange_error, trange_error_2)``. ``time`` is a ``pandas.DatetimeIndex``
        built from the ``time`` variable; the other items are the ``.values``
        arrays of the variables with the same names.
    """
    error_names = ('pcp_error', 'pcp_error_2',
                   'tmean_error', 'tmean_error_2',
                   'trange_error', 'trange_error_2')

    # The context manager closes the file handle as soon as the data are read.
    # `.values` loads each variable into memory, so the arrays stay usable
    # after the dataset has been closed.
    with xr.open_dataset(file) as f:
        time = pd.DatetimeIndex(f['time'].values)
        errors = tuple(f.variables[name].values for name in error_names)

    return (time,) + errors

#======================================================================================================
# main script: configuration (input paths, scenarios, plot period, output location)
root_dir='/glade/u/home/hongli/scratch/2019_10_01gssha/ens_forc_wrf2'
grid_file = os.path.join(root_dir,'GMET_tpl/inputs/gridinfo.nc')

result_dir = os.path.join(root_dir,'test_uniform')
# result_dir = os.path.join(root_dir,'test_uniform_v1')
# Scenario folders, sorted by name. Only directories are kept so that a stray
# file inside result_dir cannot be mistaken for a scenario.
test_folders = sorted(d for d in os.listdir(result_dir)
                      if os.path.isdir(os.path.join(result_dir, d)))

# indices into test_folders of the scenarios to process
scenarios_ids = np.arange(0,3)
intervals =  np.arange(3,0,-1)  # NOTE(review): not referenced by the code visible here
scenario_num = len(scenarios_ids)

# location of the regression output inside each scenario folder
subforlder = 'outputs'
regr_filename = 'regress_ts.nc'

# plot period, parsed into datetime objects for comparison with the time axis
time_format = '%Y-%m-%d'
plot_date_start = '2017-12-02'
plot_date_end = '2018-04-07'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

dpi_value = 150
output_dir=os.path.join(root_dir, 'scripts/step18_plot_regr_error_box')
# exist_ok avoids the check-then-create race of a separate os.path.exists test
os.makedirs(output_dir, exist_ok=True)
output_filename = 'step18_plot_regr_error_box.png'
# output_filename = 'step18_plot_regr_error_box_v1.png'
    
#======================================================================================================
print('Read gridinfo mask')
# get xy mask from gridinfo.nc; the context manager closes the file once the
# mask values have been loaded into memory
with xr.open_dataset(grid_file) as f:
    mask_xy = f['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.
# boolean mask: True where the grid cell is valid
mask_xy = (mask_xy!=0)

#======================================================================================================
# read scenario regression results and collect the per-grid temporal means
print('Read regression uncertainty')

for k in range(scenario_num):

    test_folder = test_folders[scenarios_ids[k]]

    print(test_folder)
    test_dir = os.path.join(result_dir, test_folder)

    print(' -- read spatial uncertainty')
    # read regression uncertainty: the first item is the time axis, the rest are
    # the error fields in the order pcp, pcp_2, tmean, tmean_2, trange, trange_2
    output_file = os.path.join(test_dir,subforlder, regr_filename)
    time_regr, *error_fields = read_regr(output_file)

    # time steps that fall inside the plot period (both ends inclusive)
    mask_ens_t = (time_regr>=plot_date_start_obj) & (time_regr<=plot_date_end_obj)

    print(' -- calculate temporal mean')
    # temporal mean of every field over the plot period, NaNs ignored -> (ny,nx)
    error_means = [np.nanmean(field[mask_ens_t,:,:],axis=0) for field in error_fields]

    print(' -- extract unmasked values')
    # keep only the valid grid cells -> one 1-D vector per field
    error_means = [field_mean[mask_xy] for field_mean in error_means]

    # allocate the (grid, scenario) result arrays once the number of valid
    # grid cells is known from the first scenario
    if k == 0:
        grid_num = len(error_means[0])
        error_mean_arrs = [np.zeros((grid_num,scenario_num)) for _ in error_means]

    # store this scenario as column k of every result array
    for error_mean_arr, error_mean in zip(error_mean_arrs, error_means):
        error_mean_arr[:,k] = error_mean

    # release the large per-scenario arrays before the next file is read
    del error_fields, error_means, error_mean, error_mean_arr

# one (grid_num, scenario_num) array per variable, under the names used by the
# save step and the inspection cells
(pcp_error_mean_arr, pcp_error_2_mean_arr,
 tmean_error_mean_arr, tmean_error_2_mean_arr,
 trange_error_mean_arr, trange_error_2_mean_arr) = error_mean_arrs

#======================================================================================================
# save
print('Save')
var_list = ["Precp'", "Precp_2'", 'Tmean', 'Tmean_2', 'Trange', 'Trange_2']

# result arrays listed in the same order as var_list
regr_unc_arrs = [pcp_error_mean_arr, pcp_error_2_mean_arr,
                 tmean_error_mean_arr, tmean_error_2_mean_arr,
                 trange_error_mean_arr, trange_error_2_mean_arr]

ncol = 2
nrow = int(np.ceil(len(var_list)/ncol))
for kk in range(nrow*ncol):
    var_name = var_list[kk]
    print(var_name)

    # select the array that belongs to this variable
    data = regr_unc_arrs[kk]

    # write the time-series mean uncertainty of all valid grids and all
    # scenarios to one comma-separated text file per variable
    output_filename_txt = var_name+'_regr_unc.txt'
    txt_path = os.path.join(output_dir, output_filename_txt)
    np.savetxt(txt_path, data, delimiter=',', fmt='%f',
               header='Col is sample scenario. Row is the time-series mean of regr unc in flatten valid grids. The last col is for stn_regr.')

#======================================================================================================
# plot: one box-plot panel per variable, one box per scenario
print('Plot')
var_list = ["Precp'", "Precp_2'", 'Tmean', 'Tmean_2', 'Trange', 'Trange_2']

ncol = 2
nrow = int(np.ceil(len(var_list)/ncol))
# squeeze=False keeps `ax` two-dimensional even when nrow or ncol is 1
fig, ax = plt.subplots(nrow, ncol, figsize=(3*ncol,3*0.6*nrow), squeeze=False)

# x tick labels: one per processed scenario, taken from the folders selected
# through scenarios_ids (not from every folder found in result_dir)
x_ticks = [test_folders[s].replace('grids','') for s in scenarios_ids]

for kk, var_name in enumerate(var_list):
    i, j = divmod(kk, ncol)
    print(var_name)

    # load time-series mean of uncertainty (skiprows=1 skips the header line)
    output_filename_txt = var_name+'_regr_unc.txt'
    data = np.loadtxt(os.path.join(output_dir, output_filename_txt), delimiter=',', skiprows=1)

    # boxplot
    # reference: https://matplotlib.org/3.1.1/gallery/statistics/boxplot_demo.html
    bp = ax[i,j].boxplot(data, sym='o')
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='black')
    plt.setp(bp['fliers'], color='red', marker='o',markersize=1.2)

    # Add a horizontal grid to the plot, but make it very light in color
    # so we can use it for reading data values but not be distracting
    ax[i,j].yaxis.grid(True, linestyle='-', which='major', color='lightgrey',alpha=0.5)
    ax[i,j].set_axisbelow(True)

    # Annotate every box with its median (two decimal places). The x position
    # is in data coordinates (boxes sit at 1, 2, ...; the label is shifted 0.2
    # to the right), the y position is in axes coordinates.
    medians = [median_line.get_ydata()[0] for median_line in bp['medians']]
    for box_idx, median in enumerate(medians):
        ax[i,j].text(box_idx+1.2, 0.9, str(np.round(median, 2)),
                     transform=ax[i,j].get_xaxis_transform(),
                     horizontalalignment='center', size='xx-small',
                     fontstyle='italic', color='b')

    # set axis labels (x label only on the bottom row)
    y_lable = 'Regression uncertainty'
    ax[i,j].set_ylabel(y_lable, fontsize='xx-small')
    if i == nrow-1:
        ax[i,j].set_xlabel('Number of sampled grids', fontsize='xx-small')

    ax[i,j].set_xticklabels(x_ticks)
    ax[i,j].tick_params(axis='both', direction='out',labelsize = 'xx-small',
                        length=1.5, width=0.5, pad=1.5)
    # title: "(a) <variable>", "(b) <variable>", ...
    alpha = chr(ord('a') + kk)
    ax[i,j].set_title('('+alpha+') '+var_name, pad=4, fontsize='xx-small', fontweight='semibold')

# hide panels that have no variable (only happens for an odd number of variables)
for unused_ax in ax.flat[len(var_list):]:
    unused_ax.set_visible(False)

# save plot
fig.tight_layout(pad=1, h_pad=0.8)
fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value, bbox_inches = 'tight', pad_inches = 0.05)
plt.close(fig)

print('Done')

Read gridinfo mask
Read regression uncertainty
046grids
 -- read spatial uncertainty
 -- calculate temporal mean
 -- extract unmasked values
099grids
 -- read spatial uncertainty
 -- calculate temporal mean
 -- extract unmasked values
393grids
 -- read spatial uncertainty
 -- calculate temporal mean
 -- extract unmasked values
Save
Precp'
Precp_2'
Tmean
Tmean_2
Trange
Trange_2
Plot
Precp'
Precp_2'
Tmean
Tmean_2
Trange
Trange_2
Done


In [10]:
# Inspect the sorted list of entries found in result_dir.
test_folders

['046grids', '099grids', '393grids']

In [3]:
# Spot check: row 392, column 0 of the two Trange mean-error arrays
# (rows are valid grid cells, columns are scenarios).
i,j=392,0
trange_error_mean_arr[i,j],trange_error_2_mean_arr[i,j]

(0.8064803139665934, 0.8240288874999745)

In [7]:
# Median over the grid-cell axis (axis 0) of the two Trange arrays: one value per scenario column.
np.nanmedian(trange_error_mean_arr,axis=0),np.nanmedian(trange_error_2_mean_arr,axis=0)

(array([0.8343549 , 0.72911894, 0.51133275]),
 array([0.85072415, 0.75279393, 0.54295079]))

In [5]:
# Shape check: (number of valid grid cells, number of scenarios).
np.shape(trange_error_mean_arr)

(393, 3)