In [12]:
#!/usr/bin/env python
# coding: utf-8

# This script plots the day-of-year (DOY) mean precipitation regression uncertainty,
# split by whether the DOY-mean NLDAS precipitation is zero or non-zero.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os,scipy
import pandas as pd
import xarray as xr
import datetime

def read_nldas_regr(out_forc_name_base, start_yr, end_yr):
    """Read the yearly regression files and join them along the first axis.

    One file is read per year; its name is ``<out_forc_name_base>.<year>.nc``.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix of the yearly files (everything before ``.<year>.nc``).
    start_yr, end_yr : int
        First and last year to read, both inclusive.

    Returns
    -------
    time_concat : pandas.DatetimeIndex
        ``time`` values of all years, in year order.
    pcp_error_concat : numpy.ndarray
        ``pcp_error_update`` of all years concatenated along axis 0
        (presumably (time, y, x) -- confirm against the files).
    pcp_error_2_concat : numpy.ndarray
        ``pcp_error_2_update`` of all years concatenated along axis 0.

    Raises
    ------
    ValueError
        If ``end_yr`` is smaller than ``start_yr`` (no file would be read).

    Notes
    -----
    The data arrays are always returned as NumPy arrays, also when only a
    single year is read, and every file is closed as soon as it is loaded.
    """
    if end_yr < start_yr:
        raise ValueError('end_yr (%s) must not be smaller than start_yr (%s)'
                         % (end_yr, start_yr))

    # collect the yearly pieces and concatenate once at the end, instead of
    # re-copying the growing arrays on every iteration
    time_parts, pcp_error_parts, pcp_error_2_parts = [], [], []
    for yr in range(start_yr, end_yr+1):
        file = out_forc_name_base + '.' + str(yr) + '.nc'
        # .values loads the data into memory, so the file can be closed right away
        with xr.open_dataset(file) as f:
            time_parts.append(f['time'].values)
            pcp_error_parts.append(f.variables['pcp_error_update'].values)
            pcp_error_2_parts.append(f.variables['pcp_error_2_update'].values)

    time_concat = pd.DatetimeIndex(np.concatenate(time_parts, axis=0)) # (time)
    pcp_error_concat = np.concatenate(pcp_error_parts, axis=0)
    pcp_error_2_concat = np.concatenate(pcp_error_2_parts, axis=0)

    return time_concat, pcp_error_concat, pcp_error_2_concat

def read_ens(out_forc_name_base, metric, start_yr, end_yr):
    """Read the yearly ensemble summary files and join them along the first axis.

    One file is read per year; its name is
    ``<out_forc_name_base>.<year>.<metric>.nc``.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix of the yearly files (everything before ``.<year>``).
    metric : str
        Metric tag that is part of the file name.
    start_yr, end_yr : int
        First and last year to read, both inclusive.

    Returns
    -------
    time_concat : pandas.DatetimeIndex
        ``time`` values of all years, in year order.
    pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat : numpy.ndarray
        The file variables ``pcp``, ``t_mean``, ``t_min``, ``t_max`` and
        ``t_range`` of all years, each concatenated along axis 0
        (presumably (time, y, x) -- confirm against the files).

    Raises
    ------
    ValueError
        If ``end_yr`` is smaller than ``start_yr`` (no file would be read).

    Notes
    -----
    The data arrays are always returned as NumPy arrays, also when only a
    single year is read, and every file is closed as soon as it is loaded.
    """
    if end_yr < start_yr:
        raise ValueError('end_yr (%s) must not be smaller than start_yr (%s)'
                         % (end_yr, start_yr))

    # file variable names, in the order the results are returned
    var_names = ('pcp', 't_mean', 't_min', 't_max', 't_range')

    # collect the yearly pieces and concatenate once at the end, instead of
    # re-copying the growing arrays on every iteration
    time_parts = []
    var_parts = {name: [] for name in var_names}
    for yr in range(start_yr, end_yr+1):
        file = out_forc_name_base + '.' + str(yr) + '.'+metric+'.nc'
        # .values loads the data into memory, so the file can be closed right away
        with xr.open_dataset(file) as f:
            time_parts.append(f['time'].values)
            for name in var_names:
                var_parts[name].append(f.variables[name].values)

    time_concat = pd.DatetimeIndex(np.concatenate(time_parts, axis=0)) # (time)
    pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat = [
        np.concatenate(var_parts[name], axis=0) for name in var_names]

    return time_concat, pcp_concat, tmean_concat, tmin_concat, tmax_concat, trange_concat

#======================================================================================================
# main script
# --- input locations -------------------------------------------------------
root_dir = '/glade/u/home/hongli/scratch/2020_04_21nldas_gmet'   
stn_ens_dir = os.path.join(root_dir,'data/stn_ens_summary')
nldas_dir = os.path.join(root_dir,'data/nldas_daily_utc_convert')
start_yr = 2013
end_yr = 2016

gridinfo_file = os.path.join(root_dir,'data/nldas_topo/conus_ens_grid_eighth.nc')

# --- scenario folders ------------------------------------------------------
# every entry of result_dir counts as a scenario folder; sorting makes the
# positions addressed through scenarios_ids independent of the listing order
result_dir = os.path.join(root_dir,'test_uniform_perturb')
test_folders = sorted(os.listdir(result_dir))
scenarios_ids = range(0,9) 

# --- plotted period --------------------------------------------------------
time_format = '%Y-%m-%d'
plot_date_start = '2013-01-01'
plot_date_end = '2016-12-31'
plot_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
plot_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

# --- figure output ---------------------------------------------------------
dpi_value = 150
output_dir=os.path.join(root_dir, 'scripts/step27_plot_unctainty_DOY_pcp')
# exist_ok avoids the check-then-create race of a separate os.path.exists() test
os.makedirs(output_dir, exist_ok=True)
    
#======================================================================================================
# NOTE(review): this whole section used to be commented out, although the
# plotting code below needs df_nlds2, time_regr, pcp_error and pcp_error_2.
# It is active again so the notebook also runs on a fresh kernel.
print('Read gridinfo mask')
# get xy mask from gridinfo.nc
with xr.open_dataset(gridinfo_file) as f_gridinfo:
    mask_xy = f_gridinfo['mask'].values # (y, x). 1 is valid. 0 is invalid.
valid_xy = mask_xy != 0 # boolean (y, x) selector of the valid grid cells

#======================================================================================================
# read historical nldas data
print('Read nldas data')
pcp_parts, time_parts = [], []
for yr in range(start_yr, end_yr+1):

    nldas_file = 'NLDAS_'+str(yr)+'.nc'
    nldas_path = os.path.join(nldas_dir, nldas_file)

    # .values loads the data into memory, so the file can be closed right away
    with xr.open_dataset(nldas_path) as f_nldas:
        pcp_parts.append(f_nldas['pcp'].values) # (time, y, x). unit: mm/day
        time_parts.append(f_nldas['time'].values)

pcp = np.concatenate(pcp_parts, axis = 0)
time = np.concatenate(time_parts, axis = 0)
del pcp_parts, time_parts

# get time mask from nldas data
time_obj = pd.to_datetime(time)
mask_t  = (time_obj >= plot_date_start_obj) & (time_obj <= plot_date_end_obj) 
time_nldas = time_obj[mask_t]
nt_nldas = len(time_nldas)

# keep the plot period and the valid cells: (time, y, x) -> (nt_nldas, n_valid).
# The time mask was previously computed but never applied to pcp, which only
# worked while the plot period covered every time step that was read.
# Indexing with the 2-D mask gives the same cell order as masking with a
# repeated 3-D mask followed by a reshape, without building the 3-D mask.
pcp = pcp[mask_t][:, valid_xy]

# calculate DOY (day of year) mean of the NLDAS precipitation
df_nlds = pd.DataFrame(pcp)    
time_month = [t.month for t in time_nldas]
time_day = [t.day for t in time_nldas]
df_nlds['month']=time_month
df_nlds['date']=time_day  
df_nlds2 = df_nlds.groupby(['month','date']).mean()

del pcp

#======================================================================================================
# read scenario regression results 
print('Read regression uncertainty')
k=7-1 # position in scenarios_ids of the scenario to plot
test_folder = test_folders[scenarios_ids[k]]

print(test_folder)
test_dir = os.path.join(result_dir, test_folder)
fig_title= test_folder

# read
nldas_regr_dir = os.path.join(root_dir,'test_uniform_perturb',test_folder,'gmet_regr')
output_namebase = os.path.join(nldas_regr_dir,'regress_ts')
time_regr, pcp_error, pcp_error_2 = read_nldas_regr(output_namebase, start_yr, end_yr)

# define plot mask for nldas regr
mask_regr_t = (time_regr>=plot_date_start_obj) & (time_regr<=plot_date_end_obj)
time_regr = time_regr[mask_regr_t]
nt_regr = len(time_regr)

# keep the plot period and the valid cells: (nt,ny,nx) -> (nt_regr, n_valid).
# np.asarray guards against non-ndarray return values of the reader.
pcp_error = np.asarray(pcp_error)[mask_regr_t][:, valid_xy]
pcp_error_2 = np.asarray(pcp_error_2)[mask_regr_t][:, valid_xy]

##======================================================================================================    
# plot
# One figure is written per regression error field (pcp_error, pcp_error_2).
# Each figure has two stacked panels showing the DOY mean of that field,
# averaged over the cells whose DOY-mean NLDAS value (df_nlds2) is zero
# (top) or non-zero (bottom).
print('Plot')

# fail early and clearly if the data-reading part has not been run
_required = ('pcp_error', 'pcp_error_2', 'time_regr', 'df_nlds2')
_missing = [name for name in _required if name not in globals()]
if _missing:
    raise NameError('Plot inputs are not defined: ' + ', '.join(_missing) +
                    '. Run the data-reading section first.')

var_list = ['Precp (when NLDAS = 0)', 'Precp (when NLDAS ≠ 0)']
c_iqr = 'b' #'tab:blue'

# month/day keys and the NLDAS masks are the same for every figure and panel,
# so they are built once instead of inside the loops
time_month = [t.month for t in time_regr]
time_day = [t.day for t in time_regr]
panel_masks = [df_nlds2==0, df_nlds2!=0] # same order as var_list

plot_cases = [
    (pcp_error, 'step27_plot_unctainty_DOY_pcp.png', 'pcp'),
    (pcp_error_2, 'step27_plot_unctainty_DOY_pcp_2.png', 'pcp_2'),
]

for data, output_filename, case_label in plot_cases:
    print(case_label)

    # calculate DOY (day of year) mean; it does not depend on the panel, so
    # it is computed once per figure
    df = pd.DataFrame(data)    
    df['month']=time_month
    df['date']=time_day    
    df2 = df.groupby(['month','date']).mean()

    # plot each variable separately
    nrow = len(var_list) 
    ncol = 1           
    fig, ax = plt.subplots(nrow, ncol, figsize=(3.54,3.54*0.75))

    for i in range(nrow):
        print(var_list[i])

        # keep only the entries selected by this panel's NLDAS mask (rest -> NaN)
        df3 = df2[panel_masks[i]]

        # NaN-aware mean over all cells for every day of year
        unc_doy = np.nanmean(df3,axis=1) #[DOY,1]

        # plot uncertainty
        ax[i].plot(np.arange(1,1+len(unc_doy)),unc_doy, color=c_iqr, marker='s', 
                   linewidth=0.5, markersize=1, markeredgecolor='none', alpha=0.7)

        # limit
        ax[i].set_xlim(1,len(unc_doy))

        # label (x label only on the bottom panel)
        if i == nrow-1:
            xlabel = 'Day of Year (DOY) '
            ax[i].set_xlabel(xlabel, fontsize='xx-small')
        ax[i].set_ylabel('Uncertainty', fontsize='xx-small')

        # title, prefixed with the panel letter (a), (b), ...
        panel_letter = chr(ord('a') + i)
        ax[i].set_title('('+panel_letter+') '+var_list[i], fontsize='xx-small', fontweight='semibold')

        # tick
        ax[i].tick_params(axis='both', direction='out',labelsize = 'xx-small',
                          length=1, width=0.5, pad=1.5, labelcolor='k')

        # change subplot border width
        for axis in ['top','bottom','left','right']:
            ax[i].spines[axis].set_linewidth(0.5)

    # save plot
    fig.tight_layout(pad=0.1, h_pad=0.5) 
    fig.savefig(os.path.join(output_dir, output_filename), dpi=dpi_value,
                bbox_inches = 'tight', pad_inches = 0.05)
    plt.close(fig)
print('Done')


Plot
pcp
Precp (when NLDAS = 0)
Precp (when NLDAS ≠ 0)
pcp_2
Precp (when NLDAS = 0)
Precp (when NLDAS ≠ 0)
Done


In [10]:
# shapes of unc_doy and df3 as left behind by the plotting loop
(unc_doy.shape, df3.shape)

((366,), (366, 80439))

In [None]:
# NaN-aware row mean of df3, i.e. one value per (month, date) row
unc_doy = np.nanmean(df3.to_numpy(), axis=1)