In [25]:
#!/usr/bin/env python
# coding: utf-8


# This script calculates summary statistics (mean, median, standard deviation, and lower/upper percentile bounds) of a yearly ensemble.

import numpy as np
import argparse, os
import xarray as xr
import datetime
import netCDF4 as nc

startTime = datetime.datetime.now()

def process_command_line():
    '''Parse the command-line arguments of the ensemble summary script.

    Arguments are read from ``sys.argv`` by argparse. All seven are positional
    and required. No ``type=`` conversion is applied, so every value is
    returned as a string and numeric ones must be converted by the caller.

    Returns:
        argparse.Namespace with the attributes EnsDirBase, EnsFolder, yr,
        startEns, stopEns, lb_perct and ub_perct.

    Raises:
        SystemExit: raised by argparse when arguments are missing or when
            ``-h``/``--help`` is requested.
    '''
    # The description used to read "subset a netcdf file based on a list of
    # IDs", which does not describe this script; it now matches what the
    # script does so that --help is not misleading.
    parser = argparse.ArgumentParser(
        description='Calculate summary statistics of a yearly ensemble.')
    parser.add_argument('EnsDirBase', help='parent path of ens data.')
    parser.add_argument('EnsFolder', help='ens folder name.')
    parser.add_argument('yr', help='year.')
    parser.add_argument('startEns', help='start ensemble member id.')
    parser.add_argument('stopEns', help='end ensemble member id.')
    parser.add_argument('lb_perct', help='percentile value for lower bound.')
    parser.add_argument('ub_perct', help='percentile value for upper bound.')
    return parser.parse_args()

#======================================================================================================
# main script
# # process command line
# args = process_command_line()
# EnsDirBase = args.EnsDirBase
# EnsFolder = args.EnsFolder

# yr=int(args.yr)
# startEns=int(args.startEns)   
# stopEns=int(args.stopEns)  
# ens_num = (stopEns-startEns+1)

# lb_perct=int(args.lb_perct)
# ub_perct=int(args.ub_perct)

# ---- Run configuration (stands in for the commented-out command-line parsing) ----
# Parent directory that contains the ensemble folder; the summary folder is created next to it.
EnsDirBase='/glade/u/home/hongli/scratch/2020_04_21nldas_gmet/data'
EnsFolder='stn_ens'

yr=2015       # year to process; used to build the input and output file names
startEns=1    # first ensemble member id (inclusive)
stopEns=3     # last ensemble member id (inclusive)
ens_num = (stopEns-startEns+1)   # number of members to read
if ens_num < 1:
    # Fail here with a clear message: with an empty range the read loop would
    # not run and the script would only fail later on undefined arrays.
    raise ValueError('stopEns (%d) must be >= startEns (%d).' % (stopEns, startEns))

lb_perct=5    # percentile used for the lower bound
ub_perct=95   # percentile used for the upper bound

# String forms of the percentiles, used in the long_name attributes of the output variables.
lb_perct_str = str(lb_perct)
ub_perct_str = str(ub_perct)

EnsDir = os.path.join(EnsDirBase, EnsFolder)               # input: ensemble member files
outdir = os.path.join(EnsDirBase, EnsFolder+'_summary')    # output: summary file
# exist_ok avoids the check-then-create race of "if not exists: makedirs".
os.makedirs(outdir, exist_ok=True)

#=================================================================================
# read ensemble data
# Every member file is stacked into one (time, y, x, member) array per variable.
print('read ensemble data')
for i in range(ens_num):
    NUM = startEns+i
    # Member ids are zero-padded to three digits in the file name.
    ens_file = os.path.join(
        EnsDir, 'conus_daily_eighth_{0}0101_{0}1231_{1:03d}.nc4'.format(yr, NUM))

    # Open each member in a context manager so its file handle is released
    # before the next member is opened (previously no dataset was ever closed).
    with xr.open_dataset(ens_file) as f:
        # Load the time coordinate now so it stays usable after the file is closed.
        time = f['time'][:].load()
        pcp = f.variables['pcp'][:]
        tmean = f.variables['t_mean'][:]
        trange = f.variables['t_range'][:]

        if i == 0:
            # Allocate the stacked arrays once, sized from the first member.
            (nt,ny,nx) = np.shape(pcp)
            ens_shape = (nt,ny,nx,ens_num)
            pcp_ens = np.zeros(ens_shape)
            tmean_ens = np.zeros(ens_shape)
            trange_ens = np.zeros(ens_shape)

        # Copy into the stacked arrays while the file is still open.
        pcp_ens[:,:,:,i] = pcp
        tmean_ens[:,:,:,i] = tmean
        trange_ens[:,:,:,i] = trange

#=================================================================================
# calculate ensemble statistics. (time,y,x)
def _ensemble_stats(ens, lb, ub):
    '''Reduce an ensemble array over its last (member) axis.

    Args:
        ens: array whose last axis indexes the ensemble members.
        lb: percentile (0-100) used for the lower bound.
        ub: percentile (0-100) used for the upper bound.

    Returns:
        Tuple (mean, median, std, lower, upper), each with the shape of
        ``ens`` minus its last axis.
    '''
    mean = np.nanmean(ens, axis=-1)
    median = np.nanmedian(ens, axis=-1)
    # nanstd keeps the spread consistent with the NaN-aware mean and median;
    # np.std returned NaN wherever a single member was NaN.
    std = np.nanstd(ens, axis=-1)
    # Both bounds come from one call, so the data is only partitioned once.
    # np.percentile propagates NaN: a cell with any NaN member gets NaN bounds.
    # NOTE(review): np.nanpercentile would ignore NaN members instead, but it
    # can be much slower on arrays of this size - confirm before switching.
    lower, upper = np.percentile(ens, [lb, ub], axis=-1)
    return mean, median, std, lower, upper

print('calculate ensemble statistics')
print('pcp')
(pcp_ens_mean, pcp_ens_median, pcp_ens_std,
 pcp_ens_lb, pcp_ens_ub) = _ensemble_stats(pcp_ens, lb_perct, ub_perct)
del pcp_ens   # free the stacked array as soon as its statistics exist

print('tmean')
(tmean_ens_mean, tmean_ens_median, tmean_ens_std,
 tmean_ens_lb, tmean_ens_ub) = _ensemble_stats(tmean_ens, lb_perct, ub_perct)
del tmean_ens

print('trange')
(trange_ens_mean, trange_ens_median, trange_ens_std,
 trange_ens_lb, trange_ens_ub) = _ensemble_stats(trange_ens, lb_perct, ub_perct)
del trange_ens

#=================================================================================
#save statistics summary
print('save statistics')
# Template file for dimensions and coordinate variables. The first member that
# was read is used (instead of a hard-coded member 001), so the template is a
# file from the processed range.
SrcFile=os.path.join(EnsDir, 'conus_daily_eighth_'+ str(yr) + '0101_' + str(yr) + '1231_' + ('%03d' % startEns) + '.nc4')
with nc.Dataset(SrcFile) as src:
    # NOTE(review): 'sumamry' looks like a typo for 'summary'. The name is kept
    # unchanged because other code may already read files with this spelling.
    DstFile = os.path.join(outdir, 'ens_forc.sumamry.'+ str(yr)+'.nc')
    with nc.Dataset(DstFile, "w") as dst:

        # copy dimensions (an unlimited dimension stays unlimited)
        for name, dimension in src.dimensions.items():
            dst.createDimension(
                name, (None if dimension.isunlimited() else len(dimension)))

        # copy the coordinate variables together with their attributes and values
        include = ['latitude', 'longitude', 'time']
        for name, variable in src.variables.items():
            if name in include:
                dst.createVariable(name, variable.datatype, variable.dimensions)
                dst[name].setncatts(src[name].__dict__)
                dst[name][:]=src[name][:]

        # One row per output variable: (short name, long name, units, data).
        # Keeping the data next to the name is what prevents the upper and lower
        # bounds from being written into each other's variable, as happened
        # when names and arrays were listed separately.
        summary_vars = [
            ('pcp_mean', 'Mean daily precipitation', 'mm/day', pcp_ens_mean),
            ('pcp_median', 'Median daily precipitation', 'mm/day', pcp_ens_median),
            ('pcp_std', 'Standard deviation of daily precipitation', 'mm/day', pcp_ens_std),
            ('pcp_ub', ub_perct_str+'th percentile of daily precipitation', 'mm/day', pcp_ens_ub),
            ('pcp_lb', lb_perct_str+'th percentile of daily precipitation', 'mm/day', pcp_ens_lb),
            ('tmean_mean', 'Mean daily temperature', 'degC', tmean_ens_mean),
            ('tmean_median', 'Median daily temperature', 'degC', tmean_ens_median),
            ('tmean_std', 'Standard deviation of daily mean temperature', 'degC', tmean_ens_std),
            ('tmean_ub', ub_perct_str+'th percentile of daily mean temperature', 'degC', tmean_ens_ub),
            ('tmean_lb', lb_perct_str+'th percentile of daily mean temperature', 'degC', tmean_ens_lb),
            ('trange_mean', 'Mean daily temperature range', 'degC', trange_ens_mean),
            ('trange_median', 'Median daily temperature range', 'degC', trange_ens_median),
            ('trange_std', 'Standard deviation of daily temperature range', 'degC', trange_ens_std),
            ('trange_ub', ub_perct_str+'th percentile of daily temperature range', 'degC', trange_ens_ub),
            ('trange_lb', lb_perct_str+'th percentile of daily temperature range', 'degC', trange_ens_lb),
        ]

        for short_name, long_name, unit, data in summary_vars:
            # Assumes the template file names its dimensions 'time', 'lat' and 'lon' - TODO confirm.
            var_i = dst.createVariable(short_name, np.float64, ('time','lat','lon')) # note: unlimited dimension is leftmost
            var_i.long_name = long_name
            var_i.units = unit
            var_i[:] = data

print('Done')
print(datetime.datetime.now()-startTime)


save statistics
Done
0:00:03.067909


In [15]:
# Sanity check: the stacked ensemble is (time, y, x, member) and its mean drops the member axis.
# NOTE: the main cell runs `del pcp_ens` after computing the precipitation statistics, so this
# cell only works while pcp_ens is still defined in the kernel.
np.shape(pcp_ens),np.shape(pcp_ens_mean)

((365, 224, 464, 3), (365, 224, 464))

In [26]:
# Show the first member at the first time step (the recorded output contains NaN entries)
# together with the ensemble-mean precipitation at time step 0, grid cell [200, 200].
pcp_ens[0,:,:,0],pcp_ens_mean[0,200,200]

(array([[       nan,        nan,        nan, ...,        nan,        nan,
                nan],
        [       nan,        nan,        nan, ...,        nan,        nan,
                nan],
        [       nan,        nan,        nan, ...,        nan,        nan,
                nan],
        ...,
        [0.1       , 0.1       , 0.17772385, ..., 1.78561664, 0.40108433,
         0.64557612],
        [0.34056649, 0.1       , 0.1555008 , ..., 0.10929642, 0.12778546,
         0.1       ],
        [0.36982411, 1.25603187, 0.37433627, ..., 1.0359112 , 0.40113106,
         0.1       ]]), 3.333457072575887)

In [21]:
# Hand check of the ensemble mean: average the three member values of pcp_ens[0,200,200,:]
# and compare the result with pcp_ens_mean[0,200,200].
(2.5307498+ 2.63094163+ 4.83867979)/3

3.3334570733333333

In [22]:
# Print all members for a 3x3 patch of grid cells (indices 200-202 on both grid axes) at the first time step.
pcp_ens[0:1,200:203,200:203,:]

array([[[[2.5307498 , 2.63094163, 4.83867979],
         [1.64616776, 1.04680431, 4.50561953],
         [0.96762699, 0.96605194, 2.53194118]],

        [[1.69602013, 1.37978685, 6.34213686],
         [1.21125102, 1.2239244 , 5.67758608],
         [0.99794692, 0.84607494, 3.14887786]],

        [[1.1934967 , 2.14177108, 5.92372131],
         [1.12614596, 1.77862322, 3.73518395],
         [0.65913612, 2.38473797, 4.53950787]]]])