In [None]:
import os
import pandas as pd
import xarray as xr
import datetime
import numpy as np

def read_ens(out_forc_name_base, ens_num):
    """Load an ensemble of forcing files into member-stacked arrays.

    Member files are read from ``<out_forc_name_base>.NNN.nc`` where ``NNN``
    runs from 001 to ``ens_num`` (zero-padded to three digits).  Every file
    must provide the 3-D variables ``pcp``, ``t_mean`` and ``t_range``; the
    first file additionally supplies ``latitude``, ``longitude`` and ``time``.

    Parameters
    ----------
    out_forc_name_base : str
        Path prefix shared by all member files (everything before ``.NNN.nc``).
    ens_num : int
        Number of ensemble members to read; must be at least 1.

    Returns
    -------
    lats, lons : numpy.ndarray
        ``latitude`` / ``longitude`` values of the first member
        (presumably shaped (y, x) -- confirm against the input files).
    time : pandas.DatetimeIndex
        Time coordinate of the first member, floored to whole days.
    pcp_ens, t_mean_ens, t_range_ens : numpy.ndarray
        Float arrays with the shape of ``pcp`` plus a trailing member axis
        of length ``ens_num``.

    Raises
    ------
    ValueError
        If ``ens_num`` is smaller than 1 (there would be nothing to return).
    """
    if ens_num < 1:
        raise ValueError('ens_num must be at least 1, got %r' % (ens_num,))

    lats = lons = time = None
    pcp_ens = t_mean_ens = t_range_ens = None

    for i in range(ens_num):
        ens_file = '%s.%03d.nc' % (out_forc_name_base, i + 1)

        # Close every member file as soon as its data is in memory; otherwise
        # up to ens_num file handles stay open until garbage collection.
        with xr.open_dataset(ens_file) as f:
            pcp = f['pcp'].values
            t_mean = f['t_mean'].values
            t_range = f['t_range'].values

            if i == 0:
                # Coordinates are taken from the first member only.
                lats = f['latitude'].values
                lons = f['longitude'].values
                time = pd.DatetimeIndex(f['time'].dt.floor('D').to_pandas())

        if i == 0:
            # All three ensemble arrays are sized from pcp of the first member.
            ens_shape = tuple(np.shape(pcp)[:3]) + (ens_num,)
            pcp_ens = np.zeros(ens_shape)
            t_mean_ens = np.zeros(ens_shape)
            t_range_ens = np.zeros(ens_shape)

        pcp_ens[:, :, :, i] = pcp
        t_mean_ens[:, :, :, i] = t_mean
        t_range_ens[:, :, :, i] = t_range

    return lats, lons, time, pcp_ens, t_mean_ens, t_range_ens

# ---- paths and run configuration ---------------------------------------
root_dir = '/glade/u/home/hongli/work/2020_04_21nldas_gmet'
nldas_dir = os.path.join(root_dir, 'data/nldas_daily_utc')
start_yr = 2015
end_yr = 2016

gridinfo_file = os.path.join(root_dir, 'scripts/conus_ens_grid_eighth_deg_v1p1.nc')

# Every entry of result_dir is treated as one test case, in sorted order.
result_dir = os.path.join(root_dir, 'test_uniform')
test_folders = sorted(os.listdir(result_dir))

time_format = '%Y-%m-%d'
# ens_date_start = '2015-01-01'
# ens_date_end = '2016-12-31'
# ens_date_start_obj = datetime.datetime.strptime(plot_date_start, time_format)
# ens_date_end_obj = datetime.datetime.strptime(plot_date_end, time_format)

ens_num = 100  # number of ensemble member files read per test case
ens_ofolder = 'gmet_ens_combine'
ens_ofile_basename = 'ens_forc.2015-2016'
ens_ofolder_bc = 'gmet_ens_combine_bc' # bias correction

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
output_dir = os.path.join(root_dir, 'scripts/step9_bias_correct_ens')
os.makedirs(output_dir, exist_ok=True)

# #======================================================================================================
# print('Read gridinfo mask')
# # get xy mask from gridinfo.nc
# f_gridinfo = xr.open_dataset(gridinfo_file)
# mask_xy = f_gridinfo['mask'].values[:] # (y, x). 1 is valid. 0 is invalid.

# #======================================================================================================
# # read historical nldas data
# print('Read nldas data')
# for yr in range(start_yr, end_yr+1):
    
#     nldas_file = 'NLDAS_'+str(yr)+'.nc'
#     nldas_path = os.path.join(nldas_dir, nldas_file)
    
#     f_nldas = xr.open_dataset(nldas_path)
#     if yr == start_yr:
#         prcp_avg = f_nldas['prcp_avg'].values[:] # (time, y, x). unit: kg/m^2 = mm
#         tair_min = f_nldas['tair_min'].values[:] # (time, y, x). unit: K
#         tair_max = f_nldas['tair_max'].values[:]
#         time = pd.to_datetime(f_nldas['time'].values[:]).strftime(time_format)
#     else:
#         prcp_avg = np.concatenate((prcp_avg, f_nldas['prcp_avg'].values[:]), axis = 0)
#         tair_min = np.concatenate((tair_min, f_nldas['tair_min'].values[:]), axis = 0)
#         tair_max = np.concatenate((tair_max, f_nldas['tair_max'].values[:]), axis = 0)
#         time = np.concatenate((time, pd.to_datetime(f_nldas['time'].values[:]).strftime(time_format)), axis = 0)
#     f_nldas.close()
 
# # convert unit and calculate mean values
# time_obj = np.asarray([datetime.datetime.strptime(t, time_format) for t in time])
# prcp_sum = np.multiply(prcp_avg[mask_t,:,:], 24.0) # mm/hr to mm/day. (time,y,x)
# tair_min = np.subtract(tair_min[mask_t,:,:], 273.15) # K to degC.
# tair_max = np.subtract(tair_max[mask_t,:,:], 273.15)
# tair_mean = 0.5*(tair_max-tair_min)

# Bias-correct the ensemble of each test case against the NLDAS reference.
# NOTE(review): time_obj, prcp_sum and tair_mean are not assigned by any active
# code in this file (the NLDAS-reading section is commented out); they must
# already exist in the session before this loop runs -- confirm.
for test_folder in test_folders[0:1]:  # only the first test folder is processed
    print(test_folder)

    # read ensemble output
    output_namebase = os.path.join(result_dir, test_folder, ens_ofolder, ens_ofile_basename)
    ens_lats, ens_lons, ens_time, pcp_ens, tmean_ens, trange_ens = read_ens(output_namebase, ens_num)

    # calculate ensemble mean over the member axis (last axis)
    pcp_ens_mean = np.nanmean(pcp_ens, axis=3)  # (time,y,x)
    tmean_ens_mean = np.nanmean(tmean_ens, axis=3)

    # get time mask for nldas data
    nldas_mask_t = (time_obj >= ens_time[0]) & (time_obj <= ens_time[-1])

    # calculate delta (reference minus ensemble mean) for pcp and tmean
    pcp_delta = prcp_sum[nldas_mask_t, :, :] - pcp_ens_mean  # (time,y,x)
    tmean_delta = tair_mean[nldas_mask_t, :, :] - tmean_ens_mean

    # bias correct ensemble: the deltas are (time,y,x), so a trailing axis is
    # added to broadcast them across the member axis of the 4-D arrays.
    pcp_ens_correct = pcp_ens + pcp_delta[..., np.newaxis]
    # The mean-temperature delta is applied to the mean-temperature ensemble
    # (the original added it to trange_ens).
    tmean_ens_correct = tmean_ens + tmean_delta[..., np.newaxis]

#     # save bias-corrected ensemble
#     if not os.path.exists(os.path.join(result_dir,test_folder,ens_ofolder_bc)):
#         os.path.makedirs(os.path.join(result_dir,test_folder,ens_ofolder_bc))
        
#     for m in range(ens_num):
#         NUM = str('%03d' % (i+1))
#         SrcFile = os.path.join(result_dir,test_folder,ens_ofolder,ens_ofile_basename+ '.' + NUM +'.nc')
#         DstFile = os.path.join(result_dir,test_folder,ens_ofolder_bc,ens_ofile_basename+ '.' + NUM +'.nc')
        
#         with nc.Dataset(SrcFile) as src:
#             with nc.Dataset(DstFile, "w") as dst:

#                 # copy dimensions
#                 for name, dimension in src.dimensions.items():
#                      dst.createDimension(
#                         name, (len(dimension) if not dimension.isunlimited() else None))

#                 # copy variable attributes all at once via dictionary (for the included variables)
#                 include = ['pcp', 't_mean']
#                 for name, variable in src.variables.items():
#                     x = dst.createVariable(name, variable.datatype, variable.dimensions)               
#                     dst[name].setncatts(src[name].__dict__)
#                     if not name in include:
#                         dst[name][:]=src[name][:]                

#                 # assign values for variables ([:] is necessary)
#                 dst.variables['time'][:] = nc.date2num(datetime_unique, dst.variables['time'].units)

#                 # create Prcp, Tmin, and Tmax variables 
#                 vars_short = ['tmin','tmax','prcp']
#                 vars_long = ['Minimum daily air temperature', 'Maximum daily air temperature', 'Total daily precipitation']
#                 units = ['degC', 'degC', 'mm/day']

#                 for i, var in enumerate(vars_short):
#                     print(var)

#                     # create
#                     var_i = dst.createVariable(var,np.float64,('time','y','x')) # note: unlimited dimension is leftmost
#                     var_i.long_name = vars_long[i]
#                     var_i.units = units[i] 

#                 dst.variables['tmax'][:] = tmax_daily
#                 dst.variables['tmin'][:] = tmin_daily
#                 dst.variables['prcp'][:] = prcp_daily 
        
# del prcp_avg,tair_min,tair_max
# Signal that the cell ran to completion.
print("Done")


  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)
  3: pd.Panel}


00822grids


In [None]:
# Display the same 3x3 window (indices 100-102 on the last two axes) at the
# first index of axis 0 for both ensemble-mean fields.
(
    pcp_ens_mean[0, 100:103, 100:103],
    tmean_ens_mean[0, 100:103, 100:103],
)