In [1]:
import os
import datetime
import numpy as np
import xarray as xr
import pandas as pd
import netCDF4 as nc

# ---- run configuration --------------------------------------------------------------------------------------
# Root of the project tree; every absolute path below is built from it.
root_dir = '/glade/u/home/hongli/work/russian/ens_forc_wrf2'

# Extraction window. Both bounds are inclusive (the time mask further down uses >= and <=)
# and are parsed with `time_format`.
time_format = '%Y-%m-%d'
extract_start_date = '2017-12-02'
extract_end_date = '2018-04-07'

# Inputs: the daily WRF netCDF file, the folder holding the station-list files, and the
# substring a file name must contain to be treated as a station list.
wrf_raw_file = os.path.join(
    root_dir, 'scripts/step1_asc_to_nc/WestWRF_daily_20171201_20180408.nc')
stnlist_dir = os.path.join(
    root_dir, 'scripts/step4_sample_stnlist_random')
stnlist_name_base = 'stnlist'

# Outputs: the template netCDF whose dimensions and variable definitions are copied into
# each station file, and the output folder (kept relative; joined with root_dir when used).
outnc_tpl = os.path.join(
    root_dir, 'scripts/stn_data_tpl.nc')
outfolder = 'scripts/step5_prepare_stndata_random'

# ============================================================================================================
# read historical WRF netcdf files
print('read WRF data')
# Open the file in a context manager so the handle is released even when a
# variable lookup or a read raises part-way through.
with xr.open_dataset(wrf_raw_file) as f_wrf:
    # `.values` materialises each field as an in-memory numpy array, so the
    # arrays stay usable after the dataset has been closed.
    prcp = f_wrf['prcp'].values  # (time, lat, lon). unit: mm/day
    tmin = f_wrf['tmin'].values  # (time, lat, lon). unit: degC
    tmax = f_wrf['tmax'].values  # presumably same layout and unit as tmin -- TODO confirm
    # Time stamps rendered as `time_format` strings; any time-of-day part is dropped here.
    time = pd.to_datetime(f_wrf['time'].values).strftime(time_format)

# wrf mask on the time dimension
# Parse the window bounds and the per-step date strings back into datetime objects.
start_date_obj = datetime.datetime.strptime(extract_start_date, time_format)
end_date_obj = datetime.datetime.strptime(extract_end_date, time_format)
time_obj = np.asarray([datetime.datetime.strptime(t, time_format) for t in time])
# Boolean mask over the time axis; both window bounds are inclusive.
wrf_mask = (time_obj >= start_date_obj) & (time_obj <= end_date_obj)

# ============================================================================================================
print('write')
# Station-list files: every entry of stnlist_dir whose name contains
# stnlist_name_base, processed in sorted name order.
stnlist_files = sorted(f for f in os.listdir(stnlist_dir) if stnlist_name_base in f)

# Template variables that are re-created in every station file.
include = ['GHCND_id', 'elevation', 'latitude', 'longitude', 'prcp', 'time', 'tmax', 'tmin']

# Dates inside the extraction window; identical for every station, so select them once.
time_sel = time_obj[wrf_mask]

with nc.Dataset(outnc_tpl) as src:
    for stnlist_file in stnlist_files:

        # create sub-outfolder, named from the last '_'-separated token of the
        # list file name, cut at its first '.'
        sub_folder = 'stndata_'+(stnlist_file.split('_')[-1]).split('.')[0]
        sub_folder_path = os.path.join(root_dir, outfolder, sub_folder)
        # exist_ok avoids the check-then-create race of a separate os.path.exists test
        os.makedirs(sub_folder_path, exist_ok=True)
        print(sub_folder)

        # read selected wrf list
        # Columns per the original author's note:
        # STA_ID[0], LAT[1], LON[2] ELEV[3], SLP_N[4], SLP_E[5], STA_NAME[6]
        # ndmin keeps the arrays 1-D / 2-D when the list holds a single station;
        # without it loadtxt squeezes them and len()/[i, j] indexing below fail.
        stnlist_path = os.path.join(stnlist_dir, stnlist_file)
        stn_ids = np.loadtxt(stnlist_path, skiprows=2, usecols=[0], delimiter=',',
                             dtype='str', ndmin=1)
        stnlist = np.loadtxt(stnlist_path, skiprows=2, usecols=[1,2,3,4,5], delimiter=',',
                             ndmin=2)
        stn_num = len(stn_ids)

        # write point output one-by-one
        for i in range(stn_num):
            stn_id = stn_ids[i]
            # Zero-based grid indices are read from fixed character positions of the
            # station id (chars 3-5 and 9-11).
            # NOTE(review): presumably ids follow a 'xxxNNNxxxNNN' lat/lon pattern -- confirm.
            stn_lat_id = int(stn_id[3:3+3]) #start from zero
            stn_lon_id = int(stn_id[9:9+3]) #start from zero

            with nc.Dataset(os.path.join(sub_folder_path, stn_id+'.nc'), "w") as dst:

                # create global attributes
                dst.description = 'Daily WRF derived from ERDC hourly ascii forcing.'

                # copy dimensions (unlimited dimensions stay unlimited)
                for name, dimension in src.dimensions.items():
                    dst.createDimension(
                        name, (len(dimension) if not dimension.isunlimited() else None))

                # copy variable attributes all at once via dictionary (for the included variables)
                for name, variable in src.variables.items():
                    if name in include:
                        dst.createVariable(name, variable.datatype, variable.dimensions)
                        dst[name].setncatts(src[name].__dict__)

                # assign values for variables ([:] is necessary)
                dst.variables['GHCND_id'][:] = nc.stringtochar(np.array([stn_id], dtype='S'))
                dst.variables['latitude'][:] = stnlist[i,0]
                dst.variables['longitude'][:] = stnlist[i,1]
                dst.variables['elevation'][:] = stnlist[i,2]

                # time is encoded with the units attribute copied from the template
                dst.variables['time'][:] = nc.date2num(time_sel, dst.variables['time'].units)
                # one grid cell per station, restricted to the extraction window
                dst.variables['tmax'][:] = tmax[wrf_mask,stn_lat_id, stn_lon_id]
                dst.variables['tmin'][:] = tmin[wrf_mask,stn_lat_id, stn_lon_id]
                dst.variables['prcp'][:] = prcp[wrf_mask,stn_lat_id, stn_lon_id]
print('Done')

  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)


read WRF data
write
stndata_010percent
stndata_020percent
stndata_030percent
stndata_040percent
stndata_050percent
stndata_060percent
stndata_070percent
stndata_080percent
stndata_090percent
stndata_100percent
Done
