In [None]:
import datetime
import os
import re

import netCDF4 as nc
import numpy as np
import pandas as pd
import xarray as xr

# ------------------------------------------------------------------------------------------------------------
# Configuration: input/output locations and the period to extract.
root_dir = '/glade/u/home/hongli/work/2020_04_21nldas_gmet'
nldas_dir = os.path.join(root_dir,'data/nldas_daily_utc')
stnlist_dir = os.path.join(root_dir, 'scripts/step4_sample_stnlist')
stnlist_name_base = 'stnlist'

# Date format used both for the NLDAS time stamps and for the extraction window below.
time_format='%Y-%m-%d'

# Years of yearly NLDAS files (NLDAS_<yr>.nc) that are loaded. Only records inside the
# loaded years can be selected, so these should bracket the extraction window.
start_yr = 2015
end_yr = 2016

# Extraction window; both end dates are included in the output.
extract_start_date = '2015-01-01'
extract_end_date   = '2016-12-31'

# Output folder (relative to root_dir); exist_ok avoids a separate existence check.
outfolder = 'scripts/step2_prepare_stndata'
os.makedirs(os.path.join(root_dir, outfolder), exist_ok=True)

# NetCDF template whose dimensions, variables and attributes are cloned for each station file.
outnc_tpl = os.path.join(root_dir, 'scripts/stn_data_tpl.nc')

# ============================================================================================================
# read historical nldas
# Load the yearly NLDAS files for start_yr..end_yr and stack them along the time axis (axis 0).
print('Read nldas data')
prcp_parts, tmin_parts, tmax_parts, time_parts = [], [], [], []
for yr in range(start_yr, end_yr+1):

    nldas_file = 'NLDAS_'+str(yr)+'.nc'
    nldas_path = os.path.join(nldas_dir, nldas_file)

    # The context manager closes the file even when a variable is missing or a read fails;
    # .values pulls the data into memory so it stays usable after the file is closed.
    with xr.open_dataset(nldas_path) as f_nldas:
        prcp_parts.append(f_nldas['prcp_avg'].values) # (time, lat, lon). unit: kg/m^2 = mm
        tmin_parts.append(f_nldas['tair_min'].values) # (time, lat, lon). unit: K
        tmax_parts.append(f_nldas['tair_max'].values)
        time_parts.append(pd.to_datetime(f_nldas['time'].values).strftime(time_format))

# Concatenate once after the loop instead of re-copying the growing arrays for every year.
prcp_avg = np.concatenate(prcp_parts, axis = 0)
tair_min = np.concatenate(tmin_parts, axis = 0)
tair_max = np.concatenate(tmax_parts, axis = 0)
time = np.concatenate(time_parts, axis = 0) # date strings formatted with time_format

# NOTE(review): the factor 24 assumes prcp_avg is a daily-mean hourly rate (mm/hr) -- confirm
# against the NLDAS preprocessing step.
prcp_sum = np.multiply(prcp_avg, 24.0) #mm/hr to mm/day
# Kelvin to degrees Celsius
tair_min = np.subtract(tair_min, 273.15)
tair_max = np.subtract(tair_max, 273.15)

# Boolean selector along the NLDAS time axis: True for every record whose date lies
# inside the extraction window, with both window ends included.
start_date_obj = datetime.datetime.strptime(extract_start_date, time_format)
end_date_obj = datetime.datetime.strptime(extract_end_date, time_format)

# Parse the date strings back into datetime objects so they can be compared with the window.
parsed_times = []
for stamp in time:
    parsed_times.append(datetime.datetime.strptime(stamp, time_format))
time_obj = np.asarray(parsed_times)

not_before_start = time_obj >= start_date_obj
not_after_end = time_obj <= end_date_obj
nldas_mask = not_before_start & not_after_end

# ============================================================================================================
# write point output one-by-one
# For every station list file, create one sub-folder and write one NetCDF file per station.
# Each file clones the template structure and stores the NLDAS prcp/tmax/tmin series of the
# grid cell encoded in the station ID, restricted to the extraction window.
print('Write')
stnlist_files = sorted(f for f in os.listdir(stnlist_dir) if stnlist_name_base in f)

include = ['GHCND_id', 'elevation', 'latitude', 'longitude', 'prcp', 'time', 'tmax', 'tmin']

# Station IDs carry the grid row (lat index) and column (lon index), e.g. 'Row0Col110'.
# A pattern match works for any digit count, unlike fixed-width slicing of the ID.
stn_id_pattern = re.compile(r'Row(\d+)Col(\d+)')

with nc.Dataset(outnc_tpl) as src:

    # The time axis is the same for every station, so encode it once. The template's time
    # units are used because each output file receives a copy of the template attributes.
    time_values = nc.date2num(time_obj[nldas_mask], src.variables['time'].units)

    for stnlist_file in stnlist_files:

        # create sub-outfolder, named after the second '_'-separated token of the list file name
        sub_folder = 'stndata_'+(stnlist_file.split('.')[0].split('_')[1])
        sub_dir = os.path.join(root_dir, outfolder, sub_folder)
        os.makedirs(sub_dir, exist_ok=True)
        print(sub_folder)

        # read selected stn list
        # columns: STA_ID[0], LAT[1], LON[2] ELEV[3], SLP_N[4], SLP_E[5], STA_NAME[6]
        # ndmin keeps stn_ids 1-D and stnlist 2-D even when the list holds a single station.
        stnlist_path = os.path.join(stnlist_dir, stnlist_file)
        stn_ids = np.loadtxt(stnlist_path, skiprows=2, usecols=[0], delimiter=',', dtype='str', ndmin=1)
        stnlist = np.loadtxt(stnlist_path, skiprows=2, usecols=[1,2,3,4,5], delimiter=',', ndmin=2)
        stn_num = len(stn_ids)

        for i in range(stn_num):
            stn_id = stn_ids[i]
            id_match = stn_id_pattern.search(stn_id)
            if id_match is None:
                raise ValueError(
                    'Cannot parse grid row/col from station id %r in %s' % (stn_id, stnlist_file))
            stn_lat_id = int(id_match.group(1)) #start from zero
            stn_lon_id = int(id_match.group(2)) #start from zero

            with nc.Dataset(os.path.join(sub_dir, stn_id+'.nc'), "w") as dst:

                # copy dimensions (unlimited dimensions stay unlimited)
                for name, dimension in src.dimensions.items():
                    dst.createDimension(
                        name, (len(dimension) if not dimension.isunlimited() else None))

                # copy the included variables and all of their attributes via dictionary
                # NOTE(review): if a template variable defines _FillValue it may need to be passed
                # to createVariable instead of being copied as an attribute -- confirm.
                for name, variable in src.variables.items():
                    if name in include:
                        dst.createVariable(name, variable.datatype, variable.dimensions)
                        dst[name].setncatts(src[name].__dict__)

                # assign values for variables ([:] is necessary)
                dst.variables['GHCND_id'][:] = nc.stringtochar(np.array([stn_id], dtype='S'))
                dst.variables['latitude'][:] = stnlist[i,0]
                dst.variables['longitude'][:] = stnlist[i,1]
                dst.variables['elevation'][:] = stnlist[i,2]

                # time series of the station's grid cell within the extraction window
                dst.variables['time'][:] = time_values
                dst.variables['tmax'][:] = tair_max[nldas_mask,stn_lat_id, stn_lon_id]
                dst.variables['tmin'][:] = tair_min[nldas_mask,stn_lat_id, stn_lon_id]
                dst.variables['prcp'][:] = prcp_sum[nldas_mask,stn_lat_id, stn_lon_id]
print('Done')


read nldas data
Write
stndata_00822grids
stndata_00986grids


In [3]:
# Show the station ID left in `stn_id` by the last iteration of the write loop.
stn_id

'Row0Col110'