In [1]:
from pyproj import Proj
import numpy as np
from datetime import datetime
import os
import netCDF4 as nc
import xarray as xr
import pandas as pd 

# Input/output locations. Every output of this script is written below
# root_dir/outfolder.
root_dir = '/glade/u/home/hongli/work/russian/ens_forc_wrf2/scripts'
asc_dir = os.path.join(root_dir, 'hmet_ascii_data_1day_lead')

# Keep only the ASCII grids themselves. Matching on the suffix (instead of a
# substring anywhere in the name) keeps sidecar files such as '*.asc.aux.xml'
# out of the list; the list is sorted so that file order is deterministic.
asc_files = sorted(f for f in os.listdir(asc_dir) if f.endswith('.asc'))

outfolder = 'step1_asc_to_nc'
# exist_ok=True makes the creation idempotent without a separate (racy)
# existence check.
os.makedirs(os.path.join(root_dir, outfolder), exist_ok=True)
ofile_base = 'WestWRF'              # file-name stem of the hourly netCDF
ofile_daily_base = 'WestWRF_daily'  # file-name stem of the daily netCDF

# =========================================================================
# convert UTM projection to WGS lat/lon
print('(1) convert UTM projection to WGS lat/lon')

# Read the grid geometry from the header of the first ASCII file.
# NOTE(review): only the first file is inspected -- this presumes every file
# in asc_dir shares the same grid; confirm against the data.
if not asc_files:
    raise ValueError('no .asc files found in %s' % asc_dir)

header_keys = ('ncols', 'nrows', 'xllcorner', 'yllcorner', 'cellsize')
header = {}
with open(os.path.join(asc_dir, asc_files[0]), 'r') as f:
    # Iterate lazily and stop as soon as the header is complete, so the data
    # rows of the grid are neither loaded into memory nor scanned.
    for line in f:
        line = line.strip()
        if not line:
            continue
        for key in header_keys:
            if key in line:
                # Header lines have the form "<key> <value>".
                header[key] = line.split()[1]
                break
        if len(header) == len(header_keys):
            break

# Fail here, with the file name, instead of with a NameError further down
# (or, in a notebook, silently reusing values left over from an earlier run).
missing = [key for key in header_keys if key not in header]
if missing:
    raise ValueError('header of %s lacks: %s' % (asc_files[0], ', '.join(missing)))

ncols = int(header['ncols'])
nrows = int(header['nrows'])
nx, ny = ncols, nrows                    # grid size along x (columns) and y (rows)
xllcorner = float(header['xllcorner'])   # x of the lower-left grid corner
yllcorner = float(header['yllcorner'])   # y of the lower-left grid corner
cellsize = float(header['cellsize'])     # edge length of a (square) cell


# Projected -> geographic coordinates.
# Cell-centre coordinates along each axis: lower-left corner, plus a whole
# number of cells, plus half a cell.
y = yllcorner + cellsize * np.arange(ny) + cellsize * 0.5  # northing (-> latitude)
x = xllcorner + cellsize * np.arange(nx) + cellsize * 0.5  # easting (-> longitude)

# Expand the two axes to full (ny, nx) grids: x varies along columns,
# y varies along rows.
x_2d, y_2d = np.meshgrid(x, y)

# Inverse UTM transform (zone 10, WGS84 ellipsoid) gives lon/lat per cell.
p = Proj(proj='utm', zone=10, ellps='WGS84', preserve_units=False)
lons, lats = p(x_2d, y_2d, inverse=True)

# write lat/lon
# One CSV row per grid cell in row-major order. FID is the 1-based cell
# number; Y/X are the projected cell-centre coordinates that lat/lon were
# derived from. The context manager closes the file even if a write fails.
with open(os.path.join(root_dir, outfolder, 'PointXY.txt'), 'w') as f:
    f.write('FID,Latitude,Longitude,Y,X\n')
    for i in range(ny):
        for j in range(nx):
            f.write('%d,%f,%f,%f,%f\n'
                    % (i*nx+j+1, lats[i,j], lons[i,j], y_2d[i,j], x_2d[i,j]))

# write start_lat, end_lat, start_lon, end_lon
# The bounds are the outer grid corners (cell edges, not cell centres):
# "start" is the lower-left corner, "end" is the lower-left corner shifted by
# the full grid extent in x and y.
start_lon, start_lat = p(xllcorner, yllcorner, inverse=True)
end_lon, end_lat = p(xllcorner+cellsize*nx, yllcorner+cellsize*ny, inverse=True)

# The context manager closes the file even if a write fails.
with open(os.path.join(root_dir, outfolder, 'PointXY_Bounds.txt'), 'w') as f:
    f.write('Start_lat,End_lat,Start_lon,End_lon\n')
    f.write('%f,%f,%f,%f\n' % (start_lat, end_lat, start_lon, end_lon))

    # Second table: extent of the grid in degrees.
    f.write('\n')
    f.write('Lat_width,Lon_width\n')
    f.write('%f,%f\n' % (end_lat-start_lat, end_lon-start_lon))

# =========================================================================
# get time series
print('(2) get time series')
# The time axis is taken from the 'Temp' files: the part of each file name
# before the first underscore is a YYYYMMDDHH time stamp.
time_str = sorted(name.split('_')[0] for name in asc_files if 'Temp' in name)
times = [datetime.strptime(stamp, '%Y%m%d%H') for stamp in time_str]

# =========================================================================
# write netcdf
print('(3) write netcdf')
ofile = ofile_base+'_'+time_str[0]+'_'+time_str[-1]+'.nc'

# Time stamp -> position on the time axis, built once so that placing a file
# is a dictionary lookup rather than a linear search through time_str.
# setdefault keeps the first position if a stamp were to occur twice.
time_index_of = {}
for k, stamp in enumerate(time_str):
    time_index_of.setdefault(stamp, k)

with nc.Dataset(os.path.join(root_dir, outfolder, ofile), "w") as ncfile:

    # create global attributes
    ncfile.source = 'Hourly forcing from GSSHA West WRF ASCII'

    # create dimensions
    ncfile.createDimension('y', ny)       # row (latitude-like) axis
    ncfile.createDimension('x', nx)       # column (longitude-like) axis
    ncfile.createDimension('time', None)  # unlimited axis

    # =========================================================================
    # create variables for lat/lon coordinates and time
    # lat/lon are 2-D because the grid is regular in UTM, not in degrees.
    lat_var = ncfile.createVariable('lat', np.float32, ('y','x'))
    lat_var.units = 'degrees_north'
    lat_var.long_name = 'latitude'

    lon_var = ncfile.createVariable('lon', np.float32, ('y','x'))
    lon_var.units = 'degrees_east'
    lon_var.long_name = 'longitude'

    time_var = ncfile.createVariable('time', np.float64, ('time',))
    time_var.units = 'hours since 1980-01-01 00:00:00'
    time_var.long_name = 'time'

    # assign values for variables ([:] is necessary)
    lat_var[:] = lats
    lon_var[:] = lons
    time_var[:] = nc.date2num(times, time_var.units)

    # =========================================================================
    # create forcing variables [loop variables and loop times]
    # The three lists are parallel: short name (also the tag searched for in
    # the file names), long name, and units as delivered in the ASCII files.
    vars_short = ['Temp','Clod','Drad','Grad','Pres','RlHm','WndS','Prcp']
    vars_long = ['Temperature', 'Cloud Cover', 'Direct Radiation',
                 'Global Radiation', 'Pressure', 'Relative Humidity', 'Wind Speed', 'Precipitation']
    units = ['Fahrenheit', 'Percent', 'Watt hour per meter squared',
             'Watt hour per meter squared', 'Inches Hg', 'Percent', 'Knots', 'mm/hr']

    for var, long_name, unit in zip(vars_short, vars_long, units):
        print(var)

        # create
        var_i = ncfile.createVariable(var,np.float64,('time','y','x')) # note: unlimited dimension is leftmost
        var_i.long_name = long_name
        var_i.units = unit

        # write: one ASCII grid per time step
        for var_file in (f for f in asc_files if var in f):
            stamp = var_file.split('_')[0]
            if stamp not in time_index_of:
                # The time axis comes from the 'Temp' files only; name the
                # offending file instead of failing with an opaque lookup error.
                raise ValueError('%s: time stamp %s is not on the time axis'
                                 % (var_file, stamp))
            # skiprows=5 skips the header lines above the data rows
            # (assumes a 5-line header -- TODO confirm for all files).
            data = np.loadtxt(os.path.join(asc_dir, var_file), skiprows=5, dtype=float)
            # Reverse the row order so that row 0 pairs with y[0], the row
            # nearest yllcorner (presumably the file lists the northernmost
            # row first -- verify against the data).
            var_i[time_index_of[stamp],:,:] = np.flipud(data)
# ==========================================================================================================
# Convert hourly WRF forcing to daily in order to run GMET (only P and T)
print('(4) write daily netcdf')
time_format_daily = '%Y%m%d'

ofile = ofile_base+'_'+time_str[0]+'_'+time_str[-1]+'.nc'
# Pull everything needed into memory and close the file again: the same file
# is re-opened with netCDF4 further down, so it must not be left open here.
with xr.open_dataset(os.path.join(root_dir, outfolder, ofile)) as f_wrf:
    time = pd.to_datetime(f_wrf['time'].values[:]).strftime(time_format_daily) # yyyymmdd label per hour
    prcp_hour = f_wrf['Prcp'].values[:] # (time, y, x). unit: mm/hr
    temp_hour_F = f_wrf['Temp'].values[:] # (time, y, x). unit: F

# use mask when converting from F to C
# The masked cells are hard-coded for this particular grid. The indices are
# written for the un-flipped row order and the mask is then flipped, mirroring
# the flipud applied to the data when the hourly file was written.
# NOTE(review): presumably these are cells without valid data -- confirm.
(nt_hour,ny,nx)=np.shape(temp_hour_F)
mask = np.zeros((ny,nx), dtype=bool)
mask[0:4,0]=True
mask[0,-1]=True
mask[-2,0:2]=True
mask[-1,0:10]=True
mask[-8:,-1]=True
mask = np.flipud(mask)
mask = np.repeat(mask[np.newaxis,:,:],nt_hour,axis=0) # same mask for every hour

temp_hour_F_mask = np.ma.array(temp_hour_F, mask=mask)
temp_hour_C_mask = np.multiply(np.subtract(temp_hour_F_mask, 32), 5.0/9.0) # F to C

# daily datetime
# day_of_hour[k] is the position in time_str_unique of the day that hour k
# belongs to, so selecting the hours of one day is a single integer comparison.
time_str_unique, day_of_hour = np.unique(time, return_inverse=True)
datetime_unique = [datetime.strptime(t, time_format_daily) for t in time_str_unique]
date_len = len(time_str_unique)

# convert hourly to daily
[hr_len, ny, nx] = np.shape(prcp_hour)
prcp_daily = np.zeros((date_len, ny, nx))
tmin_daily = np.zeros((date_len, ny, nx))
tmax_daily = np.zeros((date_len, ny, nx))

for i in range(date_len):
    hours_in_day = np.flatnonzero(day_of_hour == i)
    # Precipitation: sum of the hourly values of the day, NaNs counted as zero.
    prcp_daily[i,:,:] = np.nansum(prcp_hour[hours_in_day,:,:], axis=0)
    # Temperature extremes of the day; masked cells are written out as 0.
    tmin_daily[i,:,:] = (temp_hour_C_mask[hours_in_day,:,:].min(axis=0)).filled(fill_value=0)
    tmax_daily[i,:,:] = (temp_hour_C_mask[hours_in_day,:,:].max(axis=0)).filled(fill_value=0)

# save daily output
# The file name carries the first and last day (YYYYMMDD) of the record.
ofile_daily = ofile_daily_base+'_'+time_str_unique[0]+'_'+time_str_unique[-1]+'.nc'

# The hourly file (src) is the template for dimensions and coordinate
# variables; both files are closed when the block is left.
with nc.Dataset(os.path.join(root_dir, outfolder, ofile)) as src, \
        nc.Dataset(os.path.join(root_dir, outfolder, ofile_daily), "w") as dst:

    # create global attributes
    dst.description = 'Daily WRF derived from ERDC hourly ascii forcing.'

    # copy dimensions (an unlimited dimension stays unlimited)
    for name, dimension in src.dimensions.items():
        dst.createDimension(
            name, (len(dimension) if not dimension.isunlimited() else None))

    # copy variable attributes all at once via dictionary (for the included variables)
    include = ['lat', 'lon', 'time']
    for name, variable in src.variables.items():
        if name not in include:
            continue
        # The returned handle is deliberately not bound to a name, so no
        # module-level variable (e.g. the easting array `x`) is overwritten.
        dst.createVariable(name, variable.datatype, variable.dimensions)
        dst[name].setncatts(src[name].__dict__)
        # lat/lon are copied as they are; time gets new, daily values below.
        if name!='time':
            dst[name][:]=src[name][:]

    # assign values for variables ([:] is necessary)
    # Daily time stamps, encoded with the units copied from the hourly file.
    dst.variables['time'][:] = nc.date2num(datetime_unique, dst.variables['time'].units)

    # create Prcp, Tmin, and Tmax variables
    # Parallel lists: name, long name, units and the array to store, so each
    # variable is created and filled in one place.
    vars_short = ['tmin','tmax','prcp']
    vars_long = ['Minimum daily air temperature', 'Maximum daily air temperature', 'Total daily precipitation']
    units = ['degC', 'degC', 'mm/day']
    daily_data = [tmin_daily, tmax_daily, prcp_daily]

    for var, long_name, unit, data in zip(vars_short, vars_long, units, daily_data):
        print(var)

        # create
        var_i = dst.createVariable(var,np.float64,('time','y','x')) # note: unlimited dimension is leftmost
        var_i.long_name = long_name
        var_i.units = unit

        # write
        var_i[:] = data

print('Done')


  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)


(1) convert UTM projection to WGS lat/lon
(2) get time series
(3) write netcdf
Temp
Clod
Drad
Grad
Pres
RlHm
WndS
Prcp
(4) write daily netcdf
tmin
tmax
prcp
Done
