### Save NLDAS forcing daily averages
This script takes hourly NLDAS forcing variables as PFB files (saved on Verde) and computes the daily, (monthly, and yearly) average to save as a NetCDF files.

Inputs:
- Directory where forcing is and directory where you want to save output
- Hourly PFB files of NLDAS3 forcing
- water year and day start/end
- variable that you want to compute and save averages for:  
['Temp', 'APCP', 'DLWR', 'DSWR', 'Press', 'SPFH', 'UGRD', 'VGRD']

Outputs:
- NetCDF files for daily average of each variable

Notes (10/7/22):
- Need to determine when is the daily start and end for US time zone, NLDAS3 forcing is UTC
- Need to add in monthly and yearly averages

In [1]:
import numpy as np
from parflow import Run
import sys
from parflow.tools.io import read_pfb,write_pfb
import xarray as xr

In [2]:
#################################################################################
#these 3 entries (year, day start and day end) will eventually be argv to the script so that it can be run from bash script
water_year = 2003
day_start = 0 #day_start = 0 is the first day of the water year, Oct 1 or 000001_000024
day_end = 2 #day_end = 364 is the final day of the water year, Sept 30 or 008737_to_008760

#list of forcing variables you want
variables_forcing = ['Temp']
#indication whether you want the mean (1) or the sum (0)
variables_forcing_mean = [1]

# directory where hourly forcing files are located
directory_in_forcing = f'/glade/p/univ/ucsm0002/CONUS2/Forcing'

# directory to save averages to
directory_out_forcing = f'/glade/scratch/tijerina/NLDAS_averages/WY{water_year}'
#################################################################################

In [3]:
# ### READING ALL STATIC VARIABLES NEEDED
# nz = 10
# ny = 3256
# nx = 4442

# dx = 1000
# dy = 1000
# dz = 200

# #apparently it's good to use high numbers when saving files to speed up reading?

# ### DO WE NEED THIS? Think we can just submit as a serial process
# ### ALSO, aren't the p, q, r for the dist here? which is set to False
# p = 48
# q = 36
# r = 1

In [3]:
for day in range(day_start,day_end):

    timestamp_day_out = str(int(day+1)).rjust(3, '0')

    #READING FORCING VARIABLES
    #timestamps beginning and ending of the day
    h_start_forcing = str(int(day*24+1)).rjust(6, '0')
    h_end_forcing = str(int((day+1)*24)).rjust(6, '0')
    
    #looping through the forcing variables you want an average for
    for ind_forcing in range(len(variables_forcing)):
        var=variables_forcing[ind_forcing]
        forcing = read_pfb(f'{directory_in_forcing}/WY{water_year}/NLDAS.{var}.{h_start_forcing}_to_{h_end_forcing}.pfb')
        #sum over the 24h and divide by 24
        if variables_forcing_mean[ind_forcing]==1:
            forcing = np.sum(forcing,axis=0)/24
        
        
        
        ######### TRY THIS!!!
        df = xr.DataArray(Data, coords=[('lon', longitude), ('lat', latitude)])
        df.to_netcdf('filename.nc')
        ############# https://stackoverflow.com/questions/59734176/save-np-array-to-netcdf4-files-with-python
        
        nc_filename = f'{directory_out_forcing}/NLDAS.{var}.daily.{timestamp_day_out}.nc'
        print ('saving to ', nc_filename)
        forcing.to_netcdf(path=nc_filename)
        print ('finished saving')

        #write_pfb(f'{directory_out_forcing}/NLDAS.{var}.daily.{timestamp_day_out}.pfb',forcing,dx=dx,dy=dy,dz=dz,P=p,Q=q,R=r,dist=False)
        #print(f'Saving {var}: day {timestamp_day_out}')

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


saving to  /glade/scratch/tijerina/NLDAS_averages/WY2003/NLDAS.Temp.daily.001.nc


AttributeError: 'numpy.ndarray' object has no attribute 'to_netcdf'

# How to save pfbs so they are only 2D??

In [8]:
day1_Temp = np.squeeze(read_pfb(f'{directory_out_forcing}/NLDAS.Temp.daily.001.pfb'))

In [9]:
day1_Temp.shape

(3256, 4442)

In [10]:
day1_Temp[103:108,2185:2190]

array([[        -inf,         -inf,         -inf,         -inf,
                -inf],
       [        -inf, 301.00572713, 300.99767431, 300.99038315,
                -inf],
       [301.02525457, 301.01606623, 301.00750351, 301.00183741,
        301.00152079],
       [301.03162511, 301.02574412, 301.01710002, 301.00990168,
        301.00638072],
       [301.02079391, 301.01398849, 301.00410843, 300.99659348,
        300.99309667]])