### Process and save ParFlow daily averages
This script takes hourly PF outputs as PFB files and computes the daily, (monthly, and yearly) averages to be saved as PFB files.

Inputs:
- Directory where PF outputs are and directory where you want to save output
- Hourly PFB files of PF outputs
- water year and day start/end

Outputs:
- PFB files for daily average of each variable:  
    - Overland flow at each grid cell (flow)
    - Soil moisture (SM)
    - Water table depth (WTd)

  - Subsurface Storage
    - Total Subsurface Storage (SUBstorage)
    - GW storage (GWstorage)
    - Soil moisture storage (SMstorage)

  - Total Water Storage
    - Surface water storage (Surf_Wat)
    - Total water storage (TWS)
    
Notes (10/21/22):
- Need to determine when is the daily start and end for US time zone, NLDAS3 forcing is UTC
- Need to add in monthly and yearly averages
- 

In [1]:
import numpy as np
from parflow import Run
import sys
from parflow.tools.io import read_pfb,write_pfb
import parflow.tools.hydrology as hydro

In [2]:
#NCLMOUTPUTS = 13 + 4 #13 (number variables) + number of layers over which CLM is active, NZ root

#these 3 entries (year, day start and day end) will eventually be argv to the script so that it can be run from bash script
water_year = 2003
day_start = 0 #day_start = 0 is the first day of the water year, Oct 1 (e.g., day_start = 2 starts at hour 49)
day_end = 3 #day_end = 364 is the final day of the water year, Sept 30

# water_year = int(sys.argv[1])
# day_start = int(sys.argv[1])
# day_end = int(sys.argv[1])

# path to PF outputs and CLM outputs
path_outputs = '/glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/' #f'/WY{water_year}/'


runname = 'spinup.wy2003' #f'CONUS2_{water_year}'

# directory to save averages to
directory_out = f'/glade/scratch/tijerina/CONUS2/spinup_WY2003/averages'
#directory_out_forcing = f'where_you_want_forcing_averages_saved'

In [3]:
run = Run.from_definition(f'{path_outputs}/{runname}.pfidb')
data = run.data_accessor

porosity = data.computed_porosity 
specific_storage = data.specific_storage 
mannings = data.mannings

## remove input filenames for TopoSlopes to force the data accessor to read the output slopes
## this fixes a windows issue
run.TopoSlopesX.FileName = None
run.TopoSlopesY.FileName = None

slopex = data.slope_x 
slopey = data.slope_y 
mask = data.mask

# formatting the mask so that values outside the domain are NA and inside the domain are 1
# check with mask that has 0 and 1
active_mask=mask.copy()
active_mask[active_mask > 0] = 1

Solver: Field BinaryOutDir is not part of the expected schema <class 'parflow.tools.database.generated.Solver'>
Solver.OverlandKinematic: Field SeepageOne is not part of the expected schema <class 'parflow.tools.database.generated.OverlandKinematic'>
Solver.OverlandKinematic: Field SeepageTwo is not part of the expected schema <class 'parflow.tools.database.generated.OverlandKinematic'>
 => Error during CLM import - CLM specific key have been skipped


In [4]:
###READING ALL STATIC VARIABLES NEEDED
# Read in porosity data
#porosity = read_pfb(f'{path_outputs}{runname}.out.porosity.pfb')
#...
#etc.

#nz,ny,nx = porosity.shape()

nz = 10
ny = 3256
nx = 4442

dx = 1000
dy = 1000
dz = 200
dz_3d = data.dz

# apparently it's good to use high numbers when saving files to speed up reading?
# for write_pfb function
p = 72
q = 48
r = 1

data.time
# #list of clm variables you want
# variables_clm = ['eflx_lh_tot','qflx_evap_grnd','qflx_tran_veg','swe_out','t_grnd','t_soil']
# #indication whether you want the mean (1) or the sum (0)
# variables_clm_mean = [0,0,0,1,1,1]

# ALL_CLM = ['eflx_lh_tot','eflx_lwrad_out','eflx_sh_tot','eflx_soil_grnd','qflx_evap_tot','qflx_evap_grnd','qflx_evap_soi','qflx_evap_veg','qflx_tran_veg','qflx_infl','swe_out','t_grnd','qflx_qirr','t_soil']

0

In [9]:
porosity.shape

(10, 3256, 4442)

In [10]:
active_mask.shape

(10, 3256, 4442)

In [11]:
for day in range(day_start,day_end):

    timestamp_day_out = str(int(day+1)).rjust(3, '0')

    ##INITIALIZE WHATEVER DYNAMIC VARIABLES THAT NEED HOURLY READING
    soil_moisture = np.zeros((nz,ny,nx))
    subsurface_storage = np.zeros((nz,ny,nx))
    surface_storage = np.zeros((ny,nx))
    wtd = np.zeros((ny, nx)) 
    et = np.zeros((ny,nx)) 
    #overland_flow = np.zeros((ny, nx)) 
    
    #if not variables_clm == False:
    #    clm_output = np.zeros((NCLMOUTPUTS,ny,nx))
    for h in range(day*24+1,(day+1)*24+1):
        timestamp_reading = str(int(h)).rjust(5, '0')
        
        #read pressure and saturation at timestep 
        saturation = read_pfb(f'{path_outputs}{runname}.out.satur.{timestamp_reading}.pfb') * active_mask
#        pressure = read_pfb(f'{path_outputs}{runname}.out.press.{timestamp_reading}.pfb') * active_mask
        print(f'reading {path_outputs}{runname}.out at time {timestamp_reading}')
        
        ################### 
        # Computations
        ###################
        
        #Soil Moisture
        soil_moisture += saturation * porosity
        
        # Subsurface Storage
#        subsurface_storage += hydro.calculate_subsurface_storage(porosity, pressure, saturation, specific_storage, dx, dy, dz_3d, mask = active_mask)
        
        # Surface Storage
        ## total surface storage for this time step is the summation of substorage surface across all x/y slices <-- from other script, is this still TRUE??
#        surface_storage += hydro.calculate_surface_storage(pressure, dx, dy, mask = active_mask)
        
        # Water Table Depth
#        wtd = hydro.calculate_water_table_depth(pressure, saturation, dz_3d)
        
        # Flow [m^3/h]
#        overland_flow = hydro.calculate_overland_flow_grid(pressure, slopex, slopey, mannings, dx, dy, mask = active_mask)
        
        

        #CLM Variables
        #clm_output += read_pfb(f'{path_outputs}{runname}.out.clm_output.{timestamp_reading}.C.pfb')

    ### compute average for average variables
    soil_moisture /= 24
    subsurface_storage /= 24
    surface_storage /= 24
    wtd /= 24 # CHANGE THIS TO BE ACCUMULATED?? 10/7/22
    overland_flow /= 24


    subsurface[active_mask==0]=-10**(38)
    ### SAVE VARIABLES AS PFB FILES
    write_pfb(f'{directory_out}/SM.{water_year}.daily.{timestamp_day_out}.pfb',soil_moisture,dx=dx,dy=dy,dz=dz,P=p,Q=q,R=r,dist=False)
    # IS THIS 'GWstore' OR IS IT 'storage'
    write_pfb(f'{directory_out}/GWstorage.{water_year}.daily.{timestamp_day_out}.pfb',subsurface_storage,dx=dx,dy=dy,dz=dz,P=p,Q=q,R=r,dist=False)
    write_pfb(f'{directory_out}/surf_wat.{water_year}.daily.{timestamp_day_out}.pfb',surface_storage,dx=dx,dy=dy,dz=dz,P=p,Q=q,R=r,dist=False)
    write_pfb(f'{directory_out}/WTd.{water_year}.daily.{timestamp_day_out}.pfb',wtd,dx=dx,dy=dy,dz=dz,P=p,Q=q,R=r,dist=False)
    write_pfb(f'{directory_out}/flow.{water_year}.daily.{timestamp_day_out}.pfb',overland_flow,dx=dx,dy=dy,dz=dz,P=p,Q=q,R=r,dist=False)


reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00049
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00050
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00051
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00052
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00053
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00054
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00055
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00056
reading /glade/scratch/tijerina/CONUS2/spinup_WY2003/run_inputs/output-pf/spinup.wy2003.out at time 00057
reading /glade/scratch/tijerina/CONUS2/spinup_

In [37]:
 ### SAVE VARIABLES AS NETCDF FILES
try:nc_file.close()  # just to be safe, make sure dataset is not already open.
except: pass
#nc_filename = f'{directory_out}/SM.{water_year}.daily.{timestamp_day_out}.nc'
nc_filename = f'{directory_out}/SM.{water_year}.daily.999.nc'
nc_file = nc.Dataset(nc_filename, 'w', format='NETCDF4')


In [38]:
print(nc_file)


<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): 
    variables(dimensions): 
    groups: 


In [39]:
# define dimensions for nc file
ny_dim = nc_file.createDimension('ny', ny)
nx_dim = nc_file.createDimension('nx', nx)
time_dim = nc_file.createDimension('time', None)


In [40]:
### DONT NEED THIS PRINT STATEMENT
for dim in nc_file.dimensions.items():
    print(dim)

('ny', <class 'netCDF4._netCDF4.Dimension'>: name = 'ny', size = 3256)
('nx', <class 'netCDF4._netCDF4.Dimension'>: name = 'nx', size = 4442)
('time', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 0)


In [41]:
nc_file.title='Test Data'
print(nc_file.title)

Test Data


In [42]:
print(time_dim)

<class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 0


In [43]:
# add nc variables
time = nc_file.createVariable('time', np.float32, ('time_dim',))
# y = nc_file.createVariable('y', 'int', ('y',))
# x = nc_file.createVariable('x', 'int', ('x',))
# value = nc_file.createVariable('value', np.float32, ('time', 'y', 'x',))
# value.units = 'Unknown'

ValueError: cannot find dimension time_dim in this group or parent groups

In [36]:
nc.close()

AttributeError: module 'netCDF4' has no attribute 'close'

In [None]:
day1_sm = read_pfb(f'{directory_out}/spinup.wy2003.out.SM.001.pfb')

In [None]:
day1_sm.shape

In [None]:
day1_sm[9,2000:2005,2000:2005]

In [None]:
flow_array = np.squeeze(read_pfb(f'{directory_out}/flow.2003.daily.001.pfb'))

In [None]:
flow_array.shape

In [None]:
flow_array.max()

In [10]:
# 4: qflx_evap_tot for total evaporation [mm/s]
qflx_evap_CLM = np.zeros((5, ny, nx))
for i in range(5):    
    print(f'reading file {i}')
    CLM_file = read_pfb(f'{path_outputs}/{runname}.out.clm_output.{str(i+1).zfill(5)}.C.pfb')
    CLM_file[CLM_file<-9000] = 0 # set values outside of the domain to zero
    qflx_evap_CLM[i, ...] = CLM_file[4,...] # fill qflx_evap_CLM array by flagging the 4 position in the CLM file for qflx_evap_tot

reading file 0
reading file 1
reading file 2
reading file 3
reading file 4


In [19]:
# convert ET from mm/s to mm/h
qflx_evap_CLM = qflx_evap_CLM*3600
qflx_evap_CLM.shape

(5, 3256, 4442)

In [16]:
# Calling the first timestep at a location for qflx_evap
qflx_evap_CLM[0,2000:2005,2000:2005]

array([[ 0.01368996, -0.00062362, -0.00064269,  0.01355883, -0.00067061],
       [-0.00063027, -0.00064082, -0.00064565,  0.01361385, -0.00064218],
       [-0.00061999, -0.0006448 , -0.00064711, -0.0006081 , -0.00064518],
       [-0.00063097, -0.00064912, -0.00064875, -0.00064578, -0.00064648],
       [-0.00064049, -0.00065066, -0.00064876, -0.00064892, -0.00064619]])

In [24]:
# Calling the first timestep at a location
day1_ET = np.squeeze(read_pfb(f'{directory_out}/ET.2003.daily.001.pfb'))
day1_ET[9,2000:2005,2000:2005]

array([[ 0.38027669, -0.01732272, -0.01785239,  0.37663422, -0.01862814],
       [-0.01750741, -0.01780053, -0.01793485,  0.37816239, -0.01783835],
       [-0.01722189, -0.0179112 , -0.01797532, -0.01689159, -0.01792179],
       [-0.01752681, -0.01803109, -0.01802073, -0.01793827, -0.01795774],
       [-0.01779131, -0.01807393, -0.0180212 , -0.01802567, -0.01794969]])

In [22]:
sumET = day1_ET.sum(axis=0)
sumET.shape

(3256, 4442)

In [23]:
sumET[2000:2005,2000:2005]

array([[1490.68460902,  -67.90504868,  -69.98137281, 1476.4061264 ,
         -73.02232083],
       [ -68.62904675,  -69.77806435,  -70.30461678, 1482.39656555,
         -69.92634035],
       [ -67.50982266,  -70.21188456,  -70.46327344,  -66.21504232,
         -70.25341397],
       [ -68.70510275,  -70.68188807,  -70.64124413,  -70.31800288,
         -70.39433259],
       [ -69.74195392,  -70.84980775,  -70.64312293,  -70.66063916,
         -70.36277345]])

In [None]:
try: ncfile.close()  # just to be safe, make sure dataset is not already open.
except: pass
ncfile = Dataset('../../../data/new.nc',mode='w',format='NETCDF4_CLASSIC') 
print(ncfile)

In [None]:
lat_dim = ncfile.createDimension('lat', 73)     # latitude axis
lon_dim = ncfile.createDimension('lon', 144)    # longitude axis
time_dim = ncfile.createDimension('time', None) # unlimited axis (can be appended to).
for dim in ncfile.dimensions.items():
    print(dim)