In [31]:
from datetime import datetime
from metloom.pointdata import SnotelPointData
import pandas as pd
import cartopy
import geoviews as gv
import geopandas as gpd
import holoviews as hv
import xarray as xr
from metsim import MetSim
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
from metpy.units import units
import metpy.calc as mpcalc
import math
import scipy

import sys
sys.path.append('/Users/clintonalden/Documents/Research/summa_work/')
from utils import lw_clr
from utils import forcing_filler as ff

In [32]:
snotel_point = SnotelPointData("728:WA:SNTL", "MyStation")
df = snotel_point.get_hourly_data(
    datetime(2020, 7, 2), datetime(2021, 9, 30),
    [snotel_point.ALLOWED_VARIABLES.PRECIPITATIONACCUM, snotel_point.ALLOWED_VARIABLES.TEMP]
)

# Clean up the dataframe
df.reset_index(inplace=True)
print(df)
df.drop(columns=['site', 'ACCUMULATED PRECIPITATION_units', 'geometry', 'AIR TEMP_units', 'datasource'], inplace=True)

# Rename columns
replace = {'ACCUMULATED PRECIPITATION':'accppt','AIR TEMP':'airtemp', 'datetime':'time'}
df.rename(columns=replace, inplace=True)
df.set_index('time', inplace=True)

                       datetime         site  \
0     2020-07-02 08:00:00+00:00  728:WA:SNTL   
1     2020-07-02 09:00:00+00:00  728:WA:SNTL   
2     2020-07-02 10:00:00+00:00  728:WA:SNTL   
3     2020-07-02 11:00:00+00:00  728:WA:SNTL   
4     2020-07-02 12:00:00+00:00  728:WA:SNTL   
...                         ...          ...   
10889 2021-09-30 04:00:00+00:00  728:WA:SNTL   
10890 2021-09-30 05:00:00+00:00  728:WA:SNTL   
10891 2021-09-30 06:00:00+00:00  728:WA:SNTL   
10892 2021-09-30 07:00:00+00:00  728:WA:SNTL   
10893 2021-09-30 08:00:00+00:00  728:WA:SNTL   

                                       geometry  ACCUMULATED PRECIPITATION  \
0      POINT Z (-119.83830 48.65518 4460.00000)                       13.5   
1      POINT Z (-119.83830 48.65518 4460.00000)                       13.5   
2      POINT Z (-119.83830 48.65518 4460.00000)                       13.5   
3      POINT Z (-119.83830 48.65518 4460.00000)                       13.6   
4      POINT Z (-119.83830 48.655

In [33]:
df

Unnamed: 0_level_0,accppt,airtemp
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-07-02 08:00:00+00:00,13.5,43.52
2020-07-02 09:00:00+00:00,13.5,42.44
2020-07-02 10:00:00+00:00,13.5,42.62
2020-07-02 11:00:00+00:00,13.6,44.06
2020-07-02 12:00:00+00:00,13.5,46.58
...,...,...
2021-09-30 04:00:00+00:00,16.8,42.62
2021-09-30 05:00:00+00:00,16.9,42.26
2021-09-30 06:00:00+00:00,16.9,42.98
2021-09-30 07:00:00+00:00,16.9,43.16


In [34]:
# Covert air temperature to celsius
df['airtemp'] = (df['airtemp'] - 32) * 5.0/9.0

# Convert precipitation to mm
df['accppt'] = df['accppt'] * 25.4

# Convert from geodataframe to dataframe
df = pd.DataFrame(df)


In [35]:
from pytz import UTC
# Interpolate the missing values
df.interpolate(inplace=True)

# Seperate the data into two dataframes, before and after October 1
spinstart = pd.to_datetime('2020-07-03').tz_localize('UTC')
spinend = pd.to_datetime('2020-09-30').tz_localize('UTC')
spinup = df.loc[spinstart:spinend]
data = df.loc[datetime(2020, 10, 1).replace(tzinfo=UTC):]

# Copy the dataframe a2 to a2_copy
data_copy = data.copy()

# Create a mask to identify rows where the index is less than or equal to October 2, 2023
mask = data_copy.index <= datetime(2020, 10, 2).replace(tzinfo=UTC)

# Set the 'precip_accum' column to 0 for rows that satisfy the mask condition
data_copy.loc[mask, 'accppt'] = 0

# Update the value of a2 to the modified copy
data = data_copy

# Calculate the difference between the maximum value of 'precip_accum' and the previous value
spinup['pptrate'] = spinup['accppt'].cummax().diff()
data['pptrate'] = data['accppt'].cummax().diff()

# Drop accppt column
spinup.drop(columns=['accppt'], inplace=True)
data.drop(columns=['accppt'], inplace=True)

## Generate SW from MetSim

In [36]:
# Create empty dataset
dates = pd.date_range('10/01/2020', '09/30/2021')
shape = (len(dates), 1, 1, )
dims = ('time', 'lat', 'lon', )

# We are running only one site, at these coordinates
lats = [48.66]
lons = [-119.84]
elev = 1359.4 # meters
coords = {'time': dates, 'lat': lats, 'lon': lons}

# Create the initial met data input data structure
met_data = xr.Dataset(coords=coords)

In [37]:
for varname in ['prec', 't_min', 't_max']:
    met_data[varname] = xr.DataArray(data=np.full(shape, np.nan),
                                     coords=coords, dims=dims,
                                     name=varname)

In [38]:
# Resample the data to daily frequency and calculate the maximum and minimum temperatures
tmax_vals = data['airtemp'].resample('D').max()
tmin_vals = data['airtemp'].resample('D').min()

# Calculate the daily precipitation values
prec_vals = data['pptrate'].resample('D').sum()

# Interpolate the temperature values to fill in any missing days
# tmax_vals = tmax_vals.interpolate(method='linear')
# tmin_vals = tmin_vals.interpolate(method='linear')

met_data['prec'].values[:, 0, 0] = prec_vals

# Assign the daily maximum and minimum temperatures to the met_data xarray, converting to Celsius
met_data['t_min'].values[:, 0, 0] = tmin_vals
met_data['t_max'].values[:, 0, 0] = tmax_vals

met_data.to_netcdf('./input/rc_forcing.nc')

In [39]:
# We form the domain in a similar fashion
# First, by creating the data structure
coords = {'lat': lats, 'lon': lons}
domain = xr.Dataset(coords=coords)
domain['elev'] = xr.DataArray(data=np.full((1,1,), np.nan),
                          coords=coords,
                          dims=('lat', 'lon', ))
domain['mask'] = xr.DataArray(data=np.full((1,1,), np.nan),
                          coords=coords,
                          dims=('lat', 'lon', ))

# Add the data
domain['elev'][0, 0] = elev
domain['mask'][0, 0] = 1
domain.to_netcdf('./input/rc_domain.nc')

In [40]:
# Finally, we create the state file - the dates are 90 days prior to 
# the MetSim run dates - as usual, create an empty data structure to
# read the data into
dates = pd.date_range('07/03/2020', '09/30/2020')
shape = (len(dates), 1, 1, )
dims = ('time', 'lat', 'lon', )
lats = [48.66]
lons = [-119.84]
elev = 1359.4 # meters
coords = {'time': dates, 'lat': lats, 'lon': lons}
state = xr.Dataset(coords=coords)
for varname in ['prec', 't_min', 't_max']:
    state[varname] = xr.DataArray(data=np.full(shape, np.nan),
                               coords=coords, dims=dims,
                               name=varname)
    
# Resample precip to daily
prec_vals = spinup['pptrate'].resample('D').sum()

# Resample the data to daily frequency and calculate the maximum and minimum temperatures
tmax_vals = spinup['airtemp'].resample('D').max()
tmin_vals = spinup['airtemp'].resample('D').min()

# Do precip data
state['prec'].values[:, 0, 0] = prec_vals

# And now temp data and convert to C
state['t_min'].values[:, 0, 0] = tmin_vals
state['t_max'].values[:, 0, 0] = tmax_vals
state.to_netcdf('./input/rc_state.nc')

In [41]:
dates = pd.date_range('10/01/2020', '09/30/2021')
params = {
    'time_step'    : "60",       
    'start'        : dates[0],
    'stop'         : dates[-1],
    'forcing'      : './input/rc_forcing.nc',     
    'domain'       : './input/rc_domain.nc',
    'state'        : './input/rc_state.nc',
    'forcing_fmt'  : 'netcdf',
    'out_dir'      : './output',
    'out_prefix': 'salmon',
    'scheduler'    : 'threading',
    'chunks'       : 
        {'lat': 1, 'lon': 1},
    'forcing_vars' : 
        {'prec' : 'prec', 't_max': 't_max', 't_min': 't_min'},
    'state_vars'   : 
        {'prec' : 'prec', 't_max': 't_max', 't_min': 't_min'},
    'domain_vars'  : 
        {'elev': 'elev', 'lat': 'lat', 'lon': 'lon', 'mask': 'mask'}
    }               

ms = MetSim(params)
ms.run()
output = ms.open_output().load()

In [42]:
output

## Create SUMMA forcing netCDF

In [43]:
out_df = output.to_dataframe()
out_df.reset_index(inplace=True)
out_df.set_index('time', inplace=True)
data

Unnamed: 0_level_0,airtemp,pptrate
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-10-01 00:00:00+00:00,17.6,
2020-10-01 01:00:00+00:00,15.2,0.0
2020-10-01 02:00:00+00:00,13.1,0.0
2020-10-01 03:00:00+00:00,12.1,0.0
2020-10-01 04:00:00+00:00,11.0,0.0
...,...,...
2021-09-30 04:00:00+00:00,5.9,0.0
2021-09-30 05:00:00+00:00,5.7,0.0
2021-09-30 06:00:00+00:00,6.1,0.0
2021-09-30 07:00:00+00:00,6.2,0.0


In [44]:
data.index = data.index.tz_convert(None)


In [45]:
# Convert precipitation rate from m hr^-1 to kg m^-2 s^-1
data['pptrate'] = data['pptrate']/3600

# Generate relative humidity assuming T_d is overnight low temperature
# Used to calculate specific humidity and longwave radiation
ff.fill_rel_hum(data)

# Convert airtemp to Kelvin
data['airtemp'] = (1.03*(data['airtemp']-0.9)) + 273.15 # Currier snotel temp correction

# Generate pressure from hypsometric equation and site elevation (1981m)
ff.fill_pressure(data, elev)

# Generate specific humidity
ff.fill_spec_hum(data)

# Set shortwave radiation to MetSim output
data['SWRadAtm'] = out_df['shortwave']

# Generate longwave radiation
data['LWRadAtm'] = lw_clr.dilleyobrien1998(data['airtemp'], data['rh'])

# Can alternatively use the MetSim LW radiation
# data['LWRadAtm'] = out_df['longwave']

# Set wind to 2 m/s
data['windspd'] = 2

# Drop unnecessary columns
data = data.drop(columns=['rh'])

In [46]:
template = xr.open_dataset('../summa_forcing_template.nc')
data

Unnamed: 0_level_0,airtemp,pptrate,airpres,spechum,SWRadAtm,LWRadAtm,windspd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-10-01 00:00:00,290.351,,91788.726188,0.011697,0.000000,325.346828,2
2020-10-01 01:00:00,287.879,0.0,91701.016610,0.010167,0.000000,310.609336,2
2020-10-01 02:00:00,285.716,0.0,91626.663262,0.009383,0.000000,300.269214,2
2020-10-01 03:00:00,284.686,0.0,91589.110118,0.008725,0.000000,293.933079,2
2020-10-01 04:00:00,283.553,0.0,91549.468411,0.008310,0.000000,288.501377,2
...,...,...,...,...,...,...,...
2021-09-30 04:00:00,278.300,0.0,91355.240705,0.005341,0.000000,256.744415,2
2021-09-30 05:00:00,278.094,0.0,91351.488285,0.005857,0.000000,259.516984,2
2021-09-30 06:00:00,278.506,0.0,91366.190600,0.005966,11.902636,261.277627,2
2021-09-30 07:00:00,278.609,0.0,91368.407978,0.005763,55.454407,260.279012,2


In [52]:
data['pptrate'] = data['pptrate'].fillna(0)


In [53]:
# Convert dataframe to xarray
dsx = data.to_xarray()

# Loop through variables and add attributes from template forcing file
for data_var in dsx:
    dsx[data_var].attrs = template[data_var].attrs
    
# Add hru dimension
dsx = dsx.expand_dims(dim={'hru':1})

# Add gap-filled and datastep variables
dsx['gap_filled'] = xr.DataArray(np.ones((1,dsx.time.shape[0])),dims = ['hru','time'])
dsx['data_step'] = 3600 # 3600 seconds for 1hr timesteps

# Transpose gap filled variable to match dimensions with the rest
# dsx['gap_filled'] = dsx['gap_filled'].T

# Convert all to float64
for var in dsx.data_vars:
    dsx[var] = dsx[var].astype(np.float64)

# Set hruID based on template
dsx['hruId'] = (xr.DataArray(np.ones((1))*template['hruId'].values,dims = ['hru'])).astype(np.int32)

# Transpose all variables to match SUMMA dimensions
count = 0
for var in dsx.data_vars:
    print(var,count)
    count += 1
    if count <= 7:
        attribs = dsx[var].attrs
        arr_t = dsx[var].values.T
        dsx[var] = xr.DataArray(dims = ['time','hru'],data = arr_t)
        dsx[var].attrs = attribs

# Set encoding for the time variable
# dsx['time'].encoding = {'_FillValue': np.nan, 'units': 'hours since 1990-01-01', 'calendar': 'proleptic_gregorian'}

# Set hruID based on template
dsx['hruId'] = (xr.DataArray(np.ones((1))*template['hruId'].values,dims = ['hru'])).astype(np.float64).fillna(0).astype(np.int32)

dsx.to_netcdf('../forcings/salmon_WY21.nc',
                        encoding = {"time":
                                        {'dtype' : 'float64',
                                         'units' : 'hours since 1990-01-01 00:00:00',
                                         'calendar' : 'standard'}})

airtemp 0
pptrate 1
airpres 2
spechum 3
SWRadAtm 4
LWRadAtm 5
windspd 6
gap_filled 7
data_step 8
hruId 9


In [54]:
dsx

In [56]:
data.spechum.max()

0.02094446066086862