In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import scipy
from metpy.units import units
import metpy.calc as mpcalc
import math
import datetime as dt

import sys
sys.path.append('/Users/clintonalden/Documents/Research/summa_work/')
from utils import lw_clr
from utils import forcing_filler as ff


In [2]:
# Read in csv data from Harts Pass SNOTEL
# Subset after October 17 as the data is poor before then
harts = pd.read_csv('hrpw1_WY24.csv', skiprows=10)
harts = harts[harts['Date_Time'] > '2023-10-17'] 

# Load template forcing file with correct attributes
template = xr.open_dataset('../summa_forcing_template.nc')

# Pull in only necessary columns and rename to match SUMMA formatting
harts = harts[['Date_Time', 'air_temp_set_1', 'wind_speed_set_1', 'solar_radiation_set_1', 'precip_accum_set_1']]
replace = {'air_temp_set_1':'airtemp','wind_speed_set_1':'windspd', 'solar_radiation_set_1':'SWRadAtm', 
           'precip_accum_set_1':'accppt', 'Date_Time':'time'}
harts.rename(columns=replace, inplace=True)


# Convert 'time' column to datetime format
harts['time'] = pd.to_datetime(harts['time'])

# Convert time column to desired format
harts['time'] = harts['time'].dt.strftime('%Y-%m-%d %H:%M:%S')

# Use cummax to fix the issues with SNOTEL hourly data
harts['accppt'] = harts['accppt'].cummax()

# Convert precip from mm hr-1 to kg m-2 s-1
harts['pptrate'] = harts['accppt'].diff()/3600

# Generate relative humidity assuming T_d is overnight low temperature
ff.fill_rel_hum(harts)

# Convert temperature from C to K
harts['airtemp'] = harts['airtemp'] + 273.15

# Set NaN SW radiation values to 0
harts['SWRadAtm'] = harts['SWRadAtm'].fillna(0)

# Pyranometer at Harts Pass looks biased low by 3.81 W m-2, adding to fix
harts['SWRadAtm'] = harts['SWRadAtm'] + 3.81

# Set NaN precip values to 0
harts['pptrate'] = harts['pptrate'].fillna(0)

# Generate pressure from hypsometric equation and site elevation (1981m)
ff.fill_pressure(harts, 1981)

# Generate specific humidity
ff.fill_spec_hum(harts)

# Generate longwave radiation
harts['LWRadAtm'] = lw_clr.dilleyobrien1998(harts['airtemp'], harts['rh'])

# Drop unnecessary columns
harts = harts.drop(columns=['accppt', 'rh'])

# Set time as index
harts.set_index('time', inplace=True)
harts.index = pd.to_datetime(harts.index)

harts



Unnamed: 0_level_0,airtemp,windspd,SWRadAtm,pptrate,airpres,spechum,LWRadAtm
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-10-17 00:00:00,277.65,0.0,53.66,0.000000,87102.275619,0.005594,255.003155
2023-10-17 01:00:00,276.25,0.0,4.68,0.001411,87033.711659,0.005401,250.172569
2023-10-17 02:00:00,276.05,0.0,3.81,0.000000,87022.942121,0.005272,248.833151
2023-10-17 03:00:00,275.65,0.0,3.81,0.000000,86998.337679,0.004681,243.876902
2023-10-17 04:00:00,273.55,0.0,3.81,0.000706,86893.256677,0.004324,236.175772
...,...,...,...,...,...,...,...
2024-03-15 08:00:00,276.55,0.0,3.81,0.000000,87041.534899,0.004680,246.158699
2024-03-15 09:00:00,277.05,0.0,3.81,0.000000,87065.627068,0.004702,247.598304
2024-03-15 10:00:00,276.35,0.0,3.81,0.000000,87031.863656,0.004670,245.576909
2024-03-15 11:00:00,275.75,0.0,3.81,0.000000,87002.733996,0.004635,243.812595


In [11]:
# Create timesteps
# Specify the start date and time
start_date = '2023-10-17'
start_time = '00:00'

# Combine start date and time to create a datetime object
start_datetime = pd.to_datetime(f'{start_date} {start_time}')

# Specify the number of rows
num_rows = 3607

# Generate datetime values for the specified number of rows
time = pd.date_range(start=start_datetime, periods=num_rows, freq='H')  # 'H' for hourly intervals

# Set index of dataframe to new timesteps
harts.set_index(time, inplace=True)
harts.index.name = 'time'

  time = pd.date_range(start=start_datetime, periods=num_rows, freq='H')  # 'H' for hourly intervals


In [3]:
# Convert dataframe to xarray
dsx = harts.to_xarray()

# Loop through variables and add attributes from template forcing file
for data_var in dsx:
    dsx[data_var].attrs = template[data_var].attrs
    
# Add hru dimension
dsx = dsx.expand_dims(dim={'hru':1})

# Add gap-filled and datastep variables
dsx['gap_filled'] = xr.DataArray(np.ones((1,dsx.time.shape[0])),dims = ['hru','time'])
dsx['data_step'] = 3600 # 3600 seconds for 1hr timesteps

# Transpose gap filled variable to match dimensions with the rest
# dsx['gap_filled'] = dsx['gap_filled'].T

# Convert all to float64
for var in dsx.data_vars:
    dsx[var] = dsx[var].astype(np.float64)

# Set hruID based on template
dsx['hruId'] = (xr.DataArray(np.ones((1))*template['hruId'].values,dims = ['hru'])).astype(np.int32)

# Transpose all variables to match SUMMA dimensions
count = 0
for var in dsx.data_vars:
    print(var,count)
    count += 1
    if count <= 7:
        attribs = dsx[var].attrs
        arr_t = dsx[var].values.T
        dsx[var] = xr.DataArray(dims = ['time','hru'],data = arr_t)
        dsx[var].attrs = attribs

# Set encoding for the time variable
# dsx['time'].encoding = {'_FillValue': np.nan, 'units': 'hours since 1990-01-01', 'calendar': 'proleptic_gregorian'}

# Set hruID based on template
dsx['hruId'] = (xr.DataArray(np.ones((1))*template['hruId'].values,dims = ['hru'])).astype(np.float64).fillna(0).astype(np.int32)

# Save to netcdf
dsx.to_netcdf('./harts_SNTL_WY24.nc',
              engine='h5netcdf')

airtemp 0
windspd 1
SWRadAtm 2
pptrate 3
airpres 4
spechum 5
LWRadAtm 6
gap_filled 7
data_step 8
hruId 9


In [4]:
dsx
