In [20]:
# Import libraries for accessing & formatting daymet data
import requests
from datetime import datetime as dt
from datetime import datetime, timedelta
from scipy.interpolate import CubicSpline
import itertools
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
import json

# Import libraries for sql connection
import mysql.connector
import pickle
import warnings

# Install a blanket 'ignore' filter: no category is given, so it applies to every warning.
# NOTE(review): this also hides deprecation and pandas warnings — consider narrowing it.
warnings.filterwarnings('ignore')

In [26]:
def daymet_daily_est(station, s_d, e_d, threshold = 0, window_length = 15, polyorder = 2):
    '''
    Build smoothed per-calendar-day min/max temperature estimates for one station.

    The daily values returned by get_daymet_min_max are collapsed to one row per
    'month_day' label (lowest daily_min and highest daily_max across all years in
    the span), the leap-day row is filled from Feb 28, and both columns are
    smoothed with a Savitzky-Golay filter.

    Parameters:
        station: station id, forwarded to get_daymet_min_max.
        s_d, e_d: start / end date strings, forwarded to get_daymet_min_max.
        threshold: margin forwarded to get_daymet_min_max (default 0 = no margin).
        window_length, polyorder: Savitzky-Golay settings; the defaults (15, 2)
            are the values this function previously hard-coded.

    Returns:
        DataFrame indexed by 'month_day' in sorted (calendar) order with smoothed
        'daily_min' and 'daily_max' columns.

    Raises:
        ValueError: if fewer than window_length distinct days are available.
    '''
    # Get daymet data from api access function
    dm_data = get_daymet_min_max(station, s_d, e_d, threshold)

    # Group to get daily min and max estimates for each month_day pairing
    dm_data = dm_data.groupby('month_day').agg({'daily_min': 'min', 'daily_max': 'max'})

    # Use the Feb 28 values for the leap day (presumably because Feb 29 is only
    # sampled in leap years — confirm). Skipped when the span has no Feb 28.
    if '02-28' in dm_data.index:
        dm_data.loc['02-29'] = dm_data.loc['02-28'].to_numpy()
        # If the span contained no leap year, the row above was appended after the
        # last label; re-sort so it sits between 02-28 and 03-01 before smoothing.
        dm_data = dm_data.sort_index()

    # The filter needs at least window_length samples; fail with a clear message
    if len(dm_data) < window_length:
        raise ValueError(
            f"Need at least {window_length} distinct days to smooth, got {len(dm_data)}"
        )

    # Apply filter to format the same as daily estimates
    dm_data['daily_min'] = savgol_filter(
        dm_data['daily_min'].to_numpy(), window_length=window_length, polyorder=polyorder
    )
    dm_data['daily_max'] = savgol_filter(
        dm_data['daily_max'].to_numpy(), window_length=window_length, polyorder=polyorder
    )

    return dm_data # return final estimates

def get_daymet_min_max(station, s_d, e_d, threshold = 0):
    '''
    Fetch Daymet daily tmax/tmin for a station's coordinates, in Fahrenheit.

    The station's latitude/longitude are looked up in the stations_awn table,
    then the Daymet single-pixel API is queried for the requested span.

    Parameters:
        station: numeric station id used to look up the coordinates.
        s_d, e_d: start / end dates, inserted as-is into the API query string.
        threshold: margin added to daily_max and subtracted from daily_min
            (default 0; used for estimates, not for synthetic data).

    Returns:
        DataFrame with one row per day and the columns 'daily_max', 'daily_min'
        (Fahrenheit, threshold applied) and 'month_day' ('MM-DD' string).

    Raises:
        ValueError: if the station id is not present in stations_awn.
        requests.HTTPError: if the Daymet API does not answer with status 200.
    '''
    # Load credentials for login (hidden and not added to repo)
    # NOTE(security): pickle.load can execute arbitrary code from the file; only
    # ever point this at a locally created, trusted login_cred.pkl.
    with open("login_cred.pkl", "rb") as fp:
        config = pickle.load(fp)

    # Query for accessing station information; the id is bound as a parameter
    # instead of being formatted into the SQL text.
    info_q = """SELECT station_id, station_latdeg, station_lngdeg
             FROM stations_awn
             WHERE station_id = %s"""

    cnx = mysql.connector.connect(**config) # connection point
    try:
        station_info = pd.read_sql(info_q, cnx, params=(int(station),))
    finally:
        # Always release the connection, even when the query fails
        cnx.close()

    if station_info.empty:
        raise ValueError(f"Station {station} was not found in stations_awn")

    # Get latitude and longitude
    lat = station_info.station_latdeg.values[0]
    lon = station_info.station_lngdeg.values[0]
    url = "https://daymet.ornl.gov/single-pixel/api/data"

    # Daymet API request (json format); the timeout keeps a stalled request from hanging forever
    r = requests.get(
        f"{url}?lat={lat}&lon={lon}&vars=tmax,tmin&start={s_d}&end={e_d}&format=json",
        timeout=120,
    )

    # Stop here on failure instead of continuing without any data
    if r.status_code != 200:
        raise requests.HTTPError(
            f"Daymet request failed with status {r.status_code}", response=r
        )

    dm_data = pd.DataFrame(r.json()['data'])

    # Convert year + day-of-year into a 'MM-DD' label (vectorised equivalent of
    # strptime('%Y-%j') followed by dropping the year part)
    year_txt = dm_data['year'].astype(int).astype(str)
    yday_txt = dm_data['yday'].astype(int).astype(str).str.zfill(3)
    dm_data['month_day'] = pd.to_datetime(
        year_txt + '-' + yday_txt, format='%Y-%j'
    ).dt.strftime('%m-%d')

    # Drop unused columns and rename temperature columns
    dm_data = dm_data.drop(columns=['yday', 'year'])
    dm_data = dm_data.rename(columns={'tmax (deg c)': 'daily_max', 'tmin (deg c)': 'daily_min'})

    # Convert celsius to fahrenheit, then widen the band by the requested threshold
    dm_data['daily_max'] = (9/5) * dm_data['daily_max'] + 32 + threshold
    dm_data['daily_min'] = (9/5) * dm_data['daily_min'] + 32 - threshold

    return dm_data

def main():
    '''
    Prompt for a station and date span, compute the Daymet daily estimates and
    save them as JSON in the current working directory.

    REQUIRED: have login_cred.pkl file in the same directory as script,
              this is required to query the lat/long values from DAS.

    Nothing is returned; the estimates are written to
    'Station_<station>_params.json' and then shown.

    Raises:
        ValueError: if the station is not an integer, a date is not in
            YYYY-MM-DD form, or the start date is after the end date.
    '''
    # Get user input for station and dates
    station = int(input('Enter Station Number: '))
    s_d = input('Enter Start Date (YYYY-MM-DD): ').strip()
    e_d = input('Enter End Date (YYYY-MM-DD): ').strip()

    # Fail fast on malformed or reversed dates before touching the database or API
    start = datetime.strptime(s_d, '%Y-%m-%d')
    end = datetime.strptime(e_d, '%Y-%m-%d')
    if start > end:
        raise ValueError(f'Start date {s_d} is after end date {e_d}')

    # Get daymet daily estimates using provided information
    est = daymet_daily_est(station, s_d, e_d)

    # Create file name
    f_name = f'Station_{station}_params'

    # Save parameters to specific format (same directory as file)
    est.to_json(f'{f_name}.json')
    print('----- Parameters Saved. ------')

    # Show the result: `display` only exists under IPython/Jupyter, so fall back
    # to print when this runs as a plain script.
    try:
        display(est)
    except NameError:
        print(est)

# Run the interactive prompt only when this code is the entry point, not when it is imported
if __name__ == '__main__':
    main()    

Enter Station Number: 87
Enter Start Date (YYYY-MM-DD): 2010-01-01
Enter End Date (YYYY-MM-DD): 2023-12-31
Parameters Saved.


Unnamed: 0_level_0,daily_min,daily_max
month_day,Unnamed: 1_level_1,Unnamed: 2_level_1
01-01,1.244829,53.061218
01-02,3.751102,52.890081
01-03,5.894147,52.772442
01-04,7.673962,52.708299
01-05,9.090549,52.697653
...,...,...
12-27,16.292291,51.754556
12-28,15.962600,51.616553
12-29,15.458117,51.508199
12-30,14.778840,51.429495


In [5]:
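# Format of the pkl file: a pickled dict that is unpacked as keyword arguments
# into mysql.connector.connect (presumably host/user/password/database — confirm).
# The cell output is hidden because it would show the user name and password.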
# pd.read_pickle(r'login_cred.pkl')

In [27]:
# Format of JSON File:

# {'daily_min': { '01-01': 29.11, '01-02': ..., ... },
#  'daily_max': { '01-01': 37.89, '01-02': ..., ...} }

# Access by: data['daily_min']['MM-DD']
#            data['daily_max']['MM-DD']

s_num = 87

# Read the saved estimates back; the context manager closes the file handle
# as soon as the JSON has been parsed.
with open(f'Station_{s_num}_params.json') as params_file:
    data = json.load(params_file)

In [28]:
# Spot-check: the Jan 1 (min, max) values read back from the JSON file
data['daily_min']['01-01'], data['daily_max']['01-01']

(1.2448292833, 53.0612176648)