In [1]:
import ulmo
import numpy as np 
import pandas as pd
import plotly.express as px
import json
from datetime import datetime as dt, date


## Connect to the correct API endpoint to be able to pull in snotel data

In [2]:
#This is the latest CUAHSI API endpoint
wsdlurl = 'https://hydroportal.cuahsi.org/Snotel/cuahsi_1_1.asmx?WSDL'

## Pull in meta data for all the different snotel sites

In [3]:
sites = ulmo.cuahsi.wof.get_sites(wsdlurl)


### Organizing into a nice dataframe

In [4]:
sites_df = (pd.DataFrame.from_dict(sites, orient='index')
            .dropna()
            .reset_index()
            .rename(columns = {'index':'sitecode'})
)

location = pd.json_normalize(sites_df['location'])
site_property = pd.json_normalize(sites_df['site_property'])

snotel_meta = pd.concat(objs = [sites_df[['network','sitecode','name','elevation_m']], location, site_property], ignore_index=False, axis = 1)
snotel_meta['begin_date'] = snotel_meta['site_comments'].str.split(' ').str[0].str.split('=').str[1]
snotel_meta = snotel_meta[['network','sitecode','name','state','county','elevation_m','latitude','longitude','begin_date']]
snotel_meta = snotel_meta.astype({'begin_date':'datetime64'})
snotel_meta.sample(5)

Unnamed: 0,network,sitecode,name,state,county,elevation_m,latitude,longitude,begin_date
739,SNOTEL,SNOTEL:758_MT_SNTL,Silver Run,Montana,Carbon,2020.823974609375,45.150001525878906,-109.34999847412108,1975-10-01
584,SNOTEL,SNOTEL:1138_NM_SNTL,Navajo Whiskey Ck,New Mexico,San Juan,2758.43994140625,36.17728042602539,-108.94690704345705,2009-05-12
601,SNOTEL,SNOTEL:267_WY_SNTL,Nowood,Wyoming,Washakie,2621.280029296875,43.43333053588867,-107.38333129882812,1980-09-01
646,SNOTEL,SNOTEL:698_NV_SNTL,Pole Creek R.S.,Nevada,Elko,2548.1279296875,41.87255096435546,-115.24713134765624,1977-02-01
827,SNOTEL,SNOTEL:813_MT_SNTL,Tepee Creek,Montana,Madison,2438.39990234375,44.785621643066406,-111.70999908447266,1971-10-01


## Grab all the counties with major ski resorts and take a quick look at the snotels in these locations

In [5]:
# States of interest
states_of_interest = ["Washington",'Oregon','California','Idaho','Montana','Colorado','New Mexico','Utah','Wyoming']


# Counties with many ski resorts
wa_c = ['Whatcom','Chelan','Kittitas']
or_c = ['Deschutes','Clackamas']
ca_c = ['Placer','Nevada','Amador','Mono','San Bernardino']
id_c = ['Bonner','Blaine','Valley']
mt_c = ['Gallatin','Madison','Flathead']
co_c = ['Summit','San Juan','Pitkin','Routt']
nm_c = ['Taos']
ut_c = ['Salt Lake','Utah','Weber']
wy_c = ['Teton','Sublette','Albany']

counties_of_interest = wa_c + or_c + ca_c + id_c + mt_c + co_c + nm_c + ut_c + wy_c

In [6]:
fig = px.scatter(snotel_meta.query("state in @states_of_interest and county in @counties_of_interest"), 
                x = 'begin_date',
                y = 'elevation_m', 
                color = 'state',
                title = "Snotels by when they came online and their elevation")
fig.show()

In [7]:
print('Number of Snotels in the areas of interest at least as old as 1980:')
snotel_meta.query("state in @states_of_interest and county in @counties_of_interest and begin_date < '1980-10-01'").groupby('state')['sitecode'].nunique()

Number of Snotels in the areas of interest at least as old as 1980:


state
California     6
Colorado      15
Idaho         14
Montana       22
New Mexico     2
Oregon         8
Utah          15
Washington     9
Wyoming       16
Name: sitecode, dtype: int64

### Select a stratified random sample of snotels in the area of interest

In [8]:
snotel_sample = snotel_meta.query("state in @states_of_interest and county in @counties_of_interest and begin_date < '1980-10-01'").groupby('state').sample(frac = .10).sort_values(by = 'state')
snotel_sample

Unnamed: 0,network,sitecode,name,state,county,elevation_m,latitude,longitude,begin_date
465,SNOTEL,SNOTEL:587_CA_SNTL,Lobdell Lake,California,Mono,2819.09521484375,38.43745040893555,-119.36572265625,1978-10-01
134,SNOTEL,SNOTEL:386_CO_SNTL,Cascade,Colorado,San Juan,2706.6240234375,37.65082931518555,-107.80602264404295,1978-10-01
422,SNOTEL,SNOTEL:556_CO_SNTL,Kiln,Colorado,Pitkin,2926.080078125,39.317241668701165,-106.61453247070312,1979-10-01
36,SNOTEL,SNOTEL:323_ID_SNTL,Bear Mountain,Idaho,Bonner,1645.9200439453125,48.30577087402344,-116.07447814941406,1979-10-01
222,SNOTEL,SNOTEL:450_ID_SNTL,Dollarhide Summit,Idaho,Blaine,2566.416015625,43.602500915527344,-114.67417144775392,1979-10-01
732,SNOTEL,SNOTEL:754_MT_SNTL,Shower Falls,Montana,Gallatin,2468.8798828125,45.401248931884766,-110.95758056640624,1964-10-01
564,SNOTEL,SNOTEL:649_MT_SNTL,Mount Lockhart,Montana,Teton,1950.719970703125,47.91727066040039,-112.82379913330078,1968-10-01
100,SNOTEL,SNOTEL:252_MT_SNTL,BRIDGER BOWL,Montana,Gallatin,2209.800048828125,45.799999237060554,-110.91667175292967,1965-10-01
568,SNOTEL,SNOTEL:651_OR_SNTL,Mt Hood Test Site,Oregon,Clackamas,1636.7760009765625,45.32096862792969,-121.7157974243164,1979-10-01
454,SNOTEL,SNOTEL:579_UT_SNTL,Lily Lake,Utah,Summit,2783.73828125,40.86492919921875,-110.79812622070312,1980-06-12


## Pull in time series Snotel data

### Establish what data is available for a given snotel
- Quick reference guide (from the USDA):
    - Time intervals:
        - _H: hourly
        - _D: daily
        - _m : monthly
        - _sm: semi-monthly
    - Standard SNOTEL Metrics:
        - WTEQ: snow water equivalent - Depth of water that would theoretically result if the entire snowpack were melted instantaneously
        - PREC: precipitation accumulation value - Water year accumulated precipitation
        - TMAX: Air temperature maximum - measured at a sub hourly sampling frequency (TMIN and TAVG are air temp minimum and average respectively)
        - PRCP: precipitation increment - total precipitation (presumably in the time period chosen but this has not been confirmed)

In [9]:
sitecode = 'SNOTEL:858_MT_SNTL' 
ulmo.cuahsi.wof.get_site_info(wsdlurl, sitecode)['series'].keys()

dict_keys(['SNOTEL:BATT_D', 'SNOTEL:BATT_H', 'SNOTEL:BATX_H', 'SNOTEL:PRCP_y', 'SNOTEL:PRCP_sm', 'SNOTEL:PRCP_m', 'SNOTEL:PRCP_wy', 'SNOTEL:PRCPSA_y', 'SNOTEL:PRCPSA_D', 'SNOTEL:PRCPSA_sm', 'SNOTEL:PRCPSA_m', 'SNOTEL:PRCPSA_wy', 'SNOTEL:PREC_sm', 'SNOTEL:PREC_m', 'SNOTEL:PREC_wy', 'SNOTEL:SNWD_D', 'SNOTEL:SNWD_sm', 'SNOTEL:SNWD_H', 'SNOTEL:SNWD_m', 'SNOTEL:TAVG_y', 'SNOTEL:TAVG_D', 'SNOTEL:TAVG_sm', 'SNOTEL:TAVG_m', 'SNOTEL:TAVG_wy', 'SNOTEL:TAVG_H', 'SNOTEL:TMAX_y', 'SNOTEL:TMAX_D', 'SNOTEL:TMAX_sm', 'SNOTEL:TMAX_m', 'SNOTEL:TMAX_wy', 'SNOTEL:TMAX_H', 'SNOTEL:TMIN_y', 'SNOTEL:TMIN_D', 'SNOTEL:TMIN_sm', 'SNOTEL:TMIN_m', 'SNOTEL:TMIN_wy', 'SNOTEL:TMIN_H', 'SNOTEL:TOBS_D', 'SNOTEL:TOBS_sm', 'SNOTEL:TOBS_H', 'SNOTEL:TOBS_m', 'SNOTEL:WTEQ_D', 'SNOTEL:WTEQ_sm', 'SNOTEL:WTEQ_H', 'SNOTEL:WTEQ_m'])

### Define a function to extract time series data from a given snotel

In [10]:
def get_snotel_data(wsdlurl, sitecode, variablecode, start_date, end_date):
    # query the appropriate data 
    data = ulmo.cuahsi.wof.get_values(wsdlurl, sitecode, variablecode, start = start_date, end = end_date)
    # make the data pretty
    df = pd.DataFrame.from_dict(data['values'])[['datetime','value']]
    df = (df
    .assign(sitecode = sitecode)
    .rename(columns = {'value':variablecode})
    .astype({'datetime':'datetime64'})
    .iloc[:,[0,2,1]]
    )
    return(df)

#### test the function for a single snotel and metric

In [249]:
sitecode = 'SNOTEL:655_OR_SNTL' 
variablecode = 'SNOTEL:WTEQ_sm'
test = get_snotel_data(wsdlurl= wsdlurl, sitecode = sitecode, variablecode=variablecode, start_date= '1980-01-01', end_date = '2022-09-30')

###  Snow Water Equivalent Data Pull

In [11]:
snotel_sample_list = snotel_sample['sitecode'].values.tolist()
start = '1999-10-01'
end = '2022-09-30'


variablecode = 'SNOTEL:WTEQ_d'
swe_df = pd.DataFrame()

for i in snotel_sample_list:
    data = get_snotel_data(wsdlurl= wsdlurl, sitecode = i, variablecode=variablecode, start_date = start, end_date = end)
    swe_df = pd.concat([swe_df,data], axis = 0)

KeyboardInterrupt: 

In [50]:
swe_df.tail()

Unnamed: 0,datetime,sitecode,SNOTEL:WTEQ_sm
1003,2021-12-10,SNOTEL:597_WY_SNTL,0.0
1004,2021-12-25,SNOTEL:597_WY_SNTL,0.0
1005,2022-01-09,SNOTEL:597_WY_SNTL,0.0
1006,2022-01-24,SNOTEL:597_WY_SNTL,0.0
1007,2022-02-08,SNOTEL:597_WY_SNTL,0.0


### Semi Monthly Accumulated Precipitation Data Pull

In [12]:
variablecode = 'SNOTEL:PRCP_d'
prec_df = pd.DataFrame()

for i in snotel_sample_list:
    data = get_snotel_data(wsdlurl= wsdlurl, sitecode = i, variablecode=variablecode, start_date = start, end_date = end)
    prec_df = pd.concat([prec_df,data], axis = 0)


In [49]:
prec_df.tail()

Unnamed: 0,datetime,sitecode,SNOTEL:PRCP_sm
1003,2021-12-10,SNOTEL:597_WY_SNTL,0.3
1004,2021-12-25,SNOTEL:597_WY_SNTL,2.0
1005,2022-01-09,SNOTEL:597_WY_SNTL,0.4
1006,2022-01-24,SNOTEL:597_WY_SNTL,0.8
1007,2022-02-08,SNOTEL:597_WY_SNTL,1.7


### Semi Monthly Temp Data Pull

In [13]:
variablecode = 'SNOTEL:TMIN_d'
tmin_df = pd.DataFrame()

for i in snotel_sample_list:
    data = get_snotel_data(wsdlurl= wsdlurl, sitecode = i, variablecode=variablecode, start_date = start, end_date = end)
    tmin_df = pd.concat([tmin_df,data], axis = 0)

variablecode = 'SNOTEL:TMAX_sm'
tmax_df = pd.DataFrame()

for i in snotel_sample_list:
    data = get_snotel_data(wsdlurl= wsdlurl, sitecode = i, variablecode=variablecode, start_date = start, end_date = end)
    tmax_df = pd.concat([tmax_df,data], axis = 0)

variablecode = 'SNOTEL:TAVG_sm'
tavg_df = pd.DataFrame()

for i in snotel_sample_list:
    data = get_snotel_data(wsdlurl= wsdlurl, sitecode = i, variablecode=variablecode, start_date = start, end_date = end)
    tavg_df = pd.concat([tavg_df,data], axis = 0)



In [51]:
tavg_df.tail()

Unnamed: 0,datetime,sitecode,SNOTEL:TAVG_sm
883,2022-01-05,SNOTEL:597_WY_SNTL,60.418
884,2022-01-20,SNOTEL:597_WY_SNTL,57.548
885,2022-02-04,SNOTEL:597_WY_SNTL,55.985
886,2022-02-19,SNOTEL:597_WY_SNTL,54.548
887,2022-03-06,SNOTEL:597_WY_SNTL,46.088


### Daily Snow Depth Data

In [363]:
variablecode = 'SNOTEL:SNWD_d'
snwd_df = pd.DataFrame()

for i in snotel_sample_list:
    data = get_snotel_data(wsdlurl= wsdlurl, sitecode = i, variablecode=variablecode, start_date = start, end_date = end)
    snwd_df = pd.concat([snwd_df,data], axis = 0)


### Combining the time series 

In [44]:
df = swe_df.merge(tmin_df, on = ['datetime','sitecode'], how = 'left')
        

df.tail()

Unnamed: 0,datetime,sitecode,SNOTEL:WTEQ_sm,SNOTEL:TMIN_sm
14029,2021-12-10,SNOTEL:597_WY_SNTL,0.0,
14030,2021-12-25,SNOTEL:597_WY_SNTL,0.0,
14031,2022-01-09,SNOTEL:597_WY_SNTL,0.0,
14032,2022-01-24,SNOTEL:597_WY_SNTL,0.0,
14033,2022-02-08,SNOTEL:597_WY_SNTL,0.0,


In [None]:
  
        .merge(tmin_df, on = ['datetime','sitecode'], how = 'left')
        .merge(tmax_df, on = ['datetime','sitecode'], how = 'left')
        .merge(tavg_df, on = ['datetime','sitecode'], how = 'left')
        .rename(columns = {'SNOTEL:WTEQ_sm':'swe',
                            'SNOTEL:PRCP_sm':'prec',
                            'SNOTEL:TMIN_sm':'min_temp',
                            'SNOTEL:TMAX_sm':'max_temp',
                            'SNOTEL:TAVG_sm':'avg_temp'})
        .assign(year = lambda x: x.datetime.dt.year,
                doy = lambda x: x.datetime.dt.dayofyear)
    

In [None]:
# Day of Water Year: Sept 30 is last day, October 1 is first day
df['dowy'] = df['doy'] - 273
df.loc[df['dowy'] <= 0, 'dowy'] += 365

# Get the correct water year for a given observation
df['water_year'] = df['year']
df.loc[df['doy'] < 273, 'water_year'] = df['year'] - 1

df = df.astype({'swe':'float64',
                'prec':'float64',
                'min_temp':'float64',
                'max_temp':'float64',
                'avg_temp':'float64',
                'year':'int64',
                'doy':'int64',
                'dowy':'int64',
                'water_year':'int64'})

# Add in snotel meta data
df = df.merge(snotel_meta, on = 'sitecode', how = 'left')

In [38]:
df.tail()

Unnamed: 0,datetime,sitecode,swe,prec,min_temp,max_temp,avg_temp,year,doy,dowy,water_year,network,name,state,county,elevation_m,latitude,longitude,begin_date
14029,2021-12-10,SNOTEL:597_WY_SNTL,0.0,0.3,,,,2021,344,71,2021,SNOTEL,Loomis Park,Wyoming,Sublette,2511.552001953125,43.17387008666992,-110.14006805419922,1979-10-01
14030,2021-12-25,SNOTEL:597_WY_SNTL,0.0,2.0,,,,2021,359,86,2021,SNOTEL,Loomis Park,Wyoming,Sublette,2511.552001953125,43.17387008666992,-110.14006805419922,1979-10-01
14031,2022-01-09,SNOTEL:597_WY_SNTL,0.0,0.4,,,,2022,9,101,2021,SNOTEL,Loomis Park,Wyoming,Sublette,2511.552001953125,43.17387008666992,-110.14006805419922,1979-10-01
14032,2022-01-24,SNOTEL:597_WY_SNTL,0.0,0.8,,,,2022,24,116,2021,SNOTEL,Loomis Park,Wyoming,Sublette,2511.552001953125,43.17387008666992,-110.14006805419922,1979-10-01
14033,2022-02-08,SNOTEL:597_WY_SNTL,0.0,1.7,,,,2022,39,131,2021,SNOTEL,Loomis Park,Wyoming,Sublette,2511.552001953125,43.17387008666992,-110.14006805419922,1979-10-01


In [39]:
df.describe()

Unnamed: 0,swe,prec,min_temp,max_temp,avg_temp,year,doy,dowy,water_year
count,14034.0,11088.0,0.0,0.0,0.0,14034.0,14034.0,14034.0,14034.0
mean,5.395048,-5.7807,,,,2001.040331,183.616432,181.986675,2000.299131
std,84.948955,268.540147,,,,11.8944,105.87753,105.518646,11.892623
min,-9999.0,-9999.0,,,,1980.0,1.0,1.0,1980.0
25%,0.0,0.4,,,,1991.0,92.0,91.0,1990.0
50%,1.4,1.0,,,,2001.0,184.0,181.0,2000.0
75%,9.6,1.9,,,,2011.0,275.0,274.0,2011.0
max,72.8,30.8,,,,2022.0,366.0,365.0,2021.0


In [29]:


fig = px.line(df, 
                x = 'datetime', 
                y = 'swe', 
                color = 'state',
                facet_row = 'name',
                height = 1000
                )
fig.show()

## Exploratory Data Analysis

In [289]:
fig = px.line(swe.query("state =='Colorado'"), x = 'datetime', y = 'swe', color = 'sitecode')
fig.show()

In [353]:
df.groupby('sitecode', as_index = False)['swe'].idmax()

AttributeError: 'DataFrameGroupBy' object has no attribute 'idmax'

In [290]:
snotel = swe.query("name =='Summit Ranch'")

fig = px.line(snotel, 
            x = 'datetime', 
            y = 'swe',
            title = 'Snow Water Equivalent',
            color_discrete_sequence=px.colors.sequential.Inferno)
fig.show()