# Winter 2022 and 2023 Precipitation and Sublimation Comparison

Author: Daniel Hogan
Created: May 30, 2024

This notebook will look at how precipitation and sublimation are related to one another by each month and wind direction, do they correlate to one another?
This tries to get at the root question of: does more sublimation occur over a winter with more precipitation events, regardless of if they are large or small? Does frequency of events matter more than their magnitude?

In [2]:
# general
import os
import glob
import datetime as dt
import json
# data 
import xarray as xr 
from sublimpy import utils, variables, tidy, turbulence
import numpy as np
import pandas as pd
from act import discovery, plotting
# plotting
import matplotlib.pyplot as plt
from metpy.cbook import get_test_data
from metpy.plots import add_metpy_logo, SkewT
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.io as pio
# helper tools
from scripts.get_sail_data import get_sail_data
from scripts.helper_funcs import create_windrose_df, simple_sounding, mean_sounding
import scripts.helper_funcs as hf
from metpy import calc, units
import scipy.stats as stats
from sklearn.linear_model import LinearRegression
import time
# make plotly work 
init_notebook_mode(connected=True)
cf.go_offline()

nctoolkit is using Climate Data Operators version 2.3.0


# Laser Disdrometer data
Here, I will gather the laser disdrometer data and organize it into hourly data to plot alongside wind speeds and direction.

In [7]:
# gather laser disdrometer data for 2023
w23_ld_mtcb_ds = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_22_23/laser_disdrometer_mt_cb_20221001_20230930.nc')
w23_ld_gts_ds = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_22_23/laser_disdrometer_gothic_20221001_20230930.nc')

# gather laser disdrometer data for 2022
w22_ld_mtcb_ds = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_21_22/laser_disdrometer_mt_cb_20211001_20220930.nc')
w22_ld_gts_ds = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_21_22/laser_disdrometer_gothic_20211001_20220930.nc')

# gather tilden precip data
tilden_precip_ds = pd.read_csv('../../01_data/processed_data/splash/tilden_precip_2022_2023.csv',index_col=0, parse_dates=True).to_xarray()
# rename datetime to time
tilden_precip_ds = tilden_precip_ds.rename({'datetime':'time'})

Laser disdrometer is 1 minute data, with other dimensions of particle size (mm) and raw_fall_velocity (m/s).
The variable of interest is `precip_rate` and `qc_precip_rate`

In [8]:
# Now let's filtter the data to our winter period (Dec 1 - Mar 31) and only look at the precip_rate data
w23_prcp_rate_mtcb_ds = w23_ld_mtcb_ds.sel(time=slice('2022-12-01', '2023-03-31'))[['precip_rate', 'qc_precip_rate']]
w23_prcp_rate_gts_ds = w23_ld_gts_ds.sel(time=slice('2022-12-01', '2023-03-31'))[['precip_rate', 'qc_precip_rate']]
w22_prcp_rate_mtcb_ds = w22_ld_mtcb_ds.sel(time=slice('2021-12-01', '2022-03-31'))[['precip_rate', 'qc_precip_rate']]
w22_prcp_rate_gts_ds = w22_ld_gts_ds.sel(time=slice('2021-12-01', '2022-03-31'))[['precip_rate', 'qc_precip_rate']]
tilden_prcp_ds = tilden_precip_ds.sel(time=slice('2022-12-01', '2023-03-31'))['Precip_mm']

In [9]:
# Now let's filter any data that has a qc_precip_rate value of 0
w23_prcp_rate_mtcb_ds = w23_prcp_rate_mtcb_ds.where(w23_prcp_rate_mtcb_ds.qc_precip_rate == 0, np.nan)
w23_prcp_rate_gts_ds = w23_prcp_rate_gts_ds.where(w23_prcp_rate_gts_ds.qc_precip_rate == 0, np.nan)
w22_prcp_rate_mtcb_ds = w22_prcp_rate_mtcb_ds.where(w22_prcp_rate_mtcb_ds.qc_precip_rate == 0, np.nan)
w22_prcp_rate_gts_ds = w22_prcp_rate_gts_ds.where(w22_prcp_rate_gts_ds.qc_precip_rate == 0, np.nan)
# now let's resample the data to hourly sums
w23_prcp_rate_1H_mtcb_ds = w23_prcp_rate_mtcb_ds.resample(time='1H').sum()/60
w23_prcp_rate_1H_gts_ds = w23_prcp_rate_gts_ds.resample(time='1H').sum()/60
w22_prcp_rate_1H_mtcb_ds = w22_prcp_rate_mtcb_ds.resample(time='1H').sum()/60
w22_prcp_rate_1H_gts_ds = w22_prcp_rate_gts_ds.resample(time='1H').sum()/60

### Wind Data
Now, let's grab the SOS data, the SAIL met data, and the SPLASH data and add the precipitation data to each

In [85]:
# Now let's grab wind data from SOS for 2023 at Kettle Ponds and wind data from SAIL at Gothic
sos_1H_max_wspd_ds = xr.open_dataset('../../01_data/processed_data/sos_ds_1H_max_wspd_storage.nc')
sos_1H_mean_wspd_ds = xr.open_dataset('../../01_data/raw_data/sos_ds_1H_storage.nc')
# SOS data is in utc, let's convert it to mountain time
# First let's add the UTC timezone to the datasets 
sos_1H_max_wspd_ds['time'] = pd.to_datetime(sos_1H_max_wspd_ds.time).tz_localize('UTC').tz_convert('MST').tz_localize(None)
sos_1H_mean_wspd_ds['time'] = pd.to_datetime(sos_1H_mean_wspd_ds.time).tz_localize('UTC').tz_convert('MST').tz_localize(None)

# Now we grab the SPLASH data from Avery Picnic and Kettle Ponds
w23_splash_ap_qc_ds_1H = xr.open_dataset("../../01_data/processed_data/splash/w23_splash_ap_qc_1H.nc")
w22_splash_ap_qc_ds_1H = xr.open_dataset("../../01_data/processed_data/splash/w22_splash_ap_qc_1H.nc")
w23_splash_kp_qc_ds_1H = xr.open_dataset("../../01_data/processed_data/splash/w23_splash_kp_qc_1H.nc")
w22_splash_kp_qc_ds_1H = xr.open_dataset("../../01_data/processed_data/splash/w22_splash_kp_qc_1H.nc")

# convert the SPLASH time series to mountain time
w23_splash_ap_qc_ds_1H['time'] = pd.to_datetime(w23_splash_ap_qc_ds_1H.time).tz_localize('UTC').tz_convert('MST').tz_localize(None)
w22_splash_ap_qc_ds_1H['time'] = pd.to_datetime(w22_splash_ap_qc_ds_1H.time).tz_localize('UTC').tz_convert('MST').tz_localize(None)
w23_splash_kp_qc_ds_1H['time'] = pd.to_datetime(w23_splash_kp_qc_ds_1H.time).tz_localize('UTC').tz_convert('MST').tz_localize(None)
w22_splash_kp_qc_ds_1H['time'] = pd.to_datetime(w22_splash_kp_qc_ds_1H.time).tz_localize('UTC').tz_convert('MST').tz_localize(None)

# Now grab the SAIL data 
w23_sail_kp_qc_ds_1H = pd.read_csv('../../01_data/processed_data/sail_processed/w23_sail_met_kp_qc_sublimation.csv', index_col=0, parse_dates=True).to_xarray()

In [86]:
# establish the start and end time for each dataset at 12-01 to 03-31
sos_1H_max_wspd_ds = sos_1H_max_wspd_ds.sel(time=slice('2022-12-01', '2023-03-31'))
sos_1H_mean_wspd_ds = sos_1H_mean_wspd_ds.sel(time=slice('2022-12-01', '2023-03-31'))
w23_splash_ap_qc_ds_1H = w23_splash_ap_qc_ds_1H.sel(time=slice('2022-12-01', '2023-03-31'))
w22_splash_ap_qc_ds_1H = w22_splash_ap_qc_ds_1H.sel(time=slice('2021-12-01', '2022-03-31'))
w22_splash_kp_qc_ds_1H = w22_splash_kp_qc_ds_1H.sel(time=slice('2021-12-01', '2022-03-31'))
w23_sail_kp_qc_ds_1H = w23_sail_kp_qc_ds_1H.sel(time=slice('2022-12-01', '2023-03-31'))

In [87]:
# calculate the 1 hour sublimation rate
rho_w = 1000
g2kg = 1/1000
# seconds in timestep
s_in_hour = 3600

sos_vars = [variable for variable in hf.TURBULENCE_VARIABLES if 'w_h2o__10m' in variable]+[variable for variable in hf.WIND_VARIABLES if 'dir_10m' in variable]
splash_vars = ['wdir_max','Hl', 'wdir_vec_mean']

# Now let's filter sos and splash datasets
sos_1H_max_wspd_ds = sos_1H_max_wspd_ds[sos_vars]
sos_1H_mean_wspd_ds = sos_1H_mean_wspd_ds[sos_vars]
w23_splash_ap_qc_ds_1H = w23_splash_ap_qc_ds_1H[splash_vars]
w22_splash_ap_qc_ds_1H = w22_splash_ap_qc_ds_1H[splash_vars]
w22_splash_kp_qc_ds_1H = w22_splash_kp_qc_ds_1H[splash_vars]

# Now let's add precip_rate to all the datasets, not going to use sail data
# 2023 datasets
sos_1H_max_wspd_ds['precip_rate'] = w23_prcp_rate_1H_gts_ds.precip_rate
sos_1H_max_wspd_ds['precip_rate_tilden'] = tilden_prcp_ds
sos_1H_mean_wspd_ds['precip_rate'] = w23_prcp_rate_1H_gts_ds.precip_rate
sos_1H_mean_wspd_ds['precip_rate_tilden'] = tilden_prcp_ds
w23_splash_ap_qc_ds_1H['precip_rate'] = w23_prcp_rate_1H_gts_ds.precip_rate
w23_splash_ap_qc_ds_1H['precip_rate_tilden'] = tilden_prcp_ds
w23_sail_kp_qc_ds_1H['precip_rate'] = w23_prcp_rate_1H_gts_ds.precip_rate
w23_sail_kp_qc_ds_1H['precip_rate_tilden'] = tilden_prcp_ds
# 2022 datasets
w22_splash_ap_qc_ds_1H['precip_rate'] = w22_prcp_rate_1H_gts_ds.precip_rate
w22_splash_kp_qc_ds_1H['precip_rate'] = w22_prcp_rate_1H_gts_ds.precip_rate

# add sublimation for the datasets without it
# SOS
sublimation_1H = (sos_1H_max_wspd_ds[[variable for variable in hf.TURBULENCE_VARIABLES if 'w_h2o__10m' in variable]] * 1/1000 * rho_w * g2kg * s_in_hour)
# get the mean at the 3 meter height
sublimation_1H = sublimation_1H[[var for var in sublimation_1H if 'w_h2o__10m' in var]].to_dataframe().mean(axis=1).to_xarray()
sos_1H_max_wspd_ds['sublimation'] = sublimation_1H
sos_1H_mean_wspd_ds['sublimation'] = sublimation_1H

# SPLASH
# Now let's calculate the sublimation rate for the splash data
sublimation_1H_23 = (w23_splash_ap_qc_ds_1H['Hl'] * s_in_hour / 2.8341e6 )
sublimation_1H_22_ap = (w22_splash_ap_qc_ds_1H['Hl'] * s_in_hour / 2.8341e6 )
sublimation_1H_22_kp = (w22_splash_kp_qc_ds_1H['Hl'] * s_in_hour / 2.8341e6 )

w23_splash_ap_qc_ds_1H['sublimation'] = sublimation_1H_23
w22_splash_ap_qc_ds_1H['sublimation'] = sublimation_1H_22_ap
w22_splash_kp_qc_ds_1H['sublimation'] = sublimation_1H_22_kp

# add binary precip
# create a binary precipitation dataset for 2022
w22_splash_kp_qc_ds_1H['precip_binary'] = (w22_splash_kp_qc_ds_1H['precip_rate'].to_dataframe())['precip_rate'].apply(lambda x: 1 if x > 0.1 else 0)
w22_splash_ap_qc_ds_1H['precip_binary'] = (w22_splash_ap_qc_ds_1H['precip_rate'].to_dataframe())['precip_rate'].apply(lambda x: 1 if x > 0.1 else 0)
# create a binary precipitation dataset for 2023
w23_splash_ap_qc_ds_1H['precip_binary'] = (w23_splash_ap_qc_ds_1H['precip_rate'].to_dataframe())['precip_rate'].apply(lambda x: 1 if x > 0.1 else 0)
w23_sail_kp_qc_ds_1H['precip_binary'] = (w23_sail_kp_qc_ds_1H['precip_rate'].to_dataframe())['precip_rate'].apply(lambda x: 1 if x > 0.1 else 0)

w23_splash_ap_qc_ds_1H['precip_binary_tilden'] = (w23_splash_ap_qc_ds_1H['precip_rate_tilden'].to_dataframe())['precip_rate_tilden'].apply(lambda x: 1 if x > 0.1 else 0)
w23_sail_kp_qc_ds_1H['precip_binary_tilden'] = (w23_sail_kp_qc_ds_1H['precip_rate_tilden'].to_dataframe())['precip_rate_tilden'].apply(lambda x: 1 if x > 0.1 else 0)
# add binary precip to sos data
sos_1H_max_wspd_ds['precip_binary'] = (sos_1H_max_wspd_ds['precip_rate'].to_dataframe())['precip_rate'].apply(lambda x: 1 if x > 0.1 else 0)
sos_1H_mean_wspd_ds['precip_binary'] = (sos_1H_mean_wspd_ds['precip_rate'].to_dataframe())['precip_rate'].apply(lambda x: 1 if x > 0.1 else 0)
sos_1H_max_wspd_ds['precip_binary_tilden'] = (sos_1H_max_wspd_ds['precip_rate_tilden'].to_dataframe())['precip_rate_tilden'].apply(lambda x: 1 if x > 0.1 else 0)
sos_1H_mean_wspd_ds['precip_binary_tilden'] = (sos_1H_mean_wspd_ds['precip_rate_tilden'].to_dataframe())['precip_rate_tilden'].apply(lambda x: 1 if x > 0.1 else 0)


Okay, now we have SPLASH and SOS datasets with:
- hourly wind direction
- hourly sublimation
- hourly precipitation
- hourly binary precipitation
- months


## SOS Kettle Ponds 2023

In [88]:
# function for prepping the data
# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

df_to_use = sos_1H_mean_wspd_ds[['dir_10m_ue','sublimation', 'precip_rate','precip_binary']].to_dataframe()
pre_len = len(df_to_use)
# filter out any nan values
df_to_use = df_to_use.dropna()
print('Number of Hours Dropped:', pre_len - len(df_to_use))

# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

# wdir cut and sum sublimation for each bin
wdir_assignment = pd.cut(df_to_use['dir_10m_ue'], bins=wdir_bins, labels=wdir_bins[:-1])
wdir_assignment.name = 'wdir_assignment'

# combine the two assignments and the sublimation
df_to_use['wdir_assignment'] = wdir_assignment.values

# groupby the wspd and wdir assignments and get the sum of w_h2o__3m_uw
grouped_sub = (df_to_use.groupby(['wdir_assignment']).sum()['sublimation'])
grouped_prcp = (df_to_use.groupby(['wdir_assignment']).sum()['precip_rate'])
grouped_prcp_binary = (df_to_use.groupby(['wdir_assignment']).sum()['precip_binary'])


Number of Hours Dropped: 301




In [89]:
# Let's make a few plots using make_sublots in plotly 
fig = go.Figure()
# for the first figure lets make two y axes, one where we'll have a barplot of the precipitation occurence and the other where we'll have the sublimation rate
fig.add_trace(go.Scatter(x=grouped_prcp_binary.index, y=grouped_prcp_binary.values, name='Precipitation Occurence', yaxis='y1'))
fig.update_yaxes(title_text='Precipitation Occurence')
# add second y-axis
fig.add_trace(go.Scatter(x=grouped_sub.index, y=grouped_sub.values, name='Sublimation', yaxis='y2'))
fig.add_trace(go.Scatter(x=grouped_prcp.index, y=grouped_prcp.values, name='Precipitation', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0.1,0.9],
                             title='Wind Direction Bins',
                             range=[-10,360],
                             tickvals=[0,45,90,135,180,225,270,315],
                             ticktext=['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']),
                  yaxis=dict(title='Precipitation Occurence (Hours)',
                             range=[-1,200]),
                  yaxis2=dict(title='Sublimation (mm)',
                              overlaying='y',
                              titlefont=dict(
                                  color="#d62728"
                              ),
                              tickfont=dict(
                                  color="#d62728"
                              ),
                              side='right',
                              anchor='x',
                              position=0.8,
                              range=[-.1,10]),
                  yaxis3=dict(
                              title="Precipitation (mm)",
                              anchor="free",
                              overlaying="y",
                              side="right",
                              position=0.98,
                              range=[-1,200*35/25]
                              ))
# remove the grid line for axis 1 and 2
fig.update_yaxes(showgrid=False)
fig.update_layout(
                  height=600,
                  width=800)
# move the legend to the top right of the plot
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.11
))

## SAIL Kettle Ponds 2023

In [90]:
# function for prepping the data
# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

df_to_use = w23_sail_kp_qc_ds_1H[['wind_dir','sublimation', 'precip_rate','precip_binary']].to_dataframe()
pre_len = len(df_to_use)
# filter out any nan values
df_to_use = df_to_use.dropna()
print('Number of Hours Dropped:', pre_len - len(df_to_use))

# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

# wdir cut and sum sublimation for each bin
wdir_assignment = pd.cut(df_to_use['wind_dir'], bins=wdir_bins, labels=wdir_bins[:-1])
wdir_assignment.name = 'wdir_assignment'

# combine the two assignments and the sublimation
df_to_use['wdir_assignment'] = wdir_assignment.values

# groupby the wspd and wdir assignments and get the sum of w_h2o__3m_uw
grouped_sub = (df_to_use.groupby(['wdir_assignment']).sum()['sublimation'])
grouped_prcp = (df_to_use.groupby(['wdir_assignment']).sum()['precip_rate'])
grouped_prcp_binary = (df_to_use.groupby(['wdir_assignment']).sum()['precip_binary'])
grouped_sub.sum()

Number of Hours Dropped: 1082




23.2183144389857

In [93]:
# Let's make a few plots using make_sublots in plotly 
fig = go.Figure()
# for the first figure lets make two y axes, one where we'll have a barplot of the precipitation occurence and the other where we'll have the sublimation rate
fig.add_trace(go.Scatter(x=grouped_prcp_binary.index, y=grouped_prcp_binary.values, name='Precipitation Occurence', yaxis='y1'))
fig.update_yaxes(title_text='Precipitation Occurence')
# add second y-axis
fig.add_trace(go.Scatter(x=grouped_sub.index, y=grouped_sub.values, name='Sublimation', yaxis='y2'))
fig.add_trace(go.Scatter(x=grouped_prcp.index, y=grouped_prcp.values, name='Precipitation', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0.1,0.9],
                             title='Wind Direction Bins',
                             range=[-10,360],
                             tickvals=[0,45,90,135,180,225,270,315],
                             ticktext=['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']),
                  yaxis=dict(title='Precipitation Occurence (Hours)',
                             range=[-1,200]),
                  yaxis2=dict(title='Sublimation (mm)',
                              overlaying='y',
                              titlefont=dict(
                                  color="#d62728"
                              ),
                              tickfont=dict(
                                  color="#d62728"
                              ),
                              side='right',
                              anchor='x',
                              position=0.8,
                              range=[-.1,10]),
                  yaxis3=dict(
                              title="Precipitation (mm)",
                              anchor="free",
                              overlaying="y",
                              side="right",
                              position=0.98,
                              range=[-1,200*35/25]
                              ))
# remove the grid line for axis 1 and 2
fig.update_yaxes(showgrid=False)
fig.update_layout(
                  height=600,
                  width=800)
# move the legend to the top right of the plot
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.11
))

# 2022 Kettle Ponds SPLASH

In [94]:
# function for prepping the data
# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

df_to_use = w22_splash_kp_qc_ds_1H[['wdir_vec_mean','sublimation', 'precip_rate','precip_binary']].to_dataframe()
pre_len = len(df_to_use)
# filter out any nan values
df_to_use = df_to_use.dropna()
print('Number of Hours Dropped:', pre_len - len(df_to_use))
# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

# wdir cut and sum sublimation for each bin
wdir_assignment = pd.cut(df_to_use['wdir_vec_mean'], bins=wdir_bins, labels=wdir_bins[:-1])
wdir_assignment.name = 'wdir_assignment'

# combine the two assignments and the sublimation
df_to_use['wdir_assignment'] = wdir_assignment.values

# groupby the wspd and wdir assignments and get the sum of w_h2o__3m_uw
grouped_sub = (df_to_use.groupby(['wdir_assignment']).sum()['sublimation'])
grouped_prcp = (df_to_use.groupby(['wdir_assignment']).sum()['precip_rate'])
grouped_prcp_binary = (df_to_use.groupby(['wdir_assignment']).sum()['precip_binary'])
# unstack and put into a dataframe
grouped_prcp_intensity = grouped_prcp_binary / grouped_prcp
# get the correlation between the sublimation and the grouped_prcp_intensity
correlation = grouped_sub.corr(grouped_prcp)
grouped_sub.sum()

Number of Hours Dropped: 426




15.089749642427376

In [96]:
# Let's make a few plots using make_sublots in plotly 
fig = go.Figure()
# for the first figure lets make two y axes, one where we'll have a barplot of the precipitation occurence and the other where we'll have the sublimation rate
fig.add_trace(go.Scatter(x=grouped_prcp_binary.index, y=grouped_prcp_binary.values, name='Precipitation Occurence', yaxis='y1'))
fig.update_yaxes(title_text='Precipitation Occurence')
# add second y-axis
fig.add_trace(go.Scatter(x=grouped_sub.index, y=grouped_sub.values, name='Sublimation', yaxis='y2'))
fig.add_trace(go.Scatter(x=grouped_prcp.index, y=grouped_prcp.values, name='Precipitation', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0.1,0.9],
                             title='Wind Direction Bins',
                             range=[-10,360],
                             tickvals=[0,45,90,135,180,225,270,315],
                             ticktext=['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']),
                  yaxis=dict(title='Precipitation Occurence (Hours)',
                             range=[-1,200]),
                  yaxis2=dict(title='Sublimation (mm)',
                              titlefont=dict(
                                  color="#d62728"
                              ),
                              tickfont=dict(
                                  color="#d62728"
                              ),
                              overlaying='y',
                              side='right',
                              anchor='x',
                              position=0.8,
                              range=[-.1,10]),
                  yaxis3=dict(
                              title="Precipitation (mm)",
                              anchor="free",
                              overlaying="y",
                              side="right",
                              position=0.98,
                              range=[-1,200*35/25]
                              ))
# remove the grid line for axis 1 and 2
fig.update_yaxes(showgrid=False)
fig.update_layout(
                  height=600,
                  width=800)
# move the legend to the top right of the plot
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.11
))

### 2022 Avery Picnic SPLASH

In [97]:
# function for prepping the data
# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

df_to_use = w22_splash_ap_qc_ds_1H[['wdir_vec_mean','sublimation', 'precip_rate','precip_binary']].to_dataframe()
pre_len = len(df_to_use)
# filter out any nan values
df_to_use = df_to_use.dropna()
print('Number of Hours Dropped:', pre_len - len(df_to_use))

# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

# wdir cut and sum sublimation for each bin
wdir_assignment = pd.cut(df_to_use['wdir_vec_mean'], bins=wdir_bins, labels=wdir_bins[:-1])
wdir_assignment.name = 'wdir_assignment'

# combine the two assignments and the sublimation
df_to_use['wdir_assignment'] = wdir_assignment.values

# groupby the wspd and wdir assignments and get the sum of w_h2o__3m_uw
grouped_sub = (df_to_use.groupby(['wdir_assignment']).sum()['sublimation'])
grouped_prcp = (df_to_use.groupby(['wdir_assignment']).sum()['precip_rate'])
grouped_prcp_binary = (df_to_use.groupby(['wdir_assignment']).sum()['precip_binary'])
# unstack and put into a dataframe
grouped_prcp_intensity = grouped_prcp_binary / grouped_prcp
# get the correlation between the sublimation and the grouped_prcp_intensity
correlation = grouped_sub.corr(grouped_prcp)
grouped_sub.sum()

Number of Hours Dropped: 643




16.90016019318921

In [99]:
# Let's make a few plots using make_sublots in plotly 
fig = go.Figure()
# for the first figure lets make two y axes, one where we'll have a barplot of the precipitation occurence and the other where we'll have the sublimation rate
fig.add_trace(go.Scatter(x=grouped_prcp_binary.index, y=grouped_prcp_binary.values, name='Precipitation Occurence', yaxis='y1'))
fig.update_yaxes(title_text='Precipitation Occurence')
# add second y-axis
fig.add_trace(go.Scatter(x=grouped_sub.index, y=grouped_sub.values, name='Sublimation', yaxis='y2'))
fig.add_trace(go.Scatter(x=grouped_prcp.index, y=grouped_prcp.values, name='Precipitation', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0.1,0.9],
                             title='Wind Direction Bins',
                             range=[-10,360],
                             tickvals=[0,45,90,135,180,225,270,315],
                             ticktext=['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']),
                  yaxis=dict(title='Precipitation Occurence (Hours)',
                             range=[-1,200]),
                  yaxis2=dict(title='Sublimation (mm)',
                              overlaying='y',
                              side='right',
                              anchor='x',
                              titlefont=dict(
                                  color="#d62728"
                              ),
                              tickfont=dict(
                                  color="#d62728"
                              ),
                              position=0.8,
                              range=[-.1,10]),
                  yaxis3=dict(
                              title="Precipitation (mm)",
                              anchor="free",
                              overlaying="y",
                              side="right",
                              position=0.98,
                              range=[-1,200*35/25]
                              ))
# remove the grid line for axis 1 and 2
fig.update_yaxes(showgrid=False)
fig.update_layout(
                  height=600,
                  width=800)
# move the legend to the top right of the plot
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.21
))

### 2023 Avery Picnic SPLASH

In [100]:
# function for prepping the data
# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

df_to_use = w23_splash_ap_qc_ds_1H[['wdir_vec_mean','sublimation', 'precip_rate','precip_binary']].to_dataframe()
pre_len = len(df_to_use)
# filter out any nan values
df_to_use = df_to_use.dropna()
print('Number of Hours Dropped:', pre_len - len(df_to_use))

# wind direction bins
wdir_bins = [0, 45, 90, 135, 180, 225, 270, 315, 360]

# wdir cut and sum sublimation for each bin
wdir_assignment = pd.cut(df_to_use['wdir_vec_mean'], bins=wdir_bins, labels=wdir_bins[:-1])
wdir_assignment.name = 'wdir_assignment'

# combine the two assignments and the sublimation
df_to_use['wdir_assignment'] = wdir_assignment.values

# groupby the wspd and wdir assignments and get the sum of w_h2o__3m_uw
grouped_sub = (df_to_use.groupby(['wdir_assignment']).sum()['sublimation'])
grouped_prcp = (df_to_use.groupby(['wdir_assignment']).sum()['precip_rate'])
grouped_prcp_binary = (df_to_use.groupby(['wdir_assignment']).sum()['precip_binary'])
# unstack and put into a dataframe
grouped_prcp_intensity = grouped_prcp_binary / grouped_prcp
# get the correlation between the sublimation and the grouped_prcp_intensity
correlation = grouped_sub.corr(grouped_prcp)


Number of Hours Dropped: 1885




In [101]:
# Let's make a few plots using make_sublots in plotly 
fig = go.Figure()
# for the first figure lets make two y axes, one where we'll have a barplot of the precipitation occurence and the other where we'll have the sublimation rate
fig.add_trace(go.Scatter(x=grouped_prcp_binary.index, y=grouped_prcp_binary.values, name='Precipitation Occurence', yaxis='y1'))
fig.update_yaxes(title_text='Precipitation Occurence')
# add second y-axis
fig.add_trace(go.Scatter(x=grouped_sub.index, y=grouped_sub.values, name='Sublimation', yaxis='y2'))
fig.add_trace(go.Scatter(x=grouped_prcp.index, y=grouped_prcp.values, name='Precipitation', yaxis='y3'))
fig.update_layout(xaxis=dict(domain=[0.1,0.9],
                             title='Wind Direction Bins',
                             range=[-10,360],
                             tickvals=[0,45,90,135,180,225,270,315],
                             ticktext=['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']),
                  yaxis=dict(title='Precipitation Occurence (Hours)',
                             range=[-1,200]),
                  yaxis2=dict(title='Sublimation (mm)',
                              overlaying='y',
                              side='right',
                              anchor='x',
                              titlefont=dict(
                                  color="#d62728"
                              ),
                              tickfont=dict(
                                  color="#d62728"
                              ),
                              position=0.8,
                              range=[-.1,10]),
                  yaxis3=dict(
                              title="Precipitation (mm)",
                              anchor="free",
                              overlaying="y",
                              side="right",
                              position=0.98,
                              range=[-1,200*35/25]
                              ))
# remove the grid line for axis 1 and 2
fig.update_yaxes(showgrid=False)
fig.update_layout(
                  height=600,
                  width=800)
# move the legend to the top right of the plot
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.21
))