# ALl things sublimation
### Author: Daniel Hogan
### 9/23/2024

This notebook will try to work through building relationships and a relatively continuous time series between different sublimation observations at differente locaitons and times throughout the East River Valley during the sublimation of snow campaign alongside SAIL and SPLASH

In [1]:
# general
import os
import glob
import datetime as dt
import json
import time
# data 
import xarray as xr 
from sublimpy import utils, variables, tidy, turbulence
import numpy as np
import pandas as pd
from act import discovery, plotting
# plotting
import matplotlib.pyplot as plt
from metpy.cbook import get_test_data
from metpy.plots import add_metpy_logo, SkewT
import plotly.express as px 
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.io as pio
# helper tools
from scripts.get_sail_data import get_sail_data
from scripts.helper_funcs import create_windrose_df, mean_sounding, simple_sounding
import scripts.helper_funcs as hf
from metpy import calc, units
# make plotly work 
init_notebook_mode(connected=True)
cf.go_offline()

nctoolkit is using Climate Data Operators version 2.3.0


### Open our SOS data

In [93]:
sos_1hr = xr.open_dataset('../../01_data/processed_data/sos_ds_1H_storage.nc')
sos_5min = xr.open_dataset('../../01_data/processed_data/sos_ds_5min_storage.nc')


### Open SAIL data 

In [85]:
# water year 2022
wy_22_sail_kp_qc = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_21_22/eddy_covariance_kettle_ponds_20211001_20220930.nc')
wy_22_sail_kp_no_qc = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_21_22/eddy_covariance_kettle_ponds_noqc_20211001_20220930.nc')

# water year 2023
wy_23_sail_kp_qc = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_22_23/eddy_covariance_kettle_ponds_20221001_20230930.nc')
wy_23_sail_kp_no_qc = xr.open_dataset('/storage/dlhogan/synoptic_sublimation/sail_data/winter_22_23/eddy_covariance_kettle_ponds_noqc_20221001_20230930.nc')

# print the start and end times of each dataset
print('wy_22_sail_kp_qc')
print(wy_22_sail_kp_qc.time.min().values, wy_22_sail_kp_qc.time.max().values)
print('wy_23_sail_kp_qc')
print(wy_23_sail_kp_qc.time.min().values, wy_23_sail_kp_qc.time.max().values)

wy_22_sail_kp_qc
2021-10-14T21:00:00.000000000 2022-09-30T23:30:00.000000000
wy_23_sail_kp_qc
2022-10-01T00:00:00.000000000 2023-06-16T14:30:00.000000000


### Open SPLASH data 

In [86]:
splash_ap = xr.open_dataset('../../01_data/raw_data/splash_ASFS_AP_all_storage.nc')
splash_kp = xr.open_dataset('../../01_data/raw_data/splash_ASFS_KP_all_storage.nc')

# print the start and end times of each dataset
print('splash_ap')
print(splash_ap.time.min().values, splash_ap.time.max().values)
print('splash_kp')
print(splash_kp.time.min().values, splash_kp.time.max().values)

splash_ap
2021-10-12T00:00:00.000000000 2023-06-21T23:50:00.000000000
splash_kp
2021-09-28T00:00:00.000000000 2023-07-19T23:50:00.000000000


### Limit to winter time for each water year

In [94]:
# time period of interest
start_22 = '2021-12-01'
end_22 = '2022-04-01'
start_23 = '2022-12-01'
end_23 = '2023-04-01'

# filter the data
# Winter 2022
w22_sail_kp_qc = wy_22_sail_kp_qc.sel(time=slice(start_22, end_22))
w22_sail_kp_no_qc = wy_22_sail_kp_no_qc.sel(time=slice(start_22, end_22))
w22_splash_ap = splash_ap.sel(time=slice(start_22, end_22))
w22_splash_kp = splash_kp.sel(time=slice(start_22, end_22))

# Winter 2023
w23_sail_kp_qc = wy_23_sail_kp_qc.sel(time=slice(start_23, end_23))
w23_sail_kp_no_qc = wy_23_sail_kp_no_qc.sel(time=slice(start_23, end_23))
w23_splash_ap = splash_ap.sel(time=slice(start_23, end_23))
w23_splash_kp = splash_kp.sel(time=slice(start_23, end_23))
w23_sos_1hr = sos_1hr.sel(time=slice(start_23, end_23))
w23_sos_5min = sos_5min.sel(time=slice(start_23, end_23))

### Let's check a few things
- consistent values
- consistent time-steps
- consistent units

In [119]:
# create an empty dataframe to store the results
results = pd.DataFrame(columns=['dataset', 'variable', 'mean', 'std', 'min', 'max', 'Valid %'])


In [120]:
print("SAIL KP QC Data - 2022")
# we'll start with checking the SAIL data
print(w22_sail_kp_qc['latent_heat_flux'].units)
print(f"Sensor Height: {w22_sail_kp_no_qc.attrs['sensor_height']}")
print(f"{w22_sail_kp_qc['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w22_sail_kp_qc['latent_heat_flux'].max().values}")
print(f"Min: {w22_sail_kp_qc['latent_heat_flux'].min().values}")
print(f"Mean: {w22_sail_kp_qc['latent_heat_flux'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w22_sail_kp_qc['latent_heat_flux'].count().values / w22_sail_kp_qc['latent_heat_flux'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[0] = ['SAIL KP QC Data - 2022', 'latent_heat_flux', 
                   w22_sail_kp_qc['latent_heat_flux'].mean().values, 
                   w22_sail_kp_qc['latent_heat_flux'].std().values, 
                   w22_sail_kp_qc['latent_heat_flux'].min().values, 
                   w22_sail_kp_qc['latent_heat_flux'].max().values, 
                   (w22_sail_kp_qc['latent_heat_flux'].count().values / w22_sail_kp_qc['latent_heat_flux'].size * 100).round(0)]


SAIL KP QC Data - 2022
W/m^2
Sensor Height: 3 m AGL
30.0 minutes
Max: 135.380615234375
Min: -103.9486312866211
Mean: 5.323911666870117
Percent of data that is not NAN: 55.0%


In [121]:
print("SAIL KP Raw Data - 2022")
# we'll start with checking the SAIL data
print(w22_sail_kp_no_qc['lv_e'].units)
print(f"Sensor Height: {w22_sail_kp_no_qc.attrs['sensor_height']}")
print(f"{w22_sail_kp_no_qc['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w22_sail_kp_no_qc['lv_e'].max().values}")
print(f"Min: {w22_sail_kp_no_qc['lv_e'].min().values}")
print(f"Mean: {w22_sail_kp_no_qc['lv_e'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w22_sail_kp_no_qc['lv_e'].count().values / w22_sail_kp_no_qc['lv_e'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[1] = ['SAIL KP Raw Data - 2022', 'lv_e', 
                   w22_sail_kp_no_qc['lv_e'].mean().values, 
                   w22_sail_kp_no_qc['lv_e'].std().values, 
                   w22_sail_kp_no_qc['lv_e'].min().values, 
                   w22_sail_kp_no_qc['lv_e'].max().values, 
                   (w22_sail_kp_no_qc['lv_e'].count().values / w22_sail_kp_no_qc['lv_e'].size * 100).round(0)]

SAIL KP Raw Data - 2022
W/m^2
Sensor Height: 3 m AGL
30.0 minutes
Max: 244.0
Min: -149.1999969482422
Mean: 5.021153450012207
Percent of data that is not NAN: 58.0%


In [122]:
print("SAIL KP QC Data - 2023")
# we'll start with checking the SAIL data
print(w23_sail_kp_qc['latent_heat_flux'].units)
print(f"Sensor Height: {w23_sail_kp_no_qc.attrs['sensor_height']}")
print(f"{w23_sail_kp_qc['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_sail_kp_qc['latent_heat_flux'].max().values}")
print(f"Min: {w23_sail_kp_qc['latent_heat_flux'].min().values}")
print(f"Mean: {w23_sail_kp_qc['latent_heat_flux'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_sail_kp_qc['latent_heat_flux'].count().values / w23_sail_kp_qc['latent_heat_flux'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[2] = ['SAIL KP QC Data - 2023', 'latent_heat_flux', 
                   w23_sail_kp_qc['latent_heat_flux'].mean().values, 
                   w23_sail_kp_qc['latent_heat_flux'].std().values, 
                   w23_sail_kp_qc['latent_heat_flux'].min().values, 
                   w23_sail_kp_qc['latent_heat_flux'].max().values, 
                   (w23_sail_kp_qc['latent_heat_flux'].count().values / w23_sail_kp_qc['latent_heat_flux'].size * 100).round(0)]

SAIL KP QC Data - 2023
W/m^2
Sensor Height: 3 m AGL
30.0 minutes
Max: 506.8079833984375
Min: -151.21078491210938
Mean: 10.031121253967285
Percent of data that is not NAN: 77.0%


In [123]:
print("SAIL KP Raw Data - 2023")
# we'll start with checking the SAIL data
print(w23_sail_kp_no_qc['lv_e'].units)
print(f"Sensor Height: {w23_sail_kp_no_qc.attrs['sensor_height']}")
print(f"{w23_sail_kp_no_qc['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_sail_kp_no_qc['lv_e'].max().values}")
print(f"Min: {w23_sail_kp_no_qc['lv_e'].min().values}")
print(f"Mean: {w23_sail_kp_no_qc['lv_e'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_sail_kp_no_qc['lv_e'].count().values / w23_sail_kp_no_qc['lv_e'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[3] = ['SAIL KP Raw Data - 2023', 'lv_e', 
                   w23_sail_kp_no_qc['lv_e'].mean().values, 
                   w23_sail_kp_no_qc['lv_e'].std().values, 
                   w23_sail_kp_no_qc['lv_e'].min().values, 
                   w23_sail_kp_no_qc['lv_e'].max().values, 
                   (w23_sail_kp_no_qc['lv_e'].count().values / w23_sail_kp_no_qc['lv_e'].size * 100).round(0)]

SAIL KP Raw Data - 2023
W/m^2
Sensor Height: 3 m AGL
30.0 minutes
Max: 532.0999755859375
Min: -270.1000061035156
Mean: 7.726357936859131
Percent of data that is not NAN: 85.0%


In [124]:
print("SPLASH AP Data - 2022")
# we'll start with checking the SAIL data
print(w22_splash_ap['Hl'].units)
print(w23_splash_ap['Hl'].height)
print(f"{w22_splash_ap['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w22_splash_ap['Hl'].max().values}")
print(f"Min: {w22_splash_ap['Hl'].min().values}")
print(f"Mean: {w22_splash_ap['Hl'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w22_splash_ap['Hl'].count().values / w22_splash_ap['Hl'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[4] = ['SPLASH AP Data - 2022', 'Hl', 
                   w22_splash_ap['Hl'].mean().values, 
                   w22_splash_ap['Hl'].std().values, 
                   w22_splash_ap['Hl'].min().values, 
                   w22_splash_ap['Hl'].max().values, 
                   (w22_splash_ap['Hl'].count().values / w22_splash_ap['Hl'].size * 100).round(0)]

SPLASH AP Data - 2022
W/m2
4.62 m
10.0 minutes
Max: 124.64649414432603
Min: -108.92249619810212
Mean: 6.026119961920328
Percent of data that is not NAN: 76.0%


In [125]:
print("SPLASH KP Data - 2022")
# we'll start with checking the SAIL data
print(w22_splash_kp['Hl'].units)
print(w22_splash_kp['Hl'].height)
print(f"{w22_splash_kp['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w22_splash_kp['Hl'].max().values}")
print(f"Min: {w22_splash_kp['Hl'].min().values}")
print(f"Mean: {w22_splash_kp['Hl'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w22_splash_kp['Hl'].count().values / w22_splash_kp['Hl'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[5] = ['SPLASH KP Data - 2022', 'Hl', 
                   w22_splash_kp['Hl'].mean().values, 
                   w22_splash_kp['Hl'].std().values, 
                   w22_splash_kp['Hl'].min().values, 
                   w22_splash_kp['Hl'].max().values, 
                   (w22_splash_kp['Hl'].count().values / w22_splash_kp['Hl'].size * 100).round(0)]

SPLASH KP Data - 2022
W/m2
4.62 m
10.0 minutes
Max: 259.28858793118894
Min: -194.92358117023784
Mean: 4.91600556694804
Percent of data that is not NAN: 84.0%


In [126]:
print("SPLASH AP Data - 2023")
# we'll start with checking the SAIL data
print(w23_splash_ap['Hl'].units)
print(w23_splash_ap['Hl'].height)
print(f"{w23_splash_ap['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_splash_ap['Hl'].max().values}")
print(f"Min: {w23_splash_ap['Hl'].min().values}")
print(f"Mean: {w23_splash_ap['Hl'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_splash_ap['Hl'].count().values / w23_splash_ap['Hl'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[6] = ['SPLASH AP Data - 2023', 'Hl', 
                   w23_splash_ap['Hl'].mean().values, 
                   w23_splash_ap['Hl'].std().values, 
                   w23_splash_ap['Hl'].min().values, 
                   w23_splash_ap['Hl'].max().values, 
                   (w23_splash_ap['Hl'].count().values / w23_splash_ap['Hl'].size * 100).round(0)]

SPLASH AP Data - 2023
W/m2
4.62 m
10.0 minutes
Max: 154.4309717713504
Min: -74.94708843206352
Mean: 5.640879060388656
Percent of data that is not NAN: 33.0%


In [127]:
print("SPLASH KP Data - 2023")
# we'll start with checking the SAIL data
print(w23_splash_kp['Hl'].units)
print(w23_splash_kp['Hl'].height)
print(f"{w23_splash_kp['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_splash_kp['Hl'].max().values}")
print(f"Min: {w23_splash_kp['Hl'].min().values}")
print(f"Mean: {w23_splash_kp['Hl'].mean().values}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_splash_kp['Hl'].count().values / w23_splash_kp['Hl'].size * 100).round(0)}%")
# add the results to the dataframe
results.loc[7] = ['SPLASH KP Data - 2023', 'Hl', 
                   w23_splash_kp['Hl'].mean().values, 
                   w23_splash_kp['Hl'].std().values, 
                   w23_splash_kp['Hl'].min().values, 
                   w23_splash_kp['Hl'].max().values, 
                   (w23_splash_kp['Hl'].count().values / w23_splash_kp['Hl'].size * 100).round(0)]

SPLASH KP Data - 2023
W/m2
4.62 m
10.0 minutes
Max: nan
Min: nan
Mean: nan
Percent of data that is not NAN: 0.0%


Degrees of freedom <= 0 for slice.


In [163]:
print("SOS Data - 2023")
# we'll start with checking the SAIL data
w23_sos_5min_3m = w23_sos_5min[[var for var in hf.TURBULENCE_VARIABLES if 'w_h2o__3m' in var]].to_dataframe().mean(axis=1)
w23_sos_5min_5m = w23_sos_5min[[var for var in hf.TURBULENCE_VARIABLES if 'w_h2o__5m' in var]].to_dataframe().mean(axis=1)
w23_sos_5min_10m = w23_sos_5min[[var for var in hf.TURBULENCE_VARIABLES if 'w_h2o__10m' in var]].to_dataframe().mean(axis=1)

print(w23_sos_5min['w_h2o__3m_c'].units)
print(f"{w23_sos_5min['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_sos_5min_3m.max()}")
print(f"Min: {w23_sos_5min_3m.min()}")
print(f"Mean: {w23_sos_5min_3m.mean()}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_sos_5min_3m.count() / w23_sos_5min_3m.size * 100).round(0)}%")
# add the results to the dataframe
results.loc[8] = ['SOS Data - 2023', 'w_h2o__3m', 
                   w23_sos_5min_3m.mean()*(2509+333), 
                   w23_sos_5min_3m.std()*(2509+333), 
                   w23_sos_5min_3m.min()*(2509+333), 
                   w23_sos_5min_3m.max()*(2509+333), 
                   (w23_sos_5min_3m.count() / w23_sos_5min_3m.size * 100).round(0)]

SOS Data - 2023
m/s g/m^3
5.0 minutes
Max: 0.12049627304077148
Min: -0.09110678732395172
Mean: 0.0018694115760923313
Percent of data that is not NAN: 93.0%


In [164]:
print(w23_sos_5min['w_h2o__5m_c'].units)
print(f"{w23_sos_5min['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_sos_5min_5m.max()}")
print(f"Min: {w23_sos_5min_5m.min()}")
print(f"Mean: {w23_sos_5min_5m.mean()}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_sos_5min_5m.count() / w23_sos_5min_5m.size * 100).round(0)}%")
# add the results to the dataframe
results.loc[9] = ['SOS Data - 2023', 'w_h2o__5m', 
                   w23_sos_5min_5m.mean()*(2509+333), 
                   w23_sos_5min_5m.std()*(2509+333), 
                   w23_sos_5min_5m.min()*(2509+333), 
                   w23_sos_5min_5m.max()*(2509+333), 
                   (w23_sos_5min_5m.count() / w23_sos_5min_5m.size * 100).round(0)]

m/s g/m^3
5.0 minutes
Max: 0.10055775940418243
Min: -0.07827582955360413
Mean: 0.001961611747019509
Percent of data that is not NAN: 96.0%


In [165]:
print(w23_sos_5min['w_h2o__10m_c'].units)
print(f"{w23_sos_5min['time'].diff('time').median().values / np.timedelta64(1, 's') / 60} minutes")
# print the max, min, and mean values of the latent heat flux
print(f"Max: {w23_sos_5min_10m.max()}")
print(f"Min: {w23_sos_5min_10m.min()}")
print(f"Mean: {w23_sos_5min_10m.mean()}")
# print the percent of data that is not NAN
print(f"Percent of data that is not NAN: {(w23_sos_5min_10m.count() / w23_sos_5min_10m.size * 100).round(0)}%")
# add the results to the dataframe
results.loc[10] = ['SOS Data - 2023', 'w_h2o__10m', 
                   w23_sos_5min_10m.mean()*2835, 
                   w23_sos_5min_10m.std()*2835, 
                   w23_sos_5min_10m.min()*2835, 
                   w23_sos_5min_10m.max()*2835, 
                   (w23_sos_5min_10m.count() / w23_sos_5min_10m.size * 100).round(0)]

m/s g/m^3
5.0 minutes
Max: 0.13156071305274963
Min: -0.10629261285066605
Mean: 0.001976939450693535
Percent of data that is not NAN: 94.0%


In [166]:
results

Unnamed: 0,dataset,variable,mean,std,min,max,Valid %
0,SAIL KP QC Data - 2022,latent_heat_flux,5.3239117,17.774063,-103.94863,135.38062,55.0
1,SAIL KP Raw Data - 2022,lv_e,5.0211535,17.462376,-149.2,244.0,58.0
2,SAIL KP QC Data - 2023,latent_heat_flux,10.031121,26.791023,-151.21078,506.80798,77.0
3,SAIL KP Raw Data - 2023,lv_e,7.726358,24.784393,-270.1,532.1,85.0
4,SPLASH AP Data - 2022,Hl,6.026119961920328,12.376787453863749,-108.92249619810212,124.64649414432604,76.0
5,SPLASH KP Data - 2022,Hl,4.91600556694804,16.693744031344185,-194.92358117023784,259.28858793118894,84.0
6,SPLASH AP Data - 2023,Hl,5.640879060388656,11.730572338579414,-74.94708843206352,154.4309717713504,33.0
7,SPLASH KP Data - 2023,Hl,,,,,0.0
9,SOS Data - 2023,w_h2o__5m,5.574901,16.276926,-222.459908,285.785152,96.0
10,SOS Data - 2023,w_h2o__10m,5.604623,18.422123,-301.339557,372.974622,94.0


### Relationships between datasets
- need to establish relationships for datasets of the same time length
- get everything to hourly mean?
- get everything to half hourly means?
- need to calculate SOS 30-minute product. 