In [2]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')

from sublimpy import turbulence
import matplotlib.pyplot as plt
from sublimpy import tidy
import metpy.constants

# Open data

## Open precipitation dataset

In [3]:
# Accumulated-precipitation record, restricted to the study period and re-zeroed
# so that accumulation is expressed relative to the first day of that period.
precip_file = "/storage/elilouis/sublimationofsnow/tilden_precip_data/kettle_ponds_precip.csv"

precip_by_date = pd.read_csv(precip_file)
precip_by_date['date'] = pd.to_datetime(precip_by_date['date'])
# Index by date once; both the first-day lookup and the period slice use this index.
precip_by_date = precip_by_date.set_index('date')

# Accumulated precipitation already on record on the first day of the period.
acc_precip_on_first_day = precip_by_date.loc['20221130'].acc_prec

# Keep the study period only, then subtract the first-day offset.
precip_df = precip_by_date.loc['20221130':'20230510'].reset_index()
precip_df['acc_prec'] = precip_df['acc_prec'] - acc_precip_on_first_day

## Open SOS Measurement Dataset

In [4]:
start_date = '20221130'
end_date = '20230509'


def _load_tidy_df(parquet_path, first_day, last_day):
    """Read a tidy SOS parquet file and keep the rows between `first_day` and `last_day`.

    The `time` column is converted to datetime, used as a sorted index for the
    date-string slice, and then restored as an ordinary column.
    """
    frame = pd.read_parquet(parquet_path)
    # convert time column to datetime
    frame['time'] = pd.to_datetime(frame['time'])
    return frame.set_index('time').sort_index().loc[first_day:last_day].reset_index()


# open files and limit data to our dates of interest, based on continuous snow cover at Kettle Ponds
tidy_df_5Min = _load_tidy_df(
    f'../sos/tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet',
    start_date,
    end_date,
)
tidy_df_30Min = _load_tidy_df(
    f'../sos/tidy_df_30Min_{start_date}_{end_date}_noplanar_fit.parquet',
    start_date,
    end_date,
)

In [5]:
# quick way to get variable info if we want it 
# import xarray as xr
# ds = xr.open_dataset("/storage/elilouis/sublimationofsnow/sosnoqc/isfs_20221228.nc")
# ds['SWE_p2_c']

## Open Modeled Dataset

In [6]:
# Open Model Ensemble Dataset
model_df = pd.read_parquet("model_results.parquet")

# The `config` string is space separated and packs several pieces of information;
# its last three tokens are "<surface measurement> <e_sat curve> <z0>".
# Split it once and derive the descriptive columns from the tokens.
config_tokens = model_df['config'].apply(lambda cfg: cfg.split(' '))

# roughness length: the final token, as a number
model_df['z0'] = config_tokens.apply(lambda tokens: float(tokens[-1]))
# saturation vapor pressure curve: 'metpy' if mentioned anywhere in the config, else 'alduchov'
model_df['e_sat_curve'] = model_df['config'].apply(
    lambda cfg: 'metpy' if 'metpy' in cfg else 'alduchov'
)
# surface temperature measurement: third token from the end
model_df['surface_measurement'] = config_tokens.apply(lambda tokens: tokens[-3])
# scalar roughness length scheme: 'andreas' when flagged in the config, else 'yang'
model_df['scheme'] = model_df['config'].apply(
    lambda cfg: 'andreas' if 'andreas lengths' in cfg else 'yang'
)
# everything before the last three tokens, with the scalar roughness length
# parameterization info removed
model_df['most_config'] = config_tokens.apply(
    lambda tokens: ' '.join(tokens[:-3]).replace(' andreas lengths', '')
)
model_df.head()
### Handle a pesky outlier
# Zero both turbulent fluxes at the single time step this notebook treats as an outlier:
# 2023-01-22 14:00 for the runs driven by the Tsurf_d surface measurement.
# The mask is built once, from an explicit timestamp rather than the ambiguous
# "2023-01-22 1400" string, and reused for both flux columns.
outlier_mask = (
    (model_df.time == pd.Timestamp("2023-01-22 14:00"))
    & (model_df.surface_measurement == 'Tsurf_d')
)
model_df.loc[outlier_mask, 'latent heat flux'] = 0
model_df.loc[outlier_mask, 'sensible heat flux'] = 0
## Calculate cumulative sublimation (mm)
### Measured data (eddy-covariance latent heat flux variables)

In [7]:
# Measurement heights (m) used on each tower, in the order the variables are listed.
# The lowest instruments (2 m on tower c; 1 m on towers d, ue and uw) are left out.
_heights_by_tower = {
    'c': (3, 5, 10, 15, 20),
    'd': (3, 10),
    'ue': (3, 10),
    'uw': (3, 10),
}

# (water vapor flux variable, matching counts variable) pairs, tower by tower.
ec_lhflux_and_counts_variables = [
    (f'w_h2o__{height}m_{tower}', f'counts_{height}m_{tower}_1')
    for tower, heights in _heights_by_tower.items()
    for height in heights
]

# Just the flux variable names, as a tuple.
ec_lhflux_variables = tuple(flux_var for flux_var, _ in ec_lhflux_and_counts_variables)

# Calculate measured daily cumulative sublimation (for plotting)

In [8]:
# Daily (1440-minute) maximum of the measured cumulative sublimation,
# one series per tower / height / measurement / variable combination.
is_measured_cumulative = tidy_df_5Min.measurement == 'Cumulative sublimation measured'
daily_group_keys = [pd.Grouper(freq='1440Min'), 'tower', 'height', 'measurement', 'variable']

tidy_df_daily = (
    tidy_df_5Min[is_measured_cumulative]
    .set_index('time')
    .groupby(daily_group_keys)
    .max()
    .reset_index()
)

# Calculate model cumulative sublimation

## Ensemble of TurbPy solutions

In [9]:
# Columns that identify one ensemble member (all of them are derived from `config`).
member_cols = ["config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"]

# Running sum of the latent heat flux in time order, computed separately per ensemble member.
model_df_cumsum = pd.DataFrame(
    model_df.sort_values("time")
    .set_index(["time"] + member_cols)
    .groupby(member_cols)['latent heat flux']
    .cumsum()
).reset_index()

# Daily (1440-minute) maximum of each member's running sum.
model_df_cumsum_daily = pd.DataFrame(
    model_df_cumsum.set_index("time")
    .groupby([pd.Grouper(freq='1440Min')] + member_cols)['latent heat flux']
    .max()
).reset_index()

# Restrict to the study period with ONE combined mask. Chaining two boolean selections
# (df[mask_a][mask_b]) builds mask_b on the unfiltered frame, which makes pandas warn
# "Boolean Series key will be reindexed to match DataFrame index".
# .copy() makes the result an independent frame so the column added below is not
# written into a slice of the unfiltered frame.
in_study_period = (
    (model_df_cumsum_daily.time >= '2022-11-30')
    & (model_df_cumsum_daily.time < '2023-05-10')
)
model_df_cumsum_daily = model_df_cumsum_daily[in_study_period].copy()

from metpy.constants import density_water
seconds_per_30min = 60*30
# Convert the summed flux to a water depth: (sum of flux) * seconds per model step / water density.
# NOTE(review): yields mm only if the flux is in g m^-2 s^-1 on a 30-minute time step — confirm.
model_df_cumsum_daily['latent heat flux (mm)'] = (
    model_df_cumsum_daily['latent heat flux'].values * seconds_per_30min / density_water.magnitude
)

  model_df_cumsum_daily = model_df_cumsum_daily[


## Ensemble of simple solutions

In [10]:
# modeled_lhflux_variables = tidy_df_5Min[tidy_df_5Min.variable.str.contains('w_h2o__3m_c predicted')].variable.unique()
# modeled_lhflux_variables

In [11]:
# seconds_per_30min = 60*30 
# for var in modeled_lhflux_variables:
#     print(var)
#     this_df = tidy_df_30Min[tidy_df_30Min.variable == var]
#     tower = var.split(' ')[0][-1]
#     height = int(var.split(' ')[0].split('m_')[0].split('__')[1])
#     cumulative_sublimation_values = np.nancumsum(this_df.value.values*seconds_per_30min)/metpy.constants.density_water.magnitude
#     new_var_name = var + ' cumulative mm SWE'
#     tidy_df_30Min = tidy.tidy_df_add_variable(
#         tidy_df_30Min,
#         cumulative_sublimation_values,
#         new_var_name,
#         "Cumulative sublimation predicted",  
#         height,
#         tower
#     )

# Plot

In [12]:
# my_own_modeled_chart = alt.Chart(
#     tidy_df_30Min[
#         tidy_df_30Min.variable != 'w_h2o__3m_c predicted (0.005) cumulative mm SWE'
#     ].query("measurement == 'Cumulative sublimation predicted'").query("height > 1")
# ).mark_line().encode(
#     alt.X("time:T"),
#     alt.Y("value:Q").title("Cumulative sublimation (mm)"),
#     color = 'variable:N',
#     tooltip = 'variable'
# ).properties(width = 250, height = 250)
# my_own_modeled_chart

In [13]:
# my_own_modeled_chart = alt.Chart(
#     tidy_df_30Min[tidy_df_30Min.variable.isin([
#         'w_h2o__3m_c predicted (1e-05) cumulative mm SWE',
#         'w_h2o__3m_c predicted (5e-05) cumulative mm SWE',
#         'w_h2o__3m_c predicted (0.0001) cumulative mm SWE',
#     ])]
# ).mark_line().encode(
#     alt.X("time:T"),
#     alt.Y("value:Q").title("Cumulative sublimation (mm)"),
#     color = 'variable:N'
# ).properties(width = 250, height = 250)
# my_own_modeled_chart

In [14]:
# Display the daily measured cumulative-sublimation table (pandas truncates the middle rows).
tidy_df_daily

Unnamed: 0,time,tower,height,measurement,variable,value
0,2022-11-30,c,3.0,Cumulative sublimation measured,cumulative_sub_measured_3m_c,-0.000738
1,2022-11-30,c,5.0,Cumulative sublimation measured,cumulative_sub_measured_5m_c,0.008788
2,2022-11-30,c,10.0,Cumulative sublimation measured,cumulative_sub_measured_10m_c,0.010753
3,2022-11-30,c,15.0,Cumulative sublimation measured,cumulative_sub_measured_15m_c,0.007828
4,2022-11-30,c,20.0,Cumulative sublimation measured,cumulative_sub_measured_20m_c,0.002069
...,...,...,...,...,...,...
1766,2023-05-09,d,10.0,Cumulative sublimation measured,cumulative_sub_measured_10m_d,54.871938
1767,2023-05-09,ue,3.0,Cumulative sublimation measured,cumulative_sub_measured_3m_ue,29.730144
1768,2023-05-09,ue,10.0,Cumulative sublimation measured,cumulative_sub_measured_10m_ue,35.046725
1769,2023-05-09,uw,3.0,Cumulative sublimation measured,cumulative_sub_measured_3m_uw,34.111715


In [19]:
# Preview the rows the measurements chart plots: measured cumulative sublimation above 1 m.
tidy_df_daily.query("measurement == 'Cumulative sublimation measured' and height > 1")

Unnamed: 0,time,tower,height,measurement,variable,value
0,2022-11-30,c,3.0,Cumulative sublimation measured,cumulative_sub_measured_3m_c,-0.000738
1,2022-11-30,c,5.0,Cumulative sublimation measured,cumulative_sub_measured_5m_c,0.008788
2,2022-11-30,c,10.0,Cumulative sublimation measured,cumulative_sub_measured_10m_c,0.010753
3,2022-11-30,c,15.0,Cumulative sublimation measured,cumulative_sub_measured_15m_c,0.007828
4,2022-11-30,c,20.0,Cumulative sublimation measured,cumulative_sub_measured_20m_c,0.002069
...,...,...,...,...,...,...
1766,2023-05-09,d,10.0,Cumulative sublimation measured,cumulative_sub_measured_10m_d,54.871938
1767,2023-05-09,ue,3.0,Cumulative sublimation measured,cumulative_sub_measured_3m_ue,29.730144
1768,2023-05-09,ue,10.0,Cumulative sublimation measured,cumulative_sub_measured_10m_ue,35.046725
1769,2023-05-09,uw,3.0,Cumulative sublimation measured,cumulative_sub_measured_3m_uw,34.111715


In [31]:
# Measured cumulative sublimation: one line per variable, coloured by measurement height.
measured_above_1m = tidy_df_daily.query(
    "measurement == 'Cumulative sublimation measured' and height > 1"
)

measurements_chart = (
    alt.Chart(measured_above_1m)
    .mark_line(opacity=1, strokeWidth=1)
    .encode(
        x=alt.X("time:T"),
        y=alt.Y("value:Q").title("Cumulative sublimation (mm)").scale(domain=[-5, 60]),
        color=alt.Color("height:O").scale(scheme='turbo'),
        # `detail` keeps each variable as its own line without adding a legend entry
        detail='variable:N',
    )
    .properties(width=250, height=250)
)
measurements_chart

In [32]:
# Model ensemble as thin grey background lines, one per config, limited to z0 <= 1e-3.
low_z0_runs = model_df_cumsum_daily.query("z0 <= 1.e-03").dropna()

models_chart = (
    alt.Chart(low_z0_runs)
    .mark_line(opacity=0.5, color='grey', strokeWidth=0.2)
    .encode(
        x=alt.X("time:T"),
        y=alt.Y("latent heat flux (mm)").scale(domain=[-5, 150]),
        detail='config',
    )
)

In [33]:
# Snow-pillow record (variable SWE_p2_c), smoothed with a centred rolling median.
snowpillow_swe = tidy_df_30Min.query("variable == 'SWE_p2_c'").dropna()

snowpillow_chart = (
    alt.Chart(snowpillow_swe)
    .transform_window(
        rolling_median='median(value)',
        # 48 rows either side of each point
        # (presumably +/- 24 h at 30-minute spacing — TODO confirm)
        frame=[-48, 48],
    )
    .mark_line()
    .encode(
        # x labels/title are hidden here
        x=alt.X("time:T").axis(labels=False).title(None),
        y=alt.Y("rolling_median:Q").title(["Snow water", "equivalent (mm)"]),
    )
    .properties(width=250, height=83)
)

# snowpillow_and_precip_chart = snowpillow_chart +\
# alt.Chart(
#     precip_df
# ).mark_line(strokeDash=[2,4]).encode(
#     alt.X('date:T'),
#     alt.Y("acc_prec")
# )

In [34]:
# Snow-pillow panel stacked vertically above the measured cumulative sublimation.
alt.vconcat(snowpillow_chart, measurements_chart)

In [18]:
# Snow-pillow panel stacked above the measurements with the model ensemble layered on top.
alt.vconcat(
    snowpillow_chart,
    alt.layer(measurements_chart, models_chart),
)

In [19]:
# List the distinct z0 values that remain after the z0 <= 1e-3 filter used by the model charts.
model_df_cumsum_daily.query("z0 <= 1.e-03").dropna().z0.unique()

array([1.e-04, 5.e-04, 1.e-03, 1.e-05, 5.e-05])

In [20]:
# Same low-roughness ensemble members as the grey model chart, coloured by z0.
low_z0_members = model_df_cumsum_daily.query("z0 <= 1.e-03").dropna()

# NOTE(review): the colour domain lists three z0 values, while the filtered data also
# contains 5e-04 and 5e-05 (see the unique-values output above) — confirm how those
# members are meant to be coloured.
z0_color = alt.Color('z0:O').scale(
    domain=[1.e-03, 1.e-04, 1.e-05],
    range=['#d62728', '#ff7f0e', '#bcbd22'],
)

models_chart_by_z0 = (
    alt.Chart(low_z0_members)
    .mark_line(opacity=0.75, color='grey', strokeWidth=0.2)
    .encode(
        x=alt.X("time:T"),
        y=alt.Y("latent heat flux (mm)").scale(domain=[-5, 150]),
        color=z0_color,
        detail='config',
    )
)

# Snow-pillow panel above the measurements, with the z0-coloured ensemble layered on top.
alt.vconcat(
    snowpillow_chart,
    alt.layer(measurements_chart, models_chart_by_z0),
)

In [21]:
# Two ensemble members whose config strings differ only in the surface measurement
# token (Tsurf_rad_d vs Tsurf_d).
ts_meas_configs = [
    'MO Holtslag de Bruin Tsurf_rad_d e_sat_alduchov 1e-05',
    'MO Holtslag de Bruin Tsurf_d e_sat_alduchov 1e-05',
]

models_chart_by_ts_meas = alt.Chart(
    model_df_cumsum_daily[model_df_cumsum_daily.config.isin(ts_meas_configs)]
).mark_line(opacity = 0.75, color='grey', strokeWidth=0.2).encode(
    alt.X("time:T"),
    alt.Y("latent heat flux (mm)").scale(domain=[-5, 150]),
    # NOTE(review): both selected configs end in z0 = 1e-05, so this z0 colour scale gives
    # them the same colour; consider encoding `surface_measurement` instead — confirm intent.
    alt.Color('z0:O').scale(
        domain = [1.e-03, 1.e-04,  1.e-05],
        range = ['#d62728', '#ff7f0e', '#bcbd22']
    ),
    detail = 'config'
)

# Display the chart built in THIS cell. The original layered `models_chart_by_z0` here
# (copied from the previous cell), so `models_chart_by_ts_meas` was never shown.
(
    snowpillow_chart &
    (measurements_chart + models_chart_by_ts_meas)
)

In [22]:
# Short-window comparison of the measured w_h2o__3m_c series against one model configuration.
# NOTE(review): this rebinds the notebook-level start_date / end_date set in the
# data-loading cell; any cell run after this one sees the March window.
start_date = "2023-03-01"
end_date = "2023-03-05"

# Measured series, limited to the window.
measured_window = (
    tidy_df_30Min.set_index('time').sort_index().loc[start_date:end_date].reset_index()
)
meas_chart = (
    alt.Chart(measured_window.query("variable == 'w_h2o__3m_c'"))
    .mark_line()
    .encode(
        x=alt.X("time:T"),
        y=alt.Y("value:Q"),
    )
)

# Modelled latent heat flux for a single configuration, limited to the same window.
reference_config = 'MO Holtslag de Bruin andreas lengths Tsurf_c e_sat_alduchov 1e-05'
modeled_window = (
    model_df[model_df.config == reference_config]
    .set_index('time')
    .sort_index()
    .loc[start_date:end_date]
    .reset_index()
)
model_chart = (
    alt.Chart(modeled_window)
    .mark_line(color='red')
    .encode(
        x=alt.X("time:T"),
        y=alt.Y("latent heat flux:Q"),
    )
)

alt.layer(meas_chart, model_chart).properties(width = 600)