In [3]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')
alt.renderers.enable('jupyterlab')

RendererRegistry.enable('jupyterlab')

# Open precipitation dataset

In [None]:
precip_file = "/data2/elilouis/sublimationofsnow/tilden_precip_data/kettle_ponds_precip.csv"

precip_df = pd.read_csv(precip_file)

precip_df['date'] = pd.to_datetime(precip_df['date'])

acc_precip_on_first_day = precip_df.set_index('date').loc['20221130'].acc_prec

precip_df = precip_df.set_index('date').loc['20221130': '20230510'].reset_index()
precip_df['acc_prec']  = precip_df['acc_prec'] - acc_precip_on_first_day

# Open SOS Measurement Dataset

In [4]:
start_date = '20221130'
end_date = '20230509'
# open files
tidy_df_5Min = pd.read_parquet('../sos/tidy_df_20221130_20230517_noplanar_fit.parquet')
tidy_df_30Min = pd.read_parquet('../sos/tidy_df_30Min_20221130_20230517_noplanar_fit.parquet')
# convert time column to datetime
tidy_df_5Min['time'] = pd.to_datetime(tidy_df_5Min['time'])
tidy_df_30Min['time'] = pd.to_datetime(tidy_df_30Min['time'])
# limit data to our dates of interest, based on continuous snow cover at Kettle Ponds
tidy_df_5Min = tidy_df_5Min.set_index('time').loc[start_date:end_date].reset_index()
tidy_df_30Min = tidy_df_30Min.set_index('time').loc[start_date:end_date].reset_index()

  tidy_df_5Min = tidy_df_5Min.set_index('time').loc[start_date:end_date].reset_index()


In [9]:
from metpy.units import units
from sublimpy import tidy

In [10]:
## add sensible heat flux in W/m^2 variable, reverse sign too (positive energy flux INTO snowpack)
specific_heat_capacity_air = - 1.0005 * units('kilojoules/(K*kg)')
air_density = tidy_df_30Min.query("variable == 'airdensity_3m_c'")['value'].values * units("kg/m^3")
sensible_heat_flux_meas_units = tidy_df_30Min.query("variable == 'w_tc__3m_c'")['value'].values * units("K*m/s")
sensible_heat_flux_watts_per_m2 = (sensible_heat_flux_meas_units * specific_heat_capacity_air * air_density).to("W/m^2").magnitude
tidy_df_30Min = tidy.tidy_df_add_variable(
    tidy_df_30Min,
    sensible_heat_flux_watts_per_m2,
    'w_tc__3m_c w/m2',
    'w_tc_ w/m2',
    3,
    'c'
)

## add latent heat flux in W/m^2 variable, reverse sign too (positive energy flux INTO snowpack)
latent_heat_of_vaporization = - 2838 * units("J/g")
latent_heat_flux_meas_units = tidy_df_30Min.query("variable == 'w_h2o__3m_c'")['value'].values * units("g/(m^2 * s)")
latent_heat_flux_watts_per_m2 = (latent_heat_flux_meas_units * latent_heat_of_vaporization).to("W/m^2").magnitude
tidy_df_30Min = tidy.tidy_df_add_variable(
    tidy_df_30Min,
    latent_heat_flux_watts_per_m2,
    'w_h2o__3m_c w/m2',
    'w_h2o_ w/m2',
    3,
    'c'
)

In [26]:
src = tidy_df_30Min[tidy_df_30Min.variable.isin(['w_tc__3m_c w/m2', 'w_h2o__3m_c w/m2',])]
# src = src.pivot_table(values = 'value', index='time', columns=['measurement']).reset_index()
# src = src.set_index('time').groupby([pd.Grouper(freq="60min")]).mean().reset_index()
src.head()

Unnamed: 0,time,measurement,variable,height,tower,value
0,2022-11-30 00:00:00,w_tc_ w/m2,w_tc__3m_c w/m2,3.0,c,49.699411
1,2022-11-30 00:30:00,w_tc_ w/m2,w_tc__3m_c w/m2,3.0,c,4.518318
2,2022-11-30 01:00:00,w_tc_ w/m2,w_tc__3m_c w/m2,3.0,c,6.275104
3,2022-11-30 01:30:00,w_tc_ w/m2,w_tc__3m_c w/m2,3.0,c,11.600037
4,2022-11-30 02:00:00,w_tc_ w/m2,w_tc__3m_c w/m2,3.0,c,14.262819


In [27]:
alt.Chart(src).mark_line().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Row("variable:N")
)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [20]:
alt.Chart(src).transform_fold(['w_tc__3m_c w/m2', 'w_h2o__3m_c w/m2'], ).mark_line().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Row("key:N")
)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [None]:
# quick way to get variable info if we want it 
# import xarray as xr
# ds = xr.open_dataset("/data2/elilouis/sublimationofsnow/sosnoqc/isfs_20221228.nc")
# ds['SWE_p2_c']

## Clean the data

### Step 1: remove all LH flux data points with less than 90% of 20hz data being good
### Step 2: remove all LH flux data points with magnitude greater than 1 g/m^2/s

In [None]:
ec_lhflux_and_counts_variables = [
    ('w_h2o__2m_c', 'counts_2m_c_1'), 
    ('w_h2o__3m_c', 'counts_3m_c_1'), 
    ('w_h2o__5m_c', 'counts_5m_c_1'), 
    ('w_h2o__10m_c', 'counts_10m_c_1'), 
    ('w_h2o__15m_c', 'counts_15m_c_1'), 
    ('w_h2o__20m_c', 'counts_20m_c_1'), 


    ('w_h2o__1m_d', 'counts_1m_d_1'), 
    ('w_h2o__3m_d', 'counts_3m_d_1'), 
    ('w_h2o__10m_d', 'counts_10m_d_1'), 
      
    ('w_h2o__1m_ue', 'counts_1m_ue_1'), 
    ('w_h2o__3m_ue', 'counts_3m_ue_1'), 
    ('w_h2o__10m_ue', 'counts_10m_ue_1'), 


    ('w_h2o__1m_uw',  'counts_1m_uw_1'), 
    ('w_h2o__3m_uw', 'counts_3m_uw_1'), 
    ('w_h2o__10m_uw', 'counts_10m_uw_1'), 
]
ec_lhflux_variables = list(zip(*ec_lhflux_and_counts_variables))[0]

In [None]:
all_lhflux_measurements = tidy_df_5Min[tidy_df_5Min.variable.isin(ec_lhflux_variables)].value
all_lhflux_measurements.mean(), all_lhflux_measurements.std(), all_lhflux_measurements.min(), all_lhflux_measurements.max()

In [None]:
####################################################################################
# Remove all data points at once - perform both steps 1 and 2 simultaneously
####################################################################################
# for flux_var, counts_var in ec_lhflux_and_counts_variables:
#     print(flux_var, counts_var)
#     counts_src = tidy_df_5Min[tidy_df_5Min.variable == counts_var]
#     times_with_good_data_50percent = counts_src[counts_src.value >= 5400].time
#     n_before_dropping = len(tidy_df_5Min.loc[(tidy_df_5Min['variable'] == flux_var)].dropna())
#     tidy_df_5Min.loc[
#         (~tidy_df_5Min['time'].isin(times_with_good_data_50percent)) &
#         (tidy_df_5Min['variable'] == flux_var),
#         'value'
#     ] = np.nan
#     n_after_step_1 = len(tidy_df_5Min.loc[(tidy_df_5Min['variable'] == flux_var)].dropna())

#     variable_src = tidy_df_5Min[tidy_df_5Min.variable == flux_var]
#     times_with_outofbounds_values = variable_src[np.abs(variable_src.value) > 1].time
#     tidy_df_5Min.loc[
#         (tidy_df_5Min['time'].isin(times_with_outofbounds_values)) & 
#         (tidy_df_5Min['variable'] == flux_var),
#         'value'
#     ] = np.nan
#     n_after_step_2 = len(tidy_df_5Min.loc[(tidy_df_5Min['variable'] == flux_var)].dropna())
#     print(n_before_dropping, n_after_step_1, n_after_step_2)
#     print(round((n_before_dropping-n_after_step_2)/n_before_dropping, 3))

####################################################################################
# Perform steps 1 and 2 separately 
####################################################################################
for flux_var, counts_var in ec_lhflux_and_counts_variables:
    counts_src = tidy_df_5Min[tidy_df_5Min.variable == counts_var]
    times_with_good_data_50percent = counts_src[counts_src.value >= 5400].time
    tidy_df_5Min.loc[
        (~tidy_df_5Min['time'].isin(times_with_good_data_50percent)) &
        (tidy_df_5Min['variable'] == flux_var),
        'value'
    ] = np.nan

all_lhflux_measurements = tidy_df_5Min[tidy_df_5Min.variable.isin(ec_lhflux_variables)].value
mean = all_lhflux_measurements.mean() 
stddev = all_lhflux_measurements.std()
print(mean, stddev, all_lhflux_measurements.min(), all_lhflux_measurements.max())

for flux_var, counts_var in ec_lhflux_and_counts_variables:
    variable_src = tidy_df_5Min[tidy_df_5Min.variable == flux_var]
    times_with_outofbounds_values = variable_src[
        ((variable_src.value) > (mean + 5*stddev)) |
        ((variable_src.value) < (mean - 5*stddev))
    ].time
    tidy_df_5Min.loc[
        (tidy_df_5Min['time'].isin(times_with_outofbounds_values)) & 
        (tidy_df_5Min['variable'] == flux_var),
        'value'
    ] = np.nan

In [None]:
all_lhflux_measurements = tidy_df_5Min[tidy_df_5Min.variable.isin(ec_lhflux_variables)].value
print(all_lhflux_measurements.mean(), all_lhflux_measurements.std(), all_lhflux_measurements.min(), all_lhflux_measurements.max())

# Open Model Ensemble Dataset

In [None]:
model_df = pd.read_parquet("model_results.parquet")
# add a bunch of columns that are descriptive, from the config column which has multiple bits of info
model_df['z0'] = model_df['config'].apply(
    lambda v: float(v.split(' ')[-1])
)
model_df['e_sat_curve'] = model_df['config'].apply(
    lambda v: 'metpy' if 'metpy' in v else 'alduchov'
)
model_df['surface_measurement'] = model_df['config'].apply(
    lambda v: v.split(' ')[-3]
)
model_df['scheme'] = model_df['config'].apply(
    lambda v: 'andreas' if 'andreas lengths' in v else 'yang'
)
model_df['most_config'] = model_df['config'].apply(lambda s: ' '.join(s.split(' ')[:-3]))
# remove the scalar roughness length parameterization info 
model_df['most_config'] = model_df['most_config'].str.replace(' andreas lengths', '')
model_df.head()

### Handle a pesky outlier

In [None]:
model_df.loc[(model_df.time == "2023-01-22 1400") & (model_df.surface_measurement == 'Tsurf_d'), 'latent heat flux'] = 0
model_df.loc[(model_df.time == "2023-01-22 1400") & (model_df.surface_measurement == 'Tsurf_d'), 'sensible heat flux'] = 0

# Calculate cumulative sublimation (mm)

## EC Measurements

In [None]:
from sublimpy import tidy
import metpy.constants

In [None]:
ec_lhflux_and_counts_variables

In [None]:
ec_lhflux_variables = list(zip(*ec_lhflux_and_counts_variables))[0]
seconds_per_5min = 60*5
for variable in ec_lhflux_variables:
    height = int(variable.split('_')[-2].split('m')[0])
    tower = variable.split('_')[-1]
    # print(len(tidy_df_5Min.query(f"variable == '{variable}'")))
    # print(len(np.nancumsum(tidy_df_5Min.query(f"variable == '{variable}'")['value']*seconds_per_5min)/metpy.constants.density_water.magnitude,))
    tidy_df_5Min = tidy.tidy_df_add_variable(
        tidy_df_5Min,
        np.nancumsum(tidy_df_5Min.query(f"variable == '{variable}'")['value']*seconds_per_5min)/metpy.constants.density_water.magnitude,
        f"cumulative_sub_measured_{height}m_{tower}",
        "Cumulative sublimation measured",  
        height,
        tower
    )

## Model Results

In [None]:
src = model_df.set_index('time').loc["20230211":"20230211"]
src_day =  src.loc["20230211 0800":"20230211 1600"]
src_day['latent heat flux (mm)'] = src_day['latent heat flux']* seconds_per_30min/density_water.magnitude
src_daytime_sublimation_total = pd.DataFrame(src_day.groupby("config")['latent heat flux (mm)'].sum()).reset_index()
src_daytime_sublimation_total['z0'] = src_daytime_sublimation_total['config'].apply(lambda s: s.split(' ')[-1])

In [None]:
src_daytime_sublimation_total['latent heat flux (mm)'].median()

In [None]:
(
    alt.Chart(src_daytime_sublimation_total).mark_bar().encode(
        alt.X("latent heat flux (mm)").bin(maxbins=40).title("Sublimation (mm)"),
        alt.Y("count()")
    )
    +
    alt.Chart(src_daytime_sublimation_total).mark_rule(color='red', strokeWidth=2).encode(
        alt.X("median(latent heat flux (mm))")
    )
).configure_axis(grid=False).properties(width=250, height = 150).display(renderer='svg')

In [None]:
model_df_cumsum = pd.DataFrame(model_df.sort_values("time").set_index(
    ["time", "config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"]
).groupby(["config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"])['latent heat flux'].cumsum()).reset_index()
model_df_cumsum_daily = pd.DataFrame(model_df_cumsum.set_index("time").groupby(
    ['config', "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config", pd.Grouper(freq='1440Min')]
)['latent heat flux'].max()).reset_index()

model_df_cumsum_daily = model_df_cumsum_daily[
    model_df_cumsum_daily.time < '2023-05-10'
][
    model_df_cumsum_daily.time >= '2022-11-30'
]

from metpy.constants import density_water
seconds_per_30min = 60*30
model_df_cumsum_daily['latent heat flux (mm)'] = model_df_cumsum_daily['latent heat flux'].values * seconds_per_30min/density_water.magnitude

In [None]:
measured_df_cumsum_daily = pd.DataFrame(tidy_df_5Min.query("measurement == 'Cumulative sublimation measured'").set_index('time').groupby(
    ['variable', 'measurement', 'tower', 'height', pd.Grouper(freq='1440Min')]
)['value'].max()).reset_index()

In [None]:
measured_df_cumsum_daily.query("height > 1")

In [None]:
measurements_chart = alt.Chart(
    measured_df_cumsum_daily.query("height > 1")
).mark_line(opacity=0.5, strokeWidth=1).encode(
    alt.X("time:T"),
    alt.Y("value:Q").title(["Cumulative sublimation (mm)"]).scale(domain=[-5, 150]),
    detail = 'variable:N',
    tooltip = 'variable'
).properties(width = 250, height = 250)

measurements_chart_grey = alt.Chart(
    measured_df_cumsum_daily.query("height > 1")
).mark_line(opacity=0.5, strokeWidth=1, color='grey').encode(
    alt.X("time:T"),
    alt.Y("value:Q").title(["Cumulative sublimation (mm)"]).scale(domain=[-5, 150]),
    detail = 'variable:N',
    tooltip = 'variable'
).properties(width = 250, height = 250)


measurements_chart | measurements_chart_grey

In [None]:
src = model_df_cumsum_daily.query("z0 <= 1.e-03").dropna()
src[src.time == src.time.max()].sort_values("latent heat flux (mm)").head(20)

In [None]:
models_chart = alt.Chart(
    model_df_cumsum_daily.query("z0 <= 1.e-03").dropna()
).mark_line(opacity = 0.5, color='grey', strokeWidth=0.2).encode(
    alt.X("time:T"),
    alt.Y("latent heat flux (mm)").scale(domain=[-5, 150]),
    detail = 'config',
    tooltip = 'config'
)
models_chart

In [None]:
snowpillow_chart = alt.Chart(
    tidy_df_30Min.query("variable == 'SWE_p2_c'").dropna()
).transform_window(
    frame = [-48, 48],
    rolling_median = 'median(value)'
).mark_line().encode(
    alt.X("time:T").axis(labels=False).title(None),
    alt.Y("rolling_median:Q").title(["Snow water", "equivalent (mm)"])
).properties(width = 250, height = 83)

# snowpillow_and_precip_chart = snowpillow_chart +\
# alt.Chart(
#     precip_df
# ).mark_line(strokeDash=[2,4]).encode(
#     alt.X('date:T'),
#     alt.Y("acc_prec")
# )

In [None]:
plot1 = (
    snowpillow_chart &
    (measurements_chart)
)
plot1.save('/Users/elischwat/Downloads/meas_vs_model_plot1.png', ppi=400)
plot1

In [None]:
plot2 = (
    snowpillow_chart &
    (measurements_chart + models_chart)
)
plot2.save('/Users/elischwat/Downloads/meas_vs_model_plot2.png', ppi=400)
plot2

In [None]:
models_chart_by_z0 = alt.Chart(
    model_df_cumsum_daily[
        ~model_df_cumsum_daily.most_config.str.contains("Louis")
    ][
        ~model_df_cumsum_daily.most_config.str.contains("Standard")
    ][
        model_df_cumsum_daily.z0.isin([1.e-05, 1.e-04, 1.e-03])
    ].dropna()
).mark_area(opacity=0.3).encode(
    alt.X("time:T"),
    alt.Y("min(latent heat flux (mm))").scale(domain=[-5, 150]),
    alt.Y2("max(latent heat flux (mm))"),
    alt.Color('z0:N', legend=None).scale(
        # domain = [1.e-03, 1.e-04,  1.e-05],
        # range = ['#ff7f0e', '#2ca02c', '#d62728']
    ),
)


plot3 = (
    snowpillow_chart &
    (measurements_chart_grey + models_chart_by_z0)
)
plot3.save('/Users/elischwat/Downloads/meas_vs_model_plot3.png', ppi=400)
plot3

In [None]:
models_chart_by_ts_meas = alt.Chart(
    model_df_cumsum_daily[model_df_cumsum_daily.config.isin([
        'MO Holtslag de Bruin andreas lengths Tsurf_rad_d e_sat_alduchov 1e-05',
        'MO Holtslag de Bruin andreas lengths Tsurf_d e_sat_alduchov 1e-05',
    ])].dropna()
).mark_line().encode(
    alt.X("time:T"),
    alt.Y("latent heat flux (mm)").scale(domain=[-5, 150]),
    alt.Color('config:N', legend=None),
    # detail = 'config'
)


plot4 = (
    snowpillow_chart &
    (measurements_chart_grey + models_chart_by_ts_meas)
)
plot4.save('/Users/elischwat/Downloads/meas_vs_model_plot4.png', ppi=400)
plot4

## Plot Pot. Virt. Temp Profile and LH Flux profiles

In [None]:



src = tidy_df_5Min.set_index('time').loc['202302111200': '202302111230']
src_pvt = src[src.measurement.isin([
        'potential virtual temperature',
        'surface potential virtual temperature',
])].query(
    "tower == 'c'"
)
src_lhflux = src[src.measurement == 'w_h2o_'].query(
    "tower == 'c'"
)

lhflux_chart = alt.Chart(src_lhflux.reset_index()).mark_line(
    point=True
).encode(
    alt.X("mean(value):Q").sort('-y').title("Sublimation/Latent Heat Flux (g/m²/s)").scale(domain = [-0.001, 0.01]),
    alt.Y("height:Q").title('Height (m)'),
    color=alt.value("black")
    # alt.Facet("time:T").header(format='%H%m')
).properties(width=150, height=150)

pvt_chart = alt.Chart(src_pvt.reset_index()).mark_line(
    point=True
).encode(
    alt.X("mean(value):Q").sort('-y').scale(domain=[23,26]).title("Potential Virtual Temperature (˚C)"),
    alt.Y("height:Q").title('Height (m)'),
    color=alt.value("#ff7f0e")
    # alt.Facet("time:T").header(format='%H%m')
).properties(width=150, height=150)




In [None]:
(lhflux_chart).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
).display(renderer='svg')
(lhflux_chart + pvt_chart).resolve_scale(
    x='independent'
).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
).configure_axisTop(
  labelColor='#ff7f0e',
  titleColor='#ff7f0e'
).configure_axisBottom(
  labelColor='black',
  titleColor='black'
).display(renderer='svg')