In [2]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

# Open SOS Measurement Dataset

In [3]:
start_date = '20221130'
end_date = '20230509'
# open files
tidy_df_5Min = pd.read_parquet('../sos/tidy_df_20221130_20230517_noplanar_fit.parquet')
tidy_df_30Min = pd.read_parquet('../sos/tidy_df_30Min_20221130_20230517_noplanar_fit.parquet')
# convert time column to datetime
tidy_df_5Min['time'] = pd.to_datetime(tidy_df_5Min['time'])
tidy_df_30Min['time'] = pd.to_datetime(tidy_df_30Min['time'])
# limit data to our dates of interest, based on continuous snow cover at Kettle Ponds
tidy_df_5Min = tidy_df_5Min.set_index('time').loc[start_date:end_date].reset_index()
tidy_df_30Min = tidy_df_30Min.set_index('time').loc[start_date:end_date].reset_index()

  tidy_df_5Min = tidy_df_5Min.set_index('time').loc[start_date:end_date].reset_index()


In [4]:
tke_src = tidy_df_5Min[tidy_df_5Min.measurement.isin([
    'turbulent kinetic energy',
    'wind speed'
])].query("tower == 'c'")

In [5]:
tke_wind_src = tke_src.pivot_table(
    index = ['time', 'tower', 'height'],
    columns = 'measurement',
    values = 'value'
)
tke_wind_src.columns = tke_wind_src.columns.to_flat_index()
tke_wind_src = tke_wind_src.reset_index()
tke_wind_src
tke_wind_src['turbulent intensity'] = tke_wind_src['turbulent kinetic energy']**0.5

In [6]:
calm_winter = alt.Chart(
    tke_wind_src.set_index('time').loc['20230131': '20230202'].reset_index(),
    title = 'calm_winter'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_errorband(opacity=0.5).encode(
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

blowingsnow_winter = alt.Chart(
    tke_wind_src.set_index('time').loc['20230219': '20230222'].reset_index(),
    title = 'blowingsnow_winter'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_errorband().encode(
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

calm_spring = alt.Chart(
    tke_wind_src.set_index('time').loc['20230316': '20230319'].reset_index(),
    title = 'calm_spring'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_errorband().encode(
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

(calm_winter | blowingsnow_winter | calm_spring).resolve_scale(y='shared', x='shared')

In [7]:
calm_winter = alt.Chart(
    tke_wind_src.set_index('time').loc['20230131': '20230202'].reset_index(),
    title = 'calm_winter'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_circle(opacity = 0.5, size=5 ).encode(
    alt.X("wind speed:Q"),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

blowingsnow_winter = alt.Chart(
    tke_wind_src.set_index('time').loc['20230219': '20230222'].reset_index(),
    title = 'blowingsnow_winter'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_circle(opacity = 0.5, size=5 ).encode(
    alt.X("wind speed:Q"),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

calm_spring = alt.Chart(
    tke_wind_src.set_index('time').loc['20230316': '20230319'].reset_index(),
    title = 'calm_spring'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_circle(opacity = 0.5, size=5 ).encode(
    alt.X("wind speed:Q"),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

(calm_winter | blowingsnow_winter | calm_spring).resolve_scale(y='shared', x='shared')

# Find days with net sublimation in the winter and days without net sublimation in the winter

In [8]:
mean_daily_sublimation = tidy_df_30Min.query("variable == 'w_h2o__3m_c'")[
    ['time', 'value']
].set_index('time').groupby(pd.Grouper(freq='1440Min')).mean().reset_index()
mean_daily_sublimation['net sublimation'] = mean_daily_sublimation['value'] > 0
alt.Chart(mean_daily_sublimation).mark_tick().encode(x='time:T', y = 'net sublimation:N', color='net sublimation:N')

In [9]:
winter_days = mean_daily_sublimation[mean_daily_sublimation.time.dt.month.isin([12,1])]
winter_days = winter_days[winter_days['net sublimation']]
winter_days['time'] = winter_days['time'].dt.date

spring_days = mean_daily_sublimation[mean_daily_sublimation.time.dt.month.isin([3,4])]
spring_days = spring_days[spring_days['net sublimation']]
spring_days['time'] = spring_days['time'].dt.date

In [10]:
top_winter_days = winter_days.query(f"value > {winter_days['value'].quantile(0.75)}")
top_spring_days = spring_days.query(f"value > {spring_days['value'].quantile(0.75)}")

In [11]:
winter_sublimation_days = alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_winter_days.time)],
    title = 'winter sublimation days'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_errorband(opacity=0.5).encode(
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

spring_sublimation_days = alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_spring_days.time)],
    title = 'spring sublimation days'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_errorband(opacity=0.5).encode(
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)


winter_sublimation_days | spring_sublimation_days

In [12]:
top_winter_sub_days = alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_winter_days.time)],
    title = 'top winter sublimation days'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
# ).mark_circle(opacity = 0.5, size=5 ).encode(
).mark_errorband(opacity=0.5).encode(
    # alt.X("wind speed:Q"),
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

top_spring_sub_days = alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_spring_days.time)],
    title = 'top spring sublimation days'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
# ).mark_circle(opacity = 0.5, size=5 ).encode(
).mark_errorband(opacity=0.5).encode(
    # alt.X("wind speed:Q"),
    alt.X("wind speed:Q").bin(True),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

(top_winter_sub_days | top_spring_sub_days).resolve_scale(y='shared', x='shared')

In [13]:
top_winter_sub_days = alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_winter_days.time)],
    title = 'top winter sublimation days'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_circle(opacity = 0.5, size=5 ).encode(
    alt.X("wind speed:Q"),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

top_spring_sub_days

top_spring_sub_days = alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_spring_days.time)],
    title = 'top spring sublimation days'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3, 10, 20])
).mark_circle(opacity = 0.5, size=5 ).encode(
    alt.X("wind speed:Q"),
    alt.Y("turbulent intensity:Q"),
    alt.Color("height:N")
).properties(width = 200, height = 200)

(top_winter_sub_days | top_spring_sub_days).resolve_scale(y='shared', x='shared')

In [14]:
(alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_winter_days.time)],
    title = 'Winter Days'
).mark_bar().encode(
    alt.X("wind speed:Q").bin(True, maxbins=10),
    alt.Y("count():Q")
).properties(height = 150) & alt.Chart(
    tke_wind_src[tke_wind_src.time.dt.date.isin(top_spring_days.time)],
    title = 'Spring Days'
).mark_bar().encode(
    alt.X("wind speed:Q").bin(True, maxbins=10),
    alt.Y("count():Q")
).properties(height = 150)).resolve_scale(x='shared')

# Look at vertical profiles of LH flux on days with/without sublimation!

In [110]:
src = tidy_df_30Min[
    ~ tidy_df_30Min.variable.str.contains('predicted')
].query("measurement == 'w_h2o_'").query("tower == 'c'")
src = src.set_index('time').groupby([pd.Grouper(freq="60Min"), 'measurement', 'variable', 'height']).mean().reset_index()
src = src[src.time.dt.hour.isin([0,4,8,12,16,20])]
src['hour'] = src.time.dt.hour
src['day'] = src.time.dt.day

  src = src.set_index('time').groupby([pd.Grouper(freq="60Min"), 'measurement', 'variable', 'height']).mean().reset_index()


In [111]:
import datetime as dt


In [158]:
(alt.Chart(
    tidy_df_30Min.set_index('time').loc["20230206": "20230212"].query("measurement == 'snow flux'").reset_index()
).mark_circle().encode(
    alt.X('time:T'),#.axis(labels=False).title(None),
    alt.Y("value:Q").title("Blowing snow flux (g/m²/s)")
).properties(width = 150, height=100) &\
alt.Chart(
    tidy_df_30Min.set_index('time').loc["20230206": "20230212"].query("variable == 'Ri_3m_c'").reset_index()
).transform_window(
    frame = [-3, 3],
    rolling_avg = 'median(value)'
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("rolling_avg:Q").title("Ri (3m)")
).properties(width = 150, height=100)).display(renderer='svg')


In [155]:
alt.Chart(
    src[src.hour.isin([12])][src.day.isin([6,9,7,11])].set_index("time").loc["20230206": "20230212"].query("height != 15")
).mark_line().encode(
    alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
    alt.Y("height:Q"),
    # alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Column("day:O"),
).resolve_scale(x='shared').properties(width=100, height=100).display(renderer='svg')

  src[src.hour.isin([12])][src.day.isin([6,9,7,11])].set_index("time").loc["20230206": "20230212"].query("height != 15")


In [144]:
alt.Chart(
    src[src.hour.isin([4,12])].set_index("time").loc["20230206": "20230207"]
).mark_line().encode(
    alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
    alt.Y("height:Q"),
    alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Column("day:O"),
).resolve_scale(x='shared').properties(width=150, height=100)

In [114]:
alt.Chart(mean_daily_sublimation).mark_boxplot().encode(
    alt.Y("value:Q")
) | (
    # alt.Chart(mean_daily_sublimation).mark_circle().encode(
    #     alt.X("time:T"),
    #     alt.Y("value:Q")
    # ).properties(width = 600) + 
    alt.Chart(mean_daily_sublimation).mark_rule().encode(
        alt.Y("median(value):Q")
    ).properties(width = 600) + 
    alt.Chart(mean_daily_sublimation).transform_calculate(
        in_lower_quartile = alt.datum.value < mean_daily_sublimation.value.quantile(0.25)
    ).mark_circle().encode(
        alt.X("time:T"),
        alt.Y("value:Q"),
        alt.Color("in_lower_quartile:N"),
        tooltip = 'time'
    ).properties(width = 600)
)

Plot mean LH flux profile of days in the bottom quartile of mean sublimation values

In [115]:
sz = mean_daily_sublimation['value'].size-1
mean_daily_sublimation['PCNT_LIN'] = mean_daily_sublimation['value'].rank(method='max').apply(lambda x: 100.0*(x-1)/sz)
mean_daily_sublimation.iloc[60:].head(25)

Unnamed: 0,time,value,net sublimation,PCNT_LIN
60,2023-01-29,0.001748,True,51.25
61,2023-01-30,0.003163,True,70.0
62,2023-01-31,0.000385,True,25.625
63,2023-02-01,-3.1e-05,False,13.125
64,2023-02-02,8.8e-05,True,18.125
65,2023-02-03,-0.000226,False,6.875
66,2023-02-04,0.001596,True,50.0
67,2023-02-05,0.00471,True,80.625
68,2023-02-06,0.002891,True,64.375
69,2023-02-07,0.000215,True,23.75


In [116]:
from sublimpy import tidy
combined_blowing_snow_flux = tidy_df_30Min.query("variable == 'SF_avg_1m_ue'").value.values + tidy_df_30Min.query("variable == 'SF_avg_2m_ue'").value.values 

tidy_df_30Min = tidy.tidy_df_add_variable(
    tidy_df_30Min,
    combined_blowing_snow_flux,
    'SF_avg_ue',
    'snow flux',
    np.nan,
    'ue'
)

daily_max_blowing_snow_measurement = tidy_df_30Min.query("variable == 'SF_avg_ue'")[
    ['time', 'value']
].set_index('time').groupby(pd.Grouper(freq='1440Min')).max().reset_index()

blowing_snow_days = daily_max_blowing_snow_measurement.query("value > 0").time
nobs_days = daily_max_blowing_snow_measurement.query("value == 0").time

In [117]:
nobs_mean_daily_sublimation = mean_daily_sublimation[mean_daily_sublimation.time.isin(nobs_days)]
bs_mean_daily_sublimation = mean_daily_sublimation[mean_daily_sublimation.time.isin(blowing_snow_days)]

In [118]:
lower_quartile_sublimation_days = mean_daily_sublimation[
    mean_daily_sublimation['value'] < mean_daily_sublimation.value.quantile(0.25)
].time
middle_two_quartiles_sublimation_days = mean_daily_sublimation[
    (mean_daily_sublimation['value'] >= mean_daily_sublimation.value.quantile(0.25))
    & (mean_daily_sublimation['value'] <= mean_daily_sublimation.value.quantile(0.75))
].time
upper_quartile_sublimation_days = mean_daily_sublimation[
    mean_daily_sublimation['value'] > mean_daily_sublimation.value.quantile(0.75)
].time

lower_quartile_sublimation_days_nobs = nobs_mean_daily_sublimation[
    nobs_mean_daily_sublimation['value'] < nobs_mean_daily_sublimation.value.quantile(0.25)
].time
middle_two_quartiles_sublimation_days_nobs = nobs_mean_daily_sublimation[
    (nobs_mean_daily_sublimation['value'] >= nobs_mean_daily_sublimation.value.quantile(0.25))
    & (nobs_mean_daily_sublimation['value'] <= nobs_mean_daily_sublimation.value.quantile(0.75))
].time
upper_quartile_sublimation_days_nobs = nobs_mean_daily_sublimation[
    nobs_mean_daily_sublimation['value'] > nobs_mean_daily_sublimation.value.quantile(0.75)
].time

lower_quartile_sublimation_days_bs = bs_mean_daily_sublimation[
    bs_mean_daily_sublimation['value'] < bs_mean_daily_sublimation.value.quantile(0.25)
].time
middle_two_quartiles_sublimation_days_bs = bs_mean_daily_sublimation[
    (bs_mean_daily_sublimation['value'] >= bs_mean_daily_sublimation.value.quantile(0.25))
    & (bs_mean_daily_sublimation['value'] <= bs_mean_daily_sublimation.value.quantile(0.75))
].time
upper_quartile_sublimation_days_bs = bs_mean_daily_sublimation[
    bs_mean_daily_sublimation['value'] > bs_mean_daily_sublimation.value.quantile(0.75)
].time

In [119]:
(
    alt.Chart(
        src.groupby(["height", "hour"]).median().reset_index(),
        title=['Median 12pm Hₗ profile', 'for all days']
    ).transform_filter(
        alt.datum.hour == 12
    ).mark_line().encode(
        alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
        alt.Y("height:Q"),
        # alt.Color("hour:O")
    ).properties(height = 200, width = 100)
    | (
    alt.Chart(
        src[src.time.dt.date.isin(lower_quartile_sublimation_days)].groupby(["height", "hour"]).median().reset_index(),
        title=['Median 12pm Hₗ profile', 'for sublimation days', 'in the lower quartile']
    ).transform_filter(
        alt.datum.hour == 12
    ).mark_line().encode(
        alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
        alt.Y("height:Q"),
        # alt.Color("hour:O").scale(scheme='turbo')
    ).properties(height = 200, width = 100)
        +
    alt.Chart(
        src[src.time.dt.date.isin(middle_two_quartiles_sublimation_days)].groupby(["height", "hour"]).median().reset_index(),
        title=['Median 12pm Hₗ profile', 'for sublimation days', 'in the middle two quartiles']
    ).transform_filter(
        alt.datum.hour == 12
    ).mark_line().encode(
        alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
        alt.Y("height:Q"),
        # alt.Color("hour:O").scale(scheme='turbo')
    ).properties(height = 200, width = 100)
    )
).resolve_scale(x='shared')

  src.groupby(["height", "hour"]).median().reset_index(),
  src[src.time.dt.date.isin(lower_quartile_sublimation_days)].groupby(["height", "hour"]).median().reset_index(),
  src[src.time.dt.date.isin(lower_quartile_sublimation_days)].groupby(["height", "hour"]).median().reset_index(),
  src[src.time.dt.date.isin(middle_two_quartiles_sublimation_days)].groupby(["height", "hour"]).median().reset_index(),
  src[src.time.dt.date.isin(middle_two_quartiles_sublimation_days)].groupby(["height", "hour"]).median().reset_index(),


In [140]:
temp_grad_3m_c = tidy_df_30Min.query("variable == 'temp_gradient_3m_c'")
neutral_times = temp_grad_3m_c[np.abs(temp_grad_3m_c.value) < 0.1]

In [121]:
alt.Chart(
        src[src.time.dt.date.isin(lower_quartile_sublimation_days_nobs)].groupby(["height", "hour"]).median().reset_index(),
        title=['Median 12pm Hₗ profile', 'for sublimation days', 'in the middle two quartiles']
    ).transform_filter(
        alt.datum.hour == 12
    ).mark_line().encode(
        alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
        alt.Y("height:Q"),
        # alt.Color("hour:O").scale(scheme='turbo')
    ).properties(height = 200, width = 100)

  src[src.time.dt.date.isin(lower_quartile_sublimation_days_nobs)].groupby(["height", "hour"]).median().reset_index(),
  src[src.time.dt.date.isin(lower_quartile_sublimation_days_nobs)].groupby(["height", "hour"]).median().reset_index(),


In [122]:
src[src.time.dt.date.isin(middle_two_quartiles_sublimation_days_nobs)]

  src[src.time.dt.date.isin(middle_two_quartiles_sublimation_days_nobs)]


Unnamed: 0,time,measurement,variable,height,value,hour,day


In [124]:
alt.Chart(
        src[src.time.dt.date.isin(upper_quartile_sublimation_days_nobs)].groupby(["height", "hour"]).median().reset_index(),
        title=['Median 12pm Hₗ profile', 'for sublimation days', 'in the middle two quartiles']
    ).transform_filter(
        alt.datum.hour == 12
    ).mark_line().encode(
        alt.X("value:Q").sort('-y').title("Hₗ (g/m²/s)"),
        alt.Y("height:Q"),
        # alt.Color("hour:O").scale(scheme='turbo')
    ).properties(height = 200, width = 100)

  src[src.time.dt.date.isin(upper_quartile_sublimation_days_nobs)].groupby(["height", "hour"]).median().reset_index(),
  src[src.time.dt.date.isin(upper_quartile_sublimation_days_nobs)].groupby(["height", "hour"]).median().reset_index(),
