In [1]:
import pandas as pd
import numpy as np
import altair as alt
# Switch Altair to its 'json' data transformer for every chart built in this notebook.
# NOTE(review): presumably chosen so large DataFrames are written to side-car JSON files
# instead of being embedded in each chart spec (sidestepping the default row limit) —
# confirm against the Altair data-transformer documentation.
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

# Open SOS Measurement Dataset

In [2]:
# Analysis window as YYYYMMDD strings. They are used twice below: to build the
# input file name and as the bounds of the time slice.
start_date = '20221130'
end_date = '20230509'

# Read the 5-minute tidy table, parse `time` into datetimes, then restrict the
# rows to our dates of interest (continuous snow cover at Kettle Ponds).
# The frame is indexed by `time` and sorted so that the label-based .loc slice
# can be applied; reset_index() then turns `time` back into a regular column.
tidy_df_5Min = (
    pd.read_parquet(f'../sos/tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet')
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .set_index('time')
    .sort_index()
    .loc[start_date:end_date]
    .reset_index()
)

# The 30-minute table is not loaded at present. To use it, apply the same steps to
# f'../sos/tidy_df_30Min_{start_date}_{end_date}_noplanar_fit.parquet'.

In [3]:
# quick way to get variable info if we want it 
# import xarray as xr
# ds = xr.open_dataset("/storage/elilouis/sublimationofsnow/sosnoqc/isfs_20221228.nc")
# ds['SWE_p2_c']

# Create dataset labeled by blowing snow vs. clear conditions

In [4]:
# Label every row of the 5-minute table as 'blowing snow' or 'clear'.
#
# A timestamp counts as "blowing snow" when at least one of the variables
# 'SF_avg_1m_ue' / 'SF_avg_2m_ue' has a value > 0 at that timestamp. All rows
# (all variables) sharing such a timestamp go into `blowing_snow_data`; every
# other row goes into `calm_data`.

# Timestamps with a positive reading on either of the two SF variables.
is_sf_variable = tidy_df_5Min['variable'].isin(['SF_avg_1m_ue', 'SF_avg_2m_ue'])
blowing_snow_times = tidy_df_5Min.loc[is_sf_variable & (tidy_df_5Min['value'] > 0), 'time']

# Build the membership mask once and use its complement for the calm subset, so
# the two subsets are guaranteed to partition the table (previously the same
# filter expression was written out and evaluated twice).
is_blowing_snow = tidy_df_5Min['time'].isin(blowing_snow_times)

blowing_snow_data = tidy_df_5Min[is_blowing_snow]
calm_data = tidy_df_5Min[~is_blowing_snow]

# Stack both subsets back together with a `type` column carrying the label.
tidy_5min_by_blowing_df = pd.concat([
    blowing_snow_data.assign(type = 'blowing snow'),
    calm_data.assign(type = 'clear')
])

In [5]:
# Build a wide table for the latent-heat-flux analysis:
#   1. keep only the six variables listed below,
#   2. pivot so each (time, type) pair is one row and each variable is one column,
#   3. append convenience time columns:
#        time_no_date - the timestamp moved onto 2023-01-01 (time of day is kept),
#                       so observations from different days share one time-of-day axis
#        month        - calendar month number of `time`
#        date         - calendar date of `time`
tidy_5min_by_blowing_lh_flux_df = (
    tidy_5min_by_blowing_df
    .loc[
        lambda long_df: long_df["variable"].isin(
            ['w_h2o__3m_c', 'spd_3m_c', 'Rsw_in_9m_d', 'Rnet_9m_d', 'T_3m_c', 'tke_3m_c']
        )
    ]
    .pivot_table(index=['time', 'type'], values='value', columns='variable')
    .reset_index()
    .assign(
        time_no_date=lambda wide_df: wide_df['time'].apply(
            lambda stamp: stamp.replace(year=2023, month=1, day=1)
        ),
        month=lambda wide_df: wide_df['time'].dt.month,
        date=lambda wide_df: wide_df['time'].dt.date,
    )
)

In [7]:
# Average every numeric column over all days for each
# (time of day, month, blowing-snow type) combination.
src = (
    tidy_5min_by_blowing_lh_flux_df
    .groupby(["time_no_date", "month", "type"])
    .mean(numeric_only=True)
    .reset_index()
)

# Line chart of the rolling-mean LH flux against time of day: one line per month,
# one facet row per type ('blowing snow' / 'clear').
alt.Chart(src).transform_filter(
    # Months 11 and 5 are dropped.
    # NOTE(review): presumably because they are only partially covered by the
    # analysis window — confirm.
    (alt.datum.month != 11) & (alt.datum.month != 5)
).transform_window(
    # Centered window of 13 rows (6 before, current, 6 after) within each
    # (month, type) group; labeled "1 hour" on the axis, which assumes
    # 5-minute spacing between rows.
    frame = [-6, 6],
    rolling_mean = "mean(w_h2o__3m_c)",
    groupby = ['month', 'type'],
    # Sort explicitly by time of day so the window does not depend on the row
    # order in which `src` happens to reach the chart.
    sort = [{"field": "time_no_date", "order": "ascending"}]
).mark_line().encode(
    alt.X("time_no_date:T", title='Time of day'),
    alt.Y("rolling_mean:Q", title=['LH Flux (g/m^2/s)','(1 hour rolling avg)']),
    alt.Color("month:O", sort=[12,1,2,3,4,5]).scale(scheme='turbo'),
    alt.Row("type:N", title=None)
).properties(
    width=300,
    height=150,
    title=['Daily average LH fluxes during each month,','separated by blowing snow and calm conditions']
)