In [1]:
from sublimpy import utils, variables, tidy
import pandas as pd
import altair as alt
import numpy as np

In [2]:
stossel_df = pd.read_csv("Stossel_box_weights - Sheet1.csv")[['date_time', 'delta_box_1', 'delta_box_2']]
stossel_df['time'] = pd.to_datetime(stossel_df['date_time'])

In [3]:
line_base = alt.Chart().transform_calculate(
    a = '0'
).mark_rule()
alt.Chart(stossel_df).mark_circle().encode(
    alt.X("delta_box_1:Q").scale(domain=[-100, 100]),
    alt.Y("delta_box_2:Q").scale(domain=[-100, 100])
) + line_base.encode(x='a:Q') + line_base.encode(y='a:Q')

In [4]:


# filter outliers
stossel_df = stossel_df[np.abs(stossel_df.delta_box_1) < 70]
stossel_df = stossel_df[np.abs(stossel_df.delta_box_2) < 70]
stossel_df['delta_box_mean'] = np.mean([stossel_df['delta_box_1'], stossel_df['delta_box_2']], axis=0)

stossel_df['delta_box_mean_mm'] = 1000 * stossel_df['delta_box_mean'] / 997000  # 1000(mm/m)*(∆mass in g/m^2) / (h2o density in g/m^3) ==> 

In [5]:
line_base = alt.Chart().transform_calculate(
    a = '0'
).mark_rule()
alt.Chart(stossel_df).mark_circle().encode(
    alt.X("delta_box_1:Q").scale(domain=[-100, 100]),
    alt.Y("delta_box_2:Q").scale(domain=[-100, 100])
) + line_base.encode(x='a:Q') + line_base.encode(y='a:Q')


50 grams of water sublimated

there are 997000 g/m^3 of water.

That means 50 / 997000 is the volume of water that sublimated.

# Get sos data

In [6]:
sos_ds = utils.download_sos_data(
    stossel_df['time'].min().strftime('%Y%m%d'),
    stossel_df['time'].max().strftime('%Y%m%d'),
    variable_names=variables.DEFAULT_VARIABLES,
    local_download_dir='/data2/elilouis/sublimationofsnow/sosnoqc/',
    cache=True
)

Caching...skipping download for 20230201
Caching...skipping download for 20230202
Caching...skipping download for 20230203
Caching...skipping download for 20230204
Caching...skipping download for 20230205
Caching...skipping download for 20230206
Caching...skipping download for 20230207
Caching...skipping download for 20230208
Caching...skipping download for 20230209
Caching...skipping download for 20230210
Caching...skipping download for 20230211
Caching...skipping download for 20230212
Caching...skipping download for 20230213
Caching...skipping download for 20230214
Caching...skipping download for 20230215
Caching...skipping download for 20230216
Caching...skipping download for 20230217
Caching...skipping download for 20230218


In [7]:
tidy_df = tidy.get_tidy_dataset(sos_ds, list(sos_ds.data_vars))

## Calculate daily sublimation with the sos datasets, exclude measurements from hours 18-7

In [8]:
daily_sublimation_df = tidy_df[tidy_df['variable'] == 'w_h2o__3m_c'][(tidy_df.time.dt.hour > 7) & (tidy_df.time.dt.hour < 18)].set_index('time').groupby([pd.Grouper(freq='24H'), 'variable', 'height', 'tower', 'measurement']).mean().reset_index()
daily_sublimation_df['value'] = daily_sublimation_df['value']*11*60*60
daily_sublimation_df['value'] = 1000 * daily_sublimation_df['value'] / 997000  # 1000(mm/m)*(∆mass in g/m^2) / (h2o density in g/m^3) ==> 

  daily_sublimation_df = tidy_df[tidy_df['variable'] == 'w_h2o__3m_c'][(tidy_df.time.dt.hour > 7) & (tidy_df.time.dt.hour < 18)].set_index('time').groupby([pd.Grouper(freq='24H'), 'variable', 'height', 'tower', 'measurement']).mean().reset_index()


# Clean stossel data - remove nighttime measurements

In [9]:
stossel_df = stossel_df[pd.to_datetime(stossel_df['date_time']).dt.hour > 12]
stossel_df['time'] = stossel_df['time'].apply(lambda dt: dt.replace(hour=0, minute=0))
stossel_df = stossel_df.assign(
        measurement = 'manual lysimeter',
        delta_box_mean_mm = - stossel_df['delta_box_mean_mm']
).rename(columns={'delta_box_mean_mm':'value'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stossel_df['time'] = stossel_df['time'].apply(lambda dt: dt.replace(hour=0, minute=0))


In [10]:
src = pd.concat([
    stossel_df[['time', 'measurement', 'value']],
    daily_sublimation_df[['time', 'measurement', 'value']]
])

time_series_bar_chart = alt.Chart(src).mark_bar().encode(
        alt.X("date(time):O").title("Day of February"),
        alt.Y("value:Q").title("Sublimation (SWE, mm)"),
        alt.XOffset("measurement:N"),
        alt.Color("measurement:N")
    ).properties(width=600)

scatter_chart = alt.Chart(stossel_df.assign(day = stossel_df.time.dt.day)).mark_point(size=50).encode(
    alt.X("delta_box_1:Q").scale(domain=[-60, 5]).title("Box 1 (g/m^2)"),
    alt.Y("delta_box_2:Q").scale(domain=[-60, 5]).title("Box 2 (g/m^2)"),
    alt.Shape("day:N").title("Day of February")
)

onetoone_line_chart =  alt.Chart(pd.DataFrame({
    'x':np.linspace(-60,5, 10), 
    'y':np.linspace(-60,5, 10)
})).mark_line(color='grey').encode(
    x = 'x',
    y = 'y'
)

(
    time_series_bar_chart.properties(height = 150,width=300) | (onetoone_line_chart + scatter_chart).properties(height = 150, width=150)
).resolve_scale(color='independent', shape='independent').configure_legend(orient='top', columns=4)