In [None]:
import xarray as xr
import glob
import os
from sublimpy import utils
import numpy as np 

In [None]:
# Local directory with the SoS QC NetCDF files; collect every "*.nc" file in it.
filepath = "/Users/elischwat/Development/data/sublimationofsnow/sosqc/sos_qc_geo_tiltcor_v20240307/"
netcdf_pattern = os.path.join(filepath, "*.nc")
files = glob.glob(netcdf_pattern)

In [None]:
# Put the file names in sorted order, then discard the first three entries
# (the datasets from before November). The slice start is expected to be 3.
ordered_files = sorted(files)
files = ordered_files[3:]

# Create the hourly dataset

In [None]:
# Open each file as its own xarray Dataset, keeping the order of `files`.
datasets = [xr.open_dataset(path) for path in files]

In [None]:
# WARNING: this concatenation takes an extremely long time to run (~8 hours).
# To make it run more quickly, subset each dataset to just the variables you
# want before concatenating.
# Joins all opened datasets end-to-end along the 'time' dimension.
sos_ds = xr.concat(datasets, dim='time')

In [None]:
# Display the concatenated dataset for a quick visual check.
sos_ds

In [None]:
# Pass the dataset through sublimpy's fill_missing_timestamps helper.
# NOTE(review): presumably this inserts the time steps absent from the record
# so the time axis is regular — confirm against sublimpy.utils.
# The result overwrites sos_ds, so re-running this cell re-applies the helper.
sos_ds = utils.fill_missing_timestamps(sos_ds)

In [None]:
# Average the dataset into 10-, 30-, 60- and 120-minute bins along time.
sos_10min_ds, sos_30min_ds, sos_hourly_ds, sos_twohourly_ds = (
    sos_ds.resample(time=bin_width).mean()
    for bin_width in ('10Min', '30Min', '60Min', '120Min')
)

In [None]:
# Write the 60-minute means to a NetCDF file in the current working directory.
sos_hourly_ds.to_netcdf('sos_hourly_ds.nc')

# Create the 5min dataset (include only eddy covariance latent heat flux measurements)

In [None]:
# datasets = []
# for fn in files:
#     ds = xr.open_dataset(fn)
#     vars_to_grab = []
#     if 'w_h2o__3m_c' in ds:
#         vars_to_grab.append('w_h2o__3m_c')
#     if 'w_h2o__5m_c' in ds:
#         vars_to_grab.append('w_h2o__5m_c')
#     if 'w_h2o__10m_c' in ds:
#         vars_to_grab.append('w_h2o__10m_c')
#     if 'w_h2o__15m_c' in ds:
#         vars_to_grab.append('w_h2o__15m_c')
#     if 'w_h2o__20m_c' in ds:
#         vars_to_grab.append('w_h2o__20m_c')

#     ds = ds[vars_to_grab]
#     datasets.append(ds)

# Variables to keep for the 5-minute dataset; a file contributes only the
# ones it actually contains.
all_vars_i_want = [
    # w-h2o covariance variables
    'w_h2o__3m_c', 'w_h2o__5m_c', 'w_h2o__10m_c', 'w_h2o__15m_c', 'w_h2o__20m_c',
    'w_h2o__3m_ue', 'w_h2o__10m_ue',
    'w_h2o__3m_uw', 'w_h2o__10m_uw',
    'w_h2o__3m_ud', 'w_h2o__10m_ud',
    # w variables
    'w_3m_c', 'w_5m_c', 'w_10m_c', 'w_15m_c', 'w_20m_c',
    # h2o variables
    'h2o_3m_c', 'h2o_5m_c', 'h2o_10m_c', 'h2o_15m_c', 'h2o_20m_c',
]

# Open each file and keep only the wanted variables that are present in it,
# preserving the order of all_vars_i_want.
datasets = []
for fn in files:
    ds = xr.open_dataset(fn)
    present = [name for name in all_vars_i_want if name in ds]
    datasets.append(ds[present])

In [None]:
# Join the per-file subsets end-to-end along the 'time' dimension.
# This replaces the full sos_ds built earlier in the notebook.
sos_ds = xr.concat(datasets, dim='time')

In [None]:
# Pass the subset dataset through sublimpy's fill_missing_timestamps helper.
# NOTE(review): presumably this inserts the time steps absent from the record
# so the time axis is regular — confirm against sublimpy.utils.
sos_ds = utils.fill_missing_timestamps(sos_ds)

In [None]:
# Combine the 3m_c covariance over consecutive blocks of 12 records:
#   (1/12)*sum(cov + w*h2o) - ((1/12)*sum(w)) * ((1/12)*sum(h2o))
# Records left over after the last complete block of 12 are ignored.
records_per_block = 12
weight = 1/12
n_blocks = len(sos_ds.time) // records_per_block

cov_series = sos_ds['w_h2o__3m_c']
w_series = sos_ds['w_3m_c']
h2o_series = sos_ds['h2o_3m_c']

new_values = []
for block in range(n_blocks):
    rows = slice(block*records_per_block, (block + 1)*records_per_block)
    cov_block = cov_series[rows].values
    w_block = w_series[rows].values
    h2o_block = h2o_series[rows].values
    mean_of_products = weight*(cov_block + w_block*h2o_block).sum()
    product_of_means = weight*w_block.sum() * weight*h2o_block.sum()
    new_values.append(mean_of_products - product_of_means)

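Correctly aggregate latent heat flux covariances

A covariance cannot simply be averaged over sub-intervals, because the covariance over the longer period also depends on how the sub-interval means of $w$ and $q$ (h2o) vary. For the $N$ five-minute intervals $i$ that make up an hour, the hourly covariance is

$$\overline{w'q'} = \frac{1}{N}\sum_{i}\left(\overline{w'q'}_i + \overline{w}_i\,\overline{q}_i\right) - \left(\frac{1}{N}\sum_{i}\overline{w}_i\right)\left(\frac{1}{N}\sum_{i}\overline{q}_i\right)$$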

In [None]:
# Convert the xarray Dataset to a pandas DataFrame for the groupby-based
# aggregation below.
sos_df = sos_ds.to_dataframe()

In [None]:
import pandas as pd

In [None]:
# Preview the first rows of the DataFrame.
sos_df.head()

In [None]:
# Each tuple names the three columns needed to aggregate one covariance:
# (5-minute covariance, 5-minute mean of w, 5-minute mean of h2o).
covariance_variable_sets = [
    ('w_h2o__3m_c', 'w_3m_c', 'h2o_3m_c'),
    ('w_h2o__5m_c', 'w_5m_c', 'h2o_5m_c'),
    ('w_h2o__10m_c', 'w_10m_c', 'h2o_10m_c'),
    ('w_h2o__15m_c', 'w_15m_c', 'h2o_15m_c'),
    ('w_h2o__20m_c', 'w_20m_c', 'h2o_20m_c'),
]


def aggregate_covariance(df, cov, w, h2o, freq='60Min'):
    """Aggregate short-interval covariances into covariances over `freq` bins.

    For the N records in a bin that have all three inputs present:

        mean(cov_i + w_i*h2o_i) - mean(w_i) * mean(h2o_i)

    Using means over the valid records (rather than a fixed 1/12 times a
    NaN-skipping sum) keeps the result correct when records are missing:
    a partially missing bin is averaged over the records it does have, and a
    bin with no valid record yields NaN instead of a spurious 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a DatetimeIndex and the columns `cov`, `w` and `h2o`.
    cov, w, h2o : str
        Column names of the covariance and the two means it was computed from.
    freq : str, default '60Min'
        Width of the aggregation bins.

    Returns
    -------
    pandas.Series
        Aggregated covariance, indexed by bin start and named `cov`.
    """
    # NaN in any of the three inputs propagates into this sum, so its non-null
    # mask marks exactly the records that are usable for all three terms.
    mean_product_terms = df[cov] + df[w] * df[h2o]
    usable = mean_product_terms.notna()

    # Mask w and h2o with the same records so every mean uses the same N.
    bin_means = pd.DataFrame({
        'mean_product_terms': mean_product_terms,
        'w': df[w].where(usable),
        'h2o': df[h2o].where(usable),
    }).groupby(pd.Grouper(freq=freq)).mean()

    aggregated = bin_means['mean_product_terms'] - bin_means['w'] * bin_means['h2o']
    return aggregated.rename(cov)


# One single-column DataFrame per covariance variable, all on the same bins.
agg_df_list = []
for cov, w, h2o in covariance_variable_sets:
    agg_df = aggregate_covariance(sos_df, cov, w, h2o).to_frame()
    agg_df_list.append(agg_df)

In [None]:
# Start from the first aggregated frame and join each remaining one onto it
# by index, giving one column per covariance variable.
combined_agg_df, *remaining_agg_dfs = agg_df_list
for next_agg_df in remaining_agg_dfs:
    combined_agg_df = combined_agg_df.join(next_agg_df)

In [None]:
# Display the combined table of aggregated covariances.
combined_agg_df

In [None]:
# For comparison: the naive aggregation, i.e. a plain mean of the 5-minute
# covariances within each 60-minute bin.
naive_columns = [
    'w_h2o__3m_c',
    'w_h2o__5m_c',
    'w_h2o__10m_c',
    'w_h2o__15m_c',
    'w_h2o__20m_c',
]
hourly_bins = pd.Grouper(freq='60Min')
wrong_combined_agg_df = sos_df[naive_columns].groupby(hourly_bins).mean()

In [None]:
# Preview the naive hourly means for 2022-12-21 through 2022-12-24 in long
# form (columns: time, variable, value), the layout used for plotting.
wrong_combined_agg_df.loc['20221221': '20221224'].reset_index().melt(id_vars='time')

In [None]:
import altair as alt
# Plot the naive and the corrected hourly covariances side by side for
# 2022-12-21 through 2022-12-24, with shared x, y and color scales.
chart_variables = [
    'w_h2o__3m_c',
    'w_h2o__5m_c',
    'w_h2o__10m_c',
    'w_h2o__15m_c',
    'w_h2o__20m_c',
]


def _covariance_line_chart(hourly_df, chart_title):
    """Build a line chart (one line per variable) from an hourly table."""
    long_df = hourly_df.loc['20221221': '20221224'].reset_index().melt(id_vars='time')
    lines = alt.Chart(long_df).mark_line().encode(
        alt.X("time:T"),
        alt.Y("value:Q").title('Latent heat flux (g/m^2/s)'),
        alt.Color("variable:O").scale(scheme='rainbow', domain=chart_variables),
    )
    return lines.properties(title=chart_title)


incorrect_chart = _covariance_line_chart(wrong_combined_agg_df, 'Incorrect covariance averages')
correct_chart = _covariance_line_chart(combined_agg_df, 'Correct covariance averages')
side_by_side = (incorrect_chart | correct_chart).resolve_scale(
    x='shared', y='shared', color='shared'
)
side_by_side.display(renderer='svg')

Test with a single variable using simpler code

In [None]:
def _block_covariance(cov_values, w_values, h2o_values):
    """Combine 12 consecutive covariances into one covariance for the block."""
    return (
        (1/12)*(cov_values + w_values*h2o_values).sum()
        - (
            (1/12)*w_values.sum()
            * (1/12)*h2o_values.sum()
        )
    )


# Independent re-computation for the 3m_c variables only, walking through the
# record in consecutive blocks of 12; a trailing partial block is ignored.
n_complete_blocks = len(sos_ds.time) // 12
new_values = []
for first_row in range(0, 12*n_complete_blocks, 12):
    last_row = first_row + 12
    new_values.append(
        _block_covariance(
            sos_ds['w_h2o__3m_c'][first_row: last_row].values,
            sos_ds['w_3m_c'][first_row: last_row].values,
            sos_ds['h2o_3m_c'][first_row: last_row].values,
        )
    )

In [None]:
# Compare the grouped result with the loop-based re-computation by listing
# the distinct element-wise differences; values at (or extremely close to) 0
# mean the two agree. Requires both arrays to have the same length.
# NOTE(review): entries where either side is NaN appear as nan here.
np.unique(combined_agg_df['w_h2o__3m_c'].values - np.array(new_values))

# Save 60 min lh flux values

In [None]:
# Convert the aggregated table back to an xarray Dataset and write it to a
# NetCDF file in the current working directory.
combined_agg_df.to_xarray().to_netcdf('sos_hourly_aggregated_lhflux_ds.nc')

# Check that the cumulative sublimation measurements look right.

In [None]:
from metpy.constants import density_water

In [None]:
# Scale every variable in sos_ds by 5*60 and divide by the magnitude of
# metpy's density_water constant.
# NOTE(review): 5*60 is presumably the number of seconds in one 5-minute
# record, and the "_mm" suffix suggests the result is meant as mm of water
# per record. That holds if the fluxes are in g/m^2/s (the unit used in the
# plot axis labels earlier in this notebook) and density_water is in kg/m^3
# — confirm. The scaling is applied to all variables, not only the w_h2o
# covariances.
sos_ds_mm = sos_ds*5*60/density_water.magnitude

In [None]:
# Plot the running total of each scaled w-h2o covariance.
cumulative_plot_variables = [
    'w_h2o__3m_c',
    'w_h2o__5m_c',
    'w_h2o__10m_c',
    'w_h2o__15m_c',
    'w_h2o__20m_c',
]
for variable_name in cumulative_plot_variables:
    sos_ds_mm[variable_name].cumsum().plot()