In [1]:
import datetime as dt
import math
import os

import altair as alt
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import swifter
import tqdm
import xarray as xr
from metpy.units import units
from scipy import interpolate
from sklearn.metrics import r2_score
from sublimpy import utils, tidy

alt.data_transformers.enable('json')

In [2]:
# List the parquet files available in process_slow_data/ (shell command run through IPython's `ls` alias).
ls process_slow_data | grep parquet

tidy_df_20221101_20230619_planar_fit_multiplane_STRAIGHTUP_q7_flags9000_ARCHIVE.parquet


In [None]:
# ---------------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------------

# Measurement heights selected (via `height.isin(HEIGHTS)`) when building the field tables.
HEIGHTS = [1, 3, 10]

# Passed to np.linspace as the NUMBER of grid nodes along the x/y axes and along the z axis
# of the interpolation grid (despite the "spacing" in the names).
HORIZ_GRID_SPACING = 50
VERT_GRID_SPACING = 20

# Overall date span.
# (alternative kept from earlier commented-out settings: '20221130' to '20230509')
start_date = '20221101'
end_date = '20230619'

# Window the SOS data is trimmed to right after loading.
# (alternative kept from earlier commented-out settings: '20221130' to '20230508')
data_start_date = '20221107'
data_cutoff_date = '20230619'

## PARAMETERS FOR SOS DATA
# streamwise coordinates
sos_tidy_fn = "process_slow_data/tidy_df_20221101_20230619_planar_fit_multiplane_STRAIGHTUP_q7_flags9000_10sectors.parquet"

## PARAMETERS FOR SPLASH DATA
# local download directories (Avery Picnic = ASFS-50, Kettle Ponds = ASFS-30)
avp_download_dir = "/Users/elischwat/Development/data/sublimationofsnow/asfs/ASFS-50_Level2_SPLASH2021-2023/"
kps_download_dir = "/Users/elischwat/Development/data/sublimationofsnow/asfs/ASFS-30_Level2_SPLASH2021-2023/"

# FTP host and the per-site product paths on it
ftp_url = 'ftp1.esrl.noaa.gov'
# Avery Picnic product
avp_url = 'Observations/Campaigns/SPLASH/asfs50/2_level_ingest/'
# Kettle Ponds product
kps_url = 'Observations/Campaigns/SPLASH/asfs30/2_level_ingest/'

# Open data

## SOS

In [None]:
# Load the tidy SOS dataset (the columns used in this notebook are
# time, variable, value, measurement, tower and height).
tidy_df = pd.read_parquet(sos_tidy_fn)

# Convert data timezone to local and clean up data on the ends
# convert time column to datetime
tidy_df['time'] = pd.to_datetime(tidy_df['time'])
tidy_df = utils.modify_df_timezone(tidy_df, 'UTC', 'US/Mountain')

# limit data to our dates of interest, based on continuous snow cover at Kettle Ponds.
# Build ONE boolean mask: chaining tidy_df[mask_a][mask_b] applies mask_b (indexed like the
# unfiltered frame) to the already-filtered frame, so pandas has to reindex the mask and warns.
in_date_window = (tidy_df.time > data_start_date) & (tidy_df.time < data_cutoff_date)

# Sort chronologically. Routing 'time' through the index keeps it as the first column and
# gives the result a fresh 0..n-1 index.
tidy_df = tidy_df[in_date_window].set_index('time').sort_index().reset_index()

In [None]:
# How many 'T_3m_c' rows fall between 1 Mar and 1 Apr 2023: all rows, then rows without missing values.
t_3m_c_march = tidy_df.query("variable == 'T_3m_c'").set_index('time').loc['20230301':'20230401']
for row_count in (len(t_3m_c_march), len(t_3m_c_march.dropna())):
    print(row_count)

In [None]:
# How many 'T_3m_c' rows fall between 1 Apr and 1 May 2023: all rows, then rows without missing values.
t_3m_c_april = tidy_df.query("variable == 'T_3m_c'").set_index('time').loc['20230401':'20230501']
for row_count in (len(t_3m_c_april), len(t_3m_c_april.dropna())):
    print(row_count)

In [None]:
# Fraction of non-missing 'dir_3m_c' values falling in each direction bin.
# NOTE(review): pd.cut bins are right-closed by default, so a value of exactly 0 lands in no
# bin while still counting in the denominator — confirm whether that is intended.
direction_bin_edges = [0, 80, 140, 292, 332, 360]
src = tidy_df.query("variable == 'dir_3m_c'")['value'].dropna()
binned_directions = pd.cut(src, direction_bin_edges)
binned_directions.value_counts() / len(src)

## SPLASH

In [None]:

# Collect the Kettle Ponds (ASFS-30) level-2 1-minute "sledmet" files, sorted by file name.
file_list = sorted(
    f for f in os.listdir(kps_download_dir)
    if 'sledmet.asfs30.level2.0.1min.' in f
)

# One list of per-file DataArrays for each variable we keep
mixingratio_dataarray_list = []
abshum_dataarray_list = []
snowdepth_list = []
temp_dataarray_list = []
rh_dataarray_list = []
w_dataarray_list = []

# Iterate over each file with tqdm
for file_name in tqdm.tqdm(file_list):
    # The context manager releases every file handle (the datasets were previously never closed).
    # .load() pulls the values into memory first: xarray reads lazily, so the arrays would be
    # unreadable once the file is closed.
    with xr.open_dataset(os.path.join(kps_download_dir, file_name)) as dataset:
        mixingratio_dataarray_list.append(dataset['mixing_ratio'].load())
        abshum_dataarray_list.append(dataset['h2o_licor'].load())
        snowdepth_list.append(dataset['snow_depth'].load())
        temp_dataarray_list.append(dataset['temp'].load())
        rh_dataarray_list.append(dataset['rh'].load())
        w_dataarray_list.append(dataset['wspd_w_mean'].load())


def _concat_in_local_time(dataarray_list):
    """Concatenate per-file DataArrays along 'time' and convert the result from UTC to US/Mountain."""
    return utils.modify_xarray_timezone(
        xr.concat(dataarray_list, dim='time'), 'UTC', 'US/Mountain'
    )


mixingratio_ds = _concat_in_local_time(mixingratio_dataarray_list)
abshum_ds = _concat_in_local_time(abshum_dataarray_list)
annex_snowdepth_ds = _concat_in_local_time(snowdepth_list)
temp_ds = _concat_in_local_time(temp_dataarray_list)
rh_ds = _concat_in_local_time(rh_dataarray_list)
w_ds = _concat_in_local_time(w_dataarray_list)

# Create categories

In [None]:
# Identify lists of timestamps for different categories

def _times_where(variable, *conditions):
    """Return the `time` column of the tidy_df rows for `variable` whose `value` passes every query condition."""
    selected = tidy_df[tidy_df.variable == variable]
    for condition in conditions:
        selected = selected.query(condition)
    return selected.time


# "bs" times: any timestamp where either SF_avg sensor on tower ue reports a positive value
# (presumably blowing snow — TODO confirm); "nobs" is every other timestamp in the dataset.
bs_times = set(_times_where('SF_avg_1m_ue', 'value > 0')) | set(_times_where('SF_avg_2m_ue', 'value > 0'))
nobs_times = set(tidy_df.time) - bs_times

# Coupling classes from omega_3m_c
decoupled_times = _times_where('omega_3m_c', 'value < 0.43')
weaklycoupled_times = _times_where('omega_3m_c', 'value >= 0.43', 'value <= 0.61')
coupled_times = _times_where('omega_3m_c', 'value > 0.61')

# Stability classes from Ri_3m_c
ri_stable_times = _times_where('Ri_3m_c', 'value > 0.25')
ri_unstable_times = _times_where('Ri_3m_c', 'value < -0.01')
ri_neutral_times = _times_where('Ri_3m_c', 'value >= -0.01', 'value <= 0.25')

# Stability classes from temp_gradient_3m_c
tgrad_stable_times = _times_where('temp_gradient_3m_c', 'value > 0.01')
tgrad_unstable_times = _times_where('temp_gradient_3m_c', 'value < -0.01')
tgrad_neutral_times = _times_where('temp_gradient_3m_c', 'value >= -0.01', 'value <= 0.01')

# Wind-direction sectors from dir_3m_c (stored as plain numpy arrays)
upvalley_wind_times = _times_where('dir_3m_c', 'value < 152', 'value > 92').values
downvalley_wind_times = _times_where('dir_3m_c', 'value < 342', 'value > 292').values

len(upvalley_wind_times), len(downvalley_wind_times)

In [None]:
# Print how many timestamps fall in each stability class:
# the three Ri-based classes first, then the three temperature-gradient-based classes.
for category_times in (
    ri_stable_times, ri_unstable_times, ri_neutral_times,
    tgrad_stable_times, tgrad_unstable_times, tgrad_neutral_times,
):
    print(len(category_times))

In [None]:
# SPLASH mixing ratio, divided by 1000 (presumably g/kg -> kg/kg to match the SOS values — TODO confirm)
s_annex_df = (
    mixingratio_ds.to_dataframe()[['mixing_ratio']]
    .rename(columns={'mixing_ratio': 'mixing_ratio_annex'})
    / 1000
)


def _kps_mixing_ratio(height_m):
    """Time-indexed one-column frame with the tower-c mixing ratio variable at `height_m` metres."""
    return (
        tidy_df.query(f"variable == 'mixingratio_{height_m}m_c'")[['time', 'value']]
        .set_index('time')
        .rename(columns={'value': f'mixing_ratio_kps_{height_m}m'})
    )


s_kps_df_4m = _kps_mixing_ratio(4)
s_kps_df_3m = _kps_mixing_ratio(3)
s_kps_df_2m = _kps_mixing_ratio(2)
s_df = s_kps_df_2m.join(s_kps_df_3m).join(s_kps_df_4m).join(s_annex_df)

# Mean value by hour of day for each series, one facet per month, restricted to the `nobs_times` timestamps.
mixing_ratio_columns = [
    'mixing_ratio_kps_2m', 'mixing_ratio_kps_3m', 'mixing_ratio_kps_4m', 'mixing_ratio_annex'
]
nobs_rows = s_df[s_df.index.isin(nobs_times)].reset_index()
alt.Chart(nobs_rows).transform_fold(mixing_ratio_columns).mark_line().encode(
    alt.X('hours(time):T'),
    alt.Y('mean(value):Q').scale(zero=False),
    alt.Facet('month(time):T').sort(['Nov', 'Dec']),
    alt.Color('key:N')
).resolve_scale(y='independent')

## Calibrate SOS gas analyzer measurements

We calibrate by assuming that every gas analyzer has the same seasonal mean as the hygrometer measurement at the same height on the central tower.

### With seasonal mean

In [None]:
# Seasonal-mean absolute humidity from the hygrometers on tower c:
# 1000 * mean(specific humidity) * mean(air density), per (tower, height).
mean_specific_humidity = (
    tidy_df[tidy_df.measurement == 'specific humidity']
    .groupby(['tower', 'height'])[['value']]
    .mean()
)
mean_air_density = (
    tidy_df[tidy_df.measurement == 'air density']
    .groupby(['tower', 'height'])[['value']]
    .mean()
)
hygrometer_absolute_humidity_mean = (
    (1000 * mean_specific_humidity * mean_air_density)
    .reset_index()
    .query("tower == 'c'")
)

In [None]:
# Seasonal mean of every 'Water vapor density' variable, keeping its tower and height alongside.
is_water_vapor_density = tidy_df.measurement == 'Water vapor density'
ec_absolute_humidity_mean = (
    tidy_df[is_water_vapor_density]
    .groupby(['variable', 'tower', 'height'])[['value']]
    .mean()
    .reset_index()
)

In [None]:
# Pair each water-vapor-density mean with the tower-c hygrometer mean at the same height
# ("truth"); the offset is how far the former sits above the latter.
hygrometer_truth = hygrometer_absolute_humidity_mean[['height', 'value']].rename(columns={'value': 'truth'})
corrections_df = (
    ec_absolute_humidity_mean
    .merge(hygrometer_truth, on='height')
    .assign(offset=lambda merged: merged['value'] - merged['truth'])
)
corrections_df

Update dataset with corrections

In [None]:
# Water vapor density rows at the 1, 3 and 10 m heights.
src = tidy_df[
    (tidy_df.measurement == 'Water vapor density')
    & tidy_df.height.isin([1, 3, 10])
]
src

In [None]:
# Seasonal-mean humidity profiles: hygrometer-derived values (black squares) against the
# water-vapor-density means of each variable (circles colored by tower).
hygrometer_means = (
    1000
    * tidy_df[tidy_df.measurement == 'specific humidity'].groupby(['tower', 'height'])[['value']].mean()
    * tidy_df[tidy_df.measurement == 'air density'].groupby(['tower', 'height'])[['value']].mean()
).reset_index()
water_vapor_density_means = (
    tidy_df[tidy_df.measurement == 'Water vapor density']
    .groupby(['variable', 'tower', 'height'])[['value']]
    .mean()
    .reset_index()
)

hygrometer_layer = alt.Chart(hygrometer_means).mark_point(
    shape='square', filled=True, color='black', size=20
).encode(
    alt.X("value:Q"),
    alt.Y("height:Q")
).properties(width=150, height=150)

water_vapor_density_layer = alt.Chart(water_vapor_density_means).mark_circle(size=40).encode(
    alt.X("value:Q"),
    alt.Y("height:Q"),
    alt.Color('tower:N')
).properties(width=150, height=150)

hygrometer_layer + water_vapor_density_layer

In [None]:


# Time series of `src` between 2022-12-12 and 2022-12-14, one line per variable, colored by height.
# The two date bounds are combined into ONE mask: the chained form src[mask_a][mask_b] applies
# a mask indexed like the unfiltered frame to the filtered one, which makes pandas reindex
# the mask and emit a warning.
in_plot_window = (src.time > '20221212') & (src.time < '20221214')
alt.Chart(
    src[in_plot_window]
).mark_line().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Color("height:N"),
    detail='variable'
)

In [None]:
# Subtract each variable's calibration offset from its values in a single vectorized pass.
# (The previous loop filtered, copied and re-concatenated the whole tidy_df once per corrected
# variable; this form also leaves the rows in their existing order.)
# NOTE: this cell is not idempotent — re-running it without first recomputing corrections_df
# subtracts the offsets a second time.

# Total offset per variable (summing reproduces sequential subtraction should a variable appear more than once).
offset_by_variable = corrections_df.groupby('variable')['offset'].sum()

# Variables without a correction map to NaN and are given an offset of 0.
# astype(object) keeps .map()/.fillna() well-defined even if 'variable' is stored as a categorical.
value_offsets = tidy_df['variable'].astype(object).map(offset_by_variable).fillna(0)

tidy_df = tidy_df.assign(value=tidy_df['value'] - value_offsets)

In [None]:
# Same seasonal-mean profile comparison as before, recomputed from the current tidy_df:
# hygrometer-derived values (black squares) against water-vapor-density means (circles by tower).
hygrometer_means_corrected = (
    1000
    * tidy_df[tidy_df.measurement == 'specific humidity'].groupby(['tower', 'height'])[['value']].mean()
    * tidy_df[tidy_df.measurement == 'air density'].groupby(['tower', 'height'])[['value']].mean()
).reset_index()
water_vapor_density_means_corrected = (
    tidy_df[tidy_df.measurement == 'Water vapor density']
    .groupby(['variable', 'tower', 'height'])[['value']]
    .mean()
    .reset_index()
)

hygrometer_layer_corrected = alt.Chart(hygrometer_means_corrected).mark_point(
    shape='square', filled=True, color='black', size=20
).encode(
    alt.X("value:Q"),
    alt.Y("height:Q")
).properties(width=150, height=150)

water_vapor_density_layer_corrected = alt.Chart(
    water_vapor_density_means_corrected
).mark_circle(size=40).encode(
    alt.X("value:Q"),
    alt.Y("height:Q"),
    alt.Color('tower:N')
).properties(width=150, height=150)

hygrometer_layer_corrected + water_vapor_density_layer_corrected

In [None]:
def _in_plot_window(frame):
    """Rows of `frame` with time strictly between 2022-12-12 and 2022-12-14.

    Uses one combined mask; the chained form frame[mask_a][mask_b] applies a mask indexed
    like the unfiltered frame to the filtered one, which makes pandas reindex it and warn.
    """
    return frame[(frame.time > '20221212') & (frame.time < '20221214')]


# Water vapor density at 1, 3 and 10 m
src = tidy_df[tidy_df.measurement=='Water vapor density']
src = src[src.height.isin([1,3,10])]
abs_hum = alt.Chart(
    _in_plot_window(src)
).mark_line(strokeWidth=0.5).encode(
    alt.X("time:T"),
    alt.Y("value:Q").title("Absolute humidity (g/m^3)").scale(zero=False),
    alt.Color("height:N"),
    alt.Shape('tower:N'),
    detail='variable'
).properties(width=600)

# Snow depth over the same window
src = tidy_df[tidy_df.measurement=='snow depth']
snowdepth = alt.Chart(
    _in_plot_window(src)
).mark_line(strokeWidth=0.5).encode(
    alt.X("time:T"),
    alt.Y("value:Q").title("Snow depth (m)"),
    alt.Shape('tower:N'),
    detail='variable'
).properties(width=600, height=150)

# Stack snow depth above absolute humidity, with independent color/shape legends
(snowdepth & abs_hum).resolve_scale(color='independent', shape='independent')

### With monthly means

In [None]:
# filtered_spechumidity = tidy_df[tidy_df.measurement=='specific humidity']
# hygrometer_absolute_humidity_mean = (
#     1000 * filtered_spechumidity.groupby(['tower', 'height', filtered_spechumidity.time.dt.month])[['value']].mean() *\
#     tidy_df[tidy_df.measurement=='air density'].groupby(['tower', 'height'])[['value']].mean()
# ).reset_index().query("tower == 'c'")

In [None]:
# filtered_abshumidity = tidy_df[tidy_df.measurement=='Water vapor density']
# ec_absolute_humidity_mean = filtered_abshumidity.groupby([
#     'variable', 'tower', 'height', filtered_abshumidity.time.dt.month
# ])[['value']].mean().reset_index()
# ec_absolute_humidity_mean

In [None]:
# corrections_df = ec_absolute_humidity_mean.merge(
#     hygrometer_absolute_humidity_mean[['height', 'value', 'time', 'tower']].rename(columns={'value': 'truth'}),
#     on=['height', 'tower', 'time']
# )
# corrections_df['offset'] = corrections_df['value'] - corrections_df['truth']


Update dataset with corrections

In [None]:
# corrected_measurements = []
# for variable in corrections_df.variable.unique():
#     for month in corrections_df[corrections_df.variable == variable].time.unique():
#         src = tidy_df.query(f"variable == '{variable}'")
#         src = src[src.time.dt.month == month]
#         row = corrections_df.set_index(['variable', 'time']).loc[variable, month]
#         src = src.assign(value = src.value - row['offset'])
#         corrected_measurements.append(src)

In [None]:
# for variable in corrections_df.variable.unique():
#     tidy_df = tidy_df[tidy_df.variable != variable]
# tidy_df = pd.concat([tidy_df] + corrected_measurements)

In [None]:
# hygr_vals = (
#     1000 * tidy_df[tidy_df.measurement=='specific humidity'].groupby(['tower', 'height'])[['value']].mean() *\
#     tidy_df[tidy_df.measurement=='air density'].groupby(['tower', 'height'])[['value']].mean()
#     ).reset_index()
# irga_vals = tidy_df[tidy_df.measurement=='Water vapor density'].groupby(['variable', 'tower', 'height'])[['value']].mean().reset_index()
# # hygr_vals = hygr_vals[hygr_vals.time.dt.month==12]
# # irga_vals = irga_vals[irga_vals.time.dt.month==12]
# alt.Chart(hygr_vals).mark_point(shape='square', filled=True, color='black', size=20).encode(
#     alt.X("value:Q"),
#     alt.Y("height:Q")
# ).properties(width=150, height = 150)\
# + alt.Chart(irga_vals).mark_circle(size=40).encode(
#     alt.X("value:Q"),
#     alt.Y("height:Q"),
#     alt.Color('tower:N')
# ).properties(width=150, height = 150)

# Create tables

## Instrument location info (georeferenced)
We use a file with theodolite/GPS readings provided by NCAR. 

In [None]:
# Read the surveyed points: an 'ec' name column followed by x, y, z coordinates.
instrument_loc_df = pd.read_csv("~/Development/data/sublimationofsnow/SOSm.txt", names = ['ec', 'x', 'y', 'z'])

# Keep only the entries whose name starts with a tower prefix (CS, DS, UWS, UES) and ends
# with T or B (the "top or bottom" marker).
has_tower_prefix = instrument_loc_df['ec'].str.startswith(('CS', 'DS', 'UWS', 'UES'), na=False)
has_top_or_bottom_suffix = instrument_loc_df['ec'].str.endswith(('T', 'B'), na=False)
instrument_loc_df = instrument_loc_df[has_tower_prefix & has_top_or_bottom_suffix]

# Names look like '<TOWER>S<HEIGHT><T|B>': the text before the first 'S' is the tower,
# the text after it (minus the trailing T/B letter) is the integer height.
name_parts = instrument_loc_df['ec'].str.split('S')
instrument_loc_df = instrument_loc_df.assign(
    tower=name_parts.str[0].str.lower(),
    height=name_parts.str[1].str[:-1].astype(int),
)

# Average the top and bottom readings into one x/y/z location per (height, tower).
# A plain groupby mean replaces the previous pivot + groupby(level=0, axis=1).mean(),
# which relies on column-wise groupby (deprecated in recent pandas).
instrument_loc_df = instrument_loc_df.groupby(['height', 'tower'])[['x', 'y', 'z']].mean()
instrument_loc_df

## If we want to, we can convert instrument locations to streamwise coordinates too

In [None]:
# for height in [1,2,3,5,10,15,20]:
#     instrument_loc_df.loc[(height,'c'), 'z'] = height
#     instrument_loc_df.loc[(height,'d'), 'z'] = height
#     instrument_loc_df.loc[(height,'ue'), 'z'] = height
#     instrument_loc_df.loc[(height,'uw'), 'z'] = height
# instrument_loc_df = instrument_loc_df.dropna()

In [None]:
# Display the instrument location table (x, y, z indexed by height and tower).
instrument_loc_df

## Wind field measurements

In [None]:
# Wide table of the wind components: one row per time, one column per (height, tower, measurement).
is_wind_component = tidy_df.measurement.isin(['u','v','w'])
at_selected_height = tidy_df.height.isin(HEIGHTS)
wind_field_df = tidy_df[is_wind_component & at_selected_height].pivot_table(
    index='time',
    columns=['height', 'tower', 'measurement'],
    values='value',
)
wind_field_df

## Turbulent water vapor flux measurements

In [None]:
# Wide table of the turbulent water vapor fluxes: one row per time,
# one column per (height, tower, measurement).
is_h2o_flux = tidy_df.measurement.isin(['u_h2o_','v_h2o_','w_h2o_'])
turb_flux_field_df = (
    tidy_df.loc[is_h2o_flux & tidy_df.height.isin(HEIGHTS)]
    .pivot_table(index='time', columns=['height', 'tower', 'measurement'], values='value')
)
turb_flux_field_df

## Turbulent temperature flux measurements

In [None]:
# Wide table of the turbulent temperature fluxes: one row per time,
# one column per (height, tower, measurement).
temp_turb_flux_field_df = tidy_df[tidy_df.measurement.isin(['u_tc_','v_tc_','w_tc_']) & tidy_df.height.isin(HEIGHTS)]
# No trailing comma after the pivot_table call: a stray comma there turned this variable
# into a 1-element tuple instead of a DataFrame.
temp_turb_flux_field_df = temp_turb_flux_field_df.pivot_table(index='time', columns=['height', 'tower', 'measurement'], values='value')
temp_turb_flux_field_df

## Humidity measurements (from IRGAs)

In [None]:
# Wide table of water vapor density, relabelled as measurement 'q':
# one row per time, one column per (height, tower, 'q').
# .assign() relabels on a new frame; assigning `.measurement = 'q'` on the filtered slice
# triggers pandas' SettingWithCopyWarning.
abs_hum_field_df = (
    tidy_df[tidy_df.measurement.isin(['Water vapor density']) & tidy_df.height.isin(HEIGHTS)]
    .assign(measurement='q')
    .pivot_table(
        index='time', columns=['height', 'tower', 'measurement'], values='value'
    )
)
abs_hum_field_df

## Advective flux measurements

In [None]:
# Advective water vapor flux components: each wind component times absolute humidity
# (uq = u*q, vq = v*q, wq = w*q) for every (height, tower) combination in wind_field_df.
# The list is deliberately not named `ls`: that name shadows IPython's `ls` alias, which
# this notebook uses in its second cell.
adv_flux_frames = []
for h in wind_field_df.columns.get_level_values('height').unique():
    for t in wind_field_df.columns.get_level_values('tower').unique():
        this_wind_df = wind_field_df[(h, t)]
        this_q = abs_hum_field_df[(h, t)]['q']
        this_adv_df = pd.DataFrame({
            'uq': this_wind_df['u'] * this_q,
            'vq': this_wind_df['v'] * this_q,
            'wq': this_wind_df['w'] * this_q,
        }).reindex(this_wind_df.index)  # keep exactly the wind table's timestamps
        this_adv_df.columns.name = 'measurement'
        # Prefix the columns with (height, tower) so they match the other field tables
        adv_flux_frames.append(pd.concat([this_adv_df], axis=1, keys=[(h, t)]))

# Every frame shares the wind table's index, so a single column-wise concat replaces the
# previous chain of pairwise joins.
adv_flux_field_df = pd.concat(adv_flux_frames, axis=1)
adv_flux_field_df.columns = adv_flux_field_df.columns.set_names(['height', 'tower'], level=[0, 1])
adv_flux_field_df

## Dry air density measurements

In [None]:
# gather dry air density measurements, relabelled as measurement 'rho'
# (.assign() relabels on a new frame; assigning `.measurement = 'rho'` on the filtered slice
#  triggers pandas' SettingWithCopyWarning)
dryair_density_field_df = (
    tidy_df[tidy_df.measurement.isin(['dry air density']) & tidy_df.height.isin(HEIGHTS)]
    .assign(measurement='rho')
    .pivot_table(
        index='time', columns=['height', 'tower', 'measurement'], values='value'
    )
)

# duplicate the dry air density measurements across the towers (THIS IS NAIVE):
# each copy keeps the measured heights and measurement label but swaps in the target tower.
measured_dryair_density = dryair_density_field_df
dryair_density_frames = [measured_dryair_density]
for tower in ['d', 'ue', 'uw']:
    tower_copy = measured_dryair_density.copy()
    tower_copy.columns = pd.MultiIndex.from_tuples(
        [(cs[0], tower, cs[2]) for cs in measured_dryair_density.columns]
    )
    dryair_density_frames.append(tower_copy)

# All copies share one index, so a column-wise concat is equivalent to chained joins;
# verify_integrity keeps the fail-fast behavior if a tower already has its own columns.
dryair_density_field_df = pd.concat(dryair_density_frames, axis=1, verify_integrity=True)

dryair_density_field_df.columns = dryair_density_field_df.columns.set_names(['height', 'tower', 'measurement'])

# convert from kg/m^3 to g/m^3
dryair_density_field_df = dryair_density_field_df*1000

dryair_density_field_df

## Temperature measurements

In [None]:
# gather temperature measurements, relabelled as measurement 'T'
# (.assign() relabels on a new frame; assigning `.measurement = 'T'` on the filtered slice
#  triggers pandas' SettingWithCopyWarning)
temp_field_df = (
    tidy_df[tidy_df.measurement.isin(['temperature']) & tidy_df.height.isin(HEIGHTS)]
    .assign(measurement='T')
    .pivot_table(
        index='time', columns=['height', 'tower', 'measurement'], values='value'
    )
)

# duplicate the temperature measurements across the towers (THIS IS NAIVE):
# each copy keeps the measured heights and measurement label but swaps in the target tower.
measured_temp = temp_field_df
temp_frames = [measured_temp]
for tower in ['d', 'ue', 'uw']:
    tower_copy = measured_temp.copy()
    tower_copy.columns = pd.MultiIndex.from_tuples(
        [(cs[0], tower, cs[2]) for cs in measured_temp.columns]
    )
    temp_frames.append(tower_copy)

# All copies share one index, so a column-wise concat is equivalent to chained joins;
# verify_integrity keeps the fail-fast behavior if a tower already has its own columns.
temp_field_df = pd.concat(temp_frames, axis=1, verify_integrity=True)

temp_field_df.columns = temp_field_df.columns.set_names(['height', 'tower', 'measurement'])

temp_field_df

## Mixing ratio measurements (from IRGAs and other sensors)

In [None]:
# Mixing ratio r = absolute humidity / dry air density, matched on (height, tower)
# after dropping the measurement level from both tables.
mixing_ratio_field_df = abs_hum_field_df.droplevel(2, 1) / dryair_density_field_df.droplevel(2, 1)

# Re-attach a measurement level labelled 'r', built from the actual (height, tower) columns.
# Building it from the product of the column levels only lines up when the columns happen to
# be the complete, sorted height x tower product; otherwise it mislabels columns or fails.
mixing_ratio_field_df.columns = pd.MultiIndex.from_tuples(
    [(height, tower, 'r') for height, tower in mixing_ratio_field_df.columns],
    names=['height', 'tower', 'measurement'],
)

mixing_ratio_field_df

# 3D Differential Solution

## Calculate interpolated fields (3D)

In [None]:
# NOTE: this narrows the notebook-wide HEIGHTS constant for the 3D solution; cells that build
# the field tables must have run with the earlier value before this point.
HEIGHTS = [3,10]

# gather all measurements into a dataframe, isolate to the heights we care about
measurement_frames = [
    wind_field_df,
    turb_flux_field_df,
    temp_turb_flux_field_df,
    abs_hum_field_df,
    adv_flux_field_df,
    dryair_density_field_df,
    temp_field_df,
    mixing_ratio_field_df,
]
df = measurement_frames[0]
for frame in measurement_frames[1:]:
    df = df.join(frame)
data_df = df[HEIGHTS]

# Gather the instrument locations into a dataframe, isolate to heights we care about
instrument_loc_limited_heights = instrument_loc_df[instrument_loc_df.index.get_level_values(0).isin(HEIGHTS)]

# Transform the dataframe of instrument locations into a form that can be merged with the dataframe of measurements
#   transform: a single row whose columns are (height, tower, x|y|z)
instrument_loc_row = pd.DataFrame(instrument_loc_limited_heights.unstack().unstack()).T
instrument_loc_row.columns = instrument_loc_row.columns.swaplevel(0,2)
instrument_loc_row.columns = instrument_loc_row.columns.set_names('measurement', level=2)
#   duplicate the sensor locations so we can join (duplicate) x,y,z info into the dataframe of measurements
instrument_loc_limited_heights_repeated = instrument_loc_row.loc[instrument_loc_row.index.repeat(len(data_df))]
instrument_loc_limited_heights_repeated.index = data_df.index
data_df = data_df.join(instrument_loc_limited_heights_repeated)

# Create a meshgrid for the interpolation and isolate the x,y,z locations of measurements
# (HORIZ_GRID_SPACING / VERT_GRID_SPACING are the NUMBER of nodes along each axis)
xx, yy, zz = np.meshgrid(
    np.linspace(instrument_loc_limited_heights.x.min(), instrument_loc_limited_heights.x.max(), HORIZ_GRID_SPACING),
    np.linspace(instrument_loc_limited_heights.y.min(), instrument_loc_limited_heights.y.max(), HORIZ_GRID_SPACING),
    np.linspace(instrument_loc_limited_heights.z.min(), instrument_loc_limited_heights.z.max(), VERT_GRID_SPACING)
)
spacing_x = np.diff(xx[0,:,0]).mean()
spacing_y = np.diff(yy[:,0,0]).mean()
spacing_z = np.diff(zz[0,0,:]).mean()
points = np.transpose(np.vstack((instrument_loc_limited_heights.x, instrument_loc_limited_heights.y, instrument_loc_limited_heights.z)))


def _interpolate_to_grid(measurement):
    """Linearly interpolate one measurement onto the (xx, yy, zz) grid for every timestamp.

    Returns a Series indexed like data_df; each element is the gridded array produced by
    scipy.interpolate.griddata for that row.
    NOTE(review): assumes the selected columns are ordered like the rows of `points`
    (height, then tower) — confirm if the column order of the field tables changes.
    """
    return data_df.loc[:, (slice(None), slice(None), [measurement])].apply(
        lambda row: interpolate.griddata(points, row.values, (xx, yy, zz), method='linear'),
        axis=1
    )


# Calculate interpolated fields
# VECTOR FIELDS
# wind velocity fields
u_interp = _interpolate_to_grid('u')
v_interp = _interpolate_to_grid('v')
w_interp = _interpolate_to_grid('w')
# turb. flux fields
u_q__interp = _interpolate_to_grid('u_h2o_')
v_q__interp = _interpolate_to_grid('v_h2o_')
w_q__interp = _interpolate_to_grid('w_h2o_')
# SCALAR FIELDS
rho_interp = _interpolate_to_grid('rho')
r_interp = _interpolate_to_grid('r')
q_interp = _interpolate_to_grid('q')

### PLOT: Example of a 3d interpolated wind field

In [None]:

# 3D quiver plot of the interpolated wind field at the 101st timestamp (position 100),
# subsampled to every 5th grid node horizontally and every 2nd vertically.
# .iloc makes the positional lookup explicit: the *_interp Series are indexed by time, and
# plain integer [] access on such a Series is a deprecated positional fallback in pandas.
ax = plt.figure(figsize=(10,10)).add_subplot(projection='3d')
ax.quiver(
    xx[::5,::5,::2], 
    yy[::5,::5,::2],
    zz[::5,::5,::2],
    u_interp.iloc[100][::5,::5,::2],
    v_interp.iloc[100][::5,::5,::2], 
    w_interp.iloc[100][::5,::5,::2],
    label='interpolated'
)
# plt.quiver(
#     data_df.iloc[100][(slice(None),slice(None),'x')].values.astype('float'),
#     data_df.iloc[100][(slice(None),slice(None),'y')].values.astype('float'),
#     data_df.iloc[100][(slice(None),slice(None),'z')].values.astype('float'),
#     data_df.iloc[100][(slice(None),slice(None),'u')].values.astype('float'),
#     data_df.iloc[100][(slice(None),slice(None),'v')].values.astype('float'),
#     data_df.iloc[100][(slice(None),slice(None),'w')].values.astype('float'),
#     label='measured',
#     color='red'
# )

In [None]:
# NaN-ignoring time mean of each interpolated wind component over the Ri-stable timestamps
stable_mean_u_field, stable_mean_v_field, stable_mean_w_field = (
    np.nanmean(np.stack(component[component.index.isin(ri_stable_times)].values), axis = 0)
    for component in (u_interp, v_interp, w_interp)
)

# 3d plot, subsampled to every 5th grid node horizontally and every 2nd vertically
subsample = (slice(None, None, 5), slice(None, None, 5), slice(None, None, 2))
ax = plt.figure(figsize=(10,10)).add_subplot(projection='3d')
ax.quiver(
    xx[subsample], yy[subsample], zz[subsample],
    stable_mean_u_field[subsample], stable_mean_v_field[subsample], stable_mean_w_field[subsample],
    label='interpolated',
)

In [None]:
# 3 x 2 panel of horizontal mean wind vectors: one row per Ri stability class,
# one column per plotted grid level.
fig, axes = plt.subplots(3,2, figsize=(5,7.5), sharex=True, sharey=True)
for ax in axes.flatten():
    ax.set_aspect('equal')


def _regime_mean_fields(regime_times):
    """NaN-ignoring time mean of the interpolated (u, v, w) fields over the given timestamps."""
    return tuple(
        np.nanmean(np.stack(component[component.index.isin(regime_times)].values), axis = 0)
        for component in (u_interp, v_interp, w_interp)
    )


stable_mean_u_field, stable_mean_v_field, stable_mean_w_field = _regime_mean_fields(ri_stable_times)
unstable_mean_u_field, unstable_mean_v_field, unstable_mean_w_field = _regime_mean_fields(ri_unstable_times)
neutral_mean_u_field, neutral_mean_v_field, neutral_mean_w_field = _regime_mean_fields(ri_neutral_times)

# (title prefix, mean u, mean v) per row; (z index into the grid, title label) per column
panel_rows = [
    ('stable', stable_mean_u_field, stable_mean_v_field),
    ('unstable', unstable_mean_u_field, unstable_mean_v_field),
    ('neutral', neutral_mean_u_field, neutral_mean_v_field),
]
panel_columns = [(4, '4.5m'), (15, '8m')]

for row, (regime, mean_u, mean_v) in enumerate(panel_rows):
    for col, (z_index, height_label) in enumerate(panel_columns):
        axes[row, col].quiver(
            xx[::5,::5,z_index],  yy[::5,::5,z_index],
            mean_u[::5,::5,z_index], mean_v[::5,::5,z_index],
        )
        axes[row, col].set_title(f'{regime}, {height_label}')
plt.tight_layout()

In [None]:
def _flux_difference_table(regime_times, diff_column):
    """Pivot the 3 m and 20 m tower-c w_h2o_ variables for one stability class and add their difference.

    Keeps timestamps from 2022-11-30 up to (not including) 2023-05-09 that are in both
    `regime_times` and `nobs_times`; `diff_column` holds the 20 m value minus the 3 m value.
    """
    table = tidy_df[tidy_df.variable.isin(['w_h2o__3m_c', 'w_h2o__20m_c'])]
    table = table[(table.time >= '20221130') & (table.time < '20230509')]
    table = table[table.time.isin(set(regime_times).intersection(set(nobs_times)))]
    table = table.pivot_table(index='time', values='value', columns='variable')
    table[diff_column] = table['w_h2o__20m_c'] - table['w_h2o__3m_c']
    return table


src_unstable = _flux_difference_table(ri_unstable_times, 'diff_unstable')
src_stable = _flux_difference_table(ri_stable_times, 'diff_stable')
src_neutral = _flux_difference_table(ri_neutral_times, 'diff_neutral')

# Stack the three difference columns into one long frame for plotting
src = pd.concat([
    src_neutral[['diff_neutral']].reset_index(),
    src_stable[['diff_stable']].reset_index(),
    src_unstable[['diff_unstable']].reset_index(),
])

# Median difference by time of day, one line per stability class
difference_columns = ['diff_neutral', 'diff_stable', 'diff_unstable']
alt.Chart(
    src.reset_index()
).transform_fold(difference_columns).mark_line().encode(
    alt.X('hoursminutes(time):T'),
    alt.Y('median(value):Q'),
    alt.Color('key:N')
).properties(width=150, height=150)

In [None]:
def _regime_flux_table(regime_times):
    """Pivot the 3 m and 20 m tower-c w_h2o_ variables for one stability class, with time as a column.

    Keeps timestamps from 2022-11-30 up to (not including) 2023-05-09 that are in both
    `regime_times` and `nobs_times`.
    """
    table = tidy_df[tidy_df.variable.isin(['w_h2o__3m_c', 'w_h2o__20m_c'])]
    table = table[(table.time >= '20221130') & (table.time < '20230509')]
    table = table[table.time.isin(set(regime_times).intersection(set(nobs_times)))]
    return table.pivot_table(index='time', values='value', columns='variable').reset_index()


src_unstable = _regime_flux_table(ri_unstable_times)
src_stable = _regime_flux_table(ri_stable_times)
src_neutral = _regime_flux_table(ri_neutral_times)

# Data-less chart template: median of each flux variable by time of day
chart = alt.Chart(
).transform_fold([
    'w_h2o__3m_c', 'w_h2o__20m_c'
]).mark_line().encode(
    alt.X('hoursminutes(time):T'),
    alt.Y('median(value):Q'),
    alt.Color('key:N')
)


def _layer_in_window(table):
    """Bind the chart template to the rows of `table` inside the 2022-11-30 to 2023-05-09 window."""
    in_window = (table.time >= '20221130') & (table.time < '20230509')
    return alt.layer(chart, data=table[in_window].reset_index())


# Side by side: stable | neutral | unstable
_layer_in_window(src_stable) | _layer_in_window(src_neutral) | _layer_in_window(src_unstable)



In [None]:
def _flux_difference_all_dates(regime_times, diff_column):
    """Pivot the 3 m and 20 m tower-c w_h2o_ variables for one stability class and add their difference.

    Keeps every timestamp that is in both `regime_times` and `nobs_times` (no date window);
    `diff_column` holds the 20 m value minus the 3 m value.
    """
    table = tidy_df[tidy_df.variable.isin(['w_h2o__3m_c', 'w_h2o__20m_c'])]
    table = table[table.time.isin(set(regime_times).intersection(set(nobs_times)))]
    table = table.pivot_table(index='time', values='value', columns='variable')
    table[diff_column] = table['w_h2o__20m_c'] - table['w_h2o__3m_c']
    return table


src_unstable = _flux_difference_all_dates(ri_unstable_times, 'diff_unstable')
src_stable = _flux_difference_all_dates(ri_stable_times, 'diff_stable')
src_neutral = _flux_difference_all_dates(ri_neutral_times, 'diff_neutral')

# Stack the three difference columns into one long frame for plotting
src = pd.concat([
    src_neutral[['diff_neutral']].reset_index(),
    src_stable[['diff_stable']].reset_index(),
    src_unstable[['diff_unstable']].reset_index(),
])

# Median difference by time of day, restricted to 2022-11-30 up to (not including) 2023-05-09
in_window = (src.time >= '20221130') & (src.time < '20230509')
difference_columns = ['diff_neutral', 'diff_stable', 'diff_unstable']
alt.Chart(
    src[in_window].reset_index()
).transform_fold(difference_columns).mark_line().encode(
    alt.X('hoursminutes(time):T'),
    alt.Y('median(value):Q'),
    alt.Color('key:N')
)

In [None]:
# Diurnal median of wind direction, restricted to the timestamps in
# ri_unstable_times, coloured by measurement height and faceted by tower.
src = tidy_df.query("measurement == 'wind direction'").loc[
    lambda rows: rows.time.isin(ri_unstable_times)
]
alt.Chart(src).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(value):Q'),
    color=alt.Color('height:O').scale(scheme='turbo'),
    facet=alt.Facet('tower:N'),
)

## Calculate terms (3D)

Calculate advective terms in both the Paw U form

$u \rho \dfrac{\partial s}{\partial x} + v \rho \dfrac{\partial s}{\partial y} + w \rho \dfrac{\partial s}{\partial z}$

and in the Sun form

$u \dfrac{\partial q}{\partial x} + v \dfrac{\partial q}{\partial y} + w \dfrac{\partial q}{\partial z}$

and also calculate the turbulent flux divergence terms (same for Paw U and Sun)

$\dfrac{\partial \overline{u'q'}}{\partial x} + \dfrac{\partial \overline{v'q'}}{\partial y} + \dfrac{\partial \overline{w'q'}}{\partial z}$

In [None]:
# Collect the interpolated grids into one dataframe: one row per timestep, one
# column per field, each cell holding that timestep's gridded array.
_gridded_fields = {
    'u': u_interp,
    'v': v_interp,
    'w': w_interp,
    'u_q_': u_q__interp,
    'v_q_': v_q__interp,
    'w_q_': w_q__interp,
    'rho': rho_interp,
    'r': r_interp,
    'q': q_interp,
}
# The series name becomes the column label when joined below.
for _label, _field_series in _gridded_fields.items():
    _field_series.name = _label

_ordered_fields = list(_gridded_fields.values())
fields_df = pd.DataFrame(_ordered_fields[0])
for _field_series in _ordered_fields[1:]:
    fields_df = fields_df.join(_field_series)

In [None]:
# For every timestep, evaluate the gridded advection and turbulent-flux-divergence
# terms and reduce each grid to its spatial mean and spatial median (NaNs ignored).
# Each row therefore yields 12 numbers, in this order:
#   means of:   lateral_advection_pawu, vertical_advection_pawu, lateral_advection_sun,
#               vertical_advection_sun, lateral_turb_flux_div, vertical_turb_flux_div
#   medians of: the same six terms
# The vertical air density flux term (Paw U only) is not computed.
_TERM_NAMES_3D = [
    'lateral_advection_pawu',
    'vertical_advection_pawu',
    'lateral_advection_sun',
    'vertical_advection_sun',
    'lateral_turb_flux_div',
    'vertical_turb_flux_div',
]


def _advective_terms_for_row(row):
    """Return the 12-tuple (six spatial means, then six spatial medians) for one timestep.

    NOTE(review): axis 0 is differentiated with spacing_x, axis 1 with spacing_y and
    axis 2 with spacing_z. This assumes the grids are laid out as (x, y, z); the
    meshgrid that defines the layout is built elsewhere, so confirm its indexing.
    """
    u, v, w, rho = row['u'], row['v'], row['w'], row['rho']

    dr_dx = np.gradient(row['r'], spacing_x, axis=0)
    dr_dy = np.gradient(row['r'], spacing_y, axis=1)
    dr_dz = np.gradient(row['r'], spacing_z, axis=2)
    dq_dx = np.gradient(row['q'], spacing_x, axis=0)
    dq_dy = np.gradient(row['q'], spacing_y, axis=1)
    dq_dz = np.gradient(row['q'], spacing_z, axis=2)

    gridded_terms = (
        # Lateral advection Paw U style
        u*rho*dr_dx + v*rho*dr_dy,
        # Vertical advection Paw U style
        w*rho*dr_dz,
        # Lateral advection Sun style
        u*dq_dx + v*dq_dy,
        # Vertical advection Sun style
        w*dq_dz,
        # Lateral turb. flux divergence
        np.gradient(row['u_q_'], spacing_x, axis=0) + np.gradient(row['v_q_'], spacing_y, axis=1),
        # Vertical turb. flux divergence
        np.gradient(row['w_q_'], spacing_z, axis=2),
    )
    spatial_means = tuple(np.nanmean(grid) for grid in gridded_terms)
    spatial_medians = tuple(np.nanmedian(grid) for grid in gridded_terms)
    return spatial_means + spatial_medians


advective_terms_3d = fields_df.apply(_advective_terms_for_row, axis = 1)
# Expand the series of 12-tuples into one named column per statistic.
advective_terms_3d = pd.DataFrame(
    [list(values) for values in advective_terms_3d.values],
    columns=[
        f'{term} ({statistic})'
        for statistic in ('mean', 'median')
        for term in _TERM_NAMES_3D
    ]
)
advective_terms_3d.index = fields_df.index

In [None]:
# Scale every term by 7 (presumably the 7 m control-volume height used later as
# CV_HEIGHT; confirm). NOTE: this rebinding is not idempotent; re-running the cell
# scales the table again.
advective_terms_3d = advective_terms_3d.mul(7)
advective_terms_3d

## Calculate vertical velocity from horizontal divergence.

Following Vickers and Mahrt (2006), for incompressible mass continuity, the time-averaged vertical velocity at height $h$ can be estimated from the horizontal divergence integrated from the surface up to $h$:

$$ w(h) = - \int_{z=0}^{z=h} (\frac{\partial u}{\partial x} + \frac{\partial v}{\partial y}) dz$$

In [None]:
def get_w_for_row(row):
    """Estimate vertical velocity from horizontal divergence for one timestep.

    Approximates w = -integral of (du/dx + dv/dy) dz over the gridded column as
    (depth-mean divergence) * (column depth), where the depth is the span of
    instrument heights in `instrument_loc_limited_heights`.

    The previous implementation summed the divergence over every z-level and then
    multiplied by the full column depth, which overstated the integral by a factor
    equal to the number of z-levels.

    Parameters
    ----------
    row : mapping with 'u' and 'v' entries, each a 3D array whose axis 2 is the
        vertical axis (axis 0 is differentiated with spacing_x, axis 1 with
        spacing_y; NOTE(review): confirm this matches the meshgrid layout).

    Returns
    -------
    float
        Median over the horizontal grid of the estimated w at the top of the
        column; NaN if no grid column has valid data.
    """
    horizontal_divergence = (
        np.gradient(row['u'], spacing_x, axis=0)
        + np.gradient(row['v'], spacing_y, axis=1)
    )
    column_depth = (
        instrument_loc_limited_heights.z.max()
        - instrument_loc_limited_heights.z.min()
    )

    # Average over the valid z-levels only. Columns with no valid level become
    # NaN (0/0), so they are ignored by the median instead of contributing a zero.
    valid_levels = np.sum(~np.isnan(horizontal_divergence), axis=2)
    with np.errstate(invalid='ignore', divide='ignore'):
        depth_mean_divergence = np.nansum(horizontal_divergence, axis=2) / valid_levels

    w_at_top = -depth_mean_divergence * column_depth
    return np.nanmedian(w_at_top)

# One divergence-based vertical velocity estimate per timestep.
w_from_div = fields_df.apply(get_w_for_row, axis = 1)

# Diurnal median of the estimate.
_w_from_div_table = pd.DataFrame(w_from_div.rename('w')).reset_index()
alt.Chart(_w_from_div_table).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(w):Q'),
)

## Examine results

In [None]:
def _diurnal_median_panel(terms, statistic, turbulent):
    """Diurnal-median line panel for a subset of the advective_terms_3d columns.

    terms:      table to plot (indexed by time).
    statistic:  'mean' or 'median'; selects columns whose name contains it.
    turbulent:  True keeps the columns containing 'turb', False keeps the others.
    """
    folded_columns = [
        name for name in list(advective_terms_3d.columns)
        if ('turb' in name) == turbulent and statistic in name
    ]
    return alt.Chart(
        terms.reset_index()
    ).transform_fold(
        folded_columns
    ).mark_line().encode(
        x=alt.X('hoursminutes(time):T'),
        y=alt.Y('median(value):Q'),
        color=alt.Color('key:N'),
    ).properties(width=200, height = 150)


# NOTE(review): the advection panels plot advective_terms_3d/7 (undoing the earlier
# *7 scaling) while the turbulent panels plot the scaled values; confirm intended.
mean_chart = (
    _diurnal_median_panel(advective_terms_3d/7, 'mean', turbulent=False)
    | _diurnal_median_panel(advective_terms_3d, 'mean', turbulent=True)
).resolve_scale(color='independent')

median_chart = (
    _diurnal_median_panel(advective_terms_3d/7, 'median', turbulent=False)
    | _diurnal_median_panel(advective_terms_3d, 'median', turbulent=True)
).resolve_scale(color='independent')

(mean_chart & median_chart).resolve_scale(y='shared', x='shared')

In [None]:
# Raw (non-aggregated) time series of every 3D term for 1-5 February 2023.
_example_window = advective_terms_3d.loc['20230201': '20230205'].reset_index()
alt.Chart(_example_window).transform_fold(
    list(advective_terms_3d.columns)
).mark_line().encode(
    x=alt.X('time:T'),
    y=alt.Y('value:Q'),
    color=alt.Color('key:N'),
)

# 2D Differential Solution

## 3m height

Repeat all the steps we did above, but just for a single plane at 3m

In [None]:
HEIGHTS_2D = [3]

# Join every measurement table into one wide frame, then keep only the top-level
# column entries listed in HEIGHTS_2D.
df = wind_field_df
for _measurement_table in (
    turb_flux_field_df,
    temp_turb_flux_field_df,
    abs_hum_field_df,
    adv_flux_field_df,
    dryair_density_field_df,
    temp_field_df,
    mixing_ratio_field_df,
):
    df = df.join(_measurement_table)
data_df_2d = df[HEIGHTS_2D]

# Instrument locations restricted to the same heights (first index level).
_location_heights = instrument_loc_df.index.get_level_values(0)
instrument_loc_limited_heights_2d = instrument_loc_df[_location_heights.isin(HEIGHTS_2D)]

# Reshape the locations into a single row whose columns can be joined onto the
# measurement frame; the third column level is named 'measurement'.
xxx = pd.DataFrame(instrument_loc_limited_heights_2d.unstack().unstack()).T
xxx.columns = xxx.columns.swaplevel(0,2).set_names('measurement', level=2)
# Repeat that row once per timestep so the x, y, z info sits next to every measurement row.
instrument_loc_limited_heights_2d_repeated = xxx.loc[xxx.index.repeat(len(data_df_2d))]
instrument_loc_limited_heights_2d_repeated.index = data_df_2d.index
data_df_2d = data_df_2d.join(instrument_loc_limited_heights_2d_repeated)

# Regular interpolation grid spanning the instrument footprint. HORIZ_GRID_SPACING
# is used here as the number of grid points per axis.
_grid_x = np.linspace(
    instrument_loc_limited_heights_2d.x.min(),
    instrument_loc_limited_heights_2d.x.max(),
    HORIZ_GRID_SPACING,
)
_grid_y = np.linspace(
    instrument_loc_limited_heights_2d.y.min(),
    instrument_loc_limited_heights_2d.y.max(),
    HORIZ_GRID_SPACING,
)
xx_2d, yy_2d = np.meshgrid(_grid_x, _grid_y)
spacing_x_2d = np.diff(xx_2d[0,:]).mean()
spacing_y_2d = np.diff(yy_2d[:,0]).mean()
# (n_instruments, 2) array of measurement x, y locations.
points_2d = np.vstack((instrument_loc_limited_heights_2d.x, instrument_loc_limited_heights_2d.y)).T

# Interpolate each measured quantity onto the (xx_2d, yy_2d) grid, one grid per
# timestep, by linear interpolation between the instrument locations.
# Order: wind components (u, v, w), turbulent water vapour fluxes
# (u_h2o_, v_h2o_, w_h2o_), then the scalar fields (rho, r, q).
(
    u_interp_2d, v_interp_2d, w_interp_2d,
    u_q__interp_2d, v_q__interp_2d, w_q__interp_2d,
    rho_interp_2d, r_interp_2d, q_interp_2d,
) = (
    data_df_2d.loc[:, (slice(None),slice(None),[_measurement])].apply(
        lambda row:  interpolate.griddata(points_2d, row.values, (xx_2d, yy_2d), method='linear'),
        axis=1
    )
    for _measurement in ('u', 'v', 'w', 'u_h2o_', 'v_h2o_', 'w_h2o_', 'rho', 'r', 'q')
)

# Collect the interpolated grids into one dataframe: one row per timestep, one
# column per field, each cell holding that timestep's 2D grid.
_gridded_fields_2d = {
    'u': u_interp_2d,
    'v': v_interp_2d,
    'w': w_interp_2d,
    'u_q_': u_q__interp_2d,
    'v_q_': v_q__interp_2d,
    'w_q_': w_q__interp_2d,
    'rho': rho_interp_2d,
    'r': r_interp_2d,
    'q': q_interp_2d,
}
# The series name becomes the column label when joined below.
for _label, _field_series in _gridded_fields_2d.items():
    _field_series.name = _label

_ordered_fields_2d = list(_gridded_fields_2d.values())
fields_df_2d = pd.DataFrame(_ordered_fields_2d[0])
for _field_series in _ordered_fields_2d[1:]:
    fields_df_2d = fields_df_2d.join(_field_series)

# For every timestep, evaluate the gridded lateral terms on this plane and reduce
# each grid to its spatial mean and spatial median (NaNs ignored).
# Each row yields 6 numbers, in this order:
#   means of:   lateral_advection_pawu, lateral_advection_sun, lateral_turb_flux_div
#   medians of: lateral_advection_pawu, lateral_advection_sun, lateral_turb_flux_div
# No vertical terms are computed on a single plane.
def _lateral_terms_3m_plane(row):
    """Return (three spatial means, then three spatial medians) for one timestep.

    The grids come from np.meshgrid(x, y) with the default indexing='xy', so they
    are laid out as (y, x): x varies along axis 1 and y along axis 0 (the same
    convention spacing_x_2d / spacing_y_2d are measured with). Derivatives in x
    are therefore taken along axis 1 and derivatives in y along axis 0. The
    previous code had these two axes swapped.
    """
    u, v, rho = row['u'], row['v'], row['rho']

    dr_dx = np.gradient(row['r'], spacing_x_2d, axis=1)
    dr_dy = np.gradient(row['r'], spacing_y_2d, axis=0)
    dq_dx = np.gradient(row['q'], spacing_x_2d, axis=1)
    dq_dy = np.gradient(row['q'], spacing_y_2d, axis=0)

    gridded_terms = (
        # Lateral advection Paw U style
        u*rho*dr_dx + v*rho*dr_dy,
        # Lateral advection Sun style
        u*dq_dx + v*dq_dy,
        # Lateral turb. flux divergence
        np.gradient(row['u_q_'], spacing_x_2d, axis=1) + np.gradient(row['v_q_'], spacing_y_2d, axis=0),
    )
    spatial_means = tuple(np.nanmean(grid) for grid in gridded_terms)
    spatial_medians = tuple(np.nanmedian(grid) for grid in gridded_terms)
    return spatial_means + spatial_medians


advective_terms_2d_3m = fields_df_2d.apply(_lateral_terms_3m_plane, axis = 1)
# Expand the series of 6-tuples into one named column per statistic.
advective_terms_2d_3m = pd.DataFrame(
    [list(values) for values in advective_terms_2d_3m.values],
    columns=[
        'lateral_advection_pawu (mean)',
        'lateral_advection_sun (mean)',
        'lateral_turb_flux_div (mean)',
        'lateral_advection_pawu (median)',
        'lateral_advection_sun (median)',
        'lateral_turb_flux_div (median)',
    ]
)
advective_terms_2d_3m.index = fields_df_2d.index

### PLOT: Example of a 2d interpolated wind field

In [None]:

# Compare one timestep's interpolated wind field against the raw measurements.
# The timestep is chosen by position; .iloc is used throughout because the series
# are indexed by time, and plain `series[100]` on a non-integer index relies on a
# deprecated positional fallback.
_EXAMPLE_POSITION = 100
_measured_row = data_df_2d.iloc[_EXAMPLE_POSITION]

# Interpolated field, thinned to every 4th grid row for readability.
plt.quiver(
    xx_2d[::4],
    yy_2d[::4],
    u_interp_2d.iloc[_EXAMPLE_POSITION][::4],
    v_interp_2d.iloc[_EXAMPLE_POSITION][::4],
    label='interpolated'
)
# Measured wind vectors at the instrument locations.
plt.quiver(
    _measured_row[(slice(None),slice(None),'x')].values.astype('float'),
    _measured_row[(slice(None),slice(None),'y')].values.astype('float'),
    _measured_row[(slice(None),slice(None),'u')].values.astype('float'),
    _measured_row[(slice(None),slice(None),'v')].values.astype('float'),
    label='measured',
    color='red'
)

## 10m height

Repeat all the steps we did above, but just for a single plane at 10m

In [None]:
HEIGHTS_2D = [10]

# Join every measurement table into one wide frame, then keep only the top-level
# column entries listed in HEIGHTS_2D.
df = wind_field_df
for _measurement_table in (
    turb_flux_field_df,
    temp_turb_flux_field_df,
    abs_hum_field_df,
    adv_flux_field_df,
    dryair_density_field_df,
    temp_field_df,
    mixing_ratio_field_df,
):
    df = df.join(_measurement_table)
data_df_2d = df[HEIGHTS_2D]

# Instrument locations restricted to the same heights (first index level).
_location_heights = instrument_loc_df.index.get_level_values(0)
instrument_loc_limited_heights_2d = instrument_loc_df[_location_heights.isin(HEIGHTS_2D)]

# Reshape the locations into a single row whose columns can be joined onto the
# measurement frame; the third column level is named 'measurement'.
xxx = pd.DataFrame(instrument_loc_limited_heights_2d.unstack().unstack()).T
xxx.columns = xxx.columns.swaplevel(0,2).set_names('measurement', level=2)
# Repeat that row once per timestep so the x, y, z info sits next to every measurement row.
instrument_loc_limited_heights_2d_repeated = xxx.loc[xxx.index.repeat(len(data_df_2d))]
instrument_loc_limited_heights_2d_repeated.index = data_df_2d.index
data_df_2d = data_df_2d.join(instrument_loc_limited_heights_2d_repeated)

# Regular interpolation grid spanning the instrument footprint. HORIZ_GRID_SPACING
# is used here as the number of grid points per axis.
_grid_x = np.linspace(
    instrument_loc_limited_heights_2d.x.min(),
    instrument_loc_limited_heights_2d.x.max(),
    HORIZ_GRID_SPACING,
)
_grid_y = np.linspace(
    instrument_loc_limited_heights_2d.y.min(),
    instrument_loc_limited_heights_2d.y.max(),
    HORIZ_GRID_SPACING,
)
xx_2d, yy_2d = np.meshgrid(_grid_x, _grid_y)
spacing_x_2d = np.diff(xx_2d[0,:]).mean()
spacing_y_2d = np.diff(yy_2d[:,0]).mean()
# (n_instruments, 2) array of measurement x, y locations.
points_2d = np.vstack((instrument_loc_limited_heights_2d.x, instrument_loc_limited_heights_2d.y)).T

# Interpolate each measured quantity onto the (xx_2d, yy_2d) grid, one grid per
# timestep, by linear interpolation between the instrument locations.
# Order: wind components (u, v, w), turbulent water vapour fluxes
# (u_h2o_, v_h2o_, w_h2o_), then the scalar fields (rho, r, q).
(
    u_interp_2d, v_interp_2d, w_interp_2d,
    u_q__interp_2d, v_q__interp_2d, w_q__interp_2d,
    rho_interp_2d, r_interp_2d, q_interp_2d,
) = (
    data_df_2d.loc[:, (slice(None),slice(None),[_measurement])].apply(
        lambda row:  interpolate.griddata(points_2d, row.values, (xx_2d, yy_2d), method='linear'),
        axis=1
    )
    for _measurement in ('u', 'v', 'w', 'u_h2o_', 'v_h2o_', 'w_h2o_', 'rho', 'r', 'q')
)

# Collect the interpolated grids into one dataframe: one row per timestep, one
# column per field, each cell holding that timestep's 2D grid.
_gridded_fields_2d = {
    'u': u_interp_2d,
    'v': v_interp_2d,
    'w': w_interp_2d,
    'u_q_': u_q__interp_2d,
    'v_q_': v_q__interp_2d,
    'w_q_': w_q__interp_2d,
    'rho': rho_interp_2d,
    'r': r_interp_2d,
    'q': q_interp_2d,
}
# The series name becomes the column label when joined below.
for _label, _field_series in _gridded_fields_2d.items():
    _field_series.name = _label

_ordered_fields_2d = list(_gridded_fields_2d.values())
fields_df_2d = pd.DataFrame(_ordered_fields_2d[0])
for _field_series in _ordered_fields_2d[1:]:
    fields_df_2d = fields_df_2d.join(_field_series)

# For every timestep, evaluate the gridded lateral terms on this plane and reduce
# each grid to its spatial mean and spatial median (NaNs ignored).
# Each row yields 6 numbers, in this order:
#   means of:   lateral_advection_pawu, lateral_advection_sun, lateral_turb_flux_div
#   medians of: lateral_advection_pawu, lateral_advection_sun, lateral_turb_flux_div
# No vertical terms are computed on a single plane.
def _lateral_terms_10m_plane(row):
    """Return (three spatial means, then three spatial medians) for one timestep.

    The grids come from np.meshgrid(x, y) with the default indexing='xy', so they
    are laid out as (y, x): x varies along axis 1 and y along axis 0 (the same
    convention spacing_x_2d / spacing_y_2d are measured with). Derivatives in x
    are therefore taken along axis 1 and derivatives in y along axis 0. The
    previous code had these two axes swapped.
    """
    u, v, rho = row['u'], row['v'], row['rho']

    dr_dx = np.gradient(row['r'], spacing_x_2d, axis=1)
    dr_dy = np.gradient(row['r'], spacing_y_2d, axis=0)
    dq_dx = np.gradient(row['q'], spacing_x_2d, axis=1)
    dq_dy = np.gradient(row['q'], spacing_y_2d, axis=0)

    gridded_terms = (
        # Lateral advection Paw U style
        u*rho*dr_dx + v*rho*dr_dy,
        # Lateral advection Sun style
        u*dq_dx + v*dq_dy,
        # Lateral turb. flux divergence
        np.gradient(row['u_q_'], spacing_x_2d, axis=1) + np.gradient(row['v_q_'], spacing_y_2d, axis=0),
    )
    spatial_means = tuple(np.nanmean(grid) for grid in gridded_terms)
    spatial_medians = tuple(np.nanmedian(grid) for grid in gridded_terms)
    return spatial_means + spatial_medians


advective_terms_2d_10m = fields_df_2d.apply(_lateral_terms_10m_plane, axis = 1)
# Expand the series of 6-tuples into one named column per statistic.
advective_terms_2d_10m = pd.DataFrame(
    [list(values) for values in advective_terms_2d_10m.values],
    columns=[
        'lateral_advection_pawu (mean)',
        'lateral_advection_sun (mean)',
        'lateral_turb_flux_div (mean)',
        'lateral_advection_pawu (median)',
        'lateral_advection_sun (median)',
        'lateral_turb_flux_div (median)',
    ]
)
advective_terms_2d_10m.index = fields_df_2d.index

## Average the two planes

In [None]:
# Display the per-timestep lateral terms computed on the 3 m plane.
advective_terms_2d_3m

In [None]:
# Display the per-timestep lateral terms computed on the 10 m plane.
advective_terms_2d_10m

In [None]:
# Average the 3 m and 10 m plane results, then scale by 7 (presumably the 7 m
# control-volume height, matching the scaling applied to the 3D terms; confirm).
advective_terms_2d = (advective_terms_2d_3m + advective_terms_2d_10m).div(2).mul(7)

## Examine results

In [None]:
# Diurnal medians of the plane-averaged 2D terms: advection columns on the left,
# turbulent flux divergence columns on the right.
_terms_2d_columns = list(advective_terms_2d.columns)
_advection_columns_2d = [name for name in _terms_2d_columns if 'turb' not in name]
_turbulence_columns_2d = [name for name in _terms_2d_columns if 'turb' in name]

_advection_panel_2d = alt.Chart(
    advective_terms_2d.reset_index()
).transform_fold(_advection_columns_2d).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(value):Q'),
    color=alt.Color('key:N'),
).properties(width=200, height = 150)

_turbulence_panel_2d = alt.Chart(
    advective_terms_2d.reset_index()
).transform_fold(_turbulence_columns_2d).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(value):Q'),
    color=alt.Color('key:N'),
).properties(width=200, height = 150)

_advection_panel_2d | _turbulence_panel_2d

In [None]:
# Same two-panel view for the 3D terms, for side-by-side comparison with the 2D
# result: advection columns on the left, turbulent flux divergence on the right.
_terms_3d_columns = list(advective_terms_3d.columns)
_advection_columns_3d = [name for name in _terms_3d_columns if 'turb' not in name]
_turbulence_columns_3d = [name for name in _terms_3d_columns if 'turb' in name]

_advection_panel_3d = alt.Chart(
    advective_terms_3d.reset_index()
).transform_fold(_advection_columns_3d).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(value):Q'),
    color=alt.Color('key:N'),
).properties(width=200, height = 150)

_turbulence_panel_3d = alt.Chart(
    advective_terms_3d.reset_index()
).transform_fold(_turbulence_columns_3d).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(value):Q'),
    color=alt.Color('key:N'),
).properties(width=200, height = 150)

_advection_panel_3d | _turbulence_panel_3d

# Surface Integral Solution

## Define area of triangular prism faces

In [None]:
# Geometry of the triangular-prism control volume.
CV_HEIGHT = 7*units('m')
# A4 and A5 are the two triangular faces (used below with the 10 m and 3 m
# vertical fluxes respectively).
A4 = A5 = 580.2 * units('m^2')
# A1-A3 are the three lateral faces (presumably 37 m wide by 7 m tall; confirm).
A1 = A2 = A3 = 37 * 7 * units('m^2')
VOLUME = A4*CV_HEIGHT
A1, A2, A3, A4, A5, VOLUME

## Define normal vectors to the triangular prism faces

In [None]:
def _unit_vector(angle_deg):
    """Dimensionless 2D unit vector (x, y) at `angle_deg`, measured from the +x axis."""
    angle_rad = np.deg2rad(angle_deg)
    return np.array([np.cos(angle_rad), np.sin(angle_rad)])*units(None)


# ELI'S angles
n1 = _unit_vector(258.7)
n2 = _unit_vector(20.1)
n3 = _unit_vector(139.26)

# DANNY'S angles (alternative set, kept for reference)
# n1 = _unit_vector(269.5)
# n2 = _unit_vector(19.5)
# n3 = _unit_vector(143.5)

In [None]:

# Turn the instrument table into a GeoDataFrame of 3D points built from its x, y, z
# columns (declared as EPSG:32613), then reproject to EPSG:4326.
_instrument_points = gpd.points_from_xy(
    instrument_loc_df.x,
    instrument_loc_df.y,
    instrument_loc_df.z
)
instrument_loc_df = gpd.GeoDataFrame(
    instrument_loc_df,
    geometry = _instrument_points,
    crs = 'EPSG:32613'
).to_crs('EPSG:4326')
instrument_loc_df

## Calculate storage term

In [None]:
# Specific humidity and air density on tower 'c', averaged per timestep over the
# measurements between 3 and 10 (inclusive) in `height`, pivoted to one column
# per measurement.
_tower_c_state = tidy_df[
    tidy_df.measurement.isin(['specific humidity', 'air density'])
].query("tower == 'c' and height <= 10 and height >= 3")

storage_change_term = (
    _tower_c_state
    .groupby(['time', 'tower', 'measurement'])[['value']]
    .mean()
    .reset_index()
    .pivot_table(index='time', values='value', columns='measurement')
)

In [None]:
# Timestep length, taken from the first interval of the time index.
# NOTE(review): this assumes every interval has that same length; confirm.
_first_interval = storage_change_term.index.diff()[1]
timestep = _first_interval.seconds * units('seconds')
print(timestep)

# Absolute humidity = specific humidity * air density.
_specific_humidity = storage_change_term['specific humidity'].values * units("g/g")
_air_density = storage_change_term['air density'].values * units("kg/m^3")
absolute_humidity = _specific_humidity * _air_density

# Change in water held by the control volume between consecutive timesteps
# (first entry is NaN), expressed as a rate.
delta_humidity = np.diff(absolute_humidity * VOLUME, prepend=np.nan)
dq_dt = delta_humidity / timestep

In [None]:
# Attach the derived series to the storage table, converted to the units named in
# the column labels.
storage_change_term['absolute humidity (g/m^3)'] = absolute_humidity.to('g/m^3')
# Rate of change of stored water; the first row is NaN because the difference is
# prepended with NaN when it is computed.
storage_change_term['delta water storage (g/s)'] = dq_dt.to('grams/second')

## Calculate advective flux

In [None]:
def _advective_flux(height, tower):
    """Wind components at (height, tower) multiplied row-wise by the 'q' series there.

    The resulting columns are renamed u -> uq, v -> vq, w -> wq.
    """
    winds = wind_field_df[height][tower]
    humidity = abs_hum_field_df[height][tower]['q']
    return winds.multiply(humidity, axis=0).rename(
        columns={'u': 'uq', 'v': 'vq','w': 'wq',}
    )


advective_flux_3m_ue = _advective_flux(3, 'ue')
advective_flux_10m_ue = _advective_flux(10, 'ue')

advective_flux_3m_uw = _advective_flux(3, 'uw')
advective_flux_10m_uw = _advective_flux(10, 'uw')

advective_flux_3m_d = _advective_flux(3, 'd')
advective_flux_10m_d = _advective_flux(10, 'd')

advective_flux_3m_c = _advective_flux(3, 'c')
advective_flux_10m_c = _advective_flux(10, 'c')

In [None]:
def _weighted_corner_sum(corner_fluxes, component, weight=0.25):
    """Return weight * (sum of `component` over the given corner flux tables)."""
    running_total = corner_fluxes[0][component]
    for corner in corner_fluxes[1:]:
        running_total = running_total + corner[component]
    return weight*running_total


# Each face's flux is the average of the measurements at its four corners.
# Lateral faces combine the 3 m and 10 m measurements on two towers.
_face1_corners = (advective_flux_3m_uw, advective_flux_10m_uw, advective_flux_3m_d, advective_flux_10m_d)
_face2_corners = (advective_flux_3m_ue, advective_flux_10m_ue, advective_flux_3m_d, advective_flux_10m_d)
_face3_corners = (advective_flux_3m_ue, advective_flux_10m_ue, advective_flux_3m_uw, advective_flux_10m_uw)
# Faces 4 and 5 combine all four towers at 10 m and 3 m respectively.
_face4_corners = (advective_flux_10m_ue, advective_flux_10m_uw, advective_flux_10m_c, advective_flux_10m_d)
_face5_corners = (advective_flux_3m_ue, advective_flux_3m_uw, advective_flux_3m_c, advective_flux_3m_d)

face1_avg_u_flux = _weighted_corner_sum(_face1_corners, 'uq')
face1_avg_v_flux = _weighted_corner_sum(_face1_corners, 'vq')

face2_avg_u_flux = _weighted_corner_sum(_face2_corners, 'uq')
face2_avg_v_flux = _weighted_corner_sum(_face2_corners, 'vq')

face3_avg_u_flux = _weighted_corner_sum(_face3_corners, 'uq')
face3_avg_v_flux = _weighted_corner_sum(_face3_corners, 'vq')

face4_avg_w_flux = _weighted_corner_sum(_face4_corners, 'wq')
# The 3 m face carries the opposite sign to the 10 m face.
face5_avg_w_flux = _weighted_corner_sum(_face5_corners, 'wq', weight=-0.25)

# Pair the u and v components into (n_timesteps, 2) arrays of flux vectors.
face1_avg_adv_flux = np.array([face1_avg_u_flux, face1_avg_v_flux]).T
face2_avg_adv_flux = np.array([face2_avg_u_flux, face2_avg_v_flux]).T
face3_avg_adv_flux = np.array([face3_avg_u_flux, face3_avg_v_flux]).T

# Project the lateral flux vectors onto the face-normal vectors
face1_projected_adv_flux = np.dot(face1_avg_adv_flux, n1.m)
face2_projected_adv_flux = np.dot(face2_avg_adv_flux, n2.m)
face3_projected_adv_flux = np.dot(face3_avg_adv_flux, n3.m)

# Area-weighted sum of the face fluxes divided by the control volume.
_lateral_adv_outflow = (
    face1_projected_adv_flux*A1.m +
    face2_projected_adv_flux*A2.m +
    face3_projected_adv_flux*A3.m
)
total_lateral_adv_divergence = _lateral_adv_outflow / VOLUME.m

# NOTE(review): the lateral sum uses unit-less magnitudes (.m) while this line uses
# the unit-carrying A4, A5 and VOLUME directly; confirm the mixed handling is intended.
total_vertical_adv_divergence = (face4_avg_w_flux*A4 + face5_avg_w_flux*A5)  / VOLUME

## Calculate turbulent flux divergence

In [None]:
def _weighted_turb_corner_sum(corners, component, weight=0.25):
    """Return weight * (sum of `component` over the given (height, tower) corners).

    Values are read from turb_flux_field_df[height][tower][component].
    """
    running_total = None
    for height, tower in corners:
        corner_value = turb_flux_field_df[height][tower][component]
        running_total = corner_value if running_total is None else running_total + corner_value
    return weight*running_total


# Each face's turbulent flux is the average of the measurements at its four corners.
# Lateral faces combine the 3 m and 10 m measurements on two towers.
_face1_turb_corners = ((3, 'uw'), (10, 'uw'), (3, 'd'), (10, 'd'))
_face2_turb_corners = ((3, 'ue'), (10, 'ue'), (3, 'd'), (10, 'd'))
_face3_turb_corners = ((3, 'ue'), (10, 'ue'), (3, 'uw'), (10, 'uw'))
# Faces 4 and 5 combine all four towers at 10 m and 3 m respectively.
_face4_turb_corners = ((10, 'ue'), (10, 'uw'), (10, 'c'), (10, 'd'))
_face5_turb_corners = ((3, 'ue'), (3, 'uw'), (3, 'c'), (3, 'd'))

face1_avg_u_turb_flux = _weighted_turb_corner_sum(_face1_turb_corners, 'u_h2o_')
face1_avg_v_turb_flux = _weighted_turb_corner_sum(_face1_turb_corners, 'v_h2o_')

face2_avg_u_turb_flux = _weighted_turb_corner_sum(_face2_turb_corners, 'u_h2o_')
face2_avg_v_turb_flux = _weighted_turb_corner_sum(_face2_turb_corners, 'v_h2o_')

face3_avg_u_turb_flux = _weighted_turb_corner_sum(_face3_turb_corners, 'u_h2o_')
face3_avg_v_turb_flux = _weighted_turb_corner_sum(_face3_turb_corners, 'v_h2o_')

face4_avg_w_turb_flux = _weighted_turb_corner_sum(_face4_turb_corners, 'w_h2o_')
# The 3 m face carries the opposite sign to the 10 m face.
face5_avg_w_turb_flux = _weighted_turb_corner_sum(_face5_turb_corners, 'w_h2o_', weight=-0.25)

# Pair the u and v components into (n_timesteps, 2) arrays of flux vectors.
# These get their own *_turb_flux names: the previous code stored them in
# face{1,2,3}_avg_adv_flux, silently overwriting the advective flux vectors.
face1_avg_turb_flux = np.array([face1_avg_u_turb_flux, face1_avg_v_turb_flux]).T
face2_avg_turb_flux = np.array([face2_avg_u_turb_flux, face2_avg_v_turb_flux]).T
face3_avg_turb_flux = np.array([face3_avg_u_turb_flux, face3_avg_v_turb_flux]).T

# Project the lateral flux vectors onto the face-normal vectors
face1_projected_turb_flux = np.dot(face1_avg_turb_flux, n1.m)
face2_projected_turb_flux = np.dot(face2_avg_turb_flux, n2.m)
face3_projected_turb_flux = np.dot(face3_avg_turb_flux, n3.m)

# Area-weighted sum of the face fluxes divided by the control volume.
total_lateral_turb_divergence = (
    face1_projected_turb_flux*A1.m +
    face2_projected_turb_flux*A2.m +
    face3_projected_turb_flux*A3.m
) / VOLUME.m

# NOTE(review): the lateral sum uses unit-less magnitudes (.m) while this line uses
# the unit-carrying A4, A5 and VOLUME directly; confirm the mixed handling is intended.
total_vertical_turb_divergence = (face4_avg_w_turb_flux*A4 + face5_avg_w_turb_flux*A5)  / VOLUME

## Examine results

In [None]:
# Diurnal medians of the surface-integral turbulent flux divergence: vertical
# (left) and lateral (right), on shared axes.
_vertical_turb_table = pd.DataFrame(total_vertical_turb_divergence).reset_index()
# The lateral result is a bare array, so borrow the time index of the vertical
# series and name its single column.
_lateral_turb_table = (
    pd.DataFrame(total_lateral_turb_divergence)
    .reset_index()
    .assign(time=total_vertical_turb_divergence.index)
    .rename(columns={0:'u_q_'})
)

_vertical_turb_panel = alt.Chart(_vertical_turb_table).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(w_h2o_):Q'),
).properties(title='Vertical Turb. Flux Divergence')

_lateral_turb_panel = alt.Chart(_lateral_turb_table).mark_line().encode(
    x=alt.X('hoursminutes(time):T'),
    y=alt.Y('median(u_q_):Q'),
).properties(title='Lateral Turb. Flux Divergence')

(_vertical_turb_panel | _lateral_turb_panel).resolve_scale(y='shared', x='shared')

In [None]:
def _mean_and_median_lines(table, column):
    """Layer the diurnal mean (default colour) and diurnal median (red) of `column`."""
    mean_line = alt.Chart(table).mark_line().encode(
        x=alt.X('hoursminutes(time):T'),
        y=alt.Y(f'mean({column}):Q'),
    )
    median_line = alt.Chart(table).mark_line(color='red').encode(
        x=alt.X('hoursminutes(time):T'),
        y=alt.Y(f'median({column}):Q'),
    )
    return mean_line + median_line


_vertical_adv_table = pd.DataFrame(total_vertical_adv_divergence).reset_index()
# The lateral result is a bare array, so borrow the time index of the vertical
# series and name its single column.
_lateral_adv_table = (
    pd.DataFrame(total_lateral_adv_divergence)
    .reset_index()
    .assign(time=total_vertical_adv_divergence.index)
    .rename(columns={0:'uq'})
)

(
    _mean_and_median_lines(_vertical_adv_table, 'wq').properties(title='Vertical Advection')
    | _mean_and_median_lines(_lateral_adv_table, 'uq').properties(title='Lateral Advection')
).resolve_scale(y='shared', x='shared')

# Perform planar fitting?

## Apply multiple algorithms

In [None]:
from sublimpy import extrautils

#### Plot original w (planar fitted in preprocessing)

In [None]:
# Median diurnal cycle of w on tower c, one line per height, using the values
# already in tidy_df. Left panel: up-valley wind times; right panel: down-valley.
src = (
    tidy_df
    .loc[tidy_df.measurement == 'w']
    .query("tower == 'c'")
    .set_index('time')
)
upvalley_panel, downvalley_panel = (
    alt.Chart(src[src.index.isin(wind_times)].reset_index()).mark_line().encode(
        alt.X('hoursminutes(time):T'),
        alt.Y('median(value):Q'),
        alt.Color('height:O').scale(scheme='rainbow')
    ).properties(width=300, height = 150)
    for wind_times in (upvalley_wind_times, downvalley_wind_times)
)
vertical_velocities_normal_planar_fit_chart = upvalley_panel | downvalley_panel
vertical_velocities_normal_planar_fit_chart

#### Planar fit per month, split between downvalley, upvalley, and otherwise winds

In [None]:
planar_fitted_valleydir_dfs = []
variable_sets = [
    # ('u_1m_c', 'v_1m_c', 'w_1m_c'),
    ('u_2m_c', 'v_2m_c', 'w_2m_c'),
    ('u_3m_c',   'v_3m_c',   'w_3m_c'),
    ('u_5m_c',   'v_5m_c',   'w_5m_c'),
    ('u_10m_c',   'v_10m_c',   'w_10m_c'),
    ('u_15m_c',   'v_15m_c',   'w_15m_c'),
    ('u_20m_c',   'v_20m_c',   'w_20m_c'),
    
    # ('u_1m_uw', 'v_1m_uw', 'w_1m_uw'),
    # ('u_2m_uw', 'v_2m_uw', 'w_2m_uw'),
    # ('u_2_5m_uw', 'v_2_5m_uw', 'w_2_5m_uw'),
    ('u_3m_uw',   'v_3m_uw',   'w_3m_uw'),
    ('u_10m_uw',   'v_10m_uw',   'w_10m_uw'),
    
    # ('u_1m_ue', 'v_1m_ue', 'w_1m_ue'),
    # ('u_2m_ue', 'v_2m_ue', 'w_2m_ue'),
    ('u_3m_ue',   'v_3m_ue',   'w_3m_ue'),
    ('u_10m_ue',   'v_10m_ue',   'w_10m_ue'),

    # ('u_1m_d', 'v_1m_d', 'w_1m_d'),
    # ('u_2m_d', 'v_2m_d', 'w_2m_d'),
    ('u_3m_d',   'v_3m_d',   'w_3m_d'),
    ('u_10m_d',   'v_10m_d',   'w_10m_d'),
]
VARIABLE_NAMES = list(np.array(variable_sets).flatten())


def _planar_fit_one_month(direction_subset, month, year, u_VAR, v_VAR, w_VAR):
    """Planar-fit one month of one wind-direction subset and return it in tidy form.

    Parameters
    ----------
    direction_subset : pd.DataFrame
        Wide frame indexed by time with the columns `u_VAR`, `v_VAR`, `w_VAR`.
    month, year : int
        Calendar month and year to keep.
    u_VAR, v_VAR, w_VAR : str
        Column names of the three wind components.

    Returns
    -------
    pd.DataFrame or None
        Melted frame (index time; columns variable, value, height, measurement,
        tower) holding the fitted components, or None when the month has no
        row with all three components present.
    """
    in_month = (direction_subset.index.month == month) & (direction_subset.index.year == year)
    # copy so the column assignments below never write into a slice
    monthly = direction_subset[in_month].copy()
    # Emptiness is checked AFTER the month/year filter (as the sector cells do),
    # so an empty month is skipped instead of being handed to the fit.
    if len(monthly.dropna()) == 0:
        return None
    u, v, w = extrautils.calculate_and_apply_planar_fit(
        monthly[u_VAR], monthly[v_VAR], monthly[w_VAR]
    )
    monthly[u_VAR] = u
    monthly[v_VAR] = v
    monthly[w_VAR] = w
    fitted = monthly.melt(ignore_index=False)
    fitted['height'] = fitted.variable.apply(tidy._height_from_variable_name)
    fitted['measurement'] = fitted.variable.apply(tidy._measurement_from_variable_name)
    fitted['tower'] = fitted.variable.apply(tidy._tower_from_variable_name)
    return fitted


# The pivot and the up-valley / down-valley / otherwise split do not depend on
# the month, so they are computed once per variable set rather than once per month.
direction_subsets_by_variable_set = {}
for u_VAR, v_VAR, w_VAR in variable_sets:
    src = tidy_df[tidy_df.variable.isin([u_VAR, v_VAR, w_VAR])].pivot(
        index='time',
        columns='variable',
        values='value'
    )
    is_upvalley = src.index.isin(upvalley_wind_times)
    is_downvalley = src.index.isin(downvalley_wind_times)
    direction_subsets_by_variable_set[(u_VAR, v_VAR, w_VAR)] = (
        src[is_upvalley],
        src[is_downvalley],
        src[(~is_upvalley) & (~is_downvalley)],
    )

# Append order is unchanged: month -> variable set -> (upvalley, downvalley, otherwise).
for month,year in [
    (11,2022),
    (12,2022),
    (1,2023),
    (2,2023),
    (3,2023),
    (4,2023),
    (5,2023),
    (6,2023),
]:
    for u_VAR, v_VAR, w_VAR in variable_sets:
        for direction_subset in direction_subsets_by_variable_set[(u_VAR, v_VAR, w_VAR)]:
            fitted_subset = _planar_fit_one_month(
                direction_subset, month, year, u_VAR, v_VAR, w_VAR
            )
            if fitted_subset is not None:
                planar_fitted_valleydir_dfs.append(fitted_subset)

#### Planar fit per month, split into 30° sectors

In [None]:
# Reconstruct a wind direction (degrees) from the 3 m u/v components on tower c,
# for comparison with the logged `dir_3m_c` variable.
u_3m_c = tidy_df.query("variable == 'u_3m_c'").set_index('time').value
v_3m_c = tidy_df.query("variable == 'v_3m_c'").set_index('time').value
calculated_wind_dir = 270 - np.rad2deg(np.arctan2(v_3m_c, u_3m_c))
# 270 - atan2 can exceed 360, so wrap those values back by one full turn
# (a value of exactly 360 is left as is).
calculated_wind_dir = calculated_wind_dir.mask(
    calculated_wind_dir > 360,
    calculated_wind_dir - 360
)

In [None]:
# Overlay the reconstructed direction and the logged `dir_3m_c` on one axis,
# for data through 2022-11-15, as a visual sanity check.
calculated_wind_dir.loc[:'20221115'].plot()
logged_wind_dir = tidy_df.query("variable == 'dir_3m_c'").set_index('time')['value']
logged_wind_dir.loc[:'20221115'].plot()

In [None]:
# Assign every timestamp to a 30-degree sector of the logged 3 m wind direction
# on tower c, then collect the timestamps belonging to each sector.
# NOTE(review): pd.cut bins are right-closed by default, so the first sector is
# (0, 30] and a direction of exactly 0 lands in no sector — confirm that is intended.
wind_dir_3m_c = tidy_df.query("variable == 'dir_3m_c'").set_index('time')['value']
sector_edges = np.arange(0, 390, 30)
wind_dir_bins = pd.cut(wind_dir_3m_c, bins=sector_edges)
grouped_indices = wind_dir_bins.groupby(wind_dir_bins).apply(
    lambda sector_members: sector_members.index.tolist()
)
# Report how many timestamps fall into each sector.
for interval, timestamps in grouped_indices.items():
    print(interval, '\t', len(timestamps))

In [None]:
#### Planar fit per month, split by the wind-direction sectors in `grouped_indices`
planar_fitted_sector30_dfs = []
variable_sets = [
    # ('u_1m_c', 'v_1m_c', 'w_1m_c'),
    ('u_2m_c', 'v_2m_c', 'w_2m_c'),
    ('u_3m_c',   'v_3m_c',   'w_3m_c'),
    ('u_5m_c',   'v_5m_c',   'w_5m_c'),
    ('u_10m_c',   'v_10m_c',   'w_10m_c'),
    ('u_15m_c',   'v_15m_c',   'w_15m_c'),
    ('u_20m_c',   'v_20m_c',   'w_20m_c'),
    
    # ('u_1m_uw', 'v_1m_uw', 'w_1m_uw'),
    # ('u_2m_uw', 'v_2m_uw', 'w_2m_uw'),
    # ('u_2_5m_uw', 'v_2_5m_uw', 'w_2_5m_uw'),
    ('u_3m_uw',   'v_3m_uw',   'w_3m_uw'),
    ('u_10m_uw',   'v_10m_uw',   'w_10m_uw'),
    
    # ('u_1m_ue', 'v_1m_ue', 'w_1m_ue'),
    # ('u_2m_ue', 'v_2m_ue', 'w_2m_ue'),
    ('u_3m_ue',   'v_3m_ue',   'w_3m_ue'),
    ('u_10m_ue',   'v_10m_ue',   'w_10m_ue'),

    # ('u_1m_d', 'v_1m_d', 'w_1m_d'),
    # ('u_2m_d', 'v_2m_d', 'w_2m_d'),
    ('u_3m_d',   'v_3m_d',   'w_3m_d'),
    ('u_10m_d',   'v_10m_d',   'w_10m_d'),
]
VARIABLE_NAMES = list(np.array(variable_sets).flatten())

# The wide (time x component) table depends only on the variable set, so it is
# pivoted once here instead of once per (month, sector) combination.
wide_by_variable_set = {
    (u_VAR, v_VAR, w_VAR): tidy_df[tidy_df.variable.isin([u_VAR, v_VAR, w_VAR])].pivot(
        index='time',
        columns='variable',
        values='value'
    )
    for u_VAR, v_VAR, w_VAR in variable_sets
}

# Append order is unchanged: month -> variable set -> sector.
for month,year in [
    (11,2022),
    (12,2022),
    (1,2023),
    (2,2023),
    (3,2023),
    (4,2023),
    (5,2023),
    (6,2023),
]:
    for u_VAR, v_VAR, w_VAR in variable_sets:
        wide = wide_by_variable_set[(u_VAR, v_VAR, w_VAR)]
        in_month = (wide.index.month == month) & (wide.index.year == year)
        for interval, timestamps in grouped_indices.items():
            # copy so the column assignments below never write into a slice
            src = wide[in_month & wide.index.isin(timestamps)].copy()
            # skip sectors with no row that has all three components this month
            if len(src.dropna()) > 0:
                u,v,w = extrautils.calculate_and_apply_planar_fit(
                    src[u_VAR], src[v_VAR], src[w_VAR]
                )
                src[u_VAR] = u
                src[v_VAR] = v
                src[w_VAR] = w
                src = src.melt(ignore_index=False)
                src['height'] = src.variable.apply(tidy._height_from_variable_name)
                src['measurement'] = src.variable.apply(tidy._measurement_from_variable_name)
                src['tower'] = src.variable.apply(tidy._tower_from_variable_name)
                planar_fitted_sector30_dfs.append(src)

#### Planar fit per month, split into 60° sectors

In [None]:
# Preview the bin edges (0, 60, ..., 360) that the next cell passes to pd.cut.
np.arange(0, 420, 60)

In [None]:
# Assign every timestamp to a 60-degree sector of the logged 3 m wind direction
# on tower c, then collect the timestamps belonging to each sector.
# NOTE(review): pd.cut bins are right-closed by default, so the first sector is
# (0, 60] and a direction of exactly 0 lands in no sector — confirm that is intended.
wind_dir_3m_c = tidy_df.query("variable == 'dir_3m_c'").set_index('time')['value']
sector_edges = np.arange(0, 420, 60)
wind_dir_bins = pd.cut(wind_dir_3m_c, bins=sector_edges)
grouped_indices = wind_dir_bins.groupby(wind_dir_bins).apply(
    lambda sector_members: sector_members.index.tolist()
)
# Report how many timestamps fall into each sector.
for interval, timestamps in grouped_indices.items():
    print(interval, '\t', len(timestamps))

In [None]:
#### Planar fit per month, split by the wind-direction sectors in `grouped_indices`
planar_fitted_sector60_dfs = []
variable_sets = [
    # ('u_1m_c', 'v_1m_c', 'w_1m_c'),
    ('u_2m_c', 'v_2m_c', 'w_2m_c'),
    ('u_3m_c',   'v_3m_c',   'w_3m_c'),
    ('u_5m_c',   'v_5m_c',   'w_5m_c'),
    ('u_10m_c',   'v_10m_c',   'w_10m_c'),
    ('u_15m_c',   'v_15m_c',   'w_15m_c'),
    ('u_20m_c',   'v_20m_c',   'w_20m_c'),
    
    # ('u_1m_uw', 'v_1m_uw', 'w_1m_uw'),
    # ('u_2m_uw', 'v_2m_uw', 'w_2m_uw'),
    # ('u_2_5m_uw', 'v_2_5m_uw', 'w_2_5m_uw'),
    ('u_3m_uw',   'v_3m_uw',   'w_3m_uw'),
    ('u_10m_uw',   'v_10m_uw',   'w_10m_uw'),
    
    # ('u_1m_ue', 'v_1m_ue', 'w_1m_ue'),
    # ('u_2m_ue', 'v_2m_ue', 'w_2m_ue'),
    ('u_3m_ue',   'v_3m_ue',   'w_3m_ue'),
    ('u_10m_ue',   'v_10m_ue',   'w_10m_ue'),

    # ('u_1m_d', 'v_1m_d', 'w_1m_d'),
    # ('u_2m_d', 'v_2m_d', 'w_2m_d'),
    ('u_3m_d',   'v_3m_d',   'w_3m_d'),
    ('u_10m_d',   'v_10m_d',   'w_10m_d'),
]
VARIABLE_NAMES = list(np.array(variable_sets).flatten())

# The wide (time x component) table depends only on the variable set, so it is
# pivoted once here instead of once per (month, sector) combination.
wide_by_variable_set = {
    (u_VAR, v_VAR, w_VAR): tidy_df[tidy_df.variable.isin([u_VAR, v_VAR, w_VAR])].pivot(
        index='time',
        columns='variable',
        values='value'
    )
    for u_VAR, v_VAR, w_VAR in variable_sets
}

# Append order is unchanged: month -> variable set -> sector.
for month,year in [
    (11,2022),
    (12,2022),
    (1,2023),
    (2,2023),
    (3,2023),
    (4,2023),
    (5,2023),
    (6,2023),
]:
    for u_VAR, v_VAR, w_VAR in variable_sets:
        wide = wide_by_variable_set[(u_VAR, v_VAR, w_VAR)]
        in_month = (wide.index.month == month) & (wide.index.year == year)
        for interval, timestamps in grouped_indices.items():
            # copy so the column assignments below never write into a slice
            src = wide[in_month & wide.index.isin(timestamps)].copy()
            # skip sectors with no row that has all three components this month
            if len(src.dropna()) > 0:
                u,v,w = extrautils.calculate_and_apply_planar_fit(
                    src[u_VAR], src[v_VAR], src[w_VAR]
                )
                src[u_VAR] = u
                src[v_VAR] = v
                src[w_VAR] = w
                src = src.melt(ignore_index=False)
                src['height'] = src.variable.apply(tidy._height_from_variable_name)
                src['measurement'] = src.variable.apply(tidy._measurement_from_variable_name)
                src['tower'] = src.variable.apply(tidy._tower_from_variable_name)
                planar_fitted_sector60_dfs.append(src)

#### Planar fit per month, split into 10° sectors

In [None]:
# Assign every timestamp to a 10-degree sector of the logged 3 m wind direction
# on tower c, then collect the timestamps belonging to each sector.
# NOTE(review): pd.cut bins are right-closed by default, so the first sector is
# (0, 10] and a direction of exactly 0 lands in no sector — confirm that is intended.
wind_dir_3m_c = tidy_df.query("variable == 'dir_3m_c'").set_index('time')['value']
sector_edges = np.arange(0, 370, 10)
wind_dir_bins = pd.cut(wind_dir_3m_c, bins=sector_edges)
grouped_indices = wind_dir_bins.groupby(wind_dir_bins).apply(
    lambda sector_members: sector_members.index.tolist()
)
# Report how many timestamps fall into each sector.
for interval, timestamps in grouped_indices.items():
    print(interval, '\t', len(timestamps))

In [None]:
#### Planar fit per month, split by the wind-direction sectors in `grouped_indices`
planar_fitted_sector10_dfs = []
variable_sets = [
    # ('u_1m_c', 'v_1m_c', 'w_1m_c'),
    ('u_2m_c', 'v_2m_c', 'w_2m_c'),
    ('u_3m_c',   'v_3m_c',   'w_3m_c'),
    ('u_5m_c',   'v_5m_c',   'w_5m_c'),
    ('u_10m_c',   'v_10m_c',   'w_10m_c'),
    ('u_15m_c',   'v_15m_c',   'w_15m_c'),
    ('u_20m_c',   'v_20m_c',   'w_20m_c'),
    
    # ('u_1m_uw', 'v_1m_uw', 'w_1m_uw'),
    # ('u_2m_uw', 'v_2m_uw', 'w_2m_uw'),
    # ('u_2_5m_uw', 'v_2_5m_uw', 'w_2_5m_uw'),
    ('u_3m_uw',   'v_3m_uw',   'w_3m_uw'),
    ('u_10m_uw',   'v_10m_uw',   'w_10m_uw'),
    
    # ('u_1m_ue', 'v_1m_ue', 'w_1m_ue'),
    # ('u_2m_ue', 'v_2m_ue', 'w_2m_ue'),
    ('u_3m_ue',   'v_3m_ue',   'w_3m_ue'),
    ('u_10m_ue',   'v_10m_ue',   'w_10m_ue'),

    # ('u_1m_d', 'v_1m_d', 'w_1m_d'),
    # ('u_2m_d', 'v_2m_d', 'w_2m_d'),
    ('u_3m_d',   'v_3m_d',   'w_3m_d'),
    ('u_10m_d',   'v_10m_d',   'w_10m_d'),
]
VARIABLE_NAMES = list(np.array(variable_sets).flatten())

# The wide (time x component) table depends only on the variable set, so it is
# pivoted (and pre-filtered) once here instead of once per (month, sector).
wide_by_variable_set = {}
for u_VAR, v_VAR, w_VAR in variable_sets:
    wide = tidy_df[tidy_df.variable.isin([u_VAR, v_VAR, w_VAR])].pivot(
        index='time',
        columns='variable',
        values='value'
    )
    #############################################
    ## EXPERIMENTAL
    #############################################
    # keep only the timestamps listed in `winds_times_1to2` (defined elsewhere)
    wide = wide[wide.index.isin(winds_times_1to2)]
    #############################################
    #############################################
    wide_by_variable_set[(u_VAR, v_VAR, w_VAR)] = wide

# Append order is unchanged: month -> variable set -> sector.
for month,year in [
    (11,2022),
    (12,2022),
    (1,2023),
    (2,2023),
    (3,2023),
    (4,2023),
    (5,2023),
    (6,2023),
]:
    for u_VAR, v_VAR, w_VAR in variable_sets:
        wide = wide_by_variable_set[(u_VAR, v_VAR, w_VAR)]
        in_month = (wide.index.month == month) & (wide.index.year == year)
        for interval, timestamps in grouped_indices.items():
            # copy so the column assignments below never write into a slice
            src = wide[in_month & wide.index.isin(timestamps)].copy()
            # skip sectors with no row that has all three components this month
            if len(src.dropna()) > 0:
                u,v,w = extrautils.calculate_and_apply_planar_fit(
                    src[u_VAR], src[v_VAR], src[w_VAR]
                )
                src[u_VAR] = u
                src[v_VAR] = v
                src[w_VAR] = w
                src = src.melt(ignore_index=False)
                src['height'] = src.variable.apply(tidy._height_from_variable_name)
                src['measurement'] = src.variable.apply(tidy._measurement_from_variable_name)
                src['tower'] = src.variable.apply(tidy._tower_from_variable_name)
                planar_fitted_sector10_dfs.append(src)

In [None]:
# Stack the per-(month, variable set, sector) pieces of each planar-fit variant
# into one long table, turning the time index back into a column.
(
    planar_fitted_valleydir_df,
    planar_fitted_sector30_df,
    planar_fitted_sector60_df,
    planar_fitted_sector10_df,
) = (
    pd.concat(fitted_pieces).reset_index()
    for fitted_pieces in (
        planar_fitted_valleydir_dfs,
        planar_fitted_sector30_dfs,
        planar_fitted_sector60_dfs,
        planar_fitted_sector10_dfs,
    )
)

In [None]:
# Median diurnal cycle of w on tower c after the valley-direction planar fit,
# one line per height. Left panel: up-valley wind times; right panel: down-valley.
src = (
    planar_fitted_valleydir_df
    .loc[planar_fitted_valleydir_df.measurement == 'w']
    .query("tower == 'c'")
    .set_index('time')
)
upvalley_panel, downvalley_panel = (
    alt.Chart(src[src.index.isin(wind_times)].reset_index()).mark_line().encode(
        alt.X('hoursminutes(time):T'),
        alt.Y('median(value):Q'),
        alt.Color('height:O').scale(scheme='rainbow')
    ).properties(width=300, height = 150)
    for wind_times in (upvalley_wind_times, downvalley_wind_times)
)
vertical_velocities_valleydir_planar_fit_chart = upvalley_panel | downvalley_panel

In [None]:
# Median diurnal cycle of w on tower c after the 30-degree-sector planar fit,
# one line per height. Left panel: up-valley wind times; right panel: down-valley.
src = (
    planar_fitted_sector30_df
    .loc[planar_fitted_sector30_df.measurement == 'w']
    .query("tower == 'c'")
    .set_index('time')
)
upvalley_panel, downvalley_panel = (
    alt.Chart(src[src.index.isin(wind_times)].reset_index()).mark_line().encode(
        alt.X('hoursminutes(time):T'),
        alt.Y('median(value):Q'),
        alt.Color('height:O').scale(scheme='rainbow')
    ).properties(width=300, height = 150)
    for wind_times in (upvalley_wind_times, downvalley_wind_times)
)
vertical_velocities_sector30_planar_fit_chart = upvalley_panel | downvalley_panel

In [None]:
# Median diurnal cycle of w on tower c after the 60-degree-sector planar fit,
# one line per height. Left panel: up-valley wind times; right panel: down-valley.
src = (
    planar_fitted_sector60_df
    .loc[planar_fitted_sector60_df.measurement == 'w']
    .query("tower == 'c'")
    .set_index('time')
)
upvalley_panel, downvalley_panel = (
    alt.Chart(src[src.index.isin(wind_times)].reset_index()).mark_line().encode(
        alt.X('hoursminutes(time):T'),
        alt.Y('median(value):Q'),
        alt.Color('height:O').scale(scheme='rainbow')
    ).properties(width=300, height = 150)
    for wind_times in (upvalley_wind_times, downvalley_wind_times)
)
vertical_velocities_sector60_planar_fit_chart = upvalley_panel | downvalley_panel

In [None]:
# Median diurnal cycle of w on tower c after the 10-degree-sector planar fit,
# one line per height. Left panel: up-valley wind times; right panel: down-valley.
src = (
    planar_fitted_sector10_df
    .loc[planar_fitted_sector10_df.measurement == 'w']
    .query("tower == 'c'")
    .set_index('time')
)
upvalley_panel, downvalley_panel = (
    alt.Chart(src[src.index.isin(wind_times)].reset_index()).mark_line().encode(
        alt.X('hoursminutes(time):T'),
        alt.Y('median(value):Q'),
        alt.Color('height:O').scale(scheme='rainbow')
    ).properties(width=300, height = 150)
    for wind_times in (upvalley_wind_times, downvalley_wind_times)
)
vertical_velocities_sector10_planar_fit_chart = upvalley_panel | downvalley_panel

In [None]:
# Stack the w charts of every planar-fit variant vertically, each under its own
# title, with x, y and color scales shared within and across the rows.
titled_fit_charts = [
    (vertical_velocities_normal_planar_fit_chart, 'Planar fit: Monthly'),
    (vertical_velocities_valleydir_planar_fit_chart, 'Planar fit: Monthly + 3 sectors'),
    (vertical_velocities_sector30_planar_fit_chart, 'Planar fit: Monthly + 12 sectors'),
    (vertical_velocities_sector60_planar_fit_chart, 'Planar fit: Monthly + 6 sectors'),
    (vertical_velocities_sector10_planar_fit_chart, 'Planar fit: Monthly + 36 sectors'),
]

stacked_fit_charts = None
for fit_chart, fit_title in titled_fit_charts:
    fit_row = fit_chart.properties(title=fit_title).resolve_scale(
        x='shared',
        y='shared',
        color='shared'
    )
    # rows are combined with `&` in list order, top to bottom
    stacked_fit_charts = fit_row if stacked_fit_charts is None else stacked_fit_charts & fit_row

stacked_fit_charts.resolve_scale(
    x='shared',
    y='shared',
    color='shared'
)

## Create datasets with different planar fits

In [None]:
# Variables that were re-fitted per wind sector in this notebook.
refit_variable_names = planar_fitted_valleydir_df.variable.unique()
is_refit_variable = tidy_df.variable.isin(refit_variable_names)

# Set the original rows of those variables aside; they are added back to
# tidy_df later in the notebook.
planar_fitted_original_df = tidy_df[is_refit_variable]

# Drop the re-fitted variables from tidy_df.
# THIS MUST HAPPEN AFTER planar_fitted_original_df HAS BEEN TAKEN
tidy_df = tidy_df[~is_refit_variable]


In [None]:
# Build one full dataset per planar-fit variant: everything in tidy_df except
# the re-fitted wind variables, plus that variant's re-fitted rows.
#
# The re-fitted variables are filtered out of tidy_df again here. On a first
# run this is a no-op (the previous cell already dropped them), but it makes
# this cell safe to re-run: without it, a second run would find the original
# rows already restored in tidy_df and would duplicate them in every output.
refit_variable_names = planar_fitted_original_df.variable.unique()
tidy_df_without_refit_variables = tidy_df[~tidy_df.variable.isin(refit_variable_names)]

tidy_df_pf_valleydir = pd.concat([
    tidy_df_without_refit_variables,
    planar_fitted_valleydir_df
])

tidy_df_pf_sector30 = pd.concat([
    tidy_df_without_refit_variables,
    planar_fitted_sector30_df
])

tidy_df_pf_sector60 = pd.concat([
    tidy_df_without_refit_variables,
    planar_fitted_sector60_df
])

tidy_df_pf_sector10 = pd.concat([
    tidy_df_without_refit_variables,
    planar_fitted_sector10_df
])

# Restore the original (not sector-refitted) wind variables in tidy_df.
# This is done last so the variant datasets above never contain two versions
# of the same variable.
tidy_df = pd.concat([
    tidy_df_without_refit_variables,
    planar_fitted_original_df
])

# 1D continuous solution for vertical advection (new)

## Collect mixing ratio data we need

In [None]:
## Isolate humidity measurements (from hygrometers)
# `assign` relabels the measurement on a new frame instead of setting an
# attribute on a filtered slice (which raises SettingWithCopyWarning).
mixing_ratio_profile_df = (
    tidy_df[tidy_df.measurement.isin(['mixing ratio'])]
    .assign(measurement='q_hygr')
    .pivot_table(
        index='time', columns=['height', 'tower', 'measurement'], values='value'
    )
)

# Combine with snow depth data (joined on the time index)
snow_depth = tidy_df[tidy_df.variable == 'SnowDepth_c'].set_index('time')['value'].rename('snow depth')
mixing_ratio_profile_df = mixing_ratio_profile_df.melt(ignore_index=False).join(snow_depth)

# Calculate instrument height above snow surface
mixing_ratio_profile_df['instrument_height'] = (
    mixing_ratio_profile_df['height'] - mixing_ratio_profile_df['snow depth']
)

# Filter out measurements that are buried in the snow; rows with a nominal
# height of 0 are always kept
is_above_snow = mixing_ratio_profile_df['instrument_height'] > 0
is_surface_row = mixing_ratio_profile_df['height'] == 0
mixing_ratio_profile_df = mixing_ratio_profile_df[is_above_snow | is_surface_row]

# Any remaining instrument height that is not > 0 (including NaN) becomes 0.
# `where` is kept on purpose: `clip(lower=0)` would leave NaN heights as NaN.
mixing_ratio_profile_df = mixing_ratio_profile_df.assign(
    instrument_height=mixing_ratio_profile_df['instrument_height'].where(
        mixing_ratio_profile_df['instrument_height'] > 0,
        0
    )
)

## Define function to calculate gradient for a given set of heights and mixingratio measurements (for single timestamp)

In [None]:
from sublimpy.gradients import LogPolynomialWithRoughness

def calculate_mixingratio_gradient(
        one_ts_groupby,
        height4estimate,
        Z0Q = 0.005
    ):
    """
    Estimate the vertical mixing ratio gradient at one height from the
    measurements of a single timestamp.

    `one_ts_groupby` is a dataframe with the columns `instrument_height` and
    `value` (mixing ratio). The rows are ordered by instrument height, any
    instrument height of exactly 0 is replaced by the roughness height `Z0Q`,
    and `LogPolynomialWithRoughness.fit_function` is fitted to the profile.
    The first two fitted parameters are then passed to
    `LogPolynomialWithRoughness.gradient_single_component` to evaluate the
    gradient at `height4estimate`.

    Snow depth is not handled here: `instrument_height` is expected to be
    measured from the snow surface already.

    Returns the tuple `(gradient, a, b)`, where `a` and `b` are the first two
    fitted parameters.
    """
    profile = one_ts_groupby.sort_values('instrument_height')

    # a height of exactly 0 is moved up to the roughness height
    fit_heights = profile['instrument_height'].replace(0,Z0Q)
    fit_values = profile['value']

    # fit the profile; only the first two parameters are needed for the gradient
    fitted_params = LogPolynomialWithRoughness.fit_function(
        fit_values,
        fit_heights
    )
    a, b = fitted_params[0], fitted_params[1]
    return (
        LogPolynomialWithRoughness.gradient_single_component(height4estimate, a, b),
        a,
        b,
    )

## Test the function out

In [None]:
# Try the fit on one timestamp and compare the fitted curve with the measurements.
test_timestamp = '2023-05-05 12:30:00'
# look the group up once instead of three times
test_profile = mixing_ratio_profile_df.groupby('time').get_group(test_timestamp)

gradient, a, b =  calculate_mixingratio_gradient(
    test_profile,
    3   
)
heights = test_profile.instrument_height
values = test_profile.value
# First value of the profile, taken by position. `values` is indexed by time,
# so `values[0]` relied on deprecated positional fallback; `.iloc[0]` is explicit.
c = values.iloc[0]
heights_fit = pd.Series(np.linspace(0,20,100))
values_fit = heights_fit.apply( lambda z:
    LogPolynomialWithRoughness.function(z, a, b, c)
)
# fitted curve as a line, measurements as points (value on x, height on y)
plt.plot(values_fit, heights_fit)
plt.scatter(values, heights)

## Calculate $d\sigma/dz$ for all timestamps, tower measurements at all heights >= 2

In [None]:
def get_dsigma_dz_values_for_height(H):
    """
    Run `calculate_mixingratio_gradient` at height `H` for every timestamp in
    the module-level `mixing_ratio_profile_df`.

    Returns a dataframe indexed by `time` with the columns `dsigma_dz`,
    `fit_param_a` and `fit_param_b`.

    NOTE(review): the final `drop_duplicates()` compares column values only,
    not the time index, so a timestamp whose three values repeat an earlier
    row is dropped — confirm that this is the intended de-duplication.
    """
    per_timestamp = mixing_ratio_profile_df.reset_index().swifter.groupby('time').apply(
        lambda df: calculate_mixingratio_gradient(df, H)
    )
    # each element is a (gradient, a, b) tuple; spread it over three columns
    fit_table = pd.DataFrame(per_timestamp.tolist())
    fit_table = fit_table.rename(columns={
        0: 'dsigma_dz',
        1: 'fit_param_a',
        2: 'fit_param_b',
    })
    fit_table = fit_table.assign(time = per_timestamp.index)
    fit_table = fit_table.set_index('time')

    return fit_table.drop_duplicates()

def add_variables_to_dsigma_dz_values(H, dsigma_dz_values, tidy_df_specific_pf):
    """
    Join the tower-c variables measured at height `H` onto the gradient table.

    `dsigma_dz_values` is indexed by time; `tidy_df_specific_pf` is a long
    table with the columns `time`, `variable` and `value`. Three columns are
    added by a left join on time:
    `w` (from `w_{H}m_c`), `rho_d` (from `airdensity_{H}m_c`) and
    `w_h2o__` (from `w_h2o__{H}m_c_gapfill`).
    Returns the joined dataframe; the inputs are not modified.
    """
    def _values_of(variable_query, column_name):
        # time-indexed values of one variable, renamed to the output column
        selected = tidy_df_specific_pf.query(variable_query)
        return selected.set_index('time')['value'].rename(column_name)

    with_w = dsigma_dz_values.join(
        _values_of(f"variable == 'w_{H}m_c'", 'w')
    )
    with_density = with_w.join(
        _values_of(f"variable == 'airdensity_{H}m_c'", 'rho_d')
    )
    return with_density.join(
        _values_of(f"variable == 'w_h2o__{H}m_c_gapfill'", 'w_h2o__')
    )

def calculate_transport_terms_with_dsigma_dz(H, dsigma_dz_values):
    """
    Add the vertical advection term and the advection-corrected flux.

    Reads the columns `w`, `rho_d`, `dsigma_dz` and `w_h2o__`, and writes
    `vertical_advection` = w * rho_d * dsigma_dz * 1000 and
    `w_h2o__corrected` = w_h2o__ + vertical_advection * H.
    The input dataframe is modified in place and also returned.

    NOTE(review): the factor 1000 is presumably a unit conversion — confirm.
    """
    advection = (
        dsigma_dz_values['w']
        * dsigma_dz_values['rho_d']
        * dsigma_dz_values['dsigma_dz']
        * 1000
    )
    dsigma_dz_values['vertical_advection'] = advection
    # advection scaled by the height H, added to the measured flux
    dsigma_dz_values['w_h2o__corrected'] = dsigma_dz_values['w_h2o__'] + advection*H
    return dsigma_dz_values

def get_advection_and_cumsum_df(H, tidy_df_specific_pf, cumsum_start='20221130', cumsum_end='20230509'):
    """
    Compute the vertical advection terms at height `H` and their running sums.

    Parameters
    ----------
    H : int
        Measurement height used to pick the `*_{H}m_c` variables.
    tidy_df_specific_pf : pd.DataFrame
        Long table (columns `time`, `variable`, `value`) supplying w, air
        density and the water vapor flux.
    cumsum_start, cumsum_end : str
        Inclusive label bounds of the time window over which the running sums
        are taken. The defaults are the window that was previously hard-coded.

    Returns
    -------
    (df, cumsum_df)
        `df` holds the gradient, joined variables and transport terms for all
        timestamps. `cumsum_df` is the window subset with the extra columns
        `w_h2o___cumsum` and `w_h2o__corrected_cumsum`.
    """
    df = get_dsigma_dz_values_for_height(H)
    df = add_variables_to_dsigma_dz_values(H, df, tidy_df_specific_pf) 
    df = calculate_transport_terms_with_dsigma_dz(H, df)

    # copy so the new columns are not written onto a slice of `df`
    cumsum_df = df.loc[cumsum_start:cumsum_end].copy()
    cumsum_df['w_h2o___cumsum'] = cumsum_df['w_h2o__'].cumsum()
    cumsum_df['w_h2o__corrected_cumsum'] = cumsum_df['w_h2o__corrected'].cumsum()
    return df, cumsum_df


In [None]:
### Estimates from `tidy_df` itself (the `_pf_og` variant, no sector-wise refit)
# One (full table, cumulative-sum table) pair per measurement height.
dsigma_dz_values_for_height_pf_og = {
    H: get_advection_and_cumsum_df(H, tidy_df)
    for H in (2, 3, 5, 10, 15, 20)
}
# Stack the per-height full tables, tagging each row with its height.
vertical_advection_estimates_pf_og = pd.concat([
    full_table.assign(height = H)
    for H, (full_table, _) in dsigma_dz_values_for_height_pf_og.items()
])
# Same for the cumulative-sum tables.
cumsub_vertical_advection_correction_estimates_pf_og = pd.concat([
    cumsum_table.assign(height = H)
    for H, (_, cumsum_table) in dsigma_dz_values_for_height_pf_og.items()
])

# ### 3 sector
# dsigma_dz_values_for_height_pf_valleydir = {
#     2:      get_advection_and_cumsum_df(2,     tidy_df_pf_valleydir),
#     3:      get_advection_and_cumsum_df(3,     tidy_df_pf_valleydir),
#     5:      get_advection_and_cumsum_df(5,     tidy_df_pf_valleydir),
#     10:     get_advection_and_cumsum_df(10,    tidy_df_pf_valleydir),
#     15:     get_advection_and_cumsum_df(15,    tidy_df_pf_valleydir),
#     20:     get_advection_and_cumsum_df(20,    tidy_df_pf_valleydir),
# }
# vertical_advection_estimates_pf_valleydir = pd.concat([
#     df[0].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_valleydir.items()
# ])
# cumsub_vertical_advection_correction_estimates_pf_valleydir = pd.concat([
#     df[1].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_valleydir.items()
# ])

# ### 12 sector
# dsigma_dz_values_for_height_pf_sector30 = {
#     2:      get_advection_and_cumsum_df(2,     tidy_df_pf_sector30),
#     3:      get_advection_and_cumsum_df(3,     tidy_df_pf_sector30),
#     5:      get_advection_and_cumsum_df(5,     tidy_df_pf_sector30),
#     10:     get_advection_and_cumsum_df(10,    tidy_df_pf_sector30),
#     15:     get_advection_and_cumsum_df(15,    tidy_df_pf_sector30),
#     20:     get_advection_and_cumsum_df(20,    tidy_df_pf_sector30),
# }
# vertical_advection_estimates_pf_sector30 = pd.concat([
#     df[0].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_sector30.items()
# ])
# cumsub_vertical_advection_correction_estimates_pf_sector30 = pd.concat([
#     df[1].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_sector30.items()
# ])

# ### 6 sector
# dsigma_dz_values_for_height_pf_sector60 = {
#     2:      get_advection_and_cumsum_df(2,     tidy_df_pf_sector60),
#     3:      get_advection_and_cumsum_df(3,     tidy_df_pf_sector60),
#     5:      get_advection_and_cumsum_df(5,     tidy_df_pf_sector60),
#     10:     get_advection_and_cumsum_df(10,    tidy_df_pf_sector60),
#     15:     get_advection_and_cumsum_df(15,    tidy_df_pf_sector60),
#     20:     get_advection_and_cumsum_df(20,    tidy_df_pf_sector60),
# }
# vertical_advection_estimates_pf_sector60 = pd.concat([
#     df[0].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_sector60.items()
# ])
# cumsub_vertical_advection_correction_estimates_pf_sector60 = pd.concat([
#     df[1].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_sector60.items()
# ])

# ### 36 sector
# dsigma_dz_values_for_height_pf_sector10 = {
#     2:      get_advection_and_cumsum_df(2,     tidy_df_pf_sector10),
#     3:      get_advection_and_cumsum_df(3,     tidy_df_pf_sector10),
#     5:      get_advection_and_cumsum_df(5,     tidy_df_pf_sector10),
#     10:     get_advection_and_cumsum_df(10,    tidy_df_pf_sector10),
#     15:     get_advection_and_cumsum_df(15,    tidy_df_pf_sector10),
#     20:     get_advection_and_cumsum_df(20,    tidy_df_pf_sector10),
# }
# vertical_advection_estimates_pf_sector10 = pd.concat([
#     df[0].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_sector10.items()
# ])
# cumsub_vertical_advection_correction_estimates_pf_sector10 = pd.concat([
#     df[1].assign(height = H) for (H, df) in dsigma_dz_values_for_height_pf_sector10.items()
# ])

In [None]:
def get_casestudy_and_seasonal_charts(
        vert_adv_estimates_df,
        cumsub_estimates_df,
        casestudy_date = '20230505',
        include_casestudy = False,
):
    """Stack the seasonal sublimation chart and the diurnal w charts.

    Reads the notebook-level globals `upvalley_wind_times` and
    `downvalley_wind_times` (and `tidy_df` when `include_casestudy` is True).

    Parameters
    ----------
    vert_adv_estimates_df : pandas.DataFrame
        Time-indexed frame with `vertical_advection` and `height` columns.
        Only read when `include_casestudy` is True.
    cumsub_estimates_df : pandas.DataFrame
        Time-indexed frame with `height`, `w`, `w_h2o___cumsum` and
        `w_h2o__corrected_cumsum` columns.
    casestudy_date : str
        Day selected with `.loc` for the case-study panel.
    include_casestudy : bool
        When True, the case-study panel is built and inserted below the
        seasonal panel. The default (False) reproduces the previous return
        value, in which that panel was commented out of the result; the
        difference is that its data is no longer looked up, so a missing
        `casestudy_date` cannot raise when the panel is not requested.

    Returns
    -------
    Vertically concatenated Altair chart with independent shape and color
    scales.
    """
    def median_w_by_hour_chart(wind_times):
        # Median of `w` by hour of day, one line per height, restricted to
        # the timestamps contained in `wind_times`.
        return alt.Chart(
            cumsub_estimates_df[
                cumsub_estimates_df.index.isin(wind_times)
            ].reset_index()
        ).mark_line().encode(
            alt.X('hours(time):T'),
            alt.Y('median(w):Q').scale(domain=[-0.05,0.05]),
            alt.Color('height:O').scale(scheme='rainbow')
        ).properties(width=200, height=200)

    # Daily maximum per height of every column, scaled by 1.8.
    # NOTE(review): 1.8 presumably converts g/m^2/s over 30-min steps to mm
    # (1800 s / 1000) - confirm.
    # '1440min' (24 h) is the spelling used elsewhere in this notebook; it avoids
    # the deprecated upper-case 'H' alias.
    seasonal_sub_chart = alt.Chart(
        (cumsub_estimates_df.groupby([
            pd.Grouper(freq='1440min'), 'height'
        ]).max() * 1.8).reset_index()
    ).transform_fold([
        'w_h2o___cumsum', 'w_h2o__corrected_cumsum'
    ]).mark_point(size=50).encode(
        alt.Y('height:N').sort('-y'),
        alt.X('max(value):Q').scale(domain=[20,42]),
        alt.Shape('key:N'),
        alt.Color('key:N')
    ).properties(
        width=200, height=200
    )

    panels = [seasonal_sub_chart]

    if include_casestudy:
        # 10 m minus 3 m raw water vapor flux over the 7 m separation
        flux_div = pd.DataFrame(
            (
                tidy_df.query(f"variable == 'w_h2o__10m_c_raw'").set_index('time').loc[casestudy_date]['value'] - \
                tidy_df.query(f"variable == 'w_h2o__3m_c_raw'").set_index('time').loc[casestudy_date]['value']
            ).rename('flux div') / 7
        ).reset_index()

        # Both layers are smoothed with a centered 7-row rolling mean
        casestudy_chart = alt.Chart(
            flux_div
        ).transform_window(
            rolling_avg = 'mean(flux div)',
            frame = [-3, 3]
        ).mark_line(color='black').encode(
            alt.X('time:T'),
            alt.Y('rolling_avg:Q'),
        ) +\
        alt.Chart(
            vert_adv_estimates_df.loc[casestudy_date].reset_index()
        ).transform_window(
            rolling_avg = 'mean(vertical_advection)',
            frame = [-3, 3]
        ).mark_line().encode(
            alt.X('time:T'),
            alt.Y('rolling_avg:Q').scale(domain=[-0.003, 0.003]),
            alt.Color('height:O').scale(scheme='rainbow')
        ).properties(
            width=200,
            height=200
        )
        panels.append(casestudy_chart)

    panels.append(median_w_by_hour_chart(upvalley_wind_times))
    panels.append(median_w_by_hour_chart(downvalley_wind_times))

    # Stack with `&` left to right, exactly as the original expression did
    stacked = panels[0]
    for panel in panels[1:]:
        stacked = stacked & panel
    return stacked.resolve_scale(shape='independent', color='independent')

In [None]:
# Charts for the monthly planar fit without wind-direction sectors
get_casestudy_and_seasonal_charts(
    vert_adv_estimates_df = vertical_advection_estimates_pf_og,
    cumsub_estimates_df = cumsub_vertical_advection_correction_estimates_pf_og,
    casestudy_date = '20230505',
).properties(title='Monthly planar fit')

In [None]:
# One column of charts per planar-fit variant:
# (title, vertical advection estimates, cumulative sublimation estimates)
planar_fit_variants = [
    ('Monthly planar fit',
        vertical_advection_estimates_pf_og,
        cumsub_vertical_advection_correction_estimates_pf_og),
    ('Monthly planar fit + 3 sectors',
        vertical_advection_estimates_pf_valleydir,
        cumsub_vertical_advection_correction_estimates_pf_valleydir),
    ('Monthly planar fit + 6 sectors',
        vertical_advection_estimates_pf_sector60,
        cumsub_vertical_advection_correction_estimates_pf_sector60),
    ('Monthly planar fit + 12 sectors',
        vertical_advection_estimates_pf_sector30,
        cumsub_vertical_advection_correction_estimates_pf_sector30),
    ('Monthly planar fit + 36 sectors',
        vertical_advection_estimates_pf_sector10,
        cumsub_vertical_advection_correction_estimates_pf_sector10),
]

# Concatenate the columns side by side, left to right
planar_fit_comparison = None
for variant_title, variant_vert_adv_df, variant_cumsub_df in planar_fit_variants:
    variant_chart = get_casestudy_and_seasonal_charts(
        variant_vert_adv_df,
        variant_cumsub_df,
        casestudy_date = '20230505'
    ).properties(title=variant_title)
    if planar_fit_comparison is None:
        planar_fit_comparison = variant_chart
    else:
        planar_fit_comparison = planar_fit_comparison | variant_chart

planar_fit_comparison.configure_legend(orient='top')

# 1D gradient solutions for multiple terms

## Look at relative snow depths

In [None]:
# Snow depth rows of the tidy table, indexed by time
kps_snowdepth_df = tidy_df.query("variable == 'SnowDepth_c'").set_index('time')

# Annex snow depth as 30-minute medians, clipped to the time span of the rows above
annex_snowdepth_df = (
    annex_snowdepth_ds.to_dataframe()
    .resample('30min').median()
    .loc[kps_snowdepth_df.index.min(): kps_snowdepth_df.index.max()]
)

In [None]:
# Mixing ratio rows (height > 0) for 2023-02-01 10:00-15:00, with values scaled by 1000
kps_src = (
    tidy_df.query("measurement == 'mixing ratio'")
    .set_index('time')
    .loc['20230201 1000': '20230201 1500']
    .query("height > 0")
    .reset_index()
    .assign(value = lambda df: df['value'] * 1000)
)

# One line per height (<= 5 m) from the tidy table, overlaid with the 30-minute mean of
# `mixingratio_ds` for the same window (red)
(
    alt.Chart(kps_src.query("height <= 5"))
    .mark_line()
    .encode(
        alt.X('time:T'),
        alt.Y('value:Q').scale(zero=False),
        alt.Color('height:O')
    )
    + alt.Chart(
        mixingratio_ds.to_dataframe()
        .sort_index()
        .loc['20230201 1000': '20230201 1500']
        .resample('30min').mean()
        .reset_index()
    )
    .mark_line(color='red')
    .encode(
        alt.X('time:T'),
        alt.Y('mixing_ratio:Q').scale(zero=False)
    )
)

In [None]:
# Annex snow depth next to the tidy-table snow depth (multiplied by 100) on one time index
snowdepth_pair = annex_snowdepth_df['snow_depth'].rename('annex').to_frame().join(
    (100*kps_snowdepth_df['value'].rename('kps'))
).reset_index()

# Grey 1:1 reference line for the scatter panel
onetoone_line = alt.Chart(
    pd.DataFrame({'x':[0, 200], 'y':[0, 200]})
).mark_line(color='grey').encode(x='x', y='y')

# Left: both series through time. Right: 1440-minute (daily) means against each other.
(
    alt.Chart(snowdepth_pair)
    .transform_fold(['annex', 'kps'])
    .mark_line()
    .encode(
        alt.X('time:T'),
        alt.Y('value:Q'),
        alt.Color('key:N')
    )
    | (
        onetoone_line
        + alt.Chart(
            snowdepth_pair.set_index('time').resample('1440min').mean().reset_index()
        ).mark_circle().encode(
            alt.X('annex:Q'),
            alt.Y('kps:Q')
        )
    )
).configure_legend(orient='top')

## Calculate air density flux term

In [None]:
# Wide table (one column per variable) of the 3 m and 20 m tower-c inputs
airdensityflux_df = tidy_df[tidy_df.variable.isin([
    'dryairdensity_3m_c',   'dryairdensity_20m_c',
    'T_3m_c',   'T_20m_c',
    'mixingratio_3m_c',   'mixingratio_20m_c',
    'w_tc__3m_c',   'w_tc__20m_c',
    'w_h2o__3m_c',   'w_h2o__20m_c',
])].pivot(index='time', columns='variable', values='value')

# Mean mixing ratio over every measurement with 3 <= height <= 20.
# NOTE: the "3to17" in the column name does not match the height filter; 17 m is the
# thickness of the 3-20 m layer. The name is kept in case other cells read the column.
mean_mixing_ratio = tidy_df.query(
    "measurement == 'mixing ratio'"
).query(
    "height >= 3"
).query(
    "height <= 20"
).groupby('time').value.mean().rename('mixingratio_mean_3to17')

airdensityflux_df = airdensityflux_df.join(mean_mixing_ratio)

# ALL KELVIN
UNITS_FOR_T_MEAS = units('kelvin')
UNITS_FOR_SH_FLUX = units("kelvin*m/s")
# Store plain magnitudes (in kelvin) in the DataFrame rather than assigning pint
# Quantities, which relies on implicit unit stripping.
airdensityflux_df['T_3m_c'] = (airdensityflux_df['T_3m_c'].values * units("degC")).to(UNITS_FOR_T_MEAS).magnitude
airdensityflux_df['T_20m_c'] = (airdensityflux_df['T_20m_c'].values * units("degC")).to(UNITS_FOR_T_MEAS).magnitude

# ALL CELSIUS
# UNITS_FOR_T_MEAS = units('degC')
# UNITS_FOR_SH_FLUX = units("degC*m/s")
# airdensityflux_df['T_3m_c'] = (airdensityflux_df['T_3m_c'].values * units("degC")).to(UNITS_FOR_T_MEAS)
# airdensityflux_df['T_20m_c'] = (airdensityflux_df['T_20m_c'].values * units("degC")).to(UNITS_FOR_T_MEAS)

# NOTE(review): 0.622 is presumably the water vapor / dry air molar mass ratio - confirm
mu = 1/0.622


def _dry_air_flux_term(level):
    """Return (rho_d / T) * (1 + mu*r) * w'T' + mu * w'h2o' for one level ('3m' or '20m').

    NOTE(review): this looks like the mean dry-air mass flux of a WPL-type density
    correction - confirm against the reference being followed.
    """
    dry_air_density = airdensityflux_df[f'dryairdensity_{level}_c'].values * units("kg/m^3")
    temperature = airdensityflux_df[f'T_{level}_c'].values * UNITS_FOR_T_MEAS
    mixing_ratio = airdensityflux_df[f'mixingratio_{level}_c'].values * units("g/g")
    kinematic_heat_flux = airdensityflux_df[f'w_tc__{level}_c'].values * UNITS_FOR_SH_FLUX
    water_vapor_flux = airdensityflux_df[f'w_h2o__{level}_c'].values * units("g/m^2/s")
    return (
        (dry_air_density / temperature)
        * (1 + mu * mixing_ratio)
        * kinematic_heat_flux + mu*water_vapor_flux
    )


# Layer-mean mixing ratio times the 20 m minus 3 m difference of the term above,
# divided by the 17 m separation
air_density_flux = (
    (
        airdensityflux_df['mixingratio_mean_3to17'].values * units("g/g")
    ) * (
        _dry_air_flux_term('20m') - _dry_air_flux_term('3m')
    ) / (17 * units('m'))
).to(units('g/m^3/s'))

# Column holds the magnitudes in g/m^3/s; `air_density_flux` itself keeps its units
airdensityflux_df['air_density_flux'] = air_density_flux.magnitude
air_density_flux

## Calculate horizontal advection and storage change term (using two-point solution to derivative)

In [None]:

# combine mixing ratio measurements from the two sites into one dataframe
# (annex values are 30-minute means divided by 1000)
kpsannex_mixingratio_measurements = mixingratio_ds.sortby('time').sel(
    time=slice(data_start_date, data_cutoff_date)
).resample(time='30min').mean().rename('annex').to_dataframe() / 1000
kps_mixingratio_measurements_2m = tidy_df.query("variable == 'mixingratio_2m_c'").set_index('time')['value'].rename('kps_2m')
kps_mixingratio_measurements_3m = tidy_df.query("variable == 'mixingratio_3m_c'").set_index('time')['value'].rename('kps_3m')
kps_mixingratio_measurements_4m = tidy_df.query("variable == 'mixingratio_4m_c'").set_index('time')['value'].rename('kps_4m')

kps_lateral_simple_df = kpsannex_mixingratio_measurements.join(
    kps_mixingratio_measurements_2m
).join(
    kps_mixingratio_measurements_3m
).join(
    kps_mixingratio_measurements_4m
)

# add 10 m wind direction, 3 m wind speed, 3 m dry air density (x1000) and 3 m water vapor flux
kps_lateral_simple_df = kps_lateral_simple_df.join(
    tidy_df.query("variable == 'dir_10m_c'").set_index('time')['value'].rename('dir_10m_c')
).join(
    tidy_df.query("variable == 'spd_3m_c'").set_index('time')['value'].rename('spd_3m_c')
).join(
    1000*tidy_df.query("variable == 'dryairdensity_3m_c'").set_index('time')['value'].rename('rho')
).join(
    tidy_df.query("variable == 'w_h2o__3m_c'").set_index('time')['value'].rename('w_h2o__3m_c')
)

# Row-to-row change of the mean 2-4 m mixing ratio, taken BEFORE the wind-direction filter
# below. On the unfiltered 30-minute index each difference spans one time step; taking the
# difference after filtering would span arbitrary gaps while still being divided by 30 min.
kps_mixingratio_change = kps_lateral_simple_df[['kps_2m', 'kps_3m', 'kps_4m']].mean(axis=1).diff()

# isolate measurements to when wind is up (112˚ - 152˚) or downvalley (292˚ - 332˚)
is_along_valley = (
    ((kps_lateral_simple_df.dir_10m_c >= 112) & (kps_lateral_simple_df.dir_10m_c <= 152))
    |
    ((kps_lateral_simple_df.dir_10m_c >= 292) & (kps_lateral_simple_df.dir_10m_c <= 332))
)
kps_mixingratio_change = kps_mixingratio_change[is_along_valley]
# .copy() so the column assignments below write to an independent frame, not a slice
kps_lateral_simple_df = kps_lateral_simple_df[is_along_valley].copy()

# label each timestamp up or downvalley
kps_lateral_simple_df['direction'] = np.where(
    kps_lateral_simple_df['dir_10m_c'].between(292, 332), 'down', 'up'
)
is_downvalley = kps_lateral_simple_df['direction'] == 'down'

# Calculate deltas. During downvalley winds, ds = annex - kps. During up valley winds, ds = kps - annex
kps_lateral_simple_df['ds_2m'] = (
    kps_lateral_simple_df['annex'] - kps_lateral_simple_df['kps_2m']
).where(is_downvalley, kps_lateral_simple_df['kps_2m'] - kps_lateral_simple_df['annex'])
kps_lateral_simple_df['ds_3m'] = (
    kps_lateral_simple_df['annex'] - kps_lateral_simple_df['kps_3m']
).where(is_downvalley, kps_lateral_simple_df['kps_3m'] - kps_lateral_simple_df['annex'])

# constant uncertainty assigned to the 3 m difference
kps_lateral_simple_df['ds_3m_uncertainty'] = 0.2/1000

kps_lateral_simple_df['ds_4m'] = (
    kps_lateral_simple_df['annex'] - kps_lateral_simple_df['kps_4m']
).where(is_downvalley, kps_lateral_simple_df['kps_4m'] - kps_lateral_simple_df['annex'])

# horizontal separation used for the gradient
# NOTE(review): presumably the annex-to-KPS distance in meters - confirm
kps_lateral_simple_df['dx'] = 400
kps_lateral_simple_df['ds/dx 2m'] = kps_lateral_simple_df['ds_2m'] / kps_lateral_simple_df['dx']
kps_lateral_simple_df['ds/dx 3m'] = kps_lateral_simple_df['ds_3m'] / kps_lateral_simple_df['dx']
kps_lateral_simple_df['ds/dx 3m uncertainty'] = kps_lateral_simple_df['ds_3m_uncertainty'] / kps_lateral_simple_df['dx']
kps_lateral_simple_df['ds/dx 4m'] = kps_lateral_simple_df['ds_4m'] / kps_lateral_simple_df['dx']

# lateral advection = wind speed * rho * ds/dx
kps_lateral_simple_df['lateral_advection_2m'] = kps_lateral_simple_df['spd_3m_c'] * kps_lateral_simple_df['rho'] * kps_lateral_simple_df['ds/dx 2m']
kps_lateral_simple_df['lateral_advection_3m'] = kps_lateral_simple_df['spd_3m_c'] * kps_lateral_simple_df['rho'] * kps_lateral_simple_df['ds/dx 3m']
kps_lateral_simple_df['lateral_advection_3m_uncertainty'] = kps_lateral_simple_df['spd_3m_c'] * kps_lateral_simple_df['rho'] * kps_lateral_simple_df['ds/dx 3m uncertainty']
kps_lateral_simple_df['lateral_advection_4m'] = kps_lateral_simple_df['spd_3m_c'] * kps_lateral_simple_df['rho'] * kps_lateral_simple_df['ds/dx 4m']


# storage change = rho * (30-minute change in mean mixing ratio) / 30 minutes
kps_lateral_simple_df['ds/dt'] = kps_lateral_simple_df['rho'] * kps_mixingratio_change * units('g/g') / (30*60*units('seconds'))
# kps_lateral_simple_df

In [None]:
# Upper/lower bounds of the 3 m lateral advection (value +/- its uncertainty).
# .copy() so the new columns are written to an independent frame rather than to a
# column subset of kps_lateral_simple_df.
src = kps_lateral_simple_df[['lateral_advection_3m', 'lateral_advection_3m_uncertainty']].copy()
src['ub'] = src['lateral_advection_3m'] + src['lateral_advection_3m_uncertainty']
src['lb'] = src['lateral_advection_3m'] - src['lateral_advection_3m_uncertainty']

# Mean by time of day: shaded band between the bounds, with the mean estimate as a line
alt.Chart(src.reset_index()).mark_area(opacity=0.5).encode(
    alt.X('hoursminutes(time):T'),
    alt.Y('mean(lb):Q'),
    alt.Y2('mean(ub):Q')
) + alt.Chart(src.reset_index()).mark_line().encode(
    alt.X('hoursminutes(time):T'),
    alt.Y('mean(lateral_advection_3m):Q'),
)

### Using KPS IRGA measurements

In [None]:
# get mixing ratio measurements from the corner towers:
# mean of the 'ue' and 'uw' towers vs. the 'd' tower, all at 3 m
upwind_s = 0.5*(mixing_ratio_field_df[3, 'ue', 'r'] + mixing_ratio_field_df[3, 'uw', 'r'])
upwind_s.name = 'upwind_s'
kps_lateral_simple_irga_df = pd.DataFrame(upwind_s)
kps_lateral_simple_irga_df['downwind_s'] = mixing_ratio_field_df[3, 'd', 'r']


# Add 10 m wind direction, 3 m wind speed, 3 m dry air density (x1000) and 3 m water vapor flux
kps_lateral_simple_irga_df = kps_lateral_simple_irga_df.join(
    tidy_df.query("variable == 'dir_10m_c'").set_index('time').loc['20221101': '20230620']['value'].rename('dir_10m_c')
).join(
    tidy_df.query("variable == 'spd_3m_c'").set_index('time').loc['20221101': '20230620']['value'].rename('spd_3m_c')
).join(
    1000*tidy_df.query("variable == 'dryairdensity_3m_c'").set_index('time').loc['20221101': '20230620']['value'].rename('rho')
).join(
    tidy_df.query("variable == 'w_h2o__3m_c'").set_index('time').loc['20221101': '20230620']['value'].rename('w_h2o__3m_c')
)


# isolate measurements to when wind is up (100˚ - 140˚) or downvalley (300˚ - 340˚)
# .copy() so the column assignments below write to an independent frame, not a slice
kps_lateral_simple_irga_df = kps_lateral_simple_irga_df[
    ((kps_lateral_simple_irga_df.dir_10m_c >= 100) & (kps_lateral_simple_irga_df.dir_10m_c <= 140))
    |
    ((kps_lateral_simple_irga_df.dir_10m_c >= 300) & (kps_lateral_simple_irga_df.dir_10m_c <= 340))
].copy()

# label each timestamp up or downvalley
kps_lateral_simple_irga_df['direction'] = np.where(
    kps_lateral_simple_irga_df['dir_10m_c'].between(300, 340), 'down', 'up'
)

# Calculate deltas. During downvalley winds, ds = downwind_s - upwind_s.
# During up valley winds, ds = upwind_s - downwind_s
kps_lateral_simple_irga_df['ds'] = (
    kps_lateral_simple_irga_df['downwind_s'] - kps_lateral_simple_irga_df['upwind_s']
).where(
    kps_lateral_simple_irga_df['direction'] == 'down',
    kps_lateral_simple_irga_df['upwind_s'] - kps_lateral_simple_irga_df['downwind_s']
)

# horizontal separation used for the gradient
# NOTE(review): presumably the tower spacing in meters - confirm
kps_lateral_simple_irga_df['dx'] = 32
kps_lateral_simple_irga_df['ds/dx'] = kps_lateral_simple_irga_df['ds'] / kps_lateral_simple_irga_df['dx']

# lateral advection = wind speed * rho * ds/dx
kps_lateral_simple_irga_df['lateral_advection_irga'] = kps_lateral_simple_irga_df['spd_3m_c'] * kps_lateral_simple_irga_df['rho'] * kps_lateral_simple_irga_df['ds/dx']
kps_lateral_simple_irga_df

## Calculate vertical advection

### Simple two-point solution to the derivative

#### 2 to 10m

In [None]:

# Mixing ratio at the bottom (2 m) and top (10 m) of the layer, tower c
kps_mixingratio_measurements_2m = (
    tidy_df.query("variable == 'mixingratio_2m_c'")
    .set_index('time')['value']
    .rename('mixingratio_2m_c')
)
kps_mixingratio_measurements_10m = (
    tidy_df.query("variable == 'mixingratio_10m_c'")
    .set_index('time')['value']
    .rename('mixingratio_10m_c')
)

# Two-point gradient over the 8 m separation, with a constant 0.2/1000 uncertainty on ds
kps_vert_simple_df_2to10 = kps_mixingratio_measurements_2m.to_frame().join(
    kps_mixingratio_measurements_10m
)
kps_vert_simple_df_2to10 = kps_vert_simple_df_2to10.assign(
    ds = kps_vert_simple_df_2to10['mixingratio_10m_c'] - kps_vert_simple_df_2to10['mixingratio_2m_c'],
    ds_uncertainty = 0.2 / 1000,
    dz = 8,
)
kps_vert_simple_df_2to10 = kps_vert_simple_df_2to10.assign(**{
    'ds/dz': kps_vert_simple_df_2to10['ds'] / kps_vert_simple_df_2to10['dz'],
    'ds/dz uncertainty': kps_vert_simple_df_2to10['ds_uncertainty'] / kps_vert_simple_df_2to10['dz'],
})

# Mean tower-c vertical velocity over heights 2-10 m, and the 5 m dry air density (x1000)
kps_vert_simple_df_2to10 = kps_vert_simple_df_2to10.join(
    tidy_df[tidy_df.measurement == 'w']
    .query("tower == 'c'").query("height <= 10").query("height >= 2")
    .groupby(['time'])['value'].mean().rename('w')
).join(
    (
        tidy_df.query("variable == 'dryairdensity_5m_c'")
        .set_index('time').loc['20221101': '20230620']['value'] * 1000
    ).rename('rho')
)

# Water vapor fluxes: gap-filled series first (stored without a suffix), then the raw series
for _tidy_variable, _column_name in [
    ('w_h2o__2m_c_gapfill',  'w_h2o__2m_c'),
    ('w_h2o__5m_c_gapfill',  'w_h2o__5m_c'),
    ('w_h2o__10m_c_gapfill', 'w_h2o__10m_c'),
    ('w_h2o__20m_c_gapfill', 'w_h2o__20m_c'),
    ('w_h2o__2m_c_raw',      'w_h2o__2m_c_raw'),
    ('w_h2o__5m_c_raw',      'w_h2o__5m_c_raw'),
    ('w_h2o__10m_c_raw',     'w_h2o__10m_c_raw'),
    ('w_h2o__20m_c_raw',     'w_h2o__20m_c_raw'),
]:
    kps_vert_simple_df_2to10 = kps_vert_simple_df_2to10.join(
        tidy_df.query(f"variable == '{_tidy_variable}'")
        .set_index('time').loc['20221101': '20230620']['value'].rename(_column_name)
    )

# vertical advection = w * rho * ds/dz
# NOTE(review): the uncertainty column carries the sign of w - confirm that is intended
_w_times_rho = kps_vert_simple_df_2to10['w'] * kps_vert_simple_df_2to10['rho']
kps_vert_simple_df_2to10['vertical_advection_simple_2to10'] = _w_times_rho * kps_vert_simple_df_2to10['ds/dz']
kps_vert_simple_df_2to10['vertical_advection_simple_2to10_uncertainty'] = _w_times_rho * kps_vert_simple_df_2to10['ds/dz uncertainty']

#### 3 to 10m

In [None]:
# Mixing ratio at the bottom (3 m) and top (10 m) of the layer, tower c
kps_mixingratio_measurements_3m = (
    tidy_df.query("variable == 'mixingratio_3m_c'")
    .set_index('time')['value']
    .rename('mixingratio_3m_c')
)
kps_mixingratio_measurements_10m = (
    tidy_df.query("variable == 'mixingratio_10m_c'")
    .set_index('time')['value']
    .rename('mixingratio_10m_c')
)

# Two-point gradient over the 7 m separation, with a constant 0.2/1000 uncertainty on ds
kps_vert_simple_df_3to10 = kps_mixingratio_measurements_3m.to_frame().join(
    kps_mixingratio_measurements_10m
)
kps_vert_simple_df_3to10 = kps_vert_simple_df_3to10.assign(
    ds = kps_vert_simple_df_3to10['mixingratio_10m_c'] - kps_vert_simple_df_3to10['mixingratio_3m_c'],
    ds_uncertainty = 0.2 / 1000,
    dz = 7,
)
kps_vert_simple_df_3to10 = kps_vert_simple_df_3to10.assign(**{
    'ds/dz': kps_vert_simple_df_3to10['ds'] / kps_vert_simple_df_3to10['dz'],
    'ds/dz uncertainty': kps_vert_simple_df_3to10['ds_uncertainty'] / kps_vert_simple_df_3to10['dz'],
})

# Mean tower-c vertical velocity over heights 3-10 m, and the 5 m dry air density (x1000)
kps_vert_simple_df_3to10 = kps_vert_simple_df_3to10.join(
    tidy_df[tidy_df.measurement == 'w']
    .query("tower == 'c'").query("height <= 10").query("height >= 3")
    .groupby(['time'])['value'].mean().rename('w')
).join(
    (
        tidy_df.query("variable == 'dryairdensity_5m_c'")
        .set_index('time').loc['20221101': '20230620']['value'] * 1000
    ).rename('rho')
)

# Water vapor fluxes: gap-filled series first (stored without a suffix), then the raw series
for _tidy_variable, _column_name in [
    ('w_h2o__3m_c_gapfill',  'w_h2o__3m_c'),
    ('w_h2o__5m_c_gapfill',  'w_h2o__5m_c'),
    ('w_h2o__10m_c_gapfill', 'w_h2o__10m_c'),
    ('w_h2o__20m_c_gapfill', 'w_h2o__20m_c'),
    ('w_h2o__3m_c_raw',      'w_h2o__3m_c_raw'),
    ('w_h2o__5m_c_raw',      'w_h2o__5m_c_raw'),
    ('w_h2o__10m_c_raw',     'w_h2o__10m_c_raw'),
    ('w_h2o__20m_c_raw',     'w_h2o__20m_c_raw'),
]:
    kps_vert_simple_df_3to10 = kps_vert_simple_df_3to10.join(
        tidy_df.query(f"variable == '{_tidy_variable}'")
        .set_index('time').loc['20221101': '20230620']['value'].rename(_column_name)
    )

# vertical advection = w * rho * ds/dz
# NOTE(review): the uncertainty column carries the sign of w - confirm that is intended
_w_times_rho = kps_vert_simple_df_3to10['w'] * kps_vert_simple_df_3to10['rho']
kps_vert_simple_df_3to10['vertical_advection_simple_3to10'] = _w_times_rho * kps_vert_simple_df_3to10['ds/dz']
kps_vert_simple_df_3to10['vertical_advection_simple_3to10_uncertainty'] = _w_times_rho * kps_vert_simple_df_3to10['ds/dz uncertainty']

#### 2 to 20m

In [None]:
# Mixing ratio at the bottom (2 m) and top (20 m) of the layer, tower c
kps_mixingratio_measurements_2m = tidy_df.query("variable == 'mixingratio_2m_c'").set_index('time')['value'].rename('mixingratio_2m_c')
kps_mixingratio_measurements_20m = tidy_df.query("variable == 'mixingratio_20m_c'").set_index('time')['value'].rename('mixingratio_20m_c')

# Two-point gradient over the 18 m separation, with a constant 0.2/1000 uncertainty on ds
kps_vert_simple_df_2to20 = pd.DataFrame(kps_mixingratio_measurements_2m).join(kps_mixingratio_measurements_20m)
kps_vert_simple_df_2to20['ds'] = kps_vert_simple_df_2to20['mixingratio_20m_c'] - kps_vert_simple_df_2to20['mixingratio_2m_c']
kps_vert_simple_df_2to20['ds_uncertainty'] = 0.2 / 1000
kps_vert_simple_df_2to20['dz'] = 18
kps_vert_simple_df_2to20['ds/dz'] = kps_vert_simple_df_2to20['ds'] / kps_vert_simple_df_2to20['dz']
kps_vert_simple_df_2to20['ds/dz uncertainty'] = kps_vert_simple_df_2to20['ds_uncertainty'] / kps_vert_simple_df_2to20['dz']

kps_vert_simple_df_2to20 = kps_vert_simple_df_2to20.join(
    # Mean tower-c vertical velocity over the same 2-20 m layer as the gradient.
    # The lower bound was previously 3 (copied from the 3-to-20 m cell), which left the
    # 2 m measurement out of the layer mean; the 2-to-10 m cell already uses >= 2.
    tidy_df[tidy_df.measurement == 'w'].query("tower == 'c'").query("height <= 20").query("height >= 2").groupby(['time'])['value'].mean().rename('w')
).join(
    # Mean dry air density over all tower-c heights (x1000)
    1000*tidy_df[tidy_df.measurement == 'dry air density'].query("tower == 'c'").groupby(['time'])['value'].mean().rename('rho')
).join(
    tidy_df.query("variable == 'w_h2o__2m_c_gapfill'").set_index('time')['value'].rename('w_h2o__2m_c')
).join(
    tidy_df.query("variable == 'w_h2o__20m_c_gapfill'").set_index('time')['value'].rename('w_h2o__20m_c')
)

# vertical advection = w * rho * ds/dz
# NOTE(review): the uncertainty column carries the sign of w - confirm that is intended
kps_vert_simple_df_2to20['vertical_advection_simple_2to20'] =  kps_vert_simple_df_2to20['w'] * kps_vert_simple_df_2to20['rho'] * kps_vert_simple_df_2to20['ds/dz']
kps_vert_simple_df_2to20['vertical_advection_simple_2to20_uncertainty'] =  kps_vert_simple_df_2to20['w'] * kps_vert_simple_df_2to20['rho'] * kps_vert_simple_df_2to20['ds/dz uncertainty']

#### 3 to 20m

In [None]:
# Mixing ratio at the bottom (3 m) and top (20 m) of the layer, tower c
kps_mixingratio_measurements_3m = (
    tidy_df.query("variable == 'mixingratio_3m_c'")
    .set_index('time')['value']
    .rename('mixingratio_3m_c')
)
kps_mixingratio_measurements_20m = (
    tidy_df.query("variable == 'mixingratio_20m_c'")
    .set_index('time')['value']
    .rename('mixingratio_20m_c')
)

# Two-point gradient over the 17 m separation, with a constant 0.2/1000 uncertainty on ds
kps_vert_simple_df_3to20 = kps_mixingratio_measurements_3m.to_frame().join(
    kps_mixingratio_measurements_20m
)
kps_vert_simple_df_3to20 = kps_vert_simple_df_3to20.assign(
    ds = kps_vert_simple_df_3to20['mixingratio_20m_c'] - kps_vert_simple_df_3to20['mixingratio_3m_c'],
    ds_uncertainty = 0.2 / 1000,
    dz = 17,
)
kps_vert_simple_df_3to20 = kps_vert_simple_df_3to20.assign(**{
    'ds/dz': kps_vert_simple_df_3to20['ds'] / kps_vert_simple_df_3to20['dz'],
    'ds/dz uncertainty': kps_vert_simple_df_3to20['ds_uncertainty'] / kps_vert_simple_df_3to20['dz'],
})

# Mean tower-c vertical velocity over heights 3-20 m, mean tower-c dry air density (x1000),
# and the gap-filled water vapor fluxes at the two ends of the layer
kps_vert_simple_df_3to20 = kps_vert_simple_df_3to20.join(
    tidy_df[tidy_df.measurement == 'w']
    .query("tower == 'c'").query("height <= 20").query("height >= 3")
    .groupby(['time'])['value'].mean().rename('w')
).join(
    (
        tidy_df[tidy_df.measurement == 'dry air density']
        .query("tower == 'c'").groupby(['time'])['value'].mean() * 1000
    ).rename('rho')
).join(
    tidy_df.query("variable == 'w_h2o__3m_c_gapfill'").set_index('time')['value'].rename('w_h2o__3m_c')
).join(
    tidy_df.query("variable == 'w_h2o__20m_c_gapfill'").set_index('time')['value'].rename('w_h2o__20m_c')
)

# vertical advection = w * rho * ds/dz
# NOTE(review): the uncertainty column carries the sign of w - confirm that is intended
_w_times_rho = kps_vert_simple_df_3to20['w'] * kps_vert_simple_df_3to20['rho']
kps_vert_simple_df_3to20['vertical_advection_simple_3to20'] = _w_times_rho * kps_vert_simple_df_3to20['ds/dz']
kps_vert_simple_df_3to20['vertical_advection_simple_3to20_uncertainty'] = _w_times_rho * kps_vert_simple_df_3to20['ds/dz uncertainty']

In [None]:
# Cumulative (advection * top height + gap-filled flux at the top height) * 1.8 between
# 2022-11-30 and 2023-05-09, for each two-point gradient estimate.
# Each estimate uses the flux column of its OWN frame; the 2to20 entry previously borrowed
# 'w_h2o__20m_c' from the 3to20 frame.
# NOTE(review): 1.8 presumably converts g/m^2/s over 30-min steps to mm (1800 s / 1000) - confirm.
vertical_advection_estimates_gradient_calculations_df = pd.concat([
    pd.DataFrame(
        (
            (
                layer_df[f'vertical_advection_simple_{layer}'] * top_height
                + layer_df[f'w_h2o__{top_height}m_c']
            ) * 1.8
        ).loc[
            '20221130': '20230509'
        ].cumsum().rename('value')
    ).assign(height = top_height, type = layer)
    for layer, top_height, layer_df in [
        ('2to20', 20, kps_vert_simple_df_2to20),
        ('3to20', 20, kps_vert_simple_df_3to20),
        ('2to10', 10, kps_vert_simple_df_2to10),
        ('3to10', 10, kps_vert_simple_df_3to10),
    ]
])

# Maximum of each cumulative series, one point per estimate type, grouped by height
alt.Chart(
        vertical_advection_estimates_gradient_calculations_df
).mark_point(size=50).encode(
    alt.Y('height:N').sort('-y'),
    alt.X('max(value):Q').scale(domain=[20,42]),
    alt.Shape('type:N'),
    alt.Color('type:N')
).properties(
    width=200, height=200
)

### Numerical multi-point solution to the derivative

#### 3 to 10m

In [None]:
# # gather the measurements we want
# kps_mixingratio_measurements = tidy_df[tidy_df.measurement == 'mixing ratio'][tidy_df.height >= 3][tidy_df.height <= 10]
# kps_dryairdensity_measurements = tidy_df[tidy_df.measurement == 'dry air density'][tidy_df.height >= 3][tidy_df.height <= 10]
# kps_verticalvelocity_measurements = tidy_df[tidy_df.measurement == 'w'].query("tower == 'c'")

# # iterate over each timestamp, doing calculations for each
# timestamps = kps_mixingratio_measurements.time.unique()
# vert_adv_ls = []
# ts_ls = []
# for ts in timestamps:
#     # get the measurements we want for this timestamp 
#     ex_s = kps_mixingratio_measurements[kps_mixingratio_measurements.time == ts]
#     ex_rho = kps_dryairdensity_measurements[kps_dryairdensity_measurements.time == ts]
#     ex_w = kps_verticalvelocity_measurements[kps_verticalvelocity_measurements.time == ts]
#     # calculate the scalar gradient profile
#     ds_dz = np.gradient(
#         ex_s.sort_values('height')['value'],
#         ex_s.sort_values('height')['height'],
#     )
#     # calculate the wind profile by interpolating actual measurements to where we have mixing ratio measurements
#     w_interp = np.interp(
#         ex_s.sort_values('height')['height'],
#         ex_w.sort_values('height')['height'],
#         ex_w.sort_values('height')['value']
#     )
#     # Calculate the vertical advection term
#     vert_advection = (1000 * ex_rho.sort_values('height').value.values * w_interp * ds_dz).sum()
#     vert_adv_ls.append(vert_advection)
#     ts_ls.append(ts)

# kps_vert_complex_10m_df = pd.DataFrame({
#     'time': ts_ls,
#     'vertical_advection_complex': vert_adv_ls
# })

#### 3 to 20m

In [None]:
# # gather the measurements we want
# kps_mixingratio_measurements = tidy_df[tidy_df.measurement == 'mixing ratio'][tidy_df.height >= 3][tidy_df.height <= 20]
# kps_dryairdensity_measurements = tidy_df[tidy_df.measurement == 'dry air density'][tidy_df.height >= 3][tidy_df.height <= 20]
# kps_verticalvelocity_measurements = tidy_df[tidy_df.measurement == 'w'].query("tower == 'c'")

# # iterate over each timestamp, doing calculations for each
# timestamps = kps_mixingratio_measurements.time.unique()
# vert_adv_ls = []
# ts_ls = []
# for ts in timestamps:
#     # get the measurements we want for this timestamp 
#     ex_s = kps_mixingratio_measurements[kps_mixingratio_measurements.time == ts]
#     ex_rho = kps_dryairdensity_measurements[kps_dryairdensity_measurements.time == ts]
#     ex_w = kps_verticalvelocity_measurements[kps_verticalvelocity_measurements.time == ts]
#     # calculate the scalar gradient profile
#     ds_dz = np.gradient(
#         ex_s.sort_values('height')['value'],
#         ex_s.sort_values('height')['height'],
#     )
#     # calculate the wind profile by interpolating actual measurements to where we have mixing ratio measurements
#     w_interp = np.interp(
#         ex_s.sort_values('height')['height'],
#         ex_w.sort_values('height')['height'],
#         ex_w.sort_values('height')['value']
#     )
#     # Calculate the vertical advection term
#     vert_advection = (1000 * ex_rho.sort_values('height').value.values * w_interp * ds_dz).sum()
#     vert_adv_ls.append(vert_advection)
#     ts_ls.append(ts)

# kps_vert_complex_20m_df = pd.DataFrame({
#     'time': ts_ls,
#     'vertical_advection_complex': vert_adv_ls
# })

## Aggregate results

### Differential form

In [None]:
# Start from the gap-filled and raw water vapor fluxes held in the 3-to-10 m frame ...
advection_1d_fluxdensity_nonnorm_df = kps_vert_simple_df_3to10[[
    'w_h2o__3m_c', 'w_h2o__5m_c', 'w_h2o__10m_c', 'w_h2o__20m_c',
    'w_h2o__3m_c_raw', 'w_h2o__5m_c_raw', 'w_h2o__10m_c_raw', 'w_h2o__20m_c_raw'
]]

# ... then left-join every other term on the time index, in this order
for _term in [
    # vertical advection estimates and their uncertainties
    kps_vert_simple_df_3to20[['vertical_advection_simple_3to20', 'vertical_advection_simple_3to20_uncertainty']],
    kps_vert_simple_df_3to10[['vertical_advection_simple_3to10', 'vertical_advection_simple_3to10_uncertainty']],
    kps_vert_simple_df_2to10[['vertical_advection_simple_2to10', 'vertical_advection_simple_2to10_uncertainty']],
    kps_vert_simple_df_2to20[['vertical_advection_simple_2to20', 'vertical_advection_simple_2to20_uncertainty']],
    # lateral advection estimates
    kps_lateral_simple_df[['lateral_advection_2m', 'lateral_advection_3m', 'lateral_advection_4m', 'lateral_advection_3m_uncertainty']],
    kps_lateral_simple_irga_df['lateral_advection_irga'],
    # storage change
    kps_lateral_simple_df[['ds/dt']],
    # air density flux
    airdensityflux_df['air_density_flux'],
]:
    advection_1d_fluxdensity_nonnorm_df = advection_1d_fluxdensity_nonnorm_df.join(_term)

### Calculate differential form of measured EC turbulent fluxes
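Calculate the vertical divergence of the measured turbulent water vapor flux over the 3–20 m and 3–10 m layers: $$ \frac{\overline{w'q'}_{20\,\mathrm{m}} - \overline{w'q'}_{3\,\mathrm{m}}}{17\,\mathrm{m}} \quad \text{and} \quad  \frac{\overline{w'q'}_{10\,\mathrm{m}} - \overline{w'q'}_{3\,\mathrm{m}}}{7\,\mathrm{m}}$$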

In [None]:
# Divergence of the raw turbulent flux: (flux at layer top - flux at 3 m) / layer thickness
advection_1d_fluxdensity_nonnorm_df['vertical_turb_flux_divergence_3to20'] = (
    advection_1d_fluxdensity_nonnorm_df['w_h2o__20m_c_raw']
    .sub(advection_1d_fluxdensity_nonnorm_df['w_h2o__3m_c_raw'])
    .div(17)
)

advection_1d_fluxdensity_nonnorm_df['vertical_turb_flux_divergence_3to10'] = (
    advection_1d_fluxdensity_nonnorm_df['w_h2o__10m_c_raw']
    .sub(advection_1d_fluxdensity_nonnorm_df['w_h2o__3m_c_raw'])
    .div(7)
)

# Plot results

In [None]:
# Display the aggregated table of flux, advection, storage-change and air-density-flux columns
advection_1d_fluxdensity_nonnorm_df

## Composite - No BS

Define functions

In [None]:
# Shared panel size (pixels) for the composite charts below
PLOT_WIDTH = 150
PLOT_HEIGHT = 150

# Function to plot diurnal cycles in conservation terms
########################################################
def get_chart_with_errorbands(src, title):
    """Median diurnal cycle of the conservation terms with an advection uncertainty band.

    `src` needs a `time` column, the two plotted term columns, and
    `vertical_advection_lb` / `vertical_advection_ub` for the band. Returns
    the band layered underneath the lines; `title` is set on the line chart.
    """
    # Plotted term -> line color. 'ds/dt' and 'air_density_flux' (with colors
    # '#2ca02c', 'black', 'grey') were listed as optional extras and stay disabled.
    term_colors = {
        'vertical_turb_flux_divergence_3to20': '#ff7f0e',
        'vertical_advection_simple_2to20': '#1f77b4',
    }
    plotted_terms = list(term_colors.keys())
    line_colors = list(term_colors.values())

    time_of_day = 'hoursminutes(time):T'

    # One median line per term
    term_lines = (
        alt.Chart(src)
        .mark_line()
        .transform_fold(plotted_terms)
        .encode(
            alt.X(time_of_day).axis(labelAlign='center'),
            alt.Y('median(value):Q').title('Flux density (g/m^3/s)').scale(domain = [-0.0005, 0.0005], clamp=True),
            alt.Color('key:N').scale(domain = plotted_terms, range = line_colors)
        )
        .properties(width=PLOT_WIDTH, height=PLOT_HEIGHT, title=title)
    )

    # Shaded band between the median lower and upper advection bounds,
    # drawn in the vertical advection line color
    advection_band = (
        alt.Chart(src)
        .mark_area(color = line_colors[1], opacity=0.35)
        .encode(
            alt.X(time_of_day),
            alt.Y('median(vertical_advection_lb):Q').title(''),
            alt.Y2('median(vertical_advection_ub):Q').title(''),
        )
    )
    return advection_band + term_lines

# Function to plot diurnal cycles in profiles
########################################################
def plot_profiles(src):
    """Mean value against height, one colored line per `conditions` category.

    Rows with height <= 1, height == 12 or height == 6 are filtered out
    before plotting. `src` needs `value`, `height` and `conditions` columns.
    """
    time_bin_order = ['0-3','3-6','6-9','9-12','12-15','15-18','18-21','21-0' ]

    profile_chart = alt.Chart(src)
    # Apply the three height filters in their original order
    for height_filter in (
        alt.datum.height > 1,
        alt.datum.height != 12,
        alt.datum.height != 6,
    ):
        profile_chart = profile_chart.transform_filter(height_filter)

    profile_chart = profile_chart.mark_line(point={'size':20}, strokeWidth=1)
    profile_chart = profile_chart.encode(
        alt.X('mean(value):Q').scale(zero=False).title('Mixing ratio (g/Kg)'),
        alt.Y('height:Q').title('Height (m)'),
        alt.Color('conditions:N', sort=time_bin_order).title('time (hours)'),
        # connect the points from the lowest to the highest height
        alt.Order('height:Q')
    )
    return profile_chart.properties(width=PLOT_WIDTH, height=PLOT_HEIGHT)
def plot_profiles_nocolor(src):
    """Same profiles as ``plot_profiles`` but all in black.

    ``conditions`` is encoded as a detail channel, so each class still gets its own
    line without a colour legend. Rows with height <= 1, == 6 or == 12 are dropped.
    """
    filtered = (
        alt.Chart(src)
        .transform_filter(alt.datum.height > 1)
        .transform_filter(alt.datum.height != 12)
        .transform_filter(alt.datum.height != 6)
    )
    black_lines = filtered.mark_line(
        point={'size':20, 'color': 'black'},
        strokeWidth=1,
        color='black',
    )
    return black_lines.encode(
        alt.X('mean(value):Q').scale(zero=False).title('Mixing ratio (g/Kg)'),
        alt.Y('height:Q').title('Height (m)'),
        alt.Order('height:Q'),
        alt.Detail('conditions:N'),
    ).properties(width=PLOT_WIDTH, height=PLOT_HEIGHT)

# Function to plot diurnal cycles in w
########################################################
def w_diurnal_chart(src, title):
    """Mean vertical velocity by hour of day, one line per height, over a y = 0 rule.

    ``src`` must provide ``time``, ``value`` and ``height``; it is attached once to
    the layered chart and shared by both layers.
    """
    zero_rule = (
        alt.Chart()
        .transform_calculate(y = '0')
        .mark_rule()
        .encode(y='y:Q')
    )
    hourly_means = alt.Chart().mark_line().encode(
        alt.X('hours(time):T').axis(labelAlign='center', values=[0,6,12,18,]),
        alt.Y('mean(value):Q').title('Vertical velocity (m/s)'),
        alt.Color('height:O').scale(scheme='turbo'),
    )
    layered = alt.layer(zero_rule, hourly_means, data = src)
    return layered.properties(width=PLOT_WIDTH, height=PLOT_HEIGHT, title=title)

Isolate data

In [None]:
# Data for diurnal cycles in conservation terms
########################################################
# Lower/upper bounds (value -/+ uncertainty) of the 2-20 m vertical advection term;
# get_chart_with_errorbands draws its shaded band from these two columns.
src_cons_terms = advection_1d_fluxdensity_nonnorm_df.assign(
    vertical_advection_lb=lambda d: (
        d['vertical_advection_simple_2to20'] - d['vertical_advection_simple_2to20_uncertainty']
    ),
    vertical_advection_ub=lambda d: (
        d['vertical_advection_simple_2to20'] + d['vertical_advection_simple_2to20_uncertainty']
    ),
)

# Timestamps that are both in a wind-direction class and in nobs_times.
# NOTE(review): upvalley_wind_times / downvalley_wind_times are assigned in a cell
# further down this notebook; confirm they are also defined before this cell so the
# notebook survives Restart & Run All.
upvalley_nobs_times = set(pd.to_datetime(upvalley_wind_times)) & set(nobs_times)
downvalley_nobs_times = set(pd.to_datetime(downvalley_wind_times)) & set(nobs_times)

# One frame per wind regime, selected on the (time) index and then flattened.
src_cons_terms_upvalley_nobs = src_cons_terms.loc[
    src_cons_terms.index.isin(upvalley_nobs_times)
].reset_index()
src_cons_terms_dovalley_nobs = src_cons_terms.loc[
    src_cons_terms.index.isin(downvalley_nobs_times)
].reset_index()


# Data for diurnal cycles in mixing ratio profiles
########################################################
# Tower-c mixing ratio rows at the nobs_times timestamps. The explicit .copy() makes
# this an independent frame, so the column assignments below do not write into a
# slice of tidy_df (which raises SettingWithCopyWarning).
src_mixingratio = tidy_df[tidy_df.measurement == 'mixing ratio'].query("tower == 'c'")
src_mixingratio = src_mixingratio[src_mixingratio.time.isin(nobs_times)].copy()

# Label every row with its 3-hour time-of-day class. Bins are left-closed
# (right=False), so the leading -1 edge just makes the first bin cover hours 0-2.
src_mixingratio['conditions'] = pd.cut(
    src_mixingratio.time.dt.hour,
    [-1,3,6,9,12,15,18,21,24],
    labels=['0-3', '3-6', '6-9', '9-12', '12-15', '15-18', '18-21', '21-0'],
    right=False
)

# Keep every other class so the profile plot stays readable.
src_mixingratio = src_mixingratio[
    src_mixingratio['conditions'].isin(
        [
            '0-3',
            # '3-6',
            '6-9',
            # '9-12',
            '12-15',
            # '15-18',
            '18-21',
            # '21-0'
        ]
    )
].copy()

# Scale by 1000 for plotting; the profile charts label this axis 'Mixing ratio (g/Kg)'.
src_mixingratio['value'] = src_mixingratio['value']*1000

# Split by wind regime and drop the lowest measurement level(s) (height <= 1).
src_mixingratio_upvalley_nobs = src_mixingratio[
    src_mixingratio.time.isin(upvalley_nobs_times)
].query("height > 1")
src_mixingratio_dovalley_nobs = src_mixingratio[
    src_mixingratio.time.isin(downvalley_nobs_times)
].query("height > 1")

# Data for diurnal cycles in w
########################################################
# Tower-c vertical velocity rows, then one subset per wind regime above height 1.
src_w = tidy_df[(tidy_df.measurement == 'w') & (tidy_df.tower == 'c')]
src_w_upvalley_nobs = src_w[src_w.time.isin(upvalley_nobs_times)].query("height > 1")
src_w_dovalley_nobs = src_w[src_w.time.isin(downvalley_nobs_times)].query("height > 1")

In [None]:

# Each column stacks the up-valley panel on top of the down-valley panel.
s_profile_charts = alt.vconcat(
    plot_profiles_nocolor(src_mixingratio_upvalley_nobs).properties(title=''),
    plot_profiles_nocolor(src_mixingratio_dovalley_nobs).properties(title=''),
)
w_charts = alt.vconcat(
    w_diurnal_chart(src_w_upvalley_nobs, title=''),
    w_diurnal_chart(src_w_dovalley_nobs, title=''),
).resolve_scale(y='shared')
conservation_charts = alt.vconcat(
    get_chart_with_errorbands(src_cons_terms_upvalley_nobs, title=''),
    get_chart_with_errorbands(src_cons_terms_dovalley_nobs, title=''),
)

In [None]:
# Full figure: profiles | conservation terms | vertical velocity, each column with
# its own colour scale and legend.
alt.hconcat(
    s_profile_charts, conservation_charts, w_charts
).resolve_scale(
    color='independent'
).configure_legend(orient='top', columns=1)

In [None]:
# Median diurnal cycle of all four budget terms, up-valley vs. down-valley winds.
vars = [
    'vertical_turb_flux_divergence_3to20',
    'vertical_advection_simple_2to20',
    'ds/dt',
    'air_density_flux',
]
colors = ['#ff7f0e', '#1f77b4', 'grey', '#2ca02c']

# The two panels differ only in data source and title, so build them from one spec.
alt.hconcat(*[
    alt.Chart(wind_src).mark_line().transform_fold(
        vars
    ).encode(
        alt.X('hoursminutes(time):T').axis(labelAlign='center'),
        alt.Y('median(value):Q').title('Flux density (g/m^3/s)'),
        alt.Color('key:N').scale(domain=vars, range=colors),
    ).properties(width=200, height=200, title=wind_title)
    for wind_src, wind_title in [
        (src_cons_terms_upvalley_nobs, 'Upvalley Winds'),
        (src_cons_terms_dovalley_nobs, 'Downvalley Winds'),
    ]
]).resolve_scale(
    y='shared'
).display(
    renderer='svg'
)

In [None]:
# Same two-panel layout as the previous cell, restricted to the air density flux term.
vars = [
    # 'vertical_turb_flux_divergence_3to20',
    # 'vertical_advection_simple_2to20',
    # 'ds/dt',
    'air_density_flux',
]
colors = [
    # 'grey',
    '#2ca02c',
]

alt.hconcat(*[
    alt.Chart(wind_src).mark_line().transform_fold(
        vars
    ).encode(
        alt.X('hoursminutes(time):T').axis(labelAlign='center'),
        alt.Y('median(value):Q').title('Flux density (g/m^3/s)'),
        alt.Color('key:N').scale(domain=vars, range=colors),
    ).properties(width=200, height=200, title=wind_title)
    for wind_src, wind_title in [
        (src_cons_terms_upvalley_nobs, 'Upvalley Winds'),
        (src_cons_terms_dovalley_nobs, 'Downvalley Winds'),
    ]
]).resolve_scale(
    y='shared'
).display(
    renderer='svg'
)

In [None]:
# Classify timestamps by the 3 m tower-c wind direction ('dir_3m_c'):
#   up-valley   : 92 < direction < 152
#   down-valley : 292 < direction < 342
_dir_3m_c_rows = tidy_df[tidy_df.variable == 'dir_3m_c']
_dir_3m_c_valid = _dir_3m_c_rows.dropna()

upvalley_wind_times = (
    _dir_3m_c_valid
    .query("value < 152 and value > 92")
    .drop_duplicates()
    .time
)
downvalley_wind_times = (
    _dir_3m_c_valid
    .query("value < 342 and value > 292")
    .drop_duplicates()
    .time
)
# The denominator keeps rows with missing values, so the printed fractions are
# relative to all 'dir_3m_c' records, not just the valid ones.
all_wind_times = _dir_3m_c_rows.drop_duplicates().time

for _class_times in (upvalley_wind_times, downvalley_wind_times):
    print(round(len(_class_times)/len(all_wind_times),3))

In [None]:
from windrose import WindroseAxes
# 20 m wind rose for the timestamps classified as down-valley (from the 3 m direction).
# Both conditions are combined into ONE mask built on tidy_df. The previous chained
# form tidy_df[mask_a][mask_b] applied a full-length mask to an already-filtered
# frame, which pandas has to reindex (UserWarning: "Boolean Series key will be
# reindexed to match DataFrame index").
src = tidy_df[
    tidy_df.time.isin(downvalley_wind_times)
    & tidy_df.variable.isin(['spd_20m_c', 'dir_20m_c'])
].pivot_table(values = 'value', index='time', columns=['measurement']).reset_index()
ax = WindroseAxes.from_ax(figsize=(2,2))
ax.bar(src['wind direction'], src['wind speed'], normed=True, opening=.9, edgecolor='white', bins=1, nsector=17)
ax.set_yticks([])
# ax.set_yticklabels(['10%','25%'])
plt.title("20m wind rose")

In [None]:
from windrose import WindroseAxes
# 20 m wind rose for the timestamps classified as up-valley (from the 3 m direction).
# Both conditions are combined into ONE mask built on tidy_df. The previous chained
# form tidy_df[mask_a][mask_b] applied a full-length mask to an already-filtered
# frame, which pandas has to reindex (UserWarning: "Boolean Series key will be
# reindexed to match DataFrame index").
src = tidy_df[
    tidy_df.time.isin(upvalley_wind_times)
    & tidy_df.variable.isin(['spd_20m_c', 'dir_20m_c'])
].pivot_table(values = 'value', index='time', columns=['measurement']).reset_index()
ax = WindroseAxes.from_ax(figsize=(2,2))
ax.bar(src['wind direction'], src['wind speed'], normed=True, opening=.9, edgecolor='white', bins=1, nsector=17)
ax.set_yticks([])
# ax.set_yticklabels(['10%','25%'])
plt.title("20m wind rose")

In [None]:
# Mean vertical velocity by hour of day for every tower (dash pattern) and height
# (colour): up-valley panel on the left, down-valley panel on the right.
# The row selection uses a single combined mask on tidy_df; the previous
# tidy_df[mask_a][mask_b] form applied a full-length mask to an already-filtered
# frame and relied on pandas reindexing it (with a UserWarning).
alt.hconcat(*[
    (
        alt.Chart().transform_calculate(y = '0').mark_rule().encode(y='y:Q')
        + alt.Chart(
            tidy_df[(tidy_df.measurement == 'w') & tidy_df.time.isin(wind_class_times)]
        ).mark_line().encode(
            alt.X('hours(time):T').axis(labelAlign='center', values=[0,6,12,18,]),
            alt.Y('mean(value):Q').title('Vertical velocity (m/s)'),
            alt.Color('height:O').scale(scheme='turbo'),
            alt.StrokeDash('tower:N'),
        )
    )
    for wind_class_times in (upvalley_nobs_times, downvalley_nobs_times)
])

## Analyze seasonal advection corrections

### Using 3-10m and 3-20m estimates

In [None]:
# Seasonal cumulative sublimation through 20230508: raw EC flux at 10 m and 20 m, and
# the same flux plus the vertical advection term (3-10 m / 3-20 m) multiplied by the
# measurement height.
_season = advection_1d_fluxdensity_nonnorm_df.loc['20221130': '20230508']
# presumably 1800 s per 30-min record and 1000 g per kg (g/m^2/s -> mm) - TODO confirm
_to_mm = 1800/1000

w_h2o__10m_c_cumsum = (_to_mm*_season.w_h2o__10m_c).cumsum()
w_h2o__20m_c_cumsum = (_to_mm*_season.w_h2o__20m_c).cumsum()

w_h2o__10m_c_corrected_cumsum = (
    _to_mm*(_season.w_h2o__10m_c + (10*_season.vertical_advection_simple_3to10))
).cumsum()
w_h2o__20m_c_corrected_cumsum = (
    _to_mm*(_season.w_h2o__20m_c + (20*_season.vertical_advection_simple_3to20))
).cumsum()

# Solid = raw EC, dotted = advection-corrected; blue = 10 m, orange = 20 m.
w_h2o__10m_c_cumsum.plot(color='tab:blue', linestyle='-')
w_h2o__20m_c_cumsum.plot(color='tab:orange', linestyle='-')
w_h2o__10m_c_corrected_cumsum.plot(
    color='tab:blue', linestyle='dotted', label='With vertical advection correction, 10m'
)
w_h2o__20m_c_corrected_cumsum.plot(
    color='tab:orange', linestyle='dotted', label='With vertical advection correction, 20m'
)

plt.legend()
plt.ylabel('Cumulative sublimation (mm)')

In [None]:
# Season totals = last value of each cumulative series, rounded to 0.1.
for _label, _cumulative in [
    ('10m, ec\t\t', w_h2o__10m_c_cumsum),
    ('10m, corrected\t', w_h2o__10m_c_corrected_cumsum),
    ('20m, ec\t\t', w_h2o__20m_c_cumsum),
    ('20m, corrected\t', w_h2o__20m_c_corrected_cumsum),
]:
    print(_label, round(_cumulative.iloc[-1], 1))

In [None]:
# Same comparison as the previous cumulative-sublimation cell, extended to 20230619.
_season = advection_1d_fluxdensity_nonnorm_df.loc['20221130': '20230619']
# presumably 1800 s per 30-min record and 1000 g per kg (g/m^2/s -> mm) - TODO confirm
_to_mm = 1800/1000

w_h2o__10m_c_cumsum = (_to_mm*_season.w_h2o__10m_c).cumsum()
w_h2o__20m_c_cumsum = (_to_mm*_season.w_h2o__20m_c).cumsum()

w_h2o__10m_c_corrected_cumsum = (
    _to_mm*(_season.w_h2o__10m_c + (10*_season.vertical_advection_simple_3to10))
).cumsum()
w_h2o__20m_c_corrected_cumsum = (
    _to_mm*(_season.w_h2o__20m_c + (20*_season.vertical_advection_simple_3to20))
).cumsum()

# Solid = raw EC, dashed = advection-corrected; blue = 10 m, orange = 20 m.
w_h2o__10m_c_cumsum.plot(color='tab:blue', linestyle='-')
w_h2o__20m_c_cumsum.plot(color='tab:orange', linestyle='-')
w_h2o__10m_c_corrected_cumsum.plot(
    color='tab:blue', linestyle='--', label='With vertical advection correction, 10m'
)
w_h2o__20m_c_corrected_cumsum.plot(
    color='tab:orange', linestyle='--', label='With vertical advection correction, 20m'
)

plt.legend()
plt.ylabel('Cumulative sublimation (mm)')

In [None]:
# Season totals = last value of each cumulative series, rounded to 0.1.
for _label, _cumulative in [
    ('10m, ec\t\t', w_h2o__10m_c_cumsum),
    ('10m, corrected\t', w_h2o__10m_c_corrected_cumsum),
    ('20m, ec\t\t', w_h2o__20m_c_cumsum),
    ('20m, corrected\t', w_h2o__20m_c_corrected_cumsum),
]:
    print(_label, round(_cumulative.iloc[-1], 1))

In [None]:
# Cumulative sublimation through 20230508 using the 2-10 m / 2-20 m vertical
# advection estimates. These series are what the daily-difference cells below read.
_season = advection_1d_fluxdensity_nonnorm_df.loc['20221130': '20230508']
# presumably 1800 s per 30-min record and 1000 g per kg (g/m^2/s -> mm) - TODO confirm
_to_mm = 1800/1000

w_h2o__10m_c_cumsum = (_to_mm*_season.w_h2o__10m_c).cumsum()
w_h2o__20m_c_cumsum = (_to_mm*_season.w_h2o__20m_c).cumsum()

w_h2o__10m_c_corrected_cumsum = (
    _to_mm*(_season.w_h2o__10m_c + (10*_season.vertical_advection_simple_2to10))
).cumsum()
w_h2o__20m_c_corrected_cumsum = (
    _to_mm*(_season.w_h2o__20m_c + (20*_season.vertical_advection_simple_2to20))
).cumsum()

# Solid = raw EC, dashed = advection-corrected; blue = 10 m, orange = 20 m.
w_h2o__10m_c_cumsum.plot(color='tab:blue', linestyle='-')
w_h2o__20m_c_cumsum.plot(color='tab:orange', linestyle='-')
w_h2o__10m_c_corrected_cumsum.plot(
    color='tab:blue', linestyle='--', label='With vertical advection correction, 10m'
)
w_h2o__20m_c_corrected_cumsum.plot(
    color='tab:orange', linestyle='--', label='With vertical advection correction, 20m'
)

plt.legend()
plt.ylabel('Cumulative sublimation (mm)')

In [None]:
# Season totals = last value of each cumulative series, rounded to 0.1.
for _label, _cumulative in [
    ('10m, ec\t\t', w_h2o__10m_c_cumsum),
    ('10m, corrected\t', w_h2o__10m_c_corrected_cumsum),
    ('20m, ec\t\t', w_h2o__20m_c_cumsum),
    ('20m, corrected\t', w_h2o__20m_c_corrected_cumsum),
]:
    print(_label, round(_cumulative.iloc[-1], 1))

In [None]:
# Relative change in daily sublimation caused by the advection correction:
#   (corrected - raw) / raw, stored as a FRACTION (not percent), one value per date.
def _increase_over_day(cumulative, day):
    """Increase of a cumulative series between the first and last record of ``day``.

    The window is the label slice ``day : day + 1 day`` (both ends inclusive).
    ``.iloc`` is used for the first/last record: integer keys such as ``series[-1]``
    on a datetime-indexed Series rely on a positional fallback that pandas has
    deprecated.
    """
    window = cumulative.loc[day: day + dt.timedelta(days=1)]
    return window.iloc[-1] - window.iloc[0]


# Calendar dates present in the cumulative series (computed once, reused below).
_unique_dates = pd.Series(w_h2o__10m_c_cumsum.index.date).unique()

daily_diffs_10m = []
daily_diffs_20m = []
for date in _unique_dates:
    daily_sub_corrected_10m = _increase_over_day(w_h2o__10m_c_corrected_cumsum, date)
    daily_sub_10m = _increase_over_day(w_h2o__10m_c_cumsum, date)
    daily_sub_corrected_20m = _increase_over_day(w_h2o__20m_c_corrected_cumsum, date)
    daily_sub_20m = _increase_over_day(w_h2o__20m_c_cumsum, date)
    daily_diffs_10m.append(
        (daily_sub_corrected_10m - daily_sub_10m) / daily_sub_10m
    )
    daily_diffs_20m.append(
        (daily_sub_corrected_20m - daily_sub_20m) / daily_sub_20m
    )
daily_diffs_df = pd.DataFrame({
    'date' : _unique_dates,
    'daily_diffs_10m' : daily_diffs_10m,
    'daily_diffs_20m' : daily_diffs_20m
})

In [None]:
# Raw vs. advection-corrected 10 m cumulative series for the records of 2022-12-18.
w_h2o__10m_c_cumsum.loc['2022-12-18'].plot()
w_h2o__10m_c_corrected_cumsum.loc['2022-12-18'].plot()

In [None]:
# The 20 dates with the largest relative 10 m change.
daily_diffs_df.sort_values('daily_diffs_10m', ascending=False).head(20)

In [None]:
# Mean relative 10 m change over rows without missing values.
print(daily_diffs_df.dropna().daily_diffs_10m.mean())

In [None]:
# Median relative 10 m change over rows without missing values.
print(daily_diffs_df.dropna().daily_diffs_10m.median())

In [None]:
# Five-number summary (min, quartiles, max) of the relative 10 m change.
_valid_diffs_10m = daily_diffs_df.dropna().daily_diffs_10m
for _q in (0, .25, .50, .75, 1.0):
    print(_valid_diffs_10m.quantile(_q))

In [None]:
# Stacked time series of w, ds and the 2-10 m vertical advection term on 2023-04-11.
_columns_to_plot = ['w','ds','vertical_advection_simple_2to10']
kps_vert_simple_df_2to10.loc['2023-04-11'][_columns_to_plot].plot(subplots=True)

In [None]:
# Histogram of the daily change in sublimation due to the advection correction (10 m).
# daily_diffs_10m is stored as a fraction ((corrected - raw) / raw), while the axis
# title, bin extent and tick values below are all in percent, so convert before binning.
alt.Chart(daily_diffs_df).transform_calculate(
    pct_change_10m='100 * datum.daily_diffs_10m'
).mark_bar().encode(
    alt.X('pct_change_10m:Q').bin(
        extent=[-25, 125], step=5
    ).axis(
        values=[-25, 0, 25, 50, 75, 100, 125]
    ).title('% change in daily sublimation w/ advection correction'),
    alt.Y('count():Q').scale(type='symlog').axis(values=[0,1,2,10,20,50,100])
)

In [None]:
# Histogram of the daily change in sublimation due to the advection correction (20 m).
# daily_diffs_20m is stored as a fraction ((corrected - raw) / raw), while the axis
# title, bin extent and tick values below are all in percent, so convert before binning.
alt.Chart(daily_diffs_df).transform_calculate(
    pct_change_20m='100 * datum.daily_diffs_20m'
).mark_bar().encode(
    alt.X('pct_change_20m:Q').bin(
        extent=[-25, 125], step=5
    ).axis(
        values=[-25, 0, 25, 50, 75, 100, 125]
    ).title('% change in daily sublimation w/ advection correction'),
    alt.Y('count():Q').scale(type='symlog').axis(values=[0,1,2,10,20,50,100])
)

In [None]:
# Per-record sublimation for each EC height and for the advection-corrected 10 m /
# 20 m estimates, accumulated into cumulative totals at the end of the cell.
_season = advection_1d_fluxdensity_nonnorm_df.loc['20221130': '20230508']

ec_3m = ((1800/1000)*_season.w_h2o__3m_c)
ec_5m = ((1800/1000)*_season.w_h2o__5m_c)
ec_10m = ((1800/1000)*_season.w_h2o__10m_c)
ec_20m = ((1800/1000)*_season.w_h2o__20m_c)

# Correction = vertical advection term multiplied by the measurement height.
ec_advection_corrected_10m = (
    (1800/1000)*(_season.w_h2o__10m_c + (10*_season.vertical_advection_simple_3to10))
)
ec_advection_corrected_20m = (
    (1800/1000)*(_season.w_h2o__20m_c + (20*_season.vertical_advection_simple_3to20))
)

cumsub_df = pd.DataFrame({
    'EC (3m)' : ec_3m,
    'EC (5m)' : ec_5m,
    'EC (10m)' : ec_10m,
    'EC (20m)' : ec_20m,
    'EC w/ advection correction (10m)' : ec_advection_corrected_10m,
    'EC w/ advection correction (20m)' : ec_advection_corrected_20m,
})
# calculate a combined 3m/10m estimate, selecting height based on blowing snow:
# use the 10 m value at timestamps contained in bs_times, the 3 m value otherwise.
# Membership is tested by VALUE with a vectorised Index.isin. The earlier row-wise
# `row.name in bs_times` was slow and, if bs_times is a pandas Series, tested its
# index labels instead of its timestamps.
# NOTE(review): bs_times is defined outside this cell - confirm its type.
_is_blowing_snow = cumsub_df.index.isin(bs_times)
cumsub_df['EC (3 and 10m)'] = cumsub_df['EC (10m)'].where(
    _is_blowing_snow, cumsub_df['EC (3m)']
)
cumsub_df = cumsub_df.cumsum()

In [None]:
# Series -> (colour, dash pattern). Raw and advection-corrected estimates at the same
# height share a colour; corrected series are dashed.
_series_styles = {
    'EC (3m)': ('#1f77b4', [1,0]),
    'EC (5m)': ('#ff7f0e', [1,0]),
    'EC (10m)': ('#2ca02c', [1,0]),
    'EC w/ advection correction (10m)': ('#2ca02c', [4,2]),
    'EC (20m)': ('#d62728', [1,0]),
    'EC w/ advection correction (20m)': ('#d62728', [4,2]),
    'EC (3 and 10m)': ('#9467bd', [1,0]),
}
domain = list(_series_styles)
color_range = [color for color, _ in _series_styles.values()]
dash_range = [dash for _, dash in _series_styles.values()]

# Cumulative sublimation per estimate, smoothed with a 13-record centred rolling mean.
_cumsub_lines = (
    alt.Chart(cumsub_df.reset_index())
    .transform_fold(domain)
    .transform_window(
        rolling_avg = 'mean(value)',
        groupby=['key'],
        frame =[-6,6]
    )
    .mark_line()
    .encode(
        alt.X('time:T'),
        alt.Y('rolling_avg:Q').title('Cumulative sublimation (mm SWE)').scale(
            domain = [0,45]
        ),
        alt.Color('key:N').title('Sublimation estimate').scale(domain=domain, range=color_range),
        alt.StrokeDash('key:N').scale(domain=domain, range=dash_range),
    )
    .properties(width=300, height=300)
)
_cumsub_lines.display(renderer='svg')

In [None]:
# One bar per estimate: the maximum of its cumulative series, in the order of `domain`.
_seasonal_bars = (
    alt.Chart(cumsub_df.reset_index())
    .transform_fold(domain)
    .mark_bar()
    .encode(
        alt.Y('key:N').title('Sublimation estimate').sort(domain),
        alt.X('max(value):Q').title('Seasonal sublimation (mm SWE)'),
    )
    .properties(width=300, height=300)
)
_seasonal_bars.display(renderer='svg')

In [None]:
# Maximum of each cumulative series (the values shown by the bar chart above).
cumsub_df.max()

## Analyze case studies

### December case study

In [None]:
# Label slice shared by every panel of the December case study.
_dec_window = slice('20221221 1200', '20221223 0000')

# Budget terms for the window, plus the -/+ uncertainty bounds of the 3-10 m vertical
# advection term that the shaded band is drawn from.
src = (
    advection_1d_fluxdensity_nonnorm_df.loc[_dec_window][[
        'w_h2o__3m_c',
        'w_h2o__20m_c',
        'vertical_advection_simple_3to10',
        'vertical_advection_simple_3to10_uncertainty',
        'vertical_turb_flux_divergence_3to10',
        'vertical_turb_flux_divergence_3to20',
        'lateral_advection_3m',
        'ds/dt',
        'lateral_advection_3m_uncertainty',
        'vertical_advection_simple_3to20',
        'vertical_advection_simple_3to20_uncertainty',
    ]]
    .reset_index()
    .assign(
        vertical_advection_lb=lambda d: (
            d['vertical_advection_simple_3to10'] - d['vertical_advection_simple_3to10_uncertainty']
        ),
        vertical_advection_ub=lambda d: (
            d['vertical_advection_simple_3to10'] + d['vertical_advection_simple_3to10_uncertainty']
        ),
    )
)

# Bottom panel, lines: the plotted terms. Other columns selected above
# ('vertical_advection_simple_3to20', 'vertical_turb_flux_divergence_3to20', 'ds/dt')
# can be added to this list.
_dec_terms = [
    'vertical_advection_simple_3to10',
    'vertical_turb_flux_divergence_3to10',
]
lines_chart = (
    alt.Chart(src)
    .transform_fold(_dec_terms)
    .mark_line()
    .encode(
        alt.X('time:T').axis(format='%m/%d').title(None),
        alt.Y('value:Q').title(['Water vapor flux density', '(g/m^3/s)']).scale(
            domain = [-0.002, 0.005], clamp=True
        ),
        alt.Color('key:N'),
    )
    .properties(width=250, height = 166.66)
)

# Bottom panel, band: vertical advection -/+ its uncertainty.
vert_adv_uncert_chart = (
    alt.Chart(src)
    .mark_area(color = '#1f77b4', opacity=0.35)
    .encode(
        alt.X('time:T'),
        alt.Y('vertical_advection_lb:Q').title(''),
        alt.Y2('vertical_advection_ub:Q').title(''),
    )
)

# Top panel: blowing snow flux, one dash pattern per height.
_dec_snow_flux = (
    tidy_df[tidy_df.measurement == 'snow flux']
    .set_index('time')
    .loc[_dec_window]
    .reset_index()
)
bs_chart = alt.Chart(_dec_snow_flux).mark_line(color='black').encode(
    alt.X('time:T').axis(format='%m/%d').title(None),
    alt.Y('value:Q').title(['Blowing snow flux', '(g/m^2/s)']),
    alt.StrokeDash('height:N', legend=None),
)

# Middle panel: w'q' at each tower-c height, 3-record centred rolling mean.
_dec_wq = (
    tidy_df[tidy_df.variable.isin([
        'w_h2o__2m_c_raw', 'w_h2o__3m_c_raw',
        'w_h2o__5m_c_raw',
        'w_h2o__10m_c_raw', 'w_h2o__15m_c_raw','w_h2o__20m_c_raw',
    ])]
    .set_index('time')
    .loc[_dec_window]
    .reset_index()
)
w_q_chart = (
    alt.Chart(_dec_wq)
    .transform_window(
        rolling_avg = 'mean(value)',
        frame=[-1,1],
        groupby = ['height']
    )
    .mark_line()
    .encode(
        alt.X('time:T').axis().title(None),
        alt.Y('rolling_avg:Q').title(["w'q' (g/m^2/s)"]),
        alt.Color('height:O').scale(scheme='turbo'),
    )
)

alt.vconcat(
    bs_chart.properties(width=250, height = 83.33),
    w_q_chart.properties(width=250, height = 83.33),
    vert_adv_uncert_chart + lines_chart,
).resolve_scale(
    x='shared', color='independent', strokeDash='independent'
).display(renderer='svg')

### February case study

In [None]:
# February case study (20230211-20230212): budget terms in differential form (top)
# and blowing snow flux (bottom) on a shared time axis.
src = advection_1d_fluxdensity_nonnorm_df.loc['20230211': '20230212'][[
        'w_h2o__3m_c',
        'w_h2o__20m_c',
        'vertical_advection_simple_3to20',
        'vertical_turb_flux_divergence_3to10',
        'lateral_advection_3m',
        'ds/dt'
    ]].reset_index()

feb_casestudy_differential_form = (
    alt.Chart(src).transform_fold([
        'vertical_advection_simple_3to20',
        'vertical_turb_flux_divergence_3to10',
        'lateral_advection_3m',
        'ds/dt'
    ]).mark_line().encode(
        alt.X('time:T').axis(format='%m/%d').title(None),
        # These are per-volume terms: the diurnal and December charts label the same
        # columns g/m^3/s, and the seasonal cells multiply them by a height to obtain
        # a flux. The previous '(g/m^2/s)' label was inconsistent with that.
        alt.Y('value:Q').title(['Water vapor flux density', '(g/m^3/s)']),
        alt.Color('key:N')
    ).properties(width=400, height = 200) &
    alt.Chart(
        tidy_df[tidy_df.measurement == 'snow flux'].set_index('time').loc['20230211': '20230212'].reset_index()
    ).mark_line(color='black').encode(
        alt.X('time:T').axis(format='%m/%d').title(None),
        alt.Y('value:Q').title(['Blowing snow flux', '(g/m^2/s)']),
        alt.StrokeDash('height:N', legend=None)
    ).properties(width=400, height = 200)
).resolve_scale(x='shared', color='independent', strokeDash='independent')

feb_casestudy_differential_form

### April/May case studies

In [None]:
def get_nice_advection_timeseries(date, date2=None, frame=[-2,2], width=300, height = 200, ydomain=[-0.001,0.001]):
    """Smoothed time series of 3-20 m vertical advection and turbulent flux divergence.

    Parameters
    ----------
    date, date2 : label(s) used to slice ``advection_1d_fluxdensity_nonnorm_df.loc[date: date2]``
        ``date2`` defaults to ``date`` (a single-label slice).
    frame : list of two ints
        Rolling-window extent passed to Altair's ``transform_window`` for both layers.
    width, height : numbers
        Size applied to the line layer.
    ydomain : list of two numbers
        Fixed y-domain of the line layer; values outside it are clamped.

    Returns
    -------
    Layered Altair chart: rolling-mean uncertainty band of the vertical advection
    term under the rolling-mean lines of both terms.

    Notes
    -----
    The list defaults are only read, never mutated, so sharing them between calls
    is harmless here.
    """
    if date2 is None:
        date2 = date
    src = advection_1d_fluxdensity_nonnorm_df.loc[date: date2][[
            'w_h2o__3m_c',
            'w_h2o__20m_c',
            'vertical_advection_simple_3to10',
            'vertical_advection_simple_3to20',
            'vertical_advection_simple_3to20_uncertainty',
            'vertical_turb_flux_divergence_3to20',
            'lateral_advection_3m',
            'lateral_advection_3m_uncertainty',
            'ds/dt'
        ]].reset_index()

    # Value -/+ uncertainty bounds. Only the vertical bounds are drawn below; the
    # lateral ones are kept so the lateral term can be enabled without recomputing.
    src['vertical_advection_lb'] = src['vertical_advection_simple_3to20'] - src['vertical_advection_simple_3to20_uncertainty']
    src['vertical_advection_ub'] = src['vertical_advection_simple_3to20'] + src['vertical_advection_simple_3to20_uncertainty']
    src['lateral_advection_lb'] = src['lateral_advection_3m'] - src['lateral_advection_3m_uncertainty']
    src['lateral_advection_ub'] = src['lateral_advection_3m'] + src['lateral_advection_3m_uncertainty']

    flux_div_chart = alt.Chart(src).transform_fold([
        # 'vertical_advection_simple_3to10',
        'vertical_advection_simple_3to20',
        'vertical_turb_flux_divergence_3to20',
        # 'lateral_advection_3m',
        # 'ds/dt'
    ]).transform_window(
        rolling_avg = 'mean(value)',
        frame=frame,
        groupby = ['key']
    ).mark_line().encode(
        alt.X('time:T').axis().title(None),
        # Per-volume terms: labelled g/m^3/s to match the other charts of the same
        # columns in this notebook (the label previously read g/m^2/s).
        alt.Y('rolling_avg:Q').title(['Water vapor flux density', '(g/m^3/s)']).scale(domain=ydomain, clamp=True),
        alt.Color('key:N')
    ).properties(width=width, height = height)

    # No fold is applied on this layer, so there is no 'key' field to group by; the
    # rolling means run over the whole series (the previous groupby=['key'] referred
    # to a field that does not exist here).
    vert_adv_errorbar = alt.Chart(src).transform_window(
        rollavg_vert_adv_lb = 'mean(vertical_advection_lb)',
        rollavg_vert_adv_ub = 'mean(vertical_advection_ub)',
        frame=frame,
    ).mark_area(
        opacity = 0.3,
        color = '#1f77b4'
    ).encode(
        alt.X('time:T').axis().title(None),
        alt.Y('rollavg_vert_adv_lb:Q').title(''),
        alt.Y2('rollavg_vert_adv_ub:Q').title(''),
    )

    return vert_adv_errorbar + flux_div_chart

In [None]:
def _tidy_rows_for_day(variables, date, sort=False):
    """Rows of ``tidy_df`` whose ``variable`` is in ``variables``, label-sliced to ``date`` on ``time``."""
    rows = tidy_df[tidy_df.variable.isin(variables)].set_index('time')
    if sort:
        rows = rows.sort_index()
    return rows.loc[date: date].reset_index()


def _smoothed_by_height_chart(variables, date, y_title, agg, frame, sort=False, y_scale=None):
    """300x125 line chart of a rolling ``agg`` of ``value`` vs. time, one turbo-coloured line per height."""
    y = alt.Y('rolling_avg:Q').title([y_title])
    if y_scale is not None:
        y = y.scale(**y_scale)
    return alt.Chart(
        _tidy_rows_for_day(variables, date, sort=sort)
    ).transform_window(
        rolling_avg = f'{agg}(value)',
        frame=frame,
        groupby = ['height']
    ).mark_line().encode(
        alt.X('time:T').axis().title(None),
        y,
        alt.Color('height:O').scale(scheme='turbo')
    ).properties(width=300, height = 125)


def _mixing_ratio_profiles_chart(date):
    """Small-multiple mixing-ratio profiles (g/kg vs. height) at hours 3, 9, 15 and 21 of ``date``."""
    rows = tidy_df.query("measurement == 'mixing ratio'").set_index('time').loc[date: date].reset_index()
    return alt.Chart(rows).transform_filter(
        # hours divisible by 3 but not by 6
        'hours(datum.time)%3 == 0 & hours(datum.time)%6 != 0'
    ).transform_calculate(
        value_g_per_kg = '1000 * datum.value'
    ).mark_line().encode(
        alt.X('mean(value_g_per_kg):Q').sort('-y').title('s (g/kg)').scale(zero=True),
        alt.Y('height:Q'),
        alt.Facet('hours(time):O', spacing=5),
    ).properties(width=70, height = 70)


def get_chart(date, extra_panels=()):
    """Stacked case-study panels for one day.

    Default panels, top to bottom:
      1. w'q' at tower-c heights (3-record rolling mean)
      2. vertical wind speed by height (5-record rolling mean) overlaid with the
         20 m wind direction on an independent y-axis
      3. the advection / flux-divergence chart from ``get_nice_advection_timeseries``

    Parameters
    ----------
    date : label used to slice the ``time`` index (``.loc[date: date]``)
    extra_panels : iterable of str, optional
        Additional panels appended below the defaults, in the given order. Allowed
        names: ``'ri'``, ``'mixingratio'``, ``'wspd'``, ``'T'``. They are only built
        when requested; previously they were always built and then discarded.

    Returns
    -------
    Vertically concatenated Altair chart with independent colour scales.

    Raises
    ------
    ValueError
        If ``extra_panels`` contains an unknown panel name.
    """
    # Lazily-built optional panels (each entry is a zero-argument factory).
    optional_panels = {
        'ri': lambda: _smoothed_by_height_chart(
            ['Ri_3m_c', 'Ri_20m_c'], date, 'Ri', 'median', [-1,1],
            y_scale={'domain': [-1,2], 'clamp': True},
        ),
        'mixingratio': lambda: _mixing_ratio_profiles_chart(date),
        'wspd': lambda: _smoothed_by_height_chart(
            ['spd_3m_c', 'spd_20m_c'], date, 'Wind speed (m/s)', 'median', [-1,1],
        ),
        'T': lambda: _smoothed_by_height_chart(
            ['Tsurf_c', 'T_3m_c', 'T_20m_c'], date, 'T (˚C)', 'median', [-1,1],
        ),
    }
    unknown_panels = [name for name in extra_panels if name not in optional_panels]
    if unknown_panels:
        raise ValueError(
            f"Unknown extra_panels {unknown_panels}; choose from {sorted(optional_panels)}"
        )

    flux_div_chart = get_nice_advection_timeseries(date).properties(
        width=300, height = 125
    )

    lhflux_chart = _smoothed_by_height_chart(
        ['w_h2o__3m_c_raw',
         'w_h2o__5m_c_raw',
         'w_h2o__10m_c_raw', 'w_h2o__15m_c_raw','w_h2o__20m_c_raw'],
        date, "w'q' (g/m^2/s)", 'mean', [-1,1],
    )

    # This selection explicitly sorts the time index before slicing (as before).
    w_chart = _smoothed_by_height_chart(
        ['w_3m_c', 'w_5m_c', 'w_10m_c', 'w_15m_c','w_20m_c'],
        date, 'Vertical wind speed (m/s)', 'mean', [-2,2], sort=True,
    )

    dir_chart = alt.Chart(
        _tidy_rows_for_day(['dir_20m_c'], date)
    ).mark_line(color='black', strokeDash=[4,2]).encode(
        alt.X('time:T').axis().title(None),
        alt.Y('value:Q').title(['Wind direction']),
    ).properties(width=300, height = 125)

    panels = (
        lhflux_chart
        & (w_chart + dir_chart).resolve_scale(y='independent')
        & flux_div_chart
    )
    for name in extra_panels:
        panels = panels & optional_panels[name]()
    return panels.resolve_scale(color='independent')

In [None]:
# Case-study panels for the records selected by the label '20230505'.
get_chart('20230505')

In [None]:
# Side-by-side case-study panels: '20230415' (left) vs. '20230417' (right).
(get_chart('20230415') | get_chart('20230417') ).display(renderer='svg')

In [None]:
# Side-by-side case-study panels: '20230415' (left) vs. '20230607' (right).
(get_chart('20230415') | get_chart('20230607') ).display(renderer='svg')

In [None]:
# Side-by-side case-study panels: '20230415' (left) vs. '20230418' (right).
(get_chart('20230415') | get_chart('20230418') ).display(renderer='svg')

In [None]:
# Box plots of vertical wind speed against binned 3 m wind direction, one facet per
# sonic (all tower-c heights plus the 3 m / 10 m sonics on towers d, ue and uw).
# NOTE(review): `rule` and `local_downvalley_wind_times` are assigned here but not
# used by the chart in this cell; kept in case later cells read them.
rule = alt.Chart().transform_calculate(y='0').mark_rule().encode(y='y:Q')
local_downvalley_wind_times = (
    tidy_df[tidy_df.variable == 'dir_3m_c']
    .dropna()
    .query("value < 360")
    .query("value > 252")
    .drop_duplicates()
    .time
)
vars = [
    'w_2m_c', 'w_3m_c', 'w_5m_c', 'w_10m_c', 'w_15m_c', 'w_20m_c',
    'w_3m_d', 'w_10m_d',
    'w_3m_ue', 'w_10m_ue',
    'w_3m_uw', 'w_10m_uw',
]

# Wide table: one row per time, one column per variable, so the direction column can
# serve as x while the w columns are folded into key/value pairs.
_w_and_dir_wide = tidy_df[
    tidy_df.variable.isin(['dir_3m_c'] + vars)
].pivot(index='time', columns='variable', values='value')

alt.Chart(_w_and_dir_wide).transform_fold(vars).mark_boxplot(outliers=False).encode(
    alt.X('dir_3m_c:Q').bin(maxbins=20),
    alt.Y('value:Q').scale(domain=[-0.5, 0.5], clamp=True).title('Vertical wind speed (m/s)'),
    alt.Facet('key:O', columns = 3).sort(vars),
).properties(height = 200, width=300)

In [None]:
# Same box plots as the previous cell, restricted to the 10 m sonic on each tower.
# NOTE(review): `rule` and `local_downvalley_wind_times` are assigned here but not
# used by the chart in this cell; kept in case later cells read them.
rule = alt.Chart().transform_calculate(y='0').mark_rule().encode(y='y:Q')
local_downvalley_wind_times = (
    tidy_df[tidy_df.variable == 'dir_3m_c']
    .dropna()
    .query("value < 360")
    .query("value > 252")
    .drop_duplicates()
    .time
)
vars = [
    'w_10m_c',
    'w_10m_d',
    'w_10m_ue',
    'w_10m_uw',
]

# Wide table: one row per time, one column per variable.
_w_and_dir_wide = tidy_df[
    tidy_df.variable.isin(['dir_3m_c'] + vars)
].pivot(index='time', columns='variable', values='value')

alt.Chart(_w_and_dir_wide).transform_fold(vars).mark_boxplot(outliers=False).encode(
    alt.X('dir_3m_c:Q').bin(maxbins=20),
    alt.Y('value:Q').scale(domain=[-0.5, 0.5], clamp=True).title('Vertical wind speed (m/s)'),
    alt.Facet('key:O', columns = 2).sort(vars),
).properties(height = 200, width=300)

In [None]:
# Cumulative sublimation on 20230415: raw 10 m EC flux vs. the advection-corrected
# 10 m and 20 m estimates.
_adv = advection_1d_fluxdensity_nonnorm_df.loc['20221130': '20230619']

raw = (1800/1000)*_adv.w_h2o__10m_c
corrected_10m = (
    (1800/1000)*(_adv.w_h2o__10m_c + (10*_adv.vertical_advection_simple_3to10))
)
# The 20 m estimate pairs the 20 m EC flux with the 3-20 m advection term, as every
# other 20 m estimate in this notebook does. This cell previously added the 20 m
# advection term to the 10 m flux, which looked like a copy-paste slip.
# NOTE(review): revert to w_h2o__10m_c if that pairing was intentional.
corrected_20m = (
    (1800/1000)*(_adv.w_h2o__20m_c + (20*_adv.vertical_advection_simple_3to20))
)

raw.loc['20230415': '20230415'].cumsum().plot(label = 'raw')
corrected_10m.loc['20230415': '20230415'].cumsum().plot(label = 'corrected (10m)')
corrected_20m.loc['20230415': '20230415'].cumsum().plot(label = 'corrected (20m)')
plt.legend()

In [None]:
# Slice the season window once; every series below is derived from it.
season_flux_df = advection_1d_fluxdensity_nonnorm_df.loc['20221130': '20230619']
w_h2o_10m_c = season_flux_df.w_h2o__10m_c

# Uncorrected 10 m flux, scaled by 1800/1000
# (presumably seconds per 30-min record and g -> kg; TODO confirm).
raw = (1800/1000)*(w_h2o_10m_c)

# 10 m flux plus the 3-to-10 m vertical advection term multiplied by 10.
corrected_10m = (1800/1000)*(
    w_h2o_10m_c + (10*season_flux_df.vertical_advection_simple_3to10)
)

# NOTE(review): this also starts from the 10 m flux (not 20 m) and adds the
# 3-to-20 m advection term multiplied by 20 — confirm that is intended.
corrected_20m = (1800/1000)*(
    w_h2o_10m_c + (20*season_flux_df.vertical_advection_simple_3to20)
)

# Cumulative totals over the single day 2023-06-07.
raw.loc['20230607': '20230607'].cumsum().plot(label = 'raw')
corrected_10m.loc['20230607': '20230607'].cumsum().plot(label = 'corrected (10m)')
corrected_20m.loc['20230607': '20230607'].cumsum().plot(label = 'corrected (20m)')
plt.legend()

In [None]:
# Measurements to facet (facets are drawn in this order).
measurements = [
    'w_h2o_',   'u_h2o_',   'v_h2o_', 
    'w_tc_',    'u_tc_',    'v_tc_',   
    'u_w_', 'v_w_', 
    'turbulent kinetic energy',
    'mixing ratio',
    'potential temperature',
    'w',
]
heights = [3, 5, 10, 15, 20]
date = '20230417'

# Single-day, tower-c subset of the selected measurements at the selected heights,
# excluding any variable whose name contains 'predicted'.
src = (
    tidy_df[tidy_df.measurement.isin(measurements)]
    .loc[lambda df: ~ df.variable.str.contains('predicted')]
    .set_index('time').loc[date: date].reset_index()
    .query("tower == 'c'")
    .loc[lambda df: df.height.isin(heights)]
)

# One line per height, one panel per measurement, each panel with its own y scale.
measurement_lines = alt.Chart(src).mark_line().encode(
    alt.X('time:T'). axis().title(None),
    alt.Y('value:Q'),
    alt.Color('height:O').scale(scheme='turbo'),
    alt.Facet("measurement:N", columns = 3).sort(
        measurements
    )
)
measurement_lines.properties(width=300, height = 125).resolve_scale(
    y='independent'
)

In [None]:
# Mixing ratio variables on tower c: the surface value plus every metre from 1 m to 20 m.
# Every entry must end with a comma: without one, Python silently concatenates adjacent
# string literals, which previously merged the 9 m and 10 m names into one bogus variable
# and dropped both heights from the comparison.
kps_mixingratio_vars = [
    'Tsurfmixingratio_c',
    'mixingratio_1m_c',
    'mixingratio_2m_c',
    'mixingratio_3m_c',
    'mixingratio_4m_c',
    'mixingratio_5m_c',
    'mixingratio_6m_c',
    'mixingratio_7m_c',
    'mixingratio_8m_c',
    'mixingratio_9m_c',
    'mixingratio_10m_c',
    'mixingratio_11m_c',
    'mixingratio_12m_c',
    'mixingratio_13m_c',
    'mixingratio_14m_c',
    'mixingratio_15m_c',
    'mixingratio_16m_c',
    'mixingratio_17m_c',
    'mixingratio_18m_c',
    'mixingratio_19m_c',
    'mixingratio_20m_c',
]
# Tower-c mixing ratio rows for 2023-05-05, tagged with site 'kps'.
kps_day_df = (
    tidy_df[tidy_df.variable.isin(kps_mixingratio_vars)]
    .set_index('time')
    .loc['20230505':'20230505'][['value', 'height']]
    .assign(site='kps')
)

# Annex mixing ratio for the same day, averaged to 30-minute bins and tagged
# with a fixed height of 2.8 and site 'annex'.
s_compare_df_annex = (
    s_annex_df.sort_index()
    .loc['20230505':'20230505']
    .rename(columns={'mixing_ratio_annex': 'value'})
    .resample('30min').mean()
    .assign(height=2.8, site='annex')
)

# Stack both sites in long form and scale values by 1000 (axis is labelled g/kg).
s_compare_df = pd.concat([kps_day_df.reset_index(), s_compare_df_annex.reset_index()])
s_compare_df = s_compare_df.assign(value=s_compare_df['value']*1000)

# Only the heights listed in the colour domain receive an explicit colour.
height_color = alt.Color('height:N').scale(
    domain = [2.8, 2, 3, 4],
    range = ['black', '#1f77b4', '#ff7f0e', '#2ca02c']
)
alt.Chart(s_compare_df).mark_line().encode(
    alt.X('time:T'),
    alt.Y('value:Q').scale(zero=False).title('mixing ratio (g/kg)'),
    height_color
).properties(width=400, height=200)

In [None]:
# Build the same line chart (colour = height, dash = tower) for wind speed and for
# temperature on 2023-04-15, then stack them with independent legends.
wind_speed_chart, temperature_chart = [
    alt.Chart(
        tidy_df.query(measurement_query).set_index('time').loc['20230415'].reset_index()
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y('value:Q'),
        alt.Color('height:O').scale(scheme='turbo'),
        alt.StrokeDash('tower:N')
    )
    for measurement_query in (
        "measurement == 'wind speed'",
        "measurement == 'temperature'",
    )
]
(wind_speed_chart & temperature_chart).resolve_scale(color='independent', strokeDash='independent')

In [None]:
# Build the same line chart (colour = height, dash = tower) for wind speed and for
# temperature on 2023-04-17, then stack them with independent legends.
wind_speed_chart, temperature_chart = [
    alt.Chart(
        tidy_df.query(measurement_query).set_index('time').loc['20230417'].reset_index()
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y('value:Q'),
        alt.Color('height:O').scale(scheme='turbo'),
        alt.StrokeDash('tower:N')
    )
    for measurement_query in (
        "measurement == 'wind speed'",
        "measurement == 'temperature'",
    )
]
(wind_speed_chart & temperature_chart).resolve_scale(color='independent', strokeDash='independent')

In [None]:
s_compare_df['site_and_height'] = s_compare_df['site'] + '_' + s_compare_df['height'].astype('str')
s_compare_df = s_compare_df.pivot(index='time', values='value', columns='site_and_height')
(s_compare_df['kps_3.0'] - s_compare_df['annex_2.8']).plot()

In [None]:
(s_compare_df['kps_3.0'] - s_compare_df['annex_2.8']).mean(), (s_compare_df['kps_3.0'] - s_compare_df['annex_2.8']).max(), (s_compare_df['kps_3.0'] - s_compare_df['annex_2.8']).min()

In [None]:
(s_compare_df['kps_20.0'] - s_compare_df['kps_3.0']).plot()

In [None]:
(s_compare_df['kps_20.0'] - s_compare_df['kps_3.0']).mean(),(s_compare_df['kps_20.0'] - s_compare_df['kps_3.0']).max(),(s_compare_df['kps_20.0'] - s_compare_df['kps_3.0']).min(),

In [None]:
# Keep rows with 0 < height <= 20.
src = s_compare_df.query("height <= 20").query("height > 0")

# Vega expression selecting every third hour between 06:00 and 18:00.
every_third_daytime_hour = 'hours(datum.time) % 3 == 0 & hours(datum.time) >= 6 & hours(datum.time) <= 18'

site_color = alt.Color('site:N', sort='descending').scale(
    domain = ['kps', 'annex'],
    range = ['#ff7f0e', 'black', ],
)

# Mean mixing ratio against height, one small panel per selected hour;
# points are connected in order of height.
hourly_profiles = (
    alt.Chart(src)
    .transform_filter(every_third_daytime_hour)
    .mark_line(point=True)
    .encode(
        alt.X('mean(value):Q').scale(zero=False).title(['mixing ratio', '(g/kg)']).axis(values=[2,3,4]),
        alt.Y('height').title('height (m)'),
        site_color,
        alt.Order('height'),
        alt.Facet('hours(time):T', columns=5, spacing=2).header(format='%H:%M')
    )
)
hourly_profiles.properties(width=75, height=100).resolve_scale(x='shared').display(renderer='svg')

In [None]:
# 'spd_3m_c' time series for the single day 2023-05-05.
spd_3m_c_day_df = (
    tidy_df[tidy_df.variable == 'spd_3m_c']
    .set_index('time')
    .loc['20230505':'20230505']
    .reset_index()
)
alt.Chart(spd_3m_c_day_df).mark_line().encode(
    x=alt.X('time:T'),
    y=alt.Y('value:Q').title('Horizontal wind speed (m/s)')
).properties(width=400, height=200)

In [None]:
def _hourly_profile_chart(measurement):
    """Return faceted height profiles of one tower-c measurement on 2023-05-05.

    Rows of `tidy_df` with the given `measurement` on tower 'c' between 10:00 and
    17:00 are drawn as mean value against height, coloured by the minute of the
    timestamp, with one 100x100 panel per hour.
    """
    window_df = (
        tidy_df[tidy_df.measurement == measurement]
        .query("tower == 'c'")
        .set_index('time')
        .loc['20230505 1000 ': '20230505 1700']
        .reset_index()
    )
    return alt.Chart(window_df).mark_line().encode(
        alt.X('mean(value):Q').sort('-y'),
        alt.Y('height:Q'),
        alt.Color('minutes(time):N'),
        alt.Facet('hours(time)', columns=8)
    ).properties(height = 100, width= 100)

# Stack the three measurements vertically, in this order.
_hourly_profile_chart('w_h2o_') & _hourly_profile_chart('w') & _hourly_profile_chart('mixing ratio')

In [None]:

# Wind direction at 3, 10 and 20 m for 2023-05-01 through 2023-05-08.
# Both conditions are combined into a single mask built on `tidy_df`. Chaining
# `df[mask_a][mask_b]` applies a mask computed on the full frame to an already
# filtered one, which makes pandas reindex the mask and emit a UserWarning.
wind_direction_mask = (
    (tidy_df.measurement == 'wind direction')
    & tidy_df.height.isin([3,10,20])
)
src = tidy_df[wind_direction_mask]
src = src.set_index('time').loc['20230501':'20230508'].reset_index()
alt.Chart(src).mark_line().encode(
    alt.X('time:T'),
    alt.Y('value:Q'),
    alt.StrokeDash('height:O')
).properties(width=1000)

In [None]:
# Terms that are folded into long form and drawn as smoothed lines.
budget_terms = [
    'vertical_advection_simple_3to20',
    'vertical_turb_flux_divergence_3to20',
    'lateral_advection_3m',
    'ds/dt'
]

# 2023-04-15 to 2023-04-20 window; the two w_h2o columns are carried along but not plotted.
src = advection_1d_fluxdensity_nonnorm_df.loc['20230415': '20230420'][
    ['w_h2o__3m_c', 'w_h2o__20m_c'] + budget_terms
].reset_index()

# Upper panel: 5-record centred rolling mean (frame [-2, 2]) of each term.
smoothed_terms_chart = (
    alt.Chart(src)
    .transform_fold(budget_terms)
    .transform_window(
        rolling_avg = 'mean(value)',
        frame=[-2,2],
        groupby = ['key']
    )
    .mark_line()
    .encode(
        alt.X('time:T'). axis(format='%m/%d').title(None),
        alt.Y('rolling_avg:Q').title(['Water vapor flux density', '(g/m^2/s)']),
        alt.Color('key:N')
    )
    .properties(width=400, height = 200)
)

# Lower panel: 'snow flux' rows for the same window, dashed by height.
blowing_snow_df = (
    tidy_df[tidy_df.measurement == 'snow flux']
    .set_index('time')
    .loc['20230415': '20230420']
    .reset_index()
)
blowing_snow_chart = alt.Chart(blowing_snow_df).mark_line(color='black').encode(
    alt.X('time:T'). axis(format='%m/%d').title(None),
    alt.Y('value:Q').title(['Blowing snow flux', '(g/m^2/s)']),
    alt.StrokeDash('height:N', )
).properties(width=400, height = 200)

april_casestudy_differential_form = (
    smoothed_terms_chart & blowing_snow_chart
).resolve_scale(x='shared', color='independent', strokeDash='independent')

april_casestudy_differential_form

### Mid-May Case Study, lateral advection due to source heterogeneity

In [None]:
# Terms that are folded into long form and drawn as smoothed lines.
budget_terms = [
    'vertical_advection_simple_3to20',
    'vertical_turb_flux_divergence_3to20',
    'lateral_advection_3m',
    'ds/dt'
]

# 2023-05-10 to 2023-05-20 window; the two w_h2o columns are carried along but not plotted.
src = advection_1d_fluxdensity_nonnorm_df.loc['20230510': '20230520'][
    ['w_h2o__3m_c', 'w_h2o__20m_c'] + budget_terms
].reset_index()

# Upper panel: 5-record centred rolling mean (frame [-2, 2]) of each term.
smoothed_terms_chart = (
    alt.Chart(src)
    .transform_fold(budget_terms)
    .transform_window(
        rolling_avg = 'mean(value)',
        frame=[-2,2],
        groupby = ['key']
    )
    .mark_line()
    .encode(
        alt.X('time:T'). axis(format='%m/%d').title(None),
        alt.Y('rolling_avg:Q').title(['Water vapor flux density', '(g/m^2/s)']),
        alt.Color('key:N')
    )
    .properties(width=400, height = 200)
)

# Lower panel: 'snow flux' rows for the same window, dashed by height.
blowing_snow_df = (
    tidy_df[tidy_df.measurement == 'snow flux']
    .set_index('time')
    .loc['20230510': '20230520']
    .reset_index()
)
blowing_snow_chart = alt.Chart(blowing_snow_df).mark_line(color='black').encode(
    alt.X('time:T'). axis(format='%m/%d').title(None),
    alt.Y('value:Q').title(['Blowing snow flux', '(g/m^2/s)']),
    alt.StrokeDash('height:N', )
).properties(width=400, height = 200)

may_casestudy_differential_form = (
    smoothed_terms_chart & blowing_snow_chart
).resolve_scale(x='shared', color='independent', strokeDash='independent')

may_casestudy_differential_form

# Data exploration

## Compare IRGA and Hygrometer measurements

In [None]:
# Index shared by the three long-form inputs.
comparison_index = ['time','height','tower', 'measurement']

def _value_by_measurement(measurement):
    """Return the `value` column of one measurement, indexed by time/height/tower/measurement."""
    return tidy_df[tidy_df.measurement == measurement].set_index(comparison_index)[['value']]

# `value` is selected BEFORE the x1000 scaling so that only the numeric column is
# multiplied. Scaling the whole frame also "multiplied" the leftover string columns
# (string repetition), which was wasted work on data discarded immediately afterwards.
# The x1000 on specific humidity is presumably kg/kg -> g/kg — TODO confirm.
comparison_df = pd.concat([
    1000*_value_by_measurement('specific humidity'),
    _value_by_measurement('air density'),
    _value_by_measurement('Water vapor density'),
])
# One column per measurement; pivot_table averages any duplicate (time, height, tower) rows.
comparison_df = comparison_df.reset_index().pivot_table(values='value', columns='measurement', index=['time','height','tower'])
# Hygrometer-derived water vapor density = air density x specific humidity.
comparison_df ['Water vapor density (hygr)'] = comparison_df['air density'] * comparison_df['specific humidity']
comparison_df = comparison_df.rename(columns={'Water vapor density': 'Water vapor density (irga)'})
comparison_df

In [None]:


# 3 m, tower-c rows of the irga/hygrometer comparison table.
src = comparison_df.loc[:,3,'c'].reset_index()
# max_val = math.ceil(
#     src[['Water vapor density (hygr)', 'Water vapor density (irga)']].dropna().values.max()
# )
max_val = 7
# NOTE(review): `line` is not used anywhere in this cell and its column names look like
# copy-paste leftovers; kept only in case another cell reads it.
line = pd.DataFrame({
    'Goals Conceded': [0, max_val],
    'Goals': [0, max_val],
})

# Helper columns for the 1:1 reference line: the values alternate 0, max_val, 0, ... down
# the rows, so any subset of the table still holds both end points of the line.
# np.resize repeats [0, max_val] to the required length (ending on 0 when it is odd).
src['one'] = np.resize([0, max_val], len(src))
src['onetoone'] = np.resize([0, max_val], len(src))

line_plot = alt.Chart().mark_line(color= 'grey').encode(
    alt.X('one', title=''),
    alt.Y('onetoone', title='')
)
# Binned 2-D histogram (count per cell) of hygrometer vs irga water vapor density.
scatter_plot = alt.Chart().mark_rect().encode(
    alt.X('Water vapor density (hygr):Q').scale(domain = [0, max_val], clamp=True).bin(maxbins=75).axis(values=[0,1,2,3,4,5,6,7]).title('Water vapor density (hygr)'),
    alt.Y('Water vapor density (irga):Q').scale(domain = [0, max_val], clamp=True).bin(maxbins=75).axis(values=[0,1,2,3,4,5,6,7]).title('Water vapor density (irga)'),
    alt.Color('count()'),
).properties(width=200, height=200)

# Row-wise difference (irga - hygr); both columns share src's index, so no re-indexing is needed.
src['diff'] = src['Water vapor density (irga)'] - src['Water vapor density (hygr)']

# Mean error per calendar month, computed on the `diff` column only so that other
# (possibly non-numeric) columns can never break the aggregation.
meanerror_by_month = src.groupby(src.time.dt.month)['diff'].mean().round(3)


def _r2_irga_vs_hygr(month_df):
    """Return R2 of irga against hygr over the rows where both values are present."""
    paired = month_df.dropna()
    return r2_score(paired['Water vapor density (hygr)'], paired['Water vapor density (irga)'])


r2_by_month = src.groupby(src.time.dt.month)[[
    'Water vapor density (hygr)', 'Water vapor density (irga)'
]].apply(_r2_irga_vs_hygr).round(3)

def r2_plot(month, title):
    """Return the irga-vs-hygr density panel for one calendar month.

    Layers `scatter_plot` and `line_plot` over the rows of `src` whose timestamp
    falls in `month` (1-12). The panel title is `title` followed by that month's
    mean error and R2, looked up in `meanerror_by_month` and `r2_by_month`.
    """
    month_rows = src[(src.time.dt.month == month)]
    mean_error = meanerror_by_month.loc[month]
    r2 = r2_by_month.loc[month]
    panel = alt.layer(scatter_plot, line_plot, data=month_rows)
    return panel.properties(title=f'{title} (ME: {mean_error}, R2: {r2})')

# Top row: one scatter panel per month, November through May.
monthly_scatter_row = (
    r2_plot(11, 'Nov.') | r2_plot(12, 'Dec.') | r2_plot(1, 'Jan.') | r2_plot(2, 'Feb.') | r2_plot(3, 'Mar.') | r2_plot(4, 'Apr.') | r2_plot(5, 'May') 
)

# Bottom row: mean (irga - hygr) difference by time of day, one column per month,
# with a horizontal rule at zero. October rows are excluded.
zero_rule = alt.Chart().transform_calculate(
    y = '0'
).mark_rule().encode(alt.Y('y:Q'))
mean_diff_line = alt.Chart().mark_line().encode(
    alt.X('hoursminutes(time):T'),
    alt.Y('mean(diff):Q').title('Mean difference (irga - hygr) (g/m^3)'),
)
diurnal_diff_by_month = alt.layer(
    zero_rule,
    mean_diff_line,
    data = src[(src.time.dt.month != 10)]
).properties(width=230, height=230).facet(
    column=alt.Column('month(time):T', sort=[11,12,1,2,3,4,5,6])
)

monthly_scatter_row & diurnal_diff_by_month

# Doppler lidar (DL) vertical-staring boundary-layer (BL) turbulence

In [None]:
# `os` is needed below and is not part of the notebook's import cell; importing it here
# keeps this cell runnable on a fresh kernel.
import os

import act.discovery, act.io

# Inputs
# ARM credentials are read from the environment so they are never stored in the notebook.
username = os.getenv("ARM_USERNAME")
token = os.getenv("ARM_TOKEN")
if not username or not token:
    # Fail early with a clear message instead of an opaque download failure.
    raise EnvironmentError(
        "Set the ARM_USERNAME and ARM_TOKEN environment variables before downloading ARM data."
    )
DATE_FORMAT_STR = '%Y-%m-%d'
# NOTE(review): these reuse the names of the season-wide `start_date` / `end_date`
# defined in the configuration cell at the top of the notebook and overwrite them.
start_date = "20230505"
end_date = "20230506"
DLW_DATA_STREAM = 'gucdlprofwstats4newsM1.c1'
DLW_DATA_STREAM_FILEEXT = '.cdf'
DLW_OUTPUT_DIR = os.path.join("/Users/elischwat/Development/data/sublimationofsnow/", DLW_DATA_STREAM)
# Download the datastream files for the date range into DLW_OUTPUT_DIR.
file_list = act.discovery.download_arm_data(
            username, token, DLW_DATA_STREAM, start_date, end_date, output = DLW_OUTPUT_DIR)

In [None]:
# Read the downloaded files into a flat table and convert its times from UTC to US/Mountain.
wstats_df = utils.modify_df_timezone(
    act.io.read_arm_netcdf(file_list).to_dataframe().reset_index(),
    'UTC',
    'US/Mountain'
)

# Keep rows strictly between 2023-05-05 and 2023-05-06, add rectangle edges around each
# sample (+/- 5 minutes in time, +/- 15 in height) and expose `w_variance` as `value`.
on_case_day = (wstats_df.time > '20230505') & (wstats_df.time < '20230506')
wstats_df = (
    wstats_df[on_case_day]
    .assign(
        time_low=lambda df: df['time'] - dt.timedelta(minutes=5),
        time_high=lambda df: df['time'] + dt.timedelta(minutes=5),
        height_low=lambda df: df['height'] - 15,
        height_high=lambda df: df['height'] + 15,
    )
    .rename(columns={'w_variance': 'value'})
)

In [None]:
# Lidar rows below 1000 (height units as in the data); shared by both panels.
below_1000_df = wstats_df.set_index('time').query("height < 1000").reset_index()

# Left: histogram of w in 0.25-wide bins.
w_histogram = alt.Chart(below_1000_df).mark_bar().encode(
    alt.X('w:Q').title(["w", "(m/s)"]).bin(step=0.25).axis(values=[-4,-2,-1,0,1,2,4]),
    alt.Y('count():Q')
).properties(
    width=200, height = 200,
)

# Right: time-height rectangles coloured by w, colour scale fixed to [-1, 1].
w_time_height = alt.Chart(below_1000_df).mark_rect().encode(
    alt.X('time_low:T').title('time'),
    alt.X2('time_high:T'),
    alt.Y('height_low:Q').title('Height (m)'),
    alt.Y2('height_high:Q'),
    alt.Color('w:Q').title(["w", "(m/s)"]).scale(scheme='purpleorange', domain=[-1,1],),
    tooltip='value'
).properties(
    width=500, height = 200,
    title = 'Vertical velocity from Doppler Lidar at Gothic'
)

w_histogram | w_time_height

In [None]:
# 30-minute mean of w per height (below 1000), with rectangle edges of
# +/- 15 minutes in time and +/- 15 in height around each bin.
src = (
    wstats_df.set_index('time').query("height < 1000")[['w', 'height']]
    .groupby([pd.Grouper(freq='30min'), 'height']).mean().reset_index()
    .assign(
        time_low=lambda df: df['time'] - dt.timedelta(minutes=15),
        time_high=lambda df: df['time'] + dt.timedelta(minutes=15),
        height_low=lambda df: df['height'] - 15,
        height_high=lambda df: df['height'] + 15,
    )
)

# Top: time-height rectangles of the 30-minute mean w.
w_30min_heatmap = alt.Chart(
    src
).mark_rect().encode(
    alt.X('time_low:T').title('time'),
    alt.X2('time_high:T'),
    alt.Y('height_low:Q').title('Height (m)'),
    alt.Y2('height_high:Q'),
    alt.Color('w:Q').title(["w", "(m/s)"]).scale(scheme='purpleorange', domain=[-1,1],),
).properties(
    width=500, height = 200,
    title = 'Vertical velocity from Doppler Lidar at Gothic'
)

# Bottom: mean w over all heights below 500, per timestamp.
below_500_df = wstats_df.set_index('time').query("height < 500")[['w', 'height']].reset_index()
mean_w_line = alt.Chart(below_500_df).mark_line().encode(
    alt.X('time:T'),
    alt.Y('mean(w):Q')
).properties(width=500, height = 200)

w_30min_heatmap & mean_w_line

In [None]:
# Left: histogram of the vertical velocity variance (`value`) over all heights.
variance_histogram = alt.Chart(wstats_df).mark_bar().encode(
    alt.X('value:Q').title(["w'w'", "(m² s⁻²)"]).bin(step=2),
    alt.Y('count():Q')
).properties(
    width=200, height = 200,
)

# Right: time-height rectangles of the variance for heights below 2000.
below_2000_df = wstats_df.set_index('time').query("height < 2000").reset_index()
variance_time_height = alt.Chart(below_2000_df).mark_rect().encode(
    alt.X('time_low:T').title('time'),
    alt.X2('time_high:T'),
    alt.Y('height_low:Q').title('Height (m)'),
    alt.Y2('height_high:Q'),
    alt.Color('value:Q').title(["w'w'", "(m² s⁻²)"]).scale(scheme='turbo'),
    tooltip='value'
).properties(
    width=500, height = 200,
    title = 'Vertical velocity variance from Doppler Lidar at Gothic'
)

variance_histogram | variance_time_height

In [None]:
# Bottom and top edge of the rectangle drawn for each measurement height.
# Heights missing from these tables get no edge (`dict.get` returns None).
rect_bottom_by_height = {
    1: 0,
    2: 1,
    3: 2,
    5: 3,
    10: 6.25,
    15: 10.25,
    20: 14.5,
}
rect_top_by_height = {
    1: 1,
    2: 2,
    3: 3,
    5: 6.25,
    10: 10.25,
    15: 14.5,
    20: 20,
}

# Tower-c 'w_w_' rows strictly between 2023-05-05 and 2023-05-06, with rectangle
# edges of +/- 15 minutes in time and the per-height edges from the tables above.
src = (
    tidy_df[
        (tidy_df.time > '20230505') & (tidy_df.time < '20230506')
    ]
    .query("measurement == 'w_w_'").query("tower == 'c'")
    .assign(
        time_low=lambda df: df['time'] - dt.timedelta(minutes=15),
        time_high=lambda df: df['time'] + dt.timedelta(minutes=15),
        height_low=lambda df: df['height'].apply(rect_bottom_by_height.get),
        height_high=lambda df: df['height'].apply(rect_top_by_height.get),
    )
)

# Left: histogram of the variance values.
tower_variance_histogram = alt.Chart(src).mark_bar().encode(
    alt.X('value:Q').title(["w'w' (m² s⁻²)"]).bin(),
    alt.Y('count():Q')
).properties(
    width=200, height = 200,
)

# Right: time-height rectangles coloured by the variance.
tower_variance_time_height = alt.Chart(
    src
).mark_rect().encode(
    alt.X('time_low:T').title('time'),
    alt.X2('time_high:T'),
    alt.Y('height_low:Q').title('Height (m)'),
    alt.Y2('height_high:Q'),
    alt.Color('value:Q').title(["w'w'", "(m² s⁻²)"]).scale(scheme='turbo'),
    tooltip='value'
).properties(
    width=500, height = 200,
    title = "w'w' from Tower c"
)

tower_variance_histogram | tower_variance_time_height

In [None]:
# Upper panel: lidar variance for 20 <= height < 2000; its x axis is hidden because
# the tower panel underneath carries the shared time axis.
lidar_above_tower_df = (
    wstats_df.set_index('time').query("height < 2000").query("height >= 20").reset_index()
)
upper_chart = alt.Chart(lidar_above_tower_df).mark_rect().encode(
    x=alt.X('time_low:T').title('time').axis(None),
    x2=alt.X2('time_high:T'),
    y=alt.Y('height_low:Q').title('Height (m)').scale(domain = [20, 2000]),
    y2=alt.Y2('height_high:Q'),
    color=alt.Color('value:Q').title(["w'w'", "(m² s⁻²)"]).scale(scheme='turbo'),
    tooltip='value'
).properties(
    width=500, height = 200,
    title = 'Vertical velocity variance from Doppler Lidar at Gothic'
)

# Lower panel: tower variance held in `src`, colour domain fixed to [0, 8].
lower_chart = alt.Chart(src).mark_rect().encode(
    x=alt.X('time_low:T').title('time'),
    x2=alt.X2('time_high:T'),
    y=alt.Y('height_low:Q').title('Height (m)'),
    y2=alt.Y2('height_high:Q'),
    color=alt.Color('value:Q').title(["w'w'", "(m² s⁻²)"]).scale(domain=[0,8], scheme='turbo'),
    tooltip='value'
).properties(
    width=500, height = 100,
)

# Stack the two panels with a slight overlap and shared x and colour scales.
stacked_panels = alt.vconcat(
    upper_chart,
    lower_chart,
    spacing = -2
)
stacked_panels.resolve_scale(x='shared', color='shared')

In [None]:
# Variance value against height for 2023-05-05.
# `src` keeps `time` as a regular column (its index is positional), so a date string
# cannot be looked up with `.loc` directly; index by time first, as the other cells do.
alt.Chart(src.set_index('time').loc['20230505'].reset_index()).mark_circle().encode(
    alt.X('value:Q'),
    alt.Y('height:Q')
).properties(width=250,height=150)