In [1]:
# netcdf/numpy/xray/stats
import xarray as xr

# plotting packages
import seaborn as sns
from matplotlib.pyplot import subplots
import matplotlib.pyplot as plt
#from mpl_toolkits.basemap import Basemap

# turbulence package
import turbpy
import turbpy.multiConst as mc

from sublimpy import tidy

from metpy.units import units
import metpy
import pint_xarray
import altair as alt
alt.data_transformers.enable('json')
import pytz

from sklearn.metrics import r2_score
import pandas as pd
import numpy as np
import datetime as dt

# Inputs

In [2]:
start_date = '20221130'
end_date = '20230509'

In [3]:
try:
    tidy_df_30Min = pd.read_parquet(f"../sos/tidy_df_30Min_{start_date}_{end_date}_noplanar_fit.parquet")
except FileNotFoundError:
    print("No file such file exists for these dates.")
tidy_df_30Min['time'] = pd.to_datetime(tidy_df_30Min['time'])

In [4]:
obukhov_src = tidy_df_30Min.query("measurement == 'Obukhov length'")
for tower in obukhov_src['tower'].unique():
    print(tower)
    for height in obukhov_src['height'].unique():
        print(height)
        src = obukhov_src.query(
                f"tower == '{tower}'"
            ).query(
                f"height == {height}"
            )
        tidy_df_30Min = tidy.tidy_df_add_variable(
            tidy_df_30Min,
            (src['height'] / src['value']).values,
            f"stability_function_{int(height)}m_{tower}",
            "stability function",
            height,
            tower
        )

c
10.0
15.0
20.0
2.0
3.0
5.0


In [5]:
disdro_daily_max_precip_date_df = pd.read_csv("../sos/disdro_daily_max_precip_date_df.csv")

# Calculate turbulence statistics

In [6]:
tke_vs_wind_speed_src = tidy_df_30Min[
    tidy_df_30Min.measurement.isin(['wind speed', 'turbulent kinetic energy', 'richardson number', 'richardson number bulk'])
].pivot_table(
    index=['time', 'tower', 'height'], 
    values='value', 
    columns='measurement'
).reset_index()

In [7]:
tke_vs_wind_speed_src['Velocity scale'] = tke_vs_wind_speed_src['turbulent kinetic energy']**0.5
tke_vs_wind_speed_src['wind speed group'] = pd.cut(tke_vs_wind_speed_src['wind speed'], np.linspace(0, 12, 25, []))
tke_vs_wind_speed_src['wind speed group'] = tke_vs_wind_speed_src['wind speed group'].apply(lambda x: 0.5*(x.left + x.right))
tke_vs_wind_speed_src = tke_vs_wind_speed_src.query("tower == 'c'")

In [8]:
tke_vs_wind_speed_src

measurement,time,tower,height,richardson number,richardson number bulk,turbulent kinetic energy,wind speed,Velocity scale,wind speed group
0,2022-11-29 17:00:00,c,2.0,0.064688,0.508533,0.957351,3.138293,0.978443,3.12
1,2022-11-29 17:00:00,c,3.0,0.119395,0.690701,0.997178,3.323426,0.998588,3.12
2,2022-11-29 17:00:00,c,5.0,0.291124,1.005739,1.109119,3.622556,1.053147,3.60
3,2022-11-29 17:00:00,c,10.0,3.491341,1.933923,1.266228,3.750382,1.125268,3.60
4,2022-11-29 17:00:00,c,15.0,8.386020,2.686861,1.242489,3.916531,1.114670,4.08
...,...,...,...,...,...,...,...,...,...
111003,2023-05-09 17:30:00,c,3.0,0.114669,0.523551,0.469139,3.240431,0.684938,3.12
111004,2023-05-09 17:30:00,c,5.0,0.339357,0.783323,0.559886,3.576861,0.748255,3.60
111005,2023-05-09 17:30:00,c,10.0,7.829833,1.511319,0.642527,3.758235,0.801578,3.60
111006,2023-05-09 17:30:00,c,15.0,8.495489,2.135995,0.731780,3.885932,0.855442,4.08


# Season long turbulence

## Plot: stability function distributions

In [9]:
alt.Chart(tidy_df_30Min.query("measurement == 'stability function'")).mark_boxplot(outliers=False).encode(
    alt.X("height:O"),
    alt.Y("value:Q").title('stability function')
).properties(width = 200, height = 200, title='Without outliers') | \
alt.Chart(tidy_df_30Min.query("measurement == 'stability function'")).mark_boxplot(outliers={'size': 1}).encode(
    alt.X("height:O"),
    alt.Y("value:Q").title('stability function').scale(domain=[-100,100], clamp=True)
).properties(width = 200, height = 200, title='With outliers')

In [10]:
alt.Chart(tidy_df_30Min.query("measurement == 'stability function'")).mark_line().encode(
    alt.X("time:T"),
    alt.Y("value:Q").scale(domain=[-100, 100], clamp=True),
    alt.Color("height:O").scale(scheme='viridis')
).properties(width = 800)

## Plot: turbulence statistics for the winter season

In [11]:
alt.Chart(
    tke_vs_wind_speed_src
).mark_bar().encode(
    alt.X(
        "wind speed:Q", 
        bin=alt.Bin(maxbins=50), 
    ),
    alt.Y("count():Q")
) | \
alt.Chart(
    tke_vs_wind_speed_src
).transform_filter(
    alt.FieldGTEPredicate('richardson number', -2)
).transform_filter(
    alt.FieldLTEPredicate('richardson number', 10)
).mark_bar().encode(
    alt.X("richardson number:Q", bin=alt.Bin(maxbins=50)),
    # alt.X("richardson number:Q", bin=alt.Bin(maxbins=50), scale=alt.Scale(domain=[-1,3], clamp=True), axis=alt.Axis(values=[-1, 0, 0.25, 1, 3], format='.02f')),
    alt.Y("count():Q")
)

In [12]:
bars = alt.Chart(tke_vs_wind_speed_src).mark_errorbar(extent='stdev').encode(
    alt.X("wind speed group:Q").title("Wind speed (m/s) (binned)"),
    alt.Y("Velocity scale:Q")
).properties(width=150, height = 100)

points = alt.Chart(tke_vs_wind_speed_src).mark_circle(color='black').encode(
    alt.X("wind speed group:Q"),
    alt.Y("mean(Velocity scale):Q")
)

(bars + points).facet('height:O', columns=3)

In [13]:
alt.Chart(tke_vs_wind_speed_src).mark_line().encode(
    alt.X("wind speed group:Q"),
    alt.Y("mean(Velocity scale):Q"),
    alt.Color("height:O")
)

In [14]:
src = tke_vs_wind_speed_src.copy()
src = src[src.height.isin([2,10, 20])]
all_data_scaling_chart = alt.Chart(
    src
).mark_circle(opacity=0.15, size=5).encode(
    alt.X("wind speed:Q"),
    alt.Y("Velocity scale:Q").title('turbulence intensity')
).properties(width=150, height=100).facet(row='height:O')
all_data_scaling_chart

In [15]:
tke_vs_wind_speed_src['threshold'] = tke_vs_wind_speed_src['wind speed'] > 3

In [16]:
alt.Chart(
    tke_vs_wind_speed_src[tke_vs_wind_speed_src['richardson number'] > 0]
).transform_filter(
    alt.FieldLTPredicate("richardson number", 10)
).mark_circle(opacity=0.15, size=5).encode(
    alt.X("richardson number:Q", scale=alt.Scale(type='log', domain = [1e-3, 1e1], clamp=True)),
    alt.Y("Velocity scale:Q"),
    alt.Color("threshold:N")
).properties(width=175, height=175).facet('height:O')

## Plot: Ri vs Velocity Scale plots for case studies

In [17]:
src = tke_vs_wind_speed_src.copy().set_index('time').loc['2023-02-19':'2023-02-25']
src = src[src['wind speed'] < 10]
src = src[src.height.isin([2,10, 20])]
all_data_scaling_chart = alt.Chart(
    src
).transform_filter(
    alt.FieldLTPredicate("wind speed", 10)
).mark_circle(opacity=0.25, size=8).encode(
    alt.X("wind speed:Q"),
    alt.Y("Velocity scale:Q", title='turbulence intensity').scale(
        domain=[0,4], clamp=True
    )
).properties(width=150, height=100)
all_data_scaling_chart

In [18]:
src = tke_vs_wind_speed_src.copy().set_index('time').loc['2023-03-16':'2023-03-20']
src = src[src['wind speed'] < 10]
src = src[src.height.isin([2,10, 20])]
all_data_scaling_chart = alt.Chart(
    src
).transform_filter(
    alt.FieldLTPredicate("wind speed", 10)
).mark_circle(opacity=0.25, size=8).encode(
    alt.X("wind speed:Q"),
    alt.Y("Velocity scale:Q", scale=alt.Scale(domain=[0,4], clamp=True), title='turbulence intensity')
).properties(width=150, height=100)
all_data_scaling_chart



In [19]:
src = tke_vs_wind_speed_src.copy().set_index('time').loc['2023-01-31':'2023-02-05']
src = src[src['wind speed'] < 10]
src = src[src.height.isin([2,10, 20])]
all_data_scaling_chart = alt.Chart(
    src
).transform_filter(
    alt.FieldLTPredicate("wind speed", 10)
).mark_circle(opacity=0.25, size=8).encode(
    alt.X("wind speed:Q"),
    alt.Y("Velocity scale:Q", scale=alt.Scale(domain=[0,4], clamp=True), title='turbulence intensity')
).properties(width=150, height=100)
all_data_scaling_chart



In [20]:
(
    alt.Chart(
        tke_vs_wind_speed_src
    ).transform_filter(
        alt.FieldLTPredicate("wind speed", 10)
    ).mark_circle(opacity=0.15).encode(
        alt.X("wind speed:Q"),
        alt.Y("Velocity scale:Q")
    ).properties(width=175, height=175).facet('height:O')
    &
    alt.Chart(
        tke_vs_wind_speed_src.set_index('time').loc['2022-12-17':'2022-12-21']
    ).transform_filter(
        alt.FieldLTPredicate("wind speed", 10)
    ).mark_circle(opacity=0.5).encode(
        alt.X("wind speed:Q"),
        alt.Y("Velocity scale:Q")
    ).properties(width=175, height=175).facet('height:O')
    &
    alt.Chart(
        tke_vs_wind_speed_src.set_index('time').loc['2023-02-19':'2023-02-25']
    ).transform_filter(
        alt.FieldLTPredicate("wind speed", 10)
    ).mark_circle(opacity=0.5).encode(
        alt.X("wind speed:Q"),
        alt.Y("Velocity scale:Q")
    ).properties(width=175, height=175).facet('height:O')
    &
    alt.Chart(
        tke_vs_wind_speed_src.set_index('time').loc['2023-01-31':'2023-02-04']
    ).transform_filter(
        alt.FieldLTPredicate("wind speed", 10)
    ).mark_circle(opacity=0.5).encode(
        alt.X("wind speed:Q"),
        alt.Y("Velocity scale:Q")
    ).properties(width=175, height=175).facet('height:O')
    &
    alt.Chart(
        tke_vs_wind_speed_src.set_index('time').loc['2023-02-04':'2023-02-08']
    ).transform_filter(
        alt.FieldLTPredicate("wind speed", 10)
    ).mark_circle(opacity=0.5).encode(
        alt.X("wind speed:Q"),
        alt.Y("Velocity scale:Q")
    ).properties(width=175, height=175).facet('height:O')
).resolve_scale(x='shared', y='shared')

## Plot: Turbulence regimes

In [21]:
base = alt.Chart(
    tke_vs_wind_speed_src
).transform_filter(
    alt.FieldOneOfPredicate('height', [2,3, 5,10, 15, 20])
).mark_line().encode(
    alt.X("wind speed group:Q"),
    alt.Y("mean(Velocity scale):Q", title='TKE^0.5'),
    alt.Color(
        "height:O", 
        scale=alt.Scale(scheme='turbo')
    )
)

base.transform_filter(alt.FieldLTPredicate('richardson number bulk', 0)).properties(title='Ri ∈ [-∞, 0)').properties(height=150) |\
base.transform_filter(alt.FieldGTPredicate('richardson number bulk', 0)).transform_filter(alt.FieldLTPredicate('richardson number bulk', 0.25)).properties(title='Ri ∈ [0, 0.25]').properties(height=150) |\
base.transform_filter(alt.FieldGTPredicate('richardson number bulk', 0.25)).properties(title='Ri ∈ [0.25, ∞]').properties(height=150)

In [22]:
base = alt.Chart(
    tke_vs_wind_speed_src
).transform_filter(
    alt.FieldOneOfPredicate('height', [2,3, 5,10, 15, 20])
).mark_line().encode(
    alt.X("wind speed group:Q"),
    alt.Y("mean(Velocity scale):Q", title='TKE^0.5'),
    alt.Color(
        "height:O", 
        scale=alt.Scale(scheme='turbo')
    )
)

base.transform_filter(alt.FieldLTPredicate('richardson number', 0)).properties(title='Ri ∈ [-∞, 0)').properties(height=150) |\
base.transform_filter(alt.FieldGTPredicate('richardson number', 0)).transform_filter(alt.FieldLTPredicate('richardson number', 0.25)).properties(title='Ri ∈ [0, 0.25]').properties(height=150) |\
base.transform_filter(alt.FieldGTPredicate('richardson number', 0.25)).properties(title='Ri ∈ [0.25, ∞]').properties(height=150)

In [23]:
bars = alt.Chart(tke_vs_wind_speed_src).mark_errorbar(extent='stdev',  thickness=2, opacity = 0.5).encode(
    alt.X("wind speed group:Q").title("Wind speed (m/s) (binned)"),
    alt.Y("Velocity scale:Q")
)

points = alt.Chart(tke_vs_wind_speed_src).mark_circle(size=100, opacity = 0.5).encode(
    alt.X("wind speed group:Q"),
    alt.Y("mean(Velocity scale):Q")
)

base = (bars + points).transform_filter(alt.FieldOneOfPredicate('height', [2,5,20])).encode(
    alt.Color("height:N")
).properties(width=200, height = 150)

base.transform_filter(alt.FieldLTPredicate('richardson number', 0)).properties(title='Ri ∈ [-∞, 0)') |\
base.transform_filter(alt.FieldGTPredicate('richardson number', 0)).transform_filter(alt.FieldLTPredicate('richardson number', 0.25)).properties(title='Ri ∈ [0, 0.25]') |\
base.transform_filter(alt.FieldGTPredicate('richardson number', 0.25)).properties(title='Ri ∈ [0.25, ∞]')

In [24]:
base = alt.Chart(
    tke_vs_wind_speed_src
).transform_filter(
    alt.datum.tower == 'c'
).transform_filter(
    alt.FieldOneOfPredicate('height', [2, 10, 15, 20])
).mark_boxplot(size=10).encode(
    # alt.X("wind speed group:Q"),
    # alt.Y("Velocity scale:Q"),
    # alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
    # alt.Facet("height:O")
    
    alt.X("height:O"),
    alt.Y("Velocity scale:Q"),
    alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
    alt.Facet("wind speed group:Q", spacing=5, title='Wind Speed (binned)')
)

(
    base.transform_filter(alt.FieldLTPredicate('richardson number', 0)).properties(title='Ri ∈ [-∞, 0)').properties(height=150) &\
    base.transform_filter(alt.FieldGTPredicate('richardson number', 0)).transform_filter(alt.FieldLTPredicate('richardson number', 0.25)).properties(title='Ri ∈ [0, 0.25]').properties(height=150) &\
    base.transform_filter(alt.FieldGTPredicate('richardson number', 0.25)).properties(title='Ri ∈ [0.25, ∞]').properties(height=150)
).configure_view(
    strokeWidth=0
)

## Plot: time series for winter season

In [25]:
blowing_snow_chart = alt.Chart(tidy_df_30Min).transform_filter(
    alt.FieldOneOfPredicate('measurement', ['snow flux'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='Blowing Snow Flux (g/m^2)',
          scale=alt.Scale(domain=[0, 4], nice=False, clamp=True)
        ),
    alt.Color("height:N")
).properties(width=1400, height=200) 

days_since_precip_chart = alt.Chart(disdro_daily_max_precip_date_df).mark_line(color='black', strokeDash=[1,2]).encode(
    alt.X('time:T'),
    alt.Y("days_since_precip:Q"),
    alt.Color("variable:N")
).properties(width=1400, height=200)

alt.layer(blowing_snow_chart, days_since_precip_chart).resolve_scale(
    y = 'independent'
)

In [26]:
print("hi")

hi


In [27]:
alt.renderers.enable("mimetype")

RendererRegistry.enable('mimetype')

In [28]:
alt.Chart(tidy_df_30Min).transform_filter(
    alt.FieldOneOfPredicate('measurement', ['cumulative sublimation (mm)'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='Cumulative sublimation (mm)'),
    alt.Color("height:N")
).properties(width=1000, height=143)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [29]:
tidy_df_30Min['EC system'] = tidy_df_30Min.apply(lambda row: f"tower {row.tower}, {int(row.height)}m", axis=1)

In [30]:
tidy_df_30Min.query(
    "measurement == 'cumulative sublimation (mm)'"
).tower.unique()

array([], dtype=object)

In [31]:
alt.Chart(
    tidy_df_30Min.query(
        "measurement == 'cumulative sublimation (mm)'"
    ).query(
        "height > 2"
    )
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='Cumulative sublimation (mm)').scale(domain=[0,50], clamp=True),
    alt.Color("EC system:N")
).properties(width=600).resolve_scale(y='independent')

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [32]:
tidy_df_30Min.measurement.unique()

array(['IDir', 'Obukhov length', 'RH', 'SWE', 'Vpile', 'Vtherm',
       'air density', 'longwave radiation incoming',
       'longwave radiation outgoing', 'mixing ratio',
       'potential temperature', 'potential virtual temperature',
       'pressure', 'richardson number', 'richardson number bulk',
       'shear velocity', 'shortwave radiation incoming',
       'shortwave radiation outgoing', 'snow flux', 'snow temperature',
       'soil moisture', 'surface potential temperature',
       'surface potential virtual temperature', 'surface temperature',
       'surface virtual temperature', 'temperature',
       'temperature gradient', 'turbulent kinetic energy', 'u', 'u_h2o_',
       'u_tc_', 'u_u_', 'u_w_', 'v', 'v_h2o_', 'v_tc_', 'v_v_', 'v_w_',
       'virtual temperature', 'w', 'w_h2o_', 'w_tc_', 'w_w_',
       'wind direction', 'wind gradient', 'wind speed',
       'stability function'], dtype=object)

In [33]:
(
    alt.Chart(tidy_df_30Min).transform_filter(
        alt.datum.measurement == 'w_h2o_'
    ).transform_filter(
        alt.datum.tower == 'ue'
    ).transform_window(
        rolling_mean = 'mean(value)',
        frame=[-12,12],
        groupby=['height','tower']
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y("rolling_mean:Q", title = 'Latent heat flux (g/m^2/s)'),
        alt.Color("height:O", scale=alt.Scale(scheme='viridis'))
    ).properties(width=1000, height=143).resolve_scale(color='independent') & \
    \
    alt.Chart(tidy_df_30Min).transform_filter(
        alt.FieldOneOfPredicate('measurement', ['cumulative sublimation (mm)'])
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y("value:Q", title='Cumulative sublimation (mm)'),
        alt.Color("height:N")
    ).properties(width=1000, height=143) & \
    \
    alt.Chart(tidy_df_30Min).transform_filter(
        alt.FieldOneOfPredicate('measurement', ['snow flux'])
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y("value:Q", title='Blowing Snow Flux (g/m^2)',
            scale=alt.Scale(domain=[0, 4], nice=False, clamp=True)
            ),
        alt.Color("height:N")
    ).properties(width=1000, height=71) & \
    \
    alt.Chart(disdro_daily_max_precip_date_df).mark_bar().encode(
        alt.X("time:T"),
        alt.Y("daily_precip (cm)", title='Daily precip (SWE, cm)')
    ).properties(width=1000, height=71) & \
    \
    alt.Chart(tidy_df_30Min).transform_filter(
        alt.FieldOneOfPredicate('measurement', ['richardson number'])
    ).transform_filter(
        alt.datum.tower == 'c'
    ).transform_filter(
        alt.datum.height == 2
    ).transform_window(
        rolling_mean = 'mean(value)',
        frame=[-12,12],
        groupby=['height','tower']
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y("rolling_mean:Q", title='Gradient Ri',
            scale=alt.Scale(domain=[0, 0.6], nice=False, clamp=True)
            )
    ).properties(width=1000, height=71) & \
    \
    alt.Chart(tidy_df_30Min).transform_filter(
        alt.datum.variable == 'Rsw_in_9m_d'
    ).mark_line().encode(
        alt.X('time:T'),
        alt.Y("value:Q", title = 'Incoming SW (w/m^2)')
    ).properties(width=1000, height=71)
).resolve_scale(color='independent', x='shared')

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


# Case Studies

## Define case study dataframes

In [49]:
# Observation derived case studies
# case_study_start_date_1 = pd.datetime(2023,1,31)
# case_study_end_date_1 = pd.datetime(2023,2,4)

# case_study_start_date_2 = pd.datetime(2023,2,19)
# case_study_end_date_2 = pd.datetime(2023,2,23)

# case_study_start_date_3 = pd.datetime(2023,3,16)
# case_study_end_date_3 = pd.datetime(2023,3,20)

# Methodologically derived case studies
case_study_start_date_1 = pd.datetime(2022,12,21)
case_study_end_date_1 = pd.datetime(2022,12,24)
days_from_jan1_2023_1 = -11

case_study_start_date_2 = pd.datetime(2023,2,19)
case_study_end_date_2 = pd.datetime(2023,2,22)
days_from_jan1_2023_2 = 49

case_study_start_date_3 = pd.datetime(2023,3,5)
case_study_end_date_3 = pd.datetime(2023,3,8)
days_from_jan1_2023_3 = 63

case_study_start_date_4 = pd.datetime(2023,4,2)
case_study_end_date_4 = pd.datetime(2023,4,5)
days_from_jan1_2023_4 = 91

case_study_1_src = tidy_df_30Min[tidy_df_30Min['time'] < case_study_end_date_1]
case_study_1_src = case_study_1_src[case_study_1_src['time'] >= case_study_start_date_1]
case_study_1_src = case_study_1_src.assign(case_study = 1)
case_study_1_src['time'] = case_study_1_src['time'] - dt.timedelta(days=days_from_jan1_2023_1)

case_study_2_src = tidy_df_30Min[tidy_df_30Min['time'] < case_study_end_date_2]
case_study_2_src = case_study_2_src[case_study_2_src['time'] >= case_study_start_date_2]
case_study_2_src = case_study_2_src.assign(case_study = 2)
case_study_2_src['time'] = case_study_2_src['time'] - dt.timedelta(days=days_from_jan1_2023_2)

case_study_3_src = tidy_df_30Min[tidy_df_30Min['time'] < case_study_end_date_3]
case_study_3_src = case_study_3_src[case_study_3_src['time'] >= case_study_start_date_3]
case_study_3_src = case_study_3_src.assign(case_study = 3)
case_study_3_src['time'] = case_study_3_src['time'] - dt.timedelta(days=days_from_jan1_2023_3)

case_study_4_src = tidy_df_30Min[tidy_df_30Min['time'] < case_study_end_date_4]
case_study_4_src = case_study_4_src[case_study_4_src['time'] >= case_study_start_date_4]
case_study_4_src = case_study_4_src.assign(case_study = 4)
case_study_4_src['time'] = case_study_4_src['time'] - dt.timedelta(days=days_from_jan1_2023_4)

  case_study_start_date_1 = pd.datetime(2022,12,21)
  case_study_end_date_1 = pd.datetime(2022,12,24)
  case_study_start_date_2 = pd.datetime(2023,2,19)
  case_study_end_date_2 = pd.datetime(2023,2,22)
  case_study_start_date_3 = pd.datetime(2023,3,5)
  case_study_end_date_3 = pd.datetime(2023,3,8)
  case_study_start_date_4 = pd.datetime(2023,4,2)
  case_study_end_date_4 = pd.datetime(2023,4,5)


## Calculate cumulative sublimation for each case study (Separately)

In [50]:
for variable in [
    'w_h2o__3m_uw',
    'w_h2o__3m_ue',
    'w_h2o__3m_c',
    'w_h2o__3m_d'
]:
    case_study_1_src = tidy.tidy_df_add_variable(
        case_study_1_src,
        np.cumsum(case_study_1_src.query(f"variable == '{variable}'")['value']*60*30).values*1000/(1e6),
        "Cumulative sublimation (mm)",
        "Cumulative sublimation (mm)",
        int(variable.split('_')[-2].split('m')[0]),
        variable.split('_')[-1]
    )

    case_study_2_src = tidy.tidy_df_add_variable(
        case_study_2_src,
        np.cumsum(case_study_2_src.query(f"variable == '{variable}'")['value']*60*30).values*1000/(1e6),
        "Cumulative sublimation (mm)",
        "Cumulative sublimation (mm)",
        int(variable.split('_')[-2].split('m')[0]),
        variable.split('_')[-1]
    )

    case_study_3_src = tidy.tidy_df_add_variable(
        case_study_3_src,
        np.cumsum(case_study_3_src.query(f"variable == '{variable}'")['value']*60*30).values*1000/(1e6),
        "Cumulative sublimation (mm)",
        "Cumulative sublimation (mm)",
        int(variable.split('_')[-2].split('m')[0]),
        variable.split('_')[-1]
    )

    case_study_4_src = tidy.tidy_df_add_variable(
        case_study_4_src,
        np.cumsum(case_study_4_src.query(f"variable == '{variable}'")['value']*60*30).values*1000/(1e6),
        "Cumulative sublimation (mm)",
        "Cumulative sublimation (mm)",
        int(variable.split('_')[-2].split('m')[0]),
        variable.split('_')[-1]
    )

## Combine case study datasets into one dataframe (for plotting)

In [51]:
case_study_compare_src = pd.concat([
    case_study_1_src.assign(case_study = 1), 
    case_study_2_src.assign(case_study = 2), 
    case_study_3_src.assign(case_study = 3),
    case_study_4_src.assign(case_study = 4)
])

In [57]:
case_study_4_src.query("variable == 'spd_3m_c'")

Unnamed: 0,measurement,variable,height,tower,time,value,EC system,case_study
3839998,wind speed,spd_3m_c,3.0,c,2023-01-01 00:00:00,2.180240,"tower c, 3m",4.0
3839999,wind speed,spd_3m_c,3.0,c,2023-01-01 00:30:00,1.647098,"tower c, 3m",4.0
3840000,wind speed,spd_3m_c,3.0,c,2023-01-01 01:00:00,1.967130,"tower c, 3m",4.0
3840001,wind speed,spd_3m_c,3.0,c,2023-01-01 01:30:00,2.075879,"tower c, 3m",4.0
3840002,wind speed,spd_3m_c,3.0,c,2023-01-01 02:00:00,1.702841,"tower c, 3m",4.0
...,...,...,...,...,...,...,...,...
3840137,wind speed,spd_3m_c,3.0,c,2023-01-03 21:30:00,3.863578,"tower c, 3m",4.0
3840138,wind speed,spd_3m_c,3.0,c,2023-01-03 22:00:00,5.350650,"tower c, 3m",4.0
3840139,wind speed,spd_3m_c,3.0,c,2023-01-03 22:30:00,4.844733,"tower c, 3m",4.0
3840140,wind speed,spd_3m_c,3.0,c,2023-01-03 23:00:00,5.593398,"tower c, 3m",4.0


## Plot time series

In [56]:
base_nolabels = alt.Chart(case_study_compare_src).encode(
    alt.X('time:T').axis(labels=False).title(None),
    alt.Column('case_study:O').header(labels=False).title(None)
).properties(width=400, height=100)

base = alt.Chart(case_study_compare_src).encode(
    alt.X('time:T').axis(format="%H00").title('time (hour)'),
    alt.Column('case_study:O').header(labels=False).title(None)
).properties(width=400, height=100)

(
    # CUMULATIVE SUBLIMATION PLOT
    base_nolabels.transform_filter(
        alt.FieldOneOfPredicate('measurement', ['Cumulative sublimation (mm)'])
    ).transform_filter(
        alt.datum.height == 3
    ).transform_filter(
        alt.datum.tower == 'c'
    ).mark_line().encode(
        alt.Y("value:Q", title='Cumulative sublimation (mm)', scale=alt.Scale(zero=False, nice=False))
    ) &
    # LATENT HEAT FLUX PLOT
    base_nolabels.transform_filter(
        alt.datum.variable == "w_h2o__3m_c"
    ).transform_window(
        rolling_mean = 'mean(value)',
        frame= [-6, 6],
        groupby = ["height"]
    ).mark_line().encode(
        alt.Y("rolling_mean:Q", title = 'Latent heat flux (g/m^2/s)')
    ) & 
    # Richardson number plot
    base_nolabels.transform_filter(
        alt.FieldOneOfPredicate('variable', ['Ri_3m_c'])
    ).transform_window(
        rolling_avg = 'median(value)',
        frame=[-3,3],
        groupby = ["case_study"]
    ).mark_line().encode(
        alt.Y("rolling_avg:Q", title='Ri'),
    ) &
    # Surface and Air Temp Plot
    base_nolabels.transform_filter(
        alt.FieldOneOfPredicate('variable', ['T_3m_c', 'Tsurf_rad_d'])
    ).mark_line().encode(
        alt.Y("value:Q", title="Temperature (˚C)"),
        alt.StrokeDash("measurement:N")
    ) & 
    # blowing snow flux
    base_nolabels.transform_filter(
        alt.datum.variable == 'SF_avg_1m_ue'
    ).mark_line().encode(
        alt.Y("value:Q", title='Blowing Snow Flux (g/m^2)')
    ) &
    # SW Incoming
    base.transform_filter(
        alt.datum.measurement == 'shortwave radiation incoming'
    ).mark_line().encode(
        alt.Y("value:Q", title='SW Incoming (W/m^2)')
    )
).resolve_scale(strokeDash='independent').configure_axis(
    labelFontSize=14
)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


## Plot Profiles

In [None]:
case_study_1_src

In [None]:
case_study_start_date_profiles = pd.datetime(2023,2,13)
case_study_end_date_profiles = pd.datetime(2023,2,14)

case_study_profiles_src = tidy_df_30Min[tidy_df_30Min['time'] < case_study_end_date_profiles]
case_study_profiles_src = case_study_profiles_src[case_study_profiles_src['time'] >= case_study_start_date_profiles]
case_study_profiles_src = case_study_profiles_src.assign(case_study = 1)

In [None]:
profiles_src = case_study_profiles_src.copy()
profiles_src = profiles_src[profiles_src['time'].dt.minute == 0]
profiles_src = profiles_src[profiles_src['time'].dt.hour%3 == 0]
profiles_src['day'] = profiles_src['time'].dt.day
profiles_src['hour'] = profiles_src['time'].dt.hour

profiles_src = profiles_src.query("hour <= 18")

t_profiles_src = profiles_src[profiles_src['measurement'].isin(
    ['potential virtual temperature', 'surface potential virtual temperature']
)]

spd_profiles_src = profiles_src[profiles_src['measurement'] == 'wind speed'].query("tower == 'c'")

(
    alt.Chart(t_profiles_src).mark_line().encode(
        alt.X('value:Q').sort('-y').title("Pot. virtual temp (˚C)"),
        alt.Y("height:Q"),
        alt.Color("hour:O").scale(scheme='purpleorange')
    ).properties(height = 150)
    &
    alt.Chart(spd_profiles_src).mark_line().encode(
        alt.X('value:Q').sort('-y').title('Wind speed (m/s)'),
        alt.Y("height:Q"),
        alt.Color("hour:O").scale(scheme='purpleorange')
    ).properties(height = 150)
).configure_axis(grid=False)

In [None]:
profiles_src = case_study_profiles_src.copy()
profiles_src = profiles_src[profiles_src['time'].dt.minute == 0]
profiles_src = profiles_src[profiles_src['time'].dt.hour%3 == 0]
profiles_src['day'] = profiles_src['time'].dt.day
profiles_src['hour'] = profiles_src['time'].dt.hour

t_profiles_src = profiles_src[profiles_src['measurement'].isin(
    ['potential virtual temperature', 'surface potential virtual temperature']
)]

spd_profiles_src = profiles_src[profiles_src['measurement'] == 'wind speed'].query("tower == 'c'")

alt.Chart(t_profiles_src).mark_line().encode(
    alt.X('value:Q').sort('-y').title("Pot. virtual temp (˚C)"),
    alt.Y("height:Q"),
    alt.Facet("time:T")
).properties(height = 100, width = 100) \
& \
alt.Chart(spd_profiles_src).mark_line().encode(
    alt.X('value:Q').sort('-y').title('Wind speed (m/s)'),
    alt.Y("height:Q"),
    alt.Facet("time:T").header(format='%H:%M')
).properties(height = 100, width = 100)

In [None]:
alt.Chart(t_profiles_src).mark_line().encode(
    alt.X('value:Q').sort('-y'),
    alt.Y("height:Q"),
    alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Facet("day:O")
).properties(height = 150)

In [None]:
wind_speed_chart = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('variable', ['spd_2m_c', 'spd_10m_c', 'spd_20m_c'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='Wind Speed (m/s)'),
    alt.Color("height:N")
).properties(width=400, height=143).facet(column='case_study:O')

tke_chart = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('variable', ['tke_2m_c', 'tke_10m_c', 'tke_20m_c'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='TKE', scale=alt.Scale(type='log')),
    alt.Color("height:N")
).properties(width=400, height=143).facet(column='case_study:O')

ri_plot = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('variable', ['Ri_2m_c', 'Ri_2m_c_Tsurf_rad_d'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='Ri', scale=alt.Scale()),
    alt.Color("variable:N")
).properties(width=400, height=143).facet(column='case_study:O')

surf_temp_chart = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('variable', ['Tsurf_c', 'Tsurf_d', 'Tsurf_ue', 'Tsurf_uw', 'Tsurf_rad_d'])
).transform_window(
    rolling_mean = 'mean(value)',
    frame = [-6, 6],
    groupby = ['variable']
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("rolling_mean:Q", title='Temperature (˚C)'),
    alt.Color("variable:N")
).properties(width=400, height=143).facet(column='case_study:O')

air_temp_chart = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('variable', ['T_2m_c'])
).transform_window(
    rolling_mean = 'mean(value)',
    frame = [-6, 6],
    groupby = ['variable']
).mark_line(color='black', strokeDash=[5,5]).encode(
    alt.X('time:T'),
    alt.Y("rolling_mean:Q", title='Temperature (˚C)'),
).properties(width=400, height=143).facet(column='case_study:O')

measured_lh_flux_plot = alt.Chart(case_study_compare_src).transform_filter(
    alt.datum.variable == 'w_h2o__2m_c'
).mark_point(color='black', size=10).encode(
    alt.X("time:T"),
    alt.Y("value:Q", title = 'Latent heat flux (g/m^2/s)', scale=alt.Scale())
).properties(width=400, height=143).facet(column='case_study:O')

predicted_lh_flux_plot = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('variable', ['w_h20_modeled_MO_HdB_2m_c', 'w_h20_modeled_MO_HdB_2m_c_Tsurf_rad_d'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q"),
    alt.Color("measurement:N")
).properties(width=400, height=143).facet(column='case_study:O')

snow_flux_chart = alt.Chart(case_study_compare_src).transform_filter(
    alt.FieldOneOfPredicate('measurement', ['snow flux'])
).mark_line().encode(
    alt.X('time:T'),
    alt.Y("value:Q", title='Blowing Snow Flux (g/m^2)'),
    alt.Color("height:N")
).properties(width=400, height=143).facet(column='case_study:O')

(wind_speed_chart & tke_chart & ri_plot & (surf_temp_chart 
                                        #    + air_temp_chart
) & (predicted_lh_flux_plot 
    #  + measured_lh_flux_plot
) & snow_flux_chart).resolve_scale(
    color='independent'
).configure_legend(
    labelLimit = 0
)

# Plot M.O.S.T model results

In [None]:
wide_df = tidy_df_30Min.pivot_table(
    index=['time'],
    columns="variable",
    values='value'
)[[
    'w_h2o__2m_c',

    'w_h20_modeled_Standard_2m_c',
    'w_h20_modeled_MO_HdB_2m_c',
    
    'Ri_2m_c',
    'SF_avg_1m_ue',
    'SF_avg_2m_ue',

    'spd_2m_c',
    'dir_2m_c',
    'u_2m_c',
    'v_2m_c',
    'w_2m_c',
    'u_w__2m_c',
    'v_w__2m_c',
    'u_tc__2m_c',
    'v_tc__2m_c',
    'u_h2o__2m_c',
    'v_h2o__2m_c',
    'w_tc__2m_c',
    'tc_2m_c',
    'T_2m_c', 
    'RH_2m_c'
]]

In [None]:
wide_df['Holtslag/de Bruin residual'] = wide_df['w_h20_modeled_MO_HdB_2m_c'] - wide_df['w_h2o__2m_c']
wide_df['Standard residual'] =  wide_df['w_h20_modeled_Standard_2m_c'] - wide_df['w_h2o__2m_c']

wide_df['Holtslag/de Bruin residual normalized'] = wide_df['Holtslag/de Bruin residual'] / wide_df['w_h2o__2m_c']
wide_df['Standard residual normalized'] = wide_df['Standard residual'] / wide_df['w_h2o__2m_c']

In [None]:
wide_df.to_csv("monin_obukhov_with_turbpy_seasonlong.csv")

In [None]:
wide_df = pd.read_csv("monin_obukhov_with_turbpy_seasonlong.csv")

In [None]:
fig, axes = plt.subplots(1,2, figsize =(10,5))

axes[0].scatter(
    wide_df['w_h2o__2m_c'],
    wide_df['w_h20_modeled_Standard_2m_c'],
    s=1,
    alpha=0.1
)
axes[0].set_title("All Data\nStandard bulk aerodynamic method")

axes[1].scatter(
    wide_df['w_h2o__2m_c'],
    wide_df['w_h20_modeled_Standard_2m_c'],
    s=1,
    alpha=0.1
)
axes[1].set_title("All Data\nHoltslag and de Bruin method for snow and ice")

for ax in axes:
    ax.set_xlim(-0.005, 0.025)
    ax.set_ylim(-0.005, 0.025)
    ax.set_xlabel("Measured Vertical Moisture Flux (g/m²/s)")
    ax.set_ylabel("Predicted Vertical Moisture Flux (g/m²/s)")
    # ax.set_aspect("equal")

plt.tight_layout()

# Examine multiple explanatory variables

In [None]:
fig, axes = plt.subplots(2,5, figsize =(30,10))

sns.scatterplot(x=wide_df['Ri_2m_c'],       y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[0,0])
sns.scatterplot(x=wide_df['SF_avg_1m_ue'],  y=wide_df['Holtslag/de Bruin residual'], s=50, alpha=0.10, ax=axes[0,1])
sns.scatterplot(x=wide_df['SF_avg_2m_ue'],  y=wide_df['Holtslag/de Bruin residual'], s=50, alpha=0.10, ax=axes[0,2])
sns.scatterplot(x=wide_df['spd_2m_c'],      y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[0,3])
sns.scatterplot(x=wide_df['dir_2m_c'],      y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[0,4])

sns.scatterplot(x=wide_df['w_2m_c'],        y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[1,0])
sns.scatterplot(x=wide_df['w_tc__2m_c'],    y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[1,1])
sns.scatterplot(x=wide_df['tc_2m_c'],       y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[1,2])
sns.scatterplot(x=wide_df['T_2m_c'],        y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[1,3])
sns.scatterplot(x=wide_df['RH_2m_c'],       y=wide_df['Holtslag/de Bruin residual'], s=15, alpha=0.10, ax=axes[1,4])

for ax in axes.flatten():
    ax.set_ylim(-0.01, 0.01)
    ax.axhline(y = 0.0, color = 'k')

# axes[0,0].set_xlim(-0.5, 0.10)
# axes[0,1].set_xlim(0, 0.25)
# axes[0,2].set_xlim(0, 0.25)
# axes[0,3].set_xlim(0, 10)
# axes[0,4].set_xlim(0, 360)
# axes[1,1].set_xlim(-0.2, 0.1)

# sns.scatterplot(wide_df['u_2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['v_2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['u_w__2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['v_w__2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['u_tc__2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['v_tc__2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['u_h2o__2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])
# sns.scatterplot(wide_df['v_h2o__2m_c'], wide_df['Holtslag/de Bruin residual'], s=1, alpha=0.1, ax=axes[0])

plt.tight_layout()

# Exclude outliers for richardson number residual comparison

In [None]:
ri_IQR = np.percentile(wide_df['Ri_2m_c'], 75) - np.percentile(wide_df['Ri_2m_c'], 25)
ri_max = np.percentile(wide_df['Ri_2m_c'], 75) + 1.5*ri_IQR
ri_min = np.percentile(wide_df['Ri_2m_c'], 75) - 1.5*ri_IQR

In [None]:
len(wide_df), len(wide_df.query(f"Ri_2m_c > {ri_min}").query(f"Ri_2m_c < {ri_max}"))

In [None]:
residual_IQR = np.nanpercentile(wide_df['Holtslag/de Bruin residual'], 75) - np.nanpercentile(wide_df['Holtslag/de Bruin residual'], 25)
residual_max = np.nanpercentile(wide_df['Holtslag/de Bruin residual'], 75) + 1.5*residual_IQR
residual_min = np.nanpercentile(wide_df['Holtslag/de Bruin residual'], 75) - 1.5*residual_IQR

In [None]:
len(wide_df), len(wide_df[(wide_df["Holtslag/de Bruin residual"] > residual_min) & (wide_df["Holtslag/de Bruin residual"] < residual_max)])

In [None]:
src = wide_df.query(f"Ri_2m_c > {ri_min}").query(f"Ri_2m_c < {ri_max}")
src = src[(src["Holtslag/de Bruin residual"] > residual_min) & (src["Holtslag/de Bruin residual"] < residual_max)]

g = sns.JointGrid(data = src, x='Ri_2m_c',       y='Holtslag/de Bruin residual', 
                #   cbar=True, binwidth=(0.01, 0.00025)
                )
g.plot_joint(sns.histplot)
g.plot_marginals(sns.kdeplot)
# plt.gca().set_xlim(-0.5, 0.10)
# plt.gca().set_ylim(-0.01, 0.01)