In [20]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')

import matplotlib.pyplot as plt

from metpy.calc import add_height_to_pressure
from metpy.units import units
import sys
sys.path.append('../splash/')
import functions_library

# Open SOS Measurement Dataset

In [21]:
start_date = '20221130'
end_date = '20230509'
# open files
tidy_df = pd.read_parquet(f'tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet')
# convert time column to datetime
tidy_df['time'] = pd.to_datetime(tidy_df['time'])
# limit data to our dates of interest, based on continuous snow cover at Kettle Ponds
tidy_df = tidy_df.set_index('time').sort_index().loc[start_date:end_date].reset_index()

# Open Turbpy Model Ensemble Dataset

In [22]:
model_df = pd.read_parquet("model_results.parquet")
# add a bunch of columns that are descriptive, from the config column which has multiple bits of info
model_df['z0'] = model_df['config'].apply(
    lambda v: v.split(' ')[-1]
)
model_df['e_sat_curve'] = model_df['config'].apply(
    lambda v: 'metpy' if 'metpy' in v else 'alduchov'
)
model_df['surface_measurement'] = model_df['config'].apply(
    lambda v: v.split(' ')[-3]
)
model_df['scheme'] = model_df['config'].apply(
    lambda v: 'andreas' if 'andreas lengths' in v else 'yang'
)
model_df['most_config'] = model_df['config'].apply(lambda s: ' '.join(s.split(' ')[:-3]))
# remove the scalar roughness length parameterization info 
model_df['most_config'] = model_df['most_config'].str.replace(' andreas lengths', '')

model_df.head()

Unnamed: 0,config,time,sensible heat conductance,latent heat conductance,sensible heat flux,latent heat flux,stabilityCorrectionParameters,param_dict,z0,e_sat_curve,surface_measurement,scheme,most_config
0,Standard Tsurf_c e_sat_alduchov z0_andreas,2022-11-30,0.007941,0.007940744,-3.530754,-14.65534,"{'L': None, 'conductanceNeutral': 0.0023275299...","{'available_capping': ['no_capping', 'louis_Ri...",z0_andreas,alduchov,Tsurf_c,yang,Standard
1,MO Beljaars Holtslag andreas lengths Tsurf_d e...,2022-11-30,0.001009,2834700.0,19.501769,1.853831,"{'L': 5.0472047308974135, 'conductanceNeutral'...","{'available_capping': ['no_capping', 'louis_Ri...",0.0001,alduchov,Tsurf_d,andreas,MO Beljaars Holtslag
2,MO Beljaars Holtslag andreas lengths Tsurf_rad...,2022-11-30,0.001142,2834700.0,15.860495,0.037459,"{'L': 7.509551492525484, 'conductanceNeutral':...","{'available_capping': ['no_capping', 'louis_Ri...",0.0001,alduchov,Tsurf_rad_d,andreas,MO Beljaars Holtslag
3,MO Cheng Brutsaert andreas lengths Tsurf_c e_s...,2022-11-30,0.00161,2834700.0,-2.382962,-10.092653,"{'L': -70.79507970858985, 'conductanceNeutral'...","{'available_capping': ['no_capping', 'louis_Ri...",0.0001,alduchov,Tsurf_c,andreas,MO Cheng Brutsaert
4,MO Cheng Brutsaert andreas lengths Tsurf_d e_s...,2022-11-30,0.000901,2834700.0,17.416304,1.653636,"{'L': 5.135544340539729, 'conductanceNeutral':...","{'available_capping': ['no_capping', 'louis_Ri...",0.0001,alduchov,Tsurf_d,andreas,MO Cheng Brutsaert


# Open COARE model results

In [23]:
coare_model_results = pd.read_parquet("coare_model_results.parquet").reset_index()
coare_model_results['z0'] = coare_model_results.config.str.split(' ').apply(lambda x: x[-1])
coare_model_results['surface_measurement'] = coare_model_results.config.str.split(' ').apply(lambda x: x[0])
coare_model_results['e_sat_curve'] = coare_model_results.config.str.split(' ').apply(lambda x: x[1])
coare_model_results.head(3)

Unnamed: 0,time,config,hsb,hlb,tau,zo,zot,zoq,L,usr,...,Cdn_10,Chn_10,Cen_10,rr,rt,rq,hlb_gperm2s,z0,surface_measurement,e_sat_curve
0,2022-11-30 00:00:00,Tsurf_c e_sat_alduchov z0_andreas,1.986449,9.42009,0.025311,0.000687,0.00011,0.000136,-111.311655,0.163917,...,0.001741,0.001462,0.00149,9.255205,1.460589,1.805124,0.003319,z0_andreas,Tsurf_c,e_sat_alduchov
1,2022-11-30 00:30:00,Tsurf_c e_sat_alduchov z0_andreas,0.609089,17.19412,0.042192,0.000448,9.1e-05,0.000111,-379.871783,0.210755,...,0.001596,0.001376,0.0014,7.775596,1.551745,1.895916,0.006059,z0_andreas,Tsurf_c,e_sat_alduchov
2,2022-11-30 01:00:00,Tsurf_c e_sat_alduchov z0_andreas,-1.383714,12.786376,0.026696,0.00037,0.000125,0.000149,440.767566,0.167234,...,0.001536,0.001389,0.001411,5.106162,1.715603,2.040416,0.004505,z0_andreas,Tsurf_c,e_sat_alduchov


# Calculate model run seasonal totals

In [24]:
from metpy.constants import density_water
seconds_in_timestep = 60*30

## COARE model runs

In [25]:
coare_model_results['hlb_mm'] = coare_model_results['hlb_gperm2s']*seconds_in_timestep/density_water
coare_cumsum_model_results = pd.DataFrame(coare_model_results.reset_index().sort_values("time").set_index(
    ["time", "config", "z0", "surface_measurement", "e_sat_curve"]
).groupby(["config"])['hlb_mm'].cumsum()).reset_index()
coare_cumsum_model_results

Unnamed: 0,time,config,z0,surface_measurement,e_sat_curve,hlb_mm
0,2022-11-30 00:00:00,Tsurf_c e_sat_alduchov z0_andreas,z0_andreas,Tsurf_c,e_sat_alduchov,0.005975
1,2022-11-30 00:00:00,Tsurf_rad_d e_sat_alduchov 0.001,0.001,Tsurf_rad_d,e_sat_alduchov,-0.000132
2,2022-11-30 00:00:00,Tsurf_d e_sat_alduchov 0.001,0.001,Tsurf_d,e_sat_alduchov,-0.001458
3,2022-11-30 00:00:00,Tsurf_c e_sat_alduchov 0.001,0.001,Tsurf_c,e_sat_alduchov,0.006116
4,2022-11-30 00:00:00,Tsurf_rad_d e_sat_alduchov 0.0005,0.0005,Tsurf_rad_d,e_sat_alduchov,-0.000122
...,...,...,...,...,...,...
185467,2023-05-09 23:30:00,Tsurf_c e_sat_alduchov 0.001,0.001,Tsurf_c,e_sat_alduchov,52.509812
185468,2023-05-09 23:30:00,Tsurf_d e_sat_alduchov 0.001,0.001,Tsurf_d,e_sat_alduchov,49.658541
185469,2023-05-09 23:30:00,Tsurf_rad_d e_sat_alduchov 0.001,0.001,Tsurf_rad_d,e_sat_alduchov,69.738211
185470,2023-05-09 23:30:00,Tsurf_d e_sat_alduchov 5e-05,5e-05,Tsurf_d,e_sat_alduchov,35.292701


In [26]:
coare_seasonal_model_results = pd.DataFrame(coare_cumsum_model_results.groupby([
    "config", 
    "z0",
    'surface_measurement',
    'e_sat_curve'
])['hlb_mm'].max()).reset_index()
coare_seasonal_model_results.head()

Unnamed: 0,config,z0,surface_measurement,e_sat_curve,hlb_mm
0,Tsurf_c e_sat_alduchov 0.0001,0.0001,Tsurf_c,e_sat_alduchov,41.087666
1,Tsurf_c e_sat_alduchov 0.0005,0.0005,Tsurf_c,e_sat_alduchov,49.679014
2,Tsurf_c e_sat_alduchov 0.001,0.001,Tsurf_c,e_sat_alduchov,52.509812
3,Tsurf_c e_sat_alduchov 0.005,0.005,Tsurf_c,e_sat_alduchov,57.098194
4,Tsurf_c e_sat_alduchov 1e-05,1e-05,Tsurf_c,e_sat_alduchov,29.933291


## for TurbPy model runs

In [27]:
model_df_cumsum = pd.DataFrame(model_df.sort_values("time").set_index(
    ["time", "config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"]
).groupby(["config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"])['latent heat flux'].cumsum()).reset_index()
agg_model_df = model_df.set_index('time').sort_index().loc[ : '20230509 17:30:0'].reset_index()
agg_model_df = agg_model_df[[
    'time',
    'config',
    'most_config',
    'z0',
    'e_sat_curve',
    'surface_measurement',
    'scheme',
    'latent heat flux'
]]
agg_model_df['latent heat flux (mm)'] = -agg_model_df['latent heat flux']*seconds_in_timestep/density_water/2838
agg_model_cum_df = pd.DataFrame(agg_model_df.sort_values("time").set_index(
    ["time", "config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"]
).groupby(["config", "scheme", "z0", "e_sat_curve", "surface_measurement", "most_config"])['latent heat flux (mm)'].cumsum()).reset_index()
agg_model_cum_df = agg_model_cum_df[agg_model_cum_df.time == agg_model_cum_df.time.max()]
agg_model_cum_df['complexity'] = agg_model_cum_df.most_config.isin(['Standard','Louis b = 12']).apply(lambda is_not_complex: 'bulk' if is_not_complex else 'most')

In [28]:
seconds_in_timestemp = 60*30

In [29]:
measured_value = tidy_df.query("variable == 'w_h2o__3m_c'").set_index('time')[['value']].cumsum().max().value*seconds_in_timestemp/density_water.magnitude
measured_min_value = tidy_df.query("variable == 'w_h2o__20m_c'").set_index('time')[['value']].cumsum().max().value*seconds_in_timestemp/density_water.magnitude
measured_max_value = tidy_df.query("variable == 'w_h2o__10m_c'").set_index('time')[['value']].cumsum().max().value*seconds_in_timestemp/density_water.magnitude

In [30]:
measured_value, measured_min_value, measured_max_value

(34.97171933414881, 26.98701865666334, 38.244808899045395)

# Isolate model runs we care about

## COARE


In [31]:
coare_cumsum_model_results = pd.DataFrame(coare_model_results.reset_index().sort_values("time").set_index(
    ["time", "config"]
).groupby(["config"])['hlb_mm'].cumsum()).reset_index()
coare_cumsum_model_results['z0'] = coare_cumsum_model_results.config.str.split(' ').apply(lambda x: x[-1])
coare_cumsum_model_results['surface_measurement'] = coare_cumsum_model_results.config.str.split(' ').apply(lambda x: x[0])
coare_cumsum_model_results['e_sat_curve'] = coare_cumsum_model_results.config.str.split(' ').apply(lambda x: x[1])
coare_cumsum_model_results

coare_seasonal_model_results = pd.DataFrame(coare_cumsum_model_results.groupby([
    "config", 
    "z0",
    'surface_measurement',
    'e_sat_curve'
])['hlb_mm'].max()).reset_index()

coare_seasonal_model_results = coare_seasonal_model_results[coare_seasonal_model_results.z0 != 'z0_windprofile_weekly']

## Turbpy

In [32]:
# MOST runs
src = agg_model_cum_df.query("e_sat_curve == 'alduchov'").query("complexity == 'most'").query("scheme == 'andreas'")
src = src[src.z0 != 'z0_windprofile_weekly']

# Standard bulk aerodynamic
src_standard = agg_model_cum_df[agg_model_cum_df.most_config == 'Standard']
src_standard = src_standard[src_standard.z0 != 'z0_windprofile_weekly']

# Plot Modeled Seasonal Totals, TurbPy and COARE, constant and variable z0 values

In [33]:
# Create horizontal rules for measured values
rules = alt.Chart().transform_calculate(
    measured = str(measured_value)
).mark_rule(strokeDash=[3,4], color='grey', opacity=0.25).encode(
    y='measured:Q'
) + alt.Chart().transform_calculate(
    measured = str(measured_min_value)
).mark_rule(strokeDash=[3,4], color='grey', opacity=0.25).encode(
    y='measured:Q'
) + alt.Chart().transform_calculate(
    measured = str(measured_max_value)
).mark_rule(strokeDash=[3,4], color='grey', opacity=0.25).encode(
    y='measured:Q'
)

In [34]:
z0_sort = ['1e-05', '5e-05', '0.0001', '0.0005', '0.001', '0.005', 
'z0_andreas', 'z0_windprofile_weekly', 'z0_andreas_weekly']

turbpy_model_results_chart = rules + (alt.Chart(src).mark_tick().encode(
    alt.X("z0:O").sort(z0_sort).axis(labelAngle=-45),
    alt.Y("latent heat flux (mm):Q").scale(zero=False),
    alt.Color("surface_measurement:N").title("Surf. T Sensor"),
    detail='config:N'
)
).properties(width = 300, height = 200, title='SNTHERM Algorithm (ticks) and Bulk Aerodynamic (dots)')

turbpy_model_results_chart

In [35]:
z0_sort = ['1e-05', '5e-05', '0.0001', '0.0005', '0.001', '0.005', 
'z0_andreas', 'z0_windprofile_weekly', 'z0_andreas_weekly']

turbpy_model_results_chart_standard = rules + (alt.Chart(src_standard).mark_circle().encode(
    alt.X("z0:O").sort(z0_sort).axis(labelAngle=-45),
    alt.Y("latent heat flux (mm):Q").scale(zero=False, domain=[20,140], clamp=True).title("Cumulative sublimation (mm)"),
    alt.Color("surface_measurement:N").title("Surf. T Sensor"),
    detail='config:N'
)
).properties(width = 300, height = 200, title='SNTHERM Algorithm')

turbpy_model_results_chart_standard

In [36]:
turbpy_model_results_chart + turbpy_model_results_chart_standard

In [37]:
coare_model_results_chart = rules + (
alt.Chart(coare_seasonal_model_results).mark_tick(
).encode(
    alt.X("z0:O").sort(z0_sort).axis(labelAngle=-45),
    alt.Y("hlb_mm:Q").scale(zero=False).title("Cumulative sublimation (mm)"),
    alt.Color("surface_measurement:N").title("Surf. T Sensor"),
)
).properties(width = 300, height = 200, title = 'COARE Algorithm')
coare_model_results_chart

## Combined Plot

In [38]:
combined_model_chart = (
    (
        (
            turbpy_model_results_chart + turbpy_model_results_chart_standard
        ) | coare_model_results_chart
    ).resolve_scale(y='shared')
).configure_axis(grid=False)

combined_model_chart.save("combined_model_chart.png", ppi=200)
combined_model_chart

In [41]:
tsurfs = tidy_df[tidy_df.variable.isin([
    'Tsurf_c',
    'Tsurf_d',
    'Tsurf_rad_d',
])].pivot(
    index = 'time',
    columns = 'variable',
    values = 'value'
).dropna()
tsurfs = tsurfs[
    (tsurfs.Tsurf_c<50) & (tsurfs.Tsurf_c>-50) &
    (tsurfs.Tsurf_d<50) & (tsurfs.Tsurf_d>-50) &
    (tsurfs.Tsurf_rad_d<50) & (tsurfs.Tsurf_rad_d>-50)
]
from sklearn.metrics import r2_score

In [42]:
alt.Chart(tsurfs).mark_circle(size=10, opacity=0.25).encode(
    alt.X("Tsurf_c"),
    alt.Y("Tsurf_d")
) | alt.Chart(tsurfs).mark_circle(size=10, opacity=0.25).encode(
    alt.X("Tsurf_c"),
    alt.Y("Tsurf_rad_d")
) | alt.Chart(tsurfs).mark_circle(size=10, opacity=0.25).encode(
    alt.X("Tsurf_d"),
    alt.Y("Tsurf_rad_d")
)

In [43]:
r2_score(tsurfs['Tsurf_c'], tsurfs['Tsurf_d']), np.mean(np.abs(tsurfs['Tsurf_c'] - tsurfs['Tsurf_d'])), np.median(np.abs(tsurfs['Tsurf_c'] - tsurfs['Tsurf_d']))

(0.9533943174391546, 0.6447582466727493, 0.19806671142578125)

In [44]:
r2_score(tsurfs['Tsurf_rad_d'], tsurfs['Tsurf_d']), np.mean(np.abs(tsurfs['Tsurf_rad_d'] - tsurfs['Tsurf_d'])), np.median(np.abs(tsurfs['Tsurf_rad_d'] - tsurfs['Tsurf_d']))

(0.9527983850616086, 1.3707914475336347, 1.2833251953125)

In [45]:
r2_score(tsurfs['Tsurf_rad_d'], tsurfs['Tsurf_c']), np.mean(np.abs(tsurfs['Tsurf_rad_d'] - tsurfs['Tsurf_c'])), np.median(np.abs(tsurfs['Tsurf_rad_d'] - tsurfs['Tsurf_c']))

(0.9348993937545148, 1.5164527305451927, 1.3803253173828125)

In [46]:
time_no_day = tsurfs.reset_index().time.apply(lambda dt:dt.replace(year=2000, month=1, day=1))
tsurfs['time_no_day'] = pd.to_datetime(time_no_day)

In [47]:
tsurfs['Tsurf_d - Tsurf_rad_d'] = tsurfs.Tsurf_d - tsurfs.Tsurf_rad_d
tsurfs['Tsurf_d - Tsurf_c'] = tsurfs.Tsurf_d - tsurfs.Tsurf_c

In [55]:
temp_difference_diurnal_chart = alt.Chart(
    tsurfs.reset_index()
).transform_fold(
    ['Tsurf_d - Tsurf_rad_d',
    'Tsurf_d - Tsurf_c' ]
).mark_errorbar().encode(
    alt.X("hoursminutes(time):T"),
    alt.Y("value:Q").title("T difference"),
    alt.Row("key:N").title("")
).properties(width=250, height=100)
temp_difference_diurnal_chart.save("temp_difference_diurnal_chart.png", ppi=200)
temp_difference_diurnal_chart

## Combined Table

Create a combined table

In [58]:
df = round(src[['z0', 'surface_measurement', 'most_config', 'latent heat flux (mm)']].pivot(
    index=['z0', 'surface_measurement'],
    columns='most_config',
    values='latent heat flux (mm)'
), 1)
df = df.apply(
    lambda row: f"{row.min()} - {row.max()}",
    axis=1
)
sntherm_estimates_table = pd.DataFrame(df).rename(columns={0:'SNTHERM'})
sntherm_estimates_table

Unnamed: 0_level_0,Unnamed: 1_level_0,SNTHERM
z0,surface_measurement,Unnamed: 2_level_1
0.0001,Tsurf_c,54.7 - 56.1
0.0001,Tsurf_d,51.7 - 53.2
0.0001,Tsurf_rad_d,72.3 - 73.9
0.0005,Tsurf_c,66.3 - 67.8
0.0005,Tsurf_d,62.8 - 64.5
0.0005,Tsurf_rad_d,87.6 - 89.4
0.001,Tsurf_c,70.1 - 71.5
0.001,Tsurf_d,66.5 - 68.1
0.001,Tsurf_rad_d,92.8 - 94.4
0.005,Tsurf_c,76.2 - 77.4


In [59]:
bulk_estimates_table = src_standard[['z0', 'surface_measurement', 'latent heat flux (mm)']].set_index(['z0', 'surface_measurement']).rename(
    columns={'latent heat flux (mm)': 'Bulk'}
)
bulk_estimates_table

Unnamed: 0_level_0,Unnamed: 1_level_0,Bulk
z0,surface_measurement,Unnamed: 2_level_1
z0_andreas,Tsurf_rad_d,
0.0001,Tsurf_c,54.556429
z0_andreas,Tsurf_d,
1e-05,Tsurf_c,35.817435
z0_andreas_weekly,Tsurf_d,44.594755
0.0005,Tsurf_d,73.98533
5e-05,Tsurf_c,47.607187
5e-05,Tsurf_d,45.120316
0.001,Tsurf_c,93.045124
0.005,Tsurf_d,143.007051


In [60]:
coare_estimates_table = coare_seasonal_model_results[['z0', 'surface_measurement', 'hlb_mm']].set_index(['z0', 'surface_measurement']).rename(
    columns={'hlb_mm': 'COARE'}
)
coare_estimates_table

Unnamed: 0_level_0,Unnamed: 1_level_0,COARE
z0,surface_measurement,Unnamed: 2_level_1
0.0001,Tsurf_c,41.087666
0.0005,Tsurf_c,49.679014
0.001,Tsurf_c,52.509812
0.005,Tsurf_c,57.098194
1e-05,Tsurf_c,29.933291
5e-05,Tsurf_c,37.44402
z0_andreas,Tsurf_c,26.313391
z0_andreas_weekly,Tsurf_c,37.058693
0.0001,Tsurf_d,38.743041
0.0005,Tsurf_d,46.954566


In [61]:
combined_estimates_table = round(
    sntherm_estimates_table.join(coare_estimates_table).join(bulk_estimates_table).reset_index(),
    1
)
combined_estimates_table

Unnamed: 0,z0,surface_measurement,SNTHERM,COARE,Bulk
0,0.0001,Tsurf_c,54.7 - 56.1,41.1,54.6
1,0.0001,Tsurf_d,51.7 - 53.2,38.7,51.7
2,0.0001,Tsurf_rad_d,72.3 - 73.9,54.4,71.1
3,0.0005,Tsurf_c,66.3 - 67.8,49.7,78.0
4,0.0005,Tsurf_d,62.8 - 64.5,47.0,74.0
5,0.0005,Tsurf_rad_d,87.6 - 89.4,65.9,101.6
6,0.001,Tsurf_c,70.1 - 71.5,52.5,93.0
7,0.001,Tsurf_d,66.5 - 68.1,49.7,88.3
8,0.001,Tsurf_rad_d,92.8 - 94.4,69.7,121.1
9,0.005,Tsurf_c,76.2 - 77.4,57.1,150.6


# Combine our 3 sources of information (measured data, turbpy models, COARE models) and compare with Stossel box data

In [62]:
# COARE
coare_model_results
# TURBPY
model_results_for_stossel_comparison = model_df.query("e_sat_curve == 'alduchov'").query("scheme == 'andreas'").query("surface_measurement == 'Tsurf_c'").query("most_config == 'MO Holtslag de Bruin'")
model_results_for_stossel_comparison = model_results_for_stossel_comparison[['time', 'latent heat flux', 'z0']]
model_results_for_stossel_comparison = model_results_for_stossel_comparison.set_index('time').tz_localize(None).reset_index()

# MEASUREMENTS
measurements_for_stossel_comparison = tidy_df.query("variable == 'w_h2o__3m_c'").set_index('time')[['value']].rename(columns={'value': 'measured'})

In [63]:
lysimeter_data = pd.read_csv("../sos/processed_lysimeter_data.csv")
lysimeter_data['time'] = pd.to_datetime(lysimeter_data['time'])
lysimeter_data['time_previous'] = pd.to_datetime(lysimeter_data['time_previous'])
lysimeter_data.head()

Unnamed: 0.1,Unnamed: 0,time_previous,time,delta_box_mean_mm,daytime
0,8,2023-02-03 16:26:00,2023-02-04 07:55:00,0.021063,False
1,9,2023-02-04 07:55:00,2023-02-04 16:30:00,-0.048646,True
2,10,2023-02-04 16:30:00,2023-02-05 08:02:00,0.013541,False
3,13,2023-02-07 12:28:00,2023-02-07 17:28:00,-0.044634,True
4,15,2023-02-09 17:00:00,2023-02-10 08:00:00,0.015547,False


In [64]:
combined_results_df = pd.DataFrame()
for i, row in lysimeter_data.iterrows():
    tprev = row['time_previous']
    t = row['time'] 
    
    # collect coare data
    this_coare_data = coare_model_results.set_index('time').sort_index().loc[tprev: t].pivot_table(
        index='time',
        values=['hlb_gperm2s'],
        columns='z0'
    )
    this_coare_data.columns = ['coare (z0=' + col + ')' for col in this_coare_data.columns.get_level_values(1)]
    this_coare_data = this_coare_data.sum()
    
    # collect turbpy data
    this_turbpy_data = model_results_for_stossel_comparison.set_index('time').sort_index().loc[tprev: t].pivot_table(
            index='time',
            values=['latent heat flux'],
            columns='z0'
    )
    this_turbpy_data.columns = ['turbpy (z0=' + col + ')' for col in this_turbpy_data.columns.get_level_values(1)]
    this_turbpy_data = this_turbpy_data.sum()

    # measured data
    this_ec_data = measurements_for_stossel_comparison.loc[tprev: t].sum()

    this_measurement_df = pd.DataFrame(pd.concat([
        this_coare_data,
        this_turbpy_data,
        this_ec_data.rename({'measured': 'EC measured'}),
        # make this negative to match the other stuff
        - row[['delta_box_mean_mm']].rename({'delta_box_mean_mm': 'Lysimeter measured'})
    ])).reset_index().rename(columns={
        0: 'lh flux (mm)',
        'index': 'measurement'
    })
    this_measurement_df['time'] = row['time']
    this_measurement_df['time_previous'] = row['time_previous']
    this_measurement_df['daytime'] = row['daytime']
    combined_results_df = pd.concat([combined_results_df, this_measurement_df])

Make the "lost mass" measurements from the stossel box positive

In [65]:
combined_results_df['type'] = combined_results_df['measurement'].apply(lambda s: s.split(' ')[0])
combined_results_df

Unnamed: 0,measurement,lh flux (mm),time,time_previous,daytime,type
0,coare (z0=0.0001),0.031701,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
1,coare (z0=0.0005),0.040276,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
2,coare (z0=0.001),0.043273,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
3,coare (z0=0.005),0.043553,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
4,coare (z0=1e-05),0.019697,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
...,...,...,...,...,...,...
13,turbpy (z0=5e-05),-79.261509,2023-02-18 08:10:00,2023-02-17 17:05:00,False,turbpy
14,turbpy (z0=z0_andreas),23.52186,2023-02-18 08:10:00,2023-02-17 17:05:00,False,turbpy
15,turbpy (z0=z0_andreas_weekly),-93.942532,2023-02-18 08:10:00,2023-02-17 17:05:00,False,turbpy
16,EC measured,0.011126,2023-02-18 08:10:00,2023-02-17 17:05:00,False,EC


In [66]:
combined_results_df

Unnamed: 0,measurement,lh flux (mm),time,time_previous,daytime,type
0,coare (z0=0.0001),0.031701,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
1,coare (z0=0.0005),0.040276,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
2,coare (z0=0.001),0.043273,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
3,coare (z0=0.005),0.043553,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
4,coare (z0=1e-05),0.019697,2023-02-04 07:55:00,2023-02-03 16:26:00,False,coare
...,...,...,...,...,...,...
13,turbpy (z0=5e-05),-79.261509,2023-02-18 08:10:00,2023-02-17 17:05:00,False,turbpy
14,turbpy (z0=z0_andreas),23.52186,2023-02-18 08:10:00,2023-02-17 17:05:00,False,turbpy
15,turbpy (z0=z0_andreas_weekly),-93.942532,2023-02-18 08:10:00,2023-02-17 17:05:00,False,turbpy
16,EC measured,0.011126,2023-02-18 08:10:00,2023-02-17 17:05:00,False,EC


In [67]:
(alt.Chart(combined_results_df.query("daytime == True")).mark_tick().encode(
    alt.X("type:N"),
    alt.Y("lh flux (mm)"),
    alt.Column('time:T'),
).properties(height = 100) & alt.Chart(combined_results_df.query("daytime == False")).mark_tick().encode(
    alt.X("type:N"),
    alt.Y("lh flux (mm)"),
    alt.Column('time:T'),
).properties(height = 100)).display(renderer='svg')

## plot wind speeds, lh flux profiles, theta_v profiles, to see if on Feb 7, 11, 12, advection is likely, while on Feb 4, 10, 17 it seems less likely

In [68]:
case_studies_advective = pd.concat([
    tidy_df.set_index('time').loc['20230207': '20230207'],
])
case_studies_advective['day'] = case_studies_advective.index.day

case_studies_normal = pd.concat([
    tidy_df.set_index('time').loc['20230204': '20230204'],
    tidy_df.set_index('time').loc['20230210': '20230210'],
    tidy_df.set_index('time').loc['20230217': '20230217']
])

case_studies_normal['day'] = case_studies_normal.index.day

In [69]:
winddir_chart = alt.Chart(
    case_studies_advective.query("measurement == 'wind direction'").reset_index()
).transform_filter(
    alt.datum.tower == 'c'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3,10,20])
).mark_circle().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Color("height:O"),
    alt.Column("day:O")
).properties(width=200, height = 200).resolve_scale(x='independent')

bsflux_chart = alt.Chart(
    case_studies_advective.query("measurement == 'snow flux'").reset_index().query("value > 0")
).mark_circle().encode(
    alt.X("time:T"),
    alt.Y("value:Q").scale(type='log'),
    alt.Color("height:O"),
    alt.Column("day:O")
).properties(width=200, height = 50).resolve_scale(x='independent')

profiles_src = case_studies_advective.query("measurement == 'w_h2o_'").query("tower == 'c'").reset_index()
profiles_src = profiles_src.set_index("time").groupby([
    pd.Grouper(freq='240Min'), 'height', 'day'
]).mean(numeric_only=True).reset_index()
profiles_src['hour'] = profiles_src.time.dt.hour

lhflux_profiles_chart = alt.Chart(
    profiles_src    
).mark_line().encode(
    alt.X("mean(value):Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Column("day:O")
).resolve_scale(color='shared').properties(width=200, height = 200)

profiles_src = case_studies_advective.query("measurement == 'potential virtual temperature'").query("tower == 'c'").reset_index()
profiles_src = profiles_src.set_index("time").groupby([
    pd.Grouper(freq='240Min'), 'height', 'day'
]).mean(numeric_only=True).reset_index()
profiles_src['hour'] = profiles_src.time.dt.hour

thetav_profiles_chart = alt.Chart(
    profiles_src    
).mark_line().encode(
    alt.X("mean(value):Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Column("day:O")
).resolve_scale(color='shared').properties(width=200, height = 200)

(bsflux_chart & winddir_chart & lhflux_profiles_chart & thetav_profiles_chart).resolve_scale(color='independent')

In [70]:
winddir_chart = alt.Chart(
    case_studies_normal.query("measurement == 'wind direction'").reset_index()
).transform_filter(
    alt.datum.tower == 'c'
).transform_filter(
    alt.FieldOneOfPredicate('height', [3,10,20])
).mark_circle().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Color("height:O"),
    alt.Column("day:O")
).properties(width=200, height = 200).resolve_scale(x='independent')

bsflux_chart = alt.Chart(
    case_studies_normal.query("measurement == 'snow flux'").reset_index().query("value > 0")
).mark_circle().encode(
    alt.X("time:T"),
    alt.Y("value:Q").scale(type='log'),
    alt.Color("height:O"),
    alt.Column("day:O")
).properties(width=200, height = 50).resolve_scale(x='independent')

profiles_src = case_studies_normal.query("measurement == 'w_h2o_'").query("tower == 'c'").reset_index()
profiles_src = profiles_src.set_index("time").groupby([
    pd.Grouper(freq='240Min'), 'height', 'day'
]).mean(numeric_only=True).reset_index()
profiles_src['hour'] = profiles_src.time.dt.hour

lhflux_profiles_chart = alt.Chart(
    profiles_src    
).mark_line().encode(
    alt.X("mean(value):Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Column("day:O")
).resolve_scale(color='shared').properties(width=200, height = 200)

profiles_src = case_studies_normal.query("measurement == 'potential virtual temperature'").query("tower == 'c'").reset_index()
profiles_src = profiles_src.set_index("time").groupby([
    pd.Grouper(freq='240Min'), 'height', 'day'
]).mean(numeric_only=True).reset_index()
profiles_src['hour'] = profiles_src.time.dt.hour

thetav_profiles_chart = alt.Chart(
    profiles_src    
).mark_line().encode(
    alt.X("mean(value):Q").sort('-y'),
    alt.Y("height:Q"),
    alt.Color("hour:O").scale(scheme='rainbow'),
    alt.Column("day:O")
).resolve_scale(color='shared').properties(width=200, height = 200)

(bsflux_chart & winddir_chart & lhflux_profiles_chart & thetav_profiles_chart).resolve_scale(color='independent')

# Look at some time series of the combined results

In [71]:
alt.Chart(
    model_results_for_stossel_comparison.set_index('time').sort_index().loc['20230401': '20230415'].reset_index()
).mark_line(opacity=0.25, color='orange').encode(
    alt.X("time:T"),
    alt.Y("latent heat flux:Q"),
    detail = 'z0'
).properties(width = 1000) + alt.Chart(
    measurements_for_stossel_comparison.sort_index().loc['20230401': '20230415'].reset_index()
).mark_circle(color='black').encode(
    alt.X("time:T"),
    alt.Y("measured:Q"),
).properties(width = 1000)

In [72]:
alt.Chart(
    coare_model_results.set_index('time').sort_index().loc['20230401': '20230415'].reset_index()
).mark_line(opacity=0.25).encode(
    alt.X("time:T"),
    alt.Y("hlb_gperm2s:Q"),
    detail = 'z0'
).properties(width = 1000) + alt.Chart(
    measurements_for_stossel_comparison.sort_index().loc['20230401': '20230415'].reset_index()
).mark_circle(color='black').encode(
    alt.X("time:T"),
    alt.Y("measured:Q"),
).properties(width = 1000)

In [73]:
alt.Chart(
    coare_model_results.set_index('time').sort_index().loc['20230201': '20230215'].reset_index()
).mark_line(opacity=0.25).encode(
    alt.X("time:T"),
    alt.Y("hlb_gperm2s:Q"),
    detail = 'z0'
).properties(width = 1000) + alt.Chart(
    measurements_for_stossel_comparison.sort_index().loc['20230201': '20230215'].reset_index()
).mark_circle(color='black').encode(
    alt.X("time:T"),
    alt.Y("measured:Q"),
).properties(width = 1000)

In [74]:
coare_model_results.z0.unique()

array(['z0_andreas', 'z0_andreas_weekly', '1e-05', '5e-05', '0.0001',
       '0.0005', '0.001', '0.005'], dtype=object)

In [75]:
alt.Chart(
    coare_model_results.set_index('time').sort_index().loc['20230315': '20230330'].reset_index()
).transform_filter(
    alt.datum.z0 == '0.00001'
).mark_line(opacity=0.25).encode(
    alt.X("time:T"),
    alt.Y("hlb_gperm2s:Q"),
    detail = 'z0'
).properties(width = 1000) + alt.Chart(
    measurements_for_stossel_comparison.sort_index().loc['20230315': '20230330'].reset_index()
).mark_circle(color='black').encode(
    alt.X("time:T"),
    alt.Y("measured:Q"),
).properties(width = 1000)

# Calculate statistics

## Create dataset, combining measured data with model results from different sources  

Use a subset of TurbPy results, all the COARE results, and measured data at 3m on tower c, the tower and height of measurements used to run the models

### Turbpy models

In [76]:
select_turbpy_results_df = model_df[
    model_df['config'].isin([
        'MO Holtslag de Bruin andreas lengths Tsurf_c e_sat_alduchov 1e-05',
        'MO Holtslag de Bruin andreas lengths Tsurf_c e_sat_alduchov 0.0001',
        'MO Holtslag de Bruin andreas lengths Tsurf_c e_sat_alduchov 0.0005',
        'MO Holtslag de Bruin andreas lengths Tsurf_c e_sat_alduchov z0_andreas',
        'MO Holtslag de Bruin andreas lengths Tsurf_c e_sat_alduchov z0_andreas_weekly',
    ]) 
]
select_turbpy_results_df['z0'] = select_turbpy_results_df['z0'].astype('str')
select_turbpy_results_df = select_turbpy_results_df[['time', 'z0', 'latent heat flux']].pivot_table(
    index='time', columns='z0',
)
select_turbpy_results_df.columns = select_turbpy_results_df.columns.to_flat_index().str.join('_')
select_turbpy_results_df = select_turbpy_results_df.loc[:'2023-05-09']
select_turbpy_results_df.columns = select_turbpy_results_df.columns.to_series().apply(lambda s: f"turbpy (z0 = {s.split('_')[-1]})").values
select_turbpy_results_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  select_turbpy_results_df['z0'] = select_turbpy_results_df['z0'].astype('str')


Unnamed: 0_level_0,turbpy (z0 = 0.0001),turbpy (z0 = 0.0005),turbpy (z0 = 1e-05),turbpy (z0 = andreas),turbpy (z0 = weekly)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-11-30 00:00:00,-10.092653,-12.054168,-7.518669,-12.332669,-11.614903
2022-11-30 00:30:00,-19.441872,-22.901364,-14.724919,-22.718555,-22.135357
2022-11-30 01:00:00,-14.483189,-17.355499,-10.790179,-16.907708,-16.694902
2022-11-30 01:30:00,-3.508045,-4.437003,-2.485310,-3.468893,-4.170064
2022-11-30 02:00:00,-0.923063,-1.198927,-0.632710,-1.499689,-1.112816
...,...,...,...,...,...
2023-05-09 21:30:00,-21.998996,-29.483407,-15.549811,-11.275450,-17.978991
2023-05-09 22:00:00,-37.509969,-49.012505,-26.144221,-40.444101,-30.399568
2023-05-09 22:30:00,-10.541718,-13.554715,-7.829870,,-8.836887
2023-05-09 23:00:00,-25.361903,-33.934139,-17.737039,-20.781807,-20.582072


### Measured data

In [77]:
measured_df = tidy_df.query("variable == 'w_h2o__3m_c'").set_index('time')[['value']].rename(columns={'value': 'measured'})

### NOAA/COARE Models

In [78]:
select_coare_results_df = coare_model_results[['time', 'config', 'hlb_gperm2s']].pivot(index='time', columns='config')
select_coare_results_df.columns = select_coare_results_df.columns.droplevel(0)
select_coare_results_df = select_coare_results_df[[
    "Tsurf_c e_sat_alduchov 0.0001",
    "Tsurf_c e_sat_alduchov 0.0005",
    "Tsurf_c e_sat_alduchov 1e-05",
    "Tsurf_c e_sat_alduchov z0_andreas",	
    "Tsurf_c e_sat_alduchov z0_andreas_weekly"
]].rename(columns = {
    "Tsurf_c e_sat_alduchov 0.0001": "coare (z0 = 0.0001)",
    "Tsurf_c e_sat_alduchov 0.0005": "coare (z0 = 0.0005)",
    "Tsurf_c e_sat_alduchov 1e-05": "coare (z0 = 1e-05)",
    "Tsurf_c e_sat_alduchov z0_andreas": "coare (z0 = andreas)",
    "Tsurf_c e_sat_alduchov z0_andreas_weekly": "coare (z0 = weekly)",
})

### Combine it all

In [79]:
select_turbpy_results_df = select_turbpy_results_df.reset_index('time')
select_turbpy_results_df['time'] = select_turbpy_results_df['time'].dt.tz_localize(None)
select_turbpy_results_df = select_turbpy_results_df.set_index('time')

model_meas_df = measured_df.join(
    select_turbpy_results_df
).join(
    select_coare_results_df
)
model_meas_nonans_df = model_meas_df.dropna()
model_meas_nonans_df

Unnamed: 0_level_0,measured,turbpy (z0 = 0.0001),turbpy (z0 = 0.0005),turbpy (z0 = 1e-05),turbpy (z0 = andreas),turbpy (z0 = weekly),coare (z0 = 0.0001),coare (z0 = 0.0005),coare (z0 = 1e-05),coare (z0 = andreas),coare (z0 = weekly)
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2022-11-30 00:00:00,0.001886,-10.092653,-12.054168,-7.518669,-12.332669,-11.614903,0.002713,0.003244,0.002019,0.003319,0.003125
2022-11-30 00:30:00,0.000472,-19.441872,-22.901364,-14.724919,-22.718555,-22.135357,0.005184,0.006107,0.003925,0.006059,0.005903
2022-11-30 01:00:00,-0.000584,-14.483189,-17.355499,-10.790179,-16.907708,-16.694902,0.003862,0.004624,0.002879,0.004505,0.004449
2022-11-30 01:30:00,-0.001696,-3.508045,-4.437003,-2.485310,-3.468893,-4.170064,0.000955,0.001203,0.000674,0.000942,0.001133
2022-11-30 02:00:00,-0.000566,-0.923063,-1.198927,-0.632710,-1.499689,-1.112816,0.000260,0.000336,0.000180,0.000432,0.000312
...,...,...,...,...,...,...,...,...,...,...,...
2023-05-09 21:00:00,0.014859,-25.492238,-34.357587,-17.955948,-13.015325,-20.803668,0.006594,0.008794,0.004655,0.003367,0.005390
2023-05-09 21:30:00,0.012305,-21.998996,-29.483407,-15.549811,-11.275450,-17.978991,0.005656,0.007559,0.004001,0.002895,0.004625
2023-05-09 22:00:00,0.017012,-37.509969,-49.012505,-26.144221,-40.444101,-30.399568,0.009395,0.012235,0.006650,0.010120,0.007716
2023-05-09 23:00:00,0.010541,-25.361903,-33.934139,-17.737039,-20.781807,-20.582072,0.006462,0.008587,0.004556,0.005334,0.005284


In [80]:
model_meas_nonans_df.sum()*30*60/density_water.magnitude

measured                     23.545136
turbpy (z0 = 0.0001)    -107983.975713
turbpy (z0 = 0.0005)    -129109.772786
turbpy (z0 = 1e-05)      -79610.808129
turbpy (z0 = andreas)    -98478.094679
turbpy (z0 = weekly)     -99572.484060
coare (z0 = 0.0001)          27.807167
coare (z0 = 0.0005)          33.265706
coare (z0 = 1e-05)           20.479367
coare (z0 = andreas)         25.516118
coare (z0 = weekly)          25.684689
dtype: float64

## Do statistics calculations
1. Mean Absolute Error
2. Mean Relative Error
3. R^2
4. NSE


In [81]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
# define a function for Nash Sutcliffe
def nse(targets, predictions):
    return 1-(np.sum((targets-predictions)**2)/np.sum((targets-np.mean(targets))**2))

In [93]:
def get_stats_df(df, data_desc):
    return pd.DataFrame([
            # [data_desc, 'ME', "turbpy (z0 = 1e-05)", (df["turbpy (z0 = 1e-05)"] - df["measured"]).values.mean() ],
            # # [data_desc, 'ME', "turbpy (z0 = 0.0001)", (df["turbpy (z0 = 0.0001)"] - df["measured"]).values.mean() ],
            # # [data_desc, 'ME', "turbpy (z0 = 0.0005)", (df["turbpy (z0 = 0.0005)"] - df["measured"]).values.mean() ],
            # # [data_desc, 'ME', "turbpy (z0 = andreas)", (df["turbpy (z0 = andreas)"] - df["measured"]).values.mean() ],
            # # [data_desc, 'ME', "turbpy (z0 = weekly)", (df["turbpy (z0 = weekly)"] - df["measured"]).values.mean() ],
            [data_desc, 'ME', "coare (z0 = 1e-05)", (df["coare (z0 = 1e-05)"] - df["measured"]).values.mean() ],
            [data_desc, 'ME', "coare (z0 = 0.0001)", (df["coare (z0 = 0.0001)"] - df["measured"]).values.mean() ],
            [data_desc, 'ME', "coare (z0 = 0.0005)", (df["coare (z0 = 0.0005)"] - df["measured"]).values.mean() ],
            [data_desc, 'ME', "coare (z0 = andreas)", (df["coare (z0 = andreas)"] - df["measured"]).values.mean() ],
            [data_desc, 'ME', "coare (z0 = weekly)", (df["coare (z0 = weekly)"] - df["measured"]).values.mean() ],
            
            # # [data_desc, 'MAE', "turbpy (z0 = 1e-05)", mean_absolute_error(df["measured"], df["turbpy (z0 = 1e-05)"])],
            # # [data_desc, 'MAE', "turbpy (z0 = 0.0001)", mean_absolute_error(df["measured"], df["turbpy (z0 = 0.0001)"])],
            # # [data_desc, 'MAE', "turbpy (z0 = 0.0005)", mean_absolute_error(df["measured"], df["turbpy (z0 = 0.0005)"])],
            # # [data_desc, 'MAE', "turbpy (z0 = andreas)", mean_absolute_error(df["measured"], df["turbpy (z0 = andreas)"])],
            # # [data_desc, 'MAE', "turbpy (z0 = weekly)", mean_absolute_error(df["measured"], df["turbpy (z0 = weekly)"])],
            [data_desc, 'MAE', "coare (z0 = 1e-05)", mean_absolute_error(df["measured"], df["coare (z0 = 1e-05)"])],
            [data_desc, 'MAE', "coare (z0 = 0.0001)", mean_absolute_error(df["measured"], df["coare (z0 = 0.0001)"])],
            [data_desc, 'MAE', "coare (z0 = 0.0005)", mean_absolute_error(df["measured"], df["coare (z0 = 0.0005)"])],
            [data_desc, 'MAE', "coare (z0 = andreas)", mean_absolute_error(df["measured"], df["coare (z0 = andreas)"])],
            [data_desc, 'MAE', "coare (z0 = weekly)", mean_absolute_error(df["measured"], df["coare (z0 = weekly)"])],
        
            # # [data_desc, 'MAPE', "turbpy (z0 = 1e-05)", mean_absolute_percentage_error(df["measured"], df["turbpy (z0 = 1e-05)"])],
            # # [data_desc, 'MAPE', "turbpy (z0 = 0.0001)", mean_absolute_percentage_error(df["measured"], df["turbpy (z0 = 0.0001)"])],
            # # [data_desc, 'MAPE', "turbpy (z0 = 0.0005)", mean_absolute_percentage_error(df["measured"], df["turbpy (z0 = 0.0005)"])],
            # # [data_desc, 'MAPE', "turbpy (z0 = andreas)", mean_absolute_percentage_error(df["measured"], df["turbpy (z0 = andreas)"])],
            # # [data_desc, 'MAPE', "turbpy (z0 = weekly)", mean_absolute_percentage_error(df["measured"], df["turbpy (z0 = weekly)"])],
            [data_desc, 'MAPE', "coare (z0 = 1e-05)", mean_absolute_percentage_error(df["measured"], df["coare (z0 = 1e-05)"])],
            [data_desc, 'MAPE', "coare (z0 = 0.0001)", mean_absolute_percentage_error(df["measured"], df["coare (z0 = 0.0001)"])],
            [data_desc, 'MAPE', "coare (z0 = 0.0005)", mean_absolute_percentage_error(df["measured"], df["coare (z0 = 0.0005)"])],
            [data_desc, 'MAPE', "coare (z0 = andreas)", mean_absolute_percentage_error(df["measured"], df["coare (z0 = andreas)"])],
            [data_desc, 'MAPE', "coare (z0 = weekly)", mean_absolute_percentage_error(df["measured"], df["coare (z0 = weekly)"])],

            # # [data_desc, 'r2', "turbpy (z0 = 1e-05)", r2_score(df["measured"], df["turbpy (z0 = 1e-05)"])],
            # # [data_desc, 'r2', "turbpy (z0 = 0.0001)", r2_score(df["measured"], df["turbpy (z0 = 0.0001)"])],
            # # [data_desc, 'r2', "turbpy (z0 = 0.0005)", r2_score(df["measured"], df["turbpy (z0 = 0.0005)"])],
            # # [data_desc, 'r2', "turbpy (z0 = andreas)", r2_score(df["measured"], df["turbpy (z0 = andreas)"])],
            # # [data_desc, 'r2', "turbpy (z0 = weekly)", r2_score(df["measured"], df["turbpy (z0 = weekly)"])],
            [data_desc, 'r2', "coare (z0 = 1e-05)", r2_score(df["measured"], df["coare (z0 = 1e-05)"])],
            [data_desc, 'r2', "coare (z0 = 0.0001)", r2_score(df["measured"], df["coare (z0 = 0.0001)"])],
            [data_desc, 'r2', "coare (z0 = 0.0005)", r2_score(df["measured"], df["coare (z0 = 0.0005)"])],
            [data_desc, 'r2', "coare (z0 = andreas)", r2_score(df["measured"], df["coare (z0 = andreas)"])],
            [data_desc, 'r2', "coare (z0 = weekly)", r2_score(df["measured"], df["coare (z0 = weekly)"])],
            
            # # [data_desc, 'NSE', "turbpy (z0 = 1e-05)", nse(df["measured"], df["turbpy (z0 = 1e-05)"])],
            # # [data_desc, 'NSE', "turbpy (z0 = 0.0001)", nse(df["measured"], df["turbpy (z0 = 0.0001)"])],
            # # [data_desc, 'NSE', "turbpy (z0 = 0.0005)", nse(df["measured"], df["turbpy (z0 = 0.0005)"])],
            # # [data_desc, 'NSE', "turbpy (z0 = andreas)", nse(df["measured"], df["turbpy (z0 = andreas)"])],
            # # [data_desc, 'NSE', "turbpy (z0 = weekly)", nse(df["measured"], df["turbpy (z0 = weekly)"])],
            [data_desc, 'NSE', "coare (z0 = 1e-05)", nse(df["measured"], df["coare (z0 = 1e-05)"])],
            [data_desc, 'NSE', "coare (z0 = 0.0001)", nse(df["measured"], df["coare (z0 = 0.0001)"])],
            [data_desc, 'NSE', "coare (z0 = 0.0005)", nse(df["measured"], df["coare (z0 = 0.0005)"])],
            [data_desc, 'NSE', "coare (z0 = andreas)", nse(df["measured"], df["coare (z0 = andreas)"])],
            [data_desc, 'NSE', "coare (z0 = weekly)", nse(df["measured"], df["coare (z0 = weekly)"])],
            
            
        ], 
        columns=['data desc', 'statistic', 'model', 'value']
    )

# Plot Statistical Results

### For different coupling regimes (using the decoupling metric) (following Peltola et al., 2021)

In [94]:
coupled_times = tidy_df.query("variable == 'omega_3m_c'").query(
    "value >= 0.61"
).time

decoupled_times = tidy_df.query("variable == 'omega_3m_c'").query(
    "value < 0.43"
).time

weaklycoupled_times = tidy_df.query("variable == 'omega_3m_c'").query(
    "value < 0.61 & value >= 0.43"
).time

blowing_snow_times = pd.concat([
    tidy_df.query("variable == 'SF_avg_2m_ue'").query("value > 0").time,
    tidy_df.query("variable == 'SF_avg_1m_ue'").query("value > 0").time
])

clear_times = tidy_df.query("variable == 'SF_avg_2m_ue'").time[
    ~ tidy_df.query("variable == 'SF_avg_2m_ue'").time.isin(blowing_snow_times)
]

In [95]:
all_stats_df = pd.concat([
    get_stats_df(model_meas_nonans_df, data_desc='all'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(decoupled_times)], data_desc='decoupled'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(weaklycoupled_times)], data_desc='weakly coupled'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(coupled_times)], data_desc='coupled'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)], data_desc='bs'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)], data_desc='no bs'),
])

In [96]:
print(len(model_meas_nonans_df))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(decoupled_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(weaklycoupled_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(coupled_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)]))

5297
1355
835
2477
1441
3856


In [97]:
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'ME'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAPE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'r2'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'NSE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100)

In [98]:
coare_model_results_statistics_plot = alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'ME'
).mark_bar().encode(
    alt.X("value:Q").title("Mean Error (g/m^2/s)"),
    alt.Y("model:N"),
    alt.Row("statistic:N").header(None),
    alt.Column("data desc:N").sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns']).title(None)
).properties(width=100, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'NSE'
).mark_bar().encode(
    alt.X("value:Q").title("NSE"),
    alt.Y("model:N"),
    alt.Row("statistic:N").header(None),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=100, height = 100)
coare_model_results_statistics_plot.save("coare_model_results_statistics_plot.png", ppi=200)
coare_model_results_statistics_plot

In [99]:
round(all_stats_df.query("statistic == 'ME'").pivot(
    index='model',
    columns='data desc',
    values='value'
)*1000, 2).style.set_caption("Mean Error, mg/m^2/s").format('{:.2f}')

data desc,all,bs,coupled,decoupled,no bs,weakly coupled
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
coare (z0 = 0.0001),0.45,0.65,0.32,0.19,0.37,0.2
coare (z0 = 0.0005),1.02,1.79,1.32,0.16,0.73,0.18
coare (z0 = 1e-05),-0.32,-0.95,-0.98,0.2,-0.09,0.21
coare (z0 = andreas),0.21,0.63,-0.13,0.24,0.05,0.22
coare (z0 = weekly),0.22,0.36,-0.06,0.19,0.17,0.2


In [None]:
round(all_stats_df.query("statistic == 'NSE'").pivot(
    index='model',
    columns='data desc',
    values='value'
), 2).style.set_caption("Nash-Sutcliffe Model Efficiency").format('{:.2f}')

In [None]:
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'ME'
).mark_bar().encode(
    alt.Y("value:Q"),
    alt.X("model:N").axis(labelAngle=-45),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'NSE'
).mark_bar().encode(
    alt.Y("value:Q"),
    alt.X("model:N").axis(labelAngle=-45),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'decoupled', 'weakly coupled', 'coupled', 'no bs', 'ns'])
).properties(width=200, height = 100)

In [None]:
fig, axes = plt.subplots(1,3, figsize=(10,3), sharex=True, sharey=True)

axes[0].scatter(
    model_meas_nonans_df[model_meas_nonans_df.index.isin(decoupled_times)]['measured'],
    model_meas_nonans_df[model_meas_nonans_df.index.isin(decoupled_times)]['coare (z0 = 1e-05)'],
    s=1,
    label='decoupled'
)
axes[0].set_title(
    f"r^2 = {round(r2_score(
        model_meas_nonans_df[model_meas_nonans_df.index.isin(decoupled_times)]['measured'],
        model_meas_nonans_df[model_meas_nonans_df.index.isin(decoupled_times)]['coare (z0 = 1e-05)'],
    ), 3)}"
)
axes[1].scatter(
    model_meas_nonans_df[model_meas_nonans_df.index.isin(weaklycoupled_times)]['measured'],
    model_meas_nonans_df[model_meas_nonans_df.index.isin(weaklycoupled_times)]['coare (z0 = 1e-05)'],
    s=1,
    label='weakly coupled'
)
axes[1].set_title(
    f"r^2 = {round(r2_score(
        model_meas_nonans_df[model_meas_nonans_df.index.isin(weaklycoupled_times)]['measured'],
        model_meas_nonans_df[model_meas_nonans_df.index.isin(weaklycoupled_times)]['coare (z0 = 1e-05)'],
    ), 3)}"
)
axes[2].scatter(
    model_meas_nonans_df[model_meas_nonans_df.index.isin(coupled_times)]['measured'],
    model_meas_nonans_df[model_meas_nonans_df.index.isin(coupled_times)]['coare (z0 = 1e-05)'],
    s=1,
    label='coupled'
)
axes[2].set_title(
    f"r^2 = {round(r2_score(
        model_meas_nonans_df[model_meas_nonans_df.index.isin(coupled_times)]['measured'],
        model_meas_nonans_df[model_meas_nonans_df.index.isin(coupled_times)]['coare (z0 = 1e-05)'],
    ), 3)}"
)

for ax in axes:
    ax.plot([-0.02,0.06],[-0.02,0.06], color='grey', alpha=0.5, zorder=-1)
    ax.set_xlim(-0.02,0.06)
    ax.set_ylim(-0.02,0.06)
    ax.set_aspect('equal')
    ax.legend()

### For Different Stability Regimes (Using static stability)

In [None]:
unstable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value < -0.01"
).time

stable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value > 0.01"
).time

neutral_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value <= 0.01 & value >= -0.01"
).time

blowing_snow_times = pd.concat([
    tidy_df.query("variable == 'SF_avg_2m_ue'").query("value > 0").time,
    tidy_df.query("variable == 'SF_avg_1m_ue'").query("value > 0").time
])

clear_times = tidy_df.query("variable == 'SF_avg_2m_ue'").time[
    ~ tidy_df.query("variable == 'SF_avg_2m_ue'").time.isin(blowing_snow_times)
]

In [None]:
all_stats_df = pd.concat([
    get_stats_df(model_meas_nonans_df, data_desc='all'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(stable_times)], data_desc='stable'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(neutral_times)], data_desc='neutral'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(unstable_times)], data_desc='unstable'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)], data_desc='bs'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)], data_desc='no bs'),
])

In [None]:
print(len(model_meas_nonans_df))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(stable_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(neutral_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(unstable_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)]))

In [None]:
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'ME'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAPE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'r2'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'NSE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100)

### For Different Stability Regimes (Using static stability, 0.1 threshold)

In [None]:
unstable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value < -0.1"
).time

stable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value > 0.1"
).time

neutral_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value <= 0.1 & value >= -0.1"
).time

blowing_snow_times = pd.concat([
    tidy_df.query("variable == 'SF_avg_2m_ue'").query("value > 0").time,
    tidy_df.query("variable == 'SF_avg_1m_ue'").query("value > 0").time
])

clear_times = tidy_df.query("variable == 'SF_avg_2m_ue'").time[
    ~ tidy_df.query("variable == 'SF_avg_2m_ue'").time.isin(blowing_snow_times)
]

In [None]:
all_stats_df = pd.concat([
    get_stats_df(model_meas_nonans_df, data_desc='all'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(stable_times)], data_desc='stable'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(neutral_times)], data_desc='neutral'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(unstable_times)], data_desc='unstable'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)], data_desc='bs'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)], data_desc='no bs'),
])

In [None]:
print(len(model_meas_nonans_df))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(stable_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(neutral_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(unstable_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)]))

In [None]:
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAPE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'r2'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'NSE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100)

### For Different Stability Regimes (Using dynamic stability)

In [None]:
unstable_times = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value < -0.01"
).time

stable_times = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value > 0.25"
).time

neutral_times = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value <= 0.25 & value >= -0.01"
).time

blowing_snow_times = pd.concat([
    tidy_df.query("variable == 'SF_avg_2m_ue'").query("value > 0").time,
    tidy_df.query("variable == 'SF_avg_1m_ue'").query("value > 0").time
])

clear_times = tidy_df.query("variable == 'SF_avg_2m_ue'").time[
    ~ tidy_df.query("variable == 'SF_avg_2m_ue'").time.isin(blowing_snow_times)
]

In [None]:
all_stats_df = pd.concat([
    get_stats_df(model_meas_nonans_df, data_desc='all'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(stable_times)], data_desc='stable'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(neutral_times)], data_desc='neutral'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(unstable_times)], data_desc='unstable'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)], data_desc='bs'),
    get_stats_df(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)], data_desc='no bs'),
])

In [None]:
print(len(model_meas_nonans_df))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(stable_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(neutral_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(unstable_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(blowing_snow_times)]))
print(len(model_meas_nonans_df[model_meas_nonans_df.index.isin(clear_times)]))

In [None]:
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N").sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'MAPE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'r2'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100) &\
alt.Chart(all_stats_df).transform_filter(
    alt.datum.statistic == 'NSE'
).mark_bar().encode(
    alt.X("value:Q"),
    alt.Y("model:N"),
    alt.Row("statistic:N"),
    alt.Column("data desc:N", header=alt.Header(labels=False), title=None).sort(['all', 'stable', 'neutral', 'unstable', 'no bs', 'ns'])
).properties(width=200, height = 100)

# WHAT is going on with unstable times???

In [None]:
unstable_times_by_ri = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value < -0.01"
).time

unstable_times_by_grad = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
    "value < -0.01"
).time

In [None]:
pd.set_option('display.max_rows', 100)

In [None]:
src = pd.DataFrame(unstable_times_by_grad)
src = src[src.time.dt.month == 1]
meas = alt.Chart(model_meas_df[model_meas_df.index.isin(src.time)].reset_index()).mark_circle(color='black').encode(
    alt.X("time:T"),
    alt.Y("measured:Q")
).properties(width=1200)

In [None]:
chart_src = model_meas_df[model_meas_df.index.isin(src.time)].drop(columns=['measured'])
    # ['turbpy (z0 = 0.0001)', 'turbpy (z0 = 0.0005)', 'turbpy (z0 = 1e-05)',
    #    'coare (z0 = 0.0001)', 'coare (z0 estimated)',
    #    'coare (z0 estimated weekly)', 'coare (z0 estimated weekly simple)'],
model = alt.Chart(chart_src.reset_index()).mark_circle(color='red').encode(
    alt.X("time:T"),
    alt.Y("coare (z0 estimated weekly simple):Q"),
    # alt.Color("key:N")
)

In [None]:
meas+model

# Examine mean errors as functions of time, months, stability regimes, blowing snow

In [None]:
# unstable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
#     "value < -0.5"
# ).time

# stable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
#     "value > 0.5"
# ).time

# neutral_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query(
#     "value <= 0.5 & value >= -0.5"
# ).time

unstable_times = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value < -0.01"
).time

stable_times = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value > 0.25"
).time

neutral_times = tidy_df.query("variable == 'Ri_3m_c'").query(
    "value <= 0.25 & value >= -0.01"
).time

In [None]:
residuals_df = model_meas_nonans_df.copy(deep=True)
residuals_df["turbpy (z0 = 0.0001)"]               = residuals_df["turbpy (z0 = 0.0001)"]              - residuals_df["measured"]
residuals_df["turbpy (z0 = 0.0005)"]               = residuals_df["turbpy (z0 = 0.0005)"]              - residuals_df["measured"]
residuals_df["turbpy (z0 = 1e-05)"]                = residuals_df["turbpy (z0 = 1e-05)"]               - residuals_df["measured"]
residuals_df["coare (z0 = 0.0001)"]                = residuals_df["coare (z0 = 0.0001)"]               - residuals_df["measured"]
residuals_df["coare (z0 estimated)"]               = residuals_df["coare (z0 estimated)"]              - residuals_df["measured"]
residuals_df["coare (z0 estimated weekly)"]        = residuals_df["coare (z0 estimated weekly)"]       - residuals_df["measured"]
residuals_df["coare (z0 estimated weekly simple)"] = residuals_df["coare (z0 estimated weekly simple)"]- residuals_df["measured"]

# times with more characteristics 
upward_flux_times = model_meas_nonans_df[model_meas_nonans_df.measured > 0].index.values

residuals_df = residuals_df[
    [
        "turbpy (z0 = 0.0001)",
        "turbpy (z0 = 0.0005)",
        "turbpy (z0 = 1e-05)",
        "coare (z0 = 0.0001)",
        "coare (z0 estimated)",
        "coare (z0 estimated weekly)",
        "coare (z0 estimated weekly simple)"
    ]
].reset_index().melt(id_vars = ['time'])
residuals_df['timeofday'] = residuals_df.time.apply(lambda dt: dt.replace(year = 1970, month=1, day=1))
residuals_df['month'] = residuals_df.time.dt.month
residuals_df = pd.concat([
    residuals_df[residuals_df.time.isin(blowing_snow_times)].assign(bs='bs'),
    residuals_df[~ residuals_df.time.isin(blowing_snow_times)].assign(bs='no bs')
])
residuals_df = pd.concat([
    residuals_df[residuals_df.time.isin(upward_flux_times)].assign(flux_upward='upward'),
    residuals_df[~ residuals_df.time.isin(upward_flux_times)].assign(flux_upward='downward')
])
residuals_df = pd.concat([
    residuals_df[(residuals_df.flux_upward == 'upward') & (residuals_df.value < 0)].assign(counter_gradient='counter gradient'),
    residuals_df[(residuals_df.flux_upward == 'upward') & (residuals_df.value > 0)].assign(counter_gradient='along gradient'),
    residuals_df[(residuals_df.flux_upward == 'downward') & (residuals_df.value > 0)].assign(counter_gradient='counter gradient'),
    residuals_df[(residuals_df.flux_upward == 'downward') & (residuals_df.value < 0)].assign(counter_gradient='along gradient'),
])
residuals_df = pd.concat([
    residuals_df[residuals_df.time.isin(stable_times)].assign(stability='stable'),
    residuals_df[residuals_df.time.isin(neutral_times)].assign(stability='neutral'),
    residuals_df[residuals_df.time.isin(unstable_times)].assign(stability='unstable')
])


residuals_df

In [None]:
src = residuals_df.set_index('time').sort_index().groupby([pd.Grouper(freq='1D'), 'variable']).sum(numeric_only=True).reset_index()
src
alt.Chart(src.query).mark_bar().encode(
    alt.X("time:T"),
    alt.Y("value:Q"),
    alt.Row("variable:O")
).properties(width=1000)

In [None]:
print(
    residuals_df.query("stability ==  'unstable'").query("bs == 'bs'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    residuals_df.query("stability ==  'stable'").query("bs == 'bs'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    residuals_df.query("stability ==  'neutral'").query("bs == 'bs'").value.sum()*seconds_in_timestep/density_water.magnitude
)

print(
    residuals_df.query("stability ==  'unstable'").query("bs == 'no bs'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    residuals_df.query("stability ==  'stable'").query("bs == 'no bs'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    residuals_df.query("stability ==  'neutral'").query("bs == 'no bs'").value.sum()*seconds_in_timestep/density_water.magnitude
)

In [None]:
src = residuals_df.query("flux_upward == 'upward'")

all_data_chart = alt.Chart(
    src
).transform_filter(
    alt.datum.month != 5
).mark_line().encode(
    alt.X("hours(timeofday):T"),
    alt.Y("mean(value):Q"),
    alt.Color("month:O").scale(scheme='turbo'),
    alt.Column("stability:N").title(None),
    alt.Row("flux_upward:N").title(None),
).properties(height = 100, width=200, title='all data')

bs_chart = alt.Chart(
    src.query("bs == 'bs'")
).transform_filter(
    alt.datum.month != 5
).mark_line().encode(
    alt.X("hours(timeofday):T"),
    alt.Y("mean(value):Q"),
    alt.Color("month:O").scale(scheme='turbo'),
    alt.Column("stability:N").title(None),
    alt.Row("flux_upward:N").title(None),
).properties(height = 100, width=200, title='blowing snow')

nobs_chart = alt.Chart(
    src.query("bs == 'no bs'")
).transform_filter(
    alt.datum.month != 5
).mark_line().encode(
    alt.X("hours(timeofday):T"),
    alt.Y("mean(value):Q"),
    alt.Color("month:O").scale(scheme='turbo'),
    alt.Column("stability:N").title(None),
    alt.Row("flux_upward:N").title(None),
).properties(height = 100, width=200, title='no blowing snow')

(all_data_chart & bs_chart & nobs_chart).resolve_scale(y='shared')

In [None]:
print(
    src.query("stability ==  'unstable'").query("bs == 'bs'").query("flux_upward == 'upward'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    src.query("stability ==  'stable'").query("bs == 'bs'").query("flux_upward == 'upward'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    src.query("stability ==  'neutral'").query("bs == 'bs'").query("flux_upward == 'upward'").value.sum()*seconds_in_timestep/density_water.magnitude
)

print(
    src.query("stability ==  'unstable'").query("bs == 'no bs'").query("flux_upward == 'upward'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    src.query("stability ==  'stable'").query("bs == 'no bs'").query("flux_upward == 'upward'").value.sum()*seconds_in_timestep/density_water.magnitude
)
print(
    src.query("stability ==  'neutral'").query("bs == 'no bs'").query("flux_upward == 'upward'").value.sum()*seconds_in_timestep/density_water.magnitude
)

In [None]:
unstable_chart = alt.Chart(
    
).transform_filter(
    alt.datum.month != 5
).mark_line().encode(
    alt.X("hours(timeofday):T"),
    alt.Y("mean(value):Q"),
    alt.Color("month:O").scale(scheme='turbo'),
    alt.Column("bs:N").title(None),
    alt.Row("flux_upward:N").title(None),
)

In [None]:
alt.Chart(residuals_df).transform_filter(
    alt.datum.month != 5
).mark_line().encode(
    alt.X("hours(timeofday):T"),
    alt.Y("mean(value):Q"),
    alt.Color("month:O").scale(scheme='turbo'),
    alt.Column("bs:N"),
    alt.Row("flux_upward:N")
).properties(height = 100, title='all data')

# Plot Measured vs Modeled


# Scatterplots

In [None]:
r2_score(
    model_meas_df.dropna()['measured'],
    model_meas_df.dropna()['modeled z0=1e-5'],
)

In [None]:
def model_comparison_plot(src):
    one_to_one_line = alt.Chart(pd.DataFrame({
        'x': [-0.05, -0.025, 0.0, 0.025, 0.05],
        'y': [-0.05, -0.025, 0.0, 0.025, 0.05]
    })).mark_line(
        color='grey'
    ).encode(x = 'x', y = 'y')

    value_r2_score_1eneg4 = round(
        r2_score(
            src.dropna()['measured'],
            src.dropna()['modeled z0=1e-4'],
        ),
        3
    )
    value_r2_score_1eneg5 = round(
        r2_score(
            src.dropna()['measured'],
            src.dropna()['modeled z0=1e-5'],
        ),
        3
    )
    scale = alt.Scale(domain = [-0.05, 0.05], clamp=True)
    axis = alt.Axis(values=[-0.05, -0.025, 0.0, 0.025, 0.05])
    return (
        # (
        #     one_to_one_line+alt.Chart(src).mark_circle(size=10, opacity=0.1).encode(
        #         alt.X("measured:Q").scale(scale).axis(axis),
        #         alt.Y("modeled z0=1e-4:Q").scale(scale).axis(axis),
        #     ).properties(width=200, height = 200, title=f"r² = {value_r2_score_1eneg4} (n = {len(src)})") | \
        #     one_to_one_line+alt.Chart(src).mark_rect().encode(
        #         alt.X("measured:Q").bin(maxbins=30).scale(scale).axis(axis),
        #         alt.Y("modeled z0=1e-4:Q").bin(maxbins=30).scale(scale).axis(axis),
        #         alt.Color("count():Q")
        #     ).properties(width=200, height = 200, title=f"r² = {value_r2_score_1eneg4} (n = {len(src)})")
        # ) &\
        (
            one_to_one_line+alt.Chart(src).mark_circle(size=10, opacity=0.1).encode(
                alt.X("measured:Q").scale(scale).axis(axis),
                alt.Y("modeled z0=1e-5:Q").scale(scale).axis(axis),
            ).properties(width=200, height = 200, title=f"r² = {value_r2_score_1eneg5} (n = {len(src)})") | \
            one_to_one_line+alt.Chart(src).mark_rect().encode(
                alt.X("measured:Q").bin(maxbins=30).scale(scale).axis(axis),
                alt.Y("modeled z0=1e-5:Q").bin(maxbins=30).scale(scale).axis(axis),
                alt.Color("count():Q")
            ).properties(width=200, height = 200, title=f"r² = {value_r2_score_1eneg5} (n = {len(src)})")   
        )
    )

In [None]:
winter_times = tidy_df.query("variable == 'SF_avg_2m_ue'").set_index("time").loc[:"2023-02-28"].index.values
spring_times = tidy_df.query("variable == 'SF_avg_2m_ue'").set_index("time").loc["2023-03-01":].index.values

In [None]:
model_comparison_plot(model_meas_df)

In [None]:
(model_comparison_plot(model_meas_df[model_meas_df.time.isin(unstable_times.values)]).properties(title='Unstable data') |\
model_comparison_plot(model_meas_df[model_meas_df.time.isin(stable_times.values)]).properties(title='Stable data') |\
model_comparison_plot(model_meas_df[model_meas_df.time.isin(neutral_times.values)]).properties(title='Neutral data')).resolve_scale(color='independent')

In [None]:
(
model_comparison_plot(model_meas_df[model_meas_df.time.isin(blowing_snow_times.values)]).properties(title='Blowing snow data') |\
model_comparison_plot(model_meas_df[model_meas_df.time.isin(clear_times.values)]).properties(title='Clear data')).resolve_scale(color='independent')

In [None]:
(
model_comparison_plot(model_meas_df[model_meas_df.time.isin(winter_times.values)]).properties(title='Winter data') |\
model_comparison_plot(model_meas_df[model_meas_df.time.isin(spring_times.values)]).properties(title='Spring data')).resolve_scale(color='independent')