In [229]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import altair as alt
alt.data_transformers.enable('json')
alt.renderers.enable('jupyterlab')

RendererRegistry.enable('jupyterlab')

# User inputs

In [230]:
start_date = '20221130'
end_date = '20230509'

tidy_dataset_fn = f"tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet"

# Load data

In [231]:
try:
    tidy_df = pd.read_parquet(
        tidy_dataset_fn
    )
except FileNotFoundError:
    print("No file such file exists for these dates.")
tidy_df['time'] = pd.to_datetime(tidy_df['time'])

In [232]:
# data = tidy_df.query("variable == 'T_3m_c'").set_index('time')['value']
# na_groups = data.notna().cumsum()[data.isna()]
# t_lengths_consecutive_na = na_groups.groupby(na_groups).agg(len)

# data = tidy_df.query("variable == 'RH_3m_c'").set_index('time')['value']
# na_groups = data.notna().cumsum()[data.isna()]
# rh_lengths_consecutive_na = na_groups.groupby(na_groups).agg(len)

# Examine pot. virtual temperature gradient

In [233]:
np.abs(tidy_df[tidy_df.measurement == 'temperature gradient'].value).min()

2.138056099376673e-06

In [234]:
alt.Chart(
    tidy_df.query("measurement == 'temperature gradient'").query("height < 5")
).mark_line().encode(
    x = 'time:T',
    y = 'value:Q',
    column='height:O'
).properties(width=200, height = 200) & alt.Chart(
    tidy_df.query("measurement == 'temperature gradient'").query("height < 5")
).mark_bar().encode(
    alt.X('value:Q').bin(step=0.1),
    alt.Y("count():Q"),    
    alt.Column('height:O')
).properties(width=200, height = 200)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [235]:
src = tidy_df.query("variable == 'temp_gradient_3m_c'")
neutral_times = src[src['value'].abs() < 0.01].time

In [236]:
alt.Chart(
    tidy_df[
        tidy_df.time.isin(neutral_times.sample(16))
    ].query("measurement == 'wind speed'").query("tower == 'c'")
).mark_line().encode(
    alt.X("value:Q").title("Wind speed (m/s)").sort('-y'),
    alt.Y("height:Q").title("Height (m)"),
    alt.Facet("time:O", columns=8)
).properties(width = 125, height = 125)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [237]:
z0_df = tidy_df[
    tidy_df.time.isin(neutral_times)
].query("tower == 'c'")
src_snowdepth = tidy_df[
    tidy_df.measurement == 'snow depth'
]
src_snowdepth = src_snowdepth[['time', 'value']].set_index('time').rename(columns={'value': 'snow_depth'})
z0_df = z0_df[z0_df.measurement.isin([
    'wind speed',
    'shear velocity',
    'snow depth'
])]
z0_df = z0_df[~z0_df.variable.str.contains("predicted")]
z0_df = z0_df.pivot_table(index=['time'], values='value', columns='variable')
z0_df = z0_df.join(src_snowdepth)
z0_df.head()

Unnamed: 0_level_0,spd_10m_c,spd_15m_c,spd_20m_c,spd_2m_c,spd_3m_c,spd_5m_c,u*_10m_c,u*_15m_c,u*_20m_c,u*_2m_c,u*_3m_c,u*_5m_c,snow_depth
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-11-30 17:30:00,0.563815,0.519016,0.440678,0.655301,0.636088,0.648741,0.127592,0.080788,0.088558,0.080677,0.088328,0.094388,0.261997
2022-12-01 18:00:00,1.33975,1.315193,1.315227,1.239395,1.291831,1.318185,0.210201,0.206864,0.159162,0.11909,0.086317,0.14071,0.236329
2022-12-02 15:00:00,5.218128,5.242333,5.196984,4.659812,4.825768,5.117994,0.338601,0.216612,0.404506,0.292957,0.277848,0.317659,0.346144
2022-12-02 15:30:00,3.474541,3.531824,3.481877,3.278661,3.368644,3.461675,0.412802,0.376375,0.360358,0.305569,0.1132,0.21604,0.346144
2022-12-02 17:30:00,5.878656,6.004,6.186003,5.181984,5.513559,5.807573,0.636175,0.660143,0.72138,0.147376,0.235242,0.432853,0.346144


# Filter out data without monotonically increasing wind speeds 

In [238]:
len(z0_df)

664

In [239]:
def monotonically_increasing(l):
    return all(x < y for x, y in zip(l, l[1:]))

z0_df['is_monotonic_increasing'] = z0_df.apply(
    lambda row: monotonically_increasing([
            row['spd_2m_c'], 
            row['spd_3m_c'], 
            row['spd_5m_c'], 
            row['spd_10m_c'], 
            row['spd_15m_c'], 
            row['spd_20m_c']
    ]),
    axis = 1
)

In [240]:
z0_df = z0_df[z0_df.is_monotonic_increasing]

In [241]:
len(z0_df)

328

# Solve for $z_0$ assuming $d = 0$

https://www.eol.ucar.edu/content/calculation-roughness-length-and-displacement-height

In [242]:
von_karman = 0.4

In [243]:
d = 0.0
z0_df['z0_2m_c'] = (2 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_2m_c']*von_karman/z0_df['u*_2m_c'])
z0_df['z0_3m_c'] = (3 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_3m_c']*von_karman/z0_df['u*_3m_c'])
z0_df['z0_5m_c'] = (5 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_5m_c']*von_karman/z0_df['u*_5m_c'])
z0_df['z0_10m_c'] = (10 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_10m_c']*von_karman/z0_df['u*_10m_c'])
z0_df['z0_15m_c'] = (15 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_15m_c']*von_karman/z0_df['u*_15m_c'])
z0_df['z0_20m_c'] = (20 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_20m_c']*von_karman/z0_df['u*_20m_c'])

In [244]:
variables = [
        'z0_2m_c', 
        'z0_3m_c', 
        'z0_5m_c', 
        'z0_10m_c', 
        'z0_15m_c', 
        'z0_20m_c', 
    ]
alt.Chart(
    z0_df[variables].reset_index()
).transform_fold(
    variables
).transform_filter(
    alt.FieldOneOfPredicate('key', ['z0_2m_c', 'z0_3m_c'])
).mark_circle().encode(
    alt.X('time:T'),
    alt.Y("value:Q").scale(type='log'),
    alt.Row("key:N", sort=variables)
).properties(height = 100, width = 400)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [245]:
from sklearn.metrics import r2_score

In [246]:
alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_2m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_2m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_5m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_5m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_10m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_10m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_15m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_15m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_20m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_20m_c"]
    ), 3))
)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [247]:
alt.Chart(
    z0_df[['z0_3m_c']].resample("1D").median().reset_index()
).mark_circle(size=50).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
) + alt.Chart(
    z0_df[['z0_3m_c']].resample("1D").median().reset_index()
).mark_bar(width=1).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
).properties(height = 100)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [248]:
z0_df_weekly = z0_df[['z0_3m_c']].resample("W-MON").median().reset_index()

In [249]:
z0_df[['z0_3m_c']].median()

z0_3m_c    0.000544
dtype: float64

In [250]:
basic_z0_values_chart = alt.Chart(
    z0_df_weekly
).mark_circle(size=100).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
) + alt.Chart(
    z0_df_weekly
).mark_bar(width=1).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
).properties(height = 100)
basic_z0_values_chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


# Use Andreas et al. 2010 Method, NOAA/SPLASH (Chris Cox) solution

In [251]:
tdk = 273.15 
# surface temp mean
Tsm = tidy_df.query("variable == 'Tsurf_c'")['value'].values 
# air temp mean
Tam = tidy_df.query("variable == 'T_3m_c'")['value'].values 
# height of sonic
z_level_n = 3 - tidy_df.query("variable == 'SnowDepth_c'")['value'].values 
# wt-covariance, vertical flux of the sonic temperature  [deg m/s]
wT_csp = tidy_df.query("variable == 'w_tc__3m_c'")['value'].values   
wq_csp = tidy_df.query("variable == 'w_h2o__3m_c'")['value'].values   
wsp = tidy_df.query("variable == 'spd_3m_c'")['value'].values 
ustar = tidy_df.query("variable == 'u*_3m_c'")['value'].values 

surface_pot_temp = tidy_df.query("variable == 'Tsurfpot_c'")['value'].values
air_pot_temp  = tidy_df.query("variable == 'Tpot_3m_c'")['value'].values
surface_mixing_ratio = tidy_df.query("variable == 'Tsurfmixingratio_c'")['value'].values
air_mixing_ratio = tidy_df.query("variable == 'mixingratio_3m_c'")['value'].values
surface_specifichumidity = surface_mixing_ratio / (1 + surface_mixing_ratio)
air_specifichumidity  = air_mixing_ratio / (1 + air_mixing_ratio)

# Obukhov length
surflayr_avg_airtemp = 0.5*(Tsm + Tam)
surflayr_avg_specifichumidity = 0.5*(surface_specifichumidity + air_specifichumidity)
surflayr_avg_virtualtemp = 0.5*(
    tidy_df.query("variable == 'Tvirtual_3m_c'")['value'].values + 
    tidy_df.query("variable == 'Tsurfvirtual_c'")['value'].values
)
# right version of equation 2.3 in Andreas 2010
# L = - (
#     surflayr_avg_airtemp/( 0.4*9.81 )
# ) * (
#     ustar**3 / (
#         wT_csp + wq_csp* (0.61*surflayr_avg_airtemp)/(
#             1 + 0.61*surflayr_avg_specifichumidity
#           )
#     )
# )
# left version of equation 2.3 in Andreas 2010
L = - (
    (
        tidy_df.query("variable == 'Tvirtual_3m_c'")['value'].values
    )/( 0.4*9.81 )
) * (
    ustar**3 / wT_csp
)

# Monin-Obukhov stability parameter, z/L:
zeta_level_n = z_level_n/L

# Drag coefficient, Cd:
Cd = ustar**2/wsp**2 #- wu_csp/(wsp**2)

Ch = wT_csp / (wsp * (surface_pot_temp - air_pot_temp))

Ce = wq_csp / (wsp * (surface_specifichumidity - air_specifichumidity))

  ustar**3 / wT_csp


In [252]:
import sys
sys.path.append("../splash/")
import calc_z0
z0_values = calc_z0.calc_z0(z_level_n, Cd, zeta_level_n)
z0T_values = calc_z0.calc_z0T(z_level_n, Cd, Ch, zeta_level_n)
z0q_values = calc_z0.calc_z0Q(z_level_n, Cd, Ce, zeta_level_n)

time_values = tidy_df.time.unique()

z0_andreas_df = pd.DataFrame({
    "time": time_values, 
    "z0":   z0_values,
    "z0T": z0T_values,
    "z0q": z0q_values,
})

  sma = 1 + (6.5 * zL * (1+zL)**(1/3)) / (1.3 + zL); # Psi
  x = np.real((1 - 16*zL)**(0.25)) # assumes gamma = 16
  sma = 1 + (6.5 * zL * (1+zL)**(1/3)) / (1.3 + zL); # Psi
  x = np.real((1 - 16*zL)**(0.25)) # assumes gamma = 16
  z0 = z * np.exp(-(k*(Cd**0.5)*(Ch**-1) + sm))
  sma = 1 + (6.5 * zL * (1+zL)**(1/3)) / (1.3 + zL); # Psi
  x = np.real((1 - 16*zL)**(0.25)) # assumes gamma = 16


## Remove values >= 0.1, <= 7e-8 (Andreas et al., 2010)

In [253]:
print(len(z0_andreas_df.dropna()))

7048


In [254]:
z0_andreas_df['z0'] = z0_andreas_df['z0'].where(
    (z0_andreas_df['z0'] > 7e-8)
    &
    (z0_andreas_df['z0'] < 0.1)
)


In [255]:
print(len(z0_andreas_df.dropna()))

5298


## Apply the strict filtering criteria of Andreas et al. (2010)

In [256]:
time_values = tidy_df.query("variable == 'Tsurf_c'").set_index('time').sort_index().index.values
stress = tidy_df.query("variable == 'u*_3m_c'").assign(
    value = np.sqrt(tidy_df.query("variable == 'u*_3m_c'").value)
)
stress_good_times = stress[stress.value > 0].time

shflux = tidy_df.query("variable == 'w_tc__3m_c'")
shflux_good_times = shflux[np.abs(shflux.value) > 0.005].time

lhflux = tidy_df.query("variable == 'w_h2o__3m_c'")
lhflux_good_times = lhflux[np.abs(lhflux.value)/1000 > 2.5e-7].time

tdiff = (
    tidy_df[tidy_df.variable == 'Tsurfpot_c'].set_index('time')[['value']] - 
    tidy_df[tidy_df.variable == 'Tpot_3m_c'].set_index('time')[['value']]
)
tdiff_good_times = tdiff[np.abs(tdiff.value > 0.5)].index

Qdiff = (
    tidy_df[tidy_df.variable == 'Tsurfspecifichumidity_c'].set_index('time')[['value']] - 
    tidy_df[tidy_df.variable == 'specifichumidity_3m_c'].set_index('time')[['value']]
)
Qdiff_good_times = Qdiff[np.abs(Qdiff.value > 1e-5)].index

all_good_times = set(stress_good_times).intersection(
    set(shflux_good_times)
).intersection(
    set(lhflux_good_times)
).intersection(
    set(tdiff_good_times)
).intersection(
    set(Qdiff_good_times)
)

In [257]:
print(len(tidy_df.time.unique()))
print(len(stress_good_times))
print(len(shflux_good_times))
print(len(lhflux_good_times))
print(len(tdiff_good_times))
print(len(Qdiff_good_times))
print(len(all_good_times))

7728
7691
4484
5679
1894
5297
677


In [258]:
z0_andreas_df_strict = z0_andreas_df[z0_andreas_df.time.isin(all_good_times)]

In [259]:
print(len(z0_andreas_df.dropna()), len(z0_andreas_df_strict.dropna()))

5298 520


In [260]:
(
    (
        alt.Chart(
            np.log10(z0_andreas_df.set_index('time')).reset_index()
        ).mark_bar().encode(
            alt.X("z0:Q").bin(maxbins=30),
            alt.Y("count():Q")
        ) +
        alt.Chart(
            np.log10(z0_andreas_df.set_index('time')).reset_index()
        ).mark_rule(strokeWidth=3, strokeDash=[8,4], color='red').encode(
            alt.X("median(z0):Q")
        )
    )  
|
    (
        alt.Chart(
            np.log10(z0_andreas_df_strict.set_index('time')).reset_index()
        ).mark_bar().encode(
            alt.X("z0:Q").bin(maxbins=30),
            alt.Y("count():Q")
        ) +
        alt.Chart(
            np.log10(z0_andreas_df_strict.set_index('time')).reset_index()
        ).mark_rule(strokeWidth=3, strokeDash=[8,4], color='red').encode(
            alt.X("median(z0):Q")
        )
    )  
).resolve_scale(x='shared')

  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


## Calculate weekly medians

In [261]:
z0_andreas_df_weekly = z0_andreas_df.set_index('time').resample('W-MON').median().reset_index()
z0_andreas_df_strict_weekly = z0_andreas_df_strict.set_index('time').resample('W-MON').median().reset_index()

z0_andreas_df_weekly_counts = z0_andreas_df.set_index('time').resample('W-MON').count().reset_index()
z0_andreas_df_strict_weekly_counts = z0_andreas_df_strict.set_index('time').resample('W-MON').count().reset_index()

In [262]:
z0_andreas_df_weekly = pd.merge(
    z0_andreas_df_weekly[['time', 'z0']].rename(columns={'z0': 'all data'}),
    z0_andreas_df_strict_weekly[['time', 'z0']].rename(columns={'z0': 'filtered'}),
    on='time',
    how='outer'
)

z0_andreas_df_weekly_counts = pd.merge(
    z0_andreas_df_weekly_counts[['time', 'z0']].rename(columns={'z0': 'all data'}),
    z0_andreas_df_strict_weekly_counts[['time', 'z0']].rename(columns={'z0': 'filtered'}),
    on='time',
    how='outer'
).rename(columns = {
    'all data': 'all data counts',
    'filtered': 'filtered counts'
})

z0_andreas_df_weekly = z0_andreas_df_weekly.merge(z0_andreas_df_weekly_counts, on='time')
z0_andreas_df_weekly['relative'] = z0_andreas_df_weekly['filtered'] / z0_andreas_df_weekly['all data']
z0_andreas_df_weekly

Unnamed: 0,time,all data,filtered,all data counts,filtered counts,relative
0,2022-12-05,0.000323,0.000506,229,12.0,1.567173
1,2022-12-12,0.00014,0.00041,278,3.0,2.926994
2,2022-12-19,0.000271,0.000181,305,22.0,0.667242
3,2022-12-26,0.000198,0.000727,292,29.0,3.669903
4,2023-01-02,4.1e-05,2e-06,272,6.0,0.053655
5,2023-01-09,0.000157,0.000161,284,56.0,1.026756
6,2023-01-16,6.5e-05,7.2e-05,257,28.0,1.110088
7,2023-01-23,0.000108,0.00014,282,18.0,1.293302
8,2023-01-30,7.7e-05,2e-06,299,20.0,0.029958
9,2023-02-06,4.5e-05,2.7e-05,257,35.0,0.591568


In [263]:
print(z0_andreas_df.median())
print()
print(z0_andreas_df_strict.median())

time    2023-02-18 11:45:00
z0                 0.000074
z0T                     0.0
z0q                0.111271
dtype: object

time    2023-02-22 21:30:00
z0                 0.000149
z0T                30.50505
z0q                 0.39713
dtype: object


In [264]:
all_z0 = alt.Chart(
    z0_andreas_df
).mark_circle(opacity=0.25, size=5).encode(
    alt.X('time:T'),
    alt.Y("z0:Q").scale(type='log').axis(format="e"),
)
weekly_median_z0 = alt.Chart(
    z0_andreas_df_weekly.iloc[:-1]
).transform_fold(
    ['all data', 'filtered']
).mark_point(size=40, color='black').encode(
    alt.X('time:T'),
    alt.Y("value:Q").title("z₀"),
    alt.Shape("key:N").scale(range=['circle', 'triangle']).title(["Weekly", "average"])
)

median_line = alt.Chart(
    pd.DataFrame({'y':[z0_andreas_df.median().z0]})
).mark_rule(color='red', size=2).encode(y='y')
strict_median_line = alt.Chart(
    pd.DataFrame({'y':[z0_andreas_df_strict.median().z0]})
).mark_rule(color='red', size=2, strokeDash=[3,2]).encode(y='y')

z0_calculations_chart = (all_z0 + weekly_median_z0).properties(width = 250, height = 100)
z0_calculations_chart = (median_line + strict_median_line + z0_calculations_chart).configure_axis(grid=False)
z0_calculations_chart.save("z0_calculations_chart.png", ppi=400)
z0_calculations_chart.display(renderer='svg')

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


# Save roughness length values

In [38]:
z0_andreas_df.to_parquet("z0estimates/z0_andreas_df.parquet")
z0_andreas_df_strict.to_parquet("z0estimates/z0_andreas_df_strict.parquet")
z0_andreas_df_weekly.to_parquet("z0estimates/z0_andreas_df_weekly.parquet")

In [39]:
ls -lah z0estimates

total 720
drwxr-xr-x    5 elischwat  staff   160B Feb 13 21:12 [34m.[m[m/
drwxr-xr-x  415 elischwat  staff    13K Feb 22 16:14 [34m..[m[m/
-rw-r--r--    1 elischwat  staff   264K Feb 22 16:14 z0_andreas_df.parquet
-rw-r--r--    1 elischwat  staff    31K Feb 22 16:14 z0_andreas_df_strict.parquet
-rw-r--r--    1 elischwat  staff   3.5K Feb 22 16:14 z0_andreas_df_weekly.parquet
