In [5]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import altair as alt
alt.data_transformers.enable('json')
alt.renderers.enable('jupyterlab')

RendererRegistry.enable('jupyterlab')

# User inputs

In [6]:
start_date = '20221130'
end_date = '20230509'

tidy_dataset_fn = f"tidy_df_{start_date}_{end_date}_noplanar_fit.parquet"
tidy_daily_dataset_output_fn = f"tidy_df_daily_{start_date}_{end_date}_noplanar_fit.parquet"

# Load data

In [7]:
try:
    tidy_df_5Min = pd.read_parquet(
        tidy_dataset_fn
    )
except FileNotFoundError:
    print("No file such file exists for these dates.")
tidy_df_5Min['time'] = pd.to_datetime(tidy_df_5Min['time'])

In [8]:
src = tidy_df_5Min.query("variable == 'T_3m_c'").set_index('time')[['value']]
src = src[src.value.isna()].reset_index()
src['diff'] = (src['time'] - src['time'].shift(1)).apply(lambda t_delta: t_delta.seconds)
src = src.reset_index(drop=True)
src

Unnamed: 0,time,value,diff
0,2023-01-22 23:42:30,,
1,2023-01-22 23:47:30,,300.0
2,2023-01-22 23:52:30,,300.0
3,2023-01-31 16:07:30,,58500.0
4,2023-01-31 16:12:30,,300.0
...,...,...,...
904,2023-04-23 22:07:30,,300.0
905,2023-04-23 22:12:30,,300.0
906,2023-05-05 15:32:30,,62400.0
907,2023-05-05 15:37:30,,300.0


In [9]:
data = tidy_df_5Min.query("variable == 'T_3m_c'").set_index('time')['value']
na_groups = data.notna().cumsum()[data.isna()]
t_lengths_consecutive_na = na_groups.groupby(na_groups).agg(len)

data = tidy_df_5Min.query("variable == 'RH_3m_c'").set_index('time')['value']
na_groups = data.notna().cumsum()[data.isna()]
rh_lengths_consecutive_na = na_groups.groupby(na_groups).agg(len)

# Examine pot. virtual temperature gradient

In [10]:
np.abs(tidy_df_5Min[tidy_df_5Min.measurement == 'temperature gradient'].value).min()

1.842531134776948e-07

In [11]:
alt.Chart(
    tidy_df_5Min.query("measurement == 'temperature gradient'").query("height < 5")
).mark_line().encode(
    x = 'time:T',
    y = 'value:Q',
    column='height:O'
).properties(width=200, height = 200) & alt.Chart(
    tidy_df_5Min.query("measurement == 'temperature gradient'").query("height < 5")
).mark_bar().encode(
    alt.X('value:Q').bin(step=0.1),
    alt.Y("count():Q"),    
    alt.Column('height:O')
).properties(width=200, height = 200)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [12]:
src = tidy_df_5Min.query("variable == 'temp_gradient_3m_c'")
neutral_times = src[src['value'].abs() < 0.01].time

In [13]:
alt.Chart(
    tidy_df_5Min[
        tidy_df_5Min.time.isin(neutral_times.sample(16))
    ].query("measurement == 'wind speed'").query("tower == 'c'")
).mark_line().encode(
    alt.X("value:Q").title("Wind speed (m/s)").sort('-y'),
    alt.Y("height:Q").title("Height (m)"),
    alt.Facet("time:O", columns=8)
).properties(width = 125, height = 125)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [14]:
z0_df = tidy_df_5Min[
    tidy_df_5Min.time.isin(neutral_times)
].query("tower == 'c'")
src_snowdepth = tidy_df_5Min[
    tidy_df_5Min.measurement == 'snow depth'
]
src_snowdepth = src_snowdepth[['time', 'value']].set_index('time').rename(columns={'value': 'snow_depth'})
z0_df = z0_df[z0_df.measurement.isin([
    'wind speed',
    'shear velocity',
    'snow depth'
])]
z0_df = z0_df[~z0_df.variable.str.contains("predicted")]
z0_df = z0_df.pivot_table(index=['time'], values='value', columns='variable')
z0_df = z0_df.join(src_snowdepth)
z0_df.head()

Unnamed: 0_level_0,spd_10m_c,spd_15m_c,spd_20m_c,spd_2m_c,spd_3m_c,spd_5m_c,u*_10m_c,u*_15m_c,u*_20m_c,u*_2m_c,u*_3m_c,u*_5m_c,snow_depth
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-11-30 10:37:30,0.261196,0.261303,0.278766,0.294917,0.288938,0.263247,0.092021,0.060278,0.119137,0.082064,0.098465,0.075811,0.261997
2022-11-30 12:37:30,0.375744,0.368134,0.320001,0.332643,0.329449,0.390361,0.050571,0.051124,0.071796,0.055077,0.076588,0.070909,0.261997
2022-11-30 12:47:30,0.6753,0.651386,0.612457,0.7236,0.763967,0.736115,0.120388,0.123114,0.136278,0.076045,0.049296,0.053659,0.261997
2022-11-30 13:02:30,1.60272,1.645096,1.631929,1.397709,1.489085,1.518906,0.152537,0.113622,0.082739,0.109909,0.111435,0.142778,0.261997
2022-11-30 13:07:30,0.848206,0.888767,0.873107,0.787879,0.850785,0.839572,0.101733,0.115202,0.119509,0.06189,0.085271,0.121159,0.261997


# Filter out data without monotonically increasing wind speeds 

In [15]:
len(z0_df)

4220

In [16]:
def monotonically_increasing(l):
    return all(x < y for x, y in zip(l, l[1:]))

z0_df['is_monotonic_increasing'] = z0_df.apply(
    lambda row: monotonically_increasing([
            row['spd_2m_c'], 
            row['spd_3m_c'], 
            row['spd_5m_c'], 
            row['spd_10m_c'], 
            row['spd_15m_c'], 
            row['spd_20m_c']
    ]),
    axis = 1
)

In [17]:
z0_df = z0_df[z0_df.is_monotonic_increasing]

In [18]:
len(z0_df)

1960

# Solve for $z_0$ assuming $d = 0$

https://www.eol.ucar.edu/content/calculation-roughness-length-and-displacement-height

In [19]:
von_karman = 0.4

In [20]:
d = 0.0
z0_df['z0_2m_c'] = (2 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_2m_c']*von_karman/z0_df['u*_2m_c'])
z0_df['z0_3m_c'] = (3 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_3m_c']*von_karman/z0_df['u*_3m_c'])
z0_df['z0_5m_c'] = (5 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_5m_c']*von_karman/z0_df['u*_5m_c'])
z0_df['z0_10m_c'] = (10 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_10m_c']*von_karman/z0_df['u*_10m_c'])
z0_df['z0_15m_c'] = (15 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_15m_c']*von_karman/z0_df['u*_15m_c'])
z0_df['z0_20m_c'] = (20 - d - z0_df['snow_depth'])/np.exp(z0_df['spd_20m_c']*von_karman/z0_df['u*_20m_c'])

In [21]:
variables = [
        'z0_2m_c', 
        'z0_3m_c', 
        'z0_5m_c', 
        'z0_10m_c', 
        'z0_15m_c', 
        'z0_20m_c', 
    ]
alt.Chart(
    z0_df[variables].reset_index()
).transform_fold(
    variables
).transform_filter(
    alt.FieldOneOfPredicate('key', ['z0_2m_c', 'z0_3m_c'])
).mark_circle().encode(
    alt.X('time:T'),
    alt.Y("value:Q").scale(type='log'),
    alt.Row("key:N", sort=variables)
).properties(height = 100, width = 400)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [22]:
from sklearn.metrics import r2_score

In [23]:
alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_2m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_2m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_5m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_5m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_10m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_10m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_15m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_15m_c"]
    ), 3))
) | alt.Chart(z0_df).mark_circle().encode(
    alt.X("z0_3m_c").scale(domain=[0.000000001, 10], type='log'),
    alt.Y("z0_20m_c").scale(domain=[0.000000001, 10], type='log')
).properties(width = 150, height = 150, title = str(round(r2_score(
        z0_df["z0_3m_c"],
        z0_df["z0_20m_c"]
    ), 3))
)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [24]:
alt.Chart(
    z0_df[['z0_3m_c']].resample("1D").median().reset_index()
).mark_circle(size=50).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
) + alt.Chart(
    z0_df[['z0_3m_c']].resample("1D").median().reset_index()
).mark_bar(width=1).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
).properties(height = 100)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [25]:
z0_df_weekly = z0_df[['z0_3m_c']].resample("W-MON").median().reset_index()

In [26]:
basic_z0_values_chart = alt.Chart(
    z0_df_weekly
).mark_circle(size=100).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
) + alt.Chart(
    z0_df_weekly
).mark_bar(width=1).encode(
    alt.X("time:T"),
    alt.Y("z0_3m_c").scale(type='log')
).properties(height = 100)
basic_z0_values_chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


# Use Andreas et al. 2010 Method, NOAA/SPLASH (Chris Cox) solution

In [27]:
tdk = 273.15 
# surface temp mean
Tsm = tidy_df_5Min.query("variable == 'Tsurf_c'")['value'].values 
# air temp mean
Tam = tidy_df_5Min.query("variable == 'T_3m_c'")['value'].values 
# height of sonic
z_level_n = 3 - tidy_df_5Min.query("variable == 'SnowDepth_d'")['value'].values 
# wt-covariance, vertical flux of the sonic temperature  [deg m/s]
wT_csp = tidy_df_5Min.query("variable == 'w_tc__3m_c'")['value'].values   
wq_csp = tidy_df_5Min.query("variable == 'w_h2o__3m_c'")['value'].values   
wsp = tidy_df_5Min.query("variable == 'spd_3m_c'")['value'].values 
ustar = tidy_df_5Min.query("variable == 'u*_3m_c'")['value'].values 

surface_pot_temp = tidy_df_5Min.query("variable == 'Tsurfpot_c'")['value'].values
air_pot_temp  = tidy_df_5Min.query("variable == 'Tpot_3m_c'")['value'].values
surface_mixing_ratio = tidy_df_5Min.query("variable == 'Tsurfmixingratio_c'")['value'].values
air_mixing_ratio = tidy_df_5Min.query("variable == 'mixingratio_3m_c'")['value'].values
surface_specifichumidity = surface_mixing_ratio / (1 + surface_mixing_ratio)
air_specifichumidity  = air_mixing_ratio / (1 + air_mixing_ratio)

# Obukhov length
surflayr_avg_airtemp = 0.5*(Tsm + Tam)
surflayr_avg_specifichumidity = 0.5*(surface_specifichumidity + air_specifichumidity)
surflayr_avg_virtualtemp = 0.5*(
    tidy_df_5Min.query("variable == 'Tvirtual_3m_c'")['value'].values + 
    tidy_df_5Min.query("variable == 'Tsurfvirtual_c'")['value'].values
)
# right version of equation 2.3 in Andreas 2010
# L = - (
#     surflayr_avg_airtemp/( 0.4*9.81 )
# ) * (
#     ustar**3 / (
#         wT_csp + wq_csp* (0.61*surflayr_avg_airtemp)/(
#             1 + 0.61*surflayr_avg_specifichumidity
#           )
#     )
# )
# left version of equation 2.3 in Andreas 2010
L = - (
    (
        tidy_df_5Min.query("variable == 'Tvirtual_3m_c'")['value'].values
    )/( 0.4*9.81 )
) * (
    ustar**3 / wT_csp
)

# Monin-Obukhov stability parameter, z/L:
zeta_level_n = z_level_n/L

# Drag coefficient, Cd:
Cd = ustar**2/wsp**2 #- wu_csp/(wsp**2)

Ch = wT_csp / (wsp * (surface_pot_temp - air_pot_temp))

Ce = wq_csp / (wsp * (surface_specifichumidity - air_specifichumidity))

In [33]:
import sys
sys.path.append("../splash/")
import calc_z0
z0_values = calc_z0.calc_z0(z_level_n, Cd, zeta_level_n)
z0T_values = calc_z0.calc_z0T(z_level_n, Cd, Ch, zeta_level_n)
z0q_values = calc_z0.calc_z0Q(z_level_n, Cd, Ce, zeta_level_n)

time_values = tidy_df_5Min.time.unique()

z0_andreas_df = pd.DataFrame({
    "time": time_values, 
    "z0":   z0_values,
    "z0T": z0T_values,
    "z0q": z0q_values,
})

  sma = 1 + (6.5 * zL * (1+zL)**(1/3)) / (1.3 + zL); # Psi
  x = np.real((1 - 16*zL)**(0.25)) # assumes gamma = 16
  sma = 1 + (6.5 * zL * (1+zL)**(1/3)) / (1.3 + zL); # Psi
  x = np.real((1 - 16*zL)**(0.25)) # assumes gamma = 16
  z0 = z * np.exp(-(k*(Cd**0.5)*(Ch**-1) + sm))
  z0 = z * np.exp(-(k*(Cd**0.5)*(Ch**-1) + sm))
  sma = 1 + (6.5 * zL * (1+zL)**(1/3)) / (1.3 + zL); # Psi
  x = np.real((1 - 16*zL)**(0.25)) # assumes gamma = 16


## Remove values >= 0.1, <= 7e-8 (Andreas et al., 2010)

In [34]:
print(len(z0_andreas_df.dropna()))

41964


In [35]:
z0_andreas_df['z0'] = z0_andreas_df['z0'].where(
    (z0_andreas_df['z0'] > 7e-8)
    &
    (z0_andreas_df['z0'] < 0.1)
)


In [36]:
print(len(z0_andreas_df.dropna()))

32182


## Apply the strict filtering criteria of Andreas et al. (2010)

In [37]:
time_values = tidy_df_5Min.query("variable == 'Tsurf_c'").set_index('time').sort_index().index.values
stress = tidy_df_5Min.query("variable == 'u_w_rot__3m_c'")
stress_good_times = stress[stress.value > 0].time

shflux = tidy_df_5Min.query("variable == 'w_tc__3m_c'")
shflux_good_times = shflux[np.abs(shflux.value) > 0.005].time

lhflux = tidy_df_5Min.query("variable == 'w_h2o__3m_c'")
lhflux_good_times = lhflux[np.abs(lhflux.value)/1000 > 2.5e-7].time

tdiff = (tidy_df_5Min[tidy_df_5Min.variable == 'Tsurfpot_c'].set_index('time')[['value']] - 
tidy_df_5Min[tidy_df_5Min.variable == 'Tpot_3m_c'].set_index('time')[['value']])
tdiff_good_times = tdiff[tdiff.value > 0.5].index

all_good_times = set(stress_good_times).intersection(
    set(shflux_good_times)
).intersection(
    set(lhflux_good_times)
).intersection(
    set(tdiff_good_times)
)

In [38]:
print(len(tidy_df_5Min.time.unique()))
print(len(stress_good_times))
print(len(shflux_good_times))
print(len(lhflux_good_times))
print(len(tdiff_good_times))
print(len(all_good_times))

46368
45350
27096
33605
11055
4090


In [39]:
z0_andreas_df_strict = z0_andreas_df[z0_andreas_df.time.isin(all_good_times)]

In [40]:
print(len(z0_andreas_df.dropna()), len(z0_andreas_df_strict.dropna()))

32182 3289


In [41]:
(alt.Chart(
    np.log10(z0_andreas_df.set_index('time')).reset_index()
).mark_bar().encode(
    alt.X("z0:Q").bin(maxbins=30),
    alt.Y("count():Q")
)
|
alt.Chart(
    np.log10(z0_andreas_df_strict.set_index('time')).reset_index()
).mark_bar().encode(
    alt.X("z0:Q").bin(maxbins=30),
    alt.Y("count():Q")
)
)

  result = func(self.values, **kwargs)


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


## Calculate weekly medians

In [42]:
z0_andreas_df_weekly = z0_andreas_df.set_index('time').resample('W-MON').median().reset_index()
z0_andreas_df_strict_weekly = z0_andreas_df_strict.set_index('time').resample('W-MON').median().reset_index()

In [43]:
z0_andreas_df_weekly = pd.merge(
    z0_andreas_df_weekly[['time', 'z0']].rename(columns={'z0': 'all data'}),
    z0_andreas_df_strict_weekly[['time', 'z0']].rename(columns={'z0': 'filtered'}),
    on='time',
    how='outer'
)

In [44]:
z0_andreas_df_weekly

Unnamed: 0,time,all data,filtered
0,2022-12-05,0.001488,0.018645
1,2022-12-12,0.000356,0.007096
2,2022-12-19,0.001318,0.001702
3,2022-12-26,0.001355,0.019373
4,2023-01-02,0.000159,0.000706
5,2023-01-09,0.000757,0.001669
6,2023-01-16,0.000232,0.000516
7,2023-01-23,0.000365,0.000842
8,2023-01-30,0.000804,0.000518
9,2023-02-06,0.000322,0.000264


In [45]:
print(z0_andreas_df.median())
print()
print(z0_andreas_df_strict.median())

time    2023-02-18 05:00:00
z0                 0.000494
z0T                     0.0
z0q                  0.1819
dtype: object

time    2023-02-22 12:00:00
z0                 0.002021
z0T               165.21358
z0q                0.674436
dtype: object


In [52]:
all_z0 = alt.Chart(
    z0_andreas_df
).mark_circle(opacity=0.1, size=5).encode(
    alt.X('time:T'),
    alt.Y("z0:Q").scale(type='log').axis(format="e"),
)
weekly_median_z0 = alt.Chart(
    z0_andreas_df_weekly.iloc[:-1]
).transform_fold(
    ['all data', 'filtered']
).mark_point(size=40, color='black').encode(
    alt.X('time:T'),
    alt.Y("value:Q").title("z₀"),
    alt.Shape("key:N").scale(range=['circle', 'cross']).title(["Weekly", "average"])
)

upper_line = alt.Chart(pd.DataFrame({'y':[5e-3]})).mark_rule(color='grey', size=2, strokeDash=[2,2]).encode(y='y')
lower_line = alt.Chart(pd.DataFrame({'y':[2e-4]})).mark_rule(color='grey', size=2, strokeDash=[2,2]).encode(y='y')

z0_calculations_chart = (all_z0 + weekly_median_z0).properties(width = 250, height = 100).configure_axis(grid=False)
z0_calculations_chart = z0_calculations_chart + upper_line + lower_line
z0_calculations_chart.save("z0_calculations_chart.png", ppi=400)
z0_calculations_chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


# Save roughness length values

In [291]:
z0_andreas_df.to_parquet("z0estimates/z0_andreas_df.parquet")
z0_andreas_df_strict.to_parquet("z0estimates/z0_andreas_df_strict.parquet")
z0_andreas_df_weekly.to_parquet("z0estimates/z0_andreas_df_weekly.parquet")