In [1]:
import numpy as np
import xarray as xr

import datetime as dt
import pandas as pd

import matplotlib.pyplot as plt

import altair as alt
alt.data_transformers.enable('json')
from sublimpy import utils

import dask.dataframe as dd

In a future release, Dask DataFrame will use new implementation that
contains several improvements including a logical query planning.
The user-facing DataFrame API will remain unchanged.

The new implementation is already available and can be enabled by
installing the dask-expr library:

    $ pip install dask-expr

and turning the query planning option on:

    >>> import dask
    >>> dask.config.set({'dataframe.query-planning': True})
    >>> import dask.dataframe as dd

API documentation for the new implementation is available at
https://docs.dask.org/en/stable/dask-expr-api.html

Any feedback can be reported on the Dask issue tracker
https://github.com/dask/dask/issues 

  import dask.dataframe as dd


In [2]:
# Root directory of the precomputed spectra. Each sub-directory is named after the
# hour window it covers.
# NOTE(review): absolute local path -- point this at your own copy of the data.
SPECTRA_DIR = "/Users/elischwat/Development/data/sublimationofsnow/spectra"

# Open the latent-heat spectra as dask DataFrames. The `momentum` in the variable
# names is historical: the files come from the `latent_heat` directory.
# 0900_1700 is the daytime window and 1900_0500 the nighttime window (the same
# hours used for the daytime/nighttime split of `tidy_df` later in this notebook).
# The two paths were previously assigned the other way around.
df_momentum_day = dd.read_parquet(f"{SPECTRA_DIR}/0900_1700/latent_heat/")
df_momentum_night = dd.read_parquet(f"{SPECTRA_DIR}/1900_0500/latent_heat/")

In [3]:
# Keep only spectra dated within the analysis period (inclusive on both ends).
# Both conditions are combined into ONE boolean mask: chaining `[mask1][mask2]`
# applies a second mask, computed on the unfiltered frame, to the already
# filtered frame, which only works through index re-alignment.
# NOTE(review): `date` is compared against 'YYYYMMDD' strings -- presumably the
# column stores dates as strings in that format; confirm.
df_momentum_night = df_momentum_night[
    (df_momentum_night.date >= '20221129') & (df_momentum_night.date <= '20230509')
]
df_momentum_day = df_momentum_day[
    (df_momentum_day.date >= '20221129') & (df_momentum_day.date <= '20230509')
]

In [8]:
# Tower c, heights above 2, frequencies in [0.001, 0.1): one selection string
# applied to both the day and the night spectra.
tower_c_selection = "frequency < 0.1 and frequency >= 0.001 and tower == 'c' and height > 2"
src = pd.concat([
    spectra.query(tower_c_selection).compute().assign(timeofday=label)
    for spectra, label in [
        (df_momentum_day, 'Day time'),
        (df_momentum_night, 'Night time'),
    ]
])

# Replace each frequency by the midpoint of its log-spaced bin, then average the
# cospectrum within every (height, tower, timeofday, bin) group.
frequency_bins = np.logspace(-3, -1, 100)
src.frequency = (
    pd.cut(src.frequency, frequency_bins)
    .apply(lambda interval: (interval.left + interval.right) / 2)
    .astype('float')
)
src = (
    src
    .groupby(['height', 'tower', 'timeofday', 'frequency'])[['cospectrum']]
    .mean()
    .reset_index()
)

# One panel per time of day, points coloured by measurement height.
day_and_night_avg_spectra_tower_c_all_heights = (
    alt.Chart(src)
    .mark_point()
    .encode(
        x=alt.X("frequency:Q", scale=alt.Scale(type='log'), title="Frequency (Hz)"),
        y=alt.Y("mean(cospectrum):Q", scale=alt.Scale(type='linear'), title="Cospectra(w'q')"),
        color=alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
        facet=alt.Facet(
            "timeofday:N",
            title=None,
            header=alt.Header(labelFontSize=14, labelFontStyle='bold'),
        ),
    )
    .properties(width=300, height=200)
    .resolve_scale(y='independent')
)

In [9]:
# Average cospectra per (frequency, height, tower) for the 3 and 10 heights,
# separately for the day and night datasets.
heights_of_interest = [3, 10]
frequency_band = "frequency < 0.1 and frequency >= 0.001"
spectrum_group_keys = ['frequency', 'height', 'tower']

src = (
    df_momentum_day[df_momentum_day.height.isin(heights_of_interest)]
    .query(frequency_band)
    .compute()
)
df_momentum_day_agg = src.groupby(spectrum_group_keys)[['cospectrum']].mean().reset_index()

src = (
    df_momentum_night[df_momentum_night.height.isin(heights_of_interest)]
    .query(frequency_band)
    .compute()
)
df_momentum_night_agg = src.groupby(spectrum_group_keys)[['cospectrum']].mean().reset_index()

# Stack both aggregates, labelled by time of day, for faceting.
src = pd.concat([
    df_momentum_day_agg.assign(timeofday='Day time'),
    df_momentum_night_agg.assign(timeofday='Night time'),
])

# Lines coloured by tower, dash pattern by height, one panel per time of day.
day_and_night_avg_spectra_tower_all_towers = (
    alt.Chart(src)
    .mark_line()
    .encode(
        x=alt.X("frequency:Q", scale=alt.Scale(type='log'), title="Frequency (Hz)"),
        y=alt.Y("cospectrum:Q", scale=alt.Scale(type='linear'), title="Cospectra(w'q')"),
        strokeDash=alt.StrokeDash("height:N"),
        color=alt.Color("tower:N"),
        facet=alt.Facet(
            "timeofday:N",
            title=None,
            header=alt.Header(labelFontSize=14, labelFontStyle='bold'),
        ),
    )
    .properties(width=300, height=200)
    .resolve_scale(y='independent')
)

In [10]:
# Stack the all-towers figure on top of the tower-c figure, drop the grid lines,
# and give each sub-figure its own colour / dash / shape legend.
spectra_combo_plot = (
    alt.vconcat(
        day_and_night_avg_spectra_tower_all_towers,
        day_and_night_avg_spectra_tower_c_all_heights,
    )
    .configure_axis(grid=False)
    .resolve_scale(
        x='shared',
        y='shared',
        color='independent',
        strokeDash='independent',
        shape='independent',
    )
)

# Write the figure to disk, then show it as the cell output.
spectra_combo_plot.save("../../figures/generalexam_daily_averaged_spectra.png", ppi=200)
spectra_combo_plot

In [None]:
# Average the day spectra into log-spaced frequency bins, per tower and height.
df_momentum_day_binned = df_momentum_day.compute()

# Assign every row to one of the log-spaced frequency bins. The bin edges are the
# same for every date, so the cut is applied to the whole column at once.
# (The column was previously created as 'frequency _binned', with a stray space,
# so the groupby on 'frequency_binned' below raised a KeyError.)
df_momentum_day_binned['frequency_binned'] = pd.cut(
    df_momentum_day_binned['frequency'],
    bins=np.logspace(-4, -1, 100),
)

# Drop rows with any missing value; this includes frequencies outside the bins.
df_momentum_day_binned = df_momentum_day_binned.dropna()

# Represent each bin by its midpoint as a plain float BEFORE grouping, so the
# groupby keys are numeric rather than interval categories.
df_momentum_day_binned['frequency_binned'] = df_momentum_day_binned['frequency_binned'].apply(
    lambda interval: 0.5 * (interval.left + interval.right)
).astype(float)

# Mean cospectrum / quadrature spectrum per tower, height and frequency bin.
df_momentum_day_binned = df_momentum_day_binned.groupby(
    ['tower', 'height', 'frequency_binned']
)[
    ['cospectrum', 'quadrature spectrum']
].mean().reset_index()

In [None]:
# Binned cospectrum for tower c on a linear y axis, coloured by height.
alt.Chart(
    df_momentum_day_binned.query("frequency_binned > 0.0009 and tower == 'c'")
).mark_circle().encode(
    x=alt.X("frequency_binned:Q", scale=alt.Scale(type='log')),
    y=alt.Y("cospectrum:Q", scale=alt.Scale(type='linear')),
    color=alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
).properties(width=500)

In [None]:
# Absolute value of the binned cospectrum for tower c; rows whose absolute value
# is not strictly positive are filtered out before plotting.
alt.Chart(
    df_momentum_day_binned.query("frequency_binned > 0.0009 and tower == 'c'")
).transform_calculate(
    abs="abs(datum.cospectrum)"
).transform_filter(
    alt.datum.abs > 0
).mark_circle().encode(
    x=alt.X("frequency_binned:Q", scale=alt.Scale(type='log')),
    y=alt.Y("abs:Q", scale=alt.Scale(type='linear')),
    color=alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
).properties(width=500)

In [None]:
# Binned cospectrum for tower c on a symlog y axis, coloured by height.
alt.Chart(
    df_momentum_day_binned.query("frequency_binned > 0.0009 and tower == 'c'")
).mark_circle().encode(
    x=alt.X("frequency_binned:Q", scale=alt.Scale(type='log')),
    y=alt.Y("cospectrum:Q", scale=alt.Scale(type='symlog')),
    color=alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
).properties(width=500)

In [None]:
# Binned quadrature spectrum for tower c on a symlog y axis, coloured by height.
alt.Chart(
    df_momentum_day_binned.query("frequency_binned > 0.0009 and tower == 'c'")
).mark_circle().encode(
    x=alt.X("frequency_binned:Q", scale=alt.Scale(type='log')),
    y=alt.Y("quadrature spectrum:Q", scale=alt.Scale(type='symlog')),
    color=alt.Color("height:O", scale=alt.Scale(scheme='turbo')),
).properties(width=500)

# Identify times we want to analyze using SOS (Sublimation of Snow) surface measurements

In [None]:
# NOTE: everything in this cell is disabled (commented-out) code, kept for reference.
# It selects timestamps from `tidy_df` by thresholding individual variables.
# `tidy_df` is only loaded in a later cell, so these lines would fail if they were
# simply uncommented here.
# bs_times = tidy_df.query("variable == 'SF_avg_ue'").query("value > 0").time
# nobs_times = tidy_df.query("variable == 'SF_avg_ue'").query("value == 0").time

# decoupled_times = tidy_df.query("variable == 'omega_3m_c'").query("value < 0.43").time
# weaklycoupled_times = tidy_df.query("variable == 'omega_3m_c'").query("value >= 0.43").query("value <= 0.61").time
# coupled_times = tidy_df.query("variable == 'omega_3m_c'").query("value > 0.61").time

# ri_stable_times = tidy_df.query("variable == 'Ri_3m_c'").query("value > 0.25").time
# ri_unstable_times = tidy_df.query("variable == 'Ri_3m_c'").query("value < -0.01").time
# ri_neutral_times = tidy_df.query("variable == 'Ri_3m_c'").query("value >= -0.01").query("value <= 0.25").time

# tgrad_stable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query("value > 0.01").time
# tgrad_unstable_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query("value < -0.01").time
# tgrad_neutral_times = tidy_df.query("variable == 'temp_gradient_3m_c'").query("value >= -0.01").query("value <= 0.01").time

### Open dataset, divide into daytime/nighttime, and group by stability conditions

In [None]:
# Date range encoded in the parquet file names.
start_date = '20221130'
end_date = '20230509'

# For both the cleaned and the uncleaned dataset: read the parquet file, parse the
# `time` column as datetimes, then shift it from UTC to US/Mountain.
tidy_df = utils.modify_df_timezone(
    pd.read_parquet(
        f'tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet'
    ).assign(time=lambda frame: pd.to_datetime(frame['time'])),
    'UTC',
    'US/Mountain',
)
tidy_df_unclean = utils.modify_df_timezone(
    pd.read_parquet(
        f'tidy_df_{start_date}_{end_date}_noplanar_fit.parquet'
    ).assign(time=lambda frame: pd.to_datetime(frame['time'])),
    'UTC',
    'US/Mountain',
)

In [None]:
# Split the measurements by hour of day: 9-17 counts as daytime and 19-5 (wrapping
# past midnight) as nighttime; hours 6-8 and 18 belong to neither subset.
daytime_hours = list(range(9, 18))
nighttime_hours = list(range(19, 24)) + list(range(0, 6))
hour_of_day = tidy_df.time.dt.hour
daytime_tidy_df = tidy_df[hour_of_day.isin(daytime_hours)]
nighttime_tidy_df = tidy_df[hour_of_day.isin(nighttime_hours)]

In [None]:
# Minimum daytime value of `temp_gradient_3m_c` within each 1440-minute bin.
daytime_daily_min_tempgrad = (
    daytime_tidy_df
    .query("variable == 'temp_gradient_3m_c'")
    .set_index('time')
    .groupby(pd.Grouper(freq='1440Min'))[['value']]
    .min()
)

# Classify each bin by that minimum and keep the bin start as a 'YYYYMMDD' string.
# NOTE(review): the unstable threshold (-0.001) and the stable threshold (0.01)
# are not symmetric -- confirm this is intended.
day_label_format = '%Y%m%d'
days_list_unstable = (
    daytime_daily_min_tempgrad.query("value < -0.001")
    .reset_index().time.dt.strftime(day_label_format)
)
days_list_neutral = (
    daytime_daily_min_tempgrad.query("value >= -0.001 and value <= 0.01")
    .reset_index().time.dt.strftime(day_label_format)
)
days_list_stable = (
    daytime_daily_min_tempgrad.query("value > 0.01")
    .reset_index().time.dt.strftime(day_label_format)
)

In [None]:
# Maximum daytime value of `SF_avg_ue` within each 1440-minute bin.
# (Earlier, per-timestamp variant kept for reference:)
# bs_times = tidy_df.query("variable == 'SF_avg_ue'").query("value > 0").time
daytime_daily_max_bsflux = (
    daytime_tidy_df
    .query("variable == 'SF_avg_ue'")
    .set_index('time')
    .groupby(pd.Grouper(freq='1440Min'))[['value']]
    .max()
)

# Bins whose maximum is exactly zero, as 'YYYYMMDD' strings.
days_list_nobs = (
    daytime_daily_max_bsflux
    .query("value == 0")
    .reset_index()
    .time.dt.strftime('%Y%m%d')
)

In [None]:
# Number of days in each stability class: (unstable, neutral, stable).
tuple(len(days) for days in (days_list_unstable, days_list_neutral, days_list_stable))

In [None]:
# Display `df_momentum_day` (the dask DataFrame built in the cells above) as a quick sanity check.
df_momentum_day

In [None]:
def _bin_and_average_day_spectra(spectra, stability_days, nobs_days):
    """Average spectra into log-spaced frequency bins for a subset of days.

    Parameters
    ----------
    spectra : dask DataFrame
        Must provide the columns `date`, `frequency`, `tower`, `height`,
        `cospectrum` and `quadrature spectrum`.
    stability_days, nobs_days : iterable of str
        Day labels; only rows whose `date` appears in BOTH collections are kept.

    Returns
    -------
    pandas.DataFrame
        One row per (tower, height, frequency_binned) holding the mean
        `cospectrum` and `quadrature spectrum`; `frequency_binned` is the bin
        midpoint as a float.
    """
    # isin(A) & isin(B) is the same as isin(A intersect B); a plain list avoids
    # passing pandas Series objects into the dask graph.
    selected_days = sorted(set(stability_days) & set(nobs_days))
    selected = spectra[spectra.date.isin(selected_days)].compute()

    # The bin edges are identical for every date, so cut the whole column at once.
    selected['frequency_binned'] = pd.cut(
        selected['frequency'], bins=np.logspace(-4, -1, 100)
    )
    # Drop rows with any missing value; this includes frequencies outside the bins.
    selected = selected.dropna()
    # Represent each bin by its midpoint so the groupby keys are plain floats.
    selected['frequency_binned'] = selected['frequency_binned'].apply(
        lambda interval: 0.5 * (interval.left + interval.right)
    ).astype(float)

    return selected.groupby(
        ['tower', 'height', 'frequency_binned']
    )[
        ['cospectrum', 'quadrature spectrum']
    ].mean().reset_index()


# Day spectra averaged across days that appear in `days_list_nobs` AND in one of
# the stability classes. The `momentum` in the variable names is historical: the
# cospectra plotted from these frames are labelled w'q'.
df_momentum_day_binned_unstable = _bin_and_average_day_spectra(
    df_momentum_day, days_list_unstable, days_list_nobs
)
df_momentum_day_binned_stable = _bin_and_average_day_spectra(
    df_momentum_day, days_list_stable, days_list_nobs
)
df_momentum_day_binned_neutral = _bin_and_average_day_spectra(
    df_momentum_day, days_list_neutral, days_list_nobs
)

In [None]:
def _cospectrum_panel(binned_spectra, panel_title):
    """Scatter of binned cospectrum vs. frequency for tower c, heights above 2."""
    return alt.Chart(
        binned_spectra.query("frequency_binned > 0.0009").query("tower == 'c'").query("height > 2")
    ).mark_circle().encode(
        alt.X("frequency_binned:Q").scale(type='log').title("frequency (hz)"),
        alt.Y("cospectrum:Q").scale(type='linear').scale(domain=[-0.5, 0.5], clamp=True).title("Cospectra(w'q')"),
        alt.Color("height:O").scale(scheme='turbo'),
    ).properties(width=300, height=200, title=panel_title)


# One panel per stability class, placed side by side with shared axes.
stable_panel = _cospectrum_panel(df_momentum_day_binned_stable, 'Stable')
neutral_panel = _cospectrum_panel(df_momentum_day_binned_neutral, 'Neutral')
unstable_panel = _cospectrum_panel(df_momentum_day_binned_unstable, 'Unstable')

daily_averaged_spectra = (
    stable_panel | neutral_panel | unstable_panel
).resolve_scale(x='shared', y='shared')

daily_averaged_spectra

In [None]:
def _abs_cospectrum_panel(binned_spectra, panel_title):
    """Scatter of |cospectrum| vs. frequency for tower c, heights above 2."""
    return alt.Chart(
        binned_spectra.query("frequency_binned > 0.0009").query("tower == 'c'").query("height > 2")
    ).transform_calculate(
        abs='abs(datum.cospectrum)'
    ).mark_circle().encode(
        alt.X("frequency_binned:Q").scale(type='log').title("frequency (hz)"),
        alt.Y("abs:Q").scale(type='linear'),
        alt.Color("height:O").scale(scheme='turbo'),
    ).properties(width=300, height=200, title=panel_title)


# Absolute cospectra for the three stability classes, side by side with shared axes.
(
    _abs_cospectrum_panel(df_momentum_day_binned_stable, 'Stable')
    | _abs_cospectrum_panel(df_momentum_day_binned_neutral, 'Neutral')
    | _abs_cospectrum_panel(df_momentum_day_binned_unstable, 'Unstable')
).resolve_scale(x='shared', y='shared')