In [1]:
import numpy as np
import xarray as xr

import datetime as dt
import pandas as pd

import matplotlib.pyplot as plt

import altair as alt
alt.data_transformers.enable('json')

from sublimpy import utils
import glob
import pytz
from scipy.signal import welch, csd
from scipy.stats import chi2

In [2]:
# Select the fast (high-rate) NetCDF files for one study day, then keep
# entries 16..23 of the sorted list (see the displayed output for which files these are).
# Other study days used with this notebook: '_20230113', '_20230224', '_20230313'.
file_list = sorted(
    path
    for path in glob.glob("/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/*.nc")
    if '_20221224' in path
)[16:24]
file_list

['/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_16.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_17.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_18.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_19.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_20.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_21.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_22.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20221224_23.nc']

In [3]:
# Column carrying the hour-of-day part of the timestamp.
index_vars = ['base_time']

# Quantities read at every sonic: three wind components, water vapour, temperature.
_MEASUREMENTS = ('u', 'v', 'w', 'h2o', 'tc')

# Measurement heights (m) available on each tower, in the order the columns are listed.
_TOWER_HEIGHTS = (
    ('c', (2, 3, 5, 10, 15, 20)),
    ('uw', (3, 10)),
    ('ue', (3, 10)),
    ('d', (3, 10)),
)

# Names follow the pattern "<measurement>_<height>m_<tower>", e.g. 'u_2m_c'.
value_vars = [
    f"{measurement}_{height}m_{tower}"
    for tower, tower_heights in _TOWER_HEIGHTS
    for height in tower_heights
    for measurement in _MEASUREMENTS
]
VARIABLES = index_vars + value_vars

In [4]:
# Open the selected files as a single dataset, joined end-to-end along "time"
# in the order given by file_list.
ds = xr.open_mfdataset(
    file_list,
    combine="nested",
    concat_dim="time",
    data_vars=VARIABLES,
)

In [5]:
# Materialise only the selected variables as a pandas DataFrame.
df = ds[VARIABLES].to_dataframe()

# Create timestamp
To use the data, it is necessary to combine three columns of the dataset to get the full timestamp. This is demonstrated below. The 'time' column actually only includes the minute and second information. For all datapoints, the hour according to the 'time' column is 1. The 'base_time' column indicates the hour of the day. The 'sample' column indicates the 20 Hz sample number.

We demonstrate this in the plots below

In [6]:
# Unique values of each of the three timestamp components.
df1 = pd.DataFrame({'time': np.unique(ds['time'])})
df2 = pd.DataFrame({'base_time': np.unique(ds['base_time'])})
df3 = pd.DataFrame({'sample': np.unique(ds['sample'])})

# One tick strip per component, stacked vertically: sample, time, base_time.
# Each axis title reports how many distinct values the component takes.
alt.vconcat(
    alt.Chart(df3).mark_tick(thickness=5).encode(
        alt.X("sample:Q").title(f'sample (n = {len(df3)})')
    ).properties(width=600),

    alt.Chart(df1).mark_tick(thickness=1).encode(
        alt.X("time:T").axis(format='%H%M%p').title(f'time (n = {len(df1)})')
    ).properties(width=600),

    alt.Chart(df2).mark_tick(thickness=5).encode(
        alt.X("base_time:T").title(f'base_time (n = {len(df2)})')
    ).properties(width=600),
)

In [7]:
# Turn the index levels into ordinary columns so they can be read per row below.
df = df.reset_index()

In [8]:
# Assemble the full timestamp from its three sources:
#   - calendar date, minute and second from 'time'
#   - hour of day from 'base_time'
#   - sub-second offset from 'sample' (20 samples per second -> 1e6/20 microseconds each)
# Any sub-second part already present in 'time' is discarded.
# Done with vectorised datetime arithmetic; a row-wise df.apply(axis=1) that
# builds one Python datetime per row is extremely slow on several hundred
# thousand rows and yields the same values.
df['time'] = (
    df['time'].dt.normalize()
    + pd.to_timedelta(df['base_time'].dt.hour, unit='h')
    + pd.to_timedelta(df['time'].dt.minute, unit='min')
    + pd.to_timedelta(df['time'].dt.second, unit='s')
    + pd.to_timedelta((df['sample'] * (1e6/20)).astype(int), unit='us')
)

In [9]:
# NOTE(review): presumably converts the timestamps from UTC to US/Mountain
# local time — confirm against sublimpy.utils.modify_df_timezone.
df = utils.modify_df_timezone(df, pytz.UTC, "US/Mountain")

# Interpolate nans

In [10]:
# Fill gaps in every measured column by interpolation and report, per column:
# name, total row count, NaN count before, NaN count after.
# NOTE(review): several h2o columns still contain NaNs afterwards (see the
# printed output) — presumably gaps at the start of the record, which the
# default interpolate() does not fill; confirm before using those columns.
for column in value_vars:
    series = df[column]
    missing_before = series.isna().sum()
    df[column] = series.interpolate()
    missing_after = df[column].isna().sum()
    print(column, len(df), missing_before, missing_after)

u_2m_c 576000 396 0
v_2m_c 576000 396 0
w_2m_c 576000 396 0
h2o_2m_c 576000 57436 57068
tc_2m_c 576000 396 0
u_3m_c 576000 379 0
v_3m_c 576000 379 0
w_3m_c 576000 379 0
h2o_3m_c 576000 41604 41251
tc_3m_c 576000 379 0
u_5m_c 576000 873 0
v_5m_c 576000 873 0
w_5m_c 576000 873 0
h2o_5m_c 576000 882 0
tc_5m_c 576000 873 0
u_10m_c 576000 1259 0
v_10m_c 576000 1259 0
w_10m_c 576000 1259 0
h2o_10m_c 576000 26612 25295
tc_10m_c 576000 1257 0
u_15m_c 576000 1353 0
v_15m_c 576000 1353 0
w_15m_c 576000 1353 0
h2o_15m_c 576000 1246 0
tc_15m_c 576000 1353 0
u_20m_c 576000 278 0
v_20m_c 576000 278 0
w_20m_c 576000 278 0
h2o_20m_c 576000 207 0
tc_20m_c 576000 278 0
u_3m_uw 576000 246 0
v_3m_uw 576000 246 0
w_3m_uw 576000 246 0
h2o_3m_uw 576000 49879 49638
tc_3m_uw 576000 246 0
u_10m_uw 576000 272 0
v_10m_uw 576000 272 0
w_10m_uw 576000 272 0
h2o_10m_uw 576000 50515 48794
tc_10m_uw 576000 272 0
u_3m_ue 576000 217 0
v_3m_ue 576000 217 0
w_3m_ue 576000 217 0
h2o_3m_ue 576000 15635 15445
tc_3m_ue 576000

# Calculate spectra of u'u', v'v', w'w'

In [11]:
# Welch power spectra of u, v and w at the central tower ('c'), one long-format
# frame per (height, component), concatenated into variance_spectrum_df.
# NOTE(review): 15 m is not in this height list although it is loaded — confirm intentional.
spectrum_ls = []
for height in [2,3,5,10,20]:
    for var in ['u', 'v', 'w']:
        frequencies, power = welch(
            df[f"{var}_{height}m_c"],
            fs=20,           # sampling rate in Hz
            window='hann',   # scipy's default window
            nperseg=72000,   # 72000 samples at 20 Hz = one-hour segments
        )
        spectrum = pd.DataFrame({
            'frequency': frequencies,
            'power spectrum': power,
        }).assign(
            height = height,
            variance = f"{var}'{var}'",
        )
        spectrum_ls.append(spectrum)
variance_spectrum_df = pd.concat(spectrum_ls)

In [18]:
# --- Effective degrees of freedom of the Welch estimate -------------------
# total_seconds() gives the full duration; Timedelta.seconds is only the
# seconds *component* (0..86399) and would wrap for periods of a day or more.
length_of_study_period_in_seconds = (df.time.max() - df.time.min()).total_seconds()
length_of_window_in_seconds = 72000/20  # nperseg / sampling rate
n_windows_in_study_period = length_of_study_period_in_seconds/length_of_window_in_seconds
# Degrees of freedom = 2 DOF per window, multiplied by number of windows.
# NOTE(review): this counts non-overlapping windows; scipy's welch overlaps
# segments by default, so the true value may differ — confirm.
edof = 2*n_windows_in_study_period

# --- 95% confidence interval ----------------------------------------------
# conf_x / conf_y0 are the position at which the reference error bar is drawn:
# conf_x is its x (frequency) coordinate, conf_y0 the spectral level it is
# centred on.
conf_x = 1
conf_y0 = 1
# Chi-squared interval for a spectral estimate with `edof` degrees of freedom:
# row 0 is the upper bound (from the 2.5% quantile), row 1 the lower bound
# (from the 97.5% quantile). These are already the interval end points around
# conf_y0, so they are plotted directly rather than added to / subtracted
# from conf_y0.
conf = conf_y0 * edof / chi2.ppf([0.025, 0.975], edof).reshape((2,1))
conf_high = float(conf[0][0])
conf_low = float(conf[1][0])

# Vertical bar spanning the confidence interval at x = conf_x.
uncertainty_chart = alt.Chart().transform_calculate(
    high = f"{conf_high}",
    low = f"{conf_low}",
    x = f"{conf_x}"
).mark_line(color='black').encode(
    alt.X("x:Q").title(""),
    alt.Y("low:Q").title(""),
    alt.Y2("high:Q")
)

# Dot marking the reference level conf_y0 on that bar.
uncertainty_chart_dot = alt.Chart().transform_calculate(
    middle = f"{conf_y0}",
    x = f"{conf_x}"
).mark_circle(color='black').encode(
    alt.X("x:Q").title(""),
    alt.Y("middle:Q").title(""),
)

In [None]:


# Reference line with slope -5/3 in log-log space (amplitude 0.01).
fit_frequencies = np.arange(0.01, 10)
fit_chart = alt.Chart(
    pd.DataFrame({
        'x': fit_frequencies,
        'y': 0.01*fit_frequencies**(-5/3),
    })
).mark_line(color='black', strokeDash=[4,2]).encode(
    alt.X('x:Q').scale(type='log'),
    alt.Y('y:Q').scale(type='log'),
)

# One spectrum line per height; the data is supplied by the enclosing layer.
spectra_chart = alt.Chart().mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q").scale(type='log'),
    alt.Color("height:N"),
)

# Spectra, reference slope and confidence bar layered together, then split
# into one panel per variance component. The zero-frequency bin is dropped.
layered_spectra = alt.layer(
    spectra_chart,
    fit_chart,
    uncertainty_chart,
    uncertainty_chart_dot,
    data=variance_spectrum_df.query("frequency > 0"),
).properties(
    width=200,
    height=150,
)
layered_spectra.facet(
    'variance:O'
).configure_axis(grid=False).display(renderer='svg')

In [None]:
# Save the non-zero-frequency spectra to "<date of the first row>-spectra.csv".
# Files with this naming pattern are read back in the multi-day comparison below.
variance_spectrum_df.query("frequency > 0").to_csv(str(df.time.dt.date.iloc[0]) + '-spectra.csv')

# Compare w'w' spectra from different months

In [13]:
# Stack the saved per-day spectra, tagging each row with the day it came from.
# Each "<date>-spectra.csv" file must already exist in the working directory.
df_multipledays = pd.concat([
    pd.read_csv(f"{study_date}-spectra.csv").assign(date = study_date)
    for study_date in ('2022-12-24', '2023-01-13', '2023-02-24', '2023-03-13')
])

In [None]:
# w'w' spectra below 10 m, one panel per day, one line per height.
ww_below_10m = df_multipledays[
    (df_multipledays.variance == "w'w'") & (df_multipledays.height < 10)
]
alt.Chart(ww_below_10m).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q").scale(type='log'),
    alt.Color("height:N"),
    alt.Column("date:O"),
).properties(width = 300, height = 200)

In [14]:
# Average the w'w' spectra into logarithmically spaced frequency bins,
# separately for every (height, date) combination.
bin_avg_spectra_df_ls = []

# Loop-invariant inputs, computed once: 100 bin edges from 1e-5 to 1e1 Hz,
# and the w'w' subset of the multi-day table.
frequency_bin_edges = np.logspace(-5, 1, 100)
ww_spectra = df_multipledays[df_multipledays.variance == "w'w'"]

for height in df_multipledays.height.unique():
    for date in df_multipledays.date.unique():
        # .copy() so the new column is added to an independent frame rather
        # than to a slice of df_multipledays (avoids SettingWithCopyWarning).
        src = ww_spectra[
            (ww_spectra.height == height) & (ww_spectra.date == date)
        ].copy()
        src['frequency_binned'] = pd.cut(src['frequency'], frequency_bin_edges)
        # observed=False keeps every bin (empty ones average to NaN), which is
        # what the implicit default did; stating it silences the FutureWarning.
        bin_avg_spectra = (
            src.groupby("frequency_binned", observed=False)[['power spectrum']]
            .mean()
            .reset_index()
        )
        # Represent each bin by the arithmetic midpoint of its edges, stored
        # as a plain float column.
        bin_avg_spectra['frequency'] = bin_avg_spectra['frequency_binned'].map(
            lambda interval: (interval.left + interval.right)/2
        ).astype(float)
        bin_avg_spectra['height'] = height
        bin_avg_spectra['date'] = date
        bin_avg_spectra_df_ls.append(bin_avg_spectra)
bin_avg_spectra_df = pd.concat(bin_avg_spectra_df_ls)

  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']].mean().reset_index()
  bin_avg_spectra = src.groupby("frequency_binned")[['power spectrum']

In [27]:
# Bin-averaged spectra below 10 m for the two days being compared, with the
# date relabelled to include the snow depth. The frame is built with .assign
# so nothing is written into a filtered slice of bin_avg_spectra_df
# (avoids SettingWithCopyWarning).
src = (
    bin_avg_spectra_df[bin_avg_spectra_df.date.isin(['2022-12-24', '2023-03-13'])]
    .query("height < 10")
    .assign(date=lambda frame: frame['date'].replace({
        '2022-12-24': '2022-12-24 (7cm snowdepth)',
        '2023-03-13': '2023-03-13 (126cm snowdepth)',
    }))
)

# Confidence bar + reference dot + spectra: colour encodes height, dash
# pattern encodes date.
power_spectra_2m_is_too_low_chart = (
    uncertainty_chart
    +
    uncertainty_chart_dot
    +
    alt.Chart(
        src[['power spectrum', 'frequency', 'height', 'date']]
    ).mark_line().encode(
        alt.X("frequency:Q").scale(domain = [0.001, 10], type='log').title("frequency (Hz)"),
        alt.Y("power spectrum:Q").scale(type='log').title("power spectrum "),
        alt.Color("height:N"),
        alt.StrokeDash("date:O"),
    ).properties(width = 300, height = 200)
).configure_axis(grid=False).configure_legend(orient='bottom-left', columns = 1)
power_spectra_2m_is_too_low_chart

In [28]:
# Export the comparison figure as a PNG at 200 ppi.
power_spectra_2m_is_too_low_chart.save("power_spectra_2m_is_too_low_chart.png", ppi=200)

In [None]:
# Same two-day comparison, but for all heights and with one panel per date.
# Built with .assign so nothing is written into a filtered slice of
# bin_avg_spectra_df (avoids SettingWithCopyWarning).
src = bin_avg_spectra_df[
    bin_avg_spectra_df.date.isin(['2022-12-24', '2023-03-13'])
].assign(date=lambda frame: frame['date'].replace({
    '2022-12-24': '2022-12-24 (7cm snowdepth)',
    '2023-03-13': '2023-03-13 (126cm snowdepth)',
}))
alt.Chart(
    src
).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q").scale(type='log'),
    alt.Color("height:N"),
    alt.Column("date:O"),
).properties(width = 300, height = 200).configure_axis(grid=False)

# Calculate cospectra of u'w', w'tc', and w'h2o'

In [None]:
# u'w' cospectra for every tower/height combination.
local_df_list = []
towers = ['c', 'd', 'uw', 'ue']
for tower in towers:
    # The central tower has six measurement heights; the others have two.
    heights = [2, 3, 5, 10, 15, 20] if tower == 'c' else [3, 10]
    for height in heights:
        frequency, cross_spectrum = csd(
            df[f'u_{height}m_{tower}'],
            df[f'w_{height}m_{tower}'],
            fs=20,           # sampling rate in Hz
            window='hann',   # scipy's default window
            nperseg=72000,   # 72000 samples at 20 Hz = one-hour segments
        )
        # csd returns a complex cross-spectral density; the cospectrum is its
        # real part. Keeping the column real also lets it be plotted as a
        # quantitative field.
        local_df = pd.DataFrame({
            'frequency': frequency,
            'power spectrum': np.real(cross_spectrum),
        }).assign(height=height).assign(tower=tower)
        local_df_list.append(local_df)
momentum_copower_spectrum = pd.concat(local_df_list)

In [None]:
# w'tc' cospectra for every tower/height combination.
local_df_list = []
towers = ['c', 'd', 'uw', 'ue']
for tower in towers:
    # The central tower has six measurement heights; the others have two.
    heights = [2, 3, 5, 10, 15, 20] if tower == 'c' else [3, 10]
    for height in heights:
        frequency, cross_spectrum = csd(
            df[f'w_{height}m_{tower}'],
            df[f'tc_{height}m_{tower}'],
            fs=20,           # sampling rate in Hz
            window='hann',   # scipy's default window
            nperseg=72000,   # 72000 samples at 20 Hz = one-hour segments
        )
        # csd returns a complex cross-spectral density; the cospectrum is its
        # real part. Keeping the column real also lets it be plotted as a
        # quantitative field.
        local_df = pd.DataFrame({
            'frequency': frequency,
            'power spectrum': np.real(cross_spectrum),
        }).assign(height=height).assign(tower=tower)
        local_df_list.append(local_df)
sensheat_copower_spectrum = pd.concat(local_df_list)

In [None]:
# w'h2o' cospectra for every tower/height combination.
# NOTE(review): some h2o columns still contain NaNs after interpolation (see
# the interpolation cell's printed counts); csd will presumably return NaN
# for those series — confirm.
local_df_list = []
towers = ['c', 'd', 'uw', 'ue']
for tower in towers:
    # The central tower has six measurement heights; the others have two.
    heights = [2, 3, 5, 10, 15, 20] if tower == 'c' else [3, 10]
    for height in heights:
        frequency, cross_spectrum = csd(
            df[f'w_{height}m_{tower}'],
            df[f'h2o_{height}m_{tower}'],
            fs=20,           # sampling rate in Hz
            window='hann',   # scipy's default window
            nperseg=72000,   # 72000 samples at 20 Hz = one-hour segments
        )
        # csd returns a complex cross-spectral density; the cospectrum is its
        # real part. Keeping the column real also lets it be plotted as a
        # quantitative field.
        local_df = pd.DataFrame({
            'frequency': frequency,
            'power spectrum': np.real(cross_spectrum),
        }).assign(height=height).assign(tower=tower)
        local_df_list.append(local_df)
latheat_copower_spectrum = pd.concat(local_df_list)

In [None]:
# Display the w'h2o' cross-spectrum table for inspection.
latheat_copower_spectrum

In [None]:
def _central_tower_cospectrum_chart(spectrum_df, title):
    """Log-log line chart of one cospectrum table at the central tower.

    Keeps only tower 'c' and positive frequencies, draws one line per height
    and labels the panel with `title`.
    """
    # NOTE(review): a log y-scale cannot show non-positive values — confirm
    # this is acceptable for these cospectra.
    return alt.Chart(
        spectrum_df.query("tower == 'c'").query("frequency > 0")
    ).mark_line().encode(
        alt.X("frequency:Q").scale(type='log'),
        alt.Y("power spectrum:Q").scale(type='log'),
        alt.Color("height:N")
    ).properties(width=300, height=150, title=title)


# Momentum, sensible-heat and latent-heat cospectra side by side.
alt.hconcat(
    _central_tower_cospectrum_chart(momentum_copower_spectrum, "u'w'"),
    _central_tower_cospectrum_chart(sensheat_copower_spectrum, "w'tc'"),
    _central_tower_cospectrum_chart(latheat_copower_spectrum, "w'h2o'"),
)

In [None]:
# w'h2o' at the central tower, restricted to frequencies between 0 and 0.01 Hz.
latheat_low_frequency = latheat_copower_spectrum.query(
    "tower == 'c' and frequency > 0 and frequency < 0.01"
)
alt.Chart(latheat_low_frequency).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q").scale(type='log'),
    alt.Color("height:N")
).properties(width=300, height=150, title="w'h2o'")

In [None]:
# w'h2o' at the central tower below 10 m, on a linear y-axis.
latheat_below_10m = latheat_copower_spectrum.query(
    "tower == 'c' and frequency > 0 and height < 10"
)
alt.Chart(latheat_below_10m).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q"),
    alt.Color("height:N")
).properties(width=300, height=150, title="w'h2o'")

In [None]:
# w'tc' at 3 m and 10 m (the heights kept by the filter below), one panel per
# tower in a two-column grid, on a linear y-axis.
sensheat_3m_10m = sensheat_copower_spectrum.query(
    "height in [3, 10] and frequency > 0"
)
alt.Chart(sensheat_3m_10m).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q"),
    alt.Color("height:N"),
    alt.Facet("tower:N", columns=2),
).properties(width=200, height=100, title="w'tc'").display(renderer='svg')