In [1]:
import numpy as np
import xarray as xr

import datetime as dt
import pandas as pd

import matplotlib.pyplot as plt

import altair as alt
alt.data_transformers.enable('json')

from sublimpy import utils
from sublimpy import tidy
from sublimpy import extrautils
import glob

In [2]:
# Collect the fast-response netCDF files for 2023-03-08, order them by name,
# and keep entries 3..8 of the sorted list (in the recorded output below these
# are the hourly files suffixed _03 through _08).
# An earlier run used the '_20230113' date tag instead.
matching_files = sorted(
    path
    for path in glob.glob("/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/*.nc")
    if '_20230308' in path
)
file_list = matching_files[3:9]
file_list

['/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230308_03.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230308_04.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230308_05.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230308_06.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230308_07.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230308_08.nc']

In [3]:
# Names of the dataset variables to load: 'base_time' plus, for every
# tower/height combination, the five measurements u, v, w, h2o and tc.
# Names follow the pattern "<measurement>_<height>m_<tower>".
_heights_by_tower = {
    'c': [2, 3, 5, 10, 20],
    'uw': [3, 10],
    'ue': [3, 10],
    'd': [3, 10],
}
_measurements = ['u', 'v', 'w', 'h2o', 'tc']

variables = ['base_time'] + [
    f'{measurement}_{height}m_{tower}'
    for tower, tower_heights in _heights_by_tower.items()
    for height in tower_heights
    for measurement in _measurements
]

In [4]:
# Open all selected files as a single dataset, joining them along the "time"
# dimension in the order they appear in file_list.
ds = xr.open_mfdataset(
    paths=file_list,
    combine="nested",
    concat_dim="time",
    data_vars=variables,
)

In [5]:
# Keep only the variables of interest and flatten them into a pandas DataFrame.
selected_ds = ds[variables]
df = selected_ds.to_dataframe()

# Create timestamp
To use the data, it is necessary to combine three columns of the dataset to get the full timestamp. This is demonstrated below. The 'time' column actually only includes the minute and second information; for all datapoints, the hour according to the 'time' column is 1. The 'base_time' column indicates the hour of the day. The 'sample' column indicates the 20 Hz sample number within each second.

We demonstrate this in the plots below

In [6]:
# Unique values of each of the three timestamp components in the dataset.
df1 = pd.DataFrame({'time': np.unique(ds['time'])})
df2 = pd.DataFrame({'base_time': np.unique(ds['base_time'])})
df3 = pd.DataFrame({'sample': np.unique(ds['sample'])})

# One tick per unique value; the axis title reports how many there are.
sample_ticks = alt.Chart(df3).mark_tick(thickness=5).encode(
    alt.X("sample:Q").title(f'sample (n = {len(df3)})')
).properties(width=600)

time_ticks = alt.Chart(df1).mark_tick(thickness=1).encode(
    alt.X("time:T").axis(format='%H%M%p').title(f'time (n = {len(df1)})')
).properties(width=600)

base_time_ticks = alt.Chart(df2).mark_tick(thickness=5).encode(
    alt.X("base_time:T").title(f'base_time (n = {len(df2)})')
).properties(width=600)

# Stack the three charts vertically: sample, time, base_time.
sample_ticks & time_ticks & base_time_ticks

In [7]:
# Move the index levels into ordinary columns so later cells can read them
# by name (the timestamp cell uses 'time' and 'sample').
df = df.reset_index(drop=False)

In [8]:
# Rebuild the full timestamp for every row from three columns:
#   * date, minute and second come from 'time'
#   * hour comes from 'base_time'
#   * the sub-second part comes from 'sample' (sample number * 1e6/20 microseconds)
# This is done with vectorized datetime/timedelta arithmetic instead of a
# row-wise df.apply(..., axis=1), which builds one Python datetime per row and
# is very slow on 20 Hz data.
# Assumes 'time' and 'base_time' are datetime64 columns — the original code
# read .year/.hour etc. from them row by row.
_original_time = df['time']
df['time'] = (
    _original_time.dt.normalize()  # midnight of the same calendar day
    + pd.to_timedelta(df['base_time'].dt.hour, unit='h')
    + pd.to_timedelta(_original_time.dt.minute, unit='min')
    + pd.to_timedelta(_original_time.dt.second, unit='s')
    # astype truncates toward zero, matching the original int(...) conversion
    + pd.to_timedelta((df['sample'] * (1e6/20)).astype('int64'), unit='us')
)

In [9]:
import pytz

# NOTE(review): presumably converts the timestamps in df from UTC to
# US/Mountain — confirm against sublimpy.utils.modify_df_timezone.
df = utils.modify_df_timezone(df, pytz.utc, "US/Mountain")

In [10]:
# Fill gaps (NaNs) in every measurement column by interpolating that column's
# OWN values (pandas default: linear).
#
# The previous hand-written version assigned interpolated u_* data to every
# v_* column and interpolated tc_* data to every h2o_* column (copy-paste
# slip), silently replacing the v and h2o measurements. Building the column
# names programmatically removes that class of error.
_heights_by_tower = {
    'c': [2, 3, 5, 10, 20],
    'd': [3, 10],
    'ue': [3, 10],
    'uw': [3, 10],
}
_columns_to_interpolate = [
    f'{measurement}_{height}m_{tower}'
    for tower, tower_heights in _heights_by_tower.items()
    for height in tower_heights
    for measurement in ('u', 'v', 'w', 'h2o', 'tc')
]

# DataFrame.interpolate works column by column, so each series is filled
# independently of the others.
df[_columns_to_interpolate] = df[_columns_to_interpolate].interpolate()

In [11]:
from scipy.signal import welch, csd

# Welch power spectra of u, v and w at each height on the 'c' tower.
# fs=20 is the sampling rate in Hz; nperseg=72000 samples is one hour of
# 20 Hz data per segment.
spectrum_ls = []
for height in [2, 3, 5, 10, 20]:
    for var in ['u', 'v', 'w']:
        frequencies, power = welch(
            df[f"{var}_{height}m_c"],
            fs=20,
            window='hann',  # 'hann' is also scipy's default window
            nperseg=72000,
        )
        spectrum = pd.DataFrame({
            'frequency': frequencies,
            'power spectrum': power,
        }).assign(
            height=height,
            variance=f"{var}'{var}'",
        )
        spectrum_ls.append(spectrum)

# Long-format table: one row per (frequency, height, variance) combination.
variance_spectrum_df = pd.concat(spectrum_ls)

In [12]:
# Reference power-law curve y = 0.01 * x^(-5/3).
# np.arange(0.01, 10) uses the default step of 1, so x is the ten points
# 0.01, 1.01, ..., 9.01.
_fit_x = np.arange(0.01, 10)
fit = pd.DataFrame({
    'x': _fit_x,
    'y': 0.01 * _fit_x ** (-5/3),
})

# Dashed black line of the reference curve, drawn on log-log axes.
fit_chart = (
    alt.Chart(fit)
    .mark_line(color='black', strokeDash=[4, 2])
    .encode(
        x=alt.X('x:Q').scale(type='log'),
        y=alt.Y('y:Q').scale(type='log'),
    )
)

In [None]:
# alt.Chart(variance_spectrum_df.query("frequency > 0")).mark_line().encode(
#     alt.X("frequency:Q").scale(type='log'),
#     alt.Y("power spectrum:Q").scale(type='log'),
#     alt.Row("height:O"),
#     alt.Column("variance:O")
# ).properties(width=200, height=150)

In [13]:
# Magnitude of the u-w cross spectral density for every tower/height pair.
# Tower 'c' has five measurement heights; the other towers have two.
local_df_list = []
towers = ['c', 'd', 'uw', 'ue']
for tower in towers:
    heights = [2, 3, 5, 10, 20] if tower == 'c' else [3, 10]
    for height in heights:
        frequencies, cross_spectrum = csd(
            df[f'u_{height}m_{tower}'],
            df[f'w_{height}m_{tower}'],
            fs=20,  # Hz
            window='hann',  # 'hann' is also scipy's default window
            nperseg=72000,
        )
        local_df = pd.DataFrame({
            'frequency': frequencies,
            # csd returns complex values; keep only their magnitude
            'power spectrum': np.abs(cross_spectrum),
        }).assign(height=height, tower=tower)
        local_df_list.append(local_df)
momentum_copower_spectrum = pd.concat(local_df_list)

In [14]:
# Magnitude of the w-tc cross spectral density for every tower/height pair.
# Tower 'c' has five measurement heights; the other towers have two.
local_df_list = []
towers = ['c', 'd', 'uw', 'ue']
for tower in towers:
    heights = [2, 3, 5, 10, 20] if tower == 'c' else [3, 10]
    for height in heights:
        frequencies, cross_spectrum = csd(
            df[f'w_{height}m_{tower}'],
            df[f'tc_{height}m_{tower}'],
            fs=20,  # Hz
            window='hann',  # 'hann' is also scipy's default window
            nperseg=72000,
        )
        local_df = pd.DataFrame({
            'frequency': frequencies,
            # csd returns complex values; keep only their magnitude
            'power spectrum': np.abs(cross_spectrum),
        }).assign(height=height, tower=tower)
        local_df_list.append(local_df)
sensheat_copower_spectrum = pd.concat(local_df_list)

In [None]:
# Side-by-side cross-spectrum magnitudes for the 'c' tower, one line per
# height. Zero frequency is dropped because the x axis is logarithmic.
momentum_chart = alt.Chart(
    momentum_copower_spectrum.query("tower == 'c'").query("frequency > 0")
).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q"),
    alt.Color("height:N")
).properties(width=300, height=150, title="u'w'")

# This panel plots the w-tc spectrum, so it is titled "w'tc'" (it was
# previously mislabelled "u'w'", duplicating the left panel's title).
sensheat_chart = alt.Chart(
    sensheat_copower_spectrum.query("tower == 'c'").query("frequency > 0")
).mark_line().encode(
    alt.X("frequency:Q").scale(type='log'),
    alt.Y("power spectrum:Q"),
    alt.Color("height:N")
).properties(width=300, height=150, title="w'tc'")

momentum_chart | sensheat_chart

In [19]:
# Compare the w-tc cross-spectrum magnitude across towers at the two heights
# used in this comparison (3 m and 10 m); zero frequency is dropped because
# the x axis is logarithmic.
shared_height_rows = sensheat_copower_spectrum['height'].isin([3,10])
tower_comparison = sensheat_copower_spectrum[shared_height_rows].query("frequency > 0")

alt.Chart(tower_comparison).mark_line().encode(
    x=alt.X("frequency:Q").scale(type='log'),
    y=alt.Y("power spectrum:Q"),
    color=alt.Color("height:N"),
    facet=alt.Facet("tower:N", columns=2),
).properties(
    width=200,
    height=100,
    title="w'tc'",
).display(renderer='svg')