In [73]:
import numpy as np
import xarray as xr

import datetime as dt
import pandas as pd

import matplotlib.pyplot as plt

import altair as alt
alt.data_transformers.enable('json')

from sublimpy import utils
from sublimpy import tidy
from sublimpy import extrautils
import glob

# Open data

## Fast data

In [74]:
file_list = glob.glob("/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/*.nc")
file_list = [ f for f in file_list if '_20230503_' in f]
file_list = sorted(file_list)[16:]
file_list

['/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_16.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_17.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_18.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_19.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_20.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_21.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_22.nc',
 '/Users/elischwat/Development/data/sublimationofsnow/sosqc_fast/isfs_sos_qc_geo_tiltcor_hr_20230503_23.nc']

In [75]:
ds = xr.open_mfdataset(file_list, concat_dim="time", combine="nested")

In [76]:
ds = ds[[
    'base_time',
    'u_3m_c',	'v_3m_c',	'w_3m_c',	'h2o_3m_c',		'tc_3m_c',
    'u_3m_d',	'v_3m_d',	'w_3m_d',	'h2o_3m_d',		'tc_3m_d',
    'u_3m_ue',	'v_3m_ue',	'w_3m_ue',	'h2o_3m_ue',	'tc_3m_ue',
    'u_3m_uw',	'v_3m_uw',	'w_3m_uw',	'h2o_3m_uw',	'tc_3m_uw',
    'u_5m_c',	'v_5m_c',	'w_5m_c',	'h2o_5m_c',		'tc_5m_c',
    'u_10m_c',	'v_10m_c',	'w_10m_c',	'h2o_10m_c',	'tc_10m_c',
    'u_10m_d',	'v_10m_d',	'w_10m_d',	'h2o_10m_d',	'tc_10m_d',
    'u_10m_ue',	'v_10m_ue',	'w_10m_ue',	'h2o_10m_ue',	'tc_10m_ue',
    'u_10m_uw',	'v_10m_uw',	'w_10m_uw',	'h2o_10m_uw',	'tc_10m_uw',
    'u_15m_c',	'v_15m_c',	'w_15m_c',	'h2o_15m_c',	'tc_15m_c',
    'u_20m_c',	'v_20m_c',	'w_20m_c',	'h2o_20m_c',	'tc_20m_c',
]]

## Planar fits files

In [77]:
monthly_file = "/Users/elischwat/Development/data/sublimationofsnow/monthly_planar_fits.csv"
weekly_file = "/Users/elischwat/Development/data/sublimationofsnow/weekly_planar_fits.csv"

In [78]:
fits_df = pd.read_csv(monthly_file, delim_whitespace=True)
weeklyfits_df = pd.read_csv(weekly_file, delim_whitespace=True)

fits_df['height'] = fits_df['height'].str.replace('_', '.').astype('float')
weeklyfits_df['start_date'] = pd.to_datetime(weeklyfits_df['start_date'], format='%Y%m%d')
weeklyfits_df['end_date'] = pd.to_datetime(weeklyfits_df['end_date'], format='%Y%m%d')

fits_df['W_f'] = fits_df.apply(
    lambda row: [row['W_f_1'], row['W_f_2'], row['W_f_3']],
    axis=1
).drop(columns=['W_f_1', 'W_f_2', 'W_f_3'])
weeklyfits_df['W_f'] = weeklyfits_df.apply(
    lambda row: [row['W_f_1'], row['W_f_2'], row['W_f_3']],
    axis=1
).drop(columns=['W_f_1', 'W_f_2', 'W_f_3'])

  fits_df = pd.read_csv(monthly_file, delim_whitespace=True)
  weeklyfits_df = pd.read_csv(weekly_file, delim_whitespace=True)


# Create timestamp
To use the datam, its necessary to combine 3 columns of data from the dataset to get the full timestamp. This is demonstrated below. The 'time' column actually only incudes the second and minute information. For all datapoints, the hour according to the 'time' column is 1.  The 'base_time' column indicates the hour of the day. The 'sample' column indicates the 20hz sample number. 

We demonstrate this in the plots below

In [79]:
df1 = pd.DataFrame({'time': np.unique(ds['time'])})
df2 = pd.DataFrame({'base_time': np.unique(ds['base_time'])})
df3 = pd.DataFrame({'sample': np.unique(ds['sample'])})
(
    alt.Chart(df3).mark_tick(thickness=5).encode(
        alt.X("sample:Q").title(
            f'sample (n = {len(df3)})'
        )
    ).properties(width=600) & 

    alt.Chart(df1).mark_tick(thickness=1).encode(
        alt.X("time:T").axis(
            format='%H%M%p'
        ).title(
            f'time (n = {len(df1)})'
        )
    ).properties(width=600) & 

    alt.Chart(df2).mark_tick(thickness=5).encode(
        alt.X("base_time:T").title(
            f'base_time (n = {len(df2)})'
        )
    ).properties(width=600)
)

In [80]:
df = ds.to_dataframe().reset_index()

In [81]:
df['time'] = df.apply(lambda row: dt.datetime(
        year = row['time'].year,
        month = row['time'].month,
        day = row['time'].day,
        hour = row['base_time'].hour,
        minute = row['time'].minute,
        second = row['time'].second,
        microsecond = int(row['sample'] * (1e6/20))
    ),
    axis = 1
)

In [82]:
ds = df.set_index('time').to_xarray()

In [83]:
ds = utils.modify_xarray_timezone(ds, 'UTC', "US/Mountain")

# Define function to do Reynolds Averaging

In [106]:
def create_re_avg_ds(
        ds, 
        re_avg_period_size, 
        var1,
        var2,
        covariance_name
):
    coarse_ds = ds.coarsen(time=re_avg_period_size).mean()
    coarse_ds = coarse_ds.assign_coords(time = coarse_ds.time.dt.round('1s'))
    coarse_ds = coarse_ds.reindex_like(ds, method='nearest')
    ds[f"{var1}_mean"] = coarse_ds[f"{var1}"]
    ds[f"{var1}_fluc"] = ds[f"{var1}"] - ds[f"{var1}_mean"]
    ds[f"{var2}_mean"] = coarse_ds[f"{var2}"]
    ds[f"{var2}_fluc"] = ds[f"{var2}"] - ds[f"{var2}_mean"]
    ds[covariance_name] = ds[f"{var2}_fluc"] * ds[f"{var1}_fluc"]
    ds = ds.coarsen(time = re_avg_period_size).mean()
    ds = ds.assign_coords(time = ds.time.dt.round('1s'))
    return ds

# Iterate over variables, apply planar fit to fast data, and calculate covariance fluxes

In [108]:
MONTH = ds.time.dt.month.values[0]

df_list = []
for tower in ['c', 'uw', 'ue', 'd']:
    if tower == 'c':
        heights = [3,5,10,15,20]
    else:
        heights = [3,10]
    
    for height in heights:
        fitting_params = fits_df.set_index(['month', 'height', 'tower']).loc[
            MONTH,
            height,
            tower
        ]
        u, v, w = extrautils.apply_planar_fit(
            ds[f'u_{height}m_{tower}'].values.flatten(),
            ds[f'v_{height}m_{tower}'].values.flatten(),
            ds[f'w_{height}m_{tower}'].values.flatten(),
            fitting_params['a'], 
            fitting_params['W_f'],
        )
        ds[f'u_{height}m_{tower}_fit'] = ('time', u)
        ds[f'v_{height}m_{tower}_fit'] = ('time', v)
        ds[f'w_{height}m_{tower}_fit'] = ('time', w)
        
        ds_plain =  create_re_avg_ds(
            ds, 
            300*20, 
            var1 = f'w_{height}m_{tower}', 
            var2= f'h2o_{height}m_{tower}', 
            covariance_name = f'w_h2o__{height}m_{tower}'
        )
        ds_fit =    create_re_avg_ds(
            ds, 
            300*20, 
            var1 = f'w_{height}m_{tower}_fit', 
            var2= f'h2o_{height}m_{tower}', 
            covariance_name = f'w_h2o__{height}m_{tower}_fit'
        )

        merged_df = ds_plain[f'w_h2o__{height}m_{tower}'].to_dataframe()[[f'w_h2o__{height}m_{tower}']].join(
                ds_fit[f'w_h2o__{height}m_{tower}_fit'].to_dataframe()[[f'w_h2o__{height}m_{tower}_fit']]
        )
        df_list.append(merged_df)

In [113]:
df = pd.read_parquet("/Users/elischwat/Development/data/sublimationofsnow/planar_fit/20221101.parquet")