In [1]:
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.enable('json')

import matplotlib.pyplot as plt

from metpy.calc import add_height_to_pressure
from metpy.units import units
import sys
sys.path.append('../splash/')
import functions_library

# Open SOS Measurement Dataset

In [2]:
# Date window of interest, based on continuous snow cover at Kettle Ponds.
# The same two strings pick the parquet file name and bound the time slice.
start_date = '20221130'
end_date = '20230509'

tidy_df_5Min = (
    pd.read_parquet(f'tidy_df_{start_date}_{end_date}_noplanar_fit_clean.parquet')
    # make sure `time` is a datetime column before it becomes the index
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .set_index('time')
    .sort_index()
    # keep only the rows that fall inside the date window
    .loc[start_date:end_date]
    .reset_index()
)

In [3]:
# Aggregate the 5-minute records into 30-minute means, computed separately for
# every (tower, height, measurement, variable) combination.
thirty_minute_bins = pd.Grouper(freq='30Min')  # bins on the `time` index
tidy_df_30Min = (
    tidy_df_5Min
    .set_index('time')
    .groupby([thirty_minute_bins, 'tower', 'height', 'measurement', 'variable'])
    .mean()
    .reset_index()
)

In [5]:
# Measurement heights (in metres) available on each tower, in display order.
heights_m_by_tower = {
    'c': (2, 3, 5, 10, 15, 20),
    'd': (1, 3, 10),
    'ue': (1, 3, 10),
    'uw': (1, 3, 10),
}

# Pair every `w_h2o__<height>m_<tower>` flux variable with the matching
# `counts_<height>m_<tower>_1` sample-count variable.
ec_lhflux_and_counts_variables = [
    (f'w_h2o__{height}m_{tower}', f'counts_{height}m_{tower}_1')
    for tower, heights in heights_m_by_tower.items()
    for height in heights
]

# Split the pairs into two parallel tuples: flux names and count names.
ec_lhflux_variables, counts_lhflux_variables = zip(*ec_lhflux_and_counts_variables)
counts_lhflux_variables

('counts_2m_c_1',
 'counts_3m_c_1',
 'counts_5m_c_1',
 'counts_10m_c_1',
 'counts_15m_c_1',
 'counts_20m_c_1',
 'counts_1m_d_1',
 'counts_3m_d_1',
 'counts_10m_d_1',
 'counts_1m_ue_1',
 'counts_3m_ue_1',
 'counts_10m_ue_1',
 'counts_1m_uw_1',
 'counts_3m_uw_1',
 'counts_10m_uw_1')

In [24]:
# Pull the sample-count records for 22 Dec 2022, hours 00 through 12.
# Partial-string slicing on the datetime index keeps the whole end hour.
in_window = tidy_df_5Min.set_index('time').loc['20221222 00': '20221222 12']
src = in_window[in_window.variable.isin(counts_lhflux_variables)].reset_index()

# Summarise how many records fall below each count threshold: raw totals
# first, then the same numbers as fractions of all selected records.
n_total = len(src)
n_below_5400 = len(src[src.value < 5400])
n_below_3000 = len(src[src.value < 3000])
print(n_total, n_below_5400, n_below_3000)
print(n_total, round(n_below_5400/n_total, 2), round(n_below_3000/n_total, 2))

# One small scatter panel per counts variable, three panels per row.
alt.Chart(src).mark_circle().encode(
    x=alt.X("time:T"),
    y=alt.Y("value:Q"),
    facet=alt.Facet("variable:N", columns=3),
).properties(height = 50, width=200)

2340 1446 531
2340 0.62 0.23


In [25]:
# Same count-threshold check, narrowed to 22 Dec 2022, hours 00 through 09.
# Partial-string slicing on the datetime index keeps the whole end hour.
in_window = tidy_df_5Min.set_index('time').loc['20221222 00': '20221222 09']
src = in_window[in_window.variable.isin(counts_lhflux_variables)].reset_index()

# Raw totals first, then the same numbers as fractions of all selected records.
n_total = len(src)
n_below_5400 = len(src[src.value < 5400])
n_below_3000 = len(src[src.value < 3000])
print(n_total, n_below_5400, n_below_3000)
print(n_total, round(n_below_5400/n_total, 2), round(n_below_3000/n_total, 2))

# One small scatter panel per counts variable, three panels per row.
alt.Chart(src).mark_circle().encode(
    x=alt.X("time:T"),
    y=alt.Y("value:Q"),
    facet=alt.Facet("variable:N", columns=3),
).properties(height = 50, width=200)

1800 1432 517
1800 0.8 0.29


In [47]:
# Scatter the number of flagged high-rate samples at the 3 m Tower C
# instrument against blowing snow flux for the morning of 22 Dec 2022.

# Number of samples a 5-minute average is measured against; the y-axis label
# below describes the flagged count as "out of 6000".
SAMPLES_PER_5MIN = 6000

# Partial-string slicing keeps the whole end hour, so this window runs from
# 00:00 up to (but not including) 10:00 -- the chart title states that range.
src = tidy_df_5Min.set_index('time').loc['20221222 00': '20221222 09']

# Keep the one counts variable and the two SF_avg_* variables, then collapse
# to a single value per (time, measurement).
# NOTE(review): presumably both SF_avg_* variables share one measurement
# label ("snow flux"), so the sum adds the 1 m and 2 m values -- confirm.
src = src[
    src.variable.isin(['counts_3m_c_1', 'SF_avg_1m_ue', 'SF_avg_2m_ue'])
].groupby(["time", "measurement"]).sum(numeric_only=True).reset_index()

# Wide layout: one row per timestamp, one column per measurement.
src = src.pivot_table(
    values='value',
    columns='measurement',
    index='time',
)

# Samples missing from the count are treated as flagged/bad.
src['n bad data'] = SAMPLES_PER_5MIN - src['eddy covariance h2o high rate count']

alt.Chart(
    src
).mark_circle().encode(
    alt.X("snow flux:Q").title("Blowing snow flux (g/m^2/s)"),
    alt.Y("n bad data:Q").title(
        ["n flagged data per 5-minute avg", "(out of 6000)"]
    )
).properties(
    title=[
        'EC data quality on Dec. 22, 2022, 0000-1000',
        '3-meter sonic, Tower C'
    ],
    width=250,
    height=250,
).display(renderer='svg')