In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

In [2]:
# Notebook display configuration.
# `display` and `HTML` come from the public IPython.display module; importing
# them from IPython.core.display is deprecated in recent IPython releases.
from IPython.display import display, HTML

# Inject CSS that widens the notebook's `.container` element to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

# Cap how many rows / columns pandas renders when a frame is displayed.
pd.options.display.max_rows = 15
pd.options.display.max_columns = 100

In [3]:
# Directory holding the processed inputs, relative to the notebook's working directory.
data_dir = Path('../data/processed')
# Fail early, with a readable message, if the path is missing or is not a
# directory (it is used below as the parent of the parquet files).
assert data_dir.is_dir(), f"processed data directory not found: {data_dir.resolve()}"

In [6]:
# Load the 1 Hz parquet file, index it by timestamp, and conform it to a
# regular 1-second grid; asfreq fills any missing timestamps with NaN rows.
onehz = (
    pd.read_parquet(data_dir / 'cups_1hz.parquet')
    .set_index('timestamp')
    .asfreq('1s')
)

In [7]:
# Preview the first three rows of the loaded frame.
onehz.head(3)

Unnamed: 0_level_0,Air_Temp_87m,DeltaT_122_87m,Dewpt_Temp_122m,Dewpt_Temp_87m,PRECIP_INTEN,Cup_WS_C1_130m,Cup_WS_122m,Cup_WS_C1_105m,Vane_WD_122m,Vane_WD_87m
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2018-12-31 17:00:00,-15.698145,-0.28678,-14.12769,-14.87758,2.93948,6.966611,7.33234,6.931492,22.893036,28.280596
2018-12-31 17:00:01,-15.555123,-0.28678,-14.108216,-14.851624,2.944349,7.140418,7.182121,6.859962,28.400145,23.877924
2018-12-31 17:00:02,-15.672141,-0.290412,-14.095233,-14.910026,2.911893,7.181488,6.682454,6.98219,27.367561,21.780317


In [27]:
# The root cause of a flag is no longer needed, so every label file is
# collapsed to a single boolean Series that is True wherever any of its
# columns is set.
# Keys are the file names without the '.parquet.gz' ending: Path.stem drops
# '.gz' and the rsplit drops '.parquet'.
labels = dict(
    (
        label_file.stem.rsplit('.', maxsplit=1)[0],
        pd.read_parquet(label_file).any(axis='columns'),
    )
    for label_file in (data_dir / 'labels').glob('*.parquet.gz')
)

In [28]:
# Spot-check one collapsed label Series (first three entries).
labels['Cup_WS_C1_105m'].head(3)

timestamp
2018-12-31 17:00:00    False
2018-12-31 17:00:01    False
2018-12-31 17:00:02    False
dtype: bool

In [33]:
# Apply labels to data: wherever a column's flag is True, replace the value with NaN.
# Series.where returns a new Series instead of modifying its caller, so the
# result has to be assigned back -- without the assignment the masked values
# are discarded and the flags are never applied.
# NOTE(review): where() aligns ~label on the index; confirm each label Series
# covers the same timestamps as onehz.
for col, label in labels.items():
    onehz.loc[:, col] = onehz.loc[:, col].where(~label)

In [34]:
# Because of inherited processing, every 0 wind-speed value in these two
# columns is converted to NaN (to be interpolated later).
zero_ws_cols = ['Cup_WS_C1_105m', 'Cup_WS_C1_130m']
onehz.loc[:, zero_ws_cols] = onehz.loc[:, zero_ws_cols].replace(0, np.nan)