In [20]:
import pandas as pd
import glob
import altair as alt
from sublimpy import utils

In [13]:
# Load the 'w_h2o_' measurement rows from every matching parquet file,
# tagging each row with the file it came from.
# sorted() makes the row order independent of filesystem listing order.
# NOTE(review): without recursive=True, '**' in a glob pattern behaves like a
# single '*', so this only matches files directly inside turb_datasets/.
files = sorted(glob.glob("turb_datasets/tidy_df**.parquet"))
if not files:
    raise FileNotFoundError(
        "No parquet files matched 'turb_datasets/tidy_df**.parquet'"
    )

# Read every file first and concatenate once; growing a DataFrame with
# pd.concat inside the loop re-copies all previously loaded rows each time.
df = pd.concat([
    pd.read_parquet(file).query("measurement == 'w_h2o_'").assign(filename=file)
    for file in files
])

# Keep only the listed variable names.
df = df[df.variable.isin([
    'w_h2o__2m_c', 'w_h2o__3m_c', 'w_h2o__5m_c', 'w_h2o__10m_c', 'w_h2o__15m_c', 'w_h2o__20m_c',
    'w_h2o__3m_uw', 'w_h2o__10m_uw',
    'w_h2o__3m_ue', 'w_h2o__10m_ue',
    'w_h2o__3m_d', 'w_h2o__10m_d',
])]

# NOTE(review): presumably converts the 'time' column from UTC to US/Mountain;
# confirm against sublimpy.utils.modify_df_timezone.
df = utils.modify_df_timezone(df, 'UTC', 'US/Mountain')

# Restrict to the analysis window (both bounds are exclusive).
df = df[(df['time'] > '20221130') & (df['time'] < '20230508')]

In [14]:
# Reduce each file path to a short label describing its processing options.
# regex=False forces literal matching: before pandas 2.0 the default was
# regex=True, under which the '.' in '.parquet' matches any character.
df['filename'] = df['filename'].str.replace(
    'turb_datasets/tidy_df_20221101_20230619_planar_fit_multiplane_STRAIGHTUP_',
    '',
    regex=False,
)
# 'processing' is the filename label without its '.parquet' extension.
df['processing'] = df['filename'].str.replace('.parquet', '', regex=False)

In [15]:
# Preview the first five rows to check the new 'filename' / 'processing' columns.
df.head(n=5)

Unnamed: 0,time,variable,value,height,tower,measurement,filename,processing
3993087,2022-11-30 00:30:00,w_h2o__3m_uw,-0.000622,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993088,2022-11-30 01:00:00,w_h2o__3m_uw,-4.3e-05,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993089,2022-11-30 01:30:00,w_h2o__3m_uw,-0.000308,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993090,2022-11-30 02:00:00,w_h2o__3m_uw,-0.001253,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993091,2022-11-30 02:30:00,w_h2o__3m_uw,9.4e-05,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm


In [16]:
# Sum 'value' within each (variable, processing) pair, then scale the sums
# by 30 * 60 / 1000 (applied in that order).
# NOTE(review): 30 * 60 is presumably the number of seconds in each 30-minute
# record and / 1000 a unit conversion -- confirm the intended units.
totals_df = (
    df.groupby(['variable', 'processing'])['value']
    .sum()
    .mul(30)
    .mul(60)
    .div(1000)
    .reset_index()
)

In [17]:
# Split each processing label once, e.g.
# 'nodespiking_flags36000_snowfallfiltered0.5mm'
#   -> ['nodespiking', 'flags36000', 'snowfallfiltered0.5mm'],
# using the vectorised .str accessor (as already done for 'variable' below)
# instead of re-splitting the same string in three separate apply() lambdas.
processing_parts = totals_df['processing'].str.split('_')
totals_df['despiking'] = processing_parts.str[0]
totals_df['instrument_flagging'] = processing_parts.str[1]
totals_df['snowfall_filtering'] = processing_parts.str[2]

# Variable names look like 'w_h2o__10m_c'. The double underscore yields an
# empty element, so index 3 is '10m'; dropping the trailing 'm' gives the
# integer height, and the last element ('c', 'uw', ...) is the tower label.
variable_parts = totals_df['variable'].str.split('_')
totals_df['height'] = variable_parts.str[3].str[:-1].astype(int)
totals_df['tower'] = variable_parts.str[-1]
totals_df

Unnamed: 0,variable,processing,value,despiking,instrument_flagging,snowfall_filtering,height,tower
0,w_h2o__10m_c,nodespiking_flags36000_snowfallfiltered0.5mm,39.566325,nodespiking,flags36000,snowfallfiltered0.5mm,10,c
1,w_h2o__10m_c,nodespiking_flags36000_snowfallfiltered0mm,37.902591,nodespiking,flags36000,snowfallfiltered0mm,10,c
2,w_h2o__10m_c,nodespiking_flags36000_snowfallfilteredno,37.956772,nodespiking,flags36000,snowfallfilteredno,10,c
3,w_h2o__10m_c,nodespiking_flags3600_snowfallfiltered0.5mm,35.906514,nodespiking,flags3600,snowfallfiltered0.5mm,10,c
4,w_h2o__10m_c,nodespiking_flags3600_snowfallfiltered0mm,34.431823,nodespiking,flags3600,snowfallfiltered0mm,10,c
...,...,...,...,...,...,...,...,...
211,w_h2o__5m_c,q7_flags3600_snowfallfiltered0mm,32.703641,q7,flags3600,snowfallfiltered0mm,5,c
212,w_h2o__5m_c,q7_flags3600_snowfallfilteredno,34.031976,q7,flags3600,snowfallfilteredno,5,c
213,w_h2o__5m_c,q7_flags9000_snowfallfiltered0.5mm,34.422582,q7,flags9000,snowfallfiltered0.5mm,5,c
214,w_h2o__5m_c,q7_flags9000_snowfallfiltered0mm,33.021581,q7,flags9000,snowfallfiltered0mm,5,c


In [18]:
# Scatter of the scaled totals against height, faceted into a grid:
# one column per instrument-flagging option, one row per despiking option.
# Colour distinguishes the snowfall filter and marker shape the tower.
(
    alt.Chart(totals_df)
    .mark_point(size=100)
    .encode(
        x=alt.X('value:Q').scale(zero=False),
        y=alt.Y('height:Q'),
        shape=alt.Shape('tower:N'),
        column=alt.Column('instrument_flagging').sort(['flags36000', 'flags9000', 'flags3600']),
        row=alt.Row('despiking'),
        color=alt.Color('snowfall_filtering'),
    )
    .properties(width=150, height=150)
)

In [37]:
# Build one time-ordered running sum of 'w_h2o__20m_c' per processing label.
processing_options = df.processing.unique()

# Filter the variable once instead of re-querying the full frame per label.
series_20m_c = df[df['variable'] == 'w_h2o__20m_c']

# Collect the per-label frames and concatenate once; growing `local` with
# pd.concat inside the loop re-copies all previously accumulated rows.
local = pd.concat([
    series_20m_c[series_20m_c['processing'] == process]
    .set_index('time')
    .sort_index()[['value']]
    .cumsum()
    .assign(processing=process)
    for process in processing_options
])

# 30 * 60 / 1000 evaluates to exactly 1.8, the literal previously hard-coded
# here; it is the same factor applied to the summed values in totals_df.
# NOTE(review): presumably seconds per 30-minute record and a / 1000 unit
# conversion -- confirm the intended units.
local['value'] = local['value'] * (30 * 60 / 1000)

# Split the processing label once into its three underscore-separated parts.
processing_parts = local['processing'].str.split('_')
local['despiking'] = processing_parts.str[0]
local['instrument_flagging'] = processing_parts.str[1]
local['snowfall_filtering'] = processing_parts.str[2]

In [34]:
# Switch Altair's active data transformer to 'json'.
# NOTE(review): per the Altair docs this stores chart data in a separate JSON
# file rather than embedding it in the chart spec -- confirm that is the intent.
# NOTE(review): this cell's execution count (In [34]) is lower than the cell
# before it (In [37]), so the notebook was run out of order; consider moving
# this call into a configuration cell near the top.
alt.data_transformers.enable('json')

DataTransformerRegistry.enable('json')

In [41]:
# Line chart of the scaled running sums over time: one line per processing
# label, one facet row per snowfall-filtering option. Rows containing NaN
# are dropped before plotting, and the colour scale is resolved
# independently for each facet.
(
    alt.Chart(local.reset_index().dropna())
    .mark_line()
    .encode(
        x=alt.X('time:T'),
        y=alt.Y('value:Q'),
        color=alt.Color('processing:N'),
        row=alt.Row('snowfall_filtering:N'),
        tooltip='processing:N',
    )
    .properties(width=1000)
    .resolve_scale(color='independent')
)

In [42]:
# Show only the rows whose variable is 'w_h2o__20m_c'.
# NOTE(review): the saved output for this cell starts with 'w_h2o__3m_uw'
# rows, which this filter cannot return -- the output looks stale; re-run the
# cell (ideally after Restart & Run All) before relying on it.
df.query("variable == 'w_h2o__20m_c'")

Unnamed: 0,time,variable,value,height,tower,measurement,filename,processing
3993087,2022-11-30 00:30:00,w_h2o__3m_uw,-0.000622,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993088,2022-11-30 01:00:00,w_h2o__3m_uw,-0.000043,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993089,2022-11-30 01:30:00,w_h2o__3m_uw,-0.000308,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993090,2022-11-30 02:00:00,w_h2o__3m_uw,-0.001253,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
3993091,2022-11-30 02:30:00,w_h2o__3m_uw,0.000094,3.0,uw,w_h2o_,nodespiking_flags36000_snowfallfiltered0.5mm.p...,nodespiking_flags36000_snowfallfiltered0.5mm
...,...,...,...,...,...,...,...,...
6484423,2023-05-07 21:30:00,w_h2o__20m_c,-0.001839,20.0,c,w_h2o_,nodespiking_flags36000_snowfallfilteredno.parquet,nodespiking_flags36000_snowfallfilteredno
6484424,2023-05-07 22:00:00,w_h2o__20m_c,-0.000894,20.0,c,w_h2o_,nodespiking_flags36000_snowfallfilteredno.parquet,nodespiking_flags36000_snowfallfilteredno
6484425,2023-05-07 22:30:00,w_h2o__20m_c,0.001349,20.0,c,w_h2o_,nodespiking_flags36000_snowfallfilteredno.parquet,nodespiking_flags36000_snowfallfilteredno
6484426,2023-05-07 23:00:00,w_h2o__20m_c,0.000808,20.0,c,w_h2o_,nodespiking_flags36000_snowfallfilteredno.parquet,nodespiking_flags36000_snowfallfilteredno
