# salvo_export_broadband_line

Notebook to export just the broadband measurements from the three lines in 2024

In [1]:
import os
import numpy as np
import pandas as pd
import warnings

from modules.salvoimport import load_salvo_data_str, load_gml_albedo, preprocess_rap
from modules.salvoproc import line_date_agg

In [2]:
# Load SALVO data
salvo_data_root_path = os.path.join("C:/", "Users", "dclemenssewall", "Desktop", "DavidCS", "NOAA", "projects", 
                                    "SALVO", "data", "salvo-2024-backup", "SALVO-2024-20240619-1305")

with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    df = load_salvo_data_str(salvo_data_root_path, dates=None,insts=['kz-mobile'], sites=['arm', 'beo', 'ice'])

In [70]:
df.index.drop_duplicates()

MultiIndex([('2024-04-17 15:51:00-08:00', 'ice', 'ice-20240417-a', ...),
            ('2024-04-17 15:52:00-08:00', 'ice', 'ice-20240417-a', ...),
            ('2024-04-17 15:56:00-08:00', 'ice', 'ice-20240417-a', ...),
            ('2024-04-17 16:00:00-08:00', 'ice',              nan, ...),
            ('2024-04-17 16:02:00-08:00', 'ice',              nan, ...),
            ('2024-04-17 16:03:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:05:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:06:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:07:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:08:00-08:00', 'ice',           'line', ...),
            ...
            ('2024-06-18 14:12:00-08:00', 'ice',           'line', ...),
            ('2024-06-18 14:13:00-08:00', 'ice',           'line', ...),
            ('2024-06-18 14:13:00-08:00', 'ice',           'line', ...),
            ('2024-06-18 14:14:00-0

In [72]:
df.index

MultiIndex([('2024-04-17 15:51:00-08:00', 'ice', 'ice-20240417-a', ...),
            ('2024-04-17 15:52:00-08:00', 'ice', 'ice-20240417-a', ...),
            ('2024-04-17 15:56:00-08:00', 'ice', 'ice-20240417-a', ...),
            ('2024-04-17 16:00:00-08:00', 'ice',              nan, ...),
            ('2024-04-17 16:02:00-08:00', 'ice',              nan, ...),
            ('2024-04-17 16:03:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:05:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:06:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:07:00-08:00', 'ice',           'line', ...),
            ('2024-04-17 16:08:00-08:00', 'ice',           'line', ...),
            ...
            ('2024-06-18 14:12:00-08:00', 'ice',           'line', ...),
            ('2024-06-18 14:13:00-08:00', 'ice',           'line', ...),
            ('2024-06-18 14:13:00-08:00', 'ice',           'line', ...),
            ('2024-06-18 14:14:00-0

In [69]:
df[df.duplicated(keep=False)].sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value
timestamp_akdt,site,location,position,repetition,wavelength,variable,Unnamed: 7_level_1
2024-04-17 15:52:00-08:00,ice,ice-20240417-a,0.0,2.0,,reflected_solar_W_m2,335.44
2024-04-17 16:10:00-08:00,ice,line,35.0,1.0,,reflected_solar_W_m2,340.10
2024-04-17 16:10:00-08:00,ice,line,40.0,1.0,,reflected_solar_W_m2,340.10
2024-04-17 16:17:00-08:00,ice,line,65.0,1.0,,reflected_solar_W_m2,344.76
2024-04-17 16:21:00-08:00,ice,line,80.0,1.0,,incident_solar_W_m2,472.63
...,...,...,...,...,...,...,...
2024-06-18 13:39:00-08:00,ice,line,140.0,1.0,,reflected_solar_W_m2,256.11
2024-06-18 14:06:00-08:00,ice,line,85.0,2.0,,reflected_solar_W_m2,353.60
2024-06-18 14:13:00-08:00,ice,line,30.0,1.0,,incident_solar_W_m2,482.66
2024-06-18 14:16:00-08:00,ice,line,20.0,1.0,,reflected_solar_W_m2,115.51


In [85]:
df_duplicated_diff = (df[df.duplicated(keep='first')].sort_index() - df[df.duplicated(keep='last')].sort_index()).dropna()

In [90]:
df_duplicated_diff.iloc[np.nonzero(df_duplicated_diff.values)[0]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,value
timestamp_akdt,site,location,position,repetition,wavelength,variable,Unnamed: 7_level_1
2024-06-04 12:45:00-08:00,ice,line,,,,reflected_solar_W_m2,-3.53
2024-06-04 13:43:00-08:00,ice,line,,,,reflected_solar_W_m2,-16.95
2024-06-04 13:43:00-08:00,ice,line,,,,reflected_solar_W_m2,16.95


In [66]:
df.loc[('2024-04-17 15:52:00-08:00', 'ice', 'ice-20240417-a', 0.0, 2.0)]

  df.loc[('2024-04-17 15:52:00-08:00', 'ice', 'ice-20240417-a', 0.0, 2.0)]


Unnamed: 0_level_0,Unnamed: 1_level_0,value
wavelength,variable,Unnamed: 2_level_1
,incident_solar_W_m2,416.39
,reflected_solar_W_m2,335.44
,albedo,0.805591


In [43]:
# Extract only the line measurements with valid locations
df_line = df.xs('line', level='location').loc[(slice(None), slice(None), np.arange(41)*5)]
df_line = df_line.reset_index().drop(columns='wavelength')
df_line['date_akdt'] = df_line['timestamp_akdt'].dt.strftime('%Y-%m-%d')
df_line['timestamp_akdt'] = df_line['timestamp_akdt'].dt.strftime('%H:%M')
df_line = df_line.set_index(['date_akdt', 'timestamp_akdt', 'site', 
                             'position', 'repetition', 'variable'])


In [44]:
df_line.unstack(level='variable')

ValueError: Index contains duplicate entries, cannot reshape

In [45]:
df_line

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,value
date_akdt,timestamp_akdt,site,position,repetition,variable,Unnamed: 6_level_1
2024-04-17,16:03,ice,0.0,1.0,incident_solar_W_m2,435.970000
2024-04-17,16:03,ice,0.0,1.0,reflected_solar_W_m2,353.780000
2024-04-17,16:03,ice,0.0,1.0,albedo,0.811478
2024-04-19,13:06,arm,0.0,1.0,incident_solar_W_m2,349.750000
2024-04-19,13:06,arm,0.0,1.0,reflected_solar_W_m2,288.380000
...,...,...,...,...,...,...
2024-06-17,14:30,beo,200.0,1.0,albedo,0.158003
2024-06-17,14:37,beo,200.0,2.0,albedo,0.157848
2024-06-18,13:32,ice,200.0,1.0,incident_solar_W_m2,412.380000
2024-06-18,13:32,ice,200.0,1.0,reflected_solar_W_m2,249.390000


In [55]:
df_line.xs(('2024-05-27', 'incident_solar_W_m2'), level=('date_akdt', 'variable'))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,value
timestamp_akdt,site,position,repetition,Unnamed: 4_level_1
12:18,arm,0.0,1.0,538.90
12:18,arm,0.0,1.0,538.90
12:21,arm,5.0,1.0,516.26
12:21,arm,5.0,1.0,516.26
12:21,arm,10.0,1.0,515.59
...,...,...,...,...
12:42,arm,190.0,1.0,536.21
12:42,arm,195.0,1.0,538.54
12:42,arm,195.0,1.0,538.54
12:43,arm,200.0,1.0,541.87


In [54]:
df_line[df_line.duplicated()]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,value
date_akdt,timestamp_akdt,site,position,repetition,variable,Unnamed: 6_level_1
2024-05-27,12:18,arm,0.0,1.0,incident_solar_W_m2,538.900000
2024-05-27,12:18,arm,0.0,1.0,reflected_solar_W_m2,464.120000
2024-05-27,12:18,arm,0.0,1.0,albedo,0.861236
2024-05-27,12:21,arm,5.0,1.0,incident_solar_W_m2,516.260000
2024-05-27,12:21,arm,5.0,1.0,reflected_solar_W_m2,445.410000
...,...,...,...,...,...,...
2024-06-10,15:00,arm,200.0,1.0,incident_solar_W_m2,708.820000
2024-06-10,15:00,arm,200.0,1.0,reflected_solar_W_m2,463.450000
2024-06-14,13:12,beo,200.0,1.0,incident_solar_W_m2,517.590000
2024-06-17,13:01,arm,200.0,1.0,incident_solar_W_m2,305.410000


In [30]:
df_line['timestamp_akdt'].dt.strftime('%H:%M')

0       16:03
1       16:03
2       16:03
3       13:06
4       13:06
        ...  
4216    14:30
4217    14:37
4218    13:32
4219    13:32
4220    13:32
Name: timestamp_akdt, Length: 4221, dtype: object

In [22]:
df_line['timestamp_akdt']

0      2024-04-17 16:03:00-08:00
1      2024-04-17 16:03:00-08:00
2      2024-04-17 16:03:00-08:00
3      2024-04-19 13:06:00-08:00
4      2024-04-19 13:06:00-08:00
                  ...           
4216   2024-06-17 14:30:00-08:00
4217   2024-06-17 14:37:00-08:00
4218   2024-06-18 13:32:00-08:00
4219   2024-06-18 13:32:00-08:00
4220   2024-06-18 13:32:00-08:00
Name: timestamp_akdt, Length: 4221, dtype: datetime64[ns, UTC-08:00]