In [68]:
# Import libraries and data 
import scipy.io as sio
import matplotlib.pyplot as plt
from scipy import ndimage
import copy
import pickle
import numpy as np
from scipy.signal import find_peaks
from os.path import dirname, join as pjoin
import datetime
import csv
import math
import sys
sys.path.insert(1,'../')
import Tools.data_processing as dp
import Tools.data_movement as dm 
from auxiliary_functions import get_candidates, detect_flat_plat, detect_stage_rises

# Load the preprocessed fDOM, stage and turbidity series from their Julian-format CSV files.
# NOTE(review): the stage file name ends at 1.1.19 while the fDOM and turbidity names run to 9.4.2020 — confirm the shorter stage record is expected.
fDOM_data = dm.read_in_preprocessed_timeseries('../Data/converted_data/julian_format/fDOM_raw_10.1.2011-9.4.2020.csv')
stage_data = dm.read_in_preprocessed_timeseries('../Data/converted_data/julian_format/stage_10.1.11-1.1.19.csv')
turb_data = dm.read_in_preprocessed_timeseries('../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv')
# stage_data is rebound to the output of the alignment helper, so every later cell sees only the aligned version.
stage_data = dp.align_stage_to_fDOM(fDOM_data, stage_data)

In [58]:
# Get stage rises
s_indices = detect_stage_rises(stage_data[:,1])


def _samples_since_last_rise(rise_flags):
    """Count, for each position, the samples elapsed since the latest flag equal to 1.

    Positions before the first flagged sample get -1; a flagged sample itself gets 0.
    """
    elapsed = []
    counter = -1
    for flag in rise_flags:
        if flag == 1:
            counter = 0
        elif counter != -1:
            counter += 1
        elapsed.append(counter)
    return elapsed


# Column 0: samples since the previous stage rise (forward scan).
# Column 1: samples until the next stage rise (same scan over the reversed flags, flipped back).
s_indexed = np.zeros((s_indices.shape[0], 2))
s_indexed[:, 0] = _samples_since_last_rise(s_indices)
s_indexed[:, 1] = _samples_since_last_rise(s_indices[::-1])[::-1]

# Sharp peaks algo

# Deliberately generous find_peaks settings for the first-pass candidate search.
# Earlier planning notes suggested prominence (2, None), rel_height 1.0 (maybe .9 but not .5),
# width (None, 20), wlen 100 (to save on efficiency) and distance 5 (to suppress fluctuations);
# the values below are the ones actually passed. Open questions from those notes: a sharp peak
# could occur mid event, and threshold does not look useful.
prominence_range = [5,None]
width_range = [None,None]
wlen = 100
distance = 1
rel_height =.6

# Get list of all peaks that could possibly be sharp peaks
sharp_peak_search = dict(
    height=(None, None),
    threshold=(None, None),
    distance=distance,
    prominence=prominence_range,
    width=width_range,
    wlen=wlen,
    rel_height=rel_height,
)
fDOM_peaks, fDOM_props = find_peaks(fDOM_data[:,1], **sharp_peak_search)



In [59]:
# One row per candidate:
# [peak index, floor(left_ips), ceil(right_ips), s_indexed column 0, s_indexed column 1, prominence]
fDOM_cands = [
    [peak, math.floor(left_ip), math.ceil(right_ip), s_indexed[peak,0], s_indexed[peak,1], prom]
    for peak, left_ip, right_ip, prom in zip(fDOM_peaks,
                                             fDOM_props['left_ips'],
                                             fDOM_props['right_ips'],
                                             fDOM_props['prominences'])
]
print(len(fDOM_cands))

637


In [55]:
# Drop skyrocketing candidates whose left or right intersection index lands on, or one sample
# either side of, a possible plummeting peak; those are not wanted as sharp-peak candidates.
plum_index_set = {int(idx) for idx in possible_plum_peaks}


def _near_plum_peak(base_index):
    """True when base_index, base_index + 1 or base_index - 1 is a possible plummeting peak."""
    return any(base_index + offset in plum_index_set for offset in (0, 1, -1))


removed = []
not_removed = []
for cand in fDOM_cands:
    bucket = removed if (_near_plum_peak(cand[1]) or _near_plum_peak(cand[2])) else not_removed
    bucket.append(cand)

print(len(removed))
fDOM_cands = not_removed
print(len(not_removed))

79
24


In [66]:
turb_cand_params = {'prom' : [6,None],
                    'width': [None, None],
                    'wlen' : 200,
                    'dist' : 1,
                    'rel_h': .6}

# Turbidity candidate peaks and their properties
turb_peaks, turb_props = get_candidates(turb_data, turb_cand_params)

# Indices of the samples flagged (== 1) by the flat-plateau detector
turb_flat_plat = detect_flat_plat(turb_data, 100, 40)
turb_flat_plat_indxs = [i for i in range(turb_flat_plat.shape[0]) if turb_flat_plat[i] == 1]

# Keep only the candidates whose peak index is not one of the flagged samples
take_indices = [i for i, peak in enumerate(turb_peaks) if peak not in turb_flat_plat_indxs]
turb_peaks = np.take(turb_peaks, take_indices)
for key in turb_props:
    turb_props[key] = np.take(turb_props[key], take_indices)

# fDOM: the series row at each candidate peak, plus the sorted unique timestamps (column 0)
# found at the candidates' left and right intersection indices
fDOM_events = [np.array(fDOM_data[cand[0]]) for cand in fDOM_cands]
fDOM_lb = sorted({fDOM_data[math.floor(cand[1]),0] for cand in fDOM_cands})
fDOM_rb = sorted({fDOM_data[math.ceil(cand[2]),0] for cand in fDOM_cands})

# Turbidity: same three collections, built from the surviving peaks and their ips properties
turb_events = [np.array(turb_data[peak]) for peak in turb_peaks]
turb_lb = sorted({turb_data[math.floor(left_ip),0] for left_ip in turb_props['left_ips']})
turb_rb = sorted({turb_data[math.ceil(right_ip),0] for right_ip in turb_props['right_ips']})

# Attach the event label and the base markers to each series
fDOM_merged = dp.merge_data(fDOM_data, fDOM_events, 'not_sky_peak', '')
fDOM_merged = dp.merge_additional_data(fDOM_merged, fDOM_lb, 'left_base')
fDOM_merged = dp.merge_additional_data(fDOM_merged, fDOM_rb, 'right_base')

turb_merged = dp.merge_data(turb_data, turb_events, 't_opp', '')
turb_merged = dp.merge_additional_data(turb_merged, turb_lb, 'left_base')
turb_merged = dp.merge_additional_data(turb_merged, turb_rb, 'right_base')

# Stage series labelled with the detected rises
stage_edge_data = dp.stage_rises_to_data(s_indices, stage_data)
stage_data_merged = dp.merge_data(stage_data, stage_edge_data, 'rise','')

# Write the three merged series to the plotting CSV
dm.write_data_to_trainset(fDOM_merged,
                          stage_data_merged,
                          turb_merged,
                          '../Data/temp_plotting/fDOM_sky_0k-100k.csv',
                          True,
                          True,
                          0,
                          100000)

In [None]:
def detect_sky_peaks_fDOM(candidates: list[np.ndarray],
                          properties: dict,
                          prom_range: list[float],
                          base_width_range: list[float],
                          timeseries: np.ndarray = None) -> list[list[list]]:
    """
    Detect fDOM sharp peak anomalies from given set of possible candidates, given hyper parameters

    candidates:       peak row indices, in the same order as the entries of `properties`
    properties:       per-candidate arrays keyed by 'widths', 'prominences', 'left_bases', 'right_bases'
                      (the keys produced by scipy.signal.find_peaks when prominence and width are requested)
    prom_range:       [lower, upper] exclusive bounds on prominence; None (or a missing entry) leaves that side open
    base_width_range: [lower, upper] exclusive bounds on the peak width; same None convention
    timeseries:       2-D array whose column 0 holds the timestamp of each row. When omitted the function
                      falls back to the module-level `data`, as the earlier version did.

    Returns [sharp_peaks, not_sharp_peaks]; each entry is
    [start_timestamp, end_timestamp, peak_timestamp, label].
    """
    def _strictly_inside(value, bounds):
        # None / absent bounds are open, matching the [low, None] ranges used for find_peaks.
        lower = bounds[0] if len(bounds) > 0 else None
        upper = bounds[1] if len(bounds) > 1 else None
        if lower is not None and not (value > lower):
            return False
        if upper is not None and not (value < upper):
            return False
        return True

    series = data if timeseries is None else timeseries

    sharp_peaks = []
    not_sharp_peaks = []

    # enumerate is required: `candidates` is a flat sequence of indices, not (i, peak) pairs.
    for i, peak in enumerate(candidates):
        # The width reported in the properties is the value that was effectively used before;
        # the base-to-base distance computed alongside it was immediately overwritten.
        base_width = properties['widths'][i]
        prominence = properties['prominences'][i]
        start_timestamp = series[properties['left_bases'][i], 0]
        end_timestamp = series[properties['right_bases'][i], 0]
        peak_timestamp = series[peak][0]

        if _strictly_inside(base_width, base_width_range) and _strictly_inside(prominence, prom_range):
            # Append [start, end, peak, label]
            sharp_peaks.append([start_timestamp, end_timestamp, peak_timestamp, 'sharp_peak_fDOM'])
        else:
            not_sharp_peaks.append([start_timestamp, end_timestamp, peak_timestamp, 'rejected_sharp_peak_fDOM'])
    return [sharp_peaks, not_sharp_peaks]

In [28]:
# Work on a flipped copy of the fDOM series so plummeting peaks can be searched with find_peaks
flipped_fDOM = dp.flip_timeseries(np.array(fDOM_data))

# find_peaks settings for the plummeting-peak candidate search
prominence_range_plum = [3,None]
width_range_plum = [None,5]
wlen_plum = 100
distance_plum = 1
rel_height_plum =.6

# Column 1 of the flipped series is the signal handed to find_peaks
data = flipped_fDOM[:,1]

# Get list of all peaks that could possibly be plummeting peaks
plum_peak_search = dict(
    height=(None, None),
    threshold=(None, None),
    distance=distance_plum,
    prominence=prominence_range_plum,
    width=width_range_plum,
    wlen=wlen_plum,
    rel_height=rel_height_plum,
)
possible_plum_peaks, props = find_peaks(data, **plum_peak_search)

# NOTE(review): this rebinds fDOM_cands, replacing any sharp-peak candidate list built under the same name.
# Row layout: [peak index, floor(left_ips), ceil(right_ips), s_indexed column 0, s_indexed column 1, prominence]
fDOM_cands = [
    [peak, math.floor(left_ip), math.ceil(right_ip), s_indexed[peak,0], s_indexed[peak,1], prom]
    for peak, left_ip, right_ip, prom in zip(possible_plum_peaks,
                                             props['left_ips'],
                                             props['right_ips'],
                                             props['prominences'])
]

# get list of turb_peaks: these turb peaks look similar to skyrocketing peaks in fDOM. 
# prominence_range_turb = [2,None]
# width_range_turb = [None,10]
# wlen_turb = 100
# distance_turb = 1 
# rel_height_turb =.8

# data = fDOM_raw_data[:100000][:,1]

# # Get list of all peaks that could possibly be sharp peaks
# turb_peaks, props = find_peaks(data,
#                           height = (None, None),
#                           threshold = (None,None),
#                           distance = distance_turb,
#                           prominence = prominence_range_turb,
#                           width = width_range_turb,
#                           wlen = wlen_turb,
#                           rel_height = rel_height_turb)

  possible_plum_peaks, props = find_peaks(data,


In [29]:
# Builds the plotting CSV for plummeting-peak candidates: labels the flipped fDOM series with the
# candidates currently held in fDOM_cands and writes it together with the stage series.
# NOTE(review): this cell repeats the sky-peak export cell almost line for line; it relies on
# fDOM_cands and flipped_fDOM having been rebound by the plummeting-candidate cell beforehand.
turb_cand_params = {'prom' : [6,None],
                    'width': [None, None],
                    'wlen' : 200,
                    'dist' : 1,
                    'rel_h': .6}

# Get turbidity candidate peaks (only turbidity candidates are fetched here)
turb_peaks, turb_props = get_candidates(turb_data, turb_cand_params)

# Remove peaks that occur during a flat plateau 
# Collect the indices of the samples the detector marks with 1.
turb_flat_plat = detect_flat_plat(turb_data, 100, 40)
turb_flat_plat_indxs = []
for i in range(turb_flat_plat.shape[0]):
    if turb_flat_plat[i] == 1:
        turb_flat_plat_indxs.append(i)

# Positions (within turb_peaks) of the candidates that are not on a flagged sample
take_indices = []
for i,peak in enumerate(turb_peaks):
    if peak not in turb_flat_plat_indxs:
        take_indices.append(i)

# Apply the same selection to the peaks and to every property array so they stay index-aligned.
# NOTE: turb_props is updated in place, key by key.
turb_peaks = np.take(turb_peaks, take_indices)
for key in turb_props:
    turb_props[key] = np.take(turb_props[key], take_indices)

# For every candidate, record the series row at the peak and the timestamps (column 0) at its
# left and right intersection indices
fDOM_events = []
fDOM_lb = []
fDOM_rb = []

for i,cand in enumerate(fDOM_cands):
            fDOM_events.append(np.array((flipped_fDOM[cand[0]])))
            fDOM_lb.append(flipped_fDOM[math.floor(cand[1]),0])
            fDOM_rb.append(flipped_fDOM[math.ceil(cand[2]),0])
            
# De-duplicate and sort the base timestamps
fDOM_lb = list(set(fDOM_lb))
fDOM_lb.sort()
fDOM_rb = list(set(fDOM_rb))
fDOM_rb.sort()

# Same three collections for the turbidity peaks, using their left_ips / right_ips properties
turb_events = []
turb_lb = []
turb_rb = []
for i,peak in enumerate(turb_peaks):
            turb_events.append(np.array((turb_data[peak])))
            turb_lb.append(turb_data[math.floor(turb_props['left_ips'][i]),0])
            turb_rb.append(turb_data[math.ceil(turb_props['right_ips'][i]),0])
            
turb_lb = list(set(turb_lb))
turb_lb.sort()
turb_rb = list(set(turb_rb))
turb_rb.sort()            

# Label the peak rows ('f_opp' on the flipped fDOM series, 't_opp' on turbidity)
fDOM_merged = dp.merge_data(flipped_fDOM, fDOM_events, 'f_opp', '')
turb_merged = dp.merge_data(turb_data, turb_events, 't_opp', '')

# Add the left/right base markers to each merged series
fDOM_merged = dp.merge_additional_data(fDOM_merged, fDOM_lb, 'left_base')
fDOM_merged = dp.merge_additional_data(fDOM_merged, fDOM_rb, 'right_base')

turb_merged = dp.merge_additional_data(turb_merged, turb_lb, 'left_base')
turb_merged = dp.merge_additional_data(turb_merged, turb_rb, 'right_base')


# Stage series labelled with the detected rises
stage_edge_data = dp.stage_rises_to_data(s_indices, stage_data)
stage_data_merged = dp.merge_data(stage_data, stage_edge_data, 'rise','')

# NOTE(review): the third argument is an unlabelled merge of the un-flipped fDOM_data, whereas the
# sky-peak export cell passes turb_merged in this position; the turb_merged built above is not used
# by this call. Confirm that plotting raw fDOM in the turbidity slot is intentional.
dm.write_data_to_trainset(fDOM_merged,
                          stage_data_merged,
                          dp.merge_data(fDOM_data, [], '',''),
                          '../Data/temp_plotting/fDOM_plum_0k-100k.csv',
                          True,
                          True,
                          0,
                          100000)

In [None]:
# One hour expressed in Julian-day units; fDOM peaks this close to a turbidity peak count as interference.
julian_hour = 0.04166666651144624
interference_hour_range = [julian_hour * 2]

# Placeholder acceptance bounds. The earlier empty lists could not be indexed as [lower, upper];
# infinite bounds accept every candidate until real limits are chosen. TODO: tune.
prom_range = [-math.inf, math.inf]
base_width_range = [-math.inf, math.inf]

# NOTE(review): detect_plum_peaks_fDOM is defined in the cell below this one, so that cell has to be
# executed first (or moved above) for a Restart & Run All to succeed.
# prom_range and base_width_range are now passed through; previously they were defined but never handed over.
plummeting_peaks, not_plummeting_peaks = detect_plum_peaks_fDOM(possible_plum_peaks,
                                                                turb_peaks,
                                                                interference_hour_range,
                                                                props,
                                                                prom_range,
                                                                base_width_range)

In [None]:
# get turb peaks 
def detect_plum_peaks_fDOM(candidates: list[np.ndarray],
                           turb_peaks: list[list[list]],
                           interference_hour_range: list[int]
                           properties: list, 
                           prom_range: list[float], 
                           base_width_range: list[float]):
    """
    Detect fDOM plummeting peak anomalies from given set of possible candidates, given hyper parameters and turb peaks
    """
    plum_peaks = []
    not_plum_peaks = []
    interference_peaks = [] # label these so we can see if it is rejecting correctly
    
    for i, peak in candidates:
        # Uses bases to calculate base_width. Or just use width. Is width a float or in
        base_width = properties['right_bases'][i] - properties['left_bases'][i]
        base_width = properties['widths'][i]
        prominence = properties['prominences'][i]
        start_timestamp = data[properties['left_bases'][i],0]
        end_timestamp = data[properties['right_bases'][i],0]
        peak_timestamp = data[peak][0]
        
        # Check if base_width violates
        if ((base_width > base_width_range[0] and base_width < base_width_range[1])
            and prominence > prom_range[0] and prominence < prom_range[1]):
            
            # Check for interference
            for peak in turb_peaks: 
                # Check if fDOM peak is within x hours of turb peak
                if condition: 
                    # Append to interference_peaks
                    break
            # Append [start, end, peak, label]
            plum_peaks.append([start_timestamp,end_timestamp,peak_timestamp,'plum_peak_fDOM'])
            
        else: 
            not_sharp_peaks.append([start_timestamp,end_timestamp,peak_timestamp,'rejected_plum_peak_fDOM'])
    return sharp_peaks, not_sharp_peaks
    
    pass

In [None]:
# Indices of known plummeting peaks consulted by filter_fDOM_cands (empty until populated).
plum_peaks = []
def filter_fDOM_cands(peaks, props, fDOM_data, end_thresh=.2, max_index=100000):
    """
    Filter out candidates with an extremely small ending slope 
    Filter out candidates whose start and end occur on a plummenting peak/interference

    peaks:      candidate peak row indices, assumed ascending (scanning stops at the first one above max_index)
    props:      per-candidate arrays keyed by 'left_ips', 'right_ips' and 'prominences'
    fDOM_data:  2-D array with timestamps in column 0 and values in column 1
    end_thresh: candidates whose |value change| / samples between the peak and ceil(right_ips)
                is below this are dropped
    max_index:  candidates located past this row index are not examined and not returned

    A diagnostic line is printed for every dropped candidate. Returns the positions (indices into
    `peaks`) of the candidates that were kept; earlier versions only printed and returned None.
    """
    take_indices = []

    for i, peak in enumerate(peaks): 
        if peak > max_index:
            break
        right_idx = math.ceil(props['right_ips'][i])
        left_idx = math.floor(props['left_ips'][i])

        end_run = abs(peak - right_idx)
        end_rise = abs(fDOM_data[peak,1] - fDOM_data[right_idx,1])
        # A zero-length run has no defined slope; treat it as steep so it is never flagged as flat.
        end_slope = end_rise/end_run if end_run else float('inf')

        flat_ending = end_slope < end_thresh
        if flat_ending:
            print('Date: {} ES: {}  Prom: {}'.format(dp.julian_to_datetime(fDOM_data[peak,0]), round(end_slope,3),round(props['prominences'][i],2)))
            # The timestamp shown is the one at ceil(right_ips), so it is labelled as the right intersection.
            print('Right IPS: {}'.format(dp.julian_to_datetime(fDOM_data[right_idx,0])))
            print('\n')

        on_plum_peaks = right_idx in plum_peaks and left_idx in plum_peaks
        if on_plum_peaks:
            print('DQP: Data: {}'.format(dp.julian_to_datetime(fDOM_data[peak,0])))

        if not (flat_ending or on_plum_peaks):
            take_indices.append(i)

    return take_indices
        
# Run the ending-slope / plummeting-peak diagnostics on the sharp-peak candidates.
# The loaded fDOM series is named fDOM_data; fDOM_raw_data is never defined in this notebook.
filter_fDOM_cands(fDOM_peaks, fDOM_props, fDOM_data)

# Rebuild the flipped copy of the fDOM series from the loaded data
flipped_fDOM = np.array(fDOM_data)
flipped_fDOM = dp.flip_timeseries(flipped_fDOM)