# Finding Turbidity Skyrocketing Peaks
This notebook uses the same peak-detection technique as for fDOM, applied to turbidity; the difference is a higher minimum prominence.

In [73]:
import scipy.io as sio
from scipy.signal import find_peaks
import pandas as pd
import numpy as np
import copy
import sys
sys.path.insert(1,'../')
from Tools.auxiliary_functions import get_candidates, detect_flat_plat, detect_stage_rises
import Tools.data_processing as dp
import Tools.data_movement as dm
import math

# Relax pandas' display limits so dataframes print in full when inspected.
for _option_name, _option_value in (
    ("display.max_rows", None),  # no row truncation
    ("display.max_columns", None),  # no column truncation
    ("display.width", 1000),
    ("display.colheader_justify", "center"),
    ("display.precision", 10),
):
    pd.set_option(_option_name, _option_value)

## Load in Data

In [74]:
# Load the preprocessed raw turbidity time series via the project's data-movement helper.
# The result is indexed below as a 2-D array (column 0 = timestamp, column 1 = value).
turb_data_path = '../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv'
turb_data = dm.read_in_preprocessed_timeseries(turb_data_path)

## Collect Candidate Peaks

In [75]:
# Parameters handed to scipy.signal.find_peaks.
prominence_range = [20, None]  # [min, max] prominence; higher minimum than that of fDOM
width_range = [None, None]  # no width filtering, but passing it makes find_peaks report left_ips/right_ips
wlen = 100  # window length (in samples) used when evaluating prominence
distance = 1  # minimum number of samples between neighbouring peaks
rel_height = 0.6  # relative height at which each peak's width is measured

# Get list of all peaks that could possibly be skyrocketing peaks
peaks, props = find_peaks(
    turb_data[:, 1],
    height=(None, None),
    threshold=(None, None),
    distance=distance,
    prominence=prominence_range,
    width=width_range,
    wlen=wlen,
    rel_height=rel_height,
)

# Form candidate set from returned information: one row per peak, with the
# interpolated width bounds widened outwards to whole sample indices.
cands = [
    [peak, math.floor(left_ip), math.ceil(right_ip), prominence]
    for peak, left_ip, right_ip, prominence in zip(
        peaks, props["left_ips"], props["right_ips"], props["prominences"]
    )
]

# Name the columns in the constructor rather than assigning them afterwards:
# an empty candidate list yields a zero-column frame, and assigning four column
# names to it raises a length-mismatch ValueError.
cands_df = pd.DataFrame(
    cands,
    columns=["idx_of_peak", "left_ips", "right_ips", "prominence"],
)

# Index by peak position so the candidates can be joined onto the raw series.
cands_df = cands_df.set_index("idx_of_peak")

# print(cands_df)

## Merge raw data with candidates on peak index

In [76]:
# Raw turbidity series as a dataframe; its default integer index is the sample
# position, which is what the candidate frame is indexed by.
raw_turb_df = pd.DataFrame(turb_data, columns=["timestamp_of_peak", "value_of_peak"])

# Build the labelled peak table in one pass:
#   1. attach each candidate's timestamp and value by joining on the peak index,
#   2. bring idx_of_peak back as an ordinary column,
#   3. discard the peak-shape columns that are not part of the output,
#   4. label every candidate "NSKP" by default,
#   5. put the columns in their final order.
total_df = (
    cands_df.join(raw_turb_df)
    .reset_index()
    .drop(columns=["left_ips", "right_ips", "prominence"])
    .assign(label_of_peak="NSKP")
    .reindex(columns=["timestamp_of_peak", "value_of_peak", "label_of_peak", "idx_of_peak"])
)

# Uncomment to inspect the final frame:
# print(total_df)


## Create julian and datetime formats

In [None]:
# Julian-time copy of the labelled peaks; timestamps are kept exactly as loaded.
final_julian_df = total_df.copy()

# Datetime copy of the same rows; its timestamps are converted below.
final_datetime_df = total_df.copy()

# convert julian times to datetimes
# Previously this step was missing, so the "datetime" frame was an exact
# duplicate of the julian one.
# NOTE(review): assumes timestamp_of_peak holds astronomical Julian day numbers
# (the input lives under .../julian_format/) - confirm against the loader.
# Float day counts cannot represent clock times exactly, so round to the
# millisecond to strip sub-millisecond floating-point noise.
final_datetime_df["timestamp_of_peak"] = pd.to_datetime(
    final_datetime_df["timestamp_of_peak"], unit="D", origin="julian"
).dt.round("ms")


## Output to CSV

In [77]:
# Destination files for the two timestamp flavours of the labelled peaks.
csv_path_julian = "../Data/labeled_data/ground_truths/turb/turb_skp/julian_time/turb_skp_0k-300k.csv"
csv_path_datetime = "../Data/labeled_data/ground_truths/turb/turb_skp/datetime/turb_skp_0k-300k.csv"

# Use the peak timestamp as the index so it becomes the first CSV column.
final_julian_df = final_julian_df.set_index("timestamp_of_peak")
final_datetime_df = final_datetime_df.set_index("timestamp_of_peak")

# Write each frame to its own file.
final_julian_df.to_csv(csv_path_julian)
final_datetime_df.to_csv(csv_path_datetime)