# Finding Turbidity Skyrocketing Peaks
This notebook uses the same technique as the fDOM peak detection, applied to turbidity; the only change is a higher minimum prominence.

In [2]:
from scipy.signal import find_peaks
import pandas as pd
import copy
import sys
sys.path.insert(1,'../')
import Tools.data_processing as dp
import Tools.data_movement as dm
import math

# Relax pandas' display limits so printed dataframes show every row and
# column, on wide lines, with centred headers and 10 digits of precision.
for option_name, option_value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),
    ('display.colheader_justify', 'center'),
    ('display.precision', 10),
):
    pd.set_option(option_name, option_value)

## Load in Data

In [3]:
# Path to the preprocessed raw turbidity timeseries (julian-format directory)
turb_data_path = '../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv'

# Read the timeseries with the project's data-movement helper
turb_data = dm.read_in_preprocessed_timeseries(turb_data_path)

## Collect Candidate Peaks

In [8]:
# Parameters handed to scipy.signal.find_peaks
prominence_range = [20, None]  # minimum prominence of 20 (higher than that of fDOM), no maximum
width_range = [None, None]  # no width bounds; supplied so the width properties (left_ips/right_ips) are returned
wlen = 100  # window length, in samples, used when evaluating each peak's prominence
distance = 1  # minimum horizontal distance, in samples, between neighbouring peaks
rel_height = 0.6  # relative height (fraction of prominence) at which peak width is measured

# Get list of all peaks that could possibly be skyrocketing peaks.
# Column 1 of turb_data holds the turbidity values that are searched.
peaks, props = find_peaks(
    turb_data[:, 1],
    height=(None, None),
    threshold=(None, None),
    distance=distance,
    prominence=prominence_range,
    width=width_range,
    wlen=wlen,
    rel_height=rel_height,
)

# Form candidate set from returned information.
# left_ips/right_ips are fractional sample positions, so round them outwards
# (floor on the left, ceil on the right) to get whole-sample indices.
cands = [
    [
        peak,
        math.floor(props["left_ips"][i]),
        math.ceil(props["right_ips"][i]),
        props["prominences"][i],
    ]
    for i, peak in enumerate(peaks)
]

# Name the columns in the constructor rather than assigning them afterwards:
# if no peak satisfies the criteria, `cands` is empty and a later
# `.columns = [...]` assignment would fail with a length mismatch.
cands_df = pd.DataFrame(
    cands,
    columns=["idx_of_peak", "left_ips", "right_ips", "prominence"],
)

# Index candidates by the position of the peak in the raw series
cands_df = cands_df.set_index("idx_of_peak")

print(cands_df)

             left_ips  right_ips     prominence   
idx_of_peak                                       
1988            1985      1992       20.3451752571
2084            2082      2087      108.1853868210
3270            3269      3272      235.0074983077
3295            3293      3297       29.2668226460
3300            3298      3302      139.7718154542
3710            3706      3717       31.4790647307
5911            5909      5914      185.6173543208
6601            6600      6605       54.9536534492
6783            6781      6785      300.8945310791
6799            6797      6801       43.7464275010
7831            7830      7836       39.6955474895
8642            8640      8644      186.9500093393
9879            9876      9883       25.9433387068
10238          10237     10242      100.8416458887
10276          10274     10277      171.9408400987
10323          10321     10326       51.0551459178
12714          12713     12715      230.2972147324
12720          12719     12722 

## Merge raw data with candidates on peak index

In [5]:
# Wrap the raw turbidity array in a dataframe with named columns
raw_turb_df = pd.DataFrame(
    turb_data,
    columns=["timestamp_of_peak", "value_of_peak"],
)

# Attach the raw timestamp/value to each candidate by matching on the peak
# index, restore idx_of_peak as a regular column, discard the peak-shape
# columns, and give every candidate the default label NSKP.
total_df = (
    cands_df.join(raw_turb_df)
    .reset_index()
    .drop(columns=["left_ips", "right_ips", "prominence"])
    .assign(label_of_peak="NSKP")
)

# Put the columns in their final order
total_df = total_df[["timestamp_of_peak", "value_of_peak", "label_of_peak", "idx_of_peak"]]

# Uncomment to inspect the final dataframe
# print(total_df)


## Create julian and datetime formats

In [6]:
# Julian-time version: an independent copy of the merged candidate table
final_julian_df = total_df.copy()

# Datetime version: a second independent copy whose timestamps are converted below
final_datetime_df = total_df.copy()

# Convert the julian times to ISO-format strings and replace the whole column
# in a single assignment. Writing strings one cell at a time through `.loc`
# into the numeric timestamp column is an incompatible-dtype write, which
# recent pandas versions warn about and newer ones are slated to reject;
# swapping the whole column sidesteps that.
# NOTE(review): assumes dp.julian_to_datetime returns a datetime-like object
# exposing .isoformat() - confirm against Tools.data_processing.
final_datetime_df["timestamp_of_peak"] = [
    dp.julian_to_datetime(jul_time).isoformat()
    for jul_time in final_datetime_df["timestamp_of_peak"]
]

## Output to CSV
The cell below is commented out because running it would overwrite the already-labeled data.

In [7]:
# # set df index to be timestamp of peak
# final_julian_df = final_julian_df.set_index("timestamp_of_peak")
# final_datetime_df = final_datetime_df.set_index("timestamp_of_peak")

# # set path
# csv_path_julian = "../Data/labeled_data/ground_truths/turb/turb_skp/julian_time/turb_SKP_0k-300k_labeled.csv"
# csv_path_datetime = "../Data/labeled_data/ground_truths/turb/turb_skp/datetime/turb_SKP_0k-300k_labeled.csv"

# # write to csv
# final_julian_df.to_csv(csv_path_julian)
# final_datetime_df.to_csv(csv_path_datetime)