In [1]:
import scipy.io as sio
from scipy.signal import find_peaks
import pandas as pd
import numpy as np
import copy
import sys
sys.path.insert(1,'../')
from Tools.auxiliary_functions import get_candidates, detect_flat_plat, detect_stage_rises
import Tools.data_processing as dp
import Tools.data_movement as dm
import math

# Notebook-wide pandas display settings, chosen so that printed dataframes
# are shown in full (no row/column truncation) and with enough precision
# to inspect them by eye.
_display_settings = {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.width': 1000,
    'display.colheader_justify': 'center',
    'display.precision': 10,
}
for _option_name, _option_value in _display_settings.items():
    pd.set_option(_option_name, _option_value)

In [5]:
# Location of the preprocessed raw turbidity series (stored under the
# julian_format directory of the converted data).
turb_csv_path = '../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv'
turb_data = dm.read_in_preprocessed_timeseries(turb_csv_path)

## Turbidity PLP Peaks
Reuse the peak-detection approach we already have for fDOM to find candidate plummeting peaks (PLP) in the turbidity series.

In [6]:
# Flip the time series. The flip is applied to a deep copy because
# `turb_data` is read again later to look up the raw timestamps/values.
# NOTE(review): presumably flipping turns downward plummets into upward
# peaks so find_peaks can detect them -- confirm in dp.flip_timeseries.
flipped_turb = dp.flip_timeseries(copy.deepcopy(turb_data))

# get plummeting peak cand set
# TODO: modify these params
prominence_range = [3, None]  # peaks must have at least prominence 3
width_range = [None, 10]  # peaks cannot be wider than 10 samples (width is measured at rel_height)
wlen = 100  # window length, in samples, used when computing prominences
distance = 1  # minimum horizontal distance, in samples, between neighbouring peaks
rel_height = 0.6  # relative height (fraction of prominence) at which widths are measured

# Column 1 of the flipped array is the signal passed to the peak finder.
# height/threshold are given as open intervals: they filter nothing but make
# find_peaks report the corresponding properties in `props`.
peaks, props = find_peaks(
    flipped_turb[:, 1],
    height=(None, None),
    threshold=(None, None),
    distance=distance,
    prominence=prominence_range,
    width=width_range,
    wlen=wlen,
    rel_height=rel_height,
)

# One record per detected peak: its sample index, the interpolated left/right
# width intersection points rounded outwards to whole sample indices, and its
# prominence.
cands = [
    [peak, math.floor(left_ip), math.ceil(right_ip), prominence]
    for peak, left_ip, right_ip, prominence in zip(
        peaks, props["left_ips"], props["right_ips"], props["prominences"]
    )
]

# Pass the column names to the constructor instead of assigning `.columns`
# afterwards: with zero detected peaks the frame would otherwise have no
# columns and the assignment would raise a length-mismatch ValueError.
cands_df = pd.DataFrame(
    cands, columns=["idx_of_peak", "left_ips", "right_ips", "prominence"]
).set_index("idx_of_peak")

print(cands_df)


             left_ips  right_ips  prominence
idx_of_peak                                 
1983             1976       1987   13.722430
3275             3273       3276   13.846119
3298             3296       3299   29.266823
3713             3712       3714    3.140680
5922             5918       5924    5.351676
...               ...        ...         ...
228613         228611     228615    5.543963
228623         228621     228624    3.402510
228625         228624     228626    4.121580
228682         228680     228684    5.227082
228701         228698     228702    4.457855

[924 rows x 3 columns]


  peaks, props = find_peaks(


In [7]:
# Wrap the raw (unflipped) turbidity series in a dataframe so the candidate
# peaks can be matched to their timestamp and value by row position.
raw_turb_df = pd.DataFrame(turb_data)
raw_turb_df.columns = ["timestamp_of_peak", "value_of_peak"]

# The candidates are indexed by the peak's sample index, so joining on the
# index pulls in the raw timestamp/value of each peak. After that the
# detection properties are dropped, every peak gets the default label NPLP,
# and the columns are put in their final order.
total_df = (
    cands_df.join(raw_turb_df)
    .reset_index()
    .drop(columns=["left_ips", "right_ips", "prominence"])
    .assign(label_of_peak="NPLP")
    .reindex(columns=["timestamp_of_peak", "value_of_peak", "label_of_peak", "idx_of_peak"])
)

In [8]:
# Two independent copies of the labelled peaks: one keeps the Julian
# timestamps as they are, the other gets ISO-8601 datetime strings.
final_julian_df = total_df.copy(deep=True)
final_datetime_df = total_df.copy(deep=True)


def _julian_to_iso(jul_time):
    """Convert one Julian timestamp to an ISO-8601 datetime string."""
    return dp.julian_to_datetime(jul_time).isoformat()


# Convert the whole column in one pass. Writing the strings one by one with
# `.loc` into the float column (as an iterrows loop would) relies on silent
# dtype upcasting, which pandas has deprecated, and is needlessly slow.
final_datetime_df["timestamp_of_peak"] = final_datetime_df["timestamp_of_peak"].map(
    _julian_to_iso
)

In [None]:
# Output locations for the two variants of the labelled peak table.
csv_path_julian = "../Data/labeled_data/ground_truths/turb/turb_plp/julian_time/turb_PLP_0k-300k_labeled.csv"
csv_path_datetime = "../Data/labeled_data/ground_truths/turb/turb_plp/datetime/turb_PLP_0k-300k_labeled.csv"

# Index both tables by the peak timestamp so it becomes the first CSV column.
final_julian_df = final_julian_df.set_index("timestamp_of_peak")
final_datetime_df = final_datetime_df.set_index("timestamp_of_peak")

# Write each table to its destination.
for labeled_df, destination in (
    (final_julian_df, csv_path_julian),
    (final_datetime_df, csv_path_datetime),
):
    labeled_df.to_csv(destination)