# Finding Flat Plateaus in fDOM and turbidity
This will be used to label flat plateaus in the data.

NOTE: fDOM version not currently functional

In [None]:
# Imports
import pandas as pd
import math
import copy
import sys
sys.path.insert(1, '../')
from Tools import auxiliary_functions
import Tools.data_movement as dm
import Tools.data_processing as dp

# Console display settings that make printed dataframes easier to check:
# no row/column truncation, wide lines, centered headers, 10-digit precision.
_display_settings = {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.width': 1000,
    'display.colheader_justify': 'center',
    'display.precision': 10,
}
for _option_name, _option_value in _display_settings.items():
    pd.set_option(_option_name, _option_value)

In [None]:
# Read the three pre-processed (julian-format) series from disk.
fdom_csv_path = '../Data/converted_data/julian_format/fDOM_raw_10.1.2011-9.4.2020.csv'
turb_csv_path = '../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv'
stage_csv_path = '../Data/converted_data/julian_format/stage_10.1.11-1.1.19.csv'

fDOM_data = dm.read_in_preprocessed_timeseries(fdom_csv_path)
turb_data = dm.read_in_preprocessed_timeseries(turb_csv_path)

# Stage is loaded raw and then passed through the project's alignment helper
# together with the fDOM series.
stage_data = dp.align_stage_to_fDOM(
    fDOM_data,
    dm.read_in_preprocessed_timeseries(stage_csv_path),
)

## fDOM Plateaus
We need to come up with a function that will find plateaus  
Zach wrote a rudimentary one, but it still needs work.

In the dataframe this section produces, the index of the peak is the left base (AKA START OF PEAK).


### Get candidates

In [None]:
# Run the baseline flat-plateau detector over the fDOM series
# FIXME: this does not correctly detect any
cands = auxiliary_functions.detect_flat_plat(fDOM_data, 100, 40)

# Keep the positions whose detector output equals 1
# (presumably the samples flagged as part of a flat plateau; confirm against detect_flat_plat)
indices = [pos for pos in range(cands.shape[0]) if cands[pos] == 1]


### Create dataframes

In [None]:
# Create dataframe
# Collapse the candidate indices into runs of consecutive values and record
# the first and last index of each run (one run == one plateau).
start_indices = []
end_indices = []

# None rather than -1: with -1 a plateau starting at index 0 looks like a
# continuation (0 == -1 + 1) and would never be recorded as a start.
last_val = None

for val in indices:
    if last_val is None or val != last_val + 1:
        # we are now in a new plateau: close the previous one (if any), open this one
        if last_val is not None:
            end_indices.append(last_val)
        start_indices.append(val)

    # set last val
    last_val = val

# close the final plateau; done after the loop so that a single-sample plateau
# at the very end still gets an end index, and nothing happens for no candidates
if last_val is not None:
    end_indices.append(last_val)

# one row per plateau; the "peak" index is defined to be the left base
cands = [[start, start, end] for start, end in zip(start_indices, end_indices)]

# create dataframe
# Passing columns/dtype explicitly keeps this valid when there are no
# candidates, and keeps the index columns as integers (no placeholder row).
cands_df_fdom = pd.DataFrame(
    cands,
    columns=["idx_of_peak", "left_base", "right_base"],
    dtype="int64",
)

# set index
cands_df_fdom = cands_df_fdom.set_index("idx_of_peak")

# merge raw data with candidates on peak index
raw_fdom_df = pd.DataFrame(fDOM_data)
raw_fdom_df.columns = ["timestamp_of_peak", "value_of_peak"]
total_df_fdom = cands_df_fdom.join(raw_fdom_df)

# reset index
total_df_fdom = total_df_fdom.reset_index()

# drop unneeded cols
total_df_fdom = total_df_fdom.drop(columns=["left_base", "right_base"])

total_df_fdom['label_of_peak'] = "NFPT" # set label to be not a flat plateau

total_df_fdom = total_df_fdom.reindex(columns=['timestamp_of_peak', 'value_of_peak', 'label_of_peak', "idx_of_peak"])

print(total_df_fdom)

### Create julian and datetime format df's

In [None]:
# Two independent copies of the candidate table: one keeps the julian
# timestamps as-is, the other gets ISO-8601 datetime strings.
julian_fdom = total_df_fdom.copy()
datetime_fdom = total_df_fdom.copy()

# Convert the whole column in one pass and replace it, instead of writing
# strings cell-by-cell with .loc: element-wise writes of strings into a
# numeric column rely on implicit dtype upcasting, which newer pandas deprecates.
datetime_fdom["timestamp_of_peak"] = datetime_fdom["timestamp_of_peak"].map(
    lambda jul_time: dp.julian_to_datetime(jul_time).isoformat()
)

print(julian_fdom)

## Turbidity Plateaus

### Get candidate list

In [None]:
# Run the baseline flat-plateau detector over the turbidity series
cands = auxiliary_functions.detect_flat_plat(turb_data, 100, 40)

# Keep the positions whose detector output equals 1
# (presumably the samples flagged as part of a flat plateau; confirm against detect_flat_plat)
turb_flat_plat_indxs = [pos for pos in range(cands.shape[0]) if cands[pos] == 1]


### Create dataframes

In [None]:
# create dataframe
# Collapse the candidate indices into runs of consecutive values and record
# the first and last index of each run (one run == one plateau).
start_indices = []
end_indices = []

# None rather than -1: with -1 a plateau starting at index 0 looks like a
# continuation (0 == -1 + 1) and would never be recorded as a start.
last_val = None

for val in turb_flat_plat_indxs:
    if last_val is None or val != last_val + 1:
        # we are now in a new plateau: close the previous one (if any), open this one
        if last_val is not None:
            end_indices.append(last_val)
        start_indices.append(val)

    # set last val
    last_val = val

# close the final plateau; done after the loop so that a single-sample plateau
# at the very end still gets an end index, and nothing happens for no candidates
if last_val is not None:
    end_indices.append(last_val)

# one row per plateau; the "peak" index is defined to be the left base
cands = [[start, start, end] for start, end in zip(start_indices, end_indices)]

# create dataframe
# Passing columns/dtype explicitly keeps this valid when there are no
# candidates, and keeps the index columns as integers (no placeholder row).
cands_df_turb = pd.DataFrame(
    cands,
    columns=["idx_of_peak", "left_base", "right_base"],
    dtype="int64",
)

# set index
cands_df_turb = cands_df_turb.set_index("idx_of_peak")

# merge raw data with candidates on peak index
raw_turb_df = pd.DataFrame(turb_data)
raw_turb_df.columns = ["timestamp_of_peak", "value_of_peak"]
total_df_turb = cands_df_turb.join(raw_turb_df)

# reset index
total_df_turb = total_df_turb.reset_index()

# drop unneeded cols
total_df_turb = total_df_turb.drop(columns=["left_base", "right_base"])

total_df_turb['label_of_peak'] = "NFPT" # set label to be not a flat plateau

total_df_turb = total_df_turb.reindex(columns=['timestamp_of_peak', 'value_of_peak', 'label_of_peak', "idx_of_peak"])

print(total_df_turb)

### Create julian and datetime formats

In [None]:
# Two independent copies of the candidate table: one keeps the julian
# timestamps as-is, the other gets ISO-8601 datetime strings.
julian_turb = total_df_turb.copy()
datetime_turb = total_df_turb.copy()

# Convert the whole column in one pass and replace it, instead of writing
# strings cell-by-cell with .loc: element-wise writes of strings into a
# numeric column rely on implicit dtype upcasting, which newer pandas deprecates.
datetime_turb["timestamp_of_peak"] = datetime_turb["timestamp_of_peak"].map(
    lambda jul_time: dp.julian_to_datetime(jul_time).isoformat()
)


## Output to CSV

In [None]:
# set index to be timestamp of peak

# fDOM
julian_fdom = julian_fdom.set_index("timestamp_of_peak")
datetime_fdom = datetime_fdom.set_index("timestamp_of_peak")

# turbidity
julian_turb = julian_turb.set_index("timestamp_of_peak")
datetime_turb = datetime_turb.set_index("timestamp_of_peak")

# set path
csv_path_julian_fdom = "../Data/labeled_data/ground_truths/fDOM/fDOM_FPT/julian_time/fDOM_FPT_0k-300k.csv"
csv_path_datetime_fdom = "../Data/labeled_data/ground_truths/fDOM/fDOM_FPT/datetime/fDOM_FPT_0k-300k.csv"

csv_path_julian_turb = "../Data/labeled_data/ground_truths/turb/turb_fpt/julian_time/turb_FPT_0k-300k_labeled.csv"
csv_path_datetime_turb = "../Data/labeled_data/ground_truths/turb/turb_fpt/datetime/turb_FPT_0k-300k_labeled.csv"

# write to csv
# NOTE: writing replaces whatever is already stored at the ground-truth paths
# above, so both writes are opt-in. Previously the fDOM writes ran
# unconditionally even though the note here said they were disabled.
# Set a flag to True only when deliberately regenerating that set of files.
OVERWRITE_FDOM_CSVS = False
OVERWRITE_TURB_CSVS = False

if OVERWRITE_FDOM_CSVS:
    julian_fdom.to_csv(csv_path_julian_fdom)
    datetime_fdom.to_csv(csv_path_datetime_fdom)

if OVERWRITE_TURB_CSVS:
    julian_turb.to_csv(csv_path_julian_turb)
    datetime_turb.to_csv(csv_path_datetime_turb)