# Augmenting time-series data
This notebook augments the existing time-series data (fDOM, turbidity, and stage) in order to generate additional examples.

In [None]:
# Imports
import sys
sys.path.insert(1,'../')
import Tools.data_processing as dp
import Tools.data_movement as dm 
import pandas as pd
from datetime import datetime
from datetime import timedelta
import copy
import random
import matplotlib.pyplot as plt

## Helper Functions
The following functions provide useful tools for the augmentation process

In [None]:
def next_time_entry(current_entry: float) -> float:
    """
    Advance a julian timestamp by one 15-minute step.

    The value is converted to a datetime with the project helper, shifted
    by 15 minutes, and converted back to julian time.

    current_entry: a julian time float

    return: the julian time 15 minutes after current_entry
    """
    step = timedelta(minutes=15)
    shifted = dp.julian_to_datetime(current_entry) + step
    return dp.datetime_to_julian(shifted)


def reindex_augmented_data(data: pd.DataFrame, datatype: str) -> pd.DataFrame:
    """
    Placeholder for reindexing the augmented data so there are no overlaps.

    data: the data to reindex
    datatype: fdom, turb, or stage

    returns: intended to return the reindexed data; the body is not
    implemented yet, so both arguments are ignored and None is returned
    """
    # TODO: implement the reindexing described above
    return None


def get_last_augment_index(dataframe) -> int:
    """
    Report how many rows the augmented time series currently holds.

    dataframe: the augmented dataframe to measure

    return: the row count (first entry of the dataframe's shape)
    """
    row_count = dataframe.shape[0]
    return row_count


## Loading in data
The knowledge-based approach uses the data in `Data/converted_data/julian_format/`, so that is where the data augmentation will go.

In [None]:
# Input files: raw julian-format series and the labeled peak tables
fdom_raw_csv = "../Data/converted_data/julian_format/fDOM_raw_10.1.2011-9.4.2020.csv"
stage_raw_csv = "../Data/converted_data/julian_format/stage_10.1.11-1.1.19.csv"
turb_raw_csv = "../Data/converted_data/julian_format/turbidity_raw_10.1.2011_9.4.2020.csv"
fdom_labeled_csv = "../Data/labeled_data/ground_truths/fDOM/fDOM_all_julian_0k-300k.csv"
turb_labeled_csv = "../Data/labeled_data/ground_truths/turb/turb_pp/julian_time/turb_pp_0k-300k_labeled.csv"

# Load the three raw series with the project reader
fDOM_data = dm.read_in_preprocessed_timeseries(fdom_raw_csv)
stage_data = dm.read_in_preprocessed_timeseries(stage_raw_csv)
turb_data = dm.read_in_preprocessed_timeseries(turb_raw_csv)

# Align stage to fDOM (the stage series is replaced by the aligned result)
stage_data = dp.align_stage_to_fDOM(fDOM_data, stage_data)

# Load the labeled peaks for fDOM and turbidity
fDOM_labeled = pd.read_csv(fdom_labeled_csv)
turb_labeled = pd.read_csv(turb_labeled_csv)

# Folder for the new (augmented) data
AUGMENT_DATA_PATH = "../Data/augmented_data/julian_format/"


In [None]:
# Wrap each raw series in a pandas dataframe so rows can be selected by index.
# Every frame gets the same two column names.
RAW_COLUMNS = ["timestamp", "value"]

fDOM_raw = pd.DataFrame(fDOM_data)
fDOM_raw.columns = RAW_COLUMNS

# Show one fDOM row as a quick sanity check
print(fDOM_raw.iloc[9399])

turb_raw = pd.DataFrame(turb_data)
turb_raw.columns = RAW_COLUMNS

stage_raw = pd.DataFrame(stage_data)
stage_raw.columns = RAW_COLUMNS

In [None]:
"""
Visualize data
"""
# One figure with a single axes holding all three raw series
fig = plt.figure()
ax = fig.add_subplot(111)

# fDOM uses the default line colour, turbidity is red, stage is orange
line_fdom = plt.Line2D(fDOM_raw['timestamp'], fDOM_raw['value'])
line_turb = plt.Line2D(turb_raw['timestamp'], turb_raw['value'], color='red')
line_stage = plt.Line2D(stage_raw['timestamp'], stage_raw['value'], color='orange')
for artist in (line_fdom, line_turb, line_stage):
    ax.add_line(artist)

# Axis limits are set explicitly: x spans the fDOM timestamps,
# y spans the turbidity values padded by 10 on each side
x = fDOM_raw['timestamp']
y = turb_raw['value']
ax.set_xlim(min(x), max(x))
ax.set_ylim(min(y) - 10, max(y) + 10)

plt.show()

## Augmenting Data
We will augment data for each type of peak, and for each measurement.

Starting with fDOM:
1. PLP (plummeting peak)
2. PP (phantom peak)
3. SKP (skyrocketing peak)

TODO: augment more peak types when they are labeled

With turbidity:
1. PP

### Constants
We define three constants for use when augmenting the data:
1. `TIME_RANGE_INIT`: the number of points before and after the relative peak that we take data from
2. `ITERATIONS`: the number of times we loop over the list of anomaly peaks, and augment them
3. `STARTING_TIMESTAMP`: the timestamp to start all augmented data at. The default value is 15 minutes after the last data measurement from the original set up data given to the project devs. As of 2/16/22, this timestamp is correct.

In [None]:
""" Helpful constants """
# Half-width of the window copied around each labeled peak: the augmentation
# loops slice [peak - TIME_RANGE_INIT, peak + TIME_RANGE_INIT) from each raw
# series, i.e. TIME_RANGE_INIT points before the peak, the peak itself, and
# TIME_RANGE_INIT - 1 points after it.
TIME_RANGE_INIT = 5  # the base distance of points to add between the peaks
# Number of passes over the labeled peaks; each pass reshuffles the peaks and
# perturbs their values again.
ITERATIONS = 3 # number of times to loop over dataset and augment
# Julian timestamp the augmented timeline starts from; the first augmented row
# is stamped one 15-minute step after this value.
STARTING_TIMESTAMP = 2459096.9583333335

### Augmenting fDOM

In [None]:
#####################################################################################
#                                                                                   #
#                               DATAFRAME SETUP SECTION                             #
#                                                                                   #
#####################################################################################

# new dataframes for augmented labeled/raw fDOM
augmented_fDOM_raw = pd.DataFrame(columns=["timestamp", "value"])
augmented_fDOM_labeled = pd.DataFrame(
    columns=["timestamp_of_peak", "value_of_peak", "label_of_peak", "idx_of_peak"]
)

# dataframe for augmented stage
augmented_stage_raw_fdom = pd.DataFrame(columns=["timestamp", "value"])

# dataframes for augmented raw/labeled turbidity
# (the labeled turbidity frame is only created here; this cell never adds rows to it)
augmented_turb_raw_fdom = pd.DataFrame(columns=["timestamp", "value"])
augmented_turb_labeled_fdom = pd.DataFrame(
    columns=["timestamp_of_peak", "value_of_peak", "label_of_peak", "idx_of_peak"]
)

# julian timestamp the augmented timeline continues from; the first augmented
# row is stamped one 15-minute step after STARTING_TIMESTAMP
prev_added_entry = STARTING_TIMESTAMP

# timestamps of labeled peaks that could not be augmented, either because the
# peak timestamp is missing from raw fDOM, stage or turbidity, or because the
# three windows around it do not have the same length
missed_fDOM_peaks = []

#####################################################################################
#                                                                                   #
#                               AUGMENT DATA SECTION                                #
#                                                                                   #
#####################################################################################

for iteration in range(ITERATIONS):
    # Re-sample the fDOM labeled peaks on every pass to add variance to the data:
    # keep every row whose label is not "NAP" and shuffle the order
    fdom_anon_peaks = fDOM_labeled[fDOM_labeled["label_of_peak"] != "NAP"]
    fdom_anon_peaks = fdom_anon_peaks.sample(frac=1).reset_index(
        drop=True
    )  # reset index as values were removed

    # iterate over each peak
    for peak_idx in fdom_anon_peaks.index:
        # window extents around the peak (fixed for now; no overlap check
        # between neighbouring peaks is performed)
        prev_dist = TIME_RANGE_INIT
        next_dist = TIME_RANGE_INIT

        # Get the labeled peak's timestamp and label
        timestamp_of_peak = fdom_anon_peaks.loc[peak_idx, "timestamp_of_peak"]
        label_of_peak = fdom_anon_peaks.loc[peak_idx, "label_of_peak"]

        # locate the peak timestamp in each raw series
        fdom_index_df = fDOM_raw[fDOM_raw["timestamp"] == timestamp_of_peak]
        stage_index_df = stage_raw[stage_raw["timestamp"] == timestamp_of_peak]
        turb_index_df = turb_raw[turb_raw["timestamp"] == timestamp_of_peak]

        # All three series must contain the timestamp, otherwise taking the
        # first match below would raise IndexError; record the peak as missed
        if fdom_index_df.empty or stage_index_df.empty or turb_index_df.empty:
            missed_fDOM_peaks.append(timestamp_of_peak)
            continue

        # first matching row of each series
        index_of_peak = fdom_index_df.index.tolist()[0]
        stage_index = stage_index_df.index.tolist()[0]
        turb_index = turb_index_df.index.tolist()[0]

        # Window starts are clamped at 0: a negative start would make iloc count
        # from the end of the series and return a window without the peak.
        # NOTE(review): index labels are used as iloc positions here, which
        # assumes the raw frames keep their default 0..n-1 index -- confirm.
        fdom_start = max(index_of_peak - prev_dist, 0)
        stage_start = max(stage_index - prev_dist, 0)
        turb_start = max(turb_index - prev_dist, 0)

        # independent copies of the window around the peak in every series
        new_fdom_raw = fDOM_raw.iloc[fdom_start : index_of_peak + next_dist].copy()
        new_stage = stage_raw.iloc[stage_start : stage_index + next_dist].copy()
        new_turb_raw = turb_raw.iloc[turb_start : turb_index + next_dist].copy()

        # the three windows share one set of new timestamps, so they must line
        # up row for row; a window cut short at a series boundary cannot
        if not (len(new_fdom_raw) == len(new_stage) == len(new_turb_raw)):
            missed_fDOM_peaks.append(timestamp_of_peak)
            continue

        # scale the peak value by a random factor in [0.9, 1.1]; the random
        # module is already seeded from system entropy, so no re-seeding here
        random_val = random.uniform(-0.1, 0.1)
        new_peak_val = new_fdom_raw.loc[index_of_peak, "value"] * (1 + random_val)
        new_fdom_raw.loc[index_of_peak, "value"] = new_peak_val

        # TODO insert necessary values into turb and stage

        # consecutive 15-minute timestamps for the window, continuing the
        # augmented timeline
        new_time_entry = next_time_entry(prev_added_entry)
        window_timestamps = []
        for _ in range(len(new_fdom_raw)):
            window_timestamps.append(new_time_entry)
            new_time_entry = next_time_entry(new_time_entry)

        # assign by position so each series is re-stamped even when its index
        # labels differ from the fDOM window's labels
        new_fdom_raw["timestamp"] = window_timestamps
        new_stage["timestamp"] = window_timestamps
        new_turb_raw["timestamp"] = window_timestamps

        # position of the peak inside the window, and from that its new
        # timestamp and its row index in the augmented fDOM frame (rows already
        # present + offset of the peak inside the window being appended)
        peak_offset = index_of_peak - fdom_start
        new_peak_timestamp = window_timestamps[peak_offset]
        new_fdom_peak_index = get_last_augment_index(augmented_fDOM_raw) + peak_offset

        # add entries into raw fDOM
        augmented_fDOM_raw = pd.concat(
            [augmented_fDOM_raw, new_fdom_raw], ignore_index=True
        )

        new_label = pd.DataFrame(
            [
                [
                    new_peak_timestamp,
                    new_peak_val,
                    label_of_peak,
                    new_fdom_peak_index,
                ]
            ],
            columns=[
                "timestamp_of_peak",
                "value_of_peak",
                "label_of_peak",
                "idx_of_peak",
            ],
        )

        # add entries to labeled fDOM
        augmented_fDOM_labeled = pd.concat(
            [augmented_fDOM_labeled, new_label], ignore_index=True
        )

        # add entries to stage
        augmented_stage_raw_fdom = pd.concat(
            [augmented_stage_raw_fdom, new_stage], ignore_index=True
        )

        # add entries to turb
        augmented_turb_raw_fdom = pd.concat(
            [augmented_turb_raw_fdom, new_turb_raw], ignore_index=True
        )

        # new_time_entry is already one step past the last row written, so the
        # next window starts two steps (30 minutes) after this window ends
        prev_added_entry = new_time_entry


In [None]:
"""
This cell allows you to print out the augmented dataframes in full
"""

# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.width', 1000)
# pd.set_option('display.colheader_justify', 'center')
# pd.set_option('display.precision', 3)

# print("Labeled Peaks Augmented")
# print(augmented_fDOM_labeled)
# print("\n")

# print("Raw fDOM Augmented")
# print(augmented_fDOM_raw)
# print("\n")

# print("Raw Stage Augmented")
# print(augmented_stage_raw_fdom)
# print("\n")

# print("Raw Turbidity Augmented")
# print(augmented_turb_raw_fdom)

In [None]:
"""
Visualize data with matplotlib
"""
# One figure with a single axes holding the series augmented around fDOM peaks
fig = plt.figure()
ax = fig.add_subplot(111)

# fDOM uses the default line colour, turbidity is red, stage is orange
line_fdom = plt.Line2D(augmented_fDOM_raw['timestamp'], augmented_fDOM_raw['value'])
line_turb = plt.Line2D(augmented_turb_raw_fdom['timestamp'], augmented_turb_raw_fdom['value'], color='red')
line_stage = plt.Line2D(augmented_stage_raw_fdom['timestamp'], augmented_stage_raw_fdom['value'], color='orange')
for artist in (line_fdom, line_turb, line_stage):
    ax.add_line(artist)

# Axis limits are set explicitly from the augmented turbidity series:
# x spans its timestamps, y spans its values padded by 10 on each side
x = augmented_turb_raw_fdom['timestamp']
y = augmented_turb_raw_fdom['value']
ax.set_xlim(min(x), max(x))
ax.set_ylim(min(y) - 10, max(y) + 10)

plt.show()

### Augment turbidity data
The following code blocks augment turbidity data.

In [None]:
#####################################################################################
#                                                                                   #
#                               DATAFRAME SETUP SECTION                             #
#                                                                                   #
#####################################################################################

# labeled turb
augmented_turb_labeled = pd.DataFrame(
    columns=["timestamp_of_peak", "value_of_peak", "label_of_peak", "idx_of_peak"]
)

# raw turb
augmented_turb_raw = pd.DataFrame(columns=["timestamp", "value"])

# raw fdom
augmented_fDOM_raw_turb = pd.DataFrame(columns=["timestamp", "value"])

# raw stage
augmented_stage_raw_turb = pd.DataFrame(columns=["timestamp", "value"])

# julian timestamp the augmented timeline continues from; the first augmented
# row is stamped one 15-minute step after STARTING_TIMESTAMP
prev_added_entry = STARTING_TIMESTAMP

# timestamps of labeled peaks that could not be augmented, either because the
# peak timestamp is missing from raw turbidity, fDOM or stage, or because the
# three windows around it do not have the same length
missed_turb_peaks = []

#####################################################################################
#                                                                                   #
#                               AUGMENT DATA SECTION                                #
#                                                                                   #
#####################################################################################

# Augment turbidity data, mirroring the fDOM augmentation loop
for iteration in range(ITERATIONS):
    # Resample turb labeled peaks at each iteration for more variance:
    # keep every row whose label is not "NPP" and shuffle the order
    turb_anon_peaks = turb_labeled[turb_labeled["label_of_peak"] != "NPP"]
    turb_anon_peaks = turb_anon_peaks.sample(frac=1).reset_index(drop=True)

    # iterate over each peak
    for peak_idx in turb_anon_peaks.index:
        # window extents around the peak (fixed for now; no overlap check
        # between neighbouring peaks is performed)
        prev_dist = TIME_RANGE_INIT
        next_dist = TIME_RANGE_INIT

        # Get the labeled peak's timestamp and label
        timestamp_of_peak = turb_anon_peaks.loc[peak_idx, "timestamp_of_peak"]
        label_of_peak = turb_anon_peaks.loc[peak_idx, "label_of_peak"]

        # locate the peak timestamp in each raw series
        fdom_index_df = fDOM_raw[fDOM_raw["timestamp"] == timestamp_of_peak]
        stage_index_df = stage_raw[stage_raw["timestamp"] == timestamp_of_peak]
        turb_index_df = turb_raw[turb_raw["timestamp"] == timestamp_of_peak]

        # All three series must contain the timestamp, otherwise taking the
        # first match below would raise IndexError; record the peak as missed
        if turb_index_df.empty or stage_index_df.empty or fdom_index_df.empty:
            missed_turb_peaks.append(timestamp_of_peak)
            continue

        # first matching row of each series
        index_of_peak = turb_index_df.index.tolist()[0]
        stage_index = stage_index_df.index.tolist()[0]
        fdom_index = fdom_index_df.index.tolist()[0]

        # Window starts are clamped at 0: a negative start would make iloc count
        # from the end of the series and return a window without the peak.
        # NOTE(review): index labels are used as iloc positions here, which
        # assumes the raw frames keep their default 0..n-1 index -- confirm.
        turb_start = max(index_of_peak - prev_dist, 0)
        stage_start = max(stage_index - prev_dist, 0)
        fdom_start = max(fdom_index - prev_dist, 0)

        # independent copies of the window around the peak in every series
        new_fdom_raw = fDOM_raw.iloc[fdom_start : fdom_index + next_dist].copy()
        new_stage = stage_raw.iloc[stage_start : stage_index + next_dist].copy()
        new_turb_raw = turb_raw.iloc[turb_start : index_of_peak + next_dist].copy()

        # the three windows share one set of new timestamps, so they must line
        # up row for row; a window cut short at a series boundary cannot
        if not (len(new_turb_raw) == len(new_stage) == len(new_fdom_raw)):
            missed_turb_peaks.append(timestamp_of_peak)
            continue

        # scale the peak value by a random factor in [0.9, 1.1]; the random
        # module is already seeded from system entropy, so no re-seeding here
        random_val = random.uniform(-0.1, 0.1)
        new_peak_val = new_turb_raw.loc[index_of_peak, "value"] * (1 + random_val)
        new_turb_raw.loc[index_of_peak, "value"] = new_peak_val

        # TODO insert necessary values into fdom and stage

        # consecutive 15-minute timestamps for the window, continuing the
        # augmented timeline
        new_time_entry = next_time_entry(prev_added_entry)
        window_timestamps = []
        for _ in range(len(new_turb_raw)):
            window_timestamps.append(new_time_entry)
            new_time_entry = next_time_entry(new_time_entry)

        # assign by position so each series is re-stamped even when its index
        # labels differ from the turbidity window's labels
        new_fdom_raw["timestamp"] = window_timestamps
        new_stage["timestamp"] = window_timestamps
        new_turb_raw["timestamp"] = window_timestamps

        # position of the peak inside the window, and from that its new
        # timestamp and its row index in the augmented turbidity frame (rows
        # already present + offset of the peak inside the window being appended)
        peak_offset = index_of_peak - turb_start
        new_peak_timestamp = window_timestamps[peak_offset]
        new_turb_peak_index = get_last_augment_index(augmented_turb_raw) + peak_offset

        # add entries to turb
        augmented_turb_raw = pd.concat(
            [augmented_turb_raw, new_turb_raw], ignore_index=True
        )

        new_label = pd.DataFrame(
            [
                [
                    new_peak_timestamp,
                    new_peak_val,
                    label_of_peak,
                    new_turb_peak_index,
                ]
            ],
            columns=[
                "timestamp_of_peak",
                "value_of_peak",
                "label_of_peak",
                "idx_of_peak",
            ],
        )

        # add entries to labeled turb
        augmented_turb_labeled = pd.concat(
            [augmented_turb_labeled, new_label], ignore_index=True
        )

        # add entries to stage
        augmented_stage_raw_turb = pd.concat(
            [augmented_stage_raw_turb, new_stage], ignore_index=True
        )

        # add entries into raw fDOM
        augmented_fDOM_raw_turb = pd.concat(
            [augmented_fDOM_raw_turb, new_fdom_raw], ignore_index=True
        )

        # new_time_entry is already one step past the last row written, so the
        # next window starts two steps (30 minutes) after this window ends
        prev_added_entry = new_time_entry


In [None]:
"""
This cell allows you to print out the augmented dataframes in full
"""

# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.width', 1000)
# pd.set_option('display.colheader_justify', 'center')
# pd.set_option('display.precision', 3)

# print("Labeled Peaks Augmented")
# print(augmented_turb_labeled)
# print("\n")

# print("Raw fDOM Augmented")
# print(augmented_fDOM_raw_turb)
# print("\n")

# print("Raw Stage Augmented")
# print(augmented_stage_raw_turb)
# print("\n")

# print("Raw Turbidity Augmented")
# print(augmented_turb_raw)

In [None]:
"""
Visualize data with matplotlib
"""
# One figure with a single axes holding the series augmented around turbidity peaks
fig = plt.figure()
ax = fig.add_subplot(111)

# fDOM uses the default line colour, turbidity is red, stage is orange
line_fdom = plt.Line2D(augmented_fDOM_raw_turb['timestamp'], augmented_fDOM_raw_turb['value'])
line_turb = plt.Line2D(augmented_turb_raw['timestamp'], augmented_turb_raw['value'], color='red')
line_stage = plt.Line2D(augmented_stage_raw_turb['timestamp'], augmented_stage_raw_turb['value'], color='orange')
for artist in (line_fdom, line_turb, line_stage):
    ax.add_line(artist)

# Axis limits are set explicitly from the augmented turbidity series:
# x spans its timestamps, y spans its values padded by 10 on each side
x = augmented_turb_raw['timestamp']
y = augmented_turb_raw['value']
ax.set_xlim(min(x), max(x))
ax.set_ylim(min(y) - 10, max(y) + 10)

plt.show()

## Move augmented data into csv files
The following codeblock creates csv files for the augmented data.

### NOTE ON DATA:
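Due to the random sampling used when augmenting fDOM and turbidity, the augmented data is different on every run, so the CSV files written below will not match those from a previous run.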

In [None]:
""" Augmented Data Paths """
# trainset: folders for the combined fDOM/turb/stage CSVs written by
# write_to_trainset_csv (one CSV per augmentation run)
trainset_fdom_path = "../Data/augmented_data/trainset_plotting/fdom/"
trainset_turb_path = "../Data/augmented_data/trainset_plotting/turb/"

# unlabeled data: folders for the raw augmented series written by
# write_augmented_data_to_csv (each folder receives fdom, turb and stage files)
unlabeled_fdom_path = "../Data/augmented_data/fdom/unlabeled/"
unlabeled_turb_path = "../Data/augmented_data/turb/unlabeled/"

# labeled data: folders for the augmented peak-label tables written by
# write_augmented_data_to_csv
labeled_fdom_path = "../Data/augmented_data/fdom/labeled/"
labeled_turb_path = "../Data/augmented_data/turb/labeled/"

In [30]:
def write_augmented_data_to_csv():
    """
    Write every augmented dataframe to its own CSV file.

    The fDOM-based augmentation goes to the fDOM labeled/unlabeled folders
    and the turbidity-based augmentation to the turb folders. The dataframe
    index is not written.
    """
    exports = (
        # fDOM augmented data
        (augmented_fDOM_labeled, labeled_fdom_path + 'labeled_fdom_peaks.csv'),
        (augmented_fDOM_raw, unlabeled_fdom_path + 'unlabeled_fdom.csv'),
        (augmented_turb_raw_fdom, unlabeled_fdom_path + 'unlabeled_turb.csv'),
        (augmented_stage_raw_fdom, unlabeled_fdom_path + 'unlabeled_stage.csv'),
        # turb augmented data
        (augmented_turb_labeled, labeled_turb_path + 'labeled_turb_peaks.csv'),
        (augmented_turb_raw, unlabeled_turb_path + 'unlabeled_turb.csv'),
        (augmented_fDOM_raw_turb, unlabeled_turb_path + 'unlabeled_fdom.csv'),
        (augmented_stage_raw_turb, unlabeled_turb_path + 'unlabeled_stage.csv'),
    )

    for frame, destination in exports:
        frame.to_csv(destination, index=False)


def convert_df_julian_to_datetime(df: pd.DataFrame) -> pd.DataFrame:
    """
    Converts a dataframe julian timestamp to datetime ISO 8601 format

    The "timestamp" column of the given dataframe is replaced in place with
    strings of the form "<isoformat>.000Z".

    df: a dataframe with a "timestamp" column holding julian times

    return: the same dataframe object, with its timestamps converted
    """
    # Build the whole column in one pass and assign it once. Writing strings
    # row by row into a numeric column relies on implicit dtype upcasting,
    # which newer pandas versions deprecate, and costs two lookups and two
    # writes per row.
    # The ".000Z" suffix is appended to fit the trainset format.
    # NOTE(review): this assumes julian_to_datetime returns whole-second
    # datetimes; if it ever carries microseconds, isoformat() already emits a
    # fraction and the suffix would produce a malformed timestamp -- confirm.
    df["timestamp"] = [
        dp.julian_to_datetime(julian_time).isoformat() + ".000Z"
        for julian_time in df["timestamp"]
    ]

    return df

def _stack_trainset_series(named_frames):
    """
    Build one trainset-format dataframe from (series name, dataframe) pairs.

    Each dataframe is deep-copied, its julian timestamps are converted to
    ISO strings, "series" and "label" columns are added (label left empty),
    and the columns are ordered as series/timestamp/value/label. The pieces
    are then concatenated in the given order and stably sorted by timestamp.
    """
    trainset_columns = ["series", "timestamp", "value", "label"]

    prepared = []
    for series_name, julian_frame in named_frames:
        # work on a copy so the augmented dataframes keep their julian timestamps
        frame = convert_df_julian_to_datetime(copy.deepcopy(julian_frame))
        frame["series"] = series_name
        frame["label"] = ""
        prepared.append(frame.reindex(columns=trainset_columns))

    stacked = pd.concat(prepared)
    return stacked.sort_values(by=['timestamp'], kind='stable')


def write_to_trainset_csv():
    """
    Export the augmented data as two trainset-format CSV files.

    One file combines the series augmented around fDOM peaks, the other the
    series augmented around turbidity peaks. Each file interleaves the fDOM,
    turb and stage rows by timestamp; the dataframe index is not written.
    """
    # TODO: add peak labels in

    # ~~~~~~~ fDOM section ~~~~~~~
    trainset_fdom_df = _stack_trainset_series(
        [
            ("fDOM", augmented_fDOM_raw),
            ("turb", augmented_turb_raw_fdom),
            ("stage", augmented_stage_raw_fdom),
        ]
    )
    trainset_fdom_df.to_csv(trainset_fdom_path + "fdom_augmented.csv", index=False)

    # ~~~~~~~ turbidity section ~~~~~~~
    trainset_turb_df = _stack_trainset_series(
        [
            ("fDOM", augmented_fDOM_raw_turb),
            ("turb", augmented_turb_raw),
            ("stage", augmented_stage_raw_turb),
        ]
    )
    trainset_turb_df.to_csv(trainset_turb_path + "turb_augmented.csv", index=False)

# The per-dataframe CSV export is currently disabled; only the combined
# trainset CSVs are written when this cell runs.
#write_augmented_data_to_csv()
write_to_trainset_csv()