# Load Data

Here we load and preprocess the data from the CSV file.

In [None]:
import numpy as np
import pandas as pd
import normalize

# Load the SWaT attack dataset and derive the time span it covers.
df_file_loc = "../data/2015/Physical/SWaT_Dataset_Attack_v0.csv"

df = pd.read_csv(df_file_loc)
print(df.shape)
df_total_rows = int(df.shape[0])

# Dataset Start Time and End Time.
# Both keep the placeholder 0 when the row they are read from does not exist.
df_time_start = 0
df_time_end = 0

# NOTE(review): the start time is read from row label 1 (the second data row),
# exactly as before; confirm whether row 0 was intended, since the later cells
# turn (time - df_time_start) into row offsets.
if df_total_rows > 1:
    # Direct positional lookup instead of walking the frame with iterrows().
    timestamp, _ = normalize.date_time(df["Timestamp"].iloc[1])
    print("timestamp_start=%s" %(timestamp))
    df_time_start = np.array(timestamp, dtype=np.datetime64)


names = df.columns.tolist()
# The last record is already in memory, so take it from `df` rather than
# parsing the whole CSV a second time just to skip to its final line.
df_footer = df.tail(1)
if not df_footer.empty:
    timestamp, _ = normalize.date_time(df_footer["Timestamp"].iloc[0])
    print("timestamp_end=%s" %(timestamp))
    df_time_end = np.array(timestamp, dtype=np.datetime64)

# reset_index() returns a new frame; the previous bare call discarded it.
# drop=True keeps the columns unchanged (no extra "index" column is added).
df = df.reset_index(drop=True)
df.head()

## Detect anomalies from input data

We can get the information about the attacks from the SWaT dataset file `data/2015/List_of_attacks_Final.csv`. The attacks are divided into four groups:

- Single stage single point (SSSP)
- Single stage multi point (SSMP)
- Multi stage single point (MSSP)
- Multi stage multi point attacks (MSMP)
 

In [None]:
import load 

# Spreadsheet with the list of attacks; load.anomalies returns two values,
# unpacked here as the stage mapping and the anomaly collection.
attacks_file_loc = "../data/2015/List_of_attacks_Final_fixed.xlsx"
loaded = load.anomalies(attacks_file_loc)
stages, anomalies = loaded

# Report how many anomalies were read.
anomaly_count = len(anomalies)
print("Loaded %d anomalies\n" % anomaly_count)

## Plot all

In [None]:
import plot
import numpy as np
import filter

# Plot every anomaly in a single figure.

# Time differences are divided by this delta to obtain row offsets into `df`.
time_delta = np.timedelta64(1, "s")
# Margin added before the first and after the last anomaly, expressed through
# time_delta so it does not depend on the unit of the returned datetimes.
padding = 300 * time_delta

# None is passed as the filter argument.
# NOTE(review): presumably this selects all anomalies; confirm in filter.get_times.
time_start, time_end, filtered_anomalies = filter.get_times(anomalies, None)

# Build new values instead of using `-=` / `+=`: if get_times returns NumPy
# arrays, in-place arithmetic would also modify whatever else references them.
time_start = time_start - padding
time_end = time_end + padding

# Number of time_delta steps inside the padded window.
time_len = int((time_end - time_start) / time_delta)

# One x value per step, starting at the padded window start.
x = time_start + np.arange(time_len) * time_delta
# Offsets of the window boundaries relative to the dataset start time.
idx_start = int((time_start - df_time_start) / time_delta)
idx_end = int((time_end - df_time_start) / time_delta)

obj = {
    "name": "all",
    "file_loc": "../output/all.png",
    "df": df,
    "time_start": time_start,
    "time_delta": time_delta,
    "idx_start": idx_start,
    "idx_end": idx_end,
    "filtered_anomalies": filtered_anomalies,
    # "title": "All stages together",
    "title": "",
    "x": x,
}
plot.process(obj)



## Plot per stage (P1, P2, P3, P4, P5, P6)

Here we plot, for each stage, what is happening and where.

In [None]:
import filter
import plot

# Produce one figure per process stage.
stages_for_plotting = [
    "P1",
    "P2",
    "P3",
    "P4",
    "P5",
    "P6",
]

# Time differences are divided by this delta to obtain row offsets.
time_delta = np.timedelta64(1, "s")
# Margin around each stage's anomaly window; loop-invariant, so computed once
# and expressed through time_delta to keep the unit explicit.
padding = 300 * time_delta

for stage in stages_for_plotting:
    time_start, time_end, filtered_anomalies = filter.get_times(anomalies, stage)

    # Build new values instead of using `-=` / `+=`: if get_times returns NumPy
    # arrays, in-place arithmetic would also modify whatever else references them.
    time_start = time_start - padding
    time_end = time_end + padding
    # Number of time_delta steps inside the padded window.
    time_len = int((time_end - time_start) / time_delta)

    # One x value per step, starting at the padded window start.
    x = time_start + np.arange(time_len) * time_delta
    # Offsets of the window boundaries relative to the dataset start time.
    idx_start = int((time_start - df_time_start) / time_delta)
    idx_end = int((time_end - df_time_start) / time_delta)

    # Columns registered for this stage in the mapping returned by load.anomalies.
    columns = stages[stage]
    df_custom = filter.load_df(df_file_loc, columns)

    obj = {
        "name": stage,
        "file_loc": "../output/stage_%s.png" %(stage),
        "df": df_custom,
        "time_start": time_start,
        "time_delta": time_delta,
        "idx_start": idx_start,
        "idx_end": idx_end,
        "filtered_anomalies": filtered_anomalies,
        # "title": "Stage %s" %(stage),
        "title": "",
        "x": x,
    }
    plot.process(obj)


## Plot anomalies

We plot the anomalies as they happen, both single anomalies and groups.


In [None]:
import filter
import plot

# Produce one figure per anomaly, restricted to the columns it attacks.

# Time differences are divided by this delta to obtain row offsets.
time_delta = np.timedelta64(1, "s")
# Margin around each anomaly window; loop-invariant, so computed once and
# expressed through time_delta to keep the unit explicit.
padding = 300 * time_delta

# To plot only a hand-picked subset, replace the assignment below with a list
# of dicts shaped like this example:
# anomalies_for_plotting = [
#     {
#         "attack_points": [
#             "AIT504",
#         ],
#
#         "attack_stages": [
#             "P5",
#         ]
#     }
# ]
anomalies_for_plotting = anomalies

for anomaly_for_plotting in anomalies_for_plotting:
    columns = anomaly_for_plotting["attack_points"]

    # NOTE(review): the filter argument here is the attack-point collection,
    # whereas the other cells pass None or a stage name; confirm that
    # filter.get_times accepts all three forms.
    time_start, time_end, filtered_anomalies = filter.get_times(anomalies, columns)

    # Build new values instead of using `-=` / `+=`: if get_times returns NumPy
    # arrays, in-place arithmetic would also modify whatever else references them.
    time_start = time_start - padding
    time_end = time_end + padding
    # Number of time_delta steps inside the padded window.
    time_len = int((time_end - time_start) / time_delta)

    # One x value per step, starting at the padded window start.
    x = time_start + np.arange(time_len) * time_delta
    # Offsets of the window boundaries relative to the dataset start time.
    idx_start = int((time_start - df_time_start) / time_delta)
    idx_end = int((time_end - df_time_start) / time_delta)

    # NOTE(review): the tag is built only from the attack points, so anomalies
    # sharing the same points write to the same output file; confirm intended.
    tag = "_".join(columns)
    df_custom = filter.load_df(df_file_loc, columns)

    obj = {
        "name": tag,
        "file_loc": "../output/anomaly_%s.png" %(tag),
        "df": df_custom,
        "time_start": time_start,
        "time_delta": time_delta,
        "idx_start": idx_start,
        "idx_end": idx_end,
        "filtered_anomalies": filtered_anomalies,
        # The figure title is left empty.
        "title": "",
        "x": x,
    }
    plot.process(obj)
