In [1]:
from pathlib import Path
from io import StringIO
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    precision_recall_curve,
)

from util import (
    make_intervals,
    compute_event_wise_metrics,
    predict_with_PCA,
)

# Load original data files (xlsx) and save them as parquet

Loading `xlsx` data with pandas can be very slow. In the following cell, we load the original `xlsx` files (as downloaded from iTrust) once and save them as `parquet` for future use.

In [None]:
# One-off conversion of the original SWaT spreadsheets to parquet.
# Everything is read from and written to the same relative DATADIR, which is
# also where the loading cell looks for the parquet files.
DATADIR = Path("./data/SWaT")

# Both spreadsheets are parsed the same way: the first row is skipped and the
# " Timestamp" column (note the leading space in the header and in the values)
# becomes the index.
for stem in ("SWaT_Dataset_Normal_v1", "SWaT_Dataset_Attack_v0"):
    parquet_path = DATADIR / f"{stem}.parquet"
    if parquet_path.exists():
        # Reading xlsx is slow; do not redo a conversion that already exists.
        # Delete the parquet file to force a fresh conversion.
        continue

    df = pd.read_excel(
        DATADIR / f"{stem}.xlsx",
        skiprows=[0],
        parse_dates=[" Timestamp"],
        date_format=" %d/%m/%Y %I:%M:%S %p",
        index_col=" Timestamp",
    )
    # Drop the leading space from the index name before saving.
    df.index.name = "Timestamp"
    df.to_parquet(parquet_path)

# Load data (parquet)

In [2]:
# Location of the parquet files and of the attack timestamp list.
DATADIR = Path("./data/SWaT")

normal_path = DATADIR / "SWaT_Dataset_Normal_v1.parquet"
attack_path = DATADIR / "SWaT_Dataset_Attack_v0.parquet"
normal = pd.read_parquet(normal_path)
attack = pd.read_parquet(attack_path)

# Integer labels (1 where the row is tagged "Attack") and their boolean form.
labels = attack["Normal/Attack"].eq("Attack").astype(int)
y_true = labels.to_numpy() == 1

# Start/end timestamps of each attack, parsed as day/month/year 24-hour times.
attacks_ts_path = DATADIR / "SWaT_Dataset_v0_attacks_timestamps.csv"
attacks_ts = pd.read_csv(
    attacks_ts_path,
    parse_dates=["StartTime", "EndTime"],
    date_format="%d/%m/%Y %H:%M:%S",
)

# Make Labels
Labels from `attack["Normal/Attack"]` do not reflect the attacks provided by SWaT's owner in `List_of_attacks_Final_2015.pdf`. You can still use `attack["Normal/Attack"]` by skipping the next two cells and passing `gt_intervals=None` when calling `compute_event_wise_metrics`.

In [3]:
# Build point-wise labels and the list of ground-truth event intervals from
# the attack start/end timestamps.


def _first_position(timestamp):
    """Return the position of the first row of `attack` whose index equals `timestamp`.

    Raises:
        ValueError: if `timestamp` does not occur in `attack.index`.
    """
    # Vectorised comparison instead of scanning a Python list of timestamps.
    matches = np.flatnonzero(attack.index == timestamp)
    if matches.size == 0:
        raise ValueError(f"{timestamp} is not in the attack index")
    return int(matches[0])


y_true_ts = np.zeros(len(labels))
gt_intervals = []
# Select the two columns by name so the loop does not depend on column order
# or on the CSV having exactly two columns.
for start_time, end_time in attacks_ts[["StartTime", "EndTime"]].itertuples(index=False):
    onset = _first_position(start_time)
    # +1 so the interval is end-exclusive and the EndTime row itself is labelled.
    offset = _first_position(end_time) + 1
    y_true_ts[onset:offset] = 1
    gt_intervals.append((onset, offset))
y_true = y_true_ts == 1

print("Contamination rate:", f"{y_true.mean()*100:.2f}")
print("Number of anomalous events:", len(gt_intervals))
# Each interval is (onset, offset); the difference is the event length in rows.
event_lengths = np.diff(gt_intervals).reshape(-1)
print("Min event length:", np.min(event_lengths))
print("Max event length:", np.max(event_lengths))
print("Average event length:", round(np.mean(event_lengths)))
print("Median event length:", round(np.median(event_lengths)))

Contamination rate: 11.98
Number of anomalous events: 35
Min event length: 101
Max event length: 34209
Average event length: 1540
Median event length: 444


# Hyperparameters
This is the most sensitive part of the whole experiment. Most works use loads of hyperparameters and report the best obtained results, often without disclosing or explaining the choice of hyperparameters' values.

In the following, we set the values of hyperparameters related to feature scaling, clipping, number of PCA components, and history size for score smoothing. These values yield state-of-the-art performance with the considered metrics, but you can achieve better performance for some metrics by tweaking some of these values.

In [4]:
# Seed given to NumPy's global random generator before PCA is fitted.
SEED = 1234

# Target feature range handed to MinMaxScaler.
MIN_SCALE, MAX_SCALE = -1.0, 1.0

# Bounds used to clip the scaled test features before scoring.
MIN_CLIP, MAX_CLIP = -5.0, 5.0

# Number of principal components kept by PCA.
PCA_N_COMP = 22
# Passed to predict_with_PCA as `smooth_n` (history size for score smoothing).
SCORE_SMOOTHING_HISTORY_SIZE = 30

# Algorithm

In [5]:
# Feature matrices: every column except the label, as independent NumPy copies.
label_column = "Normal/Attack"
X_train = normal.drop(columns=[label_column]).to_numpy().copy()
X_test = attack.drop(columns=[label_column]).to_numpy().copy()
nb_features = X_train.shape[1]

# The scaler is fitted on the normal data only, then applied to both sets.
scaler = MinMaxScaler(feature_range=(MIN_SCALE, MAX_SCALE))
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Seed NumPy's global generator before fitting.
np.random.seed(SEED)

# Fit PCA on the scaled normal data.
pca = PCA(n_components=PCA_N_COMP).fit(X_train)

# Clip the scaled test features, then score them with the fitted PCA.
X_test_clipped = np.clip(X_test, MIN_CLIP, MAX_CLIP)
residual = predict_with_PCA(
    pca,
    X_test_clipped,
    smooth_n=SCORE_SMOOTHING_HISTORY_SIZE,
)
# The value returned by predict_with_PCA is used directly as the anomaly score.
scores = residual

# Evaluation

## Point-wise F1 score
We first compute the best point-wise F1 score, then we use the corresponding threshold to compute other metrics.

In [7]:
precision, recall, thresholds = precision_recall_curve(y_true, scores)

# F1 at every point of the curve; the small constant keeps the ratio defined
# when precision + recall is zero.
f1_numerator = 2 * recall * precision
f1_denominator = recall + precision + 1e-10
f1_scores = f1_numerator / f1_denominator

# Best point-wise F1, shown as the cell output.
round(f1_scores.max(), 4)

0.8096

## Event-wise metrics
For these, we use the threshold that yields the best **point-wise** F1 score.

In [8]:
# Position of the highest point-wise F1 and the threshold stored at that position.
argmax = np.argmax(f1_scores)
best_threshold = thresholds[argmax]

In [9]:
# Binary predictions at the threshold that maximises the point-wise F1.
y_pred = scores >= best_threshold

event_metrics = compute_event_wise_metrics(y_true, y_pred, gt_intervals)
TP_ew, FP_ew, FN_ew, P_ew, R_ew, F1_ew, F1_c = event_metrics

# (label, value) pairs, printed one per line in this order.
report = [
    ("Point-wise F1:\t\t", round(f1_score(y_true, y_pred), 4)),
    ("Event-wise F1 (F1_ew):\t", round(F1_ew, 4)),
    ("Composite F1 (F1_c):\t", round(F1_c, 4)),
    ("True positive events:\t", TP_ew),
    ("False positive events:\t", FP_ew),
]
for label, value in report:
    print(label, value)
print("False positive events:\t", FP_ew)


Point-wise F1:		 0.8096
Event-wise F1 (F1_ew):	 0.5551
Composite F1 (F1_c):	 0.5955
True positive events:	 15
False positive events:	 4


## Metrics without smoothing

You can achieve a much better **composite F1** score by setting `SCORE_SMOOTHING_HISTORY_SIZE` to `None` (no smoothing) instead of 30. This will have little impact on the **point-wise F1**, but the **event-wise** score will decrease considerably due to a high number of false alarms at the event level.

In [17]:
# Re-run scoring and evaluation with score smoothing disabled, so that this
# cell produces the no-smoothing metrics on a fresh Restart & Run All instead
# of repeating the smoothed results.
# NOTE(review): `smooth_n=None` is taken to mean "no smoothing", as stated in
# the text above this cell for SCORE_SMOOTHING_HISTORY_SIZE - confirm in util.
scores_nosmooth = predict_with_PCA(
    pca,
    X_test.clip(min=MIN_CLIP, max=MAX_CLIP),
    smooth_n=None,
)

# The threshold is re-derived from the unsmoothed scores, using the same
# best-point-wise-F1 rule as for the smoothed scores.
precision_ns, recall_ns, thresholds_ns = precision_recall_curve(
    y_true, scores_nosmooth
)
f1_scores_ns = 2 * recall_ns * precision_ns / (recall_ns + precision_ns + 1e-10)
best_threshold_ns = thresholds_ns[f1_scores_ns.argmax()]

# Distinct names keep the smoothed results (`scores`, `best_threshold`,
# `y_pred`, ...) intact for any cell that uses them.
y_pred_ns = scores_nosmooth >= best_threshold_ns

(
    TP_ew_ns,
    FP_ew_ns,
    FN_ew_ns,
    P_ew_ns,
    R_ew_ns,
    F1_ew_ns,
    F1_c_ns,
) = compute_event_wise_metrics(y_true, y_pred_ns, gt_intervals)

print("Point-wise F1:\t\t", round(f1_score(y_true, y_pred_ns), 4))
print("Event-wise F1 (F1_ew):\t", round(F1_ew_ns, 4))
print("Composite F1 (F1_c):\t", round(F1_c_ns, 4))
print("True positive events:\t", TP_ew_ns)
print("False positive events:\t", FP_ew_ns)

Point-wise F1:		 0.8015
Event-wise F1 (F1_ew):	 0.0743
Composite F1 (F1_c):	 0.7579
True positive events:	 22
False positive events:	 533
