In [None]:
# NOTE(review): later cells use names such as `pd` and `load_data` before any explicit
# import -- presumably they come from this wildcard import; confirm and import them explicitly.
from helper import *
import sys  # explicit import: sys.path is modified on the next line

# Put ./tods first on the module search path before importing plotter
# (presumably plotter lives there -- confirm).
sys.path.insert(0, 'tods')
import plotter as pltr

In [None]:
# Hyper-parameters
dataset = 'SMD'           # selects which dataset-loading cell below runs ('SMD' or 'CTF')
detector = 'LSTM'         # selects the detector built in the fitting cell ('DeepLog' or 'LSTM')
threshold_opt = 'hybrid'  # not referenced by any other cell shown in this notebook
# Sliding-window length handed to the detector.
# Originally annotated as "in days" -- TODO confirm the unit (it may be a number of samples).
window_size = 1000
# Stride between consecutive windows: one tenth of the window length.
step_size = int(window_size * 0.1)

## Step 1. Load Dataset

In [None]:
if dataset == 'SMD':
    GROUP = 1
    ENTITY = 2
    SMD_BASE_PATH = 'Dataset/SMD'
    # train/, test/ and test_label/ all use the same per-machine file name.
    machine_file = 'machine-%d-%d.txt' % (GROUP, ENTITY)

    X_train = load_data('%s/train/%s' % (SMD_BASE_PATH, machine_file), header=False)
    X_train.columns = ['m%d' % i for i in range(X_train.shape[1])]
    # One evenly spaced timestamp per row between the two dates.
    X_train.index = pd.date_range('2021/03/02', '2021/03/21', periods=X_train.shape[0])
    X_train.index.name = 'timestamp'

    X_test = load_data('%s/test/%s' % (SMD_BASE_PATH, machine_file), header=False)
    X_test.columns = ['m%d' % i for i in range(X_test.shape[1])]
    X_test.index = pd.date_range('2021/03/21', '2021/4/8', periods=X_test.shape[0])
    X_test.index.name = 'timestamp'

    # Labels are read from the same base path as the data (previously hard-coded separately).
    y_true = pd.read_csv('%s/test_label/%s' % (SMD_BASE_PATH, machine_file), header=None)
    y_true.columns = ['label']
    y_true.index = X_test.index
    # The evaluation cells at the end of the notebook read the labels as `y`
    # (the name the CTF branch uses), so expose the same frame under that name too.
    y = y_true

In [None]:
if dataset == 'CTF':
    import pickle as pkl

    ENTITY = 0
    CTF_BASE_PATH = 'Dataset/CTF/processed'

    # The last row of each CSV is dropped (.iloc[:-1]).
    X_train = load_data('%s/train/%d.csv' % (CTF_BASE_PATH, ENTITY)).iloc[:-1]
    X_train.columns = ['m%d' % i for i in range(X_train.shape[1])]
    # One timestamp every 30 seconds, starting at the given date.
    X_train.index = pd.date_range(start='2020/04/18', freq='30s', periods=X_train.shape[0])
    X_train.index.name = 'timestamp'

    X_test = load_data('%s/test/%d.csv' % (CTF_BASE_PATH, ENTITY)).iloc[:-1]
    X_test.columns = ['m%d' % i for i in range(X_test.shape[1])]
    X_test.index = pd.date_range(start='2020/04/23', freq='30s', periods=X_test.shape[0])
    X_test.index.name = 'timestamp'

    # NOTE(security): pickle.load can execute arbitrary code -- only load trusted label files.
    with open('Dataset/CTF/label_result/%d.pkl' % ENTITY, 'rb') as f:
        y = pd.DataFrame(pkl.load(f), columns=['label'])
    y.index = X_test.index
    # The NumPy-conversion and metric cells read the labels as `y_true` (the name the SMD
    # branch binds); without this alias they raise NameError when dataset == 'CTF'.
    y_true = y

In [None]:
# Report (rows, columns) of both splits, train first.
for caption, split in (('Train Shape:', X_train), ('Test Shape:', X_test)):
    print(caption, split.shape)

In [None]:
# Plot 'm0' (the first column) over the training rows followed by the test rows.
train_then_test = pd.concat([X_train, X_test], axis=0)
pltr.plot_data(train_then_test, 'm0')

In [None]:
# plot the first dimension ('m0') of the test split only
pltr.plot_data(X_test, 'm0')

In [None]:
# Replace the DataFrames with plain NumPy arrays for the detector and the metrics.
# NOTE: this discards the timestamp index -- X_test.index is not available after this cell,
# and re-running the cell fails because ndarrays have no .to_numpy().
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
# The label frame has a single 'label' column, so to_numpy() alone yields shape (n, 1).
# Flatten to (n,) so that np.where(y_true == 1) returns row positions only and the
# scikit-learn metrics receive a 1-D target.
y_true = y_true.to_numpy().ravel()
print("window size:", window_size)

## Step 3. Anomaly Detection

In [None]:
import sys
import argparse
import os
import numpy as np
import pandas as pd
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn import metrics

In [None]:
# Build the detector selected by `detector`, then fit it on the training array.
if detector == 'DeepLog':
    from tods.sk_interface.detection_algorithm.DeepLog_skinterface import DeepLogSKI

    transformer = DeepLogSKI(
        window_size=window_size,
        features=X_train.shape[1],
        validation_size=0.3,
        hidden_size=4,
        preprocessing=False,
        verbose=1,
        batch_size=32,
        epochs=5,
    )
elif detector == 'LSTM':
    from tods.sk_interface.detection_algorithm.LSTMODetector_skinterface import LSTMODetectorSKI

    transformer = LSTMODetectorSKI(
        window_size=window_size,
        step_size=step_size,
        feature_dim=X_train.shape[1],
        hidden_dim=4,
        batch_size=32,
        epochs=5,
    )
else:
    # Fail fast: otherwise `transformer` stays undefined (or stale from a previous run)
    # and the scoring cell fails with a less helpful error.
    raise ValueError("Unknown detector %r; expected 'DeepLog' or 'LSTM'" % (detector,))

transformer.fit(X_train)

In [None]:
# Score the test array with every primitive held by the fitted detector.
# Each iteration rebinds the same three names, so only the last primitive's output survives.
# NOTE(review): relies on the private `_clf` attribute and on decision_function returning a
# 3-tuple -- presumably true for the LSTM detector; confirm before using with DeepLog.
for primitive in transformer.primitives:
    pred_score, relative_error_left_inds, relative_error_right_inds = primitive._clf.decision_function(X_test)

In [None]:
import plotly.graph_objects as go

## Plot decision scores
# Single line trace of the anomaly score; the x axis is the row position in X_test.
fig = go.Figure(
    data=[
        go.Scatter(x=np.arange(len(X_test)), y=pred_score, mode='lines', name='anomaly_score'),
    ]
)
fig.update_layout(showlegend=True, width=800, height=400)
fig.show()

In [None]:
# Three-sigma rule: a score counts as anomalous when it lies more than
# three standard deviations above the mean score.
mu, sigma = pred_score.mean(), pred_score.std()
threshold = 3*sigma + mu
y_pred = pred_score > threshold

In [None]:
x_test = pd.DataFrame(X_test)
# Row positions flagged by the threshold. np.flatnonzero returns one plain 1-D position
# array (also when y_pred is a column vector), whereas np.where returns a tuple that
# .loc would interpret as a (rows, columns) indexer.
pred_positions = np.flatnonzero(y_pred)
pred_anomalies = x_test.iloc[pred_positions]

col = 0  # column of x_test to display
fig = go.Figure()
fig.add_trace(go.Scatter(x=x_test.index, y=x_test[col], mode='lines', name='x_test'))
fig.add_trace(go.Scatter(x=pred_anomalies.index, y=pred_anomalies[col], mode='markers', name='Anomaly'))
fig.update_layout(showlegend=True, xaxis_title="Time", yaxis_title="value", height=400, width=800)
fig.show()

In [None]:
# y_true originates from a single-column label frame converted with to_numpy(), i.e. it can
# have shape (n, 1). np.where on that returns (rows, cols), which .iloc treats as a
# row AND column selection, so real_anomalies[col] would no longer be a single series.
# Flatten first and keep only the row positions.
true_positions = np.flatnonzero(np.asarray(y_true).ravel() == 1)
real_anomalies = x_test.iloc[true_positions]

fig = go.Figure()
fig.add_trace(go.Scatter(x=x_test.index, y=x_test[col], mode='lines', name='x_test'))
fig.add_trace(go.Scatter(x=real_anomalies.index, y=real_anomalies[col], mode='markers', name='Anomaly'))
fig.update_layout(showlegend=True, xaxis_title="Time", yaxis_title="value", height=400, width=800)
fig.show()

In [None]:
# Threshold-sweeping curves need the continuous anomaly score. Feeding the already
# thresholded y_pred collapses each curve to a single operating point and makes the
# "best threshold" search meaningless.
curve_labels = np.ravel(y_true)
curve_scores = np.ravel(pred_score)

precision, recall, thresholds = precision_recall_curve(curve_labels, curve_scores)
# F1 at every PR point; defined as 0 where precision + recall == 0 instead of NaN.
pr_sum = precision + recall
f1_scores = np.divide(2 * recall * precision, pr_sum, out=np.zeros_like(pr_sum, dtype=float), where=pr_sum > 0)

# Store the ROC thresholds under their own name so the scalar 3-sigma `threshold`
# computed earlier is not overwritten.
fpr, tpr, roc_thresholds = metrics.roc_curve(curve_labels, curve_scores)
roc_auc = metrics.auc(fpr, tpr)

In [None]:
# Accuracy of the thresholded predictions (y_pred) against the ground-truth labels (y_true).
print('Accuracy Score: ', accuracy_score(y_true, y_pred))

In [None]:
# Confusion matrix, displayed as the cell output (scikit-learn layout: rows = true class,
# columns = predicted class).
confusion_matrix(y_true, y_pred)

In [None]:
# Text summary of the per-class metrics for the thresholded predictions.
print(classification_report(y_true, y_pred))

In [None]:
# precision_recall_curve returns one more precision/recall point than thresholds (the final
# point has no threshold), so restrict the search to entries that have one; otherwise the
# argmax can index past the end of `thresholds`. NaN F1 values (0/0) are treated as 0 so
# they cannot win or poison the maximum.
candidate_f1 = np.nan_to_num(np.asarray(f1_scores, dtype=float)[:len(thresholds)], nan=0.0)
best_idx = int(np.argmax(candidate_f1))
print('Best threshold: ', thresholds[best_idx])
print('Best F1-Score: ', candidate_f1[best_idx])

In [None]:
# ROC curve of the detector, with the area under the curve shown in the legend.
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC')
plt.legend(loc = 'lower right')
plt.show()

In [None]:
# NOTE(review): `results2` is not defined in any cell visible in this notebook -- presumably
# it is left over from a removed cell; this fails with NameError on a fresh kernel.
results = results2
# NOTE(review): X_test was rebound to a NumPy array by the earlier `X_test.to_numpy()` cell,
# so `.index` is no longer available here; the timestamps must be kept before that conversion.
results['timestamp'] = X_test.index[:results.shape[0]]

## Step 4. Label Anomalies on the Test Dataset

## Step 5. Evaluate Results

In [None]:
# Plot the labels selected by the result timestamps for column `col`.
# NOTE(review): `match_data` is not defined in any cell visible in this notebook, and `y` is
# only bound by the CTF loading branch -- confirm both exist before running.
pltr.plot_anomaly(match_data, y.loc[results['timestamp']], col)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# scikit-learn metrics take (y_true, y_pred) in that order. Passing the detections first
# transposes the confusion matrix, i.e. swaps FP and FN (and therefore precision and recall).
# NOTE(review): assumes `y` holds the ground-truth labels and results['label'] the detector
# output -- confirm against the cell that builds `results`.
y_expected = y.loc[results['timestamp'], 'label']
y_detected = results['label']

ac = accuracy_score(y_expected, y_detected)
# .ravel() on the 2x2 matrix yields TN, FP, FN, TP (requires both classes to be present).
tn, fp, fn, tp = confusion_matrix(y_expected, y_detected).ravel()

print('Accuracy:', ac)
print('TN:', tn)
print('FP:', fp)
print('FN:', fn)
print('TP:', tp)

In [None]:
# Recall = TP / (TP + FN), precision = TP / (TP + FP).
# Degenerate cases (no actual positives / no predicted positives) report 0.0 instead of
# dividing by zero.
recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0

In [None]:
# Report the two scores computed from the confusion-matrix counts, precision first.
for caption, score in (('Precision:', precision), ('Recall:', recall)):
    print(caption, score)