In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from matplotlib.pyplot import figure
from sklearn.metrics import f1_score, confusion_matrix, roc_auc_score, recall_score, precision_score

In [2]:
def calculate_score(data_frame, score_type, no_sensors):
    """
    Compute one binary classification score per sensor.

    Columns are addressed by position: the ``no_sensors`` columns starting at
    index ``4 * no_sensors + 2`` are read as the per-sensor predicted anomaly
    labels, and the single column directly after them (index
    ``5 * no_sensors + 2``) as the ground-truth label. Every sensor is scored
    against that same ground-truth column.

    :param data_frame: Complete Dataframe (column layout as described above)
    :param score_type: Metric to compute: "f1_score", "precision_score" or
        "recall_score" (each called with ``average="binary"``)
    :param no_sensors: Number of Sensors in Dataframe
    :return: pd.Series with one score per sensor, indexed
        "<metric> sensor_<i>". For an unrecognised ``score_type`` a warning is
        emitted and every entry is 0 with the index label "error".
    """
    import warnings

    first_pred_col = (4 * no_sensors) + 2
    truth_col = (5 * no_sensors) + 2
    labels_pred = data_frame.iloc[:, first_pred_col:truth_col]
    labels_true = data_frame.iloc[:, truth_col].values

    # Resolve the metric once, up front. Doing this outside the sensor loop also
    # guarantees that the index labels exist when no_sensors == 0.
    if score_type == "f1_score":
        metric, prefix = f1_score, "f1"
    elif score_type == "precision_score":
        metric, prefix = precision_score, "precision"
    elif score_type == "recall_score":
        metric, prefix = recall_score, "recall"
    else:
        metric, prefix = None, None
        # Keep the historical fallback (zeros labelled "error"), but make it
        # visible: an all-zero result is otherwise indistinguishable from a
        # genuinely bad score.
        warnings.warn(
            "Unknown score_type {!r}; returning zeros labelled 'error'.".format(score_type)
        )

    result_score = np.zeros(no_sensors)
    if metric is None:
        index_names = ["error"] * no_sensors
    else:
        index_names = ["{} sensor_{}".format(prefix, i) for i in range(no_sensors)]
        for sensor in range(no_sensors):
            anomaly_pred = labels_pred.iloc[:, sensor].values
            result_score[sensor] = metric(labels_true, anomaly_pred, average="binary")

    return pd.Series(data=result_score, index=index_names)

In [3]:
def get_confusion_matrix(data_frame, no_sensors, specific_sensor):
    """
    Return the confusion-matrix counts of a single sensor as a flat array.

    Columns are addressed by position: the ``no_sensors`` columns starting at
    index ``4 * no_sensors + 2`` are read as the per-sensor predicted anomaly
    labels, and the column at index ``5 * no_sensors + 2`` as the ground truth.

    :param data_frame: Complete Dataframe (column layout as described above)
    :param no_sensors: Number of Sensors in Dataframe
    :param specific_sensor: Zero-based position of the sensor to evaluate
    :return: 1-D array ``[tn, fp, fn, tp]`` (always four entries)
    """
    first_pred_col = (4 * no_sensors) + 2
    truth_col = (5 * no_sensors) + 2
    labels_pred = data_frame.iloc[:, first_pred_col:truth_col]
    labels_true = data_frame.iloc[:, truth_col].values

    # The sawtooth signal (sensor position 1) contains no anomalies; the
    # ground-truth column refers to the sine signal only, so an all-normal
    # truth vector is used for it instead.
    # NOTE(review): this special case is hard-coded for the two-signal data set.
    if specific_sensor == 1:
        labels_true = np.zeros(data_frame.shape[0], dtype=int)

    anomaly_pred = labels_pred.iloc[:, specific_sensor].values

    # Pin the label set: without it the matrix collapses to 1x1 when only one
    # class occurs in both vectors, and callers unpacking
    # ``tn, fp, fn, tp`` would fail.
    return confusion_matrix(labels_true, anomaly_pred, labels=[0, 1]).ravel()

In [4]:
def visualise_metric_per_sensor(results, title):
    """
    Draw one line plot per metric column of ``results`` against ``delta_t``.

    The subplots are stacked vertically in a single 10x20 figure, one per
    column other than "delta_t", in column order.

    :param results: DataFrame with a "delta_t" column plus one column per
        metric/sensor to plot
    :param title: Figure title (drawn as suptitle)
    :return: None; the figure is left as the current matplotlib figure
    """
    t = results.loc[:, "delta_t"]
    # Select metric columns by name rather than assuming "delta_t" is last.
    metric_columns = [column for column in results.columns if column != "delta_t"]

    # squeeze=False keeps ``axes`` a 2-D array even for a single subplot, so
    # ravel() works when there is exactly one metric column.
    fig, axes = plt.subplots(
        len(metric_columns),
        1,
        figsize=(10, 20),
        constrained_layout=False,
        squeeze=False,
    )

    for ax, column in zip(axes.ravel(), metric_columns):
        sns.lineplot(
            data=results,
            x=t,
            y=column,
            ax=ax,
            linewidth=1,
            color="black",
        )
        ax.set_xlabel("delta t [in samples]")

    # Lay out first, then reserve head room for the suptitle.
    fig.tight_layout()
    fig.subplots_adjust(top=0.95)
    fig.suptitle(title, fontsize=16)

In [5]:
def visualise_metric_machine(results, score_type, phase):
    """
    Plot the metric averaged over all sensors against ``delta_t``.

    The plot is drawn on the current matplotlib axes.

    :param results: DataFrame with a "delta_t" column plus one column per sensor
    :param score_type: Metric name, used only for the y-axis label
    :param phase: Phase name, used only in the plot title
    :return: None
    """
    t = results.loc[:, "delta_t"]
    complete_title = "CPPS Data - Beginning of Phase '{}'".format(phase)

    # Calculate the metric for the whole machine: sum over the sensor columns
    # and divide by the number of sensors. The divisor is taken from the frame
    # *without* "delta_t", otherwise the average is too small by a factor of
    # no_sensors / (no_sensors + 1).
    labels = results.drop(columns="delta_t")
    result_machine = labels.sum(axis=1) / labels.shape[1]

    # Visualise results
    ax = plt.gca()
    sns.lineplot(
        x=t,
        y=result_machine,
        linewidth=1,
        color="black",
        ax=ax,
    )
    ax.set_xlabel("delta t [in samples]")
    ax.set_ylabel("{} over all dim".format(score_type))
    ax.set_title(complete_title, fontsize=16, y=1.12)

# Evaluation of Prediction Interval Metric
## Setup

In [7]:
# Load the semicolon-separated CSV that the rest of the notebook evaluates.
all_data = pd.read_csv(
    filepath_or_buffer="../../files/classification/MLE/artfic_3sigma.csv",
    sep=";",
)

In [8]:
# Preview the first five rows to check the column layout.
all_data.head(n=5)

Unnamed: 0,ID,sine_signal target,sawtooth_signal target,sine_signal mu predicted,sawtooth_signal mu predicted,sine_signal sigma predicted,sawtooth_signal sigma predicted,mean normalised residual,sine_signal normalised residual,sawtooth_signal normalised residual,Anomaly Sensor_1,Anomaly Sensor_2,anomaly
0,21.0,0.537549,-2.124137,0.602129,-0.993502,0.122289,0.768085,-1.000056,-0.528093,-1.472018,0,0,0
1,22.0,0.579476,-1.477146,0.403997,-1.119213,0.123176,0.603125,0.415578,1.424619,-0.593464,0,0,0
2,23.0,0.208268,-0.559378,0.321282,-1.010299,0.122544,0.56121,-0.059375,-0.922231,0.803481,0,0,0
3,24.0,0.088735,-1.062486,0.103274,-0.803296,0.122567,0.543723,-0.297658,-0.118621,-0.476695,0,0,0
4,25.0,-0.024003,-0.312885,-0.072047,-0.578098,0.123433,0.513684,0.452768,0.389238,0.516297,0,0,0


# Evaluation Metrics
## F1-score

In [9]:
f1_score = calculate_score(all_data, "f1_score", 2)
print("F1 score: {}".format(f1_score[0]))

F1 score: 0.42282749675745784


In [10]:
print(f1_score)

f1 sensor_0    0.422827
f1 sensor_1    0.000000
dtype: float64


## Confusion Matrix
### Sine signal

In [13]:
# Legend for the confusion-matrix counts printed below.
print(
    "Positive --> Anomaly",
    "Negative --> Normal Behaviour",
    "--"*15,
    sep="\n",
)

# Sensor position 0; tn/fp/fn/tp stay available for the following cell.
tn, fp, fn, tp = get_confusion_matrix(all_data, 2, 0)
print(
    "Sensor No. {}:".format(1),
    "True negative: {}".format(tn),
    "False positive: {}".format(fp),
    "False negative: {}".format(fn),
    "True positive: {}".format(tp),
    sep="\n",
)

Positive --> Anomaly
Negative --> Normal Behaviour
------------------------------
Sensor No. 1:
True negative: 11342
False positive: 8
False negative: 437
True positive: 163


## Specificity (true negative rate)

In [15]:
# Share of actual negatives that were also predicted negative: tn / (tn + fp).
print(tn / (fp + tn))

0.999295154185022


### Sawtooth signal

In [16]:
# Legend for the confusion-matrix counts printed below.
print(
    "Positive --> Anomaly",
    "Negative --> Normal Behaviour",
    "--"*15,
    sep="\n",
)

# Sensor position 1; tn/fp/fn/tp stay available for the following cell.
tn, fp, fn, tp = get_confusion_matrix(all_data, 2, 1)
print(
    "Sensor No. {}:".format(2),
    "True negative: {}".format(tn),
    "False positive: {}".format(fp),
    "False negative: {}".format(fn),
    "True positive: {}".format(tp),
    sep="\n",
)

Positive --> Anomaly
Negative --> Normal Behaviour
------------------------------
Sensor No. 2:
True negative: 11925
False positive: 25
False negative: 0
True positive: 0


## Specificity (true negative rate)

In [17]:
# Share of actual negatives that were also predicted negative: tn / (tn + fp).
print(tn / (fp + tn))

0.997907949790795
