In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from matplotlib.pyplot import figure
from sklearn.metrics import f1_score, confusion_matrix, roc_auc_score, recall_score, precision_score
import numpy as np
import statistics

In [2]:
def calculate_score(data_frame, score_type, no_sensors):
    """
    Compute one classification score per sensor against the shared ground truth.

    The per-sensor predictions are read positionally from the columns
    ``4 * no_sensors + 2`` up to (excluding) ``5 * no_sensors + 2`` and are
    compared with the labels stored in the ``"status"`` column.

    :param data_frame: Dataframe holding the ``"status"`` column and the per-sensor
        prediction columns at the positions described above
    :param score_type: One of ``"f1_score"``, ``"precision_score"`` or ``"recall_score"``
    :param no_sensors: Number of sensors, i.e. number of prediction columns to score
    :return: ``pd.Series`` with one score per sensor, indexed ``"<metric> sensor_<i>"``.
        For an unrecognised ``score_type`` a warning is issued, every value is 0 and
        every index label is ``"error"``.
    """
    # Select the metric once; it does not depend on the sensor.
    if score_type == "f1_score":
        metric, prefix = f1_score, "f1"
    elif score_type == "precision_score":
        metric, prefix = precision_score, "precision"
    elif score_type == "recall_score":
        metric, prefix = recall_score, "recall"
    else:
        metric, prefix = None, None

    result_score = np.zeros(no_sensors)

    if metric is None:
        # Keep the historical fallback (zeros labelled "error"), but make it visible:
        # a mistyped score_type would otherwise look like a genuine score of 0.
        import warnings
        warnings.warn(
            "Unknown score_type {!r}; returning zeros labelled 'error'.".format(score_type),
            stacklevel=2,
        )
        return pd.Series(data=result_score, index=["error"] * no_sensors)

    # Built before the loop so the index also exists when no_sensors == 0.
    index_names = ["{} sensor_{}".format(prefix, i) for i in range(no_sensors)]

    labels_pred = data_frame.iloc[:, (4 * no_sensors) + 2:(5 * no_sensors) + 2]
    labels_true = data_frame["status"].values

    for sensor in range(no_sensors):
        anomaly_pred = labels_pred.iloc[:, sensor].values
        result_score[sensor] = metric(labels_true, anomaly_pred, average="binary")

    return pd.Series(data=result_score, index=index_names)

In [3]:
def visualise_metric_per_sensor(results, title):
    """
    Plot every metric column of ``results`` against ``delta_t``, one subplot per column.

    :param results: Dataframe with a ``"delta_t"`` column; every other column is
        drawn as a black line in its own subplot
    :param title: Text used as the figure's super-title
    :return: None
    """
    t = results.loc[:, "delta_t"]
    # Select the metric columns by name instead of assuming "delta_t" is the last column.
    metric_columns = [column for column in results.columns if column != "delta_t"]

    # squeeze=False keeps ``axes`` a 2-D array even for a single subplot,
    # so ``ravel`` works for any number of metric columns.
    fig, axes = plt.subplots(len(metric_columns), 1, figsize=(10, 20),
                             constrained_layout=False, squeeze=False)
    ax = axes.ravel()

    for i, column in enumerate(metric_columns):
        sns.lineplot(data=results,
                     x=t,
                     y=column,
                     ax=ax[i],
                     linewidth=1,
                     color="black")
        ax[i].set_xlabel("delta t [in samples]")

    plt.tight_layout()
    # Reserve room at the top of the figure for the super-title.
    plt.subplots_adjust(top=0.95)
    plt.suptitle(title, fontsize=16)

In [4]:
def visualise_metric_machine(results, score_type, phase):
    """
    Plot the metric averaged over all sensor columns against ``delta_t``.

    :param results: Dataframe with a ``"delta_t"`` column; all remaining columns
        are treated as per-sensor metric values
    :param score_type: Name of the metric, used in the y-axis label
    :param phase: Phase identifier inserted into the plot title
    :return: None
    """
    t = results.loc[:, "delta_t"]
    complete_title = "CPPS Data - Beginning of Phase '{}'".format(phase)

    # Calculate the metric for the whole machine: sum over the sensor columns and
    # divide by the number of sensor columns ("delta_t" must not be counted).
    labels = results.drop(columns="delta_t")
    result_machine = labels.sum(axis=1) / labels.shape[1]

    # Visualise results
    sns.lineplot(x=t,
                 y=result_machine,
                 linewidth=1,
                 color="black")
    plt.xlabel("delta t [in samples]")
    plt.ylabel("{} over all dim".format(score_type))
    plt.title(complete_title, fontsize=16, y=1.12)

In [5]:
def vis_cumulative_detection(dataframe, no_features, first_feature, last_feature, location_line1, location_line2, subset):
    """
    Plot the cumulative sum of selected columns over the first dataframe column.

    For every feature number ``i`` in ``range(first_feature, last_feature)`` the column
    at position ``i + 4 * no_features + 1`` is cumulated and drawn in its own subplot,
    together with two dashed vertical lines and three fixed phase annotations.

    :param dataframe: Dataframe whose first column is used as x-axis
    :param no_features: Number of features; used to locate the plotted columns
    :param first_feature: First feature number to plot (inclusive)
    :param last_feature: Last feature number to plot (exclusive)
    :param location_line1: x-position of the first vertical marker line
    :param location_line2: x-position of the second vertical marker line
    :param subset: Number of subplots to create; must be at least
        ``last_feature - first_feature``
    :return: None
    """
    # squeeze=False keeps ``axes`` an array even when only one subplot is requested.
    fig, axes = plt.subplots(subset, 1, figsize=(10, 6), dpi=200, squeeze=False)
    ax = axes.ravel()
    t = dataframe.iloc[:, 0]

    for i in range(first_feature, last_feature):
        # All drawing for feature i goes to the same subplot; labels and title
        # previously used ax[i-1], which only matched when first_feature == 1.
        panel = ax[i - first_feature]

        sns.lineplot(data=dataframe,
                     x=t,
                     y=dataframe.iloc[:, i + 4 * no_features + 1].cumsum(),
                     ax=panel,
                     color="blue",)
        panel.set_ylim(0, 5000)
        panel.axvline(location_line1, color="r", linestyle="--", linewidth=2)
        panel.axvline(location_line2, color="r", linestyle="--", linewidth=2)
        panel.text(7000, 4000, "Phase 1: No Error", fontsize=10)
        panel.text(25000, 4000, "Phase 2: Small Error", fontsize=10)
        panel.text(41000, 1000, "Phase 3: Large Error", fontsize=10)

        # Axis labels and title
        panel.set_xlabel("time [in s]")
        panel.set_ylabel("Cumulative sum")
        panel.set_title("Sensor No.{}".format(i))

In [6]:
def get_confusion_matrix_machine(data_frame, no_sensors):
    """
    Collect the confusion-matrix cells of every sensor.

    For sensor ``i`` the column at position ``4 * no_sensors + 2 + i`` is used as
    prediction and compared with the ``"status"`` column.

    :param data_frame: Dataframe holding the ``"status"`` column and the prediction columns
    :param no_sensors: Number of sensors to evaluate
    :return: Tuple ``(tp, fp, tn, fn)`` of lists with one entry per sensor
    """
    counts = {"tp": [], "fp": [], "tn": [], "fn": []}
    first_prediction_column = (4 * no_sensors) + 2

    for sensor in range(no_sensors):
        predicted = data_frame.iloc[:, first_prediction_column + sensor].values
        actual = data_frame["status"].values

        # The unpacking expects exactly four cells, i.e. a 2x2 matrix.
        cells = confusion_matrix(actual, predicted).ravel()
        tn_sensor, fp_sensor, fn_sensor, tp_sensor = cells

        counts["tp"].append(tp_sensor)
        counts["fp"].append(fp_sensor)
        counts["tn"].append(tn_sensor)
        counts["fn"].append(fn_sensor)

    return counts["tp"], counts["fp"], counts["tn"], counts["fn"]

In [7]:
def vis_anomaly_detection(dataframe, no_features, first_feature, last_feature, failures, line, training,
                          training_span=(3283834, 3523830)):
    """
    Plot the cumulative sum of selected columns over the ``"up time"`` column.

    For every feature number ``i`` in ``range(first_feature, last_feature)`` the column
    at position ``i + 4 * no_features + 1`` is cumulated and drawn in its own subplot.

    :param dataframe: Dataframe with an ``"up time"`` column used as x-axis
    :param no_features: Number of features; sets the number of subplots and is used
        to locate the plotted columns
    :param first_feature: First feature number to plot (inclusive)
    :param last_feature: Last feature number to plot (exclusive)
    :param failures: Iterable of x-positions at which a dashed red line is drawn
    :param line: If truthy, draw the lines given by ``failures``
    :param training: If truthy, shade the x-range given by ``training_span`` in green
    :param training_span: ``(start, end)`` x-range shaded when ``training`` is truthy;
        defaults to the previously hard-coded values
    :return: None
    """
    # squeeze=False keeps ``axes`` an array even when no_features == 1.
    fig, axes = plt.subplots(no_features, 1, figsize=(30, 40), squeeze=False)
    ax = axes.ravel()
    t = dataframe["up time"]
    span_start, span_end = training_span

    for i in range(first_feature, last_feature):
        # All drawing for feature i goes to the same subplot; the shaded span
        # previously used ax[i-1], which only matched when first_feature == 1.
        panel = ax[i - first_feature]

        sns.lineplot(data=dataframe,
                     x=t,
                     y=dataframe.iloc[:, i + 4 * no_features + 1].cumsum(),
                     ax=panel,
                     color="blue",)

        # Highlight the span (presumably the training period - confirm with the data)
        if training:
            panel.axvspan(span_start, span_end, alpha=0.2, color='green')

        # Plot line for failure points
        if line:
            for failure in failures:
                panel.axvline(failure, 0, 2, color="r", linestyle="--", linewidth=2)

        # Set y-limit
        panel.set_ylim(0, 15000)

# Evaluation Prediction Interval
## Setup

In [8]:
# Largest "up time" value that is still part of the evaluation window.
FIRST_FAILURE_UP_TIME = 3807966

# Load the results of the four architectures (semicolon-separated CSV files).
all_data_A1 = pd.read_csv(
    "../../files/classification/MLE/2_research_question/phm67_2sigma_A1.csv", sep=";"
)
all_data_A2 = pd.read_csv(
    "../../files/classification/MLE/2_research_question/phm67_2sigma_A2.csv", sep=";"
)
all_data_A3 = pd.read_csv(
    "../../files/classification/MLE/2_research_question/phm67_2sigma_A3.csv", sep=";"
)
all_data_A4 = pd.read_csv(
    "../../files/classification/MLE/2_research_question/phm67_2sigma_A4.csv", sep=";"
)

# Keep only the rows at or before the cut-off for each architecture.
until_first_failure_A1 = all_data_A1[all_data_A1["up time"] <= FIRST_FAILURE_UP_TIME]
until_first_failure_A2 = all_data_A2[all_data_A2["up time"] <= FIRST_FAILURE_UP_TIME]
until_first_failure_A3 = all_data_A3[all_data_A3["up time"] <= FIRST_FAILURE_UP_TIME]
until_first_failure_A4 = all_data_A4[all_data_A4["up time"] <= FIRST_FAILURE_UP_TIME]

# Model Evaluation - Architecture 1
### F1 Score 

In [9]:
score = calculate_score(until_first_failure_A1, "f1_score", 12)
# The Series is indexed by strings, so the first sensor is selected by position
# explicitly; score[0] relies on a deprecated positional fallback.
print(score.iloc[0])

0.7035222180340913


### Confusion Matrix

In [10]:
# Legend for the confusion-matrix terms printed below.
print("Positive --> Anomaly")
print("Negative --> Normal Behaviour")
print("-" * 30)
tp, fp, tn, fn = get_confusion_matrix_machine(until_first_failure_A1, 12)
# Report the counts of the first sensor (index 0) only.
for caption, counts in (("True negative", tn),
                        ("False positive", fp),
                        ("False negative", fn),
                        ("True positive", tp)):
    print("{}: {}".format(caption, counts[0]))

Positive --> Anomaly
Negative --> Normal Behaviour
------------------------------
True negative: 52600
False positive: 4856
False negative: 2223
True positive: 8399


## Specificity (true negative rate)

In [11]:
# TN / (TN + FP) for the first sensor (index 0).
negatives_total = tn[0] + fp[0]
print(tn[0] / negatives_total)

0.9154831523252576


# Model Evaluation - Architecture 2
### F1 Score 

In [12]:
score = calculate_score(until_first_failure_A2, "f1_score", 12)
# The Series is indexed by strings, so the first sensor is selected by position
# explicitly; score[0] relies on a deprecated positional fallback.
print(score.iloc[0])

0.6649087922538255


### Confusion Matrix

In [13]:
# Legend for the confusion-matrix terms printed below.
print("Positive --> Anomaly")
print("Negative --> Normal Behaviour")
print("-" * 30)
tp, fp, tn, fn = get_confusion_matrix_machine(until_first_failure_A2, 12)
# Report the counts of the first sensor (index 0) only.
for caption, counts in (("True negative", tn),
                        ("False positive", fp),
                        ("False negative", fn),
                        ("True positive", tp)):
    print("{}: {}".format(caption, counts[0]))

Positive --> Anomaly
Negative --> Normal Behaviour
------------------------------
True negative: 52635
False positive: 4821
False negative: 2931
True positive: 7691


## Specificity (true negative rate)

In [14]:
# TN / (TN + FP) for the first sensor (index 0).
negatives_total = tn[0] + fp[0]
print(tn[0] / negatives_total)

0.9160923141186299


# Model Evaluation - Architecture 3
### F1 Score 

In [15]:
score = calculate_score(until_first_failure_A3, "f1_score", 12)
# The Series is indexed by strings, so the first sensor is selected by position
# explicitly; score[0] relies on a deprecated positional fallback.
print(score.iloc[0])

0.6996761896954543


### Confusion Matrix

In [16]:
# Legend for the confusion-matrix terms printed below.
print("Positive --> Anomaly")
print("Negative --> Normal Behaviour")
print("-" * 30)
tp, fp, tn, fn = get_confusion_matrix_machine(until_first_failure_A3, 12)
# Report the counts of the first sensor (index 0) only.
for caption, counts in (("True negative", tn),
                        ("False positive", fp),
                        ("False negative", fn),
                        ("True positive", tp)):
    print("{}: {}".format(caption, counts[0]))

Positive --> Anomaly
Negative --> Normal Behaviour
------------------------------
True negative: 52216
False positive: 5240
False negative: 2087
True positive: 8535


## Specificity (true negative rate)

In [17]:
# TN / (TN + FP) for the first sensor (index 0).
negatives_total = tn[0] + fp[0]
print(tn[0] / negatives_total)

0.9087997772208298


# Model Evaluation - Architecture 4
### F1 Score 

In [18]:
score = calculate_score(until_first_failure_A4, "f1_score", 12)
# The Series is indexed by strings, so the first sensor is selected by position
# explicitly; score[0] relies on a deprecated positional fallback.
print(score.iloc[0])

0.6648757555406313


### Confusion Matrix

In [19]:
# Legend for the confusion-matrix terms printed below.
print("Positive --> Anomaly")
print("Negative --> Normal Behaviour")
print("-" * 30)
tp, fp, tn, fn = get_confusion_matrix_machine(until_first_failure_A4, 12)
# Report the counts of the first sensor (index 0) only.
for caption, counts in (("True negative", tn),
                        ("False positive", fp),
                        ("False negative", fn),
                        ("True positive", tp)):
    print("{}: {}".format(caption, counts[0]))

Positive --> Anomaly
Negative --> Normal Behaviour
------------------------------
True negative: 53168
False positive: 4288
False negative: 3197
True positive: 7425


## Specificity (true negative rate)

In [20]:
# TN / (TN + FP) for the first sensor (index 0).
negatives_total = tn[0] + fp[0]
print(tn[0] / negatives_total)

0.925368978000557
