In [10]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import pandas as pd
import numpy as np

# own Modules 
from anomaly_classifier import AnomalyClassifier

# MLE Mode: 

In [11]:
# Select the MLE configuration. `mode` is substituted into the prediction and
# classification paths of the parameter cells; `threshold` is substituted into
# the classification file name.
# NOTE(review): a later cell assigns the same two names for MSE mode --
# execute only the cell for the mode you want.
mode, threshold = "MLE", "3sigma"

# MSE Mode:

In [2]:
# Select the MSE configuration. `mode` is substituted into the prediction and
# classification paths of the parameter cells; `threshold` is substituted into
# the classification file name.
# NOTE(review): an earlier cell assigns the same two names for MLE mode --
# execute only the cell for the mode you want.
mode, threshold = "MSE", "max_minus_{}".format("20percent")

## Parameters phm data

In [12]:
# Configuration for the PHM data challenge run (test set of recipe 67).
# NOTE(review): the other "Parameters ..." cells assign `param` as well;
# whichever of them is executed last determines the configuration in use.
param = {
    "data": {
        # Original sensor file; read again later to recover the dropped columns.
        "path": "../../data/phm_data_challenge/recipe/dataset_for_each_recipe/test/test_recipe_67_labeled_short.csv",
        # Columns that are merged back into the results (via "ID") before saving.
        "droped_feature": [
            "stage",
            "Lot",
            "runnum",
            "recipe",
            "recipe_step",
            "up time",
            "ongoing time",
            "ETCHSOURCEUSAGE",
            "ETCHAUXSOURCETIMER",
            "ETCHAUX2SOURCETIMER",
            "FIXTURESHUTTERPOSITION",
            "ROTATIONSPEED",
            "status",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 12,
    },
    "anomaly_detection": {
        # Passed to apply_euclidean_distance_metric.
        "threshold_quadratic_difference": 0.5,
        # Passed as `threshold_sensors`; the same value is used for all 12 entries.
        "threshold_q_d_each_sensor": [3.561851] * 12,
        # Passed to apply_prediction_interval_metric.
        "no_standard_deviation": 3,
    },
    "prediction_path": "../visualisation/files/prediction/{}/phm_67_SQ36.csv".format(mode),
    "classification_path": "../visualisation/files/classification/{}/phm_67_SQ36_{}.csv".format(
        mode, threshold
    ),
}

## Parameters artificial data

In [3]:
# Configuration for the artificial two-signal data set.
# NOTE(review): the other "Parameters ..." cells assign `param` as well;
# whichever of them is executed last determines the configuration in use.
param = {
    "data": {
        # Original sensor file; read again later to recover the dropped columns.
        "path": "../../data/artifical_signals/artifical_2_signals_errors.csv",
        # Columns that are merged back into the results (via "ID") before saving.
        "droped_feature": [
            "anomaly",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 2,
    },
    "anomaly_detection": {
        # Passed to apply_euclidean_distance_metric.
        "threshold_quadratic_difference": 0.5,
        # Passed as `threshold_sensors`.
        # Alternative values kept from the original cell:
        # [0.13679444326055645, 0.5441877437706729]
        "threshold_q_d_each_sensor": [
            0.7012361884117126,
            3.1871493458747864,
        ],
        # Passed to apply_prediction_interval_metric.
        "no_standard_deviation": 3,
    },
    "prediction_path": "../visualisation/files/prediction/{}/artfic.csv".format(mode),
    "classification_path": "../visualisation/files/classification/{}/artfic_{}.csv".format(
        mode, threshold
    ),
}

## Parameters cpps data

In [3]:
# Configuration for the cpps degradation data set (sinusoid error, test split).
# NOTE(review): the other "Parameters ..." cells assign `param` as well;
# whichever of them is executed last determines the configuration in use.
param = {
    "data": {
        # Original sensor file; read again later to recover the dropped columns.
        "path": "../../data/cpps_degradation_new/data_obs10/test/obs_space_error_sinusiod_preprocessed.csv",
        # Columns that are merged back into the results (via "ID") before saving.
        "droped_feature": [
            "status",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 10,
    },
    "anomaly_detection": {
        # Passed to apply_euclidean_distance_metric.
        "threshold_quadratic_difference": 0.5,
        # Passed as `threshold_sensors`; ten entries, matching "input_size".
        "threshold_q_d_each_sensor": [
            0.24372421389777016,
            0.24761591192911026,
            0.32903928384871034,
            0.18159441767379658,
            0.27133063146032477,
            0.23155811781422755,
            0.149805263381253,
            0.31187742832928145,
            0.0716386888894709,
            0.2080102174959789,
        ],
        # Passed to apply_prediction_interval_metric.
        "no_standard_deviation": 1,
    },
    "prediction_path": "../visualisation/files/prediction/{}/cpps.csv".format(mode),
    "classification_path": "../visualisation/files/classification/{}/cpps_{}.csv".format(
        mode, threshold
    ),
}

## Parameters ramming data

In [20]:
# Configuration for the ramming data set (preprocessed training file).
# NOTE(review): the other "Parameters ..." cells assign `param` as well;
# whichever of them is executed last determines the configuration in use.
param = {
    "data": {
        # Original sensor file; read again later to recover the dropped columns.
        "path": "../../data/ramming_data/data_train_preprocessed.csv",
        # Columns that are merged back into the results (via "ID") before saving.
        "droped_feature": [
            "status",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 50,
    },
    "anomaly_detection": {
        # Passed to apply_euclidean_distance_metric.
        "threshold_quadratic_difference": 0.5,
        # No per-sensor thresholds are configured for this data set.
        # NOTE(review): the per-sensor Euclidean cell passes this list as
        # `threshold_sensors` -- confirm an empty list is acceptable there.
        "threshold_q_d_each_sensor": [],
        # Passed to apply_prediction_interval_metric.
        "no_standard_deviation": 2,
    },
    "prediction_path": "../visualisation/files/prediction/{}/ramming_train.csv".format(mode),
    "classification_path": "../visualisation/files/classification/{}/ramming_train_{}.csv".format(
        mode, threshold
    ),
}

## Parameters variance data

In [11]:
# Configuration for the increasing-variance data set (single input signal).
# NOTE(review): the other "Parameters ..." cells assign `param` as well;
# whichever of them is executed last determines the configuration in use.
param = {
    "data": {
        # Original sensor file; read again later to recover the dropped columns.
        "path": "../../data/variation_in_variance/dataset_increasing_variance.csv",
        # Columns that are merged back into the results (via "ID") before saving.
        "droped_feature": [
            "time",
            "mu",
            "sigma",
        ],
        "features_not_to_scale": [],
    },
    "model": {
        "input_size": 1,
    },
    "anomaly_detection": {
        # Passed to apply_euclidean_distance_metric.
        "threshold_quadratic_difference": 0.5,
        # No per-sensor thresholds are configured for this data set.
        # NOTE(review): the per-sensor Euclidean cell passes this list as
        # `threshold_sensors` -- confirm an empty list is acceptable there.
        "threshold_q_d_each_sensor": [],
        # Passed to apply_prediction_interval_metric.
        "no_standard_deviation": 1,
    },
    "prediction_path": "../visualisation/files/prediction/{}/variance_data_new.csv".format(mode),
    "classification_path": "../visualisation/files/classification/{}/variance_data_new_{}.csv".format(
        mode, threshold
    ),
}

## Detect Anomaly
### Euclidean Distance Metric for the Whole Machine (for MSE Loss Function)

In [34]:
# Load the stored predictions (semicolon-separated CSV) and classify them with
# the Euclidean distance metric, using the single
# "threshold_quadratic_difference" value from the configuration.
# NOTE(review): the two following metric cells also assign `results`; run only
# the metric cell that matches the selected mode.
results_prediction = pd.read_csv(param["prediction_path"], sep=";")
anomaly_classifier = AnomalyClassifier()
results = anomaly_classifier.apply_euclidean_distance_metric(
    results_prediction,
    param["model"]["input_size"],
    param["anomaly_detection"]["threshold_quadratic_difference"],
)

### Euclidean Distance Difference Metric for Each Sensor (for MSE Loss Function)

In [4]:
# Load the stored predictions (semicolon-separated CSV) and classify them with
# the Euclidean distance metric in per-sensor mode (`each_sensor=True`), passing
# the configured per-sensor threshold list as `threshold_sensors`.
# NOTE(review): the meaning of `percentage=0.8` is defined inside
# AnomalyClassifier and is not visible from this notebook -- confirm there.
results_prediction = pd.read_csv(param["prediction_path"], sep=";")
anomaly_classifier = AnomalyClassifier()
results = anomaly_classifier.apply_euclidean_distance_metric(
    results_prediction,
    param["model"]["input_size"],
    param["anomaly_detection"]["threshold_quadratic_difference"],
    percentage=0.8,
    each_sensor=True,
    threshold_sensors=param["anomaly_detection"]["threshold_q_d_each_sensor"],
)

### Standard Deviation Metric (for MLE Loss Function)

In [13]:
# Load the stored predictions (semicolon-separated CSV) and classify them with
# the prediction interval metric, passing the configured
# "no_standard_deviation" value.
# NOTE(review): the two preceding metric cells also assign `results`; run only
# the metric cell that matches the selected mode.
results_prediction = pd.read_csv(param["prediction_path"], sep=";")
anomaly_classifier = AnomalyClassifier()
results = anomaly_classifier.apply_prediction_interval_metric(
    results_prediction,
    param["model"]["input_size"],
    param["anomaly_detection"]["no_standard_deviation"],
)

In [14]:
# Preview the first five rows of the classification result.
results.head(n=5)

Unnamed: 0,ID,IONGAUGEPRESSURE target,ETCHBEAMVOLTAGE target,ETCHBEAMCURRENT target,ETCHSUPPRESSORVOLTAGE target,ETCHSUPPRESSORCURRENT target,FLOWCOOLFLOWRATE target,FLOWCOOLPRESSURE target,ETCHGASCHANNEL1READBACK target,ETCHPBNGASREADBACK target,...,Anomaly Sensor_3,Anomaly Sensor_4,Anomaly Sensor_5,Anomaly Sensor_6,Anomaly Sensor_7,Anomaly Sensor_8,Anomaly Sensor_9,Anomaly Sensor_10,Anomaly Sensor_11,Anomaly Sensor_12
0,9.0,-4.299274,-1.580827,-1.587354,-1.584514,-1.480392,-4.541032,-5.725448,-2.370619,-4.992283,...,1,0,1,1,1,1,1,0,1,1
1,10.0,-4.299274,-1.581093,-1.587743,-1.584514,-1.480392,-4.541032,-5.725448,-2.370619,-4.998083,...,1,0,1,1,1,1,1,0,1,1
2,11.0,-4.299274,-1.581093,-1.587588,-1.584569,-1.480392,-4.548361,-5.725448,-2.370619,-4.998083,...,1,0,1,1,1,1,1,0,1,1
3,12.0,-4.298829,-1.58096,-1.587665,-1.584733,-1.480392,-4.541032,-5.379991,-2.367084,-4.998083,...,1,0,1,1,1,1,1,0,1,1
4,13.0,-4.296029,-1.581624,-1.587977,-1.584242,-1.480392,-0.572374,-1.932981,-2.367084,-4.990543,...,1,0,1,0,1,1,1,0,1,1


## Combine prediction data with data which was not considered for inference

In [15]:
# Re-read the original sensor file and keep only the columns listed in
# param["data"]["droped_feature"], plus "ID" as the join key.
original_sensor_data = pd.read_csv(param["data"]["path"])
data_of_droped_feature = original_sensor_data.loc[
    :, [*param["data"]["droped_feature"], "ID"]
]
# Inner join: only rows whose "ID" occurs in both frames are kept.
complete_data = results.merge(data_of_droped_feature, how="inner", on="ID")

## Save data to csv file

In [16]:
# Write the merged result as a semicolon-separated CSV without the index column.
complete_data.to_csv(
    param["classification_path"],
    index=False,
    sep=";",
)