In [None]:
import pandas as pd

def _load_timestamp_indexed(csv_path):
    """Read ``csv_path`` and return it indexed by its parsed ``timestamp`` column."""
    frame = pd.read_csv(csv_path)
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    return frame.set_index('timestamp')


def _resample_and_fill(frame, rule="5s"):
    """Average ``frame`` into ``rule``-wide bins and fill the bins left empty.

    Empty bins are forward-filled first; the back-fill afterwards only covers
    leading bins that have no earlier value to carry forward.
    """
    # .ffill()/.bfill() replace fillna(method=...), which recent pandas deprecates.
    return frame.resample(rule).mean().ffill().bfill()


energy_data = _load_timestamp_indexed("Extra//energy.csv")
resampled_energy = _resample_and_fill(energy_data)

env_data = _load_timestamp_indexed("Extra//environment.csv")
resampled_env = _resample_and_fill(env_data)

# Each derived feature is the row-wise mean of the per-phase (A, B, C) columns.
# The "{}" placeholder is substituted with the phase letter.
phase_column_templates = {
    'reactive_power': "Reactive Power {} average [kVAr]",
    'thdi': "THDI {} average [%]",
    'thdu': "THDU {} average [%]",
    'current': "Current {} average [A]",
    'voltage': "Voltage {} average [V]",
    'power_factor': "Power Factor {} average",
}
for feature_name, column_template in phase_column_templates.items():
    phase_columns = [column_template.format(phase) for phase in ("A", "B", "C")]
    resampled_energy[feature_name] = resampled_energy[phase_columns].mean(axis=1)

# Left-join the environment readings onto the energy readings (both on the
# resampled timestamp index), keep only the model features, and drop rows
# that still miss any of them.
used_features = ["reactive_power","power_factor","current","voltage","thdu","thdi","Xacc","yaw","pitch"]
useful_data = resampled_energy.join(resampled_env)[used_features].dropna()

In [None]:
def _load_anomaly_labels(csv_path, source):
    """Load one detector's labels, indexed by timestamp, with source-specific names.

    The ``modes`` and ``Anomaly`` columns are renamed to ``modes_<source>`` and
    ``anomaly_<source>``, and the "Yes"/"No" anomaly labels become booleans.
    """
    labels = pd.read_csv(csv_path, index_col="Date")
    labels.index = pd.to_datetime(labels.index, format='%Y-%m-%d %H:%M:%S')

    flag_column = f"anomaly_{source}"
    labels = labels.rename(columns={"modes": f"modes_{source}", "Anomaly": flag_column})
    # replace() relies on pandas' deprecated silent downcast to turn the object
    # column into bool; infer_objects() keeps that result without relying on it.
    labels[flag_column] = labels[flag_column].replace({"Yes": True, "No": False}).infer_objects()
    return labels


# load autoencoder anomalies
autoencoder_anomalies = _load_anomaly_labels("std_anomalies/autoencoder_anomalies_v2.csv", "autoencoder")

# load hmm anomalies
hmm_anomalies = _load_anomaly_labels("std_anomalies/hmm_anomalies_3std.csv", "hmm")

# merge anomalies: inner joins on the timestamp index keep only the timestamps
# present in the features and in both label files
merged_df = pd.merge(useful_data, autoencoder_anomalies, how='inner', left_index=True, right_index=True)
merged_df = pd.merge(merged_df, hmm_anomalies, how='inner', left_index=True, right_index=True)
merged_df.head()

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# 3*std as in papers; after standard scaling the std is 1, so the threshold is 3.
STD_THRESHOLD = 3


def _flag_std_anomalies(frame, mode_column, feature_columns, threshold=STD_THRESHOLD):
    """Flag rows where any feature lies more than ``threshold`` std from its mode mean.

    Rows are grouped by the value in ``mode_column``. Within each group the
    features are standard-scaled (mean 0, std 1), so the absolute scaled value
    is the distance from the group mean in standard deviations.

    Returns a boolean Series on ``frame``'s index.
    """
    flags = np.zeros(len(frame), dtype=bool)
    for mode in frame[mode_column].unique():
        # A positional mask writes back to exactly the rows that were scaled,
        # independent of the index labels.
        in_mode = (frame[mode_column] == mode).to_numpy()
        scaled = StandardScaler().fit_transform(frame.loc[in_mode, feature_columns])
        flags[in_mode] = (np.abs(scaled) > threshold).any(axis=1)
    return pd.Series(flags, index=frame.index)


merged_df['std_anomaly_autoencoder'] = _flag_std_anomalies(merged_df, 'modes_autoencoder', used_features)
merged_df['std_anomaly_hmm'] = _flag_std_anomalies(merged_df, 'modes_hmm', used_features)

merged_df.head()

In [None]:
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score, matthews_corrcoef
import math

def get_metrics(y_pred, y_true):
    """Compute binary classification metrics for predictions against a reference.

    Args:
        y_pred: predicted boolean labels (note: this is the FIRST argument).
        y_true: reference boolean labels.

    Returns:
        dict with keys 'RECALL', 'SPECIFICITY', 'PRECISION', 'ACCURACY',
        'G_MEAN' (sqrt of recall * specificity), 'F1' and 'MCC'.
    """
    # sklearn flattens a binary confusion matrix in the order tn, fp, fn, tp.
    # Explicit labels keep the matrix 2x2 even when only one class occurs,
    # so the unpacking below cannot fail on single-class input.
    TN, FP, _, _ = confusion_matrix(y_true, y_pred, labels=[False, True]).ravel()
    TPR = recall_score(y_true, y_pred)
    TNR = TN / (TN + FP)
    G_MEAN = math.sqrt(TPR * TNR)
    PPV = precision_score(y_true, y_pred)
    ACC = accuracy_score(y_true, y_pred)
    # NOTE(review): F1 is class-weighted while recall/precision use the default
    # binary averaging, so F1 is not the harmonic mean of the values reported
    # here. Confirm this is intended.
    F1 = f1_score(y_true, y_pred, average='weighted')
    MCC = matthews_corrcoef(y_true, y_pred)

    return {
        'RECALL': TPR,
        'SPECIFICITY': TNR,
        'PRECISION': PPV,
        'ACCURACY': ACC,
        'G_MEAN': G_MEAN,
        'F1': F1,
        'MCC': MCC
    }

In [None]:
# Per-mode evaluation of the autoencoder: its anomaly labels are the
# predictions and the std-based flags serve as the reference labels.
metrics_dict = {}

for unique_mode in merged_df["modes_autoencoder"].unique():
    current_mode_df = merged_df[merged_df["modes_autoencoder"] == unique_mode]
    predicted_anomalies = current_mode_df['anomaly_autoencoder']

    # Cast the extracted Series instead of assigning a column back into the
    # filtered frame, which would trigger pandas' SettingWithCopyWarning.
    ground_truth = current_mode_df['std_anomaly_autoencoder'].astype(bool)

    metrics_dict[unique_mode] = get_metrics(predicted_anomalies, ground_truth)

metrics_df = pd.DataFrame.from_dict(metrics_dict, orient='index')
metrics_df

In [None]:
# Per-mode evaluation of the HMM: its anomaly labels are the predictions and
# the std-based flags serve as the reference labels.
metrics_dict_hmm = {}

for unique_mode in merged_df["modes_hmm"].unique():
    current_mode_df = merged_df[merged_df["modes_hmm"] == unique_mode]
    predicted_anomalies = current_mode_df['anomaly_hmm']

    # Cast the extracted Series instead of assigning a column back into the
    # filtered frame, which would trigger pandas' SettingWithCopyWarning.
    ground_truth = current_mode_df['std_anomaly_hmm'].astype(bool)

    metrics_dict_hmm[unique_mode] = get_metrics(predicted_anomalies, ground_truth)

metrics_df_hmm = pd.DataFrame.from_dict(metrics_dict_hmm, orient='index')
metrics_df_hmm