In [19]:
import pandas as pd

energy_data = pd.read_csv("Extra//energy.csv")
energy_data['timestamp'] = pd.to_datetime(energy_data['timestamp'])
energy_data.set_index('timestamp', inplace=True)
resampled_energy = energy_data.resample("5s").mean()
resampled_energy = resampled_energy.fillna(method='ffill')
resampled_energy = resampled_energy.fillna(method='bfill')

env_data = pd.read_csv("Extra//environment.csv")
env_data['timestamp'] = pd.to_datetime(env_data['timestamp'])
env_data.set_index('timestamp', inplace=True)
resampled_env = env_data.resample("5s").mean()
resampled_env = resampled_env.fillna(method='ffill')
resampled_env = resampled_env.fillna(method='bfill')

resampled_energy['reactive_power'] = resampled_energy[["Reactive Power A average [kVAr]","Reactive Power B average [kVAr]","Reactive Power C average [kVAr]"]].mean(axis=1)
resampled_energy['thdi'] = resampled_energy[["THDI A average [%]","THDI B average [%]","THDI C average [%]"]].mean(axis=1)
resampled_energy['thdu'] = resampled_energy[["THDU A average [%]","THDU B average [%]","THDU C average [%]"]].mean(axis=1)
resampled_energy['current'] = resampled_energy[["Current A average [A]","Current B average [A]","Current C average [A]"]].mean(axis=1)
resampled_energy['voltage'] = resampled_energy[["Voltage A average [V]","Voltage B average [V]","Voltage C average [V]"]].mean(axis=1)
resampled_energy['power_factor'] = resampled_energy[["Power Factor A average","Power Factor B average","Power Factor C average"]].mean(axis=1)
useful_data = resampled_energy.join(resampled_env)
used_features = ["reactive_power","power_factor","current","voltage","thdu","thdi","Xacc","yaw","pitch"]
useful_data = useful_data[used_features]
useful_data = useful_data.dropna()

In [20]:
#load autoencoder anomalies
autoencoder_anomalies = pd.read_csv("std_anomalies/autoencoder_anomalies.csv", index_col="Date")
autoencoder_anomalies.index = pd.to_datetime(autoencoder_anomalies.index, format='%Y-%m-%d %H:%M:%S')
autoencoder_anomalies.rename(columns={"modes": "modes_autoencoder", "Anomaly": "anomaly_autoencoder"}, inplace=True)
autoencoder_anomalies["anomaly_autoencoder"] = autoencoder_anomalies["anomaly_autoencoder"].replace({"Yes": True, "No": False})

#load hmm anomalies
hmm_anomalies = pd.read_csv("std_anomalies/hmm_anomalies_3std.csv", index_col="Date")
hmm_anomalies.index = pd.to_datetime(hmm_anomalies.index, format='%Y-%m-%d %H:%M:%S')
hmm_anomalies.rename(columns={"modes": "modes_hmm", "Anomaly": "anomaly_hmm"}, inplace=True)
hmm_anomalies["anomaly_hmm"] = hmm_anomalies["anomaly_hmm"].replace({"Yes": True, "No": False})
hmm_anomalies.head()

# merge anomalies
merged_df=pd.merge(useful_data,autoencoder_anomalies, how='inner', left_index=True, right_index=True)
merged_df=pd.merge(merged_df,hmm_anomalies, how='inner', left_index=True, right_index=True)
merged_df.head()

Unnamed: 0_level_0,reactive_power,power_factor,current,voltage,thdu,thdi,Xacc,yaw,pitch,modes_autoencoder,anomaly_autoencoder,modes_hmm,anomaly_hmm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-11-07 00:00:15,0.0,0.0,0.027917,238.993167,1.125,0.0,-379.1,-60.111149,1.337839,Offline,False,Offline,False
2022-11-07 00:00:20,0.0,0.0,0.028733,238.912533,1.16,0.0,-389.7,-60.045839,1.347028,Offline,False,Offline,False
2022-11-07 00:00:25,0.0,0.0,0.0274,238.9462,1.14,0.0,-393.8,-60.048941,1.3809,Offline,False,Offline,False
2022-11-07 00:00:30,0.0,0.0,0.0274,238.895333,1.153333,0.0,-383.6,-60.035862,1.370049,Offline,False,Offline,False
2022-11-07 00:00:35,0.0,0.0,0.028333,238.8774,1.146667,0.0,-377.8,-59.982022,1.336601,Offline,False,Offline,False


In [21]:
import numpy as np
from sklearn.preprocessing import StandardScaler

for mode in merged_df['modes_autoencoder'].unique():
    mode_data = merged_df[merged_df['modes_autoencoder'] == mode].copy()
    mode_data_features = mode_data[used_features]

    #scaling the data so the mean is 0 and the std is 1
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(mode_data_features)
    scaled_df = pd.DataFrame(scaled_data, columns=used_features, index=mode_data.index)

    anomalies = np.abs(scaled_df) > 3 #3*std as in papers and the std is already 1 after scaling it
    mode_data['std_anomaly_autoencoder'] = anomalies.any(axis=1)
    merged_df.loc[mode_data.index, 'std_anomaly_autoencoder'] = mode_data['std_anomaly_autoencoder']

for mode in merged_df['modes_hmm'].unique():
    mode_data = merged_df[merged_df['modes_hmm'] == mode].copy()
    mode_data_features = mode_data[used_features]

    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(mode_data_features)
    scaled_df = pd.DataFrame(scaled_data, columns=used_features, index=mode_data.index)

    anomalies = np.abs(scaled_df) > 3
    mode_data['std_anomaly_hmm'] = anomalies.any(axis=1)
    merged_df.loc[mode_data.index, 'std_anomaly_hmm'] = mode_data['std_anomaly_hmm']

merged_df.head()

Unnamed: 0_level_0,reactive_power,power_factor,current,voltage,thdu,thdi,Xacc,yaw,pitch,modes_autoencoder,anomaly_autoencoder,modes_hmm,anomaly_hmm,std_anomaly_autoencoder,std_anomaly_hmm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2022-11-07 00:00:15,0.0,0.0,0.027917,238.993167,1.125,0.0,-379.1,-60.111149,1.337839,Offline,False,Offline,False,False,False
2022-11-07 00:00:20,0.0,0.0,0.028733,238.912533,1.16,0.0,-389.7,-60.045839,1.347028,Offline,False,Offline,False,False,False
2022-11-07 00:00:25,0.0,0.0,0.0274,238.9462,1.14,0.0,-393.8,-60.048941,1.3809,Offline,False,Offline,False,False,False
2022-11-07 00:00:30,0.0,0.0,0.0274,238.895333,1.153333,0.0,-383.6,-60.035862,1.370049,Offline,False,Offline,False,False,False
2022-11-07 00:00:35,0.0,0.0,0.028333,238.8774,1.146667,0.0,-377.8,-59.982022,1.336601,Offline,False,Offline,False,False,False


In [22]:
from sklearn.metrics import confusion_matrix, recall_score, precision_score, accuracy_score, f1_score, matthews_corrcoef
import math

def get_metrics(y_pred, y_true):
    TN, FP, FN, TP = confusion_matrix(y_true, y_pred).ravel()
    #TPR = recall_score(y_true, y_pred)
    TNR = TN / (TN + FP)
    TPR = TP / (TP + FN)
    G_MEAN = math.sqrt(TPR * TNR)
    PPV = precision_score(y_true, y_pred)
    ACC = accuracy_score(y_true, y_pred)
    F1 = f1_score(y_true, y_pred, average='weighted')
    MCC = matthews_corrcoef(y_true, y_pred)
    FDR = FP/(TP+FP)
    FAR = 1 - TNR

    return {
        'RECALL': TPR,
        'SPECIFICITY': TNR,
        'PRECISION': PPV,
        'ACCURACY': ACC,
        'G_MEAN': G_MEAN,
        'F1': F1,
        'Matthews Correlation Coefficient': MCC,
        'False Discovery Rate': FDR,
        'False Alarm Rate': FAR
    }

In [23]:
metrics_dict = {}

for unique_mode in merged_df["modes_autoencoder"].unique():
    current_mode_df = merged_df[merged_df["modes_autoencoder"] == unique_mode]
    predicted_anomalies = current_mode_df['anomaly_autoencoder']

    current_mode_df['std_anomaly_autoencoder'] = current_mode_df['std_anomaly_autoencoder'].astype(bool)
    ground_truth = current_mode_df['std_anomaly_autoencoder']

    mode_metrics = get_metrics(predicted_anomalies, ground_truth)
    metrics_dict[unique_mode] = mode_metrics

metrics_df = pd.DataFrame.from_dict(metrics_dict, orient='index')
metrics_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_mode_df['std_anomaly_autoencoder'] = current_mode_df['std_anomaly_autoencoder'].astype(bool)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_mode_df['std_anomaly_autoencoder'] = current_mode_df['std_anomaly_autoencoder'].astype(bool)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cur

Unnamed: 0,RECALL,SPECIFICITY,PRECISION,ACCURACY,G_MEAN,F1,Matthews Correlation Coefficient,False Discovery Rate,False Alarm Rate
Offline,0.102687,0.980818,0.192363,0.943412,0.31736,0.935103,0.113125,0.807637,0.019182
Online,0.061931,0.982208,0.280605,0.889474,0.246635,0.856506,0.090102,0.719395,0.017792
Mode1,0.241185,0.998251,0.924324,0.936654,0.490676,0.919093,0.454107,0.075676,0.001749
InMotion,0.26498,0.99464,0.771318,0.94804,0.513381,0.935915,0.433346,0.228682,0.00536
Mode2,0.353448,0.99805,0.911111,0.963544,0.593935,0.955816,0.554825,0.088889,0.00195


In [24]:
metrics_dict_hmm = {}

for unique_mode in merged_df["modes_hmm"].unique():
    current_mode_df = merged_df[merged_df["modes_hmm"] == unique_mode]
    predicted_anomalies = current_mode_df['anomaly_hmm']

    current_mode_df['std_anomaly_hmm'] = current_mode_df['std_anomaly_hmm'].astype(bool)
    ground_truth = current_mode_df['std_anomaly_hmm']

    mode_metrics = get_metrics(predicted_anomalies, ground_truth)
    metrics_dict_hmm[unique_mode] = mode_metrics

metrics_df_hmm = pd.DataFrame.from_dict(metrics_dict_hmm, orient='index')
metrics_df_hmm

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_mode_df['std_anomaly_hmm'] = current_mode_df['std_anomaly_hmm'].astype(bool)
  _warn_prf(average, modifier, msg_start, len(result))
  FDR = FP/(TP+FP)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  current_mode_df['std_anomaly_hmm'] = current_mode_df['std_anomaly_hmm'].astype(bool)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing

Unnamed: 0,RECALL,SPECIFICITY,PRECISION,ACCURACY,G_MEAN,F1,Matthews Correlation Coefficient,False Discovery Rate,False Alarm Rate
Offline,0.0,1.0,0.0,0.957403,0.0,0.936567,0.0,,0.0
Online,0.008197,1.0,1.0,0.900058,0.090536,0.85353,0.085889,0.0,0.0
Mode1,0.57969,0.995253,0.915367,0.961441,0.759564,0.957421,0.711023,0.084633,0.004747
InMotion,0.006658,1.0,1.0,0.936559,0.081595,0.906298,0.078964,0.0,0.0
Mode2,0.568966,0.993662,0.835443,0.970928,0.751904,0.968299,0.6757,0.164557,0.006338
