In [None]:
import pandas as pd
import glob
import os

Experiment 1

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score,cohen_kappa_score
from tqdm import tqdm


# Experiment 1: for every (building, model) pair, flag a reading as anomalous
# when its normalised reconstruction error exceeds mean + 3*std, then score the
# flags against the ground-truth "anomalies" column.

# Error columns referenced by this notebook; only 'difference_norm_readings' is scored here.
columns=['difference_norm_readings','difference_norm_cycle','difference_norm_trend']

models=['AE',
 'VAE',
 'BETA',
 'VAE_LinNF',
 'VAE_IAF',
 'DBVAE',
 'IWVAE',
 'MIWAE',
 'CIWAE',
 'WAE',
 'INFOVAE',
 'VAMP',
 'VQVAE',
 'HVAE',
 'RAE_GP',
 'RHVAE']
# Selects the latent_dim_<n> result folder; set to 8 to score the 8-dimensional results.
latent_dim=16
experiment="experiment_1"
buildings=["118","246","1245","1311","1141"]

column='difference_norm_readings'
anomaly_column='anomaly_{}'.format(column)
result_columns=['building','model','precision','recall','f1','accuracy','kappa']

# Collect one record per (building, model) and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0, and row-wise appends are quadratic.
records=[]
for building in tqdm(buildings):
    for model in models:
        # Load every CSV of this building/model into a single dataframe.
        pattern='{}/csv/latent_dim_{}/{}/building_{}*.csv'.format(experiment,latent_dim,model,building)
        csv_files=sorted(glob.glob(pattern))
        if not csv_files:
            # pd.concat([]) would otherwise fail with an opaque "No objects to concatenate".
            raise FileNotFoundError('No result files match {!r}'.format(pattern))
        df=pd.concat([pd.read_csv(f) for f in csv_files],ignore_index=True)

        # Keep rows with filled == 0 (presumably non-imputed readings; confirm against the data docs).
        # .copy() so the new column is not written into a slice of df.
        df_adjusted=df[df['filled']==0].copy()

        # 3-sigma rule on the reconstruction error.
        seuil=df_adjusted[column].mean()+3*df_adjusted[column].std()
        df_adjusted[anomaly_column]=(df_adjusted[column]>seuil).astype(int)

        x=df_adjusted[anomaly_column]  # predicted labels
        y=df_adjusted["anomalies"]     # ground-truth labels
        records.append({'building':building,
                        'model':model,
                        'precision':precision_score(y, x),
                        'recall':recall_score(y, x),
                        'f1':f1_score(y, x),
                        'accuracy':accuracy_score(y, x),
                        'kappa':cohen_kappa_score(y, x)})

building_df=pd.DataFrame(records,columns=result_columns)

result_dir='{}/result'.format(experiment)
os.makedirs(result_dir,exist_ok=True)
building_df.to_csv('{}/result_latent_{}.csv'.format(result_dir,latent_dim),index=False)
                

In [None]:
# Result visualisation example: one building, a subset of the metrics and models.
df = (
    pd.read_csv('experiment_1/result/result_latent_{}.csv'.format(latent_dim))
    .loc[lambda frame: frame['building']==118]  # building id
    .loc[:, ['model','precision','recall','f1','kappa']]  # metrics
    .loc[lambda frame: frame['model'].isin(['AE','VAE','IWVAE','WAE','VQVAE','HVAE','RAE_GP'])]  # models
    .reset_index(drop=True)
)
df

Experiment 2

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score,cohen_kappa_score
from tqdm import tqdm
import os

# Experiment 2: same 3-sigma scoring as experiment 1, repeated for each
# clustering variant ("cycle", "trend"); one result CSV is written per variant.

# Error columns referenced by this notebook; only 'difference_norm_readings' is scored here.
columns=['difference_norm_readings','difference_norm_cycle','difference_norm_trend']
models=['AE',
 'VAE',
 'BETA',
 'VAE_LinNF',
 'VAE_IAF',
 'DBVAE',
 'IWVAE',
 'MIWAE',
 'CIWAE',
 'WAE',
 'INFOVAE',
 'VAMP',
 'SVAE',
 'PVAE',
 'VQVAE',
 'HVAE',
 'RAE_GP',
 'RHVAE']
latent_dim=16
experiment="experiment_2"

buildings=["118","246","1245","1311","1141"]
columns_cluster=["cycle","trend"]

column='difference_norm_readings'
anomaly_column='anomaly_{}'.format(column)
result_columns=['building','model','precision','recall','f1','accuracy','kappa']

# One tick per (cluster, building). The bar must span both clusters and be closed
# only once, otherwise the second cluster runs with an already-closed bar.
with tqdm(total=len(columns_cluster)*len(buildings)) as pbar:
    for column_cluster in columns_cluster:
        # Collect records and build the frame once per cluster:
        # DataFrame.append was removed in pandas 2.0 and is quadratic row by row.
        records=[]
        for building in buildings:
            for model in models:
                # Load every CSV of this cluster/building/model into a single dataframe.
                pattern='{}/csv/{}/latent_dim_{}/{}/building_{}*.csv'.format(experiment,column_cluster,latent_dim,model,building)
                csv_files=sorted(glob.glob(pattern))
                if not csv_files:
                    # pd.concat([]) would otherwise fail with an opaque "No objects to concatenate".
                    raise FileNotFoundError('No result files match {!r}'.format(pattern))
                df=pd.concat([pd.read_csv(f) for f in csv_files],ignore_index=True)

                # Keep rows with filled == 0 (presumably non-imputed readings; confirm against the data docs).
                # .copy() so this cell and later cells can add columns without writing into a slice of df.
                df_adjusted=df[df['filled']==0].copy()

                # 3-sigma rule on the reconstruction error.
                seuil=df_adjusted[column].mean()+3*df_adjusted[column].std()
                df_adjusted[anomaly_column]=(df_adjusted[column]>seuil).astype(int)

                x=df_adjusted[anomaly_column]  # predicted labels
                y=df_adjusted["anomalies"]     # ground-truth labels
                records.append({'building':building,
                                'model':model,
                                'precision':precision_score(y, x),
                                'recall':recall_score(y, x),
                                'f1':f1_score(y, x),
                                'accuracy':accuracy_score(y, x),
                                'kappa':cohen_kappa_score(y, x)})
            pbar.update(1)

        building_df=pd.DataFrame(records,columns=result_columns)

        result_dir='{}/result/{}'.format(experiment,column_cluster)
        os.makedirs(result_dir,exist_ok=True)  # also creates <experiment>/result
        building_df.to_csv('{}/result_latent_{}.csv'.format(result_dir,latent_dim),index=False)

# NOTE: df_adjusted is left holding the last cluster/building/model processed;
# the threshold-exploration cells below operate on that frame.

In [None]:
#import one class classification metrics
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score,cohen_kappa_score
# Sweep fixed thresholds on the normalised reading error: 0.10, 0.11, ..., 0.49.
# Operates on df_adjusted, which must already exist from an earlier cell.
thresholds = [i/100 for i in range(10,50)]
precision = []
recall = []
f1 = []
accuracy = []
kappa = []

# Ground truth does not depend on the threshold.
y_true=df_adjusted["anomalies"]
for seuil in thresholds:
    # Vectorised equivalent of the per-row "1 if error > threshold else 0".
    df_adjusted["label"]=(df_adjusted["difference_norm_readings"]>seuil).astype(int)
    y_pred=df_adjusted["label"]

    # scikit-learn metrics take (y_true, y_pred); passing them the other way
    # round swaps precision and recall.
    precision.append(precision_score(y_true,y_pred,average="macro"))
    recall.append(recall_score(y_true,y_pred,average="macro"))
    f1.append(f1_score(y_true,y_pred,average="macro"))
    accuracy.append(accuracy_score(y_true,y_pred))
    kappa.append(cohen_kappa_score(y_true,y_pred))

In [None]:
# Score one hand-picked threshold on df_adjusted (defined by an earlier cell).
# The "label" column written here is reused by the anomalous-subset cell further down.
seuil=0.188
df_adjusted["label"]=(df_adjusted["difference_norm_readings"]>seuil).astype(int)
y_pred=df_adjusted["label"]
y_true=df_adjusted["anomalies"]

# scikit-learn metrics take (y_true, y_pred); the reversed order swaps precision and recall.
print("Precision Score: ",precision_score(y_true,y_pred,average="macro"))
print("Recall Score: ",recall_score(y_true,y_pred,average="macro"))
print("F1 Score: ",f1_score(y_true,y_pred,average="macro"))
print("Accuracy Score: ",accuracy_score(y_true,y_pred))
print("Cohen Kappa Score: ",cohen_kappa_score(y_true,y_pred))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# One figure per metric, plotted against the swept thresholds.
# The metric lists and `thresholds` come from the threshold-sweep cell.
metrics_eval={"precision":precision,"recall":recall,"f1":f1,"accuracy":accuracy,"kappa":kappa}
metrics=list(metrics_eval)

for metric, values in metrics_eval.items():
    fig, ax = plt.subplots()
    ax.plot(thresholds, np.array(values), label=metric)
    ax.legend()
    ax.set_xlabel("threshold")
    ax.set_ylabel(metric)
    ax.set_title("One class classification metrics ({}) for the anomaly detection".format(metric))
    plt.show()

In [None]:
# Score the predictions on the truly anomalous rows only.
# Requires the "label" column that an earlier cell adds to df_adjusted.
df_anomalous=df_adjusted[df_adjusted["anomalies"]==1]
y_pred=df_anomalous["label"]
y_true=df_anomalous["anomalies"]

# scikit-learn metrics take (y_true, y_pred); the reversed order swaps precision and recall.
# NOTE(review): y_true contains only class 1 here, so macro averages include a class 0
# with no true samples and scikit-learn may emit an UndefinedMetricWarning; accuracy is
# the fraction of true anomalies that were flagged.
print("Precision Score: ",precision_score(y_true,y_pred,average="macro"))
print("Recall Score: ",recall_score(y_true,y_pred,average="macro"))
print("F1 Score: ",f1_score(y_true,y_pred,average="macro"))
print("Accuracy Score: ",accuracy_score(y_true,y_pred))