# PERFORMANCE METRICS 

Compute the performance metrics of the EIF and EIF+ models (for the moment let's consider just these two). 

Performance Metrics computed: 

1. The typical classification metrics that we can obtain with sklearn.metrics.classification_report

2. The Average Precision -> this is obtained with `sklearn.metrics.average_precision_score`, but we can still use the mean values obtained in the Average_Precision.ipynb notebook (the ones used to create the Violin Plot)

3. The ROC AUC Score -> obtainable with `sklearn.metrics.roc_auc_score`

In [1]:
import sys
import numpy as np
from datetime import datetime
import pandas as pd
import argparse
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import sklearn
sys.path.append('../experiments')
from append_dir import append_dirname
append_dirname("ExIFFI")
sys.path.append('../src')
from src.utils import *
from src.performance_report_functions import *
#from utils.feature_selection import *
from plot import *
from simulation_setup import *
from models import *
from models.forests import *
from pyod.models.dif import DIF
from pyod.models.auto_encoder import AutoEncoder
from sklearn.metrics import classification_report,average_precision_score,roc_auc_score
import seaborn as sns
sns.set()

import os
import pickle 
from glob import glob

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

2024-02-12 16:10:17.392151: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Test paths

In [2]:
# Data folders are resolved from the parent of the current working directory.
path = os.path.dirname(os.getcwd())
path_real = os.path.join(path, "data", "real")
path_syn = os.path.join(path, "data", "syn")

# Discover the dataset files of each kind.
mat_files_real = glob(os.path.join(path_real, "*.mat"))
mat_files_syn = glob(os.path.join(path_syn, "*.pkl"))
csv_files_real = glob(os.path.join(path_real, "*.csv"))

# Index every group by dataset name (file name up to its first dot).
mat_file_names_real = dict((os.path.basename(f).split(".")[0], f) for f in mat_files_real)
mat_file_names_syn = dict((os.path.basename(f).split(".")[0], f) for f in mat_files_syn)
csv_file_names_real = dict((os.path.basename(f).split(".")[0], f) for f in csv_files_real)

# Names are listed group by group, before the mappings are merged.
dataset_names = [*mat_file_names_real, *mat_file_names_syn, *csv_file_names_real]

# Merge the synthetic and CSV entries into the real-.mat mapping in place
# (on a name clash the CSV entry wins over the synthetic one, which wins over .mat).
mat_file_names_real.update({**mat_file_names_syn, **csv_file_names_real})
dataset_paths = dict(mat_file_names_real)

In [3]:
# Smoke test of collect_performance_df on two datasets.
datasets=['wine','shuttle']
# Look up the file path of each dataset in the dataset_paths mapping built in the "Test paths" cell.
path_list=[dataset_paths[name] for name in datasets]
model=IsolationForest()
# collect_performance_df is not defined in this notebook (presumably it comes from one of the star imports — confirm).
df_perf=collect_performance_df(datasets,path_list,model=model)
df_perf


Loading wine dataset from /home/davidefrizzo/Desktop/PHD/ExIFFI/data/real/wine.mat
wine 

[number of samples = 129]
[percentage outliers = 0.07751937984496124]
[number features = 13]
[number outliers = 10]


  d = np.dot(x,normals[node_id])


Loading shuttle dataset from /home/davidefrizzo/Desktop/PHD/ExIFFI/data/real/shuttle.mat
shuttle 

[number of samples = 49097]
[percentage outliers = 0.0715114976475141]
[number features = 9]
[number outliers = 3511]


Unnamed: 0,Dataset,Precision,Recall,f1 score,Accuracy,Balanced Accuracy,Average Precision,ROC AUC Score
0,wine,0.5,0.6,0.545455,0.922481,0.77479,0.331008,0.77479
1,shuttle,0.702383,0.982056,0.819002,0.968959,0.975004,0.691063,0.975004


In [5]:
# Read back a saved performance table; the location is resolved relative to
# the current working directory.
saved_perf_file=os.path.join(os.getcwd(),'../','results','perf_results','12-02-2024_16-25-16_test_performance_wine_shuttle.pkl')
df=pd.read_pickle(saved_perf_file)
df

Unnamed: 0,Dataset,Precision,Recall,f1 score,Accuracy,Balanced Accuracy,Average Precision,ROC AUC Score
0,wine,0.5,0.6,0.545455,0.922481,0.77479,0.331008,0.77479
1,shuttle,0.702791,0.982626,0.819477,0.969041,0.97531,0.691823,0.97531


# Synthetic Dataset

## Load pkl files

In [2]:
# Move into the synthetic-data folder. The path is relative, so the parent of
# the current working directory must contain `data/syn`.
os.chdir('../data/syn')
os.getcwd()

'/home/davidefrizzo/Desktop/PHD/ExIFFI/data/syn'

## Training Set

In [3]:
# Load the synthetic training set from the current working directory.
# The `with` block closes the file handle as soon as the pickle has been read.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(os.path.join(os.getcwd(),'ball_6_dim.pkl'), "rb") as file_to_read:
    loaded_dictionary = pickle.load(file_to_read)
X_train=loaded_dictionary['X_train']

### Test Set

In [4]:
# Load the synthetic anomaly sets; the `with` block closes the file handle
# once the pickle has been read.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(os.path.join(os.getcwd(),'anomalies.pkl'), "rb") as file_to_read:
    loaded_dictionary = pickle.load(file_to_read)

# The pickle keys are spelled 'X_bisec*' while the notebook variables use 'X_bisect*'.
X_xaxis=loaded_dictionary['X_xaxis']
X_yaxis=loaded_dictionary['X_yaxis']
X_bisect=loaded_dictionary['X_bisec']
X_bisect_3d=loaded_dictionary['X_bisec_3d']
X_bisect_6d=loaded_dictionary['X_bisec_6d']

#### Test `performance` method

In [5]:
# Move to the sibling `real` folder (relative to the current working directory).
os.chdir('../real')
os.getcwd()

'/home/davidefrizzo/Desktop/PHD/ExIFFI/data/real'

In [6]:
# Load the wine dataset through load_preprocess.
# NOTE: this rebinds X_train, replacing the synthetic training set assigned in the "Training Set" cell.
X_train,X_test,X,y=load_preprocess('StandardScaler','wine',os.path.join(os.getcwd(),'wine.mat'))

Loading wine dataset from /home/davidefrizzo/Desktop/PHD/ExIFFI/data/real/wine.mat
wine 

[number of samples = 129]
[percentage outliers = 0.07751937984496124]
[number features = 13]
[number outliers = 10]


In [5]:
# Fit an IsolationForest on X_train and evaluate it on X_train stacked with the x-axis anomalies.
model=IsolationForest(n_estimators=100)
model.fit(X_train)
X_test=np.r_[X_train,X_xaxis]
# Only the second and fourth values returned by pre_process are kept.
# NOTE(review): the model is fitted on X_train before pre_process runs; if pre_process
# rescales X_test, the fit and the evaluation use differently scaled data — confirm.
_,X_test,_,y=pre_process('StandardScaler',X_train,X_test)
d,y_pred=performance(X_test,y,model)

  d = np.dot(x,normals[node_id])


In [7]:
# Inspect the predictions returned by `performance`.
y_pred

array([False, False, False, ..., False, False,  True])

#### Test `get_performance_dict` method

It works with both PyOD models and models from `forests.py`.

In [5]:
# Same setup as the `performance` test: fit on X_train, evaluate on X_train stacked with the x-axis anomalies.
model=IsolationForest(n_estimators=100)
model.fit(X_train)
X_test=np.r_[X_train,X_xaxis]
_,X_test,_,y=pre_process('StandardScaler',X_train,X_test)
# NOTE(review): `model` is fitted above but is not passed to get_performance_dict —
# confirm which model that helper actually evaluates.
mat=get_performance_dict('Xxaxis',X_train,X_test,y)
mat

array([[0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5       ],
       [0.52380952, 1.        , 0.6875    , 0.52380952, 0.5       ,
        0.52380952, 0.5

## Performance Report Table Computation Function

The following functions can be found in the Python Script called performance_report_functions.py

In [18]:
def if_predict(score,p):
    """Binarise anomaly scores using a rank-based threshold.

    The threshold is the score found at position ``int(p*len(score))`` once
    the scores are sorted in descending order; points whose score is strictly
    greater than it are labelled 1, all the others 0.
    """
    descending=np.sort(score)[::-1]
    threshold=descending[int(p*len(score))]
    flagged=score>threshold
    return flagged.astype(int)

In [19]:
def performance_if(y,score):
    """Compute the performance metrics of an Isolation Forest from its anomaly scores.

    Parameters
    ----------
    y : array-like
        Ground-truth labels (1 = anomaly, 0 = inlier).
    score : array-like
        Anomaly scores, where a larger value means more anomalous
        (if_predict labels the highest scores as 1).

    Returns
    -------
    dict
        Keys: 'Precision', 'Recall', 'f1 score', 'Accuracy',
        'Balanced Accuracy', 'Average Precision', 'ROC AUC Score'.
    """
    # The contamination is the fraction of true anomalies; it fixes how many
    # points if_predict labels as anomalous.
    p=sum(y)/len(y)
    y_pred=if_predict(score,p)
    d={}
    # Threshold-dependent metrics are computed on the binary predictions.
    d['Precision']=sklearn.metrics.precision_score(y,y_pred)
    d['Recall']=sklearn.metrics.recall_score(y,y_pred)
    d['f1 score']=sklearn.metrics.f1_score(y,y_pred)
    d['Accuracy']=sklearn.metrics.accuracy_score(y,y_pred)
    d['Balanced Accuracy']=sklearn.metrics.balanced_accuracy_score(y,y_pred)
    # Ranking metrics need the continuous scores, as in performance_eif:
    # with binary predictions ROC AUC reduces to the balanced accuracy.
    d['Average Precision']=sklearn.metrics.average_precision_score(y,score)
    d['ROC AUC Score']=sklearn.metrics.roc_auc_score(y,score)
    return d

In [20]:
def performance_eif(y,score,X_test,model):
    """Compute the performance metrics of an EIF-style model.

    Binary predictions come from ``model._predict(X_test, p)``, where ``p`` is
    the fraction of positives in ``y``; they feed the threshold-dependent
    metrics. Average Precision and ROC AUC are computed on ``score``.
    Returns a dict keyed by metric name.
    """
    contamination=sum(y)/len(y)
    labels_pred=model._predict(X_test,contamination).astype(int)

    # Metrics evaluated on the binary predictions, in report order.
    thresholded_metrics=(
        ('Precision',sklearn.metrics.precision_score),
        ('Recall',sklearn.metrics.recall_score),
        ('f1 score',sklearn.metrics.f1_score),
        ('Accuracy',sklearn.metrics.accuracy_score),
        ('Balanced Accuracy',sklearn.metrics.balanced_accuracy_score),
    )
    report={metric:scorer(y,labels_pred) for metric,scorer in thresholded_metrics}

    # Metrics evaluated on the continuous anomaly scores.
    report['Average Precision']=sklearn.metrics.average_precision_score(y,score)
    report['ROC AUC Score']=sklearn.metrics.roc_auc_score(y,score)
    return report

In [21]:
def evaluate_performance(X_train,X_test,y):
    """Fit EIF, EIF_plus and IF on X_train and score each of them on X_test.

    Returns the tuple ``(metrics_if, metrics_eif, metrics_eif_plus)``, one
    metrics dict per model, computed against the labels ``y``.
    """
    # Models are created and fitted one after the other: EIF, EIF_plus, IF.
    eif_model=ExtendedIsolationForest(n_estimators=300,plus=0)
    eif_model.fit(X_train)

    eif_plus_model=ExtendedIsolationForest(n_estimators=300,plus=1)
    eif_plus_model.fit(X_train)

    if_model=IsolationForest(n_estimators=300,max_samples=min(len(X_train),256))
    if_model.fit(X_train)

    # score_samples is negated and shifted by 0.5 (presumably so that larger
    # values mean "more anomalous" — TODO confirm against the IsolationForest in use).
    if_scores=-1*if_model.score_samples(X_test)+0.5
    eif_scores=eif_model.predict(X_test)
    eif_plus_scores=eif_plus_model.predict(X_test)

    return (
        performance_if(y,if_scores),
        performance_eif(y,eif_scores,X_test,eif_model),
        performance_eif(y,eif_plus_scores,X_test,eif_plus_model),
    )
    

In [22]:
def collect_performance(metrics_dict,name,X_train,X_test,y):
    """Run evaluate_performance 10 times and store the results under ``metrics_dict[name]``.

    For each of 'IF', 'EIF' and 'EIF_plus' the entry holds, per metric, the
    list of the 10 values and their mean under the key ``<metric>_avg``.
    Any previous entry for ``name`` is replaced. Returns ``metrics_dict``.
    """
    metric_names=['Precision', 'Recall', 'f1 score', 'Accuracy', 'Balanced Accuracy', 'Average Precision', 'ROC AUC Score']
    model_keys=('IF','EIF','EIF_plus')

    # One empty list per (model, metric) pair.
    metrics_dict[name]={key:{metric:[] for metric in metric_names} for key in model_keys}
    per_model=metrics_dict[name]

    for _ in tqdm(range(10)):
        metrics_if,metrics_eif,metrics_eif_plus=evaluate_performance(X_train,X_test,y)
        run_results={'IF':metrics_if,'EIF':metrics_eif,'EIF_plus':metrics_eif_plus}
        for key in model_keys:
            for metric in metric_names:
                per_model[key][metric].append(run_results[key][metric])

    # Averages are added once all the runs have been collected.
    for key in model_keys:
        for metric in metric_names:
            per_model[key][metric+'_avg']=np.mean(np.array(per_model[key][metric]))

    return metrics_dict

# AUTOMATIC PERFORMANCE REPORT COMPUTATION FUNCTION

## Real-World Datasets

In [23]:
def performance_report(name,metrics_dict,metrics_dict_split,data_dir=None):
    """Compute and print the IF / EIF / EIF_plus performance report of a real-world dataset.

    Parameters
    ----------
    name : str
        Dataset name. 'diabetes' and 'moodify' are loaded with csv_dataset,
        every other name with dataset.
    metrics_dict : dict
        Collector for the run in which the models are fitted and evaluated on
        the same scaled data; the entry for `name` is (re)written.
    metrics_dict_split : dict
        Collector for the run in which the models are fitted on X_train and
        evaluated on X_train stacked with X_test; the entry for `name` is (re)written.
    data_dir : str, optional
        Folder holding the dataset files. When omitted, the machine-specific
        folder that used to be hard-coded is used.

    Returns
    -------
    tuple
        ``(metrics_dict, metrics_dict_split)``.

    Notes
    -----
    The working directory of the process is changed to `data_dir`.
    """
    if data_dir is None:
        # Legacy default, kept so that existing calls behave as before.
        data_dir='c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\data'
    os.chdir(data_dir)
    # The loaders receive the folder with a trailing separator; os.sep keeps
    # this working outside Windows as well.
    data_prefix=os.getcwd()+os.sep
    if name=='diabetes' or name=='moodify':
        X,y=csv_dataset(name,data_prefix)
    else:
        X,y=dataset(name,data_prefix)

    X,y=downsample(X,y)
    X_train,X_test=partition_data(X,y)
    scaler=StandardScaler()
    X_train=scaler.fit_transform(X_train)
    X_test=scaler.transform(X_test)
    # Labels are rebuilt positionally: 0 for every X_train row, 1 for every X_test row.
    y_train=np.zeros(X_train.shape[0])
    y_test=np.ones(X_test.shape[0])
    y=np.concatenate([y_train,y_test])
    X_test=np.r_[X_train,X_test]
    scaler2=StandardScaler()
    X=scaler2.fit_transform(X)

    #Compute Performance Report Table without split
    # NOTE(review): the positional labels above are also used with X, which keeps
    # the row order returned by downsample — confirm that this order is
    # "X_train rows first, X_test rows last", otherwise labels and rows are misaligned.
    metrics_dict=collect_performance(metrics_dict,name,X,X,y)

    #Compute Performance Report Table with split
    metrics_dict_split=collect_performance(metrics_dict_split,name,X_train,X_test,y)

    # Print the averaged values, as performance_report_synt does, rather than
    # the raw lists of per-run values.
    print('--------------------------------------------------------')
    print(name)
    print()
    print('f1 score and average precision no train test split ')
    print(f'IF -> f1 score: {metrics_dict[name]["IF"]["f1 score_avg"]}\naverage precision: {metrics_dict[name]["IF"]["Average Precision_avg"]}')
    print(f'EIF -> f1 score: {metrics_dict[name]["EIF"]["f1 score_avg"]}\naverage precision: {metrics_dict[name]["EIF"]["Average Precision_avg"]}')
    print(f'EIF_plus -> f1 score: {metrics_dict[name]["EIF_plus"]["f1 score_avg"]}\naverage precision: {metrics_dict[name]["EIF_plus"]["Average Precision_avg"]}')
    print(' ')
    print('f1 score and average precision with train test split ')
    print(f'IF -> f1 score: {metrics_dict_split[name]["IF"]["f1 score_avg"]}\naverage precision: {metrics_dict_split[name]["IF"]["Average Precision_avg"]}')
    print(f'EIF -> f1 score: {metrics_dict_split[name]["EIF"]["f1 score_avg"]}\naverage precision: {metrics_dict_split[name]["EIF"]["Average Precision_avg"]}')
    print(f'EIF_plus -> f1 score: {metrics_dict_split[name]["EIF_plus"]["f1 score_avg"]}\naverage precision: {metrics_dict_split[name]["EIF_plus"]["Average Precision_avg"]}')
    print('-----------------------------------------------------------')

    return metrics_dict,metrics_dict_split

## Synthetic Datasets

In [None]:
def performance_report_synt(name,X_train,X_test,metrics_dict,metrics_dict_split):
    """Compute and print the performance report of a synthetic dataset.

    X_train rows are labelled 0 and X_test rows 1. Two evaluations are run
    through collect_performance and stored under ``name``:

    * in ``metrics_dict``: models fitted and evaluated on the stacked data,
      scaled with a scaler fitted on the stacked data;
    * in ``metrics_dict_split``: models fitted on the scaled X_train and
      evaluated on the scaled X_train stacked with the scaled X_test, using a
      scaler fitted on X_train only.

    Returns ``(metrics_dict, metrics_dict_split)``.
    """
    def _print_scores(table):
        # Averaged f1 score and average precision, one entry per model.
        for label in ('IF','EIF','EIF_plus'):
            row=table[name][label]
            print(f'{label} -> f1 score: {row["f1 score_avg"]}\naverage precision: {row["Average Precision_avg"]}')

    stacked_raw=np.r_[X_train,X_test]

    # Scaler fitted on the training points only (evaluation with split).
    split_scaler=StandardScaler()
    train_scaled=split_scaler.fit_transform(X_train)
    anomalies_scaled=split_scaler.transform(X_test)
    y=np.concatenate([np.zeros(train_scaled.shape[0]),np.ones(anomalies_scaled.shape[0])])
    eval_scaled=np.r_[train_scaled,anomalies_scaled]

    # Scaler fitted on all the points (evaluation without split).
    full_scaled=StandardScaler().fit_transform(stacked_raw)

    #Compute Performance Report Table without split
    metrics_dict=collect_performance(metrics_dict,name,full_scaled,full_scaled,y)

    #Compute Performance Report Table with split
    metrics_dict_split=collect_performance(metrics_dict_split,name,train_scaled,eval_scaled,y)

    print('--------------------------------------------------------')
    print(name)
    print()
    print('f1 score and average precision no train test split ')
    _print_scores(metrics_dict)
    print(' ')
    print('f1 score and average precision with train test split ')
    _print_scores(metrics_dict_split)
    print('-----------------------------------------------------------')

    return metrics_dict,metrics_dict_split
    

In [68]:
# Empty collectors passed to (and returned by) the performance_report* functions:
# one for the evaluation without train/test split, one for the evaluation with it.
metrics_dict={}
metrics_dict_split={}

Reload the `X_train` dataset before each synthetic dataset, to avoid having it scaled multiple times.

In [69]:
# Reload the pickle holding the synthetic training set from the current working directory.
# The `with` block closes the file handle, and os.path.join avoids the Windows-only separator.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(os.path.join(os.getcwd(),'ball_6_dim.pkl'), "rb") as file_to_read:
    loaded_dictionary = pickle.load(file_to_read)

Synthetic Datasets

In [None]:
# Anomaly set to evaluate for each synthetic dataset name.
synthetic_anomalies={
    'Xaxis':X_xaxis,
    'Yaxis':X_yaxis,
    'Bisect':X_bisect,
    'Bisect_3d':X_bisect_3d,
    'Bisect_6d':X_bisect_6d,
}

for synthetic_name,X_anomalies in synthetic_anomalies.items():
    # Take the training set from the loaded pickle before every report, the
    # first one included, so that no run depends on an X_train left over from
    # another cell.
    X_train=loaded_dictionary['X_train']
    metrics_dict,metrics_dict_split=performance_report_synt(synthetic_name,X_train,X_anomalies,metrics_dict,metrics_dict_split)

Real World Datasets 

In [None]:
# Run the real-world report on every listed dataset; results accumulate in the two collectors.
# NOTE: this rebinds dataset_names, which the "Test paths" cell also defines.
dataset_names=['wine','annthyroid','breastw','shuttle','pima','cardio','glass',
             'ionosphere','pendigits','diabetes','moodify']
for name in dataset_names:
    metrics_dict,metrics_dict_split=performance_report(name,metrics_dict,metrics_dict_split)

## Save in pkl file

In [9]:
# Move to the folder where the report is saved.
# NOTE: absolute, machine-specific Windows path — it must be adapted on any other machine.
os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\results\\davide\\Performance Report')
path=os.getcwd()
path

'c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\results\\davide\\Performance Report'

In [97]:
# Append the file name to `path` only if it is not already there: without this
# guard, running the cell a second time would append the file name again and
# point to a non-existent location.
report_suffix='\\Performance_Report_final_synt.pkl'
if not path.endswith(report_suffix):
    path = path + report_suffix
with open(path, 'wb') as f:
    pickle.dump(metrics_dict_split,f)

### Read from pkl file

In [10]:
# Move to the (absolute, machine-specific) project folder and read the saved report back.
os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI')
path = os.getcwd() + '\\results\\davide\\Performance Report\\Performance_Report_final_synt.pkl'
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open(path, 'rb') as f:
    Performance_report_synt = pickle.load(f)

# Test for filename

In [12]:
# Check that the 'pima' entry of dataset_paths points to a CSV file that pandas can read.
data=pd.read_csv(dataset_paths['pima'])
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [13]:
# Same check for the 'diabetes' entry.
# NOTE(review): the displayed frame has an `Unnamed: 0` column, which suggests the CSV
# was saved together with its index — confirm whether that column should be dropped.
data=pd.read_csv(dataset_paths['diabetes'])
data.head()

Unnamed: 0.1,Unnamed: 0,age,bmi,HbA1c_level,blood_glucose_level,Target
0,0,80.0,25.19,6.6,140,0
1,1,54.0,27.32,6.6,80,0
2,2,28.0,27.32,5.7,158,0
3,3,36.0,23.45,5.0,155,0
4,4,76.0,20.14,4.8,155,0


In [14]:
# Same check for the 'moodify' entry.
# NOTE(review): the displayed frame has an `Unnamed: 0` column, which suggests the CSV
# was saved together with its index — confirm whether that column should be dropped.
data=pd.read_csv(dataset_paths['moodify'])
data.head()

Unnamed: 0.1,Unnamed: 0,duration (ms),danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,spec_rate,Target
0,0,195000.0,0.611,0.614,-8.815,0.0672,0.0169,0.000794,0.753,0.52,128.05,3.446154e-07,0
1,1,194641.0,0.638,0.781,-6.848,0.0285,0.0118,0.00953,0.349,0.25,122.985,1.464234e-07,0
2,2,217573.0,0.56,0.81,-8.029,0.0872,0.0071,8e-06,0.241,0.247,170.044,4.00785e-07,0
3,3,443478.0,0.525,0.699,-4.571,0.0353,0.0178,8.8e-05,0.0888,0.199,92.011,7.959809e-08,0
4,4,225862.0,0.367,0.771,-5.863,0.106,0.365,1e-06,0.0965,0.163,115.917,4.693131e-07,0
