# PERFORMANCE METRICS 

Compute the performance metrics of the EIF and EIF+ models (for the moment let's consider just these two). 

Performance Metrics computed: 

1. The typical classification metrics that we can obtain with sklearn.metrics.classification_report

2. The Average Precision -> this is obtained with sklearn.metrics.average_precision_score, but we can still use the mean values obtained in the Average_Precision.ipynb notebook (the ones used to create the Violin Plot)

3. The ROC AUC Score -> obtainable with sklearn.metrics.roc_auc_score

In [1]:
import sys
import numpy as np
from datetime import datetime
import pandas as pd
import argparse
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import sklearn
sys.path.append('../')
from utils import *
#from utils.feature_selection import *
from plot import *
from simulation_setup import *
from models import *
from models.forests import *
from models.Extended_IF import *
from models.Extended_DIFFI import *
from models.Extended_DIFFI_original import *
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,average_precision_score,roc_auc_score
import seaborn as sns
sns.set()

import os
import pickle 

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

# Synthetic Dataset

## Load pkl files

In [14]:
# Move into the folder holding the synthetic-data pickle files.
# NOTE(review): machine-specific absolute Windows path — adjust it to the local checkout before running.
os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\data\\diffi_data')
# Echo the new working directory as the cell output.
os.getcwd()

'c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\data\\diffi_data'

In [16]:
# Load the synthetic anomaly sets from the current working directory.
# The context manager closes the file handle once the pickle has been read
# (the previous bare open() call left it open for the rest of the session).
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open(os.getcwd()+'\\anomalies.pkl', "rb") as file_to_read:
    loaded_dictionary = pickle.load(file_to_read)

# The pickle keys use the 'X_bisec*' spelling, while the notebook variables use 'X_bisect*'.
X_xaxis=loaded_dictionary['X_xaxis']
X_yaxis=loaded_dictionary['X_yaxis']
X_bisect=loaded_dictionary['X_bisec']
X_bisect_3d=loaded_dictionary['X_bisec_3d']
X_bisect_6d=loaded_dictionary['X_bisec_6d']

## Performance Report Table Computation Function

The following functions can be found in the Python Script called performance_report_functions.py

In [18]:
def if_predict(score,p):
    """Turn anomaly scores into 0/1 labels by flagging the highest-scoring fraction p.

    Parameters
    ----------
    score : array-like of anomaly scores, higher meaning more anomalous.
    p : fraction of samples to flag (contamination), expected in [0, 1].

    Returns
    -------
    Integer array with 1 for flagged samples and 0 otherwise.

    The threshold is the score ranked int(p*len(score)) in descending order and
    the comparison is strict, so ties with the threshold are not flagged.
    """
    n_flagged=int(p*len(score))
    # With p == 1 (or an empty input) the threshold index would fall past the
    # end of the sorted scores; in that case every sample is flagged.
    if n_flagged>=len(score):
        return np.ones(len(score),dtype=int)
    threshold=np.sort(score)[::-1][n_flagged]
    y=score>threshold
    return y.astype(int)

In [19]:
def performance_if(y,score):
    """Compute the performance metrics of the Isolation Forest from its anomaly scores.

    Parameters
    ----------
    y : ground-truth labels, 1 for anomalies and 0 for inliers.
    score : anomaly scores, higher meaning more anomalous.

    Returns
    -------
    Dictionary mapping each metric name to its value.

    The contamination is taken from the labels themselves (fraction of ones)
    and used by if_predict to threshold the scores.
    """
    p=sum(y)/len(y)
    y_pred=if_predict(score,p)
    d={}
    # Threshold-dependent metrics are evaluated on the binary predictions.
    d['Precision']=sklearn.metrics.precision_score(y,y_pred) 
    d['Recall']=sklearn.metrics.recall_score(y,y_pred) 
    d['f1 score']=sklearn.metrics.f1_score(y,y_pred) 
    d['Accuracy']=sklearn.metrics.accuracy_score(y,y_pred) 
    d['Balanced Accuracy']=sklearn.metrics.balanced_accuracy_score(y,y_pred) 
    # Ranking metrics must be evaluated on the continuous scores, exactly as
    # performance_eif does; feeding them the thresholded predictions collapses
    # the ranking to a single operating point and makes IF not comparable
    # with EIF/EIF+.
    d['Average Precision']=sklearn.metrics.average_precision_score(y,score) 
    d['ROC AUC Score']=sklearn.metrics.roc_auc_score(y,score) 
    return d

In [20]:
def performance_eif(y,score,X_test,model):
    """Compute the performance metrics of an EIF / EIF+ model.

    Parameters
    ----------
    y : ground-truth labels, 1 for anomalies and 0 for inliers.
    score : anomaly scores of the samples in X_test.
    X_test : samples handed to model._predict to obtain the binary labels.
    model : fitted model exposing _predict(X, p)
        (presumably returns one flag per sample — confirm against the model class).

    Returns
    -------
    Dictionary mapping each metric name to its value.
    """
    # Fraction of anomalies in the ground truth, used as contamination level.
    contamination=sum(y)/len(y)
    predicted_labels=model._predict(X_test,contamination).astype(int)
    metrics=sklearn.metrics
    return {
        # Threshold-dependent metrics: computed on the binary predictions.
        'Precision':metrics.precision_score(y,predicted_labels),
        'Recall':metrics.recall_score(y,predicted_labels),
        'f1 score':metrics.f1_score(y,predicted_labels),
        'Accuracy':metrics.accuracy_score(y,predicted_labels),
        'Balanced Accuracy':metrics.balanced_accuracy_score(y,predicted_labels),
        # Ranking metrics: computed on the continuous scores.
        'Average Precision':metrics.average_precision_score(y,score),
        'ROC AUC Score':metrics.roc_auc_score(y,score),
    }

In [21]:
def evaluate_performance(X_train,X_test,y):
    """Fit EIF, EIF+ and IF on X_train and score them on X_test.

    Parameters
    ----------
    X_train : samples used to fit the three models.
    X_test : samples on which the models are evaluated.
    y : ground-truth labels of X_test, 1 for anomalies and 0 for inliers.

    Returns
    -------
    Tuple (metrics_if, metrics_eif, metrics_eif_plus) of metric dictionaries.
    """
    # The models are built and fitted in a fixed order (EIF, EIF+, IF).
    eif_model=ExtendedIsolationForest(n_estimators=300,plus=0)
    eif_model.fit(X_train)

    eif_plus_model=ExtendedIsolationForest(n_estimators=300,plus=1)
    eif_plus_model.fit(X_train)

    # Each tree sees at most 256 samples, or the whole training set if smaller.
    if_model=IsolationForest(n_estimators=300,max_samples=min(len(X_train),256))
    if_model.fit(X_train)

    # sklearn's score_samples is negated and shifted by 0.5 before use.
    if_scores=0.5-if_model.score_samples(X_test)
    eif_scores=eif_model.predict(X_test)
    eif_plus_scores=eif_plus_model.predict(X_test)

    if_metrics=performance_if(y,if_scores)
    eif_metrics=performance_eif(y,eif_scores,X_test,eif_model)
    eif_plus_metrics=performance_eif(y,eif_plus_scores,X_test,eif_plus_model)

    return if_metrics,eif_metrics,eif_plus_metrics
    

In [22]:
def collect_performance(metrics_dict,name,X_train,X_test,y,n_runs=10):
    """Repeat the model evaluation several times and store every metric under metrics_dict[name].

    Parameters
    ----------
    metrics_dict : dictionary updated in place; any existing entry for name is overwritten.
    name : dataset name used as key.
    X_train, X_test, y : forwarded unchanged to evaluate_performance.
    n_runs : number of independent fit/evaluate repetitions (default 10, the
        value that was previously hard-coded).

    Returns
    -------
    The same metrics_dict, where metrics_dict[name][model][metric] is the list
    of per-run values and metrics_dict[name][model][metric+'_avg'] is their mean,
    for model in 'IF', 'EIF', 'EIF_plus'.

    Raises
    ------
    ValueError if n_runs is smaller than 1 (the averages would be undefined).
    """
    if n_runs<1:
        raise ValueError('n_runs must be at least 1')

    # Same order as the tuple returned by evaluate_performance.
    model_names=('IF','EIF','EIF_plus')
    metric_names=['Precision', 'Recall', 'f1 score', 'Accuracy', 'Balanced Accuracy', 'Average Precision', 'ROC AUC Score']

    metrics_dict[name]={
        model_name:{metric_name:[] for metric_name in metric_names}
        for model_name in model_names
    }

    for _ in tqdm(range(n_runs)):
        run_metrics=dict(zip(model_names,evaluate_performance(X_train,X_test,y)))

        for model_name in model_names:
            for metric_name in metric_names:
                metrics_dict[name][model_name][metric_name].append(run_metrics[model_name][metric_name])

    # The averages are added after all runs, next to the raw per-run lists.
    for model_name in model_names:
        for metric_name in metric_names:
            metrics_dict[name][model_name][metric_name+'_avg']=np.mean(np.array(metrics_dict[name][model_name][metric_name]))

    return metrics_dict

# AUTOMATIC PERFORMANCE REPORT COMPUTATION FUNCTION

## Real-World Datasets

In [23]:
def performance_report(name,metrics_dict,metrics_dict_split):
    """Evaluate IF, EIF and EIF+ on one real-world dataset and print a short summary.

    Two scenarios are evaluated through collect_performance:
    - no split: the models are fitted and evaluated on the whole scaled dataset;
    - split: the models are fitted on the first partition returned by
      partition_data and evaluated on both partitions stacked together.

    Parameters
    ----------
    name : dataset name; 'diabetes' and 'moodify' are loaded with csv_dataset,
        every other name with dataset.
    metrics_dict : results of the no-split scenario, updated under key name.
    metrics_dict_split : results of the split scenario, updated under key name.

    Returns
    -------
    Tuple (metrics_dict, metrics_dict_split).
    """
    # NOTE(review): machine-specific absolute path, and os.chdir changes the
    # working directory of the whole process; kept unchanged for compatibility.
    os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\data')
    data_dir=os.getcwd()+'\\'
    if name in ('diabetes','moodify'):
        X,y=csv_dataset(name,data_dir)
    else:
        X,y=dataset(name,data_dir)

    X,y=downsample(X,y)
    X_train,X_test=partition_data(X,y)
    # The labels built below are [0]*len(X_train) followed by [1]*len(X_test).
    # Rebuild X in that same row order (as performance_report_synt does);
    # otherwise the no-split evaluation pairs the rows of X, still in the
    # dataset's original order, with labels sorted by class.
    X=np.r_[X_train,X_test]
    scaler=StandardScaler()
    X_train=scaler.fit_transform(X_train)
    X_test=scaler.transform(X_test)
    y_train=np.zeros(X_train.shape[0])
    y_test=np.ones(X_test.shape[0])
    y=np.concatenate([y_train,y_test])
    # Evaluation set of the split scenario: both partitions, scaled with the
    # statistics of the training partition only.
    X_test=np.r_[X_train,X_test]
    scaler2=StandardScaler()
    X=scaler2.fit_transform(X)

    #Compute Performance Report Table without split
    metrics_dict=collect_performance(metrics_dict,name,X,X,y)

    #Compute Performance Report Table with split
    metrics_dict_split=collect_performance(metrics_dict_split,name,X_train,X_test,y)

    # Print the averages over the runs (the '_avg' entries), consistently with
    # performance_report_synt, instead of the raw per-run lists.
    model_names=('IF','EIF','EIF_plus')
    print('--------------------------------------------------------')
    print(name)
    print()
    print('f1 score and average precision no train test split ')
    for model_name in model_names:
        results=metrics_dict[name][model_name]
        print(f'{model_name} -> f1 score: {results["f1 score_avg"]}\naverage precision: {results["Average Precision_avg"]}')
    print(' ')
    print('f1 score and average precision with train test split ')
    for model_name in model_names:
        results=metrics_dict_split[name][model_name]
        print(f'{model_name} -> f1 score: {results["f1 score_avg"]}\naverage precision: {results["Average Precision_avg"]}')
    print('-----------------------------------------------------------')

    return metrics_dict,metrics_dict_split

## Synthetic Datasets

In [None]:
def performance_report_synt(name,X_train,X_test,metrics_dict,metrics_dict_split):
    """Evaluate IF, EIF and EIF+ on one synthetic dataset and print the averaged results.

    Parameters
    ----------
    name : key under which the results are stored.
    X_train : training samples, all labelled 0.
    X_test : samples labelled 1, appended to the training samples for evaluation.
    metrics_dict : results of the no-split scenario, updated under key name.
    metrics_dict_split : results of the split scenario, updated under key name.

    Returns
    -------
    Tuple (metrics_dict, metrics_dict_split).
    """
    # Whole dataset in [train rows, test rows] order, stacked before any scaling.
    stacked_raw=np.r_[X_train,X_test]

    # Split scenario: the scaler is fitted on the training samples only.
    split_scaler=StandardScaler()
    train_scaled=split_scaler.fit_transform(X_train)
    anomalies_scaled=split_scaler.transform(X_test)
    labels=np.concatenate([np.zeros(train_scaled.shape[0]),np.ones(anomalies_scaled.shape[0])])
    eval_scaled=np.r_[train_scaled,anomalies_scaled]

    # No-split scenario: a second scaler is fitted on the whole dataset.
    stacked_scaled=StandardScaler().fit_transform(stacked_raw)

    #Compute Performance Report Table without split
    metrics_dict=collect_performance(metrics_dict,name,stacked_scaled,stacked_scaled,labels)

    #Compute Performance Report Table with split
    metrics_dict_split=collect_performance(metrics_dict_split,name,train_scaled,eval_scaled,labels)

    sections=(
        ('f1 score and average precision no train test split ',metrics_dict),
        ('f1 score and average precision with train test split ',metrics_dict_split),
    )
    print('--------------------------------------------------------')
    print(name)
    print()
    for position,(header,results) in enumerate(sections):
        # A one-space line separates the two sections.
        if position:
            print(' ')
        print(header)
        for model_name in ('IF','EIF','EIF_plus'):
            averages=results[name][model_name]
            print(f'{model_name} -> f1 score: {averages["f1 score_avg"]}\naverage precision: {averages["Average Precision_avg"]}')
    print('-----------------------------------------------------------')

    return metrics_dict,metrics_dict_split
    

In [68]:
# Result containers: one for the no-split scenario, one for the train/test split scenario.
metrics_dict,metrics_dict_split={},{}

Reload the X_train dataset before each synthetic dataset is processed, so that it is never scaled more than once.

In [69]:
# Load the pickle holding the training data; the context manager closes the
# file handle once the content has been read (the previous bare open() call
# left it open).
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open(os.getcwd()+'\\ball_6_dim.pkl', "rb") as file_to_read:
    loaded_dictionary = pickle.load(file_to_read)

Synthetic Datasets

In [None]:
# Synthetic anomaly sets, in the order in which they are evaluated.
synthetic_datasets={
    'Xaxis':X_xaxis,
    'Yaxis':X_yaxis,
    'Bisect':X_bisect,
    'Bisect_3d':X_bisect_3d,
    'Bisect_6d':X_bisect_6d,
}

for name,X_anomalies in synthetic_datasets.items():
    # Bind X_train from the loaded pickle before every call: the first call
    # previously used X_train before any assignment in this notebook, which
    # fails with a NameError on a fresh kernel.
    X_train=loaded_dictionary['X_train']
    metrics_dict,metrics_dict_split=performance_report_synt(name,X_train,X_anomalies,metrics_dict,metrics_dict_split)

Real World Datasets 

In [None]:
# Real-world benchmark datasets, evaluated one after the other; the results of
# each dataset are accumulated in the two dictionaries under its name.
dataset_names=[
    'wine',
    'annthyroid',
    'breastw',
    'shuttle',
    'pima',
    'cardio',
    'glass',
    'ionosphere',
    'pendigits',
    'diabetes',
    'moodify',
]
for name in dataset_names:
    metrics_dict,metrics_dict_split=performance_report(name,metrics_dict,metrics_dict_split)

## Save in pkl file

In [9]:
# Move into the folder where the performance report is stored.
# NOTE(review): machine-specific absolute Windows path — adjust it to the local checkout before running.
os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\results\\davide\\Performance Report')
# Keep the folder in `path`; the next cell appends the file name to it.
path=os.getcwd()
# Echo the folder as the cell output.
path

'c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI\\results\\davide\\Performance Report'

In [97]:
# Append the report file name to the folder stored in `path`.
# NOTE(review): `path` is rebound in place, so running this cell twice without
# re-running the previous one appends the file name twice.
path = path + '\\Performance_Report_final_synt.pkl'
# Only the train/test split results (metrics_dict_split) are written to disk;
# metrics_dict is not saved by this cell.
with open(path, 'wb') as f:
    pickle.dump(metrics_dict_split,f)

### Read from pkl file

In [10]:
# Move to the repository root and rebuild the report path from there.
# NOTE(review): machine-specific absolute Windows path — adjust it to the local checkout before running.
os.chdir('c:\\Users\\lemeda98\\Desktop\\PHD Information Engineering\\ExIFFI\\ExIFFI')
path = os.getcwd() + '\\results\\davide\\Performance Report\\Performance_Report_final_synt.pkl'
# Load the saved report back into memory.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open(path, 'rb') as f:
    Performance_report_synt = pickle.load(f)