In [1]:
from ipynb.fs.full.metrics import exos_running_time_df
from ipynb.fs.full.metrics import compute_performance
from ipynb.fs.full.metrics import aggregate_performance
import pandas as pd
pd.set_option('display.precision', 2)

import pickle
import os

# Accuracy and Running Time

In [2]:
def get_performance_df(n_streams=2, 
                       bfname = '100K_Case1', 
                       gt_folder = '/home/epanjei/Codes/OutlierGen/exos/nstreams/two',
                       rel_path =  'pickles/nstreams/2',
                       n_experiments=10,
                       window_size=1000,
                       score_precision=0.1):
    """Collect accuracy and running time over repeated experiment runs.

    For every experiment ``i`` in ``1..n_experiments`` the result file
    ``{i}_{n_streams}_{bfname}.pkl`` is compared against the ground-truth file
    ``{n_streams}_{bfname}.pkl`` through ``compute_performance``.

    Parameters
    ----------
    n_streams : int
        Number of streams; used as filename prefix and forwarded to
        ``compute_performance``.
    bfname : str
        Base name of the benchmark file (without extension).
    gt_folder : str
        Folder holding the ground-truth pickle.
    rel_path : str
        Result folder, resolved against the current working directory
        (an absolute path is used as-is).
    n_experiments : int
        Number of experiment runs to load. Runs are numbered from 1.
    window_size : int
        Forwarded unchanged to ``compute_performance``.
    score_precision : float
        Forwarded unchanged to ``compute_performance``.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with the columns ``experiment``, ``accuracy``
        (``len(matched) / n_outliers * 100``) and ``running_time``.
        The frame is also pickled to
        ``{result_folder}/performance_{n_streams}_k1_{bfname}.pkl``.

    Raises
    ------
    ZeroDivisionError
        If ``compute_performance`` reports ``n_outliers == 0`` for a run.
    """
    gt_filename = f'{n_streams}_{bfname}.pkl'
    result_folder = os.path.join(os.getcwd(), rel_path)

    # NOTE: n_experiments is deliberately taken from the argument; it used to
    # be overwritten with 10 here, which made the parameter a no-op.
    results = list()
    experiments = list()
    simulation_times = list()
    for i in range(1, n_experiments + 1):
        matched, n_outliers, s_time = compute_performance(gt_folder=gt_folder,
                                                          gt_filename=gt_filename,
                                                          result_folder=result_folder,
                                                          result_filename=f'{i}_{gt_filename}',
                                                          n_streams=n_streams,
                                                          window_size=window_size,
                                                          score_precision=score_precision)
        # Share of outliers that were matched, as a percentage.
        results.append((len(matched) / n_outliers) * 100)
        simulation_times.append(s_time)
        experiments.append(i)

    accuracy = {'experiment' : experiments, 'accuracy' : results, 'running_time' : simulation_times}
    df = pd.DataFrame(accuracy)
    df.to_pickle(os.path.join(result_folder, f'performance_{n_streams}_k1_{bfname}.pkl'))
    return df

In [3]:
def get_performance_df_v2(n_streams=2, 
                          bfname = '100K_Case1', 
                          gt_folder = '/home/epanjei/Codes/OutlierGen/exos/nstreams',
                          rel_path =  'pickles/nstreams',
                          performance_folder = 'pickles/performance/nstreams',
                          n_experiments=10,
                          window_size=1000,
                          non_data_attr=2):
    """Aggregate mean precision, recall, F1 and running time per experiment.

    The per-stream-count sub-folder ``{n_streams}`` is appended to
    ``gt_folder``, ``rel_path`` and ``performance_folder``. For every
    experiment ``i`` in ``1..n_experiments`` ``aggregate_performance`` is
    called on the result file ``{i}_{n_streams}_{bfname}.pkl``; the means of
    the ``precision``, ``recall`` and ``f1_score`` columns of the frame it
    returns are recorded together with the reported running time.

    Parameters
    ----------
    n_streams : int
        Number of streams; selects the sub-folder and the filename prefix.
    bfname : str
        Base name of the benchmark file (without extension).
    gt_folder : str
        Root folder of the ground-truth pickles (trailing slash tolerated).
    rel_path : str
        Root folder of the result pickles, resolved against the current
        working directory.
    performance_folder : str
        Root folder for performance output, resolved against the current
        working directory.
    n_experiments : int
        Number of experiment runs to load. Runs are numbered from 1.
    window_size : int
        Forwarded unchanged to ``aggregate_performance``.
    non_data_attr : int
        Forwarded unchanged to ``aggregate_performance``.

    Returns
    -------
    pandas.DataFrame
        One row per experiment with the columns ``experiment``, ``precision``,
        ``recall``, ``f1_score`` and ``running_time``. The frame is also
        pickled to ``{performance_folder}/{n_streams}/aggregate_{n_streams}_{bfname}.pkl``.
    """
    cwd = os.getcwd()
    stream_dir = str(n_streams)

    gt_filename = f'{n_streams}_{bfname}.pkl'
    # os.path.join avoids a doubled separator when a root is given with a
    # trailing slash.
    gt_folder = os.path.join(gt_folder, stream_dir)
    result_folder = os.path.join(cwd, rel_path, stream_dir)
    performance_folder = os.path.join(cwd, performance_folder, stream_dir)

    experiments = list()
    simulation_times = list()
    precision_means = list()
    recall_means = list()
    f1_score_means = list()

    for i in range(1, n_experiments + 1):
        df, s_time = aggregate_performance(gt_folder=gt_folder,
                                           gt_filename=gt_filename,
                                           result_folder=result_folder,
                                           result_filename=f'{i}_{gt_filename}',
                                           performance_folder=performance_folder,
                                           n_streams=n_streams,
                                           window_size=window_size,
                                           # Pass the caller's value through; this
                                           # used to be the literal 2.
                                           non_data_attr=non_data_attr)
        simulation_times.append(s_time)
        precision_means.append(df['precision'].mean())
        recall_means.append(df['recall'].mean())
        f1_score_means.append(df['f1_score'].mean())
        experiments.append(i)

    accuracy = {'experiment' : experiments,
                'precision' : precision_means,
                'recall': recall_means,
                'f1_score' : f1_score_means,
                'running_time' : simulation_times}

    df_aggregate = pd.DataFrame(accuracy)
    df_aggregate.to_pickle(os.path.join(performance_folder, f'aggregate_{gt_filename}'))
    return df_aggregate

In [4]:
# df = get_performance_df(n_streams=30, 
#                        bfname = '100K_Case1', 
#                        gt_folder = '/home/epanjei/Codes/OutlierGen/exos/nstreams/30',
#                        rel_path =  'pickles/nstreams/30',
#                        n_experiments=10)
# df

In [9]:
# Aggregate the 10 repeated runs of the 25-stream '100K_Case1' benchmark;
# the resulting frame is displayed as the cell output.
df = get_performance_df_v2(
    n_streams=25,
    bfname='100K_Case1',
    gt_folder='/home/epanjei/Codes/OutlierGen/exos/nstreams/',
    rel_path='pickles/nstreams/',
    performance_folder='pickles/performance/nstreams',
    n_experiments=10,
    window_size=1000,
    non_data_attr=2,
)

df

Unnamed: 0,experiment,precision,recall,f1_score,running_time
0,1,0.84,0.81,0.78,108.09
1,2,0.84,0.8,0.78,108.24
2,3,0.83,0.8,0.77,107.73
3,4,0.81,0.8,0.76,108.36
4,5,0.84,0.8,0.78,108.59
5,6,0.81,0.79,0.76,108.65
6,7,0.83,0.8,0.78,108.53
7,8,0.83,0.8,0.77,109.12
8,9,0.85,0.81,0.79,108.15
9,10,0.84,0.8,0.78,108.59
