In [1]:
import pandas as pd
import pickle

In [2]:
def exos_running_time_df(results):
    """
    Summarise the per-window running times stored in a result dict.

    Inputs
    ------
    results : dict
        must contain results['output'], a mapping window id -> dict. Each window
        dict holds an 'est_time' entry plus one entry per stream id, and every
        stream entry provides 'temporal_neighbor_time' and 'out_attrs_time'.

    Returns
    -------
    pandas.DataFrame
        one row per window with the columns 'windows', 'est_times',
        'neigh_times' and 'out_attrs_times'; the last two hold the maximum
        value over the streams of that window.

    `results` is not modified, so the function can be called repeatedly on the
    same object (e.g. when the notebook cell is re-run).
    """
    output = results['output']
    # Materialise the ids: a dict_keys view is not a proper column value.
    windows = list(output.keys())
    est_times = []
    neigh_times = []
    out_attrs_times = []
    for window in windows:
        window_output = output[window]
        est_times.append(window_output['est_time'])
        # 'est_time' lives in the same dict as the per-stream entries; skip it
        # while iterating instead of deleting it from the caller's data.
        stream_outputs = [entry for key, entry in window_output.items() if key != 'est_time']
        neigh_times.append(max(entry['temporal_neighbor_time'] for entry in stream_outputs))
        out_attrs_times.append(max(entry['out_attrs_time'] for entry in stream_outputs))
    return pd.DataFrame.from_dict({
        'windows': windows,
        'est_times': est_times,
        'neigh_times': neigh_times,
        'out_attrs_times': out_attrs_times,
    })

In [3]:
def unpickled_results(filename):
    """
    Load and return the object stored in a pickle file.

    Inputs
    ------
    filename : str or path-like
        path of the pickle file to read

    Returns
    -------
    the unpickled object
    """
    # NOTE: pickle.load can execute arbitrary code while loading; only use this
    # on result files you produced yourself or otherwise trust.
    # The context manager guarantees the handle is closed, even if loading fails.
    with open(filename, 'rb') as exos_file:
        return pickle.load(exos_file)

In [4]:
def match_attribute_based_on_contribution_score(outlying_attributes, ground_truth):
    """
    Tell whether the first attribute of `outlying_attributes` is in the ground truth.

    Inputs
    ------
    outlying_attributes: dict
        key-value pair, key=attribute's name, value=attribute's contribution score.
        Only the first key is inspected, so the dict is expected to be ordered
        by contribution score from the highest to the lowest.
    ground_truth : list
        list of outlying attributes

    Returns
    -------
    bool
        True when the first key is contained in `ground_truth`, False otherwise
    """
    ranked_names = tuple(outlying_attributes.keys())
    return ranked_names[0] in ground_truth

In [5]:
def match_all_attributes(outlying_attributes, ground_truth, score_precision):
    """
    Tell whether every ground-truth attribute appears in `outlying_attributes`.

    Inputs
    ------
    outlying_attributes: dict
        key-value pair, key=attribute's name, value=attribute's contribution score
    ground_truth : list
        list of outlying attributes
    score_precision :
        not used by this function; kept so existing callers keep working

    Returns
    -------
    bool
        True when all names in `ground_truth` are keys of `outlying_attributes`
        (also True for an empty `ground_truth`), False otherwise
    """
    # Compare as sets: a name repeated in ground_truth must not inflate the
    # number of attributes that have to be found.
    return set(ground_truth).issubset(outlying_attributes.keys())

In [6]:
def match_all_attributes_v2(outlying_attributes, ground_truth, score_precision):
    """
    Tell whether every ground-truth attribute appears in `outlying_attributes`,
    printing the inputs and the matched names whenever that is the case.

    Inputs
    ------
    outlying_attributes: dict
        key-value pair, key=attribute's name, value=attribute's contribution score
    ground_truth : list
        list of outlying attributes
    score_precision :
        not used by this function; kept so existing callers keep working

    Returns
    -------
    bool
        True when all names in `ground_truth` are keys of `outlying_attributes`
        (also True for an empty `ground_truth`), False otherwise
    """
    # De-duplicate first: a name repeated in ground_truth must not inflate the
    # number of attributes that have to be found.
    expected = set(ground_truth)
    check = list(set(outlying_attributes.keys()) & expected)
    if len(check) == len(expected):
        print(f'outlying_attributes {outlying_attributes}')
        print(f'ground_truth {ground_truth}')
        print(f'check {check}')
        return True
    return False

In [7]:
def compute_performance(gt_folder, gt_filename, result_folder, result_filename,
                     n_streams, window_size, score_precision = 0.1):
    """
    Compare the outlying attributes stored in a pickled result file with the
    ground truth of every stream.

    Inputs
    ------
    gt_folder, gt_filename : str
        the ground truth of stream i is read from f'{gt_folder}/{i}_{gt_filename}'
        with pd.read_pickle; it must provide the columns 'label' and
        'outlying_attributes'
    result_folder, result_filename : str
        the results are loaded from f'{result_folder}/{result_filename}'
    n_streams : int
        number of streams; stream ids are 0 .. n_streams-1
    window_size : int
        number of ground-truth rows belonging to one window
    score_precision : float
        forwarded to match_all_attributes

    Returns
    -------
    tuple
        (matched, matched2, n_outliers, simulator_time) where `matched` and
        `matched2` contain one True per outlier accepted by
        match_attribute_based_on_contribution_score and match_all_attributes
        respectively, `n_outliers` is the total number of outlier indices seen
        and `simulator_time` is results['simulator_time']
    """
    results = unpickled_results(f'{result_folder}/{result_filename}')
    per_window_output = results['output']
    window_ids = tuple(per_window_output.keys())  # (window_0, window_1, ...)

    top_attr_hits = []
    all_attr_hits = []
    n_outliers = 0
    for stream_id in range(n_streams):
        labels = pd.read_pickle(f'{gt_folder}/{stream_id}_{gt_filename}')
        labels = labels[['label', 'outlying_attributes']]
        for position, window_id in enumerate(window_ids):
            stream_output = per_window_output[window_id][stream_id]
            # NOTE(review): 'outlier_indices' is indexed by the stream id a second
            # time - presumably it holds one entry per stream; confirm.
            outlier_indices = stream_output['outlier_indices'][stream_id]
            if outlier_indices is None:
                continue
            # Rows of this window, re-indexed from 0 so that the outlier
            # indices can be used as positions inside the window.
            start = position * window_size
            window_labels = labels.iloc[start:start + window_size].reset_index(drop=True)
            n_outliers += len(outlier_indices)
            outlier_labels = window_labels.iloc[outlier_indices].reset_index(drop=True)
            explained = stream_output['out_attrs']
            for row_idx, row in outlier_labels.iterrows():
                truth = row['outlying_attributes']
                top_attr_hits.append(
                    match_attribute_based_on_contribution_score(explained[row_idx], truth))
                all_attr_hits.append(
                    match_all_attributes(explained[row_idx], truth, score_precision))

    # Keep only the successful matches, one True per hit.
    matched = [True] * top_attr_hits.count(True)
    matched2 = [True] * all_attr_hits.count(True)
    return matched, matched2, n_outliers, results['simulator_time']