In [1]:
import warnings
warnings.filterwarnings('ignore')

import time
import math
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets

from micoes import ClustreamExplainer
from micoes import DenstreamExplainer

from micoes.explainer.common import map_feature_scores
from micoes.microclusters.clustream import CluStream
from micoes.microclusters.clustream import update_clu_microcluster
from micoes.microclusters.denstream import DenStream
from micoes.microclusters.denstream import update_den_microcluster

from micoes.experiment.run_clustream_explainer import run_cluexplainer_experiment_on_each_file
from micoes.experiment.run_clustream_explainer import build_dataframe_cluexplainer_experiments_basic
from micoes.experiment.run_clustream_explainer import build_compact_df_clu_explainer_experiments

from micoes.experiment.run_denstream_explainer import run_denexplainer_experiment_on_each_file
from micoes.experiment.run_denstream_explainer import build_dataframe_denexplainer_experiments_basic
from micoes.experiment.run_denstream_explainer import build_compact_df_den_explainer_experiments

from micoes.experiment.run_microcluster_explainer_for_stream import run_temporal_explainer_comparison

from micoes.experiment.run_evaluation import FeatureScoresEvaluation

from micoes.experiment.generate_explainer_report import build_temporal_df_microcluster_explainer

import logging
logging.basicConfig(format='%(message)s', level=logging.INFO)


# NOTE(review): two backend magics run back to back; the second one (`inline`)
# presumably determines the backend in effect, making `notebook` redundant -- confirm.
%matplotlib notebook
%matplotlib inline


## Testing the CluStream-based Explainer

In [2]:
def test_run_detector_explainer_clustream_online_wtiming():
    """Run the CluStream explainer experiment once on the wine dataset.

    Returns:
        The value produced by ``run_cluexplainer_experiment_on_each_file`` for
        the wine file, passed through unchanged.
    """
    filename = '../data/odds/wine.csv'
    # The experiment runner receives the path itself, so the CSV is not loaded
    # here (the previous version parsed it into a frame that was never used).

    # CluStream parameters, handed positionally to the runner.
    n_microclusters = 10
    alpha = 3
    tau = 60 * 0.01
    result = run_cluexplainer_experiment_on_each_file(filename, n_microclusters, alpha, tau,
                                             label='label', init_percentage=20, detector_type='lof',
                                             max_rad_multiplier=2, round_flag=True, prior_knowledge=1,
                                             regularization='l1', regularization_param=1,
                                             intercept_scaling=1, simple_feature_contribution=True)
    return result
result = test_run_detector_explainer_clustream_online_wtiming()

In [3]:
def test_build_dataframe_cluexplainer_experiments_basic():
    """Build the basic CluStream explainer experiment dataframes for wine.csv.

    Returns:
        The value produced by ``build_dataframe_cluexplainer_experiments_basic``;
        the cell below indexes its first element.
    """
    data_folder = '../data/odds/'
    stream_files = ('wine.csv',)

    # Positional CluStream arguments: n_microclusters, alpha, tau.
    clustream_args = (10, 3, 60 * 0.01)

    # Keyword options shared by the detector and the explainer.
    experiment_options = dict(
        label='label', init_percentage=20, detector_type='lof',
        max_rad_multiplier=2, round_flag=True, prior_knowledge=1,
        regularization='l1', regularization_param=1,
        intercept_scaling=1, simple_feature_contribution=True,
    )
    return build_dataframe_cluexplainer_experiments_basic(
        data_folder, stream_files, *clustream_args, **experiment_options)
results = test_build_dataframe_cluexplainer_experiments_basic()
results[0]

Running on wine -----------------


Unnamed: 0,stream_names,detection_duration,microcluster_duration,explanation_duration,total_data,total_outliers,total_features,coin_duration,clu_explainer_duration
0,wine,0.016066,0.02755,0.050362,103,7,13,0.23012,0.077912


In [4]:
def test_build_compact_df_clu_explainer_experiments(df):
    """Return the compact version of a CluStream explainer experiment dataframe.

    Args:
        df: Dataframe forwarded as-is to
            ``build_compact_df_clu_explainer_experiments``.

    Returns:
        Whatever ``build_compact_df_clu_explainer_experiments`` returns for ``df``.
    """
    return build_compact_df_clu_explainer_experiments(df)
df = results[0]
new_df = test_build_compact_df_clu_explainer_experiments(df)
new_df

NumExpr defaulting to 8 threads.


Unnamed: 0,stream_names,detection_duration,microcluster_duration,explanation_duration,total_data,total_outliers,total_features,coin_duration,clu_explainer_duration,ratio_microcluster_detection,ratio_clu_explanation_detection,ratio_coin_detection
0,wine,0.016066,0.02755,0.050362,103,7,13,0.23012,0.077912,1.714756,4.849386,14.323062


## Testing the DenStream-based Explainer

In [5]:
def test_run_detector_explainer_denstream_online_wtiming():
    """Run the DenStream explainer experiment once on the wine dataset.

    ``mu`` and ``eta`` are derived from the CSV itself before the experiment
    is launched.

    Returns:
        The value produced by ``run_denexplainer_experiment_on_each_file``.
    """
    filename = '../data/odds/wine.csv'
    wine = pd.read_csv(filename)

    # mu: row count divided by column count of the loaded frame
    # (the label column is still part of the frame at this point).
    n_rows, n_cols = wine.shape
    mu = n_rows / n_cols
    lamda = 1
    beta = 0.01
    # eta: standard deviation over every feature value, label column dropped.
    feature_values = wine.drop(['label'], axis=1).values
    eta = np.std(feature_values)

    experiment_options = dict(
        label='label', init_percentage=10, detector_type='lof',
        max_rad_multiplier=2, round_flag=True, prior_knowledge=1,
        regularization='l1', regularization_param=1,
        intercept_scaling=1, simple_feature_contribution=True,
    )
    return run_denexplainer_experiment_on_each_file(
        filename, lamda, mu, beta, eta, **experiment_options)
result = test_run_detector_explainer_denstream_online_wtiming()

In [6]:
def test_build_dataframe_denexplainer_experiments_basic():
    """Build the basic DenStream explainer experiment dataframes for wine.csv.

    Only DenStream parameters are defined here: the CluStream settings that
    used to be assigned in this function were never passed to the experiment.

    Returns:
        The value produced by ``build_dataframe_denexplainer_experiments_basic``;
        the cell below indexes its first element.
    """
    folder = '../data/odds/'
    filenames = ('wine.csv',)
    # Build the path from folder/filenames so the file used to derive mu/eta
    # cannot drift from the file handed to the experiment.
    check = pd.read_csv(folder + filenames[0])

    ## parameters for DenStream
    # mu: rows / columns of the loaded frame (label column still included).
    mu = check.shape[0] / check.shape[1]
    lamda = 1
    beta = 0.01
    # eta: standard deviation over all feature values, label column dropped.
    eta = np.std(check.drop(['label'], axis=1).values)
    results = build_dataframe_denexplainer_experiments_basic(folder, filenames, lamda, mu, beta, eta,
                                                   label='label', init_percentage=20, detector_type='lof',
                                                   max_rad_multiplier=2, round_flag=True, prior_knowledge=1,
                                                   regularization='l1', regularization_param=1,
                                                   intercept_scaling=1, simple_feature_contribution=True)
    return results
results = test_build_dataframe_denexplainer_experiments_basic()
results[0]

Running on wine -----------------


Unnamed: 0,stream_names,detection_duration,microcluster_duration,explanation_duration,total_data,total_outliers,total_features,coin_duration,den_explainer_duration
0,wine,0.001221,0.004759,0.049761,103,7,13,0.240706,0.05452


In [7]:
def test_build_compact_df_den_explainer_experiments(df):
    """Return the compact version of a DenStream explainer experiment dataframe.

    Args:
        df: Dataframe forwarded as-is to
            ``build_compact_df_den_explainer_experiments``.

    Returns:
        Whatever ``build_compact_df_den_explainer_experiments`` returns for ``df``.
    """
    return build_compact_df_den_explainer_experiments(df)
df = results[0]
new_df = test_build_compact_df_den_explainer_experiments(df)
new_df

Unnamed: 0,stream_names,detection_duration,microcluster_duration,explanation_duration,total_data,total_outliers,total_features,coin_duration,den_explainer_duration,ratio_microcluster_detection,ratio_den_explanation_detection,ratio_coin_detection
0,wine,0.001221,0.004759,0.049761,103,7,13,0.240706,0.05452,3.89925,44.668211,197.209262


## Testing on Temporal Window

In [8]:
def test_explainer_on_temporal_window():
    """Run the CluStream/DenStream explainer comparison over temporal windows of wine.csv.

    Returns:
        A 3-tuple ``(detection_results, cluexplainer_results, denexplainer_results)``
        unpacked from ``run_temporal_explainer_comparison``.
    """
    filepath = '../data/odds/wine.csv'
    wine = pd.read_csv(filepath)
    n_rows, n_cols = wine.shape

    # CluStream settings.
    clustream_params = dict(n_microclusters=10, alpha=3, tau=60 * 0.01)

    # DenStream settings; mu and eta are derived from the data itself.
    denstream_params = dict(
        lamda=1,
        mu=n_rows / n_cols,
        beta=0.01,
        eta=np.std(wine.drop(['label'], axis=1).values),
    )

    # Detector / explainer options.
    explainer_params = dict(
        label='label', init_percentage=10, detector_type='lof',
        max_rad_multiplier=2, round_flag=True, prior_knowledge=1,
        regularization='l1', regularization_param=1,
        intercept_scaling=1, simple_feature_contribution=True,
    )

    # Stream and windowing options.
    stream_params = dict(
        arrival_rate='Fixed', time_unit='seconds', time_interval=1, time_window=None,
        window_size=60, sliding_size=60, delay_time=1,
    )

    detections, clu_out, den_out = run_temporal_explainer_comparison(
        filepath=filepath,
        **clustream_params, **denstream_params,
        **explainer_params, **stream_params)
    return detections, clu_out, den_out
detection_results, cluexplainer_results, denexplainer_results = test_explainer_on_temporal_window()

Running stream...


In [9]:
# Per-window summary for the CluStream-based explainer; the bare name on the
# last line is what the cell displays.
clu_temporal_df = build_temporal_df_microcluster_explainer(
    detection_results, cluexplainer_results, explainer_type='clu')
clu_temporal_df

Unnamed: 0,windows,detection_duration,microcluster_duration,explanation_duration,clu_coin_explainer_duration,coin_duration,n_data,n_outlier
0,window_0,0.001105,0.016943,0.056581,0.073524,0.379837,60,12
1,window_1,0.001012,0.009743,0.033032,0.042775,0.129115,60,4
2,window_2,0.000553,0.000983,6.2e-05,0.001045,0.000698,9,0


In [10]:
# Per-window summary for the DenStream-based explainer; the bare name on the
# last line is what the cell displays.
den_temporal_df = build_temporal_df_microcluster_explainer(
    detection_results, denexplainer_results, explainer_type='den')
den_temporal_df

Unnamed: 0,windows,detection_duration,microcluster_duration,explanation_duration,den_coin_explainer_duration,coin_duration,n_data,n_outlier
0,window_0,0.001105,0.002825,0.0608,0.063626,0.379837,60,12
1,window_1,0.001012,0.002801,0.030486,0.033287,0.129115,60,4
2,window_2,0.000553,0.000499,4.7e-05,0.0006,0.000698,9,0


In [11]:
def test_explainer_with_evaluation_on_temporal_window():
    """Run the temporal explainer comparison on wine.csv and evaluate window_0.

    The feature scores of the first window are compared against the wine
    ground-truth file for three sources: the DenStream explainer, the
    CluStream explainer, and the COIN result stored with the CluStream run.

    Returns:
        dict: maps ``'den'``, ``'clu'`` and ``'coin'`` to the
        ``(matches, intersection)`` pair returned by
        ``FeatureScoresEvaluation.compare_groundtruth_and_feature_scores()``.
        (Previously the results were computed and then discarded.)
    """
    filepath = '../data/odds/wine.csv'
    check = pd.read_csv(filepath)

    ## parameters for CluStream
    n_microclusters = 10
    alpha = 3
    tau = 60 * 0.01

    ## parameters for DenStream
    # mu: rows / columns of the loaded frame (label column still included).
    mu = check.shape[0] / check.shape[1]
    lamda = 1
    beta = 0.01
    # eta: standard deviation over all feature values, label column dropped.
    eta = np.std(check.drop(['label'], axis=1).values)

    detection_results, cluexplainer_results, denexplainer_results = run_temporal_explainer_comparison(filepath=filepath,
                                                                       n_microclusters=n_microclusters, alpha=alpha, tau=tau,
                                                                       lamda=lamda, mu=mu, beta=beta, eta=eta,
                                                                       label='label', init_percentage=10, detector_type='lof',
                                                                       max_rad_multiplier=2, round_flag=True, prior_knowledge=1,
                                                                       regularization='l1', regularization_param=1,
                                                                       intercept_scaling=1, simple_feature_contribution=True,
                                                                       arrival_rate='Fixed', time_unit='seconds', time_interval=1, time_window=None,
                                                                       window_size=60, sliding_size=60, delay_time=1)

    wine_path = '../groundtruth/pickles/wine_60.pickle'
    window = 'window_0'

    # One entry per explainer; evaluated in the same order as before (den, clu, coin).
    feature_scores_by_explainer = {
        'den': denexplainer_results[window]['explanation_result'],
        'clu': cluexplainer_results[window]['explanation_result'],
        'coin': cluexplainer_results[window]['coin_result']['feature_scores'],
    }

    evaluation = {}
    for explainer_name, feature_scores in feature_scores_by_explainer.items():
        evaluator = FeatureScoresEvaluation(wine_path, feature_scores)
        matches, intersection = evaluator.compare_groundtruth_and_feature_scores()
        # Keep each explainer's own intersection instead of overwriting a shared variable.
        evaluation[explainer_name] = (matches, intersection)
    return evaluation
# test_explainer_with_evaluation_on_temporal_window()