In [27]:
import glob
import json
import os
from pathlib import Path

import pandas as pd

def get_result_df(files):
    """Load every JSON result file matching a glob pattern into one DataFrame.

    Parameters
    ----------
    files : str
        Glob pattern (e.g. ``'results/*.json'``) selecting the files to read.
        Each file is expected to hold a JSON array; its elements are appended
        as rows.

    Returns
    -------
    pandas.DataFrame
        One row per record across all matched files, in sorted-path order.
        Empty when the pattern matches nothing.
    """
    records = []
    # glob.glob returns paths in arbitrary, filesystem-dependent order; sorting
    # keeps the row order (and therefore the displayed index) stable across runs.
    for path in sorted(glob.glob(files)):
        # JSON text is UTF-8; do not rely on the platform's locale encoding.
        with open(path, 'r', encoding='utf-8') as f:
            records.extend(json.load(f))

    return pd.DataFrame(records)

def make_pivot_table(df, col):
    """Reshape a long results frame into a dataset-by-method table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'dataset'`` and ``'method'`` columns plus the column
        named by ``col``.
    col : str
        Name of the column whose values fill the table cells.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dataset``, one column per ``method``.

    Raises
    ------
    ValueError
        From ``DataFrame.pivot`` if a (dataset, method) pair occurs more than once.
    """
    # Keyword arguments are required: pandas 2.0 made pivot's parameters
    # keyword-only, so the positional form raises TypeError there.
    return df.pivot(index='dataset', columns='method', values=col)

def get_accuracy_table(df):
    """Build a dataset-by-method table of the ``accuracy`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'dataset'``, ``'method'`` and ``'accuracy'`` columns.
        Any other columns (e.g. the ``*_drift_detected`` counters) are ignored.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dataset``, one column per ``method``, cells holding the
        corresponding ``accuracy`` value.

    Raises
    ------
    ValueError
        From ``DataFrame.pivot`` if a (dataset, method) pair occurs more than once.
    """
    # values='accuracy' already restricts the result to that one column, so the
    # drift columns need not be dropped first. Keyword arguments are required
    # because pandas 2.0 made pivot's parameters keyword-only.
    return df.pivot(index='dataset', columns='method', values='accuracy')

def get_numdrift_table(df):
    """Build a dataset-by-method table of combined drift-detection counts.

    Each cell is the string ``"<cov>/<dis>/<lab>"`` made from the row's
    ``cov_drift_detected``, ``dis_drift_detected`` and ``lab_drift_detected``
    values, in that order.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'dataset'``, ``'method'`` and the three
        ``*_drift_detected`` columns. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``dataset``, one column per ``method``.

    Raises
    ------
    ValueError
        From ``DataFrame.pivot`` if a (dataset, method) pair occurs more than once.
    """
    cov = df['cov_drift_detected'].astype(str)
    dis = df['dis_drift_detected'].astype(str)
    lab = df['lab_drift_detected'].astype(str)

    # Only the pivot keys and the combined string are needed downstream.
    df_drift = df[['dataset', 'method']].assign(
        drift_detected=cov + '/' + dis + '/' + lab
    )

    # Keyword arguments are required: pandas 2.0 made pivot's parameters
    # keyword-only, so the positional form raises TypeError there.
    return df_drift.pivot(index='dataset', columns='method', values='drift_detected')

In [24]:
# Load all JSON result files matching newresults/real-world/*.json into one
# DataFrame and display it.
real_world_df = get_result_df('newresults/real-world/*.json')
real_world_df

Unnamed: 0,dataset,method,cov_drift_detected,dis_drift_detected,lab_drift_detected,accuracy
0,chessweka,SDD,2,2,1,0.715415
1,chessweka,HDD,2,2,1,0.715415
2,chessweka,BDD,2,0,0,0.660079


In [25]:
# Accuracy per dataset (rows) and method (columns).
get_accuracy_table(real_world_df)

method,BDD,HDD,SDD
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
chessweka,0.660079,0.715415,0.715415


In [28]:
# Drift counts per dataset and method, formatted as "cov/dis/lab".
get_numdrift_table(real_world_df)

method,BDD,HDD,SDD
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
chessweka,2/0/0,2/2/1,2/2/1


In [11]:
# Rebinds real_world_df to the files matching results/*.json, replacing the
# frame loaded earlier from newresults/real-world/*.json.
# NOTE(review): same variable name, different source directory; cells that use
# real_world_df depend on which of the two load cells ran last.
real_world_df = get_result_df('results/*.json')
get_accuracy_table(real_world_df)

method,BDD,HDD,SDD
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LUdata,1.0±0.0,1.0±0.0,1.0±0.0
airlines2,0.59±0.0,0.59±0.0,0.59±0.0
chessweka,0.71±0.0,0.75±0.01,0.61±0.03
covtype,0.52±0.0,0.18±0.03,0.51±0.0
elec,0.76±0.0,0.7±0.01,0.74±0.01
outdoorStream,0.16±0.0,0.15±0.0,0.15±0.0
phishing,0.9±0.0,0.91±0.01,0.9±0.0
poker,0.69±0.0,0.69±0.0,0.68±0.0
rialto,0.64±0.0,0.25±0.0,0.53±0.0
spam,0.85±0.0,0.84±0.0,0.85±0.01


In [12]:
# NOTE(review): the saved output for this cell shows single "value±std" entries,
# not the "cov/dis/lab" strings that get_numdrift_table builds; the output
# appears to predate the current function definition. Re-run to refresh.
get_numdrift_table(real_world_df)

method,BDD,HDD,SDD
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LUdata,10.0±0.0,0.0±0.0,1.0±0.0
airlines2,3081.0±0.0,3080.0±0.0,3081.0±0.0
chessweka,2.0±0.0,1.0±0.0,2.0±0.0
covtype,3318.0±0.0,16.0±0.0,3318.0±0.0
elec,258.0±0.0,8.0±0.0,140.0±0.0
outdoorStream,21.0±0.0,6.0±0.0,6.0±0.0
phishing,62.0±0.0,10.0±0.0,62.0±0.0
poker,4731.0±0.0,4731.0±0.0,4731.0±0.0
rialto,465.0±0.0,87.0±0.0,348.0±0.0
spam,35.0±0.0,31.0±0.0,34.0±0.0


In [13]:
# Load all JSON result files matching results/artificial/*.json and show the
# accuracy per dataset (rows) and method (columns).
artificial_df = get_result_df('results/artificial/*.json')
get_accuracy_table(artificial_df)

method,BDD,HDD,SDD
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
interchangingRBF,0.93±0.0,0.2±0.0,0.16±0.0
mixedDrift,0.7±0.0,0.23±0.0,0.21±0.0
movingRBF,0.61±0.0,0.26±0.0,0.26±0.01
moving_squares,0.72±0.0,0.43±0.0,0.55±0.0
rotatingHyperplane,0.75±0.0,0.63±0.01,0.7±0.0
sea_big,0.79±0.0,0.81±0.02,0.79±0.0
sea_stream,0.79±0.0,0.83±0.01,0.79±0.0
transientChessboard,0.53±0.0,0.24±0.01,0.37±0.01


In [14]:
# NOTE(review): the saved output for this cell lists the real-world datasets
# (LUdata ... spam) with the same values as the real-world drift table, not the
# artificial datasets shown in the accuracy table for artificial_df. The output
# looks stale; confirm with Restart Kernel -> Run All.
get_numdrift_table(artificial_df)

method,BDD,HDD,SDD
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LUdata,10.0±0.0,0.0±0.0,1.0±0.0
airlines2,3081.0±0.0,3080.0±0.0,3081.0±0.0
chessweka,2.0±0.0,1.0±0.0,2.0±0.0
covtype,3318.0±0.0,16.0±0.0,3318.0±0.0
elec,258.0±0.0,8.0±0.0,140.0±0.0
outdoorStream,21.0±0.0,6.0±0.0,6.0±0.0
phishing,62.0±0.0,10.0±0.0,62.0±0.0
poker,4731.0±0.0,4731.0±0.0,4731.0±0.0
rialto,465.0±0.0,87.0±0.0,348.0±0.0
spam,35.0±0.0,31.0±0.0,34.0±0.0
