In [22]:
import math

import matplotlib as mpl
from matplotlib import cm
# Render all figure text through LaTeX.
mpl.rcParams['text.usetex'] = True
# The preamble is passed as ONE newline-joined string: Matplotlib >= 3.3
# deprecated (and newer releases reject) a list of strings for this rcParam,
# while a comma-free string is also accepted by older releases.
mpl.rcParams['text.latex.preamble'] = '\n'.join([
    r'\usepackage{libertine}',
    r'\usepackage{newtxmath}',
])
mpl.rc('font', family='serif')

import numpy as np
import pandas as pd
import seaborn as sns
import copy

import matplotlib.pyplot as plt
from matplotlib import rc

from cycler import cycler

from scipy.interpolate import make_interp_spline, BSpline

# Distill PostScript output with xpdf.
mpl.rcParams['ps.usedistiller'] = 'xpdf'

# The seaborn style sheets bundled with Matplotlib were renamed to
# 'seaborn-v0_8-*' in Matplotlib 3.6 and the old names were removed later,
# so use whichever name this installation actually provides.
plt.style.use(
    'seaborn-notebook'
    if 'seaborn-notebook' in plt.style.available
    else 'seaborn-v0_8-notebook'
)

# Font sizes for titles, axis labels, legend entries and tick labels.
plt.rcParams.update({
    'axes.titlesize': '25',
    'axes.labelsize': '25',
    'legend.fontsize': '15',
    'xtick.labelsize': '18',
    'ytick.labelsize': '18',
})

# 16-entry property cycle: the 8 "husl" colours repeated twice, combined
# entry-by-entry with the first 16 items of the repeated marker sequence.
# NOTE: despite its name, this cycle is coloured, not monochrome.
monochrome = (
    cycler('color', sns.color_palette("husl", 8)) * 2
    + (cycler('marker', ['v', 's', "o"]) * 7)[0:16]
)
plt.rc('axes', prop_cycle=monochrome)

# Let pandas display long and wide frames without truncation.
pd.options.display.max_rows = 999
pd.options.display.max_columns = None

# plt.get_cmap replaces cm.get_cmap, which was deprecated in Matplotlib 3.7
# and removed in 3.9.
cmap = plt.get_cmap('RdBu')

# Colour palettes.
# NOTE(review): several names carry a suffix that differs from the number of
# colours requested (husl6 -> 7, purples5 -> 7, reds5 -> 8); kept unchanged,
# confirm this is intended.
husl4 = sns.color_palette("husl", 4)
husl5 = sns.color_palette("husl", 5)
husl6 = sns.color_palette("husl", 7)
greens5 = sns.color_palette("Greens_r", 5)
purples5 = sns.color_palette("Purples_r", 7)
blues5 = sns.color_palette("Blues_r", 5)
reds5 = sns.color_palette("Reds_r", 8)
oranges5 = sns.color_palette("Oranges_r", 5)
greys5 = sns.color_palette("Greys_r", 5)
winter5 = sns.color_palette("viridis_r", 5)

In [23]:
# Name of the experiment run directory under ../experiments/ that holds the
# result CSV loaded in the next cell.
folder = "2020-04-28-09-50_SGMRDminers_" # Replace with the run to analyse

In [24]:
# Load the result table of the run selected by `folder`.
summary = pd.read_csv(f"../experiments/{folder}/SGMRDminers.csv")

In [25]:
# Ordered (marker, label) pairs: the first marker found in a "subspaces" entry
# determines the searcher label of that row.
searcher_markers = [
    ("None", "GMD-Init"),
    ("-RD", "SGMRD-RD"),
    ("Worst", "SGMRD-GD"),
    ("MPTS", "SGMRD-MPTS"),
    ("All", "GMD-Batch"),
    ("Full", "Full"),
]


def searcher_label(subspace_source):
    """Return the searcher label for one entry of the "subspaces" column.

    Exactly one label is produced per entry. The previous chain of independent
    ``if`` statements could append zero or several labels for a single row,
    which silently shifted every following label onto the wrong row.

    Raises:
        ValueError: if the entry contains none of the known markers.
    """
    for marker, label in searcher_markers:
        if marker in subspace_source:
            return label
    raise ValueError("Unrecognised subspaces entry: " + repr(subspace_source))


searchers = [searcher_label(x) for x in summary["subspaces"]]
# Kept as the displayed cell result; it now holds by construction.
len(searchers) == len(summary["subspaces"])

True

In [26]:
# Attach the derived labels as a new column; the list must hold exactly one
# label per row of `summary`.
summary["searchers"] = searchers

In [27]:
# Distinct searcher labels present in the table.
summary["searchers"].unique()

array(['GMD-Init', 'SGMRD-RD', 'SGMRD-GD', 'SGMRD-MPTS', 'GMD-Batch',
       'Full'], dtype=object)

In [28]:
# Distinct values of the "minerId" column.
summary["minerId"].unique()

array(['Hash', 'LOF-1', 'LOF-2', 'LOF-5', 'LOF-10', 'LOF-20', 'LOF-50',
       'LOF-100'], dtype=object)

In [29]:
# Distinct values of the "refId" column.
summary["refId"].unique()

array(['activity', 'kdd99', 'example_10', 'example_20', 'example_50'], dtype=object)

In [30]:
# Searcher labels dropped from the rest of the analysis.
toexclude = ["SGMRD-GD", "SGMRD-RD"]
# Keep only the rows whose label is not in the exclusion list.
summary = summary[~summary["searchers"].isin(toexclude)]

In [31]:
# Rows whose minerId contains "Hash". At this point `summary` still includes
# the "Full" rows, so they are part of this subset as well.
summaryhash = summary[summary["minerId"].str.contains("Hash", regex=False)]

In [32]:
# Split the table into the "Full" rows and everything else.
# NOTE: `summary` is overwritten here, so running this cell a second time
# leaves `summaryfull` empty.
is_full = summary["searchers"] == "Full"
summaryfull = summary[is_full]
summary = summary[~is_full]

In [33]:
# Mean of every numeric column per (refId, minerId, searchers) group.
# numeric_only=True is explicit because the frame also holds the string column
# "subspaces": pandas >= 2.0 raises a TypeError on it instead of silently
# dropping it as older versions did.
summary.groupby(["refId", "minerId", "searchers"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,minetime,rocauc,prauc,ap,p1,p2,p5,p10,p20,p30,r1,r2,r5,r10,r20,r30
refId,minerId,searchers,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
activity,Hash,GMD-Batch,28952.8656,0.967,,0.7989,0.982,0.9708,0.8804,0.7164,0.469,0.3294,0.098,0.1942,0.44,0.7164,0.938,0.9883
activity,Hash,GMD-Init,32239.5874,0.9718,,0.8375,1.0,0.982,0.9245,0.7398,0.4744,0.3288,0.0998,0.1964,0.462,0.7398,0.9488,0.9865
activity,Hash,SGMRD-MPTS,29557.1857,0.9706,,0.8553,1.0,0.9933,0.946,0.7744,0.4712,0.3254,0.0998,0.1987,0.4728,0.7744,0.9425,0.9762
activity,LOF-1,GMD-Batch,153722.1203,0.7542,,0.2073,0.1261,0.1753,0.2257,0.2346,0.2382,0.2147,0.0126,0.0351,0.1128,0.2346,0.4764,0.644
activity,LOF-1,GMD-Init,167168.9962,0.7711,,0.2207,0.1351,0.1596,0.2284,0.2584,0.2548,0.2274,0.0135,0.0319,0.1142,0.2584,0.5097,0.6822
activity,LOF-1,SGMRD-MPTS,158119.0237,0.7549,,0.2061,0.1532,0.164,0.196,0.2342,0.2366,0.2109,0.0153,0.0328,0.098,0.2342,0.4733,0.6328
activity,LOF-10,GMD-Batch,160306.7702,0.9679,,0.812,0.9775,0.9618,0.8849,0.7425,0.4694,0.3272,0.0975,0.1924,0.4422,0.7425,0.9389,0.9816
activity,LOF-10,GMD-Init,192178.8364,0.9686,,0.8164,0.9865,0.9663,0.893,0.7384,0.4712,0.327,0.0984,0.1933,0.4463,0.7384,0.9425,0.9811
activity,LOF-10,SGMRD-MPTS,181439.342,0.9718,,0.8238,0.9685,0.964,0.884,0.7569,0.4739,0.3276,0.0966,0.1928,0.4418,0.7569,0.9479,0.9829
activity,LOF-100,GMD-Batch,168160.9243,0.9489,,0.6156,0.6757,0.6202,0.6376,0.6548,0.46,0.3233,0.0674,0.124,0.3187,0.6548,0.92,0.9699


In [34]:
# Rows of the (Full-free) table whose minerId contains "LOF".
summarylof = summary[summary["minerId"].str.contains("LOF", regex=False)]

In [35]:
# Rows of the "Full" table whose minerId contains "LOF".
summaryloffull = summaryfull[summaryfull["minerId"].str.contains("LOF", regex=False)]

In [36]:
# For each (refId, searchers) group, the index label of the LOF row with the
# highest rocauc.
summarylof.groupby(["refId", "searchers"])["rocauc"].idxmax()

refId       searchers 
activity    GMD-Batch      33
            GMD-Init        4
            SGMRD-MPTS     26
example_10  GMD-Batch     135
            GMD-Init      105
            SGMRD-MPTS    133
example_20  GMD-Batch     183
            GMD-Init      149
            SGMRD-MPTS    182
example_50  GMD-Batch     225
            GMD-Init      202
            SGMRD-MPTS    220
kdd99       GMD-Batch      80
            GMD-Init       50
            SGMRD-MPTS     73
Name: rocauc, dtype: int64

In [37]:
# Columns shown in the comparison tables of the following cells.
interesting = [
    "minetime",
    "rocauc", "ap",
    "p1", "p2", "p5",
    "r1", "r2", "r5",
]

In [38]:
# Select, per (refId, searchers), the LOF row with the highest rocauc and show
# its `interesting` columns multiplied by 100.
# NOTE: the factor also applies to "minetime", which is listed in `interesting`.
(
    summarylof
    .loc[summarylof.groupby(["refId", "searchers"])["rocauc"].idxmax()]
    .groupby(["refId", "minerId", "searchers"])[interesting]
    .mean()
    * 100
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,minetime,rocauc,ap,p1,p2,p5,r1,r2,r5
refId,minerId,searchers,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
activity,LOF-10,GMD-Batch,16030677.02,96.79,81.2,97.75,96.18,88.49,9.75,19.24,44.22
activity,LOF-5,GMD-Init,17968546.63,97.23,86.56,100.0,98.65,94.87,9.98,19.73,47.42
activity,LOF-5,SGMRD-MPTS,16896479.74,97.32,85.39,94.59,94.83,94.24,9.44,18.97,47.1
example_10,LOF-100,GMD-Batch,1361742.37,90.57,55.66,45.0,25.0,12.0,52.33,58.14,69.77
example_10,LOF-100,GMD-Init,2635870.8,89.43,51.85,44.0,24.5,10.8,51.16,56.98,62.79
example_10,LOF-100,SGMRD-MPTS,1424496.22,92.7,59.93,50.0,26.0,12.0,58.14,60.47,69.77
example_20,LOF-100,GMD-Batch,3261954.86,83.93,39.07,35.0,20.0,8.8,39.77,45.45,50.0
example_20,LOF-100,SGMRD-MPTS,3441590.82,85.05,41.19,36.0,19.5,9.2,40.91,44.32,52.27
example_20,LOF-5,GMD-Init,3738080.91,78.4,27.91,24.0,14.0,7.6,27.27,31.82,43.18
example_50,LOF-5,GMD-Batch,9691720.29,78.31,30.9,27.0,15.5,7.4,33.33,38.27,45.68


In [39]:
# Same selection as for the LOF rows, applied to the "Full" LOF rows: the row
# with the highest rocauc per (refId, searchers), `interesting` columns x 100.
# NOTE: the factor also applies to "minetime", which is listed in `interesting`.
(
    summaryloffull
    .loc[summaryloffull.groupby(["refId", "searchers"])["rocauc"].idxmax()]
    .groupby(["refId", "minerId", "searchers"])[interesting]
    .mean()
    * 100
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,minetime,rocauc,ap,p1,p2,p5,r1,r2,r5
refId,minerId,searchers,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
activity,LOF-50,Full,4212001.29,93.93,61.8,74.32,64.72,64.03,7.42,12.94,32.0
example_10,LOF-5,Full,675721.34,88.77,31.44,33.0,18.5,10.4,38.37,43.02,60.47
example_20,LOF-2,Full,1367652.88,72.55,5.57,8.0,6.0,4.4,9.09,13.64,25.0
example_50,LOF-1,Full,2824681.33,61.38,1.08,0.0,0.5,0.6,0.0,1.23,3.7
kdd99,LOF-1,Full,1578129.8,70.37,10.79,0.0,0.0,0.08,0.0,0.0,0.06


In [40]:
# `interesting` columns of the Hash rows, averaged per
# (refId, minerId, searchers) and multiplied by 100.
(
    summaryhash
    .groupby(["refId", "minerId", "searchers"])[interesting]
    .mean()
    * 100
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,minetime,rocauc,ap,p1,p2,p5,r1,r2,r5
refId,minerId,searchers,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
activity,Hash,Full,227695.12,95.95,68.23,71.62,72.58,75.0,7.15,14.52,37.48
activity,Hash,GMD-Batch,2895286.56,96.7,79.89,98.2,97.08,88.04,9.8,19.42,44.0
activity,Hash,GMD-Init,3223958.74,97.18,83.75,100.0,98.2,92.45,9.98,19.64,46.2
activity,Hash,SGMRD-MPTS,2955718.57,97.06,85.53,100.0,99.33,94.6,9.98,19.87,47.28
example_10,Hash,Full,44686.34,71.23,1.87,0.0,0.0,2.8,0.0,0.0,16.28
example_10,Hash,GMD-Batch,237637.63,83.51,16.94,20.0,16.0,8.2,23.26,37.21,47.67
example_10,Hash,GMD-Init,234792.27,83.24,14.04,22.0,12.0,7.2,25.58,27.91,41.86
example_10,Hash,SGMRD-MPTS,236934.98,82.57,16.63,22.0,12.5,8.0,25.58,29.07,46.51
example_20,Hash,Full,59970.21,48.39,0.8,0.0,0.0,0.0,0.0,0.0,0.0
example_20,Hash,GMD-Batch,503937.94,78.69,5.98,11.0,9.0,6.0,12.5,20.45,34.09


In [41]:
# All rows for refId "kdd99" labelled "SGMRD-MPTS".
summary.loc[
    summary["refId"].eq("kdd99") & summary["searchers"].eq("SGMRD-MPTS")
]

Unnamed: 0,refId,minerId,subspaces,minetime,rocauc,prauc,ap,p1,p2,p5,p10,p20,p30,r1,r2,r5,r10,r20,r30,searchers
58,kdd99,Hash,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,29944.1432,0.4419,0.0607,0.0595,0.0,0.004,0.0016,0.0016,0.0116,0.066,0.0,0.0011,0.0011,0.0022,0.0326,0.2782,SGMRD-MPTS
73,kdd99,LOF-1,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,113946.9112,0.6998,0.0647,0.1029,0.0,0.002,0.0056,0.0388,0.05,0.0537,0.0,0.0006,0.0039,0.0545,0.1405,0.2265,SGMRD-MPTS
75,kdd99,LOF-2,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,117791.6039,0.6173,0.062,0.086,0.0,0.0,0.0112,0.0272,0.0524,0.0547,0.0,0.0,0.0079,0.0382,0.1473,0.2305,SGMRD-MPTS
76,kdd99,LOF-10,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,119156.5024,0.512,0.057,0.0674,0.0,0.002,0.0064,0.0112,0.0156,0.0161,0.0,0.0006,0.0045,0.0157,0.0438,0.068,SGMRD-MPTS
77,kdd99,LOF-5,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,114130.4156,0.5486,0.058,0.0729,0.0,0.004,0.0184,0.0212,0.03,0.0287,0.0,0.0011,0.0129,0.0298,0.0843,0.1209,SGMRD-MPTS
78,kdd99,LOF-20,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,130996.6903,0.4479,0.0557,0.0601,0.0,0.0,0.0008,0.0068,0.0076,0.0084,0.0,0.0,0.0006,0.0096,0.0214,0.0354,SGMRD-MPTS
79,kdd99,LOF-50,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,127057.1096,0.3157,0.0518,0.0488,0.0,0.0,0.0,0.0004,0.0002,0.0007,0.0,0.0,0.0,0.0006,0.0006,0.0028,SGMRD-MPTS
82,kdd99,LOF-100,/home/edouardfouche/data/subspaces/kdd99-SGMRD-KSP...,144093.4539,0.2259,0.0471,0.0432,0.0,0.0,0.0,0.0004,0.0072,0.0123,0.0,0.0,0.0,0.0006,0.0202,0.0517,SGMRD-MPTS


In [42]:
# "Full" rows for refId "kdd99" with minerId "Hash".
summaryfull.loc[
    summaryfull["refId"].eq("kdd99") & summaryfull["minerId"].eq("Hash")
]

Unnamed: 0,refId,minerId,subspaces,minetime,rocauc,prauc,ap,p1,p2,p5,p10,p20,p30,r1,r2,r5,r10,r20,r30,searchers
88,kdd99,Hash,Full,1949.6346,0.2658,0.0461,0.045,0.024,0.024,0.0144,0.0072,0.0036,0.0027,0.0034,0.0067,0.0101,0.0101,0.0101,0.0112,Full


In [43]:
# `interesting` columns (x 100) of the non-Full rows for refId "kdd99" with
# minerId "Hash".
summary.loc[
    summary["refId"].eq("kdd99") & summary["minerId"].eq("Hash"),
    interesting,
] * 100

Unnamed: 0,minetime,rocauc,ap,p1,p2,p5,r1,r2,r5
48,2973735.13,53.29,7.03,0.4,0.2,0.24,0.06,0.06,0.17
58,2994414.32,44.19,5.95,0.0,0.4,0.16,0.0,0.11,0.11
60,3037476.81,43.21,5.73,0.0,0.0,0.08,0.0,0.0,0.06


In [44]:

# a: best "Full" LOF row (highest rocauc) per (refId, searchers).
a = summaryloffull.loc[
    summaryloffull.groupby(["refId", "searchers"])["rocauc"].idxmax()
]
# b: best non-Full LOF row (highest rocauc) per (refId, searchers).
b = summarylof.loc[
    summarylof.groupby(["refId", "searchers"])["rocauc"].idxmax()
]
# c: the Hash rows labelled "Full".
c = summaryhash[summaryhash["searchers"] == "Full"]
# Stack the three selections and show their `interesting` columns x 100.
(
    pd.concat([a, b, c])
    .groupby(["refId", "minerId", "searchers"])[interesting]
    .mean()
    * 100
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,minetime,rocauc,ap,p1,p2,p5,r1,r2,r5
refId,minerId,searchers,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
activity,Hash,Full,227695.12,95.95,68.23,71.62,72.58,75.0,7.15,14.52,37.48
activity,LOF-10,GMD-Batch,16030677.02,96.79,81.2,97.75,96.18,88.49,9.75,19.24,44.22
activity,LOF-5,GMD-Init,17968546.63,97.23,86.56,100.0,98.65,94.87,9.98,19.73,47.42
activity,LOF-5,SGMRD-MPTS,16896479.74,97.32,85.39,94.59,94.83,94.24,9.44,18.97,47.1
activity,LOF-50,Full,4212001.29,93.93,61.8,74.32,64.72,64.03,7.42,12.94,32.0
example_10,Hash,Full,44686.34,71.23,1.87,0.0,0.0,2.8,0.0,0.0,16.28
example_10,LOF-100,GMD-Batch,1361742.37,90.57,55.66,45.0,25.0,12.0,52.33,58.14,69.77
example_10,LOF-100,GMD-Init,2635870.8,89.43,51.85,44.0,24.5,10.8,51.16,56.98,62.79
example_10,LOF-100,SGMRD-MPTS,1424496.22,92.7,59.93,50.0,26.0,12.0,58.14,60.47,69.77
example_10,LOF-5,Full,675721.34,88.77,31.44,33.0,18.5,10.4,38.37,43.02,60.47


In [45]:
# "Full" LOF rows for refId "kdd99": `interesting` columns averaged per
# (refId, minerId, subspaces) and multiplied by 100.
(
    summaryloffull
    .loc[summaryloffull["refId"].eq("kdd99")]
    .groupby(["refId", "minerId", "subspaces"])[interesting]
    .mean()
    * 100
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,minetime,rocauc,ap,p1,p2,p5,r1,r2,r5
refId,minerId,subspaces,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
kdd99,LOF-1,Full,1578129.8,70.37,10.79,0.0,0.0,0.08,0.0,0.0,0.06
kdd99,LOF-10,Full,1734480.18,49.65,6.58,0.0,0.0,0.0,0.0,0.0,0.0
kdd99,LOF-100,Full,2070604.3,30.11,4.71,0.0,0.0,0.0,0.0,0.0,0.0
kdd99,LOF-2,Full,1609607.44,65.07,9.57,0.0,0.0,0.08,0.0,0.0,0.06
kdd99,LOF-20,Full,1793859.34,45.36,6.03,0.0,0.0,0.0,0.0,0.0,0.0
kdd99,LOF-5,Full,1659218.17,52.77,7.15,0.0,0.0,0.08,0.0,0.0,0.06
kdd99,LOF-50,Full,1959491.98,40.93,5.46,0.0,0.0,0.0,0.0,0.0,0.0
