Copyright (C) 2020 Edouard Fouché

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

In [1]:
import math

import matplotlib as mpl
from matplotlib import cm
# Render figure text with LaTeX, load the Libertine package in the LaTeX
# preamble, and select the serif font family.
mpl.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{libertine}',
    'font.family': 'serif',
})

import numpy as np
import pandas as pd
import seaborn as sns
import copy

import matplotlib.pyplot as plt
from matplotlib import rc

from cycler import cycler

# Use xpdf as the distiller for PostScript output.
mpl.rcParams['ps.usedistiller'] = 'xpdf'

# Matplotlib 3.6 renamed the bundled seaborn style sheets to
# 'seaborn-v0_8-*' and later releases dropped the old names, so pick
# whichever name this installation actually provides.
notebook_style = 'seaborn-v0_8-notebook'
if notebook_style not in plt.style.available:
    notebook_style = 'seaborn-notebook'
plt.style.use(notebook_style)

# Font sizes are set after the style sheet so that they override it.
plt.rcParams.update({
    'axes.titlesize': '25',
    'axes.labelsize': '25',
    'legend.fontsize': '15',
    'xtick.labelsize': '15',
    'ytick.labelsize': '15',
})

# Default property cycle: the 8 "husl" colours repeated twice, paired
# element-wise with the first 16 entries of the repeated marker sequence.
husl_colors = cycler('color', sns.color_palette("husl", 8)) * 2
marker_shapes = (cycler('marker', ['v', 's', "o"]) * 7)[0:16]
monochrome = husl_colors + marker_shapes
plt.rc('axes', prop_cycle=monochrome)

# Show (almost) full frames when displaying DataFrames in the notebook.
pd.options.display.max_rows = 999
pd.options.display.max_columns = None

# pyplot.get_cmap is used instead of cm.get_cmap, which was deprecated in
# Matplotlib 3.7 and removed in 3.9.
cmap = plt.get_cmap('RdBu')

In [2]:
# Name of the experiment output directory (looked up under ../experiments/)
# whose results this notebook analyses.
folder = "2020-02-11-23-07_Evaluate_LOF_"

In [3]:
# Load the result table written by the selected experiment run.
results_path = f"../experiments/{folder}/Evaluate_LOF.csv"
masterdata = pd.read_csv(results_path)

In [4]:
# Show which columns the loaded result table provides.
masterdata.columns

Index(['scoreId', 'dataset', 'bmId', 'k', 'ndocs', 'emb', 'cpu', 'wall',
       'typeAauc', 'typeAap', 'typeAr1', 'typeAr2', 'typeAr5', 'typeAr10',
       'typeAr20', 'typeAr30', 'typeAp1', 'typeAp2', 'typeAp5', 'typeAp10',
       'typeAp20', 'typeAp30'],
      dtype='object')

In [5]:
# Show the distinct values of the parameter k present in the results.
masterdata["k"].unique() 

array([  1,  37,   5,  14,  48,   2,  18,   6,  62,   3,  23,   7,  80,
         4,  29,   9, 100,  11])

In [6]:
# Subset of the score columns of the result table.
# NOTE(review): not referenced by the cells visible here, which spell the
# same column names out again — confirm whether later cells rely on it.
att = ["typeAauc", "typeAap", "typeAr1", "typeAr2", "typeAr5"]

In [7]:
times = ["cpu"]

# Only consider runs with k <= 100.
subdata = masterdata[masterdata["k"] <= 100]

# For every dataset, the index label of the row with the highest typeAauc.
best_rows = subdata.groupby(["dataset"])["typeAauc"].idxmax()
report_columns = ["dataset", "k", "typeAauc", "typeAap", "typeAr1", "typeAr2", "typeAr5", "cpu"]

# Select rows and columns in a single .loc call and take an explicit copy,
# so the assignment below modifies an independent frame instead of the
# result of chained indexing (which can raise SettingWithCopyWarning).
a = subdata.loc[best_rows, report_columns].copy()

# Scale the timing column by 1/1000 and round to two decimals.
# NOTE(review): presumably converts milliseconds to seconds — confirm units.
a[times] = (a[times] / 1000).round(2)
a

Unnamed: 0,dataset,k,typeAauc,typeAap,typeAr1,typeAr2,typeAr5,cpu
111,arxiv_15,37,0.6344,0.0178,0.0,0.0513,0.0769,18.82
142,arxiv_25,100,0.6841,0.0217,0.0435,0.0652,0.1304,14.53
161,arxiv_35,100,0.71,0.0216,0.0,0.0323,0.1129,32.36
166,arxiv_45,6,0.6299,0.0243,0.0202,0.0202,0.0505,81.95
263,arxiv_51,37,0.8257,0.045,0.0726,0.1452,0.2581,106.26
251,arxiv_52,100,0.5558,0.0109,0.0081,0.0081,0.0484,103.6
221,arxiv_53,2,0.5737,0.0133,0.0242,0.0323,0.0968,126.68
215,arxiv_54,100,0.6192,0.0133,0.0,0.0242,0.0645,104.91
191,arxiv_55,4,0.5947,0.0147,0.0323,0.0323,0.1129,125.89
10,nyt_1,23,0.6645,0.0162,0.03,0.03,0.09,82.75


In [10]:
# Per-dataset mean of the score columns and of cpu.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names was deprecated in pandas 1.0 and raises in pandas 2.0.
mean_columns = ["typeAauc", "typeAap", "typeAr1", "typeAr2", "typeAr5", "cpu"]
a = subdata.groupby(["dataset"])[mean_columns].mean()

# Multiply every column by 100 and round to two decimals.
# NOTE(review): this also multiplies "cpu" by 100, which is probably not
# intended for a timing column; kept as is to preserve the existing output.
a = (a * 100).round(2)
a

Unnamed: 0_level_0,typeAauc,typeAap,typeAr1,typeAr2,typeAr5,cpu
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
arxiv_15,59.47,1.5,0.28,2.28,7.55,1761935.69
arxiv_25,64.26,1.78,2.05,4.23,8.58,2127603.68
arxiv_35,66.57,1.95,1.43,4.21,12.81,3362937.36
arxiv_45,60.75,2.29,1.35,2.36,6.85,7752401.97
arxiv_51,77.7,3.93,5.87,11.74,23.39,10838130.04
arxiv_52,53.18,1.03,0.27,1.08,3.23,10879870.48
arxiv_53,54.5,1.19,0.99,2.24,6.94,11317837.22
arxiv_54,58.84,1.24,0.22,1.61,5.42,11235992.29
arxiv_55,57.86,1.42,2.87,4.39,8.15,11116223.11
nyt_1,63.34,1.52,1.72,3.72,8.06,9154094.86


In [13]:
times = ['cpu']

# Per-dataset mean of the timing column, divided by 1000 and rounded to
# two decimals.
a = (
    subdata.groupby(["dataset"])[times]
    .mean()
    .div(1000)
    .round(2)
)
a

Unnamed: 0_level_0,cpu
dataset,Unnamed: 1_level_1
arxiv_15,17.62
arxiv_25,21.28
arxiv_35,33.63
arxiv_45,77.52
arxiv_51,108.38
arxiv_52,108.8
arxiv_53,113.18
arxiv_54,112.36
arxiv_55,111.16
nyt_1,91.54
