Copyright (C) 2020 Edouard Fouché

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.

In [1]:
import math

import matplotlib as mpl
from matplotlib import cm
# Typeset all figure text with LaTeX, loading the LaTeX `libertine` package in
# the preamble, and select the serif font family.
mpl.rcParams.update({
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{libertine}',
})
mpl.rcParams['font.family'] = 'serif'

import numpy as np
import pandas as pd
import seaborn as sns
import copy

import matplotlib.pyplot as plt
from matplotlib import rc

from cycler import cycler

# Global plotting / display configuration for the notebook.

# Distill PostScript output through xpdf.
mpl.rcParams['ps.usedistiller'] = 'xpdf'

# The bundled seaborn style sheets were renamed to 'seaborn-v0_8-*' in
# matplotlib 3.6 and the old names were removed afterwards. Use whichever name
# the installed matplotlib provides so the notebook runs on old and new versions.
_notebook_style = 'seaborn-v0_8-notebook'
if _notebook_style not in plt.style.available:
    _notebook_style = 'seaborn-notebook'
plt.style.use(_notebook_style)

# Font sizes for titles, axis labels, legend entries and tick labels.
plt.rcParams.update({
    'axes.titlesize': '25',
    'axes.labelsize': '25',
    'legend.fontsize': '15',
    'xtick.labelsize': '15',
    'ytick.labelsize': '15',
})

# Property cycle with 16 entries: the 8-colour "husl" palette repeated twice,
# paired element-wise with the first 16 entries of the repeated marker sequence.
# (The name `monochrome` is kept because other cells may reference it.)
monochrome = (
    cycler('color', sns.color_palette("husl", 8)) * 2
    + (cycler('marker', ['v', 's', "o"]) * 7)[0:16]
)
plt.rc('axes', prop_cycle=monochrome)

# Show (almost) complete DataFrames when displaying them.
pd.options.display.max_rows = 999
pd.options.display.max_columns = None

# `cm.get_cmap` was deprecated in matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` is available in both old and new releases.
cmap = plt.get_cmap('RdBu')

In [2]:
# Sub-directory of ../experiments/ that holds the results to load;
# replace this with the directory of the run you want to analyse.
folder = "2020-02-12-06-30_Evaluate_RSHash_" # replace

In [3]:
# Load the result table of the selected run from ../experiments/<folder>/.
results_csv = f"../experiments/{folder}/Evaluate_RSHash.csv"
masterdata = pd.read_csv(results_csv)

In [4]:
# Display the column names of the loaded results table.
masterdata.columns

Index(['scoreId', 'dataset', 'bmId', 'ndocs', 'emb', 'cpu', 'wall', 'typeAauc',
       'typeAap', 'typeAr1', 'typeAr2', 'typeAr5', 'typeAr10', 'typeAr20',
       'typeAr30', 'typeAp1', 'typeAp2', 'typeAp5', 'typeAp10', 'typeAp20',
       'typeAp30'],
      dtype='object')

In [5]:
# Subset of masterdata's columns used as evaluation metrics
# (presumably AUC, average precision and recall@1/2/5 -- confirm).
att = ["typeAauc", "typeAap", "typeAr1", "typeAr2", "typeAr5"]

In [6]:
# For every dataset, report the row with the highest typeAauc: its metric
# columns (`att`, defined in an earlier cell) plus the cpu column.
times = ["cpu"]
subdata = masterdata

# Index label of the best-typeAauc row within each dataset.
best_rows = subdata.groupby(["dataset"])["typeAauc"].idxmax()

# Select rows and columns in a single .loc call and take an explicit copy, so
# the assignment below works on an independent frame instead of the result of
# chained indexing (which can trigger SettingWithCopyWarning).
a = subdata.loc[best_rows, ["dataset"] + att + times].copy()

# Divide by 1000 and round to two decimals (presumably ms -> s; confirm units).
a[times] = (a[times] / 1000).round(2)
a

Unnamed: 0,dataset,typeAauc,typeAap,typeAr1,typeAr2,typeAr5,cpu
5,arxiv_15,0.5421,0.0117,0.0256,0.0256,0.0256,0.12
1,arxiv_25,0.4421,0.0091,0.0,0.0,0.0435,0.24
4,arxiv_35,0.4777,0.0096,0.0161,0.0161,0.0484,0.14
7,arxiv_45,0.4919,0.0099,0.0,0.0,0.0606,0.42
14,arxiv_51,0.5123,0.011,0.0161,0.0403,0.0645,0.22
13,arxiv_52,0.5254,0.0108,0.0,0.0161,0.0565,0.36
11,arxiv_53,0.4231,0.0079,0.0,0.0,0.0,0.29
12,arxiv_54,0.4795,0.0092,0.0,0.0081,0.0242,0.37
10,arxiv_55,0.4899,0.0094,0.0081,0.0081,0.0323,0.27
6,nyt_1,0.4661,0.0087,0.0,0.01,0.01,0.34


In [11]:
# Same table as the earlier best-typeAauc cell: for every dataset, the row with
# the highest typeAauc, restricted to the metric columns (`att`, defined in an
# earlier cell) and the cpu column.
times = ["cpu"]
subdata = masterdata

# Index label of the best-typeAauc row within each dataset.
best_rows = subdata.groupby(["dataset"])["typeAauc"].idxmax()

# One .loc call with an explicit copy avoids assigning into the result of
# chained indexing (which can trigger SettingWithCopyWarning).
a = subdata.loc[best_rows, ["dataset"] + att + times].copy()

# Divide by 1000 and round to two decimals (presumably ms -> s; confirm units).
a[times] = (a[times] / 1000).round(2)
a

Unnamed: 0,dataset,typeAauc,typeAap,typeAr1,typeAr2,typeAr5,cpu
5,arxiv_15,0.5421,0.0117,0.0256,0.0256,0.0256,0.12
1,arxiv_25,0.4421,0.0091,0.0,0.0,0.0435,0.24
4,arxiv_35,0.4777,0.0096,0.0161,0.0161,0.0484,0.14
7,arxiv_45,0.4919,0.0099,0.0,0.0,0.0606,0.42
14,arxiv_51,0.5123,0.011,0.0161,0.0403,0.0645,0.22
13,arxiv_52,0.5254,0.0108,0.0,0.0161,0.0565,0.36
11,arxiv_53,0.4231,0.0079,0.0,0.0,0.0,0.29
12,arxiv_54,0.4795,0.0092,0.0,0.0081,0.0242,0.37
10,arxiv_55,0.4899,0.0094,0.0081,0.0081,0.0323,0.27
6,nyt_1,0.4661,0.0087,0.0,0.01,0.01,0.34


In [12]:
# Per-dataset mean of the metric columns (`att`, defined in an earlier cell)
# and cpu, scaled by 100 and rounded to two decimals.
# The columns are selected with a list: tuple-style selection on a GroupBy
# (groupby(...)["x", "y"]) was deprecated in pandas 1.0 and removed in 2.0.
a = subdata.groupby(["dataset"])[att + ["cpu"]].mean()

# NOTE(review): the x100 scaling is applied to every column, including "cpu",
# so that column is not a percentage -- confirm whether it should be excluded.
a = (a * 100).round(2)
a

Unnamed: 0_level_0,typeAauc,typeAap,typeAr1,typeAr2,typeAr5,cpu
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
arxiv_15,54.21,1.17,2.56,2.56,2.56,11752.94
arxiv_25,44.21,0.91,0.0,0.0,4.35,23567.32
arxiv_35,47.77,0.96,1.61,1.61,4.84,13781.05
arxiv_45,49.19,0.99,0.0,0.0,6.06,42234.91
arxiv_51,51.23,1.1,1.61,4.03,6.45,22037.36
arxiv_52,52.54,1.08,0.0,1.61,5.65,35746.08
arxiv_53,42.31,0.79,0.0,0.0,0.0,28636.36
arxiv_54,47.95,0.92,0.0,0.81,2.42,36741.78
arxiv_55,48.99,0.94,0.81,0.81,3.23,26642.99
nyt_1,46.61,0.87,0.0,1.0,1.0,33864.54


In [15]:
# Per-dataset mean of the cpu column, divided by 1000 and rounded to 2 decimals.
times = ['cpu']
mean_cpu = subdata.groupby(["dataset"])[times].mean()
a = (mean_cpu / 1000).round(2)
a

Unnamed: 0_level_0,cpu
dataset,Unnamed: 1_level_1
arxiv_15,0.12
arxiv_25,0.24
arxiv_35,0.14
arxiv_45,0.42
arxiv_51,0.22
arxiv_52,0.36
arxiv_53,0.29
arxiv_54,0.37
arxiv_55,0.27
nyt_1,0.34
