In [76]:
import pandas as pd
from glob import glob
from os.path import basename,splitext
from statistics import mean
pd.set_option('display.colheader_justify', 'center')

In [77]:
file2name = {
    'amazon0302':'Amazon',
    "com-youtube": 'Youtube',
    "soc-LiveJournal1": 'LiveJournal',
    "soc-pokec-relationships": 'Pokec',
    "com-orkut": 'Orkut',
    "ca-HepPh": 'NetPhy',
    "ca-HepTh": 'NetHEP',
    "com-dblp": 'DBLP',
    "ego-twitter": 'Twitter',
    "soc-Epinions1": 'Epinions',
    "soc-Slashdot0811": 'Slashdot0811',
    "soc-Slashdot0902": 'Slashdot0902',
    "soc-pokec-relationships":'Pokec',
    "com-friendster":'Friendster',
}

In [78]:
files = glob("../../fillresults/*.txt")
rows = []
d = {'0':'Naive', '1':'Sorted'}
for f in files:
    dataset, p, method, K, eps, R, s = splitext(basename(f))[0].split('_')
    dataset = dataset.split('.')[0]
    # print(dataset, p, method, K, eps, R, s)
    try:
        _, rate = open(f).readlines()[-1].split(':')

        row = [dataset, p, int(R), d[s], float(rate)]
        rows.append(row)
    except:
        pass
    # print(row)


In [79]:
df = pd.DataFrame(rows,columns=['Dataset','Setting','R','Approach','Fillrate'])
df = df[df.R==256].drop('R', axis=1)
df = df[df.Setting.isin(['0.005','0.01','0.1'])]
df = df.sort_values(by=['Dataset'])
df.replace({"Dataset": file2name},inplace=True)
df

Unnamed: 0,Dataset,Setting,Approach,Fillrate
0,Amazon,0.01,Naive,0.04
120,Amazon,0.005,Naive,0.04
3,Amazon,0.1,Sorted,0.50
121,Amazon,0.005,Sorted,0.07
2,Amazon,0.1,Naive,0.13
...,...,...,...,...
91,Pokec,0.1,Sorted,0.43
88,Pokec,0.01,Naive,0.04
89,Pokec,0.01,Sorted,0.08
142,Pokec,0.005,Naive,0.03


In [92]:
pivot = df.pivot_table(index=['Dataset'], columns=['Setting','Approach'],margins=True,
               margins_name='Mean',  # defaults to 'All'
               aggfunc=mean).iloc[: , :-1]
# pivot.style.format('{:,.2f}')
# pivot = 
# pivot.style.set_properties(
#     subset=['index'], 
#     **{'font-weight': 'bold'}
# )
latex_str = pivot.to_latex(float_format=lambda x: '{:,.0%}'.format(x),na_rep='-',multirow=True)
splits = latex_str.replace('{l}','{c}').split('\n')
a = splits[7:-3]
b = []
for i in a:
    cells = i.split('&')
    if cells[0]!= 'Mean':
        cells[0] = '\\tt{'+cells[0]+'}'; 
    cells = "&".join(cells)
    b.append(cells)
splits[7:-3] = b
print('\n'.join(splits))



\begin{tabular}{lrrrrrr}
\toprule
{} & \multicolumn{6}{c}{Fillrate} \\
Setting & \multicolumn{2}{c}{0.005} & \multicolumn{2}{c}{0.01} & \multicolumn{2}{c}{0.1} \\
Approach &  Naive   & Sorted & Naive & Sorted & Naive & Sorted \\
Dataset      &          &        &       &        &       &        \\
\midrule
\tt{Amazon       }&    4\%    &   7\%   &   4\%  &  11\%   &  13\%  &  50\%   \\
\tt{DBLP         }&    3\%    &   6\%   &   4\%  &   8\%   &  10\%  &  43\%   \\
\tt{Epinions     }&    3\%    &   6\%   &   4\%  &   8\%   &  10\%  &  43\%   \\
\tt{LiveJournal  }&    3\%    &   6\%   &   4\%  &   8\%   &  10\%  &  44\%   \\
\tt{NetHEP       }&    4\%    &   8\%   &   4\%  &  14\%   &  19\%  &  58\%   \\
\tt{NetPhy       }&    4\%    &   8\%   &   4\%  &  14\%   &  19\%  &  58\%   \\
\tt{Orkut        }&    3\%    &   6\%   &   4\%  &   8\%   &  10\%  &  44\%   \\
\tt{Pokec        }&    3\%    &   6\%   &   4\%  &   8\%   &  10\%  &  43\%   \\
\tt{Slashdot0811 }&    3\%    &   6\%   &   

In [81]:
pivot

Unnamed: 0_level_0,Fillrate,Fillrate,Fillrate,Fillrate,Fillrate,Fillrate
Setting,0.005,0.005,0.01,0.01,0.1,0.1
Approach,Naive,Sorted,Naive,Sorted,Naive,Sorted
Dataset,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
Amazon,0.04,0.07,0.04,0.11,0.13,0.5
DBLP,0.03,0.06,0.04,0.08,0.1,0.43
Epinions,0.03,0.06,0.04,0.08,0.1,0.43
LiveJournal,0.03,0.06,0.04,0.08,0.1,0.44
NetHEP,0.04,0.08,0.04,0.14,0.19,0.58
NetPhy,0.04,0.08,0.04,0.14,0.19,0.58
Orkut,0.03,0.06,0.04,0.08,0.1,0.44
Pokec,0.03,0.06,0.04,0.08,0.1,0.43
Slashdot0811,0.03,0.06,0.04,0.08,0.1,0.43
Slashdot0902,0.03,0.06,0.04,0.08,0.1,0.44


In [86]:
df2 = pd.read_csv('../../report.txt',delimiter=' ',names=['tag','Split','Naive','Sorted'],header=None)
df2['Dataset']=df2.tag.apply(lambda x: x.split('_')[0].split('.')[0])
df2['Setting']=df2.tag.apply(lambda x: x.split('_')[1])
df2

Unnamed: 0,tag,Split,Naive,Sorted,Dataset,Setting
0,amazon0302_0.01,1,0.95,0.96,amazon0302,0.01
1,amazon0302_0.01,2,0.81,0.51,amazon0302,0.01
2,amazon0302_0.01,4,0.61,0.28,amazon0302,0.01
3,amazon0302_0.01,8,0.37,0.16,amazon0302,0.01
4,amazon0302_0.1,1,1.00,1.00,amazon0302,0.1
...,...,...,...,...,...,...
299,soc-Slashdot0811_0.005,8,0.16,0.11,soc-Slashdot0811,0.005
300,soc-Slashdot0902_0.005,1,0.72,0.73,soc-Slashdot0902,0.005
301,soc-Slashdot0902_0.005,2,0.48,0.39,soc-Slashdot0902,0.005
302,soc-Slashdot0902_0.005,4,0.28,0.21,soc-Slashdot0902,0.005


In [89]:
df2 = df2[df2.Split!=1]
df2 = df2[df2.Setting.isin(['0.005','0.01','0.1'])]
df2.replace({"Dataset": file2name},inplace=True)
df2 = df2.sort_values(by=['Dataset'])
pivot2 = df2.drop(['tag'],axis=1).pivot_table(index=['Dataset'], columns=['Setting','Split'],margins=True,
               margins_name='Mean',  # defaults to 'All'
               aggfunc=mean)
pivot2 = pivot2.reorder_levels([1,2,0], axis=1).clip(0,1)
pivot2= pivot2.iloc[: , :-1]
pivot2.sort_index(axis=1, inplace=True)
pivot2


Setting,0.005,0.005,0.005,0.005,0.005,0.005,0.01,0.01,0.01,0.01,0.01,0.01,0.1,0.1,0.1,0.1,0.1,0.1,Mean
Split,2,2,4,4,8,8,2,2,4,4,8,8,2,2,4,4,8,8,Unnamed: 19_level_1
Unnamed: 0_level_2,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive,Sorted,Naive
Dataset,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3
Amazon,0.58,0.44,0.36,0.23,0.21,0.12,0.81,0.51,0.61,0.28,0.37,0.16,1.0,0.64,1.0,0.52,1.0,0.38,0.66
DBLP,0.48,0.39,0.28,0.21,0.16,0.11,0.73,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.604444
Epinions,0.48,0.39,0.28,0.21,0.16,0.11,0.74,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.605556
Friendster,0.48,0.39,0.28,0.21,0.16,0.11,0.73,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.604444
LiveJournal,0.48,0.39,0.28,0.21,0.16,0.11,0.73,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.604444
NetHEP,0.74,0.52,0.51,0.28,0.3,0.15,0.94,0.53,0.79,0.3,0.5,0.18,1.0,0.69,1.0,0.64,1.0,0.45,0.753333
NetPhy,0.74,0.52,0.5,0.28,0.3,0.15,0.94,0.52,0.79,0.3,0.5,0.17,1.0,0.7,1.0,0.64,1.0,0.45,0.752222
Orkut,0.48,0.39,0.28,0.21,0.16,0.11,0.73,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.604444
Pokec,0.48,0.39,0.28,0.21,0.16,0.11,0.73,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.604444
Slashdot0811,0.48,0.39,0.28,0.21,0.16,0.11,0.73,0.5,0.5,0.27,0.29,0.15,1.0,0.6,1.0,0.46,1.0,0.34,0.604444


In [91]:
latex_str = pivot2.to_latex(float_format=lambda x: '{:,.0%}'.format(x),na_rep='-',multirow=True)
# splits = latex_str.split('\n')
splits = latex_str.replace('{l}','{c}').split('\n')
a = splits[7:-3]
b = []
for i in a:
    cells = i.split('&')
    if cells[0]!= 'Mean':
        cells[0] = '\\tt{'+cells[0]+'}'; 
    cells = "&".join(cells)
    b.append(cells)
splits[7:-3] = b
print('\n'.join(splits))

\begin{tabular}{lrrrrrrrrrrrrrrrrrrr}
\toprule
Setting & \multicolumn{6}{c}{0.005} & \multicolumn{6}{c}{0.01} & \multicolumn{6}{c}{0.1} &  Mean \\
Split & \multicolumn{2}{c}{2} & \multicolumn{2}{c}{4} & \multicolumn{2}{c}{8} & \multicolumn{2}{c}{2} & \multicolumn{2}{c}{4} & \multicolumn{2}{c}{8} & \multicolumn{2}{c}{2} & \multicolumn{2}{c}{4} & \multicolumn{3}{c}{8} \\
{} & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive & Sorted & Naive \\
Dataset      &       &        &       &        &       &        &       &        &       &        &       &        &       &        &       &        &       &        &       \\
\midrule
\tt{Amazon       }&  58\%  &  44\%   &  36\%  &  23\%   &  21\%  &  12\%   &  81\%  &  51\%   &  61\%  &  28\%   &  37\%  &  16\%   &  100\% &  64\%   &  100\% &  52\%   &  100\% &  38\%   &  66\%  \\
\tt{DBLP         }&  48\%  &  39\%   &  28\%  &  21\%   &  16\%  &  11\%  