In [1]:
import os
import json
import pandas as pd
import re

# Directory that holds the benchmark reports read by this notebook
# (insert.json, select.json); the TSV summaries are written back into it.
path = os.path.abspath("/group/ag_abi/seiler/IBF/")

# Benchmark names have the shape
#   <function>_<BD><<alphabet>, <strategy><N>, <bitvector>>/<bins>/<k>[/<ram>/<h>]
# The pieces below are adjacent literals, so they compile as one regex.
# Capture groups, in order: function, BD, alphabet, strategy, bitvector,
# bins, k, and - only when the trailing pair is present - ram and h.
pattern = re.compile(
    r"(\w+)_(\w+)"                  # function and BD
    r"<(\w+), (\w+)<\d+>, (\w+)>"   # alphabet, strategy (its <N> is not captured), bitvector
    r"/(\d+)/(\d+)"                 # bins, k
    r"(?:/(\d+)/(\d+)){0,1}"        # optional ram (used as a power-of-two exponent) and h
)

# Insert Benchmark Evaluation

In [2]:
# Parse the insert benchmark report into `x`; the table rows are built
# from x['benchmarks'] further down.
filename = os.path.join(path, "insert.json")
with open(filename, "r") as report_file:
    x = json.load(report_file)

In [3]:
# Build one table row per entry of the insert benchmark report.
# The configuration columns are decoded from the benchmark name via `pattern`;
# 'real_time' and 'Size' are read from the report entry and formatted as text.
row_list = []
for benchmark in x['benchmarks']:
    match = re.match(pattern, benchmark['name'])
    if match is None:
        # Name the offending entry instead of failing with an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError(
            "Benchmark name {0!r} does not match the expected pattern".format(benchmark['name'])
        )
    test, spec, alphabet, strategy, bitvector, bins, k, ram, h = match.groups()

    # The divisor is 8 * 1024 * 1024 - presumably bits -> MiB (TODO confirm).
    # Integer floor division avoids the detour through floating point.
    if ram is None:
        # No explicit size in the name: derive it from k and the bin count.
        ram = 4**int(k) * int(bins) // (8 * 1024 * 1024)
    else:
        # The name carries the size as a power-of-two exponent.
        ram = 2**int(ram) // (8 * 1024 * 1024)

    if h is None:
        # Kept as text like the values parsed from the name, so the 'h'
        # column does not end up with mixed int/str entries.
        h = "1"

    row_list.append({
        'Function': test,
        'BD': spec,
        'Alphabet': alphabet,
        'Strategy': strategy,
        'Bitvector': bitvector,
        'bins': bins,
        'k': k,
        'RAM': "{0:,}".format(ram),
        'h': h,
        # Assumes 'real_time' is in nanoseconds, which makes this minutes -
        # TODO confirm against the report's time unit.
        'Time': "{0:,.2f}".format(benchmark['real_time']/10**9/60),
        'Size': "{0:,}".format(int(benchmark['Size'])),
    })

In [4]:
# One DataFrame row per collected benchmark dict.
df = pd.DataFrame(data=row_list)

In [5]:
# Configuration columns first, measurements last.
insert_columns = [
    "Function", "BD", "Alphabet", "Strategy", "Bitvector",
    "bins", "k", "h",
    "RAM", "Size", "Time",
]
df = df[insert_columns]

In [6]:
# Render the whole table: lift pandas' row and column display limits for
# this one call only.
show_all = pd.option_context('display.max_rows', None, 'display.max_columns', None)
with show_all:
    display(df)

Unnamed: 0,Function,BD,Alphabet,Strategy,Bitvector,bins,k,h,RAM,Size,Time
0,insertKmer,IBF,Dna,Normal,Uncompressed,64,19,3,1024,1024,16.78
1,insertKmer,IBF,Dna,Normal,Uncompressed,64,19,3,2048,2048,17.0
2,insertKmer,IBF,Dna,Normal,Uncompressed,64,19,3,4096,4096,18.1
3,insertKmer,IBF,Dna,Normal,Uncompressed,64,19,3,8192,8192,20.83
4,insertKmer,IBF,Dna,Normal,Uncompressed,64,19,3,16384,16384,21.31
5,insertKmer,IBF,Dna,Normal,Uncompressed,256,19,3,1024,1024,16.68
6,insertKmer,IBF,Dna,Normal,Uncompressed,256,19,3,2048,2048,16.7
7,insertKmer,IBF,Dna,Normal,Uncompressed,256,19,3,4096,4096,18.11
8,insertKmer,IBF,Dna,Normal,Uncompressed,256,19,3,8192,8192,20.85
9,insertKmer,IBF,Dna,Normal,Uncompressed,256,19,3,16384,16384,21.7


In [7]:
# Save the insert summary next to the input report, tab-separated and
# without the index column.
insert_tsv = os.path.join(path, "insertBenchmark.tsv")
df.to_csv(insert_tsv, sep='\t', index=False)

# Select Benchmark Evaluation

In [8]:
# Parse the select benchmark report into `x`, replacing the insert report
# loaded earlier; the table rows are built from x['benchmarks'] further down.
filename = os.path.join(path, "select.json")
with open(filename, "r") as report_file:
    x = json.load(report_file)

In [9]:
# Build one table row per entry of the select benchmark report.
# Timing and accuracy counters are read from the report entry and formatted
# as text; the configuration columns are decoded from the benchmark name
# via `pattern`.

# Thread count used for the per-thread average and reported in 'Threads'.
# It is hard-coded rather than read from the report - TODO confirm it matches
# the benchmark setup.
threads = 32

row_list = []
for benchmark in x['benchmarks']:
    match = re.match(pattern, benchmark['name'])
    if match is None:
        # Name the offending entry instead of failing with an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError(
            "Benchmark name {0!r} does not match the expected pattern".format(benchmark['name'])
        )
    test, spec, alphabet, strategy, bitvector, bins, k, ram, h = match.groups()

    # The divisor is 8 * 1024 * 1024 - presumably bits -> MiB (TODO confirm).
    # Integer floor division avoids the detour through floating point.
    if ram is None:
        # No explicit size in the name: derive it from k and the bin count.
        ram = 4**int(k) * int(bins) // (8 * 1024 * 1024)
    else:
        # The name carries the size as a power-of-two exponent.
        ram = 2**int(ram) // (8 * 1024 * 1024)

    if h is None:
        # Kept as text like the values parsed from the name, so the 'h'
        # column does not end up with mixed int/str entries.
        h = "1"

    row_list.append({
        'Full Time': "{0:,.2f}".format(benchmark['fullTime']),
        'load BD': "{0:,.2f}".format(benchmark['loadingTime']),
        'load Reads': "{0:,.2f}".format(benchmark['ioTime']),
        'sum Select': "{0:,.2f}".format(benchmark['selectTime']),
        'avg Select': "{0:,.2f}".format(benchmark['selectTime'] / threads),
        'Threads': threads,
        'TP': "{0:,}".format(int(benchmark['TP'])),
        'FN': "{0:,}".format(int(benchmark['FN'])),
        'FP': "{0:,}".format(int(benchmark['FP'])),
        'P': "{0:,}".format(int(benchmark['P'])),
        'readNo': "{0:,}".format(int(benchmark['readNo'])),
        # The two keys differ only in case: lower-case 'verifications' feeds
        # the absolute count, capitalised 'Verifications' the per-read value.
        'Absolute Verifications': "{0:,}".format(int(benchmark['verifications'])),
        'Verifications per read': "{0:,.2f}".format(benchmark['Verifications']),
        'Sensitivity': benchmark['Sensitivity'],
        'Precision': benchmark['Precision'],
        'FNR': "{0:,.2f}".format(benchmark['FNR']),
        'FDR': "{0:,.2f}".format(benchmark['FDR']),
        'Function': test,
        'BD': spec,
        'Alphabet': alphabet,
        'Strategy': strategy,
        'Bitvector': bitvector,
        'bins': bins,
        'k': k,
        'RAM': "{0:,}".format(ram),
        'h': h,
    })

In [10]:
# One DataFrame row per collected benchmark dict.
df = pd.DataFrame(data=row_list)

In [11]:
# Configuration columns first, then timings, then accuracy counters.
# Columns that are not listed here (such as 'Threads') are dropped.
select_columns = [
    "Function", "BD", "Alphabet", "Strategy", "Bitvector",
    "bins", "k", "h", "RAM",
    "Full Time", "load BD", "load Reads", "sum Select", "avg Select",
    "TP", "FN", "FP", "P", "readNo",
    "Absolute Verifications", "Verifications per read",
    "Sensitivity", "Precision", "FNR", "FDR",
]
df = df[select_columns]

In [12]:
# Render the whole table: lift pandas' row and column display limits for
# this one call only.
show_all = pd.option_context('display.max_rows', None, 'display.max_columns', None)
with show_all:
    display(df)

Unnamed: 0,Function,BD,Alphabet,Strategy,Bitvector,bins,k,h,RAM,Full Time,load BD,load Reads,sum Select,avg Select,TP,FN,FP,P,readNo,Absolute Verifications,Verifications per read,Sensitivity,Precision,FNR,FDR
0,select,IBF,Dna,Normal,Uncompressed,64,19,3,1024,2.03,0.73,1.24,35.14,1.1,1048576,0,66010749,67059325,1048576,67059325,63.95,1.0,0.015637,0.0,0.98
1,select,IBF,Dna,Normal,Uncompressed,64,19,3,2048,2.43,1.27,1.23,31.5,0.98,1048576,0,16647,1065223,1048576,1065223,1.02,1.0,0.984372,0.0,0.02
2,select,IBF,Dna,Normal,Uncompressed,64,19,3,4096,3.44,2.25,1.23,32.26,1.01,1048576,0,192,1048768,1048576,1048768,1.0,1.0,0.999817,0.0,0.0
3,select,IBF,Dna,Normal,Uncompressed,64,19,3,8192,7.04,5.76,1.14,36.73,1.15,1048576,0,189,1048765,1048576,1048765,1.0,1.0,0.99982,0.0,0.0
4,select,IBF,Dna,Normal,Uncompressed,64,19,3,16384,14.14,12.59,1.16,43.9,1.37,1048576,0,189,1048765,1048576,1048765,1.0,1.0,0.99982,0.0,0.0
5,select,IBF,Dna,Normal,Uncompressed,256,19,3,1024,3.14,0.71,1.35,70.55,2.2,1048576,0,267188388,268236964,1048576,268236964,255.81,1.0,0.003909,0.0,1.0
6,select,IBF,Dna,Normal,Uncompressed,256,19,3,2048,3.13,1.22,1.24,54.36,1.7,1048576,0,66925,1115501,1048576,1115501,1.06,1.0,0.940005,0.0,0.06
7,select,IBF,Dna,Normal,Uncompressed,256,19,3,4096,4.41,2.55,1.27,52.06,1.63,1048576,0,2041,1050617,1048576,1050617,1.0,1.0,0.998057,0.0,0.0
8,select,IBF,Dna,Normal,Uncompressed,256,19,3,8192,6.96,5.02,1.21,54.5,1.7,1048576,0,2038,1050614,1048576,1050614,1.0,1.0,0.99806,0.0,0.0
9,select,IBF,Dna,Normal,Uncompressed,256,19,3,16384,13.24,11.13,1.15,59.56,1.86,1048576,0,2040,1050616,1048576,1050616,1.0,1.0,0.998058,0.0,0.0


In [13]:
# Save the select summary next to the input report, tab-separated and
# without the index column.
select_tsv = os.path.join(path, "selectBenchmark.tsv")
df.to_csv(select_tsv, sep='\t', index=False)