In [1]:
import os
import json
import pandas as pd
import re

# Directory that holds the benchmark JSON reports and receives the TSV exports.
# Defaults to the original location; set the IBF_BENCHMARK_DIR environment
# variable to point the notebook at another copy of the data without editing it.
path = os.path.abspath(os.environ.get("IBF_BENCHMARK_DIR") or "/group/ag_abi/seiler/IBF/")

# Parses a benchmark name into ten capture groups, in this order:
#   test, spec, alphabet, strategy, bitvector, chunks, bins, k, ram, h
# e.g. "insertKmer_IBF<Dna, Normal<5>, CompressedDisk, Chunks<4>>/64/19/37/3".
# The ", Chunks<n>" part and the trailing "/ram/h" part are optional; their
# groups (chunks, and ram + h) are None when the part is absent.
pattern = re.compile(r"(\w+)_(\w+)<(\w+), (\w+)<\d+>, (\w+)(?:, Chunks<(\d+)>){0,1}>/(\d+)/(\d+)(?:/(\d+)/(\d+)){0,1}")

# Insert Benchmark Evaluation

In [14]:
# Read the insert-benchmark JSON report into `x`.
filename = os.path.join(path, "insertCompChunk.json")
# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of relying
# on the platform's locale default, which differs between machines.
with open(filename, "r", encoding="utf-8") as report_file:
    x = json.load(report_file)

In [15]:
# 8 * 1024 * 1024: divisor applied to the filter size below
# (presumably a bit count being expressed in MiB — TODO confirm).
BITS_PER_MIB = 8 * 1024 * 1024

# Build one formatted row per entry in the report's 'benchmarks' list.
row_list = []
for benchmark in x['benchmarks']:
    name = benchmark['name']
    parsed = pattern.match(name)
    if parsed is None:
        # Report the offending name instead of failing with an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError("benchmark name does not match the expected pattern: {!r}".format(name))
    test, spec, alphabet, strategy, bitvector, chunks, bins, k, ram, h = parsed.groups()

    # Integer floor division keeps the result exact for large sizes; the
    # previous float division could round once the value exceeded 2**53.
    if ram is None:
        # No explicit size in the name: derive it from 4**k * bins.
        ram = 4**int(k) * int(bins) // BITS_PER_MIB
    else:
        # The name carries the size as a power-of-two exponent.
        ram = 2**int(ram) // BITS_PER_MIB
    if h is None:
        h = 1

    row_list.append({
        'Function': test,
        'BD': spec,
        'Alphabet': alphabet,
        'Strategy': strategy,
        'Bitvector': bitvector,
        'Chunks': chunks,
        'bins': bins,
        'k': k,
        'RAM': "{0:,}".format(ram),
        'h': h,
        # real_time / 1e9 / 60: assumes real_time is in nanoseconds, giving minutes.
        'Time': "{0:,.2f}".format(benchmark['real_time'] / 10**9 / 60),
        'Size': "{0:,}".format(int(benchmark['Size'])),
    })

In [16]:
# Turn the collected row dictionaries into a table, one row per benchmark entry.
df = pd.DataFrame(data=row_list)

In [17]:
# Fix the column order for display and export.
df = df.loc[
    :,
    [
        "Function", "BD", "Alphabet", "Strategy", "Bitvector",  # parsed from the benchmark name
        "Chunks", "bins", "k", "h", "RAM",                      # benchmark parameters
        "Size", "Time",                                         # measured values
    ],
]

In [18]:
# Lift pandas' row/column display limits for this cell only, so the table is shown untruncated.
with pd.option_context("display.max_columns", None, "display.max_rows", None):
    display(df)

Unnamed: 0,Function,BD,Alphabet,Strategy,Bitvector,Chunks,bins,k,h,RAM,Size,Time
0,insertKmer,IBF,Dna,Normal,CompressedDisk,1,64,19,3,16384,10222,31.77
1,insertKmer,IBF,Dna,Normal,CompressedDisk,1,256,19,3,16384,10224,32.14
2,insertKmer,IBF,Dna,Normal,CompressedDisk,1,1024,19,3,16384,10221,30.56
3,insertKmer,IBF,Dna,Normal,CompressedDisk,1,8192,19,3,16384,10219,61.3
4,insertKmer,IBF,Dna,Normal,CompressedDisk,2,64,19,3,16384,10220,33.86
5,insertKmer,IBF,Dna,Normal,CompressedDisk,2,256,19,3,16384,10222,34.14
6,insertKmer,IBF,Dna,Normal,CompressedDisk,2,1024,19,3,16384,10220,32.83
7,insertKmer,IBF,Dna,Normal,CompressedDisk,2,8192,19,3,16384,10217,53.5
8,insertKmer,IBF,Dna,Normal,CompressedDisk,4,64,19,3,16384,10220,39.95
9,insertKmer,IBF,Dna,Normal,CompressedDisk,4,256,19,3,16384,10222,43.28


In [19]:
# Export the formatted insert table as a tab-separated file, omitting the index column.
df.to_csv(
    os.path.join(path, "insertCompChunkBenchmark.tsv"),
    index=False,
    sep="\t",
)

# Select Benchmark Evaluation

In [20]:
# Read the select-benchmark JSON report into `x` (replacing the insert report).
filename = os.path.join(path, "selectCompChunk.json")
# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of relying
# on the platform's locale default, which differs between machines.
with open(filename, "r", encoding="utf-8") as report_file:
    x = json.load(report_file)

In [21]:
# 8 * 1024 * 1024: divisor applied to the filter size below
# (presumably a bit count being expressed in MiB — TODO confirm).
BITS_PER_MIB = 8 * 1024 * 1024
# Single source for the thread count: it fills the 'Threads' column and is the
# divisor that turns the summed select time into the 'avg Select' value.
SELECT_THREADS = 32

# Build one formatted row per entry in the report's 'benchmarks' list.
# NOTE: 'Threads' is stored in every row here, but the column-selection cell
# that follows does not list it, so it is absent from the displayed/exported table.
row_list = []
for benchmark in x['benchmarks']:
    name = benchmark['name']
    parsed = pattern.match(name)
    if parsed is None:
        # Report the offending name instead of failing with an opaque
        # "'NoneType' object has no attribute 'groups'".
        raise ValueError("benchmark name does not match the expected pattern: {!r}".format(name))
    test, spec, alphabet, strategy, bitvector, chunks, bins, k, ram, h = parsed.groups()

    # Integer floor division keeps the result exact for large sizes; the
    # previous float division could round once the value exceeded 2**53.
    if ram is None:
        # No explicit size in the name: derive it from 4**k * bins.
        ram = 4**int(k) * int(bins) // BITS_PER_MIB
    else:
        # The name carries the size as a power-of-two exponent.
        ram = 2**int(ram) // BITS_PER_MIB
    if h is None:
        h = 1

    row_list.append({
        # Timings, formatted with two decimals.
        'Full Time': "{0:,.2f}".format(benchmark['fullTime']),
        'load BD': "{0:,.2f}".format(benchmark['loadingTime']),
        'load Reads': "{0:,.2f}".format(benchmark['ioTime']),
        'sum Select': "{0:,.2f}".format(benchmark['selectTime']),
        'avg Select': "{0:,.2f}".format(benchmark['selectTime'] / SELECT_THREADS),
        'Threads': SELECT_THREADS,
        # Counters, formatted as integers with thousands separators.
        'TP': "{0:,}".format(int(benchmark['TP'])),
        'FN': "{0:,}".format(int(benchmark['FN'])),
        'FP': "{0:,}".format(int(benchmark['FP'])),
        'P': "{0:,}".format(int(benchmark['P'])),
        'readNo': "{0:,}".format(int(benchmark['readNo'])),
        # The report is read with two distinct keys that differ only in case:
        # 'verifications' (formatted as an integer) and 'Verifications' (two decimals).
        'Absolute Verifications': "{0:,}".format(int(benchmark['verifications'])),
        'Verifications per read': "{0:,.2f}".format(benchmark['Verifications']),
        # Sensitivity and Precision are passed through unformatted.
        'Sensitivity': benchmark['Sensitivity'],
        'Precision': benchmark['Precision'],
        'FNR': "{0:,.2f}".format(benchmark['FNR']),
        'FDR': "{0:,.2f}".format(benchmark['FDR']),
        # Fields parsed from the benchmark name.
        'Function': test,
        'BD': spec,
        'Alphabet': alphabet,
        'Strategy': strategy,
        'Bitvector': bitvector,
        'Chunks': chunks,
        'bins': bins,
        'k': k,
        'RAM': "{0:,}".format(ram),
        'h': h,
    })

In [22]:
# Turn the collected select-benchmark row dictionaries into a table, one row per entry.
df = pd.DataFrame(data=row_list)

In [23]:
# Fix the column order for display and export.
df = df.loc[
    :,
    [
        # parsed from the benchmark name
        "Function", "BD", "Alphabet", "Strategy", "Bitvector",
        # benchmark parameters
        "Chunks", "bins", "k", "h", "RAM",
        # timings
        "Full Time", "load BD", "load Reads", "sum Select", "avg Select",
        # counters
        "TP", "FN", "FP", "P", "readNo",
        "Absolute Verifications", "Verifications per read",
        # rates
        "Sensitivity", "Precision", "FNR", "FDR",
    ],
]

In [24]:
# Lift pandas' row/column display limits for this cell only, so the table is shown untruncated.
with pd.option_context("display.max_columns", None, "display.max_rows", None):
    display(df)

Unnamed: 0,Function,BD,Alphabet,Strategy,Bitvector,Chunks,bins,k,h,RAM,Full Time,load BD,load Reads,sum Select,avg Select,TP,FN,FP,P,readNo,Absolute Verifications,Verifications per read,Sensitivity,Precision,FNR,FDR
0,select,IBFChunked,Dna,Normal,CompressedDisk,1,64,19,3,16384,231.46,222.48,1.87,243.44,7.61,1048576,0,189,1048765,1048576,1048765,1.0,1.0,0.99982,0.0,0.0
1,select,IBFChunked,Dna,Normal,CompressedDisk,1,256,19,3,16384,240.06,226.17,1.8,382.88,11.96,1048576,0,2040,1050616,1048576,1050616,1.0,1.0,0.998058,0.0,0.0
2,select,IBFChunked,Dna,Normal,CompressedDisk,1,1024,19,3,16384,255.02,219.77,1.87,945.07,29.53,1048576,0,0,1048576,1048576,1048576,1.0,1.0,1.0,0.0,0.0
3,select,IBFChunked,Dna,Normal,CompressedDisk,1,8192,19,3,16384,473.04,228.3,2.05,6308.82,197.15,1048576,0,0,1048576,1048576,1048576,1.0,1.0,1.0,0.0,0.0
4,select,IBFChunked,Dna,Normal,CompressedDisk,2,64,19,3,16384,243.49,235.28,3.38,238.02,7.44,1048576,0,189,1048765,1048576,1048765,1.0,1.0,0.99982,0.0,0.0
5,select,IBFChunked,Dna,Normal,CompressedDisk,2,256,19,3,16384,243.46,229.44,3.48,392.28,12.26,1048576,0,2040,1050616,1048576,1050616,1.0,1.0,0.998058,0.0,0.0
6,select,IBFChunked,Dna,Normal,CompressedDisk,2,1024,19,3,16384,262.47,226.16,3.65,1024.16,32.01,1048576,0,0,1048576,1048576,1048576,1.0,1.0,1.0,0.0,0.0
7,select,IBFChunked,Dna,Normal,CompressedDisk,2,8192,19,3,16384,471.91,227.95,3.98,6946.82,217.09,1048576,0,0,1048576,1048576,1048576,1.0,1.0,1.0,0.0,0.0
8,select,IBFChunked,Dna,Normal,CompressedDisk,4,64,19,3,16384,241.78,233.02,6.47,250.9,7.84,1048576,0,189,1048765,1048576,1048765,1.0,1.0,0.99982,0.0,0.0
9,select,IBFChunked,Dna,Normal,CompressedDisk,4,256,19,3,16384,246.02,230.72,6.74,432.76,13.52,1048576,0,2040,1050616,1048576,1050616,1.0,1.0,0.998058,0.0,0.0


In [25]:
# Export the formatted select table as a tab-separated file, omitting the index column.
df.to_csv(
    os.path.join(path, "selectCompChunkBenchmark.tsv"),
    index=False,
    sep="\t",
)