In [1]:
#%reset
import pandas as pd
from pandas import DataFrame, Series
#false positive warnings all the time:
pd.options.mode.chained_assignment = None

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np
import statistics

import os.path

from datetime import datetime


In [2]:
import re

#source: http://stackoverflow.com/questions/5917082/regular-expression-to-match-numbers-with-or-without-commas-and-decimals-in-text
# Raw strings keep the regex backslashes literal, so Python does not warn about
# invalid escape sequences such as "\d".
# Number with optional thousands separators and optional decimal part, e.g. "1,234.5".
regex_number_with_commas = r'(\d+|\d{1,3}(,\d{3})*)(\.\d+)?'
# Plain decimal number without separators, e.g. "300" or "0.011952997".
regex_decimal_number = r'(\d+\.)?\d+'


def removeCommasInteger(i):
    """Parse an integer written with comma separators, e.g. "1,234" -> 1234."""
    without_commas = "".join(i.split(","))
    return int(without_commas)

#Thanks to: https://regex101.com/ for testing the regexes live!

# 1. Simulation identifiers and paths

A simulation is identified by a **tuple**: (Store, Number of nodes, Amount of RAM, Dataset, Config, Additional info field)

In [3]:
# Fields: (store, number of nodes, amount of RAM, dataset, config, additional info).
# An empty additional-info string adds nothing to the generated identifiers.
example_tuple = ("Fuseki", 1, 64, "Ontoforce", "Default", "")

## Helper functions

* simulation tuple -> long_identifier
* simulation tuple -> short_identifier
* simulation tuple -> path to log file


In [4]:
def generateSimulationID(tup):
    """Build the long simulation identifier, e.g. "Fuseki_N1_64_Ontoforce_Default".

    Parameters:
        tup: simulation tuple (store, number of nodes, amount of RAM, dataset,
             config, additional info).

    Returns:
        The fields joined with "_", with the node count prefixed by "N". A
        non-empty additional-info field is appended as a last "_<info>" part.
    """
    store, nodes, ram, dataset, config, info = tup[:6]
    parts = [str(store), "N" + str(nodes), str(ram), str(dataset), str(config)]

    # Convert before testing for emptiness so a non-string info field
    # (e.g. the int 0) is accepted instead of failing on len().
    info = "" if info is None else str(info)
    if info:
        parts.append(info)

    return "_".join(parts)


# Short labels used for the dataset part of the compact simulation identifier.
simulation_map = {
    "Watdiv10M": "W10", "Watdiv100M": "W100", "Watdiv1000M": "W1000", "Ontoforce": "Ont"
}

def generateSimulationIDCompact(tup, pref_length=3):
    """Build the compact simulation identifier, e.g. "Fus_N1_64_Ont_Def".

    Parameters:
        tup: simulation tuple (store, number of nodes, amount of RAM, dataset,
             config, additional info).
        pref_length: number of leading characters kept from the store and
             config names.

    Returns:
        The abbreviated fields joined with "_". The dataset is replaced by its
        label from simulation_map; a non-empty additional-info field is
        appended unabbreviated.

    Raises:
        KeyError: if the dataset has no entry in simulation_map.
    """
    store, nodes, ram, dataset, config, info = tup[:6]
    parts = [
        str(store)[:pref_length],
        "N" + str(nodes),
        str(ram),
        simulation_map[dataset],
        str(config)[:pref_length],
    ]

    # Convert before testing for emptiness so a non-string info field
    # (e.g. the int 0) is accepted instead of failing on len().
    info = "" if info is None else str(info)
    if info:
        parts.append(info)

    return "_".join(parts)

def generateFullPathErrorLog(path, tup):
    """Return the log file location: <path><store>/<long simulation id>_output.log.

    path is used as a plain string prefix, so it must already end with a
    path separator (e.g. "./").
    """
    store_dir = tup[0] + '/'
    log_name = generateSimulationID(tup) + "_output.log"
    return path + store_dir + log_name

### TEST

In [5]:
# Directory prefix under which each store has its own sub-directory of log files.
home_path = "./"

print(generateSimulationID(example_tuple))
print(generateSimulationIDCompact(example_tuple))
# sim_identifier and full_path are reused by the test cells further down.
sim_identifier = generateSimulationIDCompact(example_tuple)
full_path = generateFullPathErrorLog(home_path,example_tuple)
print(full_path)
print('Log file found? ' + str(os.path.isfile(full_path))) 

Fuseki_N1_64_Ontoforce_Default
Fus_N1_64_Ont_Def
./Fuseki/Fuseki_N1_64_Ontoforce_Default_output.log
Log file found? True


# 2. Convert log file into csv document

Fields:

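- sim_id: the compact simulation identifier
- query_name
- thread_id
- thread_type: `warmup` or `stress`
- order_id: the index of the query within its thread
- number_of_results: `-1` when the query did not succeed
- runtime: in seconds; the timeout value for timed-out queries, `-1` for other errors
- flag: `SUCCESS`, `TIMEOUT` or `ERROR`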

## 2.1 Extract Query Thread IDs

In [6]:
def _collectThreadIDs(path, start_marker, stop_marker, stop_before_start):
    """Collect the IDs of the threads that log 'Running Operation ' lines in one phase.

    Parameters:
        path: location of the benchmark log file.
        start_marker: text of the line that opens the phase of interest.
        stop_marker: text of the line at which reading stops.
        stop_before_start: when True the stop marker ends the scan even if the
            start marker has not been seen yet; when False it is only honoured
            after the phase has started.

    Returns:
        The distinct thread IDs (as strings) in order of first appearance.
    """
    thread_ids = {}  # dict used as an ordered set: keeps first-seen order
    phase_started = False

    with open(path) as f:
        for line in f:
            if start_marker in line:
                phase_started = True

            if stop_marker in line and (phase_started or stop_before_start):
                break

            if not phase_started:
                continue

            if 'Running Operation ' in line:
                m = re.search(r'Thread (\d+)', line)
                # Skip operation lines that carry no thread tag instead of crashing on None.
                if m is not None:
                    thread_ids[m.group(1)] = None

    return list(thread_ids)


def getIDsWarmupThreads(path):
    """Return the IDs (strings) of the threads that ran queries during the warmup phase.

    Only lines between 'Running Warmups...' and the first 'Running Benchmarks...'
    are considered.
    """
    return _collectThreadIDs(path, 'Running Warmups...', 'Running Benchmarks...', True)


def getIDsQueryThreads(path):
    """Return the IDs (strings) of the threads that ran queries during the benchmark phase.

    Reading starts at 'Running Benchmarks...'. Some of the log files are appended
    versions of multiple benchmark runs, so reading stops at the next
    'Running Warmups...' line that follows.
    """
    return _collectThreadIDs(path, 'Running Benchmarks...', 'Running Warmups...', False)

### TEST

In [7]:
# Thread IDs come back as strings (they are taken from a regex match on the log line).
ids_warmupthreads = getIDsWarmupThreads(full_path)
print(ids_warmupthreads)
ids_querythreads = getIDsQueryThreads(full_path)
print(ids_querythreads)

['1']
['537', '538', '536', '533', '535']


## 2.2 Extract log lines per thread ID

In [8]:
def _collectThreadLogLines(qid, path, start_marker, stop_marker, stop_before_start):
    """Collect the query-related log lines of one thread within one phase of the log.

    Kept are the lines of thread qid that announce a query ('Running Operation'),
    report its result ('result(s) in') or report a failure ('got error').

    Parameters:
        qid: thread ID (int or string).
        path: location of the benchmark log file.
        start_marker: text of the line that opens the phase of interest.
        stop_marker: text of the line at which reading stops.
        stop_before_start: when True the stop marker ends the scan even if the
            start marker has not been seen yet; when False it is only honoured
            after the phase has started.

    Returns:
        The matching lines, stripped, in file order.
    """
    thread_tag = 'Thread ' + str(qid) + "]"
    lines = []
    phase_started = False

    with open(path) as f:
        for line in f:
            if start_marker in line:
                phase_started = True

            if stop_marker in line and (phase_started or stop_before_start):
                break

            if not phase_started:
                continue

            if thread_tag not in line:
                continue

            if 'Running Operation' in line:
                # Log lines are sometimes accidentally concatenated: a new
                # "[Thread N] ..." entry starts right after the "..." that ends
                # a 'Running Operation' entry. Split them apart again.
                pieces = []
                rest = line
                while True:
                    split_at = rest.find('...[Thread')
                    if split_at < 0:
                        pieces.append(rest)
                        break
                    pieces.append(rest[:split_at + 3])
                    rest = rest[split_at + 3:]

                # Keep only the pieces written by this thread; the other pieces
                # belong to a different thread and would otherwise show up as
                # spurious queries/errors for this one.
                lines.extend(piece.strip() for piece in pieces if thread_tag in piece)
            elif 'result(s) in' in line or 'got error' in line:
                lines.append(line.strip())

    return lines


def getLogLinesOfWThread(qid, path):
    """Return the query-related log lines of warmup thread qid.

    Only lines between 'Running Warmups...' and the first 'Running Benchmarks...'
    are considered.
    """
    return _collectThreadLogLines(qid, path, 'Running Warmups...', 'Running Benchmarks...', True)


def getLogLinesOfQThread(qid, path):
    """Return the query-related log lines of benchmark (query) thread qid.

    Reading starts at 'Running Benchmarks...'. Some of the log files are appended
    versions of multiple benchmark runs, so reading stops at the next
    'Running Warmups...' line that follows.
    """
    return _collectThreadLogLines(qid, path, 'Running Benchmarks...', 'Running Warmups...', False)

### TEST

In [9]:
# Print the number of query-related log lines found for every warmup and query thread.
for tid in ids_warmupthreads:
    print(str(tid) + ":\t" + str(len(getLogLinesOfWThread(tid, full_path))))
for tid in ids_querythreads:
    print(str(tid) + ":\t" + str(len(getLogLinesOfQThread(tid, full_path))))    
    
# The thread ID may be passed as an int; it is converted with str() when matching.
# `lines` is reused by the example further down.
lines = getLogLinesOfWThread(1, full_path)


1:	2446
537:	575
538:	571
536:	572
533:	573
535:	571


## 2.3 Convert logs per thread into query information tuples

The log lines of a query come in pairs: a first line stating that a query is executed (it contains the query name) and a second line stating whether the query succeeded, the number of results and the runtime. Note that these two lines belong to the same thread but are not necessarily consecutive in the log file!

In [10]:
def generateTupleFromResultsLine(line, sim_id, query_name, thread_id, thread_type, order_id):
    """Turn a 'got N result(s) in X.Ys' log line into a query event tuple.

    Parameters:
        line: stripped log line; the runtime must be the last thing on the line.
        sim_id, query_name, thread_id, thread_type, order_id: copied unchanged
            into the returned tuple.

    Returns:
        (sim_id, query_name, thread_id, thread_type, order_id,
         number_of_results, number_of_seconds, 'SUCCESS')

    Raises:
        ValueError: if the result count or the runtime cannot be found in line.
    """
    regex_results = 'got (' + regex_number_with_commas + ') result'
    regex_seconds = 'in ('+ regex_decimal_number + ')s$'

    results_match = re.search(regex_results, line)
    seconds_match = re.search(regex_seconds, line)
    if results_match is None or seconds_match is None:
        # Name the offending line instead of failing with an AttributeError on None.
        raise ValueError("Cannot parse result count and runtime from log line: " + repr(line))

    number_of_results = removeCommasInteger(results_match.group(1))
    number_of_seconds = float(seconds_match.group(1))

    return (sim_id, query_name, thread_id, thread_type, order_id, number_of_results, number_of_seconds, 'SUCCESS')

In [11]:
# `lines` holds the log lines of warmup thread 1 collected in the test above.
example_line = lines[15]
print(example_line)
print(generateTupleFromResultsLine(example_line, generateSimulationIDCompact(example_tuple), \
                                  'query_name', 1, 'warmup', 0))

2016-09-16 08:02:42,803 - Logger - INFO - STATUS: [Thread 1] got 1 result(s) in 0.011952997s
('Fus_N1_64_Ont_Def', 'query_name', 1, 'warmup', 0, 1, 0.011952997, 'SUCCESS')


In [12]:
def generateQueryEventList(full_path, tid, thread_type, queryPath, timeout, sim_id):
    """Pair up the log lines of one thread into a list of query event tuples.

    Parameters:
        full_path: location of the benchmark log file.
        tid: thread ID.
        thread_type: 'warmup' reads the warmup phase; any other value reads
            the benchmark phase.
        queryPath: text that precedes the query name in a 'Running Operation'
            line; lines without it do not start a query event.
        timeout: runtime recorded for queries that timed out.
        sim_id: simulation identifier copied into every tuple.

    Returns:
        A list of tuples (sim_id, query_name, thread_id, thread_type, order_id,
        number_of_results, runtime, flag) where flag is 'SUCCESS', 'TIMEOUT'
        or 'ERROR'. A query whose outcome never shows up before the end of the
        log lines produces no tuple.
    """
    if thread_type == 'warmup':
        lines = getLogLinesOfWThread(tid, full_path)
    else:
        lines = getLogLinesOfQThread(tid, full_path)

    resultList = []
    awaiting_result = False
    query = None
    order_id = 0

    for l in lines:

        if awaiting_result:
            if 'result(s) in ' in l:
                resultList.append(generateTupleFromResultsLine(l, sim_id, query, tid, thread_type, order_id))
            elif 'got error' in l and 'Operation Callable' in l:
                # An error mentioning 'Operation Callable' is recorded as a timeout.
                resultList.append((sim_id, query, tid, thread_type, order_id, -1, timeout, 'TIMEOUT'))
            else:
                # Any other error, or the next 'Running Operation' line arriving
                # before an outcome was logged, is flagged as an error.
                resultList.append((sim_id, query, tid, thread_type, order_id, -1, -1, 'ERROR'))

            awaiting_result = False

        if 'Running Operation' in l:
            name_start = l.find(queryPath)

            if name_start >= 0:
                name_start += len(queryPath)
                # The query name runs up to the trailing '...'; if the line does
                # not carry one, keep the rest of the line rather than chopping
                # three characters off the name.
                name_end = l.rfind('...')
                if name_end < name_start:
                    name_end = len(l)

                query = str(l[name_start:name_end])
                awaiting_result = True
                order_id += 1

    return resultList

### TEST

In [13]:
# "templated/" is the text preceding the query name in the log lines;
# 300 is the runtime recorded for timed-out queries.
eventlist = generateQueryEventList(full_path, 1, 'warmup', "templated/", 300, sim_identifier)
# Show only the first 10 events.
c=0
for e in eventlist:
   
    print(e)
    c+=1
    if c==10:
        break

('Fus_N1_64_Ont_Def', 'queries/xhf', 1, 'warmup', 1, 0, 0.281765686, 'SUCCESS')
('Fus_N1_64_Ont_Def', 'queries/xzany', 1, 'warmup', 2, -1, 300, 'TIMEOUT')
('Fus_N1_64_Ont_Def', 'queries/xrr', 1, 'warmup', 3, -1, 300, 'TIMEOUT')
('Fus_N1_64_Ont_Def', 'queries/xzark', 1, 'warmup', 4, -1, 300, 'TIMEOUT')
('Fus_N1_64_Ont_Def', 'queries/xzape', 1, 'warmup', 5, -1, 300, 'TIMEOUT')
('Fus_N1_64_Ont_Def', 'queries/xzack', 1, 'warmup', 6, -1, 300, 'TIMEOUT')
('Fus_N1_64_Ont_Def', 'queries/xuj', 1, 'warmup', 7, 0, 0.017050155, 'SUCCESS')
('Fus_N1_64_Ont_Def', 'queries/xzauh', 1, 'warmup', 8, 1, 0.011952997, 'SUCCESS')
('Fus_N1_64_Ont_Def', 'queries/xwd', 1, 'warmup', 9, 146, 0.910000413, 'SUCCESS')
('Fus_N1_64_Ont_Def', 'queries/xzajc', 1, 'warmup', 10, 0, 0.010266175, 'SUCCESS')


## 2.4 Create dataframe with tuples of 1 thread

In [14]:
# WARNING: don't append to a DataFrame row by row, it's very slow because every
# append copies the whole frame. Building it from all rows at once took the
# conversion from +3 minutes to subsecond.

def queryEventsToDataframe(queryEventList):
    """Convert a list of query event tuples into a DataFrame.

    Parameters:
        queryEventList: tuples (sim_id, query_name, thread_id, thread_type,
            order_id, number_of_results, runtime, flag). thread_id, order_id
            and number_of_results are converted with int().

    Returns:
        A DataFrame with one row per event and the columns in the order above.
        An empty list gives an empty DataFrame with the same columns.
    """
    columns = ['sim_id', 'query_name', 'thread_id', 'thread_type', 'order_id', 'number_of_results',
               'runtime', 'flag']

    rows = [
        (tup[0], tup[1], int(tup[2]), tup[3], int(tup[4]), int(tup[5]), tup[6], tup[7])
        for tup in queryEventList
    ]

    # dtype=object stores the Python values untouched (the same result as the
    # former per-row Series construction), so values such as an integer
    # timeout are written to CSV exactly as they were recorded.
    return pd.DataFrame(rows, columns=columns, dtype=object)

### TEST

In [15]:
# Convert the events of warmup thread 1 and show the first rows.
df = queryEventsToDataframe(eventlist)
df.head(n=5)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag
0,Fus_N1_64_Ont_Def,queries/xhf,1,warmup,1,0,0.281766,SUCCESS
1,Fus_N1_64_Ont_Def,queries/xzany,1,warmup,2,-1,300.0,TIMEOUT
2,Fus_N1_64_Ont_Def,queries/xrr,1,warmup,3,-1,300.0,TIMEOUT
3,Fus_N1_64_Ont_Def,queries/xzark,1,warmup,4,-1,300.0,TIMEOUT
4,Fus_N1_64_Ont_Def,queries/xzape,1,warmup,5,-1,300.0,TIMEOUT


## 2.5 Merge dataframes of all threads

In [16]:
def generateQueryEventDataframe(bm_tuple, home_path, query_path, timeout):
    """Build one DataFrame with the query events of all threads of a simulation.

    Progress information is printed along the way.

    Parameters:
        bm_tuple: simulation tuple (store, number of nodes, amount of RAM,
            dataset, config, additional info).
        home_path: directory prefix under which the per-store log directories live.
        query_path: text that precedes the query name in the log lines.
        timeout: runtime recorded for queries that timed out.

    Returns:
        False when the log file does not exist, None when the log yields no
        query events at all, otherwise a DataFrame holding the warmup events
        followed by the benchmark ('stress') events.
    """
    path_logfile = generateFullPathErrorLog(home_path, bm_tuple)
    sim_id = generateSimulationIDCompact(bm_tuple)
    print("PROCESSING SIM_ID: " + sim_id)
    print(path_logfile)

    found = os.path.isfile(path_logfile)
    print('Log file found? ' + str(found))

    if not found:
        return False

    warmup_t = getIDsWarmupThreads(path_logfile)
    query_t = getIDsQueryThreads(path_logfile)

    print(warmup_t)
    print(query_t)

    # Collect the per-thread frames and concatenate once at the end instead of
    # re-copying the growing frame for every thread.
    frames = []

    for tid in warmup_t:
        eventlist = generateQueryEventList(path_logfile, tid, 'warmup', query_path, timeout, sim_id)
        print("WThread " + str(tid) + ": " + str(len(eventlist)) + " query events found")

        # A thread without events has nothing to show or convert.
        if not eventlist:
            continue

        #TODO remove
        print(eventlist[0])

        frames.append(queryEventsToDataframe(eventlist))

    for tid in query_t:
        eventlist = generateQueryEventList(path_logfile, tid, 'stress', query_path, timeout, sim_id)
        print("QThreads " + str(tid)+ ": " + str(len(eventlist)) + " query events found")

        if eventlist:
            frames.append(queryEventsToDataframe(eventlist))

    if not frames:
        return None

    return pd.concat(frames)
    

### TEST

In [17]:
# Full conversion of the example simulation; 300 is the runtime recorded for timeouts.
df_tot = generateQueryEventDataframe(example_tuple, './', 'templated/', 300)
df_tot.shape

PROCESSING SIM_ID: Fus_N1_64_Ont_Def
./Fuseki/Fuseki_N1_64_Ontoforce_Default_output.log
Log file found? True
['1']
['537', '538', '536', '533', '535']
WThread 1: 1223 query events found
('Fus_N1_64_Ont_Def', 'queries/xhf', '1', 'warmup', 1, 0, 0.281765686, 'SUCCESS')
QThreads 537: 287 query events found
QThreads 538: 285 query events found
QThreads 536: 286 query events found
QThreads 533: 286 query events found
QThreads 535: 285 query events found


(2652, 8)

In [18]:
# First rows of the combined frame (warmup events come first).
df_tot.head(n=5)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag
0,Fus_N1_64_Ont_Def,queries/xhf,1,warmup,1,0,0.281766,SUCCESS
1,Fus_N1_64_Ont_Def,queries/xzany,1,warmup,2,-1,300.0,TIMEOUT
2,Fus_N1_64_Ont_Def,queries/xrr,1,warmup,3,-1,300.0,TIMEOUT
3,Fus_N1_64_Ont_Def,queries/xzark,1,warmup,4,-1,300.0,TIMEOUT
4,Fus_N1_64_Ont_Def,queries/xzape,1,warmup,5,-1,300.0,TIMEOUT


## 2.6 Save to CSV 

In [19]:
# The file name is based on the compact simulation identifier.
filename = sim_identifier + "_queryevents.csv"
# Tab-separated, with a header row and without the index column.
df_tot.to_csv('./csv/'+filename, sep="\t", header=True, index=False)

# 3 Convert all simulation data to csv

## A. Watdiv (10M, 100M, 1000M)


In [20]:
#(1, 32, "Watdiv10M", "Default", ""), \

#(1, 32, "Watdiv100M", "Default", ""), \
#(1, 64, "Watdiv100M", "Default", ""), \
#(3, 64, "Watdiv100M", "Default", ""), \
#
#(1, 32, "Watdiv1000M", "Default", ""), \
#(1, 64, "Watdiv1000M", "Default", ""), \
#(1, 64, "Watdiv1000M", "Optimized", "")]    
#(3, 32, "Watdiv1000M", "Optimized", "")]    

### 1. Blazegraph

In [21]:
# Blazegraph Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("Blazegraph", 1, 32, "Watdiv10M", "Default", ""),
    ("Blazegraph", 1, 32, "Watdiv100M", "Default", ""),
    ("Blazegraph", 1, 32, "Watdiv1000M", "Default", ""),
    ("Blazegraph", 1, 64, "Watdiv1000M", "Default", ""),
    ("Blazegraph", 1, 64, "Watdiv1000M", "Optimized", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:40:44.174321
PROCESSING SIM_ID: Bla_N1_32_W10_Def
./Blazegraph/Blazegraph_N1_32_Watdiv10M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '21', '18', '23', '24', '20']
WThread 1: 6000 query events found
('Bla_N1_32_W10_Def', 'L3/L3_split67.sparql', '1', 'warmup', 1, 20, 0.029128175, 'SUCCESS')
QThreads 17: 2000 query events found
QThreads 22: 2000 query events found
QThreads 19: 2000 query events found
QThreads 21: 2000 query events found
QThreads 18: 4000 query events found
QThreads 23: 2000 query events found
QThreads 24: 4000 query events found
QThreads 20: 2000 query events found
2018-09-19 14:40:50.869276
PROCESSING SIM_ID: Bla_N1_32_W100_Def
./Blazegraph/Blazegraph_N1_32_Watdiv100M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '21', '18', '23', '24', '20']
WThread 1: 6000 query events found
('Bla_N1_32_W100_Def', 'C1/C1_split28.sparql', '1', 'warmup', 1, 201, 4.508388634, 'SUCCESS')
QThreads 17: 2000 query events found
QThreads 22

### 2. GraphDB

In [22]:
# GraphDB Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("GraphDB", 1, 32, "Watdiv10M", "Default", ""),
    ("GraphDB", 1, 32, "Watdiv100M", "Default", ""),
    ("GraphDB", 1, 32, "Watdiv1000M", "Default", ""),
    ("GraphDB", 1, 64, "Watdiv1000M", "Default", ""),
    ("GraphDB", 1, 64, "Watdiv1000M", "Optimized", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:41:00.433539
PROCESSING SIM_ID: Gra_N1_32_W10_Def
./GraphDB/GraphDB_N1_32_Watdiv10M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '21', '18', '23', '24', '20']
WThread 1: 6000 query events found
('Gra_N1_32_W10_Def', 'S3/S3_split31.sparql', '1', 'warmup', 1, 0, 0.039665247, 'SUCCESS')
QThreads 17: 2000 query events found
QThreads 22: 4000 query events found
QThreads 19: 2000 query events found
QThreads 21: 4000 query events found
QThreads 18: 2000 query events found
QThreads 23: 2000 query events found
QThreads 24: 2000 query events found
QThreads 20: 2000 query events found
2018-09-19 14:41:06.712003
PROCESSING SIM_ID: Gra_N1_32_W100_Def
./GraphDB/GraphDB_N1_32_Watdiv100M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '21', '23', '25', '24', '20']
WThread 1: 6000 query events found
('Gra_N1_32_W100_Def', 'L2/L2_split77.sparql', '1', 'warmup', 1, 78, 0.144596859, 'SUCCESS')
QThreads 17: 2000 query events found
QThreads 22: 4000 query e

### 3. ES

In [23]:
# ES Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("ES", 1, 32, "Watdiv10M", "Default", ""),
    ("ES", 1, 32, "Watdiv100M", "Default", ""),
    ("ES", 1, 32, "Watdiv1000M", "Default", ""),
    ("ES", 3, 32, "Watdiv1000M", "Default", ""),
    ("ES", 1, 64, "Watdiv1000M", "Default", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:41:13.750162
PROCESSING SIM_ID: ES_N1_32_W10_Def
./ES/ES_N1_32_Watdiv10M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '21', '23', '25', '24', '20']
WThread 1: 6000 query events found
('ES_N1_32_W10_Def', 'S6/S6_split75.sparql', '1', 'warmup', 1, 3, 0.074037429, 'SUCCESS')
QThreads 17: 2000 query events found
QThreads 22: 2000 query events found
QThreads 19: 2000 query events found
QThreads 21: 4000 query events found
QThreads 23: 2000 query events found
QThreads 25: 4000 query events found
QThreads 24: 2000 query events found
QThreads 20: 2000 query events found
2018-09-19 14:41:19.781496
PROCESSING SIM_ID: ES_N1_32_W100_Def
./ES/ES_N1_32_Watdiv100M_Default_output.log
Log file found? False
2018-09-19 14:41:19.782653
PROCESSING SIM_ID: ES_N1_32_W1000_Def
./ES/ES_N1_32_Watdiv1000M_Default_output.log
Log file found? True
['1']
['1', '16']
WThread 1: 5674 query events found
('ES_N1_32_W1000_Def', 'S4/S4_split98.sparql', '1', 'warmup', 1, 3062, 68.098128339

### 4. Virtuoso

In [24]:
# Virtuoso Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("Virtuoso", 1, 32, "Watdiv10M", "Default", ""),
    ("Virtuoso", 1, 32, "Watdiv100M", "Default", ""),
    ("Virtuoso", 1, 32, "Watdiv1000M", "Default", ""),
    ("Virtuoso", 1, 32, "Watdiv1000M", "Default", "RERUN"),
    ("Virtuoso", 3, 32, "Watdiv1000M", "Default", ""),
    ("Virtuoso", 1, 64, "Watdiv1000M", "Default", ""),
    ("Virtuoso", 1, 64, "Watdiv1000M", "Optimized", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:41:23.524747
PROCESSING SIM_ID: Vir_N1_32_W10_Def
./Virtuoso/Virtuoso_N1_32_Watdiv10M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '18', '21', '25', '33', '24', '20']
WThread 1: 6000 query events found
('Vir_N1_32_W10_Def', 'S5/S5_split59.sparql', '1', 'warmup', 1, 0, 0.009704654, 'SUCCESS')
QThreads 17: 2000 query events found
QThreads 22: 2000 query events found
QThreads 19: 2000 query events found
QThreads 18: 2000 query events found
QThreads 21: 2000 query events found
QThreads 25: 4000 query events found
QThreads 33: 2000 query events found
QThreads 24: 2000 query events found
QThreads 20: 2000 query events found
2018-09-19 14:41:29.558572
PROCESSING SIM_ID: Vir_N1_32_W100_Def
./Virtuoso/Virtuoso_N1_32_Watdiv100M_Default_output.log
Log file found? True
['1']
['17', '22', '19', '21', '18', '23', '24', '20']
WThread 1: 6000 query events found
('Vir_N1_32_W100_Def', 'F3/F3_split99.sparql', '1', 'warmup', 1, 7, 0.018153995, 'SUCCESS')
QThreads 17: 400

### 5. Fuseki

In [25]:
# Fuseki Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("Fuseki", 1, 64, "Watdiv100M", "Default", ""),
    ("Fuseki", 1, 64, "Watdiv1000M", "Default", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:41:44.386388
PROCESSING SIM_ID: Fus_N1_64_W100_Def
./Fuseki/Fuseki_N1_64_Watdiv100M_Default_output.log
Log file found? True
['1']
['101', '104', '107', '105', '103']
WThread 1: 400 query events found
('Fus_N1_64_W100_Def', 'L5/L5_split1.sparql', '1', 'warmup', 1, 2681, 10.564796776, 'SUCCESS')
QThreads 101: 400 query events found
QThreads 104: 400 query events found
QThreads 107: 400 query events found
QThreads 105: 400 query events found
QThreads 103: 400 query events found
2018-09-19 14:41:45.393536
PROCESSING SIM_ID: Fus_N1_64_W1000_Def
./Fuseki/Fuseki_N1_64_Watdiv1000M_Default_output.log
Log file found? True
['1']
['57', '53', '55', '52', '54']
WThread 1: 400 query events found
('Fus_N1_64_W1000_Def', 'C3/C3_split13.sparql', '1', 'warmup', 1, -1, 300, 'TIMEOUT')
QThreads 57: 400 query events found
QThreads 53: 400 query events found
QThreads 55: 400 query events found
QThreads 52: 400 query events found
QThreads 54: 400 query events found
2018-09-19 14:41:45.910408


### 6. LDF

In [26]:
# LDF Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("LDF", 1, 64, "Watdiv100M", "Default", ""),
    ("LDF", 3, 64, "Watdiv100M", "Default", ""),
    ("LDF", 1, 64, "Watdiv1000M", "Default", ""),
    ("LDF", 3, 64, "Watdiv1000M", "Default", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:41:45.928030
PROCESSING SIM_ID: LDF_N1_64_W100_Def
./LDF/LDF_N1_64_Watdiv100M_Default_output.log
Log file found? True
['1']
['70', '75', '73', '76', '72']
WThread 1: 400 query events found
('LDF_N1_64_W100_Def', 'F4/F4_split5.sparql', '1', 'warmup', 1, 103, 12.653614641, 'SUCCESS')
QThreads 70: 400 query events found
QThreads 75: 400 query events found
QThreads 73: 400 query events found
QThreads 76: 400 query events found
QThreads 72: 400 query events found
2018-09-19 14:41:46.474412
PROCESSING SIM_ID: LDF_N3_64_W100_Def
./LDF/LDF_N3_64_Watdiv100M_Default_output.log
Log file found? True
['1']
['117', '116', '114', '115', '112']
WThread 1: 400 query events found
('LDF_N3_64_W100_Def', 'C2/C2_split7.sparql', '1', 'warmup', 1, -1, 300, 'TIMEOUT')
QThreads 117: 400 query events found
QThreads 116: 400 query events found
QThreads 114: 400 query events found
QThreads 115: 400 query events found
QThreads 112: 400 query events found
2018-09-19 14:41:46.980331
PROCESSING SIM_ID: 

### 7. FluidOps

In [27]:
# FluidOps Watdiv runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("FluidOps", 3, 64, "Watdiv100M", "Default", ""),
    ("FluidOps", 1, 64, "Watdiv1000M", "Default", ""),
    ("FluidOps", 3, 64, "Watdiv1000M", "Default", ""),
]

for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 300)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))

2018-09-19 14:41:48.147568
PROCESSING SIM_ID: Flu_N3_64_W100_Def
./FluidOps/FluidOps_N3_64_Watdiv100M_Default_output.log
Log file found? True
['1']
[]
WThread 1: 291 query events found
('Flu_N3_64_W100_Def', 'F1/F1_split11.sparql', '1', 'warmup', 1, 2, 8.646807594, 'SUCCESS')
2018-09-19 14:41:48.209543
PROCESSING SIM_ID: Flu_N1_64_W1000_Def
./FluidOps/FluidOps_N1_64_Watdiv1000M_Default_output.log
Log file found? True
['1']
['32', '37', '31', '35', '36']
WThread 1: 400 query events found
('Flu_N1_64_W1000_Def', 'L5/L5_split2.sparql', '1', 'warmup', 1, 13001, 1.294186533, 'SUCCESS')
QThreads 32: 400 query events found
QThreads 37: 400 query events found
QThreads 31: 400 query events found
QThreads 35: 400 query events found
QThreads 36: 400 query events found
2018-09-19 14:41:48.819055
PROCESSING SIM_ID: Flu_N3_64_W1000_Def
./FluidOps/FluidOps_N3_64_Watdiv1000M_Default_output.log
Log file found? True
['1']
['426', '429', '434', '427', '428']
WThread 1: 400 query events found
('Flu_N3_64_

## B. Ontoforce Dataset

In [28]:
# Ontoforce runs: (store, nodes, RAM, dataset, config, additional info).
bm_tuples = [
    ("Blazegraph", 1, 64, "Ontoforce", "Optimized", ""),
    ("GraphDB", 1, 64, "Ontoforce", "Optimized", ""),
    ("ES", 1, 64, "Ontoforce", "Default", ""),
]

# Single-node Virtuoso: 64 then 32 of RAM, each without and with the "VWall" tag.
bm_tuples += [("Virtuoso", 1, ram, "Ontoforce", "Optimized", info)
              for ram in (64, 32)
              for info in ("", "VWall")]

# Three-node Virtuoso: one entry per tagged run.
bm_tuples += [("Virtuoso", 3, 64, "Ontoforce", "Optimized", info)
              for info in ("0", "1", "2", "AWS1", "AWS2", "AWS3")]

bm_tuples.append(("Fuseki", 1, 64, "Ontoforce", "Default", ""))

# FluidOps: runs "1".."3" on one node, then on three nodes.
bm_tuples += [("FluidOps", nodes, 64, "Ontoforce", "Default", info)
              for nodes in (1, 3)
              for info in ("1", "2", "3")]
             

In [29]:
# Convert every Ontoforce run; 1200 is the runtime recorded for timed-out queries.
for bm_tuple in bm_tuples:

    print(str(datetime.now()))

    df = generateQueryEventDataframe(bm_tuple, './', 'templated/', 1200)

    # False: the log file is missing. None: the log yielded no thread data.
    # Either way there is nothing to write (and None has no to_csv).
    if df is False or df is None:
        continue

    filename = generateSimulationID(bm_tuple) + "_queryevents.csv"
    df.to_csv('./csv/' + filename, sep="\t", header=True, index=False)

print(str(datetime.now()))


2018-09-19 14:41:49.450157
PROCESSING SIM_ID: Bla_N1_64_Ont_Opt
./Blazegraph/Blazegraph_N1_64_Ontoforce_Optimized_output.log
Log file found? True
['1']
[]
WThread 1: 468 query events found
('Bla_N1_64_Ont_Opt', 'queries/xzaub', '1', 'warmup', 1, 0, 0.164709896, 'SUCCESS')
2018-09-19 14:41:49.596548
PROCESSING SIM_ID: Gra_N1_64_Ont_Opt
./GraphDB/GraphDB_N1_64_Ontoforce_Optimized_output.log
Log file found? True
['1']
['245', '243', '246', '244', '247']
WThread 1: 1223 query events found
('Gra_N1_64_Ont_Opt', 'queries/xzalt', '1', 'warmup', 1, 0, 0.030136252, 'SUCCESS')
QThreads 245: 542 query events found
QThreads 243: 538 query events found
QThreads 246: 536 query events found
QThreads 244: 554 query events found
QThreads 247: 562 query events found
2018-09-19 14:41:50.537391
PROCESSING SIM_ID: ES_N1_64_Ont_Def
./ES/ES_N1_64_Ontoforce_Default_output.log
Log file found? True
['1']
['53', '51', '52', '49', '50']
WThread 1: 1223 query events found
('ES_N1_64_Ont_Def', 'queries/xzasi', '1',