In [1]:
import pandas as pd
from pandas import DataFrame, Series
#false positive warnings all the time:
pd.options.mode.chained_assignment = None

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np
import statistics

import os.path

import time
def print_elapsed(start):
    """Print the wall-clock time that has passed since ``start``.

    start: a timestamp as returned by ``time.time()``.
    The elapsed time is printed in seconds, followed by " secs".
    """
    elapsed = time.time() - start
    print("{} secs".format(elapsed))

# Runtime Analysis

## Methods:

* Assumption: queries are correct! This assumption will be invalidated for a number of simulations later on!

### A1. Define Tquery?

* Constraint: only look for query runtimes during the time the server was up (=benchmark survival). There are cases where the server crashed and the benchmarker reported timeouts.

* Distinguish between time in the warmup phase (single threaded) and the stress test (multithreaded)

* Per query report the number of successful runs, the number of error runs and the number of timeouts

* For the median runtime take timeouts and successful runtimes into account

* **NEW** Remove incorrect queries before computing the median runtime

* **NEW** Remove C3 templates (pd.merge will handle this, as C3s won't be in the index for Virtuoso)

* If query is never seen during benchmark survival we cannot say anything => it's not in our data and plots!


### A2. Tquery dataframe:

**PREFILTER ON BENCHMARK SURVIVAL**

**IF LOG FILE NOT AVAILABLE:** use median reported in results csv file with thread_type=stress, successes=all, errors=0, timeouts=0

**COLUMNS:**

- thread_type: warmup/stress
- successes: number of times flag == SUCCESS
- errors: number of times flag == ERROR
- timeouts: number of times flag == TIMEOUT
- incorrect: number of times correct = INCORRECT




## 1. Query Runtime dataframe from events file: Tryout

In [2]:
def generateFilenameQueryEventsCorrect(bmtuple):
    """Build the path of the corrected query-events CSV for a benchmark tuple.

    bmtuple: a 6-element sequence. Elements 0, 3 and 4 are used as strings;
    elements 1, 2 and 5 are converted with ``str``. Element 1 is prefixed
    with "N". Element 5 is an optional suffix that is skipped when its
    string form is empty.

    Returns a path under ``./csv_correct/`` ending in
    ``_queryevents_correct.csv``.
    """
    parts = [
        bmtuple[0],
        "N" + str(bmtuple[1]),
        str(bmtuple[2]),
        bmtuple[3],
        bmtuple[4],
    ]
    suffix = str(bmtuple[5])
    if suffix:
        parts.append(suffix)
    return './csv_correct/' + "_".join(parts) + "_queryevents_correct.csv"

def generateFilenameRuntimesCorrect(bmtuple):
    """Build the path of the per-query runtimes CSV for a benchmark tuple.

    bmtuple: a 6-element sequence. Elements 0, 3 and 4 are used as strings;
    elements 1, 2 and 5 are converted with ``str``. Element 1 is prefixed
    with "N". Element 5 is an optional suffix that is skipped when its
    string form is empty.

    Returns a path under ``./runtime_csv_correct/`` ending in
    ``_queryruntimes_correct.csv``.
    """
    parts = [
        bmtuple[0],
        "N" + str(bmtuple[1]),
        str(bmtuple[2]),
        bmtuple[3],
        bmtuple[4],
    ]
    suffix = str(bmtuple[5])
    if suffix:
        parts.append(suffix)
    return './runtime_csv_correct/' + "_".join(parts) + "_queryruntimes_correct.csv"

def generate_sorted_events_filename(tup):
    """Return the path of the sorted-events CSV for a benchmark tuple.

    The simulation id is obtained from ``generateSimulationID(tup)``.
    NOTE(review): ``generateSimulationID`` is not defined in the visible part
    of this notebook; it must be in scope before this function is called.
    """
    simulation_id = generateSimulationID(tup)
    return 'query_events_sorted/' + simulation_id + "_events_sorted.csv"

### TEST

In [3]:
# Resolve the events file of one Virtuoso run and confirm it exists on disk.
events_file = generateFilenameQueryEventsCorrect(
    ('Virtuoso', 1, 32, 'Watdiv1000M', 'Default', '')
)
print(events_file, os.path.isfile(events_file), sep="\n")

# The events files are tab-separated.
df = pd.read_csv(events_file, sep='\t')
df.head()

./csv_correct/Virtuoso_N1_32_Watdiv1000M_Default_queryevents_correct.csv
True


Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct
0,Vir_N1_32_W1000_Def,L3/L3_split42.sparql,1,warmup,1,46,1.183432,SUCCESS,CORRECT
1,Vir_N1_32_W1000_Def,L3/L3_split42.sparql,17,stress,1505,46,0.014287,SUCCESS,CORRECT
2,Vir_N1_32_W1000_Def,L3/L3_split42.sparql,13,stress,1040,46,0.009484,SUCCESS,CORRECT
3,Vir_N1_32_W1000_Def,L3/L3_split42.sparql,19,stress,1896,46,0.008077,SUCCESS,CORRECT
4,Vir_N1_32_W1000_Def,L3/L3_split42.sparql,21,stress,1610,46,0.007975,SUCCESS,CORRECT


### Max Order ID per thread

In [4]:
# Keep only successful events, then take the per-thread maximum of
# sim_id and order_id.
df_filter_success = df.loc[df['flag'] == 'SUCCESS']
df_survival = (
    df_filter_success
    .groupby(['thread_id', 'thread_type'], as_index=False)[['sim_id', 'order_id']]
    .max()
)

for line in df_survival.values:
    print(line)


[1 'warmup' 'Vir_N1_32_W1000_Def' 2000]
[13 'stress' 'Vir_N1_32_W1000_Def' 2000]
[17 'stress' 'Vir_N1_32_W1000_Def' 2000]
[19 'stress' 'Vir_N1_32_W1000_Def' 2000]
[20 'stress' 'Vir_N1_32_W1000_Def' 2000]
[21 'stress' 'Vir_N1_32_W1000_Def' 2000]


### For a thread get the corresponding restricted df

In [5]:
# Select the events of stress thread 16, then cut them off at order_id 82.
df_test = df.loc[(df['thread_id'] == 16) & (df['thread_type'] == 'stress')]
df_test2 = df_test.loc[df_test['order_id'] <= 82]
df_test2.head()

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct


In [6]:
def preFilterForBMSurvival(filename):
    """Restrict a query-events file to the period each thread was still succeeding.

    For every (thread_id, thread_type) pair, the highest ``order_id`` with
    ``flag == 'SUCCESS'`` is looked up, and only that thread's events with
    an ``order_id`` up to and including that value are kept. Threads without
    a single successful event contribute no rows.

    filename: path of a tab-separated events file with at least the columns
        ``thread_id``, ``thread_type``, ``order_id`` and ``flag``.

    Returns a DataFrame with the same columns as the file. When the file
    holds no successful event at all, an empty DataFrame is returned
    (``pd.concat`` would raise ValueError on an empty list).
    """
    df = pd.read_csv(filename, sep='\t')

    # Highest successful order_id per thread, looked up by column name
    # rather than by position in a row of values.
    last_success = (
        df[df['flag'] == 'SUCCESS']
        .groupby(['thread_id', 'thread_type'])['order_id']
        .max()
    )

    restricted_dfs = [
        df[(df['thread_id'] == thread_id)
           & (df['thread_type'] == thread_type)
           & (df['order_id'] <= max_order_id)]
        for (thread_id, thread_type), max_order_id in last_success.items()
    ]

    if not restricted_dfs:
        # No thread ever succeeded: nothing survives the filter.
        return df.iloc[0:0]

    return pd.concat(restricted_dfs, axis=0)
        
     
    
        
    

### TEST 

In [7]:
# Apply the survival pre-filter to the Virtuoso Watdiv1000M events file.
df_test = preFilterForBMSurvival(
    generateFilenameQueryEventsCorrect(
        ('Virtuoso', 1, 32, 'Watdiv1000M', 'Default', '')
    )
)

In [8]:
# Print the filtered shape on one line and the literal 12000 on the next.
print(df_test.shape, 12000, sep="\n")

(12000, 9)
12000


### timeouts, errors, successes column values

In [9]:
# Boolean masks over the filtered events, one per outcome.
TIMEOUT = df_test['flag'].eq('TIMEOUT')
ERROR = df_test['flag'].eq('ERROR')
SUCCESS = df_test['flag'].eq('SUCCESS')
CORRECT = df_test['correct'].eq('CORRECT')

# Split the events by outcome; a success only counts when it is also correct.
df_timeouts = df_test.loc[TIMEOUT]
df_errors = df_test.loc[ERROR]
df_successes = df_test.loc[SUCCESS & CORRECT]
df_incorrect = df_test.loc[SUCCESS & ~CORRECT]

# Timeouts and correct successes together form the non-error events.
df_nonerrors = pd.concat([df_timeouts, df_successes])

print(
    df_timeouts.shape,
    df_errors.shape,
    df_successes.shape,
    df_incorrect.shape,
    df_nonerrors.shape,
    sep="\n",
)

(0, 9)
(4, 9)
(11396, 9)
(600, 9)
(11396, 9)


In [10]:
# Number of timeout events per (query, thread type).
ntimeouts = (
    df_timeouts
    .groupby(['query_name', 'thread_type'], as_index=False)
    .agg({'flag': np.size})
    .rename(columns={'flag': 'timeouts'})
)
ntimeouts.head()

Unnamed: 0,query_name,thread_type,timeouts


In [11]:
# Number of error events per (query, thread type).
nerrors = (
    df_errors
    .groupby(['query_name', 'thread_type'], as_index=False)
    .agg({'flag': np.size})
    .rename(columns={'flag': 'errors'})
)
nerrors.head()

Unnamed: 0,query_name,thread_type,errors
0,L1/L1_split2.sparql,stress,1
1,L2/L2_split12.sparql,stress,1
2,S2/S2_split19.sparql,stress,1
3,S4/S4_split7.sparql,stress,1


In [12]:
# Number of correct, successful events per (query, thread type).
nsuccess = (
    df_successes
    .groupby(['query_name', 'thread_type'], as_index=False)
    .agg({'flag': np.size})
    .rename(columns={'flag': 'success'})
)
nsuccess.head()

Unnamed: 0,query_name,thread_type,success
0,C1/C1_split0.sparql,stress,5
1,C1/C1_split0.sparql,warmup,1
2,C1/C1_split1.sparql,stress,5
3,C1/C1_split1.sparql,warmup,1
4,C1/C1_split10.sparql,stress,5


In [13]:
# Number of successful-but-incorrect events per (query, thread type).
nincorrect = (
    df_incorrect
    .groupby(['query_name', 'thread_type'], as_index=False)
    .agg({'flag': np.size})
    .rename(columns={'flag': 'incorrect'})
)
nincorrect.head()

Unnamed: 0,query_name,thread_type,incorrect
0,C3/C3_split0.sparql,stress,5
1,C3/C3_split0.sparql,warmup,1
2,C3/C3_split1.sparql,stress,5
3,C3/C3_split1.sparql,warmup,1
4,C3/C3_split10.sparql,stress,5


### runtime nonerror queries

In [14]:
# Median runtime over the non-error events per (query, thread type).
nonerror_timings = (
    df_nonerrors
    .groupby(['query_name', 'thread_type'], as_index=False)
    .agg({'runtime': np.median})
    .rename(columns={'runtime': 'median_runtime'})
)
nonerror_timings.tail()

Unnamed: 0,query_name,thread_type,median_runtime
3795,S7/S7_split97.sparql,warmup,0.005846
3796,S7/S7_split98.sparql,stress,12.211164
3797,S7/S7_split98.sparql,warmup,6.344599
3798,S7/S7_split99.sparql,stress,0.00876
3799,S7/S7_split99.sparql,warmup,0.005399


### merge together

In [15]:
# Outer-join the per-outcome counts; a missing count means zero events.
df_merge1 = ntimeouts.merge(nerrors, how='outer', on=['query_name', 'thread_type']).fillna(0)
df_merge2 = df_merge1.merge(nsuccess, how='outer', on=['query_name', 'thread_type']).fillna(0)

# The `nincorrect` counts are deliberately NOT merged in:
# incorrect queries are unusable for runtime comparisons!
df_merge3 = df_merge2

# Rows without a median runtime get the timeout value instead.
timeout = 300
df_merge4 = df_merge3.merge(
    nonerror_timings, how='outer', on=['query_name', 'thread_type']
).fillna(timeout)
df_merge4.head()

Unnamed: 0,timeouts,query_name,thread_type,errors,success,median_runtime
0,0.0,L1/L1_split2.sparql,stress,1.0,4,0.009956
1,0.0,L2/L2_split12.sparql,stress,1.0,4,0.595631
2,0.0,S2/S2_split19.sparql,stress,1.0,4,0.163735
3,0.0,S4/S4_split7.sparql,stress,1.0,4,0.075144
4,0.0,C1/C1_split0.sparql,stress,0.0,5,57.697063


In [16]:
# Put the columns in the order used for the output file.
# (Inspect the available ones with: print(df_merge4.columns))
df_merge5 = df_merge4.loc[
    :, ['query_name', 'thread_type', 'errors', 'timeouts', 'success', 'median_runtime']
]

In [17]:
# Write the tryout result as a tab-separated file without the index column.
df_merge5.to_csv('test_tquery.csv', index=False, sep="\t")

## 2. Query Runtime for all available files

In [18]:
def _count_flag_rows(rows, keys, column_name):
    """Count the rows of ``rows`` per ``keys`` group into a column named ``column_name``."""
    return rows.groupby(keys)['flag'].size().reset_index(name=column_name)


def createQueryRuntimeDF(filename, querytimeout):
    """Summarise a query-events file into one row per (query_name, thread_type).

    The events are first restricted with ``preFilterForBMSurvival``. Per
    group the result holds:

    - ``success``: number of events with flag SUCCESS and correct CORRECT
    - ``errors``: number of events with flag ERROR
    - ``timeouts``: number of events with flag TIMEOUT
    - ``median_runtime``: median ``runtime`` over the timeouts and the
      correct successes; ``querytimeout`` when the group has neither

    Successful-but-incorrect events are left out entirely: they are unusable
    for runtime comparisons. A group consisting only of such events does not
    appear in the result.

    filename: path of the tab-separated events file.
    querytimeout: value used as median runtime for groups without any
        timeout or correct success.

    Returns a DataFrame with the columns ``query_name``, ``thread_type``,
    ``success``, ``errors``, ``timeouts``, ``median_runtime``.
    """
    df_survival = preFilterForBMSurvival(filename)
    keys = ['query_name', 'thread_type']

    is_timeout = df_survival['flag'] == 'TIMEOUT'
    is_error = df_survival['flag'] == 'ERROR'
    is_correct_success = (
        (df_survival['flag'] == 'SUCCESS') & (df_survival['correct'] == 'CORRECT')
    )

    ntimeouts = _count_flag_rows(df_survival[is_timeout], keys, 'timeouts')
    nerrors = _count_flag_rows(df_survival[is_error], keys, 'errors')
    nsuccess = _count_flag_rows(df_survival[is_correct_success], keys, 'success')

    # The two masks are disjoint, so this selects the same rows as
    # concatenating the timeouts and the correct successes.
    nonerror_timings = (
        df_survival[is_timeout | is_correct_success]
        .groupby(keys)['runtime']
        .median()
        .reset_index(name='median_runtime')
    )

    counts = (
        ntimeouts
        .merge(nerrors, how='outer', on=keys)
        .merge(nsuccess, how='outer', on=keys)
    )
    # A count that is missing after the outer joins means zero such events.
    count_cols = ['timeouts', 'errors', 'success']
    counts[count_cols] = counts[count_cols].fillna(0)

    result = counts.merge(nonerror_timings, how='outer', on=keys)
    # Only the runtime column falls back to the timeout value.
    result['median_runtime'] = result['median_runtime'].fillna(querytimeout)

    return result[['query_name', 'thread_type', 'success', 'errors', 'timeouts', 'median_runtime']]



### 1. Blazegraph Watdiv

In [19]:

def runtimes_for_stores(tuples, timeout):
    """Write a per-query runtimes CSV for every benchmark tuple whose events file exists.

    For each tuple the events filename is printed together with whether the
    file exists. Existing files are summarised with ``createQueryRuntimeDF``
    and written tab-separated, with a header and without the index, to the
    path given by ``generateFilenameRuntimesCorrect``. Missing files are
    skipped. The total elapsed time is printed at the end.

    tuples: iterable of benchmark tuples, as accepted by the filename helpers.
    timeout: query timeout passed on to ``createQueryRuntimeDF``.
    """
    start = time.time()

    # Iterate over the argument; the original looped over the notebook-level
    # `bm_tuples` and silently ignored whatever was passed in.
    for bm_tuple in tuples:
        filename = generateFilenameQueryEventsCorrect(bm_tuple)
        file_exists = os.path.isfile(filename)

        print(filename + ": " + str(file_exists))

        if file_exists:
            df = createQueryRuntimeDF(filename, timeout)
            new_filename = generateFilenameRuntimesCorrect(bm_tuple)
            df.to_csv(new_filename, sep="\t", header=True, index=False)

    print_elapsed(start)

In [20]:
# Blazegraph WatDiv simulations to process.
bm_tuples = [
    ("Blazegraph", 1, 32, "Watdiv10M", "Default", ""),
    ("Blazegraph", 1, 32, "Watdiv100M", "Default", ""),
    ("Blazegraph", 1, 32, "Watdiv1000M", "Default", ""),
    ("Blazegraph", 1, 64, "Watdiv1000M", "Default", ""),
    ("Blazegraph", 1, 64, "Watdiv1000M", "Optimized", ""),
]

querytimeout = 300
runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/Blazegraph_N1_32_Watdiv10M_Default_queryevents_correct.csv: True
./csv_correct/Blazegraph_N1_32_Watdiv100M_Default_queryevents_correct.csv: True


  stride //= shape[i]


./csv_correct/Blazegraph_N1_32_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/Blazegraph_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/Blazegraph_N1_64_Watdiv1000M_Optimized_queryevents_correct.csv: True
0.4529082775115967 secs


### 2. GraphDB Watdiv

In [21]:
# GraphDB WatDiv simulations to process.
bm_tuples = [
    ("GraphDB", 1, 32, "Watdiv10M", "Default", ""),
    ("GraphDB", 1, 32, "Watdiv100M", "Default", ""),
    ("GraphDB", 1, 32, "Watdiv1000M", "Default", ""),
    ("GraphDB", 1, 64, "Watdiv1000M", "Default", ""),
    ("GraphDB", 1, 64, "Watdiv1000M", "Optimized", ""),
]

runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/GraphDB_N1_32_Watdiv10M_Default_queryevents_correct.csv: True
./csv_correct/GraphDB_N1_32_Watdiv100M_Default_queryevents_correct.csv: True


  stride //= shape[i]


./csv_correct/GraphDB_N1_32_Watdiv1000M_Default_queryevents_correct.csv: False
./csv_correct/GraphDB_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/GraphDB_N1_64_Watdiv1000M_Optimized_queryevents_correct.csv: True
0.38358449935913086 secs


### 3. ES Watdiv

In [22]:
# ES WatDiv simulations to process.
bm_tuples = [
    ("ES", 1, 32, "Watdiv10M", "Default", ""),
    ("ES", 1, 32, "Watdiv100M", "Default", ""),
    ("ES", 1, 32, "Watdiv1000M", "Default", ""),
    ("ES", 3, 32, "Watdiv1000M", "Default", ""),
    ("ES", 1, 64, "Watdiv1000M", "Default", ""),
]

runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/ES_N1_32_Watdiv10M_Default_queryevents_correct.csv: True
./csv_correct/ES_N1_32_Watdiv100M_Default_queryevents_correct.csv: False
./csv_correct/ES_N1_32_Watdiv1000M_Default_queryevents_correct.csv: True


  stride //= shape[i]


./csv_correct/ES_N3_32_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/ES_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
0.36228251457214355 secs


### 4. Virtuoso Watdiv

In [23]:
# Virtuoso WatDiv simulations to process, including one rerun.
bm_tuples = [
    ("Virtuoso", 1, 32, "Watdiv10M", "Default", ""),
    ("Virtuoso", 1, 32, "Watdiv100M", "Default", ""),
    ("Virtuoso", 1, 32, "Watdiv1000M", "Default", ""),
    ("Virtuoso", 1, 32, "Watdiv1000M", "Default", "RERUN"),
    ("Virtuoso", 3, 32, "Watdiv1000M", "Default", ""),
    ("Virtuoso", 1, 64, "Watdiv1000M", "Default", ""),
    ("Virtuoso", 1, 64, "Watdiv1000M", "Optimized", ""),
]

runtimes_for_stores(bm_tuples, querytimeout)



./csv_correct/Virtuoso_N1_32_Watdiv10M_Default_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_32_Watdiv100M_Default_queryevents_correct.csv: True


  stride //= shape[i]


./csv_correct/Virtuoso_N1_32_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_32_Watdiv1000M_Default_RERUN_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_32_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_64_Watdiv1000M_Optimized_queryevents_correct.csv: True
0.691211462020874 secs


### 5. Fuseki Watdiv

In [24]:
# Fuseki WatDiv simulations: one 64-thread single-node run per dataset.
bm_tuples = [
    ("Fuseki", 1, 64, dataset, "Default", "")
    for dataset in ("Watdiv100M", "Watdiv1000M")
]

runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/Fuseki_N1_64_Watdiv100M_Default_queryevents_correct.csv: True
./csv_correct/Fuseki_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
0.08414578437805176 secs


### 6. LDF Watdiv

In [25]:
# LDF WatDiv simulations: 1 and 3 nodes for each dataset, dataset-major order.
bm_tuples = [
    ("LDF", nodes, 64, dataset, "Default", "")
    for dataset in ("Watdiv100M", "Watdiv1000M")
    for nodes in (1, 3)
]

runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/LDF_N1_64_Watdiv100M_Default_queryevents_correct.csv: True
./csv_correct/LDF_N3_64_Watdiv100M_Default_queryevents_correct.csv: True
./csv_correct/LDF_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/LDF_N3_64_Watdiv1000M_Default_queryevents_correct.csv: True
0.21881604194641113 secs


### 7. FluidOps Watdiv

In [26]:
# FluidOps WatDiv simulations to process.
bm_tuples = [
    ("FluidOps", 3, 64, "Watdiv100M", "Default", ""),
    ("FluidOps", 1, 64, "Watdiv1000M", "Default", ""),
    ("FluidOps", 3, 64, "Watdiv1000M", "Default", ""),
]

runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/FluidOps_N3_64_Watdiv100M_Default_queryevents_correct.csv: True
./csv_correct/FluidOps_N1_64_Watdiv1000M_Default_queryevents_correct.csv: True
./csv_correct/FluidOps_N3_64_Watdiv1000M_Default_queryevents_correct.csv: True
0.1184389591217041 secs


  stride //= shape[i]


### 8. Ontoforce

In [27]:
# The Ontoforce simulations are processed with a query timeout of 1200.
querytimeout = 1200

bm_tuples = [
    ("Blazegraph", 1, 64, "Ontoforce", "Optimized", ""),
    ("GraphDB", 1, 64, "Ontoforce", "Optimized", ""),
    ("ES", 1, 64, "Ontoforce", "Default", ""),
    ("Virtuoso", 1, 64, "Ontoforce", "Optimized", ""),
    ("Virtuoso", 1, 64, "Ontoforce", "Optimized", "VWall"),
    ("Virtuoso", 1, 32, "Ontoforce", "Optimized", ""),
    ("Virtuoso", 1, 32, "Ontoforce", "Optimized", "VWall"),
    ("Virtuoso", 3, 64, "Ontoforce", "Optimized", "0"),
    ("Virtuoso", 3, 64, "Ontoforce", "Optimized", "1"),
    ("Virtuoso", 3, 64, "Ontoforce", "Optimized", "2"),
    ("Virtuoso", 3, 64, "Ontoforce", "Optimized", "AWS1"),
    ("Virtuoso", 3, 64, "Ontoforce", "Optimized", "AWS2"),
    ("Virtuoso", 3, 64, "Ontoforce", "Optimized", "AWS3"),
    ("Fuseki", 1, 64, "Ontoforce", "Default", ""),
    ("FluidOps", 1, 64, "Ontoforce", "Default", "1"),
    ("FluidOps", 1, 64, "Ontoforce", "Default", "2"),
    ("FluidOps", 1, 64, "Ontoforce", "Default", "3"),
    ("FluidOps", 3, 64, "Ontoforce", "Default", "1"),
    ("FluidOps", 3, 64, "Ontoforce", "Default", "2"),
]
# omitted default 3 since all queries failed

runtimes_for_stores(bm_tuples, querytimeout)


./csv_correct/Blazegraph_N1_64_Ontoforce_Optimized_queryevents_correct.csv: True
./csv_correct/GraphDB_N1_64_Ontoforce_Optimized_queryevents_correct.csv: True
./csv_correct/ES_N1_64_Ontoforce_Default_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_64_Ontoforce_Optimized_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_64_Ontoforce_Optimized_VWall_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_32_Ontoforce_Optimized_queryevents_correct.csv: True
./csv_correct/Virtuoso_N1_32_Ontoforce_Optimized_VWall_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_64_Ontoforce_Optimized_0_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_64_Ontoforce_Optimized_1_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_64_Ontoforce_Optimized_2_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_64_Ontoforce_Optimized_AWS1_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_64_Ontoforce_Optimized_AWS2_queryevents_correct.csv: True
./csv_correct/Virtuoso_N3_64_On

## 3. Query Runtime dataframe from results file: Tryout

* for two simulations (early on in bm cycle) the log files were not stored:
    - GraphDB,1,32,Watdiv1000M,Default
    - ES     ,1,32,Watdiv100M ,Default
    
* Let's manually convert them to the queryruntimes.csv format



In [28]:
# Convert the GraphDB results CSV (no event log available) into the
# queryruntimes format.
bm_tuple1 = ('GraphDB', 1, 32, 'Watdiv1000M', 'Default', '')
results_csv = './GraphDB/results_GraphDB_N1_32_Watdiv1000M_Default.csv'

# The first 34 lines of the results file are skipped before the table starts.
interesting_cols = ['Operation', 'Average Runtime (Arithmetic)']
df_tuple1 = pd.read_csv(results_csv, sep=',', skiprows=34)[interesting_cols]

# NOTE(review): the value stored as `median_runtime` is taken from the
# 'Average Runtime (Arithmetic)' column of the results file.
df_tuple1 = df_tuple1.assign(
    # Drop the leading 'templated/' prefix from the operation name.
    query_name=df_tuple1['Operation'].apply(lambda op: op[len('templated/'):]),
    median_runtime=df_tuple1['Average Runtime (Arithmetic)'],
    thread_type='stress',
    success=1,
    errors=0,
    timeouts=0,
)

interesting_cols = ['query_name', 'thread_type', 'success', 'errors', 'timeouts', 'median_runtime']
df_tuple2 = df_tuple1[interesting_cols]

new_filename = generateFilenameRuntimesCorrect(bm_tuple1)
df_tuple2.to_csv(new_filename, sep="\t", header=True, index=False)

In [29]:
# Convert the ES results CSV (no event log available) into the
# queryruntimes format.
bm_tuple1 = ('ES',1,32,'Watdiv100M','Default','')
results_csv = './ES/results_ES_N1_32_Watdiv100M_Default.csv'

# Load this simulation's own results file. Without this read the cell would
# reuse whatever `df_tuple1` an earlier cell left behind and write that data
# under the ES filename.
# NOTE(review): skiprows=34 mirrors the GraphDB results cell; confirm the ES
# results file has the same number of preamble lines.
df_tuple1 = pd.read_csv(results_csv, sep=',', skiprows=34)

interesting_cols = ['Operation', 'Average Runtime (Arithmetic)']
df_tuple1 = df_tuple1[interesting_cols].copy()

# Drop the leading 'templated/' prefix from the operation name.
df_tuple1['query_name'] = df_tuple1['Operation'].apply(lambda op: op[len('templated/'):])
# NOTE(review): the value stored as `median_runtime` is taken from the
# 'Average Runtime (Arithmetic)' column of the results file.
df_tuple1['median_runtime'] = df_tuple1['Average Runtime (Arithmetic)']
df_tuple1['thread_type'] = 'stress'

df_tuple1['success'] = 1
df_tuple1['errors'] = 0
df_tuple1['timeouts'] = 0

interesting_cols = ['query_name', 'thread_type', 'success', 'errors', 'timeouts', 'median_runtime']
df_tuple2 = df_tuple1[interesting_cols]

new_filename = generateFilenameRuntimesCorrect(bm_tuple1)
df_tuple2.to_csv(new_filename, sep="\t", header=True, index=False)