In [1]:
import pandas as pd
from pandas import DataFrame, Series
# Disable pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
# The original author considered these warnings false positives.
# NOTE(review): this silences the warning globally, so genuine slice-assignment mistakes go unnoticed too.
pd.options.mode.chained_assignment = None

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np
import statistics

import os.path

In [2]:
# rcParams reference: http://matplotlib.org/1.4.0/users/customizing.html

# Start from the ggplot style sheet, then override individual settings below.
plt.style.use('ggplot')

mpl.rcParams.update({
    'font.size': 18,

    # --- lines ---
    'lines.linewidth': '2.0',      # line width in points
    'lines.marker': 'None',
    'lines.markersize': '6',       # marker size in points

    # --- legend ---
    'legend.fontsize': 'medium',

    # --- grid ---
    'grid.color': 'black',         # grid color
    'grid.linestyle': ':',         # dotted
    'grid.linewidth': '0.5',       # in points
    'grid.alpha': '1.0',           # transparency, between 0.0 and 1.0

    # --- ticks ---
    'xtick.labelsize': "small",    # font size of the tick labels
    'xtick.direction': "out",      # direction: in, out, or inout
    'ytick.labelsize': "small",    # font size of the tick labels
    'ytick.direction': "out",      # direction: in, out, or inout

    # --- axes ---
    'axes.linewidth': '1.0',       # edge line width
    'axes.grid': 'True',           # display grid or not
    'axes.titlesize': 'large',     # font size of the axes title
    'axes.labelsize': 'large',     # font size of the x and y labels
    'axes.facecolor': 'white',     # axes background color

    # --- figure ---
    'figure.facecolor': '1.0',     # figure face color; 0.75 is scalar gray
})

In [3]:
def generateSimulationID(tup):
    """Build the full identifier string for one simulation run.

    The result has the form ``<tup[0]>_N<tup[1]>_<tup[2]>_<tup[3]>_<tup[4]>``
    and gets a trailing ``_<tup[5]>`` when ``tup[5]`` is non-empty.

    Parameters
    ----------
    tup : sequence
        Six-element run description. Every field is converted with ``str()``,
        so numbers and strings are both accepted. ``tup[5]`` is an optional
        suffix; ``''`` or ``None`` means "no suffix".

    Returns
    -------
    str
        The assembled identifier.
    """
    # Only tup[1] carries the literal "N" prefix; the other fields are joined as-is.
    head = str(tup[0]) + "_N" + str(tup[1])
    identifier = "_".join([head, str(tup[2]), str(tup[3]), str(tup[4])])

    extra = tup[5]
    if extra is None or len(extra) == 0:
        return identifier
    return identifier + "_" + str(extra)


# Short labels substituted for the dataset name (tup[3]) in compact identifiers.
simulation_map = {
    "Watdiv10M": "W10",
    "Watdiv100M": "W100",
    "Watdiv1000M": "W1000",
    "Ontoforce": "Ont",
}

def generateSimulationIDCompact(tup, pref_length=3):
    """Build a shortened identifier string for one simulation run.

    Same layout as ``generateSimulationID``, except that ``tup[0]`` and
    ``tup[4]`` are truncated to ``pref_length`` characters and ``tup[3]`` is
    replaced by its short label from ``simulation_map``.

    Parameters
    ----------
    tup : sequence
        Six-element run description. ``tup[3]`` must be a key of
        ``simulation_map``. ``tup[5]`` is an optional suffix; ``''`` or
        ``None`` means "no suffix".
    pref_length : int, optional
        Number of leading characters kept from ``tup[0]`` and ``tup[4]``
        (default 3).

    Returns
    -------
    str
        The assembled compact identifier.

    Raises
    ------
    KeyError
        If ``tup[3]`` is not a key of ``simulation_map``.
    """
    head = str(tup[0])[:pref_length] + "_N" + str(tup[1])
    identifier = "_".join([
        head,
        str(tup[2]),
        simulation_map[tup[3]],
        str(tup[4])[:pref_length],
    ])

    extra = tup[5]
    if extra is None or len(extra) == 0:
        return identifier
    return identifier + "_" + str(extra)

In [4]:
def generate_sorted_events_filename(tup):
    """Return the relative path of the sorted query-events CSV for run ``tup``.

    The file name is the full simulation id of ``tup`` followed by
    ``_events_sorted.csv``, inside the ``query_events_sorted/`` directory.
    """
    sim_id = generateSimulationID(tup)
    return "".join(['query_events_sorted/', sim_id, "_events_sorted.csv"])

In [5]:
# Runs that crashed on the WatDiv datasets (Fuseki and FluidOps).
# Each tuple is consumed positionally by generateSimulationID:
# tup[0] name, tup[1] "N" value, tup[2], tup[3] dataset, tup[4], tup[5] optional suffix.
tuple_list = [
    ('Fuseki', 1, 64, 'Watdiv100M', 'Default', ''),
    ('FluidOps', 3, 64, 'Watdiv100M', 'Default', ''),
    ('FluidOps', 3, 64, 'Watdiv1000M', 'Default', ''),
    ('Fuseki', 1, 64, 'Watdiv1000M', 'Default', ''),
]

# First run: load its tab-separated, sorted event log.
tup = tuple_list[0]
df = pd.read_csv(
    generate_sorted_events_filename(tup),
    sep='\t',
    index_col=0,
)
df.head(3)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
0,Fus_N1_64_W100_Def,L5/L5_split1.sparql,1,warmup,1,2681,10.564797,SUCCESS,CORRECT,10.564797,0,2681
1,Fus_N1_64_W100_Def,S4/S4_split6.sparql,1,warmup,2,0,1.231266,SUCCESS,CORRECT,11.796062,1,2681
2,Fus_N1_64_W100_Def,S4/S4_split16.sparql,1,warmup,3,2,4.933205,SUCCESS,CORRECT,16.729267,2,2683


In [6]:
# Keep only the queries that succeeded and returned a correct result.
mask1 = df['flag'].eq('SUCCESS')
mask2 = df['correct'].eq('CORRECT')
df_filtered = df.loc[mask1 & mask2]
# Highest order_id among those queries, per thread.
df_filtered.groupby('thread_id')[['order_id']].max()

Unnamed: 0_level_0,order_id
thread_id,Unnamed: 1_level_1
1,400
101,43
103,50
104,58
105,52
107,52


In [7]:
# Sum of the per-thread maximum order_id over all threads.
df_filtered.groupby('thread_id').agg({'order_id': 'max'}).sum()

order_id    655
dtype: int64

In [8]:
# For each listed thread, fetch the query whose order_id is one past the
# thread's highest successful order_id (values taken from the per-thread table).
rows = [
    df[(df['thread_id'] == thread_id) & (df['order_id'] == order_id)]
    for thread_id, order_id in [(101, 44), (103, 51), (104, 59), (105, 53), (107, 53)]
]

pd.concat(rows)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
650,Fus_N1_64_W100_Def,L3/L3_split15.sparql,101,stress,44,-1,300.0,TIMEOUT,INCORRECT,58325.423103,650,152233
1694,Fus_N1_64_W100_Def,C2/C2_split17.sparql,103,stress,51,-1,-1.0,ERROR,INCORRECT,58528.69648,1694,152933
651,Fus_N1_64_W100_Def,S1/S1_split2.sparql,104,stress,59,-1,300.0,TIMEOUT,INCORRECT,58351.44232,651,152233
1344,Fus_N1_64_W100_Def,L3/L3_split5.sparql,105,stress,53,-1,-1.0,ERROR,INCORRECT,58420.310421,1344,152305
993,Fus_N1_64_W100_Def,S7/S7_split16.sparql,107,stress,53,-1,300.0,TIMEOUT,INCORRECT,58354.976065,993,152233


In [9]:
# Template id = first two characters of the query path (e.g. 'L5' from 'L5/L5_split1.sparql').
# assign() builds a new frame instead of writing a column into a filtered slice of df,
# so this does not rely on the chained-assignment warning being switched off.
df_filtered = df_filtered.assign(template=df_filtered['query_name'].str[:2])
df_filtered2 = df_filtered[df_filtered['thread_type'] == 'stress']
# Number of successful stress-test queries per template, ordered by template id.
df_filtered2['template'].value_counts().sort_index().to_frame()

Unnamed: 0,template
C1,16
F2,15
F4,9
F5,2
L1,14
L2,15
L3,3
L4,8
L5,14
S1,1


* Fuseki handles all queries in the warmup phase, but crashes on the first C2 query in the stress test!

In [10]:
# Second entry of tuple_list: load its tab-separated, sorted event log.
tup = tuple_list[1]
df = pd.read_csv(
    generate_sorted_events_filename(tup),
    sep='\t',
    index_col=0,
)
df.head(3)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
0,Flu_N3_64_W100_Def,F1/F1_split11.sparql,1.0,warmup,1.0,2.0,8.646808,SUCCESS,CORRECT,8.646808,0,2.0
1,Flu_N3_64_W100_Def,S4/S4_split2.sparql,1.0,warmup,2.0,0.0,6.27947,SUCCESS,CORRECT,14.926278,1,2.0
2,Flu_N3_64_W100_Def,F4/F4_split14.sparql,1.0,warmup,3.0,299.0,6.934621,SUCCESS,CORRECT,21.860899,2,301.0


In [11]:
# Keep only the queries that succeeded and returned a correct result.
mask1 = df['flag'].eq('SUCCESS')
mask2 = df['correct'].eq('CORRECT')
df_filtered = df.loc[mask1 & mask2]
# Highest order_id among those queries, per thread.
df_filtered.groupby('thread_id')[['order_id']].max()

Unnamed: 0_level_0,order_id
thread_id,Unnamed: 1_level_1
1.0,40.0


In [12]:
# Query of thread 1 with order_id 41, i.e. one past its highest successful order_id.
rows = [df[(df['thread_id'] == 1) & (df['order_id'] == 41)]]

pd.concat(rows)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
40,Flu_N3_64_W100_Def,C3/C3_split2.sparql,1.0,warmup,41.0,-1.0,300.0,TIMEOUT,INCORRECT,2012.826529,40,9191.0


In [13]:
# Template id = first two characters of the query path (e.g. 'F1' from 'F1/F1_split11.sparql').
# assign() builds a new frame instead of writing a column into a filtered slice of df,
# so this does not rely on the chained-assignment warning being switched off.
df_filtered = df_filtered.assign(template=df_filtered['query_name'].str[:2])
# Number of successful queries per template, ordered by template id.
df_filtered['template'].value_counts().sort_index().to_frame()

Unnamed: 0,template
C1,1
C2,5
F1,3
F2,1
F3,1
F4,1
F5,2
L1,1
L2,3
L3,2


* FluidOps crashes on the first C3 query!!!

In [14]:
# Third entry of tuple_list: load its tab-separated, sorted event log.
tup = tuple_list[2]
df = pd.read_csv(
    generate_sorted_events_filename(tup),
    index_col=0,
    sep='\t',
)
df.head(3)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
0,Flu_N3_64_W1000_Def,L4/L4_split4.sparql,1,warmup,1,5952,37.702363,SUCCESS,CORRECT,37.702363,0,5952
1,Flu_N3_64_W1000_Def,S5/S5_split4.sparql,1,warmup,2,0,112.306998,SUCCESS,CORRECT,150.00936,1,5952
2,Flu_N3_64_W1000_Def,F4/F4_split10.sparql,1,warmup,3,2203,61.347327,SUCCESS,CORRECT,211.356687,2,8155


In [15]:
# Keep only the queries that succeeded and returned a correct result.
mask1 = df['flag'].eq('SUCCESS')
mask2 = df['correct'].eq('CORRECT')
df_filtered = df.loc[mask1 & mask2]
# Highest order_id among those queries, per thread.
df_filtered.groupby('thread_id')[['order_id']].max()

Unnamed: 0_level_0,order_id
thread_id,Unnamed: 1_level_1
1,4


In [16]:
# Query of thread 1 with order_id 5, i.e. one past its highest successful order_id.
rows = [df[(df['thread_id'] == 1) & (df['order_id'] == 5)]]

pd.concat(rows)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
4,Flu_N3_64_W1000_Def,C3/C3_split4.sparql,1,warmup,5,-1,300.0,TIMEOUT,INCORRECT,572.1049,4,8948


In [17]:
# Show the first five events of this run.
df.head(5)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
0,Flu_N3_64_W1000_Def,L4/L4_split4.sparql,1,warmup,1,5952,37.702363,SUCCESS,CORRECT,37.702363,0,5952
1,Flu_N3_64_W1000_Def,S5/S5_split4.sparql,1,warmup,2,0,112.306998,SUCCESS,CORRECT,150.00936,1,5952
2,Flu_N3_64_W1000_Def,F4/F4_split10.sparql,1,warmup,3,2203,61.347327,SUCCESS,CORRECT,211.356687,2,8155
3,Flu_N3_64_W1000_Def,F2/F2_split1.sparql,1,warmup,4,793,60.748212,SUCCESS,CORRECT,272.1049,3,8948
4,Flu_N3_64_W1000_Def,C3/C3_split4.sparql,1,warmup,5,-1,300.0,TIMEOUT,INCORRECT,572.1049,4,8948


* FluidOps crashes on the first C3 query!!!

In [18]:
# Fourth entry of tuple_list: load its tab-separated, sorted event log.
tup = tuple_list[3]
df = pd.read_csv(
    generate_sorted_events_filename(tup),
    index_col=0,
    sep='\t',
)
df.head(3)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
0,Fus_N1_64_W1000_Def,C3/C3_split13.sparql,1,warmup,1,-1,300.0,TIMEOUT,INCORRECT,300.0,0,0
1,Fus_N1_64_W1000_Def,C3/C3_split19.sparql,1,warmup,2,-1,300.0,TIMEOUT,INCORRECT,600.0,1,0
2,Fus_N1_64_W1000_Def,S5/S5_split16.sparql,1,warmup,3,0,22.691257,SUCCESS,CORRECT,622.691257,2,0


In [19]:
# Keep only the queries that succeeded and returned a correct result.
mask1 = df['flag'].eq('SUCCESS')
mask2 = df['correct'].eq('CORRECT')
df_filtered = df.loc[mask1 & mask2]
# Highest order_id among those queries, per thread.
df_filtered.groupby('thread_id')[['order_id']].max()

Unnamed: 0_level_0,order_id
thread_id,Unnamed: 1_level_1
1,51


In [20]:
# Query of thread 1 with order_id 52, i.e. one past its highest successful order_id.
rows = [df[(df['thread_id'] == 1) & (df['order_id'] == 52)]]

pd.concat(rows)

Unnamed: 0,sim_id,query_name,thread_id,thread_type,order_id,number_of_results,runtime,flag,correct,cumul_time,overall_order_id,cumul_results
51,Fus_N1_64_W1000_Def,C2/C2_split1.sparql,1,warmup,52,-1,-1.0,ERROR,INCORRECT,9312.836477,51,43515


In [21]:
# Template id = first two characters of the query path (e.g. 'S5' from 'S5/S5_split16.sparql').
# assign() builds a new frame instead of writing a column into a filtered slice of df,
# so this does not rely on the chained-assignment warning being switched off.
df_filtered = df_filtered.assign(template=df_filtered['query_name'].str[:2])
# Number of successful queries per template, ordered by template id.
df_filtered['template'].value_counts().sort_index().to_frame()

Unnamed: 0,template
C1,4
F2,6
L2,2
L5,2
S3,3
S4,2
S5,4
S6,3


* Fuseki crashes on the first C2 query

# Summary

* Fuseki Watdiv100M
    - 655 queries before crash, 400 = warmup successful, approx 50 queries per thread in stress test
    - 16h15m (about 58,500 seconds) until crash
    - first C2 query in stress test generates HTTP error, all other templates succeeded
* Fuseki Watdiv1000M
    - 51 queries before crash, doesn't survive warmup phase
    - 2h35m (about 9,300 seconds) until crash
    - first C2 query in the warmup phase generates an HTTP error; two C3 queries at the very start of the warmup had already timed out, all other templates succeeded
* FluidOps N3 Watdiv100M
    - 40 queries before crash, doesn't survive warmup phase!
    - 2013 seconds until crash
    - first C3 query in warmup results in TIMEOUT, other templates succeeded! From then on no more successes
* FluidOps N3 Watdiv1000M
    - 4 queries before crash, doesn't survive warmup phase!
    - 572 seconds until crash
    - first C3 query in warmup results in TIMEOUT, other templates succeeded! From then on no more successes