In [1]:
import pandas as pd
from pandas import DataFrame, Series
#false positive warnings all the time:
pd.options.mode.chained_assignment = None

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import numpy as np
import statistics

import os.path

In [2]:
# Plot styling for the whole notebook.
# Reference for the available keys: http://matplotlib.org/1.4.0/users/customizing.html

plt.style.use('ggplot')

# Overrides applied on top of the ggplot style. Values are passed in the same
# form as before (mostly strings) and are assigned through rcParams.
mpl.rcParams.update({
    'font.size': 18,

    # Lines
    'lines.linewidth': '2.0',       # line width in points
    'lines.marker': 'None',
    'lines.markersize': '6',        # marker size in points

    # Legend
    'legend.fontsize': 'medium',

    # Grid
    'grid.color': 'black',
    'grid.linestyle': ':',          # dotted
    'grid.linewidth': '0.5',        # in points
    'grid.alpha': '1.0',            # transparency, between 0.0 and 1.0

    # Ticks
    'xtick.labelsize': "small",     # font size of the tick labels
    'xtick.direction': "out",       # direction: in, out, or inout
    'ytick.labelsize': "small",     # font size of the tick labels
    'ytick.direction': "out",       # direction: in, out, or inout

    # Axes
    'axes.linewidth': '1.0',        # edge line width
    'axes.grid': 'True',            # display grid or not
    'axes.titlesize': 'large',      # font size of the axes title
    'axes.labelsize': 'large',      # font size of the x and y labels
    'axes.facecolor': 'white',      # axes background color

    # Figure
    'figure.facecolor': '1.0',      # figure background; 0.75 is scalar gray
})

# 1. Is the number of results per query consistent between query mixes in the same simulation?

### TRYOUT

In [3]:
# Query-event log of one benchmark run, used to try out the consistency check
# by hand before it is wrapped in a function.
filename = './csv/Blazegraph_N1_64_Watdiv1000M_Optimized_queryevents.csv'

# The file is tab-separated; the following cell reads the columns 'flag',
# 'sim_id', 'query_name' and 'number_of_results' from this frame.
df = pd.read_csv(filename, sep='\t')

In [4]:
# Tryout on a single run: within each (sim_id, query_name) group every
# successful execution should report the same number of results.
print(df.shape)
# Keep successful executions only; rows with any other flag are dropped.
df = df[df['flag'] == 'SUCCESS']
print(df.shape)

grouped = df.groupby(['sim_id', 'query_name'], as_index=False)
# After aggregation 'flag' holds the number of (successful) rows in the group
# and 'number_of_results' holds the number of distinct result counts.
df_consistent = grouped.agg({'flag': np.size, 'number_of_results': pd.Series.nunique})
# The original called df_consistent.rename(...) here but discarded the returned
# frame, so no column was ever renamed. The dead call is removed; the columns
# keep their source names, which the filter below (and later cells) rely on.

# A group is inconsistent when it contains more than one distinct result count.
inconsistencies = len(df_consistent[df_consistent['number_of_results'] > 1])

print('Total number of inconsistencies: ' + str(inconsistencies))

(2400, 8)
(2392, 8)
Total number of inconsistencies: 0


In [5]:
def generateFilename(bmtuple):
    """Build the relative path of the query-events CSV for one benchmark run.

    The result has the form
    ``./csv/<0>_N<1>_<2>_<3>_<4>[_<5>]_queryevents.csv`` where ``<i>`` is
    ``bmtuple[i]``. Elements 0, 3 and 4 must be strings; elements 1, 2 and 5
    are converted with ``str``. Element 5 is an optional suffix that is left
    out (together with its underscore) when its string form is empty.

    NOTE(review): judging from the call sites the elements look like
    (store, node count, memory size, dataset, configuration, run label) --
    confirm against the benchmark naming scheme.
    """
    segments = [
        bmtuple[0] + "_N" + str(bmtuple[1]),
        str(bmtuple[2]),
        bmtuple[3],
        bmtuple[4],
    ]
    suffix = str(bmtuple[5])
    if suffix:
        segments.append(suffix)
    return './csv/' + "_".join(segments) + "_queryevents.csv"

In [6]:
def createInconsistencyDF(bm_tuple):
    """Summarise, per simulation and query, how many distinct result counts occur.

    Parameters
    ----------
    bm_tuple : sequence
        Benchmark descriptor, passed unchanged to ``generateFilename`` to
        locate the tab-separated query-events file.

    Returns
    -------
    pandas.DataFrame
        One row per ``(sim_id, query_name)`` pair. Column ``flag`` holds the
        number of successful executions in the group and column
        ``number_of_results`` holds the number of distinct result counts among
        them; a value above 1 marks an inconsistent query.

    Notes
    -----
    Errors raised by ``pd.read_csv`` (for example for a missing file) are not
    caught here.
    """
    filename = generateFilename(bm_tuple)
    events = pd.read_csv(filename, sep='\t')

    # Compare successful executions only, exactly as the single-file tryout
    # does. Without this filter, failed executions were counted under 'flag'
    # and their result counts could be reported as inconsistencies.
    successes = events[events['flag'] == 'SUCCESS']

    grouped = successes.groupby(['sim_id', 'query_name'], as_index=False)
    # The original followed this with a rename(...) whose result was thrown
    # away, so the columns were never renamed. The dead call is dropped and the
    # aggregated columns keep their source names, as callers expect.
    return grouped.agg({'flag': np.size, 'number_of_results': pd.Series.nunique})

### TEST

In [7]:
# Smoke test of createInconsistencyDF on the same run as the manual tryout.
bm_tup = ('Blazegraph', 1, 64, 'Watdiv1000M', 'Optimized', '')

df = createInconsistencyDF(bm_tup)
# Count on the frame that was just returned. The original filtered the global
# `df_consistent` left over from the tryout cell, so the helper's output was
# never actually inspected.
inconsistencies = df[df['number_of_results'] > 1].shape[0]
print('Total number of inconsistencies: ' + str(inconsistencies))

Total number of inconsistencies: 0


# 2. Test for all simulations 

## A. Enterprise stores Watdiv10M

In [8]:
# Runs to check; each tuple is expanded into a file name by generateFilename.
tuple_list = [
    ('Blazegraph', 1, 32, 'Watdiv10M', 'Default', ''),
    ('GraphDB', 1, 32, 'Watdiv10M', 'Default', ''),
    ('ES', 1, 32, 'Watdiv10M', 'Default', ''),
    ('Virtuoso', 1, 32, 'Watdiv10M', 'Default', ''),
]

for bm_tup in tuple_list:
    df = createInconsistencyDF(bm_tup)
    # Evaluate the frame of the current run. The original filtered the stale
    # global `df_consistent` from the tryout cell, so every iteration reported
    # that one file instead of the run named in the message.
    inconsistencies = df[df['number_of_results'] > 1].shape[0]
    print('Total number of inconsistencies for ' + bm_tup[0] + ': ' + str(inconsistencies))

Total number of inconsistencies for Blazegraph: 0
Total number of inconsistencies for GraphDB: 0
Total number of inconsistencies for ES: 0
Total number of inconsistencies for Virtuoso: 0


## B. Enterprise stores Watdiv 100M

In [9]:
# Runs to check; each tuple is expanded into a file name by generateFilename.
tuple_list = [
    ('Blazegraph', 1, 32, 'Watdiv100M', 'Default', ''),
    ('GraphDB', 1, 32, 'Watdiv100M', 'Default', ''),
    # ('ES', 1, 32, 'Watdiv100M', 'Default', ''),  -- MISSING
    ('Virtuoso', 1, 32, 'Watdiv100M', 'Default', ''),
]

for bm_tup in tuple_list:
    df = createInconsistencyDF(bm_tup)
    # Evaluate the frame of the current run. The original filtered the stale
    # global `df_consistent` from the tryout cell, so every iteration reported
    # that one file instead of the run named in the message.
    inconsistencies = df[df['number_of_results'] > 1].shape[0]
    print('Total number of inconsistencies for ' + bm_tup[0] + ': ' + str(inconsistencies))

Total number of inconsistencies for Blazegraph: 0
Total number of inconsistencies for GraphDB: 0
Total number of inconsistencies for Virtuoso: 0


## C. Enterprise stores Watdiv 1000M: single node


In [10]:
# Runs to check; each tuple is expanded into a file name by generateFilename.
tuple_list = [
    ('Blazegraph', 1, 32, 'Watdiv1000M', 'Default', ''),
    # ('GraphDB', 1, 32, 'Watdiv1000M', 'Default', ''),  -- MISSING FILE
    ('ES', 1, 32, 'Watdiv1000M', 'Default', ''),
    ('Virtuoso', 1, 32, 'Watdiv1000M', 'Default', ''),
    ('Blazegraph', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('GraphDB', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('ES', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('Virtuoso', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('Blazegraph', 1, 64, 'Watdiv1000M', 'Optimized', ''),
    ('GraphDB', 1, 64, 'Watdiv1000M', 'Optimized', ''),
    # ('ES', 1, 64, 'Watdiv1000M', 'Optimized', ''),  -- disabled in the original, no reason recorded
    ('Virtuoso', 1, 64, 'Watdiv1000M', 'Optimized', ''),
]

for bm_tup in tuple_list:
    df = createInconsistencyDF(bm_tup)
    # Evaluate the frame of the current run. The original filtered the stale
    # global `df_consistent` from the tryout cell, so every iteration reported
    # that one file instead of the run named in the message.
    inconsistencies = df[df['number_of_results'] > 1].shape[0]
    print('Total number of inconsistencies for ' + bm_tup[0] + ': ' + str(inconsistencies))

Total number of inconsistencies for Blazegraph: 0
Total number of inconsistencies for ES: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Blazegraph: 0
Total number of inconsistencies for GraphDB: 0
Total number of inconsistencies for ES: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Blazegraph: 0
Total number of inconsistencies for GraphDB: 0
Total number of inconsistencies for Virtuoso: 0


## D. Enterprise stores Watdiv 1000M: multi-node


In [11]:
# Runs to check; each tuple is expanded into a file name by generateFilename.
tuple_list = [
    # ('Blazegraph', 3, 32, 'Watdiv1000M', 'Default', ''),  -- disabled in the original, no reason recorded
    # ('GraphDB', 3, 32, 'Watdiv1000M', 'Default', ''),     -- disabled in the original, no reason recorded
    ('ES', 3, 32, 'Watdiv1000M', 'Default', ''),
    ('Virtuoso', 3, 32, 'Watdiv1000M', 'Default', ''),
]

for bm_tup in tuple_list:
    df = createInconsistencyDF(bm_tup)
    # Evaluate the frame of the current run. The original filtered the stale
    # global `df_consistent` from the tryout cell, so every iteration reported
    # that one file instead of the run named in the message.
    inconsistencies = df[df['number_of_results'] > 1].shape[0]
    print('Total number of inconsistencies for ' + bm_tup[0] + ': ' + str(inconsistencies))

Total number of inconsistencies for ES: 0
Total number of inconsistencies for Virtuoso: 0


## E. Other stores Watdiv 


In [12]:
# Runs to check; each tuple is expanded into a file name by generateFilename.
tuple_list = [
    ('Fuseki', 1, 64, 'Watdiv100M', 'Default', ''),
    ('LDF', 1, 64, 'Watdiv100M', 'Default', ''),
    ('LDF', 3, 64, 'Watdiv100M', 'Default', ''),
    ('FluidOps', 3, 64, 'Watdiv100M', 'Default', ''),
    ('Fuseki', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('LDF', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('LDF', 3, 64, 'Watdiv1000M', 'Default', ''),
    ('FluidOps', 1, 64, 'Watdiv1000M', 'Default', ''),
    ('FluidOps', 3, 64, 'Watdiv1000M', 'Default', ''),
]

for bm_tup in tuple_list:
    df = createInconsistencyDF(bm_tup)
    # Evaluate the frame of the current run. The original filtered the stale
    # global `df_consistent` from the tryout cell, so every iteration reported
    # that one file instead of the run named in the message.
    inconsistencies = df[df['number_of_results'] > 1].shape[0]
    print('Total number of inconsistencies for ' + bm_tup[0] + ': ' + str(inconsistencies))

Total number of inconsistencies for Fuseki: 0
Total number of inconsistencies for LDF: 0
Total number of inconsistencies for LDF: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for Fuseki: 0
Total number of inconsistencies for LDF: 0
Total number of inconsistencies for LDF: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for FluidOps: 0


## F. Ontoforce


In [13]:
# Runs to check; each tuple is expanded into a file name by generateFilename.
tuple_list = [
    ('Blazegraph', 1, 64, 'Ontoforce', 'Optimized', ''),
    ('GraphDB', 1, 64, 'Ontoforce', 'Optimized', ''),
    ('ES', 1, 64, 'Ontoforce', 'Default', ''),
    # NOTE(review): the next entry appears a second time three lines further
    # down, so that file is read and reported twice -- confirm whether another
    # configuration was meant.
    ('Virtuoso', 1, 64, 'Ontoforce', 'Optimized', ''),
    ('Virtuoso', 1, 32, 'Ontoforce', 'Optimized', ''),
    ('Virtuoso', 1, 32, 'Ontoforce', 'Optimized', 'VWall'),
    ('Virtuoso', 1, 64, 'Ontoforce', 'Optimized', ''),
    ('Virtuoso', 1, 64, 'Ontoforce', 'Optimized', 'VWall'),
    ('Virtuoso', 3, 64, 'Ontoforce', 'Optimized', '0'),
    ('Virtuoso', 3, 64, 'Ontoforce', 'Optimized', '1'),
    ('Virtuoso', 3, 64, 'Ontoforce', 'Optimized', '2'),
    ('Virtuoso', 3, 64, 'Ontoforce', 'Optimized', 'AWS1'),
    ('Virtuoso', 3, 64, 'Ontoforce', 'Optimized', 'AWS2'),
    ('Virtuoso', 3, 64, 'Ontoforce', 'Optimized', 'AWS3'),
    ('Fuseki', 1, 64, 'Ontoforce', 'Default', ''),
    ('FluidOps', 1, 64, 'Ontoforce', 'Default', '1'),
    ('FluidOps', 1, 64, 'Ontoforce', 'Default', '2'),
    ('FluidOps', 1, 64, 'Ontoforce', 'Default', '3'),
    ('FluidOps', 3, 64, 'Ontoforce', 'Default', '1'),
    ('FluidOps', 3, 64, 'Ontoforce', 'Default', '2'),
    ('FluidOps', 3, 64, 'Ontoforce', 'Default', '3'),
]

for bm_tup in tuple_list:
    df = createInconsistencyDF(bm_tup)
    # Evaluate the frame of the current run. The original filtered the stale
    # global `df_consistent` from the tryout cell, so every iteration reported
    # that one file instead of the run named in the message.
    inconsistencies = df[df['number_of_results'] > 1].shape[0]
    print('Total number of inconsistencies for ' + bm_tup[0] + ': ' + str(inconsistencies))

Total number of inconsistencies for Blazegraph: 0
Total number of inconsistencies for GraphDB: 0
Total number of inconsistencies for ES: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Virtuoso: 0
Total number of inconsistencies for Fuseki: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for FluidOps: 0
Total number of inconsistencies for FluidOps: 0

# Conclusion: No interthread inconsistencies!