## Import Libs

In [9]:
import re
import sys

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from scipy import stats
from scipy.interpolate import make_interp_spline, BSpline

norm = stats.norm(loc=0, scale=1)

## Global Variables

In [95]:
# constants
labels = ['KSM', 'No KSM']
array_size = [32768]

In [96]:
data_ksm = {}
data_without_ksm = {}

### Reading Files

In [97]:
import glob
import errno

# int(filter(str.isdigit, str1))

base_path_logs = './logs/'

with_madvise_logs_path = base_path_logs + 'with_madvise/'
without_madvise_logs_path = base_path_logs + 'without_madvise/'

for arr in array_size:
    if data_ksm.get(arr) == None:
        data_ksm[arr] = []
        
    if data_without_ksm.get(arr) == None:
        data_without_ksm[arr] = []
        
    with_madvise_logs = glob.glob(with_madvise_logs_path + 'size_' + str(arr) + '/*.log')
    for name in with_madvise_logs:
        try:
            with open(name) as f:
                for line in f:
                    if line.find('with_madvise (') != -1:
                        temp = [float(s) for s in re.findall(r'-?\d+\.?\d*', line)]
                        data_ksm[arr].append({
                            "process": int(temp[3]),
                            "private": temp[0],
                            "shared": temp[1],
                            "total": temp[2]
                        })
        except IOError as exc:
            if exc.errno != errno.EISDIR:
                raise
                
    data_ksm[arr] = sorted(data_ksm[arr], key=lambda k: k['process']) 
                
    without_madvise_logs = glob.glob(without_madvise_logs_path + 'size_' + str(arr) + '/*.log')
    for name in without_madvise_logs:
        try:
            with open(name) as f:
                for line in f:
                    if line.find('without_madvise (') != -1:
                        temp = [float(s) for s in re.findall(r'-?\d+\.?\d*', line)]
                        data_without_ksm[arr].append({
                            "process": int(temp[3]),
                            "private": temp[0],
                            "shared": temp[1],
                            "total": temp[2]
                        })
        except IOError as exc:
            if exc.errno != errno.EISDIR:
                raise
                
    data_without_ksm[arr] = sorted(data_without_ksm[arr], key=lambda k: k['process']) 

In [98]:
data_ksm

{32768: [{'process': 10, 'private': 820.0, 'shared': 94.0, 'total': 914.0},
  {'process': 10, 'private': 848.0, 'shared': 93.0, 'total': 941.0},
  {'process': 15, 'private': 1.2, 'shared': 139.5, 'total': 1.4},
  {'process': 20, 'private': 1.6, 'shared': 172.0, 'total': 1.8},
  {'process': 25, 'private': 2.0, 'shared': 211.5, 'total': 2.2},
  {'process': 30, 'private': 2.4, 'shared': 246.0, 'total': 2.7},
  {'process': 35, 'private': 2.9, 'shared': 281.5, 'total': 3.1},
  {'process': 40, 'private': 3.2, 'shared': 306.0, 'total': 3.5},
  {'process': 45, 'private': 3.6, 'shared': 339.5, 'total': 4.0},
  {'process': 50, 'private': 4.1, 'shared': 361.0, 'total': 4.4},
  {'process': 100, 'private': 8.1, 'shared': 634.0, 'total': 8.7},
  {'process': 200, 'private': 30.0, 'shared': 904.0, 'total': 30.9},
  {'process': 300, 'private': 51.5, 'shared': 1.0, 'total': 52.5},
  {'process': 400, 'private': 69.0, 'shared': 1.1, 'total': 70.1},
  {'process': 500, 'private': 86.5, 'shared': 1.2, 'total

In [99]:
data_without_ksm

{32768: [{'process': 5, 'private': 888.0, 'shared': 48.5, 'total': 936.5},
  {'process': 10, 'private': 1.8, 'shared': 89.0, 'total': 1.8},
  {'process': 15, 'private': 2.6, 'shared': 128.5, 'total': 2.8},
  {'process': 20, 'private': 3.5, 'shared': 164.0, 'total': 3.7},
  {'process': 25, 'private': 4.4, 'shared': 201.5, 'total': 4.6},
  {'process': 30, 'private': 5.2, 'shared': 236.0, 'total': 5.5},
  {'process': 35, 'private': 6.1, 'shared': 264.5, 'total': 6.4},
  {'process': 40, 'private': 7.0, 'shared': 296.0, 'total': 7.3},
  {'process': 45, 'private': 7.9, 'shared': 321.5, 'total': 8.2},
  {'process': 50, 'private': 8.8, 'shared': 345.0, 'total': 9.1},
  {'process': 100, 'private': 17.5, 'shared': 561.0, 'total': 18.1},
  {'process': 200, 'private': 35.0, 'shared': 899.0, 'total': 35.9},
  {'process': 300, 'private': 52.5, 'shared': 1.0, 'total': 53.5},
  {'process': 400, 'private': 70.1, 'shared': 1.1, 'total': 71.2},
  {'process': 500, 'private': 87.4, 'shared': 1.2, 'total': 

## Plot

In [5]:


dat = [latency_zero, latency_twenty, latency_hundred]
throughput = {}
throughput[labels[0]] = 37.58974551742285
throughput[labels[1]] = 37.41534777565757
throughput[labels[2]] = 37.34827264239029

qe_latency_twenty, pe_latency_twenty = ecdf(latency_twenty)
q_latency_twenty = np.linspace(qe_latency_twenty[0], qe_latency_twenty[-1], 1000)
p_latency_twenty = norm.cdf(q_latency_twenty)

qe_latency_zero, pe_latency_zero = ecdf(latency_zero)
q_latency_zero = np.linspace(qe_latency_zero[0], qe_latency_zero[-1], 1000)
p_latency_zero = norm.cdf(q_latency_zero)

qe_latency_hundred, pe_latency_hundred = ecdf(latency_hundred)
q_latency_hundred = np.linspace(qe_latency_hundred[0], qe_latency_hundred[-1], 1000)
p_latency_hundred = norm.cdf(q_latency_hundred)

fig, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize=(10, 4))

ax1.plot(base_latency_zero[:-1], cumulative_latency_zero / 1000, 'red', lw=2, label='No delay')
ax1.plot(base_latency_twenty[:-1], cumulative_latency_twenty / 1000, 'blue', lw=2, label='20% 20ms delay')
ax1.plot(base_latency_hundred[:-1], cumulative_latency_hundred / 1000, 'black', lw=2, label='100% 20ms delay')
ax1.set
ax1.set_xlabel('Latency (ms)')
ax1.set_ylabel('Percentile')
ax1.set_title('Empirical CDF')

ax2.plot(qe_latency_zero, pe_latency_zero, 'red', lw=2, label='No delay')
ax2.plot(qe_latency_twenty, pe_latency_twenty, 'blue', lw=2, label='20% 20ms delay')
ax2.plot(qe_latency_hundred, pe_latency_hundred, 'black', lw=2, label='100% 20ms delay')
ax2.set_xlabel('Latency (ms)')
ax2.set_ylabel('Percentile')
ax2.set_title('Theoritical CDF')

props = dict(boxstyle='round', facecolor='lightcyan', alpha=0.5)

textstr = ''

for i in range(len(labels)):
    textstr = '\n'.join([info_text(labels[i], dat[i], throughput[labels[i]]) for i in range(len(labels))])

plt.text(1.1, 1.02, textstr, transform=ax2.transAxes,
    verticalalignment='top', bbox=props, fontname='monospace')

plt.legend(bbox_to_anchor=(-0.8, -0.29, 2., .102),
               loc=3, ncol=3,  borderaxespad=0.,
               labels=labels)
plt.suptitle("Cassandra Latency Emulation (3 nodes)", y=1.05)

plt.show()

# fig.set_size_inches(18.5, 10.5, forward=True)
fig.savefig("result.png", dpi=100, bbox_inches="tight")