In [1]:
# Add the parent directory to the import path; presumably that is where the
# project modules imported below (parsers, aggregation) live.
import os  # required by the path handling below; it was never imported, so a fresh kernel failed with NameError
import sys
import pathlib
parent_dir = os.path.join(os.path.abspath(''), "..")
sys.path.append(parent_dir)

# Later cells use `np` and `pd`; import them explicitly instead of relying on
# whatever names the star import below happens to re-export.
import numpy as np
import pandas as pd

import parsers
from pprint import pprint
from aggregation import *
import matplotlib.pyplot as plt
from matplotlib import rc

# Serif font and LaTeX text rendering for every figure. With usetex enabled,
# a literal percent sign in a label has to be written as a backslash followed by '%'.
rc('font',**{'family': 'serif', 'size': 19})
rc('text', usetex=True)

# Parse the archived results; `working_dir` is handed to the parser
# (presumably as its extraction scratch space - confirm in parsers.main).
path_to_results = os.path.normpath(os.path.join(parent_dir, "archive"))
print(path_to_results)
working_dir = os.path.normpath(os.path.join(parent_dir, "working"))
data = parsers.main(path_to_results, working_dir=working_dir)
print("Finished")

/home/jazev/dev/cs4365/analysis/archive
Extracting 30 top level archives on 4 workers
Finished


In [2]:
def minimal(flt):
    """Format a number with one decimal place, dropping a trailing ".0".

    Examples: 5.0 -> "5", 2.5 -> "2.5", 10.0 -> "10".
    """
    one_decimal = format(flt, ".1f")
    # Strip the zero first, then the decimal point it may leave behind.
    without_zero = one_decimal.rstrip('0')
    return without_zero.rstrip('.')

def set_up_legend(cols, size=14):
    """Redraw the current axes' legend in `cols` columns, reading row by row.

    Matplotlib fills a multi-column legend column by column. The entries are
    therefore reordered so that column k receives entries k, k + cols,
    k + 2 * cols, ...; read across the rows, the legend then follows the
    order in which the lines were plotted. For cols=2 this is the same
    "even entries, then odd entries" order that was previously hard-coded.

    Args:
        cols: Number of legend columns (must be >= 1).
        size: Legend font size.

    Raises:
        ValueError: If `cols` is smaller than 1.
    """
    if cols < 1:
        raise ValueError("cols must be at least 1")

    handles, labels = plt.gca().get_legend_handles_labels()
    # Plain list slicing keeps the handle objects untouched (no coercion into
    # a numpy array) and works for any number of columns.
    handles = [handle for start in range(cols) for handle in handles[start::cols]]
    labels = [label for start in range(cols) for label in labels[start::cols]]
    plt.legend(handles, labels, loc="upper right", ncol=cols, fontsize=size)

def replica_groups_figure(test_id, replica_block_size=10, sampling_period=5.0, peak=None, num_cores=1):
    """Plot CPU utilisation for the replicas of a test, one block of replicas at a time.

    The replicas of `data[test_id]` are split into consecutive blocks of
    `replica_block_size`; replicas left over after the last full block are
    not plotted. Every block is drawn on the current axes, so the caller is
    responsible for selecting the figure/subplot beforehand.

    Args:
        test_id: Key of the test in the module-level `data` mapping.
        replica_block_size: Number of replicas plotted per block.
        sampling_period: Averaging window forwarded to the CPU aggregation
            and shown in the y-axis label.
        peak: Optional y value at which a dashed horizontal line is drawn.
        num_cores: Divisor applied to the CPU values before plotting.
    """
    replicas = data[test_id].replicas
    num_groups = len(replicas) // replica_block_size
    for group in range(num_groups):
        first = group * replica_block_size
        last = first + replica_block_size - 1
        # A distinct name for the replica index avoids shadowing the group counter.
        plot_cpu_for_hosts(
            (replicas[replica_num].single() for replica_num in range(first, last + 1)),
            num_cores=num_cores,
            sampling_period=sampling_period,
        )

        # Add peak if specified
        if peak is not None:
            plt.axhline(y=peak, color='gray', linestyle='dashed')

        axes = plt.gca()
        # "\\%" yields a backslash followed by '%' (needed because text is
        # rendered through LaTeX) without using an invalid escape sequence.
        axes.set_title(f"CPU \\% for replicas {first} - {last} for test ({test_id})", pad=10)
        plt.xlabel("Time (seconds)")
        plt.ylabel(f"Avg CPU Utilization over {minimal(sampling_period)} sec. (\\%)")
        axes.set_ylim([-5, 105])
        axes.xaxis.labelpad = 5
        # set_up_legend creates the legend itself, so no separate plt.legend call is needed.
        set_up_legend(cols=2, size=14)

def cpu_figure(hosts, title, show=True, sampling_period=5.0, num_cores=1, max_cpu=105):
    """Plot CPU utilisation over time for `hosts` on the current axes.

    Args:
        hosts: Iterable of host objects accepted by `plot_cpu_for_hosts`.
        title: Axes title. Text is rendered through LaTeX, so special
            characters such as '%' must already be escaped by the caller.
        show: When true, call `plt.show()` after drawing.
        sampling_period: Averaging window forwarded to the CPU aggregation
            and shown in the y-axis label.
        num_cores: Divisor applied to the CPU values before plotting.
        max_cpu: Upper limit of the y axis (the lower limit is 0).
    """
    plot_cpu_for_hosts(hosts, sampling_period=sampling_period, num_cores=num_cores)

    axes = plt.gca()
    axes.set_title(title, pad=10)
    plt.xlabel("Time (seconds)")
    # "\\%" yields a backslash followed by '%' without an invalid escape sequence.
    plt.ylabel(f"Avg CPU Utilization over {minimal(sampling_period)} sec. (\\%)")
    axes.set_ylim([0, max_cpu])
    axes.xaxis.labelpad = 5
    # set_up_legend creates the legend itself, so no separate plt.legend call is needed.
    set_up_legend(cols=2, size=14)

    if show:
        plt.show()

def plot_cpu_for_hosts(hosts, sampling_period=5.0, num_cores=1):
    """Draw one CPU-over-time line per host on the current axes.

    Each line is labelled with the host's position in `hosts` (starting at 0).
    The normalized "time" column is divided by 1E3 for the x axis
    (presumably milliseconds to seconds - confirm against aggregate_cpu),
    and the "cpu" column is divided by `num_cores` for the y axis.
    """
    for index, host in enumerate(hosts):
        frame = aggregate_cpu(host, sampling_period=sampling_period)
        x_values = [stamp / 1E3 for stamp in normalize(frame["time"])]
        y_values = [usage / num_cores for usage in frame["cpu"]]
        plt.plot(x_values, y_values, label=str(index))


In [None]:
def make_single_cpu_subplot(test, peak):
    """Draw the replica CPU plot for `test` on the current axes.

    Uses blocks of 20 replicas, a 5 second sampling period, CPU values
    divided by 4 cores, and a dashed reference line at `peak`.
    """
    replica_groups_figure(
        test,
        replica_block_size=20,
        sampling_period=5.0,
        peak=peak,
        num_cores=4,
    )

# Two side-by-side panels (d-rc-50 with a peak line at 50, d-rc-100 with a
# peak line at 100), saved together as a single PDF.
fig = plt.figure(figsize=(17.5, 5))
for panel, (test_name, peak_value) in enumerate([("d-rc-50", 50), ("d-rc-100", 100)], start=1):
    plt.subplot(1, 2, panel)
    make_single_cpu_subplot(test_name, peak=peak_value)
plt.savefig('evaluation_50_100.pdf', bbox_inches='tight')

In [None]:
def make_host_cpu_subplot(test, host, replicas):
    """Plot the CPU utilisation of one named host across the first replicas of a test.

    Args:
        test: Key of the test in the module-level `data` mapping.
        host: Key into each replica's `hosts` mapping; also shown in the
            title. Text is rendered through LaTeX, so a host name with
            LaTeX special characters would need escaping.
        replicas: Number of replicas to include (replicas 0 .. replicas - 1).
    """
    hosts = [data[test].replicas[i].hosts[host] for i in range(replicas)]
    # The title names the host actually plotted rather than a hard-coded "auth".
    # "\\%" yields a backslash followed by '%' without an invalid escape sequence.
    title = f"CPU \\% for {host} in replicas 0 - {replicas - 1} for test ({test})"
    cpu_figure(hosts, title=title, show=False, sampling_period=10.0, max_cpu=40)

# Two side-by-side panels showing the "auth" host over the first 4 replicas
# of ii-rc-s and ii-rc-b, saved together as a single PDF.
fig = plt.figure(figsize=(17.5, 5))
for panel, test_name in enumerate(["ii-rc-s", "ii-rc-b"], start=1):
    plt.subplot(1, 2, panel)
    make_host_cpu_subplot(test_name, "auth", 4)
plt.savefig('evaluation_s_b.pdf', bbox_inches='tight')

In [11]:
def aggregate_replica_cpu(replica, sampling_period=1.0):
    """Split all CPU samples of a replica into "load" and "not load" samples.

    The "cpu" columns of every host in `replica.hosts` are pooled. The
    threshold is half of the pooled 75th percentile; samples at or above it
    count as load, samples below it as not load.

    Args:
        replica: Object whose `hosts` attribute is a mapping of host objects
            accepted by `aggregate_cpu`.
        sampling_period: Averaging window forwarded to `aggregate_cpu`.

    Returns:
        A `(load, not_load)` tuple of lists. Both lists are empty when the
        replica yields no samples at all.
    """
    cpus = []
    for host in replica.hosts.values():
        cpus.extend(aggregate_cpu(host, sampling_period=sampling_period)['cpu'])

    # A percentile of an empty sample is undefined; return early instead of
    # passing an empty list to numpy.
    if not cpus:
        return ([], [])

    load_threshold = 0.5 * np.percentile(cpus, 75)
    load = [c for c in cpus if c >= load_threshold]
    not_load = [c for c in cpus if c < load_threshold]
    return (load, not_load)

# For every test whose id contains "c-", pool the load / not-load CPU samples
# of all its replicas and store them in `dic` under the test id.
dic = {}
for test in data.values():
    if "c-" in test.id:
        print(f"started test {test.id}")
        load, not_load = [], []
        for replica_load, replica_not_load in execute(aggregate_replica_cpu, test.replicas):
            load += replica_load
            not_load += replica_not_load
        dic[test.id] = (load, not_load)
        print(f"  finished test {test.id}")
print("done")

        

started test ii-c-b
  finished test ii-c-b
started test ii-rc-s
  finished test ii-rc-s
started test d-rc-50
  finished test d-rc-50
started test ii-mc-b
  finished test ii-mc-b
started test i-c-100
  finished test i-c-100
started test i-mc-100
  finished test i-mc-100
started test d-mc-100
  finished test d-mc-100
started test ii-c-s
  finished test ii-c-s
started test i-mc-50
  finished test i-mc-50
started test d-rc-100
  finished test d-rc-100
started test ii-rc-b
  finished test ii-rc-b
started test i-rc-100
  finished test i-rc-100
started test d-c-100
  finished test d-c-100
started test ii-mc-s
  finished test ii-mc-s
started test d-mc-50
  finished test d-mc-50
started test d-c-50
  finished test d-c-50
started test i-rc-50
  finished test i-rc-50
started test i-c-50
  finished test i-c-50
done


In [13]:
# Print summary statistics of the load and not-load samples for every test.
for key, (load, not_load) in dic.items():
    load_df = pd.DataFrame({'load': load})
    not_load_df = pd.DataFrame({'not_load': not_load})
    print(f"test {key}")
    for frame in (load_df, not_load_df):
        print(frame.describe(include='all'))
    print("================================")


test ii-c-b
               load
count  43387.000000
mean      24.336992
std       28.780487
min        7.900000
25%       10.650000
50%       13.550000
75%       22.750000
max      166.600000
           not_load
count  24290.000000
mean       7.052569
std        1.266185
min        2.700000
25%        6.800000
50%        7.400000
75%        7.800000
max        9.250000
test ii-rc-s
               load
count  49064.000000
mean      23.721303
std       23.014896
min        8.700000
25%       12.950000
50%       15.800000
75%       19.800000
max      202.100000
          not_load
count  9916.000000
mean      7.130163
std       1.422398
min       2.700000
25%       6.800000
50%       7.250000
75%       7.850000
max      10.850000
test d-rc-50
              load
count  7292.000000
mean    216.652470
std      14.122324
min     108.700000
25%     214.300000
50%     219.300000
75%     222.700000
max     239.857143
          not_load
count  3263.000000
mean     18.220225
std      18.768895
min 