In [1]:
# Setup cell: gather the notebook's imports, make the parent directory
# importable, then parse the archived results into `data`.
import os
import pathlib
import sys
from multiprocessing import Pool, cpu_count
from pprint import pprint
from typing import List

import numpy as np
import pandas as pd

# Add analysis to the path
parent_dir = os.path.join(os.path.abspath(''), "..")
sys.path.append(parent_dir)

# Imported only after the sys.path tweak above (presumably `parsers` lives in
# the parent directory -- confirm).
import parsers

# NOTE(review): later cells also call aggregate_cpu, flatten and
# host_collection_intervals, none of which are defined or imported in the
# cells visible here -- confirm where they are expected to come from.

path_to_results = os.path.normpath(os.path.join(parent_dir, "archive"))
print(path_to_results)
working_dir = os.path.normpath(os.path.join(parent_dir, "working"))
data = parsers.main(path_to_results, working_dir=working_dir)
print("Finished")

Finished


In [2]:
def cpu_threshold_ts_intervals(host: parsers.TestHost, min_length: int=8, cpu_threshold: float=10, lower_threshold: bool=True) -> List[List[float]]:
    """Find the time spans during which a host's CPU stays on one side of a threshold.

    The host's CPU readings are obtained from ``aggregate_cpu`` (called with
    ``sampling_period=1.0``) and scanned row by row. Consecutive rows that
    satisfy the threshold test are grouped into a streak; streaks with more
    than ``min_length`` rows are kept.

    Args:
        host: Host whose CPU readings are analysed.
        min_length: A streak must contain strictly more rows than this to be
            reported.
        cpu_threshold: Value the ``'cpu'`` column is compared against.
        lower_threshold: When true, rows with ``cpu > cpu_threshold`` match
            (the threshold is a lower bound); otherwise rows with
            ``cpu < cpu_threshold`` match.

    Returns:
        One ``(first_time, last_time)`` pair per retained streak, taken from
        the ``'time'`` column, in the order the streaks occur. Note that the
        pairs are tuples even though the annotation reads ``List[List[float]]``.
    """
    samples = aggregate_cpu(host, sampling_period=1.0)

    streaks = []
    streak = []
    for _, sample in samples.iterrows():
        level = sample['cpu']
        matches = level > cpu_threshold if lower_threshold else level < cpu_threshold
        if matches:
            streak.append(sample['time'])
        elif streak:
            # The first non-matching row closes the streak in progress.
            streaks.append(streak)
            streak = []
    if streak:
        # The data ended while a streak was still open.
        streaks.append(streak)

    return [(times[0], times[-1]) for times in streaks if len(times) > min_length]


def _deltas_in_cpu_windows(hosts, all_host_intervals, cpu_threshold, lower_threshold=True):
    """Collect the read deltas whose timestamp lies inside one of their host's CPU windows.

    ``hosts`` and ``all_host_intervals`` are parallel sequences. For each host
    the windows come from ``cpu_threshold_ts_intervals``; every
    ``(interval, timestamp)`` pair of that host's containers is kept when the
    timestamp falls inside any window (bounds inclusive).
    """
    selected = []
    for host, host_intervals in zip(hosts, all_host_intervals):
        windows = cpu_threshold_ts_intervals(
            host, cpu_threshold=cpu_threshold, lower_threshold=lower_threshold
        )
        for container_list in host_intervals:
            selected.extend(
                interval
                for interval, timestamp in container_list
                # any() stops at the first matching window, so a delta is
                # counted at most once.
                if any(lower <= timestamp <= upper for lower, upper in windows)
            )
    return selected


def describe_intervals(test: parsers.Test, top=0.10, cpu_threshold=50, idle_cpu_threshold=30) -> List[str]:
    """Build a text report of summary statistics for one test's read deltas.

    The deltas come from ``host_collection_intervals``, which is called for
    every host of every replica of ``test`` and is expected to yield, per
    container, a sequence of ``(interval, timestamp)`` pairs. Four
    ``DataFrame.describe`` tables are produced:

    1. all deltas of the test;
    2. for each container, the deltas strictly above that container's
       ``1 - top`` quantile, pooled across containers;
    3. deltas whose timestamp lies in a window where the host CPU is above
       ``cpu_threshold``;
    4. deltas whose timestamp lies in a window where the host CPU is below
       ``idle_cpu_threshold``.

    A test or container without samples contributes nothing instead of
    raising.

    Args:
        test: Test to summarise; its ``id`` and ``replicas`` are read.
        top: Fraction (0-1) of each container's largest deltas to include in
            the second table.
        cpu_threshold: CPU level used for the "under load" table.
        idle_cpu_threshold: CPU level used for the "at idle" table.

    Returns:
        The report as a list of strings: a header line with the test id, the
        four tables, and a closing separator line.
    """
    output = [f"==== {test.id} ===="]

    # Materialise these sequences: each one is walked more than once below.
    hosts = list(flatten(replica.hosts.values() for replica in test.replicas))
    all_host_intervals = [host_collection_intervals(host) for host in hosts]
    all_container_intervals = list(flatten(all_host_intervals))

    # First, describe all intervals for the test
    intervals = [interval for interval, _ in flatten(all_container_intervals)]
    intervals_df = pd.DataFrame({'Read deltas (ms)': intervals})
    output.append(str(intervals_df.describe(include='all')))

    # Second, describe top percentage of intervals
    top_percent = []
    for container_list in all_container_intervals:
        container_intervals = [interval for interval, _ in container_list]
        if not container_intervals:
            # Nothing to rank for a container without samples.
            continue
        limit = np.quantile(container_intervals, 1 - top)
        top_percent.extend(i for i in container_intervals if i > limit)
    top_percent_df = pd.DataFrame({f'Top {top*100:.1f}% container read deltas (ms)': top_percent})
    output.append(str(top_percent_df.describe(include='all')))

    # Third, describe all intervals under load
    under_load = _deltas_in_cpu_windows(hosts, all_host_intervals, cpu_threshold)
    under_load_df = pd.DataFrame({f'Read deltas under load (> {cpu_threshold:.1f}% CPU) (ms)': under_load})
    output.append(str(under_load_df.describe(include='all')))

    # Fourth, describe all intervals at idle
    at_idle = _deltas_in_cpu_windows(hosts, all_host_intervals, idle_cpu_threshold, lower_threshold=False)
    at_idle_df = pd.DataFrame({f'Read deltas at idle (< {idle_cpu_threshold:.1f}% CPU) (ms)': at_idle})
    output.append(str(at_idle_df.describe(include='all')))

    output.append("=================")
    return output

In [3]:
# Summarise every listed test in a worker pool and print each report as soon
# as it is ready. imap_unordered hands results back as workers finish
# (assuming Pool is multiprocessing.Pool), so the reports may not follow the
# order of `tests`; each report starts with a header naming its test id.
tests = [
    "d-rc-50", "d-rc-100", "d-mc-50", "d-mc-100",
    "i-rc-50", "i-rc-100", "i-mc-50", "i-mc-100",
    "ii-rc-s", "ii-rc-b", "ii-mc-s", "ii-mc-b",
]
num = len(tests)
done = 0
with Pool(cpu_count()) as pool:
    reports = pool.imap_unordered(describe_intervals, (data[name] for name in tests))
    for output in reports:
        # Report first, then a simple progress counter on its own line.
        print("\n".join(output), f"{done+1}/{num} done", sep="\n")
        done = done + 1


==== d-rc-50 ====
       Read deltas (ms)
count     410101.000000
mean          50.121165
std            0.902472
min            0.341797
25%           50.056152
50%           50.073486
75%           50.091553
max          101.172119
       Top 10.0% container read deltas (ms)
count                          40825.000000
mean                              50.831908
std                                1.945786
min                               50.095703
25%                               50.121094
50%                               50.164307
75%                               50.543701
max                              101.172119
       Read deltas undder load (> 50.0% CPU) (ms)
count                               387993.000000
mean                                    50.124118
std                                      0.920509
min                                      0.341797
25%                                     50.056152
50%                                     50.074219
75%                 