In [None]:
# Load IPython's autoreload extension and switch it to mode 2, which re-imports
# all modules (except those excluded via %aimport) before each cell is executed.
# NOTE(review): no project-local module is imported in the cells visible here, so
# this is presumably a leftover from development - confirm before removing.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Directory that holds the benchmark ``*.log`` files parsed further down.
# The historical location stays the default, but it can be overridden without
# editing the notebook by exporting BENCHMARK_LOG_PATH before starting the kernel.
# ``or`` (rather than a plain ``get`` default) also covers a variable that is set
# but empty, which would otherwise make ``os.listdir("")`` fail later.
LOG_PATH = (
    os.environ.get("BENCHMARK_LOG_PATH")
    or "/Users/law/drive/uni/msc/ma/benchmarks/matrix_run"
)

In [None]:
import os
import re

# Line patterns recognised in a benchmark log. All of them are applied with
# ``.match``, i.e. they must match at the very beginning of a line.
#   RUN_RE        -> (number before "child", number before "stream")
#   THROUGHPUT_RE -> (events/s value of a sustainable candidate)
#   BENCHMARK_RE  -> (text after "WINDOWS:", text after "AGG_FNS:")
RUN_RE = re.compile(r"Running (\d+) child.*, (\d+) stream.*")
THROUGHPUT_RE = re.compile(r"Found sustainable candidate \((\d+) events/s.\)*")
BENCHMARK_RE = re.compile(r"BENCHMARK: WINDOWS: (.*) - AGG_FNS: (.*)")


def parse_log_file(log_file):
    """Extract the sustainable throughput of every run from one log file.

    The file is scanned line by line. A BENCHMARK line opens a new benchmark,
    a "Running ..." line opens a run inside it, and the next
    "Found sustainable candidate ..." line closes that run with its throughput.

    Inconsistencies are reported with ``print`` and never abort the parse:

    * a run that is never closed by a candidate line is reported and dropped;
    * a candidate line without an open run is reported and ignored (it used to
      be stored under the key ``None``, which broke sorting/unpacking of the
      result later on);
    * a candidate line before the first BENCHMARK line is reported and ignored
      (it used to raise ``KeyError``).

    Args:
        log_file: Path of the log file to read.

    Returns:
        dict mapping ``(windows, agg_fns)`` (both ``str``) to a dict that maps
        ``(num_children, num_streams)`` (both ``int``) to the throughput in
        events/s (``int``). A benchmark that occurs again later in the same
        file starts from an empty mapping again.
    """
    sustainable_throughputs = {}

    current_bm = None
    current_run = None  # run that is still waiting for its candidate line
    last_run = None     # most recent run that was closed; used for diagnostics only

    with open(log_file) as f:
        for line in f:
            benchmark_match = BENCHMARK_RE.match(line)
            if benchmark_match is not None:
                current_bm = (benchmark_match.group(1), benchmark_match.group(2))
                sustainable_throughputs[current_bm] = {}
                last_run = None
                print(current_bm)

            run_match = RUN_RE.match(line)
            if run_match is not None:
                if current_run is not None:
                    print(f"Did not find candidate line for {current_run}")
                current_run = (int(run_match.group(1)), int(run_match.group(2)))
                print(current_run)

            throughput_match = THROUGHPUT_RE.match(line)
            if throughput_match is not None:
                throughput = int(throughput_match.group(1))
                if current_run is None:
                    # Orphan candidate: there is no run it could belong to.
                    print(f"Did not find run line after {last_run}; "
                          f"ignoring candidate ({throughput} events/s)")
                elif current_bm is None:
                    print(f"Did not find benchmark line before {current_run}; "
                          f"ignoring candidate ({throughput} events/s)")
                    last_run = current_run
                else:
                    sustainable_throughputs[current_bm][current_run] = throughput
                    last_run = current_run
                current_run = None

    if current_run is not None:
        print(f"Did not find candidate line for {current_run}")

    return sustainable_throughputs
                

# Parse every ``*.log`` file in LOG_PATH (in sorted file-name order) and merge
# the per-file results into ALL_THROUGHPUTS.
# NOTE: the merge is per benchmark key - when a benchmark shows up in more than
# one file, the run mapping from the later file replaces the earlier one as a whole.
ALL_THROUGHPUTS = {}
for log_file in sorted(os.listdir(LOG_PATH)):
    if not log_file.endswith(".log"):
        continue
    print(f"Parsing {log_file}")
    sustainable_throughputs = parse_log_file(os.path.join(LOG_PATH, log_file))
    print(f"current: {sustainable_throughputs}")
    ALL_THROUGHPUTS.update(sustainable_throughputs)
    print(f"all:     {ALL_THROUGHPUTS}\n")

In [None]:
# Text report: one paragraph per benchmark, one line per (children, streams) run.
# The parsed throughput is multiplied by the number of streams and divided by the
# number of children (integer division), i.e. presumably it is a per-stream value.
for benchmark, run_throughputs in sorted(ALL_THROUGHPUTS.items()):
    print(f"Benchmark {benchmark}")
    for (num_children, num_streams), throughput in sorted(run_throughputs.items()):
        streams_per_child = num_streams // num_children
        throughput_per_child = throughput * num_streams // num_children
        print(
            f"Total sustainable throughput for {num_children} child(ren) with "
            f"{streams_per_child} stream(s) each "
            f"is {throughput_per_child: >7d} events/s per child."
        )
    print()

# Plots

In [None]:
from matplotlib import rcParams
# Global matplotlib settings for all figures created below.
rcParams['figure.autolayout'] = True  # apply tight-layout automatically
rcParams['pgf.rcfonts'] = False       # pgf backend: do not take fonts from rc settings

### Plot Sustainable Throughput

In [None]:
import matplotlib.pyplot as plt

def plot_single_child_throughputs(child_streams, throughputs, title, ymax=1.2):
    """Draw one line plot of throughputs and show it.

    Args:
        child_streams: Sequence of run identifiers, one per data point; each is
            converted with ``str`` and used as the x tick label.
        throughputs: Sequence of y values, plotted against their index. The
            y axis is labelled "events/s in mio.".
        title: Title of the figure (passed to ``plt.title`` unchanged).
        ymax: Upper limit of the y axis. Defaults to the previously hard-coded
            1.2; pass a larger value when the data exceeds it, or ``None`` to
            let matplotlib choose the upper limit automatically.
    """
    tick_labels = [str(cs) for cs in child_streams]
    plt.plot(throughputs)
    plt.xticks(range(len(child_streams)), tick_labels)
    plt.ylabel("events/s in mio.")
    plt.xlabel("#children, # streams")
    plt.title(title)
    # The axis always starts at 0; only the upper bound is configurable.
    plt.ylim(bottom=0, top=ymax)
    plt.show()

    
# One line plot per benchmark, restricted to the runs that used exactly one child.
# Each point is num_streams * throughput, scaled to millions of events/s.
for benchmark, run_throughputs in sorted(ALL_THROUGHPUTS.items()):
    print(f"Benchmark {benchmark}")
    bm_child_streams, bm_throughputs = [], []
    for (num_children, num_streams), throughput in sorted(run_throughputs.items()):
        if num_children == 1:
            bm_child_streams.append((num_children, num_streams))
            bm_throughputs.append((num_streams * throughput) / 1_000_000)
    print(bm_child_streams)
    print(bm_throughputs)
    plot_single_child_throughputs(bm_child_streams, bm_throughputs, benchmark)

In [None]:
import matplotlib.pyplot as plt


def plot_multi_child_throghputs(child_streams, throughputs, title):
    """Show the throughputs twice: first as a bar chart, then as a line plot.

    Both figures share the same axis labels, tick labels, title and a y axis
    starting at 0. (The misspelled function name is kept so that existing
    callers keep working.)

    Args:
        child_streams: Sequence of ``(n_child, n_stream)`` pairs, one per data
            point; rendered as "(n_child, n_stream)" x tick labels.
        throughputs: Sequence of y values, one per entry of ``child_streams``.
            The y axis is labelled "events/s in mio.".
        title: Title of both figures (passed to ``plt.title`` unchanged).
    """
    # Tick positions are derived from ``child_streams``; the previous code used an
    # undefined name ``streams`` here and failed with NameError on a fresh kernel.
    tick_positions = range(len(child_streams))
    tick_labels = [f"({n_child}, {n_stream})" for n_child, n_stream in child_streams]

    def _decorate_and_show():
        # Shared decoration of the current figure, followed by rendering it.
        plt.ylabel("events/s in mio.")
        plt.xlabel("(#children, #streams)")
        plt.xticks(tick_positions, tick_labels)
        plt.title(title)
        plt.ylim(bottom=0)
        plt.show()

    plt.bar(range(len(throughputs)), throughputs)
    _decorate_and_show()

    plt.plot(throughputs)
    _decorate_and_show()

# Bar and line plot per benchmark, restricted to the runs in which the number of
# children equals the number of streams.
# Each value is num_streams * throughput, scaled to millions of events/s.
for benchmark, run_throughputs in sorted(ALL_THROUGHPUTS.items()):
    print(f"Benchmark {benchmark}")
    bm_child_streams, bm_throughputs = [], []
    for (num_children, num_streams), throughput in sorted(run_throughputs.items()):
        if num_children == num_streams:
            bm_child_streams.append((num_children, num_streams))
            bm_throughputs.append((num_streams * throughput) / 1_000_000)
    print(bm_child_streams)
    print(bm_throughputs)
    plot_multi_child_throghputs(bm_child_streams, bm_throughputs, benchmark)
