In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads all imported
# modules before each cell is executed, so edits to imported modules are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Directory that is scanned for benchmark logs; only files whose name ends in
# "-nodes-max.log" are parsed.
# NOTE(review): absolute, machine-specific path — adjust before running this
# notebook on another machine.
LOG_PATH="/Users/law/repos/ma/disco/benchmark-runs/runs_sustainable_throughput/tumbling_1000_max/"

In [None]:
import os
import re

# Matches the line that announces a benchmark run; group 1 is the number of
# children, group 2 the number of streams.
RUN_RE = re.compile(r"Running (\d+) child.*, (\d+) stream.*")
# Matches the line that reports a sustainable throughput; group 1 is the
# events/s figure.
# NOTE(review): the trailing `.\)*` accepts any single character followed by
# zero or more ")" — presumably a literal ".)" was intended; confirm against the
# actual log format before tightening the pattern.
THROUGHPUT_RE = re.compile(r"Found sustainable candidate \((\d+) events/s.\)*")


def parse_log_file(log_file):
    """Extract the sustainable throughput of every run recorded in a log file.

    The file is scanned line by line. A line matching ``RUN_RE`` opens a run
    identified by ``(num_children, num_streams)``; the next line matching
    ``THROUGHPUT_RE`` closes it and supplies its throughput.

    Inconsistencies are reported on stdout and otherwise tolerated:

    * a run line that is not followed by a candidate line before the next run
      line (or the end of the file) is reported and yields no entry;
    * a candidate line without a pending run is reported and ignored, so the
      result never contains a ``None`` key.

    Args:
        log_file: Path of the log file to read.

    Returns:
        dict mapping ``(num_children, num_streams)`` to the throughput (int)
        captured from the candidate line. If the same run appears several
        times, the last occurrence wins.
    """
    sustainable_throughputs = {}

    current_run = None  # run that was announced but has no candidate line yet
    last_run = None     # most recently announced run; used in diagnostics only
    with open(log_file) as f:
        for line in f:
            run_match = RUN_RE.match(line)
            if run_match is not None:
                if current_run is not None:
                    print(f"Did not find candidate line for {current_run}")
                current_run = (int(run_match.group(1)), int(run_match.group(2)))
                last_run = current_run
                # Both patterns are anchored at the start of the line with
                # different literal prefixes, so a run line cannot also be a
                # candidate line.
                continue

            throughput_match = THROUGHPUT_RE.match(line)
            if throughput_match is None:
                continue

            if current_run is None:
                # Orphan candidate: there is no run to attribute it to. Storing
                # it would create a None key that cannot be sorted or unpacked
                # alongside the (children, streams) tuples.
                print(f"Did not find run line after {last_run}")
                continue

            sustainable_throughputs[current_run] = int(throughput_match.group(1))
            current_run = None

    if current_run is not None:
        print(f"Did not find candidate line for {current_run}")

    return sustainable_throughputs
                

# Collect the results of every "*-nodes-max.log" file in LOG_PATH into a single
# mapping. Files are visited in sorted name order, so for a run key that occurs
# in several files the value from the last file wins.
ALL_THROUGHPUTS = {}
for log_file in sorted(os.listdir(LOG_PATH)):
    if not log_file.endswith("-nodes-max.log"):
        continue

    print(f"Parsing {log_file}")
    sustainable_throughputs = parse_log_file(os.path.join(LOG_PATH, log_file))
    print(f"current: {sustainable_throughputs}")

    ALL_THROUGHPUTS.update(sustainable_throughputs)
    print(f"all:     {ALL_THROUGHPUTS}\n")

In [None]:
# Print one summary line per (children, streams) configuration, in sorted key
# order. Both figures use integer division by the number of children.
# NOTE(review): multiplying by num_streams presumes the logged throughput is a
# per-stream figure — confirm against the benchmark.
for (num_children, num_streams), throughput in sorted(ALL_THROUGHPUTS.items()):
    streams_per_child = num_streams // num_children
    events_per_child = throughput * num_streams // num_children
    print(
        f"Total sustainable throughput for {num_children} child(ren) with "
        f"{streams_per_child} stream(s) each "
        f"is {events_per_child: >7d} events/s per child."
    )

# Plots

In [None]:
from matplotlib import rcParams
# Global matplotlib defaults for every figure drawn afterwards:
# 'figure.autolayout' lets matplotlib adjust the layout automatically so labels
# fit inside the figure; 'pgf.rcfonts' = False stops the pgf backend from taking
# its fonts from the rc parameters (relevant only when exporting .pgf files).
rcParams.update({'figure.autolayout': True, 'pgf.rcfonts' : False})

### Plot Sustainable Throughput

In [None]:
import matplotlib.pyplot as plt

# Line plot of the total throughput (in million events/s) against the number of
# streams, restricted to runs with exactly one child.
# The (n_stream, throughput) pairs get their own name so that `streams` is not
# rebound from a list of pairs to a tuple of stream counts.
single_child_points = sorted(
    (n_stream, (n_stream * throughput) / 1_000_000)
    for (n_child, n_stream), throughput in ALL_THROUGHPUTS.items()
    if n_child == 1
)
print(single_child_points)

if not single_child_points:
    # Without this guard the unpacking below fails with an opaque
    # "not enough values to unpack" ValueError.
    raise ValueError("No runs with exactly one child in ALL_THROUGHPUTS; nothing to plot.")

# Transpose the sorted pairs into parallel x / y sequences.
streams, throughputs = zip(*single_child_points)

print(streams, throughputs)

plt.plot(streams, throughputs)
plt.ylabel("events/s in mio.")
plt.xlabel("# streams")
# Ticks at 0..9, labelled only at 1, 2, 4 and 8.
plt.xticks(range(10), ["", "1", "2", "", "4", "", "", "", "8", ""])
# NOTE(review): the fixed upper limit clips any total above 2 million events/s.
plt.ylim(0, 2)
plt.legend(["TUMBLING, 1 sec, MAX"])
plt.show()

# if save_fig:
#     plt.savefig(f"load_{out_file_name}.svg")
#     plt.savefig(f"load_{out_file_name}.pgf")
#     plt.savefig(f"load_{out_file_name}.png")
# plt.show()

In [None]:
import matplotlib.pyplot as plt

# Bar and line plot of the total throughput (in million events/s) for every run
# that has either one or two streams per child, ordered by (n_child, n_stream).
# The (key, throughput) pairs get their own name so that `streams` is not
# rebound from a list of pairs to a tuple of keys.
scaling_points = sorted(
    ((n_child, n_stream), (n_stream * throughput) / 1_000_000)
    for (n_child, n_stream), throughput in ALL_THROUGHPUTS.items()
    if n_child == n_stream or n_child * 2 == n_stream
)

print(scaling_points)

if not scaling_points:
    # Without this guard the unpacking below fails with an opaque
    # "not enough values to unpack" ValueError.
    raise ValueError("No runs with one or two streams per child in ALL_THROUGHPUTS; nothing to plot.")

# `streams` holds the (n_child, n_stream) keys, `throughputs` the matching totals.
streams, throughputs = zip(*scaling_points)

print(streams, throughputs)

# Both figures share the same categorical x axis, so build it once.
tick_positions = range(len(streams))
tick_labels = [f"({n_child}, {n_stream})" for n_child, n_stream in streams]

plt.bar(tick_positions, throughputs)
plt.ylabel("events/s in mio.")
plt.xlabel("(#children, #streams)")
plt.xticks(tick_positions, tick_labels)
plt.legend(["TUMBLING, 1 sec, MAX"])
plt.show()

plt.plot(throughputs)
plt.ylabel("events/s in mio.")
plt.xlabel("(#children, #streams)")
plt.xticks(tick_positions, tick_labels)
plt.legend(["TUMBLING, 1 sec, MAX"])
plt.show()
