## Parse

In [21]:
from dataclasses import dataclass
import re
import os

# Root folder that is scanned for experiment-set sub-directories.
EXPERIMENTS_FOLDER = "experiments/"
# Create it up front so that listing it never fails when it does not exist yet.
os.makedirs(EXPERIMENTS_FOLDER, exist_ok=True)

@dataclass
class SingleExperiment:
    """Configuration of one benchmark run, as encoded in its log file name.

    Instances are built by ``exp_from_log`` from the five integers of a name
    shaped like ``<clients>C_<replicas>R_<payload>B_<n>_<m>``, in that order.
    """
    number_clients: int     # integer preceding "C" in the log file name
    number_replicas: int    # integer preceding "R" in the log file name
    payload: int            # integer preceding "B" (presumably a size in bytes -- TODO confirm)
    percentage_writes: int  # fourth integer of the log file name
    percentage_reads: int   # fifth integer of the log file name

@dataclass
class Results:
    """Metrics extracted from one benchmark log file by ``results_from_log``."""
    throughput: float          # value of the "[OVERALL], Throughput(ops/sec)" line
    read_latency_ms: float     # "[READ], AverageLatency(us)" value divided by 1000
    write_latency_ms: float    # "[UPDATE], AverageLatency(us)" value divided by 1000
    overall_latency_ms: float  # unweighted mean of the read and write latencies



def set_num_from_dir(exp_dir: str) -> int:
    """Return the experiment-set number embedded in a directory name.

    The number is the first run of decimal digits found in ``exp_dir``.

    Raises:
        IndexError: if ``exp_dir`` contains no digit at all.
    """
    digit_runs = re.findall(r"\d+", exp_dir)
    first_run = digit_runs[0]
    return int(first_run)

def exp_from_log(log_file: str) -> SingleExperiment:
    result = re.search(r"(\d+)C_(\d+)R_(\d+)B_(\d+)_(\d+)", log_file)
    return SingleExperiment(int(result.group(1)),
                            int(result.group(2)),
                            int(result.group(3)),
                            int(result.group(4)),
                            int(result.group(5)))


def results_from_log(file_path: str) -> Results:
    read_latency_ms = None
    write_latency_ms = None
    throughput = None
    for line in open(file_path).readlines():
        if ("[OVERALL], Throughput(ops/sec), " in line):
            throughput = float(re.findall(r"\d+\.\d+", line)[0])
        elif ("[READ], AverageLatency(us), " in line):
            read_latency_ms = float(re.findall(r"\d+\.\d+", line)[0]) / 1000
        elif ("[UPDATE], AverageLatency(us), " in line):
            write_latency_ms = float(re.findall(r"\d+\.\d+", line)[0]) / 1000
        elif ("Op Timed out" in line):
            return None

    print(log_file)
    assert read_latency_ms is not None
    assert write_latency_ms is not None
    assert throughput is not None

    overall_latency_ms = (read_latency_ms + write_latency_ms) / 2
    return Results(throughput, read_latency_ms, write_latency_ms, overall_latency_ms)




In [22]:
import os
from collections import defaultdict as dd

# Maps each experiment-set number to the (SingleExperiment, Results) pairs
# parsed from the logs of that set.
exps_per_set = dd(list)

# os.listdir returns entries in arbitrary order; sorting keeps the traversal
# (and the printed progress) reproducible across runs and machines.
for exp_dir in sorted(os.listdir(EXPERIMENTS_FOLDER)):
    exp_dir_path = os.path.join(EXPERIMENTS_FOLDER, exp_dir)
    # Ignore stray files and hidden entries (e.g. .DS_Store) instead of
    # crashing while trying to parse them as experiment sets.
    if exp_dir.startswith(".") or not os.path.isdir(exp_dir_path):
        continue
    set_num = set_num_from_dir(exp_dir)
    for log_file in sorted(os.listdir(exp_dir_path)):
        exp = exp_from_log(log_file)
        results = results_from_log(os.path.join(exp_dir_path, log_file))
        if results is None:
            # results_from_log returns None for runs that timed out.
            continue
        exps_per_set[set_num].append((exp, results))


10C_3R_1024B_50_50.log
10C_4R_1024B_50_50.log
10C_5R_1024B_50_50.log
10C_6R_1024B_50_50.log
15C_3R_1024B_50_50.log
15C_4R_1024B_50_50.log
15C_5R_1024B_50_50.log
15C_6R_1024B_50_50.log
20C_3R_1024B_50_50.log
20C_4R_1024B_50_50.log
20C_5R_1024B_50_50.log
20C_6R_1024B_50_50.log
25C_3R_1024B_50_50.log
25C_4R_1024B_50_50.log
25C_5R_1024B_50_50.log
25C_6R_1024B_50_50.log
30C_3R_1024B_50_50.log
30C_4R_1024B_50_50.log
30C_5R_1024B_50_50.log
5C_3R_1024B_50_50.log
5C_4R_1024B_50_50.log
5C_5R_1024B_50_50.log
5C_6R_1024B_50_50.log


### Write to JSON

In [23]:
import json

# TODO: JSON export is not implemented (arguably unnecessary)


## Plot

In [24]:
import matplotlib.pyplot as plt

# Output folder for the generated PDF graphs; the trailing slash is needed
# because the file name is appended directly to this prefix.
GRAPHS_FOLDER = "graphs/"
# Create it up front so that saving a figure never fails on a missing folder.
os.makedirs(GRAPHS_FOLDER, exist_ok=True)

@dataclass
class Line:
    """One labelled curve of a throughput-latency plot."""
    label: str                    # legend entry of the curve
    throughput_axis: list[float]  # x coordinates (throughput values)
    latency_axis: list[float]     # y coordinates (latency values), parallel to throughput_axis


def gen_throughput_latency_graph(num_set: int, lines: list[Line]):
    """Plot one latency-versus-throughput curve per line and save it as a PDF.

    Args:
        num_set: experiment-set number, used in the output file name
            (``exp<num_set>.pdf`` inside ``GRAPHS_FOLDER``).
        lines: curves to draw; any iterable of ``Line`` works, it is traversed
            exactly once.
    """
    # A dedicated figure keeps the plot independent of whatever the global
    # pyplot state currently holds.
    fig, ax = plt.subplots()
    for line in lines:
        ax.plot(line.throughput_axis, line.latency_axis, marker="*", label=line.label)
    ax.set_xlabel("Throughput (ops/sec)")
    ax.set_ylabel("Average Latency (ms)")
    ax.set_title("Throughput-Latency Plot")

    # Build the legend before fitting the layout so every artist exists when
    # the margins are computed.
    ax.legend()
    fig.tight_layout()

    fig.savefig(f"{GRAPHS_FOLDER}exp{num_set}.pdf")
    # Close rather than clear: a cleared figure stays open and is rendered by
    # the notebook as an empty "<Figure ... with 0 Axes>" output.
    plt.close(fig)

In [25]:
# Draw one graph per experiment set, with one curve per replica count.
for num_set, lst_exps in exps_per_set.items():
    lines = dd(lambda: Line("", [], []))
    # Ordering by client count first means each curve receives its points in
    # increasing number of clients.
    ordered_exps = sorted(
        lst_exps,
        key=lambda pair: (pair[0].number_clients, pair[0].number_replicas),
    )
    for exp, res in ordered_exps:
        curve = lines[exp.number_replicas]
        curve.label = f"{exp.number_replicas} replicas"
        curve.throughput_axis.append(res.throughput)
        curve.latency_axis.append(res.overall_latency_ms)

    gen_throughput_latency_graph(num_set, lines.values())

<Figure size 432x288 with 0 Axes>