In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os


from pydantic import BaseModel, Field, validator
from collections import defaultdict

# Default resolution (matplotlib's "figure.dpi" setting) for every figure
# created after this cell runs.
plt.rcParams["figure.dpi"] = 400


In [None]:
# Report Types


class Summary(BaseModel):
    """Distribution statistics recorded for one named metric.

    Instances are the values of ``Metrics.summaries``. Every field is
    declared without a default and typed as ``float``. ``p50`` and
    ``median`` are separate fields; ``summary_to_series`` exports only
    ``median``.
    """

    # NOTE(review): units are not stated in this model; the plots built from
    # these values label them as milliseconds — confirm against the report source.
    min: float
    max: float
    count: float
    p50: float
    median: float
    p75: float
    p90: float
    p95: float
    p99: float
    p999: float


class Metrics(BaseModel):
    """One block of metrics taken from a report file.

    Used both for a report's ``aggregate`` entry and for each element of
    its ``intermediate`` list (see ``Report``).
    """

    # Populated from the JSON keys "firstCounterAt" / "lastCounterAt".
    first_counter_at: pd.Timestamp = Field(alias="firstCounterAt")
    last_counter_at: pd.Timestamp = Field(alias="lastCounterAt")
    # All three mappings are keyed by metric name.
    counters: dict[str, int]
    rates: dict[str, int]
    summaries: dict[str, Summary]

    @validator("first_counter_at", "last_counter_at")
    def validate_first_counter_at(cls, v):
        """Convert a timestamp field to ``pd.Timestamp``.

        Despite its name, this validator is registered for both
        ``first_counter_at`` and ``last_counter_at``.
        """
        return pd.Timestamp(v)


class Report(BaseModel):
    """A parsed report file together with the configuration it is labelled with.

    ``region``, ``georeplicated`` and ``cache_enabled`` are supplied by the
    loader functions (``read_reports`` / ``read_reports2``), not read from
    the file; ``aggregate`` and ``intermediate`` come from the JSON keys of
    the same name.
    """

    region: str
    georeplicated: bool
    cache_enabled: bool

    # Metrics for the whole report.
    aggregate: Metrics
    # Further Metrics blocks from the file's "intermediate" list.
    intermediate: list[Metrics]


In [None]:
# Helper Functions

from typing import Iterable

# Name of the summary that covers all HTTP requests of a report.
SUMMARY_HTTP_RESPONSE_TIME = "http.response_time"

# Coarse operation categories.
SUMMARY_TYPE_READ = "read"
SUMMARY_TYPE_WRITE = "write"
SUMMARY_TYPE_READ_BLOB = "read blob"
SUMMARY_TYPE_WRITE_BLOB = "write blob"

# Fine-grained categories, one per user/auction endpoint.
SUMMARY_TYPE_USER_AUTH = "user auth"
SUMMARY_TYPE_USER_CREATE = "user create"
SUMMARY_TYPE_USER_AUCTIONS = "user auctions"
SUMMARY_TYPE_USER_FOLLOWING = "user following"
SUMMARY_TYPE_AUCTIONS_POPULAR = "auctions popular"
SUMMARY_TYPE_AUCTIONS_RECENT = "auctions recent"
SUMMARY_TYPE_AUCTIONS_CREATE = "auction create"
SUMMARY_TYPE_BID_CREATE = "bid create"
SUMMARY_TYPE_AUCTION_BIDS = "auction bids"
SUMMARY_TYPE_AUCTION_QUESTIONS = "auction questions"
SUMMARY_TYPE_QUESTION_CREATE = "question create"
SUMMARY_TYPE_REPLY_CREATE = "reply create"

# Every per-endpoint summary name is this prefix followed by "<METHOD>:<route>".
_ENDPOINT_SUMMARY_PREFIX = "plugins.metrics-by-endpoint.response_time."

# ("<METHOD>:<route>", summary types) pairs, in SUMMARY_ENDPOINTS order.
_ENDPOINT_SUMMARY_TYPES = (
    # User Endpoints
    ("POST:/user/auth", (SUMMARY_TYPE_READ, SUMMARY_TYPE_USER_AUTH)),
    ("POST:/user", (SUMMARY_TYPE_WRITE, SUMMARY_TYPE_USER_CREATE)),
    ("GET:/user/*/auctions", (SUMMARY_TYPE_READ, SUMMARY_TYPE_USER_AUCTIONS)),
    ("GET:/user/*/following", (SUMMARY_TYPE_READ, SUMMARY_TYPE_USER_FOLLOWING)),
    # Auction Endpoints
    ("GET:/auction/any/popular", (SUMMARY_TYPE_READ, SUMMARY_TYPE_AUCTIONS_POPULAR)),
    ("GET:/auction/any/recent", (SUMMARY_TYPE_READ, SUMMARY_TYPE_AUCTIONS_RECENT)),
    ("GET:/auction/*/bid", (SUMMARY_TYPE_READ, SUMMARY_TYPE_AUCTION_BIDS)),
    ("GET:/auction/*/question", (SUMMARY_TYPE_READ, SUMMARY_TYPE_AUCTION_QUESTIONS)),
    ("POST:/auction", (SUMMARY_TYPE_WRITE, SUMMARY_TYPE_AUCTIONS_CREATE)),
    ("POST:/auction/*/bid", (SUMMARY_TYPE_WRITE, SUMMARY_TYPE_BID_CREATE)),
    ("POST:/auction/*/question", (SUMMARY_TYPE_WRITE, SUMMARY_TYPE_QUESTION_CREATE)),
    (
        "POST:/auction/*/question/*/reply",
        (SUMMARY_TYPE_WRITE, SUMMARY_TYPE_REPLY_CREATE),
    ),
    # Media Endpoints
    ("GET:/media", (SUMMARY_TYPE_READ_BLOB,)),
    ("GET:/media/*", (SUMMARY_TYPE_READ_BLOB,)),
    ("POST:/media", (SUMMARY_TYPE_WRITE_BLOB,)),
)

# Full summary name -> list of summary types that endpoint contributes to.
# Each value is its own list object.
SUMMARY_ENDPOINTS = {
    _ENDPOINT_SUMMARY_PREFIX + route: list(kinds)
    for route, kinds in _ENDPOINT_SUMMARY_TYPES
}

# Summaries that are skipped when grouping by type.
SUMMARY_IGNORED = ["vusers.session_length"]


def parse_report_dir(name: str) -> tuple[bool, bool, str]:
    """Parse a report directory name into its configuration.

    The expected layout is the one ``read_reports`` builds:
    ``geo=<0|1>_cache=<0|1>_region=<region>``.

    Args:
        name: Directory name, without any parent path.

    Returns:
        ``(georeplicated, cache_enabled, region)``. A flag is ``True`` only
        when its value is exactly ``"1"``.

    Raises:
        ValueError: If ``name`` has fewer than three ``_``-separated parts.
        IndexError: If one of the parts contains no ``=``.
    """
    # Split at most twice so that a region which itself contains "_" is kept
    # whole, making this the inverse of the directory name read_reports builds.
    geo_part, cache_part, region_part = name.split("_", 2)
    georeplicated = geo_part.split("=")[1] == "1"
    cache_enabled = cache_part.split("=")[1] == "1"
    # Everything after the first "=" is the region.
    region = region_part.split("=", 1)[1]
    return georeplicated, cache_enabled, region


def read_reports(geo: bool, cache: bool, region: str) -> dict[str, Report]:
    """Load every JSON report stored for one configuration.

    Reports are read from the directory
    ``reports/geo=<0|1>_cache=<0|1>_region=<region>``.

    Args:
        geo: Selects the ``geo=1`` (True) or ``geo=0`` (False) directory and
            is stored as ``georeplicated`` on each report.
        cache: Selects the ``cache=1`` / ``cache=0`` directory and is stored
            as ``cache_enabled`` on each report.
        region: Region part of the directory name, stored on each report.

    Returns:
        Mapping from file name without the ``.json`` suffix to the parsed
        ``Report``, in sorted file-name order.

    Raises:
        FileNotFoundError: If the directory does not exist.
    """
    geo_flag = "1" if geo else "0"
    cache_flag = "1" if cache else "0"
    report_dir = f"reports/geo={geo_flag}_cache={cache_flag}_region={region}"

    reports = {}
    # os.listdir returns every entry in arbitrary order: skip anything that is
    # not a .json file (checkpoint folders, OS metadata, ...) and sort the
    # names so the resulting dict order is reproducible.
    for file_name in sorted(os.listdir(report_dir)):
        if not file_name.endswith(".json"):
            continue
        with open(os.path.join(report_dir, file_name)) as f:
            obj = json.load(f)
        # The file is closed before the models are built.
        reports[file_name.removesuffix(".json")] = Report(
            region=region,
            georeplicated=geo,
            cache_enabled=cache,
            aggregate=Metrics.parse_obj(obj["aggregate"]),
            intermediate=[Metrics.parse_obj(i) for i in obj["intermediate"]],
        )
    return reports


def read_reports2(region: str) -> dict[str, Report]:
    """Load the single report stored at ``reports/<region>.json``.

    The report is labelled as not georeplicated with the cache disabled and
    is returned under the fixed key ``"workload1"``.
    """
    with open(f"reports/{region}.json") as f:
        payload = json.load(f)

    report = Report(
        region=region,
        georeplicated=False,
        cache_enabled=False,
        aggregate=Metrics.parse_obj(payload["aggregate"]),
        intermediate=[Metrics.parse_obj(entry) for entry in payload["intermediate"]],
    )
    return {"workload1": report}


def summary_to_series(summary: Summary) -> pd.Series:
    """Convert a ``Summary`` into a Series indexed by statistic name.

    The exported statistics are, in order: min, max, count, median, p75,
    p90, p95, p99 and p999 (``p50`` is not exported).
    """
    exported = ("min", "max", "count", "median", "p75", "p90", "p95", "p99", "p999")
    return pd.Series({stat: getattr(summary, stat) for stat in exported})


def reports_http_means(reports: Iterable[Report]) -> pd.Series:
    """Average the overall HTTP response-time summary across reports.

    Each report contributes the ``SUMMARY_HTTP_RESPONSE_TIME`` entry of its
    aggregate summaries; the result is the unweighted mean of every
    statistic, indexed like ``summary_to_series``.
    """
    columns = []
    for report in reports:
        overall = report.aggregate.summaries[SUMMARY_HTTP_RESPONSE_TIME]
        columns.append(summary_to_series(overall))
    per_report = pd.concat(columns, axis=1)
    return per_report.mean(axis=1)


def reports_summaries_by_type(reports: Iterable[Report]) -> pd.DataFrame:
    """Merge per-endpoint summaries into one weighted summary per summary type.

    For every report, each entry of ``aggregate.summaries`` (other than the
    overall HTTP summary and the names in ``SUMMARY_IGNORED``) is assigned to
    the summary types listed for it in ``SUMMARY_ENDPOINTS``. Per type, the
    collected summaries are averaged statistic by statistic, each weighted by
    the number of 200/204 responses counted for its endpoint in the same
    report. Every row produced by ``summary_to_series`` (including ``min``,
    ``max`` and ``count``) is averaged the same way.

    Args:
        reports: Reports whose aggregate metrics are combined.

    Returns:
        DataFrame with one column per summary type and one row per
        statistic. A type whose total weight is 0 ends up as NaN.

    Raises:
        ValueError: If a summary name is neither ignored nor listed in
            ``SUMMARY_ENDPOINTS``.
    """
    success_codes = ("200", "204")

    def count_for_name(counters: dict[str, int], name: str) -> int:
        """Total the 200/204 counters recorded for the endpoint of summary ``name``."""
        # The trailing "." stops one endpoint from also matching longer routes
        # that merely start with it (e.g. "POST:/user" vs "POST:/user/auth").
        # NOTE(review): assumes counter names look like
        # "<endpoint>.codes.<status>" — confirm against a report file.
        endpoint_prefix = name.replace(".response_time", "") + "."
        total = 0
        for counter_name, counter_value in counters.items():
            _, matched, suffix = counter_name.partition(endpoint_prefix)
            # Each code is tested on its own: `"200" or "204" in x` is always
            # truthy and would let every status code through.
            if matched and any(code in suffix for code in success_codes):
                total += counter_value
        return total

    summaries_per_type = defaultdict(list)
    weights_per_type = defaultdict(list)

    for report in reports:
        counters = report.aggregate.counters
        for name, summary in report.aggregate.summaries.items():
            if name == SUMMARY_HTTP_RESPONSE_TIME or name in SUMMARY_IGNORED:
                continue
            if name not in SUMMARY_ENDPOINTS:
                raise ValueError(f"Unknown summary: {name}")
            summary_count = count_for_name(counters, name)
            summary_series = summary_to_series(summary)
            # One endpoint may feed several types (e.g. "read" and "user auth").
            for summary_type in SUMMARY_ENDPOINTS[name]:
                summaries_per_type[summary_type].append(summary_series)
                weights_per_type[summary_type].append(summary_count)

    summary_per_type = {}
    for summary_type, summaries in summaries_per_type.items():
        weights = weights_per_type[summary_type]
        scaled = [s * w for s, w in zip(summaries, weights)]
        # Weighted mean: sum of (summary * weight) divided by the total weight.
        summary_per_type[summary_type] = pd.concat(scaled, axis=1).sum(axis=1) / sum(
            weights
        )

    return pd.DataFrame(summary_per_type)


def reports_summaries_medians_by_type(reports: Iterable[Report]) -> pd.Series:
    """Return the ``median`` statistic of every summary type.

    Builds the per-type table with ``reports_summaries_by_type``, drops its
    ``min`` and ``max`` rows and returns the ``median`` row as a Series
    indexed by summary type.
    """
    by_type = reports_summaries_by_type(reports)
    without_extremes = by_type.drop(index=["min", "max"])
    return without_extremes.T["median"]


def report_summaries_by_type(report: Report) -> pd.DataFrame:
    """Single-report convenience wrapper around ``reports_summaries_by_type``."""
    single_report = [report]
    return reports_summaries_by_type(single_report)


In [None]:
# West Europe runs with the cache disabled (r1) and enabled (r2). read_reports
# loads every report in the directory; only "workload1" is kept.
r1 = {"workload1": read_reports(False, False, "westeurope")["workload1"]}
r2 = {"workload1": read_reports(False, True, "westeurope")["workload1"]}
# Single-file reports; read_reports2 stores each one under the key "workload1".
r3 = read_reports2("europe")
r4 = read_reports2("us")
r5 = read_reports2("local")


In [None]:
# Per-type median series for each loaded configuration, in r1..r5 order.
c1, c2, c3, c4, c5 = (
    reports_summaries_medians_by_type(loaded.values())
    for loaded in (r1, r2, r3, r4, r5)
)


def prep_data(d: pd.Series) -> pd.Series:
    """Return ``d`` unchanged.

    Applied to every series before it is placed in the ``latencies`` table;
    currently an identity function.
    """
    return d


# One row per configuration, one column per summary type.
series_by_configuration = {
    "West Europe Cache Disabled": prep_data(c1),
    "West Europe Cache Enabled": prep_data(c2),
    "West Europe Kubernetes": prep_data(c3),
    "Central US Kubernetes": prep_data(c4),
    "Margem Sul": prep_data(c5),
}
latencies = pd.DataFrame(series_by_configuration).T

fig, axs = plt.subplots(1, 2, figsize=(16, 6))

labels = latencies.index.values
x = np.arange(len(labels))
palette = sns.color_palette()
colors = [palette[i] for i in range(len(labels))]
yread = latencies["read"].values
ywrite = latencies["write"].values

# Both panels share the same layout; only the plotted values and title differ.
panels = [
    (axs[0], yread, "Read Latencies"),
    (axs[1], ywrite, "Write Latencies"),
]
for ax, heights, title in panels:
    ax.bar(x, heights, label=labels, color=colors)
    ax.legend(title="Configuration")
    # The legend identifies the bars, so the x axis is hidden.
    ax.get_xaxis().set_visible(False)
    ax.set_ylabel("Average Latency (ms)")
    ax.set_title(title)

fig.tight_layout()
fig.savefig("latencies.pdf", facecolor="white", dpi=600)


In [None]:
# Compare the two Kubernetes deployments on the per-endpoint types that
# SUMMARY_ENDPOINTS tags as reads.
short_labels = {
    "West Europe Cache Disabled": "WECOFF",
    "West Europe Cache Enabled": "WECON",
    "West Europe Kubernetes": "WEUK8S",
    "Central US Kubernetes": "CUSAK8S",
    "Margem Sul": "MS",
}
kubernetes_rows = ["WEUK8S", "CUSAK8S"]
read_endpoint_types = [
    SUMMARY_TYPE_USER_AUTH,
    SUMMARY_TYPE_USER_AUCTIONS,
    SUMMARY_TYPE_USER_FOLLOWING,
    SUMMARY_TYPE_AUCTIONS_POPULAR,
    SUMMARY_TYPE_AUCTIONS_RECENT,
    SUMMARY_TYPE_AUCTION_BIDS,
    SUMMARY_TYPE_AUCTION_QUESTIONS,
]
latencies.rename(short_labels).loc[kubernetes_rows, read_endpoint_types].plot(
    kind="bar", rot=0
)
