In [None]:
import pandas as pd
import seaborn as sb
import math
import numpy as np

import matplotlib.pyplot as plt

# Default seaborn palette for every figure below: three colours from "tab10".
# Alternative kept for reference: sb.set_palette("mako", n_colors=3)
sb.set_palette(palette="tab10", n_colors=3)


In [None]:
# Read the results file and order the rows by client protocol, then by
# remote protocol.
df = pd.read_csv("all.csv").sort_values(by=["client_protocol", "remote_protocol"])

# Preview the first 20 rows of the sorted frame.
df.head(20)

In [None]:
# Calculate derived columns used by the plots below.

# Sentinel values substituted where a log10 value is undefined.
# NOTE(review): presumably a missing inverse failure probability means "no
# injected failures" and -6 acts as a floor for "no observed failures" --
# confirm against the data generator.
LOG_INVERSE_FILL = 10
LOG_RATE_FLOOR = -6

# Percentage of initialisations that failed.
df["init_failure_rate"] = df["init_failures"] / df["init_count"] * 100

# log10 of the observed ratios. A zero numerator gives -inf and 0/0 gives NaN;
# both are replaced with LOG_RATE_FLOOR at the end of this cell.
df["log_method_failure_rate"] = np.log10(df["method_call_failures"] / df["method_call_count"])
df["log_non_idempotent_mismatch_rate"] = np.log10(df["non_idempotent_mismatches"] / df["non_idempotent_calls"])

# Axis variable: log10 of the configured inverse failure probability.
# Computed before the factor below so that both operands are on a log scale.
df["log_inverse_failure_probability"] = np.log10(df["inverse_failure_probability"])

# Factor difference between simulated failure rate and observed failure rate.
# Since we are dealing in log scales, we subtract. This previously used the
# linear "inverse_failure_probability" column, mixing linear and log values.
# It is evaluated before the sentinel fill so that undefined inputs stay
# NaN/inf instead of producing a number derived from a sentinel.
# NOTE(review): if inverse_failure_probability is 1/p, then log10(p) is the
# negative of this column and the ratio would be a sum instead -- confirm the
# intended sign.
df["method_failure_factor"] = df["log_inverse_failure_probability"] - df["log_method_failure_rate"]

# Replace undefined values with the sentinels. Assign the result back rather
# than calling fillna/replace with inplace=True on a column selection: that is
# a chained assignment which does not update `df` under pandas Copy-on-Write.
df["log_inverse_failure_probability"] = df["log_inverse_failure_probability"].fillna(LOG_INVERSE_FILL)

for rate_col in ("log_method_failure_rate", "log_non_idempotent_mismatch_rate"):
    df[rate_col] = (
        df[rate_col]
        .replace([np.inf, -np.inf], np.nan)
        .fillna(LOG_RATE_FLOOR)
    )


In [None]:
# Segment the dataset into 4 partitions, named <client>_<remote>:
#   ok_ok   - client and remote protocol equal, client protocol not faulty
#   err_err - client and remote protocol equal, client protocol faulty
#   ok_err  - protocols differ, client protocol not faulty
#   err_ok  - protocols differ, client protocol faulty

# Regex that matches a (single-line) string in which "Faulty" never occurs.
not_faulty= "^((?!Faulty).)*$"

same_proto: pd.DataFrame = df[df["client_protocol"] == df["remote_protocol"]]
diff_proto: pd.DataFrame = df[df["client_protocol"] != df["remote_protocol"]]

# Each mask is built from the frame it filters. Masks built from the full `df`
# have a different index than the subset, and pandas has to reindex them
# (emitting a "Boolean Series key will be reindexed" warning).
ok_ok = same_proto[same_proto["client_protocol"].str.contains(not_faulty)]
err_err = same_proto[same_proto["client_protocol"].str.startswith("Faulty")]

ok_err = diff_proto[diff_proto["client_protocol"].str.contains(not_faulty)]
err_ok = diff_proto[diff_proto["client_protocol"].str.startswith("Faulty")]

# Per-protocol-pair mean of every numeric column of the control partition.
# numeric_only=True keeps this working on pandas >= 2, where averaging a
# non-numeric column raises instead of silently dropping it.
# NOTE(review): the mean is taken across all failure-probability levels, so
# each protocol pair collapses to a single row (including a single averaged
# "log_inverse_failure_probability") -- confirm this is the intended
# aggregation for the control overlay.
avg = (
    ok_ok
    .groupby(["client_protocol", "remote_protocol"])
    .mean(numeric_only=True)
    .reset_index()
)
# The second reset_index adds a positional "index" column; kept so the shape
# of avg_data is unchanged for any existing consumer.
avg_data = avg.reset_index()

# Column groups; not referenced by the code visible in this notebook.
cols = ["log_method_failure_rate", "log_inverse_failure_probability", "log_non_idempotent_mismatch_rate"]
identifiers = ["client_protocol", "remote_protocol"]

In [None]:
# 2x2 grid of swarm plots of log_method_failure_rate against
# log_inverse_failure_probability, one panel per partition. Panels in the same
# row share their y axis.
fig, axes = plt.subplots(2, 2, figsize=(10, 10), sharey="row")

data = [
    [ok_ok, ok_err],
    [err_ok, err_err]
]

titles = [
    ["Control", "Server-side failure"],
    ["Client-side failure", "Twin failure"]
]

for row in range(2):
    for col in range(2):
        ax = axes[row, col]

        ax.set_title(titles[row][col])
        ax.yaxis.grid()

        sb.swarmplot(
            data=data[row][col],
            x="log_inverse_failure_probability",
            y="log_method_failure_rate",
            ax=ax,
            hue="client_protocol",
            dodge=True,
        )

        if row == 0 and col == 0:
            # create a global legend for the entire figure based on the first subplot
            handles, labels = ax.get_legend_handles_labels()
            fig.legend(handles, labels, loc="upper center", ncol=3)

            # plot log avg value for control as larger markers on the same axes
            sb.swarmplot(
                data=avg_data,
                x="log_inverse_failure_probability",
                y="log_method_failure_rate",
                ax=ax,
                hue="client_protocol",
                legend=False,
                size=8,
                dodge=True,
            )

        # All panels share the same axis labels: keep the x label on the bottom
        # row and the y label on the left column only. This has to happen after
        # plotting because seaborn sets the labels when it draws.
        if row == 0:
            ax.set_xlabel("")
        if col == 1:
            ax.set_ylabel("")

        # Drop the per-panel legend seaborn added; the figure-level legend
        # replaces it. Calling ax.legend() here would build a new legend
        # (and warn when there are no labelled artists) just to hide it.
        panel_legend = ax.get_legend()
        if panel_legend is not None:
            panel_legend.remove()


In [None]:
# Write the most recently built figure (`fig`) to an SVG file.
fig.savefig(fname="plot.svg")


In [None]:
# 2x2 grid of swarm plots of log_non_idempotent_mismatch_rate against
# log_inverse_failure_probability, one panel per partition.
fig, axes = plt.subplots(2, 2, figsize=(10, 10))

data = [
    [ok_ok, ok_err],
    [err_ok, err_err]
]

titles = [
    ["Control", "Server-side failure"],
    ["Client-side failure", "Twin failure"]
]

for row in range(2):
    for col in range(2):
        ax = axes[row, col]

        ax.set_title(titles[row][col])
        ax.yaxis.grid()

        sb.swarmplot(
            data=data[row][col],
            x="log_inverse_failure_probability",
            y="log_non_idempotent_mismatch_rate",
            ax=ax,
            hue="client_protocol",
            dodge=True,
        )

        # create a global legend for the entire figure based on the first subplot
        if row == 0 and col == 0:
            handles, labels = ax.get_legend_handles_labels()
            fig.legend(handles, labels, loc="upper center", ncol=3)

        # All panels share the same axis labels: keep the x label on the bottom
        # row and the y label on the left column only. This has to happen after
        # plotting because seaborn sets the labels when it draws.
        if row == 0:
            ax.set_xlabel("")
        if col == 1:
            ax.set_ylabel("")

        # Drop the per-panel legend seaborn added; the figure-level legend
        # replaces it. Calling ax.legend() here would build a new legend
        # (and warn when there are no labelled artists) just to hide it.
        panel_legend = ax.get_legend()
        if panel_legend is not None:
            panel_legend.remove()


In [None]:
# Write the most recently built figure (`fig`) to an SVG file.
fig.savefig(fname="idem.svg")
