In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

import os

from seaborn_box_width_fix import adjust_box_widths


In [None]:
# Display label for every supported environment key. The insertion order of
# this dict is also the order in which the environments are generated.
ENVIRONMENT_NAMING = {
    "local": "Local",
    "same-pod": "Same Pod",
    "same-cluster": "Same Cluster",
}
# Derived from the naming table so the two can never get out of sync.
VALID_ENVIRONMENTS = list(ENVIRONMENT_NAMING)

# Display label for the boolean "hook" column.
HOOK_NAMING = {True: "Hook", False: "No hook"}

# matplotlib rcParams applied (via matplotlib.rc_context) around every plot function.
RC_CONTEXT = {
    # text rendering
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": "Palatino",
    "font.size": 20,
    # saved figures are cropped to their content, without padding
    "savefig.bbox": "tight",
    "savefig.pad_inches": 0.0,
}


def generate_vector(env: str, hook: bool, ncycles: int = 1_000) -> np.ndarray:
    """Draw a mocked latency series, including a warm-up phase, for one setup.

    Args:
        env: Environment key; must be one of VALID_ENVIRONMENTS. It selects the
            standard deviation of the noise (10, 40 or 70).
        hook: When truthy, the mean is 210 instead of 200.
        ncycles: Number of samples to draw.

    Returns:
        A 1-D array of ``ncycles`` normally distributed values whose first 20 %
        are scaled up by an exponentially decaying warm-up factor.

    Raises:
        AssertionError: If ``env`` is not in VALID_ENVIRONMENTS.
    """
    assert env in VALID_ENVIRONMENTS

    spread_by_env = {"local": 10, "same-pod": 40, "same-cluster": 70}
    spread = spread_by_env[env]
    mean_latency = 210 if hook else 200

    # Uses NumPy's global RNG: seed it in the caller for reproducible output.
    samples = np.random.normal(mean_latency, spread, ncycles)

    # Warm-up: multiply the leading samples by a factor that starts at
    # 1 + warmup_strength / spread and decays exponentially over the window.
    warmup_strength, warmup_ratio = 30, 0.2
    n_warmup = int(ncycles * warmup_ratio)
    ramp = np.linspace(0, 1, n_warmup)
    gain = 1.0 + np.exp(-5 * ramp) * (warmup_strength / spread)
    samples[:n_warmup] *= gain

    return samples


def generate_dataframe() -> pd.DataFrame:
    """Generate a fake latency dataframe in long (tidy) format.

    One block of rows is produced per (environment, hook) combination, in the
    order of VALID_ENVIRONMENTS and with hook=True before hook=False.

    Returns:
        A frame with the columns ``time`` (the mocked values from
        generate_vector), ``env`` (environment key) and ``hook`` (bool).
        The index is deliberately NOT reset: it restarts at 0 for every
        combination, so an index label is the iteration number within its
        condition. filter_dataframe and plot_lagplots rely on this.
    """
    # Build all pieces first and concatenate once; growing a frame with
    # pd.concat inside the loop copies the accumulated data on every iteration.
    frames = [
        pd.DataFrame({"time": generate_vector(env, hook), "env": env, "hook": hook})
        for env in VALID_ENVIRONMENTS
        for hook in [True, False]
    ]

    if not frames:
        # pd.concat rejects an empty list; keep returning an empty frame instead.
        return pd.DataFrame()

    return pd.concat(frames)


def filter_dataframe(df: pd.DataFrame, n: int) -> pd.DataFrame:
    """Per condition, remove the first n values.

    The selection is made on the index labels, so it assumes the index restarts
    at 0 for every condition (as in the frame built by generate_dataframe),
    i.e. a label is the position of a sample within its condition.

    Args:
        df: Frame whose index holds the per-condition iteration number.
        n: Number of leading samples to drop from each condition.

    Returns:
        The rows of ``df`` whose index label is at least ``n``.
    """
    # Labels 0..n-1 are the first n samples, so label n itself must be kept
    # (a strict ">" would drop n + 1 samples, and one sample even for n == 0).
    return df[df.index >= n]


@matplotlib.rc_context(RC_CONTEXT)
def plot_boxplots(df: pd.DataFrame):
    """Draw grouped box plots of ``time`` per environment, split by hook.

    The figure is written to ``boxplots.pdf`` in the working directory and then
    shown. The input frame is not modified.

    Args:
        df: Long-format frame with the columns ``time``, ``env`` (keys of
            ENVIRONMENT_NAMING) and ``hook`` (keys of HOOK_NAMING).
    """
    # NOTE(review): set_theme() itself writes rcParams (seaborn's default font
    # family and sizes), which may override the font entries of RC_CONTEXT for
    # the duration of this call - confirm the rendered fonts are as intended.
    sns.set_theme(style="whitegrid", palette="pastel")
    fig = plt.figure(figsize=(5, 4))

    # Swap the raw keys for display labels on a private copy.
    labelled = df.copy(deep=True)
    for column, naming in (("hook", HOOK_NAMING), ("env", ENVIRONMENT_NAMING)):
        labelled[column] = labelled[column].map(naming)

    box_style = dict(width=0.75, showcaps=True, showfliers=False, linewidth=0.5)
    ax = sns.boxplot(
        data=labelled,
        x="env",
        y="time",
        hue="hook",
        palette=["#a6cee3", "#b2df8a"],
        **box_style,
    )

    sns.despine(offset=10, trim=False)
    ax.set_ylabel("Latency (ms)")
    ax.set_xlabel("")
    ax.legend(loc="upper left", frameon=True, fancybox=False, ncol=2)
    # faint dotted horizontal guide lines
    ax.yaxis.grid(True, ls=(0, (1, 3)), which="major", color="grey", alpha=0.25, lw=1)

    # Project helper (seaborn_box_width_fix); called with the figure and 0.8.
    adjust_box_widths(fig, 0.8)

    plt.tight_layout()
    plt.savefig("boxplots.pdf")
    plt.show()


@matplotlib.rc_context(RC_CONTEXT)
def plot_lagplots(df: pd.DataFrame, cutoff: int):
    """Plot ``time`` against the iteration number for the hook-enabled runs.

    Only rows with ``hook == True`` and an index label below ``cutoff`` are
    drawn, one line per environment. The figure is written to ``lagplots.pdf``
    in the working directory and then shown. The input frame is not modified.

    Args:
        df: Long-format frame with the columns ``time``, ``env`` and ``hook``
            whose (unnamed) index holds the per-condition iteration number.
        cutoff: Exclusive upper bound on the iteration numbers to plot.
    """
    # NOTE(review): set_theme() itself writes rcParams (seaborn's default font
    # family and sizes), which may override the font entries of RC_CONTEXT for
    # the duration of this call - confirm the rendered fonts are as intended.
    sns.set_theme(style="whitegrid", palette="pastel")
    plt.figure(figsize=(5, 2.5))

    head = df.query("hook == True").query("index < @cutoff").copy(deep=True)
    # Turn the iteration number into a regular "index" column for the x axis.
    head = head.reset_index()
    for column, naming in (("hook", HOOK_NAMING), ("env", ENVIRONMENT_NAMING)):
        head[column] = head[column].map(naming)

    ax = sns.lineplot(
        data=head,
        x="index",
        y="time",
        hue="env",
        palette=["#66c2a5", "#fc8d62", "#8da0cb"],
        linewidth=0.75,
    )

    sns.despine(offset=10, trim=False)
    ax.set_ylabel("Latency (ms)")
    ax.set_xlabel("Iterations")
    ax.legend(loc="upper right", frameon=True, fancybox=False, ncol=1)
    # faint dotted horizontal guide lines only; no vertical grid
    ax.yaxis.grid(True, ls=(0, (1, 3)), which="major", color="grey", alpha=0.25, lw=1)
    ax.xaxis.grid(False)

    plt.tight_layout()
    plt.savefig("lagplots.pdf")
    plt.show()


# generate_vector draws from NumPy's global RNG; seed it so the mocked data,
# and therefore both saved figures, are identical on every run.
np.random.seed(0)

df = generate_dataframe()

# Box plots: leave out the leading iterations of every condition. The warm-up
# window of the default 1000-cycle vectors spans the first 200 iterations.
plot_boxplots(filter_dataframe(df, 300))

# Line plot: only the first 200 iterations.
plot_lagplots(df, cutoff=200)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os

benchmark_path = "../../../bin/persistentVolume/benchmark_exports"
# current_path = "aws/04.07._time-13-40"
current_path = ""

# path: folder containing csv files
path = os.path.join(benchmark_path, current_path)

# os.listdir() returns the entries in arbitrary order. Sort them so the boxes
# and the printed statistics always appear in the same (file name) order.
# NOTE(review): the names look like benchmark_<timestamp>.csv, in which case
# name order is presumably also chronological order - confirm.
files_in_directories = sorted(os.listdir(path))

# csv_files format e.g., = ['benchmark_1691149092971.csv', 'benchmark_1691149143814.csv', 'benchmark_1691149189006.csv', 'benchmark_1691149237747.csv', 'benchmark_1691149278736.csv']
csv_files = [file for file in files_in_directories if file.endswith(".csv")]

# Load every export as a 2-D array of its rows.
raw_data = []
for file in csv_files:
    # NOTE(review): header=1 uses the SECOND line of each file as the header,
    # so the first line is skipped - confirm that matches the export format.
    file_data = pd.read_csv(os.path.join(path, file), sep=',', header=1).values
    print("Length: " + str(len(file_data)) + "; Name: " + file)
    raw_data.append(file_data)

# One list of second-column values per file; these are the plotted samples.
data_matrix = []
for data in raw_data:
    second_column_data = [row[1] for row in data]
    data_matrix.append(second_column_data)

    # Standard deviation of the samples at or below the file's 95 % quantile.
    q = np.quantile(second_column_data, 0.95)
    trimmed = [value for value in second_column_data if value <= q]
    print(np.std(trimmed))

# One box per file, outliers hidden.
plt.boxplot(data_matrix, showfliers=False)
plt.show()

In [None]:
import os
import sys
# Emit a fixed marker string.
print("test")

# Directory that contains the Python interpreter running this kernel.
# Computed once; a bare expression in the middle of a cell is neither
# displayed nor stored.
executable_dir = os.path.dirname(sys.executable)
print(executable_dir)