In [None]:
%pip install -r requirements.txt

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
from datetime import datetime, timedelta
import statsmodels.api as sm
import seaborn as sns
from ruptures import Pelt
from ruptures.metrics import hausdorff


In [2]:
def _run_names(media_node):
    """Build the ordered list of load-test run names for one media server.

    Names follow ``loadtest-webrtc-final-<media_node>-<topology>-t3medium<variant>``.
    For every topology the four variants appear in the order: plain run,
    ``-2``, ``-wait``, ``-retry``.
    """
    topologies = ("2p", "5p", "8p", "3p-10s", "3p-20s", "3p-40s")
    variants = ("", "-2", "-wait", "-retry")
    return [
        f"loadtest-webrtc-final-{media_node}-{topology}-t3medium{variant}"
        for topology in topologies
        for variant in variants
    ]


# 24 runs per media server; kurento occupies positions 0-23 of
# index_list_names and mediasoup positions 24-47.
index_kurento_names = _run_names("kurento")
index_mediasoup_names = _run_names("mediasoup")
index_list_names = index_kurento_names + index_mediasoup_names

node_types = ["browseremulator", "masternode", "medianode"]
packet_types = ["inbound", "outbound"]

# Multiplier applied to "cut_index" when it is rescaled in the loading cell.
seconds_per_fragment = 17

# Test window per run: one row per run name, with "from" and "to" columns
# parsed from ISO 8601 strings and converted to UTC.
start_end_times = pd.read_json("dfs_final/start-end-times.json", orient="index")
for _bound in ("from", "to"):
    start_end_times[_bound] = pd.to_datetime(
        start_end_times[_bound], format="ISO8601"
    ).dt.tz_convert("UTC")


def timestamp_to_secs(df_node, index, cpu_times=True):
    """Return a copy of ``df_node`` with a ``timestamp_secs`` column added.

    Parameters
    ----------
    df_node : DataFrame
        Must contain an ``"@timestamp"`` column parseable as ISO 8601.
    index : label
        Row label of the run in the module-level ``start_end_times`` frame.
    cpu_times : bool, default True
        If True, the window is the tighter one: the end is the earlier of the
        data's last timestamp and the run's "to", and the origin is the later
        of the data's first timestamp and the run's "from". If False, the
        wider choice is made for both bounds.

    Returns
    -------
    DataFrame
        Rows strictly before the chosen end, with ``"@timestamp"`` parsed and
        ``timestamp_secs`` holding the seconds elapsed since the chosen origin.
        Rows whose timestamp equals the end bound are dropped.
    """
    frame = df_node.copy()
    frame["@timestamp"] = pd.to_datetime(frame["@timestamp"], format="ISO8601")

    # pd.Series.min/max are used (not the builtins) so NaT is skipped.
    end_candidates = pd.Series(
        [frame["@timestamp"].max(), start_end_times.loc[index, "to"]]
    )
    if cpu_times:
        upper_bound = end_candidates.min()
    else:
        upper_bound = end_candidates.max()
    frame = frame[frame["@timestamp"] < upper_bound]

    # The origin is computed after trimming, from the rows that remain.
    start_candidates = pd.Series(
        [frame["@timestamp"].min(), start_end_times.loc[index, "from"]]
    )
    if cpu_times:
        origin = start_candidates.max()
    else:
        origin = start_candidates.min()

    frame["timestamp_secs"] = (frame["@timestamp"] - origin).dt.total_seconds()
    return frame

In [3]:
# One QoE frame per run, in index_list_names order. A run whose CSV is
# missing gets an empty DataFrame so positions stay aligned with the names.
df_list = []
for run_name in index_list_names:
    csv_path = f"dfs_final/{run_name}.csv"
    if os.path.isfile(csv_path):
        df_list.append(pd.read_csv(csv_path))
    else:
        df_list.append(pd.DataFrame())


# Rescale "cut_index" in place for every non-empty run: for each
# (userFrom, userTo, session) stream, cut_index becomes
#   cut_index * seconds_per_fragment + offset
# where offset is the whole number of seconds between the run's "from" time
# and the timestamp of that stream's cut_index == 0 row.
for i, df_user in enumerate(df_list):
    if df_user.empty:
        continue

    run_name = index_list_names[i]
    start_test_time = start_end_times.loc[run_name, "from"]

    df_user["@timestamp"] = pd.to_datetime(df_user["@timestamp"], format="ISO8601")

    # Snapshot the first-fragment rows before any rescaling. One row per stream
    # is kept so a stream with duplicated cut_index == 0 rows is not rescaled
    # twice.
    first_fragments = df_user[df_user["cut_index"] == 0].drop_duplicates(
        subset=["userFrom", "userTo", "session"]
    )

    for _, row in first_fragments.iterrows():
        # total_seconds() rather than .seconds: .seconds is only the
        # seconds-within-a-day component and wraps for negative differences.
        offset_secs = int((row["@timestamp"] - start_test_time).total_seconds())

        stream_mask = (
            (df_user["userFrom"] == row["userFrom"])
            & (df_user["userTo"] == row["userTo"])
            & (df_user["session"] == row["session"])
        )
        df_user.loc[stream_mask, "cut_index"] = (
            df_user.loc[stream_mask, "cut_index"] * seconds_per_fragment + offset_secs
        )

# Per-run metadata parsed from the run name
# ("loadtest-webrtc-final-<node>-<N>p[-<M>s]-t3medium[-<variant>]").
index_data = []

data_types = node_types + packet_types

for index, _ in zip(index_list_names, df_list):
    name_parts = index.split("-")

    # Fifth token is "<N>p": number of publishers.
    publishers = int(name_parts[4][:-1])

    # Sixth token is "<M>s" only for publisher/subscriber topologies;
    # otherwise it is the instance type and there are no subscribers.
    subscribers_token = name_parts[5]
    subscribers = int(subscribers_token[:-1]) if subscribers_token[-1] == "s" else 0

    users = publishers + subscribers

    # Last token is the instance type for the plain run, or the variant
    # ("2", "wait", "retry"). Named run_variant so the builtin `type` is not
    # rebound at notebook scope.
    run_variant = name_parts[-1]
    if run_variant in data_types:
        continue

    index_data.append(
        {
            # Always a string: "1" for the plain run, else the variant token.
            "type": "1" if run_variant == "t3medium" else run_variant,
            "publishers": publishers,
            "subscribers": subscribers,
            "users": users,
        }
    )

In [None]:
# VMAF summary statistics per non-empty run. The statistics are read straight
# from the Series so the builtins `min` and `max` are not rebound at notebook
# scope.
data = []
for run_name, df_run in zip(index_list_names, df_list):
    if df_run.empty:
        continue
    vmaf = df_run["vmaf"]
    data.append(
        [run_name, vmaf.mean(), vmaf.median(), vmaf.std(), vmaf.min(), vmaf.max()]
    )

pd.DataFrame(data, columns=["index_type", "average", "median", "std", "min", "max"])

In [4]:
# INDIVIDUAL PLOTS PARAMETERS

# Position in index_list_names of the run to inspect
index = 45
# Metric to show when only one metric is displayed
metric = "vmaf"

show_full_range = True

# Metric groups: the first is plotted on a fixed [0, 1]-style axis elsewhere in
# this notebook, the second is not.
qoe_metrics_normalized = ["vmaf", "msssim", "ssim", "vifp", "pesq", "visqol"]
qoe_metrics_not_normalized = ["psnr", "psnrhvs", "psnrhvsm"]
metrics = [*qoe_metrics_normalized, *qoe_metrics_not_normalized]

# Derived values
index_name = index_list_names[index]
metric_label = metric.upper()

In [None]:
# One figure per (metric, group of four consecutive runs): the mean of the
# metric per cut_index for each run, with dashed vertical lines at the first
# and last cut_index of each run.
plt.rcParams["figure.figsize"] = [25, 10]
colors = ["blue", "orange", "green", "red"]
runs_per_figure = 4

os.makedirs("images_final", exist_ok=True)

for metric in qoe_metrics_normalized:
    # Groups are taken by position, not by a rolling counter of non-empty
    # runs, so a missing run cannot shift later runs into the wrong figure or
    # leave a partially filled figure unsaved.
    for group_start in range(0, len(df_list), runs_per_figure):
        group_stop = group_start + runs_per_figure
        group = [
            (run_name, df_run)
            for run_name, df_run in zip(
                index_list_names[group_start:group_stop],
                df_list[group_start:group_stop],
            )
            if not df_run.empty
        ]
        if not group:
            continue

        fig, ax = plt.subplots()
        for plot_name, df_run in group:
            df_mean = df_run.groupby(["cut_index"]).mean(numeric_only=True)
            if df_mean.empty:
                continue
            ax.plot(df_mean.index, df_mean[metric], label=plot_name)
            for boundary in (df_mean.index.max(), df_mean.index.min()):
                ax.axvline(x=boundary, linestyle="--", alpha=0.7)

        ax.set_xlabel("timestamp (seconds)")
        ax.set_ylabel(metric)
        ax.grid()
        ax.set_ylim(-0.5, 1.05)
        ax.set_yticks(np.arange(-0.5, 1.05, 0.05))
        fig.legend(loc="lower left")
        # Title and file name use the last non-empty run of the group.
        fig.suptitle(f"{metric} over time (mean, {plot_name})")
        fig.savefig(f"images_final/{plot_name}_{metric}.png")

        # Render now and release the figure; otherwise every figure of the
        # loop stays open until the cell finishes.
        plt.show()
        plt.close(fig)

In [None]:
# One figure per (typology, media server): media-node CPU over time for every
# run whose name contains both, plus a dashed line at each run's last sample.
plt.rcParams["figure.figsize"] = [25, 10]
typologies = ["2p", "5p", "8p", "3p-10s", "3p-20s", "3p-40s"]
media_nodes = ["kurento", "mediasoup"]
colors = ["blue", "orange", "green", "red"]
for typology in typologies:
    for media_node in media_nodes:
        fig, ax = plt.subplots()
        target_names = [
            name
            for name in index_list_names
            if typology in name and media_node in name
        ]

        max_x = -1
        for i, target_name in enumerate(target_names):
            df_node = pd.read_csv(f"dfs_final/{target_name}-medianode.csv")
            # remove entries with cpu < 0.001
            # df_node = df_node[df_node["cpu"] > 0.001]
            df_node = timestamp_to_secs(df_node, target_name)
            df_node = df_node.drop(columns=["@timestamp", "memory"]).dropna()

            run_color = colors[i]
            last_sample = df_node["timestamp_secs"].max()

            ax.plot(
                df_node["timestamp_secs"],
                df_node["cpu"],
                color=run_color,
                label=target_name,
            )
            ax.axvline(
                x=last_sample,
                color=run_color,
                linestyle="--",
                alpha=0.7,
                label="End of test " + target_name,
            )
            max_x = np.maximum(max_x, last_sample)

        ax.set_xlabel("timestamp (seconds)")
        ax.set_ylabel("CPU usage (%)")
        ax.grid()
        ax.set_ylim(0, 1.05)
        ax.set_yticks(np.arange(0, 1.05, 0.05))
        # ax.set_xticks(np.arange(0, max_x, 120))

        fig.suptitle(f"CPU usage over time ({typology}, {media_node})")
        fig.legend(loc="right")
        fig.savefig(f"images_final/{typology}_{media_node}_cpu.png")

In [None]:
# Overlay plot for the single run selected by `index` / `index_name`:
# normalized QoE metrics (mean per cut_index), media-node CPU and memory on the
# left axis, and per-timestamp mean inbound WebRTC stats on a secondary axis.
plt.rcParams["figure.figsize"] = [25, 10]
fig, ax = plt.subplots()

webrtc_stats_y = ax.twinx()

aggregation_map = {"@timestamp": "first"}
for metric in qoe_metrics_normalized:
    aggregation_map[metric] = "mean"

df_tmp = df_list[index].groupby("cut_index").agg(aggregation_map)
df_tmp = timestamp_to_secs(df_tmp, index_name)
df_node = pd.read_csv(f"dfs_final/{index_name}-medianode.csv")
df_node = timestamp_to_secs(df_node, index_name)
df = df_tmp

df_node_cpu = df_node.drop(columns=["memory"]).dropna()
df_node_memory = df_node.drop(columns=["cpu"]).dropna()


def _load_webrtc_stats(run_name, direction):
    """Load one WebRTC stats CSV, average it per timestamp and trim it to the run window.

    as_index=False keeps "@timestamp" as a column: timestamp_to_secs reads
    df["@timestamp"] and raises KeyError if the timestamp is only the index.
    """
    df_stats = pd.read_csv(f"dfs_final/{run_name}-webrtc-stats-{direction}.csv")
    df_stats["@timestamp"] = pd.to_datetime(df_stats["@timestamp"], format="ISO8601")
    df_stats = (
        df_stats.drop(columns="user_id").groupby("@timestamp", as_index=False).mean()
    )
    return timestamp_to_secs(df_stats, run_name)


df_stats_inbound = _load_webrtc_stats(index_name, "inbound")
df_stats_outbound = _load_webrtc_stats(index_name, "outbound")

# QoE lines are drawn thick and on top; resource lines thin and underneath.
qoe_zorder = 10
resource_zorder = 0

qoe_linewidth = 3
resource_linewidth = 1

legend_handles = []
for metric in qoe_metrics_normalized:
    legend_handles.append(
        ax.plot(
            df["@timestamp"],
            df[metric],
            label=metric,
            marker="o",
            zorder=qoe_zorder,
            lw=qoe_linewidth,
        )
    )
legend_handles.append(
    ax.plot(
        df_node_cpu["@timestamp"],
        df_node_cpu["cpu"],
        "g",
        label="cpu",
        zorder=resource_zorder,
        lw=resource_linewidth,
        linestyle="--",
    )
)
legend_handles.append(
    ax.plot(
        df_node_memory["@timestamp"],
        df_node_memory["memory"],
        "c",
        label="memory",
        zorder=resource_zorder,
        lw=resource_linewidth,
        linestyle="--",
    )
)

# Time columns are the x axis / helper values, not stats to plot.
time_columns = ("@timestamp", "timestamp_secs")
# Maybe re-add qpSum?
excluded_stats = ("bytesSent", "packetsSent", "qpSum", "framesEncoded")

for column in df_stats_inbound.columns:
    if column in time_columns:
        continue
    if any(token in column for token in excluded_stats):
        continue
    legend_handles.append(
        webrtc_stats_y.plot(
            df_stats_inbound["@timestamp"], df_stats_inbound[column], label=column
        )
    )

ax.set_title(f"QOE metric (normalized) over time (mean, worker data, {index_name})")
ax.set_xlabel("timestamp (day hour:minute)")
ax.set_ylabel("QOE metric (normalized), CPU %, Memory %")
webrtc_stats_y.set_ylabel("WebRTC stats")
ax.grid()
ax.legend(loc="upper left")
webrtc_stats_y.legend(loc="center left")

In [None]:
def ols(dir, x_label, df):
    """Fit one OLS regression per QoE metric and write each summary to disk.

    For every name in the module-level ``metrics`` list, ``df[metric]`` is
    regressed on ``df[x_label]`` plus an intercept. Three files are written
    per metric into ``dir`` (created if missing): ``<metric>-summary.tex``,
    ``<metric>-summary.html`` and ``<metric>.txt``.

    Parameters
    ----------
    dir : str
        Output directory.
    x_label : str
        Column of ``df`` used as the explanatory variable.
    df : DataFrame
        Must contain ``x_label`` and every column named in ``metrics``.
    """
    os.makedirs(dir, exist_ok=True)

    exog = sm.add_constant(df[x_label])  # adding a constant

    for metric in metrics:
        summary = sm.OLS(df[metric], exog).fit().summary()

        outputs = (
            (f"{dir}/{metric}-summary.tex", summary.as_latex),
            (f"{dir}/{metric}-summary.html", summary.as_html),
            (f"{dir}/{metric}.txt", summary.as_text),
        )
        for path, render in outputs:
            with open(path, "w") as fh:
                fh.write(render())

In [None]:
# QoE vs. user_count regressions: all runs, then the first 24 frames
# (kurento), then the remaining ones (mediasoup).
user_count_scopes = (
    ("results_ols/results_all", df_list),
    ("results_ols/results_kurento", df_list[:24]),
    ("results_ols/results_mediasoup", df_list[24:]),
)
for results_dir, frames in user_count_scopes:
    df_tmp = pd.concat(frames, ignore_index=True)
    ols(results_dir, "user_count", df_tmp)

In [None]:
def _rounded_secs(df_raw, run_name):
    """Add ``timestamp_secs`` (wide window, cpu_times=False) rounded to the nearest 10 s."""
    df_secs = timestamp_to_secs(df_raw, run_name, False)
    df_secs["timestamp_secs"] = df_secs["timestamp_secs"].round(-1)
    return df_secs


# Per run, inner-join the QoE rows with (a) media-node CPU samples and
# (b) inbound / (c) outbound WebRTC stats on the 10-second time bucket.
# All three lists stay positionally aligned with index_list_names.
# NOTE(review): each frame's timestamp_secs origin is chosen independently by
# timestamp_to_secs; confirm the joined frames share the same origin.
df_merged_list = []

df_webrtc_inbound_merged_list = []
df_webrtc_outbound_merged_list = []

for index_name, df_run in zip(index_list_names, df_list):
    if df_run.empty:
        # The loading cell uses an empty frame for a run with no CSV. Keep a
        # placeholder so later positional lookups still line up.
        df_merged_list.append(pd.DataFrame())
        df_webrtc_inbound_merged_list.append(pd.DataFrame())
        df_webrtc_outbound_merged_list.append(pd.DataFrame())
        continue

    df_tmp = _rounded_secs(df_run, index_name)

    df_node = pd.read_csv(f"dfs_final/{index_name}-medianode.csv")
    # Only CPU is kept; rows without a CPU value are dropped.
    df_node = _rounded_secs(df_node.drop(columns=["memory"]).dropna(), index_name)

    # df_tmp = df_tmp.groupby(["timestamp_secs"]).mean(numeric_only=True)

    df_merged_list.append(pd.merge(df_tmp, df_node, on="timestamp_secs", how="inner"))

    for direction, merged_list in (
        ("inbound", df_webrtc_inbound_merged_list),
        ("outbound", df_webrtc_outbound_merged_list),
    ):
        df_webrtc = pd.read_csv(f"dfs_final/{index_name}-webrtc-stats-{direction}.csv")
        df_webrtc = _rounded_secs(df_webrtc, index_name)
        merged_list.append(
            pd.merge(df_tmp, df_webrtc, on="timestamp_secs", how="inner")
        )

In [None]:
# QoE vs. CPU regressions: all runs, then the first 24 merged frames
# (kurento), then the remaining ones (mediasoup).
cpu_scopes = (
    ("results_ols/results_all_cpu", df_merged_list),
    ("results_ols/results_kurento_cpu", df_merged_list[:24]),
    ("results_ols/results_mediasoup_cpu", df_merged_list[24:]),
)
for results_dir, frames in cpu_scopes:
    df_tmp = pd.concat(frames, ignore_index=True)
    ols(results_dir, "cpu", df_tmp)

In [None]:
# QoE vs. user_count regressions per (scope, variant).
#   scope:   all runs / kurento runs / mediasoup runs
#   variant: "default" = plain run and its "-2" repeat, "wait" = "-wait" runs,
#            "retry" = "-retry" runs
# Runs are selected by name instead of hand-maintained position lists. The
# previous "default_kurento" list also contained position 24, which is a
# mediasoup run.
user_count_frames = dict(zip(index_list_names, df_list))
user_count_scope_names = {
    "all": index_list_names,
    "kurento": index_kurento_names,
    "mediasoup": index_mediasoup_names,
}

for scope, scope_names in user_count_scope_names.items():
    for variant in ("default", "wait", "retry"):
        if variant == "default":
            selected_names = [
                name for name in scope_names if not name.endswith(("-wait", "-retry"))
            ]
        else:
            selected_names = [
                name for name in scope_names if name.endswith(f"-{variant}")
            ]

        df_tmp = pd.concat(
            [user_count_frames[name] for name in selected_names],
            ignore_index=True,
        )
        ols(f"results_ols/results_{variant}_{scope}", "user_count", df_tmp)


In [None]:
# QoE vs. CPU regressions per (scope, variant), using the merged QoE/CPU frames.
#   scope:   all runs / kurento runs / mediasoup runs
#   variant: "default" = plain run and its "-2" repeat, "wait" = "-wait" runs,
#            "retry" = "-retry" runs
# Runs are selected by name instead of hand-maintained position lists. The
# previous "default_kurento_cpu" list also contained position 24, which is a
# mediasoup run.
cpu_frames = dict(zip(index_list_names, df_merged_list))
cpu_scope_names = {
    "all": index_list_names,
    "kurento": index_kurento_names,
    "mediasoup": index_mediasoup_names,
}

for scope, scope_names in cpu_scope_names.items():
    for variant in ("default", "wait", "retry"):
        if variant == "default":
            selected_names = [
                name for name in scope_names if not name.endswith(("-wait", "-retry"))
            ]
        else:
            selected_names = [
                name for name in scope_names if name.endswith(f"-{variant}")
            ]

        df_tmp = pd.concat(
            [cpu_frames[name] for name in selected_names],
            ignore_index=True,
        )
        ols(f"results_ols/results_{variant}_{scope}_cpu", "cpu", df_tmp)


In [None]:
# Per-run regressions. A failing run is reported and skipped so the remaining
# runs are still processed. Only Exception is caught, so KeyboardInterrupt
# still stops the cell, and the error is printed so the cause is visible.
for i, df_tmp in enumerate(df_list):
    index_name = index_list_names[i]
    try:
        ols(f"results_ols/indexes/{index_name}", "user_count", df_tmp)
    except Exception as exc:
        print(f"Failed for {index_name}: {exc!r}")

for i, df_tmp in enumerate(df_merged_list):
    index_name = index_list_names[i]
    try:
        ols(f"results_ols/indexes_cpu/{index_name}", "cpu", df_tmp)
    except Exception as exc:
        print(f"Failed (cpu) for {index_name}: {exc!r}")

In [None]:
from io import StringIO
from IPython.display import display, HTML

# Scratch regression of CPU on user_count for a single merged run; only the
# coefficient table is displayed. The output directory is named output_dir so
# the builtin `dir` is not rebound at notebook scope.
output_dir = "test/"

os.makedirs(output_dir, exist_ok=True)

# df = pd.concat(df_merged_list, ignore_index=True)

i = 24
df = df_merged_list[i]

# Keep only samples with CPU below 0.95.
df = df[df["cpu"] < 0.95]

X = df["user_count"]

metric = "cpu"

X = sm.add_constant(X)  # adding a constant
Y = df[metric]

model = sm.OLS(Y, X).fit()
summary = model.summary()

# with open(f"{output_dir}/{metric}-summary.tex", "w") as fh:
#     fh.write(summary.as_latex())

# with open(f"{output_dir}/{metric}-summary.html", "w") as fh:
#     fh.write(summary.as_html())

# with open(f"{output_dir}/{metric}.txt", "w") as fh:
#     fh.write(summary.as_text())
print(index_list_names[i])
display(pd.read_html(StringIO(summary.tables[1].as_html()), header=0, index_col=0)[0])

In [None]:
# One seaborn line plot per (run, metric): metric against user_count, saved to
# images_final/sns/<run>/<metric>.png.
for index_name, df_run in zip(index_list_names, df_list):
    print(index_name)
    os.makedirs(f"images_final/sns/{index_name}", exist_ok=True)
    # df_merged = df_merged_list[i]

    if df_run.empty:
        # Runs without a CSV are loaded as empty frames; nothing to plot.
        continue

    # plot x cpu y vmaf
    # fig, ax = plt.subplots()
    # sns.lineplot(data=df_merged, x="cpu", y="vmaf", ax=ax)
    df = df_run.sort_values(by=["user_count"])
    x_upper = df["user_count"].max() + 1

    for metric in metrics:
        fig, ax = plt.subplots()
        if metric in qoe_metrics_normalized:
            # The fixed 0-1 range and ticks apply only to normalized metrics;
            # forcing these ticks on the other metrics would leave their y
            # axis without ticks in the data range.
            ax.set_ylim(0, 1.05)
            ax.set_yticks(np.arange(0, 1.05, 0.1))
        ax.set_xlim(0, x_upper)
        sns.lineplot(data=df, x="user_count", y=metric, ax=ax)
        ax.grid()
        fig.savefig(f"images_final/sns/{index_name}/{metric}.png")

        # Render now and release the figure; otherwise every figure of the
        # loop stays open until the cell finishes.
        plt.show()
        plt.close(fig)


# algo = Pelt(model="rbf").fit(df["vmaf"].values)
# result = algo.predict(pen=10)
# for breakpoint_idx in result:
#     breakpoint_value = df["user_count"].iloc[breakpoint_idx]
#     ax.scatter(breakpoint_value, df["vmaf"].iloc[breakpoint_idx], color='red', marker='o', s=100)

# #vertical line

# ax.axvline(x=result[0], linestyle="--", alpha=0.7, label="Change point")

In [None]:
# Placeholder: prints the first 48 run names.
for run_name in index_list_names[:48]:
    print(run_name)
    # TODO: paint the webrtc stats results

In [28]:

pd.set_option('display.max_colwidth', None)
import math
minio_items = pd.read_csv("dfs_final/minio_items.csv")

# Count rows per (index, error). value_counts keeps only these two columns
# plus "count", so the derived columns are created after it, not before.
minio_items = (
    minio_items.value_counts(["index", "error"])
    .reset_index()
    .sort_values(by=["index", "error"])
)

# Each run is handled once, even if it has both an error=True and an
# error=False row. Loop names are distinct from the notebook-level `index`,
# `index_name` and `df`, which earlier cells use.
for run_name in minio_items["index"].unique():
    df_run = df_list[index_list_names.index(run_name)]

    max_user_count = int(df_run["user_count"].max())
    name_parts = run_name.split("-")
    publishers = int(name_parts[4][:-1])
    subscribers = int(name_parts[5][:-1]) if name_parts[5].endswith("s") else 0

    # Sessions needed to hold max_user_count users, rounded up.
    sessions = math.ceil(max_user_count / (publishers + subscribers))
    # Every publisher is received by every other publisher and by every
    # subscriber.
    videos_per_session = publishers * (publishers - 1) + subscribers * publishers

    run_mask = minio_items["index"] == run_name
    minio_items.loc[run_mask, "user_count"] = max_user_count
    minio_items.loc[run_mask, "sessions"] = sessions
    minio_items.loc[run_mask, "videos per session"] = videos_per_session
    minio_items.loc[run_mask, "total possible videos"] = sessions * videos_per_session

minio_items = minio_items[["index", "error", "total possible videos", "count", "sessions", "videos per session"]]
minio_items

Unnamed: 0,index,error,total possible videos,count,sessions,videos per session
49,loadtest-webrtc-final-kurento-2p-t3medium,False,12.0,12,6.0,2.0
46,loadtest-webrtc-final-kurento-2p-t3medium-2,False,48.0,40,24.0,2.0
45,loadtest-webrtc-final-kurento-2p-t3medium-retry,False,54.0,48,27.0,2.0
43,loadtest-webrtc-final-kurento-2p-t3medium-wait,False,60.0,58,30.0,2.0
22,loadtest-webrtc-final-kurento-3p-10s-t3medium,False,216.0,207,6.0,36.0
23,loadtest-webrtc-final-kurento-3p-10s-t3medium-2,False,216.0,205,6.0,36.0
25,loadtest-webrtc-final-kurento-3p-10s-t3medium-retry,False,324.0,186,9.0,36.0
35,loadtest-webrtc-final-kurento-3p-10s-t3medium-retry,True,324.0,95,9.0,36.0
26,loadtest-webrtc-final-kurento-3p-10s-t3medium-wait,False,216.0,175,6.0,36.0
42,loadtest-webrtc-final-kurento-3p-20s-t3medium,False,462.0,66,7.0,66.0


In [29]:
# Per-run statistics over the items whose "error" flag is False, followed by
# a count of runs per success-ratio bucket.
raw_items = pd.read_csv("dfs_final/minio_items.csv")
ok_items = raw_items[raw_items["error"] == False]
minio_items = ok_items.value_counts(["index"]).reset_index().sort_values(by=["index"])

# Integer placeholders; the loop below fills them run by run.
minio_items["sessions"] = 0
minio_items["videos per session"] = 0

for index in minio_items["index"]:
    run_position = index_list_names.index(index)
    peak_users = df_list[run_position]["user_count"].max().astype(int)

    # Publisher / subscriber counts are encoded in the run name.
    fields = index.split("-")
    publishers = int(fields[4][:-1])
    subscribers = 0
    if fields[5].endswith("s"):
        subscribers = int(fields[5][:-1])

    rows = minio_items["index"] == index

    minio_items.loc[rows, "user_count"] = peak_users
    session_counts = minio_items.loc[rows, "user_count"].div(publishers + subscribers).apply(math.ceil)
    minio_items.loc[rows, "sessions"] = session_counts
    minio_items.loc[rows, "videos per session"] = publishers * (publishers - 1) + subscribers * publishers

    possible_videos = minio_items.loc[rows, "sessions"] * minio_items.loc[rows, "videos per session"]
    minio_items.loc[rows, "total possible videos"] = possible_videos
    minio_items.loc[rows, "success ratio"] = (
        minio_items.loc[rows, "count"] / minio_items.loc[rows, "total possible videos"] * 100
    )

minio_items = minio_items[["index", "total possible videos", "count", "success ratio", "sessions", "videos per session"]]

# Lower bounds are inclusive and upper bounds exclusive in every bucket.
ratio = minio_items["success ratio"]
bucket_masks = [
    ("< 5 %", ratio < 5),
    ("5 % < n < 25 %", (ratio >= 5) & (ratio < 25)),
    ("25 % < n < 50 %", (ratio >= 25) & (ratio < 50)),
    ("50 % < n < 75 %", (ratio >= 50) & (ratio < 75)),
    ("75 % < n < 95 %", (ratio >= 75) & (ratio < 95)),
    ("> 95 %", ratio >= 95),
]
for label, in_bucket in bucket_masks:
    print(f"{label}: {len(minio_items[in_bucket].index)}")
print(f"Total: {len(minio_items.index)}")

minio_items

< 5 %: 3
5 % < n < 25 %: 6
25 % < n < 50 %: 17
50 % < n < 75 %: 12
75 % < n < 95 %: 7
> 95 %: 3
Total: 48


Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
47,loadtest-webrtc-final-kurento-2p-t3medium,12.0,12,100.0,6,2
44,loadtest-webrtc-final-kurento-2p-t3medium-2,48.0,40,83.333333,24,2
43,loadtest-webrtc-final-kurento-2p-t3medium-retry,54.0,48,88.888889,27,2
41,loadtest-webrtc-final-kurento-2p-t3medium-wait,60.0,58,96.666667,30,2
22,loadtest-webrtc-final-kurento-3p-10s-t3medium,216.0,207,95.833333,6,36
23,loadtest-webrtc-final-kurento-3p-10s-t3medium-2,216.0,205,94.907407,6,36
25,loadtest-webrtc-final-kurento-3p-10s-t3medium-retry,324.0,186,57.407407,9,36
26,loadtest-webrtc-final-kurento-3p-10s-t3medium-wait,216.0,175,81.018519,6,36
40,loadtest-webrtc-final-kurento-3p-20s-t3medium,462.0,66,14.285714,7,66
24,loadtest-webrtc-final-kurento-3p-20s-t3medium-2,264.0,200,75.757576,4,66


In [30]:
# Runs whose success ratio is below 5 %.
minio_items.loc[minio_items["success ratio"].lt(5)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
45,loadtest-webrtc-final-mediasoup-3p-20s-t3medium-retry,990.0,34,3.434343,15,66
42,loadtest-webrtc-final-mediasoup-3p-40s-t3medium-wait,1008.0,49,4.861111,8,126
46,loadtest-webrtc-final-mediasoup-5p-t3medium-retry,900.0,16,1.777778,45,20


In [31]:
# Runs whose success ratio is at least 5 % and below 25 %.
minio_items.loc[minio_items["success ratio"].ge(5) & minio_items["success ratio"].lt(25)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
40,loadtest-webrtc-final-kurento-3p-20s-t3medium,462.0,66,14.285714,7,66
28,loadtest-webrtc-final-kurento-3p-20s-t3medium-retry,792.0,130,16.414141,12,66
39,loadtest-webrtc-final-kurento-3p-40s-t3medium-2,504.0,81,16.071429,4,126
38,loadtest-webrtc-final-kurento-3p-40s-t3medium-retry,882.0,82,9.297052,7,126
30,loadtest-webrtc-final-mediasoup-3p-20s-t3medium-wait,858.0,127,14.801865,13,66
9,loadtest-webrtc-final-mediasoup-3p-40s-t3medium-2,1260.0,307,24.365079,10,126


In [32]:
# Runs whose success ratio is at least 25 % and below 50 %.
minio_items.loc[minio_items["success ratio"].ge(25) & minio_items["success ratio"].lt(50)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
27,loadtest-webrtc-final-kurento-3p-20s-t3medium-wait,330.0,155,46.969697,5,66
35,loadtest-webrtc-final-kurento-8p-t3medium-retry,224.0,90,40.178571,4,56
11,loadtest-webrtc-final-mediasoup-3p-10s-t3medium,792.0,306,38.636364,22,36
5,loadtest-webrtc-final-mediasoup-3p-10s-t3medium-2,936.0,314,33.547009,26,36
20,loadtest-webrtc-final-mediasoup-3p-10s-t3medium-retry,828.0,259,31.280193,23,36
1,loadtest-webrtc-final-mediasoup-3p-10s-t3medium-wait,756.0,340,44.973545,21,36
6,loadtest-webrtc-final-mediasoup-3p-20s-t3medium,858.0,312,36.363636,13,66
4,loadtest-webrtc-final-mediasoup-3p-20s-t3medium-2,990.0,318,32.121212,15,66
8,loadtest-webrtc-final-mediasoup-3p-40s-t3medium,1008.0,308,30.555556,8,126
18,loadtest-webrtc-final-mediasoup-3p-40s-t3medium-retry,1008.0,286,28.373016,8,126


In [33]:
# Runs whose success ratio is at least 25 % and below 50 %.
# NOTE(review): this cell applies the same 25–50 % filter as the cell directly
# above it — it looks like a leftover duplicate.
minio_items.loc[minio_items["success ratio"].ge(25) & minio_items["success ratio"].lt(50)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
27,loadtest-webrtc-final-kurento-3p-20s-t3medium-wait,330.0,155,46.969697,5,66
35,loadtest-webrtc-final-kurento-8p-t3medium-retry,224.0,90,40.178571,4,56
11,loadtest-webrtc-final-mediasoup-3p-10s-t3medium,792.0,306,38.636364,22,36
5,loadtest-webrtc-final-mediasoup-3p-10s-t3medium-2,936.0,314,33.547009,26,36
20,loadtest-webrtc-final-mediasoup-3p-10s-t3medium-retry,828.0,259,31.280193,23,36
1,loadtest-webrtc-final-mediasoup-3p-10s-t3medium-wait,756.0,340,44.973545,21,36
6,loadtest-webrtc-final-mediasoup-3p-20s-t3medium,858.0,312,36.363636,13,66
4,loadtest-webrtc-final-mediasoup-3p-20s-t3medium-2,990.0,318,32.121212,15,66
8,loadtest-webrtc-final-mediasoup-3p-40s-t3medium,1008.0,308,30.555556,8,126
18,loadtest-webrtc-final-mediasoup-3p-40s-t3medium-retry,1008.0,286,28.373016,8,126


In [34]:
# Runs whose success ratio is at least 50 % and below 75 %.
minio_items.loc[minio_items["success ratio"].ge(50) & minio_items["success ratio"].lt(75)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
25,loadtest-webrtc-final-kurento-3p-10s-t3medium-retry,324.0,186,57.407407,9,36
19,loadtest-webrtc-final-kurento-3p-40s-t3medium,378.0,277,73.280423,3,126
21,loadtest-webrtc-final-kurento-3p-40s-t3medium-wait,378.0,223,58.994709,3,126
37,loadtest-webrtc-final-kurento-5p-t3medium-2,140.0,84,60.0,7,20
36,loadtest-webrtc-final-kurento-5p-t3medium-retry,140.0,90,64.285714,7,20
29,loadtest-webrtc-final-kurento-8p-t3medium,224.0,129,57.589286,4,56
32,loadtest-webrtc-final-kurento-8p-t3medium-2,168.0,111,66.071429,3,56
31,loadtest-webrtc-final-kurento-8p-t3medium-wait,168.0,118,70.238095,3,56
13,loadtest-webrtc-final-mediasoup-2p-t3medium,452.0,301,66.59292,226,2
12,loadtest-webrtc-final-mediasoup-2p-t3medium-2,414.0,301,72.705314,207,2


In [35]:
# Runs whose success ratio is at least 75 % and below 95 %.
minio_items.loc[minio_items["success ratio"].ge(75) & minio_items["success ratio"].lt(95)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
44,loadtest-webrtc-final-kurento-2p-t3medium-2,48.0,40,83.333333,24,2
43,loadtest-webrtc-final-kurento-2p-t3medium-retry,54.0,48,88.888889,27,2
23,loadtest-webrtc-final-kurento-3p-10s-t3medium-2,216.0,205,94.907407,6,36
26,loadtest-webrtc-final-kurento-3p-10s-t3medium-wait,216.0,175,81.018519,6,36
24,loadtest-webrtc-final-kurento-3p-20s-t3medium-2,264.0,200,75.757576,4,66
34,loadtest-webrtc-final-kurento-5p-t3medium,120.0,96,80.0,6,20
33,loadtest-webrtc-final-kurento-5p-t3medium-wait,120.0,106,88.333333,6,20


In [37]:
# Runs whose success ratio is 95 % or higher.
minio_items.loc[minio_items["success ratio"].ge(95)]

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
47,loadtest-webrtc-final-kurento-2p-t3medium,12.0,12,100.0,6,2
41,loadtest-webrtc-final-kurento-2p-t3medium-wait,60.0,58,96.666667,30,2
22,loadtest-webrtc-final-kurento-3p-10s-t3medium,216.0,207,95.833333,6,36


In [23]:
# Per-run statistics over the items whose "error" flag is True.
minio_items = pd.read_csv("dfs_final/minio_items.csv")

minio_items = minio_items[minio_items["error"] == True]
minio_items = minio_items.value_counts(["index"]).reset_index().sort_values(by=["index"])


# Integer placeholders; the loop below fills them run by run.
minio_items["sessions"] = 0
minio_items["videos per session"] = 0

for index in minio_items["index"].unique():
    run_df = df_list[index_list_names.index(index)]
    max_user_count = int(run_df["user_count"].max())

    # The run name is split on "-": field 4 (minus its last character) is
    # parsed as the publisher count; field 5 is parsed as the subscriber
    # count only when it ends in "s", otherwise there are no subscribers.
    name_parts = index.split("-")
    publishers = int(name_parts[4][:-1])
    subscribers = int(name_parts[5][:-1]) if name_parts[5].endswith("s") else 0

    # Partially filled sessions still count, hence the ceiling.
    sessions = math.ceil(max_user_count / (publishers + subscribers))
    videos_per_session = publishers * (publishers - 1) + subscribers * publishers
    total_possible_videos = sessions * videos_per_session

    index_mask = minio_items["index"] == index
    minio_items.loc[index_mask, "user_count"] = max_user_count
    minio_items.loc[index_mask, "sessions"] = sessions
    minio_items.loc[index_mask, "videos per session"] = videos_per_session
    minio_items.loc[index_mask, "total possible videos"] = total_possible_videos
    # Only error=True items are counted in this cell, so the percentage is the
    # share of possible videos that failed — label it as an error ratio.
    minio_items.loc[index_mask, "error ratio"] = (
        minio_items.loc[index_mask, "count"] / total_possible_videos * 100
    )

minio_items = minio_items[["index", "total possible videos", "count", "error ratio", "sessions", "videos per session"]]

minio_items

Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
0,loadtest-webrtc-final-kurento-3p-10s-t3medium-...,324.0,95,29.320988,9,36
2,loadtest-webrtc-final-kurento-3p-20s-t3medium,462.0,3,0.649351,7,66
1,loadtest-webrtc-final-kurento-3p-40s-t3medium,378.0,89,23.544974,3,126
3,loadtest-webrtc-final-kurento-3p-40s-t3medium-...,882.0,3,0.340136,7,126


In [24]:
# Per-run statistics over all stored items: the per-(run, error) counts are
# summed per run, so failed and non-failed items are counted together.
minio_items = (
    pd.read_csv("dfs_final/minio_items.csv")
    .value_counts(["index", "error"])
    .reset_index()
    .sort_values(by=["index", "error"])
    # remove error column and add the count of success and error in the same index
    .drop(columns=["error"])
    .groupby(["index"])
    .sum()
    .reset_index()
)

# Integer placeholders; the loop below fills them run by run.
minio_items["sessions"] = 0
minio_items["videos per session"] = 0

for index in minio_items["index"]:
    run_position = index_list_names.index(index)
    peak_users = df_list[run_position]["user_count"].max().astype(int)

    # Publisher / subscriber counts are encoded in the run name.
    fields = index.split("-")
    publishers = int(fields[4][:-1])
    subscribers = 0
    if fields[5].endswith("s"):
        subscribers = int(fields[5][:-1])

    rows = minio_items["index"] == index

    minio_items.loc[rows, "user_count"] = peak_users
    session_counts = minio_items.loc[rows, "user_count"].div(publishers + subscribers).apply(math.ceil)
    minio_items.loc[rows, "sessions"] = session_counts
    minio_items.loc[rows, "videos per session"] = publishers * (publishers - 1) + subscribers * publishers

    possible_videos = minio_items.loc[rows, "sessions"] * minio_items.loc[rows, "videos per session"]
    minio_items.loc[rows, "total possible videos"] = possible_videos
    # NOTE(review): "count" includes failed items here, so this "success ratio"
    # is really the share of possible videos that were stored at all.
    minio_items.loc[rows, "success ratio"] = (
        minio_items.loc[rows, "count"] / minio_items.loc[rows, "total possible videos"] * 100
    )

minio_items = minio_items[["index", "total possible videos", "count", "success ratio", "sessions", "videos per session"]]

# Lower bounds are inclusive and upper bounds exclusive in every bucket.
ratio = minio_items["success ratio"]
bucket_masks = [
    ("< 5 %", ratio < 5),
    ("5 % < n < 25 %", (ratio >= 5) & (ratio < 25)),
    ("25 % < n < 50 %", (ratio >= 25) & (ratio < 50)),
    ("50 % < n < 75 %", (ratio >= 50) & (ratio < 75)),
    ("75 % < n < 95 %", (ratio >= 75) & (ratio < 95)),
    ("> 95 %", ratio >= 95),
]
for label, in_bucket in bucket_masks:
    print(f"{label}: {len(minio_items[in_bucket].index)}")
print(f"Total: {len(minio_items.index)}")

minio_items

< 5 %: 3
5 % < n < 25 %: 6
25 % < n < 50 %: 17
50 % < n < 75 %: 10
75 % < n < 95 %: 8
> 95 %: 4
Total: 48


Unnamed: 0,index,total possible videos,count,success ratio,sessions,videos per session
0,loadtest-webrtc-final-kurento-2p-t3medium,12.0,12,100.0,6,2
1,loadtest-webrtc-final-kurento-2p-t3medium-2,48.0,40,83.333333,24,2
2,loadtest-webrtc-final-kurento-2p-t3medium-retry,54.0,48,88.888889,27,2
3,loadtest-webrtc-final-kurento-2p-t3medium-wait,60.0,58,96.666667,30,2
4,loadtest-webrtc-final-kurento-3p-10s-t3medium,216.0,207,95.833333,6,36
5,loadtest-webrtc-final-kurento-3p-10s-t3medium-2,216.0,205,94.907407,6,36
6,loadtest-webrtc-final-kurento-3p-10s-t3medium-...,324.0,281,86.728395,9,36
7,loadtest-webrtc-final-kurento-3p-10s-t3medium-...,216.0,175,81.018519,6,36
8,loadtest-webrtc-final-kurento-3p-20s-t3medium,462.0,69,14.935065,7,66
9,loadtest-webrtc-final-kurento-3p-20s-t3medium-2,264.0,200,75.757576,4,66
