In [None]:
%pip install -r requirements.txt

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
from datetime import datetime, timedelta
import statsmodels.api as sm

In [None]:
# Every run is named
#   loadtest-webrtc-final-<media server>-<typology>-t3medium<repeat suffix>
# The order is significant: other cells address runs by position
# (slices of the combined list, and a numeric index into it).
_RUN_TYPOLOGIES = ("2p", "5p", "8p", "3p-10s", "3p-20s", "3p-40s")
_RUN_REPEAT_SUFFIXES = ("", "-2", "-wait", "-retry")


def _build_run_names(media_server):
    """Return the run names of ``media_server``: typology-major, repeat-minor."""
    return [
        f"loadtest-webrtc-final-{media_server}-{typology}-t3medium{suffix}"
        for typology in _RUN_TYPOLOGIES
        for suffix in _RUN_REPEAT_SUFFIXES
    ]


index_kurento_names = _build_run_names("kurento")

index_mediasoup_names = _build_run_names("mediasoup")

# Kurento runs first, then mediasoup runs.
index_list_names = index_kurento_names + index_mediasoup_names

# Multiplier that turns a fragment counter (cut_index) into seconds.
seconds_per_fragment = 17

packet_types = ["inbound", "outbound"]
node_types = ["browseremulator", "masternode", "medianode"]

# Per-run "from"/"to" boundaries, indexed by run name and expressed in UTC.
start_end_times = pd.read_json("dfs_final/start-end-times.json", orient="index")
for boundary in ("from", "to"):
    start_end_times[boundary] = pd.to_datetime(
        start_end_times[boundary], format="ISO8601"
    ).dt.tz_convert("UTC")


def timestamp_to_secs(df_node, index, cpu_times=True):
    """Clip a frame at a run's end time and add a relative-seconds column.

    Args:
        df_node: DataFrame with an "@timestamp" column of ISO 8601 strings
            or datetimes. It is not modified.
        index: Row label of the run in the module-level ``start_end_times``.
        cpu_times: Selects how the reference window is built from the data
            span and the recorded "from"/"to" of the run. True uses the
            later start and the earlier end; False uses the earlier start
            and the later end.

    Returns:
        A new DataFrame holding the rows whose "@timestamp" is strictly
        before the window end, with "@timestamp" parsed to datetimes and a
        "timestamp_secs" column giving seconds elapsed since the window
        start. Rows are not filtered on the start side, so offsets can be
        negative.
    """
    df_tmp = df_node.copy()
    df_tmp["@timestamp"] = pd.to_datetime(df_tmp["@timestamp"], format="ISO8601")
    tmp_serie = pd.Series(
        [df_tmp["@timestamp"].max(), start_end_times.loc[index, "to"]]
    )
    end_time = tmp_serie.min() if cpu_times else tmp_serie.max()
    # Strict comparison: a row stamped exactly at end_time is dropped.
    # Copy the filtered result so the column assignment below writes to an
    # independent frame instead of triggering SettingWithCopyWarning.
    df_tmp = df_tmp[df_tmp["@timestamp"] < end_time].copy()
    tmp_serie = pd.Series(
        [df_tmp["@timestamp"].min(), start_end_times.loc[index, "from"]]
    )
    start_time = tmp_serie.max() if cpu_times else tmp_serie.min()
    df_tmp["timestamp_secs"] = (df_tmp["@timestamp"] - start_time).dt.total_seconds()
    return df_tmp

In [None]:
def _read_run_csv(run_name):
    """Load ``dfs_final/<run_name>.csv``; return an empty frame if it is absent."""
    csv_path = f"dfs_final/{run_name}.csv"
    if not os.path.isfile(csv_path):
        return pd.DataFrame()
    return pd.read_csv(csv_path)


# One frame per run, positionally aligned with index_list_names.
df_list = [_read_run_csv(run_name) for run_name in index_list_names]


# Rewrite each stream's fragment counter (cut_index) in place as
#   cut_index * seconds_per_fragment + offset
# where offset is the whole number of seconds between the start of the run and
# the timestamp of the stream's fragment 0. A stream is identified by
# (userFrom, userTo, session). Streams without a fragment 0 are left untouched.
stream_keys = ["userFrom", "userTo", "session"]

for run_name, df_user in zip(index_list_names, df_list):
    if df_user.empty:
        continue
    start_test_time = start_end_times.loc[run_name, "from"]

    df_user["@timestamp"] = pd.to_datetime(df_user["@timestamp"], format="ISO8601")
    # Snapshot the fragment-0 rows before any rescaling, one per stream, so a
    # duplicated fragment 0 cannot rescale the same stream twice.
    first_fragments = df_user[df_user["cut_index"] == 0].drop_duplicates(
        subset=stream_keys
    )
    for _, row in first_fragments.iterrows():
        time_diff = row["@timestamp"] - start_test_time
        # Whole elapsed seconds. Timedelta.seconds would only return the
        # seconds *component* (0..86399), which is wrong for negative or
        # multi-day differences.
        offset_secs = time_diff // pd.Timedelta(seconds=1)

        same_stream = (
            (df_user["userFrom"] == row["userFrom"])
            & (df_user["userTo"] == row["userTo"])
            & (df_user["session"] == row["session"])
        )
        df_user.loc[same_stream, "cut_index"] = (
            df_user.loc[same_stream, "cut_index"] * seconds_per_fragment
            + offset_secs
        )

# Parse per-run metadata out of each run name.
index_data = []

data_types = node_types + packet_types

for run_name in index_list_names:
    parts = run_name.split("-")

    # Fifth token is "<N>p", e.g. "2p" -> 2 publishers.
    publishers = int(parts[4][:-1])

    # Sixth token is "<N>s" when the name carries a subscriber count
    # (e.g. "10s" -> 10); any other token means no subscribers.
    subscribers_parsed = parts[5]
    subscribers = (
        int(subscribers_parsed[:-1]) if subscribers_parsed[-1] == "s" else 0
    )

    # Last token: "t3medium" for the first repeat, otherwise the repeat tag
    # ("2", "wait", "retry"). Names ending in a data-type token are skipped.
    run_suffix = parts[-1]
    if run_suffix in data_types:
        continue

    index_data.append(
        {
            "type": "1" if run_suffix == "t3medium" else run_suffix,
            "publishers": publishers,
            "subscribers": subscribers,
            "users": publishers + subscribers,
        }
    )

In [None]:
# Descriptive statistics of the "vmaf" column for every run that has data.
# Values are read straight from the Series so that no module-level name
# rebinds the builtins ``min`` / ``max`` for the rest of the kernel session.
data = []
for run_name, df_run in zip(index_list_names, df_list):
    if df_run.empty:
        continue
    vmaf = df_run["vmaf"]
    data.append(
        [
            run_name,
            vmaf.mean(),
            vmaf.median(),
            vmaf.std(),
            vmaf.min(),
            vmaf.max(),
        ]
    )

pd.DataFrame(data, columns=["index_type", "average", "median", "std", "min", "max"])

In [None]:
# INDIVIDUAL PLOTS PARAMETERS

# QoE metric column names, split into the two groups used by later cells.
qoe_metrics_normalized = ["vmaf", "msssim", "ssim", "vifp", "pesq", "visqol"]
qoe_metrics_not_normalized = ["psnr", "psnrhvs", "psnrhvsm"]

# Position in index_list_names of the run to plot, and its name.
index = 45
index_name = index_list_names[index]

# Metric to use when only a single metric is displayed, and its label.
metric = "vmaf"
metric_label = metric.upper()

show_full_range = True

In [None]:
# For every normalized QoE metric, draw one figure per block of four
# consecutive runs (index_list_names lists the four repeats of a typology
# consecutively), plotting the per-cut_index mean of each run.
plt.rcParams["figure.figsize"] = [25, 10]
os.makedirs("images_final", exist_ok=True)

runs_per_figure = 4

for metric in qoe_metrics_normalized:
    # Group by position in the run list rather than by a running counter of
    # non-empty frames: a missing CSV must not shift later runs into the wrong
    # figure, nor leave the final figure unlabelled and unsaved.
    for group_start in range(0, len(index_list_names), runs_per_figure):
        group_stop = group_start + runs_per_figure
        runs = [
            (run_name, df_run)
            for run_name, df_run in zip(
                index_list_names[group_start:group_stop],
                df_list[group_start:group_stop],
            )
            if not df_run.empty
        ]
        if not runs:
            continue

        fig, ax = plt.subplots()
        for plot_name, df_run in runs:
            df_mean = df_run.groupby(["cut_index"]).mean(numeric_only=True)
            if df_mean.empty:
                continue
            ax.plot(df_mean.index, df_mean[metric], label=plot_name)
            # Dashed markers at the last and first sample of the run.
            ax.axvline(
                x=df_mean.index.max(),
                linestyle="--",
                alpha=0.7,
            )
            ax.axvline(
                x=df_mean.index.min(),
                linestyle="--",
                alpha=0.7,
            )

        ax.set_xlabel("timestamp (seconds)")
        ax.set_ylabel(metric)
        ax.grid()
        ax.set_ylim(-0.5, 1.05)
        ax.set_yticks(np.arange(-0.5, 1.05, 0.05))
        fig.legend(loc="lower left")
        # Title and file name carry the name of the last run in the group.
        fig.suptitle(f"{metric} over time (mean, {plot_name})")
        fig.savefig(f"images_final/{plot_name}_{metric}.png")

In [None]:
# One figure per (typology, media server): media-node CPU over time for every
# matching run, with a dashed vertical line at each run's last sample.
plt.rcParams["figure.figsize"] = [25, 10]
os.makedirs("images_final", exist_ok=True)

typologies = ["2p", "5p", "8p", "3p-10s", "3p-20s", "3p-40s"]
media_nodes = ["kurento", "mediasoup"]
colors = ["blue", "orange", "green", "red"]
for typology in typologies:
    for media_node in media_nodes:
        fig, ax = plt.subplots()
        # Runs are selected by substring match on the run name.
        target_names = [
            name
            for name in index_list_names
            if typology in name and media_node in name
        ]

        for i, target_name in enumerate(target_names):
            df_node = pd.read_csv(f"dfs_final/{target_name}-medianode.csv")
            df_node = timestamp_to_secs(df_node, target_name)
            df_node = df_node.drop(columns=["@timestamp", "memory"]).dropna()
            ax.plot(
                df_node["timestamp_secs"],
                df_node["cpu"],
                color=colors[i],
                label=target_name,
            )
            # Drawn on the explicit axes, not through the pyplot state machine.
            ax.axvline(
                x=df_node["timestamp_secs"].max(),
                color=colors[i],
                linestyle="--",
                alpha=0.7,
                label="End of test " + target_name,
            )

        ax.set_xlabel("timestamp (seconds)")
        # NOTE(review): the label says "%" but the axis spans 0..1.05, which
        # suggests the values are fractions - confirm the unit.
        ax.set_ylabel("CPU usage (%)")
        ax.grid()
        ax.set_ylim(0, 1.05)
        ax.set_yticks(np.arange(0, 1.05, 0.05))

        fig.suptitle(f"CPU usage over time ({typology}, {media_node})")
        fig.legend(loc="right")
        fig.savefig(f"images_final/{typology}_{media_node}_cpu.png")

In [None]:
# Single-run overview: mean normalized QoE metrics, media-node CPU and memory
# on the left axis, and mean inbound WebRTC statistics on a secondary axis.
plt.rcParams["figure.figsize"] = [25, 10]
fig, ax = plt.subplots()

# Secondary y-axis sharing the x-axis with ``ax``.
webrtc_stats_y = ax.twinx()

# Per cut_index: keep the first timestamp and average every QoE metric.
aggregation_map = {"@timestamp": "first"}
for metric in qoe_metrics_normalized:
    aggregation_map[metric] = "mean"

df_tmp = df_list[index].groupby("cut_index").agg(aggregation_map)
df_tmp = timestamp_to_secs(df_tmp, index_name)
df_node = pd.read_csv(f"dfs_final/{index_name}-medianode.csv")
df_node = timestamp_to_secs(df_node, index_name)
df = df_tmp

df_node_cpu = df_node.drop(columns=["memory"]).dropna()
df_node_memory = df_node.drop(columns=["cpu"]).dropna()


def _load_mean_webrtc_stats(direction):
    """Load one WebRTC stats CSV of the selected run, averaged per timestamp.

    ``direction`` is the file-name token ("inbound" or "outbound"). The
    result keeps "@timestamp" as a regular column and has been passed through
    ``timestamp_to_secs``.
    """
    df_stats = pd.read_csv(f"dfs_final/{index_name}-webrtc-stats-{direction}.csv")
    df_stats["@timestamp"] = pd.to_datetime(df_stats["@timestamp"], format="ISO8601")
    df_stats = df_stats.drop(columns="user_id")
    # as_index=False keeps "@timestamp" as a column; timestamp_to_secs reads
    # it by column name and would raise KeyError if it became the index.
    df_stats = df_stats.groupby("@timestamp", as_index=False).mean()
    return timestamp_to_secs(df_stats, index_name)


df_stats_inbound = _load_mean_webrtc_stats("inbound")
df_stats_outbound = _load_mean_webrtc_stats("outbound")

qoe_zorder = 10
resource_zorder = 0

qoe_linewidth = 3
resource_linewidth = 1

for metric in qoe_metrics_normalized:
    ax.plot(
        df["@timestamp"],
        df[metric],
        label=metric,
        marker="o",
        zorder=qoe_zorder,
        lw=qoe_linewidth,
    )
ax.plot(
    df_node_cpu["@timestamp"],
    df_node_cpu["cpu"],
    "g",
    label="cpu",
    zorder=resource_zorder,
    lw=resource_linewidth,
    linestyle="--",
)
ax.plot(
    df_node_memory["@timestamp"],
    df_node_memory["memory"],
    "c",
    label="memory",
    zorder=resource_zorder,
    lw=resource_linewidth,
    linestyle="--",
)

# Columns that are time axes rather than statistics.
time_columns = ("@timestamp", "timestamp_secs")
# Maybe re-add qpSum?
excluded_stat_fragments = ("bytesSent", "packetsSent", "qpSum", "framesEncoded")

for column in df_stats_inbound.columns:
    if column in time_columns:
        continue
    if any(fragment in column for fragment in excluded_stat_fragments):
        continue
    # Plot against the timestamps so the series line up with the shared x-axis.
    webrtc_stats_y.plot(
        df_stats_inbound["@timestamp"], df_stats_inbound[column], label=column
    )

ax.set_title(f"QOE metric (normalized) over time (mean, worker data, {index_name})")
ax.set_xlabel("timestamp (day hour:minute)")
ax.set_ylabel("QOE metric (normalized), CPU %, Memory %")
webrtc_stats_y.set_ylabel("WebRTC stats")
ax.grid()
ax.legend(loc="upper left")
webrtc_stats_y.legend(loc="center left")

In [11]:
# OLS regression of every QoE metric on the "user_count" column, all runs
# pooled. Each summary is written as LaTeX, HTML and plain text.
output_dir = "results_all"
os.makedirs(output_dir, exist_ok=True)

df_tmp = pd.concat(df_list, ignore_index=True)

X = sm.add_constant(df_tmp["user_count"])  # adding a constant

metrics = qoe_metrics_normalized + qoe_metrics_not_normalized

for metric in metrics:
    Y = df_tmp[metric]

    # missing="drop" excludes observations with NaN in Y or X; the default
    # performs no NaN checking at all.
    model = sm.OLS(Y, X, missing="drop").fit()
    summary = model.summary()

    for file_name, render in (
        (f"{metric}-all-summary.tex", summary.as_latex),
        (f"{metric}-all-summary.html", summary.as_html),
        (f"{metric}-all.txt", summary.as_text),
    ):
        with open(f"{output_dir}/{file_name}", "w") as fh:
            fh.write(render())

In [19]:
# OLS regression of every QoE metric on the "user_count" column, Kurento runs
# only. Each summary is written as LaTeX, HTML and plain text.
output_dir = "results_kurento"
os.makedirs(output_dir, exist_ok=True)

# df_list is aligned with index_list_names, which lists the Kurento runs
# first; derive the slice bound from that list instead of hard-coding it.
df_tmp = pd.concat(df_list[: len(index_kurento_names)], ignore_index=True)

X = sm.add_constant(df_tmp["user_count"])  # adding a constant

# Defined here so the cell does not rely on another cell having run first.
metrics = qoe_metrics_normalized + qoe_metrics_not_normalized

for metric in metrics:
    Y = df_tmp[metric]

    # missing="drop" excludes observations with NaN in Y or X; the default
    # performs no NaN checking at all.
    model = sm.OLS(Y, X, missing="drop").fit()
    summary = model.summary()

    for file_name, render in (
        (f"{metric}-kurento-summary.tex", summary.as_latex),
        (f"{metric}-kurento-summary.html", summary.as_html),
        (f"{metric}-kurento.txt", summary.as_text),
    ):
        with open(f"{output_dir}/{file_name}", "w") as fh:
            fh.write(render())

In [13]:
# OLS regression of every QoE metric on the "user_count" column, mediasoup
# runs only. Each summary is written as LaTeX, HTML and plain text.
output_dir = "results_mediasoup"
os.makedirs(output_dir, exist_ok=True)

# df_list is aligned with index_list_names, where the mediasoup runs follow
# the Kurento runs; derive the slice bound from that list instead of
# hard-coding it.
df_tmp = pd.concat(df_list[len(index_kurento_names) :], ignore_index=True)

X = sm.add_constant(df_tmp["user_count"])  # adding a constant

# Defined here so the cell does not rely on another cell having run first.
metrics = qoe_metrics_normalized + qoe_metrics_not_normalized

for metric in metrics:
    Y = df_tmp[metric]

    # missing="drop" excludes observations with NaN in Y or X; the default
    # performs no NaN checking at all.
    model = sm.OLS(Y, X, missing="drop").fit()
    summary = model.summary()

    for file_name, render in (
        (f"{metric}-mediasoup-summary.tex", summary.as_latex),
        (f"{metric}-mediasoup-summary.html", summary.as_html),
        (f"{metric}-mediasoup.txt", summary.as_text),
    ):
        with open(f"{output_dir}/{file_name}", "w") as fh:
            fh.write(render())

In [14]:
# Join each run's QoE samples with its media-node CPU samples on a shared
# 10-second time grid. df_merged_list stays positionally aligned with
# index_list_names.
df_merged_list = []
for index_name, df_qoe in zip(index_list_names, df_list):
    if df_qoe.empty:
        # No QoE data for this run: timestamp_to_secs would fail on the
        # missing "@timestamp" column. Keep an empty placeholder so that
        # positional slices of df_merged_list still select the same runs.
        df_merged_list.append(pd.DataFrame())
        continue

    df_node = pd.read_csv(f"dfs_final/{index_name}-medianode.csv")
    df_node = df_node.drop(columns=["memory"]).dropna()
    df_tmp = timestamp_to_secs(df_qoe, index_name, False)
    df_node = timestamp_to_secs(df_node, index_name, False)
    # round(-1) snaps the offsets to the nearest multiple of 10 seconds.
    df_tmp["timestamp_secs"] = df_tmp["timestamp_secs"].round(-1)
    df_node["timestamp_secs"] = df_node["timestamp_secs"].round(-1)
    df_tmp = df_tmp.groupby(["timestamp_secs"]).mean(numeric_only=True)

    # NOTE(review): the QoE side is averaged per bucket but the CPU side is
    # not, so a bucket holding several CPU samples yields several merged rows
    # repeating the same QoE means - confirm this weighting is intended.
    merged_df = pd.merge(df_tmp, df_node, on="timestamp_secs", how="inner")

    df_merged_list.append(merged_df)

In [15]:
# OLS regression of every QoE metric on the media-node "cpu" column, all runs
# pooled. Each summary is written as LaTeX, HTML and plain text.
output_dir = "results_all_cpu"
os.makedirs(output_dir, exist_ok=True)

df_tmp = pd.concat(df_merged_list, ignore_index=True)

X = sm.add_constant(df_tmp["cpu"])  # adding a constant

metrics = qoe_metrics_normalized + qoe_metrics_not_normalized

for metric in metrics:
    Y = df_tmp[metric]

    # missing="drop" excludes observations with NaN in Y or X; the default
    # performs no NaN checking at all.
    model = sm.OLS(Y, X, missing="drop").fit()
    summary = model.summary()

    for file_name, render in (
        (f"{metric}-all-summary.tex", summary.as_latex),
        (f"{metric}-all-summary.html", summary.as_html),
        (f"{metric}-all.txt", summary.as_text),
    ):
        with open(f"{output_dir}/{file_name}", "w") as fh:
            fh.write(render())

In [21]:
# OLS regression of every QoE metric on the media-node "cpu" column, Kurento
# runs only. Each summary is written as LaTeX, HTML and plain text.
output_dir = "results_kurento_cpu"
os.makedirs(output_dir, exist_ok=True)

# df_merged_list is aligned with index_list_names, which lists the Kurento
# runs first; derive the slice bound from that list instead of hard-coding it.
df_tmp = pd.concat(df_merged_list[: len(index_kurento_names)], ignore_index=True)

X = sm.add_constant(df_tmp["cpu"])  # adding a constant

metrics = qoe_metrics_normalized + qoe_metrics_not_normalized

for metric in metrics:
    Y = df_tmp[metric]

    # missing="drop" excludes observations with NaN in Y or X; the default
    # performs no NaN checking at all.
    model = sm.OLS(Y, X, missing="drop").fit()
    summary = model.summary()

    for file_name, render in (
        (f"{metric}-kurento-summary.tex", summary.as_latex),
        (f"{metric}-kurento-summary.html", summary.as_html),
        (f"{metric}-kurento.txt", summary.as_text),
    ):
        with open(f"{output_dir}/{file_name}", "w") as fh:
            fh.write(render())

In [23]:
# OLS regression of every QoE metric on the media-node "cpu" column, mediasoup
# runs only. Each summary is written as LaTeX, HTML and plain text.
output_dir = "results_mediasoup_cpu"
os.makedirs(output_dir, exist_ok=True)

# df_merged_list is aligned with index_list_names, where the mediasoup runs
# follow the Kurento runs; derive the slice bound from that list instead of
# hard-coding it.
df_tmp = pd.concat(df_merged_list[len(index_kurento_names) :], ignore_index=True)

X = sm.add_constant(df_tmp["cpu"])  # adding a constant

# Defined here so the cell does not rely on another cell having run first.
metrics = qoe_metrics_normalized + qoe_metrics_not_normalized

for metric in metrics:
    Y = df_tmp[metric]

    # missing="drop" excludes observations with NaN in Y or X; the default
    # performs no NaN checking at all.
    model = sm.OLS(Y, X, missing="drop").fit()
    summary = model.summary()

    for file_name, render in (
        (f"{metric}-mediasoup-summary.tex", summary.as_latex),
        (f"{metric}-mediasoup-summary.html", summary.as_html),
        (f"{metric}-mediasoup.txt", summary.as_text),
    ):
        with open(f"{output_dir}/{file_name}", "w") as fh:
            fh.write(render())