In [None]:
import os
import sys
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from typing import List

sys.path.append(os.path.abspath(os.path.join("..")))

from src.enums import QRErrorCorrectionLevel
from src.utils import bytes_to_display
from constants import (
    BENCHMARKS_ALL_EAGER_PATH,
    BENCHMARKS_STRONG_SCALING_EAGER_PATH,
    BENCHMARKS_STRONG_SCALING_LAZY_PATH,
    BENCHMARKS_WEAK_SCALING_LAZY_PATH,
    BENCHMARKS_WEAK_SCALING_EAGER_PATH,
    CHUNK_SIZE_BYTES_HEADER,
    DECODE_THROUGHPUT_MB_S_HEADER,
    ENCODE_THROUGHPUT_MB_S_HEADER,
    ENCODING_LIBRARY_HEADER,
    ERROR_CORRECTION_LEVEL_HEADER,
    MAX_WORKERS_HEADER,
    MB,
    DATA_SIZE_BYTES_HEADER,
    READ_VIDEO_THROUGHPUT_MB_S_HEADER,
    TIME_DECODE_HEADER,
    TIME_DESERIALIZE_HEADER,
    TIME_ENCODE_HEADER,
    TIME_READ_VIDEO_HEADER,
    TIME_SERIALIZE_HEADER,
    TIME_TOTAL_HEADER,
    TIME_WRITE_VIDEO_HEADER,
    TOTAL_THROUGHPUT_MB_S_HEADER,
    VIDEO_SIZE_MB_HEADER,
    WRITE_VIDEO_THROUGHPUT_MB_S_HEADER,
    QR_ENCODING_LIBRARIES
)

# Figure defaults shared by the plotting functions in this notebook.
DEFAULT_FIGSIZE = (10, 6)
DEFAULT_DPI = 300
DEFAULT_TICK_ROTATION = 30
SVG_FORMAT = "svg"
MARKER_O = "o"
# Values handed to seaborn's `errorbar=` argument.
ERRORBAR_NONE = None
ERRORBAR_STANDARD_DEVIATION = "sd"
# Inclusive bounds of the worker counts shown on the worker-axis plots.
MAX_WORKERS_COUNT = 24
MIN_WORKERS_COUNT = 1

# One tick per worker count, from MIN_WORKERS_COUNT to MAX_WORKERS_COUNT.
MAX_WORKERS_RANGE = range(MIN_WORKERS_COUNT, MAX_WORKERS_COUNT + 1)

# Axis labels, legend titles and names of melted columns.
# CPU_USAGE_PERCENT_STRING and MEMORY_USAGE_PERCENT are also appended to a
# layer name ("Encode"/"Decode") to build the CSV column that is plotted.
LAYER_STRING = "Layer"
TIME_BYTE_MS_STRING = "Time per byte (ms/byte)"
THROUGHPUT_MB_S_STRING = "Throughput (MB/s)"
TOTAL_TIME_PERCENT_STRING = "Total Time (%)"
PIPELINE_LAYER_STRING = "Pipeline Layer"
LIBRARY_STRING = "Encoding Library"
CPU_USAGE_PERCENT_STRING = "CPU Usage (%)"
MEMORY_USAGE_PERCENT = "Memory Usage (%)"

# Pipeline layer names; ENCODE_STRING and DECODE_STRING are the only values
# accepted by the `layer` argument of the scaling, CPU and memory plots.
ENCODE_STRING = "Encode"
DECODE_STRING = "Decode"
WRITE_VIDEO_STRING = "Write Video"
READ_VIDEO_STRING = "Read Video"

# Apply seaborn's "whitegrid" theme to every figure created afterwards.
sns.set_theme(style="whitegrid")

In [None]:
# Load each benchmark result CSV into its own DataFrame.
# The file paths are constants imported from the project's `constants` module.
df_all_eager = pd.read_csv(BENCHMARKS_ALL_EAGER_PATH)
df_strong_scaling_eager = pd.read_csv(BENCHMARKS_STRONG_SCALING_EAGER_PATH)
df_strong_scaling_lazy = pd.read_csv(BENCHMARKS_STRONG_SCALING_LAZY_PATH)
df_weak_scaling_lazy = pd.read_csv(BENCHMARKS_WEAK_SCALING_LAZY_PATH)
df_weak_scaling_eager = pd.read_csv(BENCHMARKS_WEAK_SCALING_EAGER_PATH)

### Pipeline Layers and Bottlenecks

In [None]:
def visualize_time_percent_by_layer(df: pd.DataFrame, filter: bool = True, pie_chart: bool = True, save: bool = True) -> None:
    """Plot the share of total run time spent in each pipeline layer, per library.

    Draws a grouped bar chart (layer on the x axis, one bar per encoding
    library) and, optionally, one pie chart per entry of
    ``QR_ENCODING_LIBRARIES`` showing the mean share of each layer.

    Args:
        df: Benchmark results holding the per-layer time columns, the total
            time column and the encoding-library column. The frame is not
            modified; percentages are computed on a copy.
        filter: When True, keep only rows whose data size equals ``MB``, whose
            error correction level is ``L`` and whose chunk size is the largest
            recorded for their (data size, error correction level) group.
            When False, every row is used. The name shadows the builtin but is
            kept because callers pass it by keyword.
        pie_chart: When True, also draw one pie chart per encoding library.
        save: When True, write each figure as an SVG file under ``plots/``
            (the directory must already exist).
    """
    time_columns = [
        TIME_SERIALIZE_HEADER,
        TIME_DESERIALIZE_HEADER,
        TIME_ENCODE_HEADER,
        TIME_DECODE_HEADER,
        TIME_WRITE_VIDEO_HEADER,
        TIME_READ_VIDEO_HEADER
    ]
    percent_columns = [f"{column} %" for column in time_columns]

    # Work on a copy: the caller's frame is shared with other cells of the
    # notebook and must not silently gain the helper "... %" columns.
    df_percent = df.copy()
    for column, percent_column in zip(time_columns, percent_columns):
        df_percent[percent_column] = df_percent[column] / df_percent[TIME_TOTAL_HEADER] * 100

    if filter:
        # Largest chunk size seen within each (data size, ECL) group, aligned
        # row-by-row with df_percent so it can be compared element-wise.
        largest_chunk_size = df_percent.groupby(
            [DATA_SIZE_BYTES_HEADER, ERROR_CORRECTION_LEVEL_HEADER]
        )[CHUNK_SIZE_BYTES_HEADER].transform("max")
        df_filtered = df_percent[
            (df_percent[DATA_SIZE_BYTES_HEADER] == MB) &
            (df_percent[ERROR_CORRECTION_LEVEL_HEADER] == QRErrorCorrectionLevel.L.name) &
            (df_percent[CHUNK_SIZE_BYTES_HEADER] == largest_chunk_size)
        ]
    else:
        df_filtered = df_percent

    unpivoted_df = df_filtered.melt(
        id_vars=[ENCODING_LIBRARY_HEADER],
        value_vars=percent_columns,
        var_name=LAYER_STRING,
        value_name=TOTAL_TIME_PERCENT_STRING
    )

    # Turn the percent-column names into layer names by removing the
    # "Time " and " (ms) %" substrings.
    unpivoted_df[LAYER_STRING] = unpivoted_df[LAYER_STRING].str.replace("Time ", "", regex=False)
    unpivoted_df[LAYER_STRING] = unpivoted_df[LAYER_STRING].str.replace(" (ms) %", "", regex=False)

    # Computed once so the file names need no nested same-quote f-strings
    # (those are a SyntaxError before Python 3.12).
    variant = "filtered" if filter else "avg"

    plt.figure(figsize=DEFAULT_FIGSIZE)
    ax = sns.barplot(
        data=unpivoted_df,
        x=LAYER_STRING,
        y=TOTAL_TIME_PERCENT_STRING,
        hue=ENCODING_LIBRARY_HEADER,
        errorbar=ERRORBAR_NONE
    )
    ax.set_title("Total Time (%) by Pipeline Layer and Library")
    ax.set_ylabel(TOTAL_TIME_PERCENT_STRING)
    ax.set_xlabel(PIPELINE_LAYER_STRING)
    ax.legend(title=LIBRARY_STRING)
    plt.xticks(rotation=DEFAULT_TICK_ROTATION)
    plt.tight_layout()

    if save:
        plt.savefig(f"plots/time_percent_by_layer_eager_{variant}.svg", format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()

    if pie_chart:
        for library in QR_ENCODING_LIBRARIES:
            # Rows are matched on the capitalized enum member name.
            # NOTE(review): assumes the CSV stores library names in that form.
            library_label = library.name.capitalize()
            df_library = unpivoted_df[unpivoted_df[ENCODING_LIBRARY_HEADER] == library_label]

            mean_percentages = df_library.groupby(LAYER_STRING)[TOTAL_TIME_PERCENT_STRING].mean()

            # Nothing to draw for a library with no rows; skip it instead of
            # rendering and saving an empty pie.
            if mean_percentages.empty:
                continue

            plt.figure(figsize=(6, 6))
            plt.pie(
                mean_percentages,
                labels=mean_percentages.index,
                autopct='%1.1f%%',
                startangle=70,
            )
            plt.title(f"Total Time (%) by Pipeline Layer — {library_label}")
            plt.tight_layout()

            if save:
                plt.savefig(f"plots/time_percent_by_layer_pie_{library.name.lower()}_eager_{variant}.svg", format=SVG_FORMAT, dpi=DEFAULT_DPI)

            plt.show()

In [None]:
# Pipeline layers: share of total time per layer over every eager benchmark
# row (no filtering), as a bar chart plus one pie chart per library.
visualize_time_percent_by_layer(df=df_all_eager, filter=False, pie_chart=True)

In [None]:
# Same charts restricted to the filtered subset: data size equal to MB, error
# correction level L, and the largest chunk size of each group.
visualize_time_percent_by_layer(df=df_all_eager, filter=True, pie_chart=True)

### Pipeline Layer Distributions

In [None]:

def visualize_throughput_by_layer_and_library(df: pd.DataFrame, save: bool = True) -> None:
    """Draw a grouped bar chart of throughput (MB/s) per pipeline layer and library.

    Args:
        df: Benchmark results holding the encode, decode, write-video,
            read-video and total throughput columns plus the library column.
        save: When True, write the figure as an SVG file under ``plots/``.
    """
    layer_columns = [
        ENCODE_THROUGHPUT_MB_S_HEADER,
        DECODE_THROUGHPUT_MB_S_HEADER,
        WRITE_VIDEO_THROUGHPUT_MB_S_HEADER,
        READ_VIDEO_THROUGHPUT_MB_S_HEADER,
        TOTAL_THROUGHPUT_MB_S_HEADER,
    ]

    # Long format: one row per (library, throughput column, value).
    long_df = df.melt(
        id_vars=[ENCODING_LIBRARY_HEADER],
        value_vars=layer_columns,
        var_name=LAYER_STRING,
        value_name=THROUGHPUT_MB_S_STRING,
    )

    plt.figure(figsize=DEFAULT_FIGSIZE)
    ax = sns.barplot(
        data=long_df,
        x=LAYER_STRING,
        y=THROUGHPUT_MB_S_STRING,
        hue=ENCODING_LIBRARY_HEADER,
        errorbar=ERRORBAR_NONE,
    )

    ax.set_title("QR Throughput by Layer and Library")
    ax.set_xlabel(PIPELINE_LAYER_STRING)
    ax.set_ylabel(THROUGHPUT_MB_S_STRING)
    ax.legend(title=LIBRARY_STRING)
    plt.xticks(rotation=DEFAULT_TICK_ROTATION)
    plt.tight_layout()

    if save:
        output_path = "plots/throughput_by_layer_and_library_eager.svg"
        plt.savefig(output_path, format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()

In [None]:
# Layer distributions: throughput of each pipeline layer per encoding library,
# computed over all eager benchmark rows.
visualize_throughput_by_layer_and_library(df=df_all_eager)

### Strong Scaling

In [None]:
def visualize_strong_scaling(
    df: pd.DataFrame,
    layer: str,
    armdahl_list: List[List[float]],
    save: bool = True
) -> None:
    """Plot measured throughput against worker count with Amdahl's-law curves.

    One solid line per encoding library shows the measured throughput (seaborn
    aggregates repeated measurements per worker count). For every library, one
    dashed curve per serial fraction ``s`` shows the throughput predicted by
    Amdahl's law, ``baseline / (s + (1 - s) / n)``, where ``baseline`` is the
    library's mean single-worker throughput.

    Args:
        df: Strong-scaling benchmark results with the worker-count, library
            and throughput columns.
        layer: ``"Encode"`` (data-to-video) or ``"Decode"`` (video-to-data).
        armdahl_list: One list of serial fractions per library, paired with
            the libraries in the order they first appear in ``df``.
        save: When True, write the figure as an SVG file under ``plots/``.

    Raises:
        ValueError: If ``layer`` is not ``"Encode"`` or ``"Decode"``, or if a
            library has no single-worker measurement to use as baseline.
    """
    if layer not in [ENCODE_STRING, DECODE_STRING]:
        raise ValueError("Please supply either 'Encode' or 'Decode' layers.")

    is_encode = layer == ENCODE_STRING
    throughput_column = (
        ENCODE_THROUGHPUT_MB_S_HEADER
        if is_encode
        else DECODE_THROUGHPUT_MB_S_HEADER
    )
    # Computed up front so the f-strings below need no nested same-quote
    # expressions (a SyntaxError before Python 3.12).
    pipeline_title = "Data-to-Video" if is_encode else "Video-to-Data"
    pipeline_slug = "data_to_video" if is_encode else "video_to_data"

    # Keep only the worker counts that fit on the x axis, as the weak-scaling
    # and CPU plots do; rows beyond the limit would be clipped from view but
    # would still stretch the y axis.
    df_filtered = df[df[MAX_WORKERS_HEADER] <= MAX_WORKERS_COUNT].copy()

    plt.figure(figsize=DEFAULT_FIGSIZE)
    ax = sns.lineplot(
        data=df_filtered,
        x=MAX_WORKERS_HEADER,
        y=throughput_column,
        hue=ENCODING_LIBRARY_HEADER,
        marker=MARKER_O,
        errorbar=ERRORBAR_NONE
    )

    # Map each library to the palette colour seaborn gave its measured line,
    # so the theoretical curves can reuse a darkened version of it.
    palette = sns.color_palette()
    library_order = ax.get_legend_handles_labels()[1]
    color_map = {lib: palette[i] for i, lib in enumerate(library_order)}

    workers = sorted(df_filtered[MAX_WORKERS_HEADER].unique())

    for lib, serial_fractions in zip(df_filtered[ENCODING_LIBRARY_HEADER].unique(), armdahl_list):
        baseline_samples = df_filtered.loc[
            (df_filtered[ENCODING_LIBRARY_HEADER] == lib) &
            (df_filtered[MAX_WORKERS_HEADER] == 1),
            throughput_column
        ]
        if baseline_samples.empty:
            raise ValueError(f"No single-worker measurement found for library {lib!r}.")
        # Use the mean so the curve starts where the measured (mean) line
        # starts, even when a worker count was benchmarked several times.
        baseline_throughput = baseline_samples.mean()

        for s in serial_fractions:
            amdahl_values = [
                baseline_throughput * (1 / (s + (1 - s) / n))
                for n in workers
            ]

            ax.plot(
                workers,
                amdahl_values,
                linestyle="--",
                color=tuple(0.75 * c for c in color_map[lib]),
                label=f"{lib} - Amdahl's Law (s = {s})",
                zorder=1
            )

    # Invisible legend entries: a blank spacer row and a note on the data size.
    dummy_line_space = Line2D(
        [0], [0],
        linestyle="none",
        marker="",
        label="   "
    )

    dummy_line_data_size = Line2D(
        [0], [0],
        linestyle="none",
        marker="",
        label="Data Size = 1 MB"
    )

    # A single ax.legend() call is enough; re-adding the returned legend with
    # add_artist() would draw it twice.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(
        handles=handles + [dummy_line_space, dummy_line_data_size],
        labels=labels + ["", "Data Size = 1 MB"],
        title=LIBRARY_STRING,
        loc="upper left",
        ncol=1
    )

    ax.set_title(f"Strong Scaling - {pipeline_title} Pipeline")
    ax.set_xlabel(MAX_WORKERS_HEADER)
    ax.set_ylabel(THROUGHPUT_MB_S_STRING)
    ax.set_xlim(MIN_WORKERS_COUNT - 0.25, MAX_WORKERS_COUNT + 0.25)
    ax.set_xticks(MAX_WORKERS_RANGE)
    plt.tight_layout()

    if save:
        plt.savefig(
            f"plots/strong_scaling_lazy_{pipeline_slug}.svg",
            format=SVG_FORMAT,
            dpi=DEFAULT_DPI
        )
    plt.show()


In [None]:
# Strong scaling on the lazy benchmarks, one figure per pipeline direction.
# Each inner list holds the serial fractions s whose theoretical (Amdahl's-law)
# curves are drawn for one library; lists are paired with the libraries in the
# order they first appear in the data.
visualize_strong_scaling(df=df_strong_scaling_lazy, layer=ENCODE_STRING, armdahl_list=[[0, 0.15], [0, 0.35]])
visualize_strong_scaling(df=df_strong_scaling_lazy, layer=DECODE_STRING, armdahl_list=[[0, 0.20], [0, 0.15]])

### Weak Scaling

In [None]:
def visualize_weak_scaling(df: pd.DataFrame, layer: str, gustafson_list: List[List[float]], save: bool = True) -> None:
    """Plot measured throughput against worker count with Gustafson's-law curves.

    One solid line per encoding library shows the measured throughput (seaborn
    aggregates repeated measurements per worker count). For every library, one
    dashed curve per serial fraction ``s`` shows the throughput predicted by
    Gustafson's law, ``baseline * (s + (1 - s) * n)``, where ``baseline`` is
    the library's mean single-worker throughput.

    Args:
        df: Weak-scaling benchmark results with the worker-count, library and
            throughput columns. Rows above ``MAX_WORKERS_COUNT`` are ignored.
        layer: ``"Encode"`` (data-to-video) or ``"Decode"`` (video-to-data).
        gustafson_list: One list of serial fractions per library, paired with
            the libraries in the order they first appear in the data.
        save: When True, write the figure as an SVG file under ``plots/``.

    Raises:
        ValueError: If ``layer`` is not ``"Encode"`` or ``"Decode"``, or if a
            library has no single-worker measurement to use as baseline.
    """
    if layer not in [ENCODE_STRING, DECODE_STRING]:
        raise ValueError("Please supply either 'Encode' or 'Decode' layers.")

    is_encode = layer == ENCODE_STRING
    throughput_column = (
        ENCODE_THROUGHPUT_MB_S_HEADER
        if is_encode
        else DECODE_THROUGHPUT_MB_S_HEADER
    )
    # Computed up front so the f-strings below need no nested same-quote
    # expressions (a SyntaxError before Python 3.12). The title casing matches
    # the strong-scaling plot ("Data-to-Video").
    pipeline_title = "Data-to-Video" if is_encode else "Video-to-Data"
    pipeline_slug = "data_to_video" if is_encode else "video_to_data"

    df_filtered = df[df[MAX_WORKERS_HEADER] <= MAX_WORKERS_COUNT].copy()

    # Everything below uses the filtered frame: plotting the unfiltered one
    # would let off-axis worker counts stretch the y axis and extend the
    # theoretical curves past the visible range.
    plt.figure(figsize=DEFAULT_FIGSIZE)
    ax = sns.lineplot(
        data=df_filtered,
        x=MAX_WORKERS_HEADER,
        y=throughput_column,
        hue=ENCODING_LIBRARY_HEADER,
        marker=MARKER_O,
        errorbar=ERRORBAR_NONE
    )

    # Map each library to the palette colour seaborn gave its measured line,
    # so the theoretical curves can reuse a darkened version of it.
    palette = sns.color_palette()
    _, library_labels = ax.get_legend_handles_labels()
    color_map = {lib: palette[i] for i, lib in enumerate(library_labels)}

    workers = sorted(df_filtered[MAX_WORKERS_HEADER].unique())

    for lib, serial_fractions in zip(df_filtered[ENCODING_LIBRARY_HEADER].unique(), gustafson_list):
        baseline_samples = df_filtered.loc[
            (df_filtered[ENCODING_LIBRARY_HEADER] == lib) &
            (df_filtered[MAX_WORKERS_HEADER] == 1),
            throughput_column
        ]
        if baseline_samples.empty:
            raise ValueError(f"No single-worker measurement found for library {lib!r}.")
        # Use the mean so the curve starts where the measured (mean) line
        # starts, even when a worker count was benchmarked several times.
        baseline_throughput = baseline_samples.mean()

        for s in serial_fractions:
            gustafson_values = [
                baseline_throughput * (s + (1 - s) * n)
                for n in workers
            ]

            ax.plot(
                workers,
                gustafson_values,
                linestyle="--",
                color=tuple(0.75 * c for c in color_map[lib]),
                label=f"{lib} - Gustafson's Law (s = {s})",
                zorder=1
            )

    # Invisible legend entries: a blank spacer row and a note on how the data
    # size grows with the worker count.
    dummy_line_space = Line2D(
        [0], [0],
        linestyle="none",
        marker="",
        label="   "
    )

    dummy_line_data_size_increments = Line2D(
        [], [],
        linestyle="none",
        marker=None,
        markerfacecolor="none",
        markeredgecolor="black",
        label="Data Size = 250 KB $x$ Workers"
    )

    # A single ax.legend() call is enough; re-adding the returned legend with
    # add_artist() would draw it twice.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(
        handles=handles + [dummy_line_space, dummy_line_data_size_increments],
        labels=labels + ["", "Data Size = 250 KB $x$ Workers"],
        title=LIBRARY_STRING,
        loc="upper left"
    )

    ax.set_title(f"Weak Scaling - {pipeline_title} Pipeline")
    ax.set_xlabel(MAX_WORKERS_HEADER)
    ax.set_ylabel(THROUGHPUT_MB_S_STRING)
    ax.set_xlim(MIN_WORKERS_COUNT - 0.25, MAX_WORKERS_COUNT + 0.25)
    ax.set_xticks(MAX_WORKERS_RANGE)
    plt.tight_layout()

    if save:
        plt.savefig(f"plots/weak_scaling_lazy_{pipeline_slug}.svg", format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()


In [None]:
# Weak scaling on the lazy benchmarks, one figure per pipeline direction.
# Each inner list holds the serial fractions s whose theoretical
# (Gustafson's-law) curves are drawn for one library; lists are paired with
# the libraries in the order they first appear in the data.
visualize_weak_scaling(df=df_weak_scaling_lazy, layer=ENCODE_STRING, gustafson_list=[[0, 0.50], [0, 0.70]])
visualize_weak_scaling(df=df_weak_scaling_lazy, layer=DECODE_STRING, gustafson_list=[[0, 0.50], [0, 0.50]])

### CPU Usage

In [None]:
def visualize_cpu_usage_vs_max_workers(df, layer: str, save: bool = True) -> None:
    """Plot CPU usage (%) against the number of workers, one line per library.

    The plotted column is named ``"<layer> CPU Usage (%)"``. Rows with more
    than ``MAX_WORKERS_COUNT`` workers are left out, and the band around each
    line is the standard deviation.

    Args:
        df: Benchmark results with the worker-count, library and CPU columns.
        layer: ``"Encode"`` or ``"Decode"``.
        save: When True, write the figure as an SVG file under ``plots/``.

    Raises:
        ValueError: If ``layer`` is neither ``"Encode"`` nor ``"Decode"``.
    """
    if layer not in (ENCODE_STRING, DECODE_STRING):
        raise ValueError("Please supply either 'Encode' or 'Decode' layers.")

    usage_column = f"{layer} {CPU_USAGE_PERCENT_STRING}"
    direction_slug = "data_to_video" if layer == ENCODE_STRING else "video_to_data"

    within_limit = df[MAX_WORKERS_HEADER] <= MAX_WORKERS_COUNT
    df_in_range = df[within_limit].copy()

    plt.figure(figsize=DEFAULT_FIGSIZE)
    ax = sns.lineplot(
        data=df_in_range,
        x=MAX_WORKERS_HEADER,
        y=usage_column,
        hue=ENCODING_LIBRARY_HEADER,
        marker=MARKER_O,
        errorbar=ERRORBAR_STANDARD_DEVIATION,
    )

    ax.set_title(f"CPU Usage (%) vs Max Workers - {layer}")
    ax.set_xlabel(MAX_WORKERS_HEADER)
    ax.set_ylabel(CPU_USAGE_PERCENT_STRING)
    ax.set_xlim(MIN_WORKERS_COUNT - 0.25, MAX_WORKERS_COUNT + 0.25)
    ax.legend(title=LIBRARY_STRING)

    plt.xticks(MAX_WORKERS_RANGE)
    plt.tight_layout()

    if save:
        output_path = f"plots/cpu_usage_vs_max_workers_lazy_{direction_slug}.svg"
        plt.savefig(output_path, format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()

In [None]:
# CPU usage against worker count on the lazy strong-scaling benchmarks,
# one figure per pipeline direction.
visualize_cpu_usage_vs_max_workers(df_strong_scaling_lazy, layer=ENCODE_STRING)
visualize_cpu_usage_vs_max_workers(df_strong_scaling_lazy, layer=DECODE_STRING)

### Memory Usage

In [None]:
def visualize_memory_usage_vs_data_size(df: pd.DataFrame, layer: str, save: bool = True) -> None:
    """Plot memory usage (%) against data size, one line per encoding library.

    The plotted column is named ``"<layer> Memory Usage (%)"``. The x axis has
    one tick per distinct data size, labelled with ``bytes_to_display``, and
    the band around each line is the standard deviation.

    Args:
        df: Benchmark results with the data-size, library and memory columns.
        layer: ``"Encode"`` or ``"Decode"``.
        save: When True, write the figure as an SVG file under ``plots/``.

    Raises:
        ValueError: If ``layer`` is neither ``"Encode"`` nor ``"Decode"``.
    """
    if layer not in (ENCODE_STRING, DECODE_STRING):
        raise ValueError("Please supply either 'Encode' or 'Decode' layers.")

    usage_column = f"{layer} {MEMORY_USAGE_PERCENT}"

    # Rows whose data size is exactly 250000 bytes are left out of this plot.
    keep_rows = df[DATA_SIZE_BYTES_HEADER] != 250000
    df_plotted = df[keep_rows]

    tick_positions = sorted(df_plotted[DATA_SIZE_BYTES_HEADER].unique())
    tick_labels = [bytes_to_display(position) for position in tick_positions]

    plt.figure(figsize=DEFAULT_FIGSIZE)
    ax = sns.lineplot(
        data=df_plotted,
        x=DATA_SIZE_BYTES_HEADER,
        y=usage_column,
        hue=ENCODING_LIBRARY_HEADER,
        marker=MARKER_O,
        errorbar=ERRORBAR_STANDARD_DEVIATION,
    )

    ax.set_title(f"Memory Usage vs Data Size - {layer}")
    ax.set_xlabel(DATA_SIZE_BYTES_HEADER)
    ax.set_ylabel(MEMORY_USAGE_PERCENT)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    ax.legend(title=LIBRARY_STRING)
    plt.tight_layout()

    if save:
        output_path = f"plots/memory_usage_vs_data_size_eager_{layer.lower()}.svg"
        plt.savefig(output_path, format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()

In [None]:
# Memory usage against data size on the eager weak-scaling benchmarks,
# one figure per pipeline layer.
visualize_memory_usage_vs_data_size(df_weak_scaling_eager, layer=ENCODE_STRING)
visualize_memory_usage_vs_data_size(df_weak_scaling_eager, layer=DECODE_STRING)

In [None]:
def visualize_video_size_vs_encoding_library(df: pd.DataFrame, save: bool = True) -> None:
    """Draw a bar chart of the video size (MB) for each encoding library.

    Args:
        df: Benchmark results with the library and video-size columns.
        save: When True, write the figure as an SVG file under ``plots/``.
    """
    plt.figure(figsize=DEFAULT_FIGSIZE)

    # The library is used both as the x category and as the hue, so every bar
    # gets its own colour.
    ax = sns.barplot(
        data=df,
        x=ENCODING_LIBRARY_HEADER,
        y=VIDEO_SIZE_MB_HEADER,
        hue=ENCODING_LIBRARY_HEADER,
        errorbar=ERRORBAR_NONE,
    )

    ax.set_title("Video Size vs. Encoding Library")
    ax.set_ylabel(VIDEO_SIZE_MB_HEADER)
    ax.set_xlabel(ENCODING_LIBRARY_HEADER)
    plt.tight_layout()

    if save:
        output_path = "plots/video_size_vs_encoding_library_lazy.svg"
        plt.savefig(output_path, format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()

In [None]:
# Video size per encoding library, from the lazy strong-scaling benchmarks.
visualize_video_size_vs_encoding_library(df=df_strong_scaling_lazy)

### Chunks and Data Sizes

In [None]:
def visualize_throughput_vs_chunks_and_data_size(
    df: pd.DataFrame,
    save: bool = True
) -> None:
    """Plot total throughput against chunk size on a grid of small panels.

    The grid has one row per error correction level and one column per data
    size; each panel holds one line per encoding library with a
    standard-deviation band. Columns are ordered by ascending data size.

    Args:
        df: Benchmark results with the data-size, chunk-size, error correction
            level, library and total-throughput columns. The frame is not
            modified; the display label is added to a copy.
        save: When True, write the figure as an SVG file under ``plots/``.
    """
    size_label_column = "Data Size Label"

    # Work on a copy so the caller's frame does not gain a helper column.
    df_labeled = df.copy()
    df_labeled[size_label_column] = df_labeled[DATA_SIZE_BYTES_HEADER].map(bytes_to_display)

    # The labels are strings, so without an explicit order the facet columns
    # would follow the order of appearance in the data. Sorting by the byte
    # count first gives the labels in ascending-size order.
    size_label_order = (
        df_labeled.sort_values(DATA_SIZE_BYTES_HEADER)[size_label_column]
        .dropna()
        .unique()
        .tolist()
    )

    g = sns.FacetGrid(
        df_labeled,
        row=ERROR_CORRECTION_LEVEL_HEADER,
        col=size_label_column,
        col_order=size_label_order,
        height=2.5,
        sharey=True,
        sharex=False,
        hue=ENCODING_LIBRARY_HEADER,
    )
    g.map(
        sns.lineplot,
        CHUNK_SIZE_BYTES_HEADER,
        TOTAL_THROUGHPUT_MB_S_HEADER,
        marker=MARKER_O,
        errorbar=ERRORBAR_STANDARD_DEVIATION,
    )

    g.add_legend(title=LIBRARY_STRING)
    g.set_axis_labels(CHUNK_SIZE_BYTES_HEADER, THROUGHPUT_MB_S_STRING)
    g.set_titles(row_template="ECL={row_name}", col_template="Data Size={col_name}")

    if save:
        plt.savefig("plots/throughput_vs_chunks_and_data_sizes_eager.svg", format=SVG_FORMAT, dpi=DEFAULT_DPI)

    plt.show()

In [None]:
# Chunks and data sizes: total throughput against chunk size for every
# (error correction level, data size) combination in the eager benchmarks.
visualize_throughput_vs_chunks_and_data_size(df=df_all_eager)