# Task Assignment Algorithm Comparison

In [None]:
%matplotlib widget
from eval_benchmark import get_data, de_duplicate_names, parse_benchmarks, plot_n_partitions_with_property, plot_bar_with_property, get_geometric_means
import matplotlib.pyplot as plt
import numpy as np

# Data set under analysis. Alternative data set: get_data("tpch_queries").
benchs = get_data("growing_queries")

# One label per benchmark run, taken from its "assignmentAlgorithm" entry and
# passed through de_duplicate_names (presumably so repeated algorithm names
# stay distinguishable in the plots -- confirm in eval_benchmark).
names = de_duplicate_names([bench["assignmentAlgorithm"] for bench in benchs])

# Aggregate timings as reported by each run.
total_ex_times = [bench["totalExecutionTime"] for bench in benchs]
total_sched_times = [bench["totalSchedulingTime"] for bench in benchs]

# Parsed per-run results and the geometric means computed from them.
data = [parse_benchmarks(bench) for bench in benchs]
geometric_means = get_geometric_means(data)

print(f"Data from {len(data[0])} assignments")
# Table columns: label, total execution time, geometric mean, total scheduling time.
for label, exec_time, geo_mean, sched_time in zip(names, total_ex_times, geometric_means, total_sched_times):
    print(f"{label + ':':<17}{exec_time:>8.2f}{geo_mean:>8.3f}{sched_time:>8.2f}")


In [None]:
# Overview chart: one bar per algorithm label, bar height = its geometric mean.
fig, ax = plt.subplots()
ax.bar(x=names, height=geometric_means)
# Draw the x tick labels (the algorithm labels) at a 30 degree angle.
ax.tick_params(axis="x", rotation=30)

In [None]:
# Trade-off view: total scheduling time (x) against the geometric mean (y),
# one labelled marker per algorithm.
fig, ax = plt.subplots()
for label, sched_time, geo_mean in zip(names, total_sched_times, geometric_means):
    position = (sched_time, geo_mean)
    ax.scatter(*position, label=label, s=100)
    # Put the algorithm label 5 points above its marker, horizontally centred.
    ax.annotate(label, position, textcoords="offset points", xytext=(0, 5), ha="center", fontsize=14)
# Both axes use a logarithmic scale.
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Scheduling Optimization Time")
ax.set_ylabel("Simulated Execution Time")


## Shared Nothing Scenario

In [None]:
# Bar chart restricted to the benchmarks whose is_shared_nothing() is true.
fig, ax = plt.subplots()
# Axis styling is applied before the helper draws: log-scaled y axis and
# x tick labels at a 30 degree angle.
ax.set_yscale("log")
ax.tick_params(axis="x", rotation=30)
plot_bar_with_property(
    names,
    data,
    lambda bench: bench.is_shared_nothing(),
    "Execution Time For Shared Nothing Problems",
    ax,
)



## Comparison Over Partition Counts

In [None]:
# Cache configurations to break the comparison down by, in plotting order.
caching_names = ["All", "Half", "HalfRand", "One", "OneRand", "None", "Copy", "Rand1", "Rand2", "Rand4"]

# For each cluster type (homogeneous first, then heterogeneous) draw one figure
# covering every cache configuration, followed by one figure per configuration.
#
# The loop values are bound as lambda default arguments. A lambda that merely
# closes over a loop variable looks the variable up when it is *called*, so a
# predicate evaluated after the loop has advanced would silently filter on the
# wrong cache configuration / cluster type. Binding at definition time makes
# each predicate independent of when the plotting helper evaluates it.
for allowed_homogeneous_values in [[True], [False]]:
    fig, ax = plt.subplots()
    plot_n_partitions_with_property(
        names,
        data,
        lambda b, allowed=allowed_homogeneous_values: b.homogeneous in allowed,
        f"Execution Time over partitions with all caches (Homogeneous: {allowed_homogeneous_values})",
        ax,
    )
    for caching_name in caching_names:
        fig, ax = plt.subplots()
        plot_n_partitions_with_property(
            names,
            data,
            lambda b, wanted_cache=caching_name, allowed=allowed_homogeneous_values: (
                b.caching_name == wanted_cache and b.homogeneous in allowed
            ),
            f"Execution Time over partitions with {caching_name} caches (Homogeneous: {allowed_homogeneous_values})",
            ax,
        )

## Comparison Between Homogeneous and Heterogeneous Clusters

In [None]:
# One bar chart per cluster type: homogeneous clusters first, heterogeneous second.
# Each entry pairs the benchmark filter with the title of its figure.
cluster_views = (
    (lambda b: b.homogeneous, "Execution Time For Homogeneous Clusters"),
    (lambda b: not b.homogeneous, "Execution Time For Heterogeneous Clusters"),
)
for selector, title in cluster_views:
    fig, ax = plt.subplots()
    plot_bar_with_property(names, data, selector, title, ax=ax)
    # x tick labels at a 30 degree angle.
    ax.tick_params(axis="x", rotation=30)

In [None]:
## Comparison for Non-Empty Caches

In [None]:
# Benchmarks whose caching_name is anything other than "None", split by
# cluster type: homogeneous first, heterogeneous second.
non_empty_cache_views = (
    (
        lambda b: b.caching_name != "None" and b.homogeneous,
        "Execution Time For Homogeneous And Non-Empty Caches",
    ),
    (
        lambda b: b.caching_name != "None" and not b.homogeneous,
        "Execution Time For Heterogeneous And Non-Empty Caches",
    ),
)
for selector, title in non_empty_cache_views:
    fig, ax = plt.subplots()
    plot_bar_with_property(names, data, selector, title, ax=ax)
    # x tick labels at a 30 degree angle.
    ax.tick_params(axis="x", rotation=30)

In [None]:
## Comparison for empty caches
# Benchmarks whose caching_name is exactly "None", split by cluster type:
# homogeneous first, heterogeneous second.
empty_cache_views = (
    (
        lambda b: b.caching_name == "None" and b.homogeneous,
        "Execution Time For Homogeneous And Empty Caches",
    ),
    (
        lambda b: b.caching_name == "None" and not b.homogeneous,
        "Execution Time For Heterogeneous And Empty Caches",
    ),
)
for selector, title in empty_cache_views:
    fig, ax = plt.subplots()
    plot_bar_with_property(names, data, selector, title, ax=ax)
    # x tick labels at a 30 degree angle.
    ax.tick_params(axis="x", rotation=30)

In [None]:
# Only these algorithms are included in the scale-up charts.
interesting_algorithms = ["Component", "NetHEFT", "Greedy1", "Greedy2", "Greedy3", "Greedy4"]

# Each entry pairs a benchmark filter with the title of its figure. The filters
# read interesting_algorithms when they are called, exactly like inline lambdas.
scale_up_views = (
    ## Comparison for scale-up scenario 2x nodes ("HalfRand" cache configuration)
    (
        lambda b: b.caching_name == "HalfRand" and b.homogeneous and b.algorithm in interesting_algorithms,
        "Execution Time For Homogeneous 2x Scale-Up",
    ),
    (
        lambda b: b.caching_name == "HalfRand" and not b.homogeneous and b.algorithm in interesting_algorithms,
        "Execution Time For Heterogeneous 2x Scale-Up",
    ),
    ## Comparison for scale-up scenario 1-N nodes ("One" cache configuration)
    (
        lambda b: b.caching_name == "One" and b.homogeneous and b.algorithm in interesting_algorithms,
        "Execution Time For Homogeneous 1-N Scale-Up",
    ),
    (
        lambda b: b.caching_name == "One" and not b.homogeneous and b.algorithm in interesting_algorithms,
        "Execution Time For Heterogeneous 1-N Scale-Up",
    ),
)

# One figure per entry, drawn in the order listed above.
for selector, title in scale_up_views:
    fig, ax = plt.subplots()
    plot_bar_with_property(names, data, selector, title, ax=ax)
    # x tick labels at a 30 degree angle.
    ax.tick_params(axis="x", rotation=30)

In [None]:
# Cache configurations to break the comparison down by, in plotting order.
caching_names = ["All", "Half", "HalfRand", "One", "OneRand", "None", "Copy", "Rand1", "Rand2", "Rand4"]

# For each cluster type (homogeneous first, then heterogeneous) draw one bar
# chart covering every cache configuration, followed by one per configuration.
#
# The loop values are bound as lambda default arguments. A lambda that merely
# closes over a loop variable looks the variable up when it is *called*, so a
# predicate evaluated after the loop has advanced would silently filter on the
# wrong cache configuration / cluster type. Binding at definition time makes
# each predicate independent of when the plotting helper evaluates it.
for allowed_homogeneous_values in [[True], [False]]:
    fig, ax = plt.subplots()
    plot_bar_with_property(
        names,
        data,
        lambda b, allowed=allowed_homogeneous_values: b.homogeneous in allowed,
        f"Execution Time with all caches (Homogeneous: {allowed_homogeneous_values})",
        ax,
    )
    # Same x tick label angle as the other bar charts in this notebook.
    ax.tick_params(axis="x", rotation=30)
    for caching_name in caching_names:
        fig, ax = plt.subplots()
        plot_bar_with_property(
            names,
            data,
            lambda b, wanted_cache=caching_name, allowed=allowed_homogeneous_values: (
                b.caching_name == wanted_cache and b.homogeneous in allowed
            ),
            f"Execution Time with {caching_name} caches (Homogeneous: {allowed_homogeneous_values})",
            ax,
        )
        ax.tick_params(axis="x", rotation=30)