In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Lift pandas' display limits so that dataframes shown in this notebook are
# rendered in full: each of these options is set to None.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(_display_option, None)

from tad4bj import DataStorage

# Columns describing the size of one experiment; used as grouping keys when
# the results are aggregated.
EXPERIMENT_SIZE_FIELDS = ["number_of_fragments", "points_per_fragment"]

# Columns describing the platform a run was executed on; also used as
# grouping keys. Both constants are lists so they can be concatenated with `+`.
PLATFORM_FIELDS = ["nodes", "cpus_per_node", "backends_per_node"]

In [None]:
def smart_mean(row):
    """Add timing statistics and an execution-mode label to one result row.

    Intended to be used as ``DataFrame.apply(smart_mean, axis=1)``.

    Parameters
    ----------
    row : mapping-like (a pandas Series when called through ``apply``)
        Must provide the keys ``execution_time`` (a sized collection of
        timings, possibly empty or missing), ``split_overhead_time``,
        ``dataclay`` and ``use_split``.

    Returns
    -------
    A copy of ``row`` that always carries these additional keys:

    * ``min_time``, ``mean_time``, ``std_time``: minimum, mean and standard
      deviation of ``execution_time`` (NaN when there are no timings).
    * ``count``: number of timings (0 when there are none).
    * ``overhead_relative``: ``split_overhead_time`` as a percentage of
      ``mean_time`` (NaN when it cannot be computed).
    * ``mode``: ``"COMPSs"`` when ``dataclay`` is falsy, otherwise
      ``"dC+split"`` when ``use_split`` is truthy, otherwise ``"dC"``.
    """
    # Work on a copy: pandas documents that mutating the object passed to a
    # user function in apply() is unsupported.
    row = row.copy()

    it_times = row["execution_time"]
    try:
        count = len(it_times)
    except TypeError:
        # None, NaN or any other unsized value means "no timings recorded".
        count = 0

    if count > 0:
        mean_time = np.mean(it_times)
        row["min_time"] = np.min(it_times)
        row["mean_time"] = mean_time
        row["std_time"] = np.std(it_times)
        row["count"] = count

        overhead = row["split_overhead_time"]
        # A missing overhead cannot be turned into a percentage; NaN values
        # already propagate through the arithmetic on their own.
        if overhead is None:
            row["overhead_relative"] = np.nan
        else:
            row["overhead_relative"] = 100.0 * overhead / mean_time
    else:
        # Emit the same keys for every row so that the resulting dataframe
        # has a stable set of columns even when no timings are available.
        row["min_time"] = np.nan
        row["mean_time"] = np.nan
        row["std_time"] = np.nan
        row["count"] = 0
        row["overhead_relative"] = np.nan

    if not row['dataclay']:
        row['mode'] = "COMPSs"
    elif row['use_split']:
        row['mode'] = "dC+split"
    else:
        row['mode'] = "dC"

    return row

# Open the "csvm-split" storage and convert its contents to a dataframe,
# then let smart_mean add its derived columns to every row.
db = DataStorage("csvm-split")
runs_raw = db.to_dataframe()
df = runs_raw.apply(smart_mean, axis=1)
# Alternative kept for reference: the same load restricted to a start_ts window.
#df = db.to_dataframe().apply(smart_mean, axis=1).query("(start_ts > '2021-02-18 20') and (start_ts < '2021-02-20')")

In [None]:
# Aggregate the runs: one row per (experiment size, mode, platform) combination.
byproduct = df.groupby(
    EXPERIMENT_SIZE_FIELDS + ['mode'] + PLATFORM_FIELDS
).agg({
    "overhead_relative": ["mean"],
    "split_overhead_time": ["mean"],
    "mean_time": ["mean"],
    "min_time": ["min"],
    "std_time": ["mean"],
})
# The "condensed" dataframe, containing the relevant data properly condensed
cdf = byproduct.reset_index()
# The list-valued aggregation spec produces two-level column labels;
# keep only the first level so columns can be addressed by plain name.
cdf.columns = cdf.columns.get_level_values(0)
cdf

In [None]:
# Mean execution time per number of fragments, one bar colour per mode.
sns.barplot(
    data=cdf,
    x="number_of_fragments",
    y="mean_time",
    hue="mode",
)

In [None]:
# Side-by-side view of the split overhead: absolute time on the left,
# percentage of the mean execution time on the right.
fig, axs = plt.subplots(ncols=2)
ax_absolute, ax_relative = axs

sns.barplot(
    data=cdf,
    x="number_of_fragments",
    y="split_overhead_time",
    hue="mode",
    ax=ax_absolute,
)
sns.barplot(
    data=cdf,
    x="number_of_fragments",
    y="overhead_relative",
    hue="mode",
    ax=ax_relative,
    saturation=1,
)
# The right panel shows a percentage, so fix its axis to 0-100.
ax_relative.set(ylim=(0, 100))
# Drop the legend from the left panel.
ax_absolute.legend().remove()

In [None]:
# Same mean-time plot, restricted to configurations with fewer than 1000 fragments.
sns.barplot(
    data=cdf.query("number_of_fragments < 1000"),
    x="number_of_fragments",
    y="mean_time",
    hue="mode",
)

In [None]:
# Minimum execution time per number of fragments, one bar colour per mode.
sns.barplot(
    data=cdf,
    x="number_of_fragments",
    y="min_time",
    hue="mode",
)

In [None]:
# Same minimum-time plot, restricted to configurations with fewer than 1000 fragments.
sns.barplot(
    data=cdf.query("number_of_fragments < 1000"),
    x="number_of_fragments",
    y="min_time",
    hue="mode",
)