In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide display configuration: set every pandas display limit to None
# in a single call (set_option accepts alternating option/value pairs), so
# frames are rendered without row, column or cell-width truncation.
pd.set_option(
    "display.max_rows", None,
    "display.max_columns", None,
    "display.width", None,
    "display.max_colwidth", None,
)

from tad4bj import DataStorage

# Names of the fields that describe the size of an experiment.
# NOTE(review): presumably all three are columns of the results frame
# (`n_blocks_fit` is queried as one further down) -- confirm for the others.
EXPERIMENT_SIZE_FIELDS = ["n_blocks_fit", "n_blocks_nn", "points_per_block"]

# Names of the fields that describe the platform an experiment ran on.
# NOTE(review): presumably all three are columns of the results frame
# (`nodes` is queried and plotted as one further down) -- confirm for the others.
PLATFORM_FIELDS = ["nodes", "cpus_per_node", "backends_per_node"]

In [None]:
def smart_mean(row):
    """Annotate one result row with timing statistics and an execution-mode label.

    Intended to be used with ``DataFrame.apply(smart_mean, axis=1)``.

    Parameters
    ----------
    row : pandas.Series or dict-like
        Must provide the keys ``"kneighbors_time"``, ``"dataclay"`` and
        ``"use_split"``. ``"kneighbors_time"`` may be a list/tuple/ndarray of
        timings, a single number, or a missing value (``None``/NaN/``pd.NA``).

    Returns
    -------
    The same ``row`` object, modified in place, with these keys added:

    * ``min_time``, ``mean_time``, ``std_time`` -- statistics over the timing
      samples, or NaN when there are no samples;
    * ``count`` -- number of timing samples (0 when there are none);
    * ``mode`` -- ``"COMPSs"`` when ``dataclay`` is falsy, otherwise
      ``"dC+split"`` when ``use_split`` is truthy, otherwise ``"dC"``.
    """
    it_times = row["kneighbors_time"]

    # Normalise the cell to a 1-D float array. A bare truthiness test is not
    # safe here: a multi-element ndarray raises on `if arr:` and a NaN cell is
    # truthy but has no len(), so missing scalars are detected explicitly.
    if np.ndim(it_times) == 0 and pd.isna(it_times):
        samples = np.empty(0, dtype=float)
    else:
        samples = np.atleast_1d(np.asarray(it_times, dtype=float))

    count = len(samples)
    if count > 0:
        row["min_time"] = np.min(samples)
        row["mean_time"] = np.mean(samples)
        row["std_time"] = np.std(samples)
    else:
        # Always emit the statistic keys so that every row produced by
        # DataFrame.apply has the same set of columns, even when no row in
        # the frame carries any timing sample.
        row["min_time"] = np.nan
        row["mean_time"] = np.nan
        row["std_time"] = np.nan
    row["count"] = count

    if not row['dataclay']:
        row['mode'] = "COMPSs"
    elif row['use_split']:
        row['mode'] = "dC+split"
    else:
        row['mode'] = "dC"

    return row

# Open the "nn-split" storage, turn its contents into a DataFrame, annotate
# every row with smart_mean (timing statistics + mode label) and finally keep
# only the rows whose start_ts is later than 2022-06-01.
db = DataStorage("nn-split")
df = (
    db.to_dataframe()
    .apply(smart_mean, axis=1)
    .query("start_ts > '2022-06-01'")
)

In [None]:
# Long-format view of the results: one row per individual kneighbors timing
# sample, ordered by mode, with the sample column cast to float.
edf = (
    df.explode("kneighbors_time")
    .sort_values('mode')
    .astype({"kneighbors_time": float})
)

In [None]:
# One bar chart per blocks-per-node setting: select the runs where
# n_blocks_fit equals (nodes - 1) * blocks_per_node and plot kneighbors_time
# against the node count, with one bar colour per mode.
# NOTE(review): the "- 1" presumably discounts one non-worker node -- confirm.
for blocks_per_node in (24, 48):
    data = edf.query(f'(nodes - 1) * {blocks_per_node} == n_blocks_fit')
    sns.barplot(data=data, x="nodes", hue="mode", y="kneighbors_time")
    plt.title(f"{blocks_per_node} blocks per node")
    plt.show()
