In [None]:
# Boilerplate that all notebooks reuse:
from analysis_common import *

%matplotlib inline

# Kernel analysis

In [None]:
# Load the kernel micro-benchmark sheet. The return value of expand_modes() is
# discarded, so it presumably rewrites `df` in place (TODO confirm in
# analysis_common).
df = read_ods("./results.ods", "kmeans-kernel")
expand_modes(df)

# Left-to-right order of the bars in the kernel plots.
order = ['DRAM', 'AD', 'MM (hot)', 'MM (cold)']

# One figure per object size: (points per fragment, figure title).
for fragment_size, plot_title in [
    (4000, "4k points per fragment (small object)"),
    (200000, "200k points per fragment (big object)"),
]:
    sns.barplot(
        x='MODE',
        y='TIMING',
        data=df[df.POINTS_PER_FRAGMENT == fragment_size],
        capsize=0.1,
        order=order,
        palette=custom_kernel_palette(4),
    )
    plt.title(plot_title)
    plt.ylabel("kernel exec time (s)")
    plt.show()


In [None]:
# Bring both object sizes onto a common scale: timings of the 200k-point
# fragments are divided by 50 (200000 / 4000 == 50), timings of the 4k-point
# fragments are kept as they are. Rows with any other fragment size are not
# assigned a NORMALIZED value.
small_objects = df.POINTS_PER_FRAGMENT == 4000
big_objects = df.POINTS_PER_FRAGMENT == 200000
df.loc[small_objects, "NORMALIZED"] = df.TIMING
df.loc[big_objects, "NORMALIZED"] = df.TIMING / 50

# Single grouped bar chart, one hue per fragment size.
ax = sns.barplot(
    x='MODE',
    y='NORMALIZED',
    hue="POINTS_PER_FRAGMENT",
    data=df,
    order=order,
    capsize=0.1,
    palette="Set2",
)

# The 50 passed here is the same factor used for the normalization above
# (NOTE(review): exact use inside kernel_plot_tweaks is not visible here).
kernel_plot_tweaks(ax, 50, legend_title="Points per fragment", rotate=False)

# Save before show() so the figure written to disk is the one just drawn.
plt.savefig("kmeans-kernel.pdf", bbox_inches='tight')
plt.show()


In [None]:
# Column-wise minimum for every (fragment size, mode) pair of the kernel
# benchmark; the result is indexed by (POINTS_PER_FRAGMENT, MODE).
kernel_times = df.groupby(
    by=["POINTS_PER_FRAGMENT", "MODE"],
).min()
kernel_times

# _k_-means results analysis

In [None]:
# Load the application-level sheet. NOTE: this rebinds `df`, which up to this
# point held the "kmeans-kernel" sheet, so anything that still needs the kernel
# data (e.g. `kernel_times`) has to be computed before this cell runs.
df = read_ods("./results.ods", "kmeans-app")
# Return value is discarded; presumably rewrites `df` in place (TODO confirm).
expand_modes(df)

In [None]:
# Attach to every application run the per-fragment kernel time ("atom"), taken
# from the TIMING column of `kernel_times`, which is indexed by
# (POINTS_PER_FRAGMENT, MODE).

# Application mode -> kernel_times row, for the modes where the same row is
# used regardless of the number of fragments. DAOS runs are costed with the
# DRAM kernel time (NOTE(review): presumably because the kernel then works on
# data held in DRAM — confirm).
simple_kernel_rows = {"DRAM": "DRAM", "AD": "AD", "DAOS": "DRAM"}
for ppf in [200000, 4000]:
    for app_mode, kernel_row in simple_kernel_rows.items():
        df.loc[(df.POINTS_PER_FRAGMENT == ppf) & (df.MODE == app_mode), "ATOM_KERNEL"] = \
            kernel_times.loc[(ppf, kernel_row), "TIMING"]

# MM runs pick the "hot" or "cold" kernel measurement depending on the dataset:
# (points per fragment, number of fragments) -> kernel_times row.
mm_kernel_rows = {
    (4000, 1600): "MM (hot)",
    (200000, 32): "MM (hot)",
    (4000, 12800): "MM (cold)",
    (200000, 256): "MM (cold)",
}
for (mm_ppf, mm_fragments), kernel_row in mm_kernel_rows.items():
    df.loc[(df.POINTS_PER_FRAGMENT == mm_ppf)
           & (df.NUMBER_OF_FRAGMENTS == mm_fragments)
           & (df.MODE == "MM"),
           "ATOM_KERNEL"] = kernel_times.loc[(mm_ppf, kernel_row), "TIMING"]

# Total time spent in the kernel over the whole run.
df["KERNEL_TIME"] = df["NUMBER_OF_FRAGMENTS"] * df["KMEANS_ITERATIONS"] * df["ATOM_KERNEL"]

# Sanity check: every row should have received a value in every column (a row
# matched by none of the assignments above ends up with a null ATOM_KERNEL).
# any(axis=1) yields one boolean per row, so each offending row is listed once.
null_values = df[df.isnull().any(axis=1)]
if len(null_values) > 0:
    # A bare expression inside an `if` is not the cell's last top-level
    # expression, so Jupyter would not render it; display() shows it explicitly
    # (IPython makes display() available without an import).
    display(null_values)

In [None]:
sns.set(style="whitegrid")

# Shorthands for the two columns that identify a dataset configuration.
ppf_col = df.POINTS_PER_FRAGMENT
nfrag_col = df.NUMBER_OF_FRAGMENTS

# Small dataset: 200000 x 32 == 4000 x 1600 == 6.4M points in total.
small = (
    ((ppf_col == 200000) & (nfrag_col == 32))
    | ((ppf_col == 4000) & (nfrag_col == 1600))
)

# Big dataset: 200000 x 256 == 4000 x 12800 == 51.2M points in total.
big = (
    ((ppf_col == 200000) & (nfrag_col == 256))
    | ((ppf_col == 4000) & (nfrag_col == 12800))
)

# Left-to-right order of the bars in the application plots.
order = ["DRAM", "AD", "MM", "DAOS"]

def _plot_execution_times(selection, title, filename, annotate=False):
    """Draw, save and show the execution-time figure for one dataset.

    Two bar plots are drawn one after the other without creating a new figure
    in between: first the measured TIMING, then the KERNEL_TIME estimate.

    Reads the notebook globals `df` and `order` when called.

    Parameters:
        selection: boolean row mask over `df` choosing the runs to plot.
        title: figure title.
        filename: output file name handed to save_tweaks().
        annotate: when True, annotate_dram() is applied to the TIMING axes
            right after they are drawn (before the kernel bars are added).

    Returns:
        (timing_axes, kernel_axes) as returned by the two sns.barplot calls.
    """
    runs = df[selection]

    # hue is given by column name so that it is resolved against the same
    # filtered rows as x and y, instead of passing the unfiltered column.
    timing_axes = sns.barplot(x='MODE', y="TIMING",
                              data=runs,
                              capsize=0.1,
                              order=order,
                              palette="Set1",
                              hue="POINTS_PER_FRAGMENT")

    if annotate:
        annotate_dram(timing_axes)

    # Kernel estimate: error bars are suppressed (capsize=0, errwidth=0).
    # Only `palette` is passed; a `color` argument would be redundant with it.
    kernel_axes = sns.barplot(x='MODE', y="KERNEL_TIME",
                              data=runs,
                              capsize=0,
                              errwidth=0,
                              order=order,
                              palette="Set2",
                              hue="POINTS_PER_FRAGMENT")

    xlabel_tweaks(timing_axes, 3, 1)
    legend_tweaks(kernel_axes,
                  ["small objects", "big objects", "kernel comp."],
                  placement='upper left')
    plt.ylabel("execution time (s)")
    plt.title(title)
    save_tweaks(filename)
    plt.show()
    return timing_axes, kernel_axes


# `ax` and `bottom` are kept as notebook-level names, as before.
ax, bottom = _plot_execution_times(small, "Small dataset", 'kmeans-small.pdf')

ax, bottom = _plot_execution_times(big, "Big dataset", 'kmeans-big.pdf', annotate=True)