In [None]:
# Boilerplate that all notebooks reuse:
from analysis_common import *

%matplotlib inline

# Kernel analysis

In [None]:
# Load the "kmeans-kernel" sheet of the results workbook.
# expand_modes() is called for its effect on `df` (its return value is unused).
df = read_ods("./results.ods", "kmeans-kernel")
expand_modes(df)

# Bar order along the MODE axis, and hue order (big fragments first) that is
# reused by the grouped plots in later cells.
order = ['DRAM', 'AD', 'MM (hot)', 'MM (cold)']
hue_order = [200000, 4000]

# One figure per fragment size: kernel execution time for each mode.
kernel_figures = [
    (4000, "4k points per fragment (small object)"),
    (200000, "200k points per fragment (big object)"),
]
for fragment_points, figure_title in kernel_figures:
    sns.barplot(data=df[(df.POINTS_PER_FRAGMENT == fragment_points)],
                x='MODE', y='TIMING',
                order=order,
                capsize=0.1,
                palette=custom_kernel_palette(4))
    plt.title(figure_title)
    plt.ylabel("kernel exec time (s)")
    plt.show()


In [None]:
# Build the NORMALIZED column: 4000-point rows keep their TIMING as is, while
# 200000-point rows are divided by 50 (= 200000 / 4000).
is_4k = df.POINTS_PER_FRAGMENT == 4000
is_200k = df.POINTS_PER_FRAGMENT == 200000
df.loc[is_4k, "NORMALIZED"] = df.TIMING
df.loc[is_200k, "NORMALIZED"] = df.TIMING / 50


# Horizontal grouped bars (one hue per fragment size). seaborn's own error
# bars are switched off (ci=None); custom ones are drawn further down.
ax = sns.barplot(data=df,
                 x='NORMALIZED', y='MODE',
                 hue="POINTS_PER_FRAGMENT",
                 order=order,
                 hue_order=hue_order,
                 ci=None,
                 capsize=0.1,
                 palette="muted")

# Get confidence intervals
import numpy as np
def tweak_confidences(df, ax, value, order, big_filter, small_filter, big_tweaks, small_tweaks, color,
                      ci_func=None):
    """Draw hand-adjustable confidence-interval error bars on a horizontal bar plot.

    For every mode in ``order`` an interval ``[low, high]`` is computed for the
    "big" rows and for the "small" rows, selected bounds are overridden by the
    tweak lists, and one horizontal error bar per interval is drawn on ``ax``.
    Each bar is centred on the midpoint of its interval and spans half of its
    width on each side.

    Parameters
    ----------
    df : DataFrame indexed as ``df[mask][value]``.
    ax : axes whose ``patches`` give the vertical position of each bar; the
        error bars are drawn on this axes.
    value : name of the column the intervals are computed from.
    order : list of mode names (``order.index`` is used to resolve tweaks).
    big_filter, small_filter : callables ``mode -> mask`` selecting the rows of
        one mode for the big / small series.
    big_tweaks, small_tweaks : iterables of ``[mode, bound_index, new_value]``;
        ``bound_index`` 0 replaces the first bound of that mode's interval,
        1 replaces the second.
    color : colour forwarded to ``errorbar`` as ``c``.
    ci_func : optional callable ``values -> (low, high)``. Defaults to
        ``sns.utils.ci``, which is what this function always used.

    Notes
    -----
    * Intervals are laid out as all big-series modes followed by all
      small-series modes and matched to the first ``ax.patches`` in that order
      (assumes the bar plot was drawn with the big hue first -- TODO confirm
      against the ``hue_order`` used by the caller).
    * A selection with no rows gets the interval ``[0, 0]``.
    """
    if ci_func is None:
        ci_func = sns.utils.ci

    def interval(mask):
        samples = df[mask][value]
        # Explicit empty check: percentile helpers either raise or return NaN
        # on empty input depending on the library version.
        if len(samples) == 0:
            return [0.0, 0.0]
        return [float(bound) for bound in ci_func(samples)]

    ci_big = [interval(big_filter(mode)) for mode in order]
    ci_small = [interval(small_filter(mode)) for mode in order]

    # Manual overrides of individual interval bounds.
    for mode, bound_index, new_value in big_tweaks:
        ci_big[order.index(mode)][bound_index] = new_value
    for mode, bound_index, new_value in small_tweaks:
        ci_small[order.index(mode)][bound_index] = new_value

    cis = np.array(ci_big + ci_small, dtype=float).reshape(-1, 2)
    centers = cis.mean(axis=1)
    # Half-width of each interval. It must be non-negative: recent matplotlib
    # versions reject negative error values.
    half_widths = np.abs(centers - cis[:, 0])
    bar_centers = [p.get_y() + p.get_height() / 2 for p in ax.patches]
    # Draw on the axes that was passed in rather than on pyplot's current axes.
    ax.errorbar(y=bar_centers[:len(centers)], x=centers, xerr=half_widths,
                fmt='none', c=color, capsize=4)
    
def _rows_for(points_per_fragment):
    """Return a ``mode -> boolean row mask`` selector for one fragment size."""
    return lambda mode: (df['MODE'] == mode) & (df['POINTS_PER_FRAGMENT'] == points_per_fragment)


# Overlay the custom error bars on the NORMALIZED plot. Each tweak entry is
# [mode, bound index, value] and replaces bound 1 of that mode's interval
# (presumably the upper bound -- verify against sns.utils.ci).
tweak_confidences(
    df, ax, 'NORMALIZED', order,
    big_filter=_rows_for(200000),
    small_filter=_rows_for(4000),
    big_tweaks=[['DRAM', 1, 0.0115], ['MM (hot)', 1, 0.0125]],
    small_tweaks=[['DRAM', 1, 0.0118], ['MM (hot)', 1, 0.012]],
    color='k',
)
kernel_plot_tweaks(ax, 50, legend_title="Points per fragment")



def compute_sd(elems):
    """Placeholder that is not implemented yet: ignores ``elems``, returns ``None``."""
    return None

# Write the current pyplot figure to a PDF next to the notebook;
# bbox_inches='tight' crops the surrounding whitespace.
# NOTE(review): keep this before plt.show() -- the figure may no longer be
# available for saving once it has been shown.
plt.savefig("kmeans-kernel.pdf", bbox_inches='tight')
# Render the figure as the output of this cell.
plt.show()


In [None]:
# Column-wise minimum for every (fragment size, mode) pair. Later cells read
# the TIMING column of this table as the per-kernel time.
kernel_group_keys = ["POINTS_PER_FRAGMENT", "MODE"]
kernel_times = df.groupby(kernel_group_keys).min()
kernel_times

# _k_-means results analysis

In [None]:
# Load the "kmeans-app" sheet of the results workbook.
# NOTE: this rebinds `df`; from here on it no longer refers to the
# "kmeans-kernel" sheet loaded earlier (its minima live on in `kernel_times`).
df = read_ods("./results.ods", "kmeans-app")
# The return value is discarded, so expand_modes() presumably updates `df`
# in place -- TODO confirm in analysis_common.
expand_modes(df)

In [None]:
# Application mode -> kernel measurement whose TIMING is used for it.
# "AD (pre-copy)" and "DAOS" are charged with the DRAM kernel time.
kernel_mode_for = {
    "DRAM": "DRAM",
    "AD": "AD",
    "AD (pre-copy)": "DRAM",
    "DAOS": "DRAM",
}
for ppf in [200000, 4000]:
    for app_mode, kernel_mode in kernel_mode_for.items():
        selected = (df.POINTS_PER_FRAGMENT == ppf) & (df.MODE == app_mode)
        df.loc[selected, "ATOM_KERNEL"] = kernel_times.loc[(ppf, kernel_mode), "TIMING"]

# "MM" rows pick the hot or the cold kernel measurement depending on the
# fragment count: (points per fragment, number of fragments, kernel mode).
mm_kernel_cases = [
    (4000, 1600, "MM (hot)"),
    (200000, 32, "MM (hot)"),
    (4000, 12800, "MM (cold)"),
    (200000, 256, "MM (cold)"),
]
for points, fragment_count, kernel_mode in mm_kernel_cases:
    selected = (
        (df.POINTS_PER_FRAGMENT == points)
        & (df.NUMBER_OF_FRAGMENTS == fragment_count)
        & (df.MODE == "MM")
    )
    df.loc[selected, "ATOM_KERNEL"] = kernel_times.loc[(points, kernel_mode), "TIMING"]

# Total kernel time = fragments x iterations x time of a single kernel call.
df["KERNEL_TIME"] = df["NUMBER_OF_FRAGMENTS"] * df["KMEANS_ITERATIONS"] * df["ATOM_KERNEL"]

# Sanity check: show every row that still has a missing value in any column
# (for example a row that received no ATOM_KERNEL above).
# any(axis=1) gives one boolean per row, so each offending row is listed once.
null_values = df[df.isnull().any(axis=1)]
if len(null_values) > 0:
    # A bare expression inside an `if` is not rendered by Jupyter, so the rows
    # are displayed explicitly (`display` is provided by the IPython kernel).
    display(null_values)

In [None]:
sns.set(style="whitegrid")

# Row selectors shared by the two dataset masks below.
big_objects = df.POINTS_PER_FRAGMENT == 200000
small_objects = df.POINTS_PER_FRAGMENT == 4000
fragments = df.NUMBER_OF_FRAGMENTS

# "Small" dataset: 32 fragments of 200000 points or 1600 fragments of 4000 points.
small = (big_objects & (fragments == 32)) | (small_objects & (fragments == 1600))

# "Big" dataset: 256 fragments of 200000 points or 12800 fragments of 4000 points.
big = (big_objects & (fragments == 256)) | (small_objects & (fragments == 12800))

# Bar order along the MODE axis of the application plots.
order = ["DRAM", "AD", "AD (pre-copy)", "MM", "DAOS"]

def plot_kmeans_dataset(data, title, filename, order, hue_order, annotate=False):
    """Plot total vs. kernel-only execution time per mode for one dataset size.

    Two horizontal grouped bar plots are drawn one after the other: TIMING
    (with error bars, "colorblind" palette) and then KERNEL_TIME (error bars
    hidden, "pastel" palette). The figure is then labelled, saved and shown.

    Parameters
    ----------
    data : DataFrame holding MODE, TIMING, KERNEL_TIME and POINTS_PER_FRAGMENT.
    title : figure title.
    filename : output file name handed to ``save_tweaks``.
    order : mode names, in display order.
    hue_order : POINTS_PER_FRAGMENT values, in hue order.
    annotate : when true, ``annotate_dram`` is applied to the TIMING bars
        before the KERNEL_TIME bars are added.

    Returns
    -------
    (ax, bottom) : the objects returned by the TIMING and KERNEL_TIME
        ``sns.barplot`` calls.
    """
    # `hue` is given as a column name so that it is taken from the same
    # (filtered) frame as x and y.
    ax = sns.barplot(y='MODE', x="TIMING",
                     data=data,
                     capsize=0.1,
                     order=order,
                     hue_order=hue_order,
                     palette="colorblind",
                     hue="POINTS_PER_FRAGMENT")

    if annotate:
        annotate_dram(ax)

    bottom = sns.barplot(y='MODE', x="KERNEL_TIME",
                         data=data,
                         capsize=0,
                         errwidth=0,
                         order=order,
                         hue_order=hue_order,
                         palette="pastel",
                         hue="POINTS_PER_FRAGMENT")

    ylabel_tweaks(ax, [1, 4], ['non-active', 'active'], 0.28, 0.005)
    legend_tweaks(bottom, ["big objects", "small objects", "kernel comp."], placement='upper right')
    ax.set_xlabel("execution time (s)")
    plt.title(title)
    save_tweaks(filename, big=True)
    plt.show()
    return ax, bottom


# Disabled overlay, kept from the original notebook for reference:
# tweak_confidences(df[small], bottom, 'KERNEL_TIME', order, color='gray',
#                   big_filter=lambda m: (df[small]['MODE']==m)&(df[small]['POINTS_PER_FRAGMENT']==200000),
#                   small_filter=lambda m: (df[small]['MODE']==m)&(df[small]['POINTS_PER_FRAGMENT']==4000),
#                   big_tweaks=[['DRAM', 1, 200], ['MM', 1, 200]],
#                   small_tweaks=[['DRAM', 1, 250], ['MM', 1, 300]])
ax, bottom = plot_kmeans_dataset(df[small], "Small dataset", 'kmeans-small.pdf',
                                 order, hue_order)

# Disabled overlay, kept from the original notebook for reference:
# tweak_confidences(df[big], bottom, 'KERNEL_TIME', order, color='gray',
#                   big_filter=lambda m: (df[big]['MODE']==m)&(df[big]['POINTS_PER_FRAGMENT']==200000),
#                   small_filter=lambda m: (df[big]['MODE']==m)&(df[big]['POINTS_PER_FRAGMENT']==4000),
#                   big_tweaks=[['MM', 1, 3000]],
#                   small_tweaks=[['MM', 1, 4000]])
ax, bottom = plot_kmeans_dataset(df[big], "Big dataset", 'kmeans-big.pdf',
                                 order, hue_order, annotate=True)

In [None]:
# Mean of every numeric column per (fragment size, fragment count, mode) group.
# numeric_only=True keeps this working on pandas >= 2.0, where averaging a
# non-numeric column raises instead of silently dropping it.
# NOTE(review): assumes no non-numeric column is wanted in this table.
df.groupby(["POINTS_PER_FRAGMENT", "NUMBER_OF_FRAGMENTS", "MODE"]).mean(numeric_only=True)