# HPC performance visualization

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Use a larger default font for every figure in this notebook.
plt.rcParams['font.size'] = 16

## Read and format files
n = [16, 32, 64, 128, 256, 512, 1024, 2048]

In [None]:
cd data

In [None]:
def _read_ws(path, header=None):
    """Read a whitespace-separated file from the current working directory.

    `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`,
    which is deprecated in recent pandas releases.

    Parameters
    ----------
    path : str
        File name, resolved relative to the current directory.
    header : int, str or None
        Passed straight to `pd.read_csv`; the default `None` means the file
        has no header row and columns get integer labels 0, 1, 2, ...

    Returns
    -------
    pandas.DataFrame
    """
    return pd.read_csv(path, header=header, sep=r'\s+')


# Blocked version: keep columns 0 and 1, drop the rest.
blk = (
    _read_ws('blk.csv')
    .drop(columns=[2, 3, 4])
    .rename(columns={0: "size", 1: "Mflop/s"})
)

# Unoptimized version: same layout with one column fewer to drop.
unopt = (
    _read_ws('unopti.csv')
    .drop(columns=[2, 3])
    .rename(columns={0: "size", 1: "Mflop/s"})
)

# Loop-order permutations: column 3 holds the routine name (e.g. 'matmult_mnk').
per = (
    _read_ws('per.csv')
    .drop(columns=[2])
    .rename(columns={0: "size", 1: "Mflop/s", 3: 'per'})
)

# Split the combined frame into one frame per permutation.
mnk, mkn, nmk, nkm, kmn, knm = (
    per.loc[per['per'] == 'matmult_' + order]
    for order in ('mnk', 'mkn', 'nmk', 'nkm', 'kmn', 'knm')
)

# CBLAS: same column layout as the permutation file.
lib = (
    _read_ws('cblas.csv')
    .drop(columns=[2])
    .rename(columns={0: "size", 1: "Mflop/s", 3: 'lib'})
)

# Varying block sizes: only the Mflop/s (column 1) and block size (column 4)
# columns are kept.
blksize = (
    _read_ws('blksize.csv')
    .drop(columns=[0, 2, 3])
    .rename(columns={1: "Mflop/s", 4: "Blocksize"})
)

# Hardware stats: this file has a header row, so let pandas infer it.
hw = _read_ws('res.csv', header='infer')
# 'CPUsec..1' is the name pandas gives to a second column called 'CPUsec.';
# both are dropped to obtain the `cache` frame.
cache = hw.drop(columns=['CPUsec.', 'CPUsec..1'])

In [None]:
cd ..

## Plot results

In [None]:
# Mflop/s of every loop-order permutation plus the blocked version, on a
# logarithmic x-axis.
f, ax = plt.subplots(figsize=(15, 9))

# (legend label, frame) pairs in drawing order; the order determines the
# default colour cycle and the legend order.
#ax.semilogx(lib['size'], lib['Mflop/s'], marker='*', label='lib');
series = [
    ('mnk', mnk),
    ('mkn', mkn),
    ('nmk', nmk),
    ('nkm', nkm),
    ('kmn', kmn),
    ('knm', knm),
    ('blk (bs=32)', blk),
]
for label, frame in series:
    ax.semilogx(frame['size'], frame['Mflop/s'], marker='*', label=label)

ax.set_xlabel('Memory occupied by matrices [kB]')
ax.set_ylabel('Mflop/s')
ax.set_title('Permutation comparison')
ax.set_ylim(0, 7500)

#ax.axhline(max_mflops)
# Thin vertical markers at the cache sizes, in the x-axis unit (kB).
cache_marks = [
    (32, 'k', 'L1 cache'),
    (256, 'b', 'L2 cache'),
    (30000, 'r', 'L3 cache'),
]
for position, colour, name in cache_marks:
    ax.axvline(position, linewidth=0.5, color=colour, label=name)

ax.legend(
    loc='upper left',
    bbox_to_anchor=(0.0, 1),
    ncol=3,
    fancybox=True,
    shadow=True,
);

# uncomment to save figure:
#f.savefig('per_comparison.png', dpi=300)

In [None]:
# Theoretical peak rate: clock frequency in MHz times instructions per cycle.
clockmax = 2900    # MHz, i.e. a max clock freq. of 2.9 GHz
instr_p_cyc = 16   # instructions per cycle
max_mflops = instr_p_cyc * clockmax
max_mflops  # theoretically

In [None]:
# CBLAS vs. optimized mkn vs. unoptimized (native) mkn
f, ax = plt.subplots(figsize=(15, 8))

# (legend label, frame) pairs in drawing order.
curves = (
    ('CBLAS', lib),
    ('mkn', mkn),
    ('mkn unopt.', unopt),
)
for name, frame in curves:
    ax.semilogx(frame['size'], frame['Mflop/s'], marker='*', label=name)

ax.set_xlabel('Memory occupied by matrices [kB]')
ax.set_ylabel('Mflop/s')
ax.set_title('CBLAS comparison')

#ax.axhline(max_mflops, linestyle='--', label='Theoretical max', linewidth=0.8)
# Thin vertical markers at the cache sizes, in the x-axis unit (kB).
for position, colour, name in (
    (32, 'k', 'L1 cache'),
    (256, 'b', 'L2 cache'),
    (30000, 'r', 'L3 cache'),
):
    ax.axvline(position, linewidth=0.5, color=colour, label=name)

ax.legend(
    loc='upper left',
    bbox_to_anchor=(0.0, 1),
    ncol=1,
    fancybox=True,
    shadow=True,
);

# uncomment to save figure:
#f.savefig('native_vs_cblas.png', dpi=300)

### Hardware stats

In [None]:
# Display the hardware-stats frame with the two 'CPUsec.' columns removed.
cache

In [None]:
# Grouped bar chart of the cache statistics, one group per entry in 'Versions'.
bar_colours = ['deepskyblue', 'orangered', 'steelblue', 'firebrick']
ax = cache.plot.bar(
    x='Versions',
    rot=0,                 # keep the x tick labels horizontal
    width=0.7,
    zorder=3,              # presumably so the bars sit on top of the grid lines
    figsize=(10, 7),
    color=bar_colours,
    title='CPU cache access comparison',
);
# Horizontal grid lines only, in a light grey.
ax.yaxis.grid(True, color='#BBBBBB')

# uncomment to save figure:
#plt.savefig('Cache_access.png', dpi=300)

In [None]:
# Mflop/s as a function of block size, on a logarithmic x-axis.
f, ax = plt.subplots(figsize=(15, 8))
ax.semilogx(
    blksize['Blocksize'],
    blksize['Mflop/s'],
    marker='*',
    linewidth=0.7,
)
ax.set_xlabel('Blocksize')
ax.set_ylabel('Mflop/s')
ax.set_title('Performance with different blocksizes')

# NOTE(review): these markers are placed at 32 and 256 on a *blocksize* axis
# but are labelled as cache levels — confirm the intended unit.
for position, colour, name in ((32, 'k', 'L1 cache'), (256, 'b', 'L2 cache')):
    ax.axvline(position, linewidth=0.5, color=colour, label=name)

ax.legend();

In [None]:
# Display the block-size table ('Mflop/s' and 'Blocksize' columns).
blksize