In [None]:
import glob
import html
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import HTML, Image, Markdown


# Name of the run directory to report on.
result = 'latest'
#result = '2020_01_30_12_40_30'

# Directory holding the run's output files. The B_RESULT_DIR environment
# variable, when set, takes precedence over the built-in default.
base_dir = os.environ.get('B_RESULT_DIR', f'salomon/outputs/{result}/')
#base_dir = os.environ.get('B_RESULT_DIR', f'run/outputs/{result}/')

# The file is read as header-less: every row is data and the column names
# are supplied here.
data = pd.read_csv(
    os.path.join(base_dir, "stats.csv"),
    header=None,
    names=['stat', 'rank', 'size', 'round', 'time_ms'],
)
#data = data[data['round'] != 0]
data

In [None]:
# Run header: the resolved directory name as the title, followed by the
# contents of the run's `info` file and, when present, its `env` file.
# realpath() resolves symlinks, so a symlinked run directory is titled with
# the name of the directory it points to.
display(HTML('<h1>' + html.escape(os.path.basename(os.path.realpath(base_dir))) + '</h1>'))

# File contents are HTML-escaped so that '<', '>' and '&' are shown literally
# instead of being parsed as markup inside the <pre> block.
with open(os.path.join(base_dir, 'info')) as f:
    display(HTML(f'<pre>{html.escape(f.read(), quote=False)}</pre>'))
try:
    with open(os.path.join(base_dir, 'env')) as f:
        display(HTML(f'<pre>{html.escape(f.read(), quote=False)}</pre>'))
except FileNotFoundError:
    # `env` is optional: a run without it simply shows no environment section.
    pass

## stdout

In [None]:
def format_md(s):
    """Convert plain text into an HTML ``<pre>`` block with embedded headings.

    Lines starting with ``###`` or ``##`` (followed by at least one more
    character) become ``<h3>`` / ``<h2>`` headings; the enclosing ``<pre>``
    is closed before each heading and reopened after it. All other text
    stays preformatted.

    The text is HTML-escaped before the headings are inserted, so ``<``,
    ``>`` and ``&`` in the input are displayed literally rather than being
    interpreted as markup.

    Args:
        s: The raw text to format.

    Returns:
        The HTML string, always wrapped in an outer ``<pre>...</pre>``.
    """
    s = html.escape(s, quote=False)
    # '###' has to be rewritten before '##': a rewritten line starts with
    # '</pre>', so the '##' pattern can no longer match it a second time.
    s = re.sub(r'^###(.+)', r'</pre><h3>\1</h3><pre>', s, flags=re.MULTILINE)
    s = re.sub(r'^##(.+)', r'</pre><h2>\1</h2><pre>', s, flags=re.MULTILINE)
    return f"<pre>{s}</pre>"
    

# Read the run's `out` file in full, then render it through format_md so its
# '##' / '###' lines appear as headings.
with open(os.path.join(base_dir, 'out')) as f:
    stdout_text = f.read()
display(Markdown(format_md(stdout_text)))

## Result md5sum

In [None]:
# Show the run's `checksums` file verbatim, if the run produced one.
# The contents are HTML-escaped so that '<', '>' and '&' are shown literally
# instead of being parsed as markup inside the <pre> block.
try:
    with open(os.path.join(base_dir, 'checksums')) as f:
        display(HTML(f'<pre>{html.escape(f.read(), quote=False)}</pre>'))
except FileNotFoundError:
    # `checksums` is optional: without it this section stays empty.
    pass

## Result images

In [None]:
def _thumbnail_order(path):
    """Sort key for thumbnail paths: numeric parent-directory names first, by value.

    The parent directory name is what the heading shows as the node count, so
    it is compared as an integer where possible ("2" before "16"). Paths whose
    parent directory is not an integer sort after those, by path.
    """
    parent = os.path.basename(os.path.dirname(path))
    try:
        return (0, int(parent), path)
    except ValueError:
        return (1, 0, path)


# Show every thumbnail found anywhere below the run directory, each under a
# heading built from the name of the directory that contains it.
thumbnails = glob.glob(os.path.join(base_dir, "**/*.thumbnail*.png"), recursive=True)
for path in sorted(thumbnails, key=_thumbnail_order):
    display(HTML(f"<h3>{html.escape(os.path.basename(os.path.dirname(path)))} nodes</h3>"))
    display(Image(filename=path))

## Execution stats

In [None]:
def stats(stat):
    """Display the tables and plots for one timing statistic.

    Uses the rows of the module-level ``data`` frame whose ``stat`` column
    equals ``stat`` and shows, in order:

    1. an ``<h1>`` heading with the statistic name;
    2. ``time_ms`` pivoted by (size, round) against rank;
    3. ``time_ms`` pivoted by size against rank (pivot_table's default
       aggregation, i.e. the mean over rounds);
    4. a bar chart of the per-size mean across ranks;
    5. only if size 1 is present: a speedup plot, where speedup is the
       across-rank median time at size 1 divided by the across-rank median
       time at each size, drawn together with the y = x reference line.

    Args:
        stat: Value of the ``stat`` column to report on.
    """
    display(HTML(f"<h1>{stat}</h1>"))
    selected = data[data['stat'] == stat]

    overview = selected.pivot_table(index=['size', 'round'], columns='rank', values='time_ms')
    display(overview)

    mean_per_node = selected.pivot_table(index=['size'], columns='rank', values='time_ms')
    display(mean_per_node)

    # Average across ranks (columns) to get one bar per cluster size.
    ax = mean_per_node.mean(axis=1).plot.bar(title=f'{stat} execution mean time')
    ax.set_ylabel('time [ms]')
    ax.set_xlabel('total nodes')
    plt.show()

    # Median across ranks for each size; size 1 is the speedup baseline.
    median_per_size = mean_per_node.quantile(.5, axis=1)
    if 1 not in median_per_size:
        return

    speedup = median_per_size[1] / median_per_size
    plt.plot(speedup, linestyle='none', marker='o')
    plt.title(f'{stat} speedup')
    plt.ylabel('speedup')
    plt.xlabel('total nodes')
    # Reference line y = x from 1 up to the largest size.
    ideal = np.linspace(1, max(speedup.index))
    plt.plot(ideal, ideal)
    plt.show()
#stats('convolution')

In [None]:
# One report section per distinct value of the `stat` column, in order of
# first appearance in `data`.
for stat_name in data['stat'].unique():
    stats(stat_name)

## Execution time without IO

In [None]:
# Combined time of the 'convolution' and 'gather' statistics per cluster size:
# each is averaged per (size, rank) by pivot_table, the two tables are added,
# and the sum is averaged across ranks.
convolution = data[data['stat'] == 'convolution'].pivot_table(index=['size'], columns='rank', values='time_ms')
gather = data[data['stat'] == 'gather'].pivot_table(index=['size'], columns='rank', values='time_ms')
total = (convolution + gather).T.mean()

if 1 in total:
    # Speedup relative to the size-1 run.
    speedup = total[1] / total
    display(pd.DataFrame({'time_ms': total, 'speedup': speedup}))
    plt.plot(speedup, linestyle='none', marker='o')
    plt.title('speedup without IO')
    plt.ylabel('speedup')
    plt.xlabel('total nodes')
    # Reference line y = x from 1 up to the largest size.
    x = np.linspace(1, max(speedup.index))
    plt.plot(x, x)
    plt.show()
else:
    # Without a size-1 run there is no baseline to divide by. Like stats(),
    # skip the speedup plot instead of failing with a KeyError, and show the
    # times alone.
    display(pd.DataFrame({'time_ms': total}))