In [None]:
import argparse
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
import itertools
import os
import importlib
from IPython.display import Image, display, HTML
import glob
import importlib

# Work out the directory that the data directories are resolved against.
import inspect
scriptname = inspect.getframeinfo(inspect.currentframe()).filename
if os.path.isfile(scriptname):
    # Executed from a real file on disk: use the directory containing it.
    scriptpath = os.path.dirname(os.path.abspath(scriptname))
else:
    # Inside a notebook kernel the frame "filename" is a synthetic cell name,
    # not a file. Older IPython names resolve to the working directory anyway,
    # but newer kernels appear to use names under a temporary directory
    # (NOTE(review): confirm for the kernel in use), so fall back explicitly
    # to the current working directory, i.e. where the notebook was started.
    scriptpath = os.getcwd()
print('script root is',scriptpath)

import sys
print(sys.version)

# make sure path to this utility is in PYTHONPATH
import plotutils as pu

### This will read the output of future_overhead --csv

In [None]:
def read_future_overhead_csv(name):
    """Read one ``future_overhead --csv`` output file into a DataFrame.

    The file has no header row; columns are assigned the fixed names
    futures, launch, sync, exec, time, ftime, sched, numa, threads, info.
    Text after a ``#`` is treated as a comment, and whitespace around the
    comma separators is discarded.

    Parameters
    ----------
    name : path or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed rows, excluding rows with fewer than 7 non-NaN columns.
    """
    print('Reading',name)
    data = pd.read_csv(
        name,
        names=['futures', 'launch', 'sync', 'exec', 'time', 'ftime', 'sched', 'numa', 'threads', 'info'],
        header=None,
        comment='#',
        # Raw string: '\s' in a normal string literal is an invalid escape
        # sequence and triggers a SyntaxWarning on recent Python versions.
        sep=r'\s*,\s*', # regex to skip whitespace and use comma separator
        engine='python',
        skipinitialspace=True)

    # Clean data : if less than 7 cols are not NaN, drop the row
    return data.dropna(thresh=7)

### Read a whole directory of files given hostname, root dir, prefix etc

In [None]:
def _resolve_legacy_date_str():
    """Return the notebook-level ``date_str`` for callers that pass no date."""
    try:
        return globals()['date_str']
    except KeyError:
        raise NameError(
            "name 'date_str' is not defined: pass date_str explicitly") from None


def load_future_overhead_files(filenames, hostname, info, date_str=None):
    """Load several future_overhead csv files into one DataFrame.

    Each file is read with ``read_future_overhead_csv``; NaN cells are
    replaced by ``''`` and the columns ``date`` and ``hostname`` are added.

    Parameters
    ----------
    filenames : iterable of str
        Files to read.
    hostname : str
        Value stored in the ``hostname`` column of every row.
    info : str
        When not ``''``, overwrites the ``info`` column of every row.
    date_str : str, optional
        Value stored in the ``date`` column. When omitted, the notebook-level
        global ``date_str`` is used (the previous implicit behaviour).

    Returns
    -------
    pandas.DataFrame
        All rows of all files; an empty DataFrame if there were no files.
    """
    frames = []
    for filename in filenames:
        if date_str is None:
            # Resolved lazily so that an empty file list never needs a date.
            date_str = _resolve_legacy_date_str()
        data = read_future_overhead_csv(filename)
        # fill empty (NaN) cells, e.g. a missing info column, with ""
        data = data.fillna('')
        if ('info' not in data) or (info!=''):
            print('Setting info to', info)
            data['info'] = info
        data['date']     = date_str
        data['hostname'] = hostname
        frames.append(data)
    if not frames:
        return pd.DataFrame()
    # Concatenate once: avoids quadratic copying and concatenating onto an
    # empty placeholder frame.
    return pd.concat(frames, sort=False)

def load_future_overhead_files_in_dir(data_root, date_str, prefix='', hostname='none', info=''):
    """Load every result file found in ``<data_root>/<date_str>``.

    With a non-empty ``prefix`` the files ``<prefix>*.txt`` are loaded and
    tagged with ``hostname``. Otherwise the files ``<machine>-*.txt`` are
    loaded for each known machine name and tagged with that machine name.
    The combined frame is also handed to ``pu.add_to_global_data``.

    Returns
    -------
    pandas.DataFrame
        All rows that were loaded (empty if no file matched).
    """
    dirname = os.path.join(data_root, date_str)
    frames = []
    if prefix != '':
        # sorted(): glob order is unspecified; keep the row order reproducible
        filenames = sorted(glob.glob(os.path.join(dirname, prefix + '*.txt')))
        frames.append(
            load_future_overhead_files(filenames, hostname, info, date_str=date_str))
    else:
        for m in ['ault', 'daint', 'tave', 'jb-s76', 'pop-os']:
            filenames = sorted(glob.glob(os.path.join(dirname, m + '-*.txt')))
            print('Calling load with hostname', m)
            frames.append(
                load_future_overhead_files(filenames, m, info, date_str=date_str))
    # Machines without any files contribute empty frames; leave those out.
    frames = [frame for frame in frames if not frame.empty]
    alldata = pd.concat(frames, sort=False) if frames else pd.DataFrame()
    pu.add_to_global_data(alldata, os.path.join(dirname, prefix))
    return alldata

### Force Reload plotutils (if you are editing it to add some new feature)

In [None]:
# Re-import the plotutils module (bound to `pu`) so that edits made to it on
# disk take effect without restarting the kernel. The trailing ';' keeps the
# returned module object from being echoed as the cell output.
importlib.reload(pu);

### Use this to delete all loaded data if necessary

In [None]:
def reset_data():
    """Discard everything loaded so far.

    Replaces ``pu.global_dataframe`` with an empty frame that has the csv
    columns plus ``date`` and ``hostname`` (index named ``'index'``), and
    replaces ``pu.global_datadict`` with an empty dict.
    """
    column_names = [
        'futures', 'launch', 'sync', 'exec', 'time', 'ftime',
        'sched', 'numa', 'threads', 'info', 'date', 'hostname',
    ]
    empty_frame = pd.DataFrame(columns=column_names)
    empty_frame.index.name = 'index'
    pu.global_dataframe = empty_frame
    pu.global_datadict = dict()

# start from a clean state
reset_data()

### Useful info about known machines

In [None]:
# Multi-line description of each known machine, keyed by the hostname prefix
# that is also used in the result file names.
machine_desc = {
    'ault': (
        'Ault \n'
        ' (AMD EPYC 7501@2GHz) \n'
        ' 32x2-Cores (128 pu) \n'
        ' 8 numa'
    ),
    'daint': (
        'Daint \n'
        ' (Xeon E5-2695v4@2.10GHz) \n'
        ' 18*2 Cores (72 pu) \n'
        ' 2 numa'
    ),
    'tave': (
        'Tave \n'
        ' KNL-Xeon-Phi 7230@1.30GHz \n'
        ' 64 Cores (256 pu) \n'
        ' 1 numa'
    ),
    'jb-s76': (
        'Laptop \n'
        ' 4 Cores (8 pu) \n'
        ' 1 numa'
    ),
    'pop-os': (
        'Laptop \n'
        ' i7-6700HQ@2.60GHz \n'
        ' 4 Cores (8 pu) \n'
        ' 1 numa'
    ),
}

## Each directory contains a set of files per machine etc
### Display some plots from two directories containing data for daint on two different days

In [None]:
# Load the results stored in <data_root>/<date_str> and print a summary.
date_str = '2020-08-21'
data_root = scriptpath # '/home/biddisco/benchmarking-results/future-overheads/'
data = load_future_overhead_files_in_dir(data_root, date_str)
pu.title_print('data for ' + date_str, data)

# Plot all schedulers, using an empty row field gives a tiled display
columns, rows = ['sched'], []
select        = { 'numa':0, 'date':date_str, 'hostname':'daint'}
plotvars      = {'x':'threads', 'y':'ftime', 'series':['launch','sync']}
xparams       = pu.axis(label='Threads',  scale='linear', base=10)
# Raw string for the label: '\m' in a normal string literal is an invalid
# escape sequence (SyntaxWarning on recent Python); the text is unchanged.
yparams       = pu.axis(label=r'Overhead $\mu$s', format=lambda v,pos: '{0:.2f}'.format(v), limits=(0,1.5), scale='linear')
# Column title: field name, then its value (the date is shown instead when the value is '1')
cparams       = pu.rowcol(format=
                    lambda t,v: str(t) + '\n' + (str(v) if str(v)!='1' else date_str[:10]))
# Row title: the machine description looked up by value
rparams       = pu.rowcol(format=lambda t,v: machine_desc[v])
fig = pu.plot_graph_series(data, rows, columns, select, plotvars, xparams, yparams, rparams=rparams, cparams=cparams, size=(6,6))

In [None]:
# Load the results stored in <data_root>/<date_str> and print a summary.
date_str = '2020-08-25'
data_root = scriptpath # '/home/biddisco/benchmarking-results/future-overheads/'
data = load_future_overhead_files_in_dir(data_root, date_str)
pu.title_print('', data)

# One tile per scheduler (empty row field), restricted to daint on this date
columns, rows = ['sched'], []
select        = { 'numa':0, 'date':date_str, 'hostname':'daint'}
plotvars      = {'x':'threads', 'y':'ftime', 'series':['launch','sync']}
xparams       = pu.axis(label='Threads',  scale='linear', base=10)
# Raw string for the label: '\m' in a normal string literal is an invalid
# escape sequence (SyntaxWarning on recent Python); the text is unchanged.
yparams       = pu.axis(label=r'Overhead $\mu$s', format=lambda v,pos: '{0:.2f}'.format(v), limits=(0,1.5), scale='linear')
# Column title: field name, then its value (the date is shown instead when the value is '1')
cparams       = pu.rowcol(format=
                    lambda t,v: str(t) + '\n' + (str(v) if str(v)!='1' else date_str[:10]))
# Row title: the machine description looked up by value
rparams       = pu.rowcol(format=lambda t,v: machine_desc[v])
fig = pu.plot_graph_series(data, rows, columns, select, plotvars, xparams, yparams, rparams=rparams, cparams=cparams, size=(8,6))

In [None]:
# plot comparison of some data across dates
# Uses pu.global_dataframe rather than a single day's frame, so that the
# 'date' field can be one of the plotted series.
rows, columns = ['sched'], ['launch']
select   = {'numa':0, 'sync':['limiting-Exec','latch'], 'hostname':['pop-os','daint']}
plotvars = {'x':'threads', 'y':'ftime', 'series':['sync','date']}
xparams       = pu.axis(label='Threads',  scale='linear', base=10)
# Raw string for the label: '\m' in a normal string literal is an invalid
# escape sequence (SyntaxWarning on recent Python); the text is unchanged.
yparams       = pu.axis(label=r'Overhead $\mu$s', format=lambda v,pos: '{0:.2f}'.format(v), limits=(0,1.5), scale='linear')
fig = pu.plot_graph_series(pu.global_dataframe, rows, columns, select, plotvars, xparams, yparams, size=(8,4))