# Model Factors

## Generates performance metrics from a set of Paraver traces


__author__ = "Michael Wagner"

__copyright__ = "Copyright 2017, Barcelona Supercomputing Center (BSC)"

__version__ = 0.3.6 +

### Imports

In [None]:
# Global imports
import os
import fnmatch
import time
import numpy
import scipy
import scipy.optimize
from collections import OrderedDict

### Externally defined functions

In [None]:
from helpers import Trace
from helpers import check_installation
from helpers import human_readable
from helpers import run_command
from helpers import save_remove

In [None]:
# PyCOMPSs imports
import pycompss.interactive as ipycompss

### Start the COMPSs runtime

In [None]:
ipycompss.start(graph=True, debug=True, verbose=True)

In [None]:
from pycompss.api.task import task
from pycompss.api.constraint import constraint
from pycompss.api.parameter import *
from pycompss.api.api import compss_wait_on

### Global variables

In [None]:
# Printable name of every raw data entry, in output order.
# All raw data is generated and printed from this mapping, so a new raw data
# entry has to be registered here as well.
raw_data_doc = OrderedDict([
    ('runtime', 'Runtime (us)'),
    ('runtime_dim', 'Runtime (ideal)'),
    ('useful_avg', 'Useful duration (average)'),
    ('useful_max', 'Useful duration (maximum)'),
    ('useful_tot', 'Useful duration (total)'),
    ('useful_dim', 'Useful duration (ideal, max)'),
    ('useful_ins', 'Useful instructions (total)'),
    ('useful_cyc', 'Useful cycles (total)'),
])

# Printable name of every model factor, in output order. The leading spaces
# of some names encode the indentation used in the printed tables.
# All model factors are generated and printed from this mapping, so a new
# model factor has to be registered here as well.
mod_factors_doc = OrderedDict([
    ('parallel_eff', 'Parallel efficiency'),
    ('load_balance', '  Load balance'),
    ('comm_eff', '  Communication efficiency'),
    ('serial_eff', '    Serialization efficiency'),
    ('transfer_eff', '    Transfer efficiency'),
    ('comp_scale', 'Computation scalability'),
    ('global_eff', 'Global efficiency'),
    ('ipc_scale', 'IPC scalability'),
    ('inst_scale', 'Instruction scalability'),
    ('freq_scale', 'Frequency scalability'),
    ('speedup', 'Speedup'),
    ('ipc', 'Average IPC'),
    ('freq', 'Average frequency (GHz)'),
])

### Functions

In [None]:
class _CsvTrace(object):
    """Minimal stand-in for a trace object rebuilt from a csv column.

    It only provides get_processes(), which is the one method the table
    printer calls on the values of its ``traces`` argument.
    """

    def __init__(self, processes):
        self._processes = processes

    def get_processes(self):
        return self._processes


def read_mod_factors_csv(debug, project):
    """Reads the model factors table from a csv file.

    The first line of the file holds a label followed by the number of
    processes of each trace; the following lines hold one model factor each,
    in the order of mod_factors_doc, with ';' as delimiter.

    Parameters:
        debug: if true, the parsed table is printed to stdout.
        project: path of the csv file to read.

    Returns:
        (mod_factors, trace_list, trace_processes) where mod_factors has the
        format [mod factor key][trace], trace_list holds the process counts
        as strings (used as artificial trace names) and trace_processes maps
        each of those names to its integer process count.

    Raises:
        Exception: if the path is not an existing '.csv' file or it is empty.
    """
    delimiter = ';'
    file_path = project

    if not (os.path.isfile(file_path) and file_path.endswith('.csv')):
        raise Exception('==ERROR== ' + str(file_path) + ' is not a valid csv file.')

    #Read csv to list of lines
    with open(file_path, 'r') as f:
        lines = [line.rstrip('\n') for line in f]

    if not lines:
        raise Exception('==ERROR== ' + str(file_path) + ' is empty.')

    #Get the number of processes of the traces (first column is the label)
    processes = lines[0].split(delimiter)[1:]

    #Create artificial trace_list and trace_processes
    trace_list = list(processes)
    trace_processes = dict((process, int(process)) for process in processes)

    #Create mod_factors handle with every entry initialized to 0.0.
    #create_mod_factors() cannot be used here: it takes a single trace name
    #and would try to use the whole list as one dictionary key.
    mod_factors = {}
    for key in mod_factors_doc:
        mod_factors[key] = dict((trace, 0.0) for trace in trace_list)

    #Iterate over the data lines; zip stops after the last model factor key,
    #so trailing raw data lines are ignored.
    for key, line in zip(mod_factors_doc, lines[1:]):
        fields = line.split(delimiter)
        for column, trace in enumerate(trace_list, 1):
            mod_factors[key][trace] = float(fields[column])

    if debug:
        #The table printer expects a mapping of trace name to an object with
        #get_processes(), so wrap the parsed process counts accordingly.
        traces = OrderedDict((trace, _CsvTrace(trace_processes[trace])) for trace in trace_list)
        print_mod_factors_table(mod_factors, traces)

    return mod_factors, trace_list, trace_processes

In [None]:
def amdahl(x, x0, f):
    """Projection function based on Amdahl; 2 degrees of freedom: x0, f"""
    denominator = f + (1 - f) * x
    return x0 / denominator

def pipe(x, x0, f):
    """Projection function based on a pipeline; 2 degrees of freedom: x0, f"""
    numerator = x0 * x
    denominator = (1 - f) + f * (2 * x - 1)
    return numerator / denominator

def linear(x, x0, f):
    """Linear projection function; 2 degrees of freedom: x0 (offset), f (slope)"""
    slope_term = f * x
    return x0 + slope_term

In [None]:
def compute_projection(mod_factors, traces, debug, model, limit, bounds, sigma, out, cfgs):
    """Fits projection curves to the gathered model factors and plots them.

    Parameters:
        mod_factors: 2D dictionary [mod factor key][trace] of model factors.
        traces: mapping of trace name to trace object (see get_list_proc).
        debug: if true, a debug message is printed.
        model: name of the projection function: 'amdahl', 'pipe' or 'linear'.
        limit: upper end of the projected x range; a false value selects 10000.
        bounds: if true, the fit parameter f is restricted to [0, 1].
        sigma: weighting of the data points: 'first', 'equal' or 'decrease'.
        out: path of the gnuplot file to write.
        cfgs: directory passed on to the plot generators as configuration path.

    Returns:
        None. The results are written by create_gnuplot and shown through
        create_matplotlib/show. None is also returned early, after printing
        an error, if curve_fit rejects its arguments with a TypeError.

    Raises:
        ValueError: if model or sigma is not one of the supported names.
    """
    #Select model function together with the helper that renders the fitted
    #parameters as gnuplot functions.
    models = {
        'amdahl': (amdahl, fit_amdahl),
        'pipe': (pipe, fit_pipe),
        'linear': (linear, fit_linear),
    }
    if model not in models:
        raise ValueError('==Error== Unknown projection model: ' + str(model))
    _model, _fit = models[model]

    trace_list, trace_processes = get_list_proc(traces)
    trace_list = list(trace_list)

    if debug:
        print('==DEBUG== Computing projection of model factors.')

    number_traces = len(trace_list)

    #Set data uncertainty for vector with y-values.
    #Smaller values mean higher priority for these y-values.
    #Values are compared relatively, not absolute.
    if sigma == 'first':
        _sigma = numpy.ones(number_traces)
        _sigma[0] = 0.1
    elif sigma == 'equal':
        _sigma = numpy.ones(number_traces)
    elif sigma == 'decrease':
        _sigma = numpy.linspace(1, 2, number_traces)
    else:
        raise ValueError('==Error== Unknown sigma mode: ' + str(sigma))

    #Convert dictionaries to NumPy arrays
    x_proc = numpy.zeros(number_traces)
    y_para = numpy.zeros(number_traces)
    y_load = numpy.zeros(number_traces)
    y_comm = numpy.zeros(number_traces)
    y_comp = numpy.zeros(number_traces)
    y_glob = numpy.zeros(number_traces)
    for index, trace in enumerate(trace_list):
        x_proc[index] = trace_processes[trace]
        y_para[index] = mod_factors['parallel_eff'][trace]
        y_load[index] = mod_factors['load_balance'][trace]
        y_comm[index] = mod_factors['comm_eff'][trace]
        y_comp[index] = mod_factors['comp_scale'][trace]
        y_glob[index] = mod_factors['global_eff'][trace]

    #Set limit for projection
    _limit = str(limit) if limit else '10000'

    #Set boundary for curve fitting parameters: ([x0_min,f_min],[x0_max,f_max])
    #For amdahl and pipe f is in [0,1]
    if bounds:
        _bounds = ([-numpy.inf, 0], [numpy.inf, 1])
    else:
        _bounds = ([-numpy.inf, -numpy.inf], [numpy.inf, numpy.inf])

    # Execute curve fitting, returns optimal parameters array and covariance matrix.
    # Uses a Levenberg-Marquardt algorithm, i.e. damped least-squares, if no
    # bounds are provided; otherwise a Trust Region Reflective algorithm.
    # Please note: Both are not true least squares.
    # They are greedy methods and simply run into the nearest local minimum.
    # However, this should work fine for this simple 1D optimization.
    # The try detects SciPy versions whose curve_fit does not accept 'bounds'.
    try:
        para_opt, para_cov = scipy.optimize.curve_fit(_model, x_proc, y_para, sigma=_sigma, bounds=_bounds)
        load_opt, load_cov = scipy.optimize.curve_fit(_model, x_proc, y_load, sigma=_sigma, bounds=_bounds)
        comm_opt, comm_cov = scipy.optimize.curve_fit(_model, x_proc, y_comm, sigma=_sigma, bounds=_bounds)
        comp_opt, comp_cov = scipy.optimize.curve_fit(_model, x_proc, y_comp, sigma=_sigma, bounds=_bounds)
        glob_opt, glob_cov = scipy.optimize.curve_fit(_model, x_proc, y_glob, sigma=_sigma, bounds=_bounds)
    except TypeError:
        print('==Error== Projection failed! The script requires SciPy 0.17.0 or newer.')
        return

    #Create the fitting functions for gnuplot; 2 degrees of freedom: x0, f
    load_fit, comm_fit, comp_fit = _fit(x_proc, load_opt, comm_opt, comp_opt)

    #para and glob are not plotted from their own fit but as the product of
    #the fitted component functions.
    para_fit = 'para( x ) = load( x ) * comm( x ) / 100'
    glob_fit = 'glob( x ) = para( x ) * comp( x ) / 100'

    #Create the output directory if needed; 'out' may be a bare file name,
    #in which case there is no directory to create.
    out_dir = os.path.dirname(out)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    outfile_path = out
    cfgs_path = cfgs
    points_data = [x_proc, y_para, y_load, y_comm, y_comp, y_glob, number_traces]
    create_gnuplot(_limit, para_fit, load_fit, comm_fit, comp_fit, glob_fit, points_data, cfgs_path, outfile_path)
    plot = create_matplotlib(_limit, para_fit, load_fit, comm_fit, comp_fit, glob_fit, points_data, cfgs_path, outfile_path)
    show(plot)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

def show(plot):
    """Display the given plot object by invoking its plot() method."""
    render = plot.plot
    render()

### Auxiliary functions

In [None]:
from helpers import get_traces_from_args

In [None]:
def create_raw_data(trace_name):
    """Build the 2D raw data dictionary for one trace, initialized with zero.

    The result has the format [raw data key][trace]: one entry per key of
    raw_data_doc, each holding its own dictionary {trace_name: 0}.
    """
    return {data_key: {trace_name: 0} for data_key in raw_data_doc}

In [None]:
def create_mod_factors(trace_name):
    """Build the 2D model factors dictionary for one trace, initialized with 0.0.

    The result has the format [mod factor key][trace]: one entry per key of
    mod_factors_doc, each holding its own dictionary {trace_name: 0.0}.
    """
    return {factor_key: {trace_name: 0.0} for factor_key in mod_factors_doc}

In [None]:
def get_list_proc(traces):
    """Return the trace names and the number of processes of each trace.

    Parameters:
        traces: mapping of trace name to an object offering get_processes().

    Returns:
        (trace_list, trace_processes): the trace names as a list, in the
        iteration order of ``traces``, and a dictionary mapping each name to
        the value returned by its get_processes().
    """
    # Materialize the keys so that callers get an indexable snapshot instead
    # of a dictionary view tied to ``traces``.
    trace_list = list(traces.keys())
    trace_processes = {}
    for trace_name in trace_list:
        trace_processes[trace_name] = traces[trace_name].get_processes()
    return trace_list, trace_processes

In [None]:
def print_raw_data_table(raw_data, traces):
    """Write the raw data table to stdout as aligned, human readable text.

    The header row shows the number of processes of each trace; every
    following row shows one raw data entry of raw_data_doc for all traces.
    """
    print('Overview of the collected raw data:')

    trace_list, trace_processes = get_list_proc(traces)

    name_width = len(sorted(raw_data_doc.values(), key=len)[-1])

    def build_row(label, cells):
        # Every cell is right-aligned in 15 characters behind a ' | ' separator.
        return label + ''.join(' | ' + str(cell).rjust(15) for cell in cells)

    header = build_row(''.rjust(name_width), [trace_processes[name] for name in trace_list])
    print(header)

    print(''.ljust(len(header), '='))

    for data_key, title in raw_data_doc.items():
        values = [raw_data[data_key][name] for name in trace_list]
        print(build_row(title.ljust(name_width), values))
    print('')

In [None]:
def print_raw_data_table_html(raw_data, traces):
    """Display the raw data table as an html table.

    The columns are headed by the number of processes of each trace; every
    row shows one raw data entry of raw_data_doc for all traces.
    """
    print('Overview of the collected raw data:')

    trace_list, trace_processes = get_list_proc(traces)

    headers = ['Parameter'] + [str(trace_processes[name]) for name in trace_list]

    rows = []
    for data_key, title in raw_data_doc.items():
        rows.append([title] + [str(raw_data[data_key][name]) for name in trace_list])

    # Imported lazily so the module can be used without IPython/tabulate.
    from IPython.display import HTML, display
    import tabulate
    html_table = tabulate.tabulate(rows, headers=headers, tablefmt='html', floatfmt=".2f")
    display(HTML(html_table))

In [None]:
def print_mod_factors_table(mod_factors, traces):
    """Write the model factors table to stdout as aligned, human readable text.

    The header row shows the number of processes of each trace. Speedup, IPC
    and frequency are printed as plain numbers, all other factors as
    percentages. Values that cannot be formatted as a number (e.g. 'NaN'
    strings) are printed as they are.
    """
    print('Overview of the computed model factors:')

    name_width = len(sorted(mod_factors_doc.values(), key=len)[-1])

    trace_list, trace_processes = get_list_proc(traces)

    separator = ' | '
    cell_width = 10
    plain_number_keys = ['speedup', 'ipc', 'freq']
    spacer_after_keys = ['global_eff', 'freq_scale']

    header = ''.rjust(name_width)
    for name in trace_list:
        header += separator + str(trace_processes[name]).rjust(cell_width)
    print(header)

    print(''.ljust(len(header), '='))

    for mod_key, title in mod_factors_doc.items():
        template = '{0:.2f}' if mod_key in plain_number_keys else '{0:.2f}%'
        row = title.ljust(name_width)
        for name in trace_list:
            row += separator
            try:  # numeric formatting fails with ValueError for 'NaN' strings
                row += template.format(mod_factors[mod_key][name]).rjust(cell_width)
            except ValueError:
                row += '{}'.format(mod_factors[mod_key][name]).rjust(cell_width)
        print(row)

        # An empty row visually separates the groups of factors.
        if mod_key in spacer_after_keys:
            spacer = ''.ljust(name_width)
            for name in trace_list:
                spacer += separator + ''.rjust(cell_width)
            print(spacer)
    print('')

In [None]:
def print_mod_factors_table_html(mod_factors, traces):
    """Display the model factors table as an html table.

    The columns are headed by the number of processes of each trace; every
    row shows one model factor of mod_factors_doc for all traces.
    """
    print('Overview of the computed model factors:')

    trace_list, trace_processes = get_list_proc(traces)

    headers = ['Parameter'] + [str(trace_processes[name]) for name in trace_list]

    rows = []
    for mod_key, title in mod_factors_doc.items():
        # Values are passed on unconverted so that floatfmt applies to them.
        rows.append([title] + [mod_factors[mod_key][name] for name in trace_list])

    # Imported lazily so the module can be used without IPython/tabulate.
    from IPython.display import HTML, display
    import tabulate
    html_table = tabulate.tabulate(rows, headers=headers, tablefmt='html', floatfmt=".2f")
    display(HTML(html_table))

In [None]:
def print_mod_factors_csv(mod_factors, raw_data, traces, file_path):
    """Write the model factors and the raw data to a ';'-separated csv file.

    The file starts with a header line holding the number of processes of
    each trace, followed by one line per model factor (6 decimals), a line
    containing only '#', and one '#'-prefixed line per raw data entry
    (2 decimals). Values that cannot be formatted as a number are written
    as they are.
    """
    trace_list, trace_processes = get_list_proc(traces)

    delimiter = ';'

    def render(value, template):
        # Numeric formatting raises ValueError for non-numeric values such as 'NaN'.
        try:
            return template.format(value)
        except ValueError:
            return '{}'.format(value)

    with open(file_path, 'w') as output:
        header = 'Number of processes'
        header += ''.join(delimiter + str(trace_processes[name]) for name in trace_list)
        output.write(header + '\n')

        # NOTE(review): the data columns follow the key order of the inner
        # dictionaries, not trace_list, so they are only aligned with the
        # header if both orders agree - confirm against the caller.
        for mod_key, title in mod_factors_doc.items():
            row = title.replace('  ', '', 2)
            for value in mod_factors[mod_key].values():
                row += delimiter + render(value, '{0:.6f}')
            output.write(row + '\n')

        output.write('#\n')

        for raw_key, title in raw_data_doc.items():
            row = '#' + title
            for value in raw_data[raw_key].values():
                row += delimiter + render(value, '{0:.2f}')
            output.write(row + '\n')

    print('Model factors written to ' + file_path)

In [None]:
def create_ideal_trace(trace, processes, cfg_dir, debug):
    """Runs prv2dim and Dimemas with the ideal configuration for given trace.

    Parameters:
        trace: path of the trace; its last four characters (the extension)
            are replaced to derive the names of all generated files.
        processes: number of tasks written into the Dimemas configuration.
        cfg_dir: directory holding 'dimemas_ideal.cfg' and
            'dimemas.collectives'.
        debug: passed on to run_command; also enables debug messages.

    Returns:
        The path of the simulated trace ('<trace base>.sim.prv'), or an empty
        string if either prv2dim or Dimemas did not produce its output file.
    """
    trace_base = trace[:-4]
    trace_dim = trace_base + '.dim'
    trace_sim = trace_base + '.sim.prv'
    trace_cfg = trace_base + '.dimemas_ideal.cfg'

    run_command(['prv2dim', trace, trace_dim], debug)

    if not os.path.isfile(trace_dim):
        print('==Error== ' + trace_dim + ' could not be created.')
        # Same failure value as below, so callers can test the result
        # uniformly against the empty string.
        return ''
    if debug:
        print('==DEBUG== Created file ' + trace_dim)

    #Create Dimemas configuration from the template
    with open(os.path.join(cfg_dir, 'dimemas_ideal.cfg')) as f:
        content = f.readlines()

    collectives_path = os.path.join(cfg_dir, 'dimemas.collectives')
    content = [line.replace('REPLACE_BY_NTASKS', str(processes)) for line in content]
    content = [line.replace('REPLACE_BY_COLLECTIVES_PATH', collectives_path) for line in content]

    with open(trace_cfg, 'w') as f:
        f.writelines(content)

    cmd = ['Dimemas', '-S', '32k', '--dim', trace_dim, '-p', trace_sim, trace_cfg]
    run_command(cmd, debug)

    #Remove the intermediate files
    os.remove(trace_dim)
    os.remove(trace_cfg)

    if not os.path.isfile(trace_sim):
        print('==Error== ' + trace_sim + ' could not be created.')
        return ''
    if debug:
        print('==DEBUG== Created file ' + trace_sim)
    return trace_sim


### Tasks

In [None]:
@task(returns=str)
def get_scaling_type(raw_data, traces, scaling, debug):
    """Guess the scaling type (weak/strong) based on the useful instructions.

    Computes the normalized instruction ratio for all measurements, whereas the
    normalized instruction ratio is (instructions ratio / process ratio) with
    the first trace as reference. For exact weak scaling the normalized ratio
    should be exactly 1 and for exact strong scaling it should be close to zero
    with an upper bound of 0.5. The eps value defines the threshold to be
    considered weak scaling and should give enough buffer to safely handle
    non-ideal scaling.

    Parameters:
        raw_data: 2D dictionary [raw data key][trace]; 'useful_ins' is read.
        traces: mapping of trace name to an object offering get_processes().
        scaling: 'auto' to return the detected type, or 'weak'/'strong' to
            force that type (a warning is printed if detection disagrees).
        debug: if true, the detected type is printed in 'auto' mode.

    Returns:
        'weak' or 'strong'. With fewer than two traces 'strong' is returned
        without looking at ``scaling``.

    Raises:
        Exception: if ``scaling`` is none of 'auto', 'weak' or 'strong'.
    """
    eps = 0.9

    trace_list = list(traces.keys())

    #With less than two traces there is nothing to compare.
    if len(trace_list) < 2:
        return 'strong'

    #Number of processes of every trace; each trace needs its own value to
    #get its own process ratio relative to the first trace.
    trace_processes = {}
    for trace_name in trace_list:
        trace_processes[trace_name] = traces[trace_name].get_processes()

    first_trace = trace_list[0]
    first_trace_instructions = float(raw_data['useful_ins'][first_trace])
    first_trace_processes = float(trace_processes[first_trace])

    normalized_inst_ratio = 0
    for trace in trace_list:
        inst_ratio = float(raw_data['useful_ins'][trace]) / first_trace_instructions
        proc_ratio = float(trace_processes[trace]) / first_trace_processes
        normalized_inst_ratio += inst_ratio / proc_ratio

    #Get the average inst increase. Ignore ratio of first trace (1.0)
    normalized_inst_ratio = (normalized_inst_ratio - 1) / (len(trace_list) - 1)

    if normalized_inst_ratio > eps:
        scaling_computed = 'weak'
    else:
        scaling_computed = 'strong'

    if scaling == 'auto':
        if debug:
            print('==DEBUG== Detected ' + scaling_computed + ' scaling.')
            print('')
        return scaling_computed

    if scaling == 'weak':
        if scaling_computed == 'strong':
            print('==Warning== Scaling set to weak scaling but detected strong scaling.')
            print('')
        return 'weak'

    if scaling == 'strong':
        if scaling_computed == 'weak':
            print('==Warning== Scaling set to strong scaling but detected weak scaling.')
            print('')
        return 'strong'

    raise Exception('==Error== reached undefined control flow state.')

In [None]:
def _read_stats_values(stats_path, labels):
    """Return {label: float} for the lines of a stats file starting with a label.

    Only lines whose first whitespace-separated token is in ``labels`` are
    used; their second token is converted to float. If a label occurs more
    than once, the last occurrence wins. Returns None if the file is missing.
    """
    if not os.path.exists(stats_path):
        return None
    values = {}
    with open(stats_path) as f:
        for line in f:
            tokens = line.split()
            if tokens and tokens[0] in labels:
                values[tokens[0]] = float(tokens[1])
    return values


def _store_stats_values(raw_data, trace_name, stats_path, label_to_key, convert=float):
    """Copy labelled values of one stats file into raw_data for trace_name.

    ``label_to_key`` maps a line label of the stats file to the raw data key
    it fills. If ``stats_path`` is None or the file is missing, all mapped
    keys are set to 'NaN'; a label that is absent from an existing file
    leaves its raw data entry untouched.
    """
    values = None
    if stats_path is not None:
        values = _read_stats_values(stats_path, label_to_key)
    for label, key in label_to_key.items():
        if values is None:
            raw_data[key][trace_name] = 'NaN'
        elif label in values:
            raw_data[key][trace_name] = convert(values[label])


@task(trace=FILE_IN, timings=FILE_IN, runtime=FILE_IN, cycles=FILE_IN, inst=FILE_IN, dimemas_cfgs=FILE_IN, dimemas_collectives=FILE_IN, returns=dict)
def gather_raw_data(trace, timings, runtime, cycles, inst, dimemas_cfgs, dimemas_collectives, trace_processes, cfgs_path, debug):
    """Gathers all raw data needed to generate the model factors for one trace.

    Creates the simulated ideal trace with create_ideal_trace, runs paramedir
    on the original and the simulated trace, parses the generated stats
    files and removes them again.

    Parameters:
        trace: path of the trace; its last four characters are replaced to
            derive the names of the stats files.
        timings, runtime, cycles, inst, dimemas_cfgs, dimemas_collectives:
            only declared as FILE_IN for the task decorator; not read by the
            body, which locates the configuration files through cfgs_path.
        trace_processes: number of processes of the trace.
        cfgs_path: directory holding the paramedir and Dimemas configurations.
        debug: passed on to the helpers that run commands and remove files.

    Returns:
        2D dictionary [raw data key][trace name], the trace name being the
        base name of ``trace``. Entries whose stats file is missing are set
        to 'NaN'.
    """
    trace_name = os.path.basename(trace)
    raw_data = create_raw_data(trace_name)

    timings_cfg = os.path.join(cfgs_path, 'timings.cfg')
    runtime_cfg = os.path.join(cfgs_path, 'runtime.cfg')
    cycles_cfg = os.path.join(cfgs_path, 'cycles.cfg')
    instructions_cfg = os.path.join(cfgs_path, 'instructions.cfg')

    time_tot = time.time()

    line = 'Analyzing ' + trace_name
    line += ' (' + str(trace_processes) + ' processes'
    line += ', ' + human_readable( os.path.getsize( trace ) ) + ')'
    print(line)

    #Create simulated ideal trace with Dimemas
    time_dim = time.time()
    trace_sim = create_ideal_trace(trace, trace_processes, cfgs_path, debug)
    time_dim = time.time() - time_dim
    if trace_sim:
        print('Successfully created simulated trace with Dimemas in {0:.1f} seconds.'.format(time_dim))
    else:
        print('Failed to create simulated trace with Dimemas.')

    #Names of the paramedir output files. Without a simulated trace there are
    #no ideal stats files; do not derive paths from an empty name, which
    #would point to unrelated files in the working directory.
    trace_base = trace[:-4]
    timings_stats = trace_base + '.timings.stats'
    runtime_stats = trace_base + '.runtime.stats'
    cycles_stats = trace_base + '.cycles.stats'
    instructions_stats = trace_base + '.instructions.stats'
    if trace_sim:
        sim_timings_stats = trace_sim[:-4] + '.timings.stats'
        sim_runtime_stats = trace_sim[:-4] + '.runtime.stats'
    else:
        sim_timings_stats = None
        sim_runtime_stats = None

    #Run paramedir for the original and simulated trace
    time_pmd = time.time()
    cmd_normal = ['paramedir', trace,
                  timings_cfg, timings_stats,
                  runtime_cfg, runtime_stats,
                  cycles_cfg, cycles_stats,
                  instructions_cfg, instructions_stats]
    run_command(cmd_normal, debug)
    if trace_sim:
        cmd_ideal = ['paramedir', trace_sim,
                     timings_cfg, sim_timings_stats,
                     runtime_cfg, sim_runtime_stats]
        run_command(cmd_ideal, debug)
    time_pmd = time.time() - time_pmd

    #Check if all files are created
    error_timing = not (os.path.exists(timings_stats) and os.path.exists(runtime_stats))
    if error_timing:
        print('==ERROR== Failed to compute timing information with paramedir.')

    error_counters = not (os.path.exists(cycles_stats) and os.path.exists(instructions_stats))
    if error_counters:
        print('==ERROR== Failed to compute counter information with paramedir.')

    #The ideal data is only used if both ideal stats files exist.
    error_ideal = not (trace_sim
                       and os.path.exists(sim_timings_stats)
                       and os.path.exists(sim_runtime_stats))
    if error_ideal:
        print('==ERROR== Failed to compute timing information with paramedir.')

    if error_timing or error_counters or error_ideal:
        print('Failed to analyze trace with paramedir in {0:.1f} seconds.'.format(time_pmd))
    else:
        print('Successfully analyzed trace with paramedir in {0:.1f} seconds.'.format(time_pmd))

    #Parse the paramedir output files

    #Get total, average, and maximum useful duration
    _store_stats_values(raw_data, trace_name, timings_stats,
                        {'Total': 'useful_tot', 'Average': 'useful_avg', 'Maximum': 'useful_max'})

    #Get runtime
    _store_stats_values(raw_data, trace_name, runtime_stats, {'Average': 'runtime'})

    #Get useful cycles and useful instructions (stored as integers)
    _store_stats_values(raw_data, trace_name, cycles_stats, {'Total': 'useful_cyc'}, convert=int)
    _store_stats_values(raw_data, trace_name, instructions_stats, {'Total': 'useful_ins'}, convert=int)

    #Get maximum useful duration and runtime for simulated trace
    _store_stats_values(raw_data, trace_name, None if error_ideal else sim_timings_stats,
                        {'Maximum': 'useful_dim'})
    _store_stats_values(raw_data, trace_name, None if error_ideal else sim_runtime_stats,
                        {'Average': 'runtime_dim'})

    #Remove paramedir output files, including ideal stats files that were
    #created but not used because their counterpart is missing.
    save_remove(timings_stats, debug)
    save_remove(runtime_stats, debug)
    save_remove(cycles_stats, debug)
    save_remove(instructions_stats, debug)
    if sim_timings_stats is not None:
        save_remove(sim_timings_stats, debug)
        save_remove(sim_runtime_stats, debug)

    time_tot = time.time() - time_tot
    print('Finished successfully in {0:.1f} seconds.'.format(time_tot))
    print('')

    return raw_data

In [None]:
@task(returns=dict, priority=True)
def merge_dicts(dict1, dict2):
    """Merge two 2D dictionaries key by key into a new dictionary.

    For every key of dict1 the inner dictionary of dict1 is copied and then
    updated with the inner dictionary of dict2 stored under the same key, so
    entries of dict2 win on conflicts. Keys only present in dict2 are not
    part of the result; a key missing in dict2 raises KeyError.
    """
    merged = {}
    for key, left in dict1.items():
        right = dict2[key]
        combined = left.copy()
        combined.update(right)
        merged[key] = combined
    return merged

In [None]:
@task(trace=FILE_IN, returns=dict)
def compute_model_factors(raw_data, trace, trace_processes, first_trace, first_trace_processes, scaling, debug):
    """Computes the model factors from the gathered raw data and returns the
    according dictionary of model factors."""
    trace_name = os.path.basename(trace)
    mod_factors = create_mod_factors(trace_name)
    
    proc_ratio = float(trace_processes) / float(first_trace_processes)
    
    #Basic efficiency factors
    try: #except NaN
        mod_factors['load_balance'][trace_name] = raw_data['useful_avg'][trace_name] / raw_data['useful_max'][trace_name] * 100.0
    except:
        mod_factors['load_balance'][trace_name] = 'NaN'

    try: #except NaN
        mod_factors['comm_eff'][trace_name] = raw_data['useful_max'][trace_name] / raw_data['runtime'][trace_name] * 100.0
    except:
        mod_factors['comm_eff'][trace_name] = 'NaN'

    try: #except NaN
        mod_factors['serial_eff'][trace_name] = raw_data['useful_dim'][trace_name] / raw_data['runtime_dim'][trace_name] * 100.0
    except:
        mod_factors['serial_eff'][trace_name] = 'NaN'

    try: #except NaN
        mod_factors['transfer_eff'][trace_name] = mod_factors['comm_eff'][trace_name] / mod_factors['serial_eff'][trace_name] * 100.0
    except:
        mod_factors['transfer_eff'][trace_name] = 'NaN'

    try: #except NaN
        mod_factors['parallel_eff'][trace_name] = mod_factors['load_balance'][trace_name] * mod_factors['comm_eff'][trace_name] / 100.0
    except:
        mod_factors['parallel_eff'][trace_name] = 'NaN'

    try: #except NaN
        if scaling == 'strong':
            mod_factors['comp_scale'][trace_name] = raw_data['useful_tot'][first_trace] / raw_data['useful_tot'][trace_name] * 100.0
        else:
            mod_factors['comp_scale'][trace_name] = raw_data['useful_tot'][first_trace] / raw_data['useful_tot'][trace_name] * proc_ratio * 100.0
    except:
        mod_factors['comp_scale'][trace_name] = 'NaN'

    try: #except NaN
        mod_factors['global_eff'][trace_name] = mod_factors['parallel_eff'][trace_name] * mod_factors['comp_scale'][trace_name] / 100.0
    except:
        mod_factors['global_eff'][trace_name] = 'NaN'

    #Basic scalability factors
    try: #except NaN
        mod_factors['ipc'][trace_name] = float(raw_data['useful_ins'][trace_name]) / float(raw_data['useful_cyc'][trace_name])
    except:
        mod_factors['ipc'][trace_name] = 'NaN'
    try: #except NaN
        ipc_first_trace = float(raw_data['useful_ins'][first_trace]) / float(raw_data['useful_cyc'][first_trace])
        mod_factors['ipc_scale'][trace_name] = mod_factors['ipc'][trace_name] / ipc_first_trace * 100.0
        # Do not reuse mod_factors for the first_trace to avoid INOUT
        # mod_factors['ipc_scale'][trace_name] = mod_factors['ipc'][trace_name] / mod_factors['ipc'][first_trace] * 100.0
    except Exception as e:
        mod_factors['ipc_scale'][trace_name] = 'NaN'
    try: #except NaN
        mod_factors['freq'][trace_name] = float(raw_data['useful_cyc'][trace_name]) / float(raw_data['useful_tot'][trace_name]) / 1000
    except:
        mod_factors['freq'][trace_name] = 'NaN'
    try: #except NaN
        freq_first_trace = float(raw_data['useful_cyc'][first_trace]) / float(raw_data['useful_tot'][first_trace]) / 1000
        mod_factors['freq_scale'][trace_name] = mod_factors['freq'][trace_name] / freq_first_trace * 100.0
        # Do not reuse mod_factors for the first_trace to avoid INOUT
        # mod_factors['freq_scale'][trace_name] = mod_factors['freq'][trace_name] / mod_factors['freq'][first_trace] * 100.0
    except Exception as e:
        mod_factors['freq_scale'][trace_name] = 'NaN'
    try: #except NaN
        if scaling == 'strong':
            mod_factors['inst_scale'][trace_name] = float(raw_data['useful_ins'][first_trace]) / float(raw_data['useful_ins'][trace_name]) * 100.0
        else:
            mod_factors['inst_scale'][trace_name] = float(raw_data['useful_ins'][first_trace]) / float(raw_data['useful_ins'][trace_name]) * proc_ratio * 100.0
    except:
        mod_factors['inst_scale'][trace_name] = 'NaN'
    try: #except NaN
        if scaling == 'strong':
            mod_factors['speedup'][trace_name] = raw_data['runtime'][first_trace] / raw_data['runtime'][trace_name]
        else:
            mod_factors['speedup'][trace_name] = raw_data['runtime'][first_trace] / raw_data['runtime'][trace_name] * proc_ratio
    except:
        mod_factors['speedup'][trace_name] = 'NaN'

    return mod_factors

In [None]:
@task(returns=(str, str, str))
def fit_amdahl(x_proc, load_opt, comm_opt, comp_opt):
    """Render the fitted Amdahl functions load, comm and comp for gnuplot.

    Each *_opt holds the fitted parameters (x0, f). The functions are only
    defined for x greater than the first entry of x_proc (1/0 otherwise).
    """
    template = '{name}( x ) = ( x > {start} ) ? {x0} / ( {f} + ( 1 - {f} ) * x ) : 1/0'

    def render(name, opt):
        return template.format(name=name, start=str(x_proc[0]), x0=str(opt[0]), f=str(opt[1]))

    load_fit = render('load', load_opt)
    comm_fit = render('comm', comm_opt)
    comp_fit = render('comp', comp_opt)
    return load_fit, comm_fit, comp_fit

In [None]:
@task(returns=(str, str, str))
def fit_pipe(x_proc, load_opt, comm_opt, comp_opt):
    """Render the fitted pipeline functions load, comm and comp for gnuplot.

    Each *_opt holds the fitted parameters (x0, f). The functions are only
    defined for x greater than the first entry of x_proc (1/0 otherwise).
    """
    template = '{name}( x ) = ( x > {start} ) ? {x0} * x / ( ( 1 - {f} ) + {f} * ( 2 * x - 1 ) ) : 1/0'

    def render(name, opt):
        return template.format(name=name, start=str(x_proc[0]), x0=str(opt[0]), f=str(opt[1]))

    load_fit = render('load', load_opt)
    comm_fit = render('comm', comm_opt)
    comp_fit = render('comp', comp_opt)
    return load_fit, comm_fit, comp_fit

In [None]:
@task(returns=(str, str, str))
def fit_linear(x_proc, load_opt, comm_opt, comp_opt):
    """Build the gnuplot function definitions for the linear model.

    Each returned string has the form
    ``name( x ) = ( x > x0 ) ? a + x * b : 1/0`` with ``x0 = x_proc[0]`` and
    ``a, b`` the first two entries of the matching ``*_opt`` sequence.

    Returns:
        A tuple ``(load_fit, comm_fit, comp_fit)`` of strings.
    """
    pattern = '%s( x ) = ( x > %s ) ? %s + x * %s : 1/0'
    x_min = str(x_proc[0])

    load_fit = pattern % ('load', x_min, str(load_opt[0]), str(load_opt[1]))
    comm_fit = pattern % ('comm', x_min, str(comm_opt[0]), str(comm_opt[1]))
    comp_fit = pattern % ('comp', x_min, str(comp_opt[0]), str(comp_opt[1]))
    return load_fit, comm_fit, comp_fit

In [None]:
@task(file_path=FILE_OUT)
def create_gnuplot(limit, para_fit, load_fit, comm_fit, comp_fit, glob_fit, points_data, cfgs_path, file_path):
    """Generate a gnuplot script from the ``modelfactors.gp`` template.

    The template is read from ``cfgs_path``, its ``#REPLACE_BY_*`` placeholders
    are substituted, and the result is written to ``file_path`` followed by
    five inline data sections (para, load, comm, comp, glob), each terminated
    by a line containing ``e``, and a final ``pause -1``.

    Args:
        limit: Upper bound of the x range. May be a string or a number; it is
            converted with ``str()`` (the previous code required a string and
            raised ``TypeError`` for an int).
        para_fit, load_fit, comm_fit, comp_fit, glob_fit: Text substituted for
            the corresponding ``#REPLACE_BY_<NAME>_FUNCTION`` placeholder.
        points_data: 7-tuple ``(x_proc, y_para, y_load, y_comm, y_comp,
            y_glob, number_traces)``; the first ``number_traces`` entries of
            each sequence are written as data points.
        cfgs_path: Directory containing ``modelfactors.gp``.
        file_path: Path of the gnuplot file to create (overwritten).
    """
    # Unpack first so a malformed points_data fails before any file is touched.
    x_proc, y_para, y_load, y_comm, y_comp, y_glob, number_traces = points_data

    gp_template = os.path.join(cfgs_path, 'modelfactors.gp')
    with open(gp_template) as f:
        content = f.readlines()

    # Substitutions are applied one after another in this fixed order, exactly
    # as before, so a replacement text is itself subject to later substitutions.
    substitutions = [
        ('#REPLACE_BY_XRANGE', ''.join(['set xrange [1:', str(limit), ']'])),
        ('#REPLACE_BY_PARA_FUNCTION', para_fit),
        ('#REPLACE_BY_LOAD_FUNCTION', load_fit),
        ('#REPLACE_BY_COMM_FUNCTION', comm_fit),
        ('#REPLACE_BY_COMP_FUNCTION', comp_fit),
        ('#REPLACE_BY_GLOB_FUNCTION', glob_fit),
    ]
    for placeholder, replacement in substitutions:
        content = [line.replace(placeholder, replacement) for line in content]

    with open(file_path, 'w') as f:
        f.writelines(content)

        # One data section per series, in the fixed order
        # para, load, comm, comp, glob.
        for y_values in (y_para, y_load, y_comm, y_comp, y_glob):
            for index in range(number_traces):
                # Joining with the newline as last element keeps the trailing
                # space before '\n' that the original output format had.
                f.write(' '.join([str(x_proc[index]), str(y_values[index]), '\n']))
            f.write('e\n')

        f.write('\n')
        f.write('pause -1\n')

    print('Projection written to ' + file_path)

In [None]:
# Collapsed with gnuplot generation
# @task(file_path=FILE_INOUT)
# def write_projection(x_proc, y_para, y_load, y_comm, y_comp, y_glob, number_traces, file_path):
#     #Add data points to gnuplot file
#     with open(file_path, 'a') as f:
#         for index in range(0, number_traces):
#             line = ' '.join([str(x_proc[index]), str(y_para[index]), '\n'])
#             f.write(line)
#         f.write('e\n')

#         for index in range(0, number_traces):
#             line = ' '.join([str(x_proc[index]), str(y_load[index]), '\n'])
#             f.write(line)
#         f.write('e\n')

#         for index in range(0, number_traces):
#             line = ' '.join([str(x_proc[index]), str(y_comm[index]), '\n'])
#             f.write(line)
#         f.write('e\n')

#         for index in range(0, number_traces):
#             line = ' '.join([str(x_proc[index]), str(y_comp[index]), '\n'])
#             f.write(line)
#         f.write('e\n')

#         for index in range(0, number_traces):
#             line = ' '.join([str(x_proc[index]), str(y_glob[index]), '\n'])
#             f.write(line)
#         f.write('e\n')

#         f.write('\n')
#         f.write('pause -1\n')

#     print('Projection written to ' + file_path)

In [None]:
@task(file_path=FILE_OUT)
def create_matplotlib(limit, para_fit, load_fit, comm_fit, comp_fit, glob_fit, points_data, cfgs_path, file_path):
    """Placeholder for a matplotlib counterpart of ``create_gnuplot``.

    Currently this only unpacks ``points_data`` (which must be a 7-element
    sequence) and returns ``None``; nothing is plotted and ``file_path`` is
    not written.

    NOTE(review): the function is decorated with ``file_path=FILE_OUT`` but
    never creates that file - confirm whether the runtime tolerates this.
    """
    (x_proc,
     y_para, y_load, y_comm, y_comp, y_glob,
     number_traces) = points_data

### Main

In the following cell, the necessary widgets for interactive executions are defined as well as the main function.

In [None]:
import ipywidgets as widgets
import os

style = {'description_width': 'initial'}

class wdgts(object):
    """Container for the input widgets that are bound to ``model_factors``."""

    # Folder that is scanned for traces; pre-filled with a folder below the
    # current working directory.
    w_trace_folder = widgets.Text(
        value=os.getcwd() + os.sep + 'traces/gromacs_jesus/',
        description='List of traces:',
        layout=dict(width='60%'),
    )

    # Switches the output verbosity to debug level.
    w_debug = widgets.Checkbox(
        value=False,
        description='Debug',
    )

    # Whether the measurements are weak or strong scaling, or auto-detected.
    # No explicit value is passed to the widget.
    w_scaling = widgets.ToggleButtons(
        options=['auto', 'weak', 'strong'],
        description='Scaling',
        button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
        tooltips=[
            'Automatic measurements scaling',
            'weak measurements scaling',
            'Strong measurements scaling',
        ],
    )

    # Projection-only mode: path of an existing modelfactors.csv, or the
    # literal string 'false' to compute everything from the traces.
    w_project = widgets.Text(
        value='false',
        placeholder='modelfactors.csv',
        description='CSV projection file path:',
        style=style,
        layout=dict(width='60%'),
    )

    # Upper limit on the number of cores used for the projection.
    w_limit = widgets.IntText(
        value=10000,
        description='Projection # cores:',
        style=style,
        layout=dict(width='60%'),
    )

    # Model used for the prediction. No explicit value is passed to the widget.
    w_model = widgets.ToggleButtons(
        options=['amdahl', 'pipe', 'linear'],
        description='Model',
        button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
        tooltips=[
            'Amdahl model prediction',
            'Pipe model prediction',
            'Linear model prediction',
        ],
    )

    # Whether bounds are applied to the prediction.
    w_bounds = widgets.Checkbox(
        value=True,
        description='Prediction bounds',
    )

    # Error weighting for the prediction.
    #   first:    prioritize the smallest run
    #   equal:    no priority
    #   decrease: decreasing priority for larger runs
    w_sigma = widgets.ToggleButtons(
        options=['first', 'equal', 'decrease'],
        description='Sigma',
        button_style='info',  # one of 'success', 'info', 'warning', 'danger' or ''
        tooltips=[
            'Prioritize smallest run',
            'No priority',
            'Decreasing priority for larger runs',
        ],
    )

    # Directory holding the configuration files; pre-filled with ./cfgs.
    w_cfgs = widgets.Text(
        value=os.getcwd() + os.sep + 'cfgs',
        placeholder='cfgs',
        description='Configuration files path:',
        style=style,
        layout=dict(width='60%'),
    )

    # Path of the output file.
    w_out = widgets.Text(
        value='results.out',
        placeholder='Output_file.out',
        description='Output file:',
        style=style,
        layout=dict(width='60%'),
    )

    # Path of the CSV output file.
    w_csv = widgets.Text(
        value='results.csv',
        placeholder='Output_file.csv',
        description='CSV output file:',
        style=style,
        layout=dict(width='60%'),
    )


def model_factors(trace_folder, debug, scaling, project, limit, model, bounds, sigma, cfgs, out, csv):
    """Main control flow of the notebook.

    Collects every ``.prv`` file found directly inside ``trace_folder``. Then,
    unless projection-only mode is selected, gathers the raw data for each
    trace, computes and prints the model factors, and writes them to ``csv``.
    In projection-only mode the model factors are read from the CSV file named
    by ``project`` instead. Finally the projection is computed.

    Args:
        trace_folder: Directory that is scanned (non-recursively) for files
            ending in ``.prv``.
        debug: When truthy, the received arguments are printed; the flag is
            also forwarded to the helper functions.
        scaling: Requested scaling type, passed to ``get_scaling_type`` which
            returns the value actually used.
        project: The string ``'false'`` to compute everything from the traces;
            any other value is passed to ``read_mod_factors_csv``.
        limit, model, bounds, sigma: Forwarded unchanged to
            ``compute_projection``.
        cfgs: Directory with the configuration files (``timings.cfg``,
            ``runtime.cfg``, ``cycles.cfg``, ``instructions.cfg``,
            ``dimemas_ideal.cfg``, ``dimemas.collectives``).
        out: Output file path; converted to an absolute path.
        csv: Path handed to ``print_mod_factors_csv``.
    """
    # reduce() is not a builtin on Python 3; functools provides it on both
    # Python 2 and Python 3.
    from functools import reduce

    # 'entry' instead of 'file' to avoid shadowing the Python 2 builtin.
    trace_list = [os.path.join(trace_folder, entry)
                  for entry in os.listdir(trace_folder)
                  if entry.endswith(".prv")]

    if debug:
        print("Traces  :")
        for t in trace_list:
            print("\t- " + str(t))
        print("Debug   : " + str(debug))
        print("Scaling : " + str(scaling))
        print("Project : " + str(project))
        print("Limit   : " + str(limit))
        print("Model   : " + str(model))
        print("Bounds  : " + str(bounds))
        print("Sigma   : " + str(sigma))
        print("Cfgs    : " + str(cfgs))
        print("Out     : " + str(out))
        print("Csv     : " + str(csv))

    # The arguments come from the widgets, so no command line parsing is
    # needed; only the paths are normalized.
    out = os.path.abspath(out)
    cfgs = os.path.abspath(cfgs)
    # Check if paramedir and Dimemas are in the path
    check_installation(debug) ## TODO: CHECK INSTALLATION OF DIMEMAS
    # Check if projection-only mode is selected
    # If not: compute everything
    # Else: read the passed modelfactors.csv
    if project == 'false':
        traces = get_traces_from_args(trace_list)

        timings = os.path.join(cfgs, 'timings.cfg')
        runtime = os.path.join(cfgs, 'runtime.cfg')
        cycles = os.path.join(cfgs, 'cycles.cfg')
        inst = os.path.join(cfgs, 'instructions.cfg')
        dimemas_cfgs = os.path.join(cfgs, 'dimemas_ideal.cfg')
        dimemas_collectives = os.path.join(cfgs, 'dimemas.collectives')

        # One partial result per trace, merged pairwise into a single dict.
        lraw_data = []
        for name, trace in traces.items():
            partial_raw_data = gather_raw_data(trace.get_path(), timings, runtime, cycles, inst, dimemas_cfgs, dimemas_collectives, trace.get_processes(), cfgs, debug)
            lraw_data.append(partial_raw_data)
        raw_data = reduce(merge_dicts, lraw_data)

        # Guess the weak or strong scaling
        scaling = get_scaling_type(raw_data, traces, scaling, debug)

        # Compute the model factors and print them. The first trace yielded by
        # traces.items() serves as the reference for all others.
        # NOTE(review): the reference is identified by the dict key ('name')
        # while each trace is identified by trace.get_path() - presumably the
        # keys are the trace paths; verify against get_traces_from_args.
        lmod_factors = []
        first_trace = None
        first_processes = None
        for name, trace in traces.items():
            if first_processes is None and first_trace is None:
                first_trace = name
                first_processes = trace.get_processes()
            partial_mod_factors = compute_model_factors(raw_data, trace.get_path(), trace.get_processes(), first_trace, first_processes, scaling, debug)
            lmod_factors.append(partial_mod_factors)
        mod_factors = reduce(merge_dicts, lmod_factors)

        # Synchronize with the COMPSs runtime before using the values locally.
        mod_factors = compss_wait_on(mod_factors)
        raw_data = compss_wait_on(raw_data)
        print_raw_data_table_html(raw_data, traces)        # Remove _html for normal print
        print_mod_factors_table_html(mod_factors, traces)  # Remove _html for normal print
        print_mod_factors_csv(mod_factors, raw_data, traces, csv)
    else:
        # Read the model factors from the csv file
        # NOTE(review): 'traces' is never assigned on this path, so the
        # compute_projection call below fails with an unbound-local error in
        # projection-only mode. Building it from trace_list/trace_processes
        # requires the Trace constructor, which is not visible here.
        mod_factors, trace_list, trace_processes = read_mod_factors_csv(debug, project)

    print("Compute_projection")
    compute_projection(mod_factors, traces, debug, model, limit, bounds, sigma, out, cfgs)
    
# Bind every widget to the model_factors parameter of the same name.
widgets.interact_manual(
    model_factors,
    trace_folder=wdgts.w_trace_folder,
    debug=wdgts.w_debug,
    scaling=wdgts.w_scaling,
    project=wdgts.w_project,
    limit=wdgts.w_limit,
    model=wdgts.w_model,
    bounds=wdgts.w_bounds,
    sigma=wdgts.w_sigma,
    cfgs=wdgts.w_cfgs,
    out=wdgts.w_out,
    csv=wdgts.w_csv,
)

In [None]:
# Counterpart of the ipycompss.start(...) call near the top of the notebook;
# presumably shuts the COMPSs runtime down - run once all tasks are finished.
ipycompss.stop()

In [None]:
from IPython.display import HTML, display
import tabulate

# Hard-coded sample of a raw data table: the header row holds the label
# column followed by four numeric column headings.
titles_raw = ['Parameter', 24, 48, 96, 192]

# One row per raw data entry, in the same column order as titles_raw.
raw = [
    ['Runtime (us)', 16722763.02, 8754638.15, 4714094.74, 2285913.92],
    ['Runtime (ideal)', 16495194.04, 8542198.57, 4628710.67, 2199460.51],
    ['Useful duration (average)', 15171967.02, 7698566.41, 3951799.18, 1852709.09],
    ['Useful duration (maximum)', 16380967.8, 8429582.16, 4516863.01, 2132687.88],
    ['Useful duration (total)', 364127208.59, 369531187.88, 379372720.92, 355720145.85],
    ['Useful duration (ideal, max)', 16380967.8, 8429582.16, 4516863.01, 2132687.88],
    ['Useful instructions (total)', 1089905844835, 1128635398017, 1215612183046, 1167055295613],
    ['Useful cycles (total)', 990700240272, 1000470887596, 1041832237966, 976609105921],
]

# Render the table as HTML with two decimals for floats and show it inline.
_raw_table_html = tabulate.tabulate(raw, headers=titles_raw, tablefmt='html', floatfmt=".2f")
display(HTML(_raw_table_html))
