In [1]:
# In this notebook two type of experiments are considered
# - pure    -> experiments with one simultaneously running MPI task 
# - sharing -> experiments with two simultaneously running MPI tasks

# This experiments may be
# local  -> VMs connect between each other with local linux bridge 
# or
# remote -> VMs connect between each other with remote linux bridge


In [24]:
# Pure experiments

import os
import json
import pandas as pd
from tabulate import tabulate
from collections import OrderedDict


def get_comp_time_pure(results_dir, tasks, sizes, procs, experiments_num):
    """This function parse results file and return computational statistics

    Args:
        results_dir (str): directory with result files.
            Each file has name with following pattern:
            <exec file name>_<task size>_<number of MPI procs>_e_<experiment seq num>.out
            
            Ex:
                lu.W.x_W_8_e_1.out

    Returns:
        dict: key   - task name 
              value - dict with
                  key   - task size
                  value - dict with
                      key   - number of proc
                      value - average computational time

    """
    
    if not os.path.isdir(results_dir):
        raise "Cannot find directory ''%s'" % results_dir
        
    results = {}
    
    for task in tasks:
        if task not in results:
            results[task] = OrderedDict()

        for size in sizes:
            if size not in results[task]:
                results[task][size] = OrderedDict()
            
            for proc in procs:
                if proc not in results[task][size]:
                    results[task][size][proc] = list()

                filename = "%s.%s.x_%s_%s" % (task, size, size, proc)
                for n in range(0, experiments_num):
                    full_path = os.path.join(results_dir, "%s_e_%d.out" % (filename, n))
                    if not os.path.isfile(full_path):
                        results[task][size][proc].append(float("nan"))
                    else:
                        with open(full_path, "r") as f:
                            line = f.readline()
                            while line != "":
                                if "Time in seconds" in line:
                                    results[task][size][proc].append(float(line.split('=')[1]))
                                    break
                                line = f.readline()

                if len(results[task][size][proc]) != 0:
                    results[task][size][proc] = sum(results[task][size][proc]) / float(len(results[task][size][proc]))
                else:
                    results[task][size][proc] = float("nan")

    return results

def print_comp_time_pure(comp_time, tasks, sizes, procs):
    for task in tasks:
        df = pd.DataFrame(comp_time[task])
        print("Results for task '%s'" % (task))
        print(df)
        print('\n')

def get_resources_usage():
    # TODO
    pass


# ============================================================

# Read and print computational time
tasks = ["is", "ep", "cg", "lu", "ft"]
sizes = ["S", "W", "A", "B", "C", "D"]
procs = [1, 2, 4, 8, 16]
exp_num = 5

results = get_comp_time_pure("../results/pure_NPB_local/cluster",
                                tasks, sizes, procs, exp_num)

# print(json.dumps(results, sort_keys = True, indent = 4))
print_comp_time_pure(results, tasks, sizes, procs)


# Read and print information about resources usage
res_usage = get_resources_usage()


Results for task 'is'
        S      W      A      B      C   D
1   0.006  0.080  0.546  2.214    NaN NaN
2   0.006  0.060  0.394  1.544  6.020 NaN
4   0.010  0.040  0.230  0.898  3.476 NaN
8   0.018  0.048  0.216  0.674  2.414 NaN
16  0.424  0.948  1.350  1.908  5.156 NaN


Results for task 'ep'
        S      W       A       B        C         D
1   0.896  1.688  12.462  49.386  196.928  3150.274
2   0.530  0.908   6.466  25.718  101.532  1623.076
4   0.282  0.540   3.576  13.848   55.030   882.300
8   0.164  0.306   2.106   8.038   31.574   498.338
16  0.098  0.196   1.374   5.404   21.134   333.168


Results for task 'cg'
        S      W      A        B        C   D
1   0.052  0.296  0.992  100.154      NaN NaN
2   0.090  0.242  0.656   44.996  141.412 NaN
4   0.106  0.200  0.444   26.766   81.296 NaN
8   0.250  0.340  0.476   14.820   47.416 NaN
16  1.574  1.572  2.338   28.452   59.484 NaN


Results for task 'lu'
        S       W       A        B        C         D
1   0.040   

In [31]:
# Sharing experiments
# We have checked yet sharing effect only on tasks with size A
# slowdown coefficient = time with sharing / pure time

# Names of result files
# pure
#     ep.B.x_B_16_e_0.out

# sharing 
#     ep.A.x_ep.A.x_8_4_e_0.out
#     ep.A.x_ep.A.x_8_16.out

def get_comp_time_sharing(dir1, dir2, tasks, procs, experiments_num):
    """This function parse results file and return computational statistics

    Args:
        dir1 and dir2 (str): directory with result files for cluster_1 and cluster_2
            Each file has name with following pattern:
            <exec file name 1>_<exec file name 2>_<number of MPI procs 1>_<number of MPI procs 2>_e_<experiment seq num>.out
            
            Ex:
                lu.A.x_lu.A.x_8_8_e_3.out

    Returns:
        dict: key   - tuple with two task names
              value - dict with
                  key   - tuple with np1, np2, where np is number of processes
                  value - tuple with two corresponding computational times

    """
    
    if not os.path.isdir(dir1):
        raise "Cannot find directory ''%s'" % dir1
        
    if not os.path.isdir(dir2):
        raise "Cannot find directory ''%s'" % dir2
        
    results = {}
    
    for i in range(0, len(tasks)):
        for j in range(i, len(tasks)):
            key_1 = str(tasks[i]) + ' ' + str(tasks[j])
            if key_1 not in results:
                results[key_1] = OrderedDict()

            for np1 in procs:
                for np2 in procs:
#                     print("RUN (%s, %s) (%s, %s)" % (tasks[i], tasks[j], np1, np2))
                    
                    key_2 = str(np1) + ' ' + str(np2)
                    if key_2 not in results[key_1]:
                        results[key_1][key_2] = list()
                    
                    filename_1 = "%s.A.x_%s.A.x_%s_%s" % (tasks[i], tasks[j], np1, np2)
                    filename_2 = "%s.A.x_%s.A.x_%s_%s" % (tasks[j], tasks[i], np2, np1)
            
                    for n in range(0, experiments_num):
                        full_path_1 = os.path.join(dir1, "%s_e_%d.out" % (filename_1, n))
                        full_path_2 = os.path.join(dir2, "%s_e_%d.out" % (filename_2, n))
                        
                        t1 = float("nan")
                        t2 = float("nan")
                        
                        if os.path.isfile(full_path_1):
                            with open(full_path_1, "r") as f:
                                line = f.readline()
                                while line != "":
                                    if "Time in seconds" in line:
                                        t1 = float(line.split('=')[1])
                                        break
                                    line = f.readline()
                        
                        if os.path.isfile(full_path_2):
                            with open(full_path_2, "r") as f:
                                line = f.readline()
                                while line != "":
                                    if "Time in seconds" in line:
                                        t2 = float(line.split('=')[1])
                                        break
                                    line = f.readline()
                        
                    results[key_1][key_2].append("%f %f" % (t1, t2))
    return results


def print_comp_time_sharing():
    pass

# ============================================================

# Read and print computational time
tasks = ["is", "ep", "cg", "lu", "ft"]
procs = [4, 8, 16]
exp_num = 5

results = get_comp_time_sharing(
            "../results/sharing_NPB_size_A_local/cluster_1",
            "../results/sharing_NPB_size_A_local/cluster_2",
            tasks, procs, exp_num)

print(json.dumps(results, sort_keys = True, indent = 4))
# print_comp_time_sharing(results, tasks, sizes, procs)


{
    "cg cg": {
        "16 16": [
            "24.170000 24.610000"
        ],
        "16 4": [
            "2.170000 1.740000"
        ],
        "16 8": [
            "12.920000 13.840000"
        ],
        "4 16": [
            "1.050000 2.180000"
        ],
        "4 4": [
            "0.710000 0.590000"
        ],
        "4 8": [
            "0.770000 0.820000"
        ],
        "8 16": [
            "11.180000 10.680000"
        ],
        "8 4": [
            "0.760000 0.710000"
        ],
        "8 8": [
            "1.050000 1.020000"
        ]
    },
    "cg ft": {
        "16 16": [
            "14.060000 11.810000"
        ],
        "16 4": [
            "5.910000 4.750000"
        ],
        "16 8": [
            "9.600000 8.610000"
        ],
        "4 16": [
            "1.300000 1.450000"
        ],
        "4 4": [
            "0.580000 1.580000"
        ],
        "4 8": [
            "0.810000 1.390000"
        ],
        "8 16": [
            "9.500000 7.7