This notebook plots and compares the benchmark results. Before running it, make sure you have already generated all the data you want to compare. To run a sequential sort or the multiprocessing merge sort, use run.py. To run the MPI merge sort, use mpi_run.py.

In [1]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from cloudmesh.common.util import banner
from analysis import get_data
from analysis import read_log, read_logs
from analysis import generate_average

In [2]:
# Benchmark configuration shared by the cells below.
# `user` and `node` are passed to run.py; `node` is also part of the log name.
user = "alex"
node = "2400MHz"
sorts = ["seq-merge", "mp-mergesort"]
sort = "mp-mergesort"

# Problem sizes as plain integers (100, 1e3, 1e5, 1e6).
sizes = [100, 1_000, 100_000, 1_000_000]

In [None]:
# Generate the benchmark data by invoking run.py once per (sort, size) pair.
# NOTE: this takes a long time. If you do not want to use all of your
# processors, run different commands instead.
for sort in sorts:
    for size in sizes:
        command = " ".join(
            [
                "./run.py",
                f"--user={user}",
                f"--node={node}",
                f"--size={size}",
                f"--sort={sort}",
            ]
        )
        banner(command)
        os.system(command)

In [3]:
def plot_benchmark_by_size(df, size=None, tag=None, files=None, x="sizes", y="time"):
    """Draw a line plot of benchmark data and save it as PNG and PDF.

    One line is drawn per distinct value of the ``name`` column. The figure
    is saved as ``images/<tag>-<names>.png`` and ``images/<tag>-<names>.pdf``,
    where ``<names>`` is the entries of ``files`` joined with ``-``.

    Args:
        df: Data to plot. Must contain the columns named by ``x`` and ``y``
            and a ``name`` column, which is used as the hue.
        size: Unused; kept so existing positional callers keep working.
        tag: Required. Title-cased, with underscores shown as spaces, for the
            plot title; also used verbatim in the output file names.
        files: Required. Iterable of strings used to build the file names.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.

    Raises:
        ValueError: If ``tag`` or ``files`` is not provided.
    """
    # Both arguments default to None only because they follow `size` in the
    # signature; fail early with a clear message instead of an opaque
    # AttributeError/TypeError further down.
    if tag is None:
        raise ValueError("plot_benchmark_by_size requires a 'tag'")
    if files is None:
        raise ValueError("plot_benchmark_by_size requires 'files'")

    names = "-".join(files)

    sns.lineplot(data=df, x=x, y=y, hue="name")
    plt.title(tag.title().replace("_", " "))

    # Any axis whose column name contains "time" gets a "/s" unit suffix.
    x_label = f"{x}/s" if "time" in x else x
    y_label = f"{y}/s" if "time" in y else y
    plt.xlabel(x_label.capitalize())
    plt.ylabel(y_label.capitalize())

    # savefig does not create missing directories.
    os.makedirs("images", exist_ok=True)
    plt.savefig(f"images/{tag}-{names}.png")
    plt.savefig(f"images/{tag}-{names}.pdf")

In [4]:
files = ["alex"]
sorts = ["seq-merge", "mp-mergesort"]

# Collect one list of log rows per (file, sort) pair, covering every size.
frames = []
for file in files:
    for sort in sorts:
        # The log name does not depend on the size, so build it once.
        log = f'{sort}-{node}-{file}'
        frame = []
        for size in sizes:
            # extend() appends in place instead of re-copying the list.
            frame.extend(read_log(log, size=int(size), tag=sort))
        frames.append(frame)
# print(frames)

# Build the DataFrame in one step. Concatenating onto an empty DataFrame in
# a loop re-copies all rows each iteration and relies on pandas' deprecated
# handling of empty frames in concat.
df = pd.DataFrame(
    data=[row for frame in frames for row in frame],
    columns=["processes", "time", "size", "name", "tag"],
)
# plot_benchmark_by_size(df, tag=sort, files=files, x="size", y="time")

In [5]:
# Display the combined measurements (columns: processes, time, size, name, tag).
df

Unnamed: 0,processes,time,size,name,tag
0,1,0.000,100,alex,seq-merge
1,1,0.000,100,alex,seq-merge
2,1,0.000,100,alex,seq-merge
3,1,0.000,100,alex,seq-merge
4,1,0.000,100,alex,seq-merge
...,...,...,...,...,...
515,1,5.948,1000000,alex,mp-mergesort
516,1,5.995,1000000,alex,mp-mergesort
517,1,6.042,1000000,alex,mp-mergesort
518,1,6.014,1000000,alex,mp-mergesort


In [6]:
# Reshape to one row per (tag, processes) and one column per (name, size),
# holding the mean time of each group. Missing combinations become 0.
df = pd.pivot_table(
    df,
    index=["tag", "processes"],
    columns=["name", "size"],
    values="time",
    aggfunc="mean",
    fill_value=0,
)

In [7]:
# Display the pivoted table of mean times.
df

Unnamed: 0_level_0,name,alex,alex,alex,alex
Unnamed: 0_level_1,size,100,1000,100000,1000000
tag,processes,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
mp-mergesort,1,0.4556,0.7743,1.01,6.1965
mp-mergesort,2,0.5187,0.7883,0.7596,4.1716
mp-mergesort,3,0.4768,0.4964,0.761,3.405
mp-mergesort,4,0.5205,0.4998,0.7526,3.2054
mp-mergesort,5,0.5799,0.5338,0.7915,3.1233
mp-mergesort,6,0.6483,0.687,0.9562,2.7502
mp-mergesort,7,0.6134,0.729,1.273,3.0334
mp-mergesort,8,0.7626,0.668,1.3802,2.765
mp-mergesort,9,0.6876,0.7685,1.0931,3.2437
mp-mergesort,10,0.8441,0.9244,1.1056,3.1543


In [8]:
# Show the multiprocessing timings on their own.
mp_times = df.loc['mp-mergesort']
print(mp_times)

# Speedup = baseline time / measured time, matched column by column.
# The baseline is the first row of the 'seq-merge' group.
baseline = df.loc['seq-merge'].iloc[0]
speedup = df.rdiv(baseline, axis="columns")
speedup

name         alex                        
size      100     1000    100000  1000000
processes                                
1          0.4556  0.7743  1.0100  6.1965
2          0.5187  0.7883  0.7596  4.1716
3          0.4768  0.4964  0.7610  3.4050
4          0.5205  0.4998  0.7526  3.2054
5          0.5799  0.5338  0.7915  3.1233
6          0.6483  0.6870  0.9562  2.7502
7          0.6134  0.7290  1.2730  3.0334
8          0.7626  0.6680  1.3802  2.7650
9          0.6876  0.7685  1.0931  3.2437
10         0.8441  0.9244  1.1056  3.1543
11         0.7965  0.8987  1.2086  3.2474
12         0.8514  0.9148  1.3277  3.2315


Unnamed: 0_level_0,name,alex,alex,alex,alex
Unnamed: 0_level_1,size,100,1000,100000,1000000
tag,processes,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
mp-mergesort,1,0.0,0.003229,0.520198,1.00744
mp-mergesort,2,0.0,0.003171,0.69168,1.496452
mp-mergesort,3,0.0,0.005036,0.690407,1.833363
mp-mergesort,4,0.0,0.005002,0.698113,1.947526
mp-mergesort,5,0.0,0.004683,0.663803,1.998719
mp-mergesort,6,0.0,0.003639,0.549467,2.269871
mp-mergesort,7,0.0,0.003429,0.412726,2.057955
mp-mergesort,8,0.0,0.003743,0.380669,2.257722
mp-mergesort,9,0.0,0.003253,0.480651,1.924531
mp-mergesort,10,0.0,0.002704,0.475217,1.979076
