This will plot and compare the resulting data. To run this, make sure that you have already generated all data you want to be compared. If you want to run a sequential sort or the multiprocessing merge sort, run with run.py. If you want to run the MPI merge sort, run with mpi_run.py. 

In [1]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from cloudmesh.common.util import banner
from analysis import get_data
from analysis import read_log, read_logs
from analysis import generate_average

In [11]:
user = "alex"
node = "v100"
sorts = ["seq-merge", "mp-mergesort"]
sort = "mp-mergesort"

sizes = [100, 1000, 1e4, 1e5, 1e6, 1e7, 1e8]
sizes = [int(size) for size in sizes]

In [None]:
# this will take a long time. 
# also if you don't want to use all your processors then use different commands. 
for sort in sorts:
    for size in sizes:
        run_cmd = f"./run.py --user={user} --node={node} --size={size} --sort={sort}"
        banner(run_cmd)
        os.system(run_cmd)

In [3]:
def plot_benchmark_by_size(df, size=None, tag=None, files=None, x="sizes", y="time"):
    names = "-".join(files)
    sns.lineplot(data=df, x=x, y=y, hue="name");
    t = tag.title()
    label = f"{t}".replace("_", " ")
    plt.title(f"{label}")
    if "time" in y: y = f"{y}/s"
    if "time" in x: x = f"{x}/s"
    plt.xlabel(x.capitalize())
    plt.ylabel(y.capitalize())
    plt.savefig(f"images/{tag}-{names}.png")
    plt.savefig(f"images/{tag}-{names}.pdf")

In [12]:
files = ["alex"]
sorts = ["seq-merge", "mp-mergesort"]

frames = []
for file in files:
    for sort in sorts:
        frame = []
        for size in sizes: 
            size = int(size)
            log = f'{sort}-{node}-{file}'
            _frame = read_log(log, size=size, tag=sort)
            frame = frame + _frame
        frames.append(frame)
# print(frames)


df = pd.DataFrame()
for frame in frames:
    _df = pd.DataFrame(data=frame,
                columns=["processes", "time", "size", "name", "tag"])
    df = pd.concat([df, _df], ignore_index=True)
# plot_benchmark_by_size(df, "name", tag=sort, files=files, x="size", y="time")

In [13]:
df

Unnamed: 0,processes,time,size,name,tag
0,1,0.000,100,alex,seq-merge
1,1,0.000,100,alex,seq-merge
2,1,0.000,100,alex,seq-merge
3,1,0.000,100,alex,seq-merge
4,1,0.000,100,alex,seq-merge
...,...,...,...,...,...
2465,1,88.554,10000000,alex,mp-mergesort
2466,1,86.471,10000000,alex,mp-mergesort
2467,1,88.362,10000000,alex,mp-mergesort
2468,1,88.557,10000000,alex,mp-mergesort


In [14]:
df = df.pivot_table(
    values='time', index=['tag', 'processes'], columns=['name', 'size'], fill_value=0, aggfunc='mean')

In [15]:
df

Unnamed: 0_level_0,name,alex,alex,alex,alex,alex,alex,alex
Unnamed: 0_level_1,size,100,1000,10000,100000,1000000,10000000,100000000
tag,processes,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
mp-mergesort,1,0.0309,0.0309,0.0443,0.4661,5.3893,88.0766,0.0
mp-mergesort,2,0.0336,0.038,0.0703,0.489,5.6157,90.2118,0.0
mp-mergesort,3,0.038,0.0425,0.078,0.5209,5.8003,91.4093,0.0
mp-mergesort,4,0.0434,0.0465,0.0926,0.5345,5.8381,83.3704,0.0
mp-mergesort,5,0.0486,0.0517,0.0988,0.5598,6.012,84.7213,0.0
mp-mergesort,6,0.0565,0.0591,0.106,0.5535,6.136,84.3268,0.0
mp-mergesort,7,0.0612,0.0666,0.1133,0.5694,6.2229,84.4062,0.0
mp-mergesort,8,0.068,0.0733,0.12,0.5705,6.2078,84.9146,0.0
mp-mergesort,9,0.0745,0.0789,0.1289,0.6119,6.4212,82.5112,0.0
mp-mergesort,10,0.0801,0.084,0.13,0.614,6.271,81.6843,0.0


In [10]:
print(df.loc['mp-mergesort'])
speedup = df.rdiv(df.loc['seq-merge'].iloc[0])
speedup

name           alex                                                    \
size      100       1000      10000     100000    1000000   10000000    
processes                                                               
1            0.0309    0.0309    0.0443    0.4661    5.3893   88.0766   
2            0.0336    0.0380    0.0703    0.4890    5.6157   90.2118   
3            0.0380    0.0425    0.0780    0.5209    5.8003   91.4093   
4            0.0434    0.0465    0.0926    0.5345    5.8381   83.3704   
5            0.0486    0.0517    0.0988    0.5598    6.0120   84.7213   
6            0.0565    0.0591    0.1060    0.5535    6.1360   84.3268   
7            0.0612    0.0666    0.1133    0.5694    6.2229   84.4062   
8            0.0680    0.0733    0.1200    0.5705    6.2078   84.9146   
9            0.0745    0.0789    0.1289    0.6119    6.4212   82.5112   
10           0.0801    0.0840    0.1300    0.6140    6.2710   81.6843   
11           0.0868    0.0898    0.1362    0.6179  

Unnamed: 0_level_0,name,alex,alex,alex,alex,alex,alex,alex
Unnamed: 0_level_1,size,100,1000,10000,100000,1000000,10000000,100000000
tag,processes,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
mp-mergesort,1,0.0,0.084142,0.749436,0.917829,0.996178,0.790153,inf
mp-mergesort,2,0.0,0.068421,0.472262,0.874847,0.956016,0.771451,inf
mp-mergesort,3,0.0,0.061176,0.425641,0.821271,0.92559,0.761345,inf
mp-mergesort,4,0.0,0.055914,0.358531,0.800374,0.919597,0.834757,inf
mp-mergesort,5,0.0,0.05029,0.336032,0.764202,0.892997,0.821446,inf
mp-mergesort,6,0.0,0.043993,0.313208,0.7729,0.874951,0.825289,inf
mp-mergesort,7,0.0,0.039039,0.293027,0.751317,0.862733,0.824513,inf
mp-mergesort,8,0.0,0.035471,0.276667,0.749869,0.864831,0.819576,inf
mp-mergesort,9,0.0,0.032953,0.257564,0.699134,0.83609,0.843449,inf
mp-mergesort,10,0.0,0.030952,0.255385,0.696743,0.856115,0.851987,inf
