In [2]:
# Inspect data and plot figures
import numpy as np

import matplotlib.pyplot as plt
# matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old names. Pick whichever name this
# installation provides; fall back to the original name so a missing style
# still fails with matplotlib's own error.
_POSTER_STYLES = ("seaborn-v0_8-poster", "seaborn-poster")
plt.style.use(
    next((s for s in _POSTER_STYLES if s in plt.style.available), "seaborn-poster")
)
plt.rcParams["figure.figsize"] = (10, 6)

import pandas as pd

# Paths to data (kept under their own names so `base`/`opt` only ever
# refer to DataFrames)
BASE_CSV = "../data/base.csv"
OPT_CSV = "../data/opt.csv"

# Load data
# The CSV files are read without a header row (header=None), so the column
# names are attached afterwards. Assigning `.columns` works on every pandas
# version, whereas `set_axis(..., inplace=True)` was removed in pandas 2.0.
# A length mismatch still raises, as it did with set_axis.
header = ["SIZE", "NODES", "CPUS_PER_NODE", "NBODIES", "RTIME", "GLOPS"]
base = pd.read_csv(BASE_CSV, header=None)
opt = pd.read_csv(OPT_CSV, header=None)
base.columns = header
opt.columns = header

In [3]:
# Functions
def speedup(serial_rtime: float, parallel_rtime: float) -> float:
    """Return the ratio of the serial run time to the parallel run time.

    Args:
        serial_rtime: Run time of the serial execution.
        parallel_rtime: Run time of the parallel execution.

    Returns:
        ``serial_rtime / parallel_rtime``.
    """
    ratio = serial_rtime / parallel_rtime
    return ratio

def efficiency(speedup_val: float, n_procs: int) -> float:
    """Return the speedup divided by the number of processes.

    Args:
        speedup_val: Speedup value (e.g. as returned by ``speedup``).
        n_procs: Number of processes the speedup was obtained with.

    Returns:
        ``speedup_val / n_procs``.
    """
    per_process = speedup_val / n_procs
    return per_process

# Describe
## Initial Observations
* Min and max RTIME of the optimized program are both lower than the baseline's
* Mean RTIME of the optimized program is also lower
* How does scalability compare (i.e., as N increases, how do the strategies compare w.r.t rtime)?

## What Should Figures Show
* Speedup = serial rtime / parallel rtime (measured at n processes)
* Efficiency = Speedup / n-processes
* Strong vs. Weak Scaling
* Serial Performance

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of `base`
base.describe()

Unnamed: 0,SIZE,NODES,CPUS_PER_NODE,NBODIES,RTIME,GLOPS
count,144.0,144.0,144.0,144.0,144.0,144.0
mean,35.0,4.402778,6.75,3908.0,35.115382,1.761667
std,38.929738,2.348395,6.67057,3788.029434,84.855694,1.96604
min,2.0,2.0,1.0,512.0,0.18,0.17
25%,7.5,2.0,1.0,896.0,0.6645,0.52
50%,20.0,4.0,4.0,2560.0,2.8645,0.955
75%,40.0,6.0,16.0,5572.0,24.7,2.26
max,128.0,8.0,16.0,10000.0,505.927,9.08


In [13]:
# RTIME of every `base` row with NBODIES == 10000 and SIZE == 2
is_largest_on_two = (base["NBODIES"] == 10000) & (base["SIZE"] == 2)
base.loc[is_largest_on_two, "RTIME"]

36     504.297
84     503.987
132    505.927
Name: RTIME, dtype: float64

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of `opt`
opt.describe()

Unnamed: 0,SIZE,NODES,CPUS_PER_NODE,NBODIES,RTIME,GLOPS
count,144.0,144.0,144.0,144.0,144.0,144.0
mean,35.0,3.791667,3.75,3908.0,19.378958,2.183819
std,38.929738,2.058158,5.444571,3788.029434,52.185408,2.285562
min,2.0,2.0,1.0,512.0,0.103,0.02
25%,7.5,2.0,1.0,896.0,2.9605,0.205
50%,20.0,4.0,1.0,2560.0,12.0135,1.47
75%,40.0,4.0,4.0,5572.0,16.472,3.5725
max,128.0,8.0,16.0,10000.0,366.462,9.34


# Summary Statistics

In [11]:
NBODIES = base["NBODIES"].unique()
SIZE = sorted(base["SIZE"].unique())
print(NBODIES)
print(SIZE)


def _rtime_mean_std(runs: pd.DataFrame, nbodies, sizes) -> dict:
    """Aggregate RTIME over the rows sharing each (NBODIES, SIZE) pair.

    Args:
        runs: Frame with "NBODIES", "SIZE" and "RTIME" columns.
        nbodies: NBODIES values to iterate over (outer loop).
        sizes: SIZE values to iterate over (inner loop).

    Returns:
        Dict of equal-length lists keyed "SIZE", "NBODIES", "MEAN_RTIME" and
        "STD_RTIME", with one entry per (nbody, size) pair in iteration order.
        A pair with no matching rows yields NaN for both statistics.
    """
    stats = {"SIZE": [], "NBODIES": [], "MEAN_RTIME": [], "STD_RTIME": []}
    for nbody in nbodies:
        for size in sizes:
            # RTIME of the rows that share this configuration
            rtime = runs.loc[
                (runs["NBODIES"] == nbody) & (runs["SIZE"] == size), "RTIME"
            ]
            stats["SIZE"].append(size)
            stats["NBODIES"].append(nbody)
            stats["MEAN_RTIME"].append(rtime.mean())
            # Spread of the matching rows: this must be .std(), not .mean()
            stats["STD_RTIME"].append(rtime.std())
    return stats


# Both frames go through the same per-configuration aggregation, so the
# optimized statistics are computed from the filtered rows, not from all of `opt`.
base_mean_std = _rtime_mean_std(base, NBODIES, SIZE)
opt_mean_std = _rtime_mean_std(opt, NBODIES, SIZE)

[  512  1024  4096 10000]
[2, 4, 6, 8, 16, 24, 32, 64, 96, 128]
