In [22]:
# Inspect data and plot figures
from collections import defaultdict

import numpy as np

import matplotlib.pyplot as plt
plt.style.use("seaborn-poster")
plt.rcParams["figure.figsize"] = (10, 6)

import pandas as pd

# Paths to data
base = "../data/base.csv"
opt = "../data/opt.csv"
seq = "../data/sequential.csv"

# Load data
base = pd.read_csv(base, header=None)
opt = pd.read_csv(opt, header=None)
seq = pd.read_csv(seq, header=None)

header = ["SIZE", "NODES", "CPUS_PER_NODE", "NBODIES", "RTIME", "GLOPS"]
base.set_axis(header, axis=1, inplace=True)
opt.set_axis(header, axis=1, inplace=True)
seq.set_axis(["NBODIES", "RTIME", "GFLOPS"], axis=1, inplace=True)

In [5]:
# Functions
def speedup(serial_rtime: float, parallel_rtime: float) -> float:
    return serial_rtime/parallel_rtime

def efficiency(speedup_val: float, n_procs: int) -> float:
    return speedup_val/n_procs

# Describe
## Initial Observations
* Min and Max of Optimized rtime is lower
* Mean of optimized program is actually lower
* How does scalability compare (i.e., as N increases, how do the strategies compare w.r.t rtime)?

## What Should Figures Show
* Speedup = serial rtime / parallel rtime # @ n-processes
* Efficiency = Speedup / n-processes
* Hard vs. Weak Scaling
* Serial Performance

In [6]:
base.describe()

Unnamed: 0,SIZE,NODES,CPUS_PER_NODE,NBODIES,RTIME,GLOPS
count,144.0,144.0,144.0,144.0,144.0,144.0
mean,35.0,4.402778,6.75,3908.0,35.115382,1.761667
std,38.929738,2.348395,6.67057,3788.029434,84.855694,1.96604
min,2.0,2.0,1.0,512.0,0.18,0.17
25%,7.5,2.0,1.0,896.0,0.6645,0.52
50%,20.0,4.0,4.0,2560.0,2.8645,0.955
75%,40.0,6.0,16.0,5572.0,24.7,2.26
max,128.0,8.0,16.0,10000.0,505.927,9.08


In [7]:
base[(base["NBODIES"] == 10000) & (base["SIZE"] == 2)]["RTIME"]

36     504.297
84     503.987
132    505.927
Name: RTIME, dtype: float64

In [8]:
opt.describe()

Unnamed: 0,SIZE,NODES,CPUS_PER_NODE,NBODIES,RTIME,GLOPS
count,144.0,144.0,144.0,144.0,144.0,144.0
mean,35.0,3.791667,3.75,3908.0,19.378958,2.183819
std,38.929738,2.058158,5.444571,3788.029434,52.185408,2.285562
min,2.0,2.0,1.0,512.0,0.103,0.02
25%,7.5,2.0,1.0,896.0,2.9605,0.205
50%,20.0,4.0,1.0,2560.0,12.0135,1.47
75%,40.0,4.0,4.0,5572.0,16.472,3.5725
max,128.0,8.0,16.0,10000.0,366.462,9.34


# Summary Statistics

In [19]:
NBODIES = base["NBODIES"].unique()
SIZE = sorted(base["SIZE"].unique())
print("NBODIES=", NBODIES)
print("SIZE=", SIZE)

# Store summary statistics
base_mean_std = {"SIZE": [], "NBODIES": [], "MEAN_RTIME": [], "STD_RTIME": []}
opt_mean_std = {"SIZE": [], "NBODIES": [], "MEAN_RTIME": [], "STD_RTIME": []}
seq_mean_std = {"NBODIES": [], "MEAN_RTIME": [], "STD_RTIME": []}

# Get summary statistics for parameters of interest
for nbody in NBODIES:
    seq_duplicated: pd.DataFrame = seq[(seq["NBODIES"] == nbody)]

    seq_mean = seq_duplicated["RTIME"].mean()
    seq_std = seq_duplicated["RTIME"].std()

    seq_mean_std["NBODIES"].append(nbody)
    seq_mean_std["MEAN_RTIME"].append(seq_mean)
    seq_mean_std["STD_RTIME"].append(seq_std)

    for size in SIZE:
        base_duplicated: pd.DataFrame = base[(base["NBODIES"] == nbody) & (base["SIZE"] == size)]
        opt_duplicated: pd.DataFrame = opt[(opt["NBODIES"] == nbody) & (opt["SIZE"] == size)]

        base_mean = base_duplicated["RTIME"].mean()
        base_std = base_duplicated["RTIME"].mean()

        opt_mean = opt_duplicated["RTIME"].mean()
        opt_std = opt_duplicated["RTIME"].std()

        base_mean_std["SIZE"].append(size)
        base_mean_std["NBODIES"].append(nbody)
        base_mean_std["MEAN_RTIME"].append(base_mean)
        base_mean_std["STD_RTIME"].append(base_std)

        opt_mean_std["SIZE"].append(size )
        opt_mean_std["NBODIES"].append(nbody)
        opt_mean_std["MEAN_RTIME"].append(opt_mean)
        opt_mean_std["STD_RTIME"].append(opt_std)

# Make dataframes
base_mean_std = pd.DataFrame(base_mean_std)
opt_mean_std = pd.DataFrame(opt_mean_std)
seq_mean_std = pd.DataFrame(seq_mean_std)

NBODIES= [  512  1024  4096 10000]
SIZE= [2, 4, 6, 8, 16, 24, 32, 64, 96, 128]


In [20]:
display(seq_mean_std)
display(base_mean_std)
display(opt_mean_std)

Unnamed: 0,NBODIES,MEAN_RTIME,STD_RTIME
0,512,1.553333,0.004933
1,1024,5.598333,0.006807
2,4096,86.540333,0.238383
3,10000,511.479333,0.552442


Unnamed: 0,SIZE,NBODIES,MEAN_RTIME,STD_RTIME
0,2,512,1.502,1.502
1,4,512,0.882667,0.882667
2,6,512,0.614667,0.614667
3,8,512,0.517333,0.517333
4,16,512,0.345667,0.345667
5,24,512,0.205333,0.205333
6,32,512,0.319,0.319
7,64,512,0.441667,0.441667
8,96,512,0.232667,0.232667
9,128,512,0.549,0.549


Unnamed: 0,SIZE,NBODIES,MEAN_RTIME,STD_RTIME
0,2,512,1.084667,0.024338
1,4,512,0.142667,0.001528
2,6,512,0.105,0.001732
3,8,512,1.4685,1.464688
4,16,512,2.846333,0.02829
5,24,512,2.839,0.042755
6,32,512,8.448333,5.982271
7,64,512,13.918,0.151793
8,96,512,13.850667,0.073935
9,128,512,14.376667,0.116475


# Speedups and Efficiencies

In [21]:
display(seq_mean_std)
display(base_mean_std.head())
display(opt_mean_std.head())

Unnamed: 0,NBODIES,MEAN_RTIME,STD_RTIME
0,512,1.553333,0.004933
1,1024,5.598333,0.006807
2,4096,86.540333,0.238383
3,10000,511.479333,0.552442


Unnamed: 0,SIZE,NBODIES,MEAN_RTIME,STD_RTIME
0,2,512,1.502,1.502
1,4,512,0.882667,0.882667
2,6,512,0.614667,0.614667
3,8,512,0.517333,0.517333
4,16,512,0.345667,0.345667


Unnamed: 0,SIZE,NBODIES,MEAN_RTIME,STD_RTIME
0,2,512,1.084667,0.024338
1,4,512,0.142667,0.001528
2,6,512,0.105,0.001732
3,8,512,1.4685,1.464688
4,16,512,2.846333,0.02829


In [35]:
# TODO: Should calculate these first and then take averagess
base_speedup_efficiency = defaultdict(list)
opt_speedup_efficiency = defaultdict(list)

for nbody in NBODIES:
    seq_mean_rtime = seq_mean_std[seq_mean_std["NBODIES"] == nbody]["MEAN_RTIME"].values[0]
    for size in SIZE:
        base_mean_rtime_at_params = base_mean_std[
            (base_mean_std["NBODIES"] == nbody) & (base_mean_std["SIZE"] == size)]["MEAN_RTIME"].values[0]
        opt_mean_rtime_at_params = opt_mean_std[
            (opt_mean_std["NBODIES"] == nbody) & (opt_mean_std["SIZE"] == size)]["MEAN_RTIME"].values[0]


        base_speedup = speedup(seq_mean_rtime, base_mean_rtime_at_params)
        base_efficiency = efficiency(base_speedup, size)
        opt_speedup = speedup(seq_mean_rtime, opt_mean_rtime_at_params)
        opt_efficiency = efficiency(opt_speedup, size)

        base_speedup_efficiency["SIZE"].append(size)
        base_speedup_efficiency["NBODIES"].append(nbody)
        base_speedup_efficiency["SPEEDUP"].append(base_speedup)
        base_speedup_efficiency["EFFICIENCY"].append(base_efficiency)

        opt_speedup_efficiency["SIZE"].append(size)
        opt_speedup_efficiency["NBODIES"].append(nbody)
        opt_speedup_efficiency["SPEEDUP"].append(opt_speedup)
        opt_speedup_efficiency["EFFICIENCy"].append(opt_efficiency)

# make dataframes
base_speedup_efficiency = pd.DataFrame(base_speedup_efficiency)
opt_speedup_efficiency = pd.DataFrame(opt_speedup_efficiency)

In [36]:
base_speedup_efficiency

Unnamed: 0,SIZE,NBODIES,SPEEDUP,EFFICIENCY
0,2,512,1.034177,0.517088
1,4,512,1.759819,0.439955
2,6,512,2.527115,0.421186
3,8,512,3.002577,0.375322
4,16,512,4.493732,0.280858
5,24,512,7.564935,0.315206
6,32,512,4.869383,0.152168
7,64,512,3.516981,0.054953
8,96,512,6.676218,0.069544
9,128,512,2.829387,0.022105


# Tables

# Figures