In [None]:
from itertools import product
from pathlib import Path
from string import ascii_lowercase
from operator import mul
from functools import reduce
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Sweep axes of the benchmark. Together with the run id they are substituted
# into `fn_fmt` to build the name of each result file.
GPU_IDS = [
    "0,1",
    "0,1,2,3",
    "0,1,6,7",
    "0,1,2,3,4,5,6,7",
]
CPU_WORKERS = [1, 4, 16, 32, 64, 96, 128]
BUFFER_SIZES = ["64k", "256k", "1m", "4m"]

# Number of repetitions per configuration; run ids are 1..num_runs.
num_runs = 10

# File-name template: g = GPU ids, c = CPU workers, b = buffer size, r = run id.
fn_fmt = "gpu_{g}-workers_{c}-buffer_{b}-run_{r}.txt"

# (experiment name, directory holding that experiment's result files)
exp_settings = [
    ("gdsddn", "readLargeFileDDN"),
    ("gds_nvme", "readLargeFileCuFileNVME"),
    ("regular_nvme", "readLargeFileNVME"),
]
exp_names = [name for name, _ in exp_settings]
rst_dirs = [Path(dirname) for _, dirname in exp_settings]

In [None]:
def gather_result(rst_dir: Path, *, gpu_ids=None, cpu_workers=None,
                  buffer_sizes=None, n_runs=None, name_fmt=None):
    """Collect the per-run throughput values stored under ``rst_dir``.

    One result file is read for every (GPU setting, CPU worker count, buffer
    size, run id) combination. In each file the single line containing
    ``"Throughput"`` is parsed; its last two whitespace-separated tokens are
    taken as the first-worker and last-worker throughput.

    Parameters
    ----------
    rst_dir : Path or str
        Directory that holds the result files.
    gpu_ids, cpu_workers, buffer_sizes : sequence, optional
        Sweep axes. Default to the notebook-level ``GPU_IDS``,
        ``CPU_WORKERS`` and ``BUFFER_SIZES``.
    n_runs : int, optional
        Number of runs per configuration (run ids are ``1..n_runs``).
        Defaults to the notebook-level ``num_runs``.
    name_fmt : str, optional
        File-name template with ``{g}``, ``{c}``, ``{b}`` and ``{r}`` fields.
        Defaults to the notebook-level ``fn_fmt``.

    Returns
    -------
    (rst_first, rst_last) : tuple of np.ndarray
        Two float64 arrays of shape
        ``(len(gpu_ids), len(cpu_workers), len(buffer_sizes), n_runs)``.

    Raises
    ------
    OSError
        If an expected result file cannot be opened.
    AssertionError
        If the throughput line does not mention ``MiB/s``.
    ValueError
        If a file does not contain exactly one throughput line, or the line
        does not end in two numeric tokens.
    """
    gpu_ids = GPU_IDS if gpu_ids is None else gpu_ids
    cpu_workers = CPU_WORKERS if cpu_workers is None else cpu_workers
    buffer_sizes = BUFFER_SIZES if buffer_sizes is None else buffer_sizes
    n_runs = num_runs if n_runs is None else n_runs
    name_fmt = fn_fmt if name_fmt is None else name_fmt

    rst_dir = Path(rst_dir)
    rst_shape = len(gpu_ids), len(cpu_workers), len(buffer_sizes), n_runs
    first_vals, last_vals = [], []
    for g, c, b, r in product(gpu_ids, cpu_workers, buffer_sizes, range(1, n_runs + 1)):
        fn = rst_dir / name_fmt.format(g=g, c=c, b=b, r=r)
        with open(fn, "r") as fp:
            hits = [line for line in fp if "Throughput" in line]
        # Check per file: a missing line in one file and a duplicate in
        # another would otherwise cancel out and silently shift every later
        # value into the wrong grid cell.
        if len(hits) != 1:
            raise ValueError(
                f"{fn}: expected exactly one 'Throughput' line, found {len(hits)}")
        line = hits[0]
        assert "MiB/s" in line, f"{fn}: throughput is not reported in MiB/s"
        *_, first, last = line.split()  # first worker, last worker
        try:
            first, last = float(first), float(last)
        except ValueError as err:
            raise ValueError(
                f"{fn}: cannot parse throughput values from {line!r}") from err
        first_vals.append(first)
        last_vals.append(last)
    # np.float64 rather than the `np.float` alias, which NumPy 1.24 removed.
    rst_first = np.asarray(first_vals, dtype=np.float64).reshape(rst_shape)
    rst_last = np.asarray(last_vals, dtype=np.float64).reshape(rst_shape)
    return rst_first, rst_last

In [None]:
# Read every experiment directory once, then stack the per-experiment arrays
# so that the experiment becomes the leading axis.
per_experiment = [gather_result(exp_dir) for exp_dir in rst_dirs]
rst_first = np.asarray([first for first, _ in per_experiment])
rst_last = np.asarray([last for _, last in per_experiment])

# Show the stacked shape followed by the meaning of each axis.
axis_names = ["exp names", "GPU settings", "CPU workers", "buffer sizes", "run ids"]
print(rst_first.shape)
print("(", ",".join(axis_names), ")")

In [None]:
def grid_plot(rst: np.ndarray, avg_fn: callable):
    """Plot throughput against CPU workers on a buffer-size x GPU-setting grid.

    Each row of panels is one entry of ``BUFFER_SIZES`` and each column one
    entry of ``GPU_IDS``. Every panel holds one error-bar curve per experiment
    in ``exp_names``: the centre is ``avg_fn`` over the runs and the bar is the
    standard deviation over the runs.

    Parameters
    ----------
    rst : np.ndarray
        Indexed as (experiment, GPU setting, CPU workers, buffer size, run).
    avg_fn : callable
        Either ``np.median`` or ``np.mean``; applied along the run axis.

    Returns
    -------
    The matplotlib figure containing the grid.

    Raises
    ------
    AssertionError
        If ``avg_fn`` is neither ``np.median`` nor ``np.mean``.
    """
    assert avg_fn == np.median or avg_fn == np.mean
    n_rows, n_cols = len(BUFFER_SIZES), len(GPU_IDS)
    # squeeze=False keeps `axes` two-dimensional even for a single row or
    # column, so axes[row][col] below always works.
    fig, axes = plt.subplots(nrows=n_rows,
                             ncols=n_cols,
                             figsize=(14, 14), sharey=True, squeeze=False)
    x = list(map(float, CPU_WORKERS))
    tick_labels = [str(int(v)) for v in x]

    for b_idx, b in enumerate(BUFFER_SIZES):
        for g_idx, g in enumerate(GPU_IDS):
            ax = axes[b_idx][g_idx]
            # Row-major panel number: the stride is the number of columns.
            idx = b_idx * n_cols + g_idx
            data = rst[:, g_idx, :, b_idx, :]
            for e_idx, e in enumerate(exp_names):
                y = avg_fn(data[e_idx], axis=-1)
                s = np.std(data[e_idx], axis=-1)
                ax.errorbar(x, y, s, label=f"{e}")
            # Per-panel decoration is done once, outside the experiment loop.
            # A bare ax.grid() toggles the grid, so it is switched on
            # explicitly instead of relying on an odd number of calls.
            ax.grid(True)
            ax.set_xticks(x)
            ax.text(0.1, 0.9, f"({ascii_lowercase[idx]})",
                    horizontalalignment='center',
                    verticalalignment='center',
                    transform=ax.transAxes,
                    fontdict={"family":"arial", "size":14})
            if b_idx == 0:
                ax.set_title(f"GPU IDs {g}")
            if g_idx == 0:
                ax.set_ylabel(f"Buffer Size {b} \nthroughput MiB/s")
            # Only the bottom row (last buffer size) keeps its tick labels.
            if b_idx != n_rows - 1:
                ax.set_xticklabels([])
            else:
                ax.set_xticklabels(tick_labels)
            # One legend for the whole figure, in the top-right panel.
            if b_idx == 0 and g_idx == n_cols - 1:
                ax.legend()
    return fig

In [None]:
# Median-over-runs grid for the last worker's throughput, written out as PNG.
fig = grid_plot(rst_last, avg_fn=np.median)
fig.savefig(fname="grid_plot_last.png", dpi=300)

In [None]:
# Median-over-runs grid for the first worker's throughput.
# The ".png" suffix is spelled out so the file name matches grid_plot_last.png
# and the format does not depend on matplotlib's `savefig.format` setting.
fig = grid_plot(rst_first, np.median)
fig.savefig("grid_plot_first.png", dpi=300)

In [None]:
# Grid of the median, over runs, of the per-run absolute difference between
# the first-worker and last-worker throughput.
# The ".png" suffix is spelled out so the file name matches grid_plot_last.png
# and the format does not depend on matplotlib's `savefig.format` setting.
fig = grid_plot(np.abs(rst_first - rst_last), np.median)
fig.savefig("grid_plot_diff.png", dpi=300)

In [None]:
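## OUTDATED: Single Plots (kept for reference only; does not run as-is:
## gather_result() now requires a result directory and returns a
## (first, last) pair, and exp_name is not defined in the cells above).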
## rst = gather_result()
## x = list(map(float, CPU_WORKERS))
## print(BUFFER_SIZES)
## for b_cfg, b in enumerate(BUFFER_SIZES):
##     fig, ax = plt.subplots(1)
##     tmp = rst[:,:,b_cfg,:]
##     for idx, g in enumerate(GPU_IDS):
##         y = np.median(tmp[idx], axis=-1)
##         # y = np.mean(tmp[idx], axis=-1)
##         s = np.std(tmp[idx], axis=-1)
##         ax.errorbar(x, y, s, label=f"gids: {g}")
##     ax.legend()
##     ax.set_ylabel("Throughput MiB/s")
##     ax.set_xlabel("num. of CPU workers")
##     ax.set_title(f"{exp_name}: GDS buff size {b}")
##     fig.savefig(exp_name+b+".png", dpi=300)