In [None]:
import pandas as pd
import numpy as np
from itertools import product
from pathlib import Path
import matplotlib.pyplot as plt
from operator import mul
from functools import reduce

In [None]:
# Sweep dimensions of the benchmark. One result file exists per combination of
# GPU set, worker count, buffer size and run index (see `fn_fmt`).
GPU_IDS = ["0,1", "0,1,2,3", "0,1,6,7", "0,1,2,3,4,5,6,7"]
CPU_WORKERS = [1, 4, 16, 32, 64, 96, 128]
BUFFER_SIZES = ["64k", "256k", "1m", "4m"]
num_runs = 10  # repetitions per configuration
fn_fmt = "gpu_{g}-workers_{c}-buffer_{b}-run_{r}.txt"

# Known experiments: name (used in plot titles and output file names) -> directory
# holding that experiment's result files.
EXPERIMENTS = {
    "gdsddn": Path("gds_results"),                  # GDS DDN
    "gds_nvme": Path("readLargeFileCuFileNVME/"),   # GDS NVME
    "regular_nvme": Path("readLargeFileNVME/"),     # No Cufile NVME
}

# Select the experiment to analyze by changing this single key. A selection must
# always be active: `exp_name` and `DIR` are read by the cells below, and leaving
# them undefined makes a fresh "Restart & Run All" fail with NameError.
exp_name = "gdsddn"
DIR = EXPERIMENTS[exp_name]

In [None]:
def gather_result(result_dir=None, gpu_ids=None, cpu_workers=None,
                  buffer_sizes=None, n_runs=None, filename_fmt=None):
    """Collect the throughput of every benchmark run into a 4-D array.

    One result file is read per (GPU set, worker count, buffer size, run index)
    combination. In each file the line containing ``"Throughput"`` is located and
    its second-to-last whitespace-separated token is parsed as the value (the
    last token is presumably the unit; the line must mention ``MiB/s``).

    Parameters
    ----------
    result_dir : path-like, optional
        Directory containing the result files. Defaults to the global ``DIR``.
    gpu_ids, cpu_workers, buffer_sizes : sequence, optional
        Sweep values; default to the globals ``GPU_IDS``, ``CPU_WORKERS`` and
        ``BUFFER_SIZES``.
    n_runs : int, optional
        Number of repetitions; run indices are ``1..n_runs``. Defaults to the
        global ``num_runs``.
    filename_fmt : str, optional
        Format string with fields ``g``, ``c``, ``b`` and ``r``. Defaults to the
        global ``fn_fmt``.

    Returns
    -------
    numpy.ndarray
        Float array of shape
        ``(len(gpu_ids), len(cpu_workers), len(buffer_sizes), n_runs)``.

    Raises
    ------
    ValueError
        If a file does not contain exactly one throughput line, or the value
        cannot be parsed as a float.
    AssertionError
        If a throughput line is not reported in MiB/s.
    OSError
        If a result file cannot be opened.
    """
    # Anything not supplied falls back to the notebook-level configuration.
    result_dir = Path(DIR if result_dir is None else result_dir)
    gpu_ids = GPU_IDS if gpu_ids is None else gpu_ids
    cpu_workers = CPU_WORKERS if cpu_workers is None else cpu_workers
    buffer_sizes = BUFFER_SIZES if buffer_sizes is None else buffer_sizes
    n_runs = num_runs if n_runs is None else n_runs
    filename_fmt = fn_fmt if filename_fmt is None else filename_fmt

    rst_shape = (len(gpu_ids), len(cpu_workers), len(buffer_sizes), n_runs)
    throughputs = []
    # `product` varies the last factor fastest, which matches the C-order
    # reshape into `rst_shape` below.
    for g, c, b, r in product(gpu_ids, cpu_workers, buffer_sizes, range(1, n_runs + 1)):
        fn = result_dir / filename_fmt.format(g=g, c=c, b=b, r=r)
        values = []
        with open(fn, "r") as fp:
            for line in fp:
                if "Throughput" in line:
                    # Make sure units are the same across all files.
                    assert "MiB/s" in line, f"unexpected unit in {fn}: {line!r}"
                    *_, value, _unit = line.split()
                    values.append(value)
        # Validate per file: a global count check would let a file with no
        # throughput line and a file with two cancel out and silently shift
        # every later value to the wrong configuration.
        if len(values) != 1:
            raise ValueError(
                f"expected exactly one Throughput line in {fn}, found {len(values)}"
            )
        throughputs.append(values[0])

    # Builtin `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
    return np.asarray(throughputs, dtype=float).reshape(rst_shape)

In [None]:
# Throughput of every run; axes are (GPU set, CPU workers, buffer size, run).
rst = gather_result()
x = [float(n_workers) for n_workers in CPU_WORKERS]
print(BUFFER_SIZES)

# One figure per buffer size, with one error-bar curve per GPU set.
for b_cfg, b in enumerate(BUFFER_SIZES):
    fig, ax = plt.subplots(1)
    per_buffer = rst[:, :, b_cfg, :]
    for g, runs in zip(GPU_IDS, per_buffer):
        # Centre: median over the repeated runs (swap in np.mean for the mean);
        # error bar: standard deviation over the same runs.
        ax.errorbar(
            x,
            np.median(runs, axis=-1),
            np.std(runs, axis=-1),
            label=f"gids: {g}",
        )
    ax.legend()
    ax.set_ylabel("Throughput MiB/s")
    ax.set_xlabel("num. of CPU workers")
    ax.set_title(f"{exp_name}: GDS buff size {b}")
    fig.savefig(exp_name + b + ".png", dpi=300)