In [1]:
import pandas as pd
from imdb import IMDBDataset
import seaborn as sns
import numpy as np
from scipy.stats import f_oneway
import matplotlib.pyplot as plt

# Points of interest

- energy / synapse (fJ) (energy / param)
- energy / input vector (μJ)
- Power (W)
- Throughput (vectors / s)
- Power / synapse (nW) (power / param)
- Synapses (params)
- Dataset size
- Training time (s)
- Epochs

In [39]:
# Instantiate the project-local IMDB dataset and record how many items it
# holds; the bare `imdb_size` on the last line displays that count as the
# cell output.
# NOTE(review): this cell carries a later execution count than the cells
# below it, and a subsequent cell re-assigns `imdb_size` to a hard-coded
# 25000, so this cell is not needed for a top-to-bottom run unless
# `imdb_dataset` itself is used elsewhere -- confirm before removing.
imdb_dataset = IMDBDataset()
imdb_size = len(imdb_dataset)
imdb_size

25000

In [2]:
# Number of samples per dataset, used to turn training time into throughput.
# `mnist_size` is presumably the MNIST training split -- TODO confirm.
mnist_size = 60_000
# Same value the `len(imdb_dataset)` cell displays.
imdb_size = 25_000
# Mini-batch size; equals the default `batch_size` of the benchmark loaders.
batch_size = 64

In [4]:
def load_benchmark_df(filename, batch_size = 64):
    """Load a benchmark CSV and add per-iteration energy/power metrics.

    The CSV must provide the columns ``run``, ``epoch``, ``elapsed_time``,
    ``power_consumption`` and ``total_params``. Rows are sorted by
    ``run``/``epoch``/``elapsed_time`` and the following columns are added:

    * ``iteration_time``: difference between consecutive ``elapsed_time``
      values inside each (run, epoch) group; the first row of every group
      gets 0.
    * ``energy_J``: ``power_consumption * iteration_time``, clipped at 0.
    * ``energy_per_vector_uJ``: ``energy_J / batch_size`` scaled by 1e6.
    * ``energy_per_param_fJ``: energy per vector divided by the parameter
      count, scaled by 1e15.
    * ``power_per_param_nW``: ``power_consumption / (batch_size * params)``
      scaled by 1e9.

    The unit names assume ``power_consumption`` is in watts and
    ``elapsed_time`` in seconds -- TODO confirm against the benchmark writer.

    Args:
        filename: Path (or any input accepted by ``pd.read_csv``) of the CSV.
        batch_size: Number of input vectors processed per logged iteration.

    Returns:
        The sorted DataFrame with the additional metric columns.
    """
    df = pd.read_csv(filename)
    # The parameter count is taken from the first row and applied to all rows.
    num_params = df.total_params.iloc[0]
    df = df.sort_values(by=["run", "epoch", "elapsed_time"])
    df["iteration_time"] = (
        df.groupby(["run", "epoch"])["elapsed_time"].diff().fillna(0)
    )
    # Clip so a negative power reading or time step never yields negative energy.
    iteration_energy = np.clip(df["power_consumption"] * df["iteration_time"], 0, None)
    df["energy_J"] = iteration_energy
    energy_per_vector = iteration_energy / batch_size
    # SI prefixes: micro = 1e-6, femto = 1e-15, nano = 1e-9. Note that the
    # literal `10e6` means 1e7, so it must not be used for these conversions.
    df["energy_per_vector_uJ"] = energy_per_vector * 1e6
    df["energy_per_param_fJ"] = energy_per_vector / num_params * 1e15
    df["power_per_param_nW"] = df["power_consumption"] / (batch_size * num_params) * 1e9

    return df


def get_summary_df(filename, dataset_size, batch_size=64, summarize=True):
    """Aggregate a benchmark CSV per run and, optionally, across runs.

    Args:
        filename: Benchmark CSV, forwarded to ``load_benchmark_df``.
        dataset_size: Number of samples processed per epoch; used for
            throughput and stored in the ``dataset_size`` column.
        batch_size: Batch size forwarded to ``load_benchmark_df``.
        summarize: When False, return one row per run. When True, return a
            single row holding the mean of the per-run rows plus
            across-run statistics.

    Returns:
        A DataFrame. With ``summarize=False`` it has one row per run with
        the columns ``run``, ``energy_per_param_fJ``,
        ``energy_per_vector_uJ``, ``power_W``, ``energy_per_param_std``
        (within-run standard deviation of the per-iteration energy per
        parameter), ``train_duration_s``, ``epochs``, ``params``,
        ``power_per_param_nW``, ``energy_J``, ``iterations``,
        ``dataset_size`` and ``throughput``. With ``summarize=True`` it has
        one row with the per-run means plus ``runs_std`` (standard deviation
        of the per-run mean energy per parameter), ``f_stat`` and ``p_val``
        (one-way ANOVA of per-iteration energy per parameter across runs;
        NaN when fewer than two runs are present).
    """
    df = load_benchmark_df(filename, batch_size)
    # Group by the column name (not a one-element list) so the group key is a
    # plain scalar regardless of the installed pandas version.
    run_groups = df.groupby("run")

    results = []
    for run, run_df in run_groups:
        epoch_group = run_df.groupby("epoch")
        results.append({
            "run": run,
            "energy_per_param_fJ": run_df["energy_per_param_fJ"].mean(),
            "energy_per_vector_uJ": run_df["energy_per_vector_uJ"].mean(),
            "power_W": run_df["power_consumption"].mean(),
            "energy_per_param_std": run_df["energy_per_param_fJ"].std(),
            # Sum of each epoch's largest elapsed_time value.
            "train_duration_s": epoch_group["elapsed_time"].max().sum(),
            "epochs": len(epoch_group),
            "params": run_df.total_params.iloc[0],
            "power_per_param_nW": run_df["power_per_param_nW"].mean(),
            "energy_J": run_df["energy_J"].sum(),
            "iterations": len(run_df),
        })

    runs_df = pd.DataFrame(results)
    runs_df["dataset_size"] = dataset_size
    # Use every run's own epoch count; runs may differ in length.
    runs_df["throughput"] = dataset_size * runs_df["epochs"] / runs_df["train_duration_s"]

    if not summarize:
        return runs_df

    # One-way ANOVA needs at least two groups; report NaN otherwise.
    samples = [group.to_numpy() for _, group in run_groups["energy_per_param_fJ"]]
    if len(samples) >= 2:
        f_statistic, p_value = f_oneway(*samples)
    else:
        f_statistic, p_value = float("nan"), float("nan")

    summary_df = pd.DataFrame([runs_df.drop(columns=["run"]).mean()])
    # Spread of the per-run means (NaN for a single run).
    summary_df["runs_std"] = runs_df["energy_per_param_fJ"].std()
    summary_df["f_stat"] = f_statistic
    summary_df["p_val"] = p_value

    reordered_cols = [
        "energy_per_param_fJ",
        "energy_per_vector_uJ",
        "power_W",
        "throughput",
        "power_per_param_nW",
        "params",
        "dataset_size",
        "train_duration_s",
        "epochs",
        "energy_per_param_std",
        "iterations",
        "energy_J",
        "runs_std",
        "f_stat",
        "p_val",
    ]
    return summary_df[reordered_cols]

# Summarize final results

In [9]:
# Experiments to summarize: every (platform, model) pair is read from
# benchmarks/<model>_benchmark_<platform>.csv.
models = ["lstm", "resnet18", "transformer"]
# Platform keys known to `platform_names`: "cuda_a100", "cuda", "npu".
# Only "cuda" is summarized here.
platforms = ["cuda"]
# Samples per epoch for the dataset each model was trained on.
dataset_sizes = {
    "lstm": imdb_size,
    "transformer": imdb_size,
    "resnet18": mnist_size
}
# Short platform labels used in column and sheet names.
platform_names = {
    "cuda_a100": "A100",
    "cuda": "4060",
    "npu": "NPU",
}

# Human-readable row labels for the exported summary table.
# NOTE(review): "Traning duration sec" is misspelled; left unchanged because
# other cells or downstream consumers may key on the exact label.
metrics_map = {
    "energy_per_param_fJ": "Energy per synaptic event fJ",
    "energy_per_vector_uJ": "Energy per input vector uJ",
    "power_W": "Power W",
    "throughput": "Throughput vectors/second",
    "power_per_param_nW": "Power per synapse nW",
    "params": "Number of synapses",
    "dataset_size": "Dataset size",
    "train_duration_s": "Traning duration sec",
    "epochs": "Number of epochs",
    "runs_std": "Standard Deviation",
    "f_stat": "F-statistic",
    "p_val": "P-value",
}
# Summary columns that end up in the exported table, in row order.
cols_to_keep = [
    'energy_per_param_fJ',
    'energy_per_vector_uJ',
    'power_W',
    'throughput',
    'power_per_param_nW',
    'params',
    'dataset_size',
    'train_duration_s',
    'epochs',
    'runs_std',
    'f_stat',
    'p_val'
]

# Summarize the first experiment once to obtain the metric names that index
# the results table (one row per metric, one column per experiment).
benchmark_filename = f"benchmarks/{models[0]}_benchmark_{platforms[0]}.csv"
dataset_size = dataset_sizes[models[0]]
# Pass the path built above; the previously used name `filename` is not
# defined anywhere in this cell.
df = get_summary_df(benchmark_filename, dataset_size)[cols_to_keep]
results_df = pd.DataFrame(index=df.T.index)
runs = {}
for platform in platforms:
    for model in models:
        benchmark_filename = f"benchmarks/{model}_benchmark_{platform}.csv"
        dataset_size = dataset_sizes[model]
        experiment_name = f"{model.upper()}_{platform_names[platform]}"

        # Per-run rows go to their own worksheet.
        runs_df = get_summary_df(benchmark_filename, dataset_size, summarize=False)
        runs[experiment_name] = runs_df

        # The single summary row becomes this experiment's column; taking it
        # as a Series aligns it with the metric index.
        summary_df = get_summary_df(benchmark_filename, dataset_size)[cols_to_keep]
        results_df[experiment_name] = summary_df.iloc[0]

results_df = results_df.rename(index=metrics_map)
results_df.index.name = "NN Architecture"
# One "Summary" sheet plus one sheet per experiment with its per-run rows.
with pd.ExcelWriter("results.xlsx") as writer:
    results_df.to_excel(writer, sheet_name="Summary")
    for experiment_name, experiment_runs in runs.items():
        experiment_runs.to_excel(writer, sheet_name=experiment_name, index=False)
