# Analysis of MLPF inference results

In [None]:
# Show the current working directory. NOTE(review): a bare `pwd` presumably
# relies on IPython automagic (`%pwd`); in plain Python it is an undefined name.
pwd

In [None]:
import numpy as np
from pathlib import Path
import json
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd

In [None]:
# Directory whose entries are each read as a JSON result file by the cells below.
result_dir = '/mnt/ceph/users/ewulff/particleflow/inference_tests/'
# Directory listings come back in no guaranteed order; sort them so that
# result_files[0] and the plotting/legend order are reproducible between runs.
result_files = sorted(Path(result_dir).glob("*"))

In [None]:
# List every discovered result file, one path per line.
for f in result_files:
    print(str(f))

In [None]:
# Use the first entry of the listing for the single-file exploration below.
file = result_files[0]

In [None]:
# Parse the selected result file and keep its "results" entry for inspection.
with file.open(mode="r", encoding="utf-8") as f:
    data = json.loads(f.read())
results = data["results"]

In [None]:
def result2vars(result):
    """Extract the benchmark figures of a single result entry.

    Args:
        result: Mapping with a ``"wl-scores"`` sub-mapping (keys
            ``"mean_throughput"`` and ``"mean_run_time"``) and a
            ``"wl-stats"`` sub-mapping (keys ``"batch_size"`` and
            ``"events_per_run"``).

    Returns:
        Tuple ``(mean_throughput, mean_run_time, batch_size, events_per_run)``.

    Raises:
        KeyError: If any of the expected keys is missing.
    """
    scores = result["wl-scores"]
    throughput, run_time = scores["mean_throughput"], scores["mean_run_time"]
    stats = result["wl-stats"]
    return throughput, run_time, stats["batch_size"], stats["events_per_run"]

In [None]:
# Unpack the first result entry and display its values as the cell output.
first_result_vars = result2vars(results[0])
mean_throughput, mean_run_time, batch_size, events_per_run = first_result_vars
first_result_vars

In [None]:
def file2nodename(file):
    """Return the part of the file's stem after the last underscore.

    Args:
        file: Object with a ``stem`` string attribute, e.g. a ``pathlib.Path``.

    Returns:
        The text following the final ``"_"`` in ``file.stem``, or the whole
        stem when it contains no underscore.
    """
    _, _, node = file.stem.rpartition("_")
    return node

In [None]:
def file2lists(file):
    """Load one result file and return its measurements ordered by batch size.

    Args:
        file: Path-like object providing ``open``; the file must contain JSON
            with a top-level ``"results"`` list whose entries are accepted by
            ``result2vars``.

    Returns:
        Three NumPy arrays ``(mean_throughputs, mean_run_times, batch_sizes)``,
        all reordered so that ``batch_sizes`` is ascending.
    """
    with file.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)

    # One (throughput, run time, batch size, events per run) tuple per entry.
    rows = [result2vars(entry) for entry in payload["results"]]
    throughputs = np.array([row[0] for row in rows])
    run_times = np.array([row[1] for row in rows])
    sizes = np.array([row[2] for row in rows])

    # Apply the same permutation to all three arrays so they stay aligned.
    order = np.argsort(sizes)
    return throughputs[order], run_times[order], sizes[order]

In [None]:
# Apply plot styling from a local matplotlib rc file. The path is relative,
# so it is resolved against the current working directory.
mpl.rc_file("my_matplotlib_rcparams.txt")

In [None]:
def file2plot_throughput(file):
    """Plot mean throughput versus batch size for one result file.

    Draws the measured throughput with diamond markers, labelled with the
    node name from ``file2nodename``, plus a dashed line in the same colour
    showing linear scaling extrapolated from the smallest batch size.

    Args:
        file: Path-like result file accepted by ``file2lists``.

    Returns:
        The file's batch sizes as a NumPy array in ascending order.
    """
    mean_throughputs, _, batch_sizes = file2lists(file)
    measured = plt.plot(batch_sizes, mean_throughputs, label=file2nodename(file), marker="D")
    if len(batch_sizes) > 0:
        # Ideal scaling relative to the first (smallest) batch size. Derived
        # from the actual batch sizes so it always matches their number and
        # values, rather than assuming exactly 1, 2, 4, ..., 1024.
        linear = mean_throughputs[0] * batch_sizes / batch_sizes[0]
        plt.plot(batch_sizes, linear, linestyle="--", color=measured[-1].get_color())
    return batch_sizes
def file2plot_runtime(file):
    """Plot mean run time versus batch size for one result file.

    The curve uses diamond markers and is labelled with the node name
    returned by ``file2nodename``. Nothing is returned.

    Args:
        file: Path-like result file accepted by ``file2lists``.
    """
    _, run_times, sizes = file2lists(file)
    plt.plot(sizes, run_times, marker="D", label=file2nodename(file))

In [None]:
# Overlay the throughput curves of all result files on one log-log figure.
# The axis settings are re-applied on every iteration.
for file in result_files:
    # Draws the measured curve plus its dashed reference line and returns
    # the batch sizes of this file.
    batch_sizes = file2plot_throughput(file)
    plt.legend(loc=(0.65, 0.1))
    plt.xlabel("Batch size")
    plt.ylabel("Throughput [events/s]")
    # plt.title("Tests on " + file2nodename(file))
    plt.xscale('log')
    plt.yscale('log')
    # Place one x tick at each batch size of the file just plotted.
    plt.xticks(batch_sizes, labels=batch_sizes)
    plt.yticks([1, 10, 100, 1000])
    plt.ylim((10, 1000))
    # NOTE(review): 0 cannot be shown on a log-scaled axis — confirm the
    # intended lower x-limit.
    plt.xlim((0, None))
plt.show()

In [None]:
# Tabulate mean throughput: one row per batch size, one column per node.
data = {}
for file in result_files:
    mean_throughputs, mean_run_times, batch_sizes = file2lists(file)
    nodename = file2nodename(file)
    # Key each column by its own batch sizes so pandas aligns rows across
    # files, instead of labelling every row with the last file's batch sizes.
    data[nodename] = pd.Series(mean_throughputs, index=batch_sizes)

# Rows in ascending batch size; sizes missing from a file show up as NaN.
df = pd.DataFrame(data=data).sort_index()
df.index = ["batch_size={:d}".format(x) for x in df.index]
# Order the columns alphabetically by node name.
df = df.reindex(sorted(df.columns), axis=1)
df

In [None]:
# Overlay the run-time curves of all result files on one log-log figure.
for file in result_files:
    file2plot_runtime(file)
    # file2plot_runtime returns nothing, so read this file's batch sizes here
    # rather than relying on a `batch_sizes` variable left over from an
    # earlier cell.
    batch_sizes = file2lists(file)[2]
    plt.legend(loc=(0.65, 0.6))
    plt.xlabel("Batch size")
    plt.ylabel("Runtime [s]")
    # plt.title("Tests on " + file2nodename(file))
    plt.xscale('log')
    plt.yscale('log')
    # Place one x tick at each batch size of the file just plotted.
    plt.xticks(batch_sizes, labels=batch_sizes)
plt.show()