In [None]:
# Work from the benchmark sources: the scripts listed in FILES are launched by
# relative path, so the kernel's working directory has to be this folder.
# NOTE: the path is relative, so re-running this cell in a live kernel moves again.
cd ../related_post_gen/python

In [None]:
import os
import sys
from glob import iglob
from io import StringIO
from shutil import which
from subprocess import PIPE, STDOUT, run

import pandas as pd
from tqdm.notebook import tqdm

In [None]:
def python(*args, **kwargs):
    """Run the kernel's own interpreter with ``args`` and return its stdout.

    Extra keyword arguments are forwarded to ``subprocess.run``. Anything the
    child wrote to stderr is printed, and a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    command = [sys.executable]
    command.extend(args)
    completed = run(command, capture_output=True, text=True, **kwargs)
    if completed.stderr:
        # Surface warnings/tracebacks even when the run itself succeeded.
        print(completed.stderr)
    completed.check_returncode()
    return completed.stdout
# Smoke test: show which interpreter version the benchmarks will run under.
print(python("--version"))

In [None]:
def git(*args, **kwargs):
    """Run ``git`` with ``args`` and return its stdout as text.

    Extra keyword arguments are forwarded to ``subprocess.run``. stderr is
    printed only when the command fails.

    Raises:
        FileNotFoundError: if no ``git`` executable can be found on PATH.
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    executable = which("git")
    if executable is None:
        # which() returns None when git is missing; handing that to run()
        # would fail with an opaque TypeError instead of a clear message.
        raise FileNotFoundError("git executable not found on PATH")
    result = run([executable, *args], text=True, capture_output=True, **kwargs)
    if result.returncode:
        print(result.stderr)
    result.check_returncode()
    return result.stdout
# Smoke test: show the commit currently checked out (`-s` suppresses the diff).
print(git("show", "-s"))

In [None]:
def k_input_columns(k):
    """Return the column names for a benchmark record with ``k`` fields.

    Six fields map to counter/import/compile/input/process/output; five
    fields are the same list without the "compile" entry. Any other ``k``
    trips the assertion.
    """
    assert k in {5, 6}
    names = ["counter", "import", "compile", "input", "process", "output"]
    without_compile = [name for name in names if name != "compile"]
    return without_compile if k == 5 else names

In [None]:
# Timing phases present in every record (the optional "compile" column is left out).
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
TIME_COLUMNS = ["import", "input", "process", "output"]

In [None]:
# Benchmark scripts to profile, given relative to the working directory.
FILES = ["related.py", "related_np.py", "related_numba.py"]

In [None]:
# Revision range handed to `git rev-list`: from the commit that added detailed
# times up to the target branch.
# NOTE(review): an `A..B` range excludes commit A itself — confirm that is intended.
COMMITS = "0ca6f7eeb7329223976e857036786dbabdfa3898..numba2"

In [None]:
# Default number of measured runs per script (warm-up runs come on top of this).
N = 20

In [None]:
def profile_file(f, n=N):
    """Benchmark script ``f`` and return its measurements as a DataFrame.

    The script is run twice as warm-up (once plain, once with
    ``DETAILED_PYTHON_PERF=1``) and then ``n`` more times with the flag set.
    The stdout of the measured runs is parsed as header-less, tab-separated
    text with five or six fields per line (see ``k_input_columns``).

    Args:
        f: path of the script, passed to the interpreter as-is.
        n: number of measured runs.

    Returns:
        DataFrame with an "impl" column (``f``), the parsed columns and a
        "total" column holding the sum of every column after the first.
    """
    print(f)
    # Add the flag to the inherited environment. A bare ``env={...}`` would
    # replace the whole environment, so the measured runs would lose PATH,
    # HOME, etc. and differ from the plain warm-up run.
    detailed_env = {**os.environ, "DETAILED_PYTHON_PERF": "1"}
    # warm-up
    print(python(f))
    print(python(f, env=detailed_env))
    # measure
    buf = "\n".join(python(f, env=detailed_env).strip()
                    for _ in tqdm(range(n)))
    record = pd.read_csv(StringIO(buf), sep="\t", header=None)
    record.columns = k_input_columns(len(record.columns))
    # Column 0 is the counter name; everything after it is a timing.
    record["total"] = record.iloc[:, 1:].sum(axis=1)
    record.insert(0, "impl", f)
    return record

In [None]:
def profile_file_history(f, commits=COMMITS, n=N):
    """Benchmark script ``f`` at every commit in ``commits`` that touched it.

    Commits are listed oldest-first with ``git rev-list --reverse``. Each one
    is checked out and profiled with ``profile_file``. The repository is put
    back on the branch or commit it started from when the function exits,
    including when a run fails part-way through.

    Args:
        f: path of the script, also used to filter the commit list.
        commits: revision range understood by ``git rev-list``.
        n: number of measured runs per commit.

    Returns:
        The per-commit records concatenated, with a "commit" column holding
        "<index>: <first 18 characters of the subject>…".

    Raises:
        ValueError: if no commit in ``commits`` touches ``f``.
    """
    # Remember the starting point so the working tree is not left on a
    # detached historical commit afterwards.
    start = git("rev-parse", "--abbrev-ref", "HEAD").strip()
    if start == "HEAD":
        # Already detached: fall back to the commit hash.
        start = git("rev-parse", "HEAD").strip()
    data = []
    try:
        for i, h in enumerate(git("rev-list", "--reverse", commits, "--", f).splitlines()):
            git("checkout", h)
            subj = git("show", "-s", "--format=%s").strip()
            # The zero-padded index keeps labels unique and in commit order.
            subj = f"{i:02}: {subj:.18}…"
            print(subj)
            record = profile_file(f, n)
            record.insert(1, "commit", subj)
            data.append(record)
    finally:
        git("checkout", start)
    if not data:
        raise ValueError(f"no commits in {commits!r} touch {f!r}")
    return pd.concat(data)


In [None]:
# Profile every script over the commit range and stack the results into one
# long frame. This is the slow cell of the notebook.
times = pd.concat([profile_file_history(f) for f in FILES])

In [None]:
# Records without a "compile" field get NaN after concatenation: treat those
# as zero and move the column to position 4.
if "compile" in times.columns:
    compile_times = times.pop("compile").fillna(0)
    times.insert(4, "compile", compile_times)

In [None]:
from uncertainties import ufloat

def pretty_stddev(df: pd.DataFrame):
    """Summarise every column of ``df`` as a formatted "mean ± std" string.

    Values are multiplied by 1000 first (presumably seconds to milliseconds —
    TODO confirm), then each column's mean and sample standard deviation are
    rendered with the ``.2ufP`` format of ``uncertainties``.

    Args:
        df: frame containing numeric columns only.

    Returns:
        Series indexed by column name holding the formatted strings.
    """
    return (1000 * df).apply(lambda c: f"{ufloat(c.mean(), c.std()):.2ufP}")

# Aggregate only the measurement columns: the grouping keys hold strings and
# must not reach the `1000 * df` arithmetic inside pretty_stddev.
table = times.groupby(["impl", "counter", "commit"])[
    [c for c in times.columns if c not in ("impl", "counter", "commit")]
].apply(pretty_stddev)
table

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
import warnings

# Silence FutureWarning noise in the plotting cells below.
warnings.simplefilter(action="ignore", category=FutureWarning)

# Long format: one row per (impl, commit, counter, section) measurement.
tidy = times.melt(
    id_vars=["impl", "commit", "counter"],
    var_name="section",
    value_name="time",
)
# One panel per implementation (rows) and counter (columns); axes are not
# shared because the panels differ in scale and in commit labels.
grid = sns.catplot(
    data=tidy,
    x="time",
    y="commit",
    hue="section",
    row="impl",
    col="counter",
    kind="point",
    sharex=False,
    sharey=False,
)


In [None]:
def stddev(df: pd.DataFrame):
    """Summarise every column of ``df`` as a ``ufloat(mean, std)``.

    Values are multiplied by 1000 first (presumably seconds to milliseconds —
    TODO confirm).

    Args:
        df: frame containing numeric columns only.

    Returns:
        Series indexed by column name holding one ``ufloat`` per column.
    """
    return (1000 * df).apply(lambda c: ufloat(c.mean(), c.std()))

# Aggregate only the measurement columns (the grouping keys hold strings),
# keep the last commit of each (counter, impl) pair, order by processing time
# and format the values for display.
final_table = (
    times.groupby(["counter", "impl", "commit"])[
        [c for c in times.columns if c not in ("counter", "impl", "commit")]
    ]
    .apply(stddev)
    .groupby(level=[0, 1]).last()
    .reset_index()
    .sort_values(["counter", "process"])
    .set_index(["counter", "impl"])
    .map(lambda v: f"{v:.2ufP}")
)
final_table

In [None]:
# Implementations in the order final_table lists them for perf_counter.
order = final_table.loc["perf_counter"].index
# NOTE(review): .last() keeps only the final measured row per (impl, counter),
# not a mean over the last commit's runs — confirm this is intended.
finals = (
    times[["impl", "counter", "process", "total"]]
    .groupby(["impl", "counter"])
    .last()
    .reset_index()
    .melt(id_vars=["impl", "counter"], var_name="section", value_name="time")
)
sns.catplot(
    data=finals,
    x="time",
    y="impl",
    hue="section",
    row="counter",
    kind="point",
    order=order,
)