In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import date, timedelta

import urllib.request
from urllib.error import HTTPError

import re

from enum import StrEnum

In [None]:
class TrackingFlavor(StrEnum):
    """Tracking configurations whose SPOT monitoring logs can be requested.

    The string value of each member is what ``get_url`` interpolates into the
    log URL, both in the ``spot-mon-phase2_recoonly_<value>`` directory name
    and in the ``phase2_recoonly_<value>-RAWtoALL`` job name.
    """

    default = "tracking"
    fast = "fasttracking"
    acts = "actstracking"

def get_url(d, flavor):
    """Build the URL of the RAWtoALL SPOT log for date ``d`` and ``flavor``.

    Dates before 2024-03-01 are looked up under the ``archive_centos7``
    sub-directory with the centos7/gcc11 binary tag; later dates use the
    el9/gcc13 tag directly under ``pages/logs``.
    """
    day = d.strftime("%d-%m-%Y")
    is_archived = d < date.fromisoformat("2024-03-01")

    logs_root = f"https://atlaspmb.web.cern.ch/atlaspmb/spot-mon-phase2_recoonly_{flavor}/pages/logs"
    if is_archived:
        logs_root += "/archive_centos7"

    platform = "main-x86_64-centos7-gcc11-opt" if is_archived else "main-x86_64-el9-gcc13-opt"
    return f"{logs_root}/{day}-{platform}-phase2_recoonly_{flavor}-RAWtoALL"

# Smoke check: display the URL that would be requested for today's default-tracking log.
get_url(date.today(), TrackingFlavor.default)

In [None]:
def parse_spot(txt):
    """Extract the PerfMonMTSvc per-component rows from a log text.

    Every line that starts with ``HH:MM:SS PerfMonMTSvc ... INFO`` followed by
    a step name, an integer count, three non-negative numbers and a component
    name becomes one row; every other line is ignored.

    Returns a DataFrame with the columns ``step``, ``count`` (cast to int),
    ``cpu``, ``vmem``, ``malloc`` (cast to float) and ``component``.
    """
    row_pattern = re.compile(r"\d\d:\d\d:\d\d PerfMonMTSvc\s+INFO (\w+?)\s+(\d+)\s+(\d+(?:\.\d*)?)\s+(\d+(?:\.\d*)?)\s+(\d+(?:\.\d*)?)\s+(.+)")

    # Match each line once and keep only the captured groups of the hits.
    attempts = (row_pattern.match(line) for line in txt.splitlines())
    rows = [hit.groups() for hit in attempts if hit]

    table = pd.DataFrame(rows, columns=["step", "count", "cpu", "vmem", "malloc", "component"])

    # The regex captures text; convert the numeric columns.
    float_columns = ["cpu", "vmem", "malloc"]
    table[["count"]] = table[["count"]].astype(int)
    table[float_columns] = table[float_columns].astype(float)
    return table

def load_and_parse_spot(d, flavor):
    """Download the SPOT log for date ``d`` and ``flavor`` and parse it.

    Returns the DataFrame produced by ``parse_spot``. Errors raised by
    ``urllib.request.urlopen`` (for example ``HTTPError`` when the log does
    not exist) propagate to the caller.
    """
    url = get_url(d, flavor)
    # Use the response as a context manager so the connection is closed as
    # soon as the body has been read, even if reading or decoding fails.
    with urllib.request.urlopen(url) as f:
        txt = f.read().decode("utf-8")
    return parse_spot(txt)

# Example: download and parse the default-tracking log of 2024-05-26 (performs an HTTP request).
load_and_parse_spot(date.fromisoformat("2024-05-26"), TrackingFlavor.default)

In [None]:
def date_range(start, end, step=1):
    """Return the dates from ``start`` to ``end`` (inclusive), ``step`` days apart.

    The result never goes past ``end``: when ``step`` does not evenly divide
    the span, the last element is the latest reachable date that is still
    ``<= end``. The result is empty when ``start`` is later than ``end``.

    Args:
        start: first date of the range.
        end: last date that may be included.
        step: spacing between consecutive dates, in days; must be positive.

    Raises:
        ValueError: if ``step`` is not positive (the range would never end).
    """
    if step <= 0:
        raise ValueError("step must be a positive number of days")

    stride = timedelta(days=step)
    result = []
    current = start
    # Check before appending so that neither an overshooting last step nor a
    # start later than end can leak a date outside [start, end].
    while current <= end:
        result.append(current)
        current += stride
    return result

def convert_point(d, p):
    """Turn one SPOT table row ``p`` into a plot point for date ``d``.

    ``cpu`` is divided by ``count`` and, like ``vmem`` and ``malloc``, scaled
    by 1e-3. The result is a dict with the keys ``date``, ``cpu``, ``vmem``
    and ``malloc``.
    """
    scale = 1e-3
    cpu_per_count = p["cpu"] / p["count"]

    point = {"date": d}
    point["cpu"] = cpu_per_count * scale
    point["vmem"] = p["vmem"] * scale
    point["malloc"] = p["malloc"] * scale
    return point

def sum_two_points(a, b):
    """Add the ``cpu``, ``vmem`` and ``malloc`` values of two points.

    Both points must carry the same ``date``; otherwise an ``Exception`` with
    the message ``"mixed dates"`` is raised.
    """
    shared_date = a["date"]
    if shared_date != b["date"]:
        raise Exception("mixed dates")

    total = {"date": shared_date}
    for metric in ("cpu", "vmem", "malloc"):
        total[metric] = a[metric] + b[metric]
    return total

def null_point(d):
    """Return a placeholder point for date ``d`` whose metrics are all ``None``."""
    point = dict.fromkeys(("date", "cpu", "vmem", "malloc"))
    point["date"] = d
    return point

def extract_athena_track_finding_point(data):
    """Return the first row of ``data`` whose component is ``ITkSiSpTrackFinder``.

    Raises ``Exception("not found")`` when no row matches.
    """
    is_track_finder = data["component"] == "ITkSiSpTrackFinder"
    matches = data[is_track_finder]
    if not len(matches):
        raise Exception("not found")
    return matches.iloc[0]

def extract_acts_pixel_seeding_point(data):
    """Return the first row of ``data`` for the Acts pixel seeding algorithm.

    Both component names ``ActsPixelSeedingAlg`` and
    ``ActsBenchmarkSpotPixelSeedingAlg`` are accepted. Raises
    ``Exception("not found")`` when no row matches.
    """
    accepted = ["ActsPixelSeedingAlg", "ActsBenchmarkSpotPixelSeedingAlg"]
    matches = data[data["component"].isin(accepted)]
    if not len(matches):
        raise Exception("not found")
    return matches.iloc[0]

def extract_acts_strip_seeding_point(data):
    """Return the first row of ``data`` for the Acts strip seeding algorithm.

    Both component names ``ActsStripSeedingAlg`` and
    ``ActsBenchmarkSpotStripSeedingAlg`` are accepted. Raises
    ``Exception("not found")`` when no row matches.
    """
    accepted = ["ActsStripSeedingAlg", "ActsBenchmarkSpotStripSeedingAlg"]
    matches = data[data["component"].isin(accepted)]
    if not len(matches):
        raise Exception("not found")
    return matches.iloc[0]

def extract_acts_ckf_point(data):
    """Return the first row of ``data`` for the Acts track finding (CKF) algorithm.

    Both component names ``ActsTrackFindingAlg`` and
    ``ActsBenchmarkSpotTrackFindingAlg`` are accepted. Raises
    ``Exception("not found")`` when no row matches.
    """
    accepted = ["ActsTrackFindingAlg", "ActsBenchmarkSpotTrackFindingAlg"]
    matches = data[data["component"].isin(accepted)]
    if not len(matches):
        raise Exception("not found")
    return matches.iloc[0]

def extract_athena_points(d, data):
    """Return the Athena track-finding point for date ``d``.

    Args:
        d: date the point belongs to.
        data: SPOT table rows to search for the track-finding component.

    Returns:
        The converted point, or ``null_point(d)`` (after printing a notice)
        when the component cannot be extracted from ``data``.
    """
    try:
        return convert_point(d, extract_athena_track_finding_point(data))
    except Exception:
        # The extractor signals a missing component with a bare Exception.
        # No URL is available in this scope, so only the date is reported;
        # referencing an undefined name here would replace the intended
        # fallback with a NameError.
        print(d, "no track finding data")
        return null_point(d)

def extract_acts_points(d, data):
    """Return the Acts seeding / CKF / track-finding points for date ``d``.

    Args:
        d: date the points belong to.
        data: SPOT table rows to search for the Acts components.

    Returns:
        A tuple ``(pixel_seeding, strip_seeding, seeding, ckf, track_finding)``
        where ``seeding`` is the sum of the pixel and strip seeding points and
        ``track_finding`` is the sum of ``seeding`` and ``ckf``. Any point
        whose inputs are missing is the null point for ``d``; a notice is
        printed for each component that cannot be extracted.
    """
    # A single shared placeholder: the ``is not null`` checks below rely on
    # object identity to tell real points from missing ones.
    null = null_point(d)

    def _point_or_null(extract, what):
        """Convert the row selected by ``extract``; on failure report ``what`` and return ``null``."""
        try:
            return convert_point(d, extract(data))
        except Exception:
            # The extractors signal a missing component with a bare Exception.
            # No URL is available in this scope, so only the date is reported.
            print(d, f"no {what} data")
            return null

    pixel_seeding_point = _point_or_null(extract_acts_pixel_seeding_point, "pixel seeding")
    strip_seeding_point = _point_or_null(extract_acts_strip_seeding_point, "strip seeding")

    if pixel_seeding_point is not null and strip_seeding_point is not null:
        seeding_point = sum_two_points(pixel_seeding_point, strip_seeding_point)
    else:
        seeding_point = null

    ckf_point = _point_or_null(extract_acts_ckf_point, "ckf")

    if seeding_point is not null and ckf_point is not null:
        track_finding_point = sum_two_points(seeding_point, ckf_point)
    else:
        track_finding_point = null

    return pixel_seeding_point, strip_seeding_point, seeding_point, ckf_point, track_finding_point

# Period to scan, one entry per day from start_date up to end_date.
start_date = date(2023, 11, 1)
end_date = date(2024, 5, 26)
date_list = date_range(start_date, end_date)

# Per-date points collected by the download loop, one list per series.
default_track_finding_data, fast_track_finding_data = [], []
acts_pixel_seeding_data, acts_strip_seeding_data, acts_seeding_data = [], [], []
acts_ckf_data, acts_track_finding_data = [], []

# Download, parse and reduce the SPOT log of every flavor for every date.
# Each HTTP response is opened in a ``with`` block so the connection is closed
# as soon as the body has been read, instead of being leaked once per request.
#
# NOTE(review): when the log of one flavor is missing, ``continue`` moves on to
# the next date, so the flavors handled after it are not fetched for that date
# either. Confirm this is intended; the series are otherwise independent.
for d in date_list:
    try:
        default_url = get_url(d, TrackingFlavor.default)
        with urllib.request.urlopen(default_url) as default_f:
            default_txt = default_f.read().decode("utf-8")

        default_spot_data = parse_spot(default_txt)
        # Only the rows of the "Execute" step are used for the points.
        default_spot_exec_data = default_spot_data[default_spot_data["step"] == "Execute"]

        default_track_finding_point = extract_athena_points(d, default_spot_exec_data)

        default_track_finding_data.append(default_track_finding_point)
    except HTTPError as e:
        print(d, default_url, e)
        continue

    try:
        fast_url = get_url(d, TrackingFlavor.fast)
        with urllib.request.urlopen(fast_url) as fast_f:
            fast_txt = fast_f.read().decode("utf-8")

        fast_spot_data = parse_spot(fast_txt)
        fast_spot_exec_data = fast_spot_data[fast_spot_data["step"] == "Execute"]

        fast_track_finding_point = extract_athena_points(d, fast_spot_exec_data)

        fast_track_finding_data.append(fast_track_finding_point)
    except HTTPError as e:
        print(d, fast_url, e)
        continue

    try:
        acts_url = get_url(d, TrackingFlavor.acts)
        with urllib.request.urlopen(acts_url) as acts_f:
            acts_txt = acts_f.read().decode("utf-8")

        acts_spot_data = parse_spot(acts_txt)
        acts_spot_exec_data = acts_spot_data[acts_spot_data["step"] == "Execute"]

        acts_pixel_seeding_point, acts_strip_seeding_point, acts_seeding_point, acts_ckf_point, acts_track_finding_point = extract_acts_points(d, acts_spot_exec_data)

        acts_pixel_seeding_data.append(acts_pixel_seeding_point)
        acts_strip_seeding_data.append(acts_strip_seeding_point)
        acts_seeding_data.append(acts_seeding_point)
        acts_ckf_data.append(acts_ckf_point)
        acts_track_finding_data.append(acts_track_finding_point)
    except HTTPError as e:
        print(d, acts_url, e)

# Replace each list of points by a DataFrame, dropping every row that has a
# missing value (the placeholder points of dates without data).
(
    default_track_finding_data,
    fast_track_finding_data,
    acts_pixel_seeding_data,
    acts_strip_seeding_data,
    acts_seeding_data,
    acts_ckf_data,
    acts_track_finding_data,
) = (
    pd.DataFrame(points).dropna()
    for points in (
        default_track_finding_data,
        fast_track_finding_data,
        acts_pixel_seeding_data,
        acts_strip_seeding_data,
        acts_seeding_data,
        acts_ckf_data,
        acts_track_finding_data,
    )
)

In [None]:
def piecewise_const(x, y, edges):
    """Approximate ``y(x)`` by its mean on each closed interval in ``edges``.

    For every ``(a, b)`` in ``edges`` that contains at least one sample
    (``a <= x <= b``), two points are emitted: the smallest and the largest
    ``x`` inside the interval, both paired with the mean of ``y`` over the
    interval. Intervals without any sample are skipped, so the result never
    contains NaN placeholders and an empty selection is never reduced.

    Args:
        x: sample positions; must support element-wise comparison and
            boolean-mask indexing (e.g. a NumPy array or pandas Series).
        y: sample values, indexable with the same mask as ``x``.
        edges: iterable of ``(a, b)`` interval bounds.

    Returns:
        ``(result_x, result_y)``: two lists of equal length.
    """
    result_x = []
    result_y = []

    for a, b in edges:
        mask = (x >= a) & (x <= b)
        if not mask.any():
            # Nothing falls into this interval; min/max/mean are undefined.
            continue

        segment_x = x[mask]
        level = y[mask].mean()  # computed once, used for both end points
        result_x += [segment_x.min(), segment_x.max()]
        result_y += [level, level]

    return result_x, result_y

In [None]:
# Closed date intervals (first day, last day) over which the Acts track-finding
# CPU time is averaged. 2024-02-24 through 2024-02-29 is not covered by any interval.
acts_edges = [
    (date.fromisoformat(first_day), date.fromisoformat(last_day))
    for first_day, last_day in [
        ("2023-11-01", "2023-11-07"),
        ("2023-11-08", "2023-11-12"),
        ("2023-11-13", "2023-11-23"),
        ("2023-11-24", "2023-11-28"),
        ("2023-11-29", "2024-02-07"),
        ("2024-02-08", "2024-02-14"),
        ("2024-02-15", "2024-02-23"),
        ("2024-03-01", "2024-03-12"),
        ("2024-03-13", "2024-03-17"),
        ("2024-03-18", "2024-04-02"),
        ("2024-04-03", "2024-04-04"),
        ("2024-04-05", "2024-04-12"),
        ("2024-04-13", "2024-04-22"),
        ("2024-04-23", "2024-04-29"),
        ("2024-04-30", "2024-05-16"),
        ("2024-05-17", "2024-05-23"),
        ("2024-05-24", "2024-05-31"),
    ]
]

# Piecewise-constant fit of the Acts track-finding CPU time over acts_edges.
# piecewise_const returns the x and y lists together, so it is evaluated once
# and its two results are paired with the column names.
acts_track_finding_data_fit = pd.DataFrame(
    dict(
        zip(
            ("date", "cpu"),
            piecewise_const(acts_track_finding_data["date"], acts_track_finding_data["cpu"], acts_edges),
        )
    )
)

In [None]:
# CPU time of the track-finding step over time, one curve per series.
plt.figure(figsize=(15, 10))

plt.title("Track Finding CPU")
plt.ylabel("CPU time per event [s]")
plt.xticks(rotation=45, ha="right")

# (frame, legend label) of every curve, drawn in this order.
for cpu_frame, cpu_label in [
    (default_track_finding_data, "Athena Default Track Finding"),
    (fast_track_finding_data, "Athena Fast Track Finding"),
    (acts_track_finding_data, "Acts-based Track Finding"),
    (acts_track_finding_data_fit, "Acts-based Track Finding fit"),
    #(acts_seeding_data, "Acts-based Seeding"),
    #(acts_ckf_data, "Acts-based CKF"),
]:
    plt.plot(cpu_frame["date"], cpu_frame["cpu"], ".--", label=cpu_label)

#plt.annotate("some detail", xy=acts_track_finding_data_fit.iloc[2][["date", "cpu"]], xy)

plt.legend();

In [None]:
# Memory of the track-finding step over time, one curve per series.
plt.figure(figsize=(15, 10))

# (frame, legend label) of every curve, drawn in this order.
for vmem_frame, vmem_label in [
    (default_track_finding_data, "Athena Default Track Finding"),
    (fast_track_finding_data, "Athena Fast Track Finding"),
    (acts_track_finding_data, "Acts-based Track Finding"),
]:
    plt.plot(vmem_frame["date"], vmem_frame["vmem"], ".--", label=vmem_label)

plt.title("Track Finding Memory")
plt.ylabel("Memory [mb]")
plt.legend()
plt.xticks(rotation=45, ha="right");