In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import re
from pandas.errors import EmptyDataError
import numpy as np

%matplotlib inline
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

plt.rcParams.update({'font.size': 20})

In [None]:
# Path-segment patterns substituted into the glob expressions used by the cells below.
# To restrict the analysis to a single device, uncomment one of these instead:
# device = "orin_agx"
# device = "orin_nano"
device = "**"  # wildcard segment: match every device directory
tdp = "*w"  # match every directory whose name ends in "w" (parsed later as the TDP folder)

In [None]:
import os

# Collect every experiment output directory, one glob per application.
# MLCChat gets a single wildcard segment for the model path; the other
# applications get two.
model_dirs = []
for app in ("MLCChat", "LLMFarmEval", "LlamaCpp"):
    model = "**/" if app == "MLCChat" else "**/**"
    model_dirs += glob.glob(f"../experiment_outputs/{device}/{app}/{model}/{tdp}/")

# The loop reused `app`; reset it to a wildcard for the cells that glob with it later.
app = "**"
model_dirs.sort()
print(f"{len(model_dirs)} found!")
for model_dir in model_dirs:
    print(model_dir)


In [None]:
# Sanity check: show the first discovered directory (raises IndexError if none were found).
model_dirs[0]

In [None]:
def generate_stats_plots(dir):
    """Run the performance report for each directory that has no results CSV yet.

    For every path in ``dir`` that does not already contain
    ``results_model_inference_measurements.csv``, the path is printed and
    ``../src/report_performance.py -p <path>`` is executed. Directories that
    already have the CSV are skipped.

    Args:
        dir: Iterable of experiment output directory paths. (The name shadows
            the builtin ``dir``; it is kept so existing callers keep working.)

    Returns:
        None. A non-zero exit status of the report script is reported with a
        printed message rather than an exception, so one failing directory
        does not stop the remaining ones from being processed.
    """
    import subprocess
    import sys

    for d in dir:
        if os.path.exists(os.path.join(d, "results_model_inference_measurements.csv")):
            continue
        print(d)
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact; sys.executable runs the script with the same
        # interpreter as the kernel.
        completed = subprocess.run(
            [sys.executable, "../src/report_performance.py", "-p", d],
            capture_output=True,
            text=True,
            check=False,
        )
        # Echo the script's output so it still shows up in the cell output.
        if completed.stdout:
            print(completed.stdout, end="")
        if completed.stderr:
            print(completed.stderr, end="")
        if completed.returncode != 0:
            print(f"report_performance.py exited with code {completed.returncode} for {d}")
# Generate any missing results CSVs; a shallow copy of the directory list is passed in.
generate_stats_plots(list(model_dirs))

In [None]:
# Gather the per-run results CSVs; the model path is either one or two
# directory levels deep, so each depth gets its own glob pattern.
files = (
    glob.glob(f"../experiment_outputs/{device}/{app}/**/{tdp}/results_model_inference_measurements.csv")
    + glob.glob(f"../experiment_outputs/{device}/{app}/**/**/{tdp}/results_model_inference_measurements.csv")
)
files

In [None]:
# Load every results CSV, tag its rows with metadata parsed from the file path,
# then compute the mean / standard deviation of tokens-per-second per configuration.

# Captures (device, application, tdp) from
# .../experiment_outputs/<device>/<application>/<model path>/<tdp>/<file>.
path_regex = re.compile(r"experiment_outputs/(.*?)/(.*?)/.*/(.*?w)/")

# Constants attached to every row as extra columns.
context_size = 2048
max_gen_len = 512

dfs = []
for f in files:
    print(f)
    path_match = path_regex.search(f)
    if path_match is None:
        print(f"Skipping (unexpected path layout): {f}")
        continue
    # Deliberately not named `tdp`: that name holds the glob pattern configured
    # at the top of the notebook and must survive re-running earlier cells.
    le_device, application, file_tdp = path_match.groups()

    # Raw f-string so that \d reaches the regex engine as-is; the application
    # name is escaped because it is interpolated into the pattern.
    model_match = re.search(rf"{re.escape(application)}/(.*)/\d+w/", f)
    if model_match is None:
        print(f"Skipping (cannot parse model name): {f}")
        continue
    full_model_name = model_match.group(1)
    model_name = full_model_name.split('/')[0].split('-q')[0]

    try:
        file_df = pd.read_csv(f)
    except EmptyDataError:
        print(f"Empty file: {f}")
        continue

    # Treat infinities as missing values, then drop every row with a missing value.
    file_df = file_df.replace([np.inf, -np.inf], np.nan)
    print(f"dropping {file_df.isna().sum().sum()} NaNs")
    file_df = file_df.dropna().assign(**{
        "context_size": context_size,
        "max_gen_len": max_gen_len,
        "batch_size": 1024,
        "model": model_name,
        "quantisation": full_model_name.split("-")[-1].split('.')[0],
        "device": le_device,
        "app": application,
        "tdp": file_tdp,
    })
    dfs.append(file_df)

if not dfs:
    raise ValueError(
        "No non-empty results files were loaded; check `device`, `app`, `tdp` "
        "and the contents of ../experiment_outputs."
    )

df = pd.concat(dfs)
grouped_df = df.groupby(["device", "tdp", "app", "model", "quantisation"])
# Aggregate only `tps`: averaging every column is wasted work and raises
# TypeError on pandas >= 2.0 when a non-numeric column is present.
grouped_df_tps = (
    grouped_df["tps"]
    .agg(["mean", "std"])
    .rename(columns={"mean": "tps", "std": "tps_std"})
    .reset_index()
)
display(grouped_df_tps.head())

# The rail names below are only available when the runs include energy
# measurements from the Orin AGX and Orin Nano; otherwise pick your own columns.
rails = ["VDD_GPU_SOC", "VDD_CPU_CV", "VIN_SYS_5V0", "NC",
         "VDDQ_VDD2_1V8AO", "VDD_IN", "VDD_CPU_GPU_CV", "VDD_SOC"]
energy_cols = [f"energy_pt {rail} (mWh)" for rail in rails]
discharge_cols = [name.replace("energy", "discharge") for name in energy_cols]
all_energy_cols = energy_cols  # + discharge_cols
relevant_energy_cols = ["input_tokens", "output_tokens", "tps"] + all_energy_cols

# `df_energy` is another name for `df` (not a copy): the columns added here appear on `df` too.
df_energy = df
present_energy_cols = [name for name in all_energy_cols if name in df_energy.columns]
for rail_col in present_energy_cols:
    # energy_pt * output_tokens is stored as the matching "energy_total ..." column.
    df_energy[rail_col.replace("pt", "total")] = df_energy[rail_col] * df_energy["output_tokens"]
display(df_energy.head(5))

# Columns summed into a single `total_energy` value per row. Edit this list to
# match the rails you have populated. Alternatives kept from earlier revisions:
#   [col for col in df_energy.columns if col.startswith("energy_total")]
#   ["energy_pt VIN_SYS_5V0 (mWh)", "energy_pt VDD_IN (mWh)"]
# NOTE(review): these are `energy_pt` columns, whereas the code above derives
# `energy_total` columns as energy_pt * output_tokens. Dividing a sum of
# `energy_pt` values by output_tokens (the `ept` column below) may not be the
# intended energy-per-token figure - confirm whether `energy_total` was meant.
total_energy_source_cols = ["energy_pt VDD_GPU_SOC (mWh)", "energy_pt VDD_CPU_CV (mWh)",
                            "energy_pt VDD_CPU_GPU_CV (mWh)", "energy_pt VDD_SOC (mWh)"]

# Use only the rails that are actually present, so that a run covering a single
# device does not fail with KeyError on the other device's columns.
available_source_cols = [c for c in total_energy_source_cols if c in df_energy.columns]
missing_source_cols = [c for c in total_energy_source_cols if c not in df_energy.columns]
if missing_source_cols:
    print(f"Energy columns not found, excluded from total_energy: {missing_source_cols}")
if not available_source_cols:
    print("No energy columns available: total_energy and ept will be 0.")
df_energy["total_energy"] = df_energy[available_source_cols].sum(axis=1)

# Step 1: sum energy and generated tokens within each iteration of a configuration.
config_keys = ["device", "tdp", "app", "model", "quantisation"]
df_energy_grouped = df_energy.groupby(config_keys + ["iteration"])[["total_energy", "output_tokens"]]
grouped_df_energy = df_energy_grouped.sum()
grouped_df_energy["ept"] = grouped_df_energy["total_energy"] / grouped_df_energy["output_tokens"]

# Step 2: mean and standard deviation across iterations for each configuration.
ggde = grouped_df_energy.reset_index().groupby(config_keys)
stat_cols = ["total_energy", "ept", "output_tokens"]
grouped_df_energy_total = pd.concat(
    [ggde[stat_cols].mean(), ggde[stat_cols].std().add_suffix("_std")],
    axis=1,
)

display(grouped_df_energy_total.head())