In [1]:
import pandas as pd
import glob
import re
from pandas.errors import EmptyDataError
import numpy as np
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Passing None lifts pandas' display truncation, so displayed frames show
# every column and every row.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
# Device folder to analyse under ../../experiment_outputs/. Uncomment one of
# the names below to restrict the analysis to a single device.
# device = "iPhone_14_Pro"
# device = "Galaxy_S23"
# device = "iPhone_SE"
# device = "Pixel_6a"
# "**" is interpolated into glob patterns further down, where it acts as a
# wildcard for the device path component (i.e. all devices).
device = "**"

In [None]:
import os

# Discover every run directory for the selected device(s).
# The MLC apps are globbed with one path component between the app folder and
# the run folder; the other apps are globbed with two.
single_level_apps = ["MLCChat", "MLCChat++"]

model_dirs = []
for app in ["MLCChat", "MLCChat++", "LLMFarmEval", "LlamaCpp"]:
    model = "**/" if app in single_level_apps else "**/**"
    run_pattern = f"../../experiment_outputs/{device}/{app}/{model}/run*"
    model_dirs += glob.glob(run_pattern)

# Later cells interpolate `app` into glob patterns, so reset it to a wildcard
# instead of leaving it at the last app name from the loop above.
app = "**"
model_dirs.sort()
print(f"{len(model_dirs)} found!")
for model_dir in model_dirs:
    print(model_dir)


In [None]:
# Sanity check: show the first discovered run directory
# (raises IndexError when the glob above found nothing).
model_dirs[0]

In [None]:
def generate_stats_plots(dir):
    """Run the measurement-report script for run directories lacking results.

    For every path in ``dir`` that does not already contain
    ``results_model_inference_measurements.csv``, the path is printed and
    ``report-measurements-llms.py`` is invoked on it through the shell.

    Args:
        dir: Iterable of run-directory paths (note: the name shadows the
            ``dir`` builtin inside this function).

    NOTE(review): presumably the script is what writes the missing CSV, which
    would make repeated calls skip finished directories -- confirm against the
    script itself.
    """
    for d in dir:
        # Only process directories whose results file is not there yet.
        if not os.path.exists(os.path.join(d, "results_model_inference_measurements.csv")):
            print(d)
            # IPython shell escape; {d} is interpolated into the command line.
            !python ../..//src/tools/report-measurements-llms.py -p {d}
# Process every run directory discovered above.
generate_stats_plots(model_dirs)

In [None]:
# Gather the per-run measurement CSVs. glob is called without recursive=True,
# so each "**" matches exactly one path component; the two patterns therefore
# cover run folders sitting one or two levels below the app folder.
csv_patterns = (
    f"../../experiment_outputs/{device}/{app}/**/run*/results_model_inference_measurements.csv",
    f"../../experiment_outputs/{device}/{app}/**/**/run*/results_model_inference_measurements.csv",
)
files = [path for pattern in csv_patterns for path in glob.glob(pattern)]

In [None]:
# Load every measurement CSV into one long frame.
#
# The device, app and model are recovered from the file path, and the run
# parameters (context size, max generation length and -- for non-MLC apps --
# batch size) from the run directory name. Rows containing NaN/inf are dropped
# per file before the frames are concatenated into `df`.
mlc_apps = ["MLCChat", "MLCChat++"]  # apps whose run folders carry no "_bs<N>" suffix

dfs = []
for f in files:
    application = re.search("experiment_outputs/.*?/(.*?)/", f).group(1)
    le_device = re.search("experiment_outputs/(.*?)/", f).group(1)
    is_mlc_app = application in mlc_apps
    # Raw strings: "\d" in a plain literal is an invalid escape sequence and
    # triggers a warning on recent Python versions.
    if is_mlc_app:
        regex = r"run_cs(\d+)_mgl(\d+)"
    else:
        regex = r"run_cs(\d+)_mgl(\d+)_bs(\d+)"
    model_regex = f"{re.escape(application)}/(.*)/run"
    match = re.search(regex, f)
    if not match:
        # Report instead of silently ignoring files with an unexpected name.
        print(f"Skipping (unrecognised run directory name): {f}")
        continue

    context_size = int(match.group(1))
    max_gen_len = int(match.group(2))
    # Use the same membership test that selected the regex, so group(3) is
    # read exactly when the pattern defines it. -1 marks "no batch size".
    batch_size = -1 if is_mlc_app else int(match.group(3))
    try:
        run_df = pd.read_csv(f)
    except EmptyDataError:
        print(f"Empty file: {f}")
        continue

    # Treat infinities as missing values, then drop every incomplete row.
    run_df = run_df.replace([np.inf, -np.inf], np.nan)
    print(f"dropping {run_df.isna().sum().sum()} NaNs")
    run_df = run_df.dropna()
    run_df["context_size"] = context_size
    run_df["max_gen_len"] = max_gen_len
    run_df["batch_size"] = batch_size
    run_df["model"] = re.search(model_regex, f).group(1)
    run_df["device"] = le_device
    run_df["app"] = application
    dfs.append(run_df)

if not dfs:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    raise ValueError("No measurement CSVs could be loaded; check `device`, `app` "
                     "and the experiment_outputs path.")

df = pd.concat(dfs)
# A zero `output_tokens` would put +/-inf back into the frame after the
# per-file cleaning above, so map those to NaN like the raw columns.
df["discharge_per_token"] = (df["discharge_pt (mAh)"] / df["output_tokens"])\
    .replace([np.inf, -np.inf], np.nan)

def filter_per_quantile(df, value, q):
    """Keep the rows whose ``value`` lies strictly between two symmetric quantiles.

    The bounds are the ``q`` and ``1 - q`` quantiles of ``df[value]``. They are
    ordered before use, so ``q=0.95`` and ``q=0.05`` select the same rows
    (previously any ``q < 0.5`` inverted the bounds and returned nothing).

    Both comparisons are strict, so rows equal to a bound are removed. As a
    consequence a frame with a single row, or with a constant ``value``
    column, yields an empty result.

    Args:
        df: Frame to filter; it is not modified.
        value: Name of the column the quantiles are computed on.
        q: Quantile in [0, 1] defining one tail; the other is ``1 - q``.

    Returns:
        A new DataFrame with the rows inside the open quantile interval.
    """
    lower_q, upper_q = sorted((q, 1 - q))
    column = df[value]
    lower_bound = column.quantile(lower_q)
    upper_bound = column.quantile(upper_q)
    inside = (column > lower_bound) & (column < upper_bound)
    return df[inside]

# Summary statistics per (device, model, app).
#
# Columns are selected *before* each aggregation: reducing every column first
# and selecting afterwards is wasteful, and on pandas >= 2.0 mean()/std()
# raise if the frame holds a non-numeric column.
#
# NOTE(review): newer pandas releases deprecate handing the grouping columns
# to the function passed to groupby.apply; the pipelines below rely on those
# columns surviving the apply -- confirm against the pandas version in use.
run_keys = ["device", "model", "app"]
iteration_keys = run_keys + ["iteration"]
tps_columns = ["tps", "prefill_tps", "input_tokens", "output_tokens"]
energy_columns = ['energy_pt (mWh)', 'discharge_pt (mAh)', 'discharge_per_token']
trim_quantile = 0.95  # rows outside the (1 - q, q) quantile band are discarded

# Throughput: trim tps outliers within each iteration, then take the mean and
# standard deviation over all remaining rows of a (device, model, app) group.
grouped_df_all = df.groupby(iteration_keys)\
                    .apply(filter_per_quantile, "tps", trim_quantile)\
                    .reset_index(drop=True)\
                    .groupby(run_keys)
grouped_df_tps = pd.concat([grouped_df_all[tps_columns].mean(),
                            grouped_df_all[tps_columns].std().add_prefix("std_")],
                           axis=1)

# Energy: trim discharge-per-token outliers within each iteration and sum the
# remaining rows, giving one total per iteration ...
grouped_df_all_energy = df.groupby(iteration_keys)\
                            .apply(filter_per_quantile, "discharge_per_token", trim_quantile)\
                            .reset_index(drop=True)\
                            .groupby(iteration_keys)[energy_columns]\
                            .sum()

# ... then take the mean and standard deviation of those per-iteration totals.
energy_by_run = grouped_df_all_energy.reset_index().groupby(run_keys)[energy_columns]
grouped_df_energy = pd.concat([energy_by_run.mean(),
                               energy_by_run.std().add_prefix("std_")],
                              axis=1)

# Turn the group keys back into ordinary columns for display.
grouped_df_energy = grouped_df_energy.reset_index()
grouped_df_tps = grouped_df_tps.reset_index()
display(grouped_df_tps)
display(grouped_df_energy)