<!-- du4://thèse/cai/results.ipynb?d=20251024?loc=ttum?=hPa=1020 -->

# Conventional vs. Confidential Performance

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from pathlib import Path
from typing import Dict, List

In [None]:
# Global plot styling for the figures of this notebook: "ticks" axes style,
# "paper" scaling context, and fonts enlarged by a factor of 1.1.
sns.set_theme(style="ticks", context="paper", font_scale=1.1)
# Switch the default color cycle to seaborn's "colorblind" palette.
sns.set_palette("colorblind")

In [None]:
# Parent folder containing the data; the experiment folders used below
# (e.g. "experiment-1") are looked up directly beneath it.
data_path = Path("data", "ko") # FIXME(review): presumably a placeholder — confirm the real data location

In [None]:
class Experiment:
    """An experiment folder, holding one ``Condition`` per sub-directory.

    The conditions are loaded eagerly: constructing an ``Experiment`` builds a
    ``Condition`` for every sub-directory found directly under ``path``.
    """

    def __init__(self, path: Path):
        """Load every condition stored under ``path``.

        Args:
            path: Experiment folder; each of its sub-directories is treated
                as one condition.

        Raises:
            AssertionError: If ``path`` contains no sub-directory.
        """
        self.path: Path = path
        # ``name`` rather than ``stem``: a folder has no file suffix, so a dot
        # in its name (e.g. "run-1.5") must not be cut off.
        self.name: str = path.name
        self.conditions: Dict[str, Condition] = {}
        self._set_conditions()

    def _set_conditions(self):
        """Discover the condition sub-directories and register them by name."""
        # Sorted so that iteration order (and therefore printing and plotting
        # order) does not depend on the file system's listing order.
        condition_dirs = sorted(p for p in self.path.iterdir() if p.is_dir())
        assert len(condition_dirs) > 0, "No condition found…"
        # Keyed by the full directory name, so that folders differing only
        # after a dot ("model-3.1" / "model-3.2") stay distinct.
        self.conditions.update(
            {c.name: Condition(c) for c in condition_dirs}
        )

    def get_all_conditions(self):
        """Return a view of ``(condition name, Condition)`` pairs."""
        return self.conditions.items()

    def get_condition(self, name: str):
        """Return the condition registered under ``name``.

        Raises:
            KeyError: If no condition has that name.
        """
        return self.conditions[name]

    def __str__(self):
        return f"{self.name}, {self.path.absolute()}, {self.conditions}"
        
class Condition:
    """One experimental condition: a folder of repeated runs.

    Every repetition is stored as a pair of files whose names contain
    ``repetition_``: ``<stem>.json`` and ``<stem>_power_metrics.csv``.
    """

    def __init__(self, path: Path):
        """Load all repetitions found in ``path``.

        Args:
            path: Folder holding the repetition files of this condition.

        Raises:
            AssertionError: If no repetition file is found, or if the number
                of ``.json`` and ``.csv`` files differs.
            FileNotFoundError: If a ``.json`` file has no matching
                ``<stem>_power_metrics.csv`` next to it.
        """
        self.path: Path = path
        # ``name`` rather than ``stem``: a folder has no file suffix, so a dot
        # in its name (e.g. "model-3.1") must not be cut off.
        self.name: str = path.name
        # The attributes below are only annotated; nothing in this class
        # assigns them yet.
        self.tee_on: bool # TODO
        self.dataset: str
        self.model: str
        self.input_length: int
        self.output_length: int
        self.concurrency: int
        self.temperature: float
        self.repetitions: List[Repetition] = []
        self._set_self()

    def _set_self(self):
        """Find the repetition files and build one ``Repetition`` per pair."""
        matches = list(self.path.glob("*repetition_*"))
        json_files = sorted(m for m in matches if m.suffix == ".json")
        csv_files = sorted(m for m in matches if m.suffix == ".csv")

        assert len(matches) > 0, "Empty results"
        assert len(json_files) == len(csv_files), f"Mismatch: {len(json_files)} .json vs. {len(csv_files)} .csv"

        # Equal counts do not guarantee that the files pair up by name, so
        # check every expected CSV before anything is parsed.
        pairs = []
        for json_file in json_files:
            power_csv = self.path / f"{json_file.stem}_power_metrics.csv"
            if not power_csv.is_file():
                raise FileNotFoundError(
                    f"No power metrics for {json_file.name}: expected {power_csv}"
                )
            pairs.append((json_file, power_csv))

        # The index is the position in the name-sorted list of .json files.
        self.repetitions.extend(
            Repetition(idx, json_file, power_csv)
            for idx, (json_file, power_csv) in enumerate(pairs)
        )

    def get_all_repetitions(self):
        """Return the list of repetitions, in index order."""
        return self.repetitions

    def get_repetition(self, index: int):
        """Return the repetition stored at position ``index``.

        Raises:
            IndexError: If ``index`` is out of range.
        """
        return self.repetitions[index]

    def __str__(self):
        return f"Condition: {self.name}, Path: {self.path}, Repetitions: {[r.index for r in self.repetitions]}"

    def __repr__(self):
        return self.__str__()


class Repetition:
    """One repetition of a condition: a vLLM result file plus a power log.

    Both files are parsed when the object is created.
    """

    def __init__(self, index: int, vllm_json: Path, power_csv: Path):
        """Parse the two result files of a repetition.

        Args:
            index: Position of this repetition within its condition.
            vllm_json: Path to the JSON file with the vLLM results.
            power_csv: Path to the CSV file with the power measurements.

        Raises:
            OSError: If one of the files cannot be read.
            json.JSONDecodeError: If ``vllm_json`` is not valid JSON.
        """
        # Index
        self.index: int = index
        # Raw results
        self.vllm_json: Path = Path(vllm_json)
        self.power_csv: Path = Path(power_csv)
        # Parsed results; read through the normalized Path attributes so that
        # plain string paths are accepted as well.
        self.vllm_results = json.loads(self.vllm_json.read_text(encoding="utf-8"))
        self.power_results = pd.read_csv(self.power_csv)

    def get_vllm_results(self):
        """Return the parsed content of the vLLM JSON file."""
        return self.vllm_results

    def get_power_results(self):
        """Return the power measurements as a ``pandas.DataFrame``."""
        return self.power_results

    def get_vllm_key(self, key: str):
        """Return the top-level entry ``key`` of the vLLM results.

        Raises:
            KeyError: If ``key`` is not present.
        """
        return self.vllm_results[key]

    def __str__(self):
        # ``pathlib.Path`` has no ``.path`` attribute; format the paths directly.
        return f"Repetition(index={self.index}, json='{self.vllm_json}', csv='{self.power_csv}')"

    def __repr__(self):
        return self.__str__()

## 0. Data summary

- Number of runs
- Total accumulated time (+ estimated cost)

In [None]:
def format_seconds_long(seconds: float) -> str:
    """Format a duration in seconds as ``HH:MM:SS``.

    The fractional part of ``seconds`` is dropped (not rounded). Each field is
    zero-padded to at least two digits; hours are not capped at 99.
    """
    whole_seconds = int(seconds)
    hours = whole_seconds // 3600
    minutes, secs = divmod(whole_seconds % 3600, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"

def get_total_runtime(exp: Experiment):
    """Print the runtime of each repetition, each condition and the experiment.

    The ``"duration"`` entry of every repetition's vLLM results is read and
    formatted as seconds. Three kinds of lines are printed:

    - one per repetition: condition name, repetition index, duration;
    - one per condition: condition name, sum over its repetitions;
    - a final ``Total runtime`` line: sum over all repetitions.

    Args:
        exp: Experiment whose repetitions are summed up.

    Returns:
        None; the report is only printed.
    """
    total_runtime = 0
    for condition_name, condition in exp.get_all_conditions():
        condition_runtime = 0
        for r in condition.get_all_repetitions():
            duration = r.get_vllm_key("duration")
            condition_runtime += duration
            total_runtime += duration
            print(condition_name, r.index, format_seconds_long(duration))
        # Subtotal of this condition only, not the running grand total.
        print(condition_name, format_seconds_long(condition_runtime))
    print("Total runtime", format_seconds_long(total_runtime))


In [None]:
# Load "experiment-1" from disk (all result files are parsed on construction)
# and print its runtime report.
exp_throughput_latency = Experiment(data_path.joinpath("experiment-1"))
get_total_runtime(exp_throughput_latency)

## 1. Throughput and Latency

In [None]:
def create_summary(exp: Experiment):
    """Return a summary table with one row per repetition of the experiment.

    Columns: ``condition``, ``repetition``, ``duration``,
    ``output throughput`` and ``total token throughput``. The last three are
    copied from the vLLM results of the repetition and are ``None`` when the
    corresponding key is absent.

    Args:
        exp: Experiment to summarise.

    Returns:
        A ``pandas.DataFrame`` built from the collected rows.

    Raises:
        AssertionError: If any entry of a repetition's ``"errors"`` list is
            not the empty string.
    """
    rows = []  # Built row by row, one dict per repetition
    for condition_name, condition in exp.get_all_conditions():
        for rep in condition.get_all_repetitions():
            vllm_data = rep.get_vllm_results()
            # A missing or null "errors" entry is treated as "nothing
            # reported" instead of failing with a TypeError on iteration.
            errors = vllm_data.get("errors") or []
            assert all(e == "" for e in errors), "vLLM reported an error. Check .json."
            rows.append({
                # Repetition
                "condition": condition_name,
                "repetition": rep.index,
                "duration": vllm_data.get("duration"),
                # Throughput
                "output throughput": vllm_data.get("output_throughput"),
                "total token throughput": vllm_data.get("total_token_throughput"),
                # Latency: no column is collected yet
            })
    return pd.DataFrame(rows)

In [None]:
# Load "experiment-1" again (the result files are re-read from disk) and build
# its per-repetition summary table.
exp_throughput_latency = Experiment(data_path.joinpath("experiment-1"))
summary_throughput_latency = create_summary(exp_throughput_latency)


In [None]:
# Collect, for every condition, the mean and standard deviation of the output
# throughput over its repetitions.
condition_names = []
mean_throughputs = []
stddev_throughputs = []

exp_throughput_latency = Experiment(data_path.joinpath("experiment-1"))
for condition_name, condition in exp_throughput_latency.get_all_conditions():
    all_output_throughputs = []

    for rep in condition.get_all_repetitions():
        vllm_data = rep.get_vllm_results()
        # A missing or null "errors" entry is treated as "nothing reported"
        # instead of failing with a TypeError on iteration.
        errors = vllm_data.get("errors") or []
        assert all(e == "" for e in errors), "vLLM reported an error. Check .json."
        all_output_throughputs.append(vllm_data.get("output_throughput"))

    mean_output_throughput = np.mean(all_output_throughputs)
    # np.std uses ddof=0 by default, i.e. the population standard deviation.
    std_dev_output_throughput = np.std(all_output_throughputs)

    condition_names.append(condition_name)
    mean_throughputs.append(mean_output_throughput)
    stddev_throughputs.append(std_dev_output_throughput)

# Bar chart: one bar per condition, error bars of one standard deviation.
plt.figure(figsize=(4, 4))
plt.bar(condition_names, mean_throughputs, yerr=stddev_throughputs, capsize=5, alpha=0.7)
plt.xlabel("Model")
plt.ylabel("Mean Output Throughput (tok/s)")
plt.title("Output throughput")
plt.tight_layout()
plt.show()


## 2. Saturation point

In [None]:
# TODO: not implemented yet. Note for later: vllm_data.get("max_concurrent_requests")

## 3. Sequence length overhead

## 4. Price of operations

## 5. Energy efficiency