In [1]:
import json
import os, statistics, math

def extract_step_to_acc(path: str) -> dict:
    """
    Read a raw metrics file and return {step: test_acc} from even-numbered lines only.

    Line handling:
      - Lines are numbered from 1; odd-numbered lines and blank lines are skipped.
      - Each remaining line is parsed as JSON; lines that are not valid JSON,
        or whose parsed value is not a JSON object, are skipped.
      - Only objects with split == 'test' are kept.
      - Objects whose 'step' or 'acc' is missing or null are skipped.

    Parameters:
      - path: path of the metrics file; opened as UTF-8 text.

    Returns:
      dict mapping step -> acc (latest occurrence wins if a step is duplicated).

    Raises:
      - FileNotFoundError (or another OSError from open()) if the file cannot
        be opened; callers may rely on this to detect missing files.
    """
    result = {}
    with open(path, "r", encoding="utf-8") as f:
        for line_no, raw_line in enumerate(f, start=1):  # 1-based
            if line_no % 2 != 0:  # skip odd lines
                continue
            line = raw_line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON is not necessarily an object: `[1, 2]`, `42`, `null`
            # or `"text"` have no .get(), so skip them like malformed lines
            # instead of crashing with AttributeError.
            if not isinstance(obj, dict):
                continue
            if obj.get("split") != "test":
                continue
            step = obj.get("step")
            acc = obj.get("acc")
            if step is not None and acc is not None:
                result[step] = acc  # latest one wins
    return result

In [2]:
def summarize_by_step(root='.', start=1, end=53, prefix='client_', ext='.raw',
                      variance='population', step_keys=None, require_all=False):
    """
    Aggregate across clients PER STEP and return TWO dictionaries:
      - mean_by_step: {step: mean_test_acc_over_clients}
      - var_by_step:  {step: variance_test_acc_over_clients}

    Client files are looked up as os.path.join(root, f"{prefix}{i:03d}{ext}")
    for every i in start..end (inclusive).

    Parameters:
      - root, prefix, ext: directory, file-name prefix and file-name suffix of the client files.
      - start, end: first and last client index (inclusive).
      - variance: 'population' (statistics.pvariance) | 'sample' (statistics.variance).
                  With 'sample', a step backed by a single value gets NaN.
      - step_keys: optional iterable of steps to enforce (e.g., [0,10,20,...,100]).
                   If None, uses the sorted union of steps observed across clients.
      - require_all: if True, only include a step if ALL clients whose file was
                     found have a value for it. If False, compute from available values.

    Returns:
      (mean_by_step, var_by_step); both dicts share the same keys.

    Raises:
      - ValueError: if `variance` is neither 'population' nor 'sample'. This is
        checked before any file is read, so a typo is reported even when no
        data is found.

    Notes:
      - Uses extract_step_to_acc(path) from earlier cell (even-line, split=='test').
      - Missing files are skipped; steps without any data are omitted from the result.
    """
    # Fail fast: validating inside the per-step loop would let an invalid
    # `variance` go unnoticed whenever there happens to be no data.
    if variance == 'population':
        variance_of = statistics.pvariance
    elif variance == 'sample':
        # The sample variance is undefined for fewer than two observations.
        def variance_of(vals):
            return statistics.variance(vals) if len(vals) > 1 else float('nan')
    else:
        raise ValueError("variance must be 'population' or 'sample'")

    # Collect per-step lists of accuracies across clients
    accs_by_step = {}
    client_count = 0
    for i in range(start, end + 1):
        path = os.path.join(root, f"{prefix}{i:03d}{ext}")
        try:
            d = extract_step_to_acc(path)
        except FileNotFoundError:
            # Missing client file; skip (it does not count towards require_all)
            continue
        client_count += 1
        for step, acc in d.items():
            accs_by_step.setdefault(step, []).append(acc)

    # Decide which steps to include
    if step_keys is None:
        steps = sorted(accs_by_step.keys())
    else:
        steps = list(step_keys)

    mean_by_step = {}
    var_by_step = {}
    for step in steps:
        vals = accs_by_step.get(step, [])
        if not vals:
            continue  # no data at this step
        if require_all and len(vals) != client_count:
            # Skip this step because not all clients provided it
            continue
        mean_by_step[step] = sum(vals) / len(vals)
        var_by_step[step] = variance_of(vals)
    return mean_by_step, var_by_step

In [3]:
# Aggregate test accuracy over client files 1..53 in the current directory,
# then show the per-step means followed by the per-step variances.
mean_dict, var_dict = summarize_by_step(root='.', start=1, end=53)
print(mean_dict, var_dict, sep="\n")

{0: 0.5320754716981133, 10: 0.5099056603773585, 20: 0.5136792452830189, 30: 0.5056603773584906, 40: 0.5150943396226416, 50: 0.5202830188679246, 60: 0.5188679245283018, 70: 0.5249999999999998, 80: 0.5240566037735849, 90: 0.518867924528302, 100: 0.5235849056603773}
{0: 0.007390975436098256, 10: 0.005927821288714844, 20: 0.006428444286222855, 30: 0.006430224279102884, 40: 0.007154236383054468, 50: 0.007430580277678889, 60: 0.005705322178711285, 70: 0.005377358490566038, 80: 0.006602883588465647, 90: 0.005186454254182983, 100: 0.00354752580989676}
