In [1]:
import json
import os, statistics, math

def extract_step_to_acc(path: str):
    """
    Read a raw metrics file and return {step: test_acc} from even-numbered lines only.

    Lines are numbered from 1 in file order; blank lines still count towards
    the numbering. Only even-numbered, non-blank lines are examined. A line is
    skipped (never raised on) when it:
      - is not valid JSON,
      - is valid JSON but not an object (e.g. a list, number or null),
      - has a "split" value other than "test",
      - lacks "step" or "acc" (or either is null),
      - has a "step" value that cannot be used as a dictionary key.

    Parameters:
      - path: path of the metrics file, opened as UTF-8 text.

    Returns:
      - dict mapping step -> acc. If a step appears more than once, the
        latest occurrence wins.

    Raises:
      - Whatever open() raises for the path (e.g. FileNotFoundError); this is
        deliberately not caught here.
    """
    result = {}
    with open(path, "r", encoding="utf-8") as f:
        for lineno, raw in enumerate(f, start=1):  # 1-based
            if lineno % 2 != 0:  # skip odd lines
                continue
            line = raw.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                continue
            # json.loads may legitimately return a list/number/str/None;
            # only JSON objects carry the fields we need.
            if not isinstance(obj, dict):
                continue
            if obj.get("split") != "test":
                continue
            step = obj.get("step")
            acc = obj.get("acc")
            if step is None or acc is None:
                continue
            try:
                result[step] = acc  # latest one wins
            except TypeError:
                # Unhashable step (e.g. a JSON array): treat as a bad record.
                continue
    return result

In [2]:
def summarize_by_step(root='.', start=1, end=53, prefix='client_', ext='.raw',
                      variance='population', step_keys=None, require_all=False):
    """
    Aggregate across clients PER STEP and return TWO dictionaries:
      - mean_by_step: {step: mean_test_acc_over_clients}
      - var_by_step:  {step: variance_test_acc_over_clients}

    Client files are looked up as os.path.join(root, f"{prefix}{i:03d}{ext}")
    for every i in start..end (inclusive) and read with extract_step_to_acc.

    Parameters:
      - root, prefix, ext: directory, file-name prefix and extension of the client files.
      - start, end: inclusive range of client numbers (zero-padded to 3 digits).
      - variance: 'population' (statistics.pvariance) | 'sample' (statistics.variance).
                  With 'sample', a step that has a single value yields NaN.
      - step_keys: optional iterable of steps to report (e.g., [0,10,20,...,100]),
                   in the given order. If None, uses the sorted union of steps
                   observed across clients.
      - require_all: if True, only include a step if ALL clients whose file was
                     found have a value for it. If False, compute from available values.

    Returns:
      - (mean_by_step, var_by_step); steps without any data are omitted from both.

    Raises:
      - ValueError: if variance is neither 'population' nor 'sample'. This is
        checked before any file is read, so a bad argument is reported even
        when there is no data to aggregate.

    Notes:
      - Missing client files are skipped and do not count towards 'require_all'.
    """
    # Fail fast on a bad mode instead of only noticing it once a step has data.
    if variance not in ('population', 'sample'):
        raise ValueError("variance must be 'population' or 'sample'")

    # Collect per-step lists of accuracies across clients
    accs_by_step = {}
    client_count = 0
    for i in range(start, end + 1):
        path = os.path.join(root, f"{prefix}{i:03d}{ext}")
        try:
            d = extract_step_to_acc(path)
        except FileNotFoundError:
            # Missing client file; skip
            continue
        client_count += 1
        for step, acc in d.items():
            accs_by_step.setdefault(step, []).append(acc)

    # Decide which steps to include
    if step_keys is None:
        steps = sorted(accs_by_step.keys())
    else:
        steps = list(step_keys)

    mean_by_step = {}
    var_by_step = {}
    for step in steps:
        vals = accs_by_step.get(step, [])
        if not vals:
            continue  # no data at this step
        if require_all and len(vals) != client_count:
            # Skip this step because not all clients provided it
            continue
        mean_by_step[step] = sum(vals) / len(vals)
        if variance == 'population':
            var_by_step[step] = statistics.pvariance(vals)
        else:
            # Sample variance is undefined for a single observation.
            var_by_step[step] = statistics.variance(vals) if len(vals) > 1 else float('nan')
    return mean_by_step, var_by_step

In [3]:
# Aggregate test accuracy over clients 1..53 (default variance mode) and show
# the per-step mean and variance dictionaries, one per line.
mean_dict, var_dict = summarize_by_step(root='.', start=1, end=53)
print(mean_dict, var_dict, sep="\n")

{0: 0.5660377358490565, 10: 0.5438679245283018, 20: 0.5547169811320753, 30: 0.5570754716981133, 40: 0.5570754716981133, 50: 0.5570754716981132, 60: 0.57688679245283, 70: 0.5702830188679244, 80: 0.5735849056603772, 90: 0.5712264150943397, 100: 0.5759433962264152}
{0: 0.007054111783552865, 10: 0.005634567461730153, 20: 0.005732467070131719, 30: 0.006683428266286935, 40: 0.005645692417230332, 50: 0.006565503737985048, 60: 0.005562477750088998, 70: 0.005190014239943042, 80: 0.005505072979708081, 90: 0.006801797792808828, 100: 0.004881185475258099}


In [5]:
import os
import math

def step0_by_client(root='.', start=1, end=53, prefix='client_', ext='.raw'):
    """
    Collect the step-0 test accuracy of every client.

    Walks the files os.path.join(root, f"{prefix}{i:03d}{ext}") for i in
    start..end (inclusive; ./client_001.raw ~ ./client_053.raw by default) and
    reads each with extract_step_to_acc.

    Returns:
      - dict {client number: test acc at step 0}. The value is NaN when the
        file does not exist or has no entry for step 0.
    """
    step0_acc = {}
    for client_id in range(start, end + 1):
        raw_path = os.path.join(root, f"{prefix}{client_id:03d}{ext}")
        try:
            acc_by_step = extract_step_to_acc(raw_path)   # {step: acc}
        except FileNotFoundError:
            # No file for this client: record it as missing.
            step0_acc[client_id] = float('nan')
        else:
            step0_acc[client_id] = acc_by_step.get(0, float('nan'))
    return step0_acc

# Usage example: collect each client's step-0 test accuracy and print the mapping.
step0 = step0_by_client(root='.', start=1, end=53)
print(step0)

{1: 0.625, 2: 0.55, 3: 0.6, 4: 0.325, 5: 0.625, 6: 0.425, 7: 0.75, 8: 0.575, 9: 0.6, 10: 0.475, 11: 0.575, 12: 0.625, 13: 0.55, 14: 0.625, 15: 0.65, 16: 0.575, 17: 0.575, 18: 0.7, 19: 0.425, 20: 0.625, 21: 0.525, 22: 0.425, 23: 0.6, 24: 0.275, 25: 0.55, 26: 0.625, 27: 0.625, 28: 0.575, 29: 0.625, 30: 0.525, 31: 0.55, 32: 0.55, 33: 0.625, 34: 0.55, 35: 0.575, 36: 0.65, 37: 0.6, 38: 0.525, 39: 0.5, 40: 0.525, 41: 0.6, 42: 0.65, 43: 0.575, 44: 0.675, 45: 0.475, 46: 0.625, 47: 0.525, 48: 0.6, 49: 0.5, 50: 0.525, 51: 0.575, 52: 0.65, 53: 0.55}
