In [1]:
import os, glob, json, re
from collections import defaultdict

def best_test_acc_per_client(raw_dir="runs/mid_eval", pattern="client_*.raw"):
    """Collect the highest test accuracy recorded for each client.

    Every file matching ``pattern`` inside ``raw_dir`` is read as JSON Lines
    (one JSON object per line). Only records whose ``"split"`` equals
    ``"test"`` are considered, and the maximum ``"acc"`` is kept separately
    for every client id.

    The client id of a record is its ``"client"`` field when present and not
    null; otherwise it is the number parsed from the file name
    (``client_001.raw`` -> ``1``).

    Records are skipped (never fatal) when the line is blank, is not valid
    JSON, is not a JSON object, is not a test record, has no resolvable
    client id, or has a missing / non-numeric / NaN accuracy.

    Args:
        raw_dir: Directory that contains the per-client raw log files.
        pattern: Glob pattern, relative to ``raw_dir``, selecting the files.

    Returns:
        dict mapping ``int`` client id to ``float`` best test accuracy.
        Clients without any usable test record are omitted.

    Raises:
        FileNotFoundError: If no file matches ``raw_dir``/``pattern``.
    """
    paths = sorted(glob.glob(os.path.join(raw_dir, pattern)))
    if not paths:
        raise FileNotFoundError(f"No files matched: {os.path.join(raw_dir, pattern)}")

    best = {}  # {client_id: best_acc}
    for p in paths:
        # Fallback client id taken from the file name (e.g. client_001.raw -> 1).
        m = re.search(r"client_(\d+)\.raw$", os.path.basename(p))
        file_client_id = int(m.group(1)) if m else None

        with open(p, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError:
                    continue
                # A valid JSON line may still be a list/number/string.
                if not isinstance(rec, dict) or rec.get("split") != "test":
                    continue

                # Prefer the id stored in the record; fall back to the file name.
                raw_cid = rec.get("client")
                if raw_cid is None:
                    raw_cid = file_client_id
                try:
                    cid = int(raw_cid)
                    acc = float(rec.get("acc", float("nan")))
                except (TypeError, ValueError):
                    # No usable client id, or accuracy is null / non-numeric.
                    continue
                if acc != acc:  # NaN is the only value not equal to itself
                    continue

                # Track the maximum per client id, so a value is never credited
                # to a different client than the one that produced it.
                if cid not in best or acc > best[cid]:
                    best[cid] = acc
    return best

# Gather each client's best test accuracy from the files under raw/mid_eval.
best_dict = best_test_acc_per_client(
    raw_dir="raw/mid_eval",
    pattern="client_*.raw",
)
print(f"총 {len(best_dict)}명 수집됨")
# Sanity check: list the results ordered by client id.
for cid in sorted(best_dict.keys()):
    print(f"client {cid:03d}: {best_dict[cid]:.6f}")


총 53명 수집됨
client 001: 0.775000
client 002: 0.650000
client 003: 0.600000
client 004: 0.625000
client 005: 0.750000
client 006: 0.650000
client 007: 0.700000
client 008: 0.575000
client 009: 0.725000
client 010: 0.875000
client 011: 0.625000
client 012: 0.700000
client 013: 0.750000
client 014: 0.725000
client 015: 0.675000
client 016: 0.675000
client 017: 0.650000
client 018: 0.775000
client 019: 0.750000
client 020: 0.750000
client 021: 0.725000
client 022: 0.725000
client 023: 0.750000
client 024: 0.625000
client 025: 0.600000
client 026: 0.750000
client 027: 0.650000
client 028: 0.825000
client 029: 0.775000
client 030: 0.675000
client 031: 0.700000
client 032: 0.650000
client 033: 0.675000
client 034: 0.775000
client 035: 0.700000
client 036: 0.675000
client 037: 0.650000
client 038: 0.700000
client 039: 0.625000
client 040: 0.675000
client 041: 0.700000
client 042: 0.750000
client 043: 0.725000
client 044: 0.700000
client 045: 0.675000
client 046: 0.750000
client 047: 0.700000
cli

In [2]:
# Dump the collected per-client best test accuracies (values only, no ids).
print(best_dict.values())

dict_values([0.775, 0.65, 0.6, 0.625, 0.75, 0.65, 0.7, 0.575, 0.725, 0.875, 0.625, 0.7, 0.75, 0.725, 0.675, 0.675, 0.65, 0.775, 0.75, 0.75, 0.725, 0.725, 0.75, 0.625, 0.6, 0.75, 0.65, 0.825, 0.775, 0.675, 0.7, 0.65, 0.675, 0.775, 0.7, 0.675, 0.65, 0.7, 0.625, 0.675, 0.7, 0.75, 0.725, 0.7, 0.675, 0.75, 0.7, 0.725, 0.675, 0.55, 0.8, 0.75, 0.65])


In [3]:
# Unweighted mean of the per-client best test accuracies.
print(sum(best_dict.values()) / len(best_dict))

0.6995283018867923
