In [16]:
# Cell 1: 기본 설정 & import

from pathlib import Path
from typing import List, Tuple, Dict
import re
import sys

import numpy as np
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# -----------------------------
# 사용자 설정 영역
# -----------------------------

# 1) 여러 개의 이미지 루트 폴더 설정
#    - 각 폴더는 DMI/W4A8, SMI/W4A8 처럼
#      그 안에 DMI-iter-..., SMI-iter-... 폴더들이 들어있는 구조라고 가정
#
# 예시:
#   "/home/.../TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8"
#   "/home/.../TBD_MI/dataset/deit_base_16_imagenet/SMI_iter/W4A8"
ROOT_CONFIG = [
    {
        "name": "DMI_W4A8",
        "root": Path("./dataset/deit_base_16_imagenet/DMI_iter/W4A8").expanduser().resolve(),
    },
    {
        "name": "SMI_W4A8",
        "root": Path("./dataset/deit_base_16_imagenet/SMI_iter/W4A8").expanduser().resolve(),
    },
    # 필요하면 여기에 더 추가
    # {
    #     "name": "AnotherMethod",
    #     "root": Path("/path/to/Another/W4A8").expanduser().resolve(),
    # },
]

# 2) 결과 저장 경로
SAVE_DIR = Path("./observation/noise_performance").expanduser().resolve()
SAVE_DIR.mkdir(parents=True, exist_ok=True)

# 3) noise 측정에 사용할 cutoff 비율 (도넛)
CUTOFF_RATIO = 0.8

# 4) (후에 채울 예정) 각 실험별 iter → accuracy 정보
ACC_DATA: Dict[str, Dict[int, float]] = {
    # "DMI_W4A8": {
    #     4000: 74.562, 3900: 75.212, 3800: 75.266, 3700: 74.882, 3600: 76.148, 3500: 75.220, 3400: 75.666, 3300: 75.898, 3200: 75.534, 3100: 75.626,
    #     3000:	75.498, 2900:	75.504, 2800:	75.400, 2700:	75.210, 2600:	75.506,	2500: 74.408, 2400: 75.752, 2300:	74.898, 2200:	75.762, 2100:	75.260,
    #     2000:	75.236, 1900:	75.900, 1800:	75.542, 1700:	75.586, 1600:	75.538, 1500:	75.812, 1400:	75.588, 1300:	74.874, 1200:	75.340, 1100:	75.374,
    #     1000:	75.110, 900:	75.250, 800:	75.320, 700:	74.272, 600:	74.898, 500:	75.572, 400:	75.140, 300:	75.788, 200:	75.468, 100:	76.376
    #     },
    # "SMI_w4A8": {
    #     4000: 77.616, 3800:	77.434, 3700:	77.718, 3600:	77.430, 3500:	77.398, 3400:	77.154, 3300:	77.534, 3200:	77.408, 3100:	77.404, 3000:	77.328,
    #     2900:	77.530, 2800:	77.302, 2700:	77.364, 2600:	77.110, 2500:	77.700, 2400:	77.128, 2300:	77.290, 2200:	77.160, 2100:	77.648, 2000:	77.470,
    #     1900:	77.822, 1800:	77.992, 1700:	77.546, 1600:	77.572, 1500:	77.760, 1400:	77.668, 1300:	77.456, 1200:	76.886, 1100:	76.204, 1000:	76.862,
    #     900:	77.822, 800:	77.992, 700:	77.546, 600:	77.572, 500:	77.760, 400:	77.668, 300:	77.456, 200:	76.886, 100:	76.204, 0: 76.862
    # },
}

# -----------------------------
# 정보 출력
# -----------------------------
print("[INFO] ROOT_CONFIG:")
for cfg in ROOT_CONFIG:
    print(f"  - {cfg['name']}: {cfg['root']}")

print(f"[INFO] SAVE_DIR     : {SAVE_DIR}")
print(f"[INFO] CUTOFF_RATIO : {CUTOFF_RATIO}")

if ACC_DATA:
    print("[INFO] ACC_DATA keys:", list(ACC_DATA.keys()))
else:
    print("[INFO] ACC_DATA is currently empty. (다음 셀에서 채울 예정)")


[INFO] ROOT_CONFIG:
  - DMI_W4A8: /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8
  - SMI_W4A8: /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_iter/W4A8
[INFO] SAVE_DIR     : /home/jener05458/src/EdgeMI/TBD_MI/observation/noise_performance
[INFO] CUTOFF_RATIO : 0.8
[INFO] ACC_DATA is currently empty. (다음 셀에서 채울 예정)


In [17]:
# Cell 2: 여러 root 폴더에 대해 이미지 수집

# iteration 폴더 패턴
_iter_root_pattern = re.compile(r"^(DMI|SMI)-\d+-")

# 이미지 확장자
IMAGE_EXTENSIONS = ["*.png", "*.jpg", "*.jpeg"]

def find_images_in_root(root_path: Path) -> List[Path]:
    """
    주어진 root_path (예: DMI/W4A8 또는 SMI/W4A8) 내부에서
    DMI-iter / SMI-iter 폴더를 찾아
    그 안의 png/jpg/jpeg 이미지를 모두 반환한다.
    """

    if not root_path.exists():
        raise FileNotFoundError(f"[ERROR] 이미지 루트 폴더가 존재하지 않습니다: {root_path}")

    image_paths: List[Path] = []

    # 1) 하위 디렉토리 순회
    for subdir in sorted(root_path.iterdir()):
        if not subdir.is_dir():
            continue

        # 2) 폴더 이름이 DMI-iter-* or SMI-iter-* 인지 검사
        if _iter_root_pattern.match(subdir.name) is None:
            continue

        # 3) 해당 폴더에서 이미지 확장자 검색
        for ext in IMAGE_EXTENSIONS:
            imgs = sorted(subdir.rglob(ext))
            image_paths.extend(imgs)

    if len(image_paths) == 0:
        print(f"[WARN] {root_path} 내부에서 이미지를 찾지 못했습니다.")

    return image_paths


# ----------------------------------------------------
# ROOT_CONFIG 전체에 대해 이미지 수집
# ----------------------------------------------------

all_images: Dict[str, List[Path]] = {}

print("[INFO] Collecting images from ROOT_CONFIG ...")

for cfg in ROOT_CONFIG:
    name = cfg["name"]
    root = cfg["root"]
    imgs = find_images_in_root(root)
    all_images[name] = imgs

    print(f"[INFO] {name}: found {len(imgs)} images")

# preview
for k in all_images:
    print(f"\n=== {k} ===")
    sample = all_images[k][:5]
    for s in sample:
        print("  ", s)


[INFO] Collecting images from ROOT_CONFIG ...
[INFO] DMI_W4A8: found 1280 images
[INFO] SMI_W4A8: found 1280 images

=== DMI_W4A8 ===
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-0.png
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-1.png
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-10.png
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-11.png
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-12.png

=== SMI_W4A8 ===
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_iter/W4A8/SMI-100-[50, 100, 200, 300]-[0.3, 0.3, 0.3, 0.3]-0-32-W4A8/0-0.png
   /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_iter/W4A8/SMI-100-[50, 100, 200, 300]-[0.3, 0.3, 0.3, 0.3]-0-32-W4A8/0-1.png
   /hom

In [18]:
# Cell 3: 이미지 로딩 (torch.Tensor [C,H,W], 0~1)

from torchvision import transforms

def load_images(image_paths: List[Path]) -> List[Tuple[Path, torch.Tensor]]:
    """
    image_paths 리스트를 순회하며 이미지를 로드하여 텐서로 변환한다.
    반환 형식: [(Path, Tensor[C,H,W]), ...]
    """
    to_tensor = transforms.ToTensor()
    loaded: List[Tuple[Path, torch.Tensor]] = []

    for p in image_paths:
        try:
            img = Image.open(p).convert("RGB")
            tensor = to_tensor(img)  # [C,H,W], float32 in [0,1]
            loaded.append((p, tensor))
        except Exception as e:
            print(f"[WARN] Failed to load {p}: {e}", file=sys.stderr)

    if len(loaded) == 0:
        print("[WARN] 주어진 경로 리스트에서 로드에 성공한 이미지가 없습니다.")
    return loaded


# ----------------------------------------------------
# ROOT_CONFIG 전체에 대해 로딩
# ----------------------------------------------------

loaded_images: Dict[str, List[Tuple[Path, torch.Tensor]]] = {}

print("[INFO] Loading images as tensors ...")

for name, paths in all_images.items():
    print(f"[INFO] {name}: {len(paths)} files -> loading ...")
    imgs = load_images(paths)
    loaded_images[name] = imgs
    print(f"[INFO] {name}: successfully loaded {len(imgs)} images")

# 간단 확인: 각 method에서 첫 이미지 경로와 shape 출력
for name, imgs in loaded_images.items():
    if len(imgs) == 0:
        print(f"[WARN] {name}: no loaded images.")
        continue
    p0, t0 = imgs[0]
    print(f"[CHECK] {name}: first image = {p0}, shape = {tuple(t0.shape)}")


[INFO] Loading images as tensors ...
[INFO] DMI_W4A8: 1280 files -> loading ...
[INFO] DMI_W4A8: successfully loaded 1280 images
[INFO] SMI_W4A8: 1280 files -> loading ...
[INFO] SMI_W4A8: successfully loaded 1280 images
[CHECK] DMI_W4A8: first image = /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-0.png, shape = (3, 224, 224)
[CHECK] SMI_W4A8: first image = /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_iter/W4A8/SMI-100-[50, 100, 200, 300]-[0.3, 0.3, 0.3, 0.3]-0-32-W4A8/0-0.png, shape = (3, 224, 224)


In [19]:
# Cell 4: iteration 숫자 추출 함수

# 예:
#   DMI-50-0-32-W4A8
#   SMI-100-[50,100,200,300]-[0.3,0.3,0.3,0.3]-0-32-W4A8
_iter_pattern = re.compile(r"^(DMI|SMI)-(\d+)-")

def extract_iter_from_path(img_path: Path) -> int:
    """
    이미지 파일이 속한 상위 폴더 이름에서 iteration 숫자를 추출한다.
    예:
      - DMI-50-0-32-W4A8  → 50
      - SMI-100-[...]     → 100
    """
    folder_name = img_path.parent.name
    m = _iter_pattern.match(folder_name)

    if m is None:
        raise ValueError(
            f"[ERROR] 폴더 이름에서 iteration을 파싱하지 못했습니다: '{folder_name}' (path={img_path})"
        )

    return int(m.group(2))


# ----------------------------------------------------
# 간단한 테스트
# ----------------------------------------------------
print("[TEST] Iteration extraction:")

for method_name, items in loaded_images.items():
    if len(items) == 0:
        continue
    
    sample_path = items[0][0]
    print(f"  method={method_name}, folder={sample_path.parent.name}")
    print("     iter =", extract_iter_from_path(sample_path))
    break  # 하나만 테스트


[TEST] Iteration extraction:
  method=DMI_W4A8, folder=DMI-100-0-32-W4A8
     iter = 100


In [20]:
# Cell 5: 고주파(high-frequency) 에너지 비율 계산 함수 (원형 도넛 annulus 방식)

def compute_high_freq_ratio(image_tensor: torch.Tensor, cutoff_ratio: float = 0.7) -> float:
    """
    image_tensor : torch.Tensor [C,H,W], float32 (0~1)
    cutoff_ratio : 내접원의 반지름 비율 (0~1)
                   예: 0.7 → 내접원의 0.7 * R_max 밖의 영역을 high-frequency로 계산

    반환값 : high-frequency 에너지 비율 (float)
    """

    # 1) RGB → grayscale (H,W)
    img_np = image_tensor.numpy()     # (C,H,W)
    gray = img_np.mean(axis=0)        # (H,W)

    H, W = gray.shape

    # 2) FFT → frequency magnitude
    fft = np.fft.fft2(gray)
    fft_shift = np.fft.fftshift(fft)
    mag = np.abs(fft_shift)

    # 3) radius map 생성
    y, x = np.indices((H, W))
    cy, cx = (H - 1) / 2.0, (W - 1) / 2.0
    r = np.sqrt((x - cx)**2 + (y - cy)**2)

    # 4) 내접원의 최대 반지름 (정사각형 모서리 제거)
    #    중심에서 가장 가까운 이미지 경계까지의 거리
    R_max = min(cy, H-1-cy, cx, W-1-cx)

    # high-frequency 시작 반지름
    cutoff_R = cutoff_ratio * R_max

    # 5) 도넛(annulus) 영역 마스크
    high_mask = (r >= cutoff_R) & (r <= R_max)

    # 6) 고주파 비율 계산
    total_energy = mag.sum()
    high_energy  = mag[high_mask].sum()

    eps = 1e-8
    return float(high_energy / (total_energy + eps))


# -------------------------------
# 간단 테스트
# -------------------------------
for method_name, items in loaded_images.items():
    if len(items) == 0:
        continue
    print(f"[TEST] {method_name}, sample = {items[0][0]}")
    test_ratio = compute_high_freq_ratio(items[0][1], cutoff_ratio=CUTOFF_RATIO)
    print("   high-frequency ratio =", test_ratio)
    break


[TEST] DMI_W4A8, sample = /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/DMI_iter/W4A8/DMI-100-0-32-W4A8/0-0.png
   high-frequency ratio = 0.1989058914127248


In [21]:
# Cell 6: method × iteration 단위 noise 집계

from collections import defaultdict

# noise_data[method_name][iteration] = [noise1, noise2, ...]
noise_data: Dict[str, Dict[int, List[float]]] = defaultdict(lambda: defaultdict(list))

print("[INFO] Computing high-frequency noise for all methods...")

for method_name, img_list in loaded_images.items():

    print(f"\n[INFO] === {method_name} ===")
    if len(img_list) == 0:
        print(f"[WARN] {method_name}: no images loaded.")
        continue

    for img_path, img_tensor in img_list:
        try:
            it = extract_iter_from_path(img_path)
        except ValueError as e:
            print(f"[WARN] {e}")
            continue

        # noise 측정 (도넛 기반 high-frequency ratio)
        noise_val = compute_high_freq_ratio(img_tensor, cutoff_ratio=CUTOFF_RATIO)

        # iteration별 noise 리스트에 저장
        noise_data[method_name][it].append(noise_val)

    # summary 출력
    for it in sorted(noise_data[method_name].keys()):
        arr = np.array(noise_data[method_name][it], dtype=np.float32)
        print(f"  iter={it:4d} | N={len(arr):4d} | mean={arr.mean():.6f} | std={arr.std():.6f}")


# ------------------------------------------------------
# 각 method별 (iteration → mean noise, std noise)로 요약
# ------------------------------------------------------

noise_mean: Dict[str, Dict[int, float]] = defaultdict(dict)
noise_std: Dict[str, Dict[int, float]]  = defaultdict(dict)

for method_name in noise_data:
    for it, arr_list in noise_data[method_name].items():
        arr = np.array(arr_list, dtype=np.float32)
        noise_mean[method_name][it] = float(arr.mean())
        noise_std[method_name][it]  = float(arr.std())


print("\n[INFO] === noise_mean summary ===")
for method_name in noise_mean:
    print(f"\n{method_name}:")
    for it in sorted(noise_mean[method_name].keys()):
        print(f"  iter={it:4d}, mean_noise={noise_mean[method_name][it]:.6f}")


[INFO] Computing high-frequency noise for all methods...

[INFO] === DMI_W4A8 ===
  iter= 100 | N=  32 | mean=0.196469 | std=0.006520
  iter= 200 | N=  32 | mean=0.152781 | std=0.011748
  iter= 300 | N=  32 | mean=0.139443 | std=0.021316
  iter= 400 | N=  32 | mean=0.123157 | std=0.027504
  iter= 500 | N=  32 | mean=0.135170 | std=0.020855
  iter= 600 | N=  32 | mean=0.128728 | std=0.023574
  iter= 700 | N=  32 | mean=0.124239 | std=0.019681
  iter= 800 | N=  32 | mean=0.135649 | std=0.028224
  iter= 900 | N=  32 | mean=0.127637 | std=0.021889
  iter=1000 | N=  32 | mean=0.123636 | std=0.022444
  iter=1100 | N=  32 | mean=0.134856 | std=0.030337
  iter=1200 | N=  32 | mean=0.149701 | std=0.033343
  iter=1300 | N=  32 | mean=0.133110 | std=0.024898
  iter=1400 | N=  32 | mean=0.125818 | std=0.024869
  iter=1500 | N=  32 | mean=0.119095 | std=0.021823
  iter=1600 | N=  32 | mean=0.130988 | std=0.032037
  iter=1700 | N=  32 | mean=0.134016 | std=0.026284
  iter=1800 | N=  32 | mean=0.1274

In [23]:
# Cell 7: ACC_DATA 정의 + noise–accuracy scatter plot 생성

# ------------------------------------------------------
# 1) 각 method별 iter → accuracy 데이터 입력
#    (여기 부분을 직접 채워 넣으면 됨)
# ------------------------------------------------------

ACC_DATA: Dict[str, Dict[int, float]] = {
    "DMI_W4A8": {
        4000: 74.562, 3900: 75.212, 3800: 75.266, 3700: 74.882, 3600: 76.148, 3500: 75.220, 3400: 75.666, 3300: 75.898, 3200: 75.534, 3100: 75.626,
        3000:	75.498, 2900:	75.504, 2800:	75.400, 2700:	75.210, 2600:	75.506,	2500: 74.408, 2400: 75.752, 2300:	74.898, 2200:	75.762, 2100:	75.260,
        2000:	75.236, 1900:	75.900, 1800:	75.542, 1700:	75.586, 1600:	75.538, 1500:	75.812, 1400:	75.588, 1300:	74.874, 1200:	75.340, 1100:	75.374,
        1000:	75.110, 900:	75.250, 800:	75.320, 700:	74.272, 600:	74.898, 500:	75.572, 400:	75.140, 300:	75.788, 200:	75.468, 100:	76.376
        },
    "SMI_W4A8": {
        4000: 77.616, 3800:	77.434, 3700:	77.718, 3600:	77.430, 3500:	77.398, 3400:	77.154, 3300:	77.534, 3200:	77.408, 3100:	77.404, 3000:	77.328,
        2900:	77.530, 2800:	77.302, 2700:	77.364, 2600:	77.110, 2500:	77.700, 2400:	77.128, 2300:	77.290, 2200:	77.160, 2100:	77.648, 2000:	77.470,
        1900:	77.822, 1800:	77.992, 1700:	77.546, 1600:	77.572, 1500:	77.760, 1400:	77.668, 1300:	77.456, 1200:	76.886, 1100:	76.204, 1000:	76.862,
        900:	77.822, 800:	77.992, 700:	77.546, 600:	77.572, 500:	77.760, 400:	77.668, 300:	77.456, 200:	76.886, 100:	76.204, 0: 76.862
    },
}

print("[INFO] ACC_DATA methods:", list(ACC_DATA.keys()))


# ------------------------------------------------------
# 2) noise_mean 와 ACC_DATA를 매칭하여 scatter plot용 포인트 생성
# ------------------------------------------------------

scatter_points: Dict[str, List[Tuple[float, float, int]]] = {}  
# {method: [(noise, acc, iter), ...]}

for method_name, iter_acc in ACC_DATA.items():
    if method_name not in noise_mean:
        print(f"[WARN] ACC_DATA에 있는 {method_name}가 noise_mean에는 없습니다. (이 method는 스킵)")
        continue

    method_points = []
    for it, acc in iter_acc.items():
        if it not in noise_mean[method_name]:
            print(f"[WARN] {method_name}: iter={it} 는 noise_mean에 없습니다. (스킵)")
            continue
        n = noise_mean[method_name][it]
        method_points.append((n, acc, it))

    if len(method_points) == 0:
        print(f"[WARN] {method_name}: 매칭되는 (noise, acc) 포인트가 없습니다.")
        continue

    scatter_points[method_name] = method_points

print("\n[INFO] scatter_points summary:")
for method_name, pts in scatter_points.items():
    print(f"  {method_name}: {len(pts)} points")


# ------------------------------------------------------
# 3) scatter plot 그리기 및 저장
#    x축 = noise_mean, y축 = accuracy
# ------------------------------------------------------

plt.figure(figsize=(8, 6))

for method_name, pts in scatter_points.items():
    xs = [p[0] for p in pts]  # noise
    ys = [p[1] for p in pts]  # accuracy
    its = [p[2] for p in pts] # iteration

    plt.scatter(xs, ys, label=method_name)

    # 각 점 옆에 iteration 숫자 annotate (원하면 끄거나 수정 가능)
    # for x, y, it in zip(xs, ys, its):
    #     plt.text(x, y, str(it), fontsize=8, ha="left", va="bottom")

plt.xlabel("High-frequency noise ratio")
plt.ylabel("Accuracy (%)")
plt.title("Noise vs Performance (scatter)")
plt.grid(True)
plt.legend()

fig_path = SAVE_DIR / "noise_performance_scatter.png"
plt.tight_layout()
plt.savefig(fig_path)
plt.close()

print(f"[INFO] Scatter plot saved to: {fig_path}")


# ------------------------------------------------------
# 4) (선택) 텍스트로 포인트도 같이 저장
# ------------------------------------------------------

txt_path = SAVE_DIR / "noise_performance_points.txt"
with open(txt_path, "w") as f:
    f.write("# method\titer\tnoise\taccuracy\n")
    for method_name, pts in scatter_points.items():
        for n, acc, it in pts:
            f.write(f"{method_name}\t{it}\t{n:.8f}\t{acc:.4f}\n")

print(f"[INFO] Scatter points saved to: {txt_path}")


[INFO] ACC_DATA methods: ['DMI_W4A8', 'SMI_W4A8']
[WARN] SMI_W4A8: iter=0 는 noise_mean에 없습니다. (스킵)

[INFO] scatter_points summary:
  DMI_W4A8: 40 points
  SMI_W4A8: 39 points
[INFO] Scatter plot saved to: /home/jener05458/src/EdgeMI/TBD_MI/observation/noise_performance/noise_performance_scatter.png
[INFO] Scatter points saved to: /home/jener05458/src/EdgeMI/TBD_MI/observation/noise_performance/noise_performance_points.txt
