In [44]:
# Cell 1: 기본 설정 & import

from pathlib import Path
from typing import List, Tuple, Dict
import re
import sys

import numpy as np
import torch
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# ---------- 사용자 설정 ----------
IMG_ROOT = Path("./dataset/deit_base_16_imagenet/SMI_Variance").expanduser().resolve()

# 결과 저장 폴더
SAVE_DIR = Path("./observation/05_Variance_Noise").expanduser().resolve()

# 고주파/저주파 cutoff 비율 (0~1)
# 1에 가까울수록 high frequency 영역이 좁아짐
CUTOFF_RATIO = 0.7

SAVE_DIR.mkdir(parents=True, exist_ok=True)

print(f"[INFO] IMG_ROOT     : {IMG_ROOT}")
print(f"[INFO] SAVE_DIR     : {SAVE_DIR}")
print(f"[INFO] CUTOFF_RATIO : {CUTOFF_RATIO}")

[INFO] IMG_ROOT     : /home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance
[INFO] SAVE_DIR     : /home/jener05458/src/EdgeMI/TBD_MI/observation/05_Variance_Noise
[INFO] CUTOFF_RATIO : 0.7


In [45]:
# Cell 2: 이미지 찾기 (IMG_ROOT 아래에서 DMI-iter / SMI-iter 폴더만 대상)

# 예: "DMI-4000-0.0-0-32-W4A8", "SMI-4000-0.0-[50,100,200,300]-[...]-0-32-W4A8"
_variance_root_pattern = re.compile(r"^(DMI|SMI)-4000-(\d+(\.\d+)?)-")

def find_images(img_root: Path) -> List[Path]:
    """
    img_root (예: .../DMI/W4A8)을 입력받아,
    그 내부의 DMI-{iter}-..., SMI-{iter}-... 폴더들에서 *.png 이미지를 모두 수집한다.
    """
    if not img_root.exists():
        raise FileNotFoundError(f"[ERROR] 이미지 폴더가 존재하지 않습니다: {img_root}")

    image_paths: List[Path] = []

    # 1) 하위 디렉토리 중 DMI-*/SMI-* 폴더만 선택
    for subdir in sorted(img_root.iterdir()):
        if not subdir.is_dir():
            continue

        name = subdir.name
        if _variance_root_pattern.match(name) is None:
            # iteration 폴더가 아닌 경우 skip
            print("실행")
            continue

        # 2) 해당 폴더에서 모든 .jpg 파일 찾기
        jpgs = sorted(p for p in subdir.rglob("*.png") if p.is_file())
        image_paths.extend(jpgs)

    if len(image_paths) == 0:
        raise FileNotFoundError(
            f"[ERROR] '{img_root}' 아래에서 png 이미지를 찾지 못했습니다. "
            "DMI-iter 또는 SMI-iter 폴더 구조를 확인하세요."
        )
    
    return image_paths


# 실행
all_image_paths = find_images(IMG_ROOT)
print(f"[INFO] Found {len(all_image_paths)} images")
all_image_paths[:5]


[INFO] Found 1632 images


[PosixPath('/home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance/SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8/0-0.png'),
 PosixPath('/home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance/SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8/0-1.png'),
 PosixPath('/home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance/SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8/0-10.png'),
 PosixPath('/home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance/SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8/0-11.png'),
 PosixPath('/home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance/SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8/0-12.png')]

In [46]:
# Cell 3: 이미지 로딩 (torch.Tensor [C,H,W], float32 in [0,1])

from torchvision import transforms

def load_images(image_paths: List[Path]) -> List[Tuple[Path, torch.Tensor]]:
    """
    image_paths 리스트를 순회하며 이미지를 로드하여 텐서로 변환한다.
    반환 형식: [(Path, Tensor[C,H,W]), ...]
    """
    to_tensor = transforms.ToTensor()
    loaded: List[Tuple[Path, torch.Tensor]] = []

    for p in image_paths:
        try:
            img = Image.open(p).convert("RGB")
            tensor = to_tensor(img)  # [C,H,W], float32 in [0,1]
            loaded.append((p, tensor))
        except Exception as e:
            print(f"[WARN] Failed to load {p}: {e}", file=sys.stderr)

    if len(loaded) == 0:
        raise RuntimeError("모든 이미지 로드에 실패했습니다; 시각화할 이미지가 없습니다.")

    return loaded


# 실제 실행
images = load_images(all_image_paths)
print(f"[INFO] Successfully loaded {len(images)} images")

# 첫 1개 확인
images[0][0], images[0][1].shape

[INFO] Successfully loaded 1632 images


(PosixPath('/home/jener05458/src/EdgeMI/TBD_MI/dataset/deit_base_16_imagenet/SMI_Variance/SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8/0-0.png'),
 torch.Size([3, 224, 224]))

In [47]:
# Cell 4: 폴더 이름에서 variance 숫자 추출
_variance_pattern = re.compile(r"^(DMI|SMI)-4000-(\d+(\.\d+)?)-")

def extract_variance_from_path(path: Path) -> int:
    folder_name = path.parent.name
    m = _variance_pattern.match(folder_name)
    if m is None:
        raise ValueError(
            f"폴더 이름에서 variance를 찾을 수 없습니다: '{folder_name}' (path={path})"
        )
    
    variance_str = m.group(2)
    variance_float = float(variance_str)

    return variance_float


# ---- 간단 테스트 ----
print("[TEST] 첫 5개 이미지에 대해 폴더/variance 확인")

for i in range(min(5, len(images))):
    p = images[i][0]
    it = extract_variance_from_path(p)
    print(f"{p.parent.name:30s} -> variance = {it}")


[TEST] 첫 5개 이미지에 대해 폴더/variance 확인
SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8 -> variance = 0.0
SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8 -> variance = 0.0
SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8 -> variance = 0.0
SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8 -> variance = 0.0
SMI-4000-0.0-50-100-200-300-0.3-0.3-0.3-0.3-0-32-W4A8 -> variance = 0.0


In [48]:
# Cell 5: 수정된 high-frequency 에너지 비율 계산 (정확한 원형 annulus만 사용)

def compute_high_freq_ratio(image_tensor: torch.Tensor, cutoff_ratio: float = 0.7) -> float:
    """
    image_tensor: [C,H,W], 0~1 float
    cutoff_ratio: 0~1 사이 값
      - 0.7이면: 내접원 반지름의 70% 바깥을 high-frequency로 계산
    """

    # 1) RGB -> grayscale
    img_np = image_tensor.numpy()          # (C,H,W)
    gray = img_np.mean(axis=0)             # (H,W)

    H, W = gray.shape

    # 2) FFT
    fft = np.fft.fft2(gray)
    fft_shift = np.fft.fftshift(fft)
    mag = np.abs(fft_shift)                # magnitude spectrum

    # 3) radius grid
    y, x = np.indices((H, W))
    cy, cx = (H - 1) / 2.0, (W - 1) / 2.0
    r = np.sqrt((x - cx)**2 + (y - cy)**2)

    # 4) 내접원의 최대 반지름 (corner 문제 제거)
    R_max = min(cy, H-1-cy, cx, W-1-cx)

    # cutoff 계산
    cutoff_radius = cutoff_ratio * R_max

    # 5) annulus mask (원형 도넛)
    high_mask = (r >= cutoff_radius) & (r <= R_max)

    # 6) 에너지 계산
    total_energy = mag.sum()
    high_energy = mag[high_mask].sum()

    eps = 1e-8
    return float(high_energy / (total_energy + eps))


# 테스트
compute_high_freq_ratio(images[0][1], cutoff_ratio=CUTOFF_RATIO)


0.2427126020830079

In [49]:
# Cell 6: variance vs high-frequency ratio 집계 및 저장

from collections import defaultdict

variance_to_ratios: Dict[int, list] = defaultdict(list)

print("[MODE1] Iteration vs High-frequency ratio 계산 중...")

for p, img_tensor in images:
    try:
        variance = extract_variance_from_path(p)
    except ValueError as e:
        print(f"[WARN] {e}", file=sys.stderr)
        continue

    # 고주파 비율 계산
    ratio = compute_high_freq_ratio(img_tensor, cutoff_ratio=CUTOFF_RATIO)
    variance_to_ratios[variance].append(ratio)

# variance 하나도 없으면 오류
if len(variance_to_ratios) == 0:
    raise RuntimeError("variance 정보를 가진 이미지가 하나도 없습니다.")

# variance 정렬
variances = sorted(variance_to_ratios.keys())
mean_vals = []
std_vals = []
counts = []

for var in variances:
    arr = np.array(variance_to_ratios[var], dtype=np.float32)
    mean_vals.append(arr.mean())
    std_vals.append(arr.std())
    counts.append(len(arr))

    print(f"[MODE1] variance={var:.1f} | N={len(arr):2d} | "
          f"mean={arr.mean():.6f} | std={arr.std():.6f}")

variances_np = np.array(variances, dtype=np.float32)
mean_np = np.array(mean_vals, dtype=np.float32)
std_np = np.array(std_vals, dtype=np.float32)

[MODE1] Iteration vs High-frequency ratio 계산 중...
[MODE1] variance=0.0 | N=32 | mean=0.229587 | std=0.019413
[MODE1] variance=0.1 | N=32 | mean=0.224278 | std=0.014828
[MODE1] variance=0.2 | N=32 | mean=0.227007 | std=0.017919
[MODE1] variance=0.3 | N=32 | mean=0.227261 | std=0.014075
[MODE1] variance=0.4 | N=32 | mean=0.228564 | std=0.018642
[MODE1] variance=0.5 | N=32 | mean=0.224180 | std=0.013794
[MODE1] variance=0.6 | N=32 | mean=0.230043 | std=0.017199
[MODE1] variance=0.7 | N=32 | mean=0.225359 | std=0.016408
[MODE1] variance=0.8 | N=32 | mean=0.223564 | std=0.018968
[MODE1] variance=0.9 | N=32 | mean=0.227298 | std=0.016738
[MODE1] variance=1.0 | N=32 | mean=0.221107 | std=0.016981
[MODE1] variance=1.1 | N=32 | mean=0.224585 | std=0.014346
[MODE1] variance=1.2 | N=32 | mean=0.227416 | std=0.016632
[MODE1] variance=1.3 | N=32 | mean=0.223796 | std=0.014912
[MODE1] variance=1.4 | N=32 | mean=0.228089 | std=0.016967
[MODE1] variance=1.5 | N=32 | mean=0.226575 | std=0.016139
[MODE1

In [50]:
# -----------------------------
# 그래프 저장 (mean curve only)
# -----------------------------
plt.figure(figsize=(8, 5))
plt.plot(
    variances_np,
    mean_np,
    "-o",
    linewidth=2,
    markersize=6,
)
plt.xlabel("Variance")
plt.ylabel("High-frequency energy ratio")
plt.title(f"Variance vs High-frequency ratio (cutoff={CUTOFF_RATIO})")
plt.grid(True)

fig_path = SAVE_DIR / "variance_vs_highfreq.png"
plt.tight_layout()
plt.savefig(fig_path)
plt.close()

print(f"[MODE1] 플롯 저장: {fig_path}")
print("[MODE1] 완료!")


[MODE1] 플롯 저장: /home/jener05458/src/EdgeMI/TBD_MI/observation/05_Variance_Noise/variance_vs_highfreq.png
[MODE1] 완료!


In [51]:
import pandas as pd

# CSV 저장
df = pd.DataFrame({
    "Variances": variances_np,
    "High_Frequency_Ratio": mean_np
})

csv_path = SAVE_DIR / "variance_noise.csv"

df.to_csv(csv_path, index=False)

print(f"[Mode1] CSV 저장: {csv_path}")

[Mode1] CSV 저장: /home/jener05458/src/EdgeMI/TBD_MI/observation/05_Variance_Noise/variance_noise.csv
