In [1]:
import os, sys, warnings
warnings.filterwarnings("ignore")

# ▶ 여기를 본인 환경에 맞게 바꾸세요.
# os.chdir("/home/mbd1234/yolact")  # 예시

if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

print("CWD:", os.getcwd())


CWD: /home/mbd1234/yolact


In [3]:
import torch
import numpy as np
from collections import Counter

from data import cfg, set_cfg, set_dataset, COCODetection, MEANS
from utils.augmentations import BaseTransform
from yolact import Yolact
from layers.output_utils import postprocess

import eval as eval_script  # 평가 플래그 주입용

# ▶ 본인이 쓰는 설정/체크포인트로 맞춰주세요.
CFG_NAME = "cell_yolact_im700_config"   # 예: cell_yolact_im700_config
DATASET_OVERRIDE = None                  # 보통 None
CHECKPOINT = "weights/cell_yolact_im700_86_1726_interrupt.pth"  # 없으면 None로 두면 backbone init

SPLIT = "val"       # 'train' or 'val'
K = 3               # sanity probe에 사용할 이미지 수
PROBE_SCORE_THR = 0.01

EVAL_SCORE_THR = 0.05
EVAL_TOP_K = 200
EVAL_NMS = 0.5
VALIDATION_SIZE = 200

USE_CUDA = torch.cuda.is_available()

def device():
    return torch.device('cuda' if USE_CUDA else 'cpu')


In [4]:
set_cfg(CFG_NAME)
if DATASET_OVERRIDE:
    set_dataset(DATASET_OVERRIDE)

def build_dataset(split: str):
    if split == 'train':
        ds = COCODetection(image_path=cfg.dataset.train_images,
                           info_file=cfg.dataset.train_info,
                           transform=BaseTransform(MEANS))
    else:
        ds = COCODetection(image_path=cfg.dataset.valid_images,
                           info_file=cfg.dataset.valid_info,
                           transform=BaseTransform(MEANS))
    return ds

dataset = build_dataset(SPLIT)
print(f"[DATA] split={SPLIT}, len={len(dataset)}")


loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
[DATA] split=val, len=10


In [5]:
def check_anchor_config():
    ok = True
    bb = getattr(cfg, 'backbone', None)
    if bb is None:
        print("[ANCHOR] cfg.backbone 이 없습니다.")
        return False

    ps = getattr(bb, 'pred_scales', None)
    pa = getattr(bb, 'pred_aspect_ratios', None)

    print("\n=== Anchor/Level Configuration ===")
    print("- cfg.num_classes:", getattr(cfg, "num_classes", None))
    print("- cfg.max_size:", getattr(cfg, "max_size", None))
    if ps is not None:
        print(f"- pred_scales levels: {len(ps)} -> {ps}")
    else:
        print("- pred_scales: None")

    if pa is not None:
        print(f"- pred_aspect_ratios levels: {len(pa)}")
    else:
        print("- pred_aspect_ratios: None")

    if (ps is None) or (pa is None):
        print("⚠️ pred_scales 또는 pred_aspect_ratios 가 없습니다.")
        return False

    if len(ps) != len(pa):
        print("❗ 레벨 수 불일치: pred_scales 와 pred_aspect_ratios 길이가 다릅니다.")
        ok = False
    else:
        print("✅ 레벨 수 일치")

    sel = getattr(bb, 'selected_layers', None)
    if sel is not None:
        print(f"- backbone.selected_layers: {sel} (len={len(sel)})")

    return ok

_ = check_anchor_config()



=== Anchor/Level Configuration ===
- cfg.num_classes: 2
- cfg.max_size: 1024
- pred_scales levels: 6 -> [[8], [16], [24], [48], [96], [192]]
- pred_aspect_ratios levels: 6
✅ 레벨 수 일치
- backbone.selected_layers: [1, 2, 3] (len=3)


In [6]:
def peek_label_distribution(ds, first_n=5):
    print("\n=== Label Distribution Probe ===")
    num_classes = getattr(cfg, "num_classes", None)
    agg = Counter()
    limit = min(first_n, len(ds))
    for i in range(limit):
        img, (tgt, gt_masks, num_crowd) = ds[i]
        try:
            cls = [int(x[-1]) for x in tgt]
        except Exception:
            cls = [int(tgt[j, -1].item()) for j in range(tgt.shape[0])]
        cnt = Counter(cls)
        agg.update(cnt)
        print(f"- sample #{i}: labels={dict(cnt)}  (num_inst={sum(cnt.values())})")
    print(f"==> Aggregated: {dict(agg)} ; cfg.num_classes={num_classes}")

    # 휴리스틱 경고
    if len(agg) == 0:
        print("⚠️ GT 라벨이 비어 있거나 포맷이 다를 수 있습니다.")
    if num_classes is not None and any((c < 0 or c >= num_classes) for c in agg.keys()):
        print("⚠️ 라벨 인덱스가 cfg.num_classes 범위를 벗어나요. (카테고리 id/라벨맵 의심)")
    if num_classes == 2:
        # 배경 0, 유효 1 한 개만 기대
        non_bg = [c for c in agg if c != 0]
        if len(non_bg) > 1:
            print("⚠️ 단일 클래스 문제인데 유효 클래스 라벨이 여러 개 감지됨 (categories.id/label_map 확인).")
        elif 1 not in agg and len(agg) > 0:
            print("⚠️ 유효 클래스(1)가 보이지 않습니다. (COCO categories.id=1 권장)")

peek_label_distribution(dataset, first_n=5)



=== Label Distribution Probe ===
- sample #0: labels={0: 248}  (num_inst=248)
- sample #1: labels={0: 78}  (num_inst=78)
- sample #2: labels={0: 117, -1: 64}  (num_inst=181)
- sample #3: labels={0: 299}  (num_inst=299)
- sample #4: labels={0: 319}  (num_inst=319)
==> Aggregated: {0: 1061, -1: 64} ; cfg.num_classes=2
⚠️ 라벨 인덱스가 cfg.num_classes 범위를 벗어나요. (카테고리 id/라벨맵 의심)
⚠️ 유효 클래스(1)가 보이지 않습니다. (COCO categories.id=1 권장)


In [7]:
@torch.no_grad()
def load_model_and_print_proto(ckpt_path=None, dev=None):
    dev = dev or device()
    model = Yolact().to(dev)
    if ckpt_path and os.path.isfile(ckpt_path):
        print(f"[MODEL] load_weights: {ckpt_path}")
        model.load_weights(ckpt_path)
    else:
        print("[MODEL] init_weights from backbone")
        backbone_path = cfg.backbone.path
        if not os.path.isabs(backbone_path):
            backbone_path = os.path.join("weights", backbone_path)
        model.init_weights(backbone_path=backbone_path)
    model.eval()

    # 한 장 던져 proto 유무/shape 확인
    if len(dataset) > 0:
        img, (tgt, gt_masks, num_crowd) = dataset[0]
        x = img.unsqueeze(0).to(dev)
        preds = model(x)
        proto = preds.get("proto", None) if isinstance(preds, dict) else None
        print("[PROTO] exists:", proto is not None)
        if proto is not None:
            print("[PROTO] shape:", tuple(proto.shape))
        # GT 마스크 값 범위/형 확인
        if hasattr(gt_masks, "dtype"):
            print("[GT MASK] dtype:", gt_masks.dtype,
                  "min/max:", float(gt_masks.min()), float(gt_masks.max()))
    model.train()
    return model

model = load_model_and_print_proto(CHECKPOINT, device())


[MODEL] load_weights: weights/cell_yolact_im700_86_1726_interrupt.pth
[PROTO] exists: False
[GT MASK] dtype: uint8 min/max: 0.0 1.0


In [8]:
@torch.no_grad()
def sanity_probe(model, ds, dev=None, k=3, score_thr=0.01):
    dev = dev or device()
    print(f"\n=== Postprocess Sanity Probe (k={k}, score_thr={score_thr}) ===")
    model.eval()
    n = min(k, len(ds))
    if n == 0:
        print("⚠️ 데이터셋이 비어 있습니다.")
        return
    for i in range(n):
        img, _ = ds[i]
        h, w = img.shape[1:]
        x = img.unsqueeze(0).to(dev)
        preds = model(x)
        out = postprocess(preds, w, h, score_threshold=float(score_thr))
        if out is None:
            print(f"- sample #{i}: det=0 (postprocess returned None)")
            continue
        classes, scores, boxes, masks = out
        d = len(scores)
        smin = float(scores.min()) if d > 0 else None
        smax = float(scores.max()) if d > 0 else None
        ucls = Counter([int(c) for c in classes]) if d > 0 else {}
        print(f"- sample #{i}: det={d}, score[min,max]=({smin},{smax}), classes={dict(ucls)}")

sanity_probe(model, dataset, device(), k=K, score_thr=PROBE_SCORE_THR)



=== Postprocess Sanity Probe (k=3, score_thr=0.01) ===
- sample #0: det=200, score[min,max]=(0.23720140755176544,0.965642511844635), classes={0: 200}
- sample #1: det=200, score[min,max]=(0.1616656333208084,0.8309450745582581), classes={0: 200}
- sample #2: det=200, score[min,max]=(0.20879216492176056,0.8090166449546814), classes={0: 200}


In [9]:
import numpy as np
import torch

def quick_mask_iou(pred_mask, gt_mask):
    """
    pred_mask, gt_mask: HxW (bool or {0,1}), 같은 크기로 가정
    """
    pm = pred_mask.astype(np.bool_)
    gm = gt_mask.astype(np.bool_)
    inter = np.logical_and(pm, gm).sum()
    union = np.logical_or(pm, gm).sum()
    return (inter / union) if union > 0 else 0.0

@torch.no_grad()
def probe_one_image_mask_iou(model, ds, index=0, score_thr=0.1, top_k=5, dev=None):
    dev = dev or device()
    print(f"\n=== One-Image Mask IoU Probe (idx={index}, score_thr={score_thr}) ===")
    if index >= len(ds):
        print("Index out of range.")
        return
    img, (tgt, gt_masks, num_crowd) = ds[index]
    H, W = img.shape[1:]
    x = img.unsqueeze(0).to(dev)
    preds = model(x)
    out = postprocess(preds, W, H, score_threshold=float(score_thr), top_k=top_k)
    if out is None:
        print("No detections.")
        return
    classes, scores, boxes, masks = out  # masks: [N, H, W] expected
    print(f"Detections: {len(scores)}")

    # GT 마스크가 [N,H,W] 텐서로 들어오는 포크가 많음
    if isinstance(gt_masks, torch.Tensor):
        gt_bin = (gt_masks.detach().cpu().float() > 0.5).numpy()
        if gt_bin.ndim == 2:
            gt_bin = gt_bin[None, ...]  # [1,H,W]
    else:
        gt_bin = np.asarray(gt_masks)
        if gt_bin.ndim == 2:
            gt_bin = gt_bin[None, ...]

    # pred 상위 1~few개만 샘플 IoU
    m = min(len(scores), gt_bin.shape[0], top_k)
    if m == 0:
        print("No comparable masks (either pred or gt is empty).")
        return

    ious = []
    for i in range(m):
        pm = (masks[i].detach().cpu().numpy() > 0.5).astype(np.uint8)
        gm = gt_bin[i].astype(np.uint8) if i < gt_bin.shape[0] else gt_bin[0].astype(np.uint8)
        iou = quick_mask_iou(pm, gm)
        ious.append(iou)
        print(f"  - pair #{i}: IoU={iou:.4f}")

    if all(i == 0.0 for i in ious):
        print("⚠️ 모든 샘플 IoU=0 → 마스크 정렬/크롭/스케일/포맷 문제 가능성 큼 (box는 가능).")

probe_one_image_mask_iou(model, dataset, index=0, score_thr=0.1, top_k=5, dev=device())



=== One-Image Mask IoU Probe (idx=0, score_thr=0.1) ===


TypeError: postprocess() got an unexpected keyword argument 'top_k'

In [11]:
def inject_eval_flags(max_images: int, score_thr=0.05, top_k=200, nms=0.5):
    print("\n=== Inject eval.py flags ===")
    print(f"--max_images={max_images}, --score_threshold={score_thr}, --top_k={top_k}, --nms_threshold={nms}")
    eval_script.parse_args([
        '--no_bar',
        f'--max_images={int(max_images)}',
        f'--score_threshold={float(score_thr)}',
        f'--top_k={int(top_k)}',
        f'--nms_threshold={float(nms)}'
    ])

inject_eval_flags(VALIDATION_SIZE, score_thr=EVAL_SCORE_THR, top_k=EVAL_TOP_K, nms=EVAL_NMS)



=== Inject eval.py flags ===
--max_images=200, --score_threshold=0.05, --top_k=200, --nms_threshold=0.5


usage: ipykernel_launcher.py [-h] [--trained_model TRAINED_MODEL]
                             [--top_k TOP_K] [--cuda CUDA]
                             [--fast_nms FAST_NMS]
                             [--cross_class_nms CROSS_CLASS_NMS]
                             [--display_masks DISPLAY_MASKS]
                             [--display_bboxes DISPLAY_BBOXES]
                             [--display_text DISPLAY_TEXT]
                             [--display_scores DISPLAY_SCORES] [--display]
                             [--shuffle] [--ap_data_file AP_DATA_FILE]
                             [--resume] [--max_images MAX_IMAGES]
                             [--output_coco_json]
                             [--bbox_det_file BBOX_DET_FILE]
                             [--mask_det_file MASK_DET_FILE] [--config CONFIG]
                             [--output_web_json] [--web_det_path WEB_DET_PATH]
                             [--no_bar] [--display_lincomb DISPLAY_LINCOMB]
                   

SystemExit: 2

In [12]:
print("mask_proto_crop:", getattr(cfg, "mask_proto_crop", None))
print("mask_proto_crop_with_pred_box:", getattr(cfg, "mask_proto_crop_with_pred_box", None))
print("mask_size:", getattr(cfg, "mask_size", None))


mask_proto_crop: True
mask_proto_crop_with_pred_box: True
mask_size: 16


In [13]:
@torch.no_grad()
def load_model_and_inspect_outputs(ckpt_path=None, dev=None, probe_index=0):
    from data import mask_type as _mask_type
    dev = dev or device()
    model = Yolact().to(dev)

    if ckpt_path and os.path.isfile(ckpt_path):
        print(f"[MODEL] load_weights: {ckpt_path}")
        model.load_weights(ckpt_path)
    else:
        print("[MODEL] init_weights from backbone")
        backbone_path = cfg.backbone.path
        if not os.path.isabs(backbone_path):
            backbone_path = os.path.join("weights", backbone_path)
        model.init_weights(backbone_path=backbone_path)

    print("\n=== CFG MASK BRANCH SETTINGS ===")
    print("- cfg.mask_type:", getattr(cfg, "mask_type", None))
    print("- cfg.mask_size:", getattr(cfg, "mask_size", None))
    print("- cfg.mask_proto_net:", getattr(cfg, "mask_proto_net", None) is not None)
    print("- cfg.mask_proto_crop:", getattr(cfg, "mask_proto_crop", None))
    print("- cfg.mask_proto_crop_with_pred_box:", getattr(cfg, "mask_proto_crop_with_pred_box", None))
    print("- cfg.masks_to_train:", getattr(cfg, "masks_to_train", None))

    # 한 장으로 모델 출력 형태 조사
    model.eval()
    if len(dataset) == 0:
        print("[WARN] dataset empty.")
        model.train()
        return model

    img, (tgt, gt_masks, num_crowd) = dataset[min(probe_index, len(dataset)-1)]
    x = img.unsqueeze(0).to(dev)
    with torch.no_grad():
        preds = model(x)

    # 출력이 dict/tuple 등 어떤 형태인지 조사
    print("\n=== MODEL OUTPUT INSPECTION ===")
    if isinstance(preds, dict):
        print("- type: dict")
        print("- keys:", list(preds.keys()))
        # YOLACT 계열에서 프로토로 흔한 키 후보들
        proto = None
        for k in ["proto", "proto_out", "proto_pooled", "mask_proto", "proto_masks"]:
            if k in preds:
                proto = preds[k]
                print(f"[FOUND] proto-like key: '{k}' shape={tuple(proto.shape)}")
                break
        if proto is None:
            print("[INFO] proto-like key not found in dict outputs.")
    elif isinstance(preds, (list, tuple)):
        print(f"- type: {type(preds).__name__}, len={len(preds)}")
        # 첫 몇 개 텐서 shape 출력
        for i, p in enumerate(preds[:6]):
            try:
                shape = tuple(p.shape)
            except Exception:
                shape = type(p)
            print(f"  idx {i}: {shape}")
    else:
        print("- type:", type(preds))
        print(preds)

    # GT 마스크 값/타입 간단 점검
    if hasattr(gt_masks, "dtype"):
        print("\n[GT MASK] dtype:", gt_masks.dtype,
              "min/max:", float(gt_masks.min()), float(gt_masks.max()))
        if gt_masks.dtype != torch.bool and gt_masks.dtype != torch.uint8 and gt_masks.dtype != torch.float32:
            print("[WARN] Unusual gt_masks dtype. Expect bool/uint8/float.")
    model.train()
    return model

model = load_model_and_inspect_outputs(CHECKPOINT, device(), probe_index=0)


[MODEL] load_weights: weights/cell_yolact_im700_86_1726_interrupt.pth

=== CFG MASK BRANCH SETTINGS ===
- cfg.mask_type: 1
- cfg.mask_size: 16
- cfg.mask_proto_net: True
- cfg.mask_proto_crop: True
- cfg.mask_proto_crop_with_pred_box: True
- cfg.masks_to_train: 300

=== MODEL OUTPUT INSPECTION ===
- type: list, len=1
  idx 0: <class 'dict'>

[GT MASK] dtype: uint8 min/max: 0.0 1.0
[WARN] Unusual gt_masks dtype. Expect bool/uint8/float.


In [14]:
@torch.no_grad()
def sanity_probe(model, ds, dev=None, k=3, score_thr=0.01):
    dev = dev or device()
    print(f"\n=== Postprocess Sanity Probe (k={k}, score_thr={score_thr}) ===")
    model.eval()
    n = min(k, len(ds))
    if n == 0:
        print("⚠️ 데이터셋이 비어 있습니다.")
        return
    for i in range(n):
        img, _ = ds[i]
        h, w = img.shape[1:]
        x = img.unsqueeze(0).to(dev)
        preds = model(x)
        # NOTE: 당신 포크의 postprocess는 top_k 키워드가 없음 → 제거
        out = postprocess(preds, w, h, score_threshold=float(score_thr))
        if out is None:
            print(f"- sample #{i}: det=0 (postprocess returned None)")
            continue
        classes, scores, boxes, masks = out
        d = len(scores)
        smin = float(scores.min()) if d > 0 else None
        smax = float(scores.max()) if d > 0 else None
        from collections import Counter
        ucls = Counter([int(c) for c in classes]) if d > 0 else {}
        print(f"- sample #{i}: det={d}, score[min,max]=({smin},{smax}), classes={dict(ucls)}")

sanity_probe(model, dataset, device(), k=K, score_thr=PROBE_SCORE_THR)



=== Postprocess Sanity Probe (k=3, score_thr=0.01) ===
- sample #0: det=200, score[min,max]=(0.23720140755176544,0.965642511844635), classes={0: 200}
- sample #1: det=200, score[min,max]=(0.1616656333208084,0.8309450745582581), classes={0: 200}
- sample #2: det=200, score[min,max]=(0.20879216492176056,0.8090166449546814), classes={0: 200}


In [15]:
import numpy as np
import torch

def quick_mask_iou(pred_mask, gt_mask):
    pm = pred_mask.astype(np.bool_)
    gm = gt_mask.astype(np.bool_)
    inter = np.logical_and(pm, gm).sum()
    union = np.logical_or(pm, gm).sum()
    return (inter / union) if union > 0 else 0.0

@torch.no_grad()
def probe_one_image_mask_iou(model, ds, index=0, score_thr=0.1, dev=None, max_pairs=5):
    dev = dev or device()
    print(f"\n=== One-Image Mask IoU Probe (idx={index}, score_thr={score_thr}) ===")
    if index >= len(ds):
        print("Index out of range.")
        return
    img, (tgt, gt_masks, num_crowd) = ds[index]
    H, W = img.shape[1:]
    x = img.unsqueeze(0).to(dev)
    preds = model(x)
    out = postprocess(preds, W, H, score_threshold=float(score_thr))
    if out is None:
        print("No detections.")
        return
    classes, scores, boxes, masks = out  # expected masks: [N, H, W]
    print(f"Detections: {len(scores)}")

    # GT as numpy [K,H,W]
    if isinstance(gt_masks, torch.Tensor):
        gt_bin = (gt_masks.detach().cpu().float() > 0.5).numpy()
        if gt_bin.ndim == 2:
            gt_bin = gt_bin[None, ...]
    else:
        gt_bin = np.asarray(gt_masks)
        if gt_bin.ndim == 2:
            gt_bin = gt_bin[None, ...]

    # 예측 마스크 유효성 간단 진단
    if masks is None or len(masks) == 0:
        print("⚠️ No predicted masks returned by postprocess.")
        return
    # 마스크 값 범위/유효 픽셀 간단 통계
    m0 = masks[0].detach().cpu().numpy()
    print(f"[PRED MASK] sample0: shape={m0.shape}, min/max=({m0.min():.4f},{m0.max():.4f}), nonzero={(m0>0).sum()}")

    m = min(len(scores), gt_bin.shape[0], max_pairs)
    if m == 0:
        print("No comparable masks (either pred or gt is empty).")
        return

    ious = []
    for i in range(m):
        pm = (masks[i].detach().cpu().numpy() > 0.5).astype(np.uint8)
        gm = gt_bin[i].astype(np.uint8) if i < gt_bin.shape[0] else gt_bin[0].astype(np.uint8)
        iou = quick_mask_iou(pm, gm)
        ious.append(iou)
        print(f"  - pair #{i}: IoU={iou:.4f}")
    if all(i == 0.0 for i in ious):
        print("⚠️ 모든 샘플 IoU=0 → 마스크 정렬/크롭/스케일/포맷 문제 가능성 큼 (box는 정상일 수 있음).")

probe_one_image_mask_iou(model, dataset, index=0, score_thr=0.1, dev=device(), max_pairs=5)



=== One-Image Mask IoU Probe (idx=0, score_thr=0.1) ===
Detections: 200
[PRED MASK] sample0: shape=(1024, 1024), min/max=(0.0000,0.0000), nonzero=0
  - pair #0: IoU=0.0000
  - pair #1: IoU=0.0000
  - pair #2: IoU=0.0000
  - pair #3: IoU=0.0000
  - pair #4: IoU=0.0000
⚠️ 모든 샘플 IoU=0 → 마스크 정렬/크롭/스케일/포맷 문제 가능성 큼 (box는 정상일 수 있음).


In [16]:
def inject_eval_flags(max_images: int, score_thr=0.05, top_k=200):
    print("\n=== Inject eval.py flags ===")
    print(f"--max_images={max_images}, --score_threshold={score_thr}, --top_k={top_k}")
    eval_script.parse_args([
        '--no_bar',
        f'--max_images={int(max_images)}',
        f'--score_threshold={float(score_thr)}',
        f'--top_k={int(top_k)}'
    ])

inject_eval_flags(VALIDATION_SIZE, score_thr=EVAL_SCORE_THR, top_k=EVAL_TOP_K)



=== Inject eval.py flags ===
--max_images=200, --score_threshold=0.05, --top_k=200


In [17]:
print("\n=== Runtime Mask-Crop Toggle (for quick test) ===")
before_crop  = getattr(cfg, "mask_proto_crop", None)
before_cropb = getattr(cfg, "mask_proto_crop_with_pred_box", None)
print("BEFORE  - mask_proto_crop:", before_crop, " mask_proto_crop_with_pred_box:", before_cropb)

cfg.mask_proto_crop = True
cfg.mask_proto_crop_with_pred_box = True

print("APPLIED - mask_proto_crop:", cfg.mask_proto_crop, " mask_proto_crop_with_pred_box:", cfg.mask_proto_crop_with_pred_box)

# 모델은 cfg를 참조해 postprocess하므로, 같은 모델로 다시 probe
sanity_probe(model, dataset, device(), k=K, score_thr=PROBE_SCORE_THR)
probe_one_image_mask_iou(model, dataset, index=0, score_thr=0.1, dev=device(), max_pairs=5)



=== Runtime Mask-Crop Toggle (for quick test) ===
BEFORE  - mask_proto_crop: True  mask_proto_crop_with_pred_box: True
APPLIED - mask_proto_crop: True  mask_proto_crop_with_pred_box: True

=== Postprocess Sanity Probe (k=3, score_thr=0.01) ===
- sample #0: det=200, score[min,max]=(0.23720140755176544,0.965642511844635), classes={0: 200}
- sample #1: det=200, score[min,max]=(0.1616656333208084,0.8309450745582581), classes={0: 200}
- sample #2: det=200, score[min,max]=(0.20879216492176056,0.8090166449546814), classes={0: 200}

=== One-Image Mask IoU Probe (idx=0, score_thr=0.1) ===
Detections: 200
[PRED MASK] sample0: shape=(1024, 1024), min/max=(0.0000,0.0000), nonzero=0
  - pair #0: IoU=0.0000
  - pair #1: IoU=0.0000
  - pair #2: IoU=0.0000
  - pair #3: IoU=0.0000
  - pair #4: IoU=0.0000
⚠️ 모든 샘플 IoU=0 → 마스크 정렬/크롭/스케일/포맷 문제 가능성 큼 (box는 정상일 수 있음).


In [18]:
import torch
import numpy as np

@torch.no_grad()
def inspect_proto_and_coeffs(model, ds, idx=0, dev=None):
    dev = dev or device()
    model.eval()

    img, (tgt, gt_masks, num_crowd) = ds[idx]
    H, W = img.shape[1:]
    x = img.unsqueeze(0).to(dev)
    preds = model(x)

    print("\n=== RAW OUTPUT KEYS / TYPES ===")
    proto = None
    coeff = None
    out_type = type(preds).__name__
    print("type:", out_type)

    # 1) dict인 경우 키 탐색
    if isinstance(preds, dict):
        keys = list(preds.keys())
        print("keys:", keys)

        # proto 후보 키들
        for k in ["proto", "proto_out", "mask_proto", "proto_pooled", "proto_masks"]:
            if k in preds and isinstance(preds[k], torch.Tensor):
                proto = preds[k]
                print(f"[FOUND] proto='{k}' shape={tuple(proto.shape)}")
                break

        # coeff 후보 키들
        for k in ["mask", "mask_coeff", "coeff", "coeffs", "proto_coeff", "mask_coefficients"]:
            if k in preds and isinstance(preds[k], torch.Tensor):
                coeff = preds[k]
                print(f"[FOUND] coeff='{k}' shape={tuple(coeff.shape)}")
                break

    # 2) list/tuple인 경우 모양으로 추정
    elif isinstance(preds, (list, tuple)):
        for i, p in enumerate(preds):
            if isinstance(p, torch.Tensor):
                shape = tuple(p.shape)
                print(f"idx {i}: {shape}")
                # 대략적인 휴리스틱: proto는 [B, H', W', K], coeff는 [N, K]
                if proto is None and len(shape) == 4 and shape[0] == 1 and min(shape[1],shape[2])<=128 and shape[3]>=8:
                    proto = p
                    print(f"[GUESS] proto at idx {i}")
                if coeff is None and len(shape) == 2 and shape[1] >= 8 and shape[0] > 0:
                    coeff = p
                    print(f"[GUESS] coeff at idx {i}")

    # 통계 출력
    def stat(name, t):
        if t is None:
            print(f"{name}: None")
            return
        a = t.detach().float().cpu()
        print(f"{name}: shape={tuple(a.shape)}, min={a.min().item():.4g}, max={a.max().item():.4g}, mean={a.mean().item():.4g}, norm={a.norm().item():.4g}")

    print("\n=== STATS ===")
    stat("PROTO", proto)
    stat("COEFF", coeff)

    # 반환해서 다음 셀에서 바로 재사용 가능
    return proto, coeff, preds

proto, coeff, raw_preds = inspect_proto_and_coeffs(model, dataset, idx=0, dev=device())



=== RAW OUTPUT KEYS / TYPES ===
type: list

=== STATS ===
PROTO: None
COEFF: None


In [19]:
import torch.nn.functional as F

def try_direct_lincomb(proto, coeff, top_n=5, upsample_to=None):
    """
    proto: [1, H', W', K] 또는 [H', W', K] 가능
    coeff: [N, K]
    """
    if proto is None or coeff is None:
        print("No proto/coeff to compose.")
        return

    p = proto
    if p.dim()==4 and p.shape[0]==1:
        p = p[0]  # [H', W', K]
    if p.dim()!=3:
        print("Unexpected proto shape:", tuple(proto.shape))
        return

    H_, W_, K = p.shape
    N, K2 = coeff.shape
    if K != K2:
        print(f"Channel mismatch: proto.K={K}, coeff.K={K2}")
        return

    # 상위 N개 (최대 top_n)만 합성
    n = min(N, top_n)
    masks = []
    for i in range(n):
        c = coeff[i].detach().cpu().float()           # [K]
        m_small = (p.detach().cpu().float() @ c).numpy()   # [H', W']
        masks.append(m_small)

    masks = np.stack(masks, axis=0)  # [n, H', W']
    print(f"[LINCOMB] small masks: shape={masks.shape}, min={masks.min():.4f}, max={masks.max():.4f}, mean={masks.mean():.4f}")

    # 업샘플 테스트 (선택)
    if upsample_to is not None:
        H, W = upsample_to
        t = torch.from_numpy(masks).unsqueeze(1)      # [n,1,H',W']
        t_up = F.interpolate(t, size=(H, W), mode="bilinear", align_corners=False)
        mu = t_up.squeeze(1).numpy()
        print(f"[LINCOMB] upsampled: shape={mu.shape}, min={mu.min():.4f}, max={mu.max():.4f}, mean={mu.mean():.4f}")
        nz = (mu > 0).sum()
        print(f"[LINCOMB] upsampled nonzero: {nz}")
    return masks

# proto/coeff가 있을 때만 실행
_ = try_direct_lincomb(proto, coeff, top_n=5, upsample_to=(dataset[0][0].shape[1], dataset[0][0].shape[2]))


No proto/coeff to compose.


In [20]:
from layers.modules import MultiBoxLoss
from utils.augmentations import SSDAugmentation

@torch.no_grad()
def quick_mask_loss_probe(sample_n=4):
    train_set = COCODetection(cfg.dataset.train_images, cfg.dataset.train_info, transform=SSDAugmentation(MEANS))
    if len(train_set) == 0:
        print("Train set empty.")
        return
    batch = [train_set[i] for i in range(min(sample_n, len(train_set)))]
    images = torch.stack([b[0] for b in batch], 0).to(device())  # [B,3,H,W]
    targets = [b[1][0].to(device()) for b in batch]              # 포크에 따라 조정
    gt_masks = [b[1][1].to(device()) for b in batch]
    num_crowds = [b[1][2] for b in batch]                        # 그대로 전달만

    model.eval()
    preds = model(images)

    crit = MultiBoxLoss(num_classes=cfg.num_classes,
                        pos_threshold=cfg.positive_iou_threshold,
                        neg_threshold=cfg.negative_iou_threshold,
                        negpos_ratio=cfg.ohem_negpos_ratio)
    # 포크별로 wrapper 필요할 수 있음. 기본 시도:
    try:
        losses = crit(model, preds, targets, gt_masks, num_crowds)
        # 보통 키 'M' (mask), 'B','C','P' 등 존재
        print("Loss keys:", list(losses.keys()))
        for k,v in losses.items():
            print(f" {k}: {float(v.mean()):.6f}")
    except Exception as e:
        print("Loss probe failed:", e)

quick_mask_loss_probe(sample_n=2)


loading annotations into memory...
Done (t=0.19s)
creating index...
index created!


AttributeError: 'numpy.ndarray' object has no attribute 'to'

In [21]:
import torch

@torch.no_grad()
def inspect_list_outputs(model, ds, idx=0, dev=None):
    dev = dev or device()
    model.eval()
    img, _ = ds[idx]
    x = img.unsqueeze(0).to(dev)
    outs = model(x)

    print("\n=== LIST OUTPUT INSPECTION ===")
    print("type:", type(outs).__name__)
    if not isinstance(outs, (list, tuple)):
        print("Not a list/tuple. Got:", type(outs))
        return outs, {}

    roles = {}
    for i, t in enumerate(outs):
        if isinstance(t, torch.Tensor):
            shp = tuple(t.shape)
            print(f"idx {i}: Tensor shape={shp}")
            # 휴리스틱으로 역할 추정
            # - loc: [B, N, 4]
            # - conf: [B, N, C]
            # - coeff(mask): [B, N, K]  (K >= 8)
            # - priors: [N, 4] or [1, N, 4]
            # - proto: [B, H', W', K] (K >= 8, H' W' 비교적 작음)
            B = shp[0] if len(shp) >= 1 else None
            if len(shp) == 3 and shp[-1] == 4 and B is not None:
                roles.setdefault('loc_idx', []).append(i)
            elif len(shp) == 3 and B is not None and shp[-1] >= 2:
                # conf or coeff
                roles.setdefault('conf_or_coeff_idx', []).append((i, shp[-1]))
            elif len(shp) == 2 and shp[-1] == 4:
                roles.setdefault('priors_idx', []).append(i)
            elif len(shp) == 4 and shp[0] == 1 and shp[-1] >= 8:
                roles.setdefault('proto_idx', []).append(i)
        else:
            print(f"idx {i}: type={type(t)}")

    print("\n=== ROLE GUESSES ===")
    for k,v in roles.items():
        print(k, ":", v)
    return outs, roles

raw_list, role_guess = inspect_list_outputs(model, dataset, idx=0, dev=device())



=== LIST OUTPUT INSPECTION ===
type: list
idx 0: type=<class 'dict'>

=== ROLE GUESSES ===


In [22]:
def probe_model_mask_modules(m):
    print("\n=== MODEL MASK MODULES PRESENCE ===")
    for name in ["proto_net", "mask_linear", "mask_upconv", "maskiou_net", "prediction_layers"]:
        print(f"hasattr(model, '{name}'):", hasattr(m, name))
    # cfg에 설정된 mask_type/옵션도 다시 확인
    print("\n=== CFG MASK SETTINGS ===")
    print("mask_type:", getattr(cfg, "mask_type", None))
    print("mask_size:", getattr(cfg, "mask_size", None))
    print("mask_proto_net exists:", getattr(cfg, "mask_proto_net", None) is not None)
    print("mask_proto_src:", getattr(cfg, "mask_proto_src", None))
    print("mask_proto_crop:", getattr(cfg, "mask_proto_crop", None))
    print("mask_proto_crop_with_pred_box:", getattr(cfg, "mask_proto_crop_with_pred_box", None))
    print("use_maskiou:", getattr(cfg, "use_maskiou", None), "rescore_mask:", getattr(cfg, "rescore_mask", None))

probe_model_mask_modules(model)



=== MODEL MASK MODULES PRESENCE ===
hasattr(model, 'proto_net'): True
hasattr(model, 'mask_linear'): False
hasattr(model, 'mask_upconv'): False
hasattr(model, 'maskiou_net'): False
hasattr(model, 'prediction_layers'): True

=== CFG MASK SETTINGS ===
mask_type: 1
mask_size: 16
mask_proto_net exists: True
mask_proto_src: 0
mask_proto_crop: True
mask_proto_crop_with_pred_box: True
use_maskiou: False rescore_mask: True


In [23]:
import numpy as np
import torch
import torch.nn.functional as F

def try_lincomb_from_list_outputs(outs, roles, H, W):
    """
    outs: list/tuple of tensors from model(x)
    roles: from D1
    H, W: upsample target size
    """
    # proto 후보
    proto = None
    if 'proto_idx' in roles and len(roles['proto_idx']) > 0:
        pi = roles['proto_idx'][0]
        proto = outs[pi]
        if proto.dim()==4 and proto.shape[0]==1:
            proto = proto[0]  # [H',W',K]

    # conf/coeff 후보들
    coeff = None
    num_classes = getattr(cfg, "num_classes", None)
    if 'conf_or_coeff_idx' in roles:
        # conf 채널 수(=num_classes)와 일치하면 conf, 그 외 큰 채널(K) 쪽을 coeff로 가정
        for i, ch in roles['conf_or_coeff_idx']:
            if num_classes is not None and ch == num_classes:
                pass  # conf
            elif ch >= 8:
                coeff = outs[i]
                break

    if proto is None or coeff is None:
        print("No proto/coeff found -> cannot compose lincomb.")
        return

    # [B,N,K] -> N,K
    if coeff.dim()==3:
        coeff = coeff[0]

    print("\n=== LINCOMB FROM LIST OUTS ===")
    print("proto:", tuple(proto.shape), "coeff:", tuple(coeff.shape))
    P_H, P_W, K = proto.shape
    N, K2 = coeff.shape
    if K != K2:
        print(f"Channel mismatch: proto.K={K}, coeff.K={K2}")
        return

    # 상위 5개만 테스트
    n = min(N, 5)
    p_np = proto.detach().cpu().float().numpy()   # [H',W',K]
    masks_small = []
    for i in range(n):
        c = coeff[i].detach().cpu().float().numpy()  # [K]
        m = p_np @ c                                  # [H',W']
        masks_small.append(m)
    masks_small = np.stack(masks_small, 0)            # [n,H',W']
    print(f"[small] shape={masks_small.shape}, min={masks_small.min():.4f}, max={masks_small.max():.4f}, mean={masks_small.mean():.4f}")

    # 업샘플
    t = torch.from_numpy(masks_small).unsqueeze(1)
    up = F.interpolate(t, size=(H, W), mode="bilinear", align_corners=False).squeeze(1).numpy()
    nz = (up > 0).sum()
    print(f"[upsampled] shape={up.shape}, min={up.min():.4f}, max={up.max():.4f}, mean={up.mean():.4f}, nonzero={nz}")

# 실행
img0, _ = dataset[0]
H0, W0 = img0.shape[1:]
try_lincomb_from_list_outputs(raw_list, role_guess, H0, W0)


No proto/coeff found -> cannot compose lincomb.


In [24]:
# 최대한 호환을 맞추는 손쉬운 버전 (없는 경우 자동 스킵)
try:
    from layers.modules import MultiBoxLoss
    from utils.augmentations import SSDAugmentation
    # 어떤 포크는 ScatterWrapper가 여기/다른 모듈에 존재
    try:
        from layers.output_utils import ScatterWrapper
    except Exception:
        ScatterWrapper = None

    @torch.no_grad()
    def quick_mask_loss_probe_safe(sample_n=2):
        train_set = COCODetection(cfg.dataset.train_images, cfg.dataset.train_info, transform=SSDAugmentation(MEANS))
        if len(train_set) == 0:
            print("Train set empty.")
            return
        batch = [train_set[i] for i in range(min(sample_n, len(train_set)))]
        images = torch.stack([b[0] for b in batch], 0).to(device())
        targets = [b[1][0].to(device()) for b in batch]
        gt_masks = [b[1][1].to(device()) for b in batch]
        num_crowds = [b[1][2] for b in batch]

        model.eval()
        preds = model(images)

        crit = MultiBoxLoss(num_classes=cfg.num_classes,
                            pos_threshold=cfg.positive_iou_threshold,
                            neg_threshold=cfg.negative_iou_threshold,
                            negpos_ratio=cfg.ohem_negpos_ratio)

        try:
            if ScatterWrapper is not None and isinstance(preds, (list, tuple)):
                # 일부 포크는 이렇게 호출: criterion(preds, wrapper, mask)
                wrapper = ScatterWrapper(targets, gt_masks, num_crowds)
                losses = crit(preds, wrapper, wrapper.make_mask())
            else:
                # train.py의 NetLoss처럼 호출 (포크에 따라 다름)
                losses = crit(model, preds, targets, gt_masks, num_crowds)

            print("Loss keys:", list(losses.keys()))
            for k,v in losses.items():
                try:
                    vv = float(v.mean())
                except Exception:
                    vv = float(v)
                print(f" {k}: {vv:.6f}")
        except Exception as e:
            print("Loss probe failed:", e)

    quick_mask_loss_probe_safe(sample_n=2)

except Exception as e:
    print("Could not import MultiBoxLoss compatible path:", e)


loading annotations into memory...
Done (t=0.12s)
creating index...
index created!
Could not import MultiBoxLoss compatible path: 'numpy.ndarray' object has no attribute 'to'


In [25]:
import torch

@torch.no_grad()
def inspect_inner_dict(model, ds, idx=0, dev=None):
    dev = dev or device()
    model.eval()
    img, _ = ds[idx]
    x = img.unsqueeze(0).to(dev)
    outs = model(x)

    print("\n=== INNER DICT INSPECTION ===")
    if not (isinstance(outs, (list, tuple)) and len(outs) > 0 and isinstance(outs[0], dict)):
        print("Unexpected structure. Got:", type(outs))
        return None

    d = outs[0]
    print("dict keys:", list(d.keys()))
    for k, v in d.items():
        if isinstance(v, torch.Tensor):
            t = v.detach().float().cpu()
            shp = tuple(t.shape)
            try:
                vmin, vmax, vmean, vnorm = t.min().item(), t.max().item(), t.mean().item(), t.norm().item()
                print(f"  - {k:12s} shape={shp}  min={vmin:.4g} max={vmax:.4g} mean={vmean:.4g} norm={vnorm:.4g}")
            except Exception:
                print(f"  - {k:12s} shape={shp}")
        else:
            print(f"  - {k:12s} type={type(v)}")
    return d

inner = inspect_inner_dict(model, dataset, idx=0, dev=device())



=== INNER DICT INSPECTION ===
dict keys: ['detection', 'net']
  - detection    type=<class 'dict'>
  - net          type=<class 'yolact.Yolact'>


In [26]:
import numpy as np
import torch
from layers.output_utils import postprocess

def build_postprocess_tuple_from_inner(d):
    """
    당신 포크의 키 이름을 추정해서 (loc, conf, mask_coeff, priors, proto) 튜플을 만들어줍니다.
    없는 건 None 반환.
    """
    # 흔한 키 이름 후보
    loc    = d.get('loc')    or d.get('boxes') or d.get('bbox')
    conf   = d.get('conf')   or d.get('scores') or d.get('cls')
    coeff  = d.get('mask')   or d.get('mask_coeff') or d.get('coeff') or d.get('mask_coefficients')
    priors = d.get('priors') or d.get('anchors') or d.get('default_boxes') or d.get('dboxes')
    proto  = d.get('proto')  or d.get('proto_out') or d.get('mask_proto') or d.get('proto_masks') or d.get('proto_pooled')

    missing = [name for name, obj in [('loc',loc),('conf',conf),('coeff',coeff),('priors',priors),('proto',proto)] if obj is None]
    if missing:
        print("⚠️ Missing keys for postprocess:", missing)
    return (loc, conf, coeff, priors, proto)

@torch.no_grad()
def try_postprocess_from_inner(model, ds, idx=0, score_thr=0.05, dev=None):
    dev = dev or device()
    model.eval()
    img, _ = ds[idx]
    H, W = img.shape[1:]
    x = img.unsqueeze(0).to(dev)
    outs = model(x)
    if not (isinstance(outs, (list, tuple)) and len(outs) > 0 and isinstance(outs[0], dict)):
        print("Structure not supported for this probe.")
        return
    d = outs[0]
    loc, conf, coeff, priors, proto = build_postprocess_tuple_from_inner(d)

    # postprocess는 대개 (preds_tuple, W, H, ...) 형태를 받습니다.
    preds_tuple = (loc, conf, coeff, priors, proto)
    out = postprocess(preds_tuple, W, H, score_threshold=float(score_thr))
    if out is None:
        print("postprocess returned None.")
        return
    classes, scores, boxes, masks = out
    print(f"Detections: {len(scores)}")
    if masks is None or len(masks) == 0:
        print("⚠️ No predicted masks returned.")
    else:
        m0 = masks[0].detach().cpu().numpy()
        print(f"[PRED MASK] sample0: shape={m0.shape}, min/max=({m0.min():.4f},{m0.max():.4f}), nonzero={(m0>0).sum()}")

try_postprocess_from_inner(model, dataset, idx=0, score_thr=0.05, dev=device())


⚠️ Missing keys for postprocess: ['loc', 'conf', 'coeff', 'priors', 'proto']


TypeError: 'NoneType' object is not subscriptable

In [28]:
import torch

@torch.no_grad()
def inspect_detection_deep(model, ds, idx=0, dev=None, max_items=50):
    dev = dev or device()
    model.eval()
    img, _ = ds[idx]
    x = img.unsqueeze(0).to(dev)
    outs = model(x)

    print("\n=== DETECTION DICT (DEEP INSPECTION) ===")
    if not (isinstance(outs, (list, tuple)) and len(outs) > 0 and isinstance(outs[0], dict)):
        print("Unexpected structure:", type(outs))
        return None

    inner = outs[0]
    det = inner.get('detection', None)
    if not isinstance(det, dict):
        print("No 'detection' dict. Got:", type(det))
        return None

    keys = list(det.keys())
    print("detection.keys:", keys[:max_items])

    # 흔히 등장하는 키 후보들을 먼저 보자
    likely_proto_keys = ['proto', 'proto_out', 'mask_proto', 'proto_masks', 'proto_pooled']
    likely_coeff_keys = ['mask', 'mask_coeff', 'coeff', 'mask_coefficients', 'proto_coeff']

    found = {'proto': None, 'coeff': None, 'boxes': None, 'scores': None, 'classes': None, 'priors': None}
    for k in keys:
        v = det[k]
        if isinstance(v, torch.Tensor):
            t = v.detach().float().cpu()
            shp = tuple(t.shape)
            vmin, vmax, vmean, vnorm = t.min().item(), t.max().item(), t.mean().item(), t.norm().item()
            print(f"  - {k:16s} shape={shp}  min={vmin:.4g} max={vmax:.4g} mean={vmean:.4g} norm={vnorm:.4g}")

            # 역할 추정
            if k in likely_proto_keys and found['proto'] is None:
                found['proto'] = t
            if k in likely_coeff_keys and found['coeff'] is None:
                found['coeff'] = t
            if 'box' in k and found['boxes'] is None:
                found['boxes'] = t
            if 'score' in k and found['scores'] is None:
                found['scores'] = t
            if 'class' in k and found['classes'] is None:
                found['classes'] = t
            if 'prior' in k or 'anchor' in k or 'dflt' in k:
                found['priors'] = t
        else:
            print(f"  - {k:16s} type={type(v)}")

    # 요약
    print("\n=== SUMMARY (LIKELY) ===")
    for name, val in found.items():
        if isinstance(val, torch.Tensor):
            print(f"{name:8s}: shape={tuple(val.shape)}  min={float(val.min()):.4g} max={float(val.max()):.4g} mean={float(val.mean()):.4g}")
        else:
            print(f"{name:8s}: {val}")

    return det, found

det_dict, found = inspect_detection_deep(model, dataset, idx=0, dev=device())



=== DETECTION DICT (DEEP INSPECTION) ===
detection.keys: ['box', 'mask', 'class', 'score', 'proto']
  - box              shape=(200, 4)  min=-0.00352 max=1 mean=0.5048 norm=15.73
  - mask             shape=(200, 32)  min=-0.9992 max=0.9968 mean=-0.1118 norm=54.71
  - class            shape=(200,)  min=0 max=0 mean=0 norm=0
  - score            shape=(200,)  min=0.2372 max=0.9656 mean=0.3782 norm=5.916
  - proto            shape=(256, 256, 32)  min=0 max=10.11 mean=0.5887 norm=1845

=== SUMMARY (LIKELY) ===
proto   : shape=(256, 256, 32)  min=0 max=10.11 mean=0.5887
coeff   : shape=(200, 32)  min=-0.9992 max=0.9968 mean=-0.1118
boxes   : shape=(200, 4)  min=-0.00352 max=1 mean=0.5048
scores  : shape=(200,)  min=0.2372 max=0.9656 mean=0.3782
classes : shape=(200,)  min=0 max=0 mean=0
priors  : None


In [29]:
import numpy as np
import torch.nn.functional as F

def try_lincomb_from_detection(found, H, W, top_n=5):
    proto = found.get('proto', None)
    coeff = found.get('coeff', None)
    if proto is None or coeff is None:
        print("No proto/coeff in detection dict.")
        return

    p = proto  # CPU float tensor
    c = coeff  # CPU float tensor

    # proto shape 정규화: [H', W', K]
    if p.dim() == 4 and p.shape[0] == 1:
        p = p[0]
    if p.dim() != 3:
        print("Unexpected proto shape:", tuple(p.shape))
        return

    H_, W_, K = p.shape
    if c.dim() == 3:  # [B, N, K]
        c = c[0]
    if c.dim() != 2:
        print("Unexpected coeff shape:", tuple(c.shape))
        return
    N, K2 = c.shape
    if K != K2:
        print(f"Channel mismatch: proto.K={K}, coeff.K={K2}")
        return

    n = min(N, top_n)
    p_np = p.numpy()
    masks_small = []
    for i in range(n):
        m = p_np @ c[i].numpy()  # [H', W']
        masks_small.append(m)
    masks_small = np.stack(masks_small, 0)  # [n,H',W']
    print(f"[small] shape={masks_small.shape}, min={masks_small.min():.4f}, max={masks_small.max():.4f}, mean={masks_small.mean():.4f}")

    # 업샘플
    t = torch.from_numpy(masks_small).unsqueeze(1)         # [n,1,H',W']
    up = F.interpolate(t, size=(H, W), mode="bilinear", align_corners=False).squeeze(1).numpy()
    nz = (up > 0).sum()
    print(f"[upsampled] shape={up.shape}, min={up.min():.4f}, max={up.max():.4f}, mean={up.mean():.4f}, nonzero={nz}")

# 실행
img0, _ = dataset[0]
H0, W0 = img0.shape[1:]
try_lincomb_from_detection(found, H0, W0, top_n=5)


[small] shape=(5, 256, 256), min=-36.7743, max=-1.7035, mean=-14.6955
[upsampled] shape=(5, 1024, 1024), min=-36.7641, max=-1.7035, mean=-14.6955, nonzero=0


In [30]:
import torch, numpy as np
import torch.nn.functional as F

@torch.no_grad()
def compose_masks_with_sigmoid_and_crop(model, ds, idx=0, score_thr=0.05, bin_thr=0.5, dev=None, top_k=5):
    dev = dev or (torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
    model.eval()
    img, (tgt, gt_masks, num_crowd) = ds[idx]
    H, W = img.shape[1:]
    x = img.unsqueeze(0).to(dev)

    # 모델 출력
    outs = model(x)
    det = outs[0]['detection']
    boxes = det['box'].detach().cpu()           # [N,4] in xyxy (0~1) or abs? *당신 포크 기준 지금 0~1*
    scores = det['score'].detach().cpu()        # [N]
    classes = det['class'].detach().cpu()       # [N]
    coeff  = det['mask'].detach().cpu().float() # [N,K]
    proto  = det['proto'].detach().cpu().float()# [H',W',K]  (256,256,K)

    # 상위 N 선택
    keep = torch.nonzero(scores >= score_thr).squeeze(1)
    if keep.numel() == 0:
        print("No det above score_thr.")
        return
    keep = keep[:top_k]
    boxes = boxes[keep]
    scores_k = scores[keep]
    classes_k = classes[keep]
    coeff_k = coeff[keep]           # [n,K]

    # lincomb + sigmoid
    # proto: [H',W',K], coeff: [n,K]
    Hs, Ws, K = proto.shape
    m_small = torch.einsum('hwk,nk->nhw', proto, coeff_k)  # [n,H',W']
    m_prob  = torch.sigmoid(m_small)                       # [n,H',W']

    # 업샘플 → [n,1,H,W]
    m_up = F.interpolate(m_prob.unsqueeze(1), size=(H, W), mode='bilinear', align_corners=False).squeeze(1)  # [n,H,W]

    # 크롭 (xyxy가 0~1 기준이라 가정)
    masks_bin = []
    for i, box in enumerate(boxes):
        x1,y1,x2,y2 = box
        # 정규화→픽셀좌표
        x1 = int((x1.item())*W) if x1<=1.0 else int(x1.item())
        x2 = int((x2.item())*W) if x2<=1.0 else int(x2.item())
        y1 = int((y1.item())*H) if y1<=1.0 else int(y1.item())
        y2 = int((y2.item())*H) if y2<=1.0 else int(y2.item())
        x1,x2 = max(0,x1), min(W,x2)
        y1,y2 = max(0,y1), min(H,y2)

        m = m_up[i].clone()
        # (선택) pred box 바깥은 0으로 마스킹
        if x2>x1 and y2>y1:
            pad = torch.zeros_like(m)
            pad[y1:y2, x1:x2] = m[y1:y2, x1:x2]
            m = pad
        m = (m > bin_thr).to(torch.uint8)  # 이진화
        masks_bin.append(m.numpy())
    masks_bin = np.stack(masks_bin, 0)  # [n,H,W]

    # GT 준비
    if isinstance(gt_masks, torch.Tensor):
        gt = (gt_masks.detach().cpu().float() > 0.5).numpy()
        if gt.ndim==2: gt = gt[None,...]
    else:
        gt = np.asarray(gt_masks)
        if gt.ndim==2: gt = gt[None,...]

    # IoU 간단 샘플
    def iou(a,b):
        inter = np.logical_and(a,b).sum()
        union = np.logical_or(a,b).sum()
        return (inter/union) if union>0 else 0.0

    print(f"[DEBUG] masks_bin: shape={masks_bin.shape}, nonzero_total={(masks_bin>0).sum()}")
    n = min(masks_bin.shape[0], gt.shape[0], 5)
    for i in range(n):
        print(f"  - pair#{i} IoU={iou(masks_bin[i], gt[i]):.4f} (score={float(scores_k[i]):.3f})")

# 먼저 기본 임계치로
compose_masks_with_sigmoid_and_crop(model, dataset, idx=0, score_thr=0.05, bin_thr=0.5, top_k=5)
# 그리고 조금 더 느슨하게 (임시로 마스크 이진화 임계치 낮춰 보기)
compose_masks_with_sigmoid_and_crop(model, dataset, idx=0, score_thr=0.05, bin_thr=0.3, top_k=5)


[DEBUG] masks_bin: shape=(5, 1024, 1024), nonzero_total=0
  - pair#0 IoU=0.0000 (score=0.966)
  - pair#1 IoU=0.0000 (score=0.943)
  - pair#2 IoU=0.0000 (score=0.938)
  - pair#3 IoU=0.0000 (score=0.932)
  - pair#4 IoU=0.0000 (score=0.922)
[DEBUG] masks_bin: shape=(5, 1024, 1024), nonzero_total=0
  - pair#0 IoU=0.0000 (score=0.966)
  - pair#1 IoU=0.0000 (score=0.943)
  - pair#2 IoU=0.0000 (score=0.938)
  - pair#3 IoU=0.0000 (score=0.932)
  - pair#4 IoU=0.0000 (score=0.922)


In [31]:
import types
import torch
import torch.nn.functional as F
import numpy as np
import layers.output_utils as ou

def postprocess_patched(det_output, w, h, batch_idx=0, interpolation_mode='bilinear',
                        visualize_lincomb=False, crop_masks=True, score_threshold=0.05, bin_thr=0.5):
    """
    - 당신 포크의 구조( dict{'net':..., 'detection': {...}} )를 그대로 지원
    - lincomb 후 sigmoid 적용을 보장하고, bin_thr로 이진화
    - 매우 단순화(속성/옵션 최소화); 정확한 정합은 프로젝트 postprocess를 참고해 반영
    """
    # 1) 입력 정규화
    if isinstance(det_output, (list, tuple)) and len(det_output)>0 and isinstance(det_output[0], dict):
        dd = det_output[0]
    elif isinstance(det_output, dict) and 'detection' in det_output:
        dd = det_output
    else:
        return None

    net = dd['net'] if 'net' in dd else None
    det = dd['detection'] if 'detection' in dd else dd

    boxes  = det.get('box', None)      # [N,4] (0~1 xyxy라고 가정)
    scores = det.get('score', None)    # [N]
    classes= det.get('class', None)    # [N]
    coeff  = det.get('mask', None)     # [N,K]
    proto  = det.get('proto', None)    # [H',W',K]

    if any(x is None for x in [boxes, scores, classes, coeff, proto]):
        return None
    if len(scores)==0:
        return None

    # 2) 상위 score 필터
    keep = torch.nonzero(scores >= score_threshold).squeeze(1)
    if keep.numel()==0:
        return None
    boxes, scores, classes, coeff = boxes[keep], scores[keep], classes[keep], coeff[keep]

    # 3) lincomb + sigmoid
    Hs, Ws, K = proto.shape
    m_small = torch.einsum('hwk,nk->nhw', proto, coeff.float().cpu())  # [n,H',W']
    m_prob  = torch.sigmoid(m_small)

    # 4) 업샘플
    m_up = F.interpolate(m_prob.unsqueeze(1), size=(h, w), mode=interpolation_mode, align_corners=False).squeeze(1)  # [n,H,W]

    # 5) pred box 바깥 0 (crop)
    if crop_masks:
        m_up_np = m_up.clone()
        for i, box in enumerate(boxes):
            x1,y1,x2,y2 = box
            # 정규화 좌표 가정 → 픽셀로
            x1 = int((x1.item())*w) if x1<=1.0 else int(x1.item())
            x2 = int((x2.item())*w) if x2<=1.0 else int(x2.item())
            y1 = int((y1.item())*h) if y1<=1.0 else int(y1.item())
            y2 = int((y2.item())*h) if y2<=1.0 else int(y2.item())
            x1,x2 = max(0,x1), min(w,x2); y1,y2 = max(0,y1), min(h,y2)
            pad = torch.zeros_like(m_up_np[i])
            if x2>x1 and y2>y1:
                pad[y1:y2, x1:x2] = m_up_np[i][y1:y2, x1:x2]
            m_up[i] = pad

    # 6) 이진화
    m_bin = (m_up > bin_thr)

    # 출력 포맷 맞추기: (classes, scores, boxes, masks)
    return (classes.cpu(), scores.cpu(), boxes.cpu(), m_bin.cpu())

# monkey patch
ou.postprocess = postprocess_patched

# 패치 후 sanity
from layers.output_utils import postprocess
out = postprocess(model(dataset[0][0].unsqueeze(0).to(device())), dataset[0][0].shape[2], dataset[0][0].shape[1],
                  score_threshold=0.05, bin_thr=0.5)
print("postprocess patched output is None?" , out is None)


RuntimeError: Expected all tensors to be on the same device, but got mat2 is on cpu, different from other tensors on cuda:0 (when checking argument in method wrapper_CUDA_bmm)

In [32]:
import torch, torch.nn.functional as F
from layers.output_utils import sanitize_coordinates, crop
import layers.output_utils as ou

def postprocess_patched(det_output, w, h, batch_idx=0, interpolation_mode='bilinear',
                        visualize_lincomb=False, crop_masks=True, score_threshold=0.05, bin_thr=0.5):
    # det_output은 [{'net':..., 'detection': {...}}] 형태
    dets = det_output[batch_idx]
    net = dets['net']
    dets = dets['detection']
    if dets is None:
        return [torch.Tensor()] * 4

    if score_threshold > 0:
        keep = dets['score'] > score_threshold
        for k in dets:
            if k != 'proto':
                dets[k] = dets[k][keep]
        if dets['score'].size(0) == 0:
            return [torch.Tensor()] * 4

    classes = dets['class']
    boxes   = dets['box']
    scores  = dets['score']
    coeff   = dets['mask']     # [N,K]
    proto   = dets['proto']    # [H',W',K]

    # --- 디바이스 맞추기 ---
    dev = proto.device
    coeff = coeff.to(dev)
    boxes = boxes.to(dev)

    # lincomb + sigmoid (활성화 보장)
    masks = torch.einsum('hwk,nk->nhw', proto, coeff.float())  # [n,H',W']
    masks = torch.sigmoid(masks)

    # crop (box는 보통 정규화; sanitize는 아래에서)
    if crop_masks:
        masks_c = masks.permute(1,2,0).contiguous()   # [H',W',n]
        masks_c = crop(masks_c, boxes)                # [H',W',n]
        masks   = masks_c.permute(2,0,1).contiguous() # [n,H',W']

    # 업샘플
    masks = F.interpolate(masks.unsqueeze(1), (h, w), mode=interpolation_mode, align_corners=False).squeeze(1)  # [n,H,W]

    # 이진화
    masks = masks.gt(bin_thr)

    # boxes sanitize (마스크 출력과 무관하지만 원 코드 흐름 유지)
    boxes = boxes.clone()
    boxes[:, 0], boxes[:, 2] = sanitize_coordinates(boxes[:, 0], boxes[:, 2], w, cast=False)
    boxes[:, 1], boxes[:, 3] = sanitize_coordinates(boxes[:, 1], boxes[:, 3], h, cast=False)
    boxes = boxes.long()

    return classes.cpu(), scores.cpu(), boxes.cpu(), masks.cpu()

ou.postprocess = postprocess_patched
print("✅ postprocess patched (device fix + sigmoid + configurable bin_thr)")

# 간단 sanity
img0, _ = dataset[0]
out = ou.postprocess(model(img0.unsqueeze(0).to(device())), img0.shape[2], img0.shape[1],
                     score_threshold=0.05, bin_thr=0.3)
print("None?", out is None)


✅ postprocess patched (device fix + sigmoid + configurable bin_thr)
None? False
