In [3]:
import torch
from PIL import Image
from tqdm import tqdm
import pandas as pd
import os
import json
from pathlib import Path


from engine.core import YAMLConfig
from engine.misc import dist_utils
from engine.data.transforms import Compose, Normalize, Resize, ConvertPILImage
from engine.solver import TASKS

In [4]:
def load_dfine_solver(config_path, checkpoint_path, device="cuda", prefer_ema=False):
    """Build the solver described by a YAML config and load checkpoint weights into it.

    Args:
        config_path: Path to the YAML config handed to ``YAMLConfig``.
        checkpoint_path: Path to a checkpoint readable by ``torch.load``. It is
            also passed to the config as the ``tuning`` option.
        device: Device the model is moved to before returning (default ``"cuda"``).
        prefer_ema: Which weights to take when the checkpoint holds both a
            ``"model"`` and an ``"ema"`` entry. ``False`` (default) keeps the
            original order: ``ckpt["model"]`` first, ``ckpt["ema"]["module"]``
            only as a fallback. ``True`` takes the EMA weights whenever present.
            NOTE(review): upstream inference scripts for this model family
            prefer the EMA weights -- confirm which set matches the reported
            validation metrics before changing the default.

    Returns:
        The solver instance; ``solver.model`` is on ``device`` and in eval mode.

    Raises:
        KeyError: If the checkpoint contains neither a ``"model"`` nor an
            ``"ema"`` entry.
    """
    # Config overrides applied on top of the YAML file.
    update_dict = {
        "resume": None,
        "device": device,
        "seed": 42,
        "tuning": checkpoint_path,
        "use_amp": False,
        "use_ema": True,
    }

    cfg = YAMLConfig(config_path, **update_dict)

    # Switch off the HGNetv2 `pretrained` option when the config has that section
    # (presumably so no backbone weights are fetched; the checkpoint is loaded below).
    if "HGNetv2" in cfg.yaml_cfg:
        cfg.yaml_cfg["HGNetv2"]["pretrained"] = False

    # Pick the solver class registered for the configured task, then let
    # `_setup()` build its components (model, postprocessor, ...).
    solver_cls = TASKS[cfg.yaml_cfg["task"]]
    solver = solver_cls(cfg)
    solver._setup()

    # Load on CPU first; the model is moved to `device` afterwards.
    ckpt = torch.load(checkpoint_path, map_location="cpu")
    has_model = "model" in ckpt
    has_ema = "ema" in ckpt
    if not (has_model or has_ema):
        raise KeyError(
            f"checkpoint {checkpoint_path!r} has neither a 'model' nor an 'ema' entry"
        )
    if has_ema and (prefer_ema or not has_model):
        state = ckpt["ema"]["module"]
    else:
        state = ckpt["model"]

    # strict=False tolerates key mismatches, so report them instead of
    # silently running inference with partially initialised weights.
    load_result = solver.model.load_state_dict(state, strict=False)
    missing = list(getattr(load_result, "missing_keys", ()))
    unexpected = list(getattr(load_result, "unexpected_keys", ()))
    if missing or unexpected:
        print(
            f"[load_dfine_solver] WARNING: {len(missing)} missing and "
            f"{len(unexpected)} unexpected keys while loading {checkpoint_path}; "
            f"missing (first 5): {missing[:5]}, unexpected (first 5): {unexpected[:5]}"
        )

    solver.model.to(device)
    solver.model.eval()
    return solver

In [None]:
def _format_prediction_string(boxes, scores, labels, threshold):
    """Serialize detections as space-separated ``label score x1 y1 x2 y2`` groups.

    Detections scoring below ``threshold`` are dropped. Scores are written with
    4 decimals and box coordinates with 2.
    """
    parts = []
    for box, score, label in zip(boxes, scores, labels):
        if score < threshold:
            continue
        x1, y1, x2, y2 = box
        parts.append(f"{int(label)} {score:.4f} {x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f}")
    return " ".join(parts)


def dfine_test_inference(
    config,
    checkpoint,
    image_dir,
    output_csv="output.csv",
    threshold=0.01,
    device="cuda",
    image_size=(1024, 1024),
    id_prefix="test/",
):
    """Run single-image inference over a directory and write a submission CSV.

    Args:
        config: Path to the model YAML config (forwarded to ``load_dfine_solver``).
        checkpoint: Path to the checkpoint file (forwarded to ``load_dfine_solver``).
        image_dir: Directory searched (non-recursively) for ``*.jpg`` and ``*.png``.
        output_csv: Destination path of the CSV file.
        threshold: Detections scoring below this value are dropped.
        device: Device used for the model and the input tensors.
        image_size: Two-element size given to ``Resize`` (default 1024x1024, the
            value that was previously hard-coded).
        id_prefix: String prepended to each file name in the ``image_id`` column.

    Returns:
        ``pandas.DataFrame`` with columns ``PredictionString`` and ``image_id``;
        the same frame is written to ``output_csv`` without the index.
    """
    print("üîÑ Loading model for Single Inference (Normalized)...")
    solver = load_dfine_solver(config, checkpoint, device)

    model = solver.model
    postprocessor = solver.postprocessor
    model.eval()

    # Preprocessing: resize -> tensor conversion -> normalization.
    transform = Compose([
        Resize(size=list(image_size)),
        # scale=True: per the original note, maps 0-255 pixel values to 0.0-1.0.
        ConvertPILImage(dtype="float32", scale=True),
        # Standard ImageNet statistics; these must match what the model was
        # trained with -- TODO confirm against the training config.
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # All sorted JPGs first, then all sorted PNGs (same order as before).
    image_root = Path(image_dir)
    image_paths = sorted(image_root.glob("*.jpg")) + sorted(image_root.glob("*.png"))

    predictions = []
    filenames = []

    print(f"Starting Inference on {len(image_paths)} images...")

    for img_path in tqdm(image_paths):
        # convert() returns an independent, fully loaded image, so the file
        # handle can be released as soon as the `with` block ends.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        w0, h0 = img.size

        img_tensor = transform(img).unsqueeze(0).to(device)

        with torch.no_grad():
            outputs = model(img_tensor)

        # Map boxes back to original-image coordinates. The size is passed as
        # (width, height): the upstream reference inference script does the
        # same, and the postprocessor is expected to scale (x1, y1, x2, y2) by
        # this pair repeated. The previous (height, width) order is only
        # harmless for square images. NOTE(review): confirm against the
        # postprocessor in `engine`.
        pred = postprocessor(
            outputs,
            orig_target_sizes=torch.tensor([[w0, h0]], device=device)
        )[0]

        boxes = pred["boxes"].cpu().numpy()
        scores = pred["scores"].cpu().numpy()
        labels = pred["labels"].cpu().numpy()

        predictions.append(_format_prediction_string(boxes, scores, labels, threshold))
        filenames.append(f"{id_prefix}{img_path.name}")

    df = pd.DataFrame({
        "PredictionString": predictions,
        "image_id": filenames,
    })

    df.to_csv(output_csv, index=False)
    print(f"üìÑ CSV saved ‚Üí {output_csv}")
    return df

In [6]:
# Run inference with the X-size model and write the submission file.
df = dfine_test_inference(
    "./configs/deimv2/deimv2_dinov3_x_coco.yml",
    "./outputs/deimv2_dinov3_x_coco/checkpoint0019.pth",
    "../dataset/test",
    output_csv="deimv2_submission.csv",
    threshold=0.01,
)

# Preview the first rows of the resulting table.
df.head()

üîÑ Loading model for Single Inference (Normalized)...
Training DINOv3 from scratch...
Using Lite Spatial Prior Module with inplanes=64
     --- Use Gateway@True ---
     --- Use Share Bbox Head@False ---
     --- Use Share Score Head@False ---
     --- Wide Layer@1 ---
Tuning checkpoint from ./outputs/deimv2_dinov3_x_coco/checkpoint0019.pth
Load model.state_dict, {'missed': [], 'unmatched': []}
Using the new matching cost with iou_order_alpha = 4.0 at epoch 45
üöÄ Starting Inference on 4871 images...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4871/4871 [15:17<00:00,  5.31it/s]


üìÑ CSV saved ‚Üí deimv2_submission.csv


Unnamed: 0,PredictionString,image_id
0,7 0.9821 604.12 518.01 956.29 1024.20 7 0.9752...,test/0000.jpg
1,4 0.9191 344.26 249.83 752.80 694.60 5 0.8249 ...,test/0001.jpg
2,1 0.9558 775.86 407.55 1024.15 1024.50 1 0.930...,test/0002.jpg
3,9 0.9813 146.37 262.66 911.80 823.56 9 0.6196 ...,test/0003.jpg
4,1 0.9446 198.11 252.49 872.56 779.42 0 0.7777 ...,test/0004.jpg
