In [4]:
import torch
from PIL import Image
from tqdm import tqdm
import pandas as pd
import os
import json
from pathlib import Path


from src.core import YAMLConfig
from src.misc import dist_utils
from src.data.transforms import Compose, Resize, ConvertPILImage
from src.solver import TASKS

In [5]:
def load_dfine_solver(config_path, checkpoint_path, device="cuda"):
    """Build a D-FINE solver from a YAML config and load checkpoint weights for inference.

    Args:
        config_path: Path of the YAML file passed to ``YAMLConfig``.
        checkpoint_path: Path of the checkpoint file. It is handed to the
            config as ``tuning`` and is also loaded explicitly into
            ``solver.model`` below.
        device: Device the model is moved to (default ``"cuda"``).

    Returns:
        The solver, with ``solver.model`` on ``device`` and in eval mode.

    Raises:
        KeyError: If the checkpoint has neither a ``"model"`` entry nor an
            ``"ema"`` entry containing ``"module"``.
    """
    update_dict = {
        "resume": None,
        "device": device,
        "seed": 42,
        "tuning": checkpoint_path,
        "use_amp": False,
        "use_ema": True,
    }

    cfg = YAMLConfig(config_path, **update_dict)

    # Presumably stops the backbone from loading its own pretrained weights,
    # since the checkpoint supplies them -- TODO confirm against HGNetv2.
    if "HGNetv2" in cfg.yaml_cfg:
        cfg.yaml_cfg["HGNetv2"]["pretrained"] = False

    # Create the solver; per the original note, model / postprocessor /
    # criterion are still None until _setup() has run.
    SolverClass = TASKS[cfg.yaml_cfg["task"]]
    solver = SolverClass(cfg)
    solver._setup()

    # Load the checkpoint the same way train.py does (per the original note).
    ckpt = torch.load(checkpoint_path, map_location="cpu")

    # NOTE(review): when both entries exist the raw "model" weights win over
    # the EMA weights, even though use_ema=True is requested above -- confirm
    # that this priority is intended.
    if "model" in ckpt:
        state = ckpt["model"]
    else:
        try:
            state = ckpt["ema"]["module"]
        except KeyError as exc:
            raise KeyError(
                f"checkpoint {checkpoint_path!r} has neither a 'model' entry "
                f"nor an 'ema'/'module' entry"
            ) from exc

    # strict=False tolerates key mismatches; surface them instead of letting a
    # partially loaded model go unnoticed.
    incompatible = solver.model.load_state_dict(state, strict=False)
    missing = list(incompatible.missing_keys)
    unexpected = list(incompatible.unexpected_keys)
    if missing or unexpected:
        print(
            f"[load_dfine_solver] checkpoint/model mismatch: "
            f"{len(missing)} missing key(s), {len(unexpected)} unexpected key(s)"
        )
        if missing:
            print(f"  missing (first 5): {missing[:5]}")
        if unexpected:
            print(f"  unexpected (first 5): {unexpected[:5]}")

    solver.model.to(device)
    solver.model.eval()
    return solver

IndentationError: expected an indented block after function definition on line 1 (1617811176.py, line 3)

In [None]:
def dfine_test_inference(
    config,
    checkpoint,
    image_dir,
    output_csv="output.csv",
    threshold=0.05,
    device="cuda",
    input_size=(1024, 1024),
):
    """Run D-FINE detection over a directory of images and write a submission CSV.

    Args:
        config: Path of the YAML config, forwarded to ``load_dfine_solver``.
        checkpoint: Path of the checkpoint, forwarded to ``load_dfine_solver``.
        image_dir: Directory scanned (non-recursively) for ``*.jpg`` and
            ``*.png`` files; jpg files are processed first, each group sorted.
        output_csv: Path of the CSV file to write.
        threshold: Detections with a score below this value are dropped.
        device: Device used for the model and the input tensors.
        input_size: Two-element size handed to ``Resize`` (default
            ``(1024, 1024)``). Presumably this should match the resolution the
            model was configured for -- verify against the config.

    Returns:
        A ``pandas.DataFrame`` with the columns ``PredictionString`` (space
        separated ``label score x1 y1 x2 y2`` groups) and ``image_id``
        (``test/<file name>``); the same frame is written to ``output_csv``.
    """
    solver = load_dfine_solver(config, checkpoint, device)

    model = solver.model
    postprocessor = solver.postprocessor

    transform = Compose([
        Resize(size=list(input_size)),
        ConvertPILImage(dtype="float32", scale=True),
    ])

    image_root = Path(image_dir)
    image_paths = sorted(image_root.glob("*.jpg")) + sorted(image_root.glob("*.png"))
    if not image_paths:
        # An empty CSV is still written, but make the likely path mistake visible.
        print(f"[dfine_test_inference] no *.jpg / *.png files found in {image_root}")

    predictions = []
    filenames = []

    for img_path in tqdm(image_paths):
        # convert() returns a new image, so the file handle can be closed right away.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        w0, h0 = img.size

        img_tensor = transform(img).unsqueeze(0).to(device)

        # The upstream D-FINE postprocessor scales normalized xyxy boxes by
        # orig_target_sizes repeated twice, and its reference inference script
        # passes (width, height). Passing (height, width) swaps the x / y scale
        # on non-square images. Re-check if the postprocessor was customized.
        orig_size = torch.tensor([[w0, h0]], device=device)

        with torch.no_grad():
            outputs = model(img_tensor)
            pred = postprocessor(outputs, orig_target_sizes=orig_size)[0]

        boxes = pred["boxes"].cpu().numpy()
        scores = pred["scores"].cpu().numpy()
        labels = pred["labels"].cpu().numpy()

        # One "label score x1 y1 x2 y2" group per detection that passes the threshold.
        entries = [
            f"{label} {score:.4f} {x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f}"
            for (x1, y1, x2, y2), score, label in zip(boxes, scores, labels)
            if score >= threshold
        ]

        predictions.append(" ".join(entries))
        filenames.append(f"test/{img_path.name}")

    df = pd.DataFrame({
        "PredictionString": predictions,
        "image_id": filenames,
    })

    df.to_csv(output_csv, index=False)
    print(f"📄 CSV saved → {output_csv}")
    return df

# TTA


In [30]:
# import torch
# import numpy as np
# from torchvision.ops import nms
# from ensemble_boxes import weighted_boxes_fusion


# def tta_inference(model, img_tensor, orig_size, postprocessor, device="cuda"):
#     """
#     img_tensor: (1, 3, H, W)
#     orig_size: (h0, w0)
#     """

#     h0, w0 = orig_size

#     # 1) inference on the original image
#     with torch.no_grad():
#         out_original = model(img_tensor)

#     pred_original = postprocessor(
#         out_original,
#         orig_target_sizes=torch.tensor([[h0, w0]], device=device)
#     )[0]

#     # 2) horizontal flip TTA
#     img_flipped = torch.flip(img_tensor, dims=[3])  # flip along the width axis

#     with torch.no_grad():
#         out_flip = model(img_flipped)

#     pred_flip = postprocessor(
#         out_flip,
#         orig_target_sizes=torch.tensor([[h0, w0]], device=device)
#     )[0]

#     # 3) map the flipped predictions back into the original coordinate system
#     boxes = pred_flip["boxes"].cpu().numpy()
#     boxes[:, [0, 2]] = w0 - boxes[:, [2, 0]]  # x1,x2 swap

#     pred_flip["boxes"] = torch.tensor(boxes)
#     pred_flip["scores"] = pred_flip["scores"]
#     pred_flip["labels"] = pred_flip["labels"]

#     # 4) original + flip ensemble (plain concatenation is enough)
#     final_boxes = torch.cat([
#         pred_original["boxes"].cpu(),
#         pred_flip["boxes"].cpu()
#     ])

#     final_scores = torch.cat([
#         pred_original["scores"].cpu(),
#         pred_flip["scores"].cpu()
#     ])

#     final_labels = torch.cat([
#         pred_original["labels"].cpu(),
#         pred_flip["labels"].cpu()
#     ])

#     return final_boxes, final_scores, final_labels


# def tta_nms(boxes, scores, labels, iou_thr=0.6):
#     keep = nms(boxes, scores, iou_thr)
#     return boxes[keep], scores[keep], labels[keep]

# def tta_wbf(boxes, scores, labels, image_size, iou_thr=0.6, skip_box_thr=0.0):
#     """
#     boxes: (N, 4) torch tensor, x1,y1,x2,y2 (in pixels)
#     scores: (N,) torch tensor
#     labels: (N,) torch tensor
#     image_size: (w0, h0) original image size
#     """
#     w0, h0 = image_size

#     # to numpy
#     boxes_np = boxes.detach().cpu().numpy()
#     scores_np = scores.detach().cpu().numpy()
#     labels_np = labels.detach().cpu().numpy()

#     # normalize to 0~1 (ensemble-boxes convention)
#     boxes_norm = boxes_np.copy()
#     boxes_norm[:, [0, 2]] /= float(w0)
#     boxes_norm[:, [1, 3]] /= float(h0)

#     # WBF takes one list per model, so the whole TTA output is treated as a single model.
#     boxes_list = [boxes_norm]
#     scores_list = [scores_np]
#     labels_list = [labels_np]

#     boxes_wbf, scores_wbf, labels_wbf = weighted_boxes_fusion(
#         boxes_list,
#         scores_list,
#         labels_list,
#         weights=None,
#         iou_thr=iou_thr,
#         skip_box_thr=skip_box_thr
#     )

#     # convert back to pixel coordinates
#     boxes_wbf[:, [0, 2]] *= float(w0)
#     boxes_wbf[:, [1, 3]] *= float(h0)

#     # convert back to torch tensors
#     device = boxes.device
#     boxes_out = torch.tensor(boxes_wbf, device=device, dtype=boxes.dtype)
#     scores_out = torch.tensor(scores_wbf, device=device, dtype=scores.dtype)
#     labels_out = torch.tensor(labels_wbf, device=device, dtype=labels.dtype)

#     return boxes_out, scores_out, labels_out

In [32]:
# # TTA-enabled version
# from pathlib import Path

# def dfine_test_inference_TTA(
#     config,
#     checkpoint,
#     image_dir,
#     output_csv="output.csv",
#     threshold=0.05,
#     device="cuda"
# ):

#     solver = load_dfine_solver(config, checkpoint, device)

#     model = solver.model
#     postprocessor = solver.postprocessor

#     transform = Compose([
#         Resize(size=[1024,1024]),
#         ConvertPILImage(dtype="float32", scale=True),
#     ])

#     image_paths = sorted(list(Path(image_dir).glob("*.jpg"))) + \
#                   sorted(list(Path(image_dir).glob("*.png")))

#     predictions = []
#     filenames = []

#     for img_path in tqdm(image_paths):
#         img = Image.open(img_path).convert("RGB")
#         w0, h0 = img.size

#         img_tensor = transform(img)
#         img_tensor = img_tensor.unsqueeze(0).to(device)

#         # ★ TTA inference is applied here ★
#         boxes, scores, labels = tta_inference(
#             model=model,
#             img_tensor=img_tensor,
#             orig_size=(h0, w0),
#             postprocessor=postprocessor,
#             device=device
#         )
        
#         # boxes, scores, labels = tta_nms(
#         #     boxes, scores, labels, iou_thr=0.6
#         # )
#         boxes, scores, labels = tta_wbf(
#             boxes, scores, labels,
#             image_size=(w0, h0),
#             iou_thr=0.6,
#             skip_box_thr=threshold
#         )
        
#         boxes = boxes.numpy()
#         scores = scores.numpy()
#         labels = labels.numpy()

#         pred_str = ""
#         for box, score, label in zip(boxes, scores, labels):
#             if score < threshold:
#                 continue
#             x1, y1, x2, y2 = box
#             pred_str += f"{label} {score:.4f} {x1:.2f} {y1:.2f} {x2:.2f} {y2:.2f} "

#         predictions.append(pred_str.strip())
#         filenames.append(f"test/{img_path.name}")

#     df = pd.DataFrame({
#         "PredictionString": predictions,
#         "image_id": filenames,
#     })

#     df.to_csv(output_csv, index=False)
#     print(f"📄 CSV saved → {output_csv}")
#     return df

In [3]:
# Plain (non-TTA) inference run: score the test images with the last checkpoint
# of the custom D-FINE HGNetv2-L config and write the submission CSV.
# Swap in dfine_test_inference_TTA here to use the TTA variant once it is re-enabled.
df = dfine_test_inference(
    "configs/dfine/custom/dfine_hgnetv2_l_custom.yml",  # config
    "./output/dfine_hgnetv2_l_custom/last.pth",  # checkpoint
    "../../dataset_for_RFDETR/test/test",  # image_dir
    threshold=0.01,
    output_csv="dfine_submission.csv",
)

# Preview the first rows of the generated submission.
df.head()

NameError: name 'dfine_test_inference' is not defined