In [6]:
import cv2
import glob
import os
import numpy as np
import pandas as pd
import pickle
import json

from natsort import natsorted
from attrdict import AttrDict
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
from torchvision.ops import box_convert

import supervision as sv
from notebooks.train.GroundingDINO.groundingdino.util.inference import load_model, load_image, predict, annotate
import warnings

warnings.filterwarnings("ignore")

ModuleNotFoundError: No module named 'groundingdino'

### model load

In [2]:
# Build the detector from a model config file and a checkpoint file.
# NOTE(review): both paths are absolute and specific to this machine; adjust
# them (or derive them from a configurable base directory) before re-running.
_model_config_path = "/root/twkim/GroundingDINO/groundingdino/config/GroundingDINO_SwinB_cfg.py"
_model_weights_path = "/root/twkim/groundingdino_swinb_cogcoor.pth"
model = load_model(_model_config_path, _model_weights_path)

final text_encoder_type: bert-base-uncased


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### Helper functions

In [3]:
def post_process_result(
    source_h: int, source_w: int, boxes: torch.Tensor, logits: torch.Tensor
) -> sv.Detections:
    """Scale model boxes to the source image and wrap them as ``sv.Detections``.

    Args:
        source_h: Height of the source image; used to scale the y / height
            components of every box.
        source_w: Width of the source image; used to scale the x / width
            components of every box.
        boxes: Boxes in ``cxcywh`` layout. Assumed to be shaped ``(N, 4)`` and
            normalized to the image size -- TODO confirm against ``predict``.
        logits: One confidence value per box.

    Returns:
        ``sv.Detections`` whose ``xyxy`` holds the scaled corner coordinates
        and whose ``confidence`` holds ``logits`` as a NumPy array.
    """
    # Tensor.numpy() only works on CPU tensors that do not require grad, and
    # the scale tensor below lives on the CPU, so normalise both inputs first.
    # For tensors that are already detached CPU tensors this changes nothing.
    boxes = boxes.detach().cpu()
    logits = logits.detach().cpu()

    scale = torch.Tensor([source_w, source_h, source_w, source_h])
    xyxy = box_convert(boxes=boxes * scale, in_fmt="cxcywh", out_fmt="xyxy").numpy()
    return sv.Detections(xyxy=xyxy, confidence=logits.numpy())

### Image list & configuration

In [4]:
# Inputs and detection settings consumed by the inference loop.
_image_glob = "/root/twkim/test/*.png"
_matched_images = glob.glob(_image_glob)
IMAGE_LIST = natsorted(_matched_images)  # every PNG matching the pattern, natsorted

# Caption handed to predict().
TEXT_PROMPT = "car"

# Passed to predict() as box_threshold / text_threshold respectively.
# The "TRESHOLD" spelling is kept because the inference cell reads these names.
BOX_TRESHOLD, TEXT_TRESHOLD = 0.35, 0.25

In [5]:
# Output directories: `path` for annotated frames, `crop` for cropped detections
# (both are only referenced by currently disabled export code in this notebook).
path = "/root/dacon/cars/data/annotated"
crop = "/root/dacon/cars/data/test_crop"

# Create each directory when it is missing and report which ones were created.
for _out_dir, _created_msg in ((path, "ok1"), (crop, "ok2")):
    _is_new = not os.path.exists(_out_dir)
    # exist_ok=True avoids an error if the directory appears between the
    # existence check above and the creation call.
    os.makedirs(_out_dir, exist_ok=True)
    if _is_new:
        print(_created_msg)

ok2


### image load & inference

In [6]:
# NOTE(review): this re-declares post_process_result, which is already defined
# in an earlier cell of this notebook; this later definition is the one in
# effect after a top-to-bottom run. Keep a single definition.
def post_process_result(
    source_h: int, source_w: int, boxes: torch.Tensor, logits: torch.Tensor
) -> sv.Detections:
    """Scale model boxes to the source image and wrap them as ``sv.Detections``.

    Args:
        source_h: Height of the source image; used to scale the y / height
            components of every box.
        source_w: Width of the source image; used to scale the x / width
            components of every box.
        boxes: Boxes in ``cxcywh`` layout. Assumed to be shaped ``(N, 4)`` and
            normalized to the image size -- TODO confirm against ``predict``.
        logits: One confidence value per box.

    Returns:
        ``sv.Detections`` whose ``xyxy`` holds the scaled corner coordinates
        and whose ``confidence`` holds ``logits`` as a NumPy array.
    """
    # Tensor.numpy() only works on CPU tensors that do not require grad, and
    # the scale tensor below lives on the CPU, so normalise both inputs first.
    # For tensors that are already detached CPU tensors this changes nothing.
    boxes = boxes.detach().cpu()
    logits = logits.detach().cpu()

    scale = torch.Tensor([source_w, source_h, source_w, source_h])
    xyxy = box_convert(boxes=boxes * scale, in_fmt="cxcywh", out_fmt="xyxy").numpy()
    return sv.Detections(xyxy=xyxy, confidence=logits.numpy())

In [7]:
# Show the kernel's working directory; the inference cell writes its results
# CSV to a path relative to this directory.
!pwd

/root/twkim


In [25]:
# Run detection on every image and append one row per detected box to the
# sample-submission table, then write the combined table to CSV.
results = pd.read_csv("/root/twkim/sample_submission.csv")

# Rows are collected in a list and appended in one step; growing the DataFrame
# with results.loc[len(results)] re-allocates it for every single detection.
detection_rows = []

try:
    for IMAGE_PATH in tqdm(IMAGE_LIST, desc="bbox_calc"):
        image_source, image = load_image(IMAGE_PATH)
        # basename/splitext keep dots that are part of the file stem
        # ("a.b.png" -> "a.b"); splitting on the first "." would truncate it.
        name = os.path.splitext(os.path.basename(IMAGE_PATH))[0]

        boxes, logits, phrases = predict(
            model=model,
            image=image,
            caption=TEXT_PROMPT,
            box_threshold=BOX_TRESHOLD,
            text_threshold=TEXT_TRESHOLD,
        )

        detections = post_process_result(
            source_h=image_source.shape[0],
            source_w=image_source.shape[1],
            boxes=boxes,
            logits=logits,
        )

        for box, conf in zip(detections.xyxy, detections.confidence):
            # Plain Python floats keep the column dtypes and CSV formatting
            # independent of the NumPy scalar type produced by the model.
            x1, y1, x2, y2 = (float(v) for v in box)
            # The axis-aligned box is emitted as four corner points:
            # (x1, y1) -> (x2, y1) -> (x2, y2) -> (x1, y2).
            detection_rows.append(
                {
                    "file_name": f"{name}.png",
                    "class_id": np.nan,
                    "confidence": float(conf),
                    "point1_x": x1,
                    "point1_y": y1,
                    "point2_x": x2,
                    "point2_y": y1,
                    "point3_x": x2,
                    "point3_y": y2,
                    "point4_x": x1,
                    "point4_y": y2,
                }
            )
finally:
    # Runs on normal completion and on interruption, so the detections gathered
    # so far are still saved -- without rewriting the whole file once per image
    # as the in-loop to_csv call did.
    if detection_rows:
        results = pd.concat(
            [results, pd.DataFrame(detection_rows, columns=results.columns)],
            ignore_index=True,
        )

    # Save the results to a CSV file.
    results.to_csv("./bbox_submit_npy_재현.csv", index=False)

bbox_calc:   1%|          | 26/3400 [00:09<19:49,  2.84it/s]

In [None]:
# Load one pickled object back for inspection (presumably a crop array written
# by the currently disabled pickle.dump in the inference cell -- confirm).
# NOTE(security): pickle.load can execute arbitrary code; only open files that
# were produced by this project.
# The result is bound to `loaded_crop` rather than `load_image` so that the
# imported load_image() function used by the inference cell is not overwritten.
with open("/root/dacon/cars/data/crop/070638309_90.pkl", "rb") as f:
    loaded_crop = pickle.load(f)

In [None]:
# annotate image with detections
box_annotator = sv.BoxAnnotator()
labels = [
    f"{phrases[0]} {confidence:0.2f}" 
    for _, _, confidence, _, _ 
    in detections]
annotated_frame = box_annotator.annotate(scene=image_source.copy(), detections=detections, labels=labels)

%matplotlib inline
sv.plot_image(annotated_frame, (16, 16))