In [1]:
import gc
from collections import deque
import datetime
import logging
import threading
import time

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import torch.nn as nn
import ultralytics
from ultralytics import YOLO

from utils.models import Detector, EmbeddingModel
from utils import common
from utils.losses import v8DetectionLoss, torch_square_norm
from utils.ops import non_max_suppression
from utils.dataset import *
from utils.tal import TaskAlignedAssigner

2025-01-26 18:57:05.115011: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-26 18:57:05.115111: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-26 18:57:05.177373: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-26 18:57:05.293824: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [44]:
X_train = pd.read_csv("datasets/train.csv")
id_list = X_train["id"].tolist()
prefix = "datasets/card_images_small/"
id_list = list(map(lambda x: prefix + str(x) + ".jpg", id_list))
card_type = list(map(lambda x: x.lower().startswith("pendulum"), X_train["type"].tolist()))
train_dataset = DecklistDataset(id_list, card_type)
del X_train, card_type, id_list

In [2]:
X_valid = pd.read_csv("datasets/valid.csv")
id_list = X_valid["id"].tolist()
prefix = "datasets/card_images_small/"
id_list = list(map(lambda x: prefix + str(x) + ".jpg", id_list))
card_type = list(map(lambda x: x.lower().startswith("pendulum"), X_valid["type"].tolist()))
valid_dataset = DecklistDataset(id_list, card_type)
del X_valid, card_type, id_list

In [22]:
state_dict = torch.load("best 278.pt")
model = Detector(1000)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [23]:
teach_preprocess = common.EmbeddingPreprocessor()
teacher_model = EmbeddingModel()
teacher_model.load("weights/embedding.h5")



In [24]:
valid_dataset.shuffle()
test_image, test_label = next(iter(valid_dataset))

In [25]:
cv2.imwrite("test.png", test_image[:,:,::-1])

True

In [26]:
device = next(model.parameters()).device
torch_inputs = common.detector_preprocess(test_image).to(device=device)

In [27]:
pred_detect, pred_embedding = model(torch_inputs.unsqueeze(0))

# post processing
pred_embedding = [
    xi.view(pred_embedding[0].shape[0], pred_embedding[0].shape[1], -1) for xi in pred_embedding
]
pred_embedding = torch.cat(pred_embedding, 2)
pred_embedding = pred_embedding.transpose(-1, -2)

pred_bbox = model.layer22._inference(pred_detect)
bbox_pos, bbox_mask = non_max_suppression(pred_bbox)

bbox_pred = bbox_pos[bbox_mask]
embedding = pred_embedding[bbox_mask]

In [30]:
x1, y1, x2, y2

(0, 27, 21, 0)

In [29]:
teacher_batch_size = 8
embedding_preprocessor = common.EmbeddingPreprocessor()
for i, indexes in enumerate(
    common.make_batch(range(bbox_pred.shape[0]), teacher_batch_size)):
    teacher_inputs = []
    for index in indexes:
        bbox = bbox_pred[index]
        image_index = int(bbox[0].item())
        bbox = bbox[1:5].clamp_(min=0).detach().cpu().numpy()
        x1, y1, x2, y2 = np.round(bbox).astype(np.int32)
        crop_img = test_image[y1:y2, x1:x2, :]
        crop_img = embedding_preprocessor(crop_img)
        teacher_inputs.append(crop_img)
    teacher_inputs = tf.stack(teacher_inputs, axis=0)
    if len(teacher_inputs) == 0:
        continue
    teacher_embedding = teacher_model.pred(teacher_inputs, len(teacher_inputs))
    teacher_embedding = torch.from_numpy(teacher_embedding)
    teacher_embedding = teacher_embedding.to(
        dtype=embedding.dtype, device=embedding.device
    )
    loss = torch_square_norm(teacher_embedding, embedding[[indexes]])
    print(loss)

InvalidArgumentError: Exception encountered when calling layer 'resizing_5' (type Resizing).

{{function_node __wrapped__ResizeBilinear_device_/job:localhost/replica:0/task:0/device:GPU:0}} input image must be of non-zero size [Op:ResizeBilinear] name: 

Call arguments received by layer 'resizing_5' (type Resizing):
  • inputs=tf.Tensor(shape=(0, 21, 3), dtype=uint8)

In [None]:
for position in bbox_pred:
    x1, y1, x2, y2 = position[:4]
    

In [19]:
bbox_pred

tensor([[ 7.8062e+01,  0.0000e+00,  1.2840e+02,  1.6950e+01,  1.9036e-03],
        [ 9.6674e+01,  0.0000e+00,  1.4357e+02,  1.5850e+01,  2.2066e-03],
        [ 2.5973e+02,  0.0000e+00,  3.0351e+02,  1.4969e+01,  4.9136e-03],
        [ 3.3057e+02,  0.0000e+00,  3.7658e+02,  1.5358e+01,  5.4335e-03],
        [ 4.4765e+02,  0.0000e+00,  4.9815e+02,  1.4845e+01,  4.2637e-03],
        [ 4.8040e+02,  0.0000e+00,  5.2959e+02,  1.5213e+01,  3.8684e-03],
        [ 5.3193e+02,  2.3260e-03,  5.9810e+02,  1.4273e+01,  2.7017e-03],
        [ 5.5071e+02,  0.0000e+00,  6.1921e+02,  1.3174e+01,  2.0581e-03],
        [ 2.8265e+01,  6.1470e-02,  1.0735e+02,  4.7189e+01,  2.3038e-04],
        [ 1.0432e+02,  3.4046e-01,  1.4465e+02,  2.7698e+01,  6.0792e-04],
        [ 1.8566e+02,  7.8014e-01,  2.2426e+02,  2.5194e+01,  9.2594e-04],
        [ 2.1035e+02,  9.0075e-01,  2.4697e+02,  2.4873e+01,  1.2091e-03],
        [ 3.2170e+02,  3.1931e-01,  3.6427e+02,  2.6695e+01,  1.7295e-03],
        [ 3.3751e+02,  2.