In [1]:
import copy
import math
import os
import pprint
import warnings

import argparse

import glob
import numpy as np
import scipy
from scipy.spatial.transform import Rotation as rotation_util
import tensorflow as tf
import torch
import time
import tqdm

import objectron.dataset.iou as IoU3D
import objectron.dataset.box as Box

# We incorporate confidence into the AP calculation
# METRIC_UPDATED = False
METRIC_UPDATED = True
if METRIC_UPDATED:
    import objectron.dataset.metrics_nvidia as metrics
else:
    import objectron.dataset.metrics as metrics

import objectron.dataset.parser as parser

import sys
sys.path.insert(0, '../..')

from lib.utils.pnp.cuboid_pnp_shell import pnp_shell

import shutil
import simplejson as json

from lib.detectors.detector_factory import detector_factory
from lib.opts import opts

import cv2

from eval_opts import eval_opts
from eval_utils import draw_axes

# Evaluation thresholds and limits. None of them is referenced in the visible
# notebook cells; presumably they are consumed by the evaluator/metrics code
# defined elsewhere — TODO confirm.
_MAX_PIXEL_ERROR = 0.1
_MAX_AZIMUTH_ERROR = 30.
_MAX_POLAR_ERROR = 20.
_MAX_SCALE_ERROR = 2.
_MAX_DISTANCE = 1.0  # In meters
_NUM_BINS = 21

# Per-category reference table: every key is an object category name and maps
# to two lists of five floats each.
# NOTE(review): presumably the first list holds mean values and the second the
# matching standard deviations of object dimension statistics — confirm against
# the code that reads this table before relying on that interpretation.
dimension_ref = {
    'bike': [[0.65320896, 1.021797894, 1.519635599, 0.6520559199, 1.506392621],
             [0.1179380561, 0.176747817, 0.2981715678, 0.1667947895, 0.3830536275]],
    'book': [[0.225618019, 0.03949624326, 0.1625821624, 7.021850281, 5.064694187],
             [0.1687487664, 0.07391230822, 0.06436673199, 3.59629568, 2.723290812]],
    'bottle': [
        [0.07889784977450116, 0.24127451915330908, 0.0723714257114412, 0.33644069262302545, 0.3091134992864717, ],
        [0.02984649578071775, 0.06381390122918497, 0.03088144838560917, 0.11052240441921059,
         0.13327627592012867, ]],
    'camera': [[0.11989848700326843, 0.08226238775595619, 0.09871718158089632, 1.507216484439368, 1.1569407159290284, ],
               [0.021177290310316968, 0.02158788017191602, 0.055673710278419844, 0.28789183678046854,
                0.5342094080365904, ]],
    'cereal_box': [
        [0.19202754401417296, 0.2593114001714919, 0.07723794925413519, 0.7542602699204104, 0.29441151268928173, ],
        [0.08481640897407464, 0.09999915952084068, 0.09495429981036707, 0.19829004029411457, 0.2744797990483879, ]],
    'chair': [[0.5740664085137888, 0.8434027515832329, 0.6051523831888338, 0.6949691013776601, 0.7326891354260606, ],
              [0.12853104253707456, 0.14852086453095492, 0.13428881418587957, 0.16897092539619352,
               0.18636134566748525, ]],
    'cup': [[0.08587637391801063, 0.12025228955138188, 0.08486836104868696, 0.7812126934904675, 0.7697895244331658, ],
            [0.05886805978497525, 0.06794896438246326, 0.05875681990718713, 0.2887038681446475, 0.283821205157399, ]],
    'mug': [[0.14799136566553112, 0.09729087667918128, 0.08845449667169905, 1.3875694883045138, 1.0224997119392225, ],
            [1.0488828523223728, 0.2552672927963539, 0.039095350310480705, 0.3947832854104711, 0.31089415283872546, ]],
    'laptop': [[0.33685059747485196, 0.1528068814247063, 0.2781020624738614, 35.920214652427696, 23.941173992376903, ],
               [0.03529983948867832, 0.07017080198389423, 0.0665823136876069, 391.915687801732, 254.21325950495455, ]],
    'shoe': [[0.10308848289662519, 0.10932616184503478, 0.2611737789760352, 1.0301976264129833, 2.6157393112424328, ],
             [0.02274768925924402, 0.044958380226590516, 0.04589720205423542, 0.3271000267177176,
              0.8460337534776092, ]],
}

# 32 coefficients laid out as an 8 x 4 array (see the reshape below).
# NOTE(review): the name suggests default EPnP alpha (barycentric) weights, one
# row per cuboid corner — TODO confirm with the PnP code that uses it.
epnp_alpha_default = np.array([4.0, -1.0, -1.0, -1.0, 2.0, -1.0, -1.0, 1.0, 2.0,
                               -1.0, 1.0, -1.0, 0.0, -1.0, 1.0, 1.0, 2.0, 1.0, -1.0, -1.0,
                               0.0, 1.0, -1.0, 1.0, 0.0, 1.0, 1.0, -1.0, -2.0, 1.0, 1.0,
                               1.0]).reshape(8, 4)


2024-10-21 05:07:10.376065: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-10-21 05:07:10.388332: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-10-21 05:07:10.391880: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-21 05:07:10.401609: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.4.1+cu121


### 이미지 경로 txt 파일 생성

In [60]:
# One-off helper, intentionally left commented out: it walks `base_dir`, and for
# every directory whose path contains '_output' it writes the full path of each
# '.jpg' file (one per line) to `image_paths.txt`. The next cell reads that
# file, so uncomment and run this once whenever the list must be regenerated.

# base_dir = '/home/custom_dataset/box_folders'
# output_file = base_dir + '/image_paths.txt'

# with open(output_file, 'w') as f:
#     for root, dirs, files in tqdm.tqdm(os.walk(base_dir)):
#         if '_output' in root:
#             for file in files:
#                 if file.endswith('.jpg'):
#                     full_path = os.path.join(root, file)
#                     f.write(full_path + '\n')

# print(f'이미지 경로가 {output_file}에 저장되었습니다.')

In [61]:
# Read the text file that lists one image path per line.
image_paths_file = '/home/custom_dataset/box_folders/image_paths.txt'

with open(image_paths_file, 'r') as f:
    # Strip the line terminator and drop blank lines so that an empty string
    # can never reach the dataset as an "image path".
    image_paths = [path for path in (line.strip() for line in f) if path]

# Fail with an explicit message instead of an IndexError on image_paths[0]
# when the list file is empty.
if not image_paths:
    raise ValueError(f'No image paths found in {image_paths_file}')

print(f'총 {len(image_paths)}개의 이미지 경로를 읽었습니다.')
print(f'첫 번째 이미지 경로: {image_paths[0]}')
print(f'마지막 이미지 경로: {image_paths[-1]}')

총 275개의 이미지 경로를 읽었습니다.
첫 번째 이미지 경로: /home/custom_dataset/box_folders/box_tea_1/_output/0005.jpg
마지막 이미지 경로: /home/custom_dataset/box_folders/box_tissue_2/_output/0001.jpg


### 자체 Dataset 생성

In [62]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Dataset over image paths whose labels live in a sibling ``.json`` file.

    Each item is ``(image_path, label)``. The image itself is not loaded here;
    ``label`` is a list of nine ``[x, y]`` integer pairs read from
    ``labelingInfo[0]['3DBox']['location'][0]`` of the annotation file.
    """

    # Order in which the numbered annotation points are emitted: point 9 first,
    # then points 1..8.
    # NOTE(review): point 9 is presumably the box centre — confirm against the
    # annotation specification.
    KEYPOINT_ORDER = (9, 1, 2, 3, 4, 5, 6, 7, 8)

    def __init__(self, image_paths):
        """Store the sequence of image paths; nothing is read from disk yet."""
        self.image_paths = image_paths

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image_path, label)`` for the image at position ``idx``.

        Raises:
            OSError: if the annotation file next to the image cannot be opened.
            KeyError / IndexError: if the annotation lacks the expected fields.
        """
        image_path = self.image_paths[idx]
        # Swap only the file extension. A plain str.replace('.jpg', '.json')
        # would also rewrite '.jpg' inside directory names and would leave
        # paths with another extension (or '.JPG') pointing at the image.
        json_path = os.path.splitext(image_path)[0] + '.json'

        with open(json_path, 'r') as f:
            json_data = json.load(f)

        xy_dict = json_data['labelingInfo'][0]['3DBox']['location'][0]
        label = [[int(xy_dict[f"x{i}"]), int(xy_dict[f"y{i}"])] for i in self.KEYPOINT_ORDER]

        return image_path, label



In [63]:
# Build the dataset and display the 2D keypoint label of its first sample.
my_dataset = CustomDataset(image_paths)
_first_image_path, coords_2d = my_dataset[0]
print(coords_2d)

[[1237, 559], [1086, 551], [1344, 599], [1332, 687], [1086, 633], [1160, 431], [1389, 464], [1385, 533], [1158, 494]]


In [64]:
# Start both option sets from their parser defaults. An empty argument list is
# parsed explicitly, so nothing is taken from the process command line.
opt_eval, opt_detector = (
    eval_opts().parser.parse_args([]),  # evaluation options
    opts().parser.parse_args([]),       # detector options
)

In [65]:
# Todo: notebook-side overrides of the most important settings; comment them out
# when the options come from a .sh launcher instead of an IDE/notebook session.
for _option_name, _option_value in (
    ('outf', 'custom_eval/'),
    # ('eval_num_symmetry', 1),
    # ('eval_confidence_thresh', 0.3),
    ('eval_debug_save_thresh', 100),
    ('eval_max_num', len(my_dataset)),  # evaluate every sample of the custom dataset
):
    setattr(opt_eval, _option_name, _option_value)

opt_detector.nms = True

In [66]:
# Optional evaluation on a fixed subset of frame ids (eval_subset is False by default).
# NOTE(review): opt_eval.eval_c still holds its parser default at this point; the
# category is overridden in a later cell — confirm the intended cell order before
# enabling eval_subset.
if opt_eval.eval_subset == True:
    json_path = 'selected_frames.json'
    with open(json_path) as f:
        data_json = json.load(f)
    opt_eval.eval_subset_list = sorted(int(frame_id) for frame_id in data_json[opt_eval.eval_c])
else:
    opt_eval.eval_subset_list = None

# Debug switches.
# opt_eval.eval_skip=35 # Run images on [eval_skip, eval_max_num]
# opt_detector.batch_size=1
# opt_eval.eval_debug = True # Whether to save img for debug
opt_eval.eval_debug_clean = True
opt_eval.eval_debug_json = True  # Whether to save json for debug

# 0: no extra visualization is written to demo/ (e.g. heatmaps); 6 would enable it.
# opt_detector.debug = 6
opt_detector.debug = 0

# mug_only -- True: evaluate the mug case only, False: the cup case only, None: both.
opt_eval.mug_only = None

# The Objectron paper (https://arxiv.org/abs/2012.09988) treats mug as symmetric
# too; this option exists to allow a fair comparison with it.
opt_eval.eval_mug_symmetric = True

In [67]:
# Object category, shared by the evaluator and the detector.
# Alternative: 'cereal_box'; the parser default is 'bike'.
opt_detector.c = opt_eval.eval_c = 'chair'

# Network architecture, shared as well (the parser default is 'dla_34').
opt_detector.arch = opt_eval.eval_arch = 'dlav1_34'

# The detector takes these two values from the evaluation options.
opt_detector.vis_thresh = opt_eval.eval_confidence_thresh
opt_detector.rep_mode = opt_eval.eval_rep_mode

In [68]:
# Derive the report file name and the checkpoint path; 'v1' architectures use
# a '_v1' infix in both.
if 'v1' not in opt_detector.arch:
    opt_eval.report_file = f'{opt_detector.c}_report_{opt_eval.eval_confidence_thresh}.txt'
    opt_detector.load_model = f"../../../models/CenterPose/{opt_detector.c}_{opt_eval.eval_weight_id}.pth"
else:
    opt_eval.report_file = f'{opt_detector.c}_v1_report_{opt_eval.eval_confidence_thresh}.txt'
    opt_detector.load_model = f"../../../models/CenterPose/{opt_detector.c}_v1_{opt_eval.eval_weight_id}.pth"


In [69]:
# Show the save id and the representation mode, one value per line.
print(opt_eval.eval_save_id, opt_eval.eval_rep_mode, sep='\n')

0
1


In [70]:
# Experiment naming rules: the report file name accumulates one tag per active
# option, and eval_save_id selects the output sub-folder.
def _tag_report_file(tag):
    """Replace the report file's current extension with ``tag``."""
    opt_eval.report_file = os.path.splitext(opt_eval.report_file)[0] + tag


opt_eval.eval_save_id = opt_eval.eval_rep_mode

if opt_detector.nms == True:
    _tag_report_file('_nms.txt')

# One tag per keypoint representation mode; any other mode adds no tag.
_rep_mode_tag = {
    0: '_8rep.txt',
    1: '_16rep.txt',
    2: '_samplerep.txt',
    3: '_disrep.txt',
    4: '_hmrep.txt',
}.get(opt_detector.rep_mode)
if _rep_mode_tag is not None:
    _tag_report_file(_rep_mode_tag)

# Special configurations override the save id; a later match wins over an earlier one.
if opt_eval.eval_arch == 'dla_34':
    opt_eval.eval_save_id = 5
for _is_active, _tag, _save_id in (
    (opt_eval.eval_MobilePose_postprocessing == True, '_MobilePose.txt', 6),
    (opt_eval.eval_gt_scale == True, '_gtscale.txt', 7),
    (opt_eval.eval_mug_symmetric == False, '_partsymmetry.txt', 8),
):
    if _is_active:
        _tag_report_file(_tag)
        opt_eval.eval_save_id = _save_id

# Always append the sample count, the experiment id and the weight id, in that order.
for _tag in (
    f'_{opt_eval.eval_max_num}.txt',
    f'_{opt_eval.eval_exp_id}.txt',
    f'_{opt_eval.eval_weight_id}.txt',
):
    _tag_report_file(_tag)

if opt_eval.eval_debug == True or opt_eval.eval_debug_json == True:
    _save_dir = f'{opt_eval.outf}/{opt_detector.c}_{opt_eval.eval_save_id}'

    if opt_eval.eval_debug_clean == True and opt_eval.eval_continue != True:
        # Clean up debug/
        if os.path.isdir(_save_dir):
            shutil.rmtree(_save_dir)
        # Clean up demo/
        _demo_dir = os.path.join('demo/', f'{os.path.splitext(os.path.basename(opt_detector.load_model))[0]}')
        if os.path.exists(_demo_dir):
            shutil.rmtree(_demo_dir)

    # Make sure the output root exists ...
    if not os.path.isdir(f'{opt_eval.outf}'):
        os.mkdir(f'{opt_eval.outf}')
        print(f'created folder {opt_eval.outf}/')
    else:
        print(f'folder {opt_eval.outf}/ exists')

    # ... and the per-category / per-save-id folder inside it.
    if not os.path.isdir(_save_dir):
        os.mkdir(_save_dir)
        print(f'created folder {opt_eval.outf}/{opt_detector.c}_{opt_eval.eval_save_id}')
    else:
        print(f'folder {opt_eval.outf}/{opt_detector.c}_{opt_eval.eval_save_id} exists')

# Detector-side switches, set before the options object is finalised.
opt_detector.use_pnp = True
opt_detector.obj_scale = True

# Run the detector options through the project's parse/init steps.
opt_detector = opts().parse(opt_detector)
opt_detector = opts().init(opt_detector)
opt_detector.eval_data = f'/{opt_detector.c}/{opt_detector.c}_test*'

# Single namespace holding both option sets; a name present in both raises TypeError.
opt_combined = argparse.Namespace(**vars(opt_eval), **vars(opt_detector))


folder custom_eval// exists
created folder custom_eval//chair_1
Fix size testing.
training chunk_sizes: [32]
The output will be saved to  /home/CenterPose/src/tools/objectron_eval/../../lib/../../exp/object_pose/default
heads {'hm': 1, 'wh': 2, 'hps': 16, 'reg': 2, 'hm_hp': 8, 'hp_offset': 2, 'scale': 3}


In [71]:
# Number of combined options, then every attribute with its value in
# lexicographic order of the attribute name.
_combined_options = vars(opt_combined)
print(len(_combined_options))
for attr in sorted(_combined_options):
    value = _combined_options[attr]
    print(f"{attr}: {value}")

188
K: 100
KL_kps_uncertainty: 0.1
KL_scale_uncertainty: 0.1
R: 20
arch: dlav1_34
aug_rot: 0
balance_coefficient: {'bike': 2, 'book': 2, 'bottle': 2, 'camera': 2, 'cereal_box': 2, 'chair': 2, 'cup': 2, 'mug': 2, 'laptop': 2, 'shoe': 2}
batch_size: 32
c: chair
cam_intrinsic: None
center_3D: False
center_thresh: 0.3
chunk_sizes: [32]
conf_border: {'bike': [3, 9], 'book': [3, 9], 'bottle': [3, 9], 'camera': [3, 9], 'cereal_box': [3, 9], 'chair': [3, 9], 'cup': [3, 9], 'mug': [3, 9], 'laptop': [3, 9], 'shoe': [3, 9]}
data_dir: /home/CenterPose/src/tools/objectron_eval/../../lib/../../data
data_generation_mode_ratio: 0
dataset: objectron
debug: 0
debug_dir: /home/CenterPose/src/tools/objectron_eval/../../lib/../../exp/object_pose/default/debug
debugger_theme: white
demo: 
demo_save: ../demo/
dense_hp: False
down_ratio: 4
empty_pre_hm: False
eval_CenterPose_initialization: False
eval_MobilePose_postprocessing: False
eval_R: 20
eval_add_noise: False
eval_arch: dlav1_34
eval_c: chair
eval_conf

In [81]:
from lib.utils.debugger import Debugger
from lib.detectors.object_pose import ObjectPoseDetector
from lib.utils.pnp.cuboid_pnp_shell import pnp_shell_alter
class MyObjectPoseDetector(ObjectPoseDetector):
    """ObjectPoseDetector whose ``run`` solves PnP through ``pnp_shell_alter``.

    ``run`` also echoes ``meta_inp`` back in its result. The debug outputs that
    used to follow the tracker update (``show_results`` / ``save_results`` /
    ``save_results_eval`` and the per-object JSON dictionary feeding them) had
    been commented out in this override and are not part of it; only the
    heat-map debug hook (``self.debug``, for ``opt.debug >= 2``) remains.
    """

    def __init__(self, opt):
        super().__init__(opt)

    def _sample_gmm_points(self, bbox, n_sample=20):
        """Return ``8 * n_sample`` 2D points sampled from per-keypoint GMMs (rep_mode 2).

        For each of the 8 keypoints, Gaussian samples are drawn around the
        heat-map estimate and the displacement estimate, a 2-component Gaussian
        mixture is fitted to them and ``n_sample`` points are drawn from it.
        """
        points = []
        for i in range(8):
            # 1. Heatmap
            keypoint_heatmap_mean = [bbox['kps_heatmap_mean'][i * 2], bbox['kps_heatmap_mean'][i * 2 + 1]]
            keypoint_heatmap_std = [bbox['kps_heatmap_std'][i * 2], bbox['kps_heatmap_std'][i * 2 + 1]]

            # 2. Displacement (shares the heat-map spread)
            kps_displacement_mean = [bbox['kps_displacement_mean'][i * 2],
                                     bbox['kps_displacement_mean'][i * 2 + 1]]
            kps_displacement_std = keypoint_heatmap_std

            # Fit a GMM by sampling from keypoint_displacement & keypoint_heatmap distributions
            X_train = []
            if keypoint_heatmap_mean[0] < -5000 or keypoint_heatmap_mean[1] < -5000:
                # Large negative heat-map coordinates act as a "no heat-map
                # estimate" marker here: sample around the displacement
                # estimate only, with a fixed spread.
                kps_displacement_std = [5, 5]
                X_train.append(np.random.multivariate_normal(
                    np.array(kps_displacement_mean),
                    np.array([[kps_displacement_std[0], 0], [0, kps_displacement_std[1]]]), size=1000))
            else:
                # Use the heat-map spread for the covariance diagonal, like the
                # displacement sample below. The previous code passed the mean
                # coordinates here, which made the spread scale with the image
                # position (and is not a valid covariance for negative values).
                X_train.append(np.random.multivariate_normal(
                    np.array(keypoint_heatmap_mean),
                    np.array([[keypoint_heatmap_std[0], 0], [0, keypoint_heatmap_std[1]]]), size=500))
                X_train.append(np.random.multivariate_normal(
                    np.array(kps_displacement_mean),
                    np.array([[kps_displacement_std[0], 0], [0, kps_displacement_std[1]]]), size=500))

            X_train = np.array(X_train).reshape(-1, 2)
            # NOTE(review): `mixture` is not imported in the visible part of this
            # notebook; this branch presumably needs `from sklearn import mixture`
            # — confirm before using rep_mode 2.
            clf = mixture.GaussianMixture(n_components=2, covariance_type='full')
            clf.fit(X_train)

            # clf.sample returns (points, component labels); keep both as one (n, 3) block.
            points_sample = clf.sample(n_sample)
            points_sample = np.hstack((points_sample[0], np.array(points_sample[1]).reshape(-1, 1)))
            points.append(points_sample)

        points = np.array(points).reshape(-1, 3)
        # Do not need labels for pnp
        return points[:, 0:2]

    def _points_for_pnp(self, bbox, meta):
        """Select the 2D points handed to PnP for one detection, by ``opt.rep_mode``."""
        rep_mode = self.opt.rep_mode

        if rep_mode in (0, 3, 4):
            # 8 representation from centernet
            return [(x[0], x[1]) for x in np.array(bbox['kps']).reshape(-1, 2)]

        if rep_mode == 1:
            # 16 representation: displacement keypoints interleaved with heat-map keypoints.
            # NOTE(review): only the displacement keypoints are divided by the
            # image width/height; the heat-map keypoints are passed through
            # unchanged. Confirm that pnp_shell_alter expects this mix.
            points_1 = np.array(bbox['kps_displacement_mean']).reshape(-1, 2)
            points_1 = [(x[0] / meta['width'], x[1] / meta['height']) for x in points_1]

            points_2 = np.array(bbox['kps_heatmap_mean']).reshape(-1, 2)
            points_2 = [(x[0], x[1]) for x in points_2]

            return np.hstack((points_1, points_2)).reshape(-1, 2)

        if rep_mode == 2:
            return self._sample_gmm_points(bbox)

        # Previously an unknown mode silently reused the points of the previous
        # detection (or failed with a NameError on the first one).
        raise ValueError(f'Unsupported rep_mode: {rep_mode!r}')

    def run(self, image_or_path_or_tensor, filename=None, meta_inp=None, preprocessed_flag=False):
        """Detect objects in one image and estimate their pose.

        Args:
            image_or_path_or_tensor: a ``numpy`` image, a path string (read
                with ``cv2.imread``), or otherwise a mapping whose ``'image'``
                entry holds an already pre-processed tensor batch.
            filename: accepted for interface compatibility; it was only
                consumed by the result-saving calls that are disabled in this
                override.
            meta_inp: metadata forwarded to ``pre_process`` (or used directly
                as ``meta`` for pre-processed input). Defaults to a fresh
                empty dict.
            preprocessed_flag: treat the input image as already pre-processed.

        Returns:
            dict with ``results`` (merged detections), ``boxes`` (non-``None``
            PnP results), ``output`` (raw network output of the last scale),
            ``meta_inp`` and the timings ``tot``, ``load``, ``pre``, ``net``,
            ``dec``, ``post``, ``merge``, ``pnp`` and ``track``.

        Raises:
            FileNotFoundError: if a path is given and the image cannot be read.
            ValueError: if ``opt.use_pnp`` is set and ``opt.rep_mode`` is unknown.
        """
        # A shared `{}` default would leak state between calls.
        if meta_inp is None:
            meta_inp = {}

        load_time, pre_time, net_time, dec_time, post_time = 0, 0, 0, 0, 0
        merge_time, track_time, pnp_time, tot_time = 0, 0, 0, 0
        debugger = Debugger(dataset=self.opt.dataset, ipynb=(self.opt.debug == 3),
                            theme=self.opt.debugger_theme)
        start_time = time.time()

        pre_processed = preprocessed_flag

        if isinstance(image_or_path_or_tensor, np.ndarray):
            # For eval or for CenterPose as data generator
            image = image_or_path_or_tensor
        elif isinstance(image_or_path_or_tensor, str):
            # For demo
            image = cv2.imread(image_or_path_or_tensor)
            # cv2.imread signals failure by returning None instead of raising.
            if image is None:
                raise FileNotFoundError(f'Could not read image: {image_or_path_or_tensor}')
        else:
            # Not used yet
            image = image_or_path_or_tensor['image'][0].numpy()
            pre_processed = True

        loaded_time = time.time()
        load_time += (loaded_time - start_time)

        detections = []
        for scale in self.scales:
            scale_start_time = time.time()
            if not pre_processed:
                images, meta = self.pre_process(image, scale, meta_inp)
            else:
                # Used for data generation: add the batch axis (1 * 3 * 512 * 512)
                images = np.expand_dims(image, axis=0)
                images = torch.from_numpy(images)
                meta = meta_inp

            images = images.to(self.opt.device)

            # initializing tracker
            pre_hms, pre_hm_hp, pre_inds = None, None, None

            if self.opt.refined_Kalman:
                self.tracker.init_track(meta)

            if self.opt.tracking_task:
                # initialize the first frame
                if self.pre_images is None:
                    print('Initialize tracking!')
                    self.pre_images = images
                    self.tracker.init_track(meta)

                # Initialize if given gt_pre_hm_hmhp
                elif self.opt.gt_pre_hm_hmhp or (self.opt.gt_pre_hm_hmhp_first and meta['id'] == 0):
                    self.tracker.init_track(meta)

                if self.opt.pre_hm or self.opt.pre_hm_hp:
                    # render input heatmap from tracker status
                    # pre_inds is not used in the current version.
                    # We used pre_inds for learning an offset from previous image to
                    # the current image.
                    pre_hms, pre_hm_hp, pre_inds = self._get_additional_inputs(
                        self.tracker.tracks, meta, with_hm=self.opt.pre_hm, with_hm_hp=self.opt.pre_hm_hp)

            torch.cuda.synchronize()
            pre_process_time = time.time()
            pre_time += pre_process_time - scale_start_time

            # run the network
            # output: the output feature maps, only used for visualizing
            # dets: output tensors after extracting peaks
            output, dets, forward_time = self.process(
                images, self.pre_images, pre_hms, pre_hm_hp, pre_inds, return_time=True)

            torch.cuda.synchronize()
            net_time += forward_time - pre_process_time
            decode_time = time.time()
            dec_time += decode_time - forward_time

            if self.opt.debug >= 2:
                # Mainly save keypoint heatmap & displacement for debug; a deep
                # copy is passed so the hook cannot alter the detections.
                self.debug(debugger, images, copy.deepcopy(dets), output, scale, pre_hms, pre_hm_hp)

            # convert the cropped and 4x downsampled output coordinate system
            # back to the input image coordinate system
            dets = self.post_process(dets, meta, scale)
            torch.cuda.synchronize()
            post_process_time = time.time()
            post_time += post_process_time - decode_time

            detections.append(dets)

        # Mainly apply NMS
        results = self.merge_outputs(detections)
        torch.cuda.synchronize()
        merge_outputs_time = time.time()
        merge_time += merge_outputs_time - post_process_time

        # The goal is to get 2d projection of keypoints & 6-DoF & 3d keypoint in camera frame
        boxes = []
        if self.opt.use_pnp:
            for bbox in results:
                points_filtered = self._points_for_pnp(bbox, meta)
                ret = pnp_shell_alter(self.opt, meta, bbox, points_filtered, bbox['obj_scale'],
                                      OPENCV_RETURN=self.opt.show_axes)
                # Detections without a PnP result are dropped from `boxes`.
                if ret is not None:
                    boxes.append(ret)

        pnp_process_time = time.time()
        pnp_time += pnp_process_time - merge_outputs_time

        # Tracker update
        if self.opt.tracking_task:
            results, boxes = self.tracker.step(results, boxes)
            self.pre_images = images
        # For baseline (CenterPose + Kalman)
        elif self.opt.refined_Kalman:
            results, boxes = self.tracker.step(results, boxes)

        end_time = time.time()
        track_time += end_time - pnp_process_time
        tot_time += end_time - start_time

        # Save results for debug, boxes for evaluation, output is the original network output
        # Todo: Actually results could be combined with boxes
        return {'results': results, 'boxes': boxes, 'output': output, 'meta_inp' : meta_inp,'tot': tot_time, 'load': load_time,
                'pre': pre_time, 'net': net_time, 'dec': dec_time,
                'post': post_time, 'merge': merge_time, 'pnp': pnp_time, 'track': track_time}




In [89]:
from eval_image_official import Evaluator

class MyEvaluator(Evaluator):
    """Evaluator that runs ``MyObjectPoseDetector`` on image files with 2D keypoint labels.

    ``evaluate`` is only partially ported from the parent ``Evaluator``: the
    detection stage and the label normalisation run, but the matching/metric
    stage still refers to ground-truth variables that this method never
    defines (see the FIXME inside ``evaluate``).
    """

    def __init__(self, opt):
        """Initialise the parent evaluator, then replace its detector.

        NOTE(review): the saved cell output prints "Creating model..." twice,
        presumably once from the parent constructor and once from the detector
        built here - confirm whether the first load can be avoided.
        """
        super().__init__(opt)
        self.detector = MyObjectPoseDetector(self.opt)

    def predict(self, image):
        """Run the detector on a single image and return its result dict.

        The image is copied, passed through ``cv2.COLOR_RGB2BGR`` and handed to
        ``self.detector.run`` together with ``meta_inp={'image_shape': ...}``.
        """
        # NOTE(review): evaluate() feeds this method the output of cv2.imread,
        # which OpenCV documents as BGR. The swap below therefore produces RGB
        # data - confirm which channel order the detector expects.
        image = cv2.cvtColor(image.copy(), cv2.COLOR_RGB2BGR)
        meta = {"image_shape": image.shape}

        ret = self.detector.run(image, meta_inp=meta)

        return ret

    def _global_sample_id(self, local_id):
        """Translate a 1-based position inside the current batch into a global sample id.

        Assumes ``self.NUM_SAMPLE`` is the number of samples consumed so far,
        including the current batch - TODO confirm against the driver loop.
        """
        remainder = self.NUM_SAMPLE % self.opt.batch_size
        # A zero remainder is treated as a full-sized batch.
        samples_in_batch = remainder if remainder else self.opt.batch_size
        return self.NUM_SAMPLE - samples_in_batch + local_id

    def _is_skipped(self, global_id):
        """Return True when ``opt.eval_skip`` or ``opt.eval_subset_list`` excludes the sample."""
        skip = self.opt.eval_skip
        if isinstance(skip, list):
            # A list names the exact ids to skip ...
            if global_id in skip:
                return True
        elif global_id < skip:
            # ... anything else is used as a lower bound on the ids to evaluate.
            return True

        subset = self.opt.eval_subset_list
        return subset is not None and global_id not in subset

    def evaluate(self, batch):
        """Run detection and the (partially ported) metric accumulation for one batch.

        Args:
            batch: Pair ``(image_paths, labels)``. ``batch[0]`` holds paths that
                are read with ``cv2.imread``; ``batch[1]`` holds one label per
                image, each iterated as ``(x, y)`` pairs and divided by the
                image width/height.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot load one of the paths.
        """
        images, labels = [], []

        # 1. Load every image of the batch and collect the labels.
        for image_path in batch[0]:
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError(f'cv2.imread could not read image: {image_path}')
            images.append(image)

        for label in batch[1]:
            labels.append(label)

        # It can be incorporated into the next for block if we support batch processing.
        # Since we use pnp here, not valid for now.
        results = []
        # 2. Run the detector; skipped samples keep an empty placeholder so that
        #    results stays aligned with images/labels.
        for local_id, (image, _) in enumerate(zip(images, labels), start=1):
            if self._is_skipped(self._global_sample_id(local_id)):
                results.append([])
                continue

            results.append(self.predict(image))

        # 3. Compare predictions with the labels. The image is zipped in so that
        #    later uses of `image` refer to the sample being evaluated.
        for local_id, (image, ret, label) in enumerate(zip(images, results, labels), start=1):
            global_id = self._global_sample_id(local_id)

            # Check the skip rules before touching `ret`: skipped samples hold an
            # empty placeholder, which cannot be indexed by key.
            if self._is_skipped(global_id):
                continue

            boxes = ret['boxes']
            meta_inp = ret['meta_inp']

            # # Extract gt info
            # instances_scale = label['scale_instance']
            print('label : \n', label)
            print('image_shape : \n', meta_inp['image_shape'])
            # image_shape is (height, width, channels): x is divided by index 1, y by index 0.
            normalized_label = [[x / meta_inp['image_shape'][1], y / meta_inp['image_shape'][0]] for x, y in label]
            print('normalized_label : \n', normalized_label)
            # instances_3d = label['3d_instance']
            # instances_Mo2c = label['Mo2c_instance']
            # if self.opt.c == 'cup':
            #     instances_MugFlag = label['MugFlag_instance']

            #     if self.opt.mug_only == True:
            #         # Only count the case with mug
            #         if all(np.invert(label['MugFlag_instance'])) == True:
            #             continue


            #     elif self.opt.mug_only == False:
            #         # Only count the case with cup
            #         if all(np.invert(label['MugFlag_instance'])) == False:
            #             continue

            # visibilities = label['visibility']

            # num_instances = 0
            # for instance, instance_3d, visibility in zip(
            #         instances_2d, instances_3d, visibilities):
            #     if (visibility > self._vis_thresh and
            #             self._is_visible(instance[0]) and instance_3d[0, 2] < 0):
            #         num_instances += 1

            # # We don't have negative examples in evaluation.
            # if num_instances == 0:
            #     continue

            # scale_hit_miss = metrics.HitMiss(self._scale_thresholds)
            # iou_hit_miss = metrics.HitMiss(self._iou_thresholds)
            # azimuth_hit_miss = metrics.HitMiss(self._azimuth_thresholds)
            # polar_hit_miss = metrics.HitMiss(self._polar_thresholds)
            # pixel_hit_miss = metrics.HitMiss(self._pixel_thresholds)
            # add_hit_miss = metrics.HitMiss(self._add_thresholds)
            # adds_hit_miss = metrics.HitMiss(self._adds_thresholds)

            # # For debug
            # pred_box_list = []
            # gt_box_list = []

            # # Save gt info for Stephen
            # M_c2w = np.linalg.inv(view)
            # dict_save = {
            #     'filename': filename,
            #     'camera_pose': M_c2w.tolist(),  # M_c2w
            #     'camera_intrinsics': cam_intrinsic.tolist(),  # has been transformed to list
            #     'image_id': int(label['image_id']),
            #     "objects": [],
            # }

            # FIXME: everything below still uses names that this method never
            # defines: instances_2d, visibilities, instances_scale, instances_3d,
            # instances_Mo2c, instances_MugFlag, projection_matrix, cam_intrinsic,
            # plane, M_c2w, filename, num_instances, dict_save, pred_box_list,
            # gt_box_list and the *_hit_miss accumulators. box_point_3d is only
            # assigned inside the two optional branches. The first use raises
            # NameError (the saved cell output shows it for instances_2d). The
            # commented-out assignments above show how the names used to be
            # derived; they need an equivalent for the 2D-only labels used here.
            num_matched = 0

            for idx_box, box in enumerate(boxes):

                # Correspond to one prediction in one image
                # box_point_2d, box_point_3d, relative_scale, box_point_2d_ori, result_ori = box
                box_point_2d, relative_scale, box_point_2d_ori, result_ori = box

                print("box_point_2d : \n", box_point_2d)
                print("relative_scale : \n", relative_scale)
                print("box_point_2d_ori : \n", box_point_2d_ori)
                print("result_ori : \n", result_ori)
                # Todo:
                if self.opt.eval_MobilePose_postprocessing == True:
                    box_point_2d, box_point_3d = self.Lift2DTo3D(projection_matrix, result_ori, image.shape[0],
                                                                 image.shape[1])

                index = self.match_box(box_point_2d, instances_2d, visibilities)
                if index >= 0:
                    num_matched += 1

                    # Apply gt_scale to recalculate pnp
                    if self.opt.eval_gt_scale == True:
                        result_gt_scale = self.predict_gt_scale(result_ori, instances_scale[index], cam_intrinsic)
                        if result_gt_scale is not None:
                            box_point_2d, box_point_3d, _, _, _ = result_gt_scale

                    # If you only compute the 3D bounding boxes from RGB images,
                    # your 3D keypoints may be upto scale. However the ground truth
                    # is at metric scale. There is a hack to re-scale your box using
                    # the ground planes (assuming your box is sitting on the ground).
                    # However many models learn to predict depths and scale correctly.

                    if not self.opt.use_absolute_scale:
                        scale = self.compute_scale(box_point_3d, plane)
                        box_point_3d = box_point_3d * scale
                        boxes[idx_box] = list(boxes[idx_box])
                        boxes[idx_box].append(box_point_3d)

                    print(f'Sample {global_id}')
                    print(f'GT: {instances_scale[index] / instances_scale[index][1]}')
                    print(f'Pred: {relative_scale / relative_scale[1]}')
                    if self.opt.c == 'cup':
                        pixel_error = self.evaluate_2d(box_point_2d, instances_2d[index], instances_3d[index],
                                                       instances_Mo2c[index], projection_matrix,
                                                       instances_MugFlag[index])
                        azimuth_error, polar_error, iou, pred_box, gt_box, add, adds = self.evaluate_3d(
                            box_point_3d, instances_3d[index], instances_Mo2c[index],
                            instances_MugFlag[index])

                    else:
                        pixel_error = self.evaluate_2d(box_point_2d, instances_2d[index], instances_3d[index],
                                                       instances_Mo2c[index], projection_matrix)
                        azimuth_error, polar_error, iou, pred_box, gt_box, add, adds = self.evaluate_3d(
                            box_point_3d, instances_3d[index], instances_Mo2c[index])

                    # Record some predictions
                    M_o2w = M_c2w @ instances_Mo2c[index]
                    instances_3d_w = M_c2w @ np.hstack(
                        (instances_3d[index], np.ones((instances_3d[index].shape[0], 1)))).T

                    instances_3d_w = instances_3d_w[:3, :].T

                    keypoint_2d_gt = [np.multiply(keypoint, np.asarray([self.width, self.height], np.float32)) for
                                      keypoint in instances_2d[index]]

                    result_pnp = [np.multiply(keypoint, np.asarray([self.width, self.height], np.float32)) for
                                  keypoint in box_point_2d]

                    scale_error = self.evaluate_scale(relative_scale, instances_scale[index])

                    print(f'Scale_error: {scale_error}')
                    print('\n')

                    dict_obj = {
                        'class': self.opt.c,
                        'keypoint_2d_pred_displacement': np.array(
                            result_ori['kps_displacement_mean']).reshape(1, -1).tolist(),
                        'keypoint_2d_pred_heatmap': np.array(result_ori['kps_heatmap_mean']).reshape(1, -1).tolist(),
                        'keypoint_2d_pred_pnp': np.array(result_pnp).reshape(1, -1).tolist(),
                        'keypoint_2d_gt': np.array(keypoint_2d_gt).reshape(1, -1).tolist(),
                        'relative_scale': relative_scale.tolist(),
                        'object_pose_gt_w': M_o2w.tolist(),  # 4x4 matrix
                        'keypoint_3d_gt_w': instances_3d_w.tolist(),  # 9x3 array
                        'keypoint_3d_pred_unscaled_c': np.array(boxes[idx_box][1]).reshape(1, -1).tolist(),  # 27 list
                        'keypoint_3d_pred_scaled_c': np.array(boxes[idx_box][5]).reshape(1, -1).tolist(),  # 27 list
                        '3DIoU': iou,
                        'error_2Dpixel': pixel_error,
                        'error_azimuth': azimuth_error,
                        'error_polar_error': polar_error,
                        'error_scale': scale_error
                    }
                    dict_save['objects'].append(dict_obj)

                    pred_box_list.append(pred_box)
                    gt_box_list.append(gt_box)

                    conf = result_ori['score']

                else:
                    # Unmatched prediction: record the worst value for every metric.
                    conf = 0
                    pixel_error = _MAX_PIXEL_ERROR
                    azimuth_error = _MAX_AZIMUTH_ERROR
                    polar_error = _MAX_POLAR_ERROR
                    iou = 0.
                    add = _MAX_DISTANCE
                    adds = _MAX_DISTANCE
                    scale_error = _MAX_SCALE_ERROR

                if METRIC_UPDATED:
                    # New
                    scale_hit_miss.record_hit_miss([scale_error, conf], greater=False)
                    iou_hit_miss.record_hit_miss([iou, conf])
                    add_hit_miss.record_hit_miss([add, conf], greater=False)
                    adds_hit_miss.record_hit_miss([adds, conf], greater=False)
                    pixel_hit_miss.record_hit_miss([pixel_error, conf], greater=False)
                    azimuth_hit_miss.record_hit_miss([azimuth_error, conf], greater=False)
                    polar_hit_miss.record_hit_miss([polar_error, conf], greater=False)
                else:
                    # Old
                    scale_hit_miss.record_hit_miss(scale_error, greater=False)
                    iou_hit_miss.record_hit_miss(iou)
                    add_hit_miss.record_hit_miss(add, greater=False)
                    adds_hit_miss.record_hit_miss(adds, greater=False)
                    pixel_hit_miss.record_hit_miss(pixel_error, greater=False)
                    azimuth_hit_miss.record_hit_miss(azimuth_error, greater=False)
                    polar_hit_miss.record_hit_miss(polar_error, greater=False)

            if self.opt.eval_debug_json == True:
                json_filename = f'{self.opt.outf}/{self.opt.c}_{self.opt.eval_save_id}/{filename}_{global_id}_record.json'
                with open(json_filename, 'w+') as fp:
                    json.dump(dict_save, fp, indent=4, sort_keys=True)

            # For debug
            if self.opt.eval_debug == True:
                # if self.opt.eval_debug == True and iou<self.opt.eval_debug_save_thresh:
                self.debug(image.copy(), num_instances, instances_2d, instances_3d, projection_matrix, boxes,
                           instances_scale, filename, pred_box_list, gt_box_list, global_id)

            self._scale_ap.append(scale_hit_miss, len(instances_2d))
            self._iou_ap.append(iou_hit_miss, len(instances_2d))
            self._pixel_ap.append(pixel_hit_miss, len(instances_2d))
            self._azimuth_ap.append(azimuth_hit_miss, len(instances_2d))
            self._polar_ap.append(polar_hit_miss, len(instances_2d))
            self._add_ap.append(add_hit_miss, len(instances_2d))
            self._adds_ap.append(adds_hit_miss, len(instances_2d))
            self._matched += num_matched

        


In [90]:
from torch.utils.data import DataLoader
# Build the evaluator from the combined options; the "Creating model..." /
# "loaded ..." lines in this cell's output are printed during this construction.
evaluator = MyEvaluator(opt_combined)
# Legacy input pipeline (TFRecord shards from the Objectron bucket), kept for reference:
# objectron_buckett = 'gs://objectron/v1/records_shuffled'
# eval_shards = tf.io.gfile.glob(objectron_buckett + opt.eval_data)
# ds = tf.data.TFRecordDataset(eval_shards).take(opt.eval_max_num)
def custom_collate_fn(batch):
    """Split a batch of samples into two parallel lists.

    Args:
        batch: Sequence of samples shaped like ``[(image1, dict1), (image2, dict2), ...]``.

    Returns:
        A tuple ``(images, dicts)`` holding element 0 and element 1 of every
        sample, in the original order.
    """
    firsts = list(map(lambda sample: sample[0], batch))
    seconds = list(map(lambda sample: sample[1], batch))
    return firsts, seconds

# DataLoader setup: keep dataset order and let custom_collate_fn split each
# batch into (first elements, second elements).
dataloader = DataLoader(my_dataset, batch_size=opt_combined.batch_size, shuffle=False, collate_fn=custom_collate_fn)

for batch in tqdm.tqdm(dataloader):

    # print(batch)
    # print(len(batch))
    # print(len(batch[0]))

    # evaluate() derives each global sample id from NUM_SAMPLE and
    # opt.batch_size, i.e. it treats NUM_SAMPLE as a running count of samples
    # that already includes the current batch. Advance it by the number of
    # samples in this batch (a fixed +1 only works when batch_size == 1).
    evaluator.NUM_SAMPLE = evaluator.NUM_SAMPLE + len(batch[0])

    evaluator.evaluate(batch)

    # Debug: stop after the first batch. Remove to evaluate the whole dataset.
    break
    # if evaluator.NUM_SAMPLE in [1000,5000,10000,20000,50000,100000]:
    #     evaluator.finalize()
    #     evaluator.write_report( report_file= os.path.splitext(opt.report_file)[0] + f'_{evaluator.NUM_SAMPLE}.txt')

Creating model...


loaded ../../../models/CenterPose/chair_v1_140.pth, epoch 140
Creating model...
loaded ../../../models/CenterPose/chair_v1_140.pth, epoch 140


  0%|          | 0/9 [00:01<?, ?it/s]

label : 
 [[1237, 559], [1086, 551], [1344, 599], [1332, 687], [1086, 633], [1160, 431], [1389, 464], [1385, 533], [1158, 494]]
image_shape : 
 (1080, 1920, 3)
normalized_label : 
 [[0.6442708333333333, 0.5175925925925926], [0.565625, 0.5101851851851852], [0.7, 0.5546296296296296], [0.69375, 0.6361111111111111], [0.565625, 0.5861111111111111], [0.6041666666666666, 0.3990740740740741], [0.7234375, 0.42962962962962964], [0.7213541666666666, 0.4935185185185185], [0.603125, 0.45740740740740743]]
box_point_2d : 
 [array([0.12011461, 0.62818146]), array([0.27649432, 0.7273408 ]), array([0.02368109, 0.20974551]), array([0.18852365, 0.22672388]), array([0.22894239, 0.4559918 ]), array([0.37535554, 0.49913502]), array([0.16074832, 0.06825723]), array([0.32991973, 0.05733914])]
relative_scale : 
 [0.7418223  1.0003369  0.76515776]
box_point_2d_ori : 
 [[0.21055368 0.35766687]
 [0.12499993 0.61111079]
 [0.28906253 0.69444434]
 [0.02368109 0.20974551]
 [0.20312494 0.22222201]
 [0.21093756 0.444443




NameError: name 'instances_2d' is not defined