TODO:
- Reproduce the error for image loading
- Assert cam_R_w2c with reprojection
- How can I use @pytest.fixture and unit_test.mock (mock = MagicMock())

In [44]:
import os
import time
import json

from collections import OrderedDict
import yaml
import argparse

import torch
import numpy as np
import pandas as pd
import pickle as pkl
import logging
from cosypose.config import EXP_DIR, MEMORY, RESULTS_DIR, LOCAL_DATA_DIR

import cosypose.utils.tensor_collection as tc

from cosypose.datasets.datasets_cfg import make_scene_dataset, make_object_dataset


In [46]:
n_workers = 8
n_plotters = 8
n_views = 1

n_frames = None
scene_id = None
group_id = None
n_groups = None
skip_mv = n_views < 2
skip_predictions = False

object_set = 'bracket_assembly'
coarse_run_id = f'bracket_assembly_coarse-transnoise-zxyavg-616093'
refiner_run_id = 'bracket_assembly_refiner--558735'
n_coarse_iterations = 1
n_refiner_iterations = 2
config = ds_name = 'bracket_assembly'
comment = ''

In [47]:
scene_id = 0
n_rand = np.random.randint(1e10)
save_dir = RESULTS_DIR / f'{config}-n_views={n_views}-{comment}-{n_rand}'
scene_ds = make_scene_dataset(ds_name)
mask = scene_ds.frame_index['scene_id'] == scene_id
scene_ds.frame_index = scene_ds.frame_index[mask].reset_index(drop=True)

1 day, 22:57:48.369978 - Building index and loading annotations...


[Memory]169068.5s, 2817.8min: Loading build_index...


In [48]:
from cosypose.utils.logging import get_logger
logger = get_logger(__name__)

In [49]:
def load_custom_detection_from_gt():
    
    path_data_dir = LOCAL_DATA_DIR / 'bop_datasets' / 'bracket_assembly'
    path_scene_dir = os.path.join(path_data_dir, "train_pbr")
    scene_names = os.listdir(path_scene_dir)
    infos, poses, bboxes = [], [], []
    # debug_only = 0
    for scene_id, scene_name in enumerate(scene_names):
        path_scene_gt_info = os.path.join(path_scene_dir, scene_name, "scene_gt_info.json")
        path_scene_gt = os.path.join(path_scene_dir, scene_name, "scene_gt.json")
        path_scene_gt_camera = os.path.join(path_scene_dir, scene_name, "scene_camera.json")
        with open(path_scene_gt_info, "r") as f:
            json_data_gt_info = json.load(f)
        with open(path_scene_gt, "r") as f:
            json_data_gt = json.load(f)
        with open(path_scene_gt_camera, "r") as f:
            json_gt_camera = json.load(f)
        img_names_rgb = os.listdir(os.path.join(path_scene_dir, scene_name, "rgb"))
        # todo
        # scene_camera = load_scene_camera(path_scene_gt_camera)
        from cosypose.lib3d import Transform
        for img_id, img_name in enumerate(img_names_rgb[:-1]):
            if not f"{img_id}" in json_data_gt_info:
                continue
            if not f"{img_id}" in json_data_gt:
                continue
            if not f"{img_id}" in json_gt_camera:
                continue
            # cam_R_w2c = scene_camera[img_id]["cam_R_w2c"]
            # TODO
            cam_R_w2c = json_gt_camera[f"{img_id}"]["cam_R_w2c"]
            cam_t_w2c = json_gt_camera[f"{img_id}"]["cam_t_w2c"]
            row0 = [cam_R_w2c[0], cam_R_w2c[1], cam_R_w2c[2], cam_t_w2c[0]] 
            row1 = [cam_R_w2c[3], cam_R_w2c[4], cam_R_w2c[5], cam_t_w2c[1]]
            row2 = [cam_R_w2c[6], cam_R_w2c[7], cam_R_w2c[8], cam_t_w2c[2]]           
            row3 = [0, 0, 0, 1]
            cam_rot_loc_mat = np.asarray([row0, row1, row2, row3])


            if 'cam_R_w2c' in json_gt_camera[f"{img_id}"]:
                RC0 = np.array(json_gt_camera[f"{img_id}"]['cam_R_w2c']).reshape(3, 3)
                tC0 = np.array(json_gt_camera[f"{img_id}"]['cam_t_w2c'])# * 0.001
                TC0 = Transform(RC0, tC0)
                T0C = TC0.inverse()
                T0C = T0C.toHomogeneousMatrix()
            for label_idx, label in enumerate(json_data_gt[f"{img_id}"]):
                obj_id = label["obj_id"] # int
                list_bbox = json_data_gt_info[f"{img_id}"][label_idx]["bbox_obj"] # TODO: ?
                xmin = list_bbox[0]
                ymin = list_bbox[1]
                xmax = list_bbox[2]
                ymax = list_bbox[1] + list_bbox[3]
                list_bbox = [xmin, ymin, xmax, ymax]
                list_rot  = json_data_gt[f"{img_id}"][label_idx]["cam_R_m2c"]
                list_loc  = json_data_gt[f"{img_id}"][label_idx]["cam_t_m2c"]
                # RCO = np.array(json_data_gt[f"{img_id}"][label_idx]['cam_R_m2c']).reshape(3, 3)
                # tCO = np.array(json_data_gt[f"{img_id}"][label_idx]['cam_t_m2c']) #* 0.001
                # TCO = Transform(RCO, tCO)
                # T0O = T0C * TCO
                # T0O = T0O.toHomogeneousMatrix()

                row0 = [list_rot[0], list_rot[1], list_rot[2], list_loc[0]] 
                row1 = [list_rot[3], list_rot[4], list_rot[5], list_loc[1]]
                row2 = [list_rot[6], list_rot[7], list_rot[8], list_loc[2]]
                row3 = [0, 0, 0, 1]
                rot_loc_mat = np.asarray([row0, row1, row2, row3])
                rot_loc_mat = np.matmul(np.linalg.inv(cam_rot_loc_mat), rot_loc_mat)
                # if scene_id == 0 and img_id == 20:
                #     print("label_idx",label_idx)
                #     print("rot_loc_mat", rot_loc_mat)
                infos.append(dict(
                        scene_id=scene_id,
                        view_id=img_id,
                        score=1,
                        label=f"obj_{obj_id:06d}",
                    ))
                # poses.append(T0O)
                poses.append(rot_loc_mat)
                bboxes.append(list_bbox)
    data = tc.PandasTensorCollection(
        infos=pd.DataFrame(infos),
        poses=torch.as_tensor(np.stack(poses)).float(),
        bboxes=torch.as_tensor(np.stack(bboxes)).float(),
    ).cpu()
    return data
bracket_detections = load_custom_detection_from_gt(ds_name).cpu()
# from IPython.display import clear_output
# clear_output()

In [50]:
from pathlib import Path
import torch
LOCAL_DATA = Path('/home/ubuntu/synthetic_pose_estimation/cosypose/local_data')
DEBUG_DATA_DIR = LOCAL_DATA / 'debug_data'
debug_file = open(DEBUG_DATA_DIR/'debug_iter=1.pth.tar','rb')
debug_data = torch.load(debug_file)

In [51]:
debug_data['images']

tensor([[[[0.1176, 0.1333, 0.1373,  ..., 0.0824, 0.0706, 0.0824],
          [0.1412, 0.1451, 0.1451,  ..., 0.0824, 0.0980, 0.1216],
          [0.1529, 0.1490, 0.1412,  ..., 0.0863, 0.0941, 0.1176],
          ...,
          [0.1020, 0.1255, 0.0000,  ..., 0.0039, 0.0000, 0.0000],
          [0.0980, 0.1255, 0.0000,  ..., 0.0039, 0.0000, 0.0000],
          [0.0824, 0.1137, 0.0000,  ..., 0.0039, 0.0000, 0.0000]],

         [[0.4039, 0.4196, 0.4314,  ..., 0.5647, 0.5961, 0.5529],
          [0.4353, 0.4392, 0.4392,  ..., 0.5647, 0.6118, 0.5922],
          [0.4471, 0.4431, 0.4353,  ..., 0.5608, 0.6078, 0.5882],
          ...,
          [0.4157, 0.3529, 0.0941,  ..., 0.0000, 0.0000, 0.0039],
          [0.3686, 0.3098, 0.0667,  ..., 0.0000, 0.0000, 0.0039],
          [0.3176, 0.2627, 0.0314,  ..., 0.0000, 0.0000, 0.0039]],

         [[0.4314, 0.4471, 0.4549,  ..., 0.6863, 0.7098, 0.6510],
          [0.4588, 0.4627, 0.4627,  ..., 0.6863, 0.7294, 0.6902],
          [0.4706, 0.4667, 0.4588,  ..., 0

In [52]:
len(bracket_detections)

14917

In [53]:
def load_models(coarse_run_id, refiner_run_id=None, n_workers=8, object_set='tless'):
    if object_set == 'bracket_assembly':
        object_ds_name, urdf_ds_name = 'bracket_assembly', 'bracket_assembly'
    
    object_ds = make_object_dataset(object_ds_name)
    mesh_db = MeshDataBase.from_object_ds(object_ds)
    renderer = BulletBatchRenderer(object_set=urdf_ds_name, n_workers=n_workers)
    mesh_db_batched = mesh_db.batched().cuda()

    def load_model(run_id):
        if run_id is None:
            return
        run_dir = EXP_DIR / run_id
        cfg = yaml.unsafe_load((run_dir / 'config.yaml').read_text())
        cfg = check_update_config(cfg)
        if cfg.train_refiner:
            model = create_model_refiner(cfg, renderer=renderer, mesh_db=mesh_db_batched)
            ckpt = torch.load(run_dir / 'checkpoint.pth.tar')
        else:
            model = create_model_coarse(cfg, renderer=renderer, mesh_db=mesh_db_batched)
            ckpt = torch.load(run_dir / 'checkpoint.pth.tar')
        ckpt = ckpt['state_dict']
        model.load_state_dict(ckpt)
        model = model.cuda().eval()
        model.cfg = cfg
        return model

    coarse_model = load_model(coarse_run_id)
    refiner_model = load_model(refiner_run_id)
    model = CoarseRefinePosePredictor(coarse_model=coarse_model,
                                      refiner_model=refiner_model,
                                      bsz_objects=1)
    return model, mesh_db

In [54]:
n_frames = 20 # None
if scene_id is not None:
    mask = scene_ds.frame_index['scene_id'] == scene_id
    scene_ds.frame_index = scene_ds.frame_index[mask].reset_index(drop=True)
if n_frames is not None:
    # scene_ds.frame_index = scene_ds.frame_index.reset_index(drop=True)[:n_frames]
    scene_ds.frame_index = scene_ds.frame_index.reset_index(drop=True)[n_frames:n_frames+1]

In [55]:
len(scene_ds)

1

In [56]:
def get_pose_meters(scene_ds):
    ds_name = scene_ds.name

    compute_add = False
    spheres_overlap_check = True
    large_match_threshold_diameter_ratio = 0.5
    print("ds_name", ds_name)
    if ds_name == 'tless.primesense.test.bop19':
        targets_filename = 'test_targets_bop19.json'
        visib_gt_min = -1
        n_top = -1  # Given by targets
    elif ds_name == 'tless.primesense.test':
        targets_filename = 'all_target_tless.json'
        n_top = 1
        visib_gt_min = 0.1
    elif 'ycbv' in ds_name:
        compute_add = True
        visib_gt_min = -1
        targets_filename = None
        n_top = 1
        spheres_overlap_check = False
    elif 'bracket_assembly' in ds_name:
        targets_filename = None
        visib_gt_min = -1
        n_top = 1  # Given by targets
        spheres_overlap_check = False
    else:
        raise ValueError

    if 'tless' in ds_name:
        object_ds_name = 'tless.eval'
    elif 'ycbv' in ds_name:
        object_ds_name = 'ycbv.bop-compat.eval'  # This is important for definition of symmetric objects
    elif 'bracket_assembly' in ds_name:
        object_ds_name = 'bracket_assembly'
    else:
        raise ValueError

    if targets_filename is not None:
        targets_path = scene_ds.ds_dir / targets_filename
        targets = pd.read_json(targets_path)
        targets = remap_bop_targets(targets)
    else:
        targets = None

    object_ds = make_object_dataset(object_ds_name)
    print("object_ds_name", object_ds_name)
    mesh_db = MeshDataBase.from_object_ds(object_ds)

    error_types = ['ADD-S'] + (['ADD(-S)'] if compute_add else [])

    base_kwargs = dict(
        mesh_db=mesh_db,
        # exact_meshes=True,
        # sample_n_points=None,
        exact_meshes=False,
        sample_n_points=100,
        errors_bsz=1,

        # BOP-Like parameters
        n_top=n_top,
        visib_gt_min=visib_gt_min,
        targets=targets,
        spheres_overlap_check=spheres_overlap_check,
    )

    meters = dict()
    for error_type in error_types:
        # For measuring ADD-S AUC on T-LESS and average errors on ycbv/tless.
        meters[f'{error_type}_ntop=BOP_matching=OVERLAP'] = PoseErrorMeter(
            error_type=error_type, consider_all_predictions=False,
            match_threshold=large_match_threshold_diameter_ratio,
            report_error_stats=True, report_error_AUC=True, **base_kwargs)

        if 'ycbv' in ds_name:
            # For fair comparison with PoseCNN/DeepIM on YCB-Video ADD(-S) AUC
            meters[f'{error_type}_ntop=1_matching=CLASS'] = PoseErrorMeter(
                error_type=error_type, consider_all_predictions=False,
                match_threshold=np.inf,
                report_error_stats=False, report_error_AUC=True, **base_kwargs)

        if 'tless' in ds_name:
            meters.update({f'{error_type}_ntop=BOP_matching=BOP':  # For ADD-S<0.1d
                           PoseErrorMeter(error_type=error_type, match_threshold=0.1, **base_kwargs),

                           f'{error_type}_ntop=ALL_matching=BOP':  # For mAP
                           PoseErrorMeter(error_type=error_type, match_threshold=0.1,
                                          consider_all_predictions=True,
                                          report_AP=True, **base_kwargs)})
    return meters

In [57]:
# Predictions
predictor, mesh_db = load_models(coarse_run_id, refiner_run_id, n_workers=n_plotters, object_set=object_set)

mv_predictor = MultiviewScenePredictor(mesh_db)
base_pred_kwargs = dict(
    n_coarse_iterations=n_coarse_iterations,
    n_refiner_iterations=n_refiner_iterations,
    skip_mv=skip_mv,
    pose_predictor=predictor,
    mv_predictor=mv_predictor,
)

if 'bracket_assembly' in ds_name:
    bracket_detections = load_custom_detection_from_gt().cpu()
    pred_kwargs = {
        'pix2pose_detections': dict(
            detections=bracket_detections,
            **base_pred_kwargs
        )
    }

scene_ds_pred = MultiViewWrapper(scene_ds, n_views=n_views)
if group_id is not None:
    mask = scene_ds_pred.frame_index['group_id'] == group_id
    scene_ds_pred.frame_index = scene_ds_pred.frame_index[mask].reset_index(drop=True)
elif n_groups is not None:
    scene_ds_pred.frame_index = scene_ds_pred.frame_index[:n_groups]

pred_runner = MultiviewPredictionRunner(
    scene_ds_pred, batch_size=1, n_workers=n_workers,
    cache_data=len(pred_kwargs) > 1)

all_predictions = dict()
for pred_prefix, pred_kwargs_n in pred_kwargs.items():
    preds = pred_runner.get_predictions(**pred_kwargs_n)
    # logger.info(f"preds,{pred_prefix}, {preds}")
    for preds_name, preds_n in preds.items():
        all_predictions[f'{pred_prefix}/{preds_name}'] = preds_n

logger.info("Done with predictions")
# torch.distributed.barrier()

# Evaluation
predictions_to_evaluate = set()
if 'ycbv' in ds_name:
    det_key = 'posecnn_init'
    all_predictions['posecnn'] = posecnn_detections
    predictions_to_evaluate.add('posecnn')
    predictions_to_evaluate.add(f'{det_key}/refiner/iteration={n_refiner_iterations}')
elif 'tless' in ds_name:
    det_key = 'pix2pose_detections'
    predictions_to_evaluate.add(f'{det_key}/refiner/iteration={n_refiner_iterations}')
elif 'bracket_assembly' in ds_name: # BOP dataset
    det_key = 'pix2pose_detections'
    predictions_to_evaluate.add(f'{det_key}/coarse/iteration=1')
    predictions_to_evaluate.add(f'{det_key}/refiner/iteration={n_refiner_iterations}')
else:
    raise ValueError(ds_name)

if n_views > 1:
    for k in [
            # f'ba_input',
            # f'ba_output',
            f'ba_output+all_cand'
    ]:
        predictions_to_evaluate.add(f'{det_key}/{k}')

all_predictions = OrderedDict({k: v for k, v in sorted(all_predictions.items(), key=lambda item: item[0])})
# Evaluation.
meters = get_pose_meters(scene_ds)
mv_group_ids = list(iter(pred_runner.sampler))
scene_ds_ids = np.concatenate(scene_ds_pred.frame_index.loc[mv_group_ids, 'scene_ds_ids'].values)
sampler = ListSampler(scene_ds_ids)
eval_runner = PoseEvaluation(scene_ds, meters, n_workers=n_workers,
                                cache_data=True, batch_size=1, sampler=sampler)

1 day, 22:57:55.099552 - Backbone: efficientnet-b3
1 day, 22:57:55.534823 - Backbone: efficientnet-b3
EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)
EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)


Loaded EGL 1.5 after reload.
Loaded EGL 1.5 after reload.


EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)
EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)
EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)


Loaded EGL 1.5 after reload.
Loaded EGL 1.5 after reload.
Loaded EGL 1.5 after reload.
Loaded EGL 1.5 after reload.


EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)
EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)
EGL device choice: 0 of 4 (from EGL_VISIBLE_DEVICES)


Loaded EGL 1.5 after reload.
Loaded EGL 1.5 after reload.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.08
Vendor = NVIDIA Corporation
Renderer = Tesla T4/PCIe/SSE2
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
Destroy EGL OpenGL window.
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.08
Vendor = NVIDIA Corporation
Renderer = Tesla T4/PCIe/SSE2


  0%|          | 0/1 [00:00<?, ?it/s]

GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.08
Vendor = NVIDIA Corporation
Renderer = Tesla T4/PCIe/SSE2
multiview wrapper get item 0 [0] scene_id           0
view_ids        [20]
n_views            1
scene_ds_ids     [0]
group_id           0
Name: 0, dtype: object
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.08
Vendor = NVIDIA Corporation
Renderer = Tesla T4/PCIe/SSE2
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.08
Vendor = NVIDIA Corporation
Renderer = Tesla T4/PCIe/SSE2
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.

1 day, 22:57:57.590342 - batched_model_predictions torch.Size([1, 3, 540, 720]) 1 
1 day, 22:57:57.593101 - batched_model_predictions tensor([0]) [0] torch.Size([1, 3, 540, 720])


multiview predict torch.Size([1, 3, 540, 720])
get_predictions torch.Size([1, 3, 540, 720]) None 2
pose model torch.Size([1, 3, 540, 720])


1 day, 22:58:01.263319 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=1.pth.tar
1 day, 22:58:01.295863 - outputs, {'iteration=1': {'TCO_input': tensor([[[ 0.0000,  1.0000,  0.0000,  0.0083],
         [ 0.0000,  0.0000, -1.0000, -0.0168],
         [-1.0000,  0.0000,  0.0000,  0.1202],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-0.2365,  0.5946,  0.7685, -0.2927],
         [-0.9494, -0.3098, -0.0524, -0.3871],
         [ 0.2069, -0.7419,  0.6378,  0.3032],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'K_crop': tensor([[[ 53.9720,   0.0000, 155.7807],
         [  0.0000,  53.9720, 127.0675],
         [  0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[  1.1109,  -0.5788,  -1.3861,  -2.1547,   0.0769,  -2.1093, -55.8145,
         -61.3351,   2.5234]], device='cuda:0')}, 'boxes_rend': tensor([[  -85.7894, -2263.6277,   642.6074,   186

ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
ven = NVIDIA Corporation


1 day, 22:58:01.410883 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=1.pth.tar
1 day, 22:58:01.430305 - outputs, {'iteration=1': {'TCO_input': tensor([[[ 0.0000,  1.0000,  0.0000, -0.0160],
         [ 0.0000,  0.0000, -1.0000,  0.0058],
         [-1.0000,  0.0000,  0.0000,  0.1988],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-0.4660,  0.5019,  0.7287, -2.2609],
         [-0.7560, -0.6538, -0.0331, -1.9796],
         [ 0.4598, -0.5663,  0.6840,  1.7072],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'K_crop': tensor([[[369.0375,   0.0000, 189.2115],
         [  0.0000, 369.0374, 108.7988],
         [  0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[   3.6996,   -4.8191,   -4.1746,  -17.8867,   -1.2663,  -20.2102,
         -459.0229, -438.6347,    8.5885]], device='cuda:0')}, 'boxes_rend': tensor([[182.0855, -28.4171, 300.5348, 31

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
ven = NVIDIA Corporation


1 day, 22:58:01.666814 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=1.pth.tar
1 day, 22:58:01.682831 - outputs, {'iteration=1': {'TCO_input': tensor([[[ 0.0000,  1.0000,  0.0000, -0.0133],
         [ 0.0000,  0.0000, -1.0000, -0.0040],
         [-1.0000,  0.0000,  0.0000,  0.0670],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-0.1651,  0.6757,  0.7185, -0.0635],
         [-0.9824, -0.1770, -0.0593, -0.0513],
         [ 0.0871, -0.7156,  0.6930,  0.1478],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'K_crop': tensor([[[122.4163,   0.0000, 185.6634],
         [  0.0000, 122.4163, 131.8471],
         [  0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[  0.9872,  -0.2586,  -1.0456,  -0.9577,   0.0671,  -1.0577, -28.3468,
         -35.1787,   2.2047]], device='cuda:0')}, 'boxes_rend': tensor([[-465.2318,  725.4101,  558.1038, 1147.896

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])


1 day, 22:58:01.915756 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=1.pth.tar
1 day, 22:58:02.045552 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=2.pth.tar
1 day, 22:58:02.072186 - outputs, {'iteration=1': {'TCO_input': tensor([[[-0.2365,  0.5946,  0.7685, -0.2927],
         [-0.9494, -0.3098, -0.0524, -0.3871],
         [ 0.2069, -0.7419,  0.6378,  0.3032],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-0.2253,  0.5593,  0.7978, -0.6481],
         [-0.9088, -0.4158,  0.0349, -0.7361],
         [ 0.3512, -0.7172,  0.6019,  2.2181],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'K_crop': tensor([[[142.2791,   0.0000, 296.8316],
         [  0.0000, 142.2791, 301.1387],
         [  0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[ 30.7047,  -0.1702,  -1.4422, -10.8007,  14.4379, 

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])


1 day, 22:58:02.240009 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=2.pth.tar
1 day, 22:58:02.260036 - outputs, {'iteration=1': {'TCO_input': tensor([[[-0.4660,  0.5019,  0.7287, -2.2609],
         [-0.7560, -0.6538, -0.0331, -1.9796],
         [ 0.4598, -0.5663,  0.6840,  1.7072],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[ -0.4412,   0.4768,   0.7602, -23.1370],
         [ -0.6614,  -0.7454,   0.0837, -19.7125],
         [  0.6065,  -0.4659,   0.6442,  18.2425],
         [  0.0000,   0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'K_crop': tensor([[[2.6483e+03, 0.0000e+00, 3.6668e+03],
         [0.0000e+00, 2.6483e+03, 3.1905e+03],
         [0.0000e+00, 0.0000e+00, 1.0000e+00]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[ 46.9202,  -0.2324,  -2.2083, -17.5485,  20.7374,  -2.8545, 148.4230,
         209.2485,  10.6858]], device='cuda:0')}, 'boxes_rend': tensor([[-1582.4

ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
ven = NVIDIA Corporation


1 day, 22:58:02.426294 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=2.pth.tar
1 day, 22:58:02.446106 - outputs, {'iteration=1': {'TCO_input': tensor([[[-0.1799,  0.7491,  0.6376,  0.0030],
         [-0.9803, -0.1904, -0.0529, -0.0016],
         [ 0.0818, -0.6345,  0.7686, -0.0745],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-0.1902,  0.7518,  0.6314,  0.0016],
         [-0.9780, -0.2012, -0.0551, -0.0030],
         [ 0.0856, -0.6280,  0.7735, -0.0710],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'K_crop': tensor([[[607.2506,   0.0000, 401.2878],
         [  0.0000, 607.2506, 323.7209],
         [  0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[ 3.9049, -0.0384,  0.0287, -0.5176,  2.5929, -0.0176, 10.3929, 12.9403,
          0.9529]], device='cuda:0')}, 'boxes_rend': tensor([[-419.2448, -430.2801,  -28.9633,  -16.5561]], devi

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
ven = NVIDIA Corporation


1 day, 22:58:02.691536 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=2.pth.tar
1 day, 22:58:02.711782 - outputs, {'iteration=1': {'TCO_input': tensor([[[-0.2203,  0.5917,  0.7755, -0.3888],
         [-0.9624, -0.2613, -0.0741,  0.5665],
         [ 0.1588, -0.7627,  0.6270,  7.7759],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-2.1717e-01,  5.7482e-01,  7.8894e-01, -9.0228e-01],
         [-9.3624e-01, -3.5134e-01, -1.7360e-03,  1.3639e+00],
         [ 2.7619e-01, -7.3902e-01,  6.1447e-01,  1.8391e+01],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]],
       device='cuda:0'), 'K_crop': tensor([[[ 3.2494e+04,  0.0000e+00,  1.7841e+03],
         [ 0.0000e+00,  3.2494e+04, -2.2480e+03],
         [ 0.0000e+00,  0.0000e+00,  1.0000e+00]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[ 9.3192e+00, -2.7588e-02, -2.0146e-01, -3.5380e+00,  4.7055e+00,
         -4.8255e-01,

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
ven = NVIDIA Corporation


1 day, 22:58:02.943520 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=2.pth.tar
1 day, 22:58:02.962875 - outputs, {'iteration=1': {'TCO_input': tensor([[[-0.1651,  0.6757,  0.7185, -0.0635],
         [-0.9824, -0.1770, -0.0593, -0.0513],
         [ 0.0871, -0.7156,  0.6930,  0.1478],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-1.5216e-01,  6.5170e-01,  7.4306e-01, -6.9890e-04],
         [-9.5425e-01, -2.9265e-01,  6.1266e-02,  2.1020e-01],
         [ 2.5739e-01, -6.9975e-01,  6.6641e-01,  8.2245e-01],
         [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  1.0000e+00]]],
       device='cuda:0'), 'K_crop': tensor([[[174.5110,   0.0000, 234.4991],
         [  0.0000, 174.5110, 180.1271],
         [  0.0000,   0.0000,   1.0000]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[ 2.3469e+01,  8.9469e-02, -8.5691e-01, -8.8445e+00,  1.0755e+01,
         -1.5254e+00,  7.4851e+01,  1.0523e+02, 

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
pose model torch.Size([1, 3, 540, 720])


1 day, 22:58:03.175230 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=1.pth.tar
1 day, 22:58:03.289540 - Wrote debug data: /home/ubuntu/synthetic_pose_estimation/cosypose/local_data/debug_data/debug_iter=2.pth.tar
1 day, 22:58:03.308748 - outputs, {'iteration=1': {'TCO_input': tensor([[[-0.3751,  0.5573,  0.7408, -0.8288],
         [-0.8460, -0.5325, -0.0277, -0.6108],
         [ 0.3790, -0.6371,  0.6712,  0.8562],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'TCO_output': tensor([[[-0.3556,  0.5493,  0.7562, -2.8986],
         [-0.7925, -0.6061,  0.0676, -2.0704],
         [ 0.4955, -0.5752,  0.6509,  3.0963],
         [ 0.0000,  0.0000,  0.0000,  1.0000]]], device='cuda:0'), 'K_crop': tensor([[[1.4266e+03, 0.0000e+00, 1.5404e+03],
         [0.0000e+00, 1.4266e+03, 1.1372e+03],
         [0.0000e+00, 0.0000e+00, 1.0000e+00]]], device='cuda:0'), 'model_outputs': {'pose': tensor([[13.9679,  0.1339, -0.3385, -5.29

GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Tesla T4/PCIe/SSE2
GL_VERSION=4.6.0 NVIDIA 510.73.08
GL_SHADING_LANGUAGE_VERSION=4.60 NVIDIA
Version = 4.6.0 NVIDIA 510.73.08
Vendor = NVIDIA Corporation
Renderer = Tesla T4/PCIe/SSE2


100%|██████████| 1/1 [00:06<00:00,  6.42s/it]

torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])
ven = NVIDIA Corporation
torch.Size([1, 3, 540, 720]) torch.Size([1, 3, 240, 320])



1 day, 22:58:02.165951 - preds,pix2pose_detections, {'detections': PandasTensorCollection(
    poses: torch.Size([7, 4, 4]) torch.float32 cpu,
    bboxes: torch.Size([7, 4]) torch.float32 cpu,
----------------------------------------
    infos:
   scene_id  view_id  score       label  det_id  batch_im_id  group_id
0         0       20      1  obj_000000     140            0         0
1         0       20      1  obj_000004     141            0         0
2         0       20      1  obj_000005     142            0         0
3         0       20      1  obj_000006     143            0         0
4         0       20      1  obj_000007     144            0         0
5         0       20      1  obj_000008     145            0         0
6         0       20      1  obj_000009     146            0         0
), 'coarse/iteration=1': PandasTensorCollection(
    poses: torch.Size([7, 4, 4]) torch.float32 cpu,
    poses_input: torch.Size([7, 4, 4]) torch.float32 cpu,
    K_crop: torch.Size([7, 

detections PandasTensorCollection(
    poses: torch.Size([14917, 4, 4]) torch.float32 cpu,
    bboxes: torch.Size([14917, 4]) torch.float32 cpu,
----------------------------------------
    infos:
       scene_id  view_id  score       label
0             0        0      1  obj_000000
1             0        0      1  obj_000004
2             0        0      1  obj_000005
3             0        0      1  obj_000006
4             0        0      1  obj_000007
...         ...      ...    ...         ...
14912         2      998      1  obj_000005
14913         2      998      1  obj_000006
14914         2      998      1  obj_000007
14915         2      998      1  obj_000008
14916         2      998      1  obj_000009

[14917 rows x 4 columns]
)
ds_name bracket_assembly
object_ds_name bracket_assembly


100%|██████████| 1/1 [00:00<00:00,  2.36it/s]


In [58]:
scene_id = 0
frame_id = 20
object_ds_name = 'bracket_assembly'
object_ds = make_object_dataset(object_ds_name)
mesh_db = MeshDataBase.from_object_ds(object_ds).batched().cuda().float()
meshes = mesh_db.select(labels)
points = meshes.sample_points(2000, deterministic=True)
uv = project_points(points, K, TCO)
boxes_rend = boxes_from_uv(uv)
boxes_crop, images_cropped = deepim_crops(
    images=images, obs_boxes=boxes_rend, K=K,
    TCO_pred=TCO, O_vertices=points, output_size=self.render_size, lamb=1.4
)
plot(image)
plot(boxes)

NameError: name 'labels' is not defined