# 360-degree 3D Object Detection

In [1]:
import argparse
import json
import math
import os
import os.path as osp
import time
import warnings
from copy import deepcopy
from random import randrange

import mmcv
import numpy as np
import torch
import torch.nn.functional as F
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
                         wrap_fp16_model)
from mmdet.apis import set_random_seed
from mmdet.datasets import replace_ImageToTensor
from mmdet3d.apis import single_gpu_test
from mmdet3d.core.bbox import get_box_type
from mmdet3d.datasets import build_dataset
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.models import build_model

from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test
from projects.mmdet3d_plugin.datasets.builder import build_dataloader

In [2]:
def init_model(config, checkpoint, device='cuda:0'):
    """Build a detector from a config file and optionally load its weights.

    Args:
        config (str): Path to the config file; it is parsed with
            ``Config.fromfile``.
        checkpoint (str | None): Checkpoint path. When ``None`` no weights
            are loaded and ``model.CLASSES`` is left untouched.
        device (str): Device the model is moved to, e.g. ``'cuda:0'`` or
            ``'cpu'``.

    Returns:
        tuple: ``(model, cfg)`` - the model in eval mode on ``device`` and
        the parsed config. The parsed config is also stored on the model as
        ``model.cfg``.
    """
    cfg = Config.fromfile(config)

    # import modules from string list.
    if cfg.get('custom_imports', None):
        print("custom import")
        from mmcv.utils import import_modules_from_strings
        import_modules_from_strings(**cfg['custom_imports'])

    # import modules from plugin/xx, registry will be updated
    if hasattr(cfg, 'plugin'):
        print("plugin import")
        if cfg.plugin:
            import importlib
            # Use the plugin directory when the config names one, otherwise
            # fall back to the directory of the config file itself. (The
            # fallback used to read an undefined ``args.config``.)
            plugin_src = cfg.plugin_dir if hasattr(cfg, 'plugin_dir') else config
            # Turn 'a/b/c' into the dotted module path 'a.b.c'. Empty and
            # '.' components are dropped so a leading './' does not produce
            # an invalid module name such as '..a.b'.
            _module_path = '.'.join(
                part for part in os.path.dirname(plugin_src).split('/')
                if part not in ('', '.'))
            print(_module_path)
            importlib.import_module(_module_path)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    cfg.model.pretrained = None

    # in case the test dataset is concatenated
    # NOTE: 'samples_per_gpu' is popped from the test config here, so the
    # returned cfg no longer carries that key.
    samples_per_gpu = 1
    if isinstance(cfg.data.test, dict):
        cfg.data.test.test_mode = True
        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
        if samples_per_gpu > 1:
            # Replace 'ImageToTensor' to 'DefaultFormatBundle'
            cfg.data.test.pipeline = replace_ImageToTensor(
                cfg.data.test.pipeline)
    elif isinstance(cfg.data.test, list):
        for ds_cfg in cfg.data.test:
            ds_cfg.test_mode = True
        samples_per_gpu = max(
            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
        if samples_per_gpu > 1:
            for ds_cfg in cfg.data.test:
                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_model(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)

    # 1 MMDet3D APIs - inference.py version checkpoint load
    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')
        if 'CLASSES' in checkpoint['meta']:
            model.CLASSES = checkpoint['meta']['CLASSES']
        else:
            # ``config`` is only the path string; the class names live on
            # the parsed config object.
            model.CLASSES = cfg.class_names
        if 'PALETTE' in checkpoint['meta']:  # 3D Segmentor
            model.PALETTE = checkpoint['meta']['PALETTE']

    # Keep the parsed config (not the path string) on the model.
    model.cfg = cfg

    # Compare by value: ``is not`` on a str literal is an identity test and
    # triggers a SyntaxWarning.
    if device != 'cpu':
        torch.cuda.set_device(device)
    model.to(device)
    model.eval()

    return model, cfg

  if device is not 'cpu':


In [3]:
# Config file and checkpoint passed to init_model below.
config = './projects/configs/bevformer/bevformer_base.py'
checkpoint = '../BEVFormer/ckpts/bevformer_r101_dcn_24ep.pth'

# Target device; later cells reuse this global when moving inputs.
device = 'cuda:7'

# init_model returns the model (in eval mode, on `device`) and the parsed config.
model, cfg = init_model(config, checkpoint, device)



plugin import
projects.mmdet3d_plugin
load checkpoint from local path: ../BEVFormer/ckpts/bevformer_r101_dcn_24ep.pth


2022-10-05 20:31:34,344 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.0.conv2 is upgraded to version 2.
2022-10-05 20:31:34,350 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.1.conv2 is upgraded to version 2.
2022-10-05 20:31:34,355 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.2.conv2 is upgraded to version 2.
2022-10-05 20:31:34,360 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.3.conv2 is upgraded to version 2.
2022-10-05 20:31:34,366 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.4.conv2 is upgraded to version 2.
2022-10-05 20:31:34,370 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.5.conv2 is upgraded to version 2.
2022-10-05 20:31:34,375 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.6.conv2 is upgraded to version 2.
2022-10-05 20:31:34,380 - root - INFO - ModulatedDeformConvPack img_backbone.layer3.7.conv2 is upgraded to version 2.
2022-10-05 20:31:34,392 - root - INFO - ModulatedDeformC

# Using DataLoader (Jeho's custom pipeline)

In [4]:
# Put the test dataset config(s) in test mode and work out the batch size.
# NOTE: init_model performs the same pop('samples_per_gpu', 1) on this cfg
# and returns it, so when cfg comes from init_model the key is already gone
# and samples_per_gpu falls back to 1 here.
samples_per_gpu = 1
if isinstance(cfg.data.test, dict):
    cfg.data.test.test_mode = True
    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
    if samples_per_gpu > 1:
        # Replace 'ImageToTensor' to 'DefaultFormatBundle'
        cfg.data.test.pipeline = replace_ImageToTensor(
            cfg.data.test.pipeline)
elif isinstance(cfg.data.test, list):
    # Concatenated datasets: use the largest per-dataset batch size.
    for ds_cfg in cfg.data.test:
        ds_cfg.test_mode = True
    samples_per_gpu = max(
        [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])
    if samples_per_gpu > 1:
        for ds_cfg in cfg.data.test:
            ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)

# Distributed initialisation is intentionally disabled (dist=False below).
# distributed = True
# init_dist("pytorch", **cfg.dist_params)

# build the dataloader
dataset = build_dataset(cfg.data.test)

# Non-distributed, non-shuffled loader over the test dataset.
data_loader = build_dataloader(
    dataset,
    samples_per_gpu=samples_per_gpu,
    workers_per_gpu=cfg.data.workers_per_gpu,
    dist=False,
    shuffle=False,
    nonshuffler_sampler=cfg.data.nonshuffler_sampler,
)



In [18]:
# Run the model over every batch of `data_loader` and collect the outputs in
# `bbox_results` / `mask_results`.

# custom_encode_mask_results is called below but is not part of the
# notebook's import cell; it lives next to custom_multi_gpu_test.
from projects.mmdet3d_plugin.bevformer.apis.test import custom_encode_mask_results

bbox_results = []
mask_results = []
dataset = data_loader.dataset
rank, world_size = get_dist_info()
if rank == 0:
    prog_bar = mmcv.ProgressBar(len(dataset))
time.sleep(2)  # This line can prevent deadlock problem in some cases.
have_mask = False

# device = torch.device("cuda:7" if torch.cuda.is_available() else "cpu")

for i, data in enumerate(data_loader):
    # Reset per batch so the progress bar never uses an unbound or stale
    # value when a dict result carries no 'bbox_results'.
    batch_size = 0
    with torch.no_grad():
        # JEHO: Move image Tensor to CUDA device
        # Unwrap the containers produced by the loader, then move the first
        # image entry onto the model's device.
        data['img_metas'] = data['img_metas'][0].data
        data['img'] = data['img'][0].data
        data['img'][0] = data['img'][0].to(device)

        result = model(return_loss=False, rescale=True, **data)

    # encode mask results
    if isinstance(result, dict):
        if 'bbox_results' in result:
            bbox_result = result['bbox_results']
            batch_size = len(bbox_result)
            bbox_results.extend(bbox_result)
        if result.get('mask_results') is not None:
            mask_result = custom_encode_mask_results(result['mask_results'])
            mask_results.extend(mask_result)
            have_mask = True
            if batch_size == 0:
                # No bbox output: fall back to the number of encoded masks.
                batch_size = len(mask_result)
    else:
        batch_size = len(result)
        bbox_results.extend(result)

    if rank == 0:
        for _ in range(batch_size * world_size):
            prog_bar.update()

[                              ] 13/6019, 0.3 task/s, elapsed: 51s, ETA: 23550s

KeyboardInterrupt: 

In [8]:
# Keep the first batch yielded by the loader in `first_data` for inspection,
# then stop iterating. `first_data` stays undefined if the loader is empty.
for i, data in enumerate(data_loader):
    with torch.no_grad():
        if i == 0:
            first_data = data
            break

In [12]:
# Scratch: unwrap the first image entry of the first batch and copy it to cuda:7.
first_data['img'][0].data[0].to('cuda:7')

tensor([[[[[-37.5300, -37.5300, -37.5300,  ...,  81.4700,  80.4700,
             76.4700],
           [-40.5300, -40.5300, -41.5300,  ...,  83.4700,  81.4700,
             77.4700],
           [-41.5300, -41.5300, -41.5300,  ...,  81.4700,  77.4700,
             72.4700],
           ...,
           [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,
              0.0000],
           [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,
              0.0000],
           [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,
              0.0000]],

          [[-54.2800, -54.2800, -54.2800,  ...,  64.7200,  63.7200,
             59.7200],
           [-57.2800, -57.2800, -58.2800,  ...,  66.7200,  64.7200,
             60.7200],
           [-55.2800, -55.2800, -55.2800,  ...,  64.7200,  60.7200,
             55.7200],
           ...,
           [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,
              0.0000],
           [  0.0000,   0.0000,   0.0000,  ...,   0.0000

In [None]:
# Scratch: replace data['img'] with its unwrapped first entry, moved to cuda:7.
data['img'] = data['img'][0].data[0].to('cuda:7')

In [None]:
# Scratch: look up the 'scene_token' entry of the first sample's meta dict.
# NOTE(review): `img_metas` is not assigned in any visible cell - presumably
# it is data['img_metas']; confirm before re-running.
img_metas[0].data[0][0]['scene_token']

In [None]:
# Scratch: keep the first element of img_metas for step-by-step unwrapping.
img_metas_tmp = img_metas[0]

In [None]:
# Scratch: unwrap the container via its .data attribute.
img_metas_tmp = img_metas_tmp.data

In [None]:
# Scratch: same 'scene_token' lookup as above, on the unwrapped value.
img_metas_tmp[0][0]['scene_token']

# Using Custom Data feeder

In [None]:
"""inference.py"""

# Setup for feeding a single sample through the test pipeline by hand,
# without going through the dataloader.

device = next(model.parameters()).device  # model device

# build the data pipeline
# Work on a copy so that composing the pipeline does not touch cfg.
test_pipeline = deepcopy(cfg.data.test.pipeline)
test_pipeline = Compose(test_pipeline)
box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)

# Annotation file to load sample information from.
ann_file = "./data/nuscenes/nuscenes_infos_temporal_val.pkl"

# get data info containing calib
data_infos = mmcv.load(ann_file)

In [None]:
# Scratch: show the first record under the 'infos' key of the loaded annotations.
data_infos['infos'][0]

In [None]:
# find the info corresponding to this image
# Build one sample by hand, run it through the test pipeline and the model.

# These names are used below but are not part of the notebook's import cell.
from mmcv.parallel import collate, scatter
from mmdet3d.core.bbox import Box3DMode

# NOTE(review): `image` (path of the input image) is not assigned in any
# visible cell and must be defined before this cell runs.
# NOTE(review): this reads data_infos['images'], while the cell above
# inspects data_infos['infos'] - confirm that the loaded file really has an
# 'images' list.

# find the info corresponding to this image
# First record whose file name matches, or None. This avoids an unbound (or
# stale, from an earlier run) `img_info` when nothing matches.
img_info = next(
    (x for x in data_infos['images']
     if osp.basename(x['file_name']) == osp.basename(image)),
    None)

data = dict(
    img_prefix=osp.dirname(image),
    img_info=dict(filename=osp.basename(image)),
    box_type_3d=box_type_3d,
    box_mode_3d=box_mode_3d,
    img_fields=[],
    bbox3d_fields=[],
    pts_mask_fields=[],
    pts_seg_fields=[],
    bbox_fields=[],
    mask_fields=[],
    seg_fields=[])

# camera points to image conversion
if box_mode_3d == Box3DMode.CAM:
    # The intrinsics are only needed in camera mode, so a missing record is
    # only an error here.
    if img_info is None:
        raise ValueError(
            'no annotation record matches image {!r}'.format(image))
    data['img_info'].update(dict(cam_intrinsic=img_info['cam_intrinsic']))

data = test_pipeline(data)

data = collate([data], samples_per_gpu=1)
if next(model.parameters()).is_cuda:
    # scatter to specified GPU
    data = scatter(data, [device.index])[0]
else:
    # this is a workaround to avoid the bug of MMDataParallel
    data['img_metas'] = data['img_metas'][0].data
    data['img'] = data['img'][0].data

# forward the model
with torch.no_grad():
    result = model(return_loss=False, rescale=True, **data)

In [None]:
def genTangentPatches(erp_img, tangent_h, tangent_w, num_rows, num_cols, phi_centers, fov):
    """Resample an equirectangular (ERP) image into tangent-plane patches.

    Patch centres are laid out row by row: row ``i`` sits at latitude
    ``phi_centers[i]`` and is split into ``num_cols[i]`` patches evenly
    spaced in longitude. Every patch pixel is mapped back onto the sphere
    with the inverse gnomonic projection and the ERP image is sampled
    bilinearly at that location.

    Args:
        erp_img (numpy.ndarray): ERP image with shape (H, W, C). Values are
            divided by 255 before sampling.
        tangent_h (int): Height of each patch in pixels.
        tangent_w (int): Width of each patch in pixels.
        num_rows (int): Not used by the computation; kept so existing calls
            keep working.
        num_cols (Sequence[int]): Number of patches in each row.
        phi_centers (Sequence[float]): Centre latitude of each row in
            degrees; the code maps -90..90 onto the ERP height.
        fov (Sequence[float]): ``[horizontal, vertical]`` extent; scaled by
            1/360 and 1/180 respectively before being applied to the patch
            grid.

    Returns:
        torch.Tensor: Patches with shape
        ``(1, C, tangent_h, tangent_w, n_patch)`` where
        ``n_patch == sum(num_cols)``.

    Raises:
        ValueError: If ``num_cols`` describes no patches at all.
    """
    height, width = tangent_h, tangent_w

    # (H, W, C) array -> (1, C, H, W) float tensor scaled by 1/255.
    img_new = erp_img.astype(np.float32) / 255
    img_new = torch.from_numpy(np.transpose(img_new, [2, 0, 1])).unsqueeze(0)
    channels = img_new.shape[1]

    fov_scale = torch.tensor([fov[0] / 360.0, fov[1] / 180.0], dtype=torch.float32)

    PI = math.pi
    PI_2 = math.pi * 0.5

    # Normalised pixel grid in [0, 1], flattened row-major to (H*W, 2) as
    # (x, y). Built with broadcasting so no torch.meshgrid indexing mode is
    # involved.
    yy = torch.linspace(0, 1, height).view(-1, 1).expand(height, width)
    xx = torch.linspace(0, 1, width).view(1, -1).expand(height, width)
    screen_points = torch.stack([xx.flatten(), yy.flatten()], -1)

    # Centre (longitude, latitude) in degrees of every tangent patch.
    all_combos = []
    for i, n_cols in enumerate(num_cols):
        for j in range(n_cols):
            # Longitudinal width of one patch in the current row.
            theta_interval = 360 / n_cols
            theta_center = j * theta_interval + theta_interval / 2
            all_combos.append([theta_center, phi_centers[i]])
    if not all_combos:
        raise ValueError('num_cols must describe at least one tangent patch')

    all_combos = np.vstack(all_combos)
    n_patch = all_combos.shape[0]

    # Normalise the centres to [0, 1], then convert to radians:
    # longitude in [-pi, pi], latitude in [-pi/2, pi/2].
    center_point = torch.from_numpy(all_combos).float()
    center_point[:, 0] = (center_point[:, 0]) / 360
    center_point[:, 1] = (center_point[:, 1] + 90) / 180

    cp = center_point * 2 - 1
    cp[:, 0] = cp[:, 0] * PI
    cp[:, 1] = cp[:, 1] * PI_2
    cp = cp.unsqueeze(1)

    # Tangent-plane coordinates of every patch pixel, shared by all patches.
    convertedCoord = screen_points * 2 - 1
    convertedCoord[:, 0] = convertedCoord[:, 0] * PI
    convertedCoord[:, 1] = convertedCoord[:, 1] * PI_2
    convertedCoord = convertedCoord * fov_scale
    convertedCoord = convertedCoord.unsqueeze(0).repeat(n_patch, 1, 1)

    x = convertedCoord[:, :, 0]
    y = convertedCoord[:, :, 1]

    # Inverse gnomonic projection: plane (x, y) -> sphere (lon, lat).
    rou = torch.sqrt(x ** 2 + y ** 2)
    c = torch.atan(rou)
    sin_c = torch.sin(c)
    cos_c = torch.cos(c)
    # At the exact patch centre (odd patch sizes) rou is 0 and y * sin_c / rou
    # would be 0 / 0 = NaN. The numerator is 0 there, so dividing by 1
    # instead yields the correct limit and leaves all other pixels unchanged.
    safe_rou = torch.where(rou > 0, rou, torch.ones_like(rou))
    lat = torch.asin(cos_c * torch.sin(cp[:, :, 1]) + (y * sin_c * torch.cos(cp[:, :, 1])) / safe_rou)
    lon = cp[:, :, 0] + torch.atan2(x * sin_c, rou * torch.cos(cp[:, :, 1]) * cos_c - y * torch.sin(cp[:, :, 1]) * sin_c)

    # Normalise to the [-1, 1] range used by grid_sample and wrap longitude.
    lat_new = lat / PI_2
    lon_new = lon / PI
    lon_new[lon_new > 1] -= 2
    lon_new[lon_new < -1] += 2

    # Lay the patches side by side: (n_patch, H*W) -> (H, n_patch * W).
    lon_new = lon_new.view(1, n_patch, height, width).permute(0, 2, 1, 3).contiguous().view(height, n_patch * width)
    lat_new = lat_new.view(1, n_patch, height, width).permute(0, 2, 1, 3).contiguous().view(height, n_patch * width)
    grid = torch.stack([lon_new, lat_new], -1).unsqueeze(0)

    persp = F.grid_sample(img_new, grid, mode='bilinear', padding_mode='zeros', align_corners=True)

    # Cut the strip back into individual patches: (1, C, H, W, n_patch).
    persp_reshape = F.unfold(persp, kernel_size=(height, width), stride=(height, width))
    persp_reshape = persp_reshape.reshape(1, channels, height, width, n_patch)

    return persp_reshape

In [None]:
# 3. show the results
def visualize_single_inference(result, data, score_thr=0.0, thickness=2):
    """Draw the predicted 3D boxes of one sample onto its source image.

    The image is re-read from the file named in the sample's meta
    information (the tensor in ``data`` has already been transformed by the
    pipeline). Box corners are projected with the sample's ``cam2img``
    matrix and the 12 box edges are drawn as lines.

    Relies on ``cv2`` being available in the notebook namespace.

    Args:
        result (list[dict]): Model output; only ``result[0]`` is read. It
            may hold the predictions directly or under ``'pts_bbox'`` /
            ``'img_bbox'``. The input is not modified.
        data (dict): Pipeline output. ``data['img_metas'][0][0]`` must
            provide ``'filename'`` and ``'cam2img'``.
        score_thr (float): Boxes scoring at or below this value are
            skipped. Filtering is only applied when ``score_thr > 0``.
        thickness (int): Line thickness in pixels.

    Returns:
        numpy.ndarray: ``uint8`` copy of the image with the boxes drawn.
    """
    # Box helpers are used below but are not part of the notebook's import cell.
    from mmdet3d.core.bbox import CameraInstance3DBoxes, points_cam2img

    assert 'img' in data.keys(), 'image data is not provided for visualization'

    img_meta = data['img_metas'][0][0]

    # read from file because img in data_dict has undergone pipeline transform
    img = mmcv.imread(img_meta['filename'])

    # Unwrap the prediction dict without writing back into the caller's list.
    det = result[0]
    if 'pts_bbox' in det.keys():
        det = det['pts_bbox']
    elif 'img_bbox' in det.keys():
        det = det['img_bbox']
    pred_bboxes = det['boxes_3d'].tensor.numpy()
    pred_scores = det['scores_3d'].numpy()

    # filter out low score bboxes for visualization
    if score_thr > 0:
        pred_bboxes = pred_bboxes[pred_scores > score_thr]

    pred_bboxes = CameraInstance3DBoxes(
        pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))

    pred_bbox_color = (241, 101, 72)
    show_img = img.copy()

    # Copy so the conversion below never touches the sample's meta dict.
    # (`deepcopy` is what the notebook imports; `copy.deepcopy` was unbound.)
    proj_mat = deepcopy(img_meta['cam2img'])
    corners_3d = pred_bboxes.corners
    num_bbox = corners_3d.shape[0]
    points_3d = corners_3d.reshape(-1, 3)

    if not isinstance(proj_mat, torch.Tensor):
        proj_mat = torch.from_numpy(np.array(proj_mat))

    assert (proj_mat.shape == torch.Size([3, 3])
            or proj_mat.shape == torch.Size([4, 4]))
    proj_mat = proj_mat.float().cpu()

    # project to 2d to get image coords (uv)
    uv_origin = points_cam2img(points_3d, proj_mat)
    uv_origin = (uv_origin - 1).round()
    imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()

    # Draw the 12 edges of every box.
    line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
                    (4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
    for box_idx in range(num_bbox):
        # `np.int` no longer exists in recent NumPy; the builtin int is the
        # same dtype. tolist() hands plain Python ints to OpenCV.
        corners = imgfov_pts_2d[box_idx].astype(int).tolist()
        for start, end in line_indices:
            cv2.line(show_img, (corners[start][0], corners[start][1]),
                     (corners[end][0], corners[end][1]), pred_bbox_color,
                     thickness, cv2.LINE_AA)

    return show_img.astype(np.uint8)

# Test: Dual-fisheye images and Perspective image

### Camera Calibration
1. perspective cam
    - center point of the image: 696, 256
    - calibrated center point: 687.158398, 317.752196


In [None]:
def change_img_filename(l_name, r_name, p_name):
    """Point the three camera annotation files at new image files.

    For the left, right and perspective annotation JSON (in that order) the
    file is loaded, ``images[0].file_name`` is replaced and the file is
    written back in place.

    Args:
        l_name (str): Image path stored in ``left_cam.json``.
        r_name (str): Image path stored in ``right_cam.json``.
        p_name (str): Image path stored in ``pers_cam.json``.
    """
    targets = (
        ("./working_dir/360dataset/left_cam.json", l_name),
        ("./working_dir/360dataset/right_cam.json", r_name),
        ("./working_dir/360dataset/pers_cam.json", p_name),
    )
    for ann_path, img_name in targets:
        with open(ann_path, "r") as reader:
            annotation = json.load(reader)
        annotation['images'][0]['file_name'] = img_name
        with open(ann_path, "w") as writer:
            json.dump(annotation, writer)

In [None]:
def select_sample_from_dataset(fname=None):
    """Prepare one ERP frame as left/right halves plus its perspective view.

    The ERP image is split at half its width into a left and a right image,
    both written to fixed sample paths, and the three camera annotation
    files are updated (via ``change_img_filename``) to point at the left
    half, the right half and the perspective image with the same file name.

    Relies on ``cv2`` being available in the notebook namespace.

    Args:
        fname (str | None): File name inside the ERP directory. When
            ``None`` a random file from that directory is used.

    Returns:
        tuple: ``(erp_img, l_img, r_img, p_img, l_cam, r_cam, p_cam)`` - the
        image paths followed by the annotation file paths.

    Raises:
        FileNotFoundError: If no file can be picked from the ERP directory
            or the chosen image cannot be read.
    """
    erp_dir = "./working_dir/360dataset/image_erp/"
    pers_dir = "./working_dir/360dataset/perspective_images/data_2d_raw/"
    img_files = os.listdir(erp_dir)

    if fname is None:
        if not img_files:
            # randrange(0) would otherwise fail with an unrelated message.
            raise FileNotFoundError('no images found in ' + erp_dir)
        fname = img_files[randrange(len(img_files))]  # randomly pick

    print(fname)

    l_cam = "./working_dir/360dataset/left_cam.json"
    r_cam = "./working_dir/360dataset/right_cam.json"
    p_cam = "./working_dir/360dataset/pers_cam.json"

    erp_img = erp_dir + fname
    img = cv2.imread(erp_img, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError('cannot read image ' + erp_img)

    # Split the frame at half its width into the two halves.
    width_cutoff = img.shape[1] // 2
    l_img = "./working_dir/360dataset/l_sample.png"
    r_img = "./working_dir/360dataset/r_sample.png"
    cv2.imwrite(l_img, img[:, :width_cutoff])
    cv2.imwrite(r_img, img[:, width_cutoff:])
    p_img = pers_dir + fname

    change_img_filename(l_img, r_img, p_img)

    return erp_img, l_img, r_img, p_img, l_cam, r_cam, p_cam

In [None]:
def inference_and_vis(num_sample=1, fname=None, score_thr=0.0, thickness=1, fov=(60, 60)):
    """Run and plot detection on the split, perspective and tangent views.

    For each sample: the two halves of the ERP frame and the perspective
    image are run through the global ``model`` and plotted, then the ERP
    frame is cut into tangent patches and every patch is run and plotted
    separately. Latencies are printed in milliseconds.

    Relies on ``cv2`` and ``plt`` being available in the notebook namespace.

    Args:
        num_sample (int): Number of samples to process.
        fname (str | None): Fixed file name, or ``None`` to pick a random
            sample on each iteration.
        score_thr (float): Score threshold passed to the visualisation.
        thickness (int): Line thickness passed to the visualisation.
        fov (Sequence[float]): Patch field of view handed to
            ``genTangentPatches``. The default mirrors the value used by
            ``inference_and_vis_instaonex2`` - NOTE(review): confirm it is
            the intended setting for this dataset.
    """
    # Used below but not part of the notebook's import cell.
    from mmdet3d.apis import inference_mono_3d_detector

    for _ in range(num_sample):
        erp_img, l_img, r_img, p_img, l_cam, r_cam, p_cam = select_sample_from_dataset(fname)

        # Inference on dfisheye and perspective
        t1 = time.time()
        l_output = inference_mono_3d_detector(model, l_img, l_cam)
        r_output = inference_mono_3d_detector(model, r_img, r_cam)
        print("dfisheye latency: " + str((time.time() - t1) * 1000))

        t1 = time.time()
        p_output = inference_mono_3d_detector(model, p_img, p_cam)
        print("perspective latency: " + str((time.time() - t1) * 1000))

        # Visualize results
        l_resimg = visualize_single_inference(l_output[0], l_output[1], score_thr=score_thr, thickness=thickness)
        r_resimg = visualize_single_inference(r_output[0], r_output[1], score_thr=score_thr, thickness=thickness)
        concat_img = cv2.hconcat([l_resimg, r_resimg])

        p_resimg = visualize_single_inference(p_output[0], p_output[1], score_thr=score_thr, thickness=thickness)

        # [2, 1, 0] flips the channel order for matplotlib.
        plt.figure(figsize=(12, 8))
        plt.imshow(concat_img[:, :, [2, 1, 0]].astype(np.uint8), aspect=1)

        plt.figure(figsize=(12, 8))
        plt.imshow(p_resimg[:, :, [2, 1, 0]].astype(np.uint8), aspect=1)

        # Generate Tangent Images
        #
        # Alternative layouts that were tried (num_rows / num_cols / phi_centers):
        #   4 / [3, 6, 6, 3]          / [-67.5, -22.5, 22.5, 67.5]
        #   5 / [3, 6, 8, 6, 3]       / [-72.2, -36.1, 0, 36.1, 72.2]
        #   6 / [3, 8, 12, 12, 8, 3]  / [-75.2, -45.93, -15.72, 15.72, 45.93, 75.2]
        # Alternative patch sizes: 96x96, 256x256.
        tangent_h = 512
        tangent_w = 512
        num_rows = 2
        num_cols = [6, 6]
        phi_centers = [-22.5, 22.5]

        img = cv2.imread(erp_img, cv2.IMREAD_COLOR)
        # genTangentPatches requires `fov`; the call used to omit it.
        patches = genTangentPatches(img, tangent_h, tangent_w, num_rows, num_cols, phi_centers, fov)

        # Inference on tangent images
        patch_num = sum(num_cols)

        time_sum = 0
        for patch_idx in range(patch_num):
            # (C, H, W) patch -> (H, W, C) array scaled back by 255, written
            # to disk because the detector API takes a file path.
            cur_patch = patches[0, :, :, :, patch_idx].permute(1, 2, 0).numpy()
            cur_patch = cur_patch * 255
            t_img = "./working_dir/360dataset/tangent_patch.png"
            cv2.imwrite(t_img, cur_patch)
            t_cam = "./working_dir/360dataset/tangent_patch.json"

            # Single tangent image inference
            t1 = time.time()
            t_output = inference_mono_3d_detector(model, t_img, t_cam)
            latency = (time.time() - t1) * 1000
            time_sum = time_sum + latency

            t_resimg = visualize_single_inference(t_output[0], t_output[1], score_thr=score_thr, thickness=thickness)

            plt.figure(figsize=(7, 7))
            plt.imshow(t_resimg[:, :, [2, 1, 0]].astype(np.uint8), aspect=1)

        plt.show()
        print("tangent patch avg. latency: " + str(time_sum / patch_num))

In [None]:
# Fixed-sample variant, kept for reference:
# inference_and_vis(1, "0000004123.png", score_thr=0.15, thickness=2)

# File names noted for the fixed-sample variant:
# 0000008119.png
# 0000005572.png
# 0000011342.png
# 0000006907.png
# 0000004123.png

# Three randomly picked samples (no fname given).
inference_and_vis(3, score_thr=0.15, thickness=3)

# Insta 360 One X2 data

In [None]:
def select_select_sample_from_instaonex2_dataset(fname=None):
    """Pick one ERP frame of the Insta360 set and register it for inference.

    The camera annotation JSON is rewritten in place so that
    ``images[0].file_name`` points at the chosen frame.

    Args:
        fname (str | None): File name inside the frame directory. When
            ``None`` a random entry of that directory is used.

    Returns:
        tuple: ``(erp_img, cam)`` - path of the chosen frame and path of the
        annotation file.
    """
    frame_dir = "./working_dir/360_urban_scene_instaOneX2/data_rgb/"
    cam = "./working_dir/360_urban_scene_instaOneX2/onex2_cam.json"

    candidates = os.listdir(frame_dir)
    chosen = fname
    if chosen is None:
        chosen = candidates[randrange(len(candidates))]  # random pick
    print(chosen)

    erp_img = frame_dir + chosen

    # Load, patch and store the annotation file.
    with open("./working_dir/360_urban_scene_instaOneX2/onex2_cam.json", "r") as reader:
        annotation = json.load(reader)
    annotation['images'][0]['file_name'] = erp_img
    with open("./working_dir/360_urban_scene_instaOneX2/onex2_cam.json", "w") as writer:
        json.dump(annotation, writer)

    return erp_img, cam

In [None]:
def inference_and_vis_instaonex2(num_sample=1, fname=None, score_thr=0.0, thickness=1):
    """Run and plot detection on Insta360 frames and their tangent patches.

    For each sample the full ERP frame is run through the global ``model``
    and plotted, then the frame is cut into tangent patches and every patch
    is run and plotted separately. Latencies are printed in milliseconds.

    Relies on ``cv2`` and ``plt`` being available in the notebook namespace.

    Args:
        num_sample (int): Number of samples to process.
        fname (str | None): Fixed file name, or ``None`` to pick a random
            frame on each iteration.
        score_thr (float): Score threshold passed to the visualisation.
        thickness (int): Line thickness passed to the visualisation.
    """
    # Used below but not part of the notebook's import cell.
    from mmdet3d.apis import inference_mono_3d_detector

    for _ in range(num_sample):
        erp_img, cam = select_select_sample_from_instaonex2_dataset(fname)

        # ERP image
        t1 = time.time()
        erp_output = inference_mono_3d_detector(model, erp_img, cam)
        print("erp latency: " + str((time.time() - t1) * 1000))

        erp_resimg = visualize_single_inference(erp_output[0], erp_output[1], score_thr=score_thr, thickness=thickness)

        # [2, 1, 0] flips the channel order for matplotlib.
        plt.figure(figsize=(12, 8))
        plt.imshow(erp_resimg[:, :, [2, 1, 0]].astype(np.uint8), aspect=1)

        # 3840x2160
        # Tangent Images
        #
        # Alternatives that were tried: 512x512 patches; two rows with
        # num_cols = [6, 6] and phi_centers = [-22.5, 22.5]; fov = [90, 90].
        tangent_h = 1000
        tangent_w = 1000
        num_rows = 1
        num_cols = [6]
        phi_centers = [0]
        fov = [60, 60]

        img = cv2.imread(erp_img, cv2.IMREAD_COLOR)
        patches = genTangentPatches(img, tangent_h, tangent_w, num_rows, num_cols, phi_centers, fov)

        # Inference on tangent images
        patch_num = sum(num_cols)

        time_sum = 0
        # A separate index name keeps the sample loop's variable untouched.
        for patch_idx in range(patch_num):
            # (C, H, W) patch -> (H, W, C) array scaled back by 255, written
            # to disk because the detector API takes a file path.
            cur_patch = patches[0, :, :, :, patch_idx].permute(1, 2, 0).numpy()
            cur_patch = cur_patch * 255
            t_img = "./working_dir/360_urban_scene_instaOneX2/tangent_patch.png"
            cv2.imwrite(t_img, cur_patch)
            t_cam = "./working_dir/360_urban_scene_instaOneX2/tangent_patch.json"

            # Single tangent image inference
            t1 = time.time()
            t_output = inference_mono_3d_detector(model, t_img, t_cam)
            latency = (time.time() - t1) * 1000
            time_sum = time_sum + latency

            t_resimg = visualize_single_inference(t_output[0], t_output[1], score_thr=score_thr, thickness=thickness)

            plt.figure(figsize=(7, 7))
            plt.imshow(t_resimg[:, :, [2, 1, 0]].astype(np.uint8), aspect=1)

        plt.show()
        print("tangent patch avg. latency: " + str(time_sum / patch_num))

In [None]:
# Random-sample variant, kept for reference:
# inference_and_vis_instaonex2(3, score_thr=0.15, thickness=3)
# Run once on a fixed frame.
inference_and_vis_instaonex2(1, "0000779.png", score_thr=0.1, thickness=3)