In [19]:
import os
import io
import csv
import base64
import detectron2
import glob
# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

# import some common libraries
import numpy as np
import cv2
import torch

import pandas as pd
from tqdm.notebook import tqdm
import json

# Show the image in ipynb
from IPython.display import clear_output, Image, display
import PIL.Image
def showarray(a, fmt='jpeg'):
    a = np.uint8(np.clip(a, 0, 255))
    f = io.BytesIO()
    PIL.Image.fromarray(a).save(f, fmt)
    display(Image(data=f.getvalue()))

In [20]:
with open("/data2/zhongkai/VIP/real_world_dataset/random_split/train/image_map.json", 'r') as f:
    maps = json.load(f)
PATH = "/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/"

In [21]:
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers, FastRCNNOutputs, fast_rcnn_inference_single_image

def doit(raw_image, NUM_OBJECTS):
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        print("Original image size: ", (raw_height, raw_width))
        
        # Preprocessing
        image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
        print("Transformed image size: ", image.shape[:2])
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)
        
        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)
        
        # Generate proposals with RPN
        proposals, _ = predictor.model.proposal_generator(images, features, None)
        proposal = proposals[0]
        print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)
        
        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        print('Pooled features size:', feature_pooled.shape)
        
        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(feature_pooled)
        outputs = FastRCNNOutputs(
            predictor.model.roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            predictor.model.roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]
        
        attr_prob = pred_attr_logits[..., :-1].softmax(-1)
        max_attr_prob, max_attr_label = attr_prob.max(-1)
        
        # Note: BUTD uses raw RoI predictions,
        #       we use the predicted boxes instead.
        # boxes = proposal_boxes[0].tensor    
        
        # NMS
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes, probs, image.shape[1:], 
                score_thresh=0.2, nms_thresh=nms_thresh, topk_per_image=NUM_OBJECTS
            )
            if len(ids) == NUM_OBJECTS:
                break
                
        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        max_attr_prob = max_attr_prob[ids].detach()
        max_attr_label = max_attr_label[ids].detach()
        instances.attr_scores = max_attr_prob
        instances.attr_classes = max_attr_label
        
        print(instances)
        
        return instances, roi_features


In [22]:
# Load VG Classes
data_path = 'data/genome/1600-400-20'

vg_classes = []
with open(os.path.join(data_path, 'objects_vocab.txt')) as f:
    for object in f.readlines():
        vg_classes.append(object.split(',')[0].lower().strip())

MetadataCatalog.get("vg").thing_classes = vg_classes


In [23]:
cfg = get_cfg()
cfg.merge_from_file("../configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml")
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
cfg.MODEL.DEVICE = 'cuda:1'
# VG Weight
cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe_attr.pkl"
predictor = DefaultPredictor(cfg)

Config '../configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml' has no VERSION. Assuming it to be compatible with latest v2.


Modifications for VG in ResNet Backbone (modeling/backbone/resnet.py):
	Using pad 0 in stem max_pool instead of pad 1.

Modifications for VG in RPN (modeling/proposal_generator/rpn.py):
	Use hidden dim 512 instead fo the same dim as Res4 (1024).

Modifications for VG in RoI heads (modeling/roi_heads/roi_heads.py):
	1. Change the stride of conv1 and shortcut in Res5.Block1 from 2 to 1.
	2. Modifying all conv2 with (padding: 1 --> 2) and (dilation: 1 --> 2).
	For more details, please check 'https://github.com/peteanderson80/bottom-up-attention/blob/master/models/vg/ResNet-101/faster_rcnn_end2end_final/test.prototxt'.

Modifications for VG in RoI heads (modeling/roi_heads/fast_rcnn.py))
	Embedding: 1601 --> 256	Linear: 2304 --> 512	Linear: 512 --> 401



In [24]:
def generate_feature(image, ids, num_objects=30):
    instances, features = doit(image, num_objects)

    boxes = instances.pred_boxes.tensor.cpu().numpy()
    image_height, image_width = instances.image_size
    feature = features.cpu().numpy()

    box_width = boxes[:, 2] - boxes[:, 0]
    box_height = boxes[:, 3] - boxes[:, 1]
    scaled_width = box_width/image_width
    scaled_height = box_height/image_height
    scaled_x = boxes[:, 0]/image_width
    scaled_y = boxes[:, 1] / image_height
    scaled_width = scaled_width[..., np.newaxis]
    scaled_height = scaled_height[..., np.newaxis]
    scaled_x = scaled_x[..., np.newaxis]
    scaled_y = scaled_y[..., np.newaxis]

    spatial_features = np.concatenate((scaled_x, scaled_y, scaled_x+scaled_width, scaled_y+scaled_height, scaled_width, scaled_height), axis=1)

    full_features = np.concatenate((feature, spatial_features), axis=1)
    fea_base64 = base64.b64encode(full_features).decode('utf-8')
    fea_info = {"num_boxes": boxes.shape[0], "features": fea_base64}

    objects = instances.pred_classes.cpu().numpy()
    conf = instances.scores.cpu().numpy()

    labels = []
    for i in range(len(boxes)):
        labels.append({
            "class": vg_classes[objects[i]],
            "rect": list(boxes[i, :]), 
            "conf": conf[i]
        })
    return {'image_id': ids, 'images': fea_info}, {'image_id': ids, 'list':labels}

In [25]:
def generate_lineidx_file(filein, idxout):
    idxout_tmp = idxout + '.tmp'
    with open(filein, 'r') as tsvin, open(idxout_tmp,'w') as tsvout:
        fsize = os.fstat(tsvin.fileno()).st_size
        fpos = 0
        while fpos!=fsize:
            tsvout.write(str(fpos)+"\n")
            tsvin.readline()
            fpos = tsvin.tell()
    os.rename(idxout_tmp, idxout)

def generate_tsv(feature_path="../test/train.feature.tsv", label_path="../test/train.label.tsv"):
    with open(feature_path, 'a') as tsvfile, open(label_path, 'a') as labeltsvfile:
        writer = csv.DictWriter(tsvfile, delimiter='\t', fieldnames=['image_id', 'images'])
        Labelwriter=csv.DictWriter(labeltsvfile, delimiter='\t', fieldnames=['image_id', 'list'])
        for img_path in glob.glob(f"{PATH}/*.jpg"):
            print(img_path)
            im = cv2.imread(img_path)
            im_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            index = img_path.split('/')[-1].split('.')[0]
            print(index)
            rst = generate_feature(im_rgb, ids=int(index))

            writer.writerow(rst[0])
            Labelwriter.writerow(rst[1])
    generate_lineidx_file(feature_path, feature_path.replace("tsv", "lineidx"))
    generate_lineidx_file(label_path, label_path.replace("tsv", "lineidx"))



In [26]:
!rm ../test/train*.tsv

rm: cannot remove '../test/train*.tsv': No such file or directory


In [27]:
generate_tsv()

/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1177.jpg
1177
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([177, 4])
Pooled features size: torch.Size([177, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/193.jpg
193
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1678.jpg
1678
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([143, 4])
Pooled features size: torch.Size([143, 2048])
Instances(num_i

815
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([150, 4])
Pooled features size: torch.Size([150, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/212.jpg
212
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([143, 4])
Pooled features size: torch.Size([143, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/835.jpg
835
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores,

316
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1248.jpg
1248
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([161, 4])
Pooled features size: torch.Size([161, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1209.jpg
1209
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sco

331
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([143, 4])
Pooled features size: torch.Size([143, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1657.jpg
1657
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([165, 4])
Pooled features size: torch.Size([165, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1210.jpg
1210
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sco

1099
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([155, 4])
Pooled features size: torch.Size([155, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1494.jpg
1494
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([117, 4])
Pooled features size: torch.Size([117, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1732.jpg
1732
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sc

1444
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1262.jpg
1262
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1043.jpg
1043
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sc

1913
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1410.jpg
1410
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([117, 4])
Pooled features size: torch.Size([117, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1253.jpg
1253
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([125, 4])
Pooled features size: torch.Size([125, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sc

1065
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1058.jpg
1058
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([136, 4])
Pooled features size: torch.Size([136, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/734.jpg
734
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scor

1091
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([154, 4])
Pooled features size: torch.Size([154, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/197.jpg
197
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/444.jpg
444
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([126, 4])
Pooled features size: torch.Size([126, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores

1037
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1148.jpg
1148
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1128.jpg
1128
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([133, 4])
Pooled features size: torch.Size([133, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sc

260
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([153, 4])
Pooled features size: torch.Size([153, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1607.jpg
1607
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([143, 4])
Pooled features size: torch.Size([143, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/504.jpg
504
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

1351
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([180, 4])
Pooled features size: torch.Size([180, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/522.jpg
522
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/36.jpg
36
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, 

1175
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1868.jpg
1868
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([125, 4])
Pooled features size: torch.Size([125, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1514.jpg
1514
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([158, 4])
Pooled features size: torch.Size([158, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sc

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/18.jpg
18
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1793.jpg
1793
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([143, 4])
Pooled features size: torch.Size([143, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1285.jpg
1285
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1360.jpg
1360
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1842.jpg
1842
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([154, 4])
Pooled features size: torch.Size([154, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1633.jpg
1633
Original image size:  (2160, 3840)
Tra

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1032.jpg
1032
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([162, 4])
Pooled features size: torch.Size([162, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1593.jpg
1593
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([140, 4])
Pooled features size: torch.Size([140, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/358.jpg
358
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/569.jpg
569
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1845.jpg
1845
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1254.jpg
1254
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1851.jpg
1851
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/648.jpg
648
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1577.jpg
1577
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/123.jpg
123
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1006.jpg
1006
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/97.jpg
97
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/684.jpg
684
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([139, 4])
Pooled features size: torch.Size([139, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1081.jpg
1081
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([161, 4])
Pooled features size: torch.Size([161, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1468.jpg
1468
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1707.jpg
1707
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([160, 4])
Pooled features size: torch.Size([160, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1735.jpg
1735
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/171.jpg
171
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/355.jpg
355
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([141, 4])
Pooled features size: torch.Size([141, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/943.jpg
943
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([130, 4])
Pooled features size: torch.Size([130, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1251.jpg
1251
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1864.jpg
1864
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([141, 4])
Pooled features size: torch.Size([141, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/673.jpg
673
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([125, 4])
Pooled features size: torch.Size([125, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/579.jpg
579
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/715.jpg
715
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/162.jpg
162
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([141, 4])
Pooled features size: torch.Size([141, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/176.jpg
176
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/973.jpg
973
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/495.jpg
495
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([130, 4])
Pooled features size: torch.Size([130, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/530.jpg
530
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1445.jpg
1445
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([147, 4])
Pooled features size: torch.Size([147, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/616.jpg
616
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/151.jpg
151
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1788.jpg
1788
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1888.jpg
1888
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1450.jpg
1450
Original image size:  (2160, 3840)
Tra

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/229.jpg
229
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/423.jpg
423
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([166, 4])
Pooled features size: torch.Size([166, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1614.jpg
1614
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/393.jpg
393
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/905.jpg
905
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([140, 4])
Pooled features size: torch.Size([140, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/383.jpg
383
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1662.jpg
1662
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1742.jpg
1742
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([122, 4])
Pooled features size: torch.Size([122, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1219.jpg
1219
Original image size:  (2160, 3840)
Tra

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/593.jpg
593
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1755.jpg
1755
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1223.jpg
1223
Original image size:  (2160, 3840)
Trans

990
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([136, 4])
Pooled features size: torch.Size([136, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1372.jpg
1372
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1402.jpg
1402
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([147, 4])
Pooled features size: torch.Size([147, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sco

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/244.jpg
244
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/249.jpg
249
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([118, 4])
Pooled features size: torch.Size([118, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/618.jpg
618
Original image size:  (2160, 3840)
Transform

208
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1585.jpg
1585
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([130, 4])
Pooled features size: torch.Size([130, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/233.jpg
233
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

114
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([153, 4])
Pooled features size: torch.Size([153, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1573.jpg
1573
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/987.jpg
987
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([118, 4])
Pooled features size: torch.Size([118, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

1062
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1870.jpg
1870
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([159, 4])
Pooled features size: torch.Size([159, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1243.jpg
1243
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sc

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1077.jpg
1077
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1019.jpg
1019
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([148, 4])
Pooled features size: torch.Size([148, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/794.jpg
794
Original image size:  (2160, 3840)
Trans

416
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1475.jpg
1475
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([138, 4])
Pooled features size: torch.Size([138, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1311.jpg
1311
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([157, 4])
Pooled features size: torch.Size([157, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sco

1021
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([133, 4])
Pooled features size: torch.Size([133, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/374.jpg
374
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([130, 4])
Pooled features size: torch.Size([130, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/798.jpg
798
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([159, 4])
Pooled features size: torch.Size([159, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores

623
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/327.jpg
327
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([133, 4])
Pooled features size: torch.Size([133, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/822.jpg
822
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([121, 4])
Pooled features size: torch.Size([121, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores,

1305
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([167, 4])
Pooled features size: torch.Size([167, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/34.jpg
34
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1380.jpg
1380
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores

1224
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/427.jpg
427
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([139, 4])
Pooled features size: torch.Size([139, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1240.jpg
1240
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scor

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/668.jpg
668
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/407.jpg
407
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([121, 4])
Pooled features size: torch.Size([121, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/247.jpg
247
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/843.jpg
843
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([135, 4])
Pooled features size: torch.Size([135, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/862.jpg
862
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1427.jpg
1427
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1594.jpg
1594
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([164, 4])
Pooled features size: torch.Size([164, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1228.jpg
1228
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([149, 4])
Pooled features size: torch.Size([149, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/53.jpg
53
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/596.jpg
596
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([143, 4])
Pooled features size: torch.Size([143, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/784.jpg
784
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/108.jpg
108
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1791.jpg
1791
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/367.jpg
367
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1245.jpg
1245
Original image size:  (2160, 3840)
Trans

Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/84.jpg
84
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([112, 4])
Pooled features size: torch.Size([112, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/348.jpg
348
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes]

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/485.jpg
485
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1165.jpg
1165
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([125, 4])
Pooled features size: torch.Size([125, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1798.jpg
1798
Original image size:  (2160, 3840)
Trans

1354
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([147, 4])
Pooled features size: torch.Size([147, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/693.jpg
693
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/966.jpg
966
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([136, 4])
Pooled features size: torch.Size([136, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores

626
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([150, 4])
Pooled features size: torch.Size([150, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1652.jpg
1652
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([117, 4])
Pooled features size: torch.Size([117, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/181.jpg
181
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([130, 4])
Pooled features size: torch.Size([130, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/498.jpg
498
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([147, 4])
Pooled features size: torch.Size([147, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/723.jpg
723
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classe

666
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/294.jpg
294
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/516.jpg
516
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores,

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1044.jpg
1044
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([125, 4])
Pooled features size: torch.Size([125, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1517.jpg
1517
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([113, 4])
Pooled features size: torch.Size([113, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/717.jpg
717
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/398.jpg
398
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/625.jpg
625
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([169, 4])
Pooled features size: torch.Size([169, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1017.jpg
1017
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/191.jpg
191
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([133, 4])
Pooled features size: torch.Size([133, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/524.jpg
524
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([150, 4])
Pooled features size: torch.Size([150, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/442.jpg
442
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1206.jpg
1206
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1807.jpg
1807
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([127, 4])
Pooled features size: torch.Size([127, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1302.jpg
1302
Original image size:  (2160, 3840)
Tra

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/515.jpg
515
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([126, 4])
Pooled features size: torch.Size([126, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/76.jpg
76
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([163, 4])
Pooled features size: torch.Size([163, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/814.jpg
814
Original image size:  (2160, 3840)
Transformed

315
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([162, 4])
Pooled features size: torch.Size([162, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1399.jpg
1399
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([128, 4])
Pooled features size: torch.Size([128, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1049.jpg
1049
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sco

600
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([126, 4])
Pooled features size: torch.Size([126, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1105.jpg
1105
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([135, 4])
Pooled features size: torch.Size([135, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1205.jpg
1205
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([139, 4])
Pooled features size: torch.Size([139, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, sco

567
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([141, 4])
Pooled features size: torch.Size([141, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1465.jpg
1465
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([168, 4])
Pooled features size: torch.Size([168, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/883.jpg
883
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1878.jpg
1878
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1625.jpg
1625
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([145, 4])
Pooled features size: torch.Size([145, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/432.jpg
432
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/761.jpg
761
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([138, 4])
Pooled features size: torch.Size([138, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1310.jpg
1310
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([178, 4])
Pooled features size: torch.Size([178, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/679.jpg
679
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1070.jpg
1070
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([138, 4])
Pooled features size: torch.Size([138, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/33.jpg
33
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([153, 4])
Pooled features size: torch.Size([153, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1028.jpg
1028
Original image size:  (2160, 3840)
Transfo

264
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([109, 4])
Pooled features size: torch.Size([109, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/339.jpg
339
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([131, 4])
Pooled features size: torch.Size([131, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/856.jpg
856
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([146, 4])
Pooled features size: torch.Size([146, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores,

466
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([136, 4])
Pooled features size: torch.Size([136, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/692.jpg
692
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1296.jpg
1296
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

Proposal Boxes size: torch.Size([159, 4])
Pooled features size: torch.Size([159, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1317.jpg
1317
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([112, 4])
Pooled features size: torch.Size([112, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/329.jpg
329
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([169, 4])
Pooled features size: torch.Size([169, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1646.jpg
1646
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([130, 4])
Pooled features size: torch.Size([130, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/697.jpg
697
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/793.jpg
793
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1389.jpg
1389
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/165.jpg
165
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([121, 4])
Pooled features size: torch.Size([121, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/768.jpg
768
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1047.jpg
1047
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([117, 4])
Pooled features size: torch.Size([117, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/359.jpg
359
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([150, 4])
Pooled features size: torch.Size([150, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/289.jpg
289
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/146.jpg
146
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([119, 4])
Pooled features size: torch.Size([119, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1318.jpg
1318
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([147, 4])
Pooled features size: torch.Size([147, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1770.jpg
1770
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/532.jpg
532
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([141, 4])
Pooled features size: torch.Size([141, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1196.jpg
1196
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([118, 4])
Pooled features size: torch.Size([118, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/669.jpg
669
Original image size:  (2160, 3840)
Transfo

231
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([126, 4])
Pooled features size: torch.Size([126, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/926.jpg
926
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([147, 4])
Pooled features size: torch.Size([147, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/952.jpg
952
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([116, 4])
Pooled features size: torch.Size([116, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores,

780
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([138, 4])
Pooled features size: torch.Size([138, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/39.jpg
39
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([155, 4])
Pooled features size: torch.Size([155, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/434.jpg
434
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, p

1597
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([149, 4])
Pooled features size: torch.Size([149, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/190.jpg
190
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/312.jpg
312
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores

Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([113, 4])
Pooled features size: torch.Size([113, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1495.jpg
1495
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([138, 4])
Pooled features size: torch.Size([138, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/944.jpg
944
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([154, 4])
Pooled features size: torch.Size([154, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_clas

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/21.jpg
21
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1255.jpg
1255
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/509.jpg
509
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1045.jpg
1045
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([133, 4])
Pooled features size: torch.Size([133, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1592.jpg
1592
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([150, 4])
Pooled features size: torch.Size([150, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/275.jpg
275
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/961.jpg
961
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([146, 4])
Pooled features size: torch.Size([146, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/709.jpg
709
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([132, 4])
Pooled features size: torch.Size([132, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1875.jpg
1875
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1894.jpg
1894
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([148, 4])
Pooled features size: torch.Size([148, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1691.jpg
1691
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/868.jpg
868
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1166.jpg
1166
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([139, 4])
Pooled features size: torch.Size([139, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/286.jpg
286
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([136, 4])
Pooled features size: torch.Size([136, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/914.jpg
914
Original image size:  (2160, 3840)
Transfo

930
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([142, 4])
Pooled features size: torch.Size([142, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/209.jpg
209
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1114.jpg
1114
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([148, 4])
Pooled features size: torch.Size([148, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, score

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/792.jpg
792
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([129, 4])
Pooled features size: torch.Size([129, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/63.jpg
63
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([145, 4])
Pooled features size: torch.Size([145, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1876.jpg
1876
Original image size:  (2160, 3840)
Transform

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1186.jpg
1186
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([123, 4])
Pooled features size: torch.Size([123, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/32.jpg
32
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([140, 4])
Pooled features size: torch.Size([140, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1528.jpg
1528
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/397.jpg
397
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([134, 4])
Pooled features size: torch.Size([134, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/898.jpg
898
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([135, 4])
Pooled features size: torch.Size([135, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1422.jpg
1422
Original image size:  (2160, 3840)
Transfo

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1887.jpg
1887
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([144, 4])
Pooled features size: torch.Size([144, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/584.jpg
584
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([137, 4])
Pooled features size: torch.Size([137, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1084.jpg
1084
Original image size:  (2160, 3840)
Trans

Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1005.jpg
1005
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([124, 4])
Pooled features size: torch.Size([124, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/1483.jpg
1483
Original image size:  (2160, 3840)
Transformed image size:  (750, 1333)
Proposal Boxes size: torch.Size([149, 4])
Pooled features size: torch.Size([149, 2048])
Instances(num_instances=30, image_height=2160, image_width=3840, fields=[pred_boxes, scores, pred_classes, attr_scores, attr_classes])
/data2/zhongkai/VIP/real_world_dataset/random_split/train/images/154.jpg
154
Original image size:  (2160, 3840)
Trans