In [1]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
from matplotlib import pyplot as plt
import random
import json
import pickle

import time
import os
from tqdm import tqdm

# import some common detectron2 utilities
import torch

from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers, FastRCNNOutputs, fast_rcnn_inference_single_image
from detectron2.structures.boxes import Boxes
from detectron2.structures.instances import Instances

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.modeling import build_model
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog

In [19]:
detectron2.__version__

'0.4'

In [2]:
cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml")
predictor = DefaultPredictor(cfg)

In [4]:
coco_key = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_dataset_id_to_contiguous_id
coco_key = {coco_key[k]: k for k in coco_key}

In [5]:
IMAGE_DIR = "/home/jamesp/data/vcr/vcr1images"
OUTPUT_DIR = "/home/jamesp/data/visualcomet/features-test"
os.makedirs(OUTPUT_DIR, exist_ok=True)
movie_dirs = sorted(os.listdir(IMAGE_DIR))
print(len(movie_dirs))

2336


In [8]:
def doit(raw_image, raw_boxes):
    # Process Boxes
    raw_boxes = Boxes(torch.from_numpy(raw_boxes).cuda())
    
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        
        # Preprocessing
        image = predictor.aug.get_transform(raw_image).apply_image(raw_image) # use aug attribute to get transform
        
        # Scale the box
        new_height, new_width = image.shape[:2]
        scale_x = 1. * new_width / raw_width
        scale_y = 1. * new_height / raw_height
        #print(scale_x, scale_y)
        boxes = raw_boxes.clone()
        boxes.scale(scale_x=scale_x, scale_y=scale_y)
        
        # ----
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)
        
        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)
        
        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [boxes]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        return feature_pooled  

In [11]:
for movie in tqdm(movie_dirs[-2:]):
    img_ids = list(set([id[:id.rfind('.')] for id in os.listdir(os.path.join(IMAGE_DIR,movie))]))
    for id in sorted(tqdm(img_ids)):
        im = cv2.imread(os.path.join(IMAGE_DIR,movie,id+'.jpg'))
        metadata = json.load(open(os.path.join(IMAGE_DIR,movie,id+'.json')))
        boxes = np.array(metadata['boxes'])[:,:4]
        h = metadata['height']
        w = metadata['width']
        boxes = np.row_stack((np.array([0,0,w,h]),boxes))
        obj_rep = doit(im, boxes).to("cpu").numpy()
        
        features = {'image_features' : obj_rep[0],
                    'object_features' : obj_rep[1:]}
        output_name = os.path.join(OUTPUT_DIR,id+'.pkl')
        pickle.dump(features, open(output_name,'wb'))

  0%|                                                                                                      | 0/2 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 176974.85it/s][A


/home/jamesp/data/visualcomet/features-test/-qq785V7JOU@1.pkl
/home/jamesp/data/visualcomet/features-test/4zJ3a0K2DP0@10.pkl
/home/jamesp/data/visualcomet/features-test/89hYiDNscBE@18.pkl
/home/jamesp/data/visualcomet/features-test/89hYiDNscBE@6.pkl
/home/jamesp/data/visualcomet/features-test/HdU-6bKqhzk@23.pkl
/home/jamesp/data/visualcomet/features-test/W1fkINKMwHA@2.pkl
/home/jamesp/data/visualcomet/features-test/Wt5LAZa7LAU@15.pkl
/home/jamesp/data/visualcomet/features-test/o3f521sUTaE@2.pkl
/home/jamesp/data/visualcomet/features-test/o3f521sUTaE@25.pkl
/home/jamesp/data/visualcomet/features-test/o3f521sUTaE@27.pkl
/home/jamesp/data/visualcomet/features-test/o3f521sUTaE@33.pkl
/home/jamesp/data/visualcomet/features-test/o3f521sUTaE@37.pkl
/home/jamesp/data/visualcomet/features-test/qfym2Neaz4c@21.pkl
/home/jamesp/data/visualcomet/features-test/ssM67LXOwQw@11.pkl
/home/jamesp/data/visualcomet/features-test/ssM67LXOwQw@18.pkl
/home/jamesp/data/visualcomet/features-test/ssM67LXOwQw@22.

  0%|                                                                                                      | 0/2 [00:01<?, ?it/s]

/home/jamesp/data/visualcomet/features-test/ssM67LXOwQw@33.pkl
/home/jamesp/data/visualcomet/features-test/ssM67LXOwQw@35.pkl



