In [76]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm import tqdm

import _init_paths
from model.config import cfg
from model.test import im_detect
from model.nms_wrapper import nms
from nets.vgg16 import vgg16
from nets.resnet_v1 import resnetv1
from utils.timer import Timer

%matplotlib inline

In [86]:
# Restrict PyTorch to a single CPU thread for intra-op parallelism.
torch.set_num_threads(1)

# Checkpoint file-name templates per backbone; '%d' is filled with the
# training iteration count when the model path is built.
NETS = dict(
    vgg16=('vgg16_faster_rcnn_iter_%d.pth',),
    res101=('res101_faster_rcnn_iter_%d.pth',),
)

# Training-set identifiers; used as a directory component of the model path.
DATASETS = dict(
    pascal_voc=('voc_2007_trainval',),
    pascal_voc_0712=('voc_2007_trainval+voc_2012_trainval',),
)

# Input frames and rendered output, both joined onto cfg.DATA_DIR by the
# functions that use them.
# Alternative input directory: IMAGE_DIRECTORY = 'demo'
IMAGE_DIRECTORY = 'frames/gangnam'
SAVE_DIRECTORY = 'output/gangnam'

In [78]:
def vis_detections(im, class_name, dets, thresh=0.5, save_fig=False, fname=None):
    """Draw detected bounding boxes on an image, then show or save the figure.

    Args:
        im: Image array indexed as (row, column, channel). The channel axis
            is reversed before display (presumably BGR from cv2.imread to
            RGB for matplotlib -- confirm against the caller).
        class_name: Label text written next to every drawn box.
        dets: 2-D array with one detection per row. The first four columns
            are used as (x1, y1, x2, y2) and the last column as the score.
        thresh: Minimum score a detection needs in order to be drawn.
        save_fig: If True, write the figure to
            ``cfg.DATA_DIR/SAVE_DIRECTORY/fname`` and close it; otherwise
            display it with ``plt.show()``.
        fname: Output file name; required when ``save_fig`` is True.

    Raises:
        ValueError: If ``save_fig`` is True but ``fname`` is None.
    """
    # Validate before allocating a figure so a bad call does not leave an
    # open figure behind.
    if save_fig and fname is None:
        raise ValueError('fname must be provided when save_fig=True')

    inds = np.where(dets[:, -1] >= thresh)[0]

    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    # Operate on the explicit figure/axes handles rather than pyplot's
    # implicit "current figure" state.
    ax.axis('off')
    fig.tight_layout()
    if save_fig:
        save_file = os.path.join(cfg.DATA_DIR, SAVE_DIRECTORY, fname)
        # savefig does not create missing directories, so make sure the
        # target directory exists first.
        save_dir = os.path.dirname(save_file)
        if save_dir and not os.path.isdir(save_dir):
            os.makedirs(save_dir)
        fig.savefig(save_file)
        # Close explicitly: this is called once per frame in a long loop.
        plt.close(fig)
    else:
        plt.show()


In [75]:
def detect_person(net, image_name, save_fig=False, conf_thresh=0.8, nms_thresh=0.3):
    """Run the detector on one image and visualise its 'Person' detections.

    The image is read from ``cfg.DATA_DIR/IMAGE_DIRECTORY/image_name``,
    passed through ``im_detect``, and the boxes/scores of a single class
    column are filtered with NMS and drawn via ``vis_detections``.

    Args:
        net: Network object handed to ``im_detect``.
        image_name: File name of the image inside the image directory; also
            used as the output file name when ``save_fig`` is True.
        save_fig: Forwarded to ``vis_detections``; save instead of show.
        conf_thresh: Minimum score for a detection to be drawn.
        nms_thresh: Threshold passed to ``nms``.

    Returns:
        The ``(scores, boxes)`` pair returned by ``im_detect`` (all classes,
        before NMS and thresholding).

    Raises:
        IOError: If the image file cannot be read.
    """
    # Load the image
    im_file = os.path.join(cfg.DATA_DIR, IMAGE_DIRECTORY, image_name)
    im = cv2.imread(im_file)
    # cv2.imread signals failure by returning None instead of raising, so
    # fail here with the offending path rather than deep inside im_detect.
    if im is None:
        raise IOError('Could not read image {:s}'.format(im_file))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0]))

    # Only one class is visualised. Column 15 is labelled 'Person' here --
    # presumably the 'person' index of the 21-class PASCAL VOC label list;
    # verify against the class order the model was trained with.
    cls_ind = 15
    cls = 'Person'
    # boxes holds four coordinates per class side by side; slice out the
    # four columns belonging to this class.
    cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(torch.from_numpy(dets), nms_thresh)
    dets = dets[keep.numpy(), :]
    vis_detections(im, cls, dets, thresh=conf_thresh, save_fig=save_fig, fname=image_name)
    return scores, boxes

In [73]:
demonet = 'res101' # Network to use [vgg16 res101]
dataset = 'pascal_voc_0712' # Trained dataset [pascal_voc pascal_voc_0712]

cfg.TEST.HAS_RPN = True  # Use RPN for proposals
cfg.TEST.RPN_POST_NMS_TOP_N = 300 # Paper uses 2000 region proposals

# model path: ../output/<net>/<dataset>/default/<checkpoint>, where the
# checkpoint iteration is 70000 for 'pascal_voc' and 110000 otherwise
saved_model = os.path.join('../output', demonet, DATASETS[dataset][0], 'default',
                          NETS[demonet][0] %(70000 if dataset == 'pascal_voc' else 110000))

# Fail early with the path in the message instead of an error from
# inside torch.load.
if not os.path.isfile(saved_model):
    raise IOError('{:s} not found.'.format(saved_model))

# load network: the architecture must match the checkpoint chosen via
# `demonet`, otherwise load_state_dict cannot match the saved parameters.
if demonet == 'vgg16':
    # NOTE(review): assumes vgg16() takes no constructor arguments -- confirm
    # against nets.vgg16.
    net = vgg16()
elif demonet == 'res101':
    net = resnetv1(num_layers=101)
else:
    raise NotImplementedError('Unsupported network: {:s}'.format(demonet))
net.create_architecture(21, tag='default', anchor_scales=[8, 16, 32])

# map_location keeps every tensor on CPU storage, so the checkpoint can be
# loaded without a GPU.
net.load_state_dict(torch.load(saved_model, map_location=lambda storage, loc: storage))

net.eval()
net._device = 'cpu'
net.to(net._device)

print('Loaded network {:s}'.format(saved_model))


Loaded network ../output/res101/voc_2007_trainval+voc_2012_trainval/default/res101_faster_rcnn_iter_110000.pth


In [None]:
frame_skip = 12    # process every 12th frame
first_frame = 10   # index (in sorted order) of the first frame to process

# List the frame directory in sorted order, then subsample it.
frames_dir = os.path.join(cfg.DATA_DIR, IMAGE_DIRECTORY)
all_frames = sorted(os.listdir(frames_dir))
im_names = all_frames[first_frame::frame_skip]
# For a quick single-image check, override the list instead:
#im_names = ['004545.jpg']

# Each call prints its timing and saves the annotated frame to disk;
# scores/boxes keep the raw detector output of the last processed frame.
for im_name in im_names:
    print(im_name)
    scores, boxes = detect_person(net, im_name, save_fig=True)

frame_000011.jpg
Detection took 32.992s for 300 object proposals
frame_000023.jpg
Detection took 31.171s for 300 object proposals
frame_000035.jpg
Detection took 31.781s for 300 object proposals
frame_000047.jpg
Detection took 33.651s for 300 object proposals
frame_000059.jpg
Detection took 31.565s for 292 object proposals
frame_000071.jpg
Detection took 28.069s for 240 object proposals
frame_000083.jpg
Detection took 26.333s for 220 object proposals
frame_000095.jpg
Detection took 25.408s for 209 object proposals
frame_000107.jpg
Detection took 28.726s for 265 object proposals
frame_000119.jpg
Detection took 30.701s for 278 object proposals
frame_000131.jpg
Detection took 31.650s for 300 object proposals
frame_000143.jpg
Detection took 21.559s for 155 object proposals
frame_000155.jpg
Detection took 19.912s for 136 object proposals
frame_000167.jpg
Detection took 17.267s for 104 object proposals
frame_000179.jpg
Detection took 17.888s for 109 object proposals
frame_000191.jpg
Detectio

Detection took 30.296s for 300 object proposals
frame_001535.jpg
Detection took 30.536s for 300 object proposals
frame_001547.jpg
Detection took 30.287s for 300 object proposals
frame_001559.jpg
Detection took 30.964s for 300 object proposals
frame_001571.jpg
Detection took 30.159s for 300 object proposals
frame_001583.jpg
Detection took 30.218s for 300 object proposals
frame_001595.jpg
Detection took 30.161s for 300 object proposals
frame_001607.jpg
Detection took 30.573s for 300 object proposals
frame_001619.jpg
Detection took 32.545s for 300 object proposals
frame_001631.jpg
Detection took 33.363s for 300 object proposals
frame_001643.jpg
Detection took 36.467s for 300 object proposals
frame_001655.jpg
Detection took 36.459s for 300 object proposals
frame_001667.jpg
Detection took 31.634s for 300 object proposals
frame_001679.jpg
Detection took 30.385s for 256 object proposals
frame_001691.jpg
Detection took 22.765s for 208 object proposals
frame_001703.jpg
Detection took 17.084s fo