In [1]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from tqdm import tqdm

import _init_paths
from model.config import cfg
from model.test import im_detect
from model.nms_wrapper import nms
from nets.vgg16 import vgg16
from nets.resnet_v1 import resnetv1
from utils.timer import Timer

%matplotlib inline

In [2]:
# Keep PyTorch on a single CPU thread.
torch.set_num_threads(1)

# Checkpoint filename templates per backbone; `%d` is filled with the
# training iteration count when the model path is built.
NETS = dict(
    vgg16=('vgg16_faster_rcnn_iter_%d.pth',),
    res101=('res101_faster_rcnn_iter_%d.pth',),
)

# Training-set identifiers, used as a directory component of the model path.
DATASETS = dict(
    pascal_voc=('voc_2007_trainval',),
    pascal_voc_0712=('voc_2007_trainval+voc_2012_trainval',),
)

# Input frames and output figures; both are joined onto cfg.DATA_DIR.
# (Alternative input location used previously: IMAGE_DIRECTORY = 'demo'.)
IMAGE_DIRECTORY = 'frames/gangnam'
SAVE_DIRECTORY = 'output/gangnam'

In [3]:
def vis_detections(im, class_name, dets, thresh=0.5, save_fig=False, fname=None):
    """Draw detected bounding boxes on an image, then show or save the figure.

    Args:
        im: Image array indexed as (row, column, channel). The channel axis
            is reversed before display (presumably BGR from cv2.imread to
            RGB for matplotlib -- confirm against the caller).
        class_name: Label text drawn next to every box.
        dets: 2-D array with one row per detection. The first four columns
            are used as (x1, y1, x2, y2) and the last column as the score.
        thresh: Only rows whose score is >= thresh are drawn.
        save_fig: If true, write the figure to
            cfg.DATA_DIR/SAVE_DIRECTORY/fname and close it; otherwise show it.
        fname: Output file name; required when save_fig is true.

    Raises:
        TypeError: If save_fig is true and fname is None.
    """
    # Fail before allocating a figure; the original failed inside
    # os.path.join with the same exception type but leaked the open figure.
    if save_fig and fname is None:
        raise TypeError('fname must be provided when save_fig=True')

    inds = np.where(dets[:, -1] >= thresh)[0]

    # Reverse the channel order for display.
    im = im[:, :, (2, 1, 0)]
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.imshow(im, aspect='equal')
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ax.add_patch(
            plt.Rectangle((bbox[0], bbox[1]),
                          bbox[2] - bbox[0],
                          bbox[3] - bbox[1], fill=False,
                          edgecolor='red', linewidth=3.5)
            )
        ax.text(bbox[0], bbox[1] - 2,
                '{:s} {:.3f}'.format(class_name, score),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white')

    # Operate on this figure/axes explicitly instead of pyplot's
    # "current figure" state, so the right figure is always saved and closed.
    ax.axis('off')
    fig.tight_layout()
    if save_fig:
        save_file = os.path.join(cfg.DATA_DIR, SAVE_DIRECTORY, fname)
        save_dir = os.path.dirname(save_file)
        if save_dir:
            # savefig does not create missing directories.
            os.makedirs(save_dir, exist_ok=True)
        fig.savefig(save_file)
        # Release the figure; this function is called once per frame.
        plt.close(fig)
    else:
        plt.show()


In [4]:
def detect_person(net, image_name, save_fig=False):
    """Run the detector on one image and visualise the class-15 detections.

    The image is read from cfg.DATA_DIR/IMAGE_DIRECTORY/image_name, passed
    through `im_detect`, and the boxes/scores of class index 15 are filtered
    with NMS and drawn via `vis_detections` under the label 'Person'.

    Args:
        net: Network object forwarded to `im_detect`.
        image_name: File name of the image inside the image directory; also
            used as the output file name when save_fig is true.
        save_fig: Forwarded to `vis_detections`; save the figure instead of
            showing it.

    Returns:
        The (scores, boxes) pair returned by `im_detect`, covering all
        classes (not only the visualised one).

    Raises:
        OSError: If the image cannot be read.
    """

    # Load the image. cv2.imread signals failure by returning None rather
    # than raising, so check explicitly to get an error that names the file.
    im_file = os.path.join(cfg.DATA_DIR, IMAGE_DIRECTORY, image_name)
    im = cv2.imread(im_file)
    if im is None:
        raise OSError('Could not read image: {}'.format(im_file))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(timer.total_time(), boxes.shape[0]))

    # Thresholds: minimum score to draw a box, and the NMS threshold.
    conf_thresh = 0.8
    nms_thresh = 0.3

    # Assumes the network's class ordering puts "person" at index 15
    # (as in the usual PASCAL VOC list with background at 0) -- TODO confirm.
    cls_ind = 15
    cls = 'Person'
    # Each class owns four consecutive box columns and one score column.
    cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
    if dets.shape[0] > 0:
        # With zero proposals there is nothing to suppress, so NMS is skipped.
        keep = nms(torch.from_numpy(dets), nms_thresh)
        dets = dets[keep.numpy(), :]
    vis_detections(im, cls, dets, thresh=conf_thresh, save_fig=save_fig, fname=image_name)
    return scores, boxes

In [5]:
demonet = 'res101' # Network to use [vgg16 res101]
dataset = 'pascal_voc_0712' # Trained dataset [pascal_voc pascal_voc_0712]

cfg.TEST.HAS_RPN = True  # Use RPN for proposals
# Keep at most 300 proposals after NMS (the original note here says the
# paper uses 2000 region proposals).
cfg.TEST.RPN_POST_NMS_TOP_N = 300

# Checkpoint path: ../output/<net>/<train set>/default/<net>_faster_rcnn_iter_<N>.pth
# where N is 70000 for 'pascal_voc' and 110000 otherwise.
saved_model = os.path.join('../output', demonet, DATASETS[dataset][0], 'default',
                          NETS[demonet][0] %(70000 if dataset == 'pascal_voc' else 110000))

# Fail early, with the path in the message, before building the network.
if not os.path.isfile(saved_model):
    raise FileNotFoundError('{:s} not found. Check that the trained model '
                            'exists at this path.'.format(saved_model))

# Build the architecture that matches `demonet`. Previously this was always
# ResNet-101, so selecting 'vgg16' above would have tried to load VGG
# weights into a ResNet.
if demonet == 'vgg16':
    net = vgg16()
elif demonet == 'res101':
    net = resnetv1(num_layers=101)
else:
    raise NotImplementedError('Unsupported network: {}'.format(demonet))
# 21 is presumably 20 object classes + background -- TODO confirm.
net.create_architecture(21, tag='default', anchor_scales=[8, 16, 32])

# map_location keeps every tensor on the storage it is deserialised to
# (CPU), so a checkpoint saved from a GPU run loads without CUDA.
net.load_state_dict(torch.load(saved_model, map_location=lambda storage, loc: storage))

net.eval()
net._device = 'cpu'
net.to(net._device)

print('Loaded network {:s}'.format(saved_model))


Loaded network ../output/res101/voc_2007_trainval+voc_2012_trainval/default/res101_faster_rcnn_iter_110000.pth


In [None]:
# Process every `frame_skip`-th file of the sorted frame directory,
# starting from the third entry (index 2).
frame_skip = 12

frame_dir = os.path.join(cfg.DATA_DIR, IMAGE_DIRECTORY)
all_frames = sorted(os.listdir(frame_dir))
im_names = all_frames[2::frame_skip]
# To run on a single image instead: im_names = ['004545.jpg']

# Each call prints its timing and saves the annotated frame; `scores` and
# `boxes` hold the results of the most recently processed frame only.
for im_name in im_names:
    print(im_name)
    scores, boxes = detect_person(net, im_name, save_fig=True)

frame_000003.jpg
Detection took 17.181s for 300 object proposals
frame_000015.jpg
Detection took 24.079s for 300 object proposals
frame_000027.jpg
Detection took 31.168s for 300 object proposals
frame_000039.jpg
Detection took 30.421s for 300 object proposals
frame_000051.jpg
Detection took 32.202s for 300 object proposals
frame_000063.jpg
Detection took 29.580s for 283 object proposals
frame_000075.jpg
Detection took 26.138s for 225 object proposals
frame_000087.jpg
Detection took 24.052s for 212 object proposals
frame_000099.jpg
Detection took 24.342s for 205 object proposals
frame_000111.jpg
Detection took 29.086s for 267 object proposals
frame_000123.jpg
Detection took 29.529s for 268 object proposals
frame_000135.jpg
Detection took 17.584s for 120 object proposals
frame_000147.jpg
Detection took 21.986s for 165 object proposals
frame_000159.jpg
Detection took 19.727s for 142 object proposals
frame_000171.jpg
Detection took 17.183s for 114 object proposals
frame_000183.jpg
Detectio

Detection took 31.504s for 300 object proposals
frame_001527.jpg
Detection took 30.821s for 300 object proposals
frame_001539.jpg
Detection took 31.368s for 300 object proposals
frame_001551.jpg
Detection took 30.715s for 300 object proposals
frame_001563.jpg
Detection took 30.364s for 300 object proposals
frame_001575.jpg
Detection took 30.883s for 300 object proposals
frame_001587.jpg
Detection took 30.760s for 300 object proposals
frame_001599.jpg
Detection took 31.351s for 300 object proposals
frame_001611.jpg
Detection took 31.229s for 300 object proposals
frame_001623.jpg
Detection took 33.897s for 300 object proposals
frame_001635.jpg
Detection took 33.748s for 300 object proposals
frame_001647.jpg
Detection took 37.361s for 300 object proposals
frame_001659.jpg
Detection took 36.649s for 300 object proposals
frame_001671.jpg
Detection took 32.143s for 300 object proposals
frame_001683.jpg
Detection took 29.425s for 264 object proposals
frame_001695.jpg
Detection took 23.769s fo