Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1939479
commit 05bb3d5
Showing
100 changed files
with
8,213 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
function H = fhog( I, binSize, nOrients, clip, crop )
% Efficiently compute Felzenszwalb's HOG (FHOG) features.
%
% A fast implementation of the HOG variant used by Felzenszwalb et al.
% in their work on discriminatively trained deformable part models.
%  http://www.cs.berkeley.edu/~rbg/latent/index.html
% Gives nearly identical results to features.cc in code release version 5
% but runs 4x faster (over 125 fps on VGA color images).
%
% The computed HOG features are 3*nOrients+5 dimensional. There are
% 2*nOrients contrast sensitive orientation channels, nOrients contrast
% insensitive orientation channels, 4 texture channels and 1 all zeros
% channel (used as a 'truncation' feature). Using the standard value of
% nOrients=9 gives a 32 dimensional feature vector at each cell. This
% variant of HOG, referred to as FHOG, has been shown to achieve superior
% performance to the original HOG features. For details please refer to
% work by Felzenszwalb et al. (see link above).
%
% This function is essentially a wrapper for calls to gradientMag()
% and gradientHist(). Specifically, it is equivalent to the following:
%  [M,O] = gradientMag( I,0,0,0,1 ); softBin = -1; useHog = 2;
%  H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip);
% See gradientHist() for more general usage.
%
% This code requires SSE2 to compile and run (most modern Intel and AMD
% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2.
%
% USAGE
%  H = fhog( I, [binSize], [nOrients], [clip], [crop] )
%
% INPUTS
%  I        - [hxw] color or grayscale input image (must have type single)
%  binSize  - [8] spatial bin size
%  nOrients - [9] number of orientation bins
%  clip     - [.2] value at which to clip histogram bins
%  crop     - [0] if true crop boundaries
%
% OUTPUTS
%  H        - [h/binSize w/binSize nOrients*3+5] computed hog features
%
% EXAMPLE
%  I=imResample(single(imread('peppers.png'))/255,[480 640]);
%  tic, for i=1:100, H=fhog(I,8,9); end; disp(100/toc) % >125 fps
%  figure(1); im(I); V=hogDraw(H,25,1); figure(2); im(V)
%
% EXAMPLE
%  % comparison to features.cc (requires DPM code release version 5)
%  I=imResample(single(imread('peppers.png'))/255,[480 640]); Id=double(I);
%  tic, for i=1:100, H1=features(Id,8); end; disp(100/toc)
%  tic, for i=1:100, H2=fhog(I,8,9,.2,1); end; disp(100/toc)
%  figure(1); montage2(H1); figure(2); montage2(H2);
%  D=abs(H1-H2); mean(D(:))
%
% See also hog, hogDraw, gradientHist
%
% Piotr's Image&Video Toolbox      Version 3.23
% Copyright 2013 Piotr Dollar.  [pdollar-at-caltech.edu]
% Please email me if you find bugs, or have suggestions or questions!
% Licensed under the Simplified BSD License [see external/bsd.txt]

%Note: modified to be more self-contained

% Fill in the default for every trailing argument the caller left out.
if nargin < 2
    binSize = 8;
end
if nargin < 3
    nOrients = 9;
end
if nargin < 4
    clip = .2;
end
if nargin < 5
    crop = 0;
end

% Fixed histogram settings forwarded to the 'gradientHist' call below.
softBin = -1;
useHog = 2;

% Gradient magnitude and orientation first, then the histogram features.
[M, O] = gradientMex('gradientMag', I, 0, 1);
H = gradientMex('gradientHist', M, O, binSize, nOrients, softBin, useHog, clip);

% Optional cropping: the first row and column of cells are always removed;
% the last one along a dimension is removed as well whenever
% mod(size(I), binSize) is smaller than half a bin for that dimension.
if crop
    trim = mod(size(I), binSize) < binSize/2;
    H = H(2:end-trim(1), 2:end-trim(2), :);
end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
function kf = gaussian_correlation(xf, yf, sigma)
%GAUSSIAN_CORRELATION Gaussian Kernel at all shifts, i.e. kernel correlation.
%   Evaluates a Gaussian kernel with bandwidth SIGMA for all relative
%   shifts between input images X and Y, which must both be MxN. They must
%   also be periodic (ie., pre-processed with a cosine window). The result
%   is an MxN map of responses.
%
%   XF and YF are the Fourier-domain representations of X and Y, and the
%   returned KF is in the Fourier domain as well.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/

    %number of elements in one 2-D slice, used to rescale the norms
    N = size(xf,1) * size(xf,2);

    %squared norms of x and y, computed from the Fourier coefficients
    x_norm2 = xf(:)' * xf(:) / N;
    y_norm2 = yf(:)' * yf(:) / N;

    %cross-correlation of every channel in the Fourier domain, brought
    %back to the spatial domain and summed over the third dimension
    cross_spectrum = xf .* conj(yf);
    cross_corr = sum(real(ifft2(cross_spectrum)), 3);

    %squared distance for every shift, normalized by the element count and
    %clamped at zero so that it can never be negative
    dist2 = max(0, (x_norm2 + y_norm2 - 2 * cross_corr) / numel(xf));

    %Gaussian response for all positions, returned in the Fourier domain
    kf = fft2(exp(-1 / sigma^2 * dist2));

end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
function labels = gaussian_shaped_labels(sigma, sz)
%GAUSSIAN_SHAPED_LABELS
%   Gaussian-shaped labels for all shifts of a sample.
%
%   LABELS = GAUSSIAN_SHAPED_LABELS(SIGMA, SZ)
%   Builds the regression targets for every cyclic shift of a sample of
%   dimensions SZ. The output has size SZ, with one label per possible
%   shift. The labels follow a Gaussian of spatial bandwidth SIGMA whose
%   peak sits at 0-shift (top-left element of the array); values decay
%   with distance and wrap around at the borders.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/


% 	%as a simple example, the limit sigma = 0 would be a Dirac delta,
% 	%instead of a Gaussian:
% 	labels = zeros(sz(1:2));  %labels for all shifted samples
% 	labels(1,1) = magnitude;  %label for 0-shift (original sample)


    %offset that places the zero coordinate at the center element
    half = floor(sz(1:2) / 2);

    %row/column distances from the center element
    [row_off, col_off] = ndgrid((1:sz(1)) - half(1), (1:sz(2)) - half(2));

    %Gaussian with its peak at the center element
    labels = exp(-0.5 / sigma^2 * (row_off.^2 + col_off.^2));

    %cyclically shift the peak to the top-left corner
    labels = circshift(labels, -half + 1);

    %sanity check: the peak must really be at the top-left
    assert(labels(1,1) == 1)

end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
function x = get_features(im, features, cell_size, cos_window)
%GET_FEATURES
%   Extracts dense features from image.
%
%   X = GET_FEATURES(IM, FEATURES, CELL_SIZE)
%   X = GET_FEATURES(IM, FEATURES, CELL_SIZE, COS_WINDOW)
%   Extracts features specified in struct FEATURES, from image IM. The
%   features should be densely sampled, in cells or intervals of CELL_SIZE.
%   The output has size [height in cells, width in cells, features].
%
%   COS_WINDOW is optional. When it is given and non-empty, the features
%   are multiplied element-wise by it (broadcast over the feature
%   channels); when it is omitted or empty, no windowing is applied.
%
%   To specify HOG features, set field 'hog' to true, and
%   'hog_orientations' to the number of bins. To specify raw gray-level
%   features (scaled to [0,1] and with the mean subtracted), set field
%   'gray' to true. If both fields are true, the gray-level features
%   replace the HOG features.
%
%   An error ('get_features:noFeatures') is raised when neither feature
%   type is enabled, since there would be nothing to return.
%
%   To experiment with other features simply add them to this function
%   and include any needed parameters in the FEATURES struct. To allow
%   combinations of features, stack them with x = cat(3, x, new_feat).
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/

    %the window is optional, so the documented 3-argument form must work
    if nargin < 4,
        cos_window = [];
    end

    %tracks whether any feature type was actually computed
    have_features = false;

    if features.hog,
        %HOG features, from Piotr's Toolbox
        x = double(fhog(single(im) / 255, cell_size, features.hog_orientations));
        x(:,:,end) = [];  %remove all-zeros channel ("truncation feature")
        have_features = true;
    end

    if features.gray,
        %gray-level (scalar feature), zero-mean
        x = double(im) / 255;
        x = x - mean(x(:));
        have_features = true;
    end

    %fail with a clear message instead of an "output not assigned" error
    if ~have_features,
        error('get_features:noFeatures', ...
            'No feature type selected: set features.hog or features.gray to true.');
    end

    %process with cosine window if needed
    if ~isempty(cos_window),
        x = bsxfun(@times, x, cos_window);
    end

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
function out = get_subwindow(im, pos, sz)
%GET_SUBWINDOW Obtain sub-window from image, with replication-padding.
%   Returns the sub-window of image IM centered at POS ([y, x]
%   coordinates), with size SZ ([height, width]; a scalar SZ gives a
%   square window). Coordinates that fall outside of the image are
%   clamped to the nearest border, so border pixels are replicated.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/

    %a scalar size means a square sub-window
    if isscalar(sz),
        sz = [sz, sz];
    end

    n_rows = size(im,1);
    n_cols = size(im,2);

    %pixel coordinates covered by the window, before clamping
    cols = floor(pos(2)) + (1:sz(2)) - floor(sz(2)/2);
    rows = floor(pos(1)) + (1:sz(1)) - floor(sz(1)/2);

    %clamp out-of-bounds coordinates to the image borders
    cols(cols < 1) = 1;
    rows(rows < 1) = 1;
    cols(cols > n_cols) = n_cols;
    rows(rows > n_rows) = n_rows;

    %extract image (all channels)
    out = im(rows, cols, :);

end
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
function kf = linear_correlation(xf, yf)
%LINEAR_CORRELATION Linear Kernel at all shifts, i.e. correlation.
%   Computes the dot-product for all relative shifts between input images
%   X and Y, which must both be MxN. They must also be periodic (ie.,
%   pre-processed with a cosine window). The result is an MxN map of
%   responses.
%
%   XF and YF are the Fourier-domain representations of X and Y, and the
%   returned KF is in the Fourier domain as well.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/

    %per-channel cross-correlation term in the Fourier domain
    cross_spectrum = xf .* conj(yf);

    %sum over the third dimension and normalize by the element count
    kf = sum(cross_spectrum, 3) / numel(xf);

end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
function [img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path)
%LOAD_VIDEO_INFO
%   Loads all the relevant information for the video in the given path:
%   the list of image files (cell array of strings), initial position
%   (1x2), target size (1x2), the ground truth information for precision
%   calculations (Nx2, for N frames), and the path where the images are
%   located. The ordering of coordinates and sizes is always [y, x].
%
%   The ground truth is read from 'groundtruth_rect.txt' inside BASE_PATH
%   (one [x, y, width, height] box per line, separated by commas or by
%   whitespace). The images are listed from the 'img/' subfolder, PNG
%   files first and JPG files as a fallback. GROUND_TRUTH is returned
%   empty when the file only holds the box for the first frame.
%
%   Errors are raised when the ground truth file cannot be opened, when it
%   contains no boxes ('load_video_info:emptyGroundTruth'), or when no
%   image files are found.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/


    %full path to the video's files
    if base_path(end) ~= '/' && base_path(end) ~= '\',
        base_path(end+1) = '/';
    end
    video_path = base_path;

    %try to load ground truth from text file (Benchmark's format)
    filename = [video_path 'groundtruth_rect.txt'];
    f = fopen(filename);
    assert(f ~= -1, ['No initial position or ground truth to load ("' filename '").'])

    %make sure the file handle is released even if parsing throws
    close_file = onCleanup(@() fclose(f));

    %the format is [x, y, width, height]
    try
        ground_truth = textscan(f, '%f,%f,%f,%f', 'ReturnOnError',false);
    catch  %#ok, try different format (no commas)
        frewind(f);
        ground_truth = textscan(f, '%f %f %f %f');
    end

    %reading is done: close the file now, before any further processing
    %that could fail (e.g. concatenating columns of unequal length)
    clear close_file

    ground_truth = cat(2, ground_truth{:});

    %an empty file would otherwise fail below with an opaque index error
    if isempty(ground_truth),
        error('load_video_info:emptyGroundTruth', ...
            'Ground truth file "%s" contains no bounding boxes.', filename);
    end

    %set initial position and size
    target_sz = [ground_truth(1,4), ground_truth(1,3)];
    pos = [ground_truth(1,2), ground_truth(1,1)] + floor(target_sz/2);

    if size(ground_truth,1) == 1,
        %we have ground truth for the first frame only (initial position)
        ground_truth = [];
    else
        %store positions (box centers) instead of boxes
        ground_truth = ground_truth(:,[2,1]) + ground_truth(:,[4,3]) / 2;
    end


    %from now on, work in the subfolder where all the images are
    video_path = [video_path 'img/'];

    %general case, just list all images
    img_files = dir([video_path '*.png']);
    if isempty(img_files),
        img_files = dir([video_path '*.jpg']);
        assert(~isempty(img_files), 'No image files to load.')
    end
    img_files = sort({img_files.name});

end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
function kf = polynomial_correlation(xf, yf, a, b)
%POLYNOMIAL_CORRELATION Polynomial Kernel at all shifts, i.e. kernel correlation.
%   Evaluates a polynomial kernel with constant A and exponent B, for all
%   relative shifts between input images XF and YF, which must both be MxN.
%   They must also be periodic (ie., pre-processed with a cosine window).
%   The result is an MxN map of responses.
%
%   Inputs and output are all in the Fourier domain.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/

    %per-channel cross-correlation term in the Fourier domain
    cross_spectrum = xf .* conj(yf);

    %back to the spatial domain, summed over the third dimension
    cross_corr = sum(real(ifft2(cross_spectrum)), 3);

    %polynomial response for all positions: (normalized correlation + a)^b
    response = (cross_corr / numel(xf) + a) .^ b;

    %return the response in the Fourier domain
    kf = fft2(response);

end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
function update_visualization_func = show_video(img_files, video_path, resize_image)
%SHOW_VIDEO
%   Visualizes a tracker in an interactive figure, given a cell array of
%   image file names, their path, and whether to resize the images to
%   half size or not.
%
%   The returned UPDATE_VISUALIZATION function handle is meant to be
%   called with a frame number and a bounding box [x, y, width, height]
%   as soon as the result for a new frame is available. Each box is stored
%   per frame, so results appear as they are produced and can still be
%   browsed afterwards. Pressing 'Esc' raises a stop signal, which is the
%   value returned by UPDATE_VISUALIZATION.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/


    %one slot per frame for the box reported by the tracker
    num_frames = numel(img_files);
    boxes = cell(num_frames, 1);

    %create the window; redraw and key presses are handled by the nested
    %functions below
    [fig_h, axes_h, unused, scroll] = videofig(num_frames, @redraw, [], [], @on_key_press);  %#ok, unused outputs
%     set(fig_h, 'number','off', 'name', ['Tracker - ' video_path])
    axis off;

    %graphics handles are created lazily on the first redraw
    im_h = [];
    rect_h = [];

    update_visualization_func = @update_visualization;

    %set to true by the key handler once 'Esc' is pressed
    stop_tracker = false;


    function stop = update_visualization(frame, box)
        %remember the box for this frame and scroll the figure to it.
        %returns true if processing should stop (user pressed 'Esc').
        boxes{frame} = box;
        scroll(frame);
        stop = stop_tracker;
    end

    function redraw(frame)
        %load the image for this frame
        im = imread([video_path img_files{frame}]);
%         if size(im,3) > 1,
%             im = rgb2gray(im);
%         end
        if resize_image,
            im = imresize(im, 0.5);
        end

        %show the image: update the existing image object if there is
        %one, otherwise create it
        if ~isempty(im_h),
            set(im_h, 'CData', im)
        else
            im_h = imshow(im, 'Border','tight', 'InitialMag',200, 'Parent',axes_h);
        end

        %the rectangle object is created once and reused for every frame
        if isempty(rect_h),
            rect_h = rectangle('Position',[0,0,1,1], 'EdgeColor','r', 'LineWidth', 3, 'Parent',axes_h);
        end

        %hide the rectangle for frames without a stored box
        if isempty(boxes{frame}),
            set(rect_h, 'Visible', 'off');
        else
            set(rect_h, 'Visible', 'on', 'Position', boxes{frame});
        end
    end

    function on_key_press(key)
        %raise the stop signal on 'Esc'
        if strcmp(key, 'escape'),
            stop_tracker = true;
        end
    end

end
|
Oops, something went wrong.