diff --git a/KCF/fhog.m b/KCF/fhog.m new file mode 100644 index 0000000..608af40 --- /dev/null +++ b/KCF/fhog.m @@ -0,0 +1,76 @@ +function H = fhog( I, binSize, nOrients, clip, crop ) +% Efficiently compute Felzenszwalb's HOG (FHOG) features. +% +% A fast implementation of the HOG variant used by Felzenszwalb et al. +% in their work on discriminatively trained deformable part models. +% http://www.cs.berkeley.edu/~rbg/latent/index.html +% Gives nearly identical results to features.cc in code release version 5 +% but runs 4x faster (over 125 fps on VGA color images). +% +% The computed HOG features are 3*nOrients+5 dimensional. There are +% 2*nOrients contrast sensitive orientation channels, nOrients contrast +% insensitive orientation channels, 4 texture channels and 1 all zeros +% channel (used as a 'truncation' feature). Using the standard value of +% nOrients=9 gives a 32 dimensional feature vector at each cell. This +% variant of HOG, refered to as FHOG, has been shown to achieve superior +% performance to the original HOG features. For details please refer to +% work by Felzenszwalb et al. (see link above). +% +% This function is essentially a wrapper for calls to gradientMag() +% and gradientHist(). Specifically, it is equivalent to the following: +% [M,O] = gradientMag( I,0,0,0,1 ); softBin = -1; useHog = 2; +% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +% See gradientHist() for more general usage. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% H = fhog( I, [binSize], [nOrients], [clip], [crop] ) +% +% INPUTS +% I - [hxw] color or grayscale input image (must have type single) +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% clip - [.2] value at which to clip histogram bins +% crop - [0] if true crop boundaries +% +% OUTPUTS +% H - [h/binSize w/binSize nOrients*3+5] computed hog features +% +% EXAMPLE +% I=imResample(single(imread('peppers.png'))/255,[480 640]); +% tic, for i=1:100, H=fhog(I,8,9); end; disp(100/toc) % >125 fps +% figure(1); im(I); V=hogDraw(H,25,1); figure(2); im(V) +% +% EXAMPLE +% % comparison to features.cc (requires DPM code release version 5) +% I=imResample(single(imread('peppers.png'))/255,[480 640]); Id=double(I); +% tic, for i=1:100, H1=features(Id,8); end; disp(100/toc) +% tic, for i=1:100, H2=fhog(I,8,9,.2,1); end; disp(100/toc) +% figure(1); montage2(H1); figure(2); montage2(H2); +% D=abs(H1-H2); mean(D(:)) +% +% See also hog, hogDraw, gradientHist +% +% Piotr's Image&Video Toolbox Version 3.23 +% Copyright 2013 Piotr Dollar. [pdollar-at-caltech.edu] +% Please email me if you find bugs, or have suggestions or questions! 
+% Licensed under the Simplified BSD License [see external/bsd.txt] + +%Note: modified to be more self-contained + +if( nargin<2 ), binSize=8; end +if( nargin<3 ), nOrients=9; end +if( nargin<4 ), clip=.2; end +if( nargin<5 ), crop=0; end + +softBin = -1; useHog = 2; b = binSize; + +[M,O]=gradientMex('gradientMag',I,0,1); + +H = gradientMex('gradientHist',M,O,binSize,nOrients,softBin,useHog,clip); + +if( crop ), e=mod(size(I),b) size(im,2)) = size(im,2); + ys(ys > size(im,1)) = size(im,1); + + %extract image + out = im(ys, xs, :); + +end + diff --git a/KCF/gradientMex.mexa64 b/KCF/gradientMex.mexa64 new file mode 100644 index 0000000..3ca9460 Binary files /dev/null and b/KCF/gradientMex.mexa64 differ diff --git a/KCF/linear_correlation.m b/KCF/linear_correlation.m new file mode 100644 index 0000000..1928318 --- /dev/null +++ b/KCF/linear_correlation.m @@ -0,0 +1,17 @@ +function kf = linear_correlation(xf, yf) +%LINEAR_CORRELATION Linear Kernel at all shifts, i.e. correlation. +% Computes the dot-product for all relative shifts between input images +% X and Y, which must both be MxN. They must also be periodic (ie., +% pre-processed with a cosine window). The result is an MxN map of +% responses. +% +% Inputs and output are all in the Fourier domain. +% +% Joao F. 
Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + %cross-correlation term in Fourier domain + kf = sum(xf .* conj(yf), 3) / numel(xf); + +end + diff --git a/KCF/load_video_info.m b/KCF/load_video_info.m new file mode 100644 index 0000000..81c0c5e --- /dev/null +++ b/KCF/load_video_info.m @@ -0,0 +1,60 @@ +function [img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path) +%LOAD_VIDEO_INFO +% Loads all the relevant information for the video in the given path: +% the list of image files (cell array of strings), initial position +% (1x2), target size (1x2), the ground truth information for precision +% calculations (Nx2, for N frames), and the path where the images are +% located. The ordering of coordinates and sizes is always [y, x]. +% +% Joao F. Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + + %full path to the video's files + if base_path(end) ~= '/' && base_path(end) ~= '\', + base_path(end+1) = '/'; + end + video_path = base_path; + + %try to load ground truth from text file (Benchmark's format) + filename = [video_path 'groundtruth_rect.txt']; + f = fopen(filename); + assert(f ~= -1, ['No initial position or ground truth to load ("' filename '").']) + + %the format is [x, y, width, height] + try + ground_truth = textscan(f, '%f,%f,%f,%f', 'ReturnOnError',false); + catch %#ok, try different format (no commas) + frewind(f); + ground_truth = textscan(f, '%f %f %f %f'); + end + ground_truth = cat(2, ground_truth{:}); + fclose(f); + + %set initial position and size + target_sz = [ground_truth(1,4), ground_truth(1,3)]; + pos = [ground_truth(1,2), ground_truth(1,1)] + floor(target_sz/2); + + if size(ground_truth,1) == 1, + %we have ground truth for the first frame only (initial position) + ground_truth = []; + else + %store positions instead of boxes + ground_truth = ground_truth(:,[2,1]) + ground_truth(:,[4,3]) / 2; + end + + + %from now on, work in the subfolder where all the images are + video_path = [video_path 'img/']; + + 
%general case, just list all images + img_files = dir([video_path '*.png']); + if isempty(img_files), + img_files = dir([video_path '*.jpg']); + assert(~isempty(img_files), 'No image files to load.') + end + img_files = sort({img_files.name}); + + +end + diff --git a/KCF/polynomial_correlation.m b/KCF/polynomial_correlation.m new file mode 100644 index 0000000..4d8b22b --- /dev/null +++ b/KCF/polynomial_correlation.m @@ -0,0 +1,22 @@ +function kf = polynomial_correlation(xf, yf, a, b) +%POLYNOMIAL_CORRELATION Polynomial Kernel at all shifts, i.e. kernel correlation. +% Evaluates a polynomial kernel with constant A and exponent B, for all +% relative shifts between input images XF and YF, which must both be MxN. +% They must also be periodic (ie., pre-processed with a cosine window). +% The result is an MxN map of responses. +% +% Inputs and output are all in the Fourier domain. +% +% Joao F. Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + %cross-correlation term in Fourier domain + xyf = xf .* conj(yf); + xy = sum(real(ifft2(xyf)), 3); %to spatial domain + + %calculate polynomial response for all positions, then go back to the + %Fourier domain + kf = fft2((xy / numel(xf) + a) .^ b); + +end + diff --git a/KCF/show_video.m b/KCF/show_video.m new file mode 100644 index 0000000..e122be6 --- /dev/null +++ b/KCF/show_video.m @@ -0,0 +1,77 @@ +function update_visualization_func = show_video(img_files, video_path, resize_image) +%SHOW_VIDEO +% Visualizes a tracker in an interactive figure, given a cell array of +% image file names, their path, and whether to resize the images to +% half size or not. +% +% This function returns an UPDATE_VISUALIZATION function handle, that +% can be called with a frame number and a bounding box [x, y, width, +% height], as soon as the results for a new frame have been calculated. +% This way, your results are shown in real-time, but they are also +% remembered so you can navigate and inspect the video afterwards. 
+% Press 'Esc' to send a stop signal (returned by UPDATE_VISUALIZATION). +% +% Joao F. Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + + %store one instance per frame + num_frames = numel(img_files); + boxes = cell(num_frames,1); + + %create window + [fig_h, axes_h, unused, scroll] = videofig(num_frames, @redraw, [], [], @on_key_press); %#ok, unused outputs +% set(fig_h, 'number','off', 'name', ['Tracker - ' video_path]) + axis off; + + %image and rectangle handles start empty, they are initialized later + im_h = []; + rect_h = []; + + update_visualization_func = @update_visualization; + stop_tracker = false; + + + function stop = update_visualization(frame, box) + %store the tracker instance for one frame, and show it. returns + %true if processing should stop (user pressed 'Esc'). + boxes{frame} = box; + scroll(frame); + stop = stop_tracker; + end + + function redraw(frame) + %render main image + im = imread([video_path img_files{frame}]); +% if size(im,3) > 1, +% im = rgb2gray(im); +% end + if resize_image, + im = imresize(im, 0.5); + end + + if isempty(im_h), %create image + im_h = imshow(im, 'Border','tight', 'InitialMag',200, 'Parent',axes_h); + else %just update it + set(im_h, 'CData', im) + end + + %render target bounding box for this frame + if isempty(rect_h), %create it for the first time + rect_h = rectangle('Position',[0,0,1,1], 'EdgeColor','r', 'LineWidth', 3, 'Parent',axes_h); + end + if ~isempty(boxes{frame}), + set(rect_h, 'Visible', 'on', 'Position', boxes{frame}); + else + set(rect_h, 'Visible', 'off'); + end + end + + function on_key_press(key) + if strcmp(key, 'escape'), %stop on 'Esc' + stop_tracker = true; + end + end + +end + diff --git a/KCF/videofig.m b/KCF/videofig.m new file mode 100644 index 0000000..2af846d --- /dev/null +++ b/KCF/videofig.m @@ -0,0 +1,230 @@ +function [fig_handle, axes_handle, scroll_bar_handles, scroll_func] = ... + videofig(num_frames, redraw_func, play_fps, big_scroll, ... 
+ key_func, varargin) +%VIDEOFIG Figure with horizontal scrollbar and play capabilities. +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC) +% Creates a figure with a horizontal scrollbar and shortcuts to scroll +% automatically. The scroll range is 1 to NUM_FRAMES. The function +% REDRAW_FUNC(F) is called to redraw at scroll position F (for example, +% REDRAW_FUNC can show the frame F of a video). +% This can be used not only to play and analyze standard videos, but it +% also lets you place any custom Matlab plots and graphics on top. +% +% The keyboard shortcuts are: +% Enter (Return) -- play/pause video (25 frames-per-second default). +% Backspace -- play/pause video 5 times slower. +% Right/left arrow keys -- advance/go back one frame. +% Page down/page up -- advance/go back 30 frames. +% Home/end -- go to first/last frame of video. +% +% Advanced usage +% -------------- +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC, FPS, BIG_SCROLL) +% Also specifies the speed of the play function (frames-per-second) and +% the frame step of page up/page down (or empty for defaults). +% +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC, FPS, BIG_SCROLL, @KEY_FUNC) +% Also calls KEY_FUNC(KEY) with any keys that weren't processed, so you +% can add more shortcut keys (or empty for none). +% +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC, FPS, BIG_SCROLL, @KEY_FUNC, ...) +% Passes any additional arguments to the native FIGURE function (for +% example: 'Name', 'Video figure title'). +% +% [FIG_HANDLE, AX_HANDLE, OTHER_HANDLES, SCROLL] = VIDEOFIG(...) +% Returns the handles of the figure, drawing axes and other handles (of +% the scrollbar's graphics), respectively. SCROLL(F) can be called to +% scroll to frame F, or with no arguments to just redraw the figure. 
+% +% Example 1 +% --------- +% Place this in a file called "redraw.m": +% function redraw(frame) +% imshow(['AT3_1m4_' num2str(frame, '%02.0f') '.tif']) +% end +% +% Then from a script or the command line, call: +% videofig(10, @redraw); +% redraw(1) +% +% The images "AT3_1m4_01.tif" ... "AT3_1m4_10.tif" are part of the Image +% Processing Toolbox and there's no need to download them elsewhere. +% +% Example 2 +% --------- +% Change the redraw function to visualize the contour of a single cell: +% function redraw(frame) +% im = imread(['AT3_1m4_' num2str(frame, '%02.0f') '.tif']); +% slice = im(210:310, 210:340); +% [ys, xs] = find(slice < 50 | slice > 100); +% pos = 210 + median([xs, ys]); +% siz = 3.5 * std([xs, ys]); +% imshow(im), hold on +% rectangle('Position',[pos - siz/2, siz], 'EdgeColor','g', 'Curvature',[1, 1]) +% hold off +% end +% +% João Filipe Henriques, 2010 + + %default parameter values + if nargin < 3 || isempty(play_fps), play_fps = 25; end %play speed (frames per second) + if nargin < 4 || isempty(big_scroll), big_scroll = 30; end %page-up and page-down advance, in frames + if nargin < 5, key_func = []; end + + %check arguments + check_int_scalar(num_frames); + check_callback(redraw_func); + check_int_scalar(play_fps); + check_int_scalar(big_scroll); + check_callback(key_func); + + click = 0; + f = 1; %current frame + + %initialize figure + fig_handle = figure('Color',[.3 .3 .3], 'MenuBar','none', 'Units','norm', ... + 'WindowButtonDownFcn',@button_down, 'WindowButtonUpFcn',@button_up, ... + 'WindowButtonMotionFcn', @on_click, 'KeyPressFcn', @key_press, ... + 'Interruptible','off', 'BusyAction','cancel', varargin{:}); + + %axes for scroll bar + scroll_axes_handle = axes('Parent',fig_handle, 'Position',[0 0 1 0.03], ... + 'Visible','off', 'Units', 'normalized'); + axis([0 1 0 1]); + axis off + + %scroll bar + scroll_bar_width = max(1 / num_frames, 0.01); + scroll_handle = patch([0 1 1 0] * scroll_bar_width, [0 0 1 1], [.8 .8 .8], ... 
+ 'Parent',scroll_axes_handle, 'EdgeColor','none', 'ButtonDownFcn', @on_click); + + %timer to play video + play_timer = timer('TimerFcn',@play_timer_callback, 'ExecutionMode','fixedRate'); + + %main drawing axes for video display + axes_handle = axes('Position',[0 0.03 1 0.97]); + + %return handles + scroll_bar_handles = [scroll_axes_handle; scroll_handle]; + scroll_func = @scroll; + + + + function key_press(src, event) %#ok, unused arguments + switch event.Key, %process shortcut keys + case 'leftarrow', + scroll(f - 1); + case 'rightarrow', + scroll(f + 1); + case 'pageup', + if f - big_scroll < 1, %scrolling before frame 1, stop at frame 1 + scroll(1); + else + scroll(f - big_scroll); + end + case 'pagedown', + if f + big_scroll > num_frames, %scrolling after last frame + scroll(num_frames); + else + scroll(f + big_scroll); + end + case 'home', + scroll(1); + case 'end', + scroll(num_frames); + case 'return', + play(1/play_fps) + case 'backspace', + play(5/play_fps) + otherwise, + if ~isempty(key_func), + key_func(event.Key); %#ok, call custom key handler + end + end + end + + %mouse handler + function button_down(src, event) %#ok, unused arguments + set(src,'Units','norm') + click_pos = get(src, 'CurrentPoint'); + if click_pos(2) <= 0.03, %only trigger if the scrollbar was clicked + click = 1; + on_click([],[]); + end + end + + function button_up(src, event) %#ok, unused arguments + click = 0; + end + + function on_click(src, event) %#ok, unused arguments + if click == 0, return; end + + %get x-coordinate of click + set(fig_handle, 'Units', 'normalized'); + click_point = get(fig_handle, 'CurrentPoint'); + set(fig_handle, 'Units', 'pixels'); + x = click_point(1); + + %get corresponding frame number + new_f = floor(1 + x * num_frames); + + if new_f < 1 || new_f > num_frames, return; end %outside valid range + + if new_f ~= f, %don't redraw if the frame is the same (to prevent delays) + scroll(new_f); + end + end + + function play(period) + %toggle between stoping 
and starting the "play video" timer + if strcmp(get(play_timer,'Running'), 'off'), + set(play_timer, 'Period', period); + start(play_timer); + else + stop(play_timer); + end + end + function play_timer_callback(src, event) %#ok + %executed at each timer period, when playing the video + if f < num_frames, + scroll(f + 1); + elseif strcmp(get(play_timer,'Running'), 'on'), + stop(play_timer); %stop the timer if the end is reached + end + end + + function scroll(new_f) + if nargin == 1, %scroll to another position (new_f) + if new_f < 1 || new_f > num_frames, + return + end + f = new_f; + end + + %convert frame number to appropriate x-coordinate of scroll bar + scroll_x = (f - 1) / num_frames; + + %move scroll bar to new position + set(scroll_handle, 'XData', scroll_x + [0 1 1 0] * scroll_bar_width); + + %set to the right axes and call the custom redraw function + set(fig_handle, 'CurrentAxes', axes_handle); + redraw_func(f); + + %used to be "drawnow", but when called rapidly and the CPU is busy + %it didn't let Matlab process events properly (ie, close figure). + pause(0.001) + end + + %convenience functions for argument checks + function check_int_scalar(a) + assert(isnumeric(a) && isscalar(a) && isfinite(a) && a == round(a), ... + [upper(inputname(1)) ' must be a scalar integer number.']); + end + function check_callback(a) + assert(isempty(a) || strcmp(class(a), 'function_handle'), ... + [upper(inputname(1)) ' must be a valid function handle.']) + end +end + diff --git a/channels/Contents.m b/channels/Contents.m new file mode 100644 index 0000000..c8a9813 --- /dev/null +++ b/channels/Contents.m @@ -0,0 +1,38 @@ +% CHANNELS +% See also +% +% Fast channel feature computation code based on the papers: +% [1] P. Dollár, Z. Tu, P. Perona and S. Belongie +% "Integral Channel Features", BMVC 2009. +% [2] P. Dollár, S. Belongie and P. Perona +% "The Fastest Pedestrian Detector in the West," BMVC 2010. +% [3] P. Dollár, R. Appel and W. 
Kienzle +% "Crosstalk Cascades for Frame-Rate Pedestrian Detection," ECCV 2012. +% [4] P. Dollár, R. Appel, S. Belongie and P. Perona +% "Fast Feature Pyramids for Object Detection", PAMI 2014. +% Please cite a subset of the above papers if you end up using the code. +% The PAMI 2014 paper has the most thorough and up to date descriptions. +% Code written and maintained by Piotr Dollar and Ron Appel. +% +% Channels: +% chnsCompute - Compute channel features at a single scale given an input image. +% chnsPyramid - Compute channel feature pyramid given an input image. +% chnsScaling - Compute lambdas for channel power law scaling. +% +% Constant time image smoothing: +% convBox - Extremely fast 2D image convolution with a box filter. +% convMax - Extremely fast 2D image convolution with a max filter. +% convTri - Extremely fast 2D image convolution with a triangle filter. +% +% Gradients and gradient histograms: +% gradient2 - Compute numerical gradients along x and y directions. +% gradientHist - Compute oriented gradient histograms. +% gradientMag - Compute gradient magnitude and orientation at each image location. +% hog - Efficiently compute histogram of oriented gradient (HOG) features. +% hogDraw - Create visualization of hog descriptor. +% fhog - Efficiently compute Felzenszwalb's HOG (FHOG) features. +% +% Miscellaneous: +% imPad - Pad an image along its four boundaries. +% imResample - Fast bilinear image downsampling/upsampling. +% rgbConvert - Convert RGB image to other color spaces (highly optimized). diff --git a/channels/chnsCompute.m b/channels/chnsCompute.m new file mode 100644 index 0000000..6864da0 --- /dev/null +++ b/channels/chnsCompute.m @@ -0,0 +1,187 @@ +function chns = chnsCompute( I, varargin ) +% Compute channel features at a single scale given an input image. +% +% Compute the channel features as described in: +% P. Dollár, Z. Tu, P. Perona and S. Belongie +% "Integral Channel Features", BMVC 2009. 
+% Channel features have proven very effective in sliding window object +% detection, both in terms of *accuracy* and *speed*. Numerous feature +% types including histogram of gradients (hog) can be converted into +% channel features, and overall, channels are general and powerful. +% +% Given an input image I, a corresponding channel is a registered map of I, +% where the output pixels are computed from corresponding patches of input +% pixels (thus preserving overall image layout). A trivial channel is +% simply the input grayscale image, likewise for a color image each color +% channel can serve as a channel. Other channels can be computed using +% linear or non-linear transformations of I, various choices implemented +% here are described below. The only constraint is that channels must be +% translationally invariant (i.e. translating the input image or the +% resulting channels gives the same result). This allows for fast object +% detection, as the channels can be computed once on the entire image +% rather than separately for each overlapping detection window. +% +% Currently, three channel types are available by default (to date, these +% have proven the most effective for sliding window object detection): +% (1) color channels (computed using rgbConvert.m) +% (2) gradient magnitude (computed using gradientMag.m) +% (3) quantized gradient channels (computed using gradientHist.m) +% For more information about each channel type, including the exact input +% parameters and their meanings, see the respective m-files which perform +% the actual computatons (chnsCompute is essentially a wrapper function). +% The converted color channels serve as input to gradientMag/gradientHist. +% +% Additionally, custom channels can be specified via an optional struct +% array "pCustom" which may have 0 or more custom channel definitions. Each +% custom channel is generated via a call to "chns=feval(hFunc,I,pFunc{:})". 
+% The color space of I is determined by pColor.colorSpace, use the setting +% colorSpace='orig' if the input image is not an 'rgb' image and should be +% left unchanged (e.g. if I has multiple channels). The input I will have +% type single and the output of hFunc should also have type single. +% +% "shrink" (which should be an integer) determines the amount to subsample +% the computed channels (in applications such as detection subsamping does +% not affect performance). The params for each channel type are described +% in detail in the respective function. In addition, each channel type has +% a param "enabled" that determines if the channel is computed. If +% chnsCompute() is called with no inputs, the output is the complete +% default params (pChns). Otherwise the outputs are the computed channels +% and additional meta-data (see below). The channels are computed at a +% single scale, for (fast) multi-scale channel computation see chnsPyramid. +% +% An emphasis has been placed on speed, with the code undergoing heavy +% optimization. Computing the full set of channels used in the BMVC09 paper +% referenced above on a 480x640 image runs over *100 fps* on a single core +% of a machine from 2011 (although runtime depends on input parameters). 
+% +% USAGE +% pChns = chnsCompute() +% chns = chnsCompute( I, pChns ) +% +% INPUTS +% I - [hxwx3] input image (uint8 or single/double in [0,1]) +% pChns - parameters (struct or name/value pairs) +% .shrink - [4] integer downsampling amount for channels +% .pColor - parameters for color space: +% .enabled - [1] if true enable color channels +% .smooth - [1] radius for image smoothing (using convTri) +% .colorSpace - ['luv'] choices are: 'gray', 'rgb', 'hsv', 'orig' +% .pGradMag - parameters for gradient magnitude: +% .enabled - [1] if true enable gradient magnitude channel +% .colorChn - [0] if>0 color channel to use for grad computation +% .normRad - [5] normalization radius for gradient +% .normConst - [.005] normalization constant for gradient +% .full - [0] if true compute angles in [0,2*pi) else in [0,pi) +% .pGradHist - parameters for gradient histograms: +% .enabled - [1] if true enable gradient histogram channels +% .binSize - [shrink] spatial bin size (defaults to shrink) +% .nOrients - [6] number of orientation channels +% .softBin - [0] if true use "soft" bilinear spatial binning +% .useHog - [0] if true perform 4-way hog normalization/clipping +% .clipHog - [.2] value at which to clip hog histogram bins +% .pCustom - parameters for custom channels (optional struct array): +% .enabled - [1] if true enable custom channel type +% .name - ['REQ'] custom channel type name +% .hFunc - ['REQ'] function handle for computing custom channels +% .pFunc - [{}] additional params for chns=hFunc(I,pFunc{:}) +% .padWith - [0] how channel should be padded (e.g. 
0,'replicate') +% .complete - [] if true does not check/set default vals in pChns +% +% OUTPUTS +% chns - output struct +% .pChns - exact input parameters used +% .nTypes - number of channel types +% .data - [nTypes x 1] cell [h/shrink x w/shrink x nChns] channels +% .info - [nTypes x 1] struct array +% .name - channel type name +% .pChn - exact input parameters for given channel type +% .nChns - number of channels for given channel type +% .padWith - how channel should be padded (0,'replicate') +% +% EXAMPLE - default channels +% I=imResample(imread('peppers.png'),[480 640]); pChns=chnsCompute(); +% tic, for i=1:100, chns=chnsCompute(I,pChns); end; toc +% figure(1); montage2(cat(3,chns.data{:})); +% +% EXAMPLE - default + custom channels +% I=imResample(imread('peppers.png'),[480 640]); pChns=chnsCompute(); +% hFunc=@(I) 5*sqrt(max(0,max(convBox(I.^2,2)-convBox(I,2).^2,[],3))); +% pChns.pCustom=struct('name','Std02','hFunc',hFunc); pChns.complete=0; +% tic, chns=chnsCompute(I,pChns); toc +% figure(1); im(chns.data{4}); +% +% See also rgbConvert, gradientMag, gradientHist, chnsPyramid +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get default parameters pChns +if(nargin==2), pChns=varargin{1}; else pChns=[]; end +if( ~isfield(pChns,'complete') || pChns.complete~=1 || isempty(I) ) + p=struct('enabled',{},'name',{},'hFunc',{},'pFunc',{},'padWith',{}); + pChns = getPrmDflt(varargin,{'shrink',4,'pColor',{},'pGradMag',{},... + 'pGradHist',{},'pCustom',p,'complete',1},1); + pChns.pColor = getPrmDflt( pChns.pColor, {'enabled',1,... + 'smooth',1, 'colorSpace','luv'}, 1 ); + pChns.pGradMag = getPrmDflt( pChns.pGradMag, {'enabled',1,... + 'colorChn',0,'normRad',5,'normConst',.005,'full',0}, 1 ); + pChns.pGradHist = getPrmDflt( pChns.pGradHist, {'enabled',1,... 
+ 'binSize',[],'nOrients',6,'softBin',0,'useHog',0,'clipHog',.2}, 1 ); + nc=length(pChns.pCustom); pc=cell(1,nc); + for i=1:nc, pc{i} = getPrmDflt( pChns.pCustom(i), {'enabled',1,... + 'name','REQ','hFunc','REQ','pFunc',{},'padWith',0}, 1 ); end + if( nc>0 ), pChns.pCustom=[pc{:}]; end +end +if(nargin==0), chns=pChns; return; end + +% create output struct +info=struct('name',{},'pChn',{},'nChns',{},'padWith',{}); +chns=struct('pChns',pChns,'nTypes',0,'data',{{}},'info',info); + +% crop I so divisible by shrink and get target dimensions +shrink=pChns.shrink; [h,w,~]=size(I); cr=mod([h w],shrink); +if(any(cr)), h=h-cr(1); w=w-cr(2); I=I(1:h,1:w,:); end +h=h/shrink; w=w/shrink; + +% compute color channels +p=pChns.pColor; nm='color channels'; +I=rgbConvert(I,p.colorSpace); I=convTri(I,p.smooth); +if(p.enabled), chns=addChn(chns,I,nm,p,'replicate',h,w); end + +% compute gradient magnitude channel +p=pChns.pGradMag; nm='gradient magnitude'; +full=0; if(isfield(p,'full')), full=p.full; end +if( pChns.pGradHist.enabled ) + [M,O]=gradientMag(I,p.colorChn,p.normRad,p.normConst,full); +elseif( p.enabled ) + M=gradientMag(I,p.colorChn,p.normRad,p.normConst,full); +end +if(p.enabled), chns=addChn(chns,M,nm,p,0,h,w); end + +% compute gradient histgoram channels +p=pChns.pGradHist; nm='gradient histogram'; +if( p.enabled ) + binSize=p.binSize; if(isempty(binSize)), binSize=shrink; end + H=gradientHist(M,O,binSize,p.nOrients,p.softBin,p.useHog,p.clipHog,full); + chns=addChn(chns,H,nm,pChns.pGradHist,0,h,w); +end + +% compute custom channels +p=pChns.pCustom; +for i=find( [p.enabled] ) + C=feval(p(i).hFunc,I,p(i).pFunc{:}); + chns=addChn(chns,C,p(i).name,p(i),p(i).padWith,h,w); +end + +end + +function chns = addChn( chns, data, name, pChn, padWith, h, w ) +% Helper function to add a channel to chns. 
+[h1,w1,~]=size(data); +if(h1~=h || w1~=w), data=imResampleMex(data,h,w,1); + assert(all(mod([h1 w1]./[h w],1)==0)); end +chns.data{end+1}=data; chns.nTypes=chns.nTypes+1; +chns.info(end+1)=struct('name',name,'pChn',pChn,... + 'nChns',size(data,3),'padWith',padWith); +end diff --git a/channels/chnsPyramid.m b/channels/chnsPyramid.m new file mode 100644 index 0000000..2c83bf7 --- /dev/null +++ b/channels/chnsPyramid.m @@ -0,0 +1,204 @@ +function pyramid = chnsPyramid( I, varargin ) +% Compute channel feature pyramid given an input image. +% +% While chnsCompute() computes channel features at a single scale, +% chnsPyramid() calls chnsCompute() multiple times on different scale +% images to create a scale-space pyramid of channel features. +% +% In its simplest form, chnsPyramid() first creates an image pyramid, then +% calls chnsCompute() with the specified "pChns" on each scale of the image +% pyramid. The parameter "nPerOct" determines the number of scales per +% octave in the image pyramid (an octave is the set of scales up to half of +% the initial scale), a typical value is nPerOct=8 in which case each scale +% in the pyramid is 2^(-1/8)~=.917 times the size of the previous. The +% smallest scale of the pyramid is determined by "minDs", once either image +% dimension in the resized image falls below minDs, pyramid creation stops. +% The largest scale in the pyramid is determined by "nOctUp" which +% determines the number of octaves to compute above the original scale. +% +% While calling chnsCompute() on each image scale works, it is unnecessary. +% For a broad family of features, including gradient histograms and all +% channel types tested, the feature responses computed at a single scale +% can be used to approximate feature responses at nearby scales. The +% approximation is accurate at least within an entire scale octave. For +% details and to understand why this unexpected result holds, please see: +% P. Dollár, R. Appel, S. Belongie and P. 
Perona +% "Fast Feature Pyramids for Object Detection", PAMI 2014. +% +% The parameter "nApprox" determines how many intermediate scales are +% approximated using the techniques described in the above paper. Roughly +% speaking, channels at approximated scales are computed by taking the +% corresponding channel at the nearest true scale (computed w chnsCompute) +% and resampling and re-normalizing it appropriately. For example, if +% nPerOct=8 and nApprox=7, then the 7 intermediate scales are approximated +% and only power of two scales are actually computed (using chnsCompute). +% The parameter "lambdas" determines how the channels are normalized (see +% the above paper). lambdas for a given set of channels can be computed +% using chnsScaling.m, alternatively, if no lambdas are specified, the +% lambdas are automatically approximated using two true image scales. +% +% Typically approximating all scales within an octave (by setting +% nApprox=nPerOct-1 or nApprox=-1) works well, and results in large speed +% gains (~4x). See example below for a visualization of the pyramid +% computed with and without the approximation. While there is a slight +% difference in the channels, during detection the approximated channels +% have been shown to be essentially as effective as the original channels. +% +% While every effort is made to space the image scales evenly, this is not +% always possible. For example, given a 101x100 image, it is impossible to +% downsample it by exactly 1/2 along the first dimension, moreover, the +% exact scaling along the two dimensions will differ. Instead, the scales +% are tweaked slightly (e.g. for a 101x101 image the scale would go from +% 1/2 to something like 50/101), and the output contains the exact scaling +% factors used for both the heights and the widths ("scaleshw") and also +% the approximate scale for both dimensions ("scales"). 
If "shrink">1 the +% scales are further tweaked so that the resized image has dimensions that +% are exactly divisible by shrink (for details please see the code). +% +% If chnsPyramid() is called with no inputs, the output is the complete +% default parameters (pPyramid). Finally, we describe the remaining +% parameters: "pad" controls the amount the channels are padded after being +% created (useful for detecting objects near boundaries); "smooth" controls +% the amount of smoothing after the channels are created (and controls the +% integration scale of the channels); finally "concat" determines whether +% all channels at a single scale are concatenated in the output. +% +% An emphasis has been placed on speed, with the code undergoing heavy +% optimization. Computing the full set of (approximated) *multi-scale* +% channels on a 480x640 image runs over *30 fps* on a single core of a +% machine from 2011 (although runtime depends on input parameters). +% +% USAGE +% pPyramid = chnsPyramid() +% pyramid = chnsPyramid( I, pPyramid ) +% +% INPUTS +% I - [hxwx3] input image (uint8 or single/double in [0,1]) +% pPyramid - parameters (struct or name/value pairs) +% .pChns - parameters for creating channels (see chnsCompute.m) +% .nPerOct - [8] number of scales per octave +% .nOctUp - [0] number of upsampled octaves to compute +% .nApprox - [-1] number of approx. 
scales (if -1 nApprox=nPerOct-1) +% .lambdas - [] coefficients for power law scaling (see BMVC10) +% .pad - [0 0] amount to pad channels (along T/B and L/R) +% .minDs - [16 16] minimum image size for channel computation +% .smooth - [1] radius for channel smoothing (using convTri) +% .concat - [1] if true concatenate channels +% .complete - [] if true does not check/set default vals in pPyramid +% +% OUTPUTS +% pyramid - output struct +% .pPyramid - exact input parameters used (may change from input) +% .nTypes - number of channel types +% .nScales - number of scales computed +% .data - [nScales x nTypes] cell array of computed channels +% .info - [nTypes x 1] struct array (mirrored from chnsCompute) +% .lambdas - [nTypes x 1] scaling coefficients actually used +% .scales - [nScales x 1] relative scales (approximate) +% .scaleshw - [nScales x 2] exact scales for resampling h and w +% +% EXAMPLE +% I=imResample(imread('peppers.png'),[480 640]); +% pPyramid=chnsPyramid(); pPyramid.minDs=[128 128]; +% pPyramid.nApprox=0; tic, P1=chnsPyramid(I,pPyramid); toc +% pPyramid.nApprox=7; tic, P2=chnsPyramid(I,pPyramid); toc +% figure(1); montage2(P1.data{2}); figure(2); montage2(P2.data{2}); +% figure(3); montage2(abs(P1.data{2}-P2.data{2})); colorbar; +% +% See also chnsCompute, chnsScaling, convTri, imPad +% +% Piotr's Computer Vision Matlab Toolbox Version 3.25 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get default parameters pPyramid +if(nargin==2), p=varargin{1}; else p=[]; end +if( ~isfield(p,'complete') || p.complete~=1 || isempty(I) ) + dfs={ 'pChns',{}, 'nPerOct',8, 'nOctUp',0, 'nApprox',-1, ... + 'lambdas',[], 'pad',[0 0], 'minDs',[16 16], ... 
+ 'smooth',1, 'concat',1, 'complete',1 }; + p=getPrmDflt(varargin,dfs,1); chns=chnsCompute([],p.pChns); + p.pChns=chns.pChns; p.pChns.complete=1; shrink=p.pChns.shrink; + p.pad=round(p.pad/shrink)*shrink; p.minDs=max(p.minDs,shrink*4); + if(p.nApprox<0), p.nApprox=p.nPerOct-1; end +end +if(nargin==0), pyramid=p; return; end; pPyramid=p; +vs=struct2cell(p); [pChns,nPerOct,nOctUp,nApprox,lambdas,... + pad,minDs,smooth,concat,~]=deal(vs{:}); shrink=pChns.shrink; + +% convert I to appropriate color space (or simply normalize) +cs=pChns.pColor.colorSpace; sz=[size(I,1) size(I,2)]; +if(~all(sz==0) && size(I,3)==1 && ~any(strcmpi(cs,{'gray','orig'}))), + I=I(:,:,[1 1 1]); warning('Converting image to color'); end %#ok +I=rgbConvert(I,cs); pChns.pColor.colorSpace='orig'; + +% get scales at which to compute features and list of real/approx scales +[scales,scaleshw]=getScales(nPerOct,nOctUp,minDs,shrink,sz); +nScales=length(scales); if(1), isR=1; else isR=1+nOctUp*nPerOct; end +isR=isR:nApprox+1:nScales; isA=1:nScales; isA(isR)=[]; +j=[0 floor((isR(1:end-1)+isR(2:end))/2) nScales]; +isN=1:nScales; for i=1:length(isR), isN(j(i)+1:j(i+1))=isR(i); end +nTypes=0; data=cell(nScales,nTypes); info=struct([]); + +% compute image pyramid [real scales] +for i=isR + s=scales(i); sz1=round(sz*s/shrink)*shrink; + if(all(sz==sz1)), I1=I; else I1=imResampleMex(I,sz1(1),sz1(2),1); end + if(s==.5 && (nApprox>0 || nPerOct==1)), I=I1; end + chns=chnsCompute(I1,pChns); info=chns.info; + if(i==isR(1)), nTypes=chns.nTypes; data=cell(nScales,nTypes); end + data(i,:) = chns.data; +end + +% if lambdas not specified compute image specific lambdas +if( nScales>0 && nApprox>0 && isempty(lambdas) ) + is=1+nOctUp*nPerOct:nApprox+1:nScales; + assert(length(is)>=2); if(length(is)>2), is=is(2:3); end + f0=zeros(1,nTypes); f1=f0; d0=data(is(1),:); d1=data(is(2),:); + for j=1:nTypes, d=d0{j}; f0(j)=sum(d(:))/numel(d); end + for j=1:nTypes, d=d1{j}; f1(j)=sum(d(:))/numel(d); end + lambdas = - log2(f0./f1) / 
log2(scales(is(1))/scales(is(2))); +end + +% compute image pyramid [approximated scales] +for i=isA + iR=isN(i); sz1=round(sz*scales(i)/shrink); + for j=1:nTypes, ratio=(scales(i)/scales(iR)).^-lambdas(j); + data{i,j}=imResampleMex(data{iR,j},sz1(1),sz1(2),ratio); end +end + +% smooth channels, optionally pad and concatenate channels +for i=1:nScales*nTypes, data{i}=convTri(data{i},smooth); end +if(any(pad)), for i=1:nScales, for j=1:nTypes + data{i,j}=imPad(data{i,j},pad/shrink,info(j).padWith); end; end; end +if(concat && nTypes), data0=data; data=cell(nScales,1); end +if(concat && nTypes), for i=1:nScales, data{i}=cat(3,data0{i,:}); end; end + +% create output struct +j=info; if(~isempty(j)), j=find(strcmp('color channels',{j.name})); end +if(~isempty(j)), info(j).pChn.colorSpace=cs; end +pyramid = struct( 'pPyramid',pPyramid, 'nTypes',nTypes, ... + 'nScales',nScales, 'data',{data}, 'info',info, 'lambdas',lambdas, ... + 'scales',scales, 'scaleshw',scaleshw ); + +end + +function [scales,scaleshw] = getScales(nPerOct,nOctUp,minDs,shrink,sz) +% set each scale s such that max(abs(round(sz*s/shrink)*shrink-sz*s)) is +% minimized without changing the smaller dim of sz (tricky algebra) +if(any(sz==0)), scales=[]; scaleshw=[]; return; end +nScales = floor(nPerOct*(nOctUp+log2(min(sz./minDs)))+1); +scales = 2.^(-(0:nScales-1)/nPerOct+nOctUp); +if(sz(1)1000) the computed +% lambdas should not depend on the exact images used. 
+% +% USAGE +% [lambdas,as,scales,fs] = chnsScaling( pChns, Is, [show] ) +% +% INPUTS +% pChns - parameters for creating channels (see chnsCompute.m) +% Is - [nImages x 1] cell array of images (nImages may be 1) +% show - [1] figure in which to display results +% +% OUTPUTS +% lambdas - [nTypes x 1] computed lambdas +% as - [nTypes x 1] computed y-intercepts +% scales - [nScales x 1] vector of actual scales used +% fs - [nImages x nScales x nTypes] array of feature means +% +% EXAMPLE +% sDir = 'data/Inria/train/neg/'; +% Is = fevalImages( @(x) {x}, {}, sDir, 'I', 'png', 0, 200 ); +% p = chnsCompute(); lambdas = chnsScaling( p, Is, 1 ); +% +% See also chnsCompute, chnsPyramid, fevalImages +% +% Piotr's Computer Vision Matlab Toolbox Version 3.25 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get additional input arguments +if(nargin<3 || isempty(show)), show=1; end + +% construct pPyramid (don't pad, concat or appoximate) +pPyramid=chnsPyramid(); pPyramid.pChns=pChns; pPyramid.concat=0; +pPyramid.pad=[0 0]; pPyramid.nApprox=0; pPyramid.smooth=0; +pPyramid.minDs(:)=max(8,pChns.shrink*4); + +% crop all images to smallest image size +ds=[inf inf]; nImages=numel(Is); +for i=1:nImages, ds=min(ds,[size(Is{i},1) size(Is{i},2)]); end +ds=round(ds/pChns.shrink)*pChns.shrink; +for i=1:nImages, Is{i}=Is{i}(1:ds(1),1:ds(2),:); end + +% compute fs [nImages x nScales x nTypes] array of feature means +P=chnsPyramid(Is{1},pPyramid); scales=P.scales'; info=P.info; +nScales=P.nScales; nTypes=P.nTypes; fs=zeros(nImages,nScales,nTypes); +parfor i=1:nImages, P=chnsPyramid(Is{i},pPyramid); for j=1:nScales + for k=1:nTypes, fs(i,j,k)=mean(P.data{j,k}(:)); end; end; end + +% remove fs with fs(:,1,:) having small values +kp=max(fs(:,1,:)); kp=fs(:,1,:)>kp(ones(1,nImages),1,:)/50; +kp=min(kp,[],3); fs=fs(kp,:,:); nImages=size(fs,1); + +% compute ratios, intercepts and lambdas using least squares 
+scales1=scales(2:end); nScales=nScales-1; O=ones(nScales,1); +rs=fs(:,2:end,:)./fs(:,O,:); mus=permute(mean(rs,1),[2 3 1]); +out=[O -log2(scales1)]\log2(mus); as=2.^out(1,:); lambdas=out(2,:); +if(0), lambdas=-log2(scales1)\log2(mus); as(:)=1; end +if(show==0), return; end + +% compute predicted means and errors for display purposes +musp=as(O,:).*scales1(:,ones(1,nTypes)).^-lambdas(O,:); +errsFit=mean(abs(musp-mus)); stds=permute(std(rs,0,1),[2 3 1]); + +% plot results +if(show<0), show=-show; clear=0; else clear=1; end +figureResized(.75,show); if(clear), clf; end +lp={'LineWidth',2}; tp={'FontSize',12}; +for k=1:nTypes + % plot ratios + subplot(2,nTypes,k); set(gca,tp{:}); + for i=round(linspace(1,nImages,20)) + loglog(1./scales1,rs(i,:,k),'Color',[1 1 1]*.8); hold on; end + h0=loglog(1./scales1,mus(:,k),'go',lp{:}); + h1=loglog(1./scales1,musp(:,k),'b-',lp{:}); + title(sprintf('%s\n\\lambda = %.03f, error = %.2e',... + info(k).name,lambdas(k),errsFit(k))); + legend([h0 h1],{'real','fit'},'location','ne'); + xlabel('log2(scale)'); ylabel('\mu (ratio)'); axis tight; + ax=axis; ax(1)=1; ax(3)=min(.9,ax(3)); ax(4)=max(2,ax(4)); axis(ax); + set(gca,'ytick',[.5 1 1.4 2 3 4],'YMinorTick','off'); + set(gca,'xtick',2.^(-10:.5:10),'XTickLabel',10:-.5:-10); + % plot variances + subplot(2,nTypes,k+nTypes); set(gca,tp{:}); + semilogx(1./scales1,stds(:,k),'go',lp{:}); hold on; + xlabel('log2(scale)'); ylabel('\sigma (ratio)'); axis tight; + ax=axis; ax(1)=1; ax(3)=0; ax(4)=max(.5,ax(4)); axis(ax); + set(gca,'xtick',2.^(-10:.5:10),'XTickLabel',10:-.5:-10); +end + +end diff --git a/channels/convBox.m b/channels/convBox.m new file mode 100644 index 0000000..9a1dae4 --- /dev/null +++ b/channels/convBox.m @@ -0,0 +1,75 @@ +function J = convBox( I, r, s, nomex ) +% Extremely fast 2D image convolution with a box filter. +% +% Convolves an image by a F=ones(2*r+1,2*r+1)/(2*r+1)^2 filter. The +% convolution can be performed in constant time per-pixel, independent of +% the radius r. 
In fact the implementation is nearly optimal, with the +% convolution taking only slightly more time than creating a copy of the +% input array. Boundary effects are handled as if the image were padded +% symmetrically prior to performing the convolution. An optional integer +% downsampling parameter "s" can be specified, in which case the output is +% downsampled by s (the implementation is efficient with downsampling +% occurring simultaneously with smoothing, saving additional time). +% +% The output is exactly equivalent to the following Matlab operations: +% f = ones(1,2*r+1); f=f/sum(f); +% J = padarray(I,[r r],'symmetric','both'); +% J = convn(convn(J,f,'valid'),f','valid'); +% if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +% The computation, however, is an order of magnitude faster than the above. +% +% When used as a smoothing filter, the standard deviation (sigma) of a box +% filter with radius r can be computed using [sigma=sqrt(r*(r+1)/3)]. For +% the first few values of r this translates to: r=1: sigma=sqrt(2/3), r=2: +% sigma=sqrt(2), r=3: sigma=2. Given sigma, the equivalent value of r can +% be computed via [r=sqrt(12*sigma*sigma+1)/2-.5]. +% +% The related function convTri performs convolution with a triangle filter, +% which has nicer properties if used for smoothing, but is slightly slower. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% J = convBox( I, r, [s], [nomex] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% r - integer filter radius +% s - [1] integer downsampling amount after convolving +% nomex - [0] if true perform computation in matlab (for testing/timing) +% +% OUTPUTS +% J - [hxwxk] smoothed image +% +% EXAMPLE +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% r = 5; s = 2; % set parameters as desired +% tic, J1=convBox(I,r,s); toc % mex version (fast) +% tic, J2=convBox(I,r,s,1); toc % matlab version (slow) +% figure(1); im(J1); figure(2); im(abs(J2-J1)); +% +% See also conv2, convTri +% +% Piotr's Computer Vision Matlab Toolbox Version 3.02 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +assert( r>=0 ); +if( nargin<3 ), s=1; end +if( nargin<4 ), nomex=0; end +if( isempty(I) || (r==0 && s==1) ), J = I; return; end +m=min(size(I,1),size(I,2)); if( m<4 || 2*r+1>=m ), nomex=1; end + +if( nomex==0 ) + if( r==1 && s<=2 ) + J = convConst('convTri1',I,1,s); + else + J = convConst('convBox',I,r,s); + end +else + f = ones(1,2*r+1); f=f/sum(f); + J = padarray(I,[r r],'symmetric','both'); + J = convn(convn(J,f,'valid'),f','valid'); + if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +end diff --git a/channels/convMax.m b/channels/convMax.m new file mode 100644 index 0000000..75e194d --- /dev/null +++ b/channels/convMax.m @@ -0,0 +1,61 @@ +function J = convMax( I, r, nomex ) +% Extremely fast 2D image convolution with a max filter. +% +% For each location computes J(y,x) = max(max(I(y-r:y+r,x-r:x+r))). The +% filtering is constant time per-window, independent of r. First, the +% filtering is separable, which brings the complexity down to O(r) per +% window from O(r*r). To bring the implemention down to constant time +% (independent of r) we use the van Herk/Gil-Werman algorithm. 
Ignoring +% boundaries, just 3 max operations are need per-window regardless of r. +% http://www.leptonica.com/grayscale-morphology.html#FAST-IMPLEMENTATION +% +% The output is exactly equivalent to the following Matlab operations: +% I=padarray(I,[r r],'replicate','both'); [h,w,d]=size(I); J=I; +% for z=1:d, for x=r+1:w-r, for y=r+1:h-r +% J(y,x,z) = max(max(I(y-r:y+r,x-r:x+r,z))); end; end; end +% J=J(r+1:h-r,r+1:w-r,:); +% The computation, however, is an order of magnitude faster than the above. +% +% USAGE +% J = convMax( I, r, [nomex] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% r - integer filter radius or radii along y and x +% nomex - [0] if true perform computation in matlab (for testing/timing) +% +% OUTPUTS +% J - [hxwxk] max image +% +% EXAMPLE +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% r = 5; % set parameter as desired +% tic, J1=convMax(I,r); toc % mex version (fast) +% tic, J2=convMax(I,r,1); toc % matlab version (slow) +% figure(1); im(J1); figure(2); im(abs(J2-J1)); +% +% See also conv2, convTri, convBox +% +% Piotr's Computer Vision Matlab Toolbox Version 3.00 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +assert( all(r>=0) ); +if( nargin<3 ), nomex=0; end +if( all(r==0) ), J = I; return; end +if( numel(r)==1 ), ry=r; rx=r; else ry=r(1); rx=r(2); end + +if( nomex==0 ) + d=size(I,3); + if(d==1), J=convConst('convMax',convConst('convMax',I,ry,1)',rx,1)'; else + J=I; for z=1:d, J(:,:,z) = ... 
+ convConst('convMax',convConst('convMax',J(:,:,z),ry,1)',rx,1)'; end + end +else + I=padarray(I,[ry rx],'replicate','both'); [h,w,d]=size(I); J=I; + for z=1:d, for x=rx+1:w-rx, for y=ry+1:h-ry + J(y,x,z) = max(max(I(y-ry:y+ry,x-rx:x+rx,z))); end; end; end + J=J(ry+1:h-ry,rx+1:w-rx,:); +end + +end diff --git a/channels/convTri.m b/channels/convTri.m new file mode 100644 index 0000000..a8ae11c --- /dev/null +++ b/channels/convTri.m @@ -0,0 +1,95 @@ +function J = convTri( I, r, s, nomex ) +% Extremely fast 2D image convolution with a triangle filter. +% +% Convolves an image by a 2D triangle filter (the 1D triangle filter f is +% [1:r r+1 r:-1:1]/(r+1)^2, the 2D version is simply conv2(f,f')). The +% convolution can be performed in constant time per-pixel, independent of +% the radius r. In fact the implementation is nearly optimal, with the +% convolution taking only slightly more time than creating a copy of the +% input array. Boundary effects are handled as if the image were padded +% symmetrically prior to performing the convolution. An optional integer +% downsampling parameter "s" can be specified, in which case the output is +% downsampled by s (the implementation is efficient with downsampling +% occurring simultaneously with smoothing, saving additional time). +% +% The output is exactly equivalent to the following Matlab operations: +% f = [1:r r+1 r:-1:1]/(r+1)^2; +% J = padarray(I,[r r],'symmetric','both'); +% J = convn(convn(J,f,'valid'),f','valid'); +% if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +% The computation, however, is an order of magnitude faster than the above. +% +% When used as a smoothing filter, the standard deviation (sigma) of a tri +% filter with radius r can be computed using [sigma=sqrt(r*(r+2)/6)]. For +% the first few values of r this translates to: r=1: sigma=1/sqrt(2), r=2: +% sigma=sqrt(4/3), r=3: sqrt(5/2), r=4: sigma=2. Given sigma, the +% equivalent value of r can be computed via [r=sqrt(6*sigma*sigma+1)-1]. 
+% +% For even finer grained control for very small amounts of smoothing, any +% value of r between 0 and 1 can be used (normally if r>=1 then r must be +% an integer). In this case a filter of the form fp=[1 p 1]/(2+p) is used, +% with p being determined automatically from r. The filter fp has a +% standard deviation of [sigma=sqrt(2/(p+2))]. Hence p can be determined +% from r by setting [sqrt(r*(r+2)/6)=sqrt(2/(p+2))], which gives +% [p=12/r/(r+2)-2]. Note that r=1 gives p=2, so fp=[1 2 1]/4 which is the +% same as the normal r=1 triangle filter. As r goes to 0, p goes to +% infinity, and fp becomes the delta function [0 1 0]. The computation for +% r<=1 is particularly fast. +% +% The related function convBox performs convolution with a box filter, +% which is slightly faster but has worse properties if used for smoothing. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% J = convTri( I, r, [s], [nomex] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% r - integer filter radius (or any value between 0 and 1) +% filter standard deviation is: sigma=sqrt(r*(r+2)/6) +% s - [1] integer downsampling amount after convolving +% nomex - [0] if true perform computation in matlab (for testing/timing) +% +% OUTPUTS +% J - [hxwxk] smoothed image +% +% EXAMPLE - matlab versus mex +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% r = 5; s = 2; % set parameters as desired +% tic, J1=convTri(I,r,s); toc % mex version (fast) +% tic, J2=convTri(I,r,s,1); toc % matlab version (slow) +% figure(1); im(J1); figure(2); im(abs(J2-J1)); +% +% EXAMPLE - triangle versus gaussian smoothing +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% sigma = 4; rg = ceil(3*sigma); f = filterGauss(2*rg+1,[],sigma^2); +% tic, J1=conv2(conv2(imPad(I,rg,'symmetric'),f,'valid'),f','valid'); toc +% r=sqrt(6*sigma*sigma+1)-1; tic, 
J2=convTri(I,r); toc +% figure(1); im(J1); figure(2); im(J2); figure(3); im(abs(J2-J1)); +% +% See also conv2, convBox, gaussSmooth +% +% Piotr's Computer Vision Matlab Toolbox Version 3.02 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<3 ), s=1; end +if( nargin<4 ), nomex=0; end +if( isempty(I) || (r==0 && s==1) ), J = I; return; end +m=min(size(I,1),size(I,2)); if( m<4 || 2*r+1>=m ), nomex=1; end + +if( nomex==0 ) + if( r>0 && r<=1 && s<=2 ) + J = convConst('convTri1',I,12/r/(r+2)-2,s); + else + J = convConst('convTri',I,r,s); + end +else + if(r<=1), p=12/r/(r+2)-2; f=[1 p 1]/(2+p); r=1; + else f=[1:r r+1 r:-1:1]/(r+1)^2; end + J = padarray(I,[r r],'symmetric','both'); + J = convn(convn(J,f,'valid'),f','valid'); + if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +end diff --git a/channels/fhog.m b/channels/fhog.m new file mode 100644 index 0000000..beaaefe --- /dev/null +++ b/channels/fhog.m @@ -0,0 +1,70 @@ +function H = fhog( I, binSize, nOrients, clip, crop ) +% Efficiently compute Felzenszwalb's HOG (FHOG) features. +% +% A fast implementation of the HOG variant used by Felzenszwalb et al. +% in their work on discriminatively trained deformable part models. +% http://www.cs.berkeley.edu/~rbg/latent/index.html +% Gives nearly identical results to features.cc in code release version 5 +% but runs 4x faster (over 125 fps on VGA color images). +% +% The computed HOG features are 3*nOrients+5 dimensional. There are +% 2*nOrients contrast sensitive orientation channels, nOrients contrast +% insensitive orientation channels, 4 texture channels and 1 all zeros +% channel (used as a 'truncation' feature). Using the standard value of +% nOrients=9 gives a 32 dimensional feature vector at each cell. This +% variant of HOG, refered to as FHOG, has been shown to achieve superior +% performance to the original HOG features. 
For details please refer to +% work by Felzenszwalb et al. (see link above). +% +% This function is essentially a wrapper for calls to gradientMag() +% and gradientHist(). Specifically, it is equivalent to the following: +% [M,O] = gradientMag( I,0,0,0,1 ); softBin = -1; useHog = 2; +% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +% See gradientHist() for more general usage. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% H = fhog( I, [binSize], [nOrients], [clip], [crop] ) +% +% INPUTS +% I - [hxw] color or grayscale input image (must have type single) +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% clip - [.2] value at which to clip histogram bins +% crop - [0] if true crop boundaries +% +% OUTPUTS +% H - [h/binSize w/binSize nOrients*3+5] computed hog features +% +% EXAMPLE +% I=imResample(single(imread('peppers.png'))/255,[480 640]); +% tic, for i=1:100, H=fhog(I,8,9); end; disp(100/toc) % >125 fps +% figure(1); im(I); V=hogDraw(H,25,1); figure(2); im(V) +% +% EXAMPLE +% % comparison to features.cc (requires DPM code release version 5) +% I=imResample(single(imread('peppers.png'))/255,[480 640]); Id=double(I); +% tic, for i=1:100, H1=features(Id,8); end; disp(100/toc) +% tic, for i=1:100, H2=fhog(I,8,9,.2,1); end; disp(100/toc) +% figure(1); montage2(H1); figure(2); montage2(H2); +% D=abs(H1-H2); mean(D(:)) +% +% See also hog, hogDraw, gradientHist +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<2 ), binSize=8; end +if( nargin<3 ), nOrients=9; end +if( nargin<4 ), clip=.2; end +if( nargin<5 ), crop=0; end + +softBin = -1; useHog = 2; b = binSize; +[M,O] = gradientMag( I,0,0,0,1 ); +H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +if( crop ), e=mod(size(I),b)= 0 +% or simply to the nearest orientation bin if softBin < 0. Next, spatial +% binning is performed by summing the pixels in each binSize x binSize +% region of each [hxw] orientation channel. If "softBin" is odd each pixel +% can contribute to multiple spatial bins (using bilinear interpolation), +% otherwise each pixel contributes to a single spatial bin. The result of +% these steps is a floor([h/binSize w/binSize nOrients]) feature map +% representing the gradient histograms in each image region. +% +% Parameter settings of particular interest: +% binSize=1: simply quantize the gradient magnitude into nOrients channels +% softBin=1, useHog=1, clip=.2: original HOG features (see hog.m) +% softBin=-1; useHog=2, clip=.2: FHOG features (see fhog.m) +% softBin=0, useHog=0: channels used in Dollar's BMVC09 paper +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% H = gradientHist( M, O, [binSize,nOrients,softBin,useHog,clipHog,full] ) +% +% INPUTS +% M - [hxw] gradient magnitude at each location (see gradientMag.m) +% O - [hxw] gradient orientation in range defined by param flag +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% softBin - [1] set soft binning (odd: spatial=soft, >=0: orient=soft) +% useHog - [0] 1: compute HOG (see hog.m), 2: compute FHOG (see fhog.m) +% clipHog - [.2] value at which to clip hog histogram bins +% full - [false] if true expects angles in [0,2*pi) else in [0,pi) +% +% OUTPUTS +% H - [w/binSize x h/binSize x nOrients] gradient histograms +% +% EXAMPLE +% I=rgbConvert(imread('peppers.png'),'gray'); [M,O]=gradientMag(I); +% H1=gradientHist(M,O,2,6,0); figure(1); montage2(H1); +% H2=gradientHist(M,O,2,6,1); figure(2); montage2(H2); +% +% See also gradientMag, gradient2, hog, fhog +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +H = gradientMex('gradientHist',M,O,varargin{:}); diff --git a/channels/gradientMag.m b/channels/gradientMag.m new file mode 100644 index 0000000..767a42c --- /dev/null +++ b/channels/gradientMag.m @@ -0,0 +1,53 @@ +function [M,O] = gradientMag( I, channel, normRad, normConst, full ) +% Compute gradient magnitude and orientation at each image location. +% +% If input image has k>1 channels and channel=0, keeps gradient with +% maximum magnitude (over all channels) at each location. Otherwise if +% channel is between 1 and k computes gradient for the given channel. +% If full==1 orientation is computed in [0,2*pi) else it is in [0,pi). +% +% If normRad>0, normalization is performed by first computing S, a smoothed +% version of the gradient magnitude, then setting: M = M./(S + normConst). +% S is computed by S = convTri( M, normRad ). 
+% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% [M,O] = gradientMag( I, [channel], [normRad], [normConst], [full] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% channel - [0] if>0 color channel to use for gradient computation +% normRad - [0] normalization radius (no normalization if 0) +% normConst - [.005] normalization constant +% full - [0] if true compute angles in [0,2*pi) else in [0,pi) +% +% OUTPUTS +% M - [hxw] gradient magnitude at each location +% O - [hxw] approximate gradient orientation modulo PI +% +% EXAMPLE +% I=rgbConvert(imread('peppers.png'),'gray'); +% [Gx,Gy]=gradient2(I); M=sqrt(Gx.^2+Gy.^2); O=atan2(Gy,Gx); +% full=0; [M1,O1]=gradientMag(I,0,0,0,full); +% D=abs(M-M1); mean2(D), if(full), o=pi*2; else o=pi; end +% D=abs(O-O1); D(~M)=0; D(D>o*.99)=o-D(D>o*.99); mean2(abs(D)) +% +% See also gradient, gradient2, gradientHist, convTri +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<1 || isempty(I)), M=single([]); O=M; return; end +if(nargin<2 || isempty(channel)), channel=0; end +if(nargin<3 || isempty(normRad)), normRad=0; end +if(nargin<4 || isempty(normConst)), normConst=.005; end +if(nargin<5 || isempty(full)), full=0; end + +if(nargout<=1), M=gradientMex('gradientMag',I,channel,full); +else [M,O]=gradientMex('gradientMag',I,channel,full); end + +if( normRad==0 ), return; end; S = convTri( M, normRad ); +gradientMex('gradientMagNorm',M,S,normConst); % operates on M diff --git a/channels/hog.m b/channels/hog.m new file mode 100644 index 0000000..d847165 --- /dev/null +++ b/channels/hog.m @@ -0,0 +1,75 @@ +function H = hog( I, binSize, nOrients, clip, crop ) +% Efficiently compute histogram of oriented gradient (HOG) features. 
+% +% Heavily optimized code to compute HOG features described in "Histograms +% of Oriented Gradients for Human Detection" by Dalal & Triggs, CVPR05. +% This function is made largely obsolete by fhog, see fhog.m for details. +% +% If I has dimensions [hxw], the size of the computed feature vector H is +% floor([h/binSize w/binSize nOrients*4]). For each binSize x binSize +% region, computes a histogram of gradients, with each gradient quantized +% by its angle and weighed by its magnitude. For color images, the gradient +% is computed separately for each color channel and the one with maximum +% magnitude is used. The centered gradient is used except at boundaries +% (where uncentered gradient is used). Trilinear interpolation is used to +% place each gradient in the appropriate spatial and orientation bin. +% +% For each resulting histogram (with nOrients bins), four different +% normalizations are computed using adjacent histograms, resulting in a +% nOrients*4 length feature vector for each region. To compute the +% normalizations, first for each block of adjacent 2x2 histograms we +% compute their L2 norm (over all 4*nOrient bins). Each histogram (except +% at boundaries) thus has 4 different normalization values associated with +% it. Each histogram bin is then normalized by each of the 4 different L2 +% norms, resulting in a 4 times expansion of the number of bins. Finally, +% any bin whose value is bigger than "clip" is set to "clip". +% +% The computed features are NOT identical to those described in the CVPR05 +% paper. Specifically, there is no Gaussian spatial window, and other minor +% details differ. The choices were made for speed of the resulting code: +% ~.008s for a 640x480x3 color image on a standard machine from 2011. +% +% This function is essentially a wrapper for calls to gradientMag() +% and gradientHist(). 
Specifically, it is equivalent to the following: +% [M,O] = gradientMag( I ); softBin = 1; useHog = 1; +% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +% See gradientHist() for more general usage. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% H = hog( I, [binSize], [nOrients], [clip], [crop] ) +% +% INPUTS +% I - [hxw] color or grayscale input image (must have type single) +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% clip - [.2] value at which to clip histogram bins +% crop - [0] if true crop boundaries +% +% OUTPUTS +% H - [h/binSize w/binSize nOrients*4] computed hog features +% +% EXAMPLE +% I=imResample(single(imread('peppers.png')),[480 640])/255; +% tic, for i=1:125, H=hog(I,8,9); end; toc % ~1s for 125 iterations +% figure(1); im(I); V=hogDraw(H,25); figure(2); im(V) +% +% See also hogDraw, gradientHist +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<2 ), binSize=8; end +if( nargin<3 ), nOrients=9; end +if( nargin<4 ), clip=.2; end +if( nargin<5 ), crop=0; end + +softBin = 1; useHog = 1; b = binSize; +[M,O] = gradientMag( I ); +H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +if( crop ), e=mod(size(I),b) +#include +#include "rgbConvertMex.cpp" +#include "imPadMex.cpp" +#include "convConst.cpp" +#include "imResampleMex.cpp" +#include "gradientMex.cpp" + +// compile and test standalone channels source code +int main(int argc, const char* argv[]) +{ + // initialize test array (misalign controls memory mis-alignment) + const int h=12, w=12, misalign=1; int x, y, d; + float I[h*w*3+misalign], *I0=I+misalign; + for( x=0; x +#include "sse.hpp" + +// convolve one column of I by a 2rx1 ones filter +void convBoxY( float *I, float *O, int h, int r, int s ) { + float t; int j, p=r+1, q=2*h-(r+1), h0=r+1, h1=h-r, h2=h; + t=0; for(j=0; j<=r; j++) t+=I[j]; t=2*t-I[r]; j=0; + if( s==1 ) { + for(; jh2) h0=h2; if(h1>h2) h1=h2; + for(; j 0) { + // initialize T + memset( T, 0, h1*sizeof(float) ); + for(i=0; i<=r; i++) for(j=0; j=w-r) Ir=I+(2*w-r-i-1)*h; + for(j=0; j= 2) ? 1 : 0, h2=(h-d)/2; + if( s==2 ) { + for( ; jh2) h0=h2; if(h1>h2) h1=h2; + if(++k==s) { k=0; *O++=u; } j=1; + for(;j 0) { + // initialize T and U + for(j=0; jw-r) Ir=I+(2*w-r-i)*h; + for( j=0; j0) Il-=h; if(ib ? a : b; + #define maxk(y0,y1) { O[y]=I[y0]; \ + for( yi=y0+1; yi<=y1; yi++ ) { if(I[yi]>O[y]) O[y]=I[yi]; }} + for( y=0; yh-1) y1=h-1; maxk(0,y1); } + for( ; y<=h-m-r; y+=m ) { + T[m-1] = I[y+r]; + for( yi=1; yiw-1 ) r=w-1; if( r>h-1 ) r=h-1; int m=2*r+1; + float *T=(float*) alMalloc(m*2*sizeof(float),16); + for( int d0=0; d0 1) mexErrMsgTxt("One output expected."); + nDims = mxGetNumberOfDimensions(prhs[1]); + id = mxGetClassID(prhs[1]); + ns = (int*) mxGetDimensions(prhs[1]); + d = (nDims == 3) ? ns[2] : 1; + m = (ns[0] < ns[1]) ? 
ns[0] : ns[1]; + if( (nDims!=2 && nDims!=3) || id!=mxSINGLE_CLASS || m<4 ) + mexErrMsgTxt("A must be a 4x4 or bigger 2D or 3D float array."); + + // extract inputs + if(mxGetString(prhs[0],type,1024)) + mexErrMsgTxt("Failed to get type."); + A = (float*) mxGetData(prhs[1]); + p = (float) mxGetScalar(prhs[2]); + r = (int) mxGetScalar(prhs[2]); + s = (int) mxGetScalar(prhs[3]); + if( s<1 ) mexErrMsgTxt("Invalid sampling value s"); + if( r<0 ) mexErrMsgTxt("Invalid radius r"); + + // create output array (w/o initializing to 0) + ms[0]=ns[0]/s; ms[1]=ns[1]/s; ms[2]=d; + B = (float*) mxMalloc(ms[0]*ms[1]*d*sizeof(float)); + plhs[0] = mxCreateNumericMatrix(0, 0, mxSINGLE_CLASS, mxREAL); + mxSetData(plhs[0], B); mxSetDimensions(plhs[0],(mwSize*)ms,nDims); + + // perform appropriate type of convolution + if(!strcmp(type,"convBox")) { + if(r>=m/2) mexErrMsgTxt("mask larger than image (r too large)"); + convBox( A, B, ns[0], ns[1], d, r, s ); + } else if(!strcmp(type,"convTri")) { + if(r>=m/2) mexErrMsgTxt("mask larger than image (r too large)"); + convTri( A, B, ns[0], ns[1], d, r, s ); + } else if(!strcmp(type,"conv11")) { + if( s>2 ) mexErrMsgTxt("conv11 can sample by at most s=2"); + conv11( A, B, ns[0], ns[1], d, r, s ); + } else if(!strcmp(type,"convTri1")) { + if( s>2 ) mexErrMsgTxt("convTri1 can sample by at most s=2"); + convTri1( A, B, ns[0], ns[1], d, p, s ); + } else if(!strcmp(type,"convMax")) { + if( s>1 ) mexErrMsgTxt("convMax cannot sample"); + convMax( A, B, ns[0], ns[1], d, r ); + } else { + mexErrMsgTxt("Invalid type."); + } +} +#endif diff --git a/channels/private/convConst.mexa64 b/channels/private/convConst.mexa64 new file mode 100644 index 0000000..c55cfa9 Binary files /dev/null and b/channels/private/convConst.mexa64 differ diff --git a/channels/private/convConst.mexmaci64 b/channels/private/convConst.mexmaci64 new file mode 100644 index 0000000..de3ad0c Binary files /dev/null and b/channels/private/convConst.mexmaci64 differ diff --git 
a/channels/private/convConst.mexw64 b/channels/private/convConst.mexw64 new file mode 100644 index 0000000..d91a5ed Binary files /dev/null and b/channels/private/convConst.mexw64 differ diff --git a/channels/private/gradientMex.cpp b/channels/private/gradientMex.cpp new file mode 100644 index 0000000..fc1349c --- /dev/null +++ b/channels/private/gradientMex.cpp @@ -0,0 +1,414 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.30 +* Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include "wrappers.hpp" +#include +#include "string.h" +#include "sse.hpp" + +#define PI 3.14159265f + +// compute x and y gradients for just one column (uses sse) +void grad1( float *I, float *Gx, float *Gy, int h, int w, int x ) { + int y, y1; float *Ip, *In, r; __m128 *_Ip, *_In, *_G, _r; + // compute column of Gx + Ip=I-h; In=I+h; r=.5f; + if(x==0) { r=1; Ip+=h; } else if(x==w-1) { r=1; In-=h; } + if( h<4 || h%4>0 || (size_t(I)&15) || (size_t(Gx)&15) ) { + for( y=0; yh-1) y1=h-1; + GRADY(1); Ip--; for(y=1; y PI-1e-6f ) a1[i]=PI-1e-6f; + init=true; return a1; +} + +// compute gradient magnitude and orientation at each location (uses sse) +void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) { + int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; + float *acost = acosTable(), acMult=10000.0f; + // allocate memory for storing one column of output (padded so h4%4==0) + h4=(h%4==0) ? 
h : h-(h%4)+4; s=d*h4*sizeof(float); + M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; + Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; + Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; + // compute gradient magnitude and orientation for each column + for( x=0; x=oMax) o0=0; O0[i]=o0; + o1=o0+nb; if(o1==oMax) o1=0; O1[i]=o1; + m=M[i]*norm; M1[i]=od*m; M0[i]=m-M1[i]; + } else for(; i=oMax) o0=0; O0[i]=o0; + M0[i]=M[i]*norm; M1[i]=0; O1[i]=0; + } +} + +// compute nOrients gradient histograms per bin x bin block of pixels +void gradHist( float *M, float *O, float *H, int h, int w, + int bin, int nOrients, int softBin, bool full ) +{ + const int hb=h/bin, wb=w/bin, h0=hb*bin, w0=wb*bin, nb=wb*hb; + const float s=(float)bin, sInv=1/s, sInv2=1/s/s; + float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1; float xb, init; + O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16); + O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16); + // main loop + for( x=0; x=0); + + if( softBin<0 && softBin%2==0 ) { + // no interpolation w.r.t. 
either orienation or spatial bin + H1=H+(x/bin)*hb; + #define GH H1[O0[y]]+=M0[y]; y++; + if( bin==1 ) for(y=0; y=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1; + xd=xb-xb0; xb+=sInv; yb=init; y=0; + // macros for code conciseness + #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \ + ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd; + #define GH(H,ma,mb) H1=H; STRu(*H1,ADD(LDu(*H1),MUL(ma,mb))); + // leading rows, no top bin + for( ; y=hb-1) break; GHinit; _m0=SET(M0[y]); + if(hasLf) { _m=SET(0,0,ms[1],ms[0]); GH(H0+O0[y],_m,_m0); } + if(hasRt) { _m=SET(0,0,ms[3],ms[2]); GH(H0+O0[y]+hb,_m,_m0); } + } else for( ; ; y++ ) { + yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; + _m0=SET(M0[y]); _m1=SET(M1[y]); + if(hasLf) { _m=SET(0,0,ms[1],ms[0]); + GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); } + if(hasRt) { _m=SET(0,0,ms[3],ms[2]); + GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); } + } + // final rows, no bottom bin + for( ; yclip) t=clip; c++; + const float r=.2357f; int o, x, y, c; float t; + const int nb=wb*hb, nbo=nOrients*nb, hb1=hb+1; + for( o=0; onl1 ) mexErrMsgTxt("Incorrect number of outputs."); + if( nrnr1 ) mexErrMsgTxt("Incorrect number of inputs."); + nDims = mxGetNumberOfDimensions(pr[0]); dims = mxGetDimensions(pr[0]); + *h=dims[0]; *w=dims[1]; *d=(nDims==2) ? 
1 : dims[2]; *I = mxGetPr(pr[0]); + if( nDims!=2 && nDims!=3 ) mexErrMsgTxt("I must be a 2D or 3D array."); + if( mxGetClassID(pr[0])!=id ) mexErrMsgTxt("I has incorrect type."); +} + +// [Gx,Gy] = grad2(I) - see gradient2.m +void mGrad2( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d; float *I, *Gx, *Gy; + checkArgs(nl,pl,nr,pr,1,2,1,1,&h,&w,&d,mxSINGLE_CLASS,(void**)&I); + if(h<2 || w<2) mexErrMsgTxt("I must be at least 2x2."); + pl[0]= mxCreateMatrix3( h, w, d, mxSINGLE_CLASS, 0, (void**) &Gx ); + pl[1]= mxCreateMatrix3( h, w, d, mxSINGLE_CLASS, 0, (void**) &Gy ); + grad2( I, Gx, Gy, h, w, d ); +} + +// [M,O] = gradMag( I, channel, full ) - see gradientMag.m +void mGradMag( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d, c, full; float *I, *M, *O=0; + checkArgs(nl,pl,nr,pr,1,2,3,3,&h,&w,&d,mxSINGLE_CLASS,(void**)&I); + if(h<2 || w<2) mexErrMsgTxt("I must be at least 2x2."); + c = (int) mxGetScalar(pr[1]); full = (int) mxGetScalar(pr[2]); + if( c>0 && c<=d ) { I += h*w*(c-1); d=1; } + pl[0] = mxCreateMatrix3(h,w,1,mxSINGLE_CLASS,0,(void**)&M); + if(nl>=2) pl[1] = mxCreateMatrix3(h,w,1,mxSINGLE_CLASS,0,(void**)&O); + gradMag(I, M, O, h, w, d, full>0 ); +} + +// gradMagNorm( M, S, norm ) - operates on M - see gradientMag.m +void mGradMagNorm( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d; float *M, *S, norm; + checkArgs(nl,pl,nr,pr,0,0,3,3,&h,&w,&d,mxSINGLE_CLASS,(void**)&M); + if( mxGetM(pr[1])!=h || mxGetN(pr[1])!=w || d!=1 || + mxGetClassID(pr[1])!=mxSINGLE_CLASS ) mexErrMsgTxt("M or S is bad."); + S = (float*) mxGetPr(pr[1]); norm = (float) mxGetScalar(pr[2]); + gradMagNorm(M,S,h,w,norm); +} + +// H=gradHist(M,O,[...]) - see gradientHist.m +void mGradHist( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d, hb, wb, nChns, binSize, nOrients, softBin, useHog; + bool full; float *M, *O, *H, clipHog; + checkArgs(nl,pl,nr,pr,1,3,2,8,&h,&w,&d,mxSINGLE_CLASS,(void**)&M); + O = 
(float*) mxGetPr(pr[1]); + if( mxGetM(pr[1])!=h || mxGetN(pr[1])!=w || d!=1 || + mxGetClassID(pr[1])!=mxSINGLE_CLASS ) mexErrMsgTxt("M or O is bad."); + binSize = (nr>=3) ? (int) mxGetScalar(pr[2]) : 8; + nOrients = (nr>=4) ? (int) mxGetScalar(pr[3]) : 9; + softBin = (nr>=5) ? (int) mxGetScalar(pr[4]) : 1; + useHog = (nr>=6) ? (int) mxGetScalar(pr[5]) : 0; + clipHog = (nr>=7) ? (float) mxGetScalar(pr[6]) : 0.2f; + full = (nr>=8) ? (bool) (mxGetScalar(pr[7])>0) : false; + hb = h/binSize; wb = w/binSize; + nChns = useHog== 0 ? nOrients : (useHog==1 ? nOrients*4 : nOrients*3+5); + pl[0] = mxCreateMatrix3(hb,wb,nChns,mxSINGLE_CLASS,1,(void**)&H); + if( nOrients==0 ) return; + if( useHog==0 ) { + gradHist( M, O, H, h, w, binSize, nOrients, softBin, full ); + } else if(useHog==1) { + hog( M, O, H, h, w, binSize, nOrients, softBin, full, clipHog ); + } else { + fhog( M, O, H, h, w, binSize, nOrients, softBin, clipHog ); + } +} + +// inteface to various gradient functions (see corresponding Matlab functions) +void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int f; char action[1024]; f=mxGetString(pr[0],action,1024); nr--; pr++; + if(f) mexErrMsgTxt("Failed to get action."); + else if(!strcmp(action,"gradient2")) mGrad2(nl,pl,nr,pr); + else if(!strcmp(action,"gradientMag")) mGradMag(nl,pl,nr,pr); + else if(!strcmp(action,"gradientMagNorm")) mGradMagNorm(nl,pl,nr,pr); + else if(!strcmp(action,"gradientHist")) mGradHist(nl,pl,nr,pr); + else mexErrMsgTxt("Invalid action."); +} +#endif diff --git a/channels/private/gradientMex.mexa64 b/channels/private/gradientMex.mexa64 new file mode 100644 index 0000000..3ca9460 Binary files /dev/null and b/channels/private/gradientMex.mexa64 differ diff --git a/channels/private/gradientMex.mexmaci64 b/channels/private/gradientMex.mexmaci64 new file mode 100644 index 0000000..498874b Binary files /dev/null and b/channels/private/gradientMex.mexmaci64 differ diff --git a/channels/private/gradientMex.mexw64 
b/channels/private/gradientMex.mexw64 new file mode 100644 index 0000000..8daf014 Binary files /dev/null and b/channels/private/gradientMex.mexw64 differ diff --git a/channels/private/gradientMexNew.mexmaci64 b/channels/private/gradientMexNew.mexmaci64 new file mode 100644 index 0000000..498874b Binary files /dev/null and b/channels/private/gradientMexNew.mexmaci64 differ diff --git a/channels/private/imPadMex.cpp b/channels/private/imPadMex.cpp new file mode 100644 index 0000000..a8363e3 --- /dev/null +++ b/channels/private/imPadMex.cpp @@ -0,0 +1,123 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.00 +* Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include "wrappers.hpp" +#include "string.h" +typedef unsigned char uchar; + +// pad A by [pt,pb,pl,pr] and store result in B +template void imPad( T *A, T *B, int h, int w, int d, int pt, int pb, + int pl, int pr, int flag, T val ) +{ + int h1=h+pt, hb=h1+pb, w1=w+pl, wb=w1+pr, x, y, z, mPad; + int ct=0, cb=0, cl=0, cr=0; + if(pt<0) { ct=-pt; pt=0; } if(pb<0) { h1+=pb; cb=-pb; pb=0; } + if(pl<0) { cl=-pl; pl=0; } if(pr<0) { w1+=pr; cr=-pr; pr=0; } + int *xs, *ys; x=pr>pl?pr:pl; y=pt>pb?pt:pb; mPad=x>y?x:y; + bool useLookup = ((flag==2 || flag==3) && (mPad>h || mPad>w)) + || (flag==3 && (ct || cb || cl || cr )); + // helper macro for padding + #define PAD(XL,XM,XR,YT,YM,YB) \ + for(x=0; x1 ) mexErrMsgTxt("One output expected."); + nDims=mxGetNumberOfDimensions(prhs[0]); id=mxGetClassID(prhs[0]); + ns = (int*) mxGetDimensions(prhs[0]); nCh=(nDims==2) ? 
1 : ns[2]; + if( (nDims!=2 && nDims!=3) || + (id!=mxSINGLE_CLASS && id!=mxDOUBLE_CLASS && id!=mxUINT8_CLASS) ) + mexErrMsgTxt("A should be 2D or 3D single, double or uint8 array."); + if( !mxIsDouble(prhs[1]) ) mexErrMsgTxt("Input pad must be a double array."); + + // extract padding amounts + k = (int) mxGetNumberOfElements(prhs[1]); + p = (double*) mxGetData(prhs[1]); + if(k==1) { pt=pb=pl=pr=int(p[0]); } + else if (k==2) { pt=pb=int(p[0]); pl=pr=int(p[1]); } + else if (k==4) { pt=int(p[0]); pb=int(p[1]); pl=int(p[2]); pr=int(p[3]); } + else mexErrMsgTxt( "Input pad must have 1, 2, or 4 values."); + + // figure out padding type (flag and val) + if( !mxGetString(prhs[2],type,1024) ) { + if(!strcmp(type,"replicate")) flag=1; + else if(!strcmp(type,"symmetric")) flag=2; + else if(!strcmp(type,"circular")) flag=3; + else mexErrMsgTxt("Invalid pad value."); + } else { + flag=0; val=(double)mxGetScalar(prhs[2]); + } + if( ns[0]==0 || ns[1]==0 ) flag=0; + + // create output array + ms[0]=ns[0]+pt+pb; ms[1]=ns[1]+pl+pr; ms[2]=nCh; + if( ms[0]<0 || ns[0]<=-pt || ns[0]<=-pb ) ms[0]=0; + if( ms[1]<0 || ns[1]<=-pl || ns[1]<=-pr ) ms[1]=0; + plhs[0] = mxCreateNumericArray(3, (const mwSize*) ms, id, mxREAL); + if( ms[0]==0 || ms[1]==0 ) return; + + // pad array + A=mxGetData(prhs[0]); B=mxGetData(plhs[0]); + if( id==mxDOUBLE_CLASS ) { + imPad( (double*)A,(double*)B,ns[0],ns[1],nCh,pt,pb,pl,pr,flag,val ); + } else if( id==mxSINGLE_CLASS ) { + imPad( (float*)A,(float*)B,ns[0],ns[1],nCh,pt,pb,pl,pr,flag,float(val) ); + } else if( id==mxUINT8_CLASS ) { + imPad( (uchar*)A,(uchar*)B,ns[0],ns[1],nCh,pt,pb,pl,pr,flag,uchar(val) ); + } else { + mexErrMsgTxt("Unsupported image type."); + } +} +#endif diff --git a/channels/private/imPadMex.mexa64 b/channels/private/imPadMex.mexa64 new file mode 100644 index 0000000..2554fc1 Binary files /dev/null and b/channels/private/imPadMex.mexa64 differ diff --git a/channels/private/imPadMex.mexmaci64 b/channels/private/imPadMex.mexmaci64 new file 
mode 100644 index 0000000..af07722 Binary files /dev/null and b/channels/private/imPadMex.mexmaci64 differ diff --git a/channels/private/imPadMex.mexw64 b/channels/private/imPadMex.mexw64 new file mode 100644 index 0000000..63dfeca Binary files /dev/null and b/channels/private/imPadMex.mexw64 differ diff --git a/channels/private/imResampleMex.cpp b/channels/private/imResampleMex.cpp new file mode 100644 index 0000000..8d23eeb --- /dev/null +++ b/channels/private/imResampleMex.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.00 +* Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include "wrappers.hpp" +#include "string.h" +#include +#include +#include "sse.hpp" +typedef unsigned char uchar; + +// compute interpolation values for single column for resapling +template void resampleCoef( int ha, int hb, int &n, int *&yas, + int *&ybs, T *&wts, int bd[2], int pad=0 ) +{ + const T s = T(hb)/T(ha), sInv = 1/s; T wt, wt0=T(1e-3)*s; + bool ds=ha>hb; int nMax; bd[0]=bd[1]=0; + if(ds) { n=0; nMax=ha+(pad>2 ? 
pad : 2)*hb; } else { n=nMax=hb; } + // initialize memory + wts = (T*)alMalloc(nMax*sizeof(T),16); + yas = (int*)alMalloc(nMax*sizeof(int),16); + ybs = (int*)alMalloc(nMax*sizeof(int),16); + if( ds ) for( int yb=0; ybwt0 && ya>=0) { ybs[n]=yb; yas[n]=ya; wts[n]=wt; n++; n1++; W+=wt; } + } + if(W>1) for( int i=0; ibd[0]) bd[0]=n1; + while( n1=0 && ya=ha-1) { ya=ha-1; bd[1]++; } + ybs[yb]=yb; yas[yb]=ya; wts[yb]=wt; + } +} + +// resample A using bilinear interpolation and and store result in B +template +void resample( T *A, T *B, int ha, int hb, int wa, int wb, int d, T r ) { + int hn, wn, x, x1, y, z, xa, xb, ya; T *A0, *A1, *A2, *A3, *B0, wt, wt1; + T *C = (T*) alMalloc((ha+4)*sizeof(T),16); for(y=ha; y( wa, wb, wn, xas, xbs, xwts, xbd, 0 ); + resampleCoef( ha, hb, hn, yas, ybs, ywts, ybd, 4 ); + if( wa==2*wb ) r/=2; if( wa==3*wb ) r/=3; if( wa==4*wb ) r/=4; + r/=T(1+1e-6); for( y=0; y C) + #define FORs(X) if(sse) for(; ywb ) { + int m=1; while( x1+m=4) { FORs(ADD(U(0),U(1),U(2),U(3))); FORr(V(0)+V(1)+V(2)+V(3)); } + #undef U + #undef V + for( int x0=4; x0=wb-xbd[1]; x1++; + if(xBd) memcpy(C,A0,ha*sizeof(T)); + if(!xBd) FORs(ADD(MUL(LDu(Af0[y]),SET(wtf)),MUL(LDu(Af1[y]),SET(wt1f)))); + if(!xBd) FORr( A0[y]*wt + A1[y]*wt1 ); + } + #undef FORs + #undef FORr + // resample along y direction (B -> C) + if( ha==hb*2 ) { + T r2 = r/2; int k=((~((size_t) B0) + 1) & 15)/4; y=0; + for( ; yhb ) { + y=0; + //if( sse && ybd[0]<=4 ) for(; y4) for(; y1 ) mexErrMsgTxt("One output expected."); + nDims=mxGetNumberOfDimensions(prhs[0]); id=mxGetClassID(prhs[0]); + ns = (int*) mxGetDimensions(prhs[0]); nCh=(nDims==2) ? 
1 : ns[2]; + if( (nDims!=2 && nDims!=3) || + (id!=mxSINGLE_CLASS && id!=mxDOUBLE_CLASS && id!=mxUINT8_CLASS) ) + mexErrMsgTxt("A should be 2D or 3D single, double or uint8 array."); + ms[0]=(int)mxGetScalar(prhs[1]); ms[1]=(int)mxGetScalar(prhs[2]); ms[2]=nCh; + if( ms[0]<=0 || ms[1]<=0 ) mexErrMsgTxt("downsampling factor too small."); + nrm=(double)mxGetScalar(prhs[3]); + + // create output array + plhs[0] = mxCreateNumericArray(3, (const mwSize*) ms, id, mxREAL); + n=ns[0]*ns[1]*nCh; m=ms[0]*ms[1]*nCh; + + // perform resampling (w appropriate type) + A=mxGetData(prhs[0]); B=mxGetData(plhs[0]); + if( id==mxDOUBLE_CLASS ) { + resample((double*)A, (double*)B, ns[0], ms[0], ns[1], ms[1], nCh, nrm); + } else if( id==mxSINGLE_CLASS ) { + resample((float*)A, (float*)B, ns[0], ms[0], ns[1], ms[1], nCh, float(nrm)); + } else if( id==mxUINT8_CLASS ) { + float *A1 = (float*) mxMalloc(n*sizeof(float)); + float *B1 = (float*) mxCalloc(m,sizeof(float)); + for(int i=0; i +#include +#include "sse.hpp" + +// Constants for rgb2luv conversion and lookup table for y-> l conversion +template oT* rgb2luv_setup( oT z, oT *mr, oT *mg, oT *mb, + oT &minu, oT &minv, oT &un, oT &vn ) +{ + // set constants for conversion + const oT y0=(oT) ((6.0/29)*(6.0/29)*(6.0/29)); + const oT a= (oT) ((29.0/3)*(29.0/3)*(29.0/3)); + un=(oT) 0.197833; vn=(oT) 0.468331; + mr[0]=(oT) 0.430574*z; mr[1]=(oT) 0.222015*z; mr[2]=(oT) 0.020183*z; + mg[0]=(oT) 0.341550*z; mg[1]=(oT) 0.706655*z; mg[2]=(oT) 0.129553*z; + mb[0]=(oT) 0.178325*z; mb[1]=(oT) 0.071330*z; mb[2]=(oT) 0.939180*z; + oT maxi=(oT) 1.0/270; minu=-88*maxi; minv=-134*maxi; + // build (padded) lookup table for y->l conversion assuming y in [0,1] + static oT lTable[1064]; static bool lInit=false; + if( lInit ) return lTable; oT y, l; + for(int i=0; i<1025; i++) { + y = (oT) (i/1024.0); + l = y>y0 ? 
116*(oT)pow((double)y,1.0/3.0)-16 : y*a; + lTable[i] = l*maxi; + } + for(int i=1025; i<1064; i++) lTable[i]=lTable[i-1]; + lInit = true; return lTable; +} + +// Convert from rgb to luv +template void rgb2luv( iT *I, oT *J, int n, oT nrm ) { + oT minu, minv, un, vn, mr[3], mg[3], mb[3]; + oT *lTable = rgb2luv_setup(nrm,mr,mg,mb,minu,minv,un,vn); + oT *L=J, *U=L+n, *V=U+n; iT *R=I, *G=R+n, *B=G+n; + for( int i=0; i void rgb2luv_sse( iT *I, float *J, int n, float nrm ) { + const int k=256; float R[k], G[k], B[k]; + if( (size_t(R)&15||size_t(G)&15||size_t(B)&15||size_t(I)&15||size_t(J)&15) + || n%4>0 ) { rgb2luv(I,J,n,nrm); return; } + int i=0, i1, n1; float minu, minv, un, vn, mr[3], mg[3], mb[3]; + float *lTable = rgb2luv_setup(nrm,mr,mg,mb,minu,minv,un,vn); + while( in) n1=n; float *J1=J+i; float *R1, *G1, *B1; + // convert to floats (and load input into cache) + if( typeid(iT) != typeid(float) ) { + R1=R; G1=G; B1=B; iT *Ri=I+i, *Gi=Ri+n, *Bi=Gi+n; + for( i1=0; i1<(n1-i); i1++ ) { + R1[i1] = (float) *Ri++; G1[i1] = (float) *Gi++; B1[i1] = (float) *Bi++; + } + } else { R1=((float*)I)+i; G1=R1+n; B1=G1+n; } + // compute RGB -> XYZ + for( int j=0; j<3; j++ ) { + __m128 _mr, _mg, _mb, *_J=(__m128*) (J1+j*n); + __m128 *_R=(__m128*) R1, *_G=(__m128*) G1, *_B=(__m128*) B1; + _mr=SET(mr[j]); _mg=SET(mg[j]); _mb=SET(mb[j]); + for( i1=i; i1 LUV (without doing L lookup/normalization) + __m128 _c15, _c3, _cEps, _c52, _c117, _c1024, _cun, _cvn; + _c15=SET(15.0f); _c3=SET(3.0f); _cEps=SET(1e-35f); + _c52=SET(52.0f); _c117=SET(117.0f), _c1024=SET(1024.0f); + _cun=SET(13*un); _cvn=SET(13*vn); + __m128 *_X, *_Y, *_Z, _x, _y, _z; + _X=(__m128*) J1; _Y=(__m128*) (J1+n); _Z=(__m128*) (J1+2*n); + for( i1=i; i1 void rgb2hsv( iT *I, oT *J, int n, oT nrm ) { + oT *H=J, *S=H+n, *V=S+n; + iT *R=I, *G=R+n, *B=G+n; + for(int i=0; i=g && r>=b ) { + maxv = r; minv = g=6) h-=6; + } else if( g>=r && g>=b ) { + maxv = g; minv = r void rgb2gray( iT *I, oT *J, int n, oT nrm ) { + oT *GR=J; iT *R=I, 
*G=R+n, *B=G+n; int i; + oT mr=(oT).2989360213*nrm, mg=(oT).5870430745*nrm, mb=(oT).1140209043*nrm; + for(i=0; i void rgb2gray( double *I, float *J, int n, float nrm ) { + float *GR=J; double *R=I, *G=R+n, *B=G+n; int i; + double mr=.2989360213*nrm, mg=.5870430745*nrm, mb=.1140209043*nrm; + for(i=0; i void normalize( iT *I, oT *J, int n, oT nrm ) { + for(int i=0; i +oT* rgbConvert( iT *I, int n, int d, int flag, oT nrm ) { + oT *J = (oT*) wrMalloc(n*(flag==0 ? (d==1?1:d/3) : d)*sizeof(oT)); + int i, n1=d*(n<1000?n/10:100); oT thr = oT(1.001); + if(flag>1 && nrm==1) for(i=0; ithr) + wrError("For floats all values in I must be smaller than 1."); + bool useSse = n%4==0 && typeid(oT)==typeid(float); + if( flag==2 && useSse ) + for(i=0; i1 ) mexErrMsgTxt("One output expected."); + dims = (const int*) mxGetDimensions(pr[0]); n=dims[0]*dims[1]; + nDims = mxGetNumberOfDimensions(pr[0]); + d = 1; for( int i=2; i0); + idIn = mxGetClassID(pr[0]); + + // call rgbConvert() based on type of input and output array + if(!((d==1 && flag==0) || flag==1 || (d/3)*3==d)) + mexErrMsgTxt("I must have third dimension d==1 or (d/3)*3==d."); + if( idIn == mxSINGLE_CLASS && !single ) + J = (void*) rgbConvert( (float*) I, n, d, flag, 1.0 ); + else if( idIn == mxSINGLE_CLASS && single ) + J = (void*) rgbConvert( (float*) I, n, d, flag, 1.0f ); + else if( idIn == mxDOUBLE_CLASS && !single ) + J = (void*) rgbConvert( (double*) I, n, d, flag, 1.0 ); + else if( idIn == mxDOUBLE_CLASS && single ) + J = (void*) rgbConvert( (double*) I, n, d, flag, 1.0f ); + else if( idIn == mxUINT8_CLASS && !single ) + J = (void*) rgbConvert( (unsigned char*) I, n, d, flag, 1.0/255 ); + else if( idIn == mxUINT8_CLASS && single ) + J = (void*) rgbConvert( (unsigned char*) I, n, d, flag, 1.0f/255 ); + else + mexErrMsgTxt("Unsupported image type."); + + // create and set output array + dims1[0]=dims[0]; dims1[1]=dims[1]; dims1[2]=(flag==0 ? (d==1?1:d/3) : d); + idOut = single ? 
mxSINGLE_CLASS : mxDOUBLE_CLASS; + pl[0] = mxCreateNumericMatrix(0,0,idOut,mxREAL); + mxSetData(pl[0],J); mxSetDimensions(pl[0],(const mwSize*) dims1,3); +} +#endif diff --git a/channels/private/rgbConvertMex.mexa64 b/channels/private/rgbConvertMex.mexa64 new file mode 100644 index 0000000..f19e6cf Binary files /dev/null and b/channels/private/rgbConvertMex.mexa64 differ diff --git a/channels/private/rgbConvertMex.mexmaci64 b/channels/private/rgbConvertMex.mexmaci64 new file mode 100644 index 0000000..1ce7dda Binary files /dev/null and b/channels/private/rgbConvertMex.mexmaci64 differ diff --git a/channels/private/rgbConvertMex.mexw64 b/channels/private/rgbConvertMex.mexw64 new file mode 100644 index 0000000..08c8833 Binary files /dev/null and b/channels/private/rgbConvertMex.mexw64 differ diff --git a/channels/private/sse.hpp b/channels/private/sse.hpp new file mode 100644 index 0000000..e3d60f2 --- /dev/null +++ b/channels/private/sse.hpp @@ -0,0 +1,62 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.23 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#ifndef _SSE_HPP_ +#define _SSE_HPP_ +#include // SSE2:, SSE3:, SSE4: + +#define RETf inline __m128 +#define RETi inline __m128i + +// set, load and store values +RETf SET( const float &x ) { return _mm_set1_ps(x); } +RETf SET( float x, float y, float z, float w ) { return _mm_set_ps(x,y,z,w); } +RETi SET( const int &x ) { return _mm_set1_epi32(x); } +RETf LD( const float &x ) { return _mm_load_ps(&x); } +RETf LDu( const float &x ) { return _mm_loadu_ps(&x); } +RETf STR( float &x, const __m128 y ) { _mm_store_ps(&x,y); return y; } +RETf STR1( float &x, const __m128 y ) { _mm_store_ss(&x,y); return y; } +RETf STRu( float &x, const __m128 y ) { _mm_storeu_ps(&x,y); return y; } +RETf STR( float &x, const float y ) { return STR(x,SET(y)); } + +// arithmetic operators +RETi ADD( const __m128i x, const __m128i y ) { return _mm_add_epi32(x,y); } +RETf ADD( const __m128 x, const __m128 y ) { return _mm_add_ps(x,y); } +RETf ADD( const __m128 x, const __m128 y, const __m128 z ) { + return ADD(ADD(x,y),z); } +RETf ADD( const __m128 a, const __m128 b, const __m128 c, const __m128 &d ) { + return ADD(ADD(ADD(a,b),c),d); } +RETf SUB( const __m128 x, const __m128 y ) { return _mm_sub_ps(x,y); } +RETf MUL( const __m128 x, const __m128 y ) { return _mm_mul_ps(x,y); } +RETf MUL( const __m128 x, const float y ) { return MUL(x,SET(y)); } +RETf MUL( const float x, const __m128 y ) { return MUL(SET(x),y); } +RETf INC( __m128 &x, const __m128 y ) { return x = ADD(x,y); } +RETf INC( float &x, const __m128 y ) { __m128 t=ADD(LD(x),y); return STR(x,t); } +RETf DEC( __m128 &x, const __m128 y ) { return x = SUB(x,y); } +RETf DEC( float &x, const __m128 y ) { __m128 t=SUB(LD(x),y); return STR(x,t); } +RETf MIN( const __m128 x, const __m128 y ) { return _mm_min_ps(x,y); } +RETf RCP( const __m128 x ) { return _mm_rcp_ps(x); 
} +RETf RCPSQRT( const __m128 x ) { return _mm_rsqrt_ps(x); } + +// logical operators +RETf AND( const __m128 x, const __m128 y ) { return _mm_and_ps(x,y); } +RETi AND( const __m128i x, const __m128i y ) { return _mm_and_si128(x,y); } +RETf ANDNOT( const __m128 x, const __m128 y ) { return _mm_andnot_ps(x,y); } +RETf OR( const __m128 x, const __m128 y ) { return _mm_or_ps(x,y); } +RETf XOR( const __m128 x, const __m128 y ) { return _mm_xor_ps(x,y); } + +// comparison operators +RETf CMPGT( const __m128 x, const __m128 y ) { return _mm_cmpgt_ps(x,y); } +RETf CMPLT( const __m128 x, const __m128 y ) { return _mm_cmplt_ps(x,y); } +RETi CMPGT( const __m128i x, const __m128i y ) { return _mm_cmpgt_epi32(x,y); } +RETi CMPLT( const __m128i x, const __m128i y ) { return _mm_cmplt_epi32(x,y); } + +// conversion operators +RETf CVT( const __m128i x ) { return _mm_cvtepi32_ps(x); } +RETi CVT( const __m128 x ) { return _mm_cvttps_epi32(x); } + +#undef RETf +#undef RETi +#endif diff --git a/channels/private/wrappers.hpp b/channels/private/wrappers.hpp new file mode 100644 index 0000000..3009281 --- /dev/null +++ b/channels/private/wrappers.hpp @@ -0,0 +1,42 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.00 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#ifndef _WRAPPERS_HPP_ +#define _WRAPPERS_HPP_ +#ifdef MATLAB_MEX_FILE + +// wrapper functions if compiling from Matlab +#include "mex.h" +inline void wrError(const char *errormsg) { mexErrMsgTxt(errormsg); } +inline void* wrCalloc( size_t num, size_t size ) { return mxCalloc(num,size); } +inline void* wrMalloc( size_t size ) { return mxMalloc(size); } +inline void wrFree( void * ptr ) { mxFree(ptr); } + +#else + +// wrapper functions if compiling from C/C++ +inline void wrError(const char *errormsg) { throw errormsg; } +inline void* wrCalloc( size_t num, size_t size ) { return calloc(num,size); } +inline void* wrMalloc( size_t size ) { return malloc(size); } +inline void wrFree( void * ptr ) { free(ptr); } + +#endif + +// platform independent aligned memory allocation (see also alFree) +void* alMalloc( size_t size, int alignment ) { + const size_t pSize = sizeof(void*), a = alignment-1; + void *raw = wrMalloc(size + a + pSize); + void *aligned = (void*) (((size_t) raw + pSize + a) & ~a); + *(void**) ((size_t) aligned-pSize) = raw; + return aligned; +} + +// platform independent alignned memory de-allocation (see also alMalloc) +void alFree(void* aligned) { + void* raw = *(void**)((char*)aligned-sizeof(void*)); + wrFree(raw); +} + +#endif diff --git a/channels/rgbConvert.m b/channels/rgbConvert.m new file mode 100644 index 0000000..f08b534 --- /dev/null +++ b/channels/rgbConvert.m @@ -0,0 +1,80 @@ +function J = rgbConvert( I, colorSpace, useSingle ) +% Convert RGB image to other color spaces (highly optimized). +% +% If colorSpace=='gray' transforms I to grayscale. The output is within +% numerical error of Matlab's rgb2gray, except ~10x faster. 
The output in +% this case is hxwx1, and while the input must be hxwx3 for all other +% cases, the input for this case can also be hxwx1 (normalization only). +% +% If colorSpace=='hsv' transforms I to the HSV color space. The output is +% within numerical error of Matlab's rgb2hsv, except ~15x faster. +% +% If colorSpace=='rgb' or colorSpace='orig' only normalizes I to be in the +% range [0,1]. In this case both the input and output may have an arbitrary +% number of channels (that is I may be [hxwxd] for any d). +% +% If colorSpace=='luv' transforms I to the LUV color space. The LUV color +% space is "perceptually uniform" (meaning that two colors equally distant +% in the color space according to the Euclidean metric are equally distant +% perceptually). The L,u,v channels correspond roughly to luminance, +% green-red, blue-yellow. For more information see: +% http://en.wikipedia.org/wiki/CIELUV - using this color spaces +% http://en.wikipedia.org/wiki/CIELAB - more info about color spaces +% The LUV channels are normalized to fall in ~[0,1]. Without normalization +% the ranges are L~[0,100], u~[-88,182], and v~[-134,105] (and typically +% u,v~[-100,100]). The applied transformation is L=L/270, u=(u+88)/270, and +% v=(v+134)/270. This results in ranges L~[0,.37], u~[0,1], and v~[0,.89]. +% Perceptual uniformity is maintained since divisor is constant +% (normalizing each color channel independently would break uniformity). +% To undo the normalization on an LUV image J use: +% J=J*270; J(:,:,2)=J(:,:,2)-88; J(:,:,3)=J(:,:,3)-134; +% To test the range of the colorSpace use: +% R=100; I=zeros(R^3,1,3); k=1; R=linspace(0,1,R); +% for r=R, for g=R, for b=R, I(k,1,:)=[r g b]; k=k+1; end; end; end +% J=rgbConvert(I,'luv'); [min(J), max(J)] +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% J = rgbConvert( I, colorSpace, [useSingle] ); +% +% INPUTS +% I - [hxwx3] input rgb image (uint8 or single/double in [0,1]) +% colorSpace - ['luv'] other choices include: 'gray', 'hsv', 'rgb', 'orig' +% useSingle - [true] determines output type (faster if useSingle) +% +% OUTPUTS +% J - [hxwx3] single or double output image (normalized to [0,1]) +% +% EXAMPLE - luv +% I = imread('peppers.png'); +% tic, J = rgbConvert( I, 'luv' ); toc +% figure(1); montage2( J ); +% +% EXAMPLE - hsv +% I=imread('peppers.png'); +% tic, J1=rgb2hsv( I ); toc +% tic, J2=rgbConvert( I, 'hsv' ); toc +% mean2(abs(J1-J2)) +% +% EXAMPLE - gray +% I=imread('peppers.png'); +% tic, J1=rgb2gray( I ); toc +% tic, J2=rgbConvert( I, 'gray' ); toc +% J1=single(J1)/255; mean2(abs(J1-J2)) +% +% See also rgb2hsv, rgb2gray +% +% Piotr's Computer Vision Matlab Toolbox Version 3.02 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<3 || isempty(useSingle)), useSingle=true; end +flag = find(strcmpi(colorSpace,{'gray','rgb','luv','hsv','orig'}))-1; +if(isempty(flag)), error('unknown colorSpace: %s',colorSpace); end +if(useSingle), outClass='single'; else outClass='double'; end +if(isempty(I) && flag>0 && flag~=4), I=I(:,:,[1 1 1]); end +d=size(I,3); if(flag==4), flag=1; end; norm=(d==1 && flag==0) || flag==1; +if( norm && isa(I,outClass) ), J=I; return; end +J=rgbConvertMex(I,flag,useSingle); diff --git a/classify/Contents.m b/classify/Contents.m new file mode 100644 index 0000000..5be11df --- /dev/null +++ b/classify/Contents.m @@ -0,0 +1,46 @@ +% CLASSIFY +% See also +% +% Clustering: +% demoCluster - Clustering demo. +% demoGenData - Generate data drawn form a mixture of Gaussians. +% kmeans2 - Fast version of kmeans clustering. +% meanShift - meanShift clustering algorithm. +% meanShiftIm - Applies the meanShift algorithm to a joint spatial/range image. 
+% meanShiftImExplore - Visualization to help choose sigmas for meanShiftIm. +% +% Calculating distances efficiently: +% distMatrixShow - Useful visualization of a distance matrix of clustered points. +% pdist2 - Calculates the distance between sets of vectors. +% softMin - Calculates the softMin of a vector. +% +% Principal components analysis: +% pca - Principal components analysis (alternative to princomp). +% pcaApply - Companion function to pca. +% pcaRandVec - Generate random vectors in PCA subspace. +% pcaVisualize - Visualization of quality of approximation of X given principal comp. +% visualizeData - Project high dim. data unto principal components (PCA) for visualization. +% +% Confusion matrix display: +% confMatrix - Generates a confusion matrix according to true and predicted data labels. +% confMatrixShow - Used to display a confusion matrix. +% +% Radial Basis Functions (RBFs): +% rbfComputeBasis - Get locations and sizes of radial basis functions for use in rbf network. +% rbfComputeFtrs - Evaluate features of X given a set of radial basis functions. +% rbfDemo - Demonstration of rbf networks for regression. +% +% Fast random fern/forest classification/regression code: +% fernsClfApply - Apply learned fern classifier. +% fernsClfTrain - Train random fern classifier. +% fernsInds - Compute indices for each input by each fern. +% fernsRegApply - Apply learned fern regressor. +% fernsRegTrain - Train boosted fern regressor. +% forestApply - Apply learned forest classifier. +% forestTrain - Train random forest classifier. +% +% Fast boosted decision tree code: +% adaBoostTrain - Train boosted decision tree classifier. +% adaBoostApply - Apply learned boosted decision tree classifier. +% binaryTreeTrain - Train binary decision tree classifier. +% binaryTreeApply - Apply learned binary decision tree classifier. 
diff --git a/classify/adaBoostApply.m b/classify/adaBoostApply.m new file mode 100644 index 0000000..691070e --- /dev/null +++ b/classify/adaBoostApply.m @@ -0,0 +1,36 @@ +function hs = adaBoostApply( X, model, maxDepth, minWeight, nThreads ) +% Apply learned boosted decision tree classifier. +% +% USAGE +% hs = adaBoostApply( X, model, [maxDepth], [minWeight], [nThreads] ) +% +% INPUTS +% X - [NxF] N length F feature vectors +% model - learned boosted tree classifier +% maxDepth - [] maximum depth of tree +% minWeight - [] minimum sample weigth to allow split +% nThreads - [16] max number of computational threads to use +% +% OUTPUTS +% hs - [Nx1] predicted output log ratios +% +% EXAMPLE +% +% See also adaBoostTrain +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<3 || isempty(maxDepth)), maxDepth=0; end +if(nargin<4 || isempty(minWeight)), minWeight=0; end +if(nargin<5 || isempty(nThreads)), nThreads=16; end +if(maxDepth>0), model.child(model.depth>=maxDepth) = 0; end +if(minWeight>0), model.child(model.weights<=minWeight) = 0; end +nWeak=size(model.fids,2); N=size(X,1); hs=zeros(N,1); nt=nThreads; +for i=1:nWeak + ids = forestInds(X,model.thrs(:,i),model.fids(:,i),model.child(:,i),nt); + hs = hs + model.hs(ids,i); +end + +end diff --git a/classify/adaBoostTrain.m b/classify/adaBoostTrain.m new file mode 100644 index 0000000..4acb0e7 --- /dev/null +++ b/classify/adaBoostTrain.m @@ -0,0 +1,111 @@ +function model = adaBoostTrain( X0, X1, varargin ) +% Train boosted decision tree classifier. +% +% Heavily optimized code for training Discrete or Real AdaBoost where the +% weak classifiers are decision trees. With multi-core support enabled (see +% binaryTreeTrain.m), boosting 256 depth-2 trees over 5,000 features and +% 5,000 data points takes under 5 seconds, see example below. 
Most of the +% training time is spent in binaryTreeTrain.m. +% +% For more information on how to quickly boost decision trees see: +% [1] R. Appel, T. Fuchs, P. Dollár, P. Perona; "Quickly Boosting +% Decision Trees – Pruning Underachieving Features Early," ICML 2013. +% The code here implements a simple brute-force strategy with the option to +% sample features used for training each node for additional speedups. +% Further gains using the ideas from the ICML paper are possible. If you +% use this code please consider citing our ICML paper. +% +% USAGE +% model = adaBoostTrain( X0, X1, [pBoost] ) +% +% INPUTS +% X0 - [N0xF] negative feature vectors +% X1 - [N1xF] positive feature vectors +% pBoost - additional params (struct or name/value pairs) +% .pTree - ['REQ'] parameters for binaryTreeTrain +% .nWeak - [128] number of trees to learn +% .discrete - [1] train Discrete-AdaBoost or Real-AdaBoost +% .verbose - [0] if true print status information +% +% OUTPUTS +% model - learned boosted tree classifier w the following fields +% .fids - [K x nWeak] feature ids for each node +% .thrs - [K x nWeak] threshold corresponding to each fid +% .child - [K x nWeak] index of child for each node (1-indexed) +% .hs - [K x nWeak] log ratio (.5*log(p/(1-p)) at each node +% .weights - [K x nWeak] total sample weight at each node +% .depth - [K x nWeak] depth of each node +% .errs - [1 x nWeak] error for each tree (for debugging) +% .losses - [1 x nWeak] loss after every iteration (for debugging) +% .treeDepth - depth of all leaf nodes (or 0 if leaf depth varies) +% +% EXAMPLE +% % output should be: 'Testing err=0.0145 fp=0.0165 fn=0.0125' +% N=5000; F=5000; sep=.01; RandStream.getGlobalStream.reset(); +% [xTrn,hTrn,xTst,hTst]=demoGenData(N,N,2,F/10,sep,.5,0); +% xTrn=repmat(single(xTrn),[1 10]); xTst=repmat(single(xTst),[1 10]); +% pBoost=struct('nWeak',256,'verbose',16,'pTree',struct('maxDepth',2)); +% model = adaBoostTrain( xTrn(hTrn==1,:), xTrn(hTrn==2,:), pBoost ); +% fp = 
mean(adaBoostApply( xTst(hTst==1,:), model )>0); +% fn = mean(adaBoostApply( xTst(hTst==2,:), model )<0); +% fprintf('Testing err=%.4f fp=%.4f fn=%.4f\n',(fp+fn)/2,fp,fn); +% +% See also adaBoostApply, binaryTreeTrain, demoGenData +% +% Piotr's Computer Vision Matlab Toolbox Version 3.21 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get additional parameters +dfs={ 'pTree','REQ', 'nWeak',128, 'discrete',1, 'verbose',0 }; +[pTree,nWeak,discrete,verbose]=getPrmDflt(varargin,dfs,1); +nThreads=[]; if(isfield(pTree,'nThreads')), nThreads=pTree.nThreads; end + +% main loop +[N0,F]=size(X0); [N1,F1]=size(X1); assert(F==F1); +msg='Training AdaBoost: nWeak=%3i nFtrs=%i pos=%i neg=%i\n'; +if(verbose), fprintf(msg,nWeak,F,N1,N0); start=clock; end +data=struct('X0',X0,'X1',X1); +H0=zeros(N0,1); H1=zeros(N1,1); +losses=zeros(1,nWeak); errs=losses; +for i=1:nWeak + % train tree and classify each example + [tree,data,err]=binaryTreeTrain(data,pTree); + if(discrete), tree.hs=(tree.hs>0)*2-1; end + h0 = binaryTreeApply(X0,tree,[],[],nThreads); + h1 = binaryTreeApply(X1,tree,[],[],nThreads); + % compute alpha and incorporate directly into tree model + alpha=1; if(discrete), alpha=max(-5,min(5,.5*log((1-err)/err))); end + if(verbose && alpha<=0), nWeak=i-1; disp(' stopping early'); break; end + tree.hs=tree.hs*alpha; + % update cumulative scores H and weights + H0=H0+h0*alpha; data.wts0=exp( H0)/N0/2; + H1=H1+h1*alpha; data.wts1=exp(-H1)/N1/2; + loss=sum(data.wts0)+sum(data.wts1); + if(i==1), trees=repmat(tree,nWeak,1); end + trees(i)=tree; errs(i)=err; losses(i)=loss; + msg=' i=%4i alpha=%.3f err=%.3f loss=%.2e\n'; + if(mod(i,verbose)==0), fprintf(msg,i,alpha,err,loss); end + if(verbose && loss<1e-40), nWeak=i; disp(' stopping early'); break; end +end + +% create output model struct +k=0; for i=1:nWeak, k=max(k,size(trees(i).fids,1)); end +Z = @(type) zeros(k,nWeak,type); +model=struct( 
'fids',Z('uint32'), 'thrs',Z(data.xType), ... + 'child',Z('uint32'), 'hs',Z('single'), 'weights',Z('single'), ... + 'depth',Z('uint32'), 'errs',errs, 'losses',losses ); +for i=1:nWeak, T=trees(i); k=size(T.fids,1); + model.fids(1:k,i)=T.fids; model.thrs(1:k,i)=T.thrs; + model.child(1:k,i)=T.child; model.hs(1:k,i)=T.hs; + model.weights(1:k,i)=T.weights; model.depth(1:k,i)=T.depth; +end +depth = max(model.depth(:)); +model.treeDepth = depth * uint32(all(model.depth(~model.child)==depth)); + +% output info to log +msg='Done training err=%.4f fp=%.4f fn=%.4f (t=%.1fs).\n'; +if(verbose), fp=mean(H0>0); fn=mean(H1<0); + fprintf(msg,(fp+fn)/2,fp,fn,etime(clock,start)); end + +end diff --git a/classify/binaryTreeApply.m b/classify/binaryTreeApply.m new file mode 100644 index 0000000..e8d597d --- /dev/null +++ b/classify/binaryTreeApply.m @@ -0,0 +1,32 @@ +function hs = binaryTreeApply( X, tree, maxDepth, minWeight, nThreads ) +% Apply learned binary decision tree classifier. +% +% USAGE +% hs = binaryTreeApply( X, tree, [maxDepth], [minWeight], [nThreads] ) +% +% INPUTS +% X - [NxF] N length F feature vectors +% tree - learned tree classification model +% maxDepth - [] maximum depth of tree +% minWeight - [] minimum sample weigth to allow split +% nThreads - [16] max number of computational threads to use +% +% OUTPUTS +% hs - [Nx1] predicted output log ratios +% +% EXAMPLE +% +% See also binaryTreeTrain +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<3 || isempty(maxDepth)), maxDepth=0; end +if(nargin<4 || isempty(minWeight)), minWeight=0; end +if(nargin<5 || isempty(nThreads)), nThreads=16; end +if(maxDepth>0), tree.child(tree.depth>=maxDepth) = 0; end +if(minWeight>0), tree.child(tree.weights<=minWeight) = 0; end +hs = tree.hs(forestInds(X,tree.thrs,tree.fids,tree.child,nThreads)); + +end diff --git a/classify/binaryTreeTrain.m b/classify/binaryTreeTrain.m new file mode 100644 index 0000000..f7af397 --- /dev/null +++ b/classify/binaryTreeTrain.m @@ -0,0 +1,125 @@ +function [tree,data,err] = binaryTreeTrain( data, varargin ) +% Train binary decision tree classifier. +% +% Highly optimized code for training decision trees over binary variables. +% Training a decision stump (depth=1) over 5000 features and 10000 training +% examples takes 70ms on a single core machine and *7ms* with 12 cores and +% OpenMP enabled (OpenMP is enabled by default, see toolboxCompile). This +% code shares similarities with forestTrain.m but is optimized for binary +% labels. Moreover, while forestTrain is meant for training random decision +% forests, this code is tuned for use with boosting (see adaBoostTrain.m). +% +% For more information on how to quickly boost decision trees see: +% [1] R. Appel, T. Fuchs, P. Dollár, P. Perona; "Quickly Boosting +% Decision Trees – Pruning Underachieving Features Early," ICML 2013. +% The code here implements a simple brute-force strategy with the option to +% sample features used for training each node for additional speedups. +% Further gains using the ideas from the ICML paper are possible. If you +% use this code please consider citing our ICML paper. +% +% During training each feature is quantized to lie between [0,nBins-1], +% where nBins<=256. Quantization is expensive and should be performed just +% once if training multiple trees. 
Note that the second output of the
+% algorithm is the quantized data, this can be reused in future training.
+%
+% USAGE
+% [tree,data,err] = binaryTreeTrain( data, [pTree] )
+%
+% INPUTS
+% data - data for training tree
+% .X0 - [N0xF] negative feature vectors
+% .X1 - [N1xF] positive feature vectors
+% .wts0 - [N0x1] negative weights
+% .wts1 - [N1x1] positive weights
+% .xMin - [1xF] optional vals defining feature quantization
+% .xStep - [1xF] optional vals defining feature quantization
+% .xType - [] optional original data type for features
+% pTree - additional params (struct or name/value pairs)
+% .nBins - [256] maximum number of quantization bins (<=256)
+% .maxDepth - [1] maximum depth of tree
+% .minWeight - [.01] minimum sample weight to allow split
+% .fracFtrs - [1] fraction of features to sample for each node split
+% .nThreads - [16] max number of computational threads to use
+%
+% OUTPUTS
+% tree - learned decision tree model struct w the following fields
+% .fids - [Kx1] feature ids for each node
+% .thrs - [Kx1] threshold corresponding to each fid
+% .child - [Kx1] index of child for each node (1-indexed)
+% .hs - [Kx1] log ratio (.5*log(p/(1-p)) at each node
+% .weights - [Kx1] total sample weight at each node
+% .depth - [Kx1] depth of each node
+% data - data used for training tree (quantized version of input)
+% err - decision tree training error
+%
+% EXAMPLE
+%
+% See also binaryTreeApply, adaBoostTrain, forestTrain
+%
+% Piotr's Computer Vision Matlab Toolbox Version 3.40
+% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com]
+% Licensed under the Simplified BSD License [see external/bsd.txt]
+
+% get parameters
+dfs={'nBins',256,'maxDepth',1,'minWeight',.01,'fracFtrs',1,'nThreads',16};
+[nBins,maxDepth,minWeight,fracFtrs,nThreads]=getPrmDflt(varargin,dfs,1);
+assert(nBins<=256);
+
+% get data and normalize weights
+dfs={ 'X0','REQ', 'X1','REQ', 'wts0',[], 'wts1',[], ...
+ 'xMin',[], 'xStep',[], 'xType',[] }; +[X0,X1,wts0,wts1,xMin,xStep,xType]=getPrmDflt(data,dfs,1); +[N0,F]=size(X0); [N1,F1]=size(X1); assert(F==F1); +if(isempty(xType)), xMin=zeros(1,F); xStep=ones(1,F); xType=class(X0); end +assert(isfloat(wts0)); if(isempty(wts0)), wts0=ones(N0,1)/N0; end +assert(isfloat(wts1)); if(isempty(wts1)), wts1=ones(N1,1)/N1; end +w=sum(wts0)+sum(wts1); if(abs(w-1)>1e-3), wts0=wts0/w; wts1=wts1/w; end + +% quantize data to be between [0,nBins-1] if not already quantized +if( ~isa(X0,'uint8') || ~isa(X1,'uint8') ) + xMin = min(min(X0),min(X1))-.01; + xMax = max(max(X0),max(X1))+.01; + xStep = (xMax-xMin) / (nBins-1); + X0 = uint8(bsxfun(@times,bsxfun(@minus,X0,xMin),1./xStep)); + X1 = uint8(bsxfun(@times,bsxfun(@minus,X1,xMin),1./xStep)); +end +data=struct( 'X0',X0, 'X1',X1, 'wts0',wts0, 'wts1',wts1, ... + 'xMin',xMin, 'xStep',xStep, 'xType',xType ); + +% train decision tree classifier +K=2*(N0+N1); thrs=zeros(K,1,xType); +hs=zeros(K,1,'single'); weights=hs; errs=hs; +fids=zeros(K,1,'uint32'); child=fids; depth=fids; +wtsAll0=cell(K,1); wtsAll0{1}=wts0; +wtsAll1=cell(K,1); wtsAll1{1}=wts1; k=1; K=2; +while( k < K ) + % get node weights and prior + wts0=wtsAll0{k}; wtsAll0{k}=[]; w0=sum(wts0); + wts1=wtsAll1{k}; wtsAll1{k}=[]; w1=sum(wts1); + w=w0+w1; prior=w1/w; weights(k)=w; errs(k)=min(prior,1-prior); + hs(k)=max(-4,min(4,.5*log(prior/(1-prior)))); + % if nearly pure node or insufficient data don't train split + if( prior<1e-3||prior>1-1e-3||depth(k)>=maxDepth||w=3), err=sum(errs(1:K).*tree.weights.*(tree.child==0)); end + +end diff --git a/classify/binaryTreeTrain1.cpp b/classify/binaryTreeTrain1.cpp new file mode 100644 index 0000000..7c7cbf8 --- /dev/null +++ b/classify/binaryTreeTrain1.cpp @@ -0,0 +1,73 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.24 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include +#ifdef USEOMP +#include +#endif + +typedef unsigned char uint8; +typedef unsigned int uint32; +#define min(x,y) ((x) < (y) ? (x) : (y)) + +// construct cdf given data vector and wts +void constructCdf( uint8* data, float *wts, int nBins, + int N, int M, uint32 *ord, float *cdf ) +{ + int i; for( i=0; ie1) { e0=1-e; e1=e; thr=i; } + } + errs[f]=e0; thrs[f]=(uint8) thr; + } +} diff --git a/classify/binaryTreeTrain1.mexa64 b/classify/binaryTreeTrain1.mexa64 new file mode 100644 index 0000000..18caced Binary files /dev/null and b/classify/binaryTreeTrain1.mexa64 differ diff --git a/classify/binaryTreeTrain1.mexmaci64 b/classify/binaryTreeTrain1.mexmaci64 new file mode 100644 index 0000000..858cad6 Binary files /dev/null and b/classify/binaryTreeTrain1.mexmaci64 differ diff --git a/classify/binaryTreeTrain1.mexw64 b/classify/binaryTreeTrain1.mexw64 new file mode 100644 index 0000000..ffa42c5 Binary files /dev/null and b/classify/binaryTreeTrain1.mexw64 differ diff --git a/classify/confMatrix.m b/classify/confMatrix.m new file mode 100644 index 0000000..0098979 --- /dev/null +++ b/classify/confMatrix.m @@ -0,0 +1,61 @@ +function CM = confMatrix( IDXtrue, IDXpred, ntypes ) +% Generates a confusion matrix according to true and predicted data labels. +% +% CM(i,j) denotes the number of elements of class i that were given label +% j. In other words, each row i contains the predictions for elements whos +% actual class was i. If IDXpred is perfect, then CM is a diagonal matrix +% with CM(i,i) equal to the number of instances of class i. 
+% +% To normalize CM to [0,1], divide each row by sum of that row: +% CMnorm = CM ./ repmat( sum(CM,2), [1 size(CM,2)] ); +% +% USAGE +% CM = confMatrix( IDXtrue, IDXpred, ntypes ) +% +% INPUTS +% IDXtrue - [nx1] array of true labels [int values in 1-ntypes] +% IDXpred - [nx1] array of predicted labels [int values in 1-ntypes] +% ntypes - maximum number of types (should be > max(IDX)) +% +% OUTPUTS +% CM - ntypes x ntypes confusion array with integer values +% +% EXAMPLE +% IDXtrue = [ones(1,25) ones(1,25)*2]; +% IDXpred = [ones(1,10) randint2(1,30,[1 2]) ones(1,10)*2]; +% CM = confMatrix( IDXtrue, IDXpred, 2 ) +% confMatrixShow( CM, {'class-A','class-B'}, {'FontSize',20} ) +% +% See also CONFMATRIXSHOW +% +% Piotr's Computer Vision Matlab Toolbox Version 2.12 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +IDXtrue=IDXtrue(:); IDXpred=IDXpred(:); + +%%% convert common binary labels [-1/+1] or [0/1] to [1/2] +if( ntypes==2 ) + IDX = [IDXtrue;IDXpred]; + if( min(IDX)>=-1 && max(IDX)<=1 && all(IDX~=0)) + IDXtrue=IDXtrue+2; IDXpred=IDXpred+2; + IDXtrue(IDXtrue==3) = 2; IDXpred(IDXpred==3) = 2; + elseif( min(IDX)>=0 && max(IDX)<=1 ) + IDXtrue=IDXtrue+1; IDXpred=IDXpred+1; + end +end + +%%% error check +[IDXtrue,er] = checkNumArgs( IDXtrue, [], 0, 2 ); error(er); +[IDXpred,er] = checkNumArgs( IDXpred, [], 0, 2 ); error(er); +if( length(IDXtrue)~=length(IDXpred) ) + error('Lengths of IDXs must match up.'); end +if( max([IDXtrue;IDXpred])>ntypes ) + error(['ntypes = ' int2str(ntypes) ' not large enough']); end + +%%% generate CM +CM = zeros(ntypes); +for i=1:ntypes + vals = IDXpred( IDXtrue==i ); + for j=1:ntypes; CM(i,j) = sum(vals==j); end +end diff --git a/classify/confMatrixShow.m b/classify/confMatrixShow.m new file mode 100644 index 0000000..e06383c --- /dev/null +++ b/classify/confMatrixShow.m @@ -0,0 +1,53 @@ +function confMatrixShow( CM, types, pvPairs, nDigits, showCnts ) +% Used to 
display a confusion matrix.
+%
+% See confMatrix for general format and info on confusion matrices. This
+% function normalizes the CM before displaying, hence all values range in
+% [0,1] and rows sum to 1.
+%
+% USAGE
+% confMatrixShow( CM, [types], [pvPairs], [nDigits], [showCnts] )
+%
+% INPUTS
+% CM - [nTypes x nTypes] confusion array -- see confMatrix
+% types - [] cell array of length nTypes of text labels
+% pvPairs - [{'FontSize',20}] parameter / value list for text.m
+% nDigits - [2] number of digits after decimal to display
+% showCnts - [0] show total count per row to the right
+%
+% OUTPUTS
+%
+% EXAMPLE
+% CM = randint2(6,6,[1,100])+eye(6)*500;
+% types = { 'anger','disgust','fear','joy','sadness','surprise' };
+% confMatrixShow( CM, types, {'FontSize',20}, [], 0 )
+% title('confusion matrix','FontSize',24);
+%
+% See also confMatrix, imLabel, dispMatrixIm
+%
+% Piotr's Computer Vision Matlab Toolbox Version 2.50
+% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com]
+% Licensed under the Simplified BSD License [see external/bsd.txt]
+
+if( nargin<2 ); types=[]; end
+if( nargin<3 || isempty(pvPairs)); pvPairs = {'FontSize',20}; end
+if( nargin<4 || isempty(nDigits)); nDigits=2; end
+if( nargin<5 || isempty(showCnts)); showCnts=0; end
+if( nDigits<1 || nDigits>10 ); error('too few or too many digits'); end
+if( any(CM(:)<0) ); error( 'CM must have non-negative entries' ); end
+
+% normalize and round appropriately
+cnts = sum(CM,2);
+CM = CM ./ repmat( cnts+eps, [1 size(CM,2)] );
+CM = round(CM*10^nDigits) / 10^nDigits;
+
+% display as image using dispMatrixIm
+dispMatrixIm(CM,'maxM',1,'maxLen',nDigits+1,'show0',0,...
+ 'fStr','%f','invert',1,'pvPairs',pvPairs); axis square; + +% now add type labels +if( ~isempty(types) ) + imLabel( types, 'left', 0, pvPairs ); + imLabel( types, 'bottom', -35, pvPairs ); + if(showCnts), imLabel(int2str2(cnts),'right',0,pvPairs); end +end diff --git a/classify/demoCluster.m b/classify/demoCluster.m new file mode 100644 index 0000000..f625a8f --- /dev/null +++ b/classify/demoCluster.m @@ -0,0 +1,53 @@ +% Clustering demo. +% +% Used to test different clustering algorithms on 2D and 3D mixture of +% gaussian data. Alter demo by edititing this file. +% +% USAGE +% demoCluster +% +% INPUTS +% +% OUTPUTS +% +% EXAMPLE +% demoCluster +% +% See also KMEANS2, MEANSHIFT +% +% Piotr's Computer Vision Matlab Toolbox Version 2.0 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%%% generate data +if(1) % mixture of gaussians -- see demoGenData + kTr = 5; sep = 3; ecc = 3; nFracTr = 0.1; nPnts = 1000; d = 2; + [X,IDXtr] = demoGenData(nPnts,0,kTr,d,sep,ecc,nFracTr); +else + % two parallel clusters - kmeans will fail + kTr = 2; nPnts = 200; sep = 4; + X = [([5 0; 0 .5] * randn(2,nPnts) + sep/2)' ; ... 
+ ([5 0; 0 .5] * randn(2,nPnts) - sep/2)' ] / 5;
+ IDXtr = [ones(1,nPnts) 2*ones(1,nPnts)];
+ nFracTr=0;
+end;
+nFrac = nFracTr; k = kTr;
+
+%%% cluster
+switch 'kmeans2'
+ case 'kmeans2'
+ prm.nTrial=4; prm.display=1; prm.outFrac=nFrac;
+ [IDX,C,sumd] = kmeans2( X, k, prm );
+ case 'meanShift'
+ %(X,radius,rate,maxiter,minCsize,blur)
+ [IDX,C] = meanShift( X, .4, .2, 100 , 10, 0 );
+end
+
+%%% show data & clustering results
+figure(1); clf; d2 = min(d,3);
+subplot(2,2,1); visualizeData(X, d2); title('orig points');
+if(~isempty(IDXtr))
+ subplot(2,2,2); visualizeData(X, d2, IDXtr); title('true clusters');
+end;
+subplot(2,2,3); visualizeData(X, d2, IDX, [], C); title('rec clusters');
+subplot(2,2,4); D=distMatrixShow(sqrt(pdist2(X,X)),IDX,0); im(D);
diff --git a/classify/demoGenData.m b/classify/demoGenData.m
new file mode 100644
index 0000000..e3b3138
--- /dev/null
+++ b/classify/demoGenData.m
@@ -0,0 +1,82 @@
+function [X0,H0,X1,H1] = demoGenData(n0,n1,k,d,sep,ecc,frc)
+% Generate data drawn from a mixture of Gaussians.
+%
+% For definitions of separation and eccentricity see:
+% Sanjoy Dasgupta, "Learning Mixtures of Gaussians", FOCS, 1999.
+% http://cseweb.ucsd.edu/~dasgupta/papers/mog.pdf +% +% USAGE +% [X0,H0,X1,H1] = demoGenData(n0,n1,k,d,sep,ecc,[frc]) +% +% INPUTS +% n0 - size of training set +% n1 - size of testing set +% k - number of mixture components +% d - data dimension +% sep - minimum separation degree between clusters (sep > 0) +% ecc - maximum eccentricity of clusters (0 < ecc < 1) +% frc - [0] frac of points that are noise (uniformly distributed) +% +% OUTPUTS +% X0 - [n0xd] training set data vectors +% H0 - [n0x1] cluster membership in [1,k] (and -1 for noise) +% X1 - [n1xd] testing set data vectors +% H1 - [n1x1] cluster membership in [1,k] (and -1 for noise) +% +% EXAMPLE +% n0=1000; k=5; d=2; sep=2; ecc=1; frc=0; +% [X0,H0,X1,H1] = demoGenData(n0,n0,k,d,sep,ecc,frc); +% figure(1); clf; visualizeData( X0, 2, H0 ); title('train'); +% figure(2); clf; visualizeData( X1, 2, H1 ); title('test'); +% +% See also visualizeData, demoCluster +% +% Piotr's Computer Vision Matlab Toolbox Version 3.20 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% generate mixing weights and adjust n0 and n1 for noise fraction +w=0; while(any(w<=1/(4*k))), w=rand(k,1); w=w/sum(w); end +if( nargin<7 ), frc=0; end; frc=max(0,min(frc,1)); +n=floor(frc*n0); n0=n0-n; ns0=[ceil(n0*w); n]; +n=floor(frc*n1); n1=n1-n; ns1=[ceil(n1*w); n]; + +% create sep-separated Gaussian clusters of maximum eccentricity ecc +for trial=1:1000 + lam = ones(k,1)/1000; + n0=sum(ns0); X0=zeros(n0,d); H0=zeros(n0,1); n0=0; + n1=sum(ns1); X1=zeros(n1,d); H1=zeros(n1,1); n1=0; + mu = randn(k,d)*sqrt(k)*sqrt(sep)*trial/10; + for i = 1:k + % generate a random covariance matrix S=C'*C + U=rand(d,d)-0.5; U=sqrtm(inv(U*U'))*U; + L=diag(rand(d,1)*(ecc-1)+1).^2/100; C=chol(U*L*U'); + % populate X0, H0 + n=ns0(i); X0j=randn(n,d)*C + mu(ones(n,1)*i,:); + H0(n0+1:n0+n)=i; X0(n0+1:n0+n,:)=X0j; n0=n0+n; + if(n>1), lam(i) = sqrt(trace(cov(X0j))); end + % populate X1, H1 + n=ns1(i); X1j=randn(n,d)*C + mu(ones(n,1)*i,:); + H1(n1+1:n1+n)=i; X1(n1+1:n1+n,:)=X1j; n1=n1+n; + end + % check that degree of separation is sufficient (see Dasgupta 99) + % use "lam=sqrt(trace(S))" instead of "lam=sqrt(eigs(S,1))*d" + S = pdist2(mu,mu,'euclidean'); S(eye(k)>0)=inf; + for i=1:k, for j=1:k, S(i,j)=S(i,j)/max(lam(i),lam(j)); end; end + if(all(S(:)>=sep)), break; end +end; assert(trial<1000); + +% add uniformly distributed noise and permute order +if( frc>0 ) + v=max(abs(X0(:))); if(n1), v=max(v,max(abs(X1(:)))); end + % populate X0, H0 + n=ns0(k+1); X0j=(rand(n,d)-.5)*v*2.5; + H0(n0+1:n0+n)=-1; X0(n0+1:n0+n,:)=X0j; + n0=n0+n; p=randperm(n0); X0=X0(p,:); H0=H0(p); + % populate X1, H1 + n=ns1(k+1); X1j=(rand(n,d)-.5)*v*2.5; + H1(n1+1:n1+n)=-1; X1(n1+1:n1+n,:)=X1j; + n1=n1+n; p=randperm(n1); X1=X1(p,:); H1=H1(p); +end + +end diff --git a/classify/distMatrixShow.m b/classify/distMatrixShow.m new file mode 100644 index 0000000..7890f35 --- /dev/null +++ b/classify/distMatrixShow.m @@ -0,0 +1,73 
@@ +function [D, Dsm] = distMatrixShow( D, IDX, show ) +% Useful visualization of a distance matrix of clustered points. +% +% D is sorted into k blocks, where the ith block contains all the points in +% cluster i. When D is displayed the blocks are shown explicitly. Hence +% for a good clustering (under a spherical gaussian assumption) the +% 'diagonal' blocks ought to be mostly dark, and all other block ought to be +% relatively white. One can thus quickly visualize the quality of the +% clustering, or even how clusterable the points are. Outliers (according +% to IDX) are removed from D. +% +% USAGE +% [D, Dsm] = distMatrixShow( D, IDX, [show] ) +% +% INPUTS +% D - nxn distance matrix +% IDX - cluster membership [see kmeans2.m] +% show - [1] will display results in figure(show) +% +% OUTPUTS +% D - sorted nxn distance matrix +% Dsm - sorted and smoothed nxn distance matrix +% +% EXAMPLE +% % not the best example since points are already ordered +% [X,IDX] = demoGenData(100,0,5,2,10,2,0); +% distMatrixShow( pdist2(X,X), IDX ); +% +% See also VISUALIZEDATA, KMEANS2 +% +% Piotr's Computer Vision Matlab Toolbox Version 2.0 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<3 || isempty(show) ); show=1; end + +k = max(IDX); +n = size(D,1); + +%%% remove outliers from D and IDX +inliers = IDX>0; +D = D( inliers, inliers ); +IDX = IDX( inliers ); + +%%% get order of points and rearrange D and IDX +order = IDX2order( IDX ); +IDX = IDX( order ); +D = D( order, order ); + +%%% compute smoothed version of D +cnts = zeros(1,k); for i=1:k; cnts(i)=sum(IDX==i); end +cumCnts = cumsum(cnts); cumCnts2=[0 cumCnts]; Dsm = D; +inds = 1:k; inds = inds( cnts>0 ); +for i=inds + rs = cumCnts2(i)+1:cumCnts2(i+1); + for j=inds + cs = cumCnts2(j)+1:cumCnts2(j+1); + ds = D( rs, cs ); + Dsm( rs, cs ) = mean(ds(:)); + end; +end; + +%%% show D and lines seperating super clusters. 
+if(show) + figure(show); clf; + subplot(1,2,1); im(D); hold('on') + for i=1:k-1 + line( [.5,n+.5], [cumCnts(i)+.5,cumCnts(i)+.5] ); + line( [cumCnts(i)+.5,cumCnts(i)+.5], [.5,n+.5] ); + end; + hold('off'); + subplot(1,2,2); im( Dsm ); +end diff --git a/classify/fernsClfApply.m b/classify/fernsClfApply.m new file mode 100644 index 0000000..71e6a0f --- /dev/null +++ b/classify/fernsClfApply.m @@ -0,0 +1,28 @@ +function [hs,probs] = fernsClfApply( data, ferns, inds ) +% Apply learned fern classifier. +% +% USAGE +% [hs,probs] = fernsClfApply( data, ferns, [inds] ) +% +% INPUTS +% data - [NxF] N length F binary feature vectors +% ferns - learned fern classification model +% inds - [NxM] cached inds (from previous call to fernsInds) +% +% OUTPUTS +% hs - [Nx1] predicted output labels +% probs - [NxH] predicted output label probabilities +% +% EXAMPLE +% +% See also fernsClfTrain, fernsInds +% +% Piotr's Computer Vision Matlab Toolbox Version 2.50 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] +if( nargin<3 || isempty(inds) ) + inds = fernsInds(data,ferns.fids,ferns.thrs); end +[N,M]=size(inds); H=ferns.H; probs=zeros(N,H); +for m=1:M, probs = probs + ferns.pFern(inds(:,m),:,m); end +if(ferns.bayes==0), probs=probs/M; end; [~,hs]=max(probs,[],2); +end diff --git a/classify/fernsClfTrain.m b/classify/fernsClfTrain.m new file mode 100644 index 0000000..094a02c --- /dev/null +++ b/classify/fernsClfTrain.m @@ -0,0 +1,92 @@ +function [ferns,hsPr] = fernsClfTrain( data, hs, varargin ) +% Train random fern classifier. +% +% See "Fast Keypoint Recognition in Ten Lines of Code" by Mustafa Ozuysal, +% Pascal Fua and Vincent Lepetit, CVPR07. 
+% +% Dimensions: +% M - number ferns +% S - fern depth +% F - number features +% N - number input vectors +% H - number classes +% +% USAGE +% [ferns,hsPr] = fernsClfTrain( data, hs, [varargin] ) +% +% INPUTS +% data - [NxF] N length F feature vectors +% hs - [Nx1] target output labels in [1,H] +% varargin - additional params (struct or name/value pairs) +% .S - [10] fern depth (ferns are exponential in S) +% .M - [50] number of ferns to train +% .thrr - [0 1] range for randomly generated thresholds +% .bayes - [1] if true combine probs using bayes assumption +% .ferns - [] if given reuse previous ferns (recompute pFern) +% +% OUTPUTS +% ferns - learned fern model w the following fields +% .fids - [MxS] feature ids for each fern for each depth +% .thrs - [MxS] threshold corresponding to each fid +% .pFern - [2^SxHxM] learned log probs at fern leaves +% .bayes - if true combine probs using bayes assumption +% .inds - [NxM] cached indices for original training data +% .H - number classes +% hsPr - [Nx1] predicted output labels +% +% EXAMPLE +% N=5000; H=5; d=2; [xs0,hs0,xs1,hs1]=demoGenData(N,N,H,d,1,1); +% fernPrm=struct('S',4,'M',50,'thrr',[-1 1],'bayes',1); +% tic, [ferns,hsPr0]=fernsClfTrain(xs0,hs0,fernPrm); toc +% tic, hsPr1 = fernsClfApply( xs1, ferns ); toc +% e0=mean(hsPr0~=hs0); e1=mean(hsPr1~=hs1); +% fprintf('errors trn=%f tst=%f\n',e0,e1); figure(1); +% subplot(2,2,1); visualizeData(xs0,2,hs0); +% subplot(2,2,2); visualizeData(xs0,2,hsPr0); +% subplot(2,2,3); visualizeData(xs1,2,hs1); +% subplot(2,2,4); visualizeData(xs1,2,hsPr1); +% +% See also fernsClfApply, fernsInds +% +% Piotr's Computer Vision Matlab Toolbox Version 2.61 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get additional parameters and check dimensions +dfs={'S',10,'M',50,'thrr',[0 1],'bayes',1,'ferns',[]}; +[S,M,thrr,bayes,ferns]=getPrmDflt(varargin,dfs,1); +[N,F]=size(data); assert(length(hs)==N); +H=max(hs); assert(all(hs>0)); assert(S<=20); + +if( isempty(ferns) ) + % create ferns model and compute inds (w/o field pFern) + thrs=rand(M,S)*(thrr(2)-thrr(1))+thrr(1); + fids=uint32(floor(rand(M,S)*F+1)); inds=fernsInds(data,fids,thrs); + ferns=struct('fids',fids,'thrs',thrs,'bayes',bayes,'H',H,'inds',inds); +else + % re-use cached model (will need to recompute pFern) + ferns.H=H; ferns.pFern=[]; inds=ferns.inds; assert(size(inds,1)==N); +end + +% get counts for each leaf for each class for each fern +pFern = zeros(2^S,H,M); edges = 1:2^S; +for h=1:H, inds1=inds(hs==h,:); + for m=1:M, pFern(:,h,m)=histc(inds1(:,m),edges); end +end +pFern = pFern + bayes; + +% convert fern leaf class counts into probabilities +if( bayes<=0 ) + norm = 1./sum(pFern,2); + pFern = bsxfun(@times,pFern,norm); +else + norm = 1./sum(pFern,1); + pFern = bsxfun(@times,pFern,norm); + pFern=log(pFern); +end + +% store pFern and compute output values +ferns.pFern=pFern; clear pFern; +if(nargout==2), hsPr=fernsClfApply([],ferns,inds); end + +end diff --git a/classify/fernsInds.m b/classify/fernsInds.m new file mode 100644 index 0000000..1d4e2de --- /dev/null +++ b/classify/fernsInds.m @@ -0,0 +1,39 @@ +function inds = fernsInds( data, fids, thrs ) +% Compute indices for each input by each fern. 
+% +% USAGE +% inds = fernsInds( data, fids, thrs ) +% +% INPUTS +% data - [NxF] N length F binary feature vectors +% fids - [MxS] feature ids for each fern for each depth +% thrs - [MxS] threshold corresponding to each fid +% +% OUTPUTS +% inds - [NxM] computed indices for each input by each fern +% +% EXAMPLE +% +% See also fernsClfTrain, fernsClfApply +% +% Piotr's Computer Vision Matlab Toolbox Version 2.50 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +inds = fernsInds1( data, fids, thrs ); + +%%% OLD MATLAB CODE -- NOW IN MEX +% [M,S]=size(fids); N=size(data,1); +% inds = zeros(N,M,'uint32'); +% for n=1:N +% for m=1:M +% for s=1:S +% inds(n,m)=inds(n,m)*2; +% if( data(n,fids(m,s)) +typedef unsigned int uint; + +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + int N, F, M, S, n, f, m, s; + double *data, *thrs; + uint *fids, *inds; + + /* Error checking on arguments */ + if( nrhs!=3) mexErrMsgTxt("Three input arguments required."); + if( nlhs>1 ) mexErrMsgTxt("Too many output arguments."); + if( !mxIsClass(prhs[0], "double") || !mxIsClass(prhs[1], "uint32") + || !mxIsClass(prhs[2], "double")) + mexErrMsgTxt("Input arrays are of incorrect type."); + + /* extract inputs */ + data = (double*) mxGetData(prhs[0]); /* N x F */ + fids = (uint*) mxGetData(prhs[1]); /* M x S */ + thrs = (double*) mxGetData(prhs[2]); /* N x F */ + N=mxGetM(prhs[0]); F=mxGetN(prhs[0]); + M=mxGetM(prhs[1]); S=mxGetN(prhs[1]); + + /* create outputs */ + plhs[0] = mxCreateNumericMatrix(N, M, mxUINT32_CLASS, mxREAL); + inds = (uint*) mxGetData(plhs[0]); /* N x M */ + + /* compute inds */ + for(m=0; m=sum(w)/2); +m = x(ind); +end diff --git a/classify/forestApply.m b/classify/forestApply.m new file mode 100644 index 0000000..e64500c --- /dev/null +++ b/classify/forestApply.m @@ -0,0 +1,40 @@ +function [hs,ps] = forestApply( data, forest, maxDepth, minCount, best ) +% Apply learned 
forest classifier. +% +% USAGE +% [hs,ps] = forestApply( data, forest, [maxDepth], [minCount], [best] ) +% +% INPUTS +% data - [NxF] N length F feature vectors +% forest - learned forest classification model +% maxDepth - [] maximum depth of tree +% minCount - [] minimum number of data points to allow split +% best - [0] if true use single best prediction per tree +% +% OUTPUTS +% hs - [Nx1] predicted output labels +% ps - [NxH] predicted output label probabilities +% +% EXAMPLE +% +% See also forestTrain +% +% Piotr's Computer Vision Matlab Toolbox Version 3.24 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] +if(nargin<3 || isempty(maxDepth)), maxDepth=0; end +if(nargin<4 || isempty(minCount)), minCount=0; end +if(nargin<5 || isempty(best)), best=0; end +assert(isa(data,'single')); M=length(forest); +H=size(forest(1).distr,2); N=size(data,1); +if(best), hs=zeros(N,M); else ps=zeros(N,H); end +discr=iscell(forest(1).hs); if(discr), best=1; hs=cell(N,M); end +for i=1:M, tree=forest(i); + if(maxDepth>0), tree.child(tree.depth>=maxDepth) = 0; end + if(minCount>0), tree.child(tree.count<=minCount) = 0; end + ids = forestInds(data,tree.thrs,tree.fids,tree.child); + if(best), hs(:,i)=tree.hs(ids); else ps=ps+tree.distr(ids,:); end +end +if(discr), ps=[]; return; end % output is actually {NxM} in this case +if(best), ps=histc(hs',1:H)'; end; [~,hs]=max(ps,[],2); ps=ps/M; +end diff --git a/classify/forestFindThr.cpp b/classify/forestFindThr.cpp new file mode 100644 index 0000000..3767894 --- /dev/null +++ b/classify/forestFindThr.cpp @@ -0,0 +1,86 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.24 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include +#include +#include +#include + +typedef unsigned int uint32; +#define gini(p) p*p +#define entropy(p) (-p*flog2(float(p))) + +// fast approximate log2(x) from Paul Mineiro +inline float flog2( float x ) { + union { float f; uint32_t i; } vx = { x }; + union { uint32_t i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 }; + float y = float(vx.i); y *= 1.1920928955078125e-7f; + return y - 124.22551499f - 1.498030302f * mx.f + - 1.72587999f / (0.3520887068f + mx.f); +} + +// perform actual computation +void forestFindThr( int H, int N, int F, const float *data, + const uint32 *hs, const float *ws, const uint32 *order, const int split, + uint32 &fid, float &thr, double &gain ) +{ + double *Wl, *Wr, *W; float *data1; uint32 *order1; + int i, j, j1, j2, h; double vBst, vInit, v, w, wl, wr, g, gl, gr; + Wl=new double[H]; Wr=new double[H]; W=new double[H]; + // perform initialization + vBst = vInit = 0; g = 0; w = 0; fid = 1; thr = 0; + for( i=0; i>0] + j1=order1[j]; j2=order1[j+1]; h=hs[j1]-1; + wl+=ws[j1]; Wl[h]+=ws[j1]; wr-=ws[j1]; Wr[h]-=ws[j1]; + g=0; for( int h1=0; h1=1e-6f ) { + vBst=v; fid=i+1; thr=0.5f*(data1[j1]+data1[j2]); } + } + } + delete [] Wl; delete [] Wr; delete [] W; gain = vInit-vBst; +} + +// [fid,thr,gain] = mexFunction(data,hs,ws,order,H,split); +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + int H, N, F, split; float *data, *ws, thr; + double gain; uint32 *hs, *order, fid; + data = (float*) mxGetData(prhs[0]); + hs = (uint32*) mxGetData(prhs[1]); + ws = (float*) mxGetData(prhs[2]); + order = (uint32*) mxGetData(prhs[3]); + H = (int) mxGetScalar(prhs[4]); + split = (int) mxGetScalar(prhs[5]); + N = (int) mxGetM(prhs[0]); + F = (int) mxGetN(prhs[0]); + forestFindThr(H,N,F,data,hs,ws,order,split,fid,thr,gain); + plhs[0] = 
mxCreateDoubleScalar(fid); + plhs[1] = mxCreateDoubleScalar(thr); + plhs[2] = mxCreateDoubleScalar(gain); +} diff --git a/classify/forestFindThr.mexa64 b/classify/forestFindThr.mexa64 new file mode 100644 index 0000000..916305f Binary files /dev/null and b/classify/forestFindThr.mexa64 differ diff --git a/classify/forestFindThr.mexmaci64 b/classify/forestFindThr.mexmaci64 new file mode 100644 index 0000000..f7728cf Binary files /dev/null and b/classify/forestFindThr.mexmaci64 differ diff --git a/demo.m b/demo.m new file mode 100644 index 0000000..5265daa --- /dev/null +++ b/demo.m @@ -0,0 +1,39 @@ +% +% Visual Tracking Using Attention-Modulated Disintegration and Integration +% +% Jongwon Choi, 2016 +% https://sites.google.com/site/jwchoivision/ +% contact: jwchoi.pil@gmail.com +% +% +% Demo program of SCT4. +% You can use this program freely for research and please acknowledge the paper[1]. +% You should contact to us for any commercial usage. +% When you need the program of SCT6, please contact to the authors. +% +% *** Piotr Dollar's toolbox[2] and some codes from Henriques et al.[3] were utilized. +% +% [1] J. Choi, H. J. Chang, J. Jeong, Y. Demiris, J. Y. Choi, "Visual Tracking +% Using Attention-Modulated Disintegration and Integration", CVPR, 2016 +% [2] P. Dollar, ¡°Piotr¡¯s Computer Vision Matlab Toolbox (PMT)¡±, +% http://vision.ucsd.edu/?pdollar/toolbox/doc/index.html. +% [3] J. F. Henriques, R. Caseiro, P. Martins, and J. 
Batista, ¡°HighSpeed Tracking +% with Kernelized Correlation Filters¡±, IEEE Transactions on PAMI, 2015 +% +% + +addpath('KCF'); +addpath('strong'); +addpath(genpath('PiotrDollarToolbox')); + +% Inputs +base_path = 'Deer'; %dataset path +show_visualization = 1; %visualization option (0: not visible, 1: visible) + +% Load the image data +[img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path); + +% Tracking start +% Position : [left-top-x left-top-y right-bottom-x right-bottom-y] +% time : computational time in second (without time for image load & visualization) +[positions, time] = sct4(video_path, img_files, pos, target_sz, show_visualization); diff --git a/detector/acfDemoCal.m b/detector/acfDemoCal.m new file mode 100644 index 0000000..77fb885 --- /dev/null +++ b/detector/acfDemoCal.m @@ -0,0 +1,49 @@ +% Demo for aggregate channel features object detector on Caltech dataset. +% +% See also acfReadme.m +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%% extract training and testing images and ground truth +cd(fileparts(which('acfDemoCal.m'))); dataDir='../../data/Caltech/'; +for s=1:2 + if(s==1), type='test'; skip=[]; else type='train'; skip=4; end + dbInfo(['Usa' type]); if(s==2), type=['train' int2str2(skip,2)]; end + if(exist([dataDir type '/annotations'],'dir')), continue; end + dbExtract([dataDir type],1,skip); +end + +%% set up opts for training detector (see acfTrain) +opts=acfTrain(); opts.modelDs=[50 20.5]; opts.modelDsPad=[64 32]; +opts.pPyramid.pChns.pColor.smooth=0; opts.nWeak=[64 256 1024 4096]; +opts.pBoost.pTree.maxDepth=5; opts.pBoost.discrete=0; +opts.pBoost.pTree.fracFtrs=1/16; opts.nNeg=25000; opts.nAccNeg=50000; +opts.pPyramid.pChns.pGradHist.softBin=1; opts.pJitter=struct('flip',1); +opts.posGtDir=[dataDir 'train' int2str2(skip,2) '/annotations']; +opts.posImgDir=[dataDir 'train' int2str2(skip,2) '/images']; +opts.pPyramid.pChns.shrink=2; opts.name='models/AcfCaltech+'; +pLoad={'lbls',{'person'},'ilbls',{'people'},'squarify',{3,.41}}; +opts.pLoad = [pLoad 'hRng',[50 inf], 'vRng',[1 1] ]; + +%% optionally switch to LDCF version of detector (see acfTrain) +if( 0 ), opts.filters=[5 4]; opts.name='models/LdcfCaltech'; end + +%% train detector (see acfTrain) +detector = acfTrain( opts ); + +%% modify detector (see acfModify) +pModify=struct('cascThr',-1,'cascCal',.025); +detector=acfModify(detector,pModify); + +%% run detector on a sample image (see acfDetect) +imgNms=bbGt('getFiles',{[dataDir 'test/images']}); +I=imread(imgNms{1862}); tic, bbs=acfDetect(I,detector); toc +figure(1); im(I); bbApply('draw',bbs); pause(.1); + +%% test detector and plot roc (see acfTest) +[~,~,gt,dt]=acfTest('name',opts.name,'imgDir',[dataDir 'test/images'],... + 'gtDir',[dataDir 'test/annotations'],'pLoad',[pLoad, 'hRng',[50 inf],... + 'vRng',[.65 1],'xRng',[5 635],'yRng',[5 475]],... 
+ 'pModify',pModify,'reapply',0,'show',2); diff --git a/detector/acfDemoInria.m b/detector/acfDemoInria.m new file mode 100644 index 0000000..7a5d6d0 --- /dev/null +++ b/detector/acfDemoInria.m @@ -0,0 +1,61 @@ +% Demo for aggregate channel features object detector on Inria dataset. +% +% See also acfReadme.m +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%% extract training and testing images and ground truth +cd(fileparts(which('acfDemoInria.m'))); dataDir='../../data/Inria/'; +for s=1:2, pth=dbInfo('InriaTest'); + if(s==1), set='00'; type='train'; else set='01'; type='test'; end + if(exist([dataDir type '/posGt'],'dir')), continue; end + seqIo([pth '/videos/set' set '/V000'],'toImgs',[dataDir type '/pos']); + seqIo([pth '/videos/set' set '/V001'],'toImgs',[dataDir type '/neg']); + V=vbb('vbbLoad',[pth '/annotations/set' set '/V000']); + vbb('vbbToFiles',V,[dataDir type '/posGt']); +end + +%% set up opts for training detector (see acfTrain) +opts=acfTrain(); opts.modelDs=[100 41]; opts.modelDsPad=[128 64]; +opts.posGtDir=[dataDir 'train/posGt']; opts.nWeak=[32 128 512 2048]; +opts.posImgDir=[dataDir 'train/pos']; opts.pJitter=struct('flip',1); +opts.negImgDir=[dataDir 'train/neg']; opts.pBoost.pTree.fracFtrs=1/16; +opts.pLoad={'squarify',{3,.41}}; opts.name='models/AcfInria'; + +%% optionally switch to LDCF version of detector (see acfTrain) +if( 0 ) + opts.filters=[5 4]; opts.pJitter=struct('flip',1,'nTrn',3,'mTrn',1); + opts.pBoost.pTree.maxDepth=3; opts.pBoost.discrete=0; opts.seed=2; + opts.pPyramid.pChns.shrink=2; opts.name='models/LdcfInria'; +end + +%% train detector (see acfTrain) +detector = acfTrain( opts ); + +%% modify detector (see acfModify) +pModify=struct('cascThr',-1,'cascCal',.01); +detector=acfModify(detector,pModify); + +%% run detector on a sample image (see acfDetect) +imgNms=bbGt('getFiles',{[dataDir 
'test/pos']}); +I=imread(imgNms{1}); tic, bbs=acfDetect(I,detector); toc +figure(1); im(I); bbApply('draw',bbs); pause(.1); + +%% test detector and plot roc (see acfTest) +[miss,~,gt,dt]=acfTest('name',opts.name,'imgDir',[dataDir 'test/pos'],... + 'gtDir',[dataDir 'test/posGt'],'pLoad',opts.pLoad,... + 'pModify',pModify,'reapply',0,'show',2); + +%% optional timing test for detector (should be ~30 fps) +if( 0 ) + detector1=acfModify(detector,'pad',[0 0]); n=60; Is=cell(1,n); + for i=1:n, Is{i}=imResample(imread(imgNms{i}),[480 640]); end + tic, for i=1:n, acfDetect(Is{i},detector1); end; + fprintf('Detector runs at %.2f fps on 640x480 images.\n',n/toc); +end + +%% optionally show top false positives ('type' can be 'fp','fn','tp','dt') +if( 0 ), bbGt('cropRes',gt,dt,imgNms,'type','fp','n',50,... + 'show',3,'dims',opts.modelDs([2 1])); end diff --git a/detector/acfDetect.m b/detector/acfDetect.m new file mode 100644 index 0000000..a28981a --- /dev/null +++ b/detector/acfDetect.m @@ -0,0 +1,88 @@ +function bbs = acfDetect( I, detector, fileName ) +% Run aggregate channel features object detector on given image(s). +% +% The input 'I' can either be a single image (or filename) or a cell array +% of images (or filenames). In the first case, the return is a set of bbs +% where each row has the format [x y w h score] and score is the confidence +% of detection. If the input is a cell array, the output is a cell array +% where each element is a set of bbs in the form above (in this case a +% parfor loop is used to speed execution). If 'fileName' is specified, the +% bbs are saved to a comma separated text file and the output is set to +% bbs=1. If saving detections for multiple images the output is stored in +% the format [imgId x y w h score] and imgId is a one-indexed image id. +% +% A cell of detectors trained with the same channels can be specified, +% detected bbs from each detector are concatenated. 
If using multiple +% detectors and opts.pNms.separate=1 then each bb has a sixth element +% bbType=j, where j is the j-th detector, see bbNms.m for details. +% +% USAGE +% bbs = acfDetect( I, detector, [fileName] ) +% +% INPUTS +% I - input image(s) of filename(s) of input image(s) +% detector - detector(s) trained via acfTrain +% fileName - [] target filename (if specified return is 1) +% +% OUTPUTS +% bbs - [nx5] array of bounding boxes or cell array of bbs +% +% EXAMPLE +% +% See also acfTrain, acfModify, bbGt>loadAll, bbNms +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% run detector on every image +if(nargin<3), fileName=''; end; multiple=iscell(I); +if(~isempty(fileName) && exist(fileName,'file')), bbs=1; return; end +if(~multiple), bbs=acfDetectImg(I,detector); else + n=length(I); bbs=cell(n,1); + parfor i=1:n, bbs{i}=acfDetectImg(I{i},detector); end +end + +% write results to disk if fileName specified +if(isempty(fileName)), return; end +d=fileparts(fileName); if(~isempty(d)&&~exist(d,'dir')), mkdir(d); end +if( multiple ) % add image index to each bb and flatten result + for i=1:n, bbs{i}=[ones(size(bbs{i},1),1)*i bbs{i}]; end + bbs=cell2mat(bbs); +end +dlmwrite(fileName,bbs); bbs=1; + +end + +function bbs = acfDetectImg( I, detector ) +% Run trained sliding-window object detector on given image. 
+Ds=detector; if(~iscell(Ds)), Ds={Ds}; end; nDs=length(Ds); +opts=Ds{1}.opts; pPyramid=opts.pPyramid; pNms=opts.pNms; +imreadf=opts.imreadf; imreadp=opts.imreadp; +shrink=pPyramid.pChns.shrink; pad=pPyramid.pad; +separate=nDs>1 && isfield(pNms,'separate') && pNms.separate; +% read image and compute features (including optionally applying filters) +if(all(ischar(I))), I=feval(imreadf,I,imreadp{:}); end +P=chnsPyramid(I,pPyramid); bbs=cell(P.nScales,nDs); +if(isfield(opts,'filters') && ~isempty(opts.filters)), shrink=shrink*2; + for i=1:P.nScales, fs=opts.filters; C=repmat(P.data{i},[1 1 size(fs,4)]); + for j=1:size(C,3), C(:,:,j)=conv2(C(:,:,j),fs(:,:,j),'same'); end + P.data{i}=imResample(C,.5); + end +end +% apply sliding window classifiers +for i=1:P.nScales + for j=1:nDs, opts=Ds{j}.opts; + modelDsPad=opts.modelDsPad; modelDs=opts.modelDs; + bb = acfDetect1(P.data{i},Ds{j}.clf,shrink,... + modelDsPad(1),modelDsPad(2),opts.stride,opts.cascThr); + shift=(modelDsPad-modelDs)/2-pad; + bb(:,1)=(bb(:,1)+shift(2))/P.scaleshw(i,2); + bb(:,2)=(bb(:,2)+shift(1))/P.scaleshw(i,1); + bb(:,3)=modelDs(2)/P.scales(i); + bb(:,4)=modelDs(1)/P.scales(i); + if(separate), bb(:,6)=j; end; bbs{i,j}=bb; + end +end; bbs=cat(1,bbs{:}); +if(~isempty(pNms)), bbs=bbNms(bbs,pNms); end +end diff --git a/detector/acfModify.m b/detector/acfModify.m new file mode 100644 index 0000000..43d628f --- /dev/null +++ b/detector/acfModify.m @@ -0,0 +1,89 @@ +function detector = acfModify( detector, varargin ) +% Modify aggregate channel features object detector. +% +% Takes an object detector trained by acfTrain() and modifies it. Only +% certain modifications are allowed to the detector and the detector should +% never be modified directly (this may cause the detector to be invalid and +% cause segmentation faults). Any valid modification to a detector after it +% is trained should be performed using acfModify(). 
+% +% The parameters 'nPerOct', 'nOctUp', 'nApprox', 'lambdas', 'pad', 'minDs' +% modify the channel feature pyramid created (see help of chnsPyramid.m for +% more details) and primarily control the scales used. The parameters +% 'pNms', 'stride', 'cascThr' and 'cascCal' modify the detector behavior +% (see help of acfTrain.m for more details). Finally, 'rescale' can be +% used to rescale the trained detector (this change is irreversible). +% +% USAGE +% detector = acfModify( detector, pModify ) +% +% INPUTS +% detector - detector trained via acfTrain +% pModify - parameters (struct or name/value pairs) +% .nPerOct - [] number of scales per octave +% .nOctUp - [] number of upsampled octaves to compute +% .nApprox - [] number of approx. scales to use +% .lambdas - [] coefficients for power law scaling (see BMVC10) +% .pad - [] amount to pad channels (along T/B and L/R) +% .minDs - [] minimum image size for channel computation +% .pNms - [] params for non-maximal suppression (see bbNms.m) +% .stride - [] spatial stride between detection windows +% .cascThr - [] constant cascade threshold (affects speed/accuracy) +% .cascCal - [] cascade calibration (affects speed/accuracy) +% .rescale - [] rescale entire detector by given ratio +% +% OUTPUTS +% detector - modified object detector +% +% EXAMPLE +% +% See also chnsPyramid, bbNms, acfTrain, acfDetect +% +% Piotr's Computer Vision Matlab Toolbox Version 3.20 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get parameters (and copy to detector and pPyramid structs) +opts=detector.opts; p=opts.pPyramid; +dfs={ 'nPerOct',p.nPerOct, 'nOctUp',p.nOctUp, 'nApprox',p.nApprox, ... + 'lambdas',p.lambdas, 'pad',p.pad, 'minDs',p.minDs, 'pNms',opts.pNms, ... + 'stride',opts.stride,'cascThr',opts.cascThr,'cascCal',0,'rescale',1 }; +[p.nPerOct,p.nOctUp,p.nApprox,p.lambdas,p.pad,p.minDs,opts.pNms,... 
+ opts.stride,opts.cascThr,cascCal,rescale] = getPrmDflt(varargin,dfs,1); + +% finalize pPyramid and opts +p.complete=0; p.pChns.complete=0; p=chnsPyramid([],p); p=p.pPyramid; +p.complete=1; p.pChns.complete=1; shrink=p.pChns.shrink; +opts.stride=max(1,round(opts.stride/shrink))*shrink; +opts.pPyramid=p; detector.opts=opts; + +% calibrate and rescale detector +detector.clf.hs = detector.clf.hs+cascCal; +if(rescale~=1), detector=detectorRescale(detector,rescale); end + +end + +function detector = detectorRescale( detector, rescale ) +% Rescale detector by ratio rescale. +opts=detector.opts; shrink=opts.pPyramid.pChns.shrink; +bh=opts.modelDsPad(1)/shrink; bw=opts.modelDsPad(2)/shrink; +opts.stride=max(1,round(opts.stride*rescale/shrink))*shrink; +modelDsPad=round(opts.modelDsPad*rescale/shrink)*shrink; +rescale=modelDsPad./opts.modelDsPad; opts.modelDsPad=modelDsPad; +opts.modelDs=round(opts.modelDs.*rescale); detector.opts=opts; +bh1=opts.modelDsPad(1)/shrink; bw1=opts.modelDsPad(2)/shrink; +% move 0-indexed (x,y) location of each lookup feature +clf=detector.clf; fids=clf.fids; is=find(clf.child>0); +fids=double(fids(is)); n=length(fids); loc=zeros(n,3); +loc(:,3)=floor(fids/bh/bw); fids=fids-loc(:,3)*bh*bw; +loc(:,2)=floor(fids/bh); fids=fids-loc(:,2)*bh; loc(:,1)=fids; +loc(:,1)=min(bh1-1,round(loc(:,1)*rescale(1))); +loc(:,2)=min(bw1-1,round(loc(:,2)*rescale(2))); +fids = loc(:,3)*bh1*bw1 + loc(:,2)*bh1 + loc(:,1); +clf.fids(is)=int32(fids); +% rescale thrs for all features (fpdw trick) +nChns=[detector.info.nChns]; assert(max(loc(:,3)) +lambdas=opts.pPyramid.lambdas; lambdas=sqrt(prod(rescale)).^-lambdas(k); +clf.thrs(is)=clf.thrs(is).*lambdas(loc(:,3)+1)'; detector.clf=clf; +end diff --git a/detector/acfReadme.m b/detector/acfReadme.m new file mode 100644 index 0000000..23dbe9a --- /dev/null +++ b/detector/acfReadme.m @@ -0,0 +1,109 @@ +% Aggregate Channel Features Detector Overview. 
+% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 1. Introduction. %%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% The detector portion of this toolbox implements the Aggregate Channel +% Features (ACF) object detection code. The ACF detector is a fast and +% effective sliding window detector (30 fps on a single core). It is an +% evolution of the Viola & Jones (VJ) detector but with an ~1000 fold +% decrease in false positives (at the same detection rate). ACF is best +% suited for quasi-rigid object detection (e.g. faces, pedestrians, cars). +% +% The detection code was written by Piotr Dollár with contributions by Ron +% Appel and Woonhyun Nam (with bug reports/suggestions from many others). +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 2. Papers. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% The detector was introduced and described through the following papers: +% [1] P. Dollár, Z. Tu, P. Perona and S. Belongie +% "Integral Channel Features", BMVC 2009. +% [2] P. Dollár, S. Belongie and P. Perona +% "The Fastest Pedestrian Detector in the West," BMVC 2010. +% [3] P. Dollár, R. Appel and W. Kienzle +% "Crosstalk Cascades for Frame-Rate Pedestrian Detection," ECCV 2012. +% [4] P. Dollár, R. Appel, S. Belongie and P. Perona +% "Fast Feature Pyramids for Object Detection," PAMI 2014. +% [5] W. Nam, P. Dollár, and J.H. Han +% "Local Decorrelation For Improved Pedestrian Detection," NIPS 2014. +% Please see: http://vision.ucsd.edu/~pdollar/research.html#ObjectDetection +% +% A short summary of the papers, organized by detector name: +% +% [1] "Integral Channel Features" [ICF] - Introduced channel features and +% modified the VJ framework to compute integral images (and Haar wavelets) +% over the channels. Substantially outperformed HOG and at faster speeds. 
+% +% [2] "Fastest Pedestrian Detector in the West" [FPDW] - We observed that +% features computed at one scale can be used to approximate features at +% nearby scales, increasing detector speed with little loss in accuracy. +% +% [3] "Crosstalk Cascades" - This work coupled cascade evaluation at nearby +% positions and scales to exploit correlations in detector responses at +% neighboring locations. Further increased speed of the ICF detector. +% +% [4] "Aggregate Channel Features" [ACF] - We found that single-scale +% square Haar wavelets were sufficient in the ICF framework. Thus instead +% of computing integral images and Haar wavelets, we simply smooth and +% downsample the channels and the features are now single pixel lookups in +% the "aggregated" channels. +% +% [5] "Locally Decorralated Channel Features" [LDCF] - Filtering the +% channel features with appropriate data-derived filters can remove local +% correlations from the channels. Given decorrelated features, boosted +% decision trees generalize much better giving a nice boost in accuracy. +% +% This code implements ACF [4] and LDCF [5]. It does not implement ICF [1] +% or FPDW [2] which are now obsolete and supplemented by ACF. Crosstalk +% cascades [3] are also not used as classifier evalution in ACF is very +% fast (no need to compute Haar wavelets). However, ACF does use the simple +% but highly effective "constant soft cascades" from [3]. +% +% Please cite a subset of the above papers as appropriate if you end up +% using this code to support a publication. Thanks! +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 3. Setup. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% (A) Please install and setup the toolbox as described online: +% http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html +% You may need to recompile for your system, see toolboxCompile. Note: +% enabling OpenMP during compile will significantly speed training. 
+% +% (B) Important: to train the detectors and run the detection demos you +% need to install the Caltech Pedestrian Detection Benchmark available at: +% http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/ +% In particular, make sure to download and install: +% (B1) Matlab evaluation/labeling code version 3.2.1 or later +% (B2) INRIA data (necessary for the INRIA demo) +% (B3) Caltech-USA data (necessary for the Caltech demo) +% Please follow the instruction in the readme of the Caltech code. You only +% need to download the data and code and place appropriately, there is no +% need to look closely at the evaluation code. Initially running the demos +% (acfDemoInria and acfDemoCal) will convert the data from the Caltech data +% format to a format useable by ACF. If this step fails it means the +% Caltech code or data is not properly setup. +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 4. Getting Started. %%%%%%%%%%%%%%%%%%%%%%%% +% +% After performing the setup, see acfDemoInria.m and acfDemoCal.m for demos +% and visualizations. +% +% For an overview of available functionality please see detector/Contents.m +% and channels/Contents.m. The various detector/acf*.m and channels/chns*.m +% functions are well documented and worth checking for additional details. +% +% Finally, a note about pre-trained models. The detector/models/ directory +% contains four pre-trained pedestrian models (ACF/LDCF on INRIA/Caltech). +% Running acfDemoInria/Cal.m with the ACF/LDCF flag toggled gives rise to +% these models (just delete the existing models to retrain from scratch). +% Note, however, that results will differ by up to +/-2% MR depending on +% operating system and random seed (see opts.seed), and the models here are +% not exactly equivalent to the models in the papers (due to evolution of +% the code). Small changes in MR should not be considered significant (nor +% should they be used as a basis for publishing). 
Whenever making a change +% I suggest training/testing the same model with multiple random seeds. +% +% Enjoy and I hope you find the detectors useful :) diff --git a/detector/acfSweeps.m b/detector/acfSweeps.m new file mode 100644 index 0000000..5c2edfa --- /dev/null +++ b/detector/acfSweeps.m @@ -0,0 +1,262 @@ +function acfSweeps +% Parameter sweeps for ACF pedestrian detector. +% +% Running the parameter sweeps requires altering internal flags. +% The sweeps are not well documented, use at your own discretion. +% +% Piotr's Computer Vision Matlab Toolbox Version NEW +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% specify type and location of cluster (see fevalDistr.m) +rtDir=[fileparts(fileparts(fileparts(mfilename('fullpath')))) '/data/']; +pDistr={'type','parfor'}; if(0), matlabpool('open',11); end + +% define all parameter sweeps +expNms = {'FtrsColorSpace','FtrsChnTypes','FtrsGradColorChn',... + 'FtrsGradNormRad','FtrsGradNormConst','FtrsGradOrients',... + 'FtrsGradSoftBins','FtrsSmoothIm','FtrsSmoothChns','FtrsShrink',... + 'DetModelDs','DetModelDsPad','DetStride','DetNumOctaves',... + 'DetNumApprox','DetLambda','DetCascThr','DetCascCal','DetNmsThr',... + 'TrnNumWeak','TrnNumBoot','TrnDepth','TrnNumBins','TrnFracFtrs',... + 'DataNumPos','DataNumNeg','DataNumNegAcc','DataNumNegPer',... 
+ 'DataNumPosStump','DataJitterTran','DataJitterRot'}; +expNms=expNms(:); T = 10; +[opts,lgd,lbl]=createExp(rtDir,expNms); + +% run training and testing jobs +[jobsTrn,jobsTst] = createJobs( rtDir, opts, T ); N=length(expNms); +fprintf('nTrain = %i; nTest = %i\n',length(jobsTrn),length(jobsTst)); +tic, s=fevalDistr('acfTrain',jobsTrn,pDistr); assert(s==1); toc +tic, s=fevalDistr('acfTest',jobsTst,pDistr); assert(s==1); toc + +% create plots for all experiments +for e=1:N, plotExps(rtDir,expNms{e},opts{e},lgd{e},lbl{e},T); end + +end + +function plotExps( rtDir, expNm, opts, lgd, lbl, T ) +% data location and parameters for plotting +plDir=[rtDir 'sweeps/plots/']; if(~exist(plDir,'dir')), mkdir(plDir); end +diary([plDir 'sweeps.txt']); disp([expNm ' [' lbl ']']); N=length(lgd); +pLoad=struct('squarify',{{3,.41}},'hRng',[0 inf]); +pTest=struct('name','', 'imgDir',[rtDir 'Inria/test/pos'],... + 'gtDir',[rtDir 'Inria/test/posGt'], 'pLoad',pLoad); +pTest=repmat(pTest,N,T); for e=1:N, for t=1:T, + pTest(e,t).name=[opts(e).name 'T' int2str2(t,2)]; end; end +% get all miss rates and display error +miss=zeros(N,T); parfor e=1:N*T, miss(e)=acfTest(pTest(e)); end +stds=std(miss,0,2); R=mean(miss,2); msg=' %.2f +/- %.2f [%s]\n'; +for e=1:N, fprintf(msg,R(e)*100,stds(e)*100,lgd{e}); end +% plot sweeps +figPrp = {'Units','Pixels','Position',[800 600 800 400]}; +figure(1); clf; set(1,figPrp{:}); set(gca,'FontSize',24); clr=[0 .69 .94]; +pPl1={'LineWidth',3,'MarkerSize',15,'Color',clr,'MarkerFaceColor',clr}; +pPl2=pPl1; clr=[1 .75 0]; pPl2{6}=clr; pPl2{8}=clr; +for e=1:N, if(lgd{e}(end)=='*'), def=e; end; end; lgd{def}(end)=[]; +plot(R,'-d',pPl1{:}); hold on; plot(def,R(def),'d',pPl2{:}); e=.001; +ylabel('MR'); axis([.5 N+.5 min([R; .15]) max([R; .3])+e]); +if(isempty(lbl)), imLabel(lgd,'bottom',30,{'FontSize',24}); lgd=[]; end +xlabel(lbl); set(gca,'XTick',1:N,'XTickLabel',lgd); +% save plot +fFig=[plDir expNm]; diary('off'); +for t=1:25, try savefig(fFig,1,'png'); break; catch, 
pause(1), end; end +end + +function [jobsTrn,jobsTst] = createJobs( rtDir, opts, T ) +% Prepare all jobs (one train and one test job per set of opts). +opts=[opts{:}]; N=length(opts); NT=N*T; +opts=repmat(opts,1,T); nms=cell(1,NT); +jobsTrn=cell(1,NT); doneTrn=zeros(1,NT); +jobsTst=cell(1,NT); doneTst=zeros(1,NT); +pLoad=struct('squarify',{{3,.41}},'hRng',[0 inf]); +pTest=struct('name','', 'imgDir',[rtDir 'Inria/test/pos'],... + 'gtDir',[rtDir 'Inria/test/posGt'], 'pLoad',pLoad); +for e=1:NT + t=ceil(e/N); opts(e).seed=(t-1)*100000+1; + nm=[opts(e).name 'T' int2str2(t,2)]; + opts(e).name=nm; pTest.name=nm; nms{e}=nm; + doneTrn(e)=exist([nm 'Detector.mat'],'file')==2; jobsTrn{e}={opts(e)}; + doneTst(e)=exist([nm 'Dets.txt'],'file')==2; jobsTst{e}={pTest}; +end +[~,kp]=unique(nms,'stable'); +doneTrn=doneTrn(kp); jobsTrn=jobsTrn(kp); jobsTrn=jobsTrn(~doneTrn); +doneTst=doneTst(kp); jobsTst=jobsTst(kp); jobsTst=jobsTst(~doneTst); +end + +function [opts,lgd,lbl] = createExp( rtDir, expNm ) + +% if expNm is a cell, call recursively and return +if( iscell(expNm) ) + N=length(expNm); opts=cell(1,N); lgd=cell(1,N); lbl=lgd; + for e=1:N, [opts{e},lgd{e},lbl{e}]=createExp(rtDir,expNm{e}); end; return +end + +% default params for detectorTrain.m +dataDir=[rtDir 'Inria/']; +opts=acfTrain(); opts.modelDs=[100 41]; opts.modelDsPad=[128 64]; +opts.posGtDir=[dataDir 'train/posGt']; opts.nWeak=[32 128 512 2048]; +opts.posImgDir=[dataDir 'train/pos']; opts.pJitter=struct('flip',1); +opts.negImgDir=[dataDir 'train/neg']; opts.pBoost.pTree.fracFtrs=1/16; +if(~exist([rtDir 'sweeps/res/'],'dir')), mkdir([rtDir 'sweeps/res/']); end +opts.pBoost.pTree.nThreads=1; + +% setup experiments (N sets of params) +optsDefault=opts; N=100; lgd=cell(1,N); ss=lgd; lbl=''; O=ones(1,N); +pChns=opts.pPyramid.pChns(O); pPyramid=opts.pPyramid(O); opts=opts(O); +switch expNm + case 'FtrsColorSpace' + N=8; clrs={'Gray','rgb','hsv','luv'}; + for e=1:N, pChns(e).pColor.colorSpace=clrs{mod(e-1,4)+1}; end + for 
e=5:N, pChns(e).pGradMag.enabled=0; end + for e=5:N, pChns(e).pGradHist.enabled=0; end + ss=[clrs clrs]; for e=1:4, ss{e}=[ss{e} '+G+H']; end + ss=upper(ss); lgd=ss; + case 'FtrsChnTypes' + nms={'LUV+','G+','H+'}; N=7; + for e=1:N + en=false(1,3); for i=1:3, en(i)=bitget(uint8(e),i); end + pChns(e).pColor.enabled=en(1); pChns(e).pGradMag.enabled=en(2); + pChns(e).pGradHist.enabled=en(3); + nm=[nms{en}]; nm=nm(1:end-1); lgd{e}=nm; ss{e}=nm; + end + case 'FtrsGradColorChn' + lbl='gradient color channel'; + N=4; ss={'Max','L','U','V'}; lgd=ss; + for e=1:N, pChns(e).pGradMag.colorChn=e-1; end + case 'FtrsGradNormRad' + lbl='norm radius'; + vs=[0 1 2 5 10]; N=length(vs); + for e=1:N, pChns(e).pGradMag.normRad=vs(e); end + case 'FtrsGradNormConst' + lbl='norm constant x 10^3'; + vs=[1 2 5 10 20 50 100]; N=length(vs); + for e=1:N, pChns(e).pGradMag.normConst=vs(e)/1000; end + case 'FtrsGradOrients' + lbl='# orientations'; + vs=[2 4 6 8 10 12]; N=length(vs); + for e=1:N, pChns(e).pGradHist.nOrients=vs(e); end + case 'FtrsGradSoftBins' + lbl='use soft bins'; + vs=[0 1]; N=length(vs); + for e=1:N, pChns(e).pGradHist.softBin=vs(e); end + case 'FtrsSmoothIm' + lbl='image smooth radius'; + vs=[0 50 100 200]; N=length(vs); + for e=1:N, pChns(e).pColor.smooth=vs(e)/100; end + for e=1:N, lgd{e}=num2str(vs(e)/100); end + case 'FtrsSmoothChns' + lbl='channel smooth radius'; + vs=[0 50 100 200]; N=length(vs); + for e=1:N, pPyramid(e).smooth=vs(e)/100; end + for e=1:N, lgd{e}=num2str(vs(e)/100); end + case 'FtrsShrink' + lbl='channel shrink'; + vs=2.^(1:4); N=length(vs); + for e=1:N, pChns(e).shrink=vs(e); end + case 'DetModelDs' + lbl='model height'; + rs=1.1.^(-2:2); vs=round(100*rs); ws=round(41*rs); N=length(vs); + for e=1:N, opts(e).modelDs=[vs(e) ws(e)]; end + for e=1:N, opts(e).modelDsPad=opts(e).modelDs+[28 23]; end + case 'DetModelDsPad' + lbl='padded model height'; + rs=1.1.^(-2:2); vs=round(128*rs); ws=round(64*rs); N=length(vs); + for e=1:N, opts(e).modelDsPad=[vs(e) 
ws(e)]; end + case 'DetStride' + lbl='detector stride'; + vs=4:4:16; N=length(vs); + for e=1:N, opts(e).stride=vs(e); end + case 'DetNumOctaves' + lbl='# scales per octave'; + vs=2.^(0:5); N=length(vs); + for e=1:N, pPyramid(e).nPerOct=vs(e); pPyramid(e).nApprox=vs(e)-1; end + case 'DetNumApprox' + lbl='# approx scales'; + vs=2.^(0:5)-1; N=length(vs); + for e=1:N, pPyramid(e).nApprox=vs(e); end + case 'DetLambda' + lbl='lambda x 100'; + vs=-45:15:70; N=length(vs); + for e=[1:4 6:N], pPyramid(e).lambdas=[0 vs(e) vs(e)]/100; end + for e=1:N, lgd{e}=int2str(vs(e)); end; vs=vs+100; + case 'DetCascThr' + lbl='cascade threshold'; + vs=[-.5 -1 -2 -5 -10]; N=length(vs); + for e=1:N, opts(e).cascThr=vs(e); end + for e=1:N, lgd{e}=num2str(vs(e)); end; vs=vs*-10; + case 'DetCascCal' + lbl='cascade offset x 10^4'; + vs=[5 10 20 50 100 200 500]; N=length(vs); + for e=1:N, opts(e).cascCal=vs(e)/1e4; end + case 'DetNmsThr' + lbl='nms overlap'; + vs=25:10:95; N=length(vs); + for e=1:N, opts(e).pNms.overlap=vs(e)/1e2; end + for e=1:N, lgd{e}=['.' 
num2str(vs(e))]; end + case 'TrnNumWeak' + lbl='# decision trees / x'; + vs=2.^(0:3); N=length(vs); + for e=1:N, opts(e).nWeak=opts(e).nWeak/vs(e); end + case 'TrnNumBoot' + lbl='bootstrap schedule'; + vs={5:1:11,5:2:11,3:1:11,3:2:11}; N=length(vs); + ss={'5-1-11','5-2-11','3-1-11','3-2-11'}; lgd=ss; + for e=1:N, opts(e).nWeak=2.^vs{e}; end + case 'TrnDepth' + lbl='tree depth'; + vs=1:5; N=length(vs); + for e=1:N, opts(e).pBoost.pTree.maxDepth=vs(e); end + case 'TrnNumBins' + lbl='# bins'; + vs=2.^(4:8); N=length(vs); + for e=1:N, opts(e).pBoost.pTree.nBins=vs(e); end + case 'TrnFracFtrs' + lbl='fraction features'; + vs=2.^(1:8); N=length(vs); + for e=1:N, opts(e).pBoost.pTree.fracFtrs=1/vs(e); end + case 'DataNumPos' + lbl='# pos examples'; + vs=[2.^(6:9) inf]; N=length(vs); + for e=1:N-1, opts(e).nPos=vs(e); end + case 'DataNumNeg' + lbl='# neg examples'; + vs=[5 10 25 50 100 250]*100; N=length(vs); + for e=1:N, opts(e).nNeg=vs(e); end + case 'DataNumNegAcc' + lbl='# neg examples total'; + vs=[25 50 100 250 500]*100; N=length(vs); + for e=1:N, opts(e).nAccNeg=vs(e); end + case 'DataNumNegPer' + lbl='# neg example / image'; + vs=[5 10 25 50 100]; N=length(vs); + for e=1:N, opts(e).nPerNeg=vs(e); end + case 'DataNumPosStump' + lbl='# pos examples (stumps)'; + vs=[2.^(6:9) 1237 1237]; N=length(vs); lgd{N}='1237*'; + for e=1:N-1, opts(e).nPos=vs(e); opts(e).pBoost.pTree.maxDepth=1; end + case 'DataJitterTran' + lbl='translational jitter'; + vs=[0 1 2 4]; N=length(vs); opts(1).pJitter=struct('flip',1); + for e=2:N, opts(e).pJitter=struct('flip',1,'nTrn',3,'mTrn',vs(e)); end + for e=1:N, lgd{e}=['+/-' int2str(vs(e))]; end + case 'DataJitterRot' + lbl='rotational jitter'; + vs=[0 2 4 8]; N=length(vs); + for e=2:N, opts(e).pJitter=struct('flip',1,'nPhi',3,'mPhi',vs(e)); end + for e=1:N, lgd{e}=['+/-' int2str(vs(e))]; end + otherwise, error('invalid exp: %s',expNm); +end + +% produce final set of opts and find default opts +for e=1:N, if(isempty(lgd{e})), 
lgd{e}=int2str(vs(e)); end; end +for e=1:N, if(isempty(ss{e})), ss{e}=int2str2(vs(e),5); end; end +O=1:N; opts=opts(O); lgd=lgd(O); ss=ss(O); d=0; +for e=1:N, pPyramid(e).pChns=pChns(e); opts(e).pPyramid=pPyramid(e); end +for e=1:N, if(isequal(optsDefault,opts(e))), d=e; break; end; end +if(d==0), disp(expNm); assert(false); end +for e=1:N, opts(e).name=[rtDir 'sweeps/res/' expNm ss{e}]; end +lgd{d}=[lgd{d} '*']; opts(d).name=[rtDir 'sweeps/res/Default']; +if(0), disp([ss' lgd']'); end + +end diff --git a/detector/acfTest.m b/detector/acfTest.m new file mode 100644 index 0000000..977a5db --- /dev/null +++ b/detector/acfTest.m @@ -0,0 +1,66 @@ +function [miss,roc,gt,dt] = acfTest( varargin ) +% Test aggregate channel features object detector given ground truth. +% +% USAGE +% [miss,roc,gt,dt] = acfTest( pTest ) +% +% INPUTS +% pTest - parameters (struct or name/value pairs) +% .name - ['REQ'] detector name +% .imgDir - ['REQ'] dir containing test images +% .gtDir - ['REQ'] dir containing test ground truth +% .pLoad - [] params for bbGt>bbLoad for test data (see bbGt>bbLoad) +% .pModify - [] params for acfModify for modifying detector +% .thr - [.5] threshold on overlap area for comparing two bbs +% .mul - [0] if true allow multiple matches to each gt +% .reapply - [0] if true re-apply detector even if bbs already computed +% .ref - [10.^(-2:.25:0)] reference points (see bbGt>compRoc) +% .lims - [3.1e-3 1e1 .05 1] plot axis limits +% .show - [0] optional figure number for display +% +% OUTPUTS +% miss - log-average miss rate computed at reference points +% roc - [nx3] n data points along roc of form [score fp tp] +% gt - [mx5] ground truth results [x y w h match] (see bbGt>evalRes) +% dt - [nx6] detect results [x y w h score match] (see bbGt>evalRes) +% +% EXAMPLE +% +% See also acfTrain, acfDetect, acfModify, acfDemoInria, bbGt +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get parameters +dfs={ 'name','REQ', 'imgDir','REQ', 'gtDir','REQ', 'pLoad',[], ... + 'pModify',[], 'thr',.5,'mul',0, 'reapply',0, 'ref',10.^(-2:.25:0), ... + 'lims',[3.1e-3 1e1 .05 1], 'show',0 }; +[name,imgDir,gtDir,pLoad,pModify,thr,mul,reapply,ref,lims,show] = ... + getPrmDflt(varargin,dfs,1); + +% run detector on directory of images +bbsNm=[name 'Dets.txt']; +if(reapply && exist(bbsNm,'file')), delete(bbsNm); end +if(reapply || ~exist(bbsNm,'file')) + detector = load([name 'Detector.mat']); + detector = detector.detector; + if(~isempty(pModify)), detector=acfModify(detector,pModify); end + imgNms = bbGt('getFiles',{imgDir}); + acfDetect( imgNms, detector, bbsNm ); +end + +% run evaluation using bbGt +[gt,dt] = bbGt('loadAll',gtDir,bbsNm,pLoad); +[gt,dt] = bbGt('evalRes',gt,dt,thr,mul); +[fp,tp,score,miss] = bbGt('compRoc',gt,dt,1,ref); +miss=exp(mean(log(max(1e-10,1-miss)))); roc=[score fp tp]; + +% optionally plot roc +if( ~show ), return; end +figure(show); plotRoc([fp tp],'logx',1,'logy',1,'xLbl','fppi',... + 'lims',lims,'color','g','smooth',1,'fpTarget',ref); +title(sprintf('log-average miss rate = %.2f%%',miss*100)); +savefig([name 'Roc'],show,'png'); + +end diff --git a/detector/acfTrain.m b/detector/acfTrain.m new file mode 100644 index 0000000..12610b3 --- /dev/null +++ b/detector/acfTrain.m @@ -0,0 +1,345 @@ +function detector = acfTrain( varargin ) +% Train aggregate channel features object detector. +% +% Train aggregate channel features (ACF) object detector as described in: +% P. Dollár, R. Appel, S. Belongie and P. Perona +% "Fast Feature Pyramids for Object Detection", PAMI 2014. +% The ACF detector is fast (30 fps on a single core) and achieves top +% accuracy on rigid object detection. Please see acfReadme.m for details. 
+% +% Takes a set of parameters opts (described in detail below) and trains a +% detector from start to finish including performing multiple rounds of +% bootstrapping if need be. The return is a struct 'detector' for use with +% acfDetect.m which fully defines a sliding window detector. Training is +% fast (on the INRIA pedestrian dataset training takes ~10 minutes on a +% single core or ~3m using four cores). Taking advantage of parallel +% training requires launching matlabpool (see help for matlabpool). The +% trained detector may be altered in certain ways via acfModify(). Calling +% opts=acfTrain() returns all default options. +% +% (1) Specifying features and model: The channel features are defined by +% 'pPyramid'. See chnsCompute.m and chnsPyramid.m for more details. The +% channels may be convolved by a set 'filters' to remove local correlations +% (see our NIPS14 paper on LDCF), improving accuracy but slowing detection. +% If 'filters'=[wFilter,nFilter] these are automatically computed. The +% model dimensions ('modelDs') define the window height and width. The +% padded dimensions ('modelDsPad') define the extended region around object +% candidates that are used for classification. For example, for 100 pixel +% tall pedestrians, typically a 128 pixel tall region is used to make a +% decision. 'pNms' controls non-maximal suppression (see bbNms.m), 'stride' +% controls the window stride, and 'cascThr' and 'cascCal' are the threshold +% and calibration used for the constant soft cascades. Typically, set +% 'cascThr' to -1 and adjust 'cascCal' until the desired recall is reached +% (setting 'cascCal' shifts the final scores output by the detector by the +% given amount). Training alternates between sampling (bootstrapping) and +% training an AdaBoost classifier (clf). 'nWeak' determines the number of +% training stages and number of trees after each stage, e.g. nWeak=[32 128 +% 512 2048] defines four stages with the final clf having 2048 trees. 
+% 'pBoost' specifies parameters for AdaBoost, and 'pBoost.pTree' are the +% decision tree parameters, see adaBoostTrain.m for details. Finally, +% 'seed' is the random seed used and makes results reproducible and 'name' +% defines the location for storing the detector and log file. +% +% (2) Specifying training data location and amount: The training data can +% take on a number of different forms. The positives can be specified using +% either a dir of pre-cropped windows ('posWinDir') or dirs of full images +% ('posImgDir') and ground truth labels ('posGtDir'). The negatives can by +% specified using a dir of pre-cropped windows ('negWinDir'), a dir of full +% images without any positives and from which negatives can be sampled +% ('negImgDir'), and finally if neither 'negWinDir' or 'negImgDir' are +% given negatives are sampled from the images in 'posImgDir' (avoiding the +% positives). For the pre-cropped windows all images must have size at +% least modelDsPad and have the object (of size exactly modelDs) centered. +% 'imreadf' can be used to specify a custom function for loading an image, +% and 'imreadp' are custom additional parameters to imreadf. When sampling +% from full images, 'pLoad' determines how the ground truth is loaded and +% converted to a set of positive bbs (see bbGt>bbLoad). 'nPos' controls the +% total number of positives to sample for training (if nPos=inf the number +% of positives is limited by the training set). 'nNeg' controls the total +% number of negatives to sample and 'nPerNeg' limits the number of +% negatives to sample per image. 'nAccNeg' controls the maximum number of +% negatives that can accumulate over multiple stages of bootstrapping. +% Define 'pJitter' to jitter the positives (see jitterImage.m) and thus +% artificially increase the number of positive training windows. Finally if +% 'winsSave' is true cropped windows are saved to disk as a mat file. 
+% +% USAGE +% detector = acfTrain( opts ) +% opts = acfTrain() +% +% INPUTS +% opts - parameters (struct or name/value pairs) +% (1) features and model: +% .pPyramid - [{}] params for creating pyramid (see chnsPyramid) +% .filters - [] [wxwxnChnsxnFilter] filters or [wFilter,nFilter] +% .modelDs - [] model height+width without padding (eg [100 41]) +% .modelDsPad - [] model height+width with padding (eg [128 64]) +% .pNms - [..] params for non-maximal suppression (see bbNms.m) +% .stride - [4] spatial stride between detection windows +% .cascThr - [-1] constant cascade threshold (affects speed/accuracy) +% .cascCal - [.005] cascade calibration (affects speed/accuracy) +% .nWeak - [128] vector defining number weak clfs per stage +% .pBoost - [..] parameters for boosting (see adaBoostTrain.m) +% .seed - [0] seed for random stream (for reproducibility) +% .name - [''] name to prepend to clf and log filenames +% (2) training data location and amount: +% .posGtDir - [''] dir containing ground truth +% .posImgDir - [''] dir containing full positive images +% .negImgDir - [''] dir containing full negative images +% .posWinDir - [''] dir containing cropped positive windows +% .negWinDir - [''] dir containing cropped negative windows +% .imreadf - [@imread] optional custom function for reading images +% .imreadp - [{}] optional custom parameters for imreadf +% .pLoad - [..] 
params for bbGt>bbLoad (see bbGt) +% .nPos - [inf] max number of pos windows to sample +% .nNeg - [5000] max number of neg windows to sample +% .nPerNeg - [25] max number of neg windows to sample per image +% .nAccNeg - [10000] max number of neg windows to accumulate +% .pJitter - [{}] params for jittering pos windows (see jitterImage) +% .winsSave - [0] if true save cropped windows at each stage to disk +% +% OUTPUTS +% detector - trained object detector (modify only via acfModify) +% .opts - input parameters used for model training +% .clf - learned boosted tree classifier (see adaBoostTrain) +% .info - info about channels (see chnsCompute.m) +% +% EXAMPLE +% +% See also acfReadme, acfDetect, acfDemoInria, acfModify, acfTest, +% chnsCompute, chnsPyramid, adaBoostTrain, bbGt, bbNms, jitterImage +% +% Piotr's Computer Vision Matlab Toolbox Version NEW +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% initialize opts struct +opts = initializeOpts( varargin{:} ); +if(nargin==0), detector=opts; return; end + +% load or initialize detector and begin logging +nm=[opts.name 'Detector.mat']; t=exist(nm,'file'); +if(t), if(nargout), t=load(nm); detector=t.detector; end; return; end +t=fileparts(nm); if(~isempty(t) && ~exist(t,'dir')), mkdir(t); end +detector = struct( 'opts',opts, 'clf',[], 'info',[] ); +startTrain=clock; nm=[opts.name 'Log.txt']; +if(exist(nm,'file')), diary(nm); diary('off'); delete(nm); end; diary(nm); +RandStream.setGlobalStream(RandStream('mrg32k3a','Seed',opts.seed)); + +% iterate bootstraping and training +for stage = 0:numel(opts.nWeak)-1 + diary('on'); fprintf([repmat('-',[1 75]) '\n']); + fprintf('Training stage %i\n',stage); startStage=clock; + + % sample positives and compute info about channels + if( stage==0 ) + [Is1,IsOrig1] = sampleWins( detector, stage, 1 ); + t=ndims(Is1); if(t==3), t=Is1(:,:,1); else t=Is1(:,:,:,1); end + t=chnsCompute(t,opts.pPyramid.pChns); 
detector.info=t.info; + end + + % compute local decorrelation filters + if( stage==0 && length(opts.filters)==2 ) + fs = opts.filters; opts.filters = []; + X1 = chnsCompute1( IsOrig1, opts ); + fs = chnsCorrelation( X1, fs(1), fs(2) ); + opts.filters = fs; detector.opts.filters = fs; + end + + % compute lambdas + if( stage==0 && isempty(opts.pPyramid.lambdas) ) + fprintf('Computing lambdas... '); start=clock; + ds=size(IsOrig1); ds(1:end-1)=1; IsOrig1=mat2cell2(IsOrig1,ds); + ls=chnsScaling(opts.pPyramid.pChns,IsOrig1,0); + ls=round(ls*10^5)/10^5; detector.opts.pPyramid.lambdas=ls; + fprintf('done (time=%.0fs).\n',etime(clock,start)); + end + + % compute features for positives + if( stage==0 ) + X1 = chnsCompute1( Is1, opts ); + X1 = reshape(X1,[],size(X1,4))'; + clear Is1 IsOrig1 ls fs ds t; + end + + % sample negatives and compute features + Is0 = sampleWins( detector, stage, 0 ); + X0 = chnsCompute1( Is0, opts ); clear Is0; + X0 = reshape(X0,[],size(X0,4))'; + + % accumulate negatives from previous stages + if( stage>0 ) + n0=size(X0p,1); n1=max(opts.nNeg,opts.nAccNeg)-size(X0,1); + if(n0>n1 && n1>0), X0p=X0p(randSample(n0,n1),:); end + if(n0>0 && n1>0), X0=[X0p; X0]; end %#ok + end; X0p=X0; + + % train boosted clf + detector.opts.pBoost.nWeak = opts.nWeak(stage+1); + detector.clf = adaBoostTrain(X0,X1,detector.opts.pBoost); + detector.clf.hs = detector.clf.hs + opts.cascCal; + + % update log + fprintf('Done training stage %i (time=%.0fs).\n',... + stage,etime(clock,startStage)); diary('off'); +end + +% save detector +save([opts.name 'Detector.mat'],'detector'); + +% finalize logging +diary('on'); fprintf([repmat('-',[1 75]) '\n']); +fprintf('Done training (time=%.0fs).\n',... + etime(clock,startTrain)); diary('off'); + +end + +function opts = initializeOpts( varargin ) +% Initialize opts struct. +dfs= { 'pPyramid',{}, 'filters',[], ... + 'modelDs',[100 41], 'modelDsPad',[128 64], ... + 'pNms',struct(), 'stride',4, 'cascThr',-1, 'cascCal',.005, ... 
+ 'nWeak',128, 'pBoost', {}, 'seed',0, 'name','', 'posGtDir','', ... + 'posImgDir','', 'negImgDir','', 'posWinDir','', 'negWinDir','', ... + 'imreadf',@imread, 'imreadp',{}, 'pLoad',{}, 'nPos',inf, 'nNeg',5000, ... + 'nPerNeg',25, 'nAccNeg',10000, 'pJitter',{}, 'winsSave',0 }; +opts = getPrmDflt(varargin,dfs,1); +% fill in remaining parameters +p=chnsPyramid([],opts.pPyramid); p=p.pPyramid; +p.minDs=opts.modelDs; shrink=p.pChns.shrink; +opts.modelDsPad=ceil(opts.modelDsPad/shrink)*shrink; +p.pad=ceil((opts.modelDsPad-opts.modelDs)/shrink/2)*shrink; +p=chnsPyramid([],p); p=p.pPyramid; p.complete=1; +p.pChns.complete=1; opts.pPyramid=p; +% initialize pNms, pBoost, pBoost.pTree, and pLoad +dfs={ 'type','maxg', 'overlap',.65, 'ovrDnm','min' }; +opts.pNms=getPrmDflt(opts.pNms,dfs,-1); +dfs={ 'pTree',{}, 'nWeak',0, 'discrete',1, 'verbose',16 }; +opts.pBoost=getPrmDflt(opts.pBoost,dfs,1); +dfs={'nBins',256,'maxDepth',2,'minWeight',.01,'fracFtrs',1,'nThreads',16}; +opts.pBoost.pTree=getPrmDflt(opts.pBoost.pTree,dfs,1); +opts.pLoad=getPrmDflt(opts.pLoad,{'squarify',{0,1}},-1); +opts.pLoad.squarify{2}=opts.modelDs(2)/opts.modelDs(1); +end + +function [Is,IsOrig] = sampleWins( detector, stage, positive ) +% Load or sample windows for training detector. 
+opts=detector.opts; start=clock; +if( positive ), n=opts.nPos; else n=opts.nNeg; end +if( positive ), crDir=opts.posWinDir; else crDir=opts.negWinDir; end +if( exist(crDir,'dir') && stage==0 ) + % if window directory is specified simply load windows + fs=bbGt('getFiles',{crDir}); nImg=length(fs); assert(nImg>0); + if(nImg>n), fs=fs(:,randSample(nImg,n)); else n=nImg; end + for i=1:n, fs{i}=[{opts.imreadf},fs(i),opts.imreadp]; end + Is=cell(1,n); parfor i=1:n, Is{i}=feval(fs{i}{:}); end +else + % sample windows from full images using sampleWins1() + hasGt=positive||isempty(opts.negImgDir); fs={opts.negImgDir}; + if(hasGt), fs={opts.posImgDir,opts.posGtDir}; end + fs=bbGt('getFiles',fs); nImg=size(fs,2); assert(nImg>0); + if(~isinf(n)), fs=fs(:,randperm(nImg)); end; Is=cell(nImg*1000,1); + diary('off'); tid=ticStatus('Sampling windows',1,30); k=0; i=0; batch=64; + while( i + gt=[]; if(hasGt), [~,gt]=bbGt('bbLoad',fs{2,ij},opts.pLoad); end + Is1{j} = sampleWins1( I, gt, detector, stage, positive ); + end + Is1=[Is1{:}]; k1=length(Is1); Is(k+1:k+k1)=Is1; k=k+k1; + if(k>n), Is=Is(randSample(k,n)); k=n; end + i=i+batch; tocStatus(tid,max(i/nImg,k/n)); + end + Is=Is(1:k); diary('on'); + fprintf('Sampled %i windows from %i images.\n',k,i); +end +% optionally jitter positive windows +if(length(Is)<2), Is={}; return; end +nd=ndims(Is{1})+1; Is=cat(nd,Is{:}); IsOrig=Is; +if( positive && isstruct(opts.pJitter) ) + opts.pJitter.hasChn=(nd==4); Is=jitterImage(Is,opts.pJitter); + ds=size(Is); ds(nd)=ds(nd)*ds(nd+1); Is=reshape(Is,ds(1:nd)); +end +% make sure dims are divisible by shrink and not smaller than modelDsPad +ds=size(Is); cr=rem(ds(1:2),opts.pPyramid.pChns.shrink); s=floor(cr/2)+1; +e=ceil(cr/2); Is=Is(s(1):end-e(1),s(2):end-e(2),:,:); ds=size(Is); +if(any(ds(1:2)1/50}),4)); + sig=reshape(full(convmtx2(sig,w,w)),wp+w-1,wp+w-1,[]); + sig=reshape(sig(w:wp,w:wp,:),w^2,w^2); sig=(sig+sig')/2; + % compute filters for each channel from sig (sorted by eigenvalue) + 
[fs,D]=eig(sig); fs=reshape(fs,w,w,[]); + [~,ord]=sort(diag(D),'descend'); + fs=flipdim(flipdim(fs,1),2); %#ok + filters(:,:,i,:)=fs(:,:,ord(1:nFilter)); +end +fprintf('done (time=%.0fs).\n',etime(clock,start)); +end diff --git a/detector/bbApply.m b/detector/bbApply.m new file mode 100644 index 0000000..c24593a --- /dev/null +++ b/detector/bbApply.m @@ -0,0 +1,619 @@ +function varargout = bbApply( action, varargin ) +% Functions for manipulating bounding boxes (bb). +% +% A bounding box (bb) is also known as a position vector or a rectangle +% object. It is a four element vector with the fields: [x y w h]. A set of +% n bbs can be stores as an [nx4] array, most funcitons below can handle +% either a single or multiple bbs. In addtion, typically [nxm] inputs with +% m>4 are ok (with the additional columns ignored/copied to the output). +% +% bbApply contains a number of utility functions for working with bbs. The +% format for accessing the various utility functions is: +% outputs = bbApply( 'action', inputs ); +% The list of functions and help for each is given below. Also, help on +% individual subfunctions can be accessed by: "help bbApply>action". +% +% Compute area of bbs. +% bb = bbApply( 'area', bb ) +% Shift center of bbs. +% bb = bbApply( 'shift', bb, xdel, ydel ) +% Get center of bbs. +% cen = bbApply( 'getCenter', bb ) +% Get bb at intersection of bb1 and bb2 (may be empty). +% bb = bbApply( 'intersect', bb1, bb2 ) +% Get bb that is union of bb1 and bb2 (smallest bb containing both). +% bb = bbApply( 'union', bb1, bb2 ) +% Resize the bbs (without moving their centers). +% bb = bbApply( 'resize', bb, hr, wr, [ar] ) +% Fix bb aspect ratios (without moving the bb centers). +% bbr = bbApply( 'squarify', bb, flag, [ar] ) +% Draw single or multiple bbs to image (calls rectangle()). +% hs = bbApply( 'draw', bb, [col], [lw], [ls], [prop], [ids] ) +% Embed single or multiple bbs directly into image. 
+% I = bbApply( 'embed', I, bb, [varargin] ) +% Crop image regions from I encompassed by bbs. +% [patches, bbs] = bbApply('crop',I,bb,[padEl],[dims]) +% Convert bb relative to absolute coordinates and vice-versa. +% bb = bbApply( 'convert', bb, bbRef, isAbs ) +% Randomly generate bbs that fall in a specified region. +% bbs = bbApply( 'random', pRandom ) +% Convert weighted mask to bbs. +% bbs = bbApply('frMask',M,bbw,bbh,[thr]) +% Create weighted mask encoding bb centers (or extent). +% M = bbApply('toMask',bbs,w,h,[fill],[bgrd]) +% +% USAGE +% varargout = bbApply( action, varargin ); +% +% INPUTS +% action - string specifying action +% varargin - depends on action, see above +% +% OUTPUTS +% varargout - depends on action, see above +% +% EXAMPLE +% +% See also bbApply>area bbApply>shift bbApply>getCenter bbApply>intersect +% bbApply>union bbApply>resize bbApply>squarify bbApply>draw bbApply>crop +% bbApply>convert bbApply>random bbApply>frMask bbApply>toMask +% +% Piotr's Computer Vision Matlab Toolbox Version 3.30 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%#ok<*DEFNU> +varargout = cell(1,max(1,nargout)); +[varargout{:}] = feval(action,varargin{:}); +end + +function a = area( bb ) +% Compute area of bbs. +% +% USAGE +% bb = bbApply( 'area', bb ) +% +% INPUTS +% bb - [nx4] original bbs +% +% OUTPUTS +% a - [nx1] area of each bb +% +% EXAMPLE +% a = bbApply('area', [0 0 10 10]) +% +% See also bbApply +a=prod(bb(:,3:4),2); +end + +function bb = shift( bb, xdel, ydel ) +% Shift center of bbs. 
+% +% USAGE +% bb = bbApply( 'shift', bb, xdel, ydel ) +% +% INPUTS +% bb - [nx4] original bbs +% xdel - amount to shift x coord of each bb left +% ydel - amount to shift y coord of each bb up +% +% OUTPUTS +% bb - [nx4] shifted bbs +% +% EXAMPLE +% bb = bbApply('shift', [0 0 10 10], 1, 2) +% +% See also bbApply +bb(:,1)=bb(:,1)-xdel; bb(:,2)=bb(:,2)-ydel; +end + +function cen = getCenter( bb ) +% Get center of bbs. +% +% USAGE +% cen = bbApply( 'getCenter', bb ) +% +% INPUTS +% bb - [nx4] original bbs +% +% OUTPUTS +% cen - [nx1] centers of bbs +% +% EXAMPLE +% cen = bbApply('getCenter', [0 0 10 10]) +% +% See also bbApply +cen=bb(:,1:2)+bb(:,3:4)/2; +end + +function bb = intersect( bb1, bb2 ) +% Get bb at intersection of bb1 and bb2 (may be empty). +% +% USAGE +% bb = bbApply( 'intersect', bb1, bb2 ) +% +% INPUTS +% bb1 - [nx4] first set of bbs +% bb2 - [nx4] second set of bbs +% +% OUTPUTS +% bb - [nx4] intersection of bbs +% +% EXAMPLE +% bb = bbApply('intersect', [0 0 10 10], [5 5 10 10]) +% +% See also bbApply bbApply>union +n1=size(bb1,1); n2=size(bb2,1); +if(n1==0 || n2==0), bb=zeros(0,4); return, end +if(n1==1 && n2>1), bb1=repmat(bb1,n2,1); n1=n2; end +if(n2==1 && n1>1), bb2=repmat(bb2,n1,1); n2=n1; end +assert(n1==n2); +lcsE=min(bb1(:,1:2)+bb1(:,3:4),bb2(:,1:2)+bb2(:,3:4)); +lcsS=max(bb1(:,1:2),bb2(:,1:2)); empty=any(lcsEintersect +n1=size(bb1,1); n2=size(bb2,1); +if(n1==0 || n2==0), bb=zeros(0,4); return, end +if(n1==1 && n2>1), bb1=repmat(bb1,n2,1); n1=n2; end +if(n2==1 && n1>1), bb2=repmat(bb2,n1,1); n2=n1; end +assert(n1==n2); +lcsE=max(bb1(:,1:2)+bb1(:,3:4),bb2(:,1:2)+bb2(:,3:4)); +lcsS=min(bb1(:,1:2),bb2(:,1:2)); +bb=[lcsS lcsE-lcsS]; +end + +function bb = resize( bb, hr, wr, ar ) +% Resize the bbs (without moving their centers). 
+% +% If wr>0 or hr>0, the w/h of each bb is adjusted in the following order: +% if(hr~=0), h=h*hr; end +% if(wr~=0), w=w*wr; end +% if(hr==0), h=w/ar; end +% if(wr==0), w=h*ar; end +% Only one of hr/wr may be set to 0, and then only if ar>0. If, however, +% hr=wr=0 and ar>0 then resizes bbs such that areas and centers are +% preserved but aspect ratio becomes ar. +% +% USAGE +% bb = bbApply( 'resize', bb, hr, wr, [ar] ) +% +% INPUTS +% bb - [nx4] original bbs +% hr - ratio by which to multiply height (or 0) +% wr - ratio by which to multiply width (or 0) +% ar - [0] target aspect ratio (used only if hr=0 or wr=0) +% +% OUTPUT +% bb - [nx4] the output resized bbs +% +% EXAMPLE +% bb = bbApply('resize',[0 0 1 1],1.2,0,.5) % h'=1.2*h; w'=h'/2; +% +% See also bbApply, bbApply>squarify +if(nargin<4), ar=0; end; assert(size(bb,2)>=4); +assert((hr>0&&wr>0)||ar>0); +% preserve area and center, set aspect ratio +if(hr==0 && wr==0), a=sqrt(bb(:,3).*bb(:,4)); ar=sqrt(ar); + d=a*ar-bb(:,3); bb(:,1)=bb(:,1)-d/2; bb(:,3)=bb(:,3)+d; + d=a/ar-bb(:,4); bb(:,2)=bb(:,2)-d/2; bb(:,4)=bb(:,4)+d; return; +end +% possibly adjust h/w based on hr/wr +if(hr~=0), d=(hr-1)*bb(:,4); bb(:,2)=bb(:,2)-d/2; bb(:,4)=bb(:,4)+d; end +if(wr~=0), d=(wr-1)*bb(:,3); bb(:,1)=bb(:,1)-d/2; bb(:,3)=bb(:,3)+d; end +% possibly adjust h/w based on ar and NEW h/w +if(~hr), d=bb(:,3)/ar-bb(:,4); bb(:,2)=bb(:,2)-d/2; bb(:,4)=bb(:,4)+d; end +if(~wr), d=bb(:,4)*ar-bb(:,3); bb(:,1)=bb(:,1)-d/2; bb(:,3)=bb(:,3)+d; end +end + +function bbr = squarify( bb, flag, ar ) +% Fix bb aspect ratios (without moving the bb centers). +% +% The w or h of each bb is adjusted so that w/h=ar. +% The parameter flag controls whether w or h should change: +% flag==0: expand bb to given ar +% flag==1: shrink bb to given ar +% flag==2: use original w, alter h +% flag==3: use original h, alter w +% flag==4: preserve area, alter w and h +% If ar==1 (the default), always converts bb to a square, hence the name. 
+% +% USAGE +% bbr = bbApply( 'squarify', bb, flag, [ar] ) +% +% INPUTS +% bb - [nx4] original bbs +% flag - controls whether w or h should change +% ar - [1] desired aspect ratio +% +% OUTPUT +% bbr - the output 'squarified' bbs +% +% EXAMPLE +% bbr = bbApply('squarify',[0 0 1 2],0) +% +% See also bbApply, bbApply>resize +if(nargin<3 || isempty(ar)), ar=1; end; bbr=bb; +if(flag==4), bbr=resize(bb,0,0,ar); return; end +for i=1:size(bb,1), p=bb(i,1:4); + usew = (flag==0 && p(3)>p(4)*ar) || (flag==1 && p(3)embed, rectangle +[n,m]=size(bb); if(n==0), hs=[]; return; end +if(nargin<2 || isempty(col)), col=[]; end +if(nargin<3 || isempty(lw)), lw=2; end +if(nargin<4 || isempty(ls)), ls='-'; end +if(nargin<5 || isempty(prop)), prop={}; end +if(nargin<6 || isempty(ids)), ids=ones(1,n); end +% prepare display properties +prop=['LineWidth' lw 'LineStyle' ls prop 'EdgeColor']; +tProp={'FontSize',10,'color','w','FontWeight','bold',... + 'VerticalAlignment','bottom'}; k=max(ids); +if(isempty(col)), if(k==1), col='g'; else col=hsv(k); end; end +if(size(col,1)draw, char2img + +% get additional parameters +dfs={'col',[0 255 0],'lw',3,'fh',35,'fcol',[255 0 0]}; +[col,lw,fh,fcol]=getPrmDflt(varargin,dfs,1); +n=size(bb,1); bb(:,1:4)=round(bb(:,1:4)); +if(size(col,1)==1), col=col(ones(1,n),:); end +if(size(fcol,1)==1), fcol=fcol(ones(1,n),:); end +if( ismatrix(I) ), I=I(:,:,[1 1 1]); end +% embed each bb +x0=bb(:,1); x1=x0+bb(:,3)-1; y0=bb(:,2); y1=y0+bb(:,4)-1; +j0=floor((lw-1)/2); j1=ceil((lw-1)/2); h=size(I,1); w=size(I,2); +x00=max(1,x0-j0); x01=min(x0+j1,w); x10=max(1,x1-j0); x11=min(x1+j1,w); +y00=max(1,y0-j0); y01=min(y0+j1,h); y10=max(1,y1-j0); y11=min(y1+j1,h); +for b=1:n + for c=1:3, I([y00(b):y01(b) y10(b):y11(b)],x00(b):x11(b),c)=col(b,c); end + for c=1:3, I(y00(b):y11(b),[x00(b):x01(b) x10(b):x11(b)],c)=col(b,c); end +end +% embed text displaying bb score (inside upper-left bb corner) +if(size(bb,2)<5 || fh==0), return; end +bb(:,1:4)=intersect(bb(:,1:4),[1 1 w h]); +for 
b=1:n + M=char2img(sprintf('%.4g',bb(b,5)),fh); M=M{1}==0; [h,w]=size(M); + y0=bb(b,2); y1=y0+h-1; x0=bb(b,1); x1=x0+w-1; + if( x0>=1 && y0>=1 && x1<=size(I,2) && y1<=size(I,1)) + Ir=I(y0:y1,x0:x1,1); Ig=I(y0:y1,x0:x1,2); Ib=I(y0:y1,x0:x1,3); + Ir(M)=fcol(b,1); Ig(M)=fcol(b,2); Ib(M)=fcol(b,3); + I(y0:y1,x0:x1,:)=cat(3,Ir,Ig,Ib); + end +end +end + +function [patches, bbs] = crop( I, bbs, padEl, dims ) +% Crop image regions from I encompassed by bbs. +% +% The only subtlety is that a pixel centered at location (i,j) would have a +% bb of [j-1/2,i-1/2,1,1]. The -1/2 is because pixels are located at +% integer locations. This is a Matlab convention, to confirm use: +% im(rand(3)); bbApply('draw',[1.5 1.5 1 1],'g') +% If bb contains all integer entries cropping is straightforward. If +% entries are not integers, x=round(x+.499) is used, eg 1.2 actually goes +% to 2 (since it is closer to 1.5 then .5), and likewise for y. +% +% If ~isempty(padEl), image is padded so can extract full bb region (no +% actual padding is done, this is fast). Otherwise bb is intersected with +% the image bb prior to cropping. If padEl is a string ('circular', +% 'replicate', or 'symmetric'), uses padarray to do actual padding (slow). 
+% +% USAGE +% [patches, bbs] = bbApply('crop',I,bb,[padEl],[dims]) +% +% INPUTS +% I - image from which to crop patches +% bbs - bbs that indicate regions to crop +% padEl - [0] value to pad I or [] to indicate no padding (see above) +% dims - [] if specified resize each cropped patch to [w h] +% +% OUTPUTS +% patches - [1xn] cell of cropped image regions +% bbs - actual integer-valued bbs used to crop +% +% EXAMPLE +% I=imread('cameraman.tif'); bb=[-10 -10 100 100]; +% p1=bbApply('crop',I,bb); p2=bbApply('crop',I,bb,'replicate'); +% figure(1); im(I); figure(2); im(p1{1}); figure(3); im(p2{1}); +% +% See also bbApply, ARRAYCROP, PADARRAY, IMRESAMPLE + +% get padEl, bound bb to visible region if empty +if( nargin<3 ), padEl=0; end; h=size(I,1); w=size(I,2); +if( nargin<4 ), dims=[]; end; +if(isempty(padEl)), bbs=intersect([.5 .5 w h],bbs); end +% crop each patch in turn +n=size(bbs,1); patches=cell(1,n); +for i=1:n, [patches{i},bbs(i,1:4)]=crop1(bbs(i,1:4)); end + + function [patch, bb] = crop1( bb ) + % crop single patch (use arrayCrop only if necessary) + lcsS=round(bb([2 1])+.5-.001); lcsE=lcsS+round(bb([4 3]))-1; + if( any(lcsS<1) || lcsE(1)>h || lcsE(2)>w ) + if( ischar(padEl) ) + pt=max(0,1-lcsS(1)); pb=max(0,lcsE(1)-h); + pl=max(0,1-lcsS(2)); pr=max(0,lcsE(2)-w); + lcsS1=max(1,lcsS); lcsE1=min(lcsE,[h w]); + patch = I(lcsS1(1):lcsE1(1),lcsS1(2):lcsE1(2),:); + patch = padarray(patch,[pt pl],padEl,'pre'); + patch = padarray(patch,[pb pr],padEl,'post'); + else + if(ndims(I)==3); lcsS=[lcsS 1]; lcsE=[lcsE 3]; end + patch = arrayCrop(I,lcsS,lcsE,padEl); + end + else + patch = I(lcsS(1):lcsE(1),lcsS(2):lcsE(2),:); + end + bb = [lcsS([2 1]) lcsE([2 1])-lcsS([2 1])+1]; + if(~isempty(dims)), patch=imResample(patch,[dims(2),dims(1)]); end + end +end + +function bb = convert( bb, bbRef, isAbs ) +% Convert bb relative to absolute coordinates and vice-versa. 
+% +% If isAbs==1, bb is assumed to be given in absolute coords, and the output +% is given in coords relative to bbRef. Otherwise, if isAbs==0, bb is +% assumed to be given in coords relative to bbRef and the output is given +% in absolute coords. +% +% USAGE +% bb = bbApply( 'convert', bb, bbRef, isAbs ) +% +% INPUTS +% bb - original bb, either in abs or rel coords +% bbRef - reference bb +% isAbs - 1: bb is in abs coords, 0: bb is in rel coords +% +% OUTPUTS +% bb - converted bb +% +% EXAMPLE +% bbRef=[5 5 15 15]; bba=[10 10 5 5]; +% bbr = bbApply( 'convert', bba, bbRef, 1 ) +% bba2 = bbApply( 'convert', bbr, bbRef, 0 ) +% +% See also bbApply +if( isAbs ) + bb(1:2)=bb(1:2)-bbRef(1:2); + bb=bb./bbRef([3 4 3 4]); +else + bb=bb.*bbRef([3 4 3 4]); + bb(1:2)=bb(1:2)+bbRef(1:2); +end +end + +function bbs = random( varargin ) +% Randomly generate bbs that fall in a specified region. +% +% The vector dims defines the region in which bbs are generated. Specify +% dims=[height width] to generate bbs=[x y w h] such that: 1<=x<=width, +% 1<=y<=height, x+w-1<=width, y+h-1<=height. The biggest bb generated can +% be bb=[1 1 width height]. If dims is a three element vector the third +% coordinate is the depth, in this case bbs=[x y w h d] where 1<=d<=depth. +% +% A number of constraints can be specified that control the size and other +% characteristics of the generated bbs. Note that if incompatible +% constraints are specified (e.g. if the maximum width and height are both +% 5 while the minimum area is 100) no bbs will be generated. More +% generally, if fewer than n bbs are generated a warning is displayed. 
+% +% USAGE +% bbs = bbApply( 'random', pRandom ) +% +% INPUTS +% pRandom - parameters (struct or name/value pairs) +% .n - ['REQ'] number of bbs to generate +% .dims - ['REQ'] region in which to generate bbs [height,width] +% .wRng - [1 inf] range for width of bbs (or scalar value) +% .hRng - [1 inf] range for height of bbs (or scalar value) +% .aRng - [1 inf] range for area of bbs +% .arRng - [0 inf] range for aspect ratio (width/height) of bbs +% .unique - [1] if true generate unique bbs +% .maxOverlap - [1] max overlap (intersection/union) between bbs +% .maxIter - [100] max iterations to go w/o changes before giving up +% .show - [0] if true show sample generated bbs +% +% OUTPUTS +% bbs - [nx4] array of randomly generated integer bbs +% +% EXAMPLE +% bbs=bbApply('random','n',50,'dims',[20 20],'arRng',[.5 .5],'show',1); +% +% See also bbApply + +% get parameters +rng=[1 inf]; dfs={ 'n','REQ', 'dims','REQ', 'wRng',rng, 'hRng',rng, ... + 'aRng',rng, 'arRng',[0 inf], 'unique',1, 'maxOverlap',1, ... + 'maxIter',100, 'show',0 }; +[n,dims,wRng,hRng,aRng,arRng,uniqueOnly,maxOverlap,maxIter,show] ... + = getPrmDflt(varargin,dfs,1); +if(length(hRng)==1), hRng=[hRng hRng]; end +if(length(wRng)==1), wRng=[wRng wRng]; end +if(length(dims)==3), d=5; else d=4; end + +% generate random bbs satisfying constraints +bbs=zeros(0,d); ids=zeros(0,1); n1=min(n*10,1000); +M=max(dims)+1; M=M.^(0:d-1); iter=0; k=0; +tid=ticStatus('generating random bbs',1,2); +while( k0 & xs0>0 & ys1<=dims(1) & xs1<=dims(2) & ... + hs>=hRng(1) & hs<=hRng(2) & ws>=wRng(1) & ws<=wRng(2) & ... 
+    as>=aRng(1) & as<=aRng(2) & ars>=arRng(1) & ars<=arRng(2);
+  bbs1=[xs0' ys0' ws' hs' ds']; bbs1=bbs1(kp,:);
+  k0=k; bbs=[bbs; bbs1]; k=size(bbs,1); %#ok
+  if( maxOverlap<1 && k ), bbs=bbs(1:k0,:);
+    for j=1:size(bbs1,1), bbs0=bbs; bb=bbs1(j,:);
+      if(d==5), bbs=bbs(bbs(:,5)==bb(5),:); end
+      if(isempty(bbs)), bbs=[bbs0; bb]; continue; end
+      ws1=min(bbs(:,1)+bbs(:,3),bb(1)+bb(3))-max(bbs(:,1),bb(1));
+      hs1=min(bbs(:,2)+bbs(:,4),bb(2)+bb(4))-max(bbs(:,2),bb(2));
+      o=max(0,ws1).*max(0,hs1); o=o./(bbs(:,3).*bbs(:,4)+bb(3).*bb(4)-o);
+      if(max(o)<=maxOverlap), bbs=[bbs0; bb]; else bbs=bbs0; end
+    end
+  elseif( uniqueOnly && k )
+    ids=[ids; sum(bbs1.*M(ones(1,size(bbs1,1)),:),2)]; %#ok
+    [ids,o]=sort(ids); bbs=bbs(o,:); kp=[ids(1:end-1)~=ids(2:end); true];
+    bbs=bbs(kp,:); ids=ids(kp,:);
+  end
+  k=size(bbs,1); if(k0==k), iter=iter+1; else iter=0; end
+  if(k>n), bbs=bbs(randSample(k,n),:); k=n; end;
+  tocStatus(tid,max(k/n,iter/maxIter));
+end
+if( k<n ), warning('only generated %i of %i bbs',k,n); n=k; end %#ok<WNTAG>
+
+% optionally display a few bbs
+if( show )
+  k=8; figure(show); im(zeros(dims)); cs=uniqueColors(1,k,0,0);
+  if(n>k), bbs1=bbs(randsample(n,k),:); else bbs1=bbs; end
+  bbs1(:,1:2)=bbs1(:,1:2)-.5;
+  for i=1:min(k,n), rectangle('Position',bbs1(i,:),...
+      'EdgeColor',cs(i,:),'LineStyle','--'); end
+end
+
+end
+
+function bbs = frMask( M, bbw, bbh, thr )
+% Convert weighted mask to bbs.
+%
+% Pixels in mask above given threshold (thr) indicate bb centers.
+% +% USAGE +% bbs = bbApply('frMask',M,bbw,bbh,[thr]) +% +% INPUTS +% M - mask +% bbw - bb target width +% bbh - bb target height +% thr - [0] mask threshold +% +% OUTPUTS +% bbs - bounding boxes +% +% EXAMPLE +% w=20; h=10; bbw=5; bbh=8; M=double(rand(h,w)); M(M<.95)=0; +% bbs=bbApply('frMask',M,bbw,bbh); M2=bbApply('toMask',bbs,w,h); +% sum(abs(M(:)-M2(:))) +% +% See also bbApply, bbApply>toMask +if(nargin<4), thr=0; end +ids=find(M>thr); ids=ids(:); h=size(M,1); +if(isempty(ids)), bbs=zeros(0,5); return; end +xs=floor((ids-1)/h); ys=ids-xs*h; xs=xs+1; +bbs=[xs-floor(bbw/2) ys-floor(bbh/2)]; +bbs(:,3)=bbw; bbs(:,4)=bbh; bbs(:,5)=M(ids); +end + +function M = toMask( bbs, w, h, fill, bgrd ) +% Create weighted mask encoding bb centers (or extent). +% +% USAGE +% M = bbApply('toMask',bbs,w,h,[fill],[bgrd]) +% +% INPUTS +% bbs - bounding boxes +% w - mask target width +% h - mask target height +% fill - [0] if 1 encodes extent of bbs +% bgrd - [0] default value for background pixels +% +% OUTPUTS +% M - hxw mask +% +% EXAMPLE +% +% See also bbApply, bbApply>frMask +if(nargin<4||isempty(fill)), fill=0; end +if(nargin<5||isempty(bgrd)), bgrd=0; end +if(size(bbs,2)==4), bbs(:,5)=1; end +M=zeros(h,w); B=true(h,w); n=size(bbs,1); +if( fill==0 ) + p=floor(getCenter(bbs)); p=sub2ind([h w],p(:,2),p(:,1)); + for i=1:n, M(p(i))=M(p(i))+bbs(i,5); end + if(bgrd~=0), B(p)=0; end +else + bbs=[intersect(round(bbs),[1 1 w h]) bbs(:,5)]; n=size(bbs,1); + x0=bbs(:,1); x1=x0+bbs(:,3)-1; y0=bbs(:,2); y1=y0+bbs(:,4)-1; + for i=1:n, y=y0(i):y1(i); x=x0(i):x1(i); + M(y,x)=M(y,x)+bbs(i,5); B(y,x)=0; end +end +if(bgrd~=0), M(B)=bgrd; end +end diff --git a/detector/bbGt.m b/detector/bbGt.m new file mode 100644 index 0000000..0b2da2a --- /dev/null +++ b/detector/bbGt.m @@ -0,0 +1,885 @@ +function varargout = bbGt( action, varargin ) +% Bounding box (bb) annotations struct, evaluation and sampling routines. 
+% +% bbGt gives access to two types of routines: +% (1) Data structure for storing bb image annotations. +% (2) Routines for evaluating the Pascal criteria for object detection. +% +% The bb annotation stores bb for objects of interest with additional +% information per object, such as occlusion information. The underlying +% data structure is simply a Matlab stuct array, one struct per object. +% This annotation format is an alternative to the annotation format used +% for the PASCAL object challenges (in addition routines for loading PASCAL +% format data are provided, see bbLoad()). +% +% Each object struct has the following fields: +% lbl - a string label describing object type (eg: 'pedestrian') +% bb - [l t w h]: bb indicating predicted object extent +% occ - 0/1 value indicating if bb is occluded +% bbv - [l t w h]: bb indicating visible region (may be [0 0 0 0]) +% ign - 0/1 value indicating bb was marked as ignore +% ang - [0-360] orientation of bb in degrees +% +% Note: although orientation (angle) is stored for each bb, for now it is +% not being used during evaluation or sampling. +% +% bbGt contains a number of utility functions, accessed using: +% outputs = bbGt( 'action', inputs ); +% The list of functions and help for each is given below. Also, help on +% individual subfunctions can be accessed by: "help bbGt>action". +% +%%% (1) Data structure for storing bb image annotations. +% Create annotation of n empty objects. +% objs = bbGt( 'create', [n] ); +% Save bb annotation to text file. +% objs = bbGt( 'bbSave', objs, fName ) +% Load bb annotation from text file and filter. +% [objs,bbs] = bbGt( 'bbLoad', fName, [pLoad] ) +% Get object property 'name' (in a standard array). +% vals = bbGt( 'get', objs, name ) +% Set object property 'name' (with a standard array). +% objs = bbGt( 'set', objs, name, vals ) +% Draw an ellipse for each labeled object. 
+% hs = draw( objs, pDraw ) +% +%%% (2) Routines for evaluating the Pascal criteria for object detection. +% Get all corresponding files in given directories. +% [fs,fs0] = bbGt('getFiles', dirs, [f0], [f1] ) +% Copy corresponding files into given directories. +% fs = bbGt( 'copyFiles', fs, dirs ) +% Load all ground truth and detection bbs in given directories. +% [gt0,dt0] = bbGt( 'loadAll', gtDir, [dtDir], [pLoad] ) +% Evaluates detections against ground truth data. +% [gt,dt] = bbGt( 'evalRes', gt0, dt0, [thr], [mul] ) +% Display evaluation results for given image. +% [hs,hImg] = bbGt( 'showRes' I, gt, dt, varargin ) +% Compute ROC or PR based on outputs of evalRes on multiple images. +% [xs,ys,ref] = bbGt( 'compRoc', gt, dt, roc, ref ) +% Extract true or false positives or negatives for visualization. +% [Is,scores,imgIds] = bbGt( 'cropRes', gt, dt, imFs, varargin ) +% Computes (modified) overlap area between pairs of bbs. +% oa = bbGt( 'compOas', dt, gt, [ig] ) +% Optimized version of compOas for a single pair of bbs. +% oa = bbGt( 'compOa', dt, gt, ig ) +% +% USAGE +% varargout = bbGt( action, varargin ); +% +% INPUTS +% action - string specifying action +% varargin - depends on action, see above +% +% OUTPUTS +% varargout - depends on action, see above +% +% EXAMPLE +% +% See also bbApply, bbLabeler, bbGt>create, bbGt>bbSave, bbGt>bbLoad, +% bbGt>get, bbGt>set, bbGt>draw, bbGt>getFiles, bbGt>copyFiles, +% bbGt>loadAll, bbGt>evalRes, bbGt>showRes, bbGt>compRoc, bbGt>cropRes, +% bbGt>compOas, bbGt>compOa +% +% Piotr's Computer Vision Matlab Toolbox Version 3.26 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%#ok<*DEFNU> +varargout = cell(1,max(1,nargout)); +[varargout{:}] = feval(action,varargin{:}); +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function objs = create( n ) +% Create annotation of n empty objects. 
+% +% USAGE +% objs = bbGt( 'create', [n] ) +% +% INPUTS +% n - [1] number of objects to create +% +% OUTPUTS +% objs - annotation of n 'empty' objects +% +% EXAMPLE +% objs = bbGt('create') +% +% See also bbGt +o=struct('lbl','','bb',[0 0 0 0],'occ',0,'bbv',[0 0 0 0],'ign',0,'ang',0); +if(nargin<1 || n==1), objs=o; return; end; objs=o(ones(n,1)); +end + +function objs = bbSave( objs, fName ) +% Save bb annotation to text file. +% +% USAGE +% objs = bbGt( 'bbSave', objs, fName ) +% +% INPUTS +% objs - objects to save +% fName - name of text file +% +% OUTPUTS +% objs - objects to save +% +% EXAMPLE +% +% See also bbGt, bbGt>bbLoad +vers=3; fid=fopen(fName,'w'); assert(fid>0); +fprintf(fid,'%% bbGt version=%i\n',vers); +objs=set(objs,'bb',round(get(objs,'bb'))); +objs=set(objs,'bbv',round(get(objs,'bbv'))); +objs=set(objs,'ang',round(get(objs,'ang'))); +for i=1:length(objs) + o=objs(i); bb=o.bb; bbv=o.bbv; + fprintf(fid,['%s' repmat(' %i',1,11) '\n'],o.lbl,... + bb,o.occ,bbv,o.ign,o.ang); +end +fclose(fid); +end + +function [objs,bbs] = bbLoad( fName, varargin ) +% Load bb annotation from text file and filter. +% +% FORMAT: Specify 'format' to indicate the format of the ground truth. +% format=0 is the default format (created by bbSave/bbLabeler). format=1 is +% the PASCAL VOC format. Loading ground truth in this format requires +% 'VOCcode/' to be in directory path. It's part of VOCdevkit available from +% the PASCAL VOC: http://pascallin.ecs.soton.ac.uk/challenges/VOC/. Objects +% labeled as either 'truncated' or 'occluded' using the PASCAL definitions +% have the 'occ' flag set to true. Objects labeled as 'difficult' have the +% 'ign' flag set to true. 'class' is used for 'lbl'. format=2 is the +% ImageNet detection format and requires the ImageNet Dev Kit. +% +% FILTERING: After loading, the objects can be filtered. First, only +% objects with lbl in lbls or ilbls or returned. 
For each object, obj.ign +% is set to 1 if it was already at 1, if its label was in ilbls, or if any +% object property is outside of the specified range. The ignore flag is +% used during training and testing so that objects with certain properties +% (such as very small or heavily occluded objects) are excluded. The range +% for each property is a two element vector, [0 inf] by default; a property +% value v is inside the range if v>=rng(1) && v<=rng(2). Tested properties +% include height (h), width (w), area (a), aspect ratio (ar), orientation +% (o), extent x-coordinate (x), extent y-coordinate (y), and fraction +% visible (v). The last property is computed as the visible object area +% divided by the total area, except if o.occ==0, in which case v=1, or +% all(o.bbv==o.bb), which indicates the object may be barely visible, in +% which case v=0 (note that v~=1 in this case). +% +% RETURN: In addition to outputting the objs, bbLoad() can return the +% corresponding bounding boxes (bbs) in an [nx5] array where each row is of +% the form [x y w h ignore], [x y w h] is the bb and ignore=obj.ign. For +% oriented bbs, the extent of the bb is returned, where the extent is the +% smallest axis aligned bb containing the oriented bb. If the oriented bb +% was labeled as a rectangle as opposed to an ellipse, the tightest bb will +% usually increase slightly in size due to the corners of the rectangle +% sticking out beyond the ellipse bounds. The 'ellipse' flag controls how +% an oriented bb is converted to a regular bb. Specifically, set ellipse=1 +% if an ellipse tightly delineates the object and 0 if a rectangle does. +% Finally, if 'squarify' is not empty the (non-ignore) bbs are converted to +% a fixed aspect ratio using bbs=bbApply('squarify',bbs,squarify{:}). 
+% +% USAGE +% [objs,bbs] = bbGt( 'bbLoad', fName, [pLoad] ) +% +% INPUTS +% fName - name of text file +% pLoad - parameters (struct or name/value pairs) +% .format - [0] gt format 0:default, 1:PASCAL, 2:ImageNet +% .ellipse - [1] controls how oriented bb is converted to regular bb +% .squarify - [] controls optional reshaping of bbs to fixed aspect ratio +% .lbls - [] return objs with these labels (or [] to return all) +% .ilbls - [] return objs with these labels but set to ignore +% .hRng - [] range of acceptable obj heights +% .wRng - [] range of acceptable obj widths +% .aRng - [] range of acceptable obj areas +% .arRng - [] range of acceptable obj aspect ratios +% .oRng - [] range of acceptable obj orientations (angles) +% .xRng - [] range of x coordinates of bb extent +% .yRng - [] range of y coordinates of bb extent +% .vRng - [] range of acceptable obj occlusion levels +% +% OUTPUTS +% objs - loaded objects +% bbs - [nx5] array containg ground truth bbs [x y w h ignore] +% +% EXAMPLE +% +% See also bbGt, bbGt>bbSave + +% get parameters +df={'format',0,'ellipse',1,'squarify',[],'lbls',[],'ilbls',[],'hRng',[],... + 'wRng',[],'aRng',[],'arRng',[],'oRng',[],'xRng',[],'yRng',[],'vRng',[]}; +[format,ellipse,sqr,lbls,ilbls,hRng,wRng,aRng,arRng,oRng,xRng,yRng,vRng]... 
+ = getPrmDflt(varargin,df,1); + +% load objs +if( format==0 ) + % load objs stored in default format + fId=fopen(fName); + if(fId==-1), error(['unable to open file: ' fName]); end; v=0; + try v=textscan(fId,'%% bbGt version=%d'); v=v{1}; catch, end %#ok + if(isempty(v)), v=0; end + % read in annotation (m is number of fields for given version v) + if(all(v~=[0 1 2 3])), error('Unknown version %i.',v); end + frmt='%s %d %d %d %d %d %d %d %d %d %d %d'; + ms=[10 10 11 12]; m=ms(v+1); frmt=frmt(1:2+(m-1)*3); + in=textscan(fId,frmt); for i=2:m, in{i}=double(in{i}); end; fclose(fId); + % create objs struct from read in fields + n=length(in{1}); objs=create(n); + for i=1:n, objs(i).lbl=in{1}{i}; objs(i).occ=in{6}(i); end + bb=[in{2} in{3} in{4} in{5}]; bbv=[in{7} in{8} in{9} in{10}]; + for i=1:n, objs(i).bb=bb(i,:); objs(i).bbv=bbv(i,:); end + if(m>=11), for i=1:n, objs(i).ign=in{11}(i); end; end + if(m>=12), for i=1:n, objs(i).ang=in{12}(i); end; end +elseif( format==1 ) + % load objs stored in PASCAL VOC format + if(exist('PASreadrecord.m','file')~=2) + error('bbLoad() requires the PASCAL VOC code.'); end + os=PASreadrecord(fName); os=os.objects; + n=length(os); objs=create(n); + if(~isfield(os,'occluded')), for i=1:n, os(i).occluded=0; end; end + for i=1:n + bb=os(i).bbox; bb(3)=bb(3)-bb(1); bb(4)=bb(4)-bb(2); objs(i).bb=bb; + objs(i).lbl=os(i).class; objs(i).ign=os(i).difficult; + objs(i).occ=os(i).occluded || os(i).truncated; + if(objs(i).occ), objs(i).bbv=bb; end + end +elseif( format==2 ) + if(exist('VOCreadxml.m','file')~=2) + error('bbLoad() requires the ImageNet dev code.'); end + os=VOCreadxml(fName); os=os.annotation; + if(isfield(os,'object')), os=os.object; else os=[]; end + n=length(os); objs=create(n); + for i=1:n + bb=os(i).bndbox; bb=str2double({bb.xmin bb.ymin bb.xmax bb.ymax}); + bb(3)=bb(3)-bb(1); bb(4)=bb(4)-bb(2); objs(i).bb=bb; + objs(i).lbl=os(i).name; + end +else error('bbLoad() unknown format: %i',format); +end + +% only keep objects whose lbl 
is in lbls or ilbls
+if(~isempty(lbls) || ~isempty(ilbls)), K=true(n,1);
+  for i=1:n, K(i)=any(strcmp(objs(i).lbl,[lbls ilbls])); end
+  objs=objs(K); n=length(objs);
+end
+
+% filter objs (set ignore flags)
+for i=1:n, objs(i).ang=mod(objs(i).ang,360); end
+if(~isempty(ilbls)), for i=1:n, v=objs(i).lbl;
+    objs(i).ign = objs(i).ign || any(strcmp(v,ilbls)); end; end
+if(~isempty(xRng)), for i=1:n, v=objs(i).bb(1);
+    objs(i).ign = objs(i).ign || v<xRng(1) || v>xRng(2); end; end
+if(~isempty(xRng)), for i=1:n, v=objs(i).bb(1)+objs(i).bb(3);
+    objs(i).ign = objs(i).ign || v<xRng(1) || v>xRng(2); end; end
+if(~isempty(yRng)), for i=1:n, v=objs(i).bb(2);
+    objs(i).ign = objs(i).ign || v<yRng(1) || v>yRng(2); end; end
+if(~isempty(yRng)), for i=1:n, v=objs(i).bb(2)+objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<yRng(1) || v>yRng(2); end; end
+if(~isempty(wRng)), for i=1:n, v=objs(i).bb(3);
+    objs(i).ign = objs(i).ign || v<wRng(1) || v>wRng(2); end; end
+if(~isempty(hRng)), for i=1:n, v=objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<hRng(1) || v>hRng(2); end; end
+if(~isempty(oRng)), for i=1:n, v=objs(i).ang; if(v>180), v=v-360; end
+    objs(i).ign = objs(i).ign || v<oRng(1) || v>oRng(2); end; end
+if(~isempty(aRng)), for i=1:n, v=objs(i).bb(3)*objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<aRng(1) || v>aRng(2); end; end
+if(~isempty(arRng)), for i=1:n, v=objs(i).bb(3)/objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<arRng(1) || v>arRng(2); end; end
+if(~isempty(vRng)), for i=1:n, o=objs(i); bb=o.bb; bbv=o.bbv; %#ok<ALIGN>
+    if(~o.occ || all(bbv==0)), v=1; elseif(all(bbv==bb)), v=0; else
+      v=(bbv(3)*bbv(4))/(bb(3)*bb(4)); end
+    objs(i).ign = objs(i).ign || v<vRng(1) || v>vRng(2); end
+end
+
+% finally get extent of each bounding box (not trivial if ang~=0)
+if(nargout<=1), return; end; if(n==0), bbs=zeros(0,5); return; end
+bbs=double([reshape([objs.bb],4,[]); [objs.ign]]'); ign=bbs(:,5)==1;
+for i=1:n, bbs(i,1:4)=bbExtent(bbs(i,1:4),objs(i).ang,ellipse); end
+if(~isempty(sqr)), bbs(~ign,:)=bbApply('squarify',bbs(~ign,:),sqr{:}); end
+
+  function bb = bbExtent( bb, ang, ellipse )
+  % get bb that fully contains given
oriented bb + if(~ang), return; end + if( ellipse ) % get bb that encompases ellipse (tighter) + x=bbApply('getCenter',bb); a=bb(4)/2; b=bb(3)/2; ang=ang-90; + rx=(a*cosd(ang))^2+(b*sind(ang))^2; rx=abs(rx/sqrt(rx)); + ry=(a*sind(ang))^2+(b*cosd(ang))^2; ry=abs(ry/sqrt(ry)); + bb=[x(1)-rx x(2)-ry 2*rx 2*ry]; + else % get bb that encompases rectangle (looser) + c=cosd(ang); s=sind(ang); R=[c -s; s c]; rs=bb(3:4)/2; + x0=-rs(1); x1=rs(1); y0=-rs(2); y1=rs(2); pc=bb(1:2)+rs; + p=[x0 y0; x1 y0; x1 y1; x0 y1]*R'+pc(ones(4,1),:); + x0=min(p(:,1)); x1=max(p(:,1)); y0=min(p(:,2)); y1=max(p(:,2)); + bb=[x0 y0 x1-x0 y1-y0]; + end + end +end + +function vals = get( objs, name ) +% Get object property 'name' (in a standard array). +% +% USAGE +% vals = bbGt( 'get', objs, name ) +% +% INPUTS +% objs - [nx1] struct array of objects +% name - property name ('lbl','bb','occ',etc.) +% +% OUTPUTS +% vals - [nxk] array of n values (k=1 or 4) +% +% EXAMPLE +% +% See also bbGt, bbGt>set +nObj=length(objs); if(nObj==0), vals=[]; return; end +switch name + case 'lbl', vals={objs.lbl}'; + case 'bb', vals=reshape([objs.bb]',4,[])'; + case 'occ', vals=[objs.occ]'; + case 'bbv', vals=reshape([objs.bbv]',4,[])'; + case 'ign', vals=[objs.ign]'; + case 'ang', vals=[objs.ang]'; + otherwise, error('unkown type %s',name); +end +end + +function objs = set( objs, name, vals ) +% Set object property 'name' (with a standard array). +% +% USAGE +% objs = bbGt( 'set', objs, name, vals ) +% +% INPUTS +% objs - [nx1] struct array of objects +% name - property name ('lbl','bb','occ',etc.) 
+% vals - [nxk] array of n values (k=1 or 4) +% +% OUTPUTS +% objs - [nx1] struct array of updated objects +% +% EXAMPLE +% +% See also bbGt, bbGt>get +nObj=length(objs); +switch name + case 'lbl', for i=1:nObj, objs(i).lbl=vals{i}; end + case 'bb', for i=1:nObj, objs(i).bb=vals(i,:); end + case 'occ', for i=1:nObj, objs(i).occ=vals(i); end + case 'bbv', for i=1:nObj, objs(i).bbv=vals(i,:); end + case 'ign', for i=1:nObj, objs(i).ign=vals(i); end + case 'ang', for i=1:nObj, objs(i).ang=vals(i); end + otherwise, error('unkown type %s',name); +end +end + +function hs = draw( objs, varargin ) +% Draw an ellipse for each labeled object. +% +% USAGE +% hs = bbGt( 'draw', objs, pDraw ) +% +% INPUTS +% objs - [nx1] struct array of objects +% pDraw - parameters (struct or name/value pairs) +% .col - ['g'] color or [nx1] array of colors +% .lw - [2] line width +% .ls - ['-'] line style +% +% OUTPUTS +% hs - [nx1] handles to drawn graphic objects +% +% EXAMPLE +% +% See also bbGt +dfs={'col',[],'lw',2,'ls','-'}; +[col,lw,ls]=getPrmDflt(varargin,dfs,1); +n=length(objs); hold on; hs=zeros(n,4); +if(isempty(col)), if(n==1), col='g'; else col=hsv(n); end; end +tProp={'FontSize',10,'color','w','FontWeight','bold',... + 'VerticalAlignment','bottom'}; +for i=1:n + bb=objs(i).bb; ci=col(i,:); + hs(i,1)=text(bb(1),bb(2),objs(i).lbl,tProp{:}); + x=bbApply('getCenter',bb); r=bb(3:4)/2; a=objs(i).ang/180*pi-pi/2; + [hs(i,2),hs(i,3),hs(i,4)]=plotEllipse(x(2),x(1),r(2),r(1),a,ci,[],lw,ls); +end; hold off; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function [fs,fs0] = getFiles( dirs, f0, f1 ) +% Get all corresponding files in given directories. +% +% The first dir in 'dirs' serves as the baseline dir. getFiles() returns +% all files in the baseline dir and all corresponding files in the +% remaining dirs to the files in the baseline dir, in the same order. 
Two +% files are in correspondence if they have the same base name (regardless +% of extension). For example, given a file named "name.jpg", a +% corresponding file may be named "name.txt" or "name.jpg.txt". Every file +% in the baseline dir must have a matching file in the remaining dirs. +% +% USAGE +% [fs,fs0] = bbGt('getFiles', dirs, [f0], [f1] ) +% +% INPUTS +% dirs - {1xm} list of m directories +% f0 - [1] index of first file in baseline dir to use +% f1 - [inf] index of last file in baseline dir to use +% +% OUTPUTS +% fs - {mxn} list of full file names in each dir +% fs0 - {1xn} list of file names without path or extensions +% +% EXAMPLE +% +% See also bbGt + +if(nargin<2 || isempty(f0)), f0=1; end +if(nargin<3 || isempty(f1)), f1=inf; end +m=length(dirs); assert(m>0); sep=filesep; + +for d=1:m, dir1=dirs{d}; dir1(dir1=='\')=sep; dir1(dir1=='/')=sep; + if(dir1(end)==sep), dir1(end)=[]; end; dirs{d}=dir1; end + +[fs0,fs1] = getFiles0(dirs{1},f0,f1,sep); +n1=length(fs0); fs=cell(m,n1); fs(1,:)=fs1; +for d=2:m, fs(d,:)=getFiles1(dirs{d},fs0,sep); end + + function [fs0,fs1] = getFiles0( dir1, f0, f1, sep ) + % get fs1 in dir1 (and fs0 without path or extension) + fs1=dir([dir1 sep '*']); fs1={fs1.name}; fs1=fs1(3:end); + fs1=fs1(f0:min(f1,end)); fs0=fs1; n=length(fs0); + if(n==0), error('No files found in baseline dir %s.',dir1); end + for i=1:n, fs1{i}=[dir1 sep fs0{i}]; end + n=length(fs0); for i=1:n, f=fs0{i}; + f(find(f=='.',1,'first'):end)=[]; fs0{i}=f; end + end + + function fs1 = getFiles1( dir1, fs0, sep ) + % get fs1 in dir1 corresponding to fs0 + n=length(fs0); fs1=cell(1,n); i2=0; i1=0; + fs2=dir(dir1); fs2={fs2.name}; n2=length(fs2); + eMsg='''%s'' has no corresponding file in %s.'; + for i0=1:n, r=length(fs0{i0}); match=0; + while(i2getFiles for obtaining a set of corresponding files. 
+% +% USAGE +% fs = bbGt( 'copyFiles', fs, dirs ) +% +% INPUTS +% fs - {mxn} list of full file names in each dir +% dirs - {1xm} list of m target directories +% +% OUTPUTS +% fs - {mxn} list of full file names of copied files +% +% EXAMPLE +% +% See also bbGt, bbGt>getFiles +[m,n]=size(fs); assert(numel(dirs)==m); if(n==0), return; end +for d=1:m + if(~exist(dirs{d},'dir')), mkdir(dirs{d}); end + for i=1:n, f=fs{d,i}; j=[0 find(f=='/' | f=='\')]; j=j(end); + fs{d,i}=[dirs{d} '/' f(j+1:end)]; copyfile(f,fs{d,i}); end +end +end + +function [gt0,dt0] = loadAll( gtDir, dtDir, pLoad ) +% Load all ground truth and detection bbs in given directories. +% +% Loads each ground truth (gt) annotation in gtDir and the corresponding +% detection (dt) in dtDir. gt and dt files must correspond according to +% getFiles(). Alternatively, dtDir may be a filename of a single text file +% that contains the detection results across all images. +% +% Each dt should be a text file where each row contains 5 numbers +% representing a bb (left/top/width/height/score). If dtDir is a text file, +% it should contain the detection results across the full set of images. In +% this case each row in the text file should have an extra leading column +% specifying the image id: (imgId/left/top/width/height/score). +% +% The output of this function can be used in bbGt>evalRes(). 
+% +% USAGE +% [gt0,dt0] = bbGt( 'loadAll', gtDir, [dtDir], [pLoad] ) +% +% INPUTS +% gtDir - location of ground truth +% dtDir - [] optional location of detections +% pLoad - {} params for bbGt>bbLoad() (determine format/filtering) +% +% OUTPUTS +% gt0 - {1xn} loaded ground truth bbs (each is a mx5 array of bbs) +% dt0 - {1xn} loaded detections (each is a mx5 array of bbs) +% +% EXAMPLE +% +% See also bbGt, bbGt>getFiles, bbGt>evalRes + +% get list of files +if(nargin<2), dtDir=[]; end +if(nargin<3), pLoad={}; end +if(isempty(dtDir)), fs=getFiles({gtDir}); gtFs=fs(1,:); else + dtFile=length(dtDir)>4 && strcmp(dtDir(end-3:end),'.txt'); + if(dtFile), dirs={gtDir}; else dirs={gtDir,dtDir}; end + fs=getFiles(dirs); gtFs=fs(1,:); + if(dtFile), dtFs=dtDir; else dtFs=fs(2,:); end +end + +% load ground truth +persistent keyPrv gtPrv; key={gtDir,pLoad}; n=length(gtFs); +if(isequal(key,keyPrv)), gt0=gtPrv; else gt0=cell(1,n); + for i=1:n, [~,gt0{i}]=bbLoad(gtFs{i},pLoad); end + gtPrv=gt0; keyPrv=key; +end + +% load detections +if(isempty(dtDir) || nargout<=1), dt0=cell(0); return; end +if(iscell(dtFs)), dt0=cell(1,n); + for i=1:n, dt1=load(dtFs{i},'-ascii'); + if(numel(dt1)==0), dt1=zeros(0,5); end; dt0{i}=dt1(:,1:5); end +else + dt1=load(dtFs,'-ascii'); if(numel(dt1)==0), dt1=zeros(0,6); end + ids=dt1(:,1); assert(max(ids)<=n); + dt0=cell(1,n); for i=1:n, dt0{i}=dt1(ids==i,2:6); end +end + +end + +function [gt,dt] = evalRes( gt0, dt0, thr, mul ) +% Evaluates detections against ground truth data. +% +% Uses modified Pascal criteria that allows for "ignore" regions. The +% Pascal criteria states that a ground truth bounding box (gtBb) and a +% detected bounding box (dtBb) match if their overlap area (oa): +% oa(gtBb,dtBb) = area(intersect(gtBb,dtBb)) / area(union(gtBb,dtBb)) +% is over a sufficient threshold (typically .5). In the modified criteria, +% the dtBb can match any subregion of a gtBb set to "ignore". 
Choosing +% gtBb' in gtBb that most closely matches dtBb can be done by using +% gtBb'=intersect(dtBb,gtBb). Computing oa(gtBb',dtBb) is equivalent to +% oa'(gtBb,dtBb) = area(intersect(gtBb,dtBb)) / area(dtBb) +% For gtBb set to ignore the above formula for oa is used. +% +% Highest scoring detections are matched first. Matches to standard, +% (non-ignore) gtBb are preferred. Each dtBb and gtBb may be matched at +% most once, except for ignore-gtBb which can be matched multiple times. +% Unmatched dtBb are false-positives, unmatched gtBb are false-negatives. +% Each match between a dtBb and gtBb is a true-positive, except matches +% between dtBb and ignore-gtBb which do not affect the evaluation criteria. +% +% In addition to taking gt/dt results on a single image, evalRes() can take +% cell arrays of gt/dt bbs, in which case evaluation proceeds on each +% element. Use bbGt>loadAll() to load gt/dt for multiple images. +% +% Each gt/dt output row has a flag match that is either -1/0/1: +% for gt: -1=ignore, 0=fn [unmatched], 1=tp [matched] +% for dt: -1=ignore, 0=fp [unmatched], 1=tp [matched] +% +% USAGE +% [gt, dt] = bbGt( 'evalRes', gt0, dt0, [thr], [mul] ) +% +% INPUTS +% gt0 - [mx5] ground truth array with rows [x y w h ignore] +% dt0 - [nx5] detection results array with rows [x y w h score] +% thr - [.5] the threshold on oa for comparing two bbs +% mul - [0] if true allow multiple matches to each gt +% +% OUTPUTS +% gt - [mx5] ground truth results [x y w h match] +% dt - [nx6] detection results [x y w h score match] +% +% EXAMPLE +% +% See also bbGt, bbGt>compOas, bbGt>loadAll + +% get parameters +if(nargin<3 || isempty(thr)), thr=.5; end +if(nargin<4 || isempty(mul)), mul=0; end + +% if gt0 and dt0 are cell arrays run on each element in turn +if( iscell(gt0) && iscell(dt0) ), n=length(gt0); + assert(length(dt0)==n); gt=cell(1,n); dt=gt; + for i=1:n, [gt{i},dt{i}] = evalRes(gt0{i},dt0{i},thr,mul); end; return; +end + +% check inputs +if(isempty(gt0)), 
gt0=zeros(0,5); end +if(isempty(dt0)), dt0=zeros(0,5); end +assert( size(dt0,2)==5 ); nd=size(dt0,1); +assert( size(gt0,2)==5 ); ng=size(gt0,1); + +% sort dt highest score first, sort gt ignore last +[~,ord]=sort(dt0(:,5),'descend'); dt0=dt0(ord,:); +[~,ord]=sort(gt0(:,5),'ascend'); gt0=gt0(ord,:); +gt=gt0; gt(:,5)=-gt(:,5); dt=dt0; dt=[dt zeros(nd,1)]; + +% Attempt to match each (sorted) dt to each (sorted) gt +oa = compOas( dt(:,1:4), gt(:,1:4), gt(:,5)==-1 ); +for d=1:nd + bstOa=thr; bstg=0; bstm=0; % info about best match so far + for g=1:ng + % if this gt already matched, continue to next gt + m=gt(g,5); if( m==1 && ~mul ), continue; end + % if dt already matched, and on ignore gt, nothing more to do + if( bstm~=0 && m==-1 ), break; end + % compute overlap area, continue to next gt unless better match made + if(oa(d,g)evalRes +dfs={'evShow',1,'gtShow',1,'dtShow',1,'cols','krg',... + 'gtLs','-','dtLs','--','lw',3}; +[evShow,gtShow,dtShow,cols,gtLs,dtLs,lw]=getPrmDflt(varargin,dfs,1); +% optionally display image +if(ischar(I)), I=imread(I); end +if(~isempty(I)), hImg=im(I,[],0); title(''); end +% display bbs with or w/o color coding based on output of evalRes +hold on; hs=cell(1,1000); k=0; +if( evShow ) + if(gtShow), for i=1:size(gt,1), k=k+1; + hs{k}=bbApply('draw',gt(i,1:4),cols(gt(i,5)+2),lw,gtLs); end; end + if(dtShow), for i=1:size(dt,1), k=k+1; + hs{k}=bbApply('draw',dt(i,1:5),cols(dt(i,6)+2),lw,dtLs); end; end +else + if(gtShow), k=k+1; hs{k}=bbApply('draw',gt(:,1:4),cols(3),lw,gtLs); end + if(dtShow), k=k+1; hs{k}=bbApply('draw',dt(:,1:5),cols(3),lw,dtLs); end +end +hs=[hs{:}]; hold off; +end + +function [xs,ys,score,ref] = compRoc( gt, dt, roc, ref ) +% Compute ROC or PR based on outputs of evalRes on multiple images. 
+% +% ROC="Receiver operating characteristic"; PR="Precision Recall" +% Also computes result at reference points (ref): +% which for ROC curves is the *detection* rate at reference *FPPI* +% which for PR curves is the *precision* at reference *recall* +% Note, FPPI="false positive per image" +% +% USAGE +% [xs,ys,score,ref] = bbGt( 'compRoc', gt, dt, roc, ref ) +% +% INPUTS +% gt - {1xn} first output of evalRes() for each image +% dt - {1xn} second output of evalRes() for each image +% roc - [1] if 1 compue ROC else compute PR +% ref - [] reference points for ROC or PR curve +% +% OUTPUTS +% xs - x coords for curve: ROC->FPPI; PR->recall +% ys - y coords for curve: ROC->TP; PR->precision +% score - detection scores corresponding to each (x,y) +% ref - recall or precision at each reference point +% +% EXAMPLE +% +% See also bbGt, bbGt>evalRes + +% get additional parameters +if(nargin<3 || isempty(roc)), roc=1; end +if(nargin<4 || isempty(ref)), ref=[]; end +% convert to single matrix, discard ignore bbs +nImg=length(gt); assert(length(dt)==nImg); +gt=cat(1,gt{:}); gt=gt(gt(:,5)~=-1,:); +dt=cat(1,dt{:}); dt=dt(dt(:,6)~=-1,:); +% compute results +if(size(dt,1)==0), xs=0; ys=0; score=0; ref=ref*0; return; end +m=length(ref); np=size(gt,1); score=dt(:,5); tp=dt(:,6); +[score,order]=sort(score,'descend'); tp=tp(order); +fp=double(tp~=1); fp=cumsum(fp); tp=cumsum(tp); +if( roc ) + xs=fp/nImg; ys=tp/np; xs1=[-inf; xs]; ys1=[0; ys]; + for i=1:m, j=find(xs1<=ref(i)); ref(i)=ys1(j(end)); end +else + xs=tp/np; ys=tp./(fp+tp); xs1=[xs; inf]; ys1=[ys; 0]; + for i=1:m, j=find(xs1>=ref(i)); ref(i)=ys1(j(1)); end +end +end + +function [Is,scores,imgIds] = cropRes( gt, dt, imFs, varargin ) +% Extract true or false positives or negatives for visualization. 
+% +% USAGE +% [Is,scores,imgIds] = bbGt( 'cropRes', gt, dt, imFs, varargin ) +% +% INPUTS +% gt - {1xN} first output of evalRes() for each image +% dt - {1xN} second output of evalRes() for each image +% imFs - {1xN} name of each image +% varargin - additional parameters (struct or name/value pairs) +% .dims - ['REQ'] target dimensions for extracted windows +% .pad - [0] padding amount for cropping +% .type - ['fp'] one of: 'fp', 'fn', 'tp', 'dt' +% .n - [100] max number of windows to extract +% .show - [1] figure for displaying results (or 0) +% .fStr - ['%0.1f'] label{i}=num2str(score(i),fStr) +% .embed - [0] if true embed dt/gt bbs into cropped windows +% +% OUTPUTS +% Is - [dimsxn] extracted image windows +% scores - [1xn] detection score for each bb unless 'fn' +% imgIds - [1xn] image id for each cropped window +% +% EXAMPLE +% +% See also bbGt, bbGt>evalRes +dfs={'dims','REQ','pad',0,'type','fp','n',100,... + 'show',1,'fStr','%0.1f','embed',0}; +[dims,pad,type,n,show,fStr,embed]=getPrmDflt(varargin,dfs,1); +N=length(imFs); assert(length(gt)==N && length(dt)==N); +% crop patches either in gt or dt according to type +switch type + case 'fn', bbs=gt; keep=@(bbs) bbs(:,5)==0; + case 'fp', bbs=dt; keep=@(bbs) bbs(:,6)==0; + case 'tp', bbs=dt; keep=@(bbs) bbs(:,6)==1; + case 'dt', bbs=dt; keep=@(bbs) bbs(:,6)>=0; + otherwise, error('unknown type: %s',type); +end +% create ids that will map each bb to correct name +ms=zeros(1,N); for i=1:N, ms(i)=size(bbs{i},1); end; cms=[0 cumsum(ms)]; +ids=zeros(1,sum(ms)); for i=1:N, ids(cms(i)+1:cms(i+1))=i; end +% flatten bbs and keep relevent subset +bbs=cat(1,bbs{:}); K=keep(bbs); bbs=bbs(K,:); ids=ids(K); n=min(n,sum(K)); +% reorder bbs appropriately +if(~strcmp(type,'fn')), [~,ord]=sort(bbs(:,5),'descend'); else + if(size(bbs,1)0)), dims1=dims.*(1+pad); rs=dims1./dims; dims=dims1; end +if(any(pad>0)), bbs=bbApply('resize',bbs,rs(1),rs(2)); end +for i=1:N + locs=find(ids==i); if(isempty(locs)), continue; end; 
I=imread(imFs{i}); + if( embed ) + if(any(strcmp(type,{'fp','dt'}))), bbs1=gt{i}; + else bbs1=dt{i}(:,[1:4 6]); end + I=bbApply('embed',I,bbs1(bbs1(:,5)==0,1:4),'col',[255 0 0]); + I=bbApply('embed',I,bbs1(bbs1(:,5)==1,1:4),'col',[0 255 0]); + end + Is1=bbApply('crop',I,bbs(locs,1:4),'replicate',dims); + for j=1:length(locs), Is{locs(j)}=Is1{j}; end; + scores(locs)=bbs(locs,5); imgIds(locs)=i; +end; Is=cell2array(Is); +% optionally display +if(~show), return; end; figure(show); pMnt={'hasChn',size(Is1{1},3)>1}; +if(isempty(fStr)), montage2(Is,pMnt); title(type); return; end +ls=cell(1,n); for i=1:n, ls{i}=int2str2(imgIds(i)); end +if(~strcmp(type,'fn')) + for i=1:n, ls{i}=[ls{i} '/' num2str(scores(i),fStr)]; end; end +montage2(Is,[pMnt 'labels' {ls}]); title(type); +end + +function oa = compOas( dt, gt, ig ) +% Computes (modified) overlap area between pairs of bbs. +% +% Uses modified Pascal criteria with "ignore" regions. The overlap area +% (oa) of a ground truth (gt) and detected (dt) bb is defined as: +% oa(gt,dt) = area(intersect(dt,dt)) / area(union(gt,dt)) +% In the modified criteria, a gt bb may be marked as "ignore", in which +% case the dt bb can can match any subregion of the gt bb. Choosing gt' in +% gt that most closely matches dt can be done using gt'=intersect(dt,gt). 
+% Computing oa(gt',dt) is equivalent to: +% oa'(gt,dt) = area(intersect(gt,dt)) / area(dt) +% +% USAGE +% oa = bbGt( 'compOas', dt, gt, [ig] ) +% +% INPUTS +% dt - [mx4] detected bbs +% gt - [nx4] gt bbs +% ig - [nx1] 0/1 ignore flags (0 by default) +% +% OUTPUTS +% oas - [m x n] overlap area between each gt and each dt bb +% +% EXAMPLE +% dt=[0 0 10 10]; gt=[0 0 20 20]; +% oa0 = bbGt('compOas',dt,gt,0) +% oa1 = bbGt('compOas',dt,gt,1) +% +% See also bbGt, bbGt>evalRes +m=size(dt,1); n=size(gt,1); oa=zeros(m,n); +if(nargin<3), ig=zeros(n,1); end +de=dt(:,[1 2])+dt(:,[3 4]); da=dt(:,3).*dt(:,4); +ge=gt(:,[1 2])+gt(:,[3 4]); ga=gt(:,3).*gt(:,4); +for i=1:m + for j=1:n + w=min(de(i,1),ge(j,1))-max(dt(i,1),gt(j,1)); if(w<=0), continue; end + h=min(de(i,2),ge(j,2))-max(dt(i,2),gt(j,2)); if(h<=0), continue; end + t=w*h; if(ig(j)), u=da(i); else u=da(i)+ga(j)-t; end; oa(i,j)=t/u; + end +end +end + +function oa = compOa( dt, gt, ig ) +% Optimized version of compOas for a single pair of bbs. +% +% USAGE +% oa = bbGt( 'compOa', dt, gt, ig ) +% +% INPUTS +% dt - [1x4] detected bb +% gt - [1x4] gt bb +% ig - 0/1 ignore flag +% +% OUTPUTS +% oa - overlap area between gt and dt bb +% +% EXAMPLE +% dt=[0 0 10 10]; gt=[0 0 20 20]; +% oa0 = bbGt('compOa',dt,gt,0) +% oa1 = bbGt('compOa',dt,gt,1) +% +% See also bbGt, bbGt>compOas +w=min(dt(3)+dt(1),gt(3)+gt(1))-max(dt(1),gt(1)); if(w<=0),oa=0; return; end +h=min(dt(4)+dt(2),gt(4)+gt(2))-max(dt(2),gt(2)); if(h<=0),oa=0; return; end +i=w*h; if(ig),u=dt(3)*dt(4); else u=dt(3)*dt(4)+gt(3)*gt(4)-i; end; oa=i/u; +end diff --git a/detector/bbLabeler.m b/detector/bbLabeler.m new file mode 100644 index 0000000..7d1e159 --- /dev/null +++ b/detector/bbLabeler.m @@ -0,0 +1,438 @@ +function bbLabeler( objTypes, imgDir, resDir ) +% Bounding box or ellipse labeler for static images. +% +% Launch and click "?" icon for more info. 
+% +% USAGE +% bbLabeler( [objTypes], [imgDir], [resDir] ) +% +% INPUTS +% objTypes - [{'object'}] list of object types to annotate +% imgDir - [pwd] directory with images +% resDir - [imgDir] directory with annotations +% +% OUTPUTS +% +% EXAMPLE +% bbLabeler +% +% See also bbGt, imRectRot +% +% Piotr's Computer Vision Matlab Toolbox Version 2.66 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<1 || isempty(objTypes)), objTypes={'object'}; end +if(nargin<2 || isempty(imgDir)), imgDir=pwd; end +if(nargin<3 || isempty(resDir)), resDir=imgDir; end +if(~exist(resDir,'dir')), mkdir(resDir); end +colors='gcmrkgcmrkgcmrkgcmrkgcmrkgcmrkgcmrk'; minSiz=[12 12]; +[hFig,hPan,hAx,pTop,imgInd,imgFiles,usePnts] = deal([]); +makeLayout(); imgApi=imgMakeApi(); objApi=objMakeApi(); +usePnts=0; imgApi.setImgDir(imgDir); + + function makeLayout() + % common properties + name = 'bounding box labeler'; + bg='BackgroundColor'; fg='ForegroundColor'; ha='HorizontalAlignment'; + units = {'Units','pixels'}; st='String'; ps='Position'; fs='FontSize'; + + % initial figures size / pos + set(0,'Units','pixels'); ss = get(0,'ScreenSize'); + if( ss(3)<800 || ss(4)<600 ); error('screen too small'); end; + figPos = [(ss(3)-620)/2 (ss(4)-500)/2 620 500]; + + % create main figure + hFig = figure('NumberTitle','off', 'Toolbar','none', 'Color','k', ... + 'MenuBar','none', 'Visible','off', ps,figPos, 'Name',[name resDir]); + set(hFig,'DeleteFcn',@(h,e) exitProg,'ResizeFcn',@(h,e) figResized ); + + % display axes + hAx = axes(units{:},'Parent',hFig,'XTick',[],'YTick',[]); imshow(0); + + % top panel + pnlProp = [units {bg,[.1 .1 .1],'BorderType','none'}]; + txtPrp = {'Style','text',bg,[.1 .1 .1],fs,8,fg,'w',ha}; + edtPrp = {'Style','edit',bg,[.1 .1 .1],fs,8,fg,'w',ha}; + btnPrp = [units,{'Style','pushbutton','FontWeight','bold',... 
+ bg,[.7 .7 .7],fs,10}]; + chbPrp = {'Style','checkbox',bg,[.1 .1 .1],fs,8,fg,'w'}; + pTop.h = uipanel(pnlProp{:},'Parent',hFig); + pTop.hImgInd=uicontrol(pTop.h,edtPrp{:},'Right',st,'0'); + pTop.hImgNum=uicontrol(pTop.h,edtPrp{:},'Left',st,'/0',... + 'Enable','inactive'); + pTop.hLbl = uicontrol( pTop.h,'Style','popupmenu',units{:},... + st,objTypes,fs,8,'Value',1); + pTop.hDel=uicontrol(pTop.h,btnPrp{:},fs,11,fg,[.5 0 0],st,'X'); + pTop.hPrv=uicontrol(pTop.h,btnPrp{:},st,'<<'); + pTop.hNxt=uicontrol(pTop.h,btnPrp{:},st,'>>'); + pTop.hOcc=uicontrol(pTop.h,chbPrp{:},st,'occ'); + pTop.hIgn=uicontrol(pTop.h,chbPrp{:},st,'ign'); + pTop.hEll=uicontrol(pTop.h,chbPrp{:},st,'ellipse'); + pTop.hRot=uicontrol(pTop.h,chbPrp{:},st,'rotate'); + pTop.hLim=uicontrol(pTop.h,chbPrp{:},st,'lims'); + pTop.hPnt=uicontrol(pTop.h,chbPrp{:},st,'pnts'); + pTop.hHid=uicontrol(pTop.h,chbPrp{:},st,'hide'); + pTop.hPan=uicontrol(pTop.h,chbPrp{:},st,'pan'); + pTop.hDims=uicontrol(pTop.h,txtPrp{:},'Center',st,''); + pTop.hNum=uicontrol(pTop.h,txtPrp{:},'Center',st,'n=0'); + pTop.hHelp=uicontrol(pTop.h,btnPrp{:},fs,12,st,'?'); + + % set the keyPressFcn for all focusable components (except popupmenus) + set( hFig, 'keyPressFcn',@keyPress ); + set( hFig, 'WindowScrollWheelFcn',@(h,e) mouseWheel(e)); + set( hFig, 'ButtonDownFcn',@(h,e) mousePress ); + set( pTop.hHelp,'CallBack',@(h,e) helpWindow ); + + % set hFig to visible upon completion + set(hFig,'Visible','on'); drawnow; + + % pan controls + hPan = pan( hFig ); + + function figResized() + % overall layout + pos=get(hFig,ps); pad=8; htTop=30; wdTop=620; + wd=pos(3)-2*pad; ht=pos(4)-2*pad-htTop; + x=(pos(3)-wd)/2; y=pad; + set(hAx,ps,[x y wd ht]); y=y+ht; + set(pTop.h,ps,[x y wd htTop]); + % position stuff in top panel + x=max(2,(wd-wdTop)/2); + set(pTop.hImgInd,ps,[x 4 40 22]); x=x+40; + set(pTop.hImgNum,ps,[x 4 40 22]); x=x+50; + set(pTop.hDel,ps,[x 5 20 20]); x=x+20+5; + set(pTop.hPrv,ps,[x 5 24 20]); x=x+25; + set(pTop.hLbl,ps,[x 5 80 21]); 
x=x+81; + set(pTop.hNxt,ps,[x 5 24 20]); x=x+25+5; + set(pTop.hDims,ps,[x 5 60 20]); x=x+62; + set(pTop.hOcc,ps,[x 15 45 13]); + set(pTop.hIgn,ps,[x 2 45 13]); x=x+50; + set(pTop.hEll,ps,[x 15 55 13]); + set(pTop.hRot,ps,[x 2 55 13]); x=x+60; + set(pTop.hLim,ps,[x 15 45 13]); + set(pTop.hPnt,ps,[x 2 45 13]); x=x+50; + set(pTop.hHid,ps,[x 15 55 13]); + set(pTop.hPan,ps,[x 2 55 13]); x=x+60; + set(pTop.hNum,ps,[x 5 30 20]); x=x+30+20; + set(pTop.hHelp,ps,[x 5 20 20]); + end + + function helpWindow() + helpTxt = { + 'Image Selection:' + ' * spacebar: advance one image' + ' * ctrl-spacebar: go back one image' + ' * double-click: advance one image' + ' * can also directly enter image index' + '' + 'Zoom and Pan controls:' + ' * mouse wheel or +/- keys: zoom in and out' + ' * q-key or pan-icon: toggle pan mode' + ' * click/drag: pans image (while in pan mode)' + '' + 'bb modification with mouse:' + ' * click/drag in blank region: create new bb' + ' * click on existing bb: select bb' + ' * click/drag center of existing bb: move bb' + ' * click/drag edge of existing bb: resize bb' + ' * clck/drag control points: rotate/resize bb' + ' * ctrl+arrow keys: shift selected bb' + '' + 'Other controls:' + ' * d-key or del-key or X-icon: delete selected bb' + ' * o-key or occ-icon: toggle occlusion for bb' + ' * i-key or ign-icon: toggle ignore for bb' + ' * e-key or ellipse-icon: toggle bb ellipse/rect display' + ' * r-key or rotation-icon: toggle bb rotation control' + ' * l-key or lims-icon: toggle bb limits on/off' + ' * p-key or pnts-icon: toggle pnt creation on/off' + ' * left-arrow or <<-icon: select previous bb' + ' * right-arrow or >>-icon: select next bb' + ' * up/down-arrow a-key/z-key or dropbox: select bb label' + ' * ctrl and +/- keys: increase/decrease contrast' }; + pos=get(0,'ScreenSize'); pos=[(pos(3)-400)/2 (pos(4)-520)/2 400 520]; + hHelp = figure('NumberTitle','off', 'Toolbar','auto', ... + 'Color','k', 'MenuBar','none', 'Visible','on', ... 
+ 'Name',[name ' help'], 'Resize','on', ps, pos ); pos(1:2)=0; + uicontrol( hHelp, 'Style','text', ha,'Left', fs,10, bg,'w', ... + ps,pos, st,helpTxt ); + end + + function exitProg(), objApi.closeAnn(); end + end + + function keyPress( h, evnt ) %#ok + c=int8(evnt.Character); if(isempty(c)), c=0; end; + ctrl=strcmp(evnt.Modifier,'control'); if(isempty(ctrl)),ctrl=0; end + if(c==127 || c==100), objApi.objDel(); end % 'del' or 'd' + if(c==32 && ctrl ), imgApi.setImg(imgInd-1); end % ctrl-spacebar + if(c==32 && ~ctrl), imgApi.setImg(imgInd+1); end % spacebar + if(c==28 && ctrl), objApi.objShift(-1,0); end % ctrl-lf + if(c==29 && ctrl), objApi.objShift(+1,0); end % ctrl-rt + if(c==30 && ctrl), objApi.objShift(0,-1); end % ctrl-up + if(c==31 && ctrl), objApi.objShift(0,+1); end % ctrl-dn + if(c==28 && ~ctrl), objApi.objToggle(-1); end % lf + if(c==29 && ~ctrl), objApi.objToggle(+1); end % rt + if((c==30 && ~ctrl) || c==97), objApi.objSetType(-1); end % up or 'a' + if((c==31 && ~ctrl) || c==122), objApi.objSetType(+1); end % dn or 'z' + if(c==111), objApi.objSetVal('occ',0); end % 'o' + if(c==105), objApi.objSetVal('ign',0); end % 'i' + if(c==101), objApi.objSetVal('ell',0); end % 'e' + if(c==114), objApi.objSetVal('rot',0); end % 'r' + if(c==108), objApi.objSetVal('lim',0); end % 'l' + if(c==112), objApi.objSetVal('pnt',0); end % 'p' + if(c==104), objApi.objSetVal('hid',0); end % 'h' + if(c==113), objApi.objSetVal('pan',0); end % 'q' + if(c==43 && ~ctrl), zoom(1.1); end % '+' key, zoom in + if(c==45 && ~ctrl), zoom(1/1.1); end % '-' key, zoom out + if(c==43 && ctrl), imgApi.adjContrast(+1); end % ctrl-'+', inc contrast + if(c==45 && ctrl), imgApi.adjContrast(-1); end % ctrl-'-', dec contrast + end + + function mousePress() + sType = get(hFig,'SelectionType'); + %disp(['mouse pressed: ' sType]); + if( strcmp(sType,'open') ) + if( usePnts ), return; end + imgApi.setImg(imgInd+1); % double click + elseif( strcmp(sType,'normal') ) + objApi.objNew(); % single click + end + 
end + + function mouseWheel( evnt ) + if( evnt.VerticalScrollCount>0 ), zoom(1/1.1); else zoom(1.1); end + end + + function mouseDrag() + if(isempty(imgInd)), return; end + persistent h; if(~all(ishandle(h))), h=[]; end + xs=get(gca,'xLim'); ys=get(gca,'yLim'); + p=get(hAx,'CurrentPoint'); x=p(1); y=p(3); + if( xxs(2)||yys(2) ), delete(h); return; end + if(isempty(h)), h=[line line]; + set(h,'ButtonDownFcn',@(h,e) mousePress,'Color','k'); end + set(h,{'Xdata'},{[x x];xs},{'YData'},{ys,[y y]}'); + end + + function api = objMakeApi() + % variables + [resNm,objs,nObj,hsObj,curObj,lims] = deal([]); + ellipse=0; rotate=0; useLims=0; hide=0; + + % callbacks + set(pTop.hDel,'Callback',@(h,evnt) objDel()); + set(pTop.hPrv,'Callback',@(h,evnt) objToggle(-1)); + set(pTop.hNxt,'Callback',@(h,evnt) objToggle(+1)); + set(pTop.hLbl,'Callback',@(h,evnt) objSetType()); + set(pTop.hOcc,'Callback',@(h,evnt) objSetVal('occ',1)); + set(pTop.hIgn,'Callback',@(h,evnt) objSetVal('ign',1)); + set(pTop.hEll,'Callback',@(h,evnt) objSetVal('ell',1)); + set(pTop.hRot,'Callback',@(h,evnt) objSetVal('rot',1)); + set(pTop.hLim,'Callback',@(h,evnt) objSetVal('lim',1)); + set(pTop.hPnt,'Callback',@(h,evnt) objSetVal('pnt',1)); + set(pTop.hHid,'Callback',@(h,evnt) objSetVal('hid',1)); + set(pTop.hPan,'Callback',@(h,evnt) objSetVal('pan',1)); + + % create api + api = struct( 'closeAnn',@closeAnn, 'openAnn',@openAnn, ... + 'objNew',@objNew, 'objDel',@objDel, 'objToggle',@objToggle, ... + 'objSetType',@objSetType, 'objSetVal',@objSetVal, ... 
+ 'objShift',@objShift ); + + function closeAnn() + % save annotation and then clear (also use to init) + if(~isempty(nObj)&&~isempty(resNm)), bbGt('bbSave',objs,resNm); end + delete(hsObj); hsObj=[]; nObj=0; resNm=''; curObj=0; objs=[]; + objsDraw(); + end + + function openAnn() + % try to load annotation, prepare for new image + assert(nObj==0); lims=[get(gca,'xLim'); get(gca,'yLim')]; + lims=[lims(:); 0]'; lims(3:4)=lims(3:4)-lims(1:2); + resNm=[resDir '/' imgFiles{imgInd} '.txt']; + if(exist(resNm,'file')), objs=bbGt('bbLoad',resNm); end + objTypes=unique([objTypes bbGt('get',objs,'lbl')']); + set(pTop.hLbl,'String',objTypes); nObj=length(objs); objsDraw(); + end + + function objsDraw() + delete(hsObj); if(hide), hsObj=[]; return; end; hsObj=zeros(1,nObj); + % display regular bbs + for id=1:nObj + o=objs(id); color=colors(strcmp(o.lbl,objTypes)); + rp=struct('ellipse',ellipse,'rotate',rotate,'hParent',hAx,... + 'lw',2,'ls','-','pos',[o.bb o.ang],'color',color); + if(~useLims), rp.lims=[]; else rp.lims=lims; end + if(o.ign), rp.cross=2; end; if(curObj==id), rp.ls=':'; end + [hsObj(id),rectApi]=imRectRot(rp); + rectApi.setPosSetCb(@(bb) objSetBb(bb,id)); + rectApi.setPosChnCb(@(bb) objChnBb(bb,id)); + if(id==curObj), rectApiCur=rectApi; end + end + if(curObj>0), rectApiCur.uistack('top'); end + % display occluded bbs + for id=1:nObj + o=objs(id); ang=o.ang; if(~o.occ), continue; end + rp=struct('ellipse',ellipse,'rotate',0,'hParent',hAx,'lw',1,... 
+ 'ls','-','pos',[o.bbv ang],'lims',[o.bb ang],'color','y'); + [hObj,rectApi] = imRectRot( rp ); + rectApi.setPosSetCb(@(bbv) objSetBbv(bbv,id)); + hsObj=[hsObj hObj]; %#ok + end + % update gui info + if(curObj==0), dimsStr=''; occ=0; ign=0; en='off'; else + o=objs(curObj); occ=o.occ; ign=o.ign; en='on'; + set(pTop.hLbl,'Value',find(strcmp(o.lbl,objTypes))); + dimsStr=sprintf('%i x %i',round(o.bb(3)),round(o.bb(4))); + end + set([pTop.hIgn pTop.hOcc],'Enable',en); set(pTop.hOcc,'Value',occ); + set(pTop.hDims,'String',dimsStr); set(pTop.hIgn,'Value',ign); + set(pTop.hNum,'String', ['n=' int2str(nObj)] ); + set(hFig,'WindowButtonMotionFcn',@(h,e) mouseDrag); mouseDrag(); + end + + function objSetBb( bb, objId ) + curObj=objId; o=objs(objId); bb=round(bb); bbv=o.bbv; + if(any(bb(3:4) + dimsStr=sprintf('%i x %i',round(bb(3)),round(bb(4))); + set( pTop.hDims, 'String', dimsStr ); + end + + function objNew() + if(hide), return; end; curObj=0; objsDraw(); + pnt=get(hAx,'CurrentPoint'); pnt=pnt([1,3]); + if( pnt(1)lims(3) || ... + pnt(2)lims(4)), return; end + lblId=get(pTop.hLbl,'Value'); color=colors(lblId); + rp=struct('ellipse',ellipse,'rotate',rotate/2,'hParent',hAx,... 
+ 'lw',2,'ls',':','pos',pnt,'color',color); + if(~useLims), rp.lims=[]; else rp.lims=lims; end + [hObj,rectApi]=imRectRot(rp); + lbl=objTypes{lblId}; bb=round(rectApi.getPos()); + if( usePnts && all(bb(3:4)=minSiz) ) + obj=bbGt('create'); obj.lbl=lbl; obj.bb=bb(1:4); obj.ang=bb(5); + objs=[objs; obj]; nObj=nObj+1; curObj=nObj; + end; delete(hObj); objsDraw(); + end + + function objDel() + if(curObj==0), return; end + objs(curObj)=[]; curObj=0; nObj=nObj-1; objsDraw(); + end + + function objToggle( del ) + curObj=mod(curObj+del,nObj+1); objsDraw(); + end + + function objSetType( del ) + val = get(pTop.hLbl,'Value'); + if( nargin>0 && del~=0 ) + val = max(1,min(val+del,length(objTypes))); + set(pTop.hLbl,'Value',val); + end + if(curObj), objs(curObj).lbl=objTypes{val}; objsDraw(); end + end + + function objSetVal( type, flag ) + if(strcmp(type,'occ')) + if(curObj==0), return; end + occ = get(pTop.hOcc,'Value'); if(~flag), occ=1-occ; end + objs(curObj).occ=occ; objSetBbv(objs(curObj).bb,curObj); return; + elseif(strcmp(type,'ign')) + if(curObj==0), return; end + ign = get(pTop.hIgn,'Value'); if(~flag), ign=1-ign; end + objs(curObj).ign=ign; + elseif(strcmp(type,'ell')) + ellipse = get(pTop.hEll,'Value'); + if(~flag), ellipse=1-ellipse; set(pTop.hEll,'Value',ellipse); end + elseif(strcmp(type,'rot')) + rotate = get(pTop.hRot,'Value'); + if(~flag), rotate=1-rotate; set(pTop.hRot,'Value',rotate); end + elseif(strcmp(type,'lim')) + useLims = get(pTop.hLim,'Value'); + if(~flag), useLims=1-useLims; set(pTop.hLim,'Value',useLims); end + elseif(strcmp(type,'pnt')) + usePnts = get(pTop.hPnt,'Value'); + if(~flag), usePnts=1-usePnts; set(pTop.hPnt,'Value',usePnts); end + elseif(strcmp(type,'hid')) + hide = get(pTop.hHid,'Value'); + if(~flag), hide=1-hide; set(pTop.hHid,'Value',hide); end + if( hide ), curObj=0; end + elseif(strcmp(type,'pan')) + enabled = get(pTop.hPan,'Value'); + if(~flag), enabled=1-enabled; set(pTop.hPan,'Value',enabled); end + if(~enabled), 
set(hPan,'Enable','off'); else + set(hPan,'Enable','on'); hM=uigetmodemanager(hFig); + set(hM.WindowListenerHandles,'Enable','off'); + set( hFig, 'keyPressFcn',@keyPress); + set( hFig, 'WindowScrollWheelFcn',@(h,e) mouseWheel(e)); + setptr(hFig,'hand'); %set(hFig,'Pointer','hand'); + end + end + objsDraw(); + end + + function objShift( x, y ) + if(curObj==0), return; end + objs(curObj).bb(1:2)=objs(curObj).bb(1:2)+[x y]; + objsDraw(); + end + end + + function api = imgMakeApi() + [nImg,hImg,contrast,I]=deal([]); + set(pTop.hImgInd,'Callback',@(h,evnt) setImgCb()); + api = struct( 'setImgDir',@setImgDir, 'setImg',@setImg, ... + 'adjContrast',@adjContrast ); + + function setImgDir( imgDir1 ) + objApi.closeAnn(); imgDir=imgDir1; + imgFiles=[dir([imgDir '/*.jpg']); dir([imgDir '/*.jpeg']); ... + dir([imgDir '/*.png']); dir([imgDir '/*.tif'])]; + imgFiles={imgFiles.name}; nImg=length(imgFiles); setImg(1); + set(pTop.hImgNum,'String',['/' int2str(nImg)]); + end + + function adjContrast( del ) + if(isempty(I)), return; end + contrast=max(.1,contrast+del/10); + set(hImg,'CData',I*contrast); + end + + function setImg( imgInd1 ) + if(nImg==0), return; end; objApi.closeAnn(); imgInd=imgInd1; + if(imgInd<1), imgInd=1; end; if(imgInd>nImg), imgInd=nImg; end + I=imread([imgDir '/' imgFiles{imgInd}]); hImg=imshow(I); + set(pTop.hImgInd,'String',int2str(imgInd)); contrast=1; + set(hImg,'ButtonDownFcn',@(h,e) mousePress); objApi.openAnn(); + end + + function setImgCb() + imgInd1=str2double(get(pTop.hImgInd,'String')); + if(isnan(imgInd1)), setImg(imgInd); else setImg(imgInd1); end + end + end + +end diff --git a/detector/bbNms.m b/detector/bbNms.m new file mode 100644 index 0000000..7a0a9b5 --- /dev/null +++ b/detector/bbNms.m @@ -0,0 +1,177 @@ +function bbs = bbNms( bbs, varargin ) +% Bounding box (bb) non-maximal suppression (nms). +% +% type=='max': nms of bbs using area of overlap criteria. 
For each pair of +% bbs, if their overlap, defined by: +% overlap(bb1,bb2) = area(intersect(bb1,bb2))/area(union(bb1,bb2)) +% is greater than overlap, then the bb with the lower score is suppressed. +% In the Pascal critieria two bbs are considered a match if overlap>=.5. If +% ovrDnm='min', the 'union' in the above formula is replaced with 'min'. +% +% type=='maxg': Similar to 'max', except performs the nms in a greedy +% fashion. Bbs are processed in order of decreasing score, and, unlike in +% 'max' nms, once a bb is suppressed it can no longer suppress other bbs. +% +% type='cover': Perform nms by attempting to choose the smallest subset of +% the bbs such that each remaining bb is within overlap of one of the +% chosen bbs. The above reduces to the weighted set cover problem which is +% NP but greedy optimization yields provably good solutions. The score of +% each bb is set to the sum of the scores of the bbs it covers (the max can +% also be used). In practice similar to 'maxg'. +% +% type=='ms': Mean shift nms of bbs with a variable width kernel. radii is +% a 4 element vector (x,y,w,h) that controls the amount of suppression +% along each dim. Typically the first two elements should be the same, as +% should the last two. Distance between w/h are computed in log2 space (ie +% w and w*2 are 1 unit apart), and the radii should be set accordingly. +% radii may need to change depending on spatial and scale stride of bbs. +% +% Although efficient, nms is O(n^2). To speed things up for large n, can +% divide data into two parts (according to x or y coordinate), run nms on +% each part, combine and run nms on the result. If maxn is specified, will +% split the data in half if n>maxn. Note that this is a heuristic and can +% change the results of nms. Moreover, setting maxn too small will cause an +% increase in overall performance time. +% +% Finally, the bbs are optionally resized before performing nms. 
The +% resizing is important as some detectors return bbs that are padded. For +% example, if a detector returns a bounding box of size 128x64 around +% objects of size 100x43 (as is typical for some pedestrian detectors on +% the INRIA pedestrian database), the resize parameters should be {100/128, +% 43/64, 0}, see bbApply>resize() for more info. +% +% USAGE +% bbs = bbNms( bbs, [varargin] ) +% +% INPUTS +% bbs - original bbs (must be of form [x y w h wt bbType]) +% varargin - additional params (struct or name/value pairs) +% .type - ['max'] 'max', 'maxg', 'ms', 'cover', or 'none' +% .thr - [-inf] threshold below which to discard (0 for 'ms') +% .maxn - [inf] if n>maxn split and run recursively (see above) +% .radii - [.15 .15 1 1] supression radii ('ms' only, see above) +% .overlap - [.5] area of overlap for bbs +% .ovrDnm - ['union'] area of overlap denominator ('union' or 'min') +% .resize - {} parameters for bbApply('resize') +% .separate - [0] run nms separately on each bb type (bbType) +% +% OUTPUTS +% bbs - suppressed bbs +% +% EXAMPLE +% bbs=[0 0 1 1 1; .1 .1 1 1 1.1; 2 2 1 1 1]; +% bbs1 = bbNms(bbs, 'type','max' ) +% bbs2 = bbNms(bbs, 'thr',.5, 'type','ms') +% +% See also bbApply, nonMaxSuprList +% +% Piotr's Computer Vision Matlab Toolbox Version 2.60 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get parameters +dfs={'type','max','thr',[],'maxn',inf,'radii',[.15 .15 1 1],... + 'overlap',.5,'ovrDnm','union','resize',{},'separate',0}; +[type,thr,maxn,radii,overlap,ovrDnm,resize,separate] = ... 
+ getPrmDflt(varargin,dfs,1); +if(isempty(thr)), if(strcmp(type,'ms')), thr=0; else thr=-inf; end; end +if(strcmp(ovrDnm,'union')), ovrDnm=1; elseif(strcmp(ovrDnm,'min')), + ovrDnm=0; else assert(false); end +assert(maxn>=2); assert(numel(overlap)==1); + +% discard bbs below threshold and run nms1 +if(isempty(bbs)), bbs=zeros(0,5); end; if(strcmp(type,'none')), return; end +kp=bbs(:,5)>thr; bbs=bbs(kp,:); if(isempty(bbs)), return; end +if(~isempty(resize)), bbs=bbApply('resize',bbs,resize{:}); end +pNms1={type,thr,maxn,radii,overlap,0}; +if(~separate || size(bbs,2)<6), bbs=nms1(bbs,pNms1{:}); else + ts=unique(bbs(:,6)); m=length(ts); bbs1=cell(1,m); + for t=1:m, bbs1{t}=nms1(bbs(bbs(:,6)==ts(t),:),pNms1{:}); end + bbs=cat(1,bbs1{:}); +end + + function bbs = nms1( bbs, type, thr, maxn, radii, overlap, isy ) + % if big split in two, recurse, merge, then run on merged + if( size(bbs,1)>maxn ) + n2=floor(size(bbs,1)/2); [~,ord]=sort(bbs(:,1+isy)+bbs(:,3+isy)/2); + bbs0=nms1(bbs(ord(1:n2),:),type,thr,maxn,radii,overlap,~isy); + bbs1=nms1(bbs(ord(n2+1:end),:),type,thr,maxn,radii,overlap,~isy); + bbs=[bbs0; bbs1]; + end + % run actual nms on given bbs + switch type + case 'max', bbs = nmsMax(bbs,overlap,0,ovrDnm); + case 'maxg', bbs = nmsMax(bbs,overlap,1,ovrDnm); + case 'ms', bbs = nmsMs(bbs,thr,radii); + case 'cover', bbs = nmsCover(bbs,overlap,ovrDnm); + otherwise, error('unknown type: %s',type); + end + end + + function bbs = nmsMax( bbs, overlap, greedy, ovrDnm ) + % for each i suppress all j st j>i and area-overlap>overlap + [~,ord]=sort(bbs(:,5),'descend'); bbs=bbs(ord,:); + n=size(bbs,1); kp=true(1,n); as=bbs(:,3).*bbs(:,4); + xs=bbs(:,1); xe=bbs(:,1)+bbs(:,3); ys=bbs(:,2); ye=bbs(:,2)+bbs(:,4); + for i=1:n, if(greedy && ~kp(i)), continue; end + for j=(i+1):n, if(kp(j)==0), continue; end + iw=min(xe(i),xe(j))-max(xs(i),xs(j)); if(iw<=0), continue; end + ih=min(ye(i),ye(j))-max(ys(i),ys(j)); if(ih<=0), continue; end + o=iw*ih; if(ovrDnm), u=as(i)+as(j)-o; else 
u=min(as(i),as(j)); end + o=o/u; if(o>overlap), kp(j)=0; end + end + end + bbs=bbs(kp>0,:); + end + + function bbs = nmsMs( bbs, thr, radii ) + % position = [x+w/2,y+h/2,log2(w),log2(h)], ws=weights-thr + ws=bbs(:,5)-thr; w=bbs(:,3); h=bbs(:,4); n=length(w); + ps=[bbs(:,1)+w/2 bbs(:,2)+h/2 log2(w) log2(h)]; + % find modes starting from each elt, then merge nodes that are same + ps1=zeros(n,4); ws1=zeros(n,1); stopThr=1e-2; + for i=1:n, [ps1(i,:), ws1(i,:)]=nmsMs1(i); end + [ps,ws] = nonMaxSuprList(ps1,ws1,stopThr*100,[],[],2); + % convert back to bbs format and sort by weight + w=pow2(ps(:,3)); h=pow2(ps(:,4)); + bbs=[ps(:,1)-w/2 ps(:,2)-h/2 w h ws+thr]; + [ws,ord]=sort(ws,'descend'); bbs=bbs(ord,:); + + function [p,w]=nmsMs1(ind) + % variable bandwith kernel (analytically defined) + p=ps(ind,:); [n,m]=size(ps); onesN=ones(n,1); + h = [pow2(ps(:,3)) pow2(ps(:,4)) onesN onesN]; + h = h .* radii(onesN,:); hInv=1./h; + while(1) + % compute (weighted) squared Euclidean distance to each neighbor + d=(ps-p(onesN,:)).*hInv; d=d.*d; d=sum(d,2); + % compute new mode + wMask=ws.*exp(-d); wMask=wMask/sum(wMask); p1=wMask'*ps; + % stopping criteria + diff=sum(abs(p1-p))/m; p=p1; if(diffoverlap), N(i,j)=1; end + end + end + % perform set cover operation (greedily choose next best) + N=N+N'; bbs1=zeros(n,5); n1=n; c=0; + while( n1>0 ), [~,i0]=max(N*bbs(:,5)); + N0=N(:,i0)==1; n1=n1-sum(N0); N(N0,:)=0; N(:,N0)=0; + c=c+1; bbs1(c,1:4)=bbs(i0,1:4); bbs1(c,5)=sum(bbs(N0,5)); + end + bbs=bbs1(1:c,:); + end +end diff --git a/gradientMex.mexw64 b/gradientMex.mexw64 new file mode 100644 index 0000000..fdcb9e6 Binary files /dev/null and b/gradientMex.mexw64 differ diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..9d52972 --- /dev/null +++ b/readme.txt @@ -0,0 +1,59 @@ + + Visual Tracking Using Attention-Modulated Disintegration and Integration + + J. Choi, H. J. Chang, J. Jeong, Y. Demiris, J. Y. 
Choi + CVPR2016 + +________________ +Project webpage: https://sites.google.com/site/jwchoivision/ + +This MATLAB program implements a visual tracker, SCT4. + +It is free for research use. If you find it useful, please acknowledge the paper +above with a reference. + + +__________ +Quickstart + +1. Extract the zip-file somewhere. + +2. Run 'demo.m' + +3. The tracking results of sample video ('Deer') will be shown. + + +__________ +References + +[1] J. Choi, H. J. Chang, J. Jeong, Y. Demiris, J. Y. Choi, "Visual Tracking +Using Attention-Modulated Disintegration and Integration", CVPR, 2016 + +[2] P. Dollar, "Piotr's Computer Vision Matlab Toolbox (PMT)", +http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html. + +[3] J. F. Henriques, R. Caseiro, P. Martins, and J. Batista, "High-Speed Tracking +with Kernelized Correlation Filters", IEEE Transactions on PAMI, 2015 + +[4] Y. Wu, J. Lim, M.-H. Yang, "Online Object Tracking: A Benchmark", CVPR 2013. +Website: http://visual-tracking.net/ + +[5] P. Dollar, S. Belongie, P. Perona, "The Fastest Pedestrian Detector in the +West", BMVC 2010. + + +_____________________________________ +Copyright (c) 2016, Jongwon Choi + +Permission to use, copy, modify, and distribute this software for research +purposes with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ diff --git a/strong/eval_pgrf.m b/strong/eval_pgrf.m new file mode 100644 index 0000000..ebcc3d6 --- /dev/null +++ b/strong/eval_pgrf.m @@ -0,0 +1,49 @@ +function result = eval_pgrf(input, rf) + +feature = reshape(input, size(input,1)*size(input,2), size(input,3)); + +trees = rf.mainTree; +nTrees = size(trees,1); +d = rf.mainProb; + + +% main tree +prob = zeros(size(feature,1),nTrees); +c = zeros(size(feature,1),nTrees); +for i = 1:nTrees + prob(:,i) = d(forestInds(single(feature),trees{i,1}.thrs,trees{i,1}.fids,trees{i,1}.child,1), i); + +end + +%partial trees +leaf2partialTree = rf.leaf2partialTree; +num_partialTree = size(rf.partialTree,1); + +if(num_partialTree > 0) + partialTree = rf.partialTree; + d3 = rf.subProb; + + for j = 1:nTrees + idx = find(leaf2partialTree(:,j) > 0); + k = leaf2partialTree(idx,j); + + for i = 1:length(idx) + featureIdx = find(c(:,j)==idx(i)); + + if(~isempty(featureIdx)) + partialFeature = feature(featureIdx,:); + [hs, ps] = forestApply(single(partialFeature), partialTree{k,1}); + prob(featureIdx,j) = ps(:,2); + end + + end + + end + +end + + +prob = mean(prob,2); +bgDist = 1 - reshape(prob,[size(input,1),size(input,2)]); +result = bgDist; + diff --git a/strong/evaluate_stSaliency.m b/strong/evaluate_stSaliency.m new file mode 100644 index 0000000..da7b92f --- /dev/null +++ b/strong/evaluate_stSaliency.m @@ -0,0 +1,3 @@ +function output = evaluate_stSaliency(feature, rf) + +output = eval_pgrf(feature, rf); \ No newline at end of file diff --git a/strong/init_pgrf.m b/strong/init_pgrf.m new file mode 100644 index 0000000..887ec2b --- /dev/null +++ b/strong/init_pgrf.m @@ -0,0 +1,127 @@ +function [rf, result] = init_pgrf(input, mask, N_TREE, mainTree) + +maxDepth = 10; +maxNNode = 2^(maxDepth); +AMBIGUITY_THRESH = 0.40; + +origin_bgProb = mask(:); + +feature = reshape(input, size(input,1)*size(input,2), size(input,3)); + + +% training initial forest +inputFeature = cell(N_TREE,1); +inputBgProb = cell(N_TREE,1); + +for i = 1:N_TREE + 
idx = 1:size(feature,1); + inputFeature{i,1} = feature(idx,:); + inputBgProb{i,1} = origin_bgProb(idx,:); +end + +%training (50ms) +trees = cell(N_TREE,1); +if(isempty(mainTree)) + for i = 1:N_TREE + params.M = 1; + params.N1 = size(inputFeature{i,1},1); + params.F1 = size(inputFeature{i,1},2); + + trees{i,1} = forestTrain(inputFeature{i,1}, inputBgProb{i,1}+1,params); + + end + + prob = zeros(size(feature,1),N_TREE); + d = zeros(maxNNode,N_TREE); + for i = 1:N_TREE + c = forestInds(single(feature),trees{i,1}.thrs,trees{i,1}.fids,trees{i,1}.child,1); + leafIdx = unique(c); + counts1 = histc(c, leafIdx); + counts2 = histc(c(origin_bgProb==1), leafIdx); + d(leafIdx,i) = counts2 ./ counts1; + prob(:,i) = d(c,i); + end + prob = mean(prob,2); + + leaf2partialTree = zeros(size(d)); + d3 = []; + partialTree = []; + +else + + for i = 1:N_TREE + trees{i,1} = mainTree; + end + prob = zeros(size(feature,1),N_TREE); + d = zeros(maxNNode,N_TREE); + for i = 1:N_TREE + c = forestInds(single(feature),trees{i,1}.thrs,trees{i,1}.fids,trees{i,1}.child,1); + leafIdx = unique(c); + counts1 = histc(c, leafIdx); + counts2 = histc(c(origin_bgProb==1), leafIdx); + d(leafIdx,i) = counts2 ./ counts1; + prob(:,i) = d(c,i); + end + + leaf2partialTree = zeros(size(d)); + num_partialTree = sum(sum(d > AMBIGUITY_THRESH & d < 1-AMBIGUITY_THRESH)); + + if(num_partialTree < 1) + prob = mean(prob,2); + + d3 = []; + partialTree = []; + + else + + partialTree = cell(num_partialTree,1); + k = 1; + d3 = cell(num_partialTree, 1); + for j = 1:N_TREE + idx = find(d(:,j) > AMBIGUITY_THRESH & d(:,j) < 1-AMBIGUITY_THRESH); + for i = 1:length(idx) + featureIdx = find(c(:,j)==idx(i)); + + if(~isempty(featureIdx)) + + partialFeature = feature(featureIdx,:); + + partialLabels = origin_bgProb(featureIdx); + params.M = 1; + params.N1 = size(partialFeature,1); + params.F1 = size(partialFeature,2); + + partialTree{k,1} = forestTrain(partialFeature, partialLabels+1 , params); + + if(partialTree{k,1}.fids > 1) + d3{k,1} 
= partialTree{k,1}.distr(:,1); + [hs, ps] = forestApply(single(partialFeature), partialTree{k,1}); + prob(featureIdx,j) = ps(:,2); + + leaf2partialTree(idx(i),j) = k; + k = k + 1; + + end + + end + + end + end + end + + prob = mean(prob,2); + +end + +rf.mainTree = trees; +rf.leaf2partialTree = leaf2partialTree; +if(size(partialTree,1)<1) + rf.partialTree = []; +else + rf.partialTree = partialTree; +end +rf.mainProb = d; +rf.subProb = d3; + +bgDist = 1 - reshape(prob,[size(input,1),size(input,2)]); +result = bgDist; diff --git a/strong/init_stSaliency.m b/strong/init_stSaliency.m new file mode 100644 index 0000000..2d96335 --- /dev/null +++ b/strong/init_stSaliency.m @@ -0,0 +1,5 @@ +function [rf, result] = init_stSaliency(feature, mask) + +N_TREE = 1; + +[rf, result] = init_pgrf(feature, mask, N_TREE,[]); diff --git a/strong/update_pgrf.m b/strong/update_pgrf.m new file mode 100644 index 0000000..c3b30de --- /dev/null +++ b/strong/update_pgrf.m @@ -0,0 +1,206 @@ +function [rf, result] = update_pgrf(input, mask, rf, N_TREE) + +Beta = 0.15; +G_para = 0.5; + +maxDepth = 10; +maxDepth2 = 5; +maxNNode = 2^(maxDepth); +AMBIGUITY_THRESH = 0.40; + +TOL = 10; + +origin_bgProb = mask(:); + +if(size(input,3) > 1) + input = cat(3, input, rgb2lab(input) / 255 + 0.5); +end +feature = reshape(input, size(input,1)*size(input,2), size(input,3)); + +% training initial forest +inputFeature = cell(N_TREE,1); +inputBgProb = cell(N_TREE,1); + +for i = 1:N_TREE + idx = randsample(size(feature,1),round(0.8*(size(feature,1)))); + inputFeature{i,1} = feature(idx,:); + inputBgProb{i,1} = origin_bgProb(idx,:); +end + + +%training (50ms) +trees = rf.mainTree(1:N_TREE); + +% predict (70ms) +prob = zeros(size(feature,1),N_TREE); +d = zeros(maxNNode,N_TREE); +for i = 1:N_TREE + [yfit, c] = trees{i,1}(feature); + leafIdx = unique(c); + counts1 = histc(c, leafIdx); + counts2 = histc(c(origin_bgProb==1), leafIdx); + d(leafIdx,i) = counts2 ./ counts1; + prob(:,i) = str2num(cell2mat(yfit(:))); +end 
+prob = mean(prob,2); + +bgProb = prob; + +bgDist = 1 - reshape(bgProb,size(mask)); +fgDist = 1 - bgDist; + + +%%% Get the image gradient +gradH = input(:,2:end,:) - input(:,1:end-1,:); +gradV = input(2:end,:,:) - input(1:end-1,:,:); + +gradH = sum(gradH.^2, 3); +gradV = sum(gradV.^2, 3); + +hC = exp(-Beta.*gradH./mean(gradH(:))); +vC = exp(-Beta.*gradV./mean(gradV(:))); + +%%% These matrices will evantually use as inputs to Bagon's code +hC = [hC zeros(size(hC,1),1)]; +vC = [vC ;zeros(1, size(vC,2))]; +sc = [0 G_para;G_para 0]; + +fgDist(mask(:)==1) = max(max(fgDist)); +bgDist(mask(:)==1) = min(min(bgDist)); + +dc = cat(3, exp(bgDist), log(fgDist+1)); +dc = cat(3, (bgDist), (fgDist)); +graphHandle = GraphCut('open', dc , sc, vC, hC); +graphHandle = GraphCut('set', graphHandle, int32(mask == 0)); +[graphHandle currLabel] = GraphCut('expand', graphHandle,1000); +currLabel = 1 - currLabel; +GraphCut('close', graphHandle); + +bgProb = double(currLabel); +prevLabel = currLabel; + +for ii = 1:5 + + ratio = sum(currLabel(:)) / length(currLabel(:)); + + label = bgProb(:); + + + % predict + prob = zeros(size(feature,1),N_TREE); + c = zeros(size(feature,1),N_TREE); + d = zeros(maxNNode,N_TREE); + for i = 1:N_TREE + [yfit, c2] = eval(trees{i,1},feature); + leafIdx = unique(c2); + counts1 = histc(c2, leafIdx); + counts2 = histc(c2(label==1), leafIdx); + if(size(counts1,1) ~= size(counts2,1)) + counts2 = counts2'; + end + d(leafIdx,i) = counts2 ./ counts1; + + prob(:,i) = str2num(cell2mat(yfit(:))); + c(:,i) = c2; + end + + + leaf2partialTree = zeros(size(d)); + num_partialTree = sum(sum(d > AMBIGUITY_THRESH & d < 1-AMBIGUITY_THRESH)); + + if(num_partialTree < 1) + prob = mean(prob,2); + bgProb = prob; + + bgDist = 1 - reshape(bgProb,size(mask)); + + bgDist(mask(:)==1) = min(min(bgDist)); + + d3 = []; + partialTree = []; + break; + end + + partialTree = cell(num_partialTree,1); + k = 1; + d3 = cell(num_partialTree, 1); + for j = 1:N_TREE + idx = find(d(:,j) > AMBIGUITY_THRESH 
& d(:,j) < 1-AMBIGUITY_THRESH); + for i = 1:length(idx) + featureIdx = find(c(:,j)==idx(i)); + + if(~isempty(featureIdx)) + + partialFeature = feature(featureIdx,:); + + partialTree{k,1} = classregtree(partialFeature, bgProb(featureIdx),'maxdepth',maxDepth2,'method','classification'); + + if(partialTree{k,1}.numnodes > 1) + + d3{k,1} = classprob(partialTree{k,1}); + [yfit, c3] = eval(partialTree{k,1}, partialFeature); + + prob(featureIdx,j) = d3{k,1}(c3(:),2); + + leaf2partialTree(idx(i),j) = k; + k = k + 1; + + end + + end + + end + end + + prob = mean(prob,2); + bgProb = prob; + + bgDist = 1 - reshape(bgProb,size(mask)); + fgDist = 1 - bgDist; + + %%% Get the image gradient + gradH = input(:,2:end,:) - input(:,1:end-1,:); + gradV = input(2:end,:,:) - input(1:end-1,:,:); + + gradH = sum(gradH.^2, 3); + gradV = sum(gradV.^2, 3); + + hC2 = exp(-Beta.*gradH./mean(gradH(:))); + vC2 = exp(-Beta.*gradV./mean(gradV(:))); + + %%% These matrices will evantually use as inputs to Bagon's code + hC = [hC2 zeros(size(hC2,1),1)]; + vC = [vC2 ;zeros(1, size(vC2,2))]; + sc = [0 G_para;G_para 0]; + + fgDist(mask(:)==1) = max(max(fgDist)); + bgDist(mask(:)==1) = min(min(bgDist)); + + dc = cat(3, (bgDist), (fgDist)); + graphHandle = GraphCut('open', dc , sc, vC, hC); + graphHandle = GraphCut('set', graphHandle, prevLabel); + [graphHandle, currLabel] = GraphCut('expand', graphHandle, 1000); + currLabel = 1 - currLabel; + GraphCut('close', graphHandle); + + bgProb = double(currLabel); + + if(sum(abs(prevLabel - currLabel)) < TOL) + break; + else + prevLabel = currLabel; + end + +end + +result = bgDist; + +rf.mainTree = trees; +rf.leaf2partialTree = leaf2partialTree; +if(size(partialTree,1)<1) + rf.partialTree = []; +else + rf.partialTree = partialTree; +end +rf.mainProb = d; +rf.subProb = d3; \ No newline at end of file diff --git a/strong/update_stSaliency.m b/strong/update_stSaliency.m new file mode 100644 index 0000000..af81a46 --- /dev/null +++ b/strong/update_stSaliency.m @@ -0,0 
+1,33 @@ +function [new_rf, result] = update_stSaliency(feature, mask, old_rf) + +N_TREE = 1; +N_ADDTREE = 1; +MAX_TREE = 20; + +[rf, ~] = init_pgrf(feature, mask, N_TREE,old_rf.mainTree{1,1}); + +new_rf.mainTree = cat(1, rf.mainTree(1:N_ADDTREE,1), old_rf.mainTree); +aa = max(max(rf.leaf2partialTree(:, 1:N_ADDTREE))); +new_rf.leaf2partialTree = ... + cat(2, rf.leaf2partialTree(:, 1:N_ADDTREE), old_rf.leaf2partialTree+aa*(old_rf.leaf2partialTree>0)); +if(aa > 0) + new_rf.partialTree = cat(1, rf.partialTree(1:aa,1), old_rf.partialTree); + new_rf.subProb = cat(1, rf.subProb(1:aa,1), old_rf.subProb); +else + new_rf.partialTree = old_rf.partialTree; + new_rf.subProb = old_rf.subProb; +end +new_rf.mainProb = cat(2, rf.mainProb(:,1:N_ADDTREE), old_rf.mainProb); + + +if(size(new_rf.mainTree, 1) > MAX_TREE) + new_rf.mainTree = new_rf.mainTree(1:MAX_TREE, 1); + new_rf.leaf2partialTree = new_rf.leaf2partialTree(:,1:MAX_TREE); + if(max(vec(new_rf.leaf2partialTree)) > 0) + new_rf.partialTree = new_rf.partialTree(1:max(vec(new_rf.leaf2partialTree)), 1); + new_rf.subProb = new_rf.subProb(1:size(new_rf.partialTree,1), 1); + end + new_rf.mainProb = new_rf.mainProb(:,1:MAX_TREE); +end + +result = eval_pgrf(feature, new_rf); diff --git a/strong/vec.m b/strong/vec.m new file mode 100644 index 0000000..55c78b7 --- /dev/null +++ b/strong/vec.m @@ -0,0 +1,3 @@ +function output = vec(input) + +output = input(:); \ No newline at end of file