diff --git a/KCF/fhog.m b/KCF/fhog.m new file mode 100644 index 0000000..608af40 --- /dev/null +++ b/KCF/fhog.m @@ -0,0 +1,76 @@ +function H = fhog( I, binSize, nOrients, clip, crop ) +% Efficiently compute Felzenszwalb's HOG (FHOG) features. +% +% A fast implementation of the HOG variant used by Felzenszwalb et al. +% in their work on discriminatively trained deformable part models. +% http://www.cs.berkeley.edu/~rbg/latent/index.html +% Gives nearly identical results to features.cc in code release version 5 +% but runs 4x faster (over 125 fps on VGA color images). +% +% The computed HOG features are 3*nOrients+5 dimensional. There are +% 2*nOrients contrast sensitive orientation channels, nOrients contrast +% insensitive orientation channels, 4 texture channels and 1 all zeros +% channel (used as a 'truncation' feature). Using the standard value of +% nOrients=9 gives a 32 dimensional feature vector at each cell. This +% variant of HOG, refered to as FHOG, has been shown to achieve superior +% performance to the original HOG features. For details please refer to +% work by Felzenszwalb et al. (see link above). +% +% This function is essentially a wrapper for calls to gradientMag() +% and gradientHist(). Specifically, it is equivalent to the following: +% [M,O] = gradientMag( I,0,0,0,1 ); softBin = -1; useHog = 2; +% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +% See gradientHist() for more general usage. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% H = fhog( I, [binSize], [nOrients], [clip], [crop] ) +% +% INPUTS +% I - [hxw] color or grayscale input image (must have type single) +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% clip - [.2] value at which to clip histogram bins +% crop - [0] if true crop boundaries +% +% OUTPUTS +% H - [h/binSize w/binSize nOrients*3+5] computed hog features +% +% EXAMPLE +% I=imResample(single(imread('peppers.png'))/255,[480 640]); +% tic, for i=1:100, H=fhog(I,8,9); end; disp(100/toc) % >125 fps +% figure(1); im(I); V=hogDraw(H,25,1); figure(2); im(V) +% +% EXAMPLE +% % comparison to features.cc (requires DPM code release version 5) +% I=imResample(single(imread('peppers.png'))/255,[480 640]); Id=double(I); +% tic, for i=1:100, H1=features(Id,8); end; disp(100/toc) +% tic, for i=1:100, H2=fhog(I,8,9,.2,1); end; disp(100/toc) +% figure(1); montage2(H1); figure(2); montage2(H2); +% D=abs(H1-H2); mean(D(:)) +% +% See also hog, hogDraw, gradientHist +% +% Piotr's Image&Video Toolbox Version 3.23 +% Copyright 2013 Piotr Dollar. [pdollar-at-caltech.edu] +% Please email me if you find bugs, or have suggestions or questions! 
+% Licensed under the Simplified BSD License [see external/bsd.txt] + +%Note: modified to be more self-contained + +if( nargin<2 ), binSize=8; end +if( nargin<3 ), nOrients=9; end +if( nargin<4 ), clip=.2; end +if( nargin<5 ), crop=0; end + +softBin = -1; useHog = 2; b = binSize; + +[M,O]=gradientMex('gradientMag',I,0,1); + +H = gradientMex('gradientHist',M,O,binSize,nOrients,softBin,useHog,clip); + +if( crop ), e=mod(size(I),b) size(im,2)) = size(im,2); + ys(ys > size(im,1)) = size(im,1); + + %extract image + out = im(ys, xs, :); + +end + diff --git a/KCF/gradientMex.mexa64 b/KCF/gradientMex.mexa64 new file mode 100644 index 0000000..3ca9460 Binary files /dev/null and b/KCF/gradientMex.mexa64 differ diff --git a/KCF/linear_correlation.m b/KCF/linear_correlation.m new file mode 100644 index 0000000..1928318 --- /dev/null +++ b/KCF/linear_correlation.m @@ -0,0 +1,17 @@ +function kf = linear_correlation(xf, yf) +%LINEAR_CORRELATION Linear Kernel at all shifts, i.e. correlation. +% Computes the dot-product for all relative shifts between input images +% X and Y, which must both be MxN. They must also be periodic (ie., +% pre-processed with a cosine window). The result is an MxN map of +% responses. +% +% Inputs and output are all in the Fourier domain. +% +% Joao F. 
Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + %cross-correlation term in Fourier domain + kf = sum(xf .* conj(yf), 3) / numel(xf); + +end + diff --git a/KCF/load_video_info.m b/KCF/load_video_info.m new file mode 100644 index 0000000..81c0c5e --- /dev/null +++ b/KCF/load_video_info.m @@ -0,0 +1,60 @@ +function [img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path) +%LOAD_VIDEO_INFO +% Loads all the relevant information for the video in the given path: +% the list of image files (cell array of strings), initial position +% (1x2), target size (1x2), the ground truth information for precision +% calculations (Nx2, for N frames), and the path where the images are +% located. The ordering of coordinates and sizes is always [y, x]. +% +% Joao F. Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + + %full path to the video's files + if base_path(end) ~= '/' && base_path(end) ~= '\', + base_path(end+1) = '/'; + end + video_path = base_path; + + %try to load ground truth from text file (Benchmark's format) + filename = [video_path 'groundtruth_rect.txt']; + f = fopen(filename); + assert(f ~= -1, ['No initial position or ground truth to load ("' filename '").']) + + %the format is [x, y, width, height] + try + ground_truth = textscan(f, '%f,%f,%f,%f', 'ReturnOnError',false); + catch %#ok, try different format (no commas) + frewind(f); + ground_truth = textscan(f, '%f %f %f %f'); + end + ground_truth = cat(2, ground_truth{:}); + fclose(f); + + %set initial position and size + target_sz = [ground_truth(1,4), ground_truth(1,3)]; + pos = [ground_truth(1,2), ground_truth(1,1)] + floor(target_sz/2); + + if size(ground_truth,1) == 1, + %we have ground truth for the first frame only (initial position) + ground_truth = []; + else + %store positions instead of boxes + ground_truth = ground_truth(:,[2,1]) + ground_truth(:,[4,3]) / 2; + end + + + %from now on, work in the subfolder where all the images are + video_path = [video_path 'img/']; + + 
%general case, just list all images + img_files = dir([video_path '*.png']); + if isempty(img_files), + img_files = dir([video_path '*.jpg']); + assert(~isempty(img_files), 'No image files to load.') + end + img_files = sort({img_files.name}); + + +end + diff --git a/KCF/polynomial_correlation.m b/KCF/polynomial_correlation.m new file mode 100644 index 0000000..4d8b22b --- /dev/null +++ b/KCF/polynomial_correlation.m @@ -0,0 +1,22 @@ +function kf = polynomial_correlation(xf, yf, a, b) +%POLYNOMIAL_CORRELATION Polynomial Kernel at all shifts, i.e. kernel correlation. +% Evaluates a polynomial kernel with constant A and exponent B, for all +% relative shifts between input images XF and YF, which must both be MxN. +% They must also be periodic (ie., pre-processed with a cosine window). +% The result is an MxN map of responses. +% +% Inputs and output are all in the Fourier domain. +% +% Joao F. Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + %cross-correlation term in Fourier domain + xyf = xf .* conj(yf); + xy = sum(real(ifft2(xyf)), 3); %to spatial domain + + %calculate polynomial response for all positions, then go back to the + %Fourier domain + kf = fft2((xy / numel(xf) + a) .^ b); + +end + diff --git a/KCF/show_video.m b/KCF/show_video.m new file mode 100644 index 0000000..e122be6 --- /dev/null +++ b/KCF/show_video.m @@ -0,0 +1,77 @@ +function update_visualization_func = show_video(img_files, video_path, resize_image) +%SHOW_VIDEO +% Visualizes a tracker in an interactive figure, given a cell array of +% image file names, their path, and whether to resize the images to +% half size or not. +% +% This function returns an UPDATE_VISUALIZATION function handle, that +% can be called with a frame number and a bounding box [x, y, width, +% height], as soon as the results for a new frame have been calculated. +% This way, your results are shown in real-time, but they are also +% remembered so you can navigate and inspect the video afterwards. 
+% Press 'Esc' to send a stop signal (returned by UPDATE_VISUALIZATION). +% +% Joao F. Henriques, 2014 +% http://www.isr.uc.pt/~henriques/ + + + %store one instance per frame + num_frames = numel(img_files); + boxes = cell(num_frames,1); + + %create window + [fig_h, axes_h, unused, scroll] = videofig(num_frames, @redraw, [], [], @on_key_press); %#ok, unused outputs +% set(fig_h, 'number','off', 'name', ['Tracker - ' video_path]) + axis off; + + %image and rectangle handles start empty, they are initialized later + im_h = []; + rect_h = []; + + update_visualization_func = @update_visualization; + stop_tracker = false; + + + function stop = update_visualization(frame, box) + %store the tracker instance for one frame, and show it. returns + %true if processing should stop (user pressed 'Esc'). + boxes{frame} = box; + scroll(frame); + stop = stop_tracker; + end + + function redraw(frame) + %render main image + im = imread([video_path img_files{frame}]); +% if size(im,3) > 1, +% im = rgb2gray(im); +% end + if resize_image, + im = imresize(im, 0.5); + end + + if isempty(im_h), %create image + im_h = imshow(im, 'Border','tight', 'InitialMag',200, 'Parent',axes_h); + else %just update it + set(im_h, 'CData', im) + end + + %render target bounding box for this frame + if isempty(rect_h), %create it for the first time + rect_h = rectangle('Position',[0,0,1,1], 'EdgeColor','r', 'LineWidth', 3, 'Parent',axes_h); + end + if ~isempty(boxes{frame}), + set(rect_h, 'Visible', 'on', 'Position', boxes{frame}); + else + set(rect_h, 'Visible', 'off'); + end + end + + function on_key_press(key) + if strcmp(key, 'escape'), %stop on 'Esc' + stop_tracker = true; + end + end + +end + diff --git a/KCF/videofig.m b/KCF/videofig.m new file mode 100644 index 0000000..2af846d --- /dev/null +++ b/KCF/videofig.m @@ -0,0 +1,230 @@ +function [fig_handle, axes_handle, scroll_bar_handles, scroll_func] = ... + videofig(num_frames, redraw_func, play_fps, big_scroll, ... 
+ key_func, varargin) +%VIDEOFIG Figure with horizontal scrollbar and play capabilities. +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC) +% Creates a figure with a horizontal scrollbar and shortcuts to scroll +% automatically. The scroll range is 1 to NUM_FRAMES. The function +% REDRAW_FUNC(F) is called to redraw at scroll position F (for example, +% REDRAW_FUNC can show the frame F of a video). +% This can be used not only to play and analyze standard videos, but it +% also lets you place any custom Matlab plots and graphics on top. +% +% The keyboard shortcuts are: +% Enter (Return) -- play/pause video (25 frames-per-second default). +% Backspace -- play/pause video 5 times slower. +% Right/left arrow keys -- advance/go back one frame. +% Page down/page up -- advance/go back 30 frames. +% Home/end -- go to first/last frame of video. +% +% Advanced usage +% -------------- +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC, FPS, BIG_SCROLL) +% Also specifies the speed of the play function (frames-per-second) and +% the frame step of page up/page down (or empty for defaults). +% +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC, FPS, BIG_SCROLL, @KEY_FUNC) +% Also calls KEY_FUNC(KEY) with any keys that weren't processed, so you +% can add more shortcut keys (or empty for none). +% +% VIDEOFIG(NUM_FRAMES, @REDRAW_FUNC, FPS, BIG_SCROLL, @KEY_FUNC, ...) +% Passes any additional arguments to the native FIGURE function (for +% example: 'Name', 'Video figure title'). +% +% [FIG_HANDLE, AX_HANDLE, OTHER_HANDLES, SCROLL] = VIDEOFIG(...) +% Returns the handles of the figure, drawing axes and other handles (of +% the scrollbar's graphics), respectively. SCROLL(F) can be called to +% scroll to frame F, or with no arguments to just redraw the figure. 
+% +% Example 1 +% --------- +% Place this in a file called "redraw.m": +% function redraw(frame) +% imshow(['AT3_1m4_' num2str(frame, '%02.0f') '.tif']) +% end +% +% Then from a script or the command line, call: +% videofig(10, @redraw); +% redraw(1) +% +% The images "AT3_1m4_01.tif" ... "AT3_1m4_10.tif" are part of the Image +% Processing Toolbox and there's no need to download them elsewhere. +% +% Example 2 +% --------- +% Change the redraw function to visualize the contour of a single cell: +% function redraw(frame) +% im = imread(['AT3_1m4_' num2str(frame, '%02.0f') '.tif']); +% slice = im(210:310, 210:340); +% [ys, xs] = find(slice < 50 | slice > 100); +% pos = 210 + median([xs, ys]); +% siz = 3.5 * std([xs, ys]); +% imshow(im), hold on +% rectangle('Position',[pos - siz/2, siz], 'EdgeColor','g', 'Curvature',[1, 1]) +% hold off +% end +% +% João Filipe Henriques, 2010 + + %default parameter values + if nargin < 3 || isempty(play_fps), play_fps = 25; end %play speed (frames per second) + if nargin < 4 || isempty(big_scroll), big_scroll = 30; end %page-up and page-down advance, in frames + if nargin < 5, key_func = []; end + + %check arguments + check_int_scalar(num_frames); + check_callback(redraw_func); + check_int_scalar(play_fps); + check_int_scalar(big_scroll); + check_callback(key_func); + + click = 0; + f = 1; %current frame + + %initialize figure + fig_handle = figure('Color',[.3 .3 .3], 'MenuBar','none', 'Units','norm', ... + 'WindowButtonDownFcn',@button_down, 'WindowButtonUpFcn',@button_up, ... + 'WindowButtonMotionFcn', @on_click, 'KeyPressFcn', @key_press, ... + 'Interruptible','off', 'BusyAction','cancel', varargin{:}); + + %axes for scroll bar + scroll_axes_handle = axes('Parent',fig_handle, 'Position',[0 0 1 0.03], ... + 'Visible','off', 'Units', 'normalized'); + axis([0 1 0 1]); + axis off + + %scroll bar + scroll_bar_width = max(1 / num_frames, 0.01); + scroll_handle = patch([0 1 1 0] * scroll_bar_width, [0 0 1 1], [.8 .8 .8], ... 
+ 'Parent',scroll_axes_handle, 'EdgeColor','none', 'ButtonDownFcn', @on_click); + + %timer to play video + play_timer = timer('TimerFcn',@play_timer_callback, 'ExecutionMode','fixedRate'); + + %main drawing axes for video display + axes_handle = axes('Position',[0 0.03 1 0.97]); + + %return handles + scroll_bar_handles = [scroll_axes_handle; scroll_handle]; + scroll_func = @scroll; + + + + function key_press(src, event) %#ok, unused arguments + switch event.Key, %process shortcut keys + case 'leftarrow', + scroll(f - 1); + case 'rightarrow', + scroll(f + 1); + case 'pageup', + if f - big_scroll < 1, %scrolling before frame 1, stop at frame 1 + scroll(1); + else + scroll(f - big_scroll); + end + case 'pagedown', + if f + big_scroll > num_frames, %scrolling after last frame + scroll(num_frames); + else + scroll(f + big_scroll); + end + case 'home', + scroll(1); + case 'end', + scroll(num_frames); + case 'return', + play(1/play_fps) + case 'backspace', + play(5/play_fps) + otherwise, + if ~isempty(key_func), + key_func(event.Key); %#ok, call custom key handler + end + end + end + + %mouse handler + function button_down(src, event) %#ok, unused arguments + set(src,'Units','norm') + click_pos = get(src, 'CurrentPoint'); + if click_pos(2) <= 0.03, %only trigger if the scrollbar was clicked + click = 1; + on_click([],[]); + end + end + + function button_up(src, event) %#ok, unused arguments + click = 0; + end + + function on_click(src, event) %#ok, unused arguments + if click == 0, return; end + + %get x-coordinate of click + set(fig_handle, 'Units', 'normalized'); + click_point = get(fig_handle, 'CurrentPoint'); + set(fig_handle, 'Units', 'pixels'); + x = click_point(1); + + %get corresponding frame number + new_f = floor(1 + x * num_frames); + + if new_f < 1 || new_f > num_frames, return; end %outside valid range + + if new_f ~= f, %don't redraw if the frame is the same (to prevent delays) + scroll(new_f); + end + end + + function play(period) + %toggle between stoping 
and starting the "play video" timer + if strcmp(get(play_timer,'Running'), 'off'), + set(play_timer, 'Period', period); + start(play_timer); + else + stop(play_timer); + end + end + function play_timer_callback(src, event) %#ok + %executed at each timer period, when playing the video + if f < num_frames, + scroll(f + 1); + elseif strcmp(get(play_timer,'Running'), 'on'), + stop(play_timer); %stop the timer if the end is reached + end + end + + function scroll(new_f) + if nargin == 1, %scroll to another position (new_f) + if new_f < 1 || new_f > num_frames, + return + end + f = new_f; + end + + %convert frame number to appropriate x-coordinate of scroll bar + scroll_x = (f - 1) / num_frames; + + %move scroll bar to new position + set(scroll_handle, 'XData', scroll_x + [0 1 1 0] * scroll_bar_width); + + %set to the right axes and call the custom redraw function + set(fig_handle, 'CurrentAxes', axes_handle); + redraw_func(f); + + %used to be "drawnow", but when called rapidly and the CPU is busy + %it didn't let Matlab process events properly (ie, close figure). + pause(0.001) + end + + %convenience functions for argument checks + function check_int_scalar(a) + assert(isnumeric(a) && isscalar(a) && isfinite(a) && a == round(a), ... + [upper(inputname(1)) ' must be a scalar integer number.']); + end + function check_callback(a) + assert(isempty(a) || strcmp(class(a), 'function_handle'), ... + [upper(inputname(1)) ' must be a valid function handle.']) + end +end + diff --git a/channels/Contents.m b/channels/Contents.m new file mode 100644 index 0000000..c8a9813 --- /dev/null +++ b/channels/Contents.m @@ -0,0 +1,38 @@ +% CHANNELS +% See also +% +% Fast channel feature computation code based on the papers: +% [1] P. Dollár, Z. Tu, P. Perona and S. Belongie +% "Integral Channel Features", BMVC 2009. +% [2] P. Dollár, S. Belongie and P. Perona +% "The Fastest Pedestrian Detector in the West," BMVC 2010. +% [3] P. Dollár, R. Appel and W. 
Kienzle +% "Crosstalk Cascades for Frame-Rate Pedestrian Detection," ECCV 2012. +% [4] P. Dollár, R. Appel, S. Belongie and P. Perona +% "Fast Feature Pyramids for Object Detection", PAMI 2014. +% Please cite a subset of the above papers if you end up using the code. +% The PAMI 2014 paper has the most thorough and up to date descriptions. +% Code written and maintained by Piotr Dollar and Ron Appel. +% +% Channels: +% chnsCompute - Compute channel features at a single scale given an input image. +% chnsPyramid - Compute channel feature pyramid given an input image. +% chnsScaling - Compute lambdas for channel power law scaling. +% +% Constant time image smoothing: +% convBox - Extremely fast 2D image convolution with a box filter. +% convMax - Extremely fast 2D image convolution with a max filter. +% convTri - Extremely fast 2D image convolution with a triangle filter. +% +% Gradients and gradient histograms: +% gradient2 - Compute numerical gradients along x and y directions. +% gradientHist - Compute oriented gradient histograms. +% gradientMag - Compute gradient magnitude and orientation at each image location. +% hog - Efficiently compute histogram of oriented gradient (HOG) features. +% hogDraw - Create visualization of hog descriptor. +% fhog - Efficiently compute Felzenszwalb's HOG (FHOG) features. +% +% Miscellaneous: +% imPad - Pad an image along its four boundaries. +% imResample - Fast bilinear image downsampling/upsampling. +% rgbConvert - Convert RGB image to other color spaces (highly optimized). diff --git a/channels/chnsCompute.m b/channels/chnsCompute.m new file mode 100644 index 0000000..6864da0 --- /dev/null +++ b/channels/chnsCompute.m @@ -0,0 +1,187 @@ +function chns = chnsCompute( I, varargin ) +% Compute channel features at a single scale given an input image. +% +% Compute the channel features as described in: +% P. Dollár, Z. Tu, P. Perona and S. Belongie +% "Integral Channel Features", BMVC 2009. 
+% Channel features have proven very effective in sliding window object +% detection, both in terms of *accuracy* and *speed*. Numerous feature +% types including histogram of gradients (hog) can be converted into +% channel features, and overall, channels are general and powerful. +% +% Given an input image I, a corresponding channel is a registered map of I, +% where the output pixels are computed from corresponding patches of input +% pixels (thus preserving overall image layout). A trivial channel is +% simply the input grayscale image, likewise for a color image each color +% channel can serve as a channel. Other channels can be computed using +% linear or non-linear transformations of I, various choices implemented +% here are described below. The only constraint is that channels must be +% translationally invariant (i.e. translating the input image or the +% resulting channels gives the same result). This allows for fast object +% detection, as the channels can be computed once on the entire image +% rather than separately for each overlapping detection window. +% +% Currently, three channel types are available by default (to date, these +% have proven the most effective for sliding window object detection): +% (1) color channels (computed using rgbConvert.m) +% (2) gradient magnitude (computed using gradientMag.m) +% (3) quantized gradient channels (computed using gradientHist.m) +% For more information about each channel type, including the exact input +% parameters and their meanings, see the respective m-files which perform +% the actual computatons (chnsCompute is essentially a wrapper function). +% The converted color channels serve as input to gradientMag/gradientHist. +% +% Additionally, custom channels can be specified via an optional struct +% array "pCustom" which may have 0 or more custom channel definitions. Each +% custom channel is generated via a call to "chns=feval(hFunc,I,pFunc{:})". 
+% The color space of I is determined by pColor.colorSpace, use the setting +% colorSpace='orig' if the input image is not an 'rgb' image and should be +% left unchanged (e.g. if I has multiple channels). The input I will have +% type single and the output of hFunc should also have type single. +% +% "shrink" (which should be an integer) determines the amount to subsample +% the computed channels (in applications such as detection subsamping does +% not affect performance). The params for each channel type are described +% in detail in the respective function. In addition, each channel type has +% a param "enabled" that determines if the channel is computed. If +% chnsCompute() is called with no inputs, the output is the complete +% default params (pChns). Otherwise the outputs are the computed channels +% and additional meta-data (see below). The channels are computed at a +% single scale, for (fast) multi-scale channel computation see chnsPyramid. +% +% An emphasis has been placed on speed, with the code undergoing heavy +% optimization. Computing the full set of channels used in the BMVC09 paper +% referenced above on a 480x640 image runs over *100 fps* on a single core +% of a machine from 2011 (although runtime depends on input parameters). 
+% +% USAGE +% pChns = chnsCompute() +% chns = chnsCompute( I, pChns ) +% +% INPUTS +% I - [hxwx3] input image (uint8 or single/double in [0,1]) +% pChns - parameters (struct or name/value pairs) +% .shrink - [4] integer downsampling amount for channels +% .pColor - parameters for color space: +% .enabled - [1] if true enable color channels +% .smooth - [1] radius for image smoothing (using convTri) +% .colorSpace - ['luv'] choices are: 'gray', 'rgb', 'hsv', 'orig' +% .pGradMag - parameters for gradient magnitude: +% .enabled - [1] if true enable gradient magnitude channel +% .colorChn - [0] if>0 color channel to use for grad computation +% .normRad - [5] normalization radius for gradient +% .normConst - [.005] normalization constant for gradient +% .full - [0] if true compute angles in [0,2*pi) else in [0,pi) +% .pGradHist - parameters for gradient histograms: +% .enabled - [1] if true enable gradient histogram channels +% .binSize - [shrink] spatial bin size (defaults to shrink) +% .nOrients - [6] number of orientation channels +% .softBin - [0] if true use "soft" bilinear spatial binning +% .useHog - [0] if true perform 4-way hog normalization/clipping +% .clipHog - [.2] value at which to clip hog histogram bins +% .pCustom - parameters for custom channels (optional struct array): +% .enabled - [1] if true enable custom channel type +% .name - ['REQ'] custom channel type name +% .hFunc - ['REQ'] function handle for computing custom channels +% .pFunc - [{}] additional params for chns=hFunc(I,pFunc{:}) +% .padWith - [0] how channel should be padded (e.g. 
0,'replicate') +% .complete - [] if true does not check/set default vals in pChns +% +% OUTPUTS +% chns - output struct +% .pChns - exact input parameters used +% .nTypes - number of channel types +% .data - [nTypes x 1] cell [h/shrink x w/shrink x nChns] channels +% .info - [nTypes x 1] struct array +% .name - channel type name +% .pChn - exact input parameters for given channel type +% .nChns - number of channels for given channel type +% .padWith - how channel should be padded (0,'replicate') +% +% EXAMPLE - default channels +% I=imResample(imread('peppers.png'),[480 640]); pChns=chnsCompute(); +% tic, for i=1:100, chns=chnsCompute(I,pChns); end; toc +% figure(1); montage2(cat(3,chns.data{:})); +% +% EXAMPLE - default + custom channels +% I=imResample(imread('peppers.png'),[480 640]); pChns=chnsCompute(); +% hFunc=@(I) 5*sqrt(max(0,max(convBox(I.^2,2)-convBox(I,2).^2,[],3))); +% pChns.pCustom=struct('name','Std02','hFunc',hFunc); pChns.complete=0; +% tic, chns=chnsCompute(I,pChns); toc +% figure(1); im(chns.data{4}); +% +% See also rgbConvert, gradientMag, gradientHist, chnsPyramid +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get default parameters pChns +if(nargin==2), pChns=varargin{1}; else pChns=[]; end +if( ~isfield(pChns,'complete') || pChns.complete~=1 || isempty(I) ) + p=struct('enabled',{},'name',{},'hFunc',{},'pFunc',{},'padWith',{}); + pChns = getPrmDflt(varargin,{'shrink',4,'pColor',{},'pGradMag',{},... + 'pGradHist',{},'pCustom',p,'complete',1},1); + pChns.pColor = getPrmDflt( pChns.pColor, {'enabled',1,... + 'smooth',1, 'colorSpace','luv'}, 1 ); + pChns.pGradMag = getPrmDflt( pChns.pGradMag, {'enabled',1,... + 'colorChn',0,'normRad',5,'normConst',.005,'full',0}, 1 ); + pChns.pGradHist = getPrmDflt( pChns.pGradHist, {'enabled',1,... 
+ 'binSize',[],'nOrients',6,'softBin',0,'useHog',0,'clipHog',.2}, 1 ); + nc=length(pChns.pCustom); pc=cell(1,nc); + for i=1:nc, pc{i} = getPrmDflt( pChns.pCustom(i), {'enabled',1,... + 'name','REQ','hFunc','REQ','pFunc',{},'padWith',0}, 1 ); end + if( nc>0 ), pChns.pCustom=[pc{:}]; end +end +if(nargin==0), chns=pChns; return; end + +% create output struct +info=struct('name',{},'pChn',{},'nChns',{},'padWith',{}); +chns=struct('pChns',pChns,'nTypes',0,'data',{{}},'info',info); + +% crop I so divisible by shrink and get target dimensions +shrink=pChns.shrink; [h,w,~]=size(I); cr=mod([h w],shrink); +if(any(cr)), h=h-cr(1); w=w-cr(2); I=I(1:h,1:w,:); end +h=h/shrink; w=w/shrink; + +% compute color channels +p=pChns.pColor; nm='color channels'; +I=rgbConvert(I,p.colorSpace); I=convTri(I,p.smooth); +if(p.enabled), chns=addChn(chns,I,nm,p,'replicate',h,w); end + +% compute gradient magnitude channel +p=pChns.pGradMag; nm='gradient magnitude'; +full=0; if(isfield(p,'full')), full=p.full; end +if( pChns.pGradHist.enabled ) + [M,O]=gradientMag(I,p.colorChn,p.normRad,p.normConst,full); +elseif( p.enabled ) + M=gradientMag(I,p.colorChn,p.normRad,p.normConst,full); +end +if(p.enabled), chns=addChn(chns,M,nm,p,0,h,w); end + +% compute gradient histgoram channels +p=pChns.pGradHist; nm='gradient histogram'; +if( p.enabled ) + binSize=p.binSize; if(isempty(binSize)), binSize=shrink; end + H=gradientHist(M,O,binSize,p.nOrients,p.softBin,p.useHog,p.clipHog,full); + chns=addChn(chns,H,nm,pChns.pGradHist,0,h,w); +end + +% compute custom channels +p=pChns.pCustom; +for i=find( [p.enabled] ) + C=feval(p(i).hFunc,I,p(i).pFunc{:}); + chns=addChn(chns,C,p(i).name,p(i),p(i).padWith,h,w); +end + +end + +function chns = addChn( chns, data, name, pChn, padWith, h, w ) +% Helper function to add a channel to chns. 
+[h1,w1,~]=size(data); +if(h1~=h || w1~=w), data=imResampleMex(data,h,w,1); + assert(all(mod([h1 w1]./[h w],1)==0)); end +chns.data{end+1}=data; chns.nTypes=chns.nTypes+1; +chns.info(end+1)=struct('name',name,'pChn',pChn,... + 'nChns',size(data,3),'padWith',padWith); +end diff --git a/channels/chnsPyramid.m b/channels/chnsPyramid.m new file mode 100644 index 0000000..2c83bf7 --- /dev/null +++ b/channels/chnsPyramid.m @@ -0,0 +1,204 @@ +function pyramid = chnsPyramid( I, varargin ) +% Compute channel feature pyramid given an input image. +% +% While chnsCompute() computes channel features at a single scale, +% chnsPyramid() calls chnsCompute() multiple times on different scale +% images to create a scale-space pyramid of channel features. +% +% In its simplest form, chnsPyramid() first creates an image pyramid, then +% calls chnsCompute() with the specified "pChns" on each scale of the image +% pyramid. The parameter "nPerOct" determines the number of scales per +% octave in the image pyramid (an octave is the set of scales up to half of +% the initial scale), a typical value is nPerOct=8 in which case each scale +% in the pyramid is 2^(-1/8)~=.917 times the size of the previous. The +% smallest scale of the pyramid is determined by "minDs", once either image +% dimension in the resized image falls below minDs, pyramid creation stops. +% The largest scale in the pyramid is determined by "nOctUp" which +% determines the number of octaves to compute above the original scale. +% +% While calling chnsCompute() on each image scale works, it is unnecessary. +% For a broad family of features, including gradient histograms and all +% channel types tested, the feature responses computed at a single scale +% can be used to approximate feature responses at nearby scales. The +% approximation is accurate at least within an entire scale octave. For +% details and to understand why this unexpected result holds, please see: +% P. Dollár, R. Appel, S. Belongie and P. 
Perona +% "Fast Feature Pyramids for Object Detection", PAMI 2014. +% +% The parameter "nApprox" determines how many intermediate scales are +% approximated using the techniques described in the above paper. Roughly +% speaking, channels at approximated scales are computed by taking the +% corresponding channel at the nearest true scale (computed w chnsCompute) +% and resampling and re-normalizing it appropriately. For example, if +% nPerOct=8 and nApprox=7, then the 7 intermediate scales are approximated +% and only power of two scales are actually computed (using chnsCompute). +% The parameter "lambdas" determines how the channels are normalized (see +% the above paper). lambdas for a given set of channels can be computed +% using chnsScaling.m, alternatively, if no lambdas are specified, the +% lambdas are automatically approximated using two true image scales. +% +% Typically approximating all scales within an octave (by setting +% nApprox=nPerOct-1 or nApprox=-1) works well, and results in large speed +% gains (~4x). See example below for a visualization of the pyramid +% computed with and without the approximation. While there is a slight +% difference in the channels, during detection the approximated channels +% have been shown to be essentially as effective as the original channels. +% +% While every effort is made to space the image scales evenly, this is not +% always possible. For example, given a 101x100 image, it is impossible to +% downsample it by exactly 1/2 along the first dimension, moreover, the +% exact scaling along the two dimensions will differ. Instead, the scales +% are tweaked slightly (e.g. for a 101x101 image the scale would go from +% 1/2 to something like 50/101), and the output contains the exact scaling +% factors used for both the heights and the widths ("scaleshw") and also +% the approximate scale for both dimensions ("scales"). 
If "shrink">1 the +% scales are further tweaked so that the resized image has dimensions that +% are exactly divisible by shrink (for details please see the code). +% +% If chnsPyramid() is called with no inputs, the output is the complete +% default parameters (pPyramid). Finally, we describe the remaining +% parameters: "pad" controls the amount the channels are padded after being +% created (useful for detecting objects near boundaries); "smooth" controls +% the amount of smoothing after the channels are created (and controls the +% integration scale of the channels); finally "concat" determines whether +% all channels at a single scale are concatenated in the output. +% +% An emphasis has been placed on speed, with the code undergoing heavy +% optimization. Computing the full set of (approximated) *multi-scale* +% channels on a 480x640 image runs over *30 fps* on a single core of a +% machine from 2011 (although runtime depends on input parameters). +% +% USAGE +% pPyramid = chnsPyramid() +% pyramid = chnsPyramid( I, pPyramid ) +% +% INPUTS +% I - [hxwx3] input image (uint8 or single/double in [0,1]) +% pPyramid - parameters (struct or name/value pairs) +% .pChns - parameters for creating channels (see chnsCompute.m) +% .nPerOct - [8] number of scales per octave +% .nOctUp - [0] number of upsampled octaves to compute +% .nApprox - [-1] number of approx. 
scales (if -1 nApprox=nPerOct-1) +% .lambdas - [] coefficients for power law scaling (see BMVC10) +% .pad - [0 0] amount to pad channels (along T/B and L/R) +% .minDs - [16 16] minimum image size for channel computation +% .smooth - [1] radius for channel smoothing (using convTri) +% .concat - [1] if true concatenate channels +% .complete - [] if true does not check/set default vals in pPyramid +% +% OUTPUTS +% pyramid - output struct +% .pPyramid - exact input parameters used (may change from input) +% .nTypes - number of channel types +% .nScales - number of scales computed +% .data - [nScales x nTypes] cell array of computed channels +% .info - [nTypes x 1] struct array (mirrored from chnsCompute) +% .lambdas - [nTypes x 1] scaling coefficients actually used +% .scales - [nScales x 1] relative scales (approximate) +% .scaleshw - [nScales x 2] exact scales for resampling h and w +% +% EXAMPLE +% I=imResample(imread('peppers.png'),[480 640]); +% pPyramid=chnsPyramid(); pPyramid.minDs=[128 128]; +% pPyramid.nApprox=0; tic, P1=chnsPyramid(I,pPyramid); toc +% pPyramid.nApprox=7; tic, P2=chnsPyramid(I,pPyramid); toc +% figure(1); montage2(P1.data{2}); figure(2); montage2(P2.data{2}); +% figure(3); montage2(abs(P1.data{2}-P2.data{2})); colorbar; +% +% See also chnsCompute, chnsScaling, convTri, imPad +% +% Piotr's Computer Vision Matlab Toolbox Version 3.25 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get default parameters pPyramid +if(nargin==2), p=varargin{1}; else p=[]; end +if( ~isfield(p,'complete') || p.complete~=1 || isempty(I) ) + dfs={ 'pChns',{}, 'nPerOct',8, 'nOctUp',0, 'nApprox',-1, ... + 'lambdas',[], 'pad',[0 0], 'minDs',[16 16], ... 
+ 'smooth',1, 'concat',1, 'complete',1 }; + p=getPrmDflt(varargin,dfs,1); chns=chnsCompute([],p.pChns); + p.pChns=chns.pChns; p.pChns.complete=1; shrink=p.pChns.shrink; + p.pad=round(p.pad/shrink)*shrink; p.minDs=max(p.minDs,shrink*4); + if(p.nApprox<0), p.nApprox=p.nPerOct-1; end +end +if(nargin==0), pyramid=p; return; end; pPyramid=p; +vs=struct2cell(p); [pChns,nPerOct,nOctUp,nApprox,lambdas,... + pad,minDs,smooth,concat,~]=deal(vs{:}); shrink=pChns.shrink; + +% convert I to appropriate color space (or simply normalize) +cs=pChns.pColor.colorSpace; sz=[size(I,1) size(I,2)]; +if(~all(sz==0) && size(I,3)==1 && ~any(strcmpi(cs,{'gray','orig'}))), + I=I(:,:,[1 1 1]); warning('Converting image to color'); end %#ok +I=rgbConvert(I,cs); pChns.pColor.colorSpace='orig'; + +% get scales at which to compute features and list of real/approx scales +[scales,scaleshw]=getScales(nPerOct,nOctUp,minDs,shrink,sz); +nScales=length(scales); if(1), isR=1; else isR=1+nOctUp*nPerOct; end +isR=isR:nApprox+1:nScales; isA=1:nScales; isA(isR)=[]; +j=[0 floor((isR(1:end-1)+isR(2:end))/2) nScales]; +isN=1:nScales; for i=1:length(isR), isN(j(i)+1:j(i+1))=isR(i); end +nTypes=0; data=cell(nScales,nTypes); info=struct([]); + +% compute image pyramid [real scales] +for i=isR + s=scales(i); sz1=round(sz*s/shrink)*shrink; + if(all(sz==sz1)), I1=I; else I1=imResampleMex(I,sz1(1),sz1(2),1); end + if(s==.5 && (nApprox>0 || nPerOct==1)), I=I1; end + chns=chnsCompute(I1,pChns); info=chns.info; + if(i==isR(1)), nTypes=chns.nTypes; data=cell(nScales,nTypes); end + data(i,:) = chns.data; +end + +% if lambdas not specified compute image specific lambdas +if( nScales>0 && nApprox>0 && isempty(lambdas) ) + is=1+nOctUp*nPerOct:nApprox+1:nScales; + assert(length(is)>=2); if(length(is)>2), is=is(2:3); end + f0=zeros(1,nTypes); f1=f0; d0=data(is(1),:); d1=data(is(2),:); + for j=1:nTypes, d=d0{j}; f0(j)=sum(d(:))/numel(d); end + for j=1:nTypes, d=d1{j}; f1(j)=sum(d(:))/numel(d); end + lambdas = - log2(f0./f1) / 
log2(scales(is(1))/scales(is(2))); +end + +% compute image pyramid [approximated scales] +for i=isA + iR=isN(i); sz1=round(sz*scales(i)/shrink); + for j=1:nTypes, ratio=(scales(i)/scales(iR)).^-lambdas(j); + data{i,j}=imResampleMex(data{iR,j},sz1(1),sz1(2),ratio); end +end + +% smooth channels, optionally pad and concatenate channels +for i=1:nScales*nTypes, data{i}=convTri(data{i},smooth); end +if(any(pad)), for i=1:nScales, for j=1:nTypes + data{i,j}=imPad(data{i,j},pad/shrink,info(j).padWith); end; end; end +if(concat && nTypes), data0=data; data=cell(nScales,1); end +if(concat && nTypes), for i=1:nScales, data{i}=cat(3,data0{i,:}); end; end + +% create output struct +j=info; if(~isempty(j)), j=find(strcmp('color channels',{j.name})); end +if(~isempty(j)), info(j).pChn.colorSpace=cs; end +pyramid = struct( 'pPyramid',pPyramid, 'nTypes',nTypes, ... + 'nScales',nScales, 'data',{data}, 'info',info, 'lambdas',lambdas, ... + 'scales',scales, 'scaleshw',scaleshw ); + +end + +function [scales,scaleshw] = getScales(nPerOct,nOctUp,minDs,shrink,sz) +% set each scale s such that max(abs(round(sz*s/shrink)*shrink-sz*s)) is +% minimized without changing the smaller dim of sz (tricky algebra) +if(any(sz==0)), scales=[]; scaleshw=[]; return; end +nScales = floor(nPerOct*(nOctUp+log2(min(sz./minDs)))+1); +scales = 2.^(-(0:nScales-1)/nPerOct+nOctUp); +if(sz(1)1000) the computed +% lambdas should not depend on the exact images used. 
+% +% USAGE +% [lambdas,as,scales,fs] = chnsScaling( pChns, Is, [show] ) +% +% INPUTS +% pChns - parameters for creating channels (see chnsCompute.m) +% Is - [nImages x 1] cell array of images (nImages may be 1) +% show - [1] figure in which to display results +% +% OUTPUTS +% lambdas - [nTypes x 1] computed lambdas +% as - [nTypes x 1] computed y-intercepts +% scales - [nScales x 1] vector of actual scales used +% fs - [nImages x nScales x nTypes] array of feature means +% +% EXAMPLE +% sDir = 'data/Inria/train/neg/'; +% Is = fevalImages( @(x) {x}, {}, sDir, 'I', 'png', 0, 200 ); +% p = chnsCompute(); lambdas = chnsScaling( p, Is, 1 ); +% +% See also chnsCompute, chnsPyramid, fevalImages +% +% Piotr's Computer Vision Matlab Toolbox Version 3.25 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get additional input arguments +if(nargin<3 || isempty(show)), show=1; end + +% construct pPyramid (don't pad, concat or appoximate) +pPyramid=chnsPyramid(); pPyramid.pChns=pChns; pPyramid.concat=0; +pPyramid.pad=[0 0]; pPyramid.nApprox=0; pPyramid.smooth=0; +pPyramid.minDs(:)=max(8,pChns.shrink*4); + +% crop all images to smallest image size +ds=[inf inf]; nImages=numel(Is); +for i=1:nImages, ds=min(ds,[size(Is{i},1) size(Is{i},2)]); end +ds=round(ds/pChns.shrink)*pChns.shrink; +for i=1:nImages, Is{i}=Is{i}(1:ds(1),1:ds(2),:); end + +% compute fs [nImages x nScales x nTypes] array of feature means +P=chnsPyramid(Is{1},pPyramid); scales=P.scales'; info=P.info; +nScales=P.nScales; nTypes=P.nTypes; fs=zeros(nImages,nScales,nTypes); +parfor i=1:nImages, P=chnsPyramid(Is{i},pPyramid); for j=1:nScales + for k=1:nTypes, fs(i,j,k)=mean(P.data{j,k}(:)); end; end; end + +% remove fs with fs(:,1,:) having small values +kp=max(fs(:,1,:)); kp=fs(:,1,:)>kp(ones(1,nImages),1,:)/50; +kp=min(kp,[],3); fs=fs(kp,:,:); nImages=size(fs,1); + +% compute ratios, intercepts and lambdas using least squares 
+scales1=scales(2:end); nScales=nScales-1; O=ones(nScales,1); +rs=fs(:,2:end,:)./fs(:,O,:); mus=permute(mean(rs,1),[2 3 1]); +out=[O -log2(scales1)]\log2(mus); as=2.^out(1,:); lambdas=out(2,:); +if(0), lambdas=-log2(scales1)\log2(mus); as(:)=1; end +if(show==0), return; end + +% compute predicted means and errors for display purposes +musp=as(O,:).*scales1(:,ones(1,nTypes)).^-lambdas(O,:); +errsFit=mean(abs(musp-mus)); stds=permute(std(rs,0,1),[2 3 1]); + +% plot results +if(show<0), show=-show; clear=0; else clear=1; end +figureResized(.75,show); if(clear), clf; end +lp={'LineWidth',2}; tp={'FontSize',12}; +for k=1:nTypes + % plot ratios + subplot(2,nTypes,k); set(gca,tp{:}); + for i=round(linspace(1,nImages,20)) + loglog(1./scales1,rs(i,:,k),'Color',[1 1 1]*.8); hold on; end + h0=loglog(1./scales1,mus(:,k),'go',lp{:}); + h1=loglog(1./scales1,musp(:,k),'b-',lp{:}); + title(sprintf('%s\n\\lambda = %.03f, error = %.2e',... + info(k).name,lambdas(k),errsFit(k))); + legend([h0 h1],{'real','fit'},'location','ne'); + xlabel('log2(scale)'); ylabel('\mu (ratio)'); axis tight; + ax=axis; ax(1)=1; ax(3)=min(.9,ax(3)); ax(4)=max(2,ax(4)); axis(ax); + set(gca,'ytick',[.5 1 1.4 2 3 4],'YMinorTick','off'); + set(gca,'xtick',2.^(-10:.5:10),'XTickLabel',10:-.5:-10); + % plot variances + subplot(2,nTypes,k+nTypes); set(gca,tp{:}); + semilogx(1./scales1,stds(:,k),'go',lp{:}); hold on; + xlabel('log2(scale)'); ylabel('\sigma (ratio)'); axis tight; + ax=axis; ax(1)=1; ax(3)=0; ax(4)=max(.5,ax(4)); axis(ax); + set(gca,'xtick',2.^(-10:.5:10),'XTickLabel',10:-.5:-10); +end + +end diff --git a/channels/convBox.m b/channels/convBox.m new file mode 100644 index 0000000..9a1dae4 --- /dev/null +++ b/channels/convBox.m @@ -0,0 +1,75 @@ +function J = convBox( I, r, s, nomex ) +% Extremely fast 2D image convolution with a box filter. +% +% Convolves an image by a F=ones(2*r+1,2*r+1)/(2*r+1)^2 filter. The +% convolution can be performed in constant time per-pixel, independent of +% the radius r. 
In fact the implementation is nearly optimal, with the +% convolution taking only slightly more time than creating a copy of the +% input array. Boundary effects are handled as if the image were padded +% symmetrically prior to performing the convolution. An optional integer +% downsampling parameter "s" can be specified, in which case the output is +% downsampled by s (the implementation is efficient with downsampling +% occurring simultaneously with smoothing, saving additional time). +% +% The output is exactly equivalent to the following Matlab operations: +% f = ones(1,2*r+1); f=f/sum(f); +% J = padarray(I,[r r],'symmetric','both'); +% J = convn(convn(J,f,'valid'),f','valid'); +% if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +% The computation, however, is an order of magnitude faster than the above. +% +% When used as a smoothing filter, the standard deviation (sigma) of a box +% filter with radius r can be computed using [sigma=sqrt(r*(r+1)/3)]. For +% the first few values of r this translates to: r=1: sigma=sqrt(2/3), r=2: +% sigma=sqrt(2), r=3: sigma=2. Given sigma, the equivalent value of r can +% be computed via [r=sqrt(12*sigma*sigma+1)/2-.5]. +% +% The related function convTri performs convolution with a triangle filter, +% which has nicer properties if used for smoothing, but is slightly slower. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% J = convBox( I, r, [s], [nomex] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% r - integer filter radius +% s - [1] integer downsampling amount after convolving +% nomex - [0] if true perform computation in matlab (for testing/timing) +% +% OUTPUTS +% J - [hxwxk] smoothed image +% +% EXAMPLE +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% r = 5; s = 2; % set parameters as desired +% tic, J1=convBox(I,r,s); toc % mex version (fast) +% tic, J2=convBox(I,r,s,1); toc % matlab version (slow) +% figure(1); im(J1); figure(2); im(abs(J2-J1)); +% +% See also conv2, convTri +% +% Piotr's Computer Vision Matlab Toolbox Version 3.02 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +assert( r>=0 ); +if( nargin<3 ), s=1; end +if( nargin<4 ), nomex=0; end +if( isempty(I) || (r==0 && s==1) ), J = I; return; end +m=min(size(I,1),size(I,2)); if( m<4 || 2*r+1>=m ), nomex=1; end + +if( nomex==0 ) + if( r==1 && s<=2 ) + J = convConst('convTri1',I,1,s); + else + J = convConst('convBox',I,r,s); + end +else + f = ones(1,2*r+1); f=f/sum(f); + J = padarray(I,[r r],'symmetric','both'); + J = convn(convn(J,f,'valid'),f','valid'); + if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +end diff --git a/channels/convMax.m b/channels/convMax.m new file mode 100644 index 0000000..75e194d --- /dev/null +++ b/channels/convMax.m @@ -0,0 +1,61 @@ +function J = convMax( I, r, nomex ) +% Extremely fast 2D image convolution with a max filter. +% +% For each location computes J(y,x) = max(max(I(y-r:y+r,x-r:x+r))). The +% filtering is constant time per-window, independent of r. First, the +% filtering is separable, which brings the complexity down to O(r) per +% window from O(r*r). To bring the implemention down to constant time +% (independent of r) we use the van Herk/Gil-Werman algorithm. 
Ignoring +% boundaries, just 3 max operations are need per-window regardless of r. +% http://www.leptonica.com/grayscale-morphology.html#FAST-IMPLEMENTATION +% +% The output is exactly equivalent to the following Matlab operations: +% I=padarray(I,[r r],'replicate','both'); [h,w,d]=size(I); J=I; +% for z=1:d, for x=r+1:w-r, for y=r+1:h-r +% J(y,x,z) = max(max(I(y-r:y+r,x-r:x+r,z))); end; end; end +% J=J(r+1:h-r,r+1:w-r,:); +% The computation, however, is an order of magnitude faster than the above. +% +% USAGE +% J = convMax( I, r, [nomex] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% r - integer filter radius or radii along y and x +% nomex - [0] if true perform computation in matlab (for testing/timing) +% +% OUTPUTS +% J - [hxwxk] max image +% +% EXAMPLE +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% r = 5; % set parameter as desired +% tic, J1=convMax(I,r); toc % mex version (fast) +% tic, J2=convMax(I,r,1); toc % matlab version (slow) +% figure(1); im(J1); figure(2); im(abs(J2-J1)); +% +% See also conv2, convTri, convBox +% +% Piotr's Computer Vision Matlab Toolbox Version 3.00 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +assert( all(r>=0) ); +if( nargin<3 ), nomex=0; end +if( all(r==0) ), J = I; return; end +if( numel(r)==1 ), ry=r; rx=r; else ry=r(1); rx=r(2); end + +if( nomex==0 ) + d=size(I,3); + if(d==1), J=convConst('convMax',convConst('convMax',I,ry,1)',rx,1)'; else + J=I; for z=1:d, J(:,:,z) = ... 
+ convConst('convMax',convConst('convMax',J(:,:,z),ry,1)',rx,1)'; end + end +else + I=padarray(I,[ry rx],'replicate','both'); [h,w,d]=size(I); J=I; + for z=1:d, for x=rx+1:w-rx, for y=ry+1:h-ry + J(y,x,z) = max(max(I(y-ry:y+ry,x-rx:x+rx,z))); end; end; end + J=J(ry+1:h-ry,rx+1:w-rx,:); +end + +end diff --git a/channels/convTri.m b/channels/convTri.m new file mode 100644 index 0000000..a8ae11c --- /dev/null +++ b/channels/convTri.m @@ -0,0 +1,95 @@ +function J = convTri( I, r, s, nomex ) +% Extremely fast 2D image convolution with a triangle filter. +% +% Convolves an image by a 2D triangle filter (the 1D triangle filter f is +% [1:r r+1 r:-1:1]/(r+1)^2, the 2D version is simply conv2(f,f')). The +% convolution can be performed in constant time per-pixel, independent of +% the radius r. In fact the implementation is nearly optimal, with the +% convolution taking only slightly more time than creating a copy of the +% input array. Boundary effects are handled as if the image were padded +% symmetrically prior to performing the convolution. An optional integer +% downsampling parameter "s" can be specified, in which case the output is +% downsampled by s (the implementation is efficient with downsampling +% occurring simultaneously with smoothing, saving additional time). +% +% The output is exactly equivalent to the following Matlab operations: +% f = [1:r r+1 r:-1:1]/(r+1)^2; +% J = padarray(I,[r r],'symmetric','both'); +% J = convn(convn(J,f,'valid'),f','valid'); +% if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +% The computation, however, is an order of magnitude faster than the above. +% +% When used as a smoothing filter, the standard deviation (sigma) of a tri +% filter with radius r can be computed using [sigma=sqrt(r*(r+2)/6)]. For +% the first few values of r this translates to: r=1: sigma=1/sqrt(2), r=2: +% sigma=sqrt(4/3), r=3: sqrt(5/2), r=4: sigma=2. Given sigma, the +% equivalent value of r can be computed via [r=sqrt(6*sigma*sigma+1)-1]. 
+% +% For even finer grained control for very small amounts of smoothing, any +% value of r between 0 and 1 can be used (normally if r>=1 then r must be +% an integer). In this case a filter of the form fp=[1 p 1]/(2+p) is used, +% with p being determined automatically from r. The filter fp has a +% standard deviation of [sigma=sqrt(2/(p+2))]. Hence p can be determined +% from r by setting [sqrt(r*(r+2)/6)=sqrt(2/(p+2))], which gives +% [p=12/r/(r+2)-2]. Note that r=1 gives p=2, so fp=[1 2 1]/4 which is the +% same as the normal r=1 triangle filter. As r goes to 0, p goes to +% infinity, and fp becomes the delta function [0 1 0]. The computation for +% r<=1 is particularly fast. +% +% The related function convBox performs convolution with a box filter, +% which is slightly faster but has worse properties if used for smoothing. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% J = convTri( I, r, [s], [nomex] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% r - integer filter radius (or any value between 0 and 1) +% filter standard deviation is: sigma=sqrt(r*(r+2)/6) +% s - [1] integer downsampling amount after convolving +% nomex - [0] if true perform computation in matlab (for testing/timing) +% +% OUTPUTS +% J - [hxwxk] smoothed image +% +% EXAMPLE - matlab versus mex +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% r = 5; s = 2; % set parameters as desired +% tic, J1=convTri(I,r,s); toc % mex version (fast) +% tic, J2=convTri(I,r,s,1); toc % matlab version (slow) +% figure(1); im(J1); figure(2); im(abs(J2-J1)); +% +% EXAMPLE - triangle versus gaussian smoothing +% I = single(imResample(imread('cameraman.tif'),[480 640]))/255; +% sigma = 4; rg = ceil(3*sigma); f = filterGauss(2*rg+1,[],sigma^2); +% tic, J1=conv2(conv2(imPad(I,rg,'symmetric'),f,'valid'),f','valid'); toc +% r=sqrt(6*sigma*sigma+1)-1; tic, 
J2=convTri(I,r); toc +% figure(1); im(J1); figure(2); im(J2); figure(3); im(abs(J2-J1)); +% +% See also conv2, convBox, gaussSmooth +% +% Piotr's Computer Vision Matlab Toolbox Version 3.02 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<3 ), s=1; end +if( nargin<4 ), nomex=0; end +if( isempty(I) || (r==0 && s==1) ), J = I; return; end +m=min(size(I,1),size(I,2)); if( m<4 || 2*r+1>=m ), nomex=1; end + +if( nomex==0 ) + if( r>0 && r<=1 && s<=2 ) + J = convConst('convTri1',I,12/r/(r+2)-2,s); + else + J = convConst('convTri',I,r,s); + end +else + if(r<=1), p=12/r/(r+2)-2; f=[1 p 1]/(2+p); r=1; + else f=[1:r r+1 r:-1:1]/(r+1)^2; end + J = padarray(I,[r r],'symmetric','both'); + J = convn(convn(J,f,'valid'),f','valid'); + if(s>1), t=floor(s/2)+1; J=J(t:s:end-s+t,t:s:end-s+t,:); end +end diff --git a/channels/fhog.m b/channels/fhog.m new file mode 100644 index 0000000..beaaefe --- /dev/null +++ b/channels/fhog.m @@ -0,0 +1,70 @@ +function H = fhog( I, binSize, nOrients, clip, crop ) +% Efficiently compute Felzenszwalb's HOG (FHOG) features. +% +% A fast implementation of the HOG variant used by Felzenszwalb et al. +% in their work on discriminatively trained deformable part models. +% http://www.cs.berkeley.edu/~rbg/latent/index.html +% Gives nearly identical results to features.cc in code release version 5 +% but runs 4x faster (over 125 fps on VGA color images). +% +% The computed HOG features are 3*nOrients+5 dimensional. There are +% 2*nOrients contrast sensitive orientation channels, nOrients contrast +% insensitive orientation channels, 4 texture channels and 1 all zeros +% channel (used as a 'truncation' feature). Using the standard value of +% nOrients=9 gives a 32 dimensional feature vector at each cell. This +% variant of HOG, refered to as FHOG, has been shown to achieve superior +% performance to the original HOG features. 
For details please refer to +% work by Felzenszwalb et al. (see link above). +% +% This function is essentially a wrapper for calls to gradientMag() +% and gradientHist(). Specifically, it is equivalent to the following: +% [M,O] = gradientMag( I,0,0,0,1 ); softBin = -1; useHog = 2; +% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +% See gradientHist() for more general usage. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% H = fhog( I, [binSize], [nOrients], [clip], [crop] ) +% +% INPUTS +% I - [hxw] color or grayscale input image (must have type single) +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% clip - [.2] value at which to clip histogram bins +% crop - [0] if true crop boundaries +% +% OUTPUTS +% H - [h/binSize w/binSize nOrients*3+5] computed hog features +% +% EXAMPLE +% I=imResample(single(imread('peppers.png'))/255,[480 640]); +% tic, for i=1:100, H=fhog(I,8,9); end; disp(100/toc) % >125 fps +% figure(1); im(I); V=hogDraw(H,25,1); figure(2); im(V) +% +% EXAMPLE +% % comparison to features.cc (requires DPM code release version 5) +% I=imResample(single(imread('peppers.png'))/255,[480 640]); Id=double(I); +% tic, for i=1:100, H1=features(Id,8); end; disp(100/toc) +% tic, for i=1:100, H2=fhog(I,8,9,.2,1); end; disp(100/toc) +% figure(1); montage2(H1); figure(2); montage2(H2); +% D=abs(H1-H2); mean(D(:)) +% +% See also hog, hogDraw, gradientHist +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<2 ), binSize=8; end +if( nargin<3 ), nOrients=9; end +if( nargin<4 ), clip=.2; end +if( nargin<5 ), crop=0; end + +softBin = -1; useHog = 2; b = binSize; +[M,O] = gradientMag( I,0,0,0,1 ); +H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +if( crop ), e=mod(size(I),b)= 0 +% or simply to the nearest orientation bin if softBin < 0. Next, spatial +% binning is performed by summing the pixels in each binSize x binSize +% region of each [hxw] orientation channel. If "softBin" is odd each pixel +% can contribute to multiple spatial bins (using bilinear interpolation), +% otherwise each pixel contributes to a single spatial bin. The result of +% these steps is a floor([h/binSize w/binSize nOrients]) feature map +% representing the gradient histograms in each image region. +% +% Parameter settings of particular interest: +% binSize=1: simply quantize the gradient magnitude into nOrients channels +% softBin=1, useHog=1, clip=.2: original HOG features (see hog.m) +% softBin=-1; useHog=2, clip=.2: FHOG features (see fhog.m) +% softBin=0, useHog=0: channels used in Dollar's BMVC09 paper +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% H = gradientHist( M, O, [binSize,nOrients,softBin,useHog,clipHog,full] ) +% +% INPUTS +% M - [hxw] gradient magnitude at each location (see gradientMag.m) +% O - [hxw] gradient orientation in range defined by param flag +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% softBin - [1] set soft binning (odd: spatial=soft, >=0: orient=soft) +% useHog - [0] 1: compute HOG (see hog.m), 2: compute FHOG (see fhog.m) +% clipHog - [.2] value at which to clip hog histogram bins +% full - [false] if true expects angles in [0,2*pi) else in [0,pi) +% +% OUTPUTS +% H - [w/binSize x h/binSize x nOrients] gradient histograms +% +% EXAMPLE +% I=rgbConvert(imread('peppers.png'),'gray'); [M,O]=gradientMag(I); +% H1=gradientHist(M,O,2,6,0); figure(1); montage2(H1); +% H2=gradientHist(M,O,2,6,1); figure(2); montage2(H2); +% +% See also gradientMag, gradient2, hog, fhog +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +H = gradientMex('gradientHist',M,O,varargin{:}); diff --git a/channels/gradientMag.m b/channels/gradientMag.m new file mode 100644 index 0000000..767a42c --- /dev/null +++ b/channels/gradientMag.m @@ -0,0 +1,53 @@ +function [M,O] = gradientMag( I, channel, normRad, normConst, full ) +% Compute gradient magnitude and orientation at each image location. +% +% If input image has k>1 channels and channel=0, keeps gradient with +% maximum magnitude (over all channels) at each location. Otherwise if +% channel is between 1 and k computes gradient for the given channel. +% If full==1 orientation is computed in [0,2*pi) else it is in [0,pi). +% +% If normRad>0, normalization is performed by first computing S, a smoothed +% version of the gradient magnitude, then setting: M = M./(S + normConst). +% S is computed by S = convTri( M, normRad ). 
+% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% [M,O] = gradientMag( I, [channel], [normRad], [normConst], [full] ) +% +% INPUTS +% I - [hxwxk] input k channel single image +% channel - [0] if>0 color channel to use for gradient computation +% normRad - [0] normalization radius (no normalization if 0) +% normConst - [.005] normalization constant +% full - [0] if true compute angles in [0,2*pi) else in [0,pi) +% +% OUTPUTS +% M - [hxw] gradient magnitude at each location +% O - [hxw] approximate gradient orientation modulo PI +% +% EXAMPLE +% I=rgbConvert(imread('peppers.png'),'gray'); +% [Gx,Gy]=gradient2(I); M=sqrt(Gx.^2+Gy.^2); O=atan2(Gy,Gx); +% full=0; [M1,O1]=gradientMag(I,0,0,0,full); +% D=abs(M-M1); mean2(D), if(full), o=pi*2; else o=pi; end +% D=abs(O-O1); D(~M)=0; D(D>o*.99)=o-D(D>o*.99); mean2(abs(D)) +% +% See also gradient, gradient2, gradientHist, convTri +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<1 || isempty(I)), M=single([]); O=M; return; end +if(nargin<2 || isempty(channel)), channel=0; end +if(nargin<3 || isempty(normRad)), normRad=0; end +if(nargin<4 || isempty(normConst)), normConst=.005; end +if(nargin<5 || isempty(full)), full=0; end + +if(nargout<=1), M=gradientMex('gradientMag',I,channel,full); +else [M,O]=gradientMex('gradientMag',I,channel,full); end + +if( normRad==0 ), return; end; S = convTri( M, normRad ); +gradientMex('gradientMagNorm',M,S,normConst); % operates on M diff --git a/channels/hog.m b/channels/hog.m new file mode 100644 index 0000000..d847165 --- /dev/null +++ b/channels/hog.m @@ -0,0 +1,75 @@ +function H = hog( I, binSize, nOrients, clip, crop ) +% Efficiently compute histogram of oriented gradient (HOG) features. 
+% +% Heavily optimized code to compute HOG features described in "Histograms +% of Oriented Gradients for Human Detection" by Dalal & Triggs, CVPR05. +% This function is made largely obsolete by fhog, see fhog.m for details. +% +% If I has dimensions [hxw], the size of the computed feature vector H is +% floor([h/binSize w/binSize nOrients*4]). For each binSize x binSize +% region, computes a histogram of gradients, with each gradient quantized +% by its angle and weighed by its magnitude. For color images, the gradient +% is computed separately for each color channel and the one with maximum +% magnitude is used. The centered gradient is used except at boundaries +% (where uncentered gradient is used). Trilinear interpolation is used to +% place each gradient in the appropriate spatial and orientation bin. +% +% For each resulting histogram (with nOrients bins), four different +% normalizations are computed using adjacent histograms, resulting in a +% nOrients*4 length feature vector for each region. To compute the +% normalizations, first for each block of adjacent 2x2 histograms we +% compute their L2 norm (over all 4*nOrient bins). Each histogram (except +% at boundaries) thus has 4 different normalization values associated with +% it. Each histogram bin is then normalized by each of the 4 different L2 +% norms, resulting in a 4 times expansion of the number of bins. Finally, +% any bin whose value is bigger than "clip" is set to "clip". +% +% The computed features are NOT identical to those described in the CVPR05 +% paper. Specifically, there is no Gaussian spatial window, and other minor +% details differ. The choices were made for speed of the resulting code: +% ~.008s for a 640x480x3 color image on a standard machine from 2011. +% +% This function is essentially a wrapper for calls to gradientMag() +% and gradientHist(). 
Specifically, it is equivalent to the following: +% [M,O] = gradientMag( I ); softBin = 1; useHog = 1; +% H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +% See gradientHist() for more general usage. +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. +% +% USAGE +% H = hog( I, [binSize], [nOrients], [clip], [crop] ) +% +% INPUTS +% I - [hxw] color or grayscale input image (must have type single) +% binSize - [8] spatial bin size +% nOrients - [9] number of orientation bins +% clip - [.2] value at which to clip histogram bins +% crop - [0] if true crop boundaries +% +% OUTPUTS +% H - [h/binSize w/binSize nOrients*4] computed hog features +% +% EXAMPLE +% I=imResample(single(imread('peppers.png')),[480 640])/255; +% tic, for i=1:125, H=hog(I,8,9); end; toc % ~1s for 125 iterations +% figure(1); im(I); V=hogDraw(H,25); figure(2); im(V) +% +% See also hogDraw, gradientHist +% +% Piotr's Computer Vision Matlab Toolbox Version 3.23 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<2 ), binSize=8; end +if( nargin<3 ), nOrients=9; end +if( nargin<4 ), clip=.2; end +if( nargin<5 ), crop=0; end + +softBin = 1; useHog = 1; b = binSize; +[M,O] = gradientMag( I ); +H = gradientHist(M,O,binSize,nOrients,softBin,useHog,clip); +if( crop ), e=mod(size(I),b) +#include +#include "rgbConvertMex.cpp" +#include "imPadMex.cpp" +#include "convConst.cpp" +#include "imResampleMex.cpp" +#include "gradientMex.cpp" + +// compile and test standalone channels source code +int main(int argc, const char* argv[]) +{ + // initialize test array (misalign controls memory mis-alignment) + const int h=12, w=12, misalign=1; int x, y, d; + float I[h*w*3+misalign], *I0=I+misalign; + for( x=0; x +#include "sse.hpp" + +// convolve one column of I by a 2rx1 ones filter +void convBoxY( float *I, float *O, int h, int r, int s ) { + float t; int j, p=r+1, q=2*h-(r+1), h0=r+1, h1=h-r, h2=h; + t=0; for(j=0; j<=r; j++) t+=I[j]; t=2*t-I[r]; j=0; + if( s==1 ) { + for(; jh2) h0=h2; if(h1>h2) h1=h2; + for(; j 0) { + // initialize T + memset( T, 0, h1*sizeof(float) ); + for(i=0; i<=r; i++) for(j=0; j=w-r) Ir=I+(2*w-r-i-1)*h; + for(j=0; j= 2) ? 1 : 0, h2=(h-d)/2; + if( s==2 ) { + for( ; jh2) h0=h2; if(h1>h2) h1=h2; + if(++k==s) { k=0; *O++=u; } j=1; + for(;j 0) { + // initialize T and U + for(j=0; jw-r) Ir=I+(2*w-r-i)*h; + for( j=0; j0) Il-=h; if(ib ? a : b; + #define maxk(y0,y1) { O[y]=I[y0]; \ + for( yi=y0+1; yi<=y1; yi++ ) { if(I[yi]>O[y]) O[y]=I[yi]; }} + for( y=0; yh-1) y1=h-1; maxk(0,y1); } + for( ; y<=h-m-r; y+=m ) { + T[m-1] = I[y+r]; + for( yi=1; yiw-1 ) r=w-1; if( r>h-1 ) r=h-1; int m=2*r+1; + float *T=(float*) alMalloc(m*2*sizeof(float),16); + for( int d0=0; d0 1) mexErrMsgTxt("One output expected."); + nDims = mxGetNumberOfDimensions(prhs[1]); + id = mxGetClassID(prhs[1]); + ns = (int*) mxGetDimensions(prhs[1]); + d = (nDims == 3) ? ns[2] : 1; + m = (ns[0] < ns[1]) ? 
ns[0] : ns[1]; + if( (nDims!=2 && nDims!=3) || id!=mxSINGLE_CLASS || m<4 ) + mexErrMsgTxt("A must be a 4x4 or bigger 2D or 3D float array."); + + // extract inputs + if(mxGetString(prhs[0],type,1024)) + mexErrMsgTxt("Failed to get type."); + A = (float*) mxGetData(prhs[1]); + p = (float) mxGetScalar(prhs[2]); + r = (int) mxGetScalar(prhs[2]); + s = (int) mxGetScalar(prhs[3]); + if( s<1 ) mexErrMsgTxt("Invalid sampling value s"); + if( r<0 ) mexErrMsgTxt("Invalid radius r"); + + // create output array (w/o initializing to 0) + ms[0]=ns[0]/s; ms[1]=ns[1]/s; ms[2]=d; + B = (float*) mxMalloc(ms[0]*ms[1]*d*sizeof(float)); + plhs[0] = mxCreateNumericMatrix(0, 0, mxSINGLE_CLASS, mxREAL); + mxSetData(plhs[0], B); mxSetDimensions(plhs[0],(mwSize*)ms,nDims); + + // perform appropriate type of convolution + if(!strcmp(type,"convBox")) { + if(r>=m/2) mexErrMsgTxt("mask larger than image (r too large)"); + convBox( A, B, ns[0], ns[1], d, r, s ); + } else if(!strcmp(type,"convTri")) { + if(r>=m/2) mexErrMsgTxt("mask larger than image (r too large)"); + convTri( A, B, ns[0], ns[1], d, r, s ); + } else if(!strcmp(type,"conv11")) { + if( s>2 ) mexErrMsgTxt("conv11 can sample by at most s=2"); + conv11( A, B, ns[0], ns[1], d, r, s ); + } else if(!strcmp(type,"convTri1")) { + if( s>2 ) mexErrMsgTxt("convTri1 can sample by at most s=2"); + convTri1( A, B, ns[0], ns[1], d, p, s ); + } else if(!strcmp(type,"convMax")) { + if( s>1 ) mexErrMsgTxt("convMax cannot sample"); + convMax( A, B, ns[0], ns[1], d, r ); + } else { + mexErrMsgTxt("Invalid type."); + } +} +#endif diff --git a/channels/private/convConst.mexa64 b/channels/private/convConst.mexa64 new file mode 100644 index 0000000..c55cfa9 Binary files /dev/null and b/channels/private/convConst.mexa64 differ diff --git a/channels/private/convConst.mexmaci64 b/channels/private/convConst.mexmaci64 new file mode 100644 index 0000000..de3ad0c Binary files /dev/null and b/channels/private/convConst.mexmaci64 differ diff --git 
a/channels/private/convConst.mexw64 b/channels/private/convConst.mexw64 new file mode 100644 index 0000000..d91a5ed Binary files /dev/null and b/channels/private/convConst.mexw64 differ diff --git a/channels/private/gradientMex.cpp b/channels/private/gradientMex.cpp new file mode 100644 index 0000000..fc1349c --- /dev/null +++ b/channels/private/gradientMex.cpp @@ -0,0 +1,414 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.30 +* Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include "wrappers.hpp" +#include +#include "string.h" +#include "sse.hpp" + +#define PI 3.14159265f + +// compute x and y gradients for just one column (uses sse) +void grad1( float *I, float *Gx, float *Gy, int h, int w, int x ) { + int y, y1; float *Ip, *In, r; __m128 *_Ip, *_In, *_G, _r; + // compute column of Gx + Ip=I-h; In=I+h; r=.5f; + if(x==0) { r=1; Ip+=h; } else if(x==w-1) { r=1; In-=h; } + if( h<4 || h%4>0 || (size_t(I)&15) || (size_t(Gx)&15) ) { + for( y=0; yh-1) y1=h-1; + GRADY(1); Ip--; for(y=1; y PI-1e-6f ) a1[i]=PI-1e-6f; + init=true; return a1; +} + +// compute gradient magnitude and orientation at each location (uses sse) +void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) { + int x, y, y1, c, h4, s; float *Gx, *Gy, *M2; __m128 *_Gx, *_Gy, *_M2, _m; + float *acost = acosTable(), acMult=10000.0f; + // allocate memory for storing one column of output (padded so h4%4==0) + h4=(h%4==0) ? 
h : h-(h%4)+4; s=d*h4*sizeof(float); + M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; + Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; + Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; + // compute gradient magnitude and orientation for each column + for( x=0; x=oMax) o0=0; O0[i]=o0; + o1=o0+nb; if(o1==oMax) o1=0; O1[i]=o1; + m=M[i]*norm; M1[i]=od*m; M0[i]=m-M1[i]; + } else for(; i=oMax) o0=0; O0[i]=o0; + M0[i]=M[i]*norm; M1[i]=0; O1[i]=0; + } +} + +// compute nOrients gradient histograms per bin x bin block of pixels +void gradHist( float *M, float *O, float *H, int h, int w, + int bin, int nOrients, int softBin, bool full ) +{ + const int hb=h/bin, wb=w/bin, h0=hb*bin, w0=wb*bin, nb=wb*hb; + const float s=(float)bin, sInv=1/s, sInv2=1/s/s; + float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1; float xb, init; + O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16); + O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16); + // main loop + for( x=0; x=0); + + if( softBin<0 && softBin%2==0 ) { + // no interpolation w.r.t. 
either orienation or spatial bin + H1=H+(x/bin)*hb; + #define GH H1[O0[y]]+=M0[y]; y++; + if( bin==1 ) for(y=0; y=0; xb0 = hasLf?(int)xb:-1; hasRt = xb0 < wb-1; + xd=xb-xb0; xb+=sInv; yb=init; y=0; + // macros for code conciseness + #define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \ + ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd; + #define GH(H,ma,mb) H1=H; STRu(*H1,ADD(LDu(*H1),MUL(ma,mb))); + // leading rows, no top bin + for( ; y=hb-1) break; GHinit; _m0=SET(M0[y]); + if(hasLf) { _m=SET(0,0,ms[1],ms[0]); GH(H0+O0[y],_m,_m0); } + if(hasRt) { _m=SET(0,0,ms[3],ms[2]); GH(H0+O0[y]+hb,_m,_m0); } + } else for( ; ; y++ ) { + yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; + _m0=SET(M0[y]); _m1=SET(M1[y]); + if(hasLf) { _m=SET(0,0,ms[1],ms[0]); + GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); } + if(hasRt) { _m=SET(0,0,ms[3],ms[2]); + GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); } + } + // final rows, no bottom bin + for( ; yclip) t=clip; c++; + const float r=.2357f; int o, x, y, c; float t; + const int nb=wb*hb, nbo=nOrients*nb, hb1=hb+1; + for( o=0; onl1 ) mexErrMsgTxt("Incorrect number of outputs."); + if( nrnr1 ) mexErrMsgTxt("Incorrect number of inputs."); + nDims = mxGetNumberOfDimensions(pr[0]); dims = mxGetDimensions(pr[0]); + *h=dims[0]; *w=dims[1]; *d=(nDims==2) ? 
1 : dims[2]; *I = mxGetPr(pr[0]); + if( nDims!=2 && nDims!=3 ) mexErrMsgTxt("I must be a 2D or 3D array."); + if( mxGetClassID(pr[0])!=id ) mexErrMsgTxt("I has incorrect type."); +} + +// [Gx,Gy] = grad2(I) - see gradient2.m +void mGrad2( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d; float *I, *Gx, *Gy; + checkArgs(nl,pl,nr,pr,1,2,1,1,&h,&w,&d,mxSINGLE_CLASS,(void**)&I); + if(h<2 || w<2) mexErrMsgTxt("I must be at least 2x2."); + pl[0]= mxCreateMatrix3( h, w, d, mxSINGLE_CLASS, 0, (void**) &Gx ); + pl[1]= mxCreateMatrix3( h, w, d, mxSINGLE_CLASS, 0, (void**) &Gy ); + grad2( I, Gx, Gy, h, w, d ); +} + +// [M,O] = gradMag( I, channel, full ) - see gradientMag.m +void mGradMag( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d, c, full; float *I, *M, *O=0; + checkArgs(nl,pl,nr,pr,1,2,3,3,&h,&w,&d,mxSINGLE_CLASS,(void**)&I); + if(h<2 || w<2) mexErrMsgTxt("I must be at least 2x2."); + c = (int) mxGetScalar(pr[1]); full = (int) mxGetScalar(pr[2]); + if( c>0 && c<=d ) { I += h*w*(c-1); d=1; } + pl[0] = mxCreateMatrix3(h,w,1,mxSINGLE_CLASS,0,(void**)&M); + if(nl>=2) pl[1] = mxCreateMatrix3(h,w,1,mxSINGLE_CLASS,0,(void**)&O); + gradMag(I, M, O, h, w, d, full>0 ); +} + +// gradMagNorm( M, S, norm ) - operates on M - see gradientMag.m +void mGradMagNorm( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d; float *M, *S, norm; + checkArgs(nl,pl,nr,pr,0,0,3,3,&h,&w,&d,mxSINGLE_CLASS,(void**)&M); + if( mxGetM(pr[1])!=h || mxGetN(pr[1])!=w || d!=1 || + mxGetClassID(pr[1])!=mxSINGLE_CLASS ) mexErrMsgTxt("M or S is bad."); + S = (float*) mxGetPr(pr[1]); norm = (float) mxGetScalar(pr[2]); + gradMagNorm(M,S,h,w,norm); +} + +// H=gradHist(M,O,[...]) - see gradientHist.m +void mGradHist( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int h, w, d, hb, wb, nChns, binSize, nOrients, softBin, useHog; + bool full; float *M, *O, *H, clipHog; + checkArgs(nl,pl,nr,pr,1,3,2,8,&h,&w,&d,mxSINGLE_CLASS,(void**)&M); + O = 
(float*) mxGetPr(pr[1]); + if( mxGetM(pr[1])!=h || mxGetN(pr[1])!=w || d!=1 || + mxGetClassID(pr[1])!=mxSINGLE_CLASS ) mexErrMsgTxt("M or O is bad."); + binSize = (nr>=3) ? (int) mxGetScalar(pr[2]) : 8; + nOrients = (nr>=4) ? (int) mxGetScalar(pr[3]) : 9; + softBin = (nr>=5) ? (int) mxGetScalar(pr[4]) : 1; + useHog = (nr>=6) ? (int) mxGetScalar(pr[5]) : 0; + clipHog = (nr>=7) ? (float) mxGetScalar(pr[6]) : 0.2f; + full = (nr>=8) ? (bool) (mxGetScalar(pr[7])>0) : false; + hb = h/binSize; wb = w/binSize; + nChns = useHog== 0 ? nOrients : (useHog==1 ? nOrients*4 : nOrients*3+5); + pl[0] = mxCreateMatrix3(hb,wb,nChns,mxSINGLE_CLASS,1,(void**)&H); + if( nOrients==0 ) return; + if( useHog==0 ) { + gradHist( M, O, H, h, w, binSize, nOrients, softBin, full ); + } else if(useHog==1) { + hog( M, O, H, h, w, binSize, nOrients, softBin, full, clipHog ); + } else { + fhog( M, O, H, h, w, binSize, nOrients, softBin, clipHog ); + } +} + +// inteface to various gradient functions (see corresponding Matlab functions) +void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { + int f; char action[1024]; f=mxGetString(pr[0],action,1024); nr--; pr++; + if(f) mexErrMsgTxt("Failed to get action."); + else if(!strcmp(action,"gradient2")) mGrad2(nl,pl,nr,pr); + else if(!strcmp(action,"gradientMag")) mGradMag(nl,pl,nr,pr); + else if(!strcmp(action,"gradientMagNorm")) mGradMagNorm(nl,pl,nr,pr); + else if(!strcmp(action,"gradientHist")) mGradHist(nl,pl,nr,pr); + else mexErrMsgTxt("Invalid action."); +} +#endif diff --git a/channels/private/gradientMex.mexa64 b/channels/private/gradientMex.mexa64 new file mode 100644 index 0000000..3ca9460 Binary files /dev/null and b/channels/private/gradientMex.mexa64 differ diff --git a/channels/private/gradientMex.mexmaci64 b/channels/private/gradientMex.mexmaci64 new file mode 100644 index 0000000..498874b Binary files /dev/null and b/channels/private/gradientMex.mexmaci64 differ diff --git a/channels/private/gradientMex.mexw64 
b/channels/private/gradientMex.mexw64 new file mode 100644 index 0000000..8daf014 Binary files /dev/null and b/channels/private/gradientMex.mexw64 differ diff --git a/channels/private/gradientMexNew.mexmaci64 b/channels/private/gradientMexNew.mexmaci64 new file mode 100644 index 0000000..498874b Binary files /dev/null and b/channels/private/gradientMexNew.mexmaci64 differ diff --git a/channels/private/imPadMex.cpp b/channels/private/imPadMex.cpp new file mode 100644 index 0000000..a8363e3 --- /dev/null +++ b/channels/private/imPadMex.cpp @@ -0,0 +1,123 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.00 +* Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include "wrappers.hpp" +#include "string.h" +typedef unsigned char uchar; + +// pad A by [pt,pb,pl,pr] and store result in B +template void imPad( T *A, T *B, int h, int w, int d, int pt, int pb, + int pl, int pr, int flag, T val ) +{ + int h1=h+pt, hb=h1+pb, w1=w+pl, wb=w1+pr, x, y, z, mPad; + int ct=0, cb=0, cl=0, cr=0; + if(pt<0) { ct=-pt; pt=0; } if(pb<0) { h1+=pb; cb=-pb; pb=0; } + if(pl<0) { cl=-pl; pl=0; } if(pr<0) { w1+=pr; cr=-pr; pr=0; } + int *xs, *ys; x=pr>pl?pr:pl; y=pt>pb?pt:pb; mPad=x>y?x:y; + bool useLookup = ((flag==2 || flag==3) && (mPad>h || mPad>w)) + || (flag==3 && (ct || cb || cl || cr )); + // helper macro for padding + #define PAD(XL,XM,XR,YT,YM,YB) \ + for(x=0; x1 ) mexErrMsgTxt("One output expected."); + nDims=mxGetNumberOfDimensions(prhs[0]); id=mxGetClassID(prhs[0]); + ns = (int*) mxGetDimensions(prhs[0]); nCh=(nDims==2) ? 
1 : ns[2]; + if( (nDims!=2 && nDims!=3) || + (id!=mxSINGLE_CLASS && id!=mxDOUBLE_CLASS && id!=mxUINT8_CLASS) ) + mexErrMsgTxt("A should be 2D or 3D single, double or uint8 array."); + if( !mxIsDouble(prhs[1]) ) mexErrMsgTxt("Input pad must be a double array."); + + // extract padding amounts + k = (int) mxGetNumberOfElements(prhs[1]); + p = (double*) mxGetData(prhs[1]); + if(k==1) { pt=pb=pl=pr=int(p[0]); } + else if (k==2) { pt=pb=int(p[0]); pl=pr=int(p[1]); } + else if (k==4) { pt=int(p[0]); pb=int(p[1]); pl=int(p[2]); pr=int(p[3]); } + else mexErrMsgTxt( "Input pad must have 1, 2, or 4 values."); + + // figure out padding type (flag and val) + if( !mxGetString(prhs[2],type,1024) ) { + if(!strcmp(type,"replicate")) flag=1; + else if(!strcmp(type,"symmetric")) flag=2; + else if(!strcmp(type,"circular")) flag=3; + else mexErrMsgTxt("Invalid pad value."); + } else { + flag=0; val=(double)mxGetScalar(prhs[2]); + } + if( ns[0]==0 || ns[1]==0 ) flag=0; + + // create output array + ms[0]=ns[0]+pt+pb; ms[1]=ns[1]+pl+pr; ms[2]=nCh; + if( ms[0]<0 || ns[0]<=-pt || ns[0]<=-pb ) ms[0]=0; + if( ms[1]<0 || ns[1]<=-pl || ns[1]<=-pr ) ms[1]=0; + plhs[0] = mxCreateNumericArray(3, (const mwSize*) ms, id, mxREAL); + if( ms[0]==0 || ms[1]==0 ) return; + + // pad array + A=mxGetData(prhs[0]); B=mxGetData(plhs[0]); + if( id==mxDOUBLE_CLASS ) { + imPad( (double*)A,(double*)B,ns[0],ns[1],nCh,pt,pb,pl,pr,flag,val ); + } else if( id==mxSINGLE_CLASS ) { + imPad( (float*)A,(float*)B,ns[0],ns[1],nCh,pt,pb,pl,pr,flag,float(val) ); + } else if( id==mxUINT8_CLASS ) { + imPad( (uchar*)A,(uchar*)B,ns[0],ns[1],nCh,pt,pb,pl,pr,flag,uchar(val) ); + } else { + mexErrMsgTxt("Unsupported image type."); + } +} +#endif diff --git a/channels/private/imPadMex.mexa64 b/channels/private/imPadMex.mexa64 new file mode 100644 index 0000000..2554fc1 Binary files /dev/null and b/channels/private/imPadMex.mexa64 differ diff --git a/channels/private/imPadMex.mexmaci64 b/channels/private/imPadMex.mexmaci64 new file 
mode 100644 index 0000000..af07722 Binary files /dev/null and b/channels/private/imPadMex.mexmaci64 differ diff --git a/channels/private/imPadMex.mexw64 b/channels/private/imPadMex.mexw64 new file mode 100644 index 0000000..63dfeca Binary files /dev/null and b/channels/private/imPadMex.mexw64 differ diff --git a/channels/private/imResampleMex.cpp b/channels/private/imResampleMex.cpp new file mode 100644 index 0000000..8d23eeb --- /dev/null +++ b/channels/private/imResampleMex.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.00 +* Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include "wrappers.hpp" +#include "string.h" +#include +#include +#include "sse.hpp" +typedef unsigned char uchar; + +// compute interpolation values for single column for resapling +template void resampleCoef( int ha, int hb, int &n, int *&yas, + int *&ybs, T *&wts, int bd[2], int pad=0 ) +{ + const T s = T(hb)/T(ha), sInv = 1/s; T wt, wt0=T(1e-3)*s; + bool ds=ha>hb; int nMax; bd[0]=bd[1]=0; + if(ds) { n=0; nMax=ha+(pad>2 ? 
pad : 2)*hb; } else { n=nMax=hb; } + // initialize memory + wts = (T*)alMalloc(nMax*sizeof(T),16); + yas = (int*)alMalloc(nMax*sizeof(int),16); + ybs = (int*)alMalloc(nMax*sizeof(int),16); + if( ds ) for( int yb=0; ybwt0 && ya>=0) { ybs[n]=yb; yas[n]=ya; wts[n]=wt; n++; n1++; W+=wt; } + } + if(W>1) for( int i=0; ibd[0]) bd[0]=n1; + while( n1=0 && ya=ha-1) { ya=ha-1; bd[1]++; } + ybs[yb]=yb; yas[yb]=ya; wts[yb]=wt; + } +} + +// resample A using bilinear interpolation and and store result in B +template +void resample( T *A, T *B, int ha, int hb, int wa, int wb, int d, T r ) { + int hn, wn, x, x1, y, z, xa, xb, ya; T *A0, *A1, *A2, *A3, *B0, wt, wt1; + T *C = (T*) alMalloc((ha+4)*sizeof(T),16); for(y=ha; y( wa, wb, wn, xas, xbs, xwts, xbd, 0 ); + resampleCoef( ha, hb, hn, yas, ybs, ywts, ybd, 4 ); + if( wa==2*wb ) r/=2; if( wa==3*wb ) r/=3; if( wa==4*wb ) r/=4; + r/=T(1+1e-6); for( y=0; y C) + #define FORs(X) if(sse) for(; ywb ) { + int m=1; while( x1+m=4) { FORs(ADD(U(0),U(1),U(2),U(3))); FORr(V(0)+V(1)+V(2)+V(3)); } + #undef U + #undef V + for( int x0=4; x0=wb-xbd[1]; x1++; + if(xBd) memcpy(C,A0,ha*sizeof(T)); + if(!xBd) FORs(ADD(MUL(LDu(Af0[y]),SET(wtf)),MUL(LDu(Af1[y]),SET(wt1f)))); + if(!xBd) FORr( A0[y]*wt + A1[y]*wt1 ); + } + #undef FORs + #undef FORr + // resample along y direction (B -> C) + if( ha==hb*2 ) { + T r2 = r/2; int k=((~((size_t) B0) + 1) & 15)/4; y=0; + for( ; yhb ) { + y=0; + //if( sse && ybd[0]<=4 ) for(; y4) for(; y1 ) mexErrMsgTxt("One output expected."); + nDims=mxGetNumberOfDimensions(prhs[0]); id=mxGetClassID(prhs[0]); + ns = (int*) mxGetDimensions(prhs[0]); nCh=(nDims==2) ? 
1 : ns[2]; + if( (nDims!=2 && nDims!=3) || + (id!=mxSINGLE_CLASS && id!=mxDOUBLE_CLASS && id!=mxUINT8_CLASS) ) + mexErrMsgTxt("A should be 2D or 3D single, double or uint8 array."); + ms[0]=(int)mxGetScalar(prhs[1]); ms[1]=(int)mxGetScalar(prhs[2]); ms[2]=nCh; + if( ms[0]<=0 || ms[1]<=0 ) mexErrMsgTxt("downsampling factor too small."); + nrm=(double)mxGetScalar(prhs[3]); + + // create output array + plhs[0] = mxCreateNumericArray(3, (const mwSize*) ms, id, mxREAL); + n=ns[0]*ns[1]*nCh; m=ms[0]*ms[1]*nCh; + + // perform resampling (w appropriate type) + A=mxGetData(prhs[0]); B=mxGetData(plhs[0]); + if( id==mxDOUBLE_CLASS ) { + resample((double*)A, (double*)B, ns[0], ms[0], ns[1], ms[1], nCh, nrm); + } else if( id==mxSINGLE_CLASS ) { + resample((float*)A, (float*)B, ns[0], ms[0], ns[1], ms[1], nCh, float(nrm)); + } else if( id==mxUINT8_CLASS ) { + float *A1 = (float*) mxMalloc(n*sizeof(float)); + float *B1 = (float*) mxCalloc(m,sizeof(float)); + for(int i=0; i +#include +#include "sse.hpp" + +// Constants for rgb2luv conversion and lookup table for y-> l conversion +template oT* rgb2luv_setup( oT z, oT *mr, oT *mg, oT *mb, + oT &minu, oT &minv, oT &un, oT &vn ) +{ + // set constants for conversion + const oT y0=(oT) ((6.0/29)*(6.0/29)*(6.0/29)); + const oT a= (oT) ((29.0/3)*(29.0/3)*(29.0/3)); + un=(oT) 0.197833; vn=(oT) 0.468331; + mr[0]=(oT) 0.430574*z; mr[1]=(oT) 0.222015*z; mr[2]=(oT) 0.020183*z; + mg[0]=(oT) 0.341550*z; mg[1]=(oT) 0.706655*z; mg[2]=(oT) 0.129553*z; + mb[0]=(oT) 0.178325*z; mb[1]=(oT) 0.071330*z; mb[2]=(oT) 0.939180*z; + oT maxi=(oT) 1.0/270; minu=-88*maxi; minv=-134*maxi; + // build (padded) lookup table for y->l conversion assuming y in [0,1] + static oT lTable[1064]; static bool lInit=false; + if( lInit ) return lTable; oT y, l; + for(int i=0; i<1025; i++) { + y = (oT) (i/1024.0); + l = y>y0 ? 
116*(oT)pow((double)y,1.0/3.0)-16 : y*a; + lTable[i] = l*maxi; + } + for(int i=1025; i<1064; i++) lTable[i]=lTable[i-1]; + lInit = true; return lTable; +} + +// Convert from rgb to luv +template void rgb2luv( iT *I, oT *J, int n, oT nrm ) { + oT minu, minv, un, vn, mr[3], mg[3], mb[3]; + oT *lTable = rgb2luv_setup(nrm,mr,mg,mb,minu,minv,un,vn); + oT *L=J, *U=L+n, *V=U+n; iT *R=I, *G=R+n, *B=G+n; + for( int i=0; i void rgb2luv_sse( iT *I, float *J, int n, float nrm ) { + const int k=256; float R[k], G[k], B[k]; + if( (size_t(R)&15||size_t(G)&15||size_t(B)&15||size_t(I)&15||size_t(J)&15) + || n%4>0 ) { rgb2luv(I,J,n,nrm); return; } + int i=0, i1, n1; float minu, minv, un, vn, mr[3], mg[3], mb[3]; + float *lTable = rgb2luv_setup(nrm,mr,mg,mb,minu,minv,un,vn); + while( in) n1=n; float *J1=J+i; float *R1, *G1, *B1; + // convert to floats (and load input into cache) + if( typeid(iT) != typeid(float) ) { + R1=R; G1=G; B1=B; iT *Ri=I+i, *Gi=Ri+n, *Bi=Gi+n; + for( i1=0; i1<(n1-i); i1++ ) { + R1[i1] = (float) *Ri++; G1[i1] = (float) *Gi++; B1[i1] = (float) *Bi++; + } + } else { R1=((float*)I)+i; G1=R1+n; B1=G1+n; } + // compute RGB -> XYZ + for( int j=0; j<3; j++ ) { + __m128 _mr, _mg, _mb, *_J=(__m128*) (J1+j*n); + __m128 *_R=(__m128*) R1, *_G=(__m128*) G1, *_B=(__m128*) B1; + _mr=SET(mr[j]); _mg=SET(mg[j]); _mb=SET(mb[j]); + for( i1=i; i1 LUV (without doing L lookup/normalization) + __m128 _c15, _c3, _cEps, _c52, _c117, _c1024, _cun, _cvn; + _c15=SET(15.0f); _c3=SET(3.0f); _cEps=SET(1e-35f); + _c52=SET(52.0f); _c117=SET(117.0f), _c1024=SET(1024.0f); + _cun=SET(13*un); _cvn=SET(13*vn); + __m128 *_X, *_Y, *_Z, _x, _y, _z; + _X=(__m128*) J1; _Y=(__m128*) (J1+n); _Z=(__m128*) (J1+2*n); + for( i1=i; i1 void rgb2hsv( iT *I, oT *J, int n, oT nrm ) { + oT *H=J, *S=H+n, *V=S+n; + iT *R=I, *G=R+n, *B=G+n; + for(int i=0; i=g && r>=b ) { + maxv = r; minv = g=6) h-=6; + } else if( g>=r && g>=b ) { + maxv = g; minv = r void rgb2gray( iT *I, oT *J, int n, oT nrm ) { + oT *GR=J; iT *R=I, 
*G=R+n, *B=G+n; int i; + oT mr=(oT).2989360213*nrm, mg=(oT).5870430745*nrm, mb=(oT).1140209043*nrm; + for(i=0; i void rgb2gray( double *I, float *J, int n, float nrm ) { + float *GR=J; double *R=I, *G=R+n, *B=G+n; int i; + double mr=.2989360213*nrm, mg=.5870430745*nrm, mb=.1140209043*nrm; + for(i=0; i void normalize( iT *I, oT *J, int n, oT nrm ) { + for(int i=0; i +oT* rgbConvert( iT *I, int n, int d, int flag, oT nrm ) { + oT *J = (oT*) wrMalloc(n*(flag==0 ? (d==1?1:d/3) : d)*sizeof(oT)); + int i, n1=d*(n<1000?n/10:100); oT thr = oT(1.001); + if(flag>1 && nrm==1) for(i=0; ithr) + wrError("For floats all values in I must be smaller than 1."); + bool useSse = n%4==0 && typeid(oT)==typeid(float); + if( flag==2 && useSse ) + for(i=0; i1 ) mexErrMsgTxt("One output expected."); + dims = (const int*) mxGetDimensions(pr[0]); n=dims[0]*dims[1]; + nDims = mxGetNumberOfDimensions(pr[0]); + d = 1; for( int i=2; i0); + idIn = mxGetClassID(pr[0]); + + // call rgbConvert() based on type of input and output array + if(!((d==1 && flag==0) || flag==1 || (d/3)*3==d)) + mexErrMsgTxt("I must have third dimension d==1 or (d/3)*3==d."); + if( idIn == mxSINGLE_CLASS && !single ) + J = (void*) rgbConvert( (float*) I, n, d, flag, 1.0 ); + else if( idIn == mxSINGLE_CLASS && single ) + J = (void*) rgbConvert( (float*) I, n, d, flag, 1.0f ); + else if( idIn == mxDOUBLE_CLASS && !single ) + J = (void*) rgbConvert( (double*) I, n, d, flag, 1.0 ); + else if( idIn == mxDOUBLE_CLASS && single ) + J = (void*) rgbConvert( (double*) I, n, d, flag, 1.0f ); + else if( idIn == mxUINT8_CLASS && !single ) + J = (void*) rgbConvert( (unsigned char*) I, n, d, flag, 1.0/255 ); + else if( idIn == mxUINT8_CLASS && single ) + J = (void*) rgbConvert( (unsigned char*) I, n, d, flag, 1.0f/255 ); + else + mexErrMsgTxt("Unsupported image type."); + + // create and set output array + dims1[0]=dims[0]; dims1[1]=dims[1]; dims1[2]=(flag==0 ? (d==1?1:d/3) : d); + idOut = single ? 
mxSINGLE_CLASS : mxDOUBLE_CLASS; + pl[0] = mxCreateNumericMatrix(0,0,idOut,mxREAL); + mxSetData(pl[0],J); mxSetDimensions(pl[0],(const mwSize*) dims1,3); +} +#endif diff --git a/channels/private/rgbConvertMex.mexa64 b/channels/private/rgbConvertMex.mexa64 new file mode 100644 index 0000000..f19e6cf Binary files /dev/null and b/channels/private/rgbConvertMex.mexa64 differ diff --git a/channels/private/rgbConvertMex.mexmaci64 b/channels/private/rgbConvertMex.mexmaci64 new file mode 100644 index 0000000..1ce7dda Binary files /dev/null and b/channels/private/rgbConvertMex.mexmaci64 differ diff --git a/channels/private/rgbConvertMex.mexw64 b/channels/private/rgbConvertMex.mexw64 new file mode 100644 index 0000000..08c8833 Binary files /dev/null and b/channels/private/rgbConvertMex.mexw64 differ diff --git a/channels/private/sse.hpp b/channels/private/sse.hpp new file mode 100644 index 0000000..e3d60f2 --- /dev/null +++ b/channels/private/sse.hpp @@ -0,0 +1,62 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.23 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#ifndef _SSE_HPP_ +#define _SSE_HPP_ +#include // SSE2:, SSE3:, SSE4: + +#define RETf inline __m128 +#define RETi inline __m128i + +// set, load and store values +RETf SET( const float &x ) { return _mm_set1_ps(x); } +RETf SET( float x, float y, float z, float w ) { return _mm_set_ps(x,y,z,w); } +RETi SET( const int &x ) { return _mm_set1_epi32(x); } +RETf LD( const float &x ) { return _mm_load_ps(&x); } +RETf LDu( const float &x ) { return _mm_loadu_ps(&x); } +RETf STR( float &x, const __m128 y ) { _mm_store_ps(&x,y); return y; } +RETf STR1( float &x, const __m128 y ) { _mm_store_ss(&x,y); return y; } +RETf STRu( float &x, const __m128 y ) { _mm_storeu_ps(&x,y); return y; } +RETf STR( float &x, const float y ) { return STR(x,SET(y)); } + +// arithmetic operators +RETi ADD( const __m128i x, const __m128i y ) { return _mm_add_epi32(x,y); } +RETf ADD( const __m128 x, const __m128 y ) { return _mm_add_ps(x,y); } +RETf ADD( const __m128 x, const __m128 y, const __m128 z ) { + return ADD(ADD(x,y),z); } +RETf ADD( const __m128 a, const __m128 b, const __m128 c, const __m128 &d ) { + return ADD(ADD(ADD(a,b),c),d); } +RETf SUB( const __m128 x, const __m128 y ) { return _mm_sub_ps(x,y); } +RETf MUL( const __m128 x, const __m128 y ) { return _mm_mul_ps(x,y); } +RETf MUL( const __m128 x, const float y ) { return MUL(x,SET(y)); } +RETf MUL( const float x, const __m128 y ) { return MUL(SET(x),y); } +RETf INC( __m128 &x, const __m128 y ) { return x = ADD(x,y); } +RETf INC( float &x, const __m128 y ) { __m128 t=ADD(LD(x),y); return STR(x,t); } +RETf DEC( __m128 &x, const __m128 y ) { return x = SUB(x,y); } +RETf DEC( float &x, const __m128 y ) { __m128 t=SUB(LD(x),y); return STR(x,t); } +RETf MIN( const __m128 x, const __m128 y ) { return _mm_min_ps(x,y); } +RETf RCP( const __m128 x ) { return _mm_rcp_ps(x); 
} +RETf RCPSQRT( const __m128 x ) { return _mm_rsqrt_ps(x); } + +// logical operators +RETf AND( const __m128 x, const __m128 y ) { return _mm_and_ps(x,y); } +RETi AND( const __m128i x, const __m128i y ) { return _mm_and_si128(x,y); } +RETf ANDNOT( const __m128 x, const __m128 y ) { return _mm_andnot_ps(x,y); } +RETf OR( const __m128 x, const __m128 y ) { return _mm_or_ps(x,y); } +RETf XOR( const __m128 x, const __m128 y ) { return _mm_xor_ps(x,y); } + +// comparison operators +RETf CMPGT( const __m128 x, const __m128 y ) { return _mm_cmpgt_ps(x,y); } +RETf CMPLT( const __m128 x, const __m128 y ) { return _mm_cmplt_ps(x,y); } +RETi CMPGT( const __m128i x, const __m128i y ) { return _mm_cmpgt_epi32(x,y); } +RETi CMPLT( const __m128i x, const __m128i y ) { return _mm_cmplt_epi32(x,y); } + +// conversion operators +RETf CVT( const __m128i x ) { return _mm_cvtepi32_ps(x); } +RETi CVT( const __m128 x ) { return _mm_cvttps_epi32(x); } + +#undef RETf +#undef RETi +#endif diff --git a/channels/private/wrappers.hpp b/channels/private/wrappers.hpp new file mode 100644 index 0000000..3009281 --- /dev/null +++ b/channels/private/wrappers.hpp @@ -0,0 +1,42 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.00 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#ifndef _WRAPPERS_HPP_ +#define _WRAPPERS_HPP_ +#ifdef MATLAB_MEX_FILE + +// wrapper functions if compiling from Matlab +#include "mex.h" +inline void wrError(const char *errormsg) { mexErrMsgTxt(errormsg); } +inline void* wrCalloc( size_t num, size_t size ) { return mxCalloc(num,size); } +inline void* wrMalloc( size_t size ) { return mxMalloc(size); } +inline void wrFree( void * ptr ) { mxFree(ptr); } + +#else + +// wrapper functions if compiling from C/C++ +inline void wrError(const char *errormsg) { throw errormsg; } +inline void* wrCalloc( size_t num, size_t size ) { return calloc(num,size); } +inline void* wrMalloc( size_t size ) { return malloc(size); } +inline void wrFree( void * ptr ) { free(ptr); } + +#endif + +// platform independent aligned memory allocation (see also alFree) +void* alMalloc( size_t size, int alignment ) { + const size_t pSize = sizeof(void*), a = alignment-1; + void *raw = wrMalloc(size + a + pSize); + void *aligned = (void*) (((size_t) raw + pSize + a) & ~a); + *(void**) ((size_t) aligned-pSize) = raw; + return aligned; +} + +// platform independent alignned memory de-allocation (see also alMalloc) +void alFree(void* aligned) { + void* raw = *(void**)((char*)aligned-sizeof(void*)); + wrFree(raw); +} + +#endif diff --git a/channels/rgbConvert.m b/channels/rgbConvert.m new file mode 100644 index 0000000..f08b534 --- /dev/null +++ b/channels/rgbConvert.m @@ -0,0 +1,80 @@ +function J = rgbConvert( I, colorSpace, useSingle ) +% Convert RGB image to other color spaces (highly optimized). +% +% If colorSpace=='gray' transforms I to grayscale. The output is within +% numerical error of Matlab's rgb2gray, except ~10x faster. 
The output in +% this case is hxwx1, and while the input must be hxwx3 for all other +% cases, the input for this case can also be hxwx1 (normalization only). +% +% If colorSpace=='hsv' transforms I to the HSV color space. The output is +% within numerical error of Matlab's rgb2hsv, except ~15x faster. +% +% If colorSpace=='rgb' or colorSpace='orig' only normalizes I to be in the +% range [0,1]. In this case both the input and output may have an arbitrary +% number of channels (that is I may be [hxwxd] for any d). +% +% If colorSpace=='luv' transforms I to the LUV color space. The LUV color +% space is "perceptually uniform" (meaning that two colors equally distant +% in the color space according to the Euclidean metric are equally distant +% perceptually). The L,u,v channels correspond roughly to luminance, +% green-red, blue-yellow. For more information see: +% http://en.wikipedia.org/wiki/CIELUV - using this color spaces +% http://en.wikipedia.org/wiki/CIELAB - more info about color spaces +% The LUV channels are normalized to fall in ~[0,1]. Without normalization +% the ranges are L~[0,100], u~[-88,182], and v~[-134,105] (and typically +% u,v~[-100,100]). The applied transformation is L=L/270, u=(u+88)/270, and +% v=(v+134)/270. This results in ranges L~[0,.37], u~[0,1], and v~[0,.89]. +% Perceptual uniformity is maintained since divisor is constant +% (normalizing each color channel independently would break uniformity). +% To undo the normalization on an LUV image J use: +% J=J*270; J(:,:,2)=J(:,:,2)-88; J(:,:,3)=J(:,:,3)-134; +% To test the range of the colorSpace use: +% R=100; I=zeros(R^3,1,3); k=1; R=linspace(0,1,R); +% for r=R, for g=R, for b=R, I(k,1,:)=[r g b]; k=k+1; end; end; end +% J=rgbConvert(I,'luv'); [min(J), max(J)] +% +% This code requires SSE2 to compile and run (most modern Intel and AMD +% processors support SSE2). Please see: http://en.wikipedia.org/wiki/SSE2. 
+% +% USAGE +% J = rgbConvert( I, colorSpace, [useSingle] ); +% +% INPUTS +% I - [hxwx3] input rgb image (uint8 or single/double in [0,1]) +% colorSpace - ['luv'] other choices include: 'gray', 'hsv', 'rgb', 'orig' +% useSingle - [true] determines output type (faster if useSingle) +% +% OUTPUTS +% J - [hxwx3] single or double output image (normalized to [0,1]) +% +% EXAMPLE - luv +% I = imread('peppers.png'); +% tic, J = rgbConvert( I, 'luv' ); toc +% figure(1); montage2( J ); +% +% EXAMPLE - hsv +% I=imread('peppers.png'); +% tic, J1=rgb2hsv( I ); toc +% tic, J2=rgbConvert( I, 'hsv' ); toc +% mean2(abs(J1-J2)) +% +% EXAMPLE - gray +% I=imread('peppers.png'); +% tic, J1=rgb2gray( I ); toc +% tic, J2=rgbConvert( I, 'gray' ); toc +% J1=single(J1)/255; mean2(abs(J1-J2)) +% +% See also rgb2hsv, rgb2gray +% +% Piotr's Computer Vision Matlab Toolbox Version 3.02 +% Copyright 2014 Piotr Dollar & Ron Appel. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<3 || isempty(useSingle)), useSingle=true; end +flag = find(strcmpi(colorSpace,{'gray','rgb','luv','hsv','orig'}))-1; +if(isempty(flag)), error('unknown colorSpace: %s',colorSpace); end +if(useSingle), outClass='single'; else outClass='double'; end +if(isempty(I) && flag>0 && flag~=4), I=I(:,:,[1 1 1]); end +d=size(I,3); if(flag==4), flag=1; end; norm=(d==1 && flag==0) || flag==1; +if( norm && isa(I,outClass) ), J=I; return; end +J=rgbConvertMex(I,flag,useSingle); diff --git a/classify/Contents.m b/classify/Contents.m new file mode 100644 index 0000000..5be11df --- /dev/null +++ b/classify/Contents.m @@ -0,0 +1,46 @@ +% CLASSIFY +% See also +% +% Clustering: +% demoCluster - Clustering demo. +% demoGenData - Generate data drawn form a mixture of Gaussians. +% kmeans2 - Fast version of kmeans clustering. +% meanShift - meanShift clustering algorithm. +% meanShiftIm - Applies the meanShift algorithm to a joint spatial/range image. 
+% meanShiftImExplore - Visualization to help choose sigmas for meanShiftIm. +% +% Calculating distances efficiently: +% distMatrixShow - Useful visualization of a distance matrix of clustered points. +% pdist2 - Calculates the distance between sets of vectors. +% softMin - Calculates the softMin of a vector. +% +% Principal components analysis: +% pca - Principal components analysis (alternative to princomp). +% pcaApply - Companion function to pca. +% pcaRandVec - Generate random vectors in PCA subspace. +% pcaVisualize - Visualization of quality of approximation of X given principal comp. +% visualizeData - Project high dim. data unto principal components (PCA) for visualization. +% +% Confusion matrix display: +% confMatrix - Generates a confusion matrix according to true and predicted data labels. +% confMatrixShow - Used to display a confusion matrix. +% +% Radial Basis Functions (RBFs): +% rbfComputeBasis - Get locations and sizes of radial basis functions for use in rbf network. +% rbfComputeFtrs - Evaluate features of X given a set of radial basis functions. +% rbfDemo - Demonstration of rbf networks for regression. +% +% Fast random fern/forest classification/regression code: +% fernsClfApply - Apply learned fern classifier. +% fernsClfTrain - Train random fern classifier. +% fernsInds - Compute indices for each input by each fern. +% fernsRegApply - Apply learned fern regressor. +% fernsRegTrain - Train boosted fern regressor. +% forestApply - Apply learned forest classifier. +% forestTrain - Train random forest classifier. +% +% Fast boosted decision tree code: +% adaBoostTrain - Train boosted decision tree classifier. +% adaBoostApply - Apply learned boosted decision tree classifier. +% binaryTreeTrain - Train binary decision tree classifier. +% binaryTreeApply - Apply learned binary decision tree classifier. 
diff --git a/classify/adaBoostApply.m b/classify/adaBoostApply.m new file mode 100644 index 0000000..691070e --- /dev/null +++ b/classify/adaBoostApply.m @@ -0,0 +1,36 @@ +function hs = adaBoostApply( X, model, maxDepth, minWeight, nThreads ) +% Apply learned boosted decision tree classifier. +% +% USAGE +% hs = adaBoostApply( X, model, [maxDepth], [minWeight], [nThreads] ) +% +% INPUTS +% X - [NxF] N length F feature vectors +% model - learned boosted tree classifier +% maxDepth - [] maximum depth of tree +% minWeight - [] minimum sample weigth to allow split +% nThreads - [16] max number of computational threads to use +% +% OUTPUTS +% hs - [Nx1] predicted output log ratios +% +% EXAMPLE +% +% See also adaBoostTrain +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<3 || isempty(maxDepth)), maxDepth=0; end +if(nargin<4 || isempty(minWeight)), minWeight=0; end +if(nargin<5 || isempty(nThreads)), nThreads=16; end +if(maxDepth>0), model.child(model.depth>=maxDepth) = 0; end +if(minWeight>0), model.child(model.weights<=minWeight) = 0; end +nWeak=size(model.fids,2); N=size(X,1); hs=zeros(N,1); nt=nThreads; +for i=1:nWeak + ids = forestInds(X,model.thrs(:,i),model.fids(:,i),model.child(:,i),nt); + hs = hs + model.hs(ids,i); +end + +end diff --git a/classify/adaBoostTrain.m b/classify/adaBoostTrain.m new file mode 100644 index 0000000..4acb0e7 --- /dev/null +++ b/classify/adaBoostTrain.m @@ -0,0 +1,111 @@ +function model = adaBoostTrain( X0, X1, varargin ) +% Train boosted decision tree classifier. +% +% Heavily optimized code for training Discrete or Real AdaBoost where the +% weak classifiers are decision trees. With multi-core support enabled (see +% binaryTreeTrain.m), boosting 256 depth-2 trees over 5,000 features and +% 5,000 data points takes under 5 seconds, see example below. 
Most of the +% training time is spent in binaryTreeTrain.m. +% +% For more information on how to quickly boost decision trees see: +% [1] R. Appel, T. Fuchs, P. Dollár, P. Perona; "Quickly Boosting +% Decision Trees – Pruning Underachieving Features Early," ICML 2013. +% The code here implements a simple brute-force strategy with the option to +% sample features used for training each node for additional speedups. +% Further gains using the ideas from the ICML paper are possible. If you +% use this code please consider citing our ICML paper. +% +% USAGE +% model = adaBoostTrain( X0, X1, [pBoost] ) +% +% INPUTS +% X0 - [N0xF] negative feature vectors +% X1 - [N1xF] positive feature vectors +% pBoost - additional params (struct or name/value pairs) +% .pTree - ['REQ'] parameters for binaryTreeTrain +% .nWeak - [128] number of trees to learn +% .discrete - [1] train Discrete-AdaBoost or Real-AdaBoost +% .verbose - [0] if true print status information +% +% OUTPUTS +% model - learned boosted tree classifier w the following fields +% .fids - [K x nWeak] feature ids for each node +% .thrs - [K x nWeak] threshold corresponding to each fid +% .child - [K x nWeak] index of child for each node (1-indexed) +% .hs - [K x nWeak] log ratio (.5*log(p/(1-p)) at each node +% .weights - [K x nWeak] total sample weight at each node +% .depth - [K x nWeak] depth of each node +% .errs - [1 x nWeak] error for each tree (for debugging) +% .losses - [1 x nWeak] loss after every iteration (for debugging) +% .treeDepth - depth of all leaf nodes (or 0 if leaf depth varies) +% +% EXAMPLE +% % output should be: 'Testing err=0.0145 fp=0.0165 fn=0.0125' +% N=5000; F=5000; sep=.01; RandStream.getGlobalStream.reset(); +% [xTrn,hTrn,xTst,hTst]=demoGenData(N,N,2,F/10,sep,.5,0); +% xTrn=repmat(single(xTrn),[1 10]); xTst=repmat(single(xTst),[1 10]); +% pBoost=struct('nWeak',256,'verbose',16,'pTree',struct('maxDepth',2)); +% model = adaBoostTrain( xTrn(hTrn==1,:), xTrn(hTrn==2,:), pBoost ); +% fp = 
mean(adaBoostApply( xTst(hTst==1,:), model )>0); +% fn = mean(adaBoostApply( xTst(hTst==2,:), model )<0); +% fprintf('Testing err=%.4f fp=%.4f fn=%.4f\n',(fp+fn)/2,fp,fn); +% +% See also adaBoostApply, binaryTreeTrain, demoGenData +% +% Piotr's Computer Vision Matlab Toolbox Version 3.21 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get additional parameters +dfs={ 'pTree','REQ', 'nWeak',128, 'discrete',1, 'verbose',0 }; +[pTree,nWeak,discrete,verbose]=getPrmDflt(varargin,dfs,1); +nThreads=[]; if(isfield(pTree,'nThreads')), nThreads=pTree.nThreads; end + +% main loop +[N0,F]=size(X0); [N1,F1]=size(X1); assert(F==F1); +msg='Training AdaBoost: nWeak=%3i nFtrs=%i pos=%i neg=%i\n'; +if(verbose), fprintf(msg,nWeak,F,N1,N0); start=clock; end +data=struct('X0',X0,'X1',X1); +H0=zeros(N0,1); H1=zeros(N1,1); +losses=zeros(1,nWeak); errs=losses; +for i=1:nWeak + % train tree and classify each example + [tree,data,err]=binaryTreeTrain(data,pTree); + if(discrete), tree.hs=(tree.hs>0)*2-1; end + h0 = binaryTreeApply(X0,tree,[],[],nThreads); + h1 = binaryTreeApply(X1,tree,[],[],nThreads); + % compute alpha and incorporate directly into tree model + alpha=1; if(discrete), alpha=max(-5,min(5,.5*log((1-err)/err))); end + if(verbose && alpha<=0), nWeak=i-1; disp(' stopping early'); break; end + tree.hs=tree.hs*alpha; + % update cumulative scores H and weights + H0=H0+h0*alpha; data.wts0=exp( H0)/N0/2; + H1=H1+h1*alpha; data.wts1=exp(-H1)/N1/2; + loss=sum(data.wts0)+sum(data.wts1); + if(i==1), trees=repmat(tree,nWeak,1); end + trees(i)=tree; errs(i)=err; losses(i)=loss; + msg=' i=%4i alpha=%.3f err=%.3f loss=%.2e\n'; + if(mod(i,verbose)==0), fprintf(msg,i,alpha,err,loss); end + if(verbose && loss<1e-40), nWeak=i; disp(' stopping early'); break; end +end + +% create output model struct +k=0; for i=1:nWeak, k=max(k,size(trees(i).fids,1)); end +Z = @(type) zeros(k,nWeak,type); +model=struct( 
'fids',Z('uint32'), 'thrs',Z(data.xType), ... + 'child',Z('uint32'), 'hs',Z('single'), 'weights',Z('single'), ... + 'depth',Z('uint32'), 'errs',errs, 'losses',losses ); +for i=1:nWeak, T=trees(i); k=size(T.fids,1); + model.fids(1:k,i)=T.fids; model.thrs(1:k,i)=T.thrs; + model.child(1:k,i)=T.child; model.hs(1:k,i)=T.hs; + model.weights(1:k,i)=T.weights; model.depth(1:k,i)=T.depth; +end +depth = max(model.depth(:)); +model.treeDepth = depth * uint32(all(model.depth(~model.child)==depth)); + +% output info to log +msg='Done training err=%.4f fp=%.4f fn=%.4f (t=%.1fs).\n'; +if(verbose), fp=mean(H0>0); fn=mean(H1<0); + fprintf(msg,(fp+fn)/2,fp,fn,etime(clock,start)); end + +end diff --git a/classify/binaryTreeApply.m b/classify/binaryTreeApply.m new file mode 100644 index 0000000..e8d597d --- /dev/null +++ b/classify/binaryTreeApply.m @@ -0,0 +1,32 @@ +function hs = binaryTreeApply( X, tree, maxDepth, minWeight, nThreads ) +% Apply learned binary decision tree classifier. +% +% USAGE +% hs = binaryTreeApply( X, tree, [maxDepth], [minWeight], [nThreads] ) +% +% INPUTS +% X - [NxF] N length F feature vectors +% tree - learned tree classification model +% maxDepth - [] maximum depth of tree +% minWeight - [] minimum sample weigth to allow split +% nThreads - [16] max number of computational threads to use +% +% OUTPUTS +% hs - [Nx1] predicted output log ratios +% +% EXAMPLE +% +% See also binaryTreeTrain +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<3 || isempty(maxDepth)), maxDepth=0; end +if(nargin<4 || isempty(minWeight)), minWeight=0; end +if(nargin<5 || isempty(nThreads)), nThreads=16; end +if(maxDepth>0), tree.child(tree.depth>=maxDepth) = 0; end +if(minWeight>0), tree.child(tree.weights<=minWeight) = 0; end +hs = tree.hs(forestInds(X,tree.thrs,tree.fids,tree.child,nThreads)); + +end diff --git a/classify/binaryTreeTrain.m b/classify/binaryTreeTrain.m new file mode 100644 index 0000000..f7af397 --- /dev/null +++ b/classify/binaryTreeTrain.m @@ -0,0 +1,125 @@ +function [tree,data,err] = binaryTreeTrain( data, varargin ) +% Train binary decision tree classifier. +% +% Highly optimized code for training decision trees over binary variables. +% Training a decision stump (depth=1) over 5000 features and 10000 training +% examples takes 70ms on a single core machine and *7ms* with 12 cores and +% OpenMP enabled (OpenMP is enabled by default, see toolboxCompile). This +% code shares similarities with forestTrain.m but is optimized for binary +% labels. Moreover, while forestTrain is meant for training random decision +% forests, this code is tuned for use with boosting (see adaBoostTrain.m). +% +% For more information on how to quickly boost decision trees see: +% [1] R. Appel, T. Fuchs, P. Dollár, P. Perona; "Quickly Boosting +% Decision Trees – Pruning Underachieving Features Early," ICML 2013. +% The code here implements a simple brute-force strategy with the option to +% sample features used for training each node for additional speedups. +% Further gains using the ideas from the ICML paper are possible. If you +% use this code please consider citing our ICML paper. +% +% During training each feature is quantized to lie between [0,nBins-1], +% where nBins<=256. Quantization is expensive and should be performed just +% once if training multiple trees. 
Note that the second output of the
+% algorithm is the quantized data, this can be reused in future training.
+%
+% USAGE
+% [tree,data,err] = binaryTreeTrain( data, [pTree] )
+%
+% INPUTS
+% data - data for training tree
+% .X0 - [N0xF] negative feature vectors
+% .X1 - [N1xF] positive feature vectors
+% .wts0 - [N0x1] negative weights
+% .wts1 - [N1x1] positive weights
+% .xMin - [1xF] optional vals defining feature quantization
+% .xStep - [1xF] optional vals defining feature quantization
+% .xType - [] optional original data type for features
+% pTree - additional params (struct or name/value pairs)
+% .nBins - [256] maximum number of quantization bins (<=256)
+% .maxDepth - [1] maximum depth of tree
+% .minWeight - [.01] minimum sample weight to allow split
+% .fracFtrs - [1] fraction of features to sample for each node split
+% .nThreads - [16] max number of computational threads to use
+%
+% OUTPUTS
+% tree - learned decision tree model struct w the following fields
+% .fids - [Kx1] feature ids for each node
+% .thrs - [Kx1] threshold corresponding to each fid
+% .child - [Kx1] index of child for each node (1-indexed)
+% .hs - [Kx1] log ratio (.5*log(p/(1-p)) at each node
+% .weights - [Kx1] total sample weight at each node
+% .depth - [Kx1] depth of each node
+% data - data used for training tree (quantized version of input)
+% err - decision tree training error
+%
+% EXAMPLE
+%
+% See also binaryTreeApply, adaBoostTrain, forestTrain
+%
+% Piotr's Computer Vision Matlab Toolbox Version 3.40
+% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com]
+% Licensed under the Simplified BSD License [see external/bsd.txt]
+
+% get parameters
+dfs={'nBins',256,'maxDepth',1,'minWeight',.01,'fracFtrs',1,'nThreads',16};
+[nBins,maxDepth,minWeight,fracFtrs,nThreads]=getPrmDflt(varargin,dfs,1);
+assert(nBins<=256);
+
+% get data and normalize weights
+dfs={ 'X0','REQ', 'X1','REQ', 'wts0',[], 'wts1',[], ...
+ 'xMin',[], 'xStep',[], 'xType',[] }; +[X0,X1,wts0,wts1,xMin,xStep,xType]=getPrmDflt(data,dfs,1); +[N0,F]=size(X0); [N1,F1]=size(X1); assert(F==F1); +if(isempty(xType)), xMin=zeros(1,F); xStep=ones(1,F); xType=class(X0); end +assert(isfloat(wts0)); if(isempty(wts0)), wts0=ones(N0,1)/N0; end +assert(isfloat(wts1)); if(isempty(wts1)), wts1=ones(N1,1)/N1; end +w=sum(wts0)+sum(wts1); if(abs(w-1)>1e-3), wts0=wts0/w; wts1=wts1/w; end + +% quantize data to be between [0,nBins-1] if not already quantized +if( ~isa(X0,'uint8') || ~isa(X1,'uint8') ) + xMin = min(min(X0),min(X1))-.01; + xMax = max(max(X0),max(X1))+.01; + xStep = (xMax-xMin) / (nBins-1); + X0 = uint8(bsxfun(@times,bsxfun(@minus,X0,xMin),1./xStep)); + X1 = uint8(bsxfun(@times,bsxfun(@minus,X1,xMin),1./xStep)); +end +data=struct( 'X0',X0, 'X1',X1, 'wts0',wts0, 'wts1',wts1, ... + 'xMin',xMin, 'xStep',xStep, 'xType',xType ); + +% train decision tree classifier +K=2*(N0+N1); thrs=zeros(K,1,xType); +hs=zeros(K,1,'single'); weights=hs; errs=hs; +fids=zeros(K,1,'uint32'); child=fids; depth=fids; +wtsAll0=cell(K,1); wtsAll0{1}=wts0; +wtsAll1=cell(K,1); wtsAll1{1}=wts1; k=1; K=2; +while( k < K ) + % get node weights and prior + wts0=wtsAll0{k}; wtsAll0{k}=[]; w0=sum(wts0); + wts1=wtsAll1{k}; wtsAll1{k}=[]; w1=sum(wts1); + w=w0+w1; prior=w1/w; weights(k)=w; errs(k)=min(prior,1-prior); + hs(k)=max(-4,min(4,.5*log(prior/(1-prior)))); + % if nearly pure node or insufficient data don't train split + if( prior<1e-3||prior>1-1e-3||depth(k)>=maxDepth||w=3), err=sum(errs(1:K).*tree.weights.*(tree.child==0)); end + +end diff --git a/classify/binaryTreeTrain1.cpp b/classify/binaryTreeTrain1.cpp new file mode 100644 index 0000000..7c7cbf8 --- /dev/null +++ b/classify/binaryTreeTrain1.cpp @@ -0,0 +1,73 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.24 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include +#ifdef USEOMP +#include +#endif + +typedef unsigned char uint8; +typedef unsigned int uint32; +#define min(x,y) ((x) < (y) ? (x) : (y)) + +// construct cdf given data vector and wts +void constructCdf( uint8* data, float *wts, int nBins, + int N, int M, uint32 *ord, float *cdf ) +{ + int i; for( i=0; ie1) { e0=1-e; e1=e; thr=i; } + } + errs[f]=e0; thrs[f]=(uint8) thr; + } +} diff --git a/classify/binaryTreeTrain1.mexa64 b/classify/binaryTreeTrain1.mexa64 new file mode 100644 index 0000000..18caced Binary files /dev/null and b/classify/binaryTreeTrain1.mexa64 differ diff --git a/classify/binaryTreeTrain1.mexmaci64 b/classify/binaryTreeTrain1.mexmaci64 new file mode 100644 index 0000000..858cad6 Binary files /dev/null and b/classify/binaryTreeTrain1.mexmaci64 differ diff --git a/classify/binaryTreeTrain1.mexw64 b/classify/binaryTreeTrain1.mexw64 new file mode 100644 index 0000000..ffa42c5 Binary files /dev/null and b/classify/binaryTreeTrain1.mexw64 differ diff --git a/classify/confMatrix.m b/classify/confMatrix.m new file mode 100644 index 0000000..0098979 --- /dev/null +++ b/classify/confMatrix.m @@ -0,0 +1,61 @@ +function CM = confMatrix( IDXtrue, IDXpred, ntypes ) +% Generates a confusion matrix according to true and predicted data labels. +% +% CM(i,j) denotes the number of elements of class i that were given label +% j. In other words, each row i contains the predictions for elements whos +% actual class was i. If IDXpred is perfect, then CM is a diagonal matrix +% with CM(i,i) equal to the number of instances of class i. 
+% +% To normalize CM to [0,1], divide each row by sum of that row: +% CMnorm = CM ./ repmat( sum(CM,2), [1 size(CM,2)] ); +% +% USAGE +% CM = confMatrix( IDXtrue, IDXpred, ntypes ) +% +% INPUTS +% IDXtrue - [nx1] array of true labels [int values in 1-ntypes] +% IDXpred - [nx1] array of predicted labels [int values in 1-ntypes] +% ntypes - maximum number of types (should be > max(IDX)) +% +% OUTPUTS +% CM - ntypes x ntypes confusion array with integer values +% +% EXAMPLE +% IDXtrue = [ones(1,25) ones(1,25)*2]; +% IDXpred = [ones(1,10) randint2(1,30,[1 2]) ones(1,10)*2]; +% CM = confMatrix( IDXtrue, IDXpred, 2 ) +% confMatrixShow( CM, {'class-A','class-B'}, {'FontSize',20} ) +% +% See also CONFMATRIXSHOW +% +% Piotr's Computer Vision Matlab Toolbox Version 2.12 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +IDXtrue=IDXtrue(:); IDXpred=IDXpred(:); + +%%% convert common binary labels [-1/+1] or [0/1] to [1/2] +if( ntypes==2 ) + IDX = [IDXtrue;IDXpred]; + if( min(IDX)>=-1 && max(IDX)<=1 && all(IDX~=0)) + IDXtrue=IDXtrue+2; IDXpred=IDXpred+2; + IDXtrue(IDXtrue==3) = 2; IDXpred(IDXpred==3) = 2; + elseif( min(IDX)>=0 && max(IDX)<=1 ) + IDXtrue=IDXtrue+1; IDXpred=IDXpred+1; + end +end + +%%% error check +[IDXtrue,er] = checkNumArgs( IDXtrue, [], 0, 2 ); error(er); +[IDXpred,er] = checkNumArgs( IDXpred, [], 0, 2 ); error(er); +if( length(IDXtrue)~=length(IDXpred) ) + error('Lengths of IDXs must match up.'); end +if( max([IDXtrue;IDXpred])>ntypes ) + error(['ntypes = ' int2str(ntypes) ' not large enough']); end + +%%% generate CM +CM = zeros(ntypes); +for i=1:ntypes + vals = IDXpred( IDXtrue==i ); + for j=1:ntypes; CM(i,j) = sum(vals==j); end +end diff --git a/classify/confMatrixShow.m b/classify/confMatrixShow.m new file mode 100644 index 0000000..e06383c --- /dev/null +++ b/classify/confMatrixShow.m @@ -0,0 +1,53 @@ +function confMatrixShow( CM, types, pvPairs, nDigits, showCnts ) +% Used to 
display a confusion matrix.
+%
+% See confMatrix for general format and info on confusion matrices. This
+% function normalizes the CM before displaying, hence all values range in
+% [0,1] and rows sum to 1.
+%
+% USAGE
+% confMatrixShow( CM, [types], [pvPairs], [nDigits], [showCnts] )
+%
+% INPUTS
+% CM - [nTypes x nTypes] confusion array -- see confMatrix
+% types - [] cell array of length nTypes of text labels
+% pvPairs - [{'FontSize',20}] parameter / value list for text.m
+% nDigits - [2] number of digits after decimal to display
+% showCnts - [0] show total count per row to the right
+%
+% OUTPUTS
+%
+% EXAMPLE
+% CM = randint2(6,6,[1,100])+eye(6)*500;
+% types = { 'anger','disgust','fear','joy','sadness','surprise' };
+% confMatrixShow( CM, types, {'FontSize',20}, [], 0 )
+% title('confusion matrix','FontSize',24);
+%
+% See also confMatrix, imLabel, dispMatrixIm
+%
+% Piotr's Computer Vision Matlab Toolbox Version 2.50
+% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com]
+% Licensed under the Simplified BSD License [see external/bsd.txt]
+
+if( nargin<2 ); types=[]; end
+if( nargin<3 || isempty(pvPairs)); pvPairs = {'FontSize',20}; end
+if( nargin<4 || isempty(nDigits)); nDigits=2; end
+if( nargin<5 || isempty(showCnts)); showCnts=0; end
+if( nDigits<1 || nDigits>10 ); error('too few or too many digits'); end
+if( any(CM(:)<0) ); error( 'CM must have non-negative entries' ); end
+
+% normalize and round appropriately
+cnts = sum(CM,2);
+CM = CM ./ repmat( cnts+eps, [1 size(CM,2)] );
+CM = round(CM*10^nDigits) / 10^nDigits;
+
+% display as image using dispMatrixIm
+dispMatrixIm(CM,'maxM',1,'maxLen',nDigits+1,'show0',0,...
+ 'fStr','%f','invert',1,'pvPairs',pvPairs); axis square; + +% now add type labels +if( ~isempty(types) ) + imLabel( types, 'left', 0, pvPairs ); + imLabel( types, 'bottom', -35, pvPairs ); + if(showCnts), imLabel(int2str2(cnts),'right',0,pvPairs); end +end diff --git a/classify/demoCluster.m b/classify/demoCluster.m new file mode 100644 index 0000000..f625a8f --- /dev/null +++ b/classify/demoCluster.m @@ -0,0 +1,53 @@ +% Clustering demo. +% +% Used to test different clustering algorithms on 2D and 3D mixture of +% gaussian data. Alter demo by edititing this file. +% +% USAGE +% demoCluster +% +% INPUTS +% +% OUTPUTS +% +% EXAMPLE +% demoCluster +% +% See also KMEANS2, MEANSHIFT +% +% Piotr's Computer Vision Matlab Toolbox Version 2.0 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%%% generate data +if(1) % mixture of gaussians -- see demoGenData + kTr = 5; sep = 3; ecc = 3; nFracTr = 0.1; nPnts = 1000; d = 2; + [X,IDXtr] = demoGenData(nPnts,0,kTr,d,sep,ecc,nFracTr); +else + % two parallel clusters - kmeans will fail + kTr = 2; nPnts = 200; sep = 4; + X = [([5 0; 0 .5] * randn(2,nPnts) + sep/2)' ; ... 
+ ([5 0; 0 .5] * randn(2,nPnts) - sep/2)' ] / 5;
+ IDXtr = [ones(1,nPnts) 2*ones(1,nPnts)];
+ nFracTr=0;
+end;
+nFrac = nFracTr; k = kTr;
+
+%%% cluster
+switch 'kmeans2'
+ case 'kmeans2'
+ prm.nTrial=4; prm.display=1; prm.outFrac=nFrac;
+ [IDX,C,sumd] = kmeans2( X, k, prm );
+ case 'meanShift'
+ %(X,radius,rate,maxiter,minCsize,blur)
+ [IDX,C] = meanShift( X, .4, .2, 100 , 10, 0 );
+end
+
+%%% show data & clustering results
+figure(1); clf; d2 = min(d,3);
+subplot(2,2,1); visualizeData(X, d2); title('orig points');
+if(~isempty(IDXtr))
+ subplot(2,2,2); visualizeData(X, d2, IDXtr); title('true clusters');
+end;
+subplot(2,2,3); visualizeData(X, d2, IDX, [], C); title('rec clusters');
+subplot(2,2,4); D=distMatrixShow(sqrt(pdist2(X,X)),IDX,0); im(D);
diff --git a/classify/demoGenData.m b/classify/demoGenData.m
new file mode 100644
index 0000000..e3b3138
--- /dev/null
+++ b/classify/demoGenData.m
@@ -0,0 +1,82 @@
+function [X0,H0,X1,H1] = demoGenData(n0,n1,k,d,sep,ecc,frc)
+% Generate data drawn from a mixture of Gaussians.
+%
+% For definitions of separation and eccentricity see:
+% Sanjoy Dasgupta, "Learning Mixtures of Gaussians", FOCS, 1999.
+% http://cseweb.ucsd.edu/~dasgupta/papers/mog.pdf +% +% USAGE +% [X0,H0,X1,H1] = demoGenData(n0,n1,k,d,sep,ecc,[frc]) +% +% INPUTS +% n0 - size of training set +% n1 - size of testing set +% k - number of mixture components +% d - data dimension +% sep - minimum separation degree between clusters (sep > 0) +% ecc - maximum eccentricity of clusters (0 < ecc < 1) +% frc - [0] frac of points that are noise (uniformly distributed) +% +% OUTPUTS +% X0 - [n0xd] training set data vectors +% H0 - [n0x1] cluster membership in [1,k] (and -1 for noise) +% X1 - [n1xd] testing set data vectors +% H1 - [n1x1] cluster membership in [1,k] (and -1 for noise) +% +% EXAMPLE +% n0=1000; k=5; d=2; sep=2; ecc=1; frc=0; +% [X0,H0,X1,H1] = demoGenData(n0,n0,k,d,sep,ecc,frc); +% figure(1); clf; visualizeData( X0, 2, H0 ); title('train'); +% figure(2); clf; visualizeData( X1, 2, H1 ); title('test'); +% +% See also visualizeData, demoCluster +% +% Piotr's Computer Vision Matlab Toolbox Version 3.20 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% generate mixing weights and adjust n0 and n1 for noise fraction +w=0; while(any(w<=1/(4*k))), w=rand(k,1); w=w/sum(w); end +if( nargin<7 ), frc=0; end; frc=max(0,min(frc,1)); +n=floor(frc*n0); n0=n0-n; ns0=[ceil(n0*w); n]; +n=floor(frc*n1); n1=n1-n; ns1=[ceil(n1*w); n]; + +% create sep-separated Gaussian clusters of maximum eccentricity ecc +for trial=1:1000 + lam = ones(k,1)/1000; + n0=sum(ns0); X0=zeros(n0,d); H0=zeros(n0,1); n0=0; + n1=sum(ns1); X1=zeros(n1,d); H1=zeros(n1,1); n1=0; + mu = randn(k,d)*sqrt(k)*sqrt(sep)*trial/10; + for i = 1:k + % generate a random covariance matrix S=C'*C + U=rand(d,d)-0.5; U=sqrtm(inv(U*U'))*U; + L=diag(rand(d,1)*(ecc-1)+1).^2/100; C=chol(U*L*U'); + % populate X0, H0 + n=ns0(i); X0j=randn(n,d)*C + mu(ones(n,1)*i,:); + H0(n0+1:n0+n)=i; X0(n0+1:n0+n,:)=X0j; n0=n0+n; + if(n>1), lam(i) = sqrt(trace(cov(X0j))); end + % populate X1, H1 + n=ns1(i); X1j=randn(n,d)*C + mu(ones(n,1)*i,:); + H1(n1+1:n1+n)=i; X1(n1+1:n1+n,:)=X1j; n1=n1+n; + end + % check that degree of separation is sufficient (see Dasgupta 99) + % use "lam=sqrt(trace(S))" instead of "lam=sqrt(eigs(S,1))*d" + S = pdist2(mu,mu,'euclidean'); S(eye(k)>0)=inf; + for i=1:k, for j=1:k, S(i,j)=S(i,j)/max(lam(i),lam(j)); end; end + if(all(S(:)>=sep)), break; end +end; assert(trial<1000); + +% add uniformly distributed noise and permute order +if( frc>0 ) + v=max(abs(X0(:))); if(n1), v=max(v,max(abs(X1(:)))); end + % populate X0, H0 + n=ns0(k+1); X0j=(rand(n,d)-.5)*v*2.5; + H0(n0+1:n0+n)=-1; X0(n0+1:n0+n,:)=X0j; + n0=n0+n; p=randperm(n0); X0=X0(p,:); H0=H0(p); + % populate X1, H1 + n=ns1(k+1); X1j=(rand(n,d)-.5)*v*2.5; + H1(n1+1:n1+n)=-1; X1(n1+1:n1+n,:)=X1j; + n1=n1+n; p=randperm(n1); X1=X1(p,:); H1=H1(p); +end + +end diff --git a/classify/distMatrixShow.m b/classify/distMatrixShow.m new file mode 100644 index 0000000..7890f35 --- /dev/null +++ b/classify/distMatrixShow.m @@ -0,0 +1,73 
@@ +function [D, Dsm] = distMatrixShow( D, IDX, show ) +% Useful visualization of a distance matrix of clustered points. +% +% D is sorted into k blocks, where the ith block contains all the points in +% cluster i. When D is displayed the blocks are shown explicitly. Hence +% for a good clustering (under a spherical gaussian assumption) the +% 'diagonal' blocks ought to be mostly dark, and all other block ought to be +% relatively white. One can thus quickly visualize the quality of the +% clustering, or even how clusterable the points are. Outliers (according +% to IDX) are removed from D. +% +% USAGE +% [D, Dsm] = distMatrixShow( D, IDX, [show] ) +% +% INPUTS +% D - nxn distance matrix +% IDX - cluster membership [see kmeans2.m] +% show - [1] will display results in figure(show) +% +% OUTPUTS +% D - sorted nxn distance matrix +% Dsm - sorted and smoothed nxn distance matrix +% +% EXAMPLE +% % not the best example since points are already ordered +% [X,IDX] = demoGenData(100,0,5,2,10,2,0); +% distMatrixShow( pdist2(X,X), IDX ); +% +% See also VISUALIZEDATA, KMEANS2 +% +% Piotr's Computer Vision Matlab Toolbox Version 2.0 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if( nargin<3 || isempty(show) ); show=1; end + +k = max(IDX); +n = size(D,1); + +%%% remove outliers from D and IDX +inliers = IDX>0; +D = D( inliers, inliers ); +IDX = IDX( inliers ); + +%%% get order of points and rearrange D and IDX +order = IDX2order( IDX ); +IDX = IDX( order ); +D = D( order, order ); + +%%% compute smoothed version of D +cnts = zeros(1,k); for i=1:k; cnts(i)=sum(IDX==i); end +cumCnts = cumsum(cnts); cumCnts2=[0 cumCnts]; Dsm = D; +inds = 1:k; inds = inds( cnts>0 ); +for i=inds + rs = cumCnts2(i)+1:cumCnts2(i+1); + for j=inds + cs = cumCnts2(j)+1:cumCnts2(j+1); + ds = D( rs, cs ); + Dsm( rs, cs ) = mean(ds(:)); + end; +end; + +%%% show D and lines seperating super clusters. 
+if(show) + figure(show); clf; + subplot(1,2,1); im(D); hold('on') + for i=1:k-1 + line( [.5,n+.5], [cumCnts(i)+.5,cumCnts(i)+.5] ); + line( [cumCnts(i)+.5,cumCnts(i)+.5], [.5,n+.5] ); + end; + hold('off'); + subplot(1,2,2); im( Dsm ); +end diff --git a/classify/fernsClfApply.m b/classify/fernsClfApply.m new file mode 100644 index 0000000..71e6a0f --- /dev/null +++ b/classify/fernsClfApply.m @@ -0,0 +1,28 @@ +function [hs,probs] = fernsClfApply( data, ferns, inds ) +% Apply learned fern classifier. +% +% USAGE +% [hs,probs] = fernsClfApply( data, ferns, [inds] ) +% +% INPUTS +% data - [NxF] N length F binary feature vectors +% ferns - learned fern classification model +% inds - [NxM] cached inds (from previous call to fernsInds) +% +% OUTPUTS +% hs - [Nx1] predicted output labels +% probs - [NxH] predicted output label probabilities +% +% EXAMPLE +% +% See also fernsClfTrain, fernsInds +% +% Piotr's Computer Vision Matlab Toolbox Version 2.50 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] +if( nargin<3 || isempty(inds) ) + inds = fernsInds(data,ferns.fids,ferns.thrs); end +[N,M]=size(inds); H=ferns.H; probs=zeros(N,H); +for m=1:M, probs = probs + ferns.pFern(inds(:,m),:,m); end +if(ferns.bayes==0), probs=probs/M; end; [~,hs]=max(probs,[],2); +end diff --git a/classify/fernsClfTrain.m b/classify/fernsClfTrain.m new file mode 100644 index 0000000..094a02c --- /dev/null +++ b/classify/fernsClfTrain.m @@ -0,0 +1,92 @@ +function [ferns,hsPr] = fernsClfTrain( data, hs, varargin ) +% Train random fern classifier. +% +% See "Fast Keypoint Recognition in Ten Lines of Code" by Mustafa Ozuysal, +% Pascal Fua and Vincent Lepetit, CVPR07. 
+% +% Dimensions: +% M - number ferns +% S - fern depth +% F - number features +% N - number input vectors +% H - number classes +% +% USAGE +% [ferns,hsPr] = fernsClfTrain( data, hs, [varargin] ) +% +% INPUTS +% data - [NxF] N length F feature vectors +% hs - [Nx1] target output labels in [1,H] +% varargin - additional params (struct or name/value pairs) +% .S - [10] fern depth (ferns are exponential in S) +% .M - [50] number of ferns to train +% .thrr - [0 1] range for randomly generated thresholds +% .bayes - [1] if true combine probs using bayes assumption +% .ferns - [] if given reuse previous ferns (recompute pFern) +% +% OUTPUTS +% ferns - learned fern model w the following fields +% .fids - [MxS] feature ids for each fern for each depth +% .thrs - [MxS] threshold corresponding to each fid +% .pFern - [2^SxHxM] learned log probs at fern leaves +% .bayes - if true combine probs using bayes assumption +% .inds - [NxM] cached indices for original training data +% .H - number classes +% hsPr - [Nx1] predicted output labels +% +% EXAMPLE +% N=5000; H=5; d=2; [xs0,hs0,xs1,hs1]=demoGenData(N,N,H,d,1,1); +% fernPrm=struct('S',4,'M',50,'thrr',[-1 1],'bayes',1); +% tic, [ferns,hsPr0]=fernsClfTrain(xs0,hs0,fernPrm); toc +% tic, hsPr1 = fernsClfApply( xs1, ferns ); toc +% e0=mean(hsPr0~=hs0); e1=mean(hsPr1~=hs1); +% fprintf('errors trn=%f tst=%f\n',e0,e1); figure(1); +% subplot(2,2,1); visualizeData(xs0,2,hs0); +% subplot(2,2,2); visualizeData(xs0,2,hsPr0); +% subplot(2,2,3); visualizeData(xs1,2,hs1); +% subplot(2,2,4); visualizeData(xs1,2,hsPr1); +% +% See also fernsClfApply, fernsInds +% +% Piotr's Computer Vision Matlab Toolbox Version 2.61 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get additional parameters and check dimensions +dfs={'S',10,'M',50,'thrr',[0 1],'bayes',1,'ferns',[]}; +[S,M,thrr,bayes,ferns]=getPrmDflt(varargin,dfs,1); +[N,F]=size(data); assert(length(hs)==N); +H=max(hs); assert(all(hs>0)); assert(S<=20); + +if( isempty(ferns) ) + % create ferns model and compute inds (w/o field pFern) + thrs=rand(M,S)*(thrr(2)-thrr(1))+thrr(1); + fids=uint32(floor(rand(M,S)*F+1)); inds=fernsInds(data,fids,thrs); + ferns=struct('fids',fids,'thrs',thrs,'bayes',bayes,'H',H,'inds',inds); +else + % re-use cached model (will need to recompute pFern) + ferns.H=H; ferns.pFern=[]; inds=ferns.inds; assert(size(inds,1)==N); +end + +% get counts for each leaf for each class for each fern +pFern = zeros(2^S,H,M); edges = 1:2^S; +for h=1:H, inds1=inds(hs==h,:); + for m=1:M, pFern(:,h,m)=histc(inds1(:,m),edges); end +end +pFern = pFern + bayes; + +% convert fern leaf class counts into probabilities +if( bayes<=0 ) + norm = 1./sum(pFern,2); + pFern = bsxfun(@times,pFern,norm); +else + norm = 1./sum(pFern,1); + pFern = bsxfun(@times,pFern,norm); + pFern=log(pFern); +end + +% store pFern and compute output values +ferns.pFern=pFern; clear pFern; +if(nargout==2), hsPr=fernsClfApply([],ferns,inds); end + +end diff --git a/classify/fernsInds.m b/classify/fernsInds.m new file mode 100644 index 0000000..1d4e2de --- /dev/null +++ b/classify/fernsInds.m @@ -0,0 +1,39 @@ +function inds = fernsInds( data, fids, thrs ) +% Compute indices for each input by each fern. 
+% +% USAGE +% inds = fernsInds( data, fids, thrs ) +% +% INPUTS +% data - [NxF] N length F binary feature vectors +% fids - [MxS] feature ids for each fern for each depth +% thrs - [MxS] threshold corresponding to each fid +% +% OUTPUTS +% inds - [NxM] computed indices for each input by each fern +% +% EXAMPLE +% +% See also fernsClfTrain, fernsClfApply +% +% Piotr's Computer Vision Matlab Toolbox Version 2.50 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +inds = fernsInds1( data, fids, thrs ); + +%%% OLD MATLAB CODE -- NOW IN MEX +% [M,S]=size(fids); N=size(data,1); +% inds = zeros(N,M,'uint32'); +% for n=1:N +% for m=1:M +% for s=1:S +% inds(n,m)=inds(n,m)*2; +% if( data(n,fids(m,s)) +typedef unsigned int uint; + +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + int N, F, M, S, n, f, m, s; + double *data, *thrs; + uint *fids, *inds; + + /* Error checking on arguments */ + if( nrhs!=3) mexErrMsgTxt("Three input arguments required."); + if( nlhs>1 ) mexErrMsgTxt("Too many output arguments."); + if( !mxIsClass(prhs[0], "double") || !mxIsClass(prhs[1], "uint32") + || !mxIsClass(prhs[2], "double")) + mexErrMsgTxt("Input arrays are of incorrect type."); + + /* extract inputs */ + data = (double*) mxGetData(prhs[0]); /* N x F */ + fids = (uint*) mxGetData(prhs[1]); /* M x S */ + thrs = (double*) mxGetData(prhs[2]); /* N x F */ + N=mxGetM(prhs[0]); F=mxGetN(prhs[0]); + M=mxGetM(prhs[1]); S=mxGetN(prhs[1]); + + /* create outputs */ + plhs[0] = mxCreateNumericMatrix(N, M, mxUINT32_CLASS, mxREAL); + inds = (uint*) mxGetData(plhs[0]); /* N x M */ + + /* compute inds */ + for(m=0; m=sum(w)/2); +m = x(ind); +end diff --git a/classify/forestApply.m b/classify/forestApply.m new file mode 100644 index 0000000..e64500c --- /dev/null +++ b/classify/forestApply.m @@ -0,0 +1,40 @@ +function [hs,ps] = forestApply( data, forest, maxDepth, minCount, best ) +% Apply learned 
forest classifier. +% +% USAGE +% [hs,ps] = forestApply( data, forest, [maxDepth], [minCount], [best] ) +% +% INPUTS +% data - [NxF] N length F feature vectors +% forest - learned forest classification model +% maxDepth - [] maximum depth of tree +% minCount - [] minimum number of data points to allow split +% best - [0] if true use single best prediction per tree +% +% OUTPUTS +% hs - [Nx1] predicted output labels +% ps - [NxH] predicted output label probabilities +% +% EXAMPLE +% +% See also forestTrain +% +% Piotr's Computer Vision Matlab Toolbox Version 3.24 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] +if(nargin<3 || isempty(maxDepth)), maxDepth=0; end +if(nargin<4 || isempty(minCount)), minCount=0; end +if(nargin<5 || isempty(best)), best=0; end +assert(isa(data,'single')); M=length(forest); +H=size(forest(1).distr,2); N=size(data,1); +if(best), hs=zeros(N,M); else ps=zeros(N,H); end +discr=iscell(forest(1).hs); if(discr), best=1; hs=cell(N,M); end +for i=1:M, tree=forest(i); + if(maxDepth>0), tree.child(tree.depth>=maxDepth) = 0; end + if(minCount>0), tree.child(tree.count<=minCount) = 0; end + ids = forestInds(data,tree.thrs,tree.fids,tree.child); + if(best), hs(:,i)=tree.hs(ids); else ps=ps+tree.distr(ids,:); end +end +if(discr), ps=[]; return; end % output is actually {NxM} in this case +if(best), ps=histc(hs',1:H)'; end; [~,hs]=max(ps,[],2); ps=ps/M; +end diff --git a/classify/forestFindThr.cpp b/classify/forestFindThr.cpp new file mode 100644 index 0000000..3767894 --- /dev/null +++ b/classify/forestFindThr.cpp @@ -0,0 +1,86 @@ +/******************************************************************************* +* Piotr's Computer Vision Matlab Toolbox Version 3.24 +* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +* Licensed under the Simplified BSD License [see external/bsd.txt] +*******************************************************************************/ +#include +#include +#include +#include + +typedef unsigned int uint32; +#define gini(p) p*p +#define entropy(p) (-p*flog2(float(p))) + +// fast approximate log2(x) from Paul Mineiro +inline float flog2( float x ) { + union { float f; uint32_t i; } vx = { x }; + union { uint32_t i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 }; + float y = float(vx.i); y *= 1.1920928955078125e-7f; + return y - 124.22551499f - 1.498030302f * mx.f + - 1.72587999f / (0.3520887068f + mx.f); +} + +// perform actual computation +void forestFindThr( int H, int N, int F, const float *data, + const uint32 *hs, const float *ws, const uint32 *order, const int split, + uint32 &fid, float &thr, double &gain ) +{ + double *Wl, *Wr, *W; float *data1; uint32 *order1; + int i, j, j1, j2, h; double vBst, vInit, v, w, wl, wr, g, gl, gr; + Wl=new double[H]; Wr=new double[H]; W=new double[H]; + // perform initialization + vBst = vInit = 0; g = 0; w = 0; fid = 1; thr = 0; + for( i=0; i>0] + j1=order1[j]; j2=order1[j+1]; h=hs[j1]-1; + wl+=ws[j1]; Wl[h]+=ws[j1]; wr-=ws[j1]; Wr[h]-=ws[j1]; + g=0; for( int h1=0; h1=1e-6f ) { + vBst=v; fid=i+1; thr=0.5f*(data1[j1]+data1[j2]); } + } + } + delete [] Wl; delete [] Wr; delete [] W; gain = vInit-vBst; +} + +// [fid,thr,gain] = mexFunction(data,hs,ws,order,H,split); +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + int H, N, F, split; float *data, *ws, thr; + double gain; uint32 *hs, *order, fid; + data = (float*) mxGetData(prhs[0]); + hs = (uint32*) mxGetData(prhs[1]); + ws = (float*) mxGetData(prhs[2]); + order = (uint32*) mxGetData(prhs[3]); + H = (int) mxGetScalar(prhs[4]); + split = (int) mxGetScalar(prhs[5]); + N = (int) mxGetM(prhs[0]); + F = (int) mxGetN(prhs[0]); + forestFindThr(H,N,F,data,hs,ws,order,split,fid,thr,gain); + plhs[0] = 
mxCreateDoubleScalar(fid); + plhs[1] = mxCreateDoubleScalar(thr); + plhs[2] = mxCreateDoubleScalar(gain); +} diff --git a/classify/forestFindThr.mexa64 b/classify/forestFindThr.mexa64 new file mode 100644 index 0000000..916305f Binary files /dev/null and b/classify/forestFindThr.mexa64 differ diff --git a/classify/forestFindThr.mexmaci64 b/classify/forestFindThr.mexmaci64 new file mode 100644 index 0000000..f7728cf Binary files /dev/null and b/classify/forestFindThr.mexmaci64 differ diff --git a/demo.m b/demo.m new file mode 100644 index 0000000..5265daa --- /dev/null +++ b/demo.m @@ -0,0 +1,39 @@ +% +% Visual Tracking Using Attention-Modulated Disintegration and Integration +% +% Jongwon Choi, 2016 +% https://sites.google.com/site/jwchoivision/ +% contact: jwchoi.pil@gmail.com +% +% +% Demo program of SCT4. +% You can use this program freely for research and please acknowledge the paper[1]. +% You should contact to us for any commercial usage. +% When you need the program of SCT6, please contact to the authors. +% +% *** Piotr Dollar's toolbox[2] and some codes from Henriques et al.[3] were utilized. +% +% [1] J. Choi, H. J. Chang, J. Jeong, Y. Demiris, J. Y. Choi, "Visual Tracking +% Using Attention-Modulated Disintegration and Integration", CVPR, 2016 +% [2] P. Dollar, ¡°Piotr¡¯s Computer Vision Matlab Toolbox (PMT)¡±, +% http://vision.ucsd.edu/?pdollar/toolbox/doc/index.html. +% [3] J. F. Henriques, R. Caseiro, P. Martins, and J. 
Batista, ¡°HighSpeed Tracking +% with Kernelized Correlation Filters¡±, IEEE Transactions on PAMI, 2015 +% +% + +addpath('KCF'); +addpath('strong'); +addpath(genpath('PiotrDollarToolbox')); + +% Inputs +base_path = 'Deer'; %dataset path +show_visualization = 1; %visualization option (0: not visible, 1: visible) + +% Load the image data +[img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path); + +% Tracking start +% Position : [left-top-x left-top-y right-bottom-x right-bottom-y] +% time : computational time in second (without time for image load & visualization) +[positions, time] = sct4(video_path, img_files, pos, target_sz, show_visualization); diff --git a/detector/acfDemoCal.m b/detector/acfDemoCal.m new file mode 100644 index 0000000..77fb885 --- /dev/null +++ b/detector/acfDemoCal.m @@ -0,0 +1,49 @@ +% Demo for aggregate channel features object detector on Caltech dataset. +% +% See also acfReadme.m +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%% extract training and testing images and ground truth +cd(fileparts(which('acfDemoCal.m'))); dataDir='../../data/Caltech/'; +for s=1:2 + if(s==1), type='test'; skip=[]; else type='train'; skip=4; end + dbInfo(['Usa' type]); if(s==2), type=['train' int2str2(skip,2)]; end + if(exist([dataDir type '/annotations'],'dir')), continue; end + dbExtract([dataDir type],1,skip); +end + +%% set up opts for training detector (see acfTrain) +opts=acfTrain(); opts.modelDs=[50 20.5]; opts.modelDsPad=[64 32]; +opts.pPyramid.pChns.pColor.smooth=0; opts.nWeak=[64 256 1024 4096]; +opts.pBoost.pTree.maxDepth=5; opts.pBoost.discrete=0; +opts.pBoost.pTree.fracFtrs=1/16; opts.nNeg=25000; opts.nAccNeg=50000; +opts.pPyramid.pChns.pGradHist.softBin=1; opts.pJitter=struct('flip',1); +opts.posGtDir=[dataDir 'train' int2str2(skip,2) '/annotations']; +opts.posImgDir=[dataDir 'train' int2str2(skip,2) '/images']; +opts.pPyramid.pChns.shrink=2; opts.name='models/AcfCaltech+'; +pLoad={'lbls',{'person'},'ilbls',{'people'},'squarify',{3,.41}}; +opts.pLoad = [pLoad 'hRng',[50 inf], 'vRng',[1 1] ]; + +%% optionally switch to LDCF version of detector (see acfTrain) +if( 0 ), opts.filters=[5 4]; opts.name='models/LdcfCaltech'; end + +%% train detector (see acfTrain) +detector = acfTrain( opts ); + +%% modify detector (see acfModify) +pModify=struct('cascThr',-1,'cascCal',.025); +detector=acfModify(detector,pModify); + +%% run detector on a sample image (see acfDetect) +imgNms=bbGt('getFiles',{[dataDir 'test/images']}); +I=imread(imgNms{1862}); tic, bbs=acfDetect(I,detector); toc +figure(1); im(I); bbApply('draw',bbs); pause(.1); + +%% test detector and plot roc (see acfTest) +[~,~,gt,dt]=acfTest('name',opts.name,'imgDir',[dataDir 'test/images'],... + 'gtDir',[dataDir 'test/annotations'],'pLoad',[pLoad, 'hRng',[50 inf],... + 'vRng',[.65 1],'xRng',[5 635],'yRng',[5 475]],... 
+ 'pModify',pModify,'reapply',0,'show',2); diff --git a/detector/acfDemoInria.m b/detector/acfDemoInria.m new file mode 100644 index 0000000..7a5d6d0 --- /dev/null +++ b/detector/acfDemoInria.m @@ -0,0 +1,61 @@ +% Demo for aggregate channel features object detector on Inria dataset. +% +% See also acfReadme.m +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%% extract training and testing images and ground truth +cd(fileparts(which('acfDemoInria.m'))); dataDir='../../data/Inria/'; +for s=1:2, pth=dbInfo('InriaTest'); + if(s==1), set='00'; type='train'; else set='01'; type='test'; end + if(exist([dataDir type '/posGt'],'dir')), continue; end + seqIo([pth '/videos/set' set '/V000'],'toImgs',[dataDir type '/pos']); + seqIo([pth '/videos/set' set '/V001'],'toImgs',[dataDir type '/neg']); + V=vbb('vbbLoad',[pth '/annotations/set' set '/V000']); + vbb('vbbToFiles',V,[dataDir type '/posGt']); +end + +%% set up opts for training detector (see acfTrain) +opts=acfTrain(); opts.modelDs=[100 41]; opts.modelDsPad=[128 64]; +opts.posGtDir=[dataDir 'train/posGt']; opts.nWeak=[32 128 512 2048]; +opts.posImgDir=[dataDir 'train/pos']; opts.pJitter=struct('flip',1); +opts.negImgDir=[dataDir 'train/neg']; opts.pBoost.pTree.fracFtrs=1/16; +opts.pLoad={'squarify',{3,.41}}; opts.name='models/AcfInria'; + +%% optionally switch to LDCF version of detector (see acfTrain) +if( 0 ) + opts.filters=[5 4]; opts.pJitter=struct('flip',1,'nTrn',3,'mTrn',1); + opts.pBoost.pTree.maxDepth=3; opts.pBoost.discrete=0; opts.seed=2; + opts.pPyramid.pChns.shrink=2; opts.name='models/LdcfInria'; +end + +%% train detector (see acfTrain) +detector = acfTrain( opts ); + +%% modify detector (see acfModify) +pModify=struct('cascThr',-1,'cascCal',.01); +detector=acfModify(detector,pModify); + +%% run detector on a sample image (see acfDetect) +imgNms=bbGt('getFiles',{[dataDir 
'test/pos']}); +I=imread(imgNms{1}); tic, bbs=acfDetect(I,detector); toc +figure(1); im(I); bbApply('draw',bbs); pause(.1); + +%% test detector and plot roc (see acfTest) +[miss,~,gt,dt]=acfTest('name',opts.name,'imgDir',[dataDir 'test/pos'],... + 'gtDir',[dataDir 'test/posGt'],'pLoad',opts.pLoad,... + 'pModify',pModify,'reapply',0,'show',2); + +%% optional timing test for detector (should be ~30 fps) +if( 0 ) + detector1=acfModify(detector,'pad',[0 0]); n=60; Is=cell(1,n); + for i=1:n, Is{i}=imResample(imread(imgNms{i}),[480 640]); end + tic, for i=1:n, acfDetect(Is{i},detector1); end; + fprintf('Detector runs at %.2f fps on 640x480 images.\n',n/toc); +end + +%% optionally show top false positives ('type' can be 'fp','fn','tp','dt') +if( 0 ), bbGt('cropRes',gt,dt,imgNms,'type','fp','n',50,... + 'show',3,'dims',opts.modelDs([2 1])); end diff --git a/detector/acfDetect.m b/detector/acfDetect.m new file mode 100644 index 0000000..a28981a --- /dev/null +++ b/detector/acfDetect.m @@ -0,0 +1,88 @@ +function bbs = acfDetect( I, detector, fileName ) +% Run aggregate channel features object detector on given image(s). +% +% The input 'I' can either be a single image (or filename) or a cell array +% of images (or filenames). In the first case, the return is a set of bbs +% where each row has the format [x y w h score] and score is the confidence +% of detection. If the input is a cell array, the output is a cell array +% where each element is a set of bbs in the form above (in this case a +% parfor loop is used to speed execution). If 'fileName' is specified, the +% bbs are saved to a comma separated text file and the output is set to +% bbs=1. If saving detections for multiple images the output is stored in +% the format [imgId x y w h score] and imgId is a one-indexed image id. +% +% A cell of detectors trained with the same channels can be specified, +% detected bbs from each detector are concatenated. 
If using multiple +% detectors and opts.pNms.separate=1 then each bb has a sixth element +% bbType=j, where j is the j-th detector, see bbNms.m for details. +% +% USAGE +% bbs = acfDetect( I, detector, [fileName] ) +% +% INPUTS +% I - input image(s) of filename(s) of input image(s) +% detector - detector(s) trained via acfTrain +% fileName - [] target filename (if specified return is 1) +% +% OUTPUTS +% bbs - [nx5] array of bounding boxes or cell array of bbs +% +% EXAMPLE +% +% See also acfTrain, acfModify, bbGt>loadAll, bbNms +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% run detector on every image +if(nargin<3), fileName=''; end; multiple=iscell(I); +if(~isempty(fileName) && exist(fileName,'file')), bbs=1; return; end +if(~multiple), bbs=acfDetectImg(I,detector); else + n=length(I); bbs=cell(n,1); + parfor i=1:n, bbs{i}=acfDetectImg(I{i},detector); end +end + +% write results to disk if fileName specified +if(isempty(fileName)), return; end +d=fileparts(fileName); if(~isempty(d)&&~exist(d,'dir')), mkdir(d); end +if( multiple ) % add image index to each bb and flatten result + for i=1:n, bbs{i}=[ones(size(bbs{i},1),1)*i bbs{i}]; end + bbs=cell2mat(bbs); +end +dlmwrite(fileName,bbs); bbs=1; + +end + +function bbs = acfDetectImg( I, detector ) +% Run trained sliding-window object detector on given image. 
+Ds=detector; if(~iscell(Ds)), Ds={Ds}; end; nDs=length(Ds); +opts=Ds{1}.opts; pPyramid=opts.pPyramid; pNms=opts.pNms; +imreadf=opts.imreadf; imreadp=opts.imreadp; +shrink=pPyramid.pChns.shrink; pad=pPyramid.pad; +separate=nDs>1 && isfield(pNms,'separate') && pNms.separate; +% read image and compute features (including optionally applying filters) +if(all(ischar(I))), I=feval(imreadf,I,imreadp{:}); end +P=chnsPyramid(I,pPyramid); bbs=cell(P.nScales,nDs); +if(isfield(opts,'filters') && ~isempty(opts.filters)), shrink=shrink*2; + for i=1:P.nScales, fs=opts.filters; C=repmat(P.data{i},[1 1 size(fs,4)]); + for j=1:size(C,3), C(:,:,j)=conv2(C(:,:,j),fs(:,:,j),'same'); end + P.data{i}=imResample(C,.5); + end +end +% apply sliding window classifiers +for i=1:P.nScales + for j=1:nDs, opts=Ds{j}.opts; + modelDsPad=opts.modelDsPad; modelDs=opts.modelDs; + bb = acfDetect1(P.data{i},Ds{j}.clf,shrink,... + modelDsPad(1),modelDsPad(2),opts.stride,opts.cascThr); + shift=(modelDsPad-modelDs)/2-pad; + bb(:,1)=(bb(:,1)+shift(2))/P.scaleshw(i,2); + bb(:,2)=(bb(:,2)+shift(1))/P.scaleshw(i,1); + bb(:,3)=modelDs(2)/P.scales(i); + bb(:,4)=modelDs(1)/P.scales(i); + if(separate), bb(:,6)=j; end; bbs{i,j}=bb; + end +end; bbs=cat(1,bbs{:}); +if(~isempty(pNms)), bbs=bbNms(bbs,pNms); end +end diff --git a/detector/acfModify.m b/detector/acfModify.m new file mode 100644 index 0000000..43d628f --- /dev/null +++ b/detector/acfModify.m @@ -0,0 +1,89 @@ +function detector = acfModify( detector, varargin ) +% Modify aggregate channel features object detector. +% +% Takes an object detector trained by acfTrain() and modifies it. Only +% certain modifications are allowed to the detector and the detector should +% never be modified directly (this may cause the detector to be invalid and +% cause segmentation faults). Any valid modification to a detector after it +% is trained should be performed using acfModify(). 
+% +% The parameters 'nPerOct', 'nOctUp', 'nApprox', 'lambdas', 'pad', 'minDs' +% modify the channel feature pyramid created (see help of chnsPyramid.m for +% more details) and primarily control the scales used. The parameters +% 'pNms', 'stride', 'cascThr' and 'cascCal' modify the detector behavior +% (see help of acfTrain.m for more details). Finally, 'rescale' can be +% used to rescale the trained detector (this change is irreversible). +% +% USAGE +% detector = acfModify( detector, pModify ) +% +% INPUTS +% detector - detector trained via acfTrain +% pModify - parameters (struct or name/value pairs) +% .nPerOct - [] number of scales per octave +% .nOctUp - [] number of upsampled octaves to compute +% .nApprox - [] number of approx. scales to use +% .lambdas - [] coefficients for power law scaling (see BMVC10) +% .pad - [] amount to pad channels (along T/B and L/R) +% .minDs - [] minimum image size for channel computation +% .pNms - [] params for non-maximal suppression (see bbNms.m) +% .stride - [] spatial stride between detection windows +% .cascThr - [] constant cascade threshold (affects speed/accuracy) +% .cascCal - [] cascade calibration (affects speed/accuracy) +% .rescale - [] rescale entire detector by given ratio +% +% OUTPUTS +% detector - modified object detector +% +% EXAMPLE +% +% See also chnsPyramid, bbNms, acfTrain, acfDetect +% +% Piotr's Computer Vision Matlab Toolbox Version 3.20 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get parameters (and copy to detector and pPyramid structs) +opts=detector.opts; p=opts.pPyramid; +dfs={ 'nPerOct',p.nPerOct, 'nOctUp',p.nOctUp, 'nApprox',p.nApprox, ... + 'lambdas',p.lambdas, 'pad',p.pad, 'minDs',p.minDs, 'pNms',opts.pNms, ... + 'stride',opts.stride,'cascThr',opts.cascThr,'cascCal',0,'rescale',1 }; +[p.nPerOct,p.nOctUp,p.nApprox,p.lambdas,p.pad,p.minDs,opts.pNms,... 
+ opts.stride,opts.cascThr,cascCal,rescale] = getPrmDflt(varargin,dfs,1); + +% finalize pPyramid and opts +p.complete=0; p.pChns.complete=0; p=chnsPyramid([],p); p=p.pPyramid; +p.complete=1; p.pChns.complete=1; shrink=p.pChns.shrink; +opts.stride=max(1,round(opts.stride/shrink))*shrink; +opts.pPyramid=p; detector.opts=opts; + +% calibrate and rescale detector +detector.clf.hs = detector.clf.hs+cascCal; +if(rescale~=1), detector=detectorRescale(detector,rescale); end + +end + +function detector = detectorRescale( detector, rescale ) +% Rescale detector by ratio rescale. +opts=detector.opts; shrink=opts.pPyramid.pChns.shrink; +bh=opts.modelDsPad(1)/shrink; bw=opts.modelDsPad(2)/shrink; +opts.stride=max(1,round(opts.stride*rescale/shrink))*shrink; +modelDsPad=round(opts.modelDsPad*rescale/shrink)*shrink; +rescale=modelDsPad./opts.modelDsPad; opts.modelDsPad=modelDsPad; +opts.modelDs=round(opts.modelDs.*rescale); detector.opts=opts; +bh1=opts.modelDsPad(1)/shrink; bw1=opts.modelDsPad(2)/shrink; +% move 0-indexed (x,y) location of each lookup feature +clf=detector.clf; fids=clf.fids; is=find(clf.child>0); +fids=double(fids(is)); n=length(fids); loc=zeros(n,3); +loc(:,3)=floor(fids/bh/bw); fids=fids-loc(:,3)*bh*bw; +loc(:,2)=floor(fids/bh); fids=fids-loc(:,2)*bh; loc(:,1)=fids; +loc(:,1)=min(bh1-1,round(loc(:,1)*rescale(1))); +loc(:,2)=min(bw1-1,round(loc(:,2)*rescale(2))); +fids = loc(:,3)*bh1*bw1 + loc(:,2)*bh1 + loc(:,1); +clf.fids(is)=int32(fids); +% rescale thrs for all features (fpdw trick) +nChns=[detector.info.nChns]; assert(max(loc(:,3)) +lambdas=opts.pPyramid.lambdas; lambdas=sqrt(prod(rescale)).^-lambdas(k); +clf.thrs(is)=clf.thrs(is).*lambdas(loc(:,3)+1)'; detector.clf=clf; +end diff --git a/detector/acfReadme.m b/detector/acfReadme.m new file mode 100644 index 0000000..23dbe9a --- /dev/null +++ b/detector/acfReadme.m @@ -0,0 +1,109 @@ +% Aggregate Channel Features Detector Overview. 
+% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 1. Introduction. %%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% The detector portion of this toolbox implements the Aggregate Channel +% Features (ACF) object detection code. The ACF detector is a fast and +% effective sliding window detector (30 fps on a single core). It is an +% evolution of the Viola & Jones (VJ) detector but with an ~1000 fold +% decrease in false positives (at the same detection rate). ACF is best +% suited for quasi-rigid object detection (e.g. faces, pedestrians, cars). +% +% The detection code was written by Piotr Dollár with contributions by Ron +% Appel and Woonhyun Nam (with bug reports/suggestions from many others). +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 2. Papers. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% The detector was introduced and described through the following papers: +% [1] P. Dollár, Z. Tu, P. Perona and S. Belongie +% "Integral Channel Features", BMVC 2009. +% [2] P. Dollár, S. Belongie and P. Perona +% "The Fastest Pedestrian Detector in the West," BMVC 2010. +% [3] P. Dollár, R. Appel and W. Kienzle +% "Crosstalk Cascades for Frame-Rate Pedestrian Detection," ECCV 2012. +% [4] P. Dollár, R. Appel, S. Belongie and P. Perona +% "Fast Feature Pyramids for Object Detection," PAMI 2014. +% [5] W. Nam, P. Dollár, and J.H. Han +% "Local Decorrelation For Improved Pedestrian Detection," NIPS 2014. +% Please see: http://vision.ucsd.edu/~pdollar/research.html#ObjectDetection +% +% A short summary of the papers, organized by detector name: +% +% [1] "Integral Channel Features" [ICF] - Introduced channel features and +% modified the VJ framework to compute integral images (and Haar wavelets) +% over the channels. Substantially outperformed HOG and at faster speeds. 
+% +% [2] "Fastest Pedestrian Detector in the West" [FPDW] - We observed that +% features computed at one scale can be used to approximate features at +% nearby scales, increasing detector speed with little loss in accuracy. +% +% [3] "Crosstalk Cascades" - This work coupled cascade evaluation at nearby +% positions and scales to exploit correlations in detector responses at +% neighboring locations. Further increased speed of the ICF detector. +% +% [4] "Aggregate Channel Features" [ACF] - We found that single-scale +% square Haar wavelets were sufficient in the ICF framework. Thus instead +% of computing integral images and Haar wavelets, we simply smooth and +% downsample the channels and the features are now single pixel lookups in +% the "aggregated" channels. +% +% [5] "Locally Decorralated Channel Features" [LDCF] - Filtering the +% channel features with appropriate data-derived filters can remove local +% correlations from the channels. Given decorrelated features, boosted +% decision trees generalize much better giving a nice boost in accuracy. +% +% This code implements ACF [4] and LDCF [5]. It does not implement ICF [1] +% or FPDW [2] which are now obsolete and supplemented by ACF. Crosstalk +% cascades [3] are also not used as classifier evalution in ACF is very +% fast (no need to compute Haar wavelets). However, ACF does use the simple +% but highly effective "constant soft cascades" from [3]. +% +% Please cite a subset of the above papers as appropriate if you end up +% using this code to support a publication. Thanks! +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 3. Setup. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% +% (A) Please install and setup the toolbox as described online: +% http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html +% You may need to recompile for your system, see toolboxCompile. Note: +% enabling OpenMP during compile will significantly speed training. 
+% +% (B) Important: to train the detectors and run the detection demos you +% need to install the Caltech Pedestrian Detection Benchmark available at: +% http://www.vision.caltech.edu/Image_Datasets/CaltechPedestrians/ +% In particular, make sure to download and install: +% (B1) Matlab evaluation/labeling code version 3.2.1 or later +% (B2) INRIA data (necessary for the INRIA demo) +% (B3) Caltech-USA data (necessary for the Caltech demo) +% Please follow the instruction in the readme of the Caltech code. You only +% need to download the data and code and place appropriately, there is no +% need to look closely at the evaluation code. Initially running the demos +% (acfDemoInria and acfDemoCal) will convert the data from the Caltech data +% format to a format useable by ACF. If this step fails it means the +% Caltech code or data is not properly setup. +% +% %%%%%%%%%%%%%%%%%%%%%%%%%%%% 4. Getting Started. %%%%%%%%%%%%%%%%%%%%%%%% +% +% After performing the setup, see acfDemoInria.m and acfDemoCal.m for demos +% and visualizations. +% +% For an overview of available functionality please see detector/Contents.m +% and channels/Contents.m. The various detector/acf*.m and channels/chns*.m +% functions are well documented and worth checking for additional details. +% +% Finally, a note about pre-trained models. The detector/models/ directory +% contains four pre-trained pedestrian models (ACF/LDCF on INRIA/Caltech). +% Running acfDemoInria/Cal.m with the ACF/LDCF flag toggled gives rise to +% these models (just delete the existing models to retrain from scratch). +% Note, however, that results will differ by up to +/-2% MR depending on +% operating system and random seed (see opts.seed), and the models here are +% not exactly equivalent to the models in the papers (due to evolution of +% the code). Small changes in MR should not be considered significant (nor +% should they be used as a basis for publishing). 
Whenever making a change +% I suggest training/testing the same model with multiple random seeds. +% +% Enjoy and I hope you find the detectors useful :) diff --git a/detector/acfSweeps.m b/detector/acfSweeps.m new file mode 100644 index 0000000..5c2edfa --- /dev/null +++ b/detector/acfSweeps.m @@ -0,0 +1,262 @@ +function acfSweeps +% Parameter sweeps for ACF pedestrian detector. +% +% Running the parameter sweeps requires altering internal flags. +% The sweeps are not well documented, use at your own discretion. +% +% Piotr's Computer Vision Matlab Toolbox Version NEW +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% specify type and location of cluster (see fevalDistr.m) +rtDir=[fileparts(fileparts(fileparts(mfilename('fullpath')))) '/data/']; +pDistr={'type','parfor'}; if(0), matlabpool('open',11); end + +% define all parameter sweeps +expNms = {'FtrsColorSpace','FtrsChnTypes','FtrsGradColorChn',... + 'FtrsGradNormRad','FtrsGradNormConst','FtrsGradOrients',... + 'FtrsGradSoftBins','FtrsSmoothIm','FtrsSmoothChns','FtrsShrink',... + 'DetModelDs','DetModelDsPad','DetStride','DetNumOctaves',... + 'DetNumApprox','DetLambda','DetCascThr','DetCascCal','DetNmsThr',... + 'TrnNumWeak','TrnNumBoot','TrnDepth','TrnNumBins','TrnFracFtrs',... + 'DataNumPos','DataNumNeg','DataNumNegAcc','DataNumNegPer',... 
+ 'DataNumPosStump','DataJitterTran','DataJitterRot'}; +expNms=expNms(:); T = 10; +[opts,lgd,lbl]=createExp(rtDir,expNms); + +% run training and testing jobs +[jobsTrn,jobsTst] = createJobs( rtDir, opts, T ); N=length(expNms); +fprintf('nTrain = %i; nTest = %i\n',length(jobsTrn),length(jobsTst)); +tic, s=fevalDistr('acfTrain',jobsTrn,pDistr); assert(s==1); toc +tic, s=fevalDistr('acfTest',jobsTst,pDistr); assert(s==1); toc + +% create plots for all experiments +for e=1:N, plotExps(rtDir,expNms{e},opts{e},lgd{e},lbl{e},T); end + +end + +function plotExps( rtDir, expNm, opts, lgd, lbl, T ) +% data location and parameters for plotting +plDir=[rtDir 'sweeps/plots/']; if(~exist(plDir,'dir')), mkdir(plDir); end +diary([plDir 'sweeps.txt']); disp([expNm ' [' lbl ']']); N=length(lgd); +pLoad=struct('squarify',{{3,.41}},'hRng',[0 inf]); +pTest=struct('name','', 'imgDir',[rtDir 'Inria/test/pos'],... + 'gtDir',[rtDir 'Inria/test/posGt'], 'pLoad',pLoad); +pTest=repmat(pTest,N,T); for e=1:N, for t=1:T, + pTest(e,t).name=[opts(e).name 'T' int2str2(t,2)]; end; end +% get all miss rates and display error +miss=zeros(N,T); parfor e=1:N*T, miss(e)=acfTest(pTest(e)); end +stds=std(miss,0,2); R=mean(miss,2); msg=' %.2f +/- %.2f [%s]\n'; +for e=1:N, fprintf(msg,R(e)*100,stds(e)*100,lgd{e}); end +% plot sweeps +figPrp = {'Units','Pixels','Position',[800 600 800 400]}; +figure(1); clf; set(1,figPrp{:}); set(gca,'FontSize',24); clr=[0 .69 .94]; +pPl1={'LineWidth',3,'MarkerSize',15,'Color',clr,'MarkerFaceColor',clr}; +pPl2=pPl1; clr=[1 .75 0]; pPl2{6}=clr; pPl2{8}=clr; +for e=1:N, if(lgd{e}(end)=='*'), def=e; end; end; lgd{def}(end)=[]; +plot(R,'-d',pPl1{:}); hold on; plot(def,R(def),'d',pPl2{:}); e=.001; +ylabel('MR'); axis([.5 N+.5 min([R; .15]) max([R; .3])+e]); +if(isempty(lbl)), imLabel(lgd,'bottom',30,{'FontSize',24}); lgd=[]; end +xlabel(lbl); set(gca,'XTick',1:N,'XTickLabel',lgd); +% save plot +fFig=[plDir expNm]; diary('off'); +for t=1:25, try savefig(fFig,1,'png'); break; catch, 
pause(1), end; end +end + +function [jobsTrn,jobsTst] = createJobs( rtDir, opts, T ) +% Prepare all jobs (one train and one test job per set of opts). +opts=[opts{:}]; N=length(opts); NT=N*T; +opts=repmat(opts,1,T); nms=cell(1,NT); +jobsTrn=cell(1,NT); doneTrn=zeros(1,NT); +jobsTst=cell(1,NT); doneTst=zeros(1,NT); +pLoad=struct('squarify',{{3,.41}},'hRng',[0 inf]); +pTest=struct('name','', 'imgDir',[rtDir 'Inria/test/pos'],... + 'gtDir',[rtDir 'Inria/test/posGt'], 'pLoad',pLoad); +for e=1:NT + t=ceil(e/N); opts(e).seed=(t-1)*100000+1; + nm=[opts(e).name 'T' int2str2(t,2)]; + opts(e).name=nm; pTest.name=nm; nms{e}=nm; + doneTrn(e)=exist([nm 'Detector.mat'],'file')==2; jobsTrn{e}={opts(e)}; + doneTst(e)=exist([nm 'Dets.txt'],'file')==2; jobsTst{e}={pTest}; +end +[~,kp]=unique(nms,'stable'); +doneTrn=doneTrn(kp); jobsTrn=jobsTrn(kp); jobsTrn=jobsTrn(~doneTrn); +doneTst=doneTst(kp); jobsTst=jobsTst(kp); jobsTst=jobsTst(~doneTst); +end + +function [opts,lgd,lbl] = createExp( rtDir, expNm ) + +% if expNm is a cell, call recursively and return +if( iscell(expNm) ) + N=length(expNm); opts=cell(1,N); lgd=cell(1,N); lbl=lgd; + for e=1:N, [opts{e},lgd{e},lbl{e}]=createExp(rtDir,expNm{e}); end; return +end + +% default params for detectorTrain.m +dataDir=[rtDir 'Inria/']; +opts=acfTrain(); opts.modelDs=[100 41]; opts.modelDsPad=[128 64]; +opts.posGtDir=[dataDir 'train/posGt']; opts.nWeak=[32 128 512 2048]; +opts.posImgDir=[dataDir 'train/pos']; opts.pJitter=struct('flip',1); +opts.negImgDir=[dataDir 'train/neg']; opts.pBoost.pTree.fracFtrs=1/16; +if(~exist([rtDir 'sweeps/res/'],'dir')), mkdir([rtDir 'sweeps/res/']); end +opts.pBoost.pTree.nThreads=1; + +% setup experiments (N sets of params) +optsDefault=opts; N=100; lgd=cell(1,N); ss=lgd; lbl=''; O=ones(1,N); +pChns=opts.pPyramid.pChns(O); pPyramid=opts.pPyramid(O); opts=opts(O); +switch expNm + case 'FtrsColorSpace' + N=8; clrs={'Gray','rgb','hsv','luv'}; + for e=1:N, pChns(e).pColor.colorSpace=clrs{mod(e-1,4)+1}; end + for 
e=5:N, pChns(e).pGradMag.enabled=0; end + for e=5:N, pChns(e).pGradHist.enabled=0; end + ss=[clrs clrs]; for e=1:4, ss{e}=[ss{e} '+G+H']; end + ss=upper(ss); lgd=ss; + case 'FtrsChnTypes' + nms={'LUV+','G+','H+'}; N=7; + for e=1:N + en=false(1,3); for i=1:3, en(i)=bitget(uint8(e),i); end + pChns(e).pColor.enabled=en(1); pChns(e).pGradMag.enabled=en(2); + pChns(e).pGradHist.enabled=en(3); + nm=[nms{en}]; nm=nm(1:end-1); lgd{e}=nm; ss{e}=nm; + end + case 'FtrsGradColorChn' + lbl='gradient color channel'; + N=4; ss={'Max','L','U','V'}; lgd=ss; + for e=1:N, pChns(e).pGradMag.colorChn=e-1; end + case 'FtrsGradNormRad' + lbl='norm radius'; + vs=[0 1 2 5 10]; N=length(vs); + for e=1:N, pChns(e).pGradMag.normRad=vs(e); end + case 'FtrsGradNormConst' + lbl='norm constant x 10^3'; + vs=[1 2 5 10 20 50 100]; N=length(vs); + for e=1:N, pChns(e).pGradMag.normConst=vs(e)/1000; end + case 'FtrsGradOrients' + lbl='# orientations'; + vs=[2 4 6 8 10 12]; N=length(vs); + for e=1:N, pChns(e).pGradHist.nOrients=vs(e); end + case 'FtrsGradSoftBins' + lbl='use soft bins'; + vs=[0 1]; N=length(vs); + for e=1:N, pChns(e).pGradHist.softBin=vs(e); end + case 'FtrsSmoothIm' + lbl='image smooth radius'; + vs=[0 50 100 200]; N=length(vs); + for e=1:N, pChns(e).pColor.smooth=vs(e)/100; end + for e=1:N, lgd{e}=num2str(vs(e)/100); end + case 'FtrsSmoothChns' + lbl='channel smooth radius'; + vs=[0 50 100 200]; N=length(vs); + for e=1:N, pPyramid(e).smooth=vs(e)/100; end + for e=1:N, lgd{e}=num2str(vs(e)/100); end + case 'FtrsShrink' + lbl='channel shrink'; + vs=2.^(1:4); N=length(vs); + for e=1:N, pChns(e).shrink=vs(e); end + case 'DetModelDs' + lbl='model height'; + rs=1.1.^(-2:2); vs=round(100*rs); ws=round(41*rs); N=length(vs); + for e=1:N, opts(e).modelDs=[vs(e) ws(e)]; end + for e=1:N, opts(e).modelDsPad=opts(e).modelDs+[28 23]; end + case 'DetModelDsPad' + lbl='padded model height'; + rs=1.1.^(-2:2); vs=round(128*rs); ws=round(64*rs); N=length(vs); + for e=1:N, opts(e).modelDsPad=[vs(e) 
ws(e)]; end + case 'DetStride' + lbl='detector stride'; + vs=4:4:16; N=length(vs); + for e=1:N, opts(e).stride=vs(e); end + case 'DetNumOctaves' + lbl='# scales per octave'; + vs=2.^(0:5); N=length(vs); + for e=1:N, pPyramid(e).nPerOct=vs(e); pPyramid(e).nApprox=vs(e)-1; end + case 'DetNumApprox' + lbl='# approx scales'; + vs=2.^(0:5)-1; N=length(vs); + for e=1:N, pPyramid(e).nApprox=vs(e); end + case 'DetLambda' + lbl='lambda x 100'; + vs=-45:15:70; N=length(vs); + for e=[1:4 6:N], pPyramid(e).lambdas=[0 vs(e) vs(e)]/100; end + for e=1:N, lgd{e}=int2str(vs(e)); end; vs=vs+100; + case 'DetCascThr' + lbl='cascade threshold'; + vs=[-.5 -1 -2 -5 -10]; N=length(vs); + for e=1:N, opts(e).cascThr=vs(e); end + for e=1:N, lgd{e}=num2str(vs(e)); end; vs=vs*-10; + case 'DetCascCal' + lbl='cascade offset x 10^4'; + vs=[5 10 20 50 100 200 500]; N=length(vs); + for e=1:N, opts(e).cascCal=vs(e)/1e4; end + case 'DetNmsThr' + lbl='nms overlap'; + vs=25:10:95; N=length(vs); + for e=1:N, opts(e).pNms.overlap=vs(e)/1e2; end + for e=1:N, lgd{e}=['.' 
num2str(vs(e))]; end + case 'TrnNumWeak' + lbl='# decision trees / x'; + vs=2.^(0:3); N=length(vs); + for e=1:N, opts(e).nWeak=opts(e).nWeak/vs(e); end + case 'TrnNumBoot' + lbl='bootstrap schedule'; + vs={5:1:11,5:2:11,3:1:11,3:2:11}; N=length(vs); + ss={'5-1-11','5-2-11','3-1-11','3-2-11'}; lgd=ss; + for e=1:N, opts(e).nWeak=2.^vs{e}; end + case 'TrnDepth' + lbl='tree depth'; + vs=1:5; N=length(vs); + for e=1:N, opts(e).pBoost.pTree.maxDepth=vs(e); end + case 'TrnNumBins' + lbl='# bins'; + vs=2.^(4:8); N=length(vs); + for e=1:N, opts(e).pBoost.pTree.nBins=vs(e); end + case 'TrnFracFtrs' + lbl='fraction features'; + vs=2.^(1:8); N=length(vs); + for e=1:N, opts(e).pBoost.pTree.fracFtrs=1/vs(e); end + case 'DataNumPos' + lbl='# pos examples'; + vs=[2.^(6:9) inf]; N=length(vs); + for e=1:N-1, opts(e).nPos=vs(e); end + case 'DataNumNeg' + lbl='# neg examples'; + vs=[5 10 25 50 100 250]*100; N=length(vs); + for e=1:N, opts(e).nNeg=vs(e); end + case 'DataNumNegAcc' + lbl='# neg examples total'; + vs=[25 50 100 250 500]*100; N=length(vs); + for e=1:N, opts(e).nAccNeg=vs(e); end + case 'DataNumNegPer' + lbl='# neg example / image'; + vs=[5 10 25 50 100]; N=length(vs); + for e=1:N, opts(e).nPerNeg=vs(e); end + case 'DataNumPosStump' + lbl='# pos examples (stumps)'; + vs=[2.^(6:9) 1237 1237]; N=length(vs); lgd{N}='1237*'; + for e=1:N-1, opts(e).nPos=vs(e); opts(e).pBoost.pTree.maxDepth=1; end + case 'DataJitterTran' + lbl='translational jitter'; + vs=[0 1 2 4]; N=length(vs); opts(1).pJitter=struct('flip',1); + for e=2:N, opts(e).pJitter=struct('flip',1,'nTrn',3,'mTrn',vs(e)); end + for e=1:N, lgd{e}=['+/-' int2str(vs(e))]; end + case 'DataJitterRot' + lbl='rotational jitter'; + vs=[0 2 4 8]; N=length(vs); + for e=2:N, opts(e).pJitter=struct('flip',1,'nPhi',3,'mPhi',vs(e)); end + for e=1:N, lgd{e}=['+/-' int2str(vs(e))]; end + otherwise, error('invalid exp: %s',expNm); +end + +% produce final set of opts and find default opts +for e=1:N, if(isempty(lgd{e})), 
lgd{e}=int2str(vs(e)); end; end +for e=1:N, if(isempty(ss{e})), ss{e}=int2str2(vs(e),5); end; end +O=1:N; opts=opts(O); lgd=lgd(O); ss=ss(O); d=0; +for e=1:N, pPyramid(e).pChns=pChns(e); opts(e).pPyramid=pPyramid(e); end +for e=1:N, if(isequal(optsDefault,opts(e))), d=e; break; end; end +if(d==0), disp(expNm); assert(false); end +for e=1:N, opts(e).name=[rtDir 'sweeps/res/' expNm ss{e}]; end +lgd{d}=[lgd{d} '*']; opts(d).name=[rtDir 'sweeps/res/Default']; +if(0), disp([ss' lgd']'); end + +end diff --git a/detector/acfTest.m b/detector/acfTest.m new file mode 100644 index 0000000..977a5db --- /dev/null +++ b/detector/acfTest.m @@ -0,0 +1,66 @@ +function [miss,roc,gt,dt] = acfTest( varargin ) +% Test aggregate channel features object detector given ground truth. +% +% USAGE +% [miss,roc,gt,dt] = acfTest( pTest ) +% +% INPUTS +% pTest - parameters (struct or name/value pairs) +% .name - ['REQ'] detector name +% .imgDir - ['REQ'] dir containing test images +% .gtDir - ['REQ'] dir containing test ground truth +% .pLoad - [] params for bbGt>bbLoad for test data (see bbGt>bbLoad) +% .pModify - [] params for acfModify for modifying detector +% .thr - [.5] threshold on overlap area for comparing two bbs +% .mul - [0] if true allow multiple matches to each gt +% .reapply - [0] if true re-apply detector even if bbs already computed +% .ref - [10.^(-2:.25:0)] reference points (see bbGt>compRoc) +% .lims - [3.1e-3 1e1 .05 1] plot axis limits +% .show - [0] optional figure number for display +% +% OUTPUTS +% miss - log-average miss rate computed at reference points +% roc - [nx3] n data points along roc of form [score fp tp] +% gt - [mx5] ground truth results [x y w h match] (see bbGt>evalRes) +% dt - [nx6] detect results [x y w h score match] (see bbGt>evalRes) +% +% EXAMPLE +% +% See also acfTrain, acfDetect, acfModify, acfDemoInria, bbGt +% +% Piotr's Computer Vision Matlab Toolbox Version 3.40 +% Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get parameters +dfs={ 'name','REQ', 'imgDir','REQ', 'gtDir','REQ', 'pLoad',[], ... + 'pModify',[], 'thr',.5,'mul',0, 'reapply',0, 'ref',10.^(-2:.25:0), ... + 'lims',[3.1e-3 1e1 .05 1], 'show',0 }; +[name,imgDir,gtDir,pLoad,pModify,thr,mul,reapply,ref,lims,show] = ... + getPrmDflt(varargin,dfs,1); + +% run detector on directory of images +bbsNm=[name 'Dets.txt']; +if(reapply && exist(bbsNm,'file')), delete(bbsNm); end +if(reapply || ~exist(bbsNm,'file')) + detector = load([name 'Detector.mat']); + detector = detector.detector; + if(~isempty(pModify)), detector=acfModify(detector,pModify); end + imgNms = bbGt('getFiles',{imgDir}); + acfDetect( imgNms, detector, bbsNm ); +end + +% run evaluation using bbGt +[gt,dt] = bbGt('loadAll',gtDir,bbsNm,pLoad); +[gt,dt] = bbGt('evalRes',gt,dt,thr,mul); +[fp,tp,score,miss] = bbGt('compRoc',gt,dt,1,ref); +miss=exp(mean(log(max(1e-10,1-miss)))); roc=[score fp tp]; + +% optionally plot roc +if( ~show ), return; end +figure(show); plotRoc([fp tp],'logx',1,'logy',1,'xLbl','fppi',... + 'lims',lims,'color','g','smooth',1,'fpTarget',ref); +title(sprintf('log-average miss rate = %.2f%%',miss*100)); +savefig([name 'Roc'],show,'png'); + +end diff --git a/detector/acfTrain.m b/detector/acfTrain.m new file mode 100644 index 0000000..12610b3 --- /dev/null +++ b/detector/acfTrain.m @@ -0,0 +1,345 @@ +function detector = acfTrain( varargin ) +% Train aggregate channel features object detector. +% +% Train aggregate channel features (ACF) object detector as described in: +% P. Dollár, R. Appel, S. Belongie and P. Perona +% "Fast Feature Pyramids for Object Detection", PAMI 2014. +% The ACF detector is fast (30 fps on a single core) and achieves top +% accuracy on rigid object detection. Please see acfReadme.m for details. 
+% +% Takes a set of parameters opts (described in detail below) and trains a +% detector from start to finish including performing multiple rounds of +% bootstrapping if need be. The return is a struct 'detector' for use with +% acfDetect.m which fully defines a sliding window detector. Training is +% fast (on the INRIA pedestrian dataset training takes ~10 minutes on a +% single core or ~3m using four cores). Taking advantage of parallel +% training requires launching matlabpool (see help for matlabpool). The +% trained detector may be altered in certain ways via acfModify(). Calling +% opts=acfTrain() returns all default options. +% +% (1) Specifying features and model: The channel features are defined by +% 'pPyramid'. See chnsCompute.m and chnsPyramid.m for more details. The +% channels may be convolved by a set 'filters' to remove local correlations +% (see our NIPS14 paper on LDCF), improving accuracy but slowing detection. +% If 'filters'=[wFilter,nFilter] these are automatically computed. The +% model dimensions ('modelDs') define the window height and width. The +% padded dimensions ('modelDsPad') define the extended region around object +% candidates that are used for classification. For example, for 100 pixel +% tall pedestrians, typically a 128 pixel tall region is used to make a +% decision. 'pNms' controls non-maximal suppression (see bbNms.m), 'stride' +% controls the window stride, and 'cascThr' and 'cascCal' are the threshold +% and calibration used for the constant soft cascades. Typically, set +% 'cascThr' to -1 and adjust 'cascCal' until the desired recall is reached +% (setting 'cascCal' shifts the final scores output by the detector by the +% given amount). Training alternates between sampling (bootstrapping) and +% training an AdaBoost classifier (clf). 'nWeak' determines the number of +% training stages and number of trees after each stage, e.g. nWeak=[32 128 +% 512 2048] defines four stages with the final clf having 2048 trees. 
+% 'pBoost' specifies parameters for AdaBoost, and 'pBoost.pTree' are the +% decision tree parameters, see adaBoostTrain.m for details. Finally, +% 'seed' is the random seed used and makes results reproducible and 'name' +% defines the location for storing the detector and log file. +% +% (2) Specifying training data location and amount: The training data can +% take on a number of different forms. The positives can be specified using +% either a dir of pre-cropped windows ('posWinDir') or dirs of full images +% ('posImgDir') and ground truth labels ('posGtDir'). The negatives can by +% specified using a dir of pre-cropped windows ('negWinDir'), a dir of full +% images without any positives and from which negatives can be sampled +% ('negImgDir'), and finally if neither 'negWinDir' or 'negImgDir' are +% given negatives are sampled from the images in 'posImgDir' (avoiding the +% positives). For the pre-cropped windows all images must have size at +% least modelDsPad and have the object (of size exactly modelDs) centered. +% 'imreadf' can be used to specify a custom function for loading an image, +% and 'imreadp' are custom additional parameters to imreadf. When sampling +% from full images, 'pLoad' determines how the ground truth is loaded and +% converted to a set of positive bbs (see bbGt>bbLoad). 'nPos' controls the +% total number of positives to sample for training (if nPos=inf the number +% of positives is limited by the training set). 'nNeg' controls the total +% number of negatives to sample and 'nPerNeg' limits the number of +% negatives to sample per image. 'nAccNeg' controls the maximum number of +% negatives that can accumulate over multiple stages of bootstrapping. +% Define 'pJitter' to jitter the positives (see jitterImage.m) and thus +% artificially increase the number of positive training windows. Finally if +% 'winsSave' is true cropped windows are saved to disk as a mat file. 
+% +% USAGE +% detector = acfTrain( opts ) +% opts = acfTrain() +% +% INPUTS +% opts - parameters (struct or name/value pairs) +% (1) features and model: +% .pPyramid - [{}] params for creating pyramid (see chnsPyramid) +% .filters - [] [wxwxnChnsxnFilter] filters or [wFilter,nFilter] +% .modelDs - [] model height+width without padding (eg [100 41]) +% .modelDsPad - [] model height+width with padding (eg [128 64]) +% .pNms - [..] params for non-maximal suppression (see bbNms.m) +% .stride - [4] spatial stride between detection windows +% .cascThr - [-1] constant cascade threshold (affects speed/accuracy) +% .cascCal - [.005] cascade calibration (affects speed/accuracy) +% .nWeak - [128] vector defining number weak clfs per stage +% .pBoost - [..] parameters for boosting (see adaBoostTrain.m) +% .seed - [0] seed for random stream (for reproducibility) +% .name - [''] name to prepend to clf and log filenames +% (2) training data location and amount: +% .posGtDir - [''] dir containing ground truth +% .posImgDir - [''] dir containing full positive images +% .negImgDir - [''] dir containing full negative images +% .posWinDir - [''] dir containing cropped positive windows +% .negWinDir - [''] dir containing cropped negative windows +% .imreadf - [@imread] optional custom function for reading images +% .imreadp - [{}] optional custom parameters for imreadf +% .pLoad - [..] 
params for bbGt>bbLoad (see bbGt) +% .nPos - [inf] max number of pos windows to sample +% .nNeg - [5000] max number of neg windows to sample +% .nPerNeg - [25] max number of neg windows to sample per image +% .nAccNeg - [10000] max number of neg windows to accumulate +% .pJitter - [{}] params for jittering pos windows (see jitterImage) +% .winsSave - [0] if true save cropped windows at each stage to disk +% +% OUTPUTS +% detector - trained object detector (modify only via acfModify) +% .opts - input parameters used for model training +% .clf - learned boosted tree classifier (see adaBoostTrain) +% .info - info about channels (see chnsCompute.m) +% +% EXAMPLE +% +% See also acfReadme, acfDetect, acfDemoInria, acfModify, acfTest, +% chnsCompute, chnsPyramid, adaBoostTrain, bbGt, bbNms, jitterImage +% +% Piotr's Computer Vision Matlab Toolbox Version NEW +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% initialize opts struct +opts = initializeOpts( varargin{:} ); +if(nargin==0), detector=opts; return; end + +% load or initialize detector and begin logging +nm=[opts.name 'Detector.mat']; t=exist(nm,'file'); +if(t), if(nargout), t=load(nm); detector=t.detector; end; return; end +t=fileparts(nm); if(~isempty(t) && ~exist(t,'dir')), mkdir(t); end +detector = struct( 'opts',opts, 'clf',[], 'info',[] ); +startTrain=clock; nm=[opts.name 'Log.txt']; +if(exist(nm,'file')), diary(nm); diary('off'); delete(nm); end; diary(nm); +RandStream.setGlobalStream(RandStream('mrg32k3a','Seed',opts.seed)); + +% iterate bootstraping and training +for stage = 0:numel(opts.nWeak)-1 + diary('on'); fprintf([repmat('-',[1 75]) '\n']); + fprintf('Training stage %i\n',stage); startStage=clock; + + % sample positives and compute info about channels + if( stage==0 ) + [Is1,IsOrig1] = sampleWins( detector, stage, 1 ); + t=ndims(Is1); if(t==3), t=Is1(:,:,1); else t=Is1(:,:,:,1); end + t=chnsCompute(t,opts.pPyramid.pChns); 
detector.info=t.info; + end + + % compute local decorrelation filters + if( stage==0 && length(opts.filters)==2 ) + fs = opts.filters; opts.filters = []; + X1 = chnsCompute1( IsOrig1, opts ); + fs = chnsCorrelation( X1, fs(1), fs(2) ); + opts.filters = fs; detector.opts.filters = fs; + end + + % compute lambdas + if( stage==0 && isempty(opts.pPyramid.lambdas) ) + fprintf('Computing lambdas... '); start=clock; + ds=size(IsOrig1); ds(1:end-1)=1; IsOrig1=mat2cell2(IsOrig1,ds); + ls=chnsScaling(opts.pPyramid.pChns,IsOrig1,0); + ls=round(ls*10^5)/10^5; detector.opts.pPyramid.lambdas=ls; + fprintf('done (time=%.0fs).\n',etime(clock,start)); + end + + % compute features for positives + if( stage==0 ) + X1 = chnsCompute1( Is1, opts ); + X1 = reshape(X1,[],size(X1,4))'; + clear Is1 IsOrig1 ls fs ds t; + end + + % sample negatives and compute features + Is0 = sampleWins( detector, stage, 0 ); + X0 = chnsCompute1( Is0, opts ); clear Is0; + X0 = reshape(X0,[],size(X0,4))'; + + % accumulate negatives from previous stages + if( stage>0 ) + n0=size(X0p,1); n1=max(opts.nNeg,opts.nAccNeg)-size(X0,1); + if(n0>n1 && n1>0), X0p=X0p(randSample(n0,n1),:); end + if(n0>0 && n1>0), X0=[X0p; X0]; end %#ok + end; X0p=X0; + + % train boosted clf + detector.opts.pBoost.nWeak = opts.nWeak(stage+1); + detector.clf = adaBoostTrain(X0,X1,detector.opts.pBoost); + detector.clf.hs = detector.clf.hs + opts.cascCal; + + % update log + fprintf('Done training stage %i (time=%.0fs).\n',... + stage,etime(clock,startStage)); diary('off'); +end + +% save detector +save([opts.name 'Detector.mat'],'detector'); + +% finalize logging +diary('on'); fprintf([repmat('-',[1 75]) '\n']); +fprintf('Done training (time=%.0fs).\n',... + etime(clock,startTrain)); diary('off'); + +end + +function opts = initializeOpts( varargin ) +% Initialize opts struct. +dfs= { 'pPyramid',{}, 'filters',[], ... + 'modelDs',[100 41], 'modelDsPad',[128 64], ... + 'pNms',struct(), 'stride',4, 'cascThr',-1, 'cascCal',.005, ... 
+ 'nWeak',128, 'pBoost', {}, 'seed',0, 'name','', 'posGtDir','', ... + 'posImgDir','', 'negImgDir','', 'posWinDir','', 'negWinDir','', ... + 'imreadf',@imread, 'imreadp',{}, 'pLoad',{}, 'nPos',inf, 'nNeg',5000, ... + 'nPerNeg',25, 'nAccNeg',10000, 'pJitter',{}, 'winsSave',0 }; +opts = getPrmDflt(varargin,dfs,1); +% fill in remaining parameters +p=chnsPyramid([],opts.pPyramid); p=p.pPyramid; +p.minDs=opts.modelDs; shrink=p.pChns.shrink; +opts.modelDsPad=ceil(opts.modelDsPad/shrink)*shrink; +p.pad=ceil((opts.modelDsPad-opts.modelDs)/shrink/2)*shrink; +p=chnsPyramid([],p); p=p.pPyramid; p.complete=1; +p.pChns.complete=1; opts.pPyramid=p; +% initialize pNms, pBoost, pBoost.pTree, and pLoad +dfs={ 'type','maxg', 'overlap',.65, 'ovrDnm','min' }; +opts.pNms=getPrmDflt(opts.pNms,dfs,-1); +dfs={ 'pTree',{}, 'nWeak',0, 'discrete',1, 'verbose',16 }; +opts.pBoost=getPrmDflt(opts.pBoost,dfs,1); +dfs={'nBins',256,'maxDepth',2,'minWeight',.01,'fracFtrs',1,'nThreads',16}; +opts.pBoost.pTree=getPrmDflt(opts.pBoost.pTree,dfs,1); +opts.pLoad=getPrmDflt(opts.pLoad,{'squarify',{0,1}},-1); +opts.pLoad.squarify{2}=opts.modelDs(2)/opts.modelDs(1); +end + +function [Is,IsOrig] = sampleWins( detector, stage, positive ) +% Load or sample windows for training detector. 
+opts=detector.opts; start=clock; +if( positive ), n=opts.nPos; else n=opts.nNeg; end +if( positive ), crDir=opts.posWinDir; else crDir=opts.negWinDir; end +if( exist(crDir,'dir') && stage==0 ) + % if window directory is specified simply load windows + fs=bbGt('getFiles',{crDir}); nImg=length(fs); assert(nImg>0); + if(nImg>n), fs=fs(:,randSample(nImg,n)); else n=nImg; end + for i=1:n, fs{i}=[{opts.imreadf},fs(i),opts.imreadp]; end + Is=cell(1,n); parfor i=1:n, Is{i}=feval(fs{i}{:}); end +else + % sample windows from full images using sampleWins1() + hasGt=positive||isempty(opts.negImgDir); fs={opts.negImgDir}; + if(hasGt), fs={opts.posImgDir,opts.posGtDir}; end + fs=bbGt('getFiles',fs); nImg=size(fs,2); assert(nImg>0); + if(~isinf(n)), fs=fs(:,randperm(nImg)); end; Is=cell(nImg*1000,1); + diary('off'); tid=ticStatus('Sampling windows',1,30); k=0; i=0; batch=64; + while( i + gt=[]; if(hasGt), [~,gt]=bbGt('bbLoad',fs{2,ij},opts.pLoad); end + Is1{j} = sampleWins1( I, gt, detector, stage, positive ); + end + Is1=[Is1{:}]; k1=length(Is1); Is(k+1:k+k1)=Is1; k=k+k1; + if(k>n), Is=Is(randSample(k,n)); k=n; end + i=i+batch; tocStatus(tid,max(i/nImg,k/n)); + end + Is=Is(1:k); diary('on'); + fprintf('Sampled %i windows from %i images.\n',k,i); +end +% optionally jitter positive windows +if(length(Is)<2), Is={}; return; end +nd=ndims(Is{1})+1; Is=cat(nd,Is{:}); IsOrig=Is; +if( positive && isstruct(opts.pJitter) ) + opts.pJitter.hasChn=(nd==4); Is=jitterImage(Is,opts.pJitter); + ds=size(Is); ds(nd)=ds(nd)*ds(nd+1); Is=reshape(Is,ds(1:nd)); +end +% make sure dims are divisible by shrink and not smaller than modelDsPad +ds=size(Is); cr=rem(ds(1:2),opts.pPyramid.pChns.shrink); s=floor(cr/2)+1; +e=ceil(cr/2); Is=Is(s(1):end-e(1),s(2):end-e(2),:,:); ds=size(Is); +if(any(ds(1:2)1/50}),4)); + sig=reshape(full(convmtx2(sig,w,w)),wp+w-1,wp+w-1,[]); + sig=reshape(sig(w:wp,w:wp,:),w^2,w^2); sig=(sig+sig')/2; + % compute filters for each channel from sig (sorted by eigenvalue) + 
[fs,D]=eig(sig); fs=reshape(fs,w,w,[]); + [~,ord]=sort(diag(D),'descend'); + fs=flipdim(flipdim(fs,1),2); %#ok + filters(:,:,i,:)=fs(:,:,ord(1:nFilter)); +end +fprintf('done (time=%.0fs).\n',etime(clock,start)); +end diff --git a/detector/bbApply.m b/detector/bbApply.m new file mode 100644 index 0000000..c24593a --- /dev/null +++ b/detector/bbApply.m @@ -0,0 +1,619 @@ +function varargout = bbApply( action, varargin ) +% Functions for manipulating bounding boxes (bb). +% +% A bounding box (bb) is also known as a position vector or a rectangle +% object. It is a four element vector with the fields: [x y w h]. A set of +% n bbs can be stores as an [nx4] array, most funcitons below can handle +% either a single or multiple bbs. In addtion, typically [nxm] inputs with +% m>4 are ok (with the additional columns ignored/copied to the output). +% +% bbApply contains a number of utility functions for working with bbs. The +% format for accessing the various utility functions is: +% outputs = bbApply( 'action', inputs ); +% The list of functions and help for each is given below. Also, help on +% individual subfunctions can be accessed by: "help bbApply>action". +% +% Compute area of bbs. +% bb = bbApply( 'area', bb ) +% Shift center of bbs. +% bb = bbApply( 'shift', bb, xdel, ydel ) +% Get center of bbs. +% cen = bbApply( 'getCenter', bb ) +% Get bb at intersection of bb1 and bb2 (may be empty). +% bb = bbApply( 'intersect', bb1, bb2 ) +% Get bb that is union of bb1 and bb2 (smallest bb containing both). +% bb = bbApply( 'union', bb1, bb2 ) +% Resize the bbs (without moving their centers). +% bb = bbApply( 'resize', bb, hr, wr, [ar] ) +% Fix bb aspect ratios (without moving the bb centers). +% bbr = bbApply( 'squarify', bb, flag, [ar] ) +% Draw single or multiple bbs to image (calls rectangle()). +% hs = bbApply( 'draw', bb, [col], [lw], [ls], [prop], [ids] ) +% Embed single or multiple bbs directly into image. 
+% I = bbApply( 'embed', I, bb, [varargin] ) +% Crop image regions from I encompassed by bbs. +% [patches, bbs] = bbApply('crop',I,bb,[padEl],[dims]) +% Convert bb relative to absolute coordinates and vice-versa. +% bb = bbApply( 'convert', bb, bbRef, isAbs ) +% Randomly generate bbs that fall in a specified region. +% bbs = bbApply( 'random', pRandom ) +% Convert weighted mask to bbs. +% bbs = bbApply('frMask',M,bbw,bbh,[thr]) +% Create weighted mask encoding bb centers (or extent). +% M = bbApply('toMask',bbs,w,h,[fill],[bgrd]) +% +% USAGE +% varargout = bbApply( action, varargin ); +% +% INPUTS +% action - string specifying action +% varargin - depends on action, see above +% +% OUTPUTS +% varargout - depends on action, see above +% +% EXAMPLE +% +% See also bbApply>area bbApply>shift bbApply>getCenter bbApply>intersect +% bbApply>union bbApply>resize bbApply>squarify bbApply>draw bbApply>crop +% bbApply>convert bbApply>random bbApply>frMask bbApply>toMask +% +% Piotr's Computer Vision Matlab Toolbox Version 3.30 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%#ok<*DEFNU> +varargout = cell(1,max(1,nargout)); +[varargout{:}] = feval(action,varargin{:}); +end + +function a = area( bb ) +% Compute area of bbs. +% +% USAGE +% bb = bbApply( 'area', bb ) +% +% INPUTS +% bb - [nx4] original bbs +% +% OUTPUTS +% a - [nx1] area of each bb +% +% EXAMPLE +% a = bbApply('area', [0 0 10 10]) +% +% See also bbApply +a=prod(bb(:,3:4),2); +end + +function bb = shift( bb, xdel, ydel ) +% Shift center of bbs. 
+% +% USAGE +% bb = bbApply( 'shift', bb, xdel, ydel ) +% +% INPUTS +% bb - [nx4] original bbs +% xdel - amount to shift x coord of each bb left +% ydel - amount to shift y coord of each bb up +% +% OUTPUTS +% bb - [nx4] shifted bbs +% +% EXAMPLE +% bb = bbApply('shift', [0 0 10 10], 1, 2) +% +% See also bbApply +bb(:,1)=bb(:,1)-xdel; bb(:,2)=bb(:,2)-ydel; +end + +function cen = getCenter( bb ) +% Get center of bbs. +% +% USAGE +% cen = bbApply( 'getCenter', bb ) +% +% INPUTS +% bb - [nx4] original bbs +% +% OUTPUTS +% cen - [nx1] centers of bbs +% +% EXAMPLE +% cen = bbApply('getCenter', [0 0 10 10]) +% +% See also bbApply +cen=bb(:,1:2)+bb(:,3:4)/2; +end + +function bb = intersect( bb1, bb2 ) +% Get bb at intersection of bb1 and bb2 (may be empty). +% +% USAGE +% bb = bbApply( 'intersect', bb1, bb2 ) +% +% INPUTS +% bb1 - [nx4] first set of bbs +% bb2 - [nx4] second set of bbs +% +% OUTPUTS +% bb - [nx4] intersection of bbs +% +% EXAMPLE +% bb = bbApply('intersect', [0 0 10 10], [5 5 10 10]) +% +% See also bbApply bbApply>union +n1=size(bb1,1); n2=size(bb2,1); +if(n1==0 || n2==0), bb=zeros(0,4); return, end +if(n1==1 && n2>1), bb1=repmat(bb1,n2,1); n1=n2; end +if(n2==1 && n1>1), bb2=repmat(bb2,n1,1); n2=n1; end +assert(n1==n2); +lcsE=min(bb1(:,1:2)+bb1(:,3:4),bb2(:,1:2)+bb2(:,3:4)); +lcsS=max(bb1(:,1:2),bb2(:,1:2)); empty=any(lcsEintersect +n1=size(bb1,1); n2=size(bb2,1); +if(n1==0 || n2==0), bb=zeros(0,4); return, end +if(n1==1 && n2>1), bb1=repmat(bb1,n2,1); n1=n2; end +if(n2==1 && n1>1), bb2=repmat(bb2,n1,1); n2=n1; end +assert(n1==n2); +lcsE=max(bb1(:,1:2)+bb1(:,3:4),bb2(:,1:2)+bb2(:,3:4)); +lcsS=min(bb1(:,1:2),bb2(:,1:2)); +bb=[lcsS lcsE-lcsS]; +end + +function bb = resize( bb, hr, wr, ar ) +% Resize the bbs (without moving their centers). 
+% +% If wr>0 or hr>0, the w/h of each bb is adjusted in the following order: +% if(hr~=0), h=h*hr; end +% if(wr~=0), w=w*wr; end +% if(hr==0), h=w/ar; end +% if(wr==0), w=h*ar; end +% Only one of hr/wr may be set to 0, and then only if ar>0. If, however, +% hr=wr=0 and ar>0 then resizes bbs such that areas and centers are +% preserved but aspect ratio becomes ar. +% +% USAGE +% bb = bbApply( 'resize', bb, hr, wr, [ar] ) +% +% INPUTS +% bb - [nx4] original bbs +% hr - ratio by which to multiply height (or 0) +% wr - ratio by which to multiply width (or 0) +% ar - [0] target aspect ratio (used only if hr=0 or wr=0) +% +% OUTPUT +% bb - [nx4] the output resized bbs +% +% EXAMPLE +% bb = bbApply('resize',[0 0 1 1],1.2,0,.5) % h'=1.2*h; w'=h'/2; +% +% See also bbApply, bbApply>squarify +if(nargin<4), ar=0; end; assert(size(bb,2)>=4); +assert((hr>0&&wr>0)||ar>0); +% preserve area and center, set aspect ratio +if(hr==0 && wr==0), a=sqrt(bb(:,3).*bb(:,4)); ar=sqrt(ar); + d=a*ar-bb(:,3); bb(:,1)=bb(:,1)-d/2; bb(:,3)=bb(:,3)+d; + d=a/ar-bb(:,4); bb(:,2)=bb(:,2)-d/2; bb(:,4)=bb(:,4)+d; return; +end +% possibly adjust h/w based on hr/wr +if(hr~=0), d=(hr-1)*bb(:,4); bb(:,2)=bb(:,2)-d/2; bb(:,4)=bb(:,4)+d; end +if(wr~=0), d=(wr-1)*bb(:,3); bb(:,1)=bb(:,1)-d/2; bb(:,3)=bb(:,3)+d; end +% possibly adjust h/w based on ar and NEW h/w +if(~hr), d=bb(:,3)/ar-bb(:,4); bb(:,2)=bb(:,2)-d/2; bb(:,4)=bb(:,4)+d; end +if(~wr), d=bb(:,4)*ar-bb(:,3); bb(:,1)=bb(:,1)-d/2; bb(:,3)=bb(:,3)+d; end +end + +function bbr = squarify( bb, flag, ar ) +% Fix bb aspect ratios (without moving the bb centers). +% +% The w or h of each bb is adjusted so that w/h=ar. +% The parameter flag controls whether w or h should change: +% flag==0: expand bb to given ar +% flag==1: shrink bb to given ar +% flag==2: use original w, alter h +% flag==3: use original h, alter w +% flag==4: preserve area, alter w and h +% If ar==1 (the default), always converts bb to a square, hence the name. 
+% +% USAGE +% bbr = bbApply( 'squarify', bb, flag, [ar] ) +% +% INPUTS +% bb - [nx4] original bbs +% flag - controls whether w or h should change +% ar - [1] desired aspect ratio +% +% OUTPUT +% bbr - the output 'squarified' bbs +% +% EXAMPLE +% bbr = bbApply('squarify',[0 0 1 2],0) +% +% See also bbApply, bbApply>resize +if(nargin<3 || isempty(ar)), ar=1; end; bbr=bb; +if(flag==4), bbr=resize(bb,0,0,ar); return; end +for i=1:size(bb,1), p=bb(i,1:4); + usew = (flag==0 && p(3)>p(4)*ar) || (flag==1 && p(3)embed, rectangle +[n,m]=size(bb); if(n==0), hs=[]; return; end +if(nargin<2 || isempty(col)), col=[]; end +if(nargin<3 || isempty(lw)), lw=2; end +if(nargin<4 || isempty(ls)), ls='-'; end +if(nargin<5 || isempty(prop)), prop={}; end +if(nargin<6 || isempty(ids)), ids=ones(1,n); end +% prepare display properties +prop=['LineWidth' lw 'LineStyle' ls prop 'EdgeColor']; +tProp={'FontSize',10,'color','w','FontWeight','bold',... + 'VerticalAlignment','bottom'}; k=max(ids); +if(isempty(col)), if(k==1), col='g'; else col=hsv(k); end; end +if(size(col,1)draw, char2img + +% get additional parameters +dfs={'col',[0 255 0],'lw',3,'fh',35,'fcol',[255 0 0]}; +[col,lw,fh,fcol]=getPrmDflt(varargin,dfs,1); +n=size(bb,1); bb(:,1:4)=round(bb(:,1:4)); +if(size(col,1)==1), col=col(ones(1,n),:); end +if(size(fcol,1)==1), fcol=fcol(ones(1,n),:); end +if( ismatrix(I) ), I=I(:,:,[1 1 1]); end +% embed each bb +x0=bb(:,1); x1=x0+bb(:,3)-1; y0=bb(:,2); y1=y0+bb(:,4)-1; +j0=floor((lw-1)/2); j1=ceil((lw-1)/2); h=size(I,1); w=size(I,2); +x00=max(1,x0-j0); x01=min(x0+j1,w); x10=max(1,x1-j0); x11=min(x1+j1,w); +y00=max(1,y0-j0); y01=min(y0+j1,h); y10=max(1,y1-j0); y11=min(y1+j1,h); +for b=1:n + for c=1:3, I([y00(b):y01(b) y10(b):y11(b)],x00(b):x11(b),c)=col(b,c); end + for c=1:3, I(y00(b):y11(b),[x00(b):x01(b) x10(b):x11(b)],c)=col(b,c); end +end +% embed text displaying bb score (inside upper-left bb corner) +if(size(bb,2)<5 || fh==0), return; end +bb(:,1:4)=intersect(bb(:,1:4),[1 1 w h]); +for 
b=1:n + M=char2img(sprintf('%.4g',bb(b,5)),fh); M=M{1}==0; [h,w]=size(M); + y0=bb(b,2); y1=y0+h-1; x0=bb(b,1); x1=x0+w-1; + if( x0>=1 && y0>=1 && x1<=size(I,2) && y1<=size(I,1)) + Ir=I(y0:y1,x0:x1,1); Ig=I(y0:y1,x0:x1,2); Ib=I(y0:y1,x0:x1,3); + Ir(M)=fcol(b,1); Ig(M)=fcol(b,2); Ib(M)=fcol(b,3); + I(y0:y1,x0:x1,:)=cat(3,Ir,Ig,Ib); + end +end +end + +function [patches, bbs] = crop( I, bbs, padEl, dims ) +% Crop image regions from I encompassed by bbs. +% +% The only subtlety is that a pixel centered at location (i,j) would have a +% bb of [j-1/2,i-1/2,1,1]. The -1/2 is because pixels are located at +% integer locations. This is a Matlab convention, to confirm use: +% im(rand(3)); bbApply('draw',[1.5 1.5 1 1],'g') +% If bb contains all integer entries cropping is straightforward. If +% entries are not integers, x=round(x+.499) is used, eg 1.2 actually goes +% to 2 (since it is closer to 1.5 then .5), and likewise for y. +% +% If ~isempty(padEl), image is padded so can extract full bb region (no +% actual padding is done, this is fast). Otherwise bb is intersected with +% the image bb prior to cropping. If padEl is a string ('circular', +% 'replicate', or 'symmetric'), uses padarray to do actual padding (slow). 
+% +% USAGE +% [patches, bbs] = bbApply('crop',I,bb,[padEl],[dims]) +% +% INPUTS +% I - image from which to crop patches +% bbs - bbs that indicate regions to crop +% padEl - [0] value to pad I or [] to indicate no padding (see above) +% dims - [] if specified resize each cropped patch to [w h] +% +% OUTPUTS +% patches - [1xn] cell of cropped image regions +% bbs - actual integer-valued bbs used to crop +% +% EXAMPLE +% I=imread('cameraman.tif'); bb=[-10 -10 100 100]; +% p1=bbApply('crop',I,bb); p2=bbApply('crop',I,bb,'replicate'); +% figure(1); im(I); figure(2); im(p1{1}); figure(3); im(p2{1}); +% +% See also bbApply, ARRAYCROP, PADARRAY, IMRESAMPLE + +% get padEl, bound bb to visible region if empty +if( nargin<3 ), padEl=0; end; h=size(I,1); w=size(I,2); +if( nargin<4 ), dims=[]; end; +if(isempty(padEl)), bbs=intersect([.5 .5 w h],bbs); end +% crop each patch in turn +n=size(bbs,1); patches=cell(1,n); +for i=1:n, [patches{i},bbs(i,1:4)]=crop1(bbs(i,1:4)); end + + function [patch, bb] = crop1( bb ) + % crop single patch (use arrayCrop only if necessary) + lcsS=round(bb([2 1])+.5-.001); lcsE=lcsS+round(bb([4 3]))-1; + if( any(lcsS<1) || lcsE(1)>h || lcsE(2)>w ) + if( ischar(padEl) ) + pt=max(0,1-lcsS(1)); pb=max(0,lcsE(1)-h); + pl=max(0,1-lcsS(2)); pr=max(0,lcsE(2)-w); + lcsS1=max(1,lcsS); lcsE1=min(lcsE,[h w]); + patch = I(lcsS1(1):lcsE1(1),lcsS1(2):lcsE1(2),:); + patch = padarray(patch,[pt pl],padEl,'pre'); + patch = padarray(patch,[pb pr],padEl,'post'); + else + if(ndims(I)==3); lcsS=[lcsS 1]; lcsE=[lcsE 3]; end + patch = arrayCrop(I,lcsS,lcsE,padEl); + end + else + patch = I(lcsS(1):lcsE(1),lcsS(2):lcsE(2),:); + end + bb = [lcsS([2 1]) lcsE([2 1])-lcsS([2 1])+1]; + if(~isempty(dims)), patch=imResample(patch,[dims(2),dims(1)]); end + end +end + +function bb = convert( bb, bbRef, isAbs ) +% Convert bb relative to absolute coordinates and vice-versa. 
+% +% If isAbs==1, bb is assumed to be given in absolute coords, and the output +% is given in coords relative to bbRef. Otherwise, if isAbs==0, bb is +% assumed to be given in coords relative to bbRef and the output is given +% in absolute coords. +% +% USAGE +% bb = bbApply( 'convert', bb, bbRef, isAbs ) +% +% INPUTS +% bb - original bb, either in abs or rel coords +% bbRef - reference bb +% isAbs - 1: bb is in abs coords, 0: bb is in rel coords +% +% OUTPUTS +% bb - converted bb +% +% EXAMPLE +% bbRef=[5 5 15 15]; bba=[10 10 5 5]; +% bbr = bbApply( 'convert', bba, bbRef, 1 ) +% bba2 = bbApply( 'convert', bbr, bbRef, 0 ) +% +% See also bbApply +if( isAbs ) + bb(1:2)=bb(1:2)-bbRef(1:2); + bb=bb./bbRef([3 4 3 4]); +else + bb=bb.*bbRef([3 4 3 4]); + bb(1:2)=bb(1:2)+bbRef(1:2); +end +end + +function bbs = random( varargin ) +% Randomly generate bbs that fall in a specified region. +% +% The vector dims defines the region in which bbs are generated. Specify +% dims=[height width] to generate bbs=[x y w h] such that: 1<=x<=width, +% 1<=y<=height, x+w-1<=width, y+h-1<=height. The biggest bb generated can +% be bb=[1 1 width height]. If dims is a three element vector the third +% coordinate is the depth, in this case bbs=[x y w h d] where 1<=d<=depth. +% +% A number of constraints can be specified that control the size and other +% characteristics of the generated bbs. Note that if incompatible +% constraints are specified (e.g. if the maximum width and height are both +% 5 while the minimum area is 100) no bbs will be generated. More +% generally, if fewer than n bbs are generated a warning is displayed. 
+% +% USAGE +% bbs = bbApply( 'random', pRandom ) +% +% INPUTS +% pRandom - parameters (struct or name/value pairs) +% .n - ['REQ'] number of bbs to generate +% .dims - ['REQ'] region in which to generate bbs [height,width] +% .wRng - [1 inf] range for width of bbs (or scalar value) +% .hRng - [1 inf] range for height of bbs (or scalar value) +% .aRng - [1 inf] range for area of bbs +% .arRng - [0 inf] range for aspect ratio (width/height) of bbs +% .unique - [1] if true generate unique bbs +% .maxOverlap - [1] max overlap (intersection/union) between bbs +% .maxIter - [100] max iterations to go w/o changes before giving up +% .show - [0] if true show sample generated bbs +% +% OUTPUTS +% bbs - [nx4] array of randomly generated integer bbs +% +% EXAMPLE +% bbs=bbApply('random','n',50,'dims',[20 20],'arRng',[.5 .5],'show',1); +% +% See also bbApply + +% get parameters +rng=[1 inf]; dfs={ 'n','REQ', 'dims','REQ', 'wRng',rng, 'hRng',rng, ... + 'aRng',rng, 'arRng',[0 inf], 'unique',1, 'maxOverlap',1, ... + 'maxIter',100, 'show',0 }; +[n,dims,wRng,hRng,aRng,arRng,uniqueOnly,maxOverlap,maxIter,show] ... + = getPrmDflt(varargin,dfs,1); +if(length(hRng)==1), hRng=[hRng hRng]; end +if(length(wRng)==1), wRng=[wRng wRng]; end +if(length(dims)==3), d=5; else d=4; end + +% generate random bbs satisfying constraints +bbs=zeros(0,d); ids=zeros(0,1); n1=min(n*10,1000); +M=max(dims)+1; M=M.^(0:d-1); iter=0; k=0; +tid=ticStatus('generating random bbs',1,2); +while( k0 & xs0>0 & ys1<=dims(1) & xs1<=dims(2) & ... + hs>=hRng(1) & hs<=hRng(2) & ws>=wRng(1) & ws<=wRng(2) & ... 
+    as>=aRng(1) & as<=aRng(2) & ars>=arRng(1) & ars<=arRng(2);
+  bbs1=[xs0' ys0' ws' hs' ds']; bbs1=bbs1(kp,:);
+  k0=k; bbs=[bbs; bbs1]; k=size(bbs,1); %#ok
+  if( maxOverlap<1 && k ), bbs=bbs(1:k0,:);
+    for j=1:size(bbs1,1), bbs0=bbs; bb=bbs1(j,:);
+      if(d==5), bbs=bbs(bbs(:,5)==bb(5),:); end
+      if(isempty(bbs)), bbs=[bbs0; bb]; continue; end
+      ws1=min(bbs(:,1)+bbs(:,3),bb(1)+bb(3))-max(bbs(:,1),bb(1));
+      hs1=min(bbs(:,2)+bbs(:,4),bb(2)+bb(4))-max(bbs(:,2),bb(2));
+      o=max(0,ws1).*max(0,hs1); o=o./(bbs(:,3).*bbs(:,4)+bb(3).*bb(4)-o);
+      if(max(o)<=maxOverlap), bbs=[bbs0; bb]; else bbs=bbs0; end
+    end
+  elseif( uniqueOnly && k )
+    ids=[ids; sum(bbs1.*M(ones(1,size(bbs1,1)),:),2)]; %#ok
+    [ids,o]=sort(ids); bbs=bbs(o,:); kp=[ids(1:end-1)~=ids(2:end); true];
+    bbs=bbs(kp,:); ids=ids(kp,:);
+  end
+  k=size(bbs,1); if(k0==k), iter=iter+1; else iter=0; end
+  if(k>n), bbs=bbs(randSample(k,n),:); k=n; end;
+  tocStatus(tid,max(k/n,iter/maxIter));
+end
+if( k<n ), warning('only generated %i of %i bbs',k,n); n=k; end %#ok<WNTAG>
+
+% optionally display a few bbs
+if( show )
+  k=8; figure(show); im(zeros(dims)); cs=uniqueColors(1,k,0,0);
+  if(n>k), bbs1=bbs(randsample(n,k),:); else bbs1=bbs; end
+  bbs1(:,1:2)=bbs1(:,1:2)-.5;
+  for i=1:min(k,n), rectangle('Position',bbs1(i,:),...
+      'EdgeColor',cs(i,:),'LineStyle','--'); end
+end
+
+end
+
+function bbs = frMask( M, bbw, bbh, thr )
+% Convert weighted mask to bbs.
+%
+% Pixels in mask above given threshold (thr) indicate bb centers.
+% +% USAGE +% bbs = bbApply('frMask',M,bbw,bbh,[thr]) +% +% INPUTS +% M - mask +% bbw - bb target width +% bbh - bb target height +% thr - [0] mask threshold +% +% OUTPUTS +% bbs - bounding boxes +% +% EXAMPLE +% w=20; h=10; bbw=5; bbh=8; M=double(rand(h,w)); M(M<.95)=0; +% bbs=bbApply('frMask',M,bbw,bbh); M2=bbApply('toMask',bbs,w,h); +% sum(abs(M(:)-M2(:))) +% +% See also bbApply, bbApply>toMask +if(nargin<4), thr=0; end +ids=find(M>thr); ids=ids(:); h=size(M,1); +if(isempty(ids)), bbs=zeros(0,5); return; end +xs=floor((ids-1)/h); ys=ids-xs*h; xs=xs+1; +bbs=[xs-floor(bbw/2) ys-floor(bbh/2)]; +bbs(:,3)=bbw; bbs(:,4)=bbh; bbs(:,5)=M(ids); +end + +function M = toMask( bbs, w, h, fill, bgrd ) +% Create weighted mask encoding bb centers (or extent). +% +% USAGE +% M = bbApply('toMask',bbs,w,h,[fill],[bgrd]) +% +% INPUTS +% bbs - bounding boxes +% w - mask target width +% h - mask target height +% fill - [0] if 1 encodes extent of bbs +% bgrd - [0] default value for background pixels +% +% OUTPUTS +% M - hxw mask +% +% EXAMPLE +% +% See also bbApply, bbApply>frMask +if(nargin<4||isempty(fill)), fill=0; end +if(nargin<5||isempty(bgrd)), bgrd=0; end +if(size(bbs,2)==4), bbs(:,5)=1; end +M=zeros(h,w); B=true(h,w); n=size(bbs,1); +if( fill==0 ) + p=floor(getCenter(bbs)); p=sub2ind([h w],p(:,2),p(:,1)); + for i=1:n, M(p(i))=M(p(i))+bbs(i,5); end + if(bgrd~=0), B(p)=0; end +else + bbs=[intersect(round(bbs),[1 1 w h]) bbs(:,5)]; n=size(bbs,1); + x0=bbs(:,1); x1=x0+bbs(:,3)-1; y0=bbs(:,2); y1=y0+bbs(:,4)-1; + for i=1:n, y=y0(i):y1(i); x=x0(i):x1(i); + M(y,x)=M(y,x)+bbs(i,5); B(y,x)=0; end +end +if(bgrd~=0), M(B)=bgrd; end +end diff --git a/detector/bbGt.m b/detector/bbGt.m new file mode 100644 index 0000000..0b2da2a --- /dev/null +++ b/detector/bbGt.m @@ -0,0 +1,885 @@ +function varargout = bbGt( action, varargin ) +% Bounding box (bb) annotations struct, evaluation and sampling routines. 
+% +% bbGt gives access to two types of routines: +% (1) Data structure for storing bb image annotations. +% (2) Routines for evaluating the Pascal criteria for object detection. +% +% The bb annotation stores bb for objects of interest with additional +% information per object, such as occlusion information. The underlying +% data structure is simply a Matlab stuct array, one struct per object. +% This annotation format is an alternative to the annotation format used +% for the PASCAL object challenges (in addition routines for loading PASCAL +% format data are provided, see bbLoad()). +% +% Each object struct has the following fields: +% lbl - a string label describing object type (eg: 'pedestrian') +% bb - [l t w h]: bb indicating predicted object extent +% occ - 0/1 value indicating if bb is occluded +% bbv - [l t w h]: bb indicating visible region (may be [0 0 0 0]) +% ign - 0/1 value indicating bb was marked as ignore +% ang - [0-360] orientation of bb in degrees +% +% Note: although orientation (angle) is stored for each bb, for now it is +% not being used during evaluation or sampling. +% +% bbGt contains a number of utility functions, accessed using: +% outputs = bbGt( 'action', inputs ); +% The list of functions and help for each is given below. Also, help on +% individual subfunctions can be accessed by: "help bbGt>action". +% +%%% (1) Data structure for storing bb image annotations. +% Create annotation of n empty objects. +% objs = bbGt( 'create', [n] ); +% Save bb annotation to text file. +% objs = bbGt( 'bbSave', objs, fName ) +% Load bb annotation from text file and filter. +% [objs,bbs] = bbGt( 'bbLoad', fName, [pLoad] ) +% Get object property 'name' (in a standard array). +% vals = bbGt( 'get', objs, name ) +% Set object property 'name' (with a standard array). +% objs = bbGt( 'set', objs, name, vals ) +% Draw an ellipse for each labeled object. 
+% hs = draw( objs, pDraw ) +% +%%% (2) Routines for evaluating the Pascal criteria for object detection. +% Get all corresponding files in given directories. +% [fs,fs0] = bbGt('getFiles', dirs, [f0], [f1] ) +% Copy corresponding files into given directories. +% fs = bbGt( 'copyFiles', fs, dirs ) +% Load all ground truth and detection bbs in given directories. +% [gt0,dt0] = bbGt( 'loadAll', gtDir, [dtDir], [pLoad] ) +% Evaluates detections against ground truth data. +% [gt,dt] = bbGt( 'evalRes', gt0, dt0, [thr], [mul] ) +% Display evaluation results for given image. +% [hs,hImg] = bbGt( 'showRes' I, gt, dt, varargin ) +% Compute ROC or PR based on outputs of evalRes on multiple images. +% [xs,ys,ref] = bbGt( 'compRoc', gt, dt, roc, ref ) +% Extract true or false positives or negatives for visualization. +% [Is,scores,imgIds] = bbGt( 'cropRes', gt, dt, imFs, varargin ) +% Computes (modified) overlap area between pairs of bbs. +% oa = bbGt( 'compOas', dt, gt, [ig] ) +% Optimized version of compOas for a single pair of bbs. +% oa = bbGt( 'compOa', dt, gt, ig ) +% +% USAGE +% varargout = bbGt( action, varargin ); +% +% INPUTS +% action - string specifying action +% varargin - depends on action, see above +% +% OUTPUTS +% varargout - depends on action, see above +% +% EXAMPLE +% +% See also bbApply, bbLabeler, bbGt>create, bbGt>bbSave, bbGt>bbLoad, +% bbGt>get, bbGt>set, bbGt>draw, bbGt>getFiles, bbGt>copyFiles, +% bbGt>loadAll, bbGt>evalRes, bbGt>showRes, bbGt>compRoc, bbGt>cropRes, +% bbGt>compOas, bbGt>compOa +% +% Piotr's Computer Vision Matlab Toolbox Version 3.26 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +%#ok<*DEFNU> +varargout = cell(1,max(1,nargout)); +[varargout{:}] = feval(action,varargin{:}); +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function objs = create( n ) +% Create annotation of n empty objects. 
+% +% USAGE +% objs = bbGt( 'create', [n] ) +% +% INPUTS +% n - [1] number of objects to create +% +% OUTPUTS +% objs - annotation of n 'empty' objects +% +% EXAMPLE +% objs = bbGt('create') +% +% See also bbGt +o=struct('lbl','','bb',[0 0 0 0],'occ',0,'bbv',[0 0 0 0],'ign',0,'ang',0); +if(nargin<1 || n==1), objs=o; return; end; objs=o(ones(n,1)); +end + +function objs = bbSave( objs, fName ) +% Save bb annotation to text file. +% +% USAGE +% objs = bbGt( 'bbSave', objs, fName ) +% +% INPUTS +% objs - objects to save +% fName - name of text file +% +% OUTPUTS +% objs - objects to save +% +% EXAMPLE +% +% See also bbGt, bbGt>bbLoad +vers=3; fid=fopen(fName,'w'); assert(fid>0); +fprintf(fid,'%% bbGt version=%i\n',vers); +objs=set(objs,'bb',round(get(objs,'bb'))); +objs=set(objs,'bbv',round(get(objs,'bbv'))); +objs=set(objs,'ang',round(get(objs,'ang'))); +for i=1:length(objs) + o=objs(i); bb=o.bb; bbv=o.bbv; + fprintf(fid,['%s' repmat(' %i',1,11) '\n'],o.lbl,... + bb,o.occ,bbv,o.ign,o.ang); +end +fclose(fid); +end + +function [objs,bbs] = bbLoad( fName, varargin ) +% Load bb annotation from text file and filter. +% +% FORMAT: Specify 'format' to indicate the format of the ground truth. +% format=0 is the default format (created by bbSave/bbLabeler). format=1 is +% the PASCAL VOC format. Loading ground truth in this format requires +% 'VOCcode/' to be in directory path. It's part of VOCdevkit available from +% the PASCAL VOC: http://pascallin.ecs.soton.ac.uk/challenges/VOC/. Objects +% labeled as either 'truncated' or 'occluded' using the PASCAL definitions +% have the 'occ' flag set to true. Objects labeled as 'difficult' have the +% 'ign' flag set to true. 'class' is used for 'lbl'. format=2 is the +% ImageNet detection format and requires the ImageNet Dev Kit. +% +% FILTERING: After loading, the objects can be filtered. First, only +% objects with lbl in lbls or ilbls or returned. 
For each object, obj.ign +% is set to 1 if it was already at 1, if its label was in ilbls, or if any +% object property is outside of the specified range. The ignore flag is +% used during training and testing so that objects with certain properties +% (such as very small or heavily occluded objects) are excluded. The range +% for each property is a two element vector, [0 inf] by default; a property +% value v is inside the range if v>=rng(1) && v<=rng(2). Tested properties +% include height (h), width (w), area (a), aspect ratio (ar), orientation +% (o), extent x-coordinate (x), extent y-coordinate (y), and fraction +% visible (v). The last property is computed as the visible object area +% divided by the total area, except if o.occ==0, in which case v=1, or +% all(o.bbv==o.bb), which indicates the object may be barely visible, in +% which case v=0 (note that v~=1 in this case). +% +% RETURN: In addition to outputting the objs, bbLoad() can return the +% corresponding bounding boxes (bbs) in an [nx5] array where each row is of +% the form [x y w h ignore], [x y w h] is the bb and ignore=obj.ign. For +% oriented bbs, the extent of the bb is returned, where the extent is the +% smallest axis aligned bb containing the oriented bb. If the oriented bb +% was labeled as a rectangle as opposed to an ellipse, the tightest bb will +% usually increase slightly in size due to the corners of the rectangle +% sticking out beyond the ellipse bounds. The 'ellipse' flag controls how +% an oriented bb is converted to a regular bb. Specifically, set ellipse=1 +% if an ellipse tightly delineates the object and 0 if a rectangle does. +% Finally, if 'squarify' is not empty the (non-ignore) bbs are converted to +% a fixed aspect ratio using bbs=bbApply('squarify',bbs,squarify{:}). 
+% +% USAGE +% [objs,bbs] = bbGt( 'bbLoad', fName, [pLoad] ) +% +% INPUTS +% fName - name of text file +% pLoad - parameters (struct or name/value pairs) +% .format - [0] gt format 0:default, 1:PASCAL, 2:ImageNet +% .ellipse - [1] controls how oriented bb is converted to regular bb +% .squarify - [] controls optional reshaping of bbs to fixed aspect ratio +% .lbls - [] return objs with these labels (or [] to return all) +% .ilbls - [] return objs with these labels but set to ignore +% .hRng - [] range of acceptable obj heights +% .wRng - [] range of acceptable obj widths +% .aRng - [] range of acceptable obj areas +% .arRng - [] range of acceptable obj aspect ratios +% .oRng - [] range of acceptable obj orientations (angles) +% .xRng - [] range of x coordinates of bb extent +% .yRng - [] range of y coordinates of bb extent +% .vRng - [] range of acceptable obj occlusion levels +% +% OUTPUTS +% objs - loaded objects +% bbs - [nx5] array containg ground truth bbs [x y w h ignore] +% +% EXAMPLE +% +% See also bbGt, bbGt>bbSave + +% get parameters +df={'format',0,'ellipse',1,'squarify',[],'lbls',[],'ilbls',[],'hRng',[],... + 'wRng',[],'aRng',[],'arRng',[],'oRng',[],'xRng',[],'yRng',[],'vRng',[]}; +[format,ellipse,sqr,lbls,ilbls,hRng,wRng,aRng,arRng,oRng,xRng,yRng,vRng]... 
+ = getPrmDflt(varargin,df,1); + +% load objs +if( format==0 ) + % load objs stored in default format + fId=fopen(fName); + if(fId==-1), error(['unable to open file: ' fName]); end; v=0; + try v=textscan(fId,'%% bbGt version=%d'); v=v{1}; catch, end %#ok + if(isempty(v)), v=0; end + % read in annotation (m is number of fields for given version v) + if(all(v~=[0 1 2 3])), error('Unknown version %i.',v); end + frmt='%s %d %d %d %d %d %d %d %d %d %d %d'; + ms=[10 10 11 12]; m=ms(v+1); frmt=frmt(1:2+(m-1)*3); + in=textscan(fId,frmt); for i=2:m, in{i}=double(in{i}); end; fclose(fId); + % create objs struct from read in fields + n=length(in{1}); objs=create(n); + for i=1:n, objs(i).lbl=in{1}{i}; objs(i).occ=in{6}(i); end + bb=[in{2} in{3} in{4} in{5}]; bbv=[in{7} in{8} in{9} in{10}]; + for i=1:n, objs(i).bb=bb(i,:); objs(i).bbv=bbv(i,:); end + if(m>=11), for i=1:n, objs(i).ign=in{11}(i); end; end + if(m>=12), for i=1:n, objs(i).ang=in{12}(i); end; end +elseif( format==1 ) + % load objs stored in PASCAL VOC format + if(exist('PASreadrecord.m','file')~=2) + error('bbLoad() requires the PASCAL VOC code.'); end + os=PASreadrecord(fName); os=os.objects; + n=length(os); objs=create(n); + if(~isfield(os,'occluded')), for i=1:n, os(i).occluded=0; end; end + for i=1:n + bb=os(i).bbox; bb(3)=bb(3)-bb(1); bb(4)=bb(4)-bb(2); objs(i).bb=bb; + objs(i).lbl=os(i).class; objs(i).ign=os(i).difficult; + objs(i).occ=os(i).occluded || os(i).truncated; + if(objs(i).occ), objs(i).bbv=bb; end + end +elseif( format==2 ) + if(exist('VOCreadxml.m','file')~=2) + error('bbLoad() requires the ImageNet dev code.'); end + os=VOCreadxml(fName); os=os.annotation; + if(isfield(os,'object')), os=os.object; else os=[]; end + n=length(os); objs=create(n); + for i=1:n + bb=os(i).bndbox; bb=str2double({bb.xmin bb.ymin bb.xmax bb.ymax}); + bb(3)=bb(3)-bb(1); bb(4)=bb(4)-bb(2); objs(i).bb=bb; + objs(i).lbl=os(i).name; + end +else error('bbLoad() unknown format: %i',format); +end + +% only keep objects whose lbl 
is in lbls or ilbls
+if(~isempty(lbls) || ~isempty(ilbls)), K=true(n,1);
+  for i=1:n, K(i)=any(strcmp(objs(i).lbl,[lbls ilbls])); end
+  objs=objs(K); n=length(objs);
+end
+
+% filter objs (set ignore flags)
+for i=1:n, objs(i).ang=mod(objs(i).ang,360); end
+if(~isempty(ilbls)), for i=1:n, v=objs(i).lbl;
+    objs(i).ign = objs(i).ign || any(strcmp(v,ilbls)); end; end
+if(~isempty(xRng)), for i=1:n, v=objs(i).bb(1);
+    objs(i).ign = objs(i).ign || v<xRng(1) || v>xRng(2); end; end
+if(~isempty(xRng)), for i=1:n, v=objs(i).bb(1)+objs(i).bb(3);
+    objs(i).ign = objs(i).ign || v<xRng(1) || v>xRng(2); end; end
+if(~isempty(yRng)), for i=1:n, v=objs(i).bb(2);
+    objs(i).ign = objs(i).ign || v<yRng(1) || v>yRng(2); end; end
+if(~isempty(yRng)), for i=1:n, v=objs(i).bb(2)+objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<yRng(1) || v>yRng(2); end; end
+if(~isempty(wRng)), for i=1:n, v=objs(i).bb(3);
+    objs(i).ign = objs(i).ign || v<wRng(1) || v>wRng(2); end; end
+if(~isempty(hRng)), for i=1:n, v=objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<hRng(1) || v>hRng(2); end; end
+if(~isempty(oRng)), for i=1:n, v=objs(i).ang; if(v>180), v=v-360; end
+    objs(i).ign = objs(i).ign || v<oRng(1) || v>oRng(2); end; end
+if(~isempty(aRng)), for i=1:n, v=objs(i).bb(3)*objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<aRng(1) || v>aRng(2); end; end
+if(~isempty(arRng)), for i=1:n, v=objs(i).bb(3)/objs(i).bb(4);
+    objs(i).ign = objs(i).ign || v<arRng(1) || v>arRng(2); end; end
+if(~isempty(vRng)), for i=1:n, o=objs(i); bb=o.bb; bbv=o.bbv; %#ok<ALIGN>
+    if(~o.occ || all(bbv==0)), v=1; elseif(all(bbv==bb)), v=0; else
+      v=(bbv(3)*bbv(4))/(bb(3)*bb(4)); end
+    objs(i).ign = objs(i).ign || v<vRng(1) || v>vRng(2); end
+end
+
+% finally get extent of each bounding box (not trivial if ang~=0)
+if(nargout<=1), return; end; if(n==0), bbs=zeros(0,5); return; end
+bbs=double([reshape([objs.bb],4,[]); [objs.ign]]'); ign=bbs(:,5)==1;
+for i=1:n, bbs(i,1:4)=bbExtent(bbs(i,1:4),objs(i).ang,ellipse); end
+if(~isempty(sqr)), bbs(~ign,:)=bbApply('squarify',bbs(~ign,:),sqr{:}); end
+
+  function bb = bbExtent( bb, ang, ellipse )
+  % get bb that fully contains given
oriented bb + if(~ang), return; end + if( ellipse ) % get bb that encompases ellipse (tighter) + x=bbApply('getCenter',bb); a=bb(4)/2; b=bb(3)/2; ang=ang-90; + rx=(a*cosd(ang))^2+(b*sind(ang))^2; rx=abs(rx/sqrt(rx)); + ry=(a*sind(ang))^2+(b*cosd(ang))^2; ry=abs(ry/sqrt(ry)); + bb=[x(1)-rx x(2)-ry 2*rx 2*ry]; + else % get bb that encompases rectangle (looser) + c=cosd(ang); s=sind(ang); R=[c -s; s c]; rs=bb(3:4)/2; + x0=-rs(1); x1=rs(1); y0=-rs(2); y1=rs(2); pc=bb(1:2)+rs; + p=[x0 y0; x1 y0; x1 y1; x0 y1]*R'+pc(ones(4,1),:); + x0=min(p(:,1)); x1=max(p(:,1)); y0=min(p(:,2)); y1=max(p(:,2)); + bb=[x0 y0 x1-x0 y1-y0]; + end + end +end + +function vals = get( objs, name ) +% Get object property 'name' (in a standard array). +% +% USAGE +% vals = bbGt( 'get', objs, name ) +% +% INPUTS +% objs - [nx1] struct array of objects +% name - property name ('lbl','bb','occ',etc.) +% +% OUTPUTS +% vals - [nxk] array of n values (k=1 or 4) +% +% EXAMPLE +% +% See also bbGt, bbGt>set +nObj=length(objs); if(nObj==0), vals=[]; return; end +switch name + case 'lbl', vals={objs.lbl}'; + case 'bb', vals=reshape([objs.bb]',4,[])'; + case 'occ', vals=[objs.occ]'; + case 'bbv', vals=reshape([objs.bbv]',4,[])'; + case 'ign', vals=[objs.ign]'; + case 'ang', vals=[objs.ang]'; + otherwise, error('unkown type %s',name); +end +end + +function objs = set( objs, name, vals ) +% Set object property 'name' (with a standard array). +% +% USAGE +% objs = bbGt( 'set', objs, name, vals ) +% +% INPUTS +% objs - [nx1] struct array of objects +% name - property name ('lbl','bb','occ',etc.) 
+% vals - [nxk] array of n values (k=1 or 4) +% +% OUTPUTS +% objs - [nx1] struct array of updated objects +% +% EXAMPLE +% +% See also bbGt, bbGt>get +nObj=length(objs); +switch name + case 'lbl', for i=1:nObj, objs(i).lbl=vals{i}; end + case 'bb', for i=1:nObj, objs(i).bb=vals(i,:); end + case 'occ', for i=1:nObj, objs(i).occ=vals(i); end + case 'bbv', for i=1:nObj, objs(i).bbv=vals(i,:); end + case 'ign', for i=1:nObj, objs(i).ign=vals(i); end + case 'ang', for i=1:nObj, objs(i).ang=vals(i); end + otherwise, error('unkown type %s',name); +end +end + +function hs = draw( objs, varargin ) +% Draw an ellipse for each labeled object. +% +% USAGE +% hs = bbGt( 'draw', objs, pDraw ) +% +% INPUTS +% objs - [nx1] struct array of objects +% pDraw - parameters (struct or name/value pairs) +% .col - ['g'] color or [nx1] array of colors +% .lw - [2] line width +% .ls - ['-'] line style +% +% OUTPUTS +% hs - [nx1] handles to drawn graphic objects +% +% EXAMPLE +% +% See also bbGt +dfs={'col',[],'lw',2,'ls','-'}; +[col,lw,ls]=getPrmDflt(varargin,dfs,1); +n=length(objs); hold on; hs=zeros(n,4); +if(isempty(col)), if(n==1), col='g'; else col=hsv(n); end; end +tProp={'FontSize',10,'color','w','FontWeight','bold',... + 'VerticalAlignment','bottom'}; +for i=1:n + bb=objs(i).bb; ci=col(i,:); + hs(i,1)=text(bb(1),bb(2),objs(i).lbl,tProp{:}); + x=bbApply('getCenter',bb); r=bb(3:4)/2; a=objs(i).ang/180*pi-pi/2; + [hs(i,2),hs(i,3),hs(i,4)]=plotEllipse(x(2),x(1),r(2),r(1),a,ci,[],lw,ls); +end; hold off; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +function [fs,fs0] = getFiles( dirs, f0, f1 ) +% Get all corresponding files in given directories. +% +% The first dir in 'dirs' serves as the baseline dir. getFiles() returns +% all files in the baseline dir and all corresponding files in the +% remaining dirs to the files in the baseline dir, in the same order. 
Two +% files are in correspondence if they have the same base name (regardless +% of extension). For example, given a file named "name.jpg", a +% corresponding file may be named "name.txt" or "name.jpg.txt". Every file +% in the baseline dir must have a matching file in the remaining dirs. +% +% USAGE +% [fs,fs0] = bbGt('getFiles', dirs, [f0], [f1] ) +% +% INPUTS +% dirs - {1xm} list of m directories +% f0 - [1] index of first file in baseline dir to use +% f1 - [inf] index of last file in baseline dir to use +% +% OUTPUTS +% fs - {mxn} list of full file names in each dir +% fs0 - {1xn} list of file names without path or extensions +% +% EXAMPLE +% +% See also bbGt + +if(nargin<2 || isempty(f0)), f0=1; end +if(nargin<3 || isempty(f1)), f1=inf; end +m=length(dirs); assert(m>0); sep=filesep; + +for d=1:m, dir1=dirs{d}; dir1(dir1=='\')=sep; dir1(dir1=='/')=sep; + if(dir1(end)==sep), dir1(end)=[]; end; dirs{d}=dir1; end + +[fs0,fs1] = getFiles0(dirs{1},f0,f1,sep); +n1=length(fs0); fs=cell(m,n1); fs(1,:)=fs1; +for d=2:m, fs(d,:)=getFiles1(dirs{d},fs0,sep); end + + function [fs0,fs1] = getFiles0( dir1, f0, f1, sep ) + % get fs1 in dir1 (and fs0 without path or extension) + fs1=dir([dir1 sep '*']); fs1={fs1.name}; fs1=fs1(3:end); + fs1=fs1(f0:min(f1,end)); fs0=fs1; n=length(fs0); + if(n==0), error('No files found in baseline dir %s.',dir1); end + for i=1:n, fs1{i}=[dir1 sep fs0{i}]; end + n=length(fs0); for i=1:n, f=fs0{i}; + f(find(f=='.',1,'first'):end)=[]; fs0{i}=f; end + end + + function fs1 = getFiles1( dir1, fs0, sep ) + % get fs1 in dir1 corresponding to fs0 + n=length(fs0); fs1=cell(1,n); i2=0; i1=0; + fs2=dir(dir1); fs2={fs2.name}; n2=length(fs2); + eMsg='''%s'' has no corresponding file in %s.'; + for i0=1:n, r=length(fs0{i0}); match=0; + while(i2getFiles for obtaining a set of corresponding files. 
+% +% USAGE +% fs = bbGt( 'copyFiles', fs, dirs ) +% +% INPUTS +% fs - {mxn} list of full file names in each dir +% dirs - {1xm} list of m target directories +% +% OUTPUTS +% fs - {mxn} list of full file names of copied files +% +% EXAMPLE +% +% See also bbGt, bbGt>getFiles +[m,n]=size(fs); assert(numel(dirs)==m); if(n==0), return; end +for d=1:m + if(~exist(dirs{d},'dir')), mkdir(dirs{d}); end + for i=1:n, f=fs{d,i}; j=[0 find(f=='/' | f=='\')]; j=j(end); + fs{d,i}=[dirs{d} '/' f(j+1:end)]; copyfile(f,fs{d,i}); end +end +end + +function [gt0,dt0] = loadAll( gtDir, dtDir, pLoad ) +% Load all ground truth and detection bbs in given directories. +% +% Loads each ground truth (gt) annotation in gtDir and the corresponding +% detection (dt) in dtDir. gt and dt files must correspond according to +% getFiles(). Alternatively, dtDir may be a filename of a single text file +% that contains the detection results across all images. +% +% Each dt should be a text file where each row contains 5 numbers +% representing a bb (left/top/width/height/score). If dtDir is a text file, +% it should contain the detection results across the full set of images. In +% this case each row in the text file should have an extra leading column +% specifying the image id: (imgId/left/top/width/height/score). +% +% The output of this function can be used in bbGt>evalRes(). 
+% +% USAGE +% [gt0,dt0] = bbGt( 'loadAll', gtDir, [dtDir], [pLoad] ) +% +% INPUTS +% gtDir - location of ground truth +% dtDir - [] optional location of detections +% pLoad - {} params for bbGt>bbLoad() (determine format/filtering) +% +% OUTPUTS +% gt0 - {1xn} loaded ground truth bbs (each is a mx5 array of bbs) +% dt0 - {1xn} loaded detections (each is a mx5 array of bbs) +% +% EXAMPLE +% +% See also bbGt, bbGt>getFiles, bbGt>evalRes + +% get list of files +if(nargin<2), dtDir=[]; end +if(nargin<3), pLoad={}; end +if(isempty(dtDir)), fs=getFiles({gtDir}); gtFs=fs(1,:); else + dtFile=length(dtDir)>4 && strcmp(dtDir(end-3:end),'.txt'); + if(dtFile), dirs={gtDir}; else dirs={gtDir,dtDir}; end + fs=getFiles(dirs); gtFs=fs(1,:); + if(dtFile), dtFs=dtDir; else dtFs=fs(2,:); end +end + +% load ground truth +persistent keyPrv gtPrv; key={gtDir,pLoad}; n=length(gtFs); +if(isequal(key,keyPrv)), gt0=gtPrv; else gt0=cell(1,n); + for i=1:n, [~,gt0{i}]=bbLoad(gtFs{i},pLoad); end + gtPrv=gt0; keyPrv=key; +end + +% load detections +if(isempty(dtDir) || nargout<=1), dt0=cell(0); return; end +if(iscell(dtFs)), dt0=cell(1,n); + for i=1:n, dt1=load(dtFs{i},'-ascii'); + if(numel(dt1)==0), dt1=zeros(0,5); end; dt0{i}=dt1(:,1:5); end +else + dt1=load(dtFs,'-ascii'); if(numel(dt1)==0), dt1=zeros(0,6); end + ids=dt1(:,1); assert(max(ids)<=n); + dt0=cell(1,n); for i=1:n, dt0{i}=dt1(ids==i,2:6); end +end + +end + +function [gt,dt] = evalRes( gt0, dt0, thr, mul ) +% Evaluates detections against ground truth data. +% +% Uses modified Pascal criteria that allows for "ignore" regions. The +% Pascal criteria states that a ground truth bounding box (gtBb) and a +% detected bounding box (dtBb) match if their overlap area (oa): +% oa(gtBb,dtBb) = area(intersect(gtBb,dtBb)) / area(union(gtBb,dtBb)) +% is over a sufficient threshold (typically .5). In the modified criteria, +% the dtBb can match any subregion of a gtBb set to "ignore". 
Choosing +% gtBb' in gtBb that most closely matches dtBb can be done by using +% gtBb'=intersect(dtBb,gtBb). Computing oa(gtBb',dtBb) is equivalent to +% oa'(gtBb,dtBb) = area(intersect(gtBb,dtBb)) / area(dtBb) +% For gtBb set to ignore the above formula for oa is used. +% +% Highest scoring detections are matched first. Matches to standard, +% (non-ignore) gtBb are preferred. Each dtBb and gtBb may be matched at +% most once, except for ignore-gtBb which can be matched multiple times. +% Unmatched dtBb are false-positives, unmatched gtBb are false-negatives. +% Each match between a dtBb and gtBb is a true-positive, except matches +% between dtBb and ignore-gtBb which do not affect the evaluation criteria. +% +% In addition to taking gt/dt results on a single image, evalRes() can take +% cell arrays of gt/dt bbs, in which case evaluation proceeds on each +% element. Use bbGt>loadAll() to load gt/dt for multiple images. +% +% Each gt/dt output row has a flag match that is either -1/0/1: +% for gt: -1=ignore, 0=fn [unmatched], 1=tp [matched] +% for dt: -1=ignore, 0=fp [unmatched], 1=tp [matched] +% +% USAGE +% [gt, dt] = bbGt( 'evalRes', gt0, dt0, [thr], [mul] ) +% +% INPUTS +% gt0 - [mx5] ground truth array with rows [x y w h ignore] +% dt0 - [nx5] detection results array with rows [x y w h score] +% thr - [.5] the threshold on oa for comparing two bbs +% mul - [0] if true allow multiple matches to each gt +% +% OUTPUTS +% gt - [mx5] ground truth results [x y w h match] +% dt - [nx6] detection results [x y w h score match] +% +% EXAMPLE +% +% See also bbGt, bbGt>compOas, bbGt>loadAll + +% get parameters +if(nargin<3 || isempty(thr)), thr=.5; end +if(nargin<4 || isempty(mul)), mul=0; end + +% if gt0 and dt0 are cell arrays run on each element in turn +if( iscell(gt0) && iscell(dt0) ), n=length(gt0); + assert(length(dt0)==n); gt=cell(1,n); dt=gt; + for i=1:n, [gt{i},dt{i}] = evalRes(gt0{i},dt0{i},thr,mul); end; return; +end + +% check inputs +if(isempty(gt0)), 
gt0=zeros(0,5); end +if(isempty(dt0)), dt0=zeros(0,5); end +assert( size(dt0,2)==5 ); nd=size(dt0,1); +assert( size(gt0,2)==5 ); ng=size(gt0,1); + +% sort dt highest score first, sort gt ignore last +[~,ord]=sort(dt0(:,5),'descend'); dt0=dt0(ord,:); +[~,ord]=sort(gt0(:,5),'ascend'); gt0=gt0(ord,:); +gt=gt0; gt(:,5)=-gt(:,5); dt=dt0; dt=[dt zeros(nd,1)]; + +% Attempt to match each (sorted) dt to each (sorted) gt +oa = compOas( dt(:,1:4), gt(:,1:4), gt(:,5)==-1 ); +for d=1:nd + bstOa=thr; bstg=0; bstm=0; % info about best match so far + for g=1:ng + % if this gt already matched, continue to next gt + m=gt(g,5); if( m==1 && ~mul ), continue; end + % if dt already matched, and on ignore gt, nothing more to do + if( bstm~=0 && m==-1 ), break; end + % compute overlap area, continue to next gt unless better match made + if(oa(d,g)evalRes +dfs={'evShow',1,'gtShow',1,'dtShow',1,'cols','krg',... + 'gtLs','-','dtLs','--','lw',3}; +[evShow,gtShow,dtShow,cols,gtLs,dtLs,lw]=getPrmDflt(varargin,dfs,1); +% optionally display image +if(ischar(I)), I=imread(I); end +if(~isempty(I)), hImg=im(I,[],0); title(''); end +% display bbs with or w/o color coding based on output of evalRes +hold on; hs=cell(1,1000); k=0; +if( evShow ) + if(gtShow), for i=1:size(gt,1), k=k+1; + hs{k}=bbApply('draw',gt(i,1:4),cols(gt(i,5)+2),lw,gtLs); end; end + if(dtShow), for i=1:size(dt,1), k=k+1; + hs{k}=bbApply('draw',dt(i,1:5),cols(dt(i,6)+2),lw,dtLs); end; end +else + if(gtShow), k=k+1; hs{k}=bbApply('draw',gt(:,1:4),cols(3),lw,gtLs); end + if(dtShow), k=k+1; hs{k}=bbApply('draw',dt(:,1:5),cols(3),lw,dtLs); end +end +hs=[hs{:}]; hold off; +end + +function [xs,ys,score,ref] = compRoc( gt, dt, roc, ref ) +% Compute ROC or PR based on outputs of evalRes on multiple images. 
+% +% ROC="Receiver operating characteristic"; PR="Precision Recall" +% Also computes result at reference points (ref): +% which for ROC curves is the *detection* rate at reference *FPPI* +% which for PR curves is the *precision* at reference *recall* +% Note, FPPI="false positive per image" +% +% USAGE +% [xs,ys,score,ref] = bbGt( 'compRoc', gt, dt, roc, ref ) +% +% INPUTS +% gt - {1xn} first output of evalRes() for each image +% dt - {1xn} second output of evalRes() for each image +% roc - [1] if 1 compue ROC else compute PR +% ref - [] reference points for ROC or PR curve +% +% OUTPUTS +% xs - x coords for curve: ROC->FPPI; PR->recall +% ys - y coords for curve: ROC->TP; PR->precision +% score - detection scores corresponding to each (x,y) +% ref - recall or precision at each reference point +% +% EXAMPLE +% +% See also bbGt, bbGt>evalRes + +% get additional parameters +if(nargin<3 || isempty(roc)), roc=1; end +if(nargin<4 || isempty(ref)), ref=[]; end +% convert to single matrix, discard ignore bbs +nImg=length(gt); assert(length(dt)==nImg); +gt=cat(1,gt{:}); gt=gt(gt(:,5)~=-1,:); +dt=cat(1,dt{:}); dt=dt(dt(:,6)~=-1,:); +% compute results +if(size(dt,1)==0), xs=0; ys=0; score=0; ref=ref*0; return; end +m=length(ref); np=size(gt,1); score=dt(:,5); tp=dt(:,6); +[score,order]=sort(score,'descend'); tp=tp(order); +fp=double(tp~=1); fp=cumsum(fp); tp=cumsum(tp); +if( roc ) + xs=fp/nImg; ys=tp/np; xs1=[-inf; xs]; ys1=[0; ys]; + for i=1:m, j=find(xs1<=ref(i)); ref(i)=ys1(j(end)); end +else + xs=tp/np; ys=tp./(fp+tp); xs1=[xs; inf]; ys1=[ys; 0]; + for i=1:m, j=find(xs1>=ref(i)); ref(i)=ys1(j(1)); end +end +end + +function [Is,scores,imgIds] = cropRes( gt, dt, imFs, varargin ) +% Extract true or false positives or negatives for visualization. 
+% +% USAGE +% [Is,scores,imgIds] = bbGt( 'cropRes', gt, dt, imFs, varargin ) +% +% INPUTS +% gt - {1xN} first output of evalRes() for each image +% dt - {1xN} second output of evalRes() for each image +% imFs - {1xN} name of each image +% varargin - additional parameters (struct or name/value pairs) +% .dims - ['REQ'] target dimensions for extracted windows +% .pad - [0] padding amount for cropping +% .type - ['fp'] one of: 'fp', 'fn', 'tp', 'dt' +% .n - [100] max number of windows to extract +% .show - [1] figure for displaying results (or 0) +% .fStr - ['%0.1f'] label{i}=num2str(score(i),fStr) +% .embed - [0] if true embed dt/gt bbs into cropped windows +% +% OUTPUTS +% Is - [dimsxn] extracted image windows +% scores - [1xn] detection score for each bb unless 'fn' +% imgIds - [1xn] image id for each cropped window +% +% EXAMPLE +% +% See also bbGt, bbGt>evalRes +dfs={'dims','REQ','pad',0,'type','fp','n',100,... + 'show',1,'fStr','%0.1f','embed',0}; +[dims,pad,type,n,show,fStr,embed]=getPrmDflt(varargin,dfs,1); +N=length(imFs); assert(length(gt)==N && length(dt)==N); +% crop patches either in gt or dt according to type +switch type + case 'fn', bbs=gt; keep=@(bbs) bbs(:,5)==0; + case 'fp', bbs=dt; keep=@(bbs) bbs(:,6)==0; + case 'tp', bbs=dt; keep=@(bbs) bbs(:,6)==1; + case 'dt', bbs=dt; keep=@(bbs) bbs(:,6)>=0; + otherwise, error('unknown type: %s',type); +end +% create ids that will map each bb to correct name +ms=zeros(1,N); for i=1:N, ms(i)=size(bbs{i},1); end; cms=[0 cumsum(ms)]; +ids=zeros(1,sum(ms)); for i=1:N, ids(cms(i)+1:cms(i+1))=i; end +% flatten bbs and keep relevent subset +bbs=cat(1,bbs{:}); K=keep(bbs); bbs=bbs(K,:); ids=ids(K); n=min(n,sum(K)); +% reorder bbs appropriately +if(~strcmp(type,'fn')), [~,ord]=sort(bbs(:,5),'descend'); else + if(size(bbs,1)0)), dims1=dims.*(1+pad); rs=dims1./dims; dims=dims1; end +if(any(pad>0)), bbs=bbApply('resize',bbs,rs(1),rs(2)); end +for i=1:N + locs=find(ids==i); if(isempty(locs)), continue; end; 
I=imread(imFs{i}); + if( embed ) + if(any(strcmp(type,{'fp','dt'}))), bbs1=gt{i}; + else bbs1=dt{i}(:,[1:4 6]); end + I=bbApply('embed',I,bbs1(bbs1(:,5)==0,1:4),'col',[255 0 0]); + I=bbApply('embed',I,bbs1(bbs1(:,5)==1,1:4),'col',[0 255 0]); + end + Is1=bbApply('crop',I,bbs(locs,1:4),'replicate',dims); + for j=1:length(locs), Is{locs(j)}=Is1{j}; end; + scores(locs)=bbs(locs,5); imgIds(locs)=i; +end; Is=cell2array(Is); +% optionally display +if(~show), return; end; figure(show); pMnt={'hasChn',size(Is1{1},3)>1}; +if(isempty(fStr)), montage2(Is,pMnt); title(type); return; end +ls=cell(1,n); for i=1:n, ls{i}=int2str2(imgIds(i)); end +if(~strcmp(type,'fn')) + for i=1:n, ls{i}=[ls{i} '/' num2str(scores(i),fStr)]; end; end +montage2(Is,[pMnt 'labels' {ls}]); title(type); +end + +function oa = compOas( dt, gt, ig ) +% Computes (modified) overlap area between pairs of bbs. +% +% Uses modified Pascal criteria with "ignore" regions. The overlap area +% (oa) of a ground truth (gt) and detected (dt) bb is defined as: +% oa(gt,dt) = area(intersect(dt,dt)) / area(union(gt,dt)) +% In the modified criteria, a gt bb may be marked as "ignore", in which +% case the dt bb can can match any subregion of the gt bb. Choosing gt' in +% gt that most closely matches dt can be done using gt'=intersect(dt,gt). 
+% Computing oa(gt',dt) is equivalent to: +% oa'(gt,dt) = area(intersect(gt,dt)) / area(dt) +% +% USAGE +% oa = bbGt( 'compOas', dt, gt, [ig] ) +% +% INPUTS +% dt - [mx4] detected bbs +% gt - [nx4] gt bbs +% ig - [nx1] 0/1 ignore flags (0 by default) +% +% OUTPUTS +% oas - [m x n] overlap area between each gt and each dt bb +% +% EXAMPLE +% dt=[0 0 10 10]; gt=[0 0 20 20]; +% oa0 = bbGt('compOas',dt,gt,0) +% oa1 = bbGt('compOas',dt,gt,1) +% +% See also bbGt, bbGt>evalRes +m=size(dt,1); n=size(gt,1); oa=zeros(m,n); +if(nargin<3), ig=zeros(n,1); end +de=dt(:,[1 2])+dt(:,[3 4]); da=dt(:,3).*dt(:,4); +ge=gt(:,[1 2])+gt(:,[3 4]); ga=gt(:,3).*gt(:,4); +for i=1:m + for j=1:n + w=min(de(i,1),ge(j,1))-max(dt(i,1),gt(j,1)); if(w<=0), continue; end + h=min(de(i,2),ge(j,2))-max(dt(i,2),gt(j,2)); if(h<=0), continue; end + t=w*h; if(ig(j)), u=da(i); else u=da(i)+ga(j)-t; end; oa(i,j)=t/u; + end +end +end + +function oa = compOa( dt, gt, ig ) +% Optimized version of compOas for a single pair of bbs. +% +% USAGE +% oa = bbGt( 'compOa', dt, gt, ig ) +% +% INPUTS +% dt - [1x4] detected bb +% gt - [1x4] gt bb +% ig - 0/1 ignore flag +% +% OUTPUTS +% oa - overlap area between gt and dt bb +% +% EXAMPLE +% dt=[0 0 10 10]; gt=[0 0 20 20]; +% oa0 = bbGt('compOa',dt,gt,0) +% oa1 = bbGt('compOa',dt,gt,1) +% +% See also bbGt, bbGt>compOas +w=min(dt(3)+dt(1),gt(3)+gt(1))-max(dt(1),gt(1)); if(w<=0),oa=0; return; end +h=min(dt(4)+dt(2),gt(4)+gt(2))-max(dt(2),gt(2)); if(h<=0),oa=0; return; end +i=w*h; if(ig),u=dt(3)*dt(4); else u=dt(3)*dt(4)+gt(3)*gt(4)-i; end; oa=i/u; +end diff --git a/detector/bbLabeler.m b/detector/bbLabeler.m new file mode 100644 index 0000000..7d1e159 --- /dev/null +++ b/detector/bbLabeler.m @@ -0,0 +1,438 @@ +function bbLabeler( objTypes, imgDir, resDir ) +% Bounding box or ellipse labeler for static images. +% +% Launch and click "?" icon for more info. 
+% +% USAGE +% bbLabeler( [objTypes], [imgDir], [resDir] ) +% +% INPUTS +% objTypes - [{'object'}] list of object types to annotate +% imgDir - [pwd] directory with images +% resDir - [imgDir] directory with annotations +% +% OUTPUTS +% +% EXAMPLE +% bbLabeler +% +% See also bbGt, imRectRot +% +% Piotr's Computer Vision Matlab Toolbox Version 2.66 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +if(nargin<1 || isempty(objTypes)), objTypes={'object'}; end +if(nargin<2 || isempty(imgDir)), imgDir=pwd; end +if(nargin<3 || isempty(resDir)), resDir=imgDir; end +if(~exist(resDir,'dir')), mkdir(resDir); end +colors='gcmrkgcmrkgcmrkgcmrkgcmrkgcmrkgcmrk'; minSiz=[12 12]; +[hFig,hPan,hAx,pTop,imgInd,imgFiles,usePnts] = deal([]); +makeLayout(); imgApi=imgMakeApi(); objApi=objMakeApi(); +usePnts=0; imgApi.setImgDir(imgDir); + + function makeLayout() + % common properties + name = 'bounding box labeler'; + bg='BackgroundColor'; fg='ForegroundColor'; ha='HorizontalAlignment'; + units = {'Units','pixels'}; st='String'; ps='Position'; fs='FontSize'; + + % initial figures size / pos + set(0,'Units','pixels'); ss = get(0,'ScreenSize'); + if( ss(3)<800 || ss(4)<600 ); error('screen too small'); end; + figPos = [(ss(3)-620)/2 (ss(4)-500)/2 620 500]; + + % create main figure + hFig = figure('NumberTitle','off', 'Toolbar','none', 'Color','k', ... + 'MenuBar','none', 'Visible','off', ps,figPos, 'Name',[name resDir]); + set(hFig,'DeleteFcn',@(h,e) exitProg,'ResizeFcn',@(h,e) figResized ); + + % display axes + hAx = axes(units{:},'Parent',hFig,'XTick',[],'YTick',[]); imshow(0); + + % top panel + pnlProp = [units {bg,[.1 .1 .1],'BorderType','none'}]; + txtPrp = {'Style','text',bg,[.1 .1 .1],fs,8,fg,'w',ha}; + edtPrp = {'Style','edit',bg,[.1 .1 .1],fs,8,fg,'w',ha}; + btnPrp = [units,{'Style','pushbutton','FontWeight','bold',... 
+ bg,[.7 .7 .7],fs,10}]; + chbPrp = {'Style','checkbox',bg,[.1 .1 .1],fs,8,fg,'w'}; + pTop.h = uipanel(pnlProp{:},'Parent',hFig); + pTop.hImgInd=uicontrol(pTop.h,edtPrp{:},'Right',st,'0'); + pTop.hImgNum=uicontrol(pTop.h,edtPrp{:},'Left',st,'/0',... + 'Enable','inactive'); + pTop.hLbl = uicontrol( pTop.h,'Style','popupmenu',units{:},... + st,objTypes,fs,8,'Value',1); + pTop.hDel=uicontrol(pTop.h,btnPrp{:},fs,11,fg,[.5 0 0],st,'X'); + pTop.hPrv=uicontrol(pTop.h,btnPrp{:},st,'<<'); + pTop.hNxt=uicontrol(pTop.h,btnPrp{:},st,'>>'); + pTop.hOcc=uicontrol(pTop.h,chbPrp{:},st,'occ'); + pTop.hIgn=uicontrol(pTop.h,chbPrp{:},st,'ign'); + pTop.hEll=uicontrol(pTop.h,chbPrp{:},st,'ellipse'); + pTop.hRot=uicontrol(pTop.h,chbPrp{:},st,'rotate'); + pTop.hLim=uicontrol(pTop.h,chbPrp{:},st,'lims'); + pTop.hPnt=uicontrol(pTop.h,chbPrp{:},st,'pnts'); + pTop.hHid=uicontrol(pTop.h,chbPrp{:},st,'hide'); + pTop.hPan=uicontrol(pTop.h,chbPrp{:},st,'pan'); + pTop.hDims=uicontrol(pTop.h,txtPrp{:},'Center',st,''); + pTop.hNum=uicontrol(pTop.h,txtPrp{:},'Center',st,'n=0'); + pTop.hHelp=uicontrol(pTop.h,btnPrp{:},fs,12,st,'?'); + + % set the keyPressFcn for all focusable components (except popupmenus) + set( hFig, 'keyPressFcn',@keyPress ); + set( hFig, 'WindowScrollWheelFcn',@(h,e) mouseWheel(e)); + set( hFig, 'ButtonDownFcn',@(h,e) mousePress ); + set( pTop.hHelp,'CallBack',@(h,e) helpWindow ); + + % set hFig to visible upon completion + set(hFig,'Visible','on'); drawnow; + + % pan controls + hPan = pan( hFig ); + + function figResized() + % overall layout + pos=get(hFig,ps); pad=8; htTop=30; wdTop=620; + wd=pos(3)-2*pad; ht=pos(4)-2*pad-htTop; + x=(pos(3)-wd)/2; y=pad; + set(hAx,ps,[x y wd ht]); y=y+ht; + set(pTop.h,ps,[x y wd htTop]); + % position stuff in top panel + x=max(2,(wd-wdTop)/2); + set(pTop.hImgInd,ps,[x 4 40 22]); x=x+40; + set(pTop.hImgNum,ps,[x 4 40 22]); x=x+50; + set(pTop.hDel,ps,[x 5 20 20]); x=x+20+5; + set(pTop.hPrv,ps,[x 5 24 20]); x=x+25; + set(pTop.hLbl,ps,[x 5 80 21]); 
x=x+81; + set(pTop.hNxt,ps,[x 5 24 20]); x=x+25+5; + set(pTop.hDims,ps,[x 5 60 20]); x=x+62; + set(pTop.hOcc,ps,[x 15 45 13]); + set(pTop.hIgn,ps,[x 2 45 13]); x=x+50; + set(pTop.hEll,ps,[x 15 55 13]); + set(pTop.hRot,ps,[x 2 55 13]); x=x+60; + set(pTop.hLim,ps,[x 15 45 13]); + set(pTop.hPnt,ps,[x 2 45 13]); x=x+50; + set(pTop.hHid,ps,[x 15 55 13]); + set(pTop.hPan,ps,[x 2 55 13]); x=x+60; + set(pTop.hNum,ps,[x 5 30 20]); x=x+30+20; + set(pTop.hHelp,ps,[x 5 20 20]); + end + + function helpWindow() + helpTxt = { + 'Image Selection:' + ' * spacebar: advance one image' + ' * ctrl-spacebar: go back one image' + ' * double-click: advance one image' + ' * can also directly enter image index' + '' + 'Zoom and Pan controls:' + ' * mouse wheel or +/- keys: zoom in and out' + ' * q-key or pan-icon: toggle pan mode' + ' * click/drag: pans image (while in pan mode)' + '' + 'bb modification with mouse:' + ' * click/drag in blank region: create new bb' + ' * click on existing bb: select bb' + ' * click/drag center of existing bb: move bb' + ' * click/drag edge of existing bb: resize bb' + ' * clck/drag control points: rotate/resize bb' + ' * ctrl+arrow keys: shift selected bb' + '' + 'Other controls:' + ' * d-key or del-key or X-icon: delete selected bb' + ' * o-key or occ-icon: toggle occlusion for bb' + ' * i-key or ign-icon: toggle ignore for bb' + ' * e-key or ellipse-icon: toggle bb ellipse/rect display' + ' * r-key or rotation-icon: toggle bb rotation control' + ' * l-key or lims-icon: toggle bb limits on/off' + ' * p-key or pnts-icon: toggle pnt creation on/off' + ' * left-arrow or <<-icon: select previous bb' + ' * right-arrow or >>-icon: select next bb' + ' * up/down-arrow a-key/z-key or dropbox: select bb label' + ' * ctrl and +/- keys: increase/decrease contrast' }; + pos=get(0,'ScreenSize'); pos=[(pos(3)-400)/2 (pos(4)-520)/2 400 520]; + hHelp = figure('NumberTitle','off', 'Toolbar','auto', ... + 'Color','k', 'MenuBar','none', 'Visible','on', ... 
+ 'Name',[name ' help'], 'Resize','on', ps, pos ); pos(1:2)=0; + uicontrol( hHelp, 'Style','text', ha,'Left', fs,10, bg,'w', ... + ps,pos, st,helpTxt ); + end + + function exitProg(), objApi.closeAnn(); end + end + + function keyPress( h, evnt ) %#ok + c=int8(evnt.Character); if(isempty(c)), c=0; end; + ctrl=strcmp(evnt.Modifier,'control'); if(isempty(ctrl)),ctrl=0; end + if(c==127 || c==100), objApi.objDel(); end % 'del' or 'd' + if(c==32 && ctrl ), imgApi.setImg(imgInd-1); end % ctrl-spacebar + if(c==32 && ~ctrl), imgApi.setImg(imgInd+1); end % spacebar + if(c==28 && ctrl), objApi.objShift(-1,0); end % ctrl-lf + if(c==29 && ctrl), objApi.objShift(+1,0); end % ctrl-rt + if(c==30 && ctrl), objApi.objShift(0,-1); end % ctrl-up + if(c==31 && ctrl), objApi.objShift(0,+1); end % ctrl-dn + if(c==28 && ~ctrl), objApi.objToggle(-1); end % lf + if(c==29 && ~ctrl), objApi.objToggle(+1); end % rt + if((c==30 && ~ctrl) || c==97), objApi.objSetType(-1); end % up or 'a' + if((c==31 && ~ctrl) || c==122), objApi.objSetType(+1); end % dn or 'z' + if(c==111), objApi.objSetVal('occ',0); end % 'o' + if(c==105), objApi.objSetVal('ign',0); end % 'i' + if(c==101), objApi.objSetVal('ell',0); end % 'e' + if(c==114), objApi.objSetVal('rot',0); end % 'r' + if(c==108), objApi.objSetVal('lim',0); end % 'l' + if(c==112), objApi.objSetVal('pnt',0); end % 'p' + if(c==104), objApi.objSetVal('hid',0); end % 'h' + if(c==113), objApi.objSetVal('pan',0); end % 'q' + if(c==43 && ~ctrl), zoom(1.1); end % '+' key, zoom in + if(c==45 && ~ctrl), zoom(1/1.1); end % '-' key, zoom out + if(c==43 && ctrl), imgApi.adjContrast(+1); end % ctrl-'+', inc contrast + if(c==45 && ctrl), imgApi.adjContrast(-1); end % ctrl-'-', dec contrast + end + + function mousePress() + sType = get(hFig,'SelectionType'); + %disp(['mouse pressed: ' sType]); + if( strcmp(sType,'open') ) + if( usePnts ), return; end + imgApi.setImg(imgInd+1); % double click + elseif( strcmp(sType,'normal') ) + objApi.objNew(); % single click + end + 
end + + function mouseWheel( evnt ) + if( evnt.VerticalScrollCount>0 ), zoom(1/1.1); else zoom(1.1); end + end + + function mouseDrag() + if(isempty(imgInd)), return; end + persistent h; if(~all(ishandle(h))), h=[]; end + xs=get(gca,'xLim'); ys=get(gca,'yLim'); + p=get(hAx,'CurrentPoint'); x=p(1); y=p(3); + if( xxs(2)||yys(2) ), delete(h); return; end + if(isempty(h)), h=[line line]; + set(h,'ButtonDownFcn',@(h,e) mousePress,'Color','k'); end + set(h,{'Xdata'},{[x x];xs},{'YData'},{ys,[y y]}'); + end + + function api = objMakeApi() + % variables + [resNm,objs,nObj,hsObj,curObj,lims] = deal([]); + ellipse=0; rotate=0; useLims=0; hide=0; + + % callbacks + set(pTop.hDel,'Callback',@(h,evnt) objDel()); + set(pTop.hPrv,'Callback',@(h,evnt) objToggle(-1)); + set(pTop.hNxt,'Callback',@(h,evnt) objToggle(+1)); + set(pTop.hLbl,'Callback',@(h,evnt) objSetType()); + set(pTop.hOcc,'Callback',@(h,evnt) objSetVal('occ',1)); + set(pTop.hIgn,'Callback',@(h,evnt) objSetVal('ign',1)); + set(pTop.hEll,'Callback',@(h,evnt) objSetVal('ell',1)); + set(pTop.hRot,'Callback',@(h,evnt) objSetVal('rot',1)); + set(pTop.hLim,'Callback',@(h,evnt) objSetVal('lim',1)); + set(pTop.hPnt,'Callback',@(h,evnt) objSetVal('pnt',1)); + set(pTop.hHid,'Callback',@(h,evnt) objSetVal('hid',1)); + set(pTop.hPan,'Callback',@(h,evnt) objSetVal('pan',1)); + + % create api + api = struct( 'closeAnn',@closeAnn, 'openAnn',@openAnn, ... + 'objNew',@objNew, 'objDel',@objDel, 'objToggle',@objToggle, ... + 'objSetType',@objSetType, 'objSetVal',@objSetVal, ... 
+ 'objShift',@objShift ); + + function closeAnn() + % save annotation and then clear (also use to init) + if(~isempty(nObj)&&~isempty(resNm)), bbGt('bbSave',objs,resNm); end + delete(hsObj); hsObj=[]; nObj=0; resNm=''; curObj=0; objs=[]; + objsDraw(); + end + + function openAnn() + % try to load annotation, prepare for new image + assert(nObj==0); lims=[get(gca,'xLim'); get(gca,'yLim')]; + lims=[lims(:); 0]'; lims(3:4)=lims(3:4)-lims(1:2); + resNm=[resDir '/' imgFiles{imgInd} '.txt']; + if(exist(resNm,'file')), objs=bbGt('bbLoad',resNm); end + objTypes=unique([objTypes bbGt('get',objs,'lbl')']); + set(pTop.hLbl,'String',objTypes); nObj=length(objs); objsDraw(); + end + + function objsDraw() + delete(hsObj); if(hide), hsObj=[]; return; end; hsObj=zeros(1,nObj); + % display regular bbs + for id=1:nObj + o=objs(id); color=colors(strcmp(o.lbl,objTypes)); + rp=struct('ellipse',ellipse,'rotate',rotate,'hParent',hAx,... + 'lw',2,'ls','-','pos',[o.bb o.ang],'color',color); + if(~useLims), rp.lims=[]; else rp.lims=lims; end + if(o.ign), rp.cross=2; end; if(curObj==id), rp.ls=':'; end + [hsObj(id),rectApi]=imRectRot(rp); + rectApi.setPosSetCb(@(bb) objSetBb(bb,id)); + rectApi.setPosChnCb(@(bb) objChnBb(bb,id)); + if(id==curObj), rectApiCur=rectApi; end + end + if(curObj>0), rectApiCur.uistack('top'); end + % display occluded bbs + for id=1:nObj + o=objs(id); ang=o.ang; if(~o.occ), continue; end + rp=struct('ellipse',ellipse,'rotate',0,'hParent',hAx,'lw',1,... 
+ 'ls','-','pos',[o.bbv ang],'lims',[o.bb ang],'color','y'); + [hObj,rectApi] = imRectRot( rp ); + rectApi.setPosSetCb(@(bbv) objSetBbv(bbv,id)); + hsObj=[hsObj hObj]; %#ok + end + % update gui info + if(curObj==0), dimsStr=''; occ=0; ign=0; en='off'; else + o=objs(curObj); occ=o.occ; ign=o.ign; en='on'; + set(pTop.hLbl,'Value',find(strcmp(o.lbl,objTypes))); + dimsStr=sprintf('%i x %i',round(o.bb(3)),round(o.bb(4))); + end + set([pTop.hIgn pTop.hOcc],'Enable',en); set(pTop.hOcc,'Value',occ); + set(pTop.hDims,'String',dimsStr); set(pTop.hIgn,'Value',ign); + set(pTop.hNum,'String', ['n=' int2str(nObj)] ); + set(hFig,'WindowButtonMotionFcn',@(h,e) mouseDrag); mouseDrag(); + end + + function objSetBb( bb, objId ) + curObj=objId; o=objs(objId); bb=round(bb); bbv=o.bbv; + if(any(bb(3:4) + dimsStr=sprintf('%i x %i',round(bb(3)),round(bb(4))); + set( pTop.hDims, 'String', dimsStr ); + end + + function objNew() + if(hide), return; end; curObj=0; objsDraw(); + pnt=get(hAx,'CurrentPoint'); pnt=pnt([1,3]); + if( pnt(1)lims(3) || ... + pnt(2)lims(4)), return; end + lblId=get(pTop.hLbl,'Value'); color=colors(lblId); + rp=struct('ellipse',ellipse,'rotate',rotate/2,'hParent',hAx,... 
+ 'lw',2,'ls',':','pos',pnt,'color',color); + if(~useLims), rp.lims=[]; else rp.lims=lims; end + [hObj,rectApi]=imRectRot(rp); + lbl=objTypes{lblId}; bb=round(rectApi.getPos()); + if( usePnts && all(bb(3:4)=minSiz) ) + obj=bbGt('create'); obj.lbl=lbl; obj.bb=bb(1:4); obj.ang=bb(5); + objs=[objs; obj]; nObj=nObj+1; curObj=nObj; + end; delete(hObj); objsDraw(); + end + + function objDel() + if(curObj==0), return; end + objs(curObj)=[]; curObj=0; nObj=nObj-1; objsDraw(); + end + + function objToggle( del ) + curObj=mod(curObj+del,nObj+1); objsDraw(); + end + + function objSetType( del ) + val = get(pTop.hLbl,'Value'); + if( nargin>0 && del~=0 ) + val = max(1,min(val+del,length(objTypes))); + set(pTop.hLbl,'Value',val); + end + if(curObj), objs(curObj).lbl=objTypes{val}; objsDraw(); end + end + + function objSetVal( type, flag ) + if(strcmp(type,'occ')) + if(curObj==0), return; end + occ = get(pTop.hOcc,'Value'); if(~flag), occ=1-occ; end + objs(curObj).occ=occ; objSetBbv(objs(curObj).bb,curObj); return; + elseif(strcmp(type,'ign')) + if(curObj==0), return; end + ign = get(pTop.hIgn,'Value'); if(~flag), ign=1-ign; end + objs(curObj).ign=ign; + elseif(strcmp(type,'ell')) + ellipse = get(pTop.hEll,'Value'); + if(~flag), ellipse=1-ellipse; set(pTop.hEll,'Value',ellipse); end + elseif(strcmp(type,'rot')) + rotate = get(pTop.hRot,'Value'); + if(~flag), rotate=1-rotate; set(pTop.hRot,'Value',rotate); end + elseif(strcmp(type,'lim')) + useLims = get(pTop.hLim,'Value'); + if(~flag), useLims=1-useLims; set(pTop.hLim,'Value',useLims); end + elseif(strcmp(type,'pnt')) + usePnts = get(pTop.hPnt,'Value'); + if(~flag), usePnts=1-usePnts; set(pTop.hPnt,'Value',usePnts); end + elseif(strcmp(type,'hid')) + hide = get(pTop.hHid,'Value'); + if(~flag), hide=1-hide; set(pTop.hHid,'Value',hide); end + if( hide ), curObj=0; end + elseif(strcmp(type,'pan')) + enabled = get(pTop.hPan,'Value'); + if(~flag), enabled=1-enabled; set(pTop.hPan,'Value',enabled); end + if(~enabled), 
set(hPan,'Enable','off'); else + set(hPan,'Enable','on'); hM=uigetmodemanager(hFig); + set(hM.WindowListenerHandles,'Enable','off'); + set( hFig, 'keyPressFcn',@keyPress); + set( hFig, 'WindowScrollWheelFcn',@(h,e) mouseWheel(e)); + setptr(hFig,'hand'); %set(hFig,'Pointer','hand'); + end + end + objsDraw(); + end + + function objShift( x, y ) + if(curObj==0), return; end + objs(curObj).bb(1:2)=objs(curObj).bb(1:2)+[x y]; + objsDraw(); + end + end + + function api = imgMakeApi() + [nImg,hImg,contrast,I]=deal([]); + set(pTop.hImgInd,'Callback',@(h,evnt) setImgCb()); + api = struct( 'setImgDir',@setImgDir, 'setImg',@setImg, ... + 'adjContrast',@adjContrast ); + + function setImgDir( imgDir1 ) + objApi.closeAnn(); imgDir=imgDir1; + imgFiles=[dir([imgDir '/*.jpg']); dir([imgDir '/*.jpeg']); ... + dir([imgDir '/*.png']); dir([imgDir '/*.tif'])]; + imgFiles={imgFiles.name}; nImg=length(imgFiles); setImg(1); + set(pTop.hImgNum,'String',['/' int2str(nImg)]); + end + + function adjContrast( del ) + if(isempty(I)), return; end + contrast=max(.1,contrast+del/10); + set(hImg,'CData',I*contrast); + end + + function setImg( imgInd1 ) + if(nImg==0), return; end; objApi.closeAnn(); imgInd=imgInd1; + if(imgInd<1), imgInd=1; end; if(imgInd>nImg), imgInd=nImg; end + I=imread([imgDir '/' imgFiles{imgInd}]); hImg=imshow(I); + set(pTop.hImgInd,'String',int2str(imgInd)); contrast=1; + set(hImg,'ButtonDownFcn',@(h,e) mousePress); objApi.openAnn(); + end + + function setImgCb() + imgInd1=str2double(get(pTop.hImgInd,'String')); + if(isnan(imgInd1)), setImg(imgInd); else setImg(imgInd1); end + end + end + +end diff --git a/detector/bbNms.m b/detector/bbNms.m new file mode 100644 index 0000000..7a0a9b5 --- /dev/null +++ b/detector/bbNms.m @@ -0,0 +1,177 @@ +function bbs = bbNms( bbs, varargin ) +% Bounding box (bb) non-maximal suppression (nms). +% +% type=='max': nms of bbs using area of overlap criteria. 
For each pair of +% bbs, if their overlap, defined by: +% overlap(bb1,bb2) = area(intersect(bb1,bb2))/area(union(bb1,bb2)) +% is greater than overlap, then the bb with the lower score is suppressed. +% In the Pascal critieria two bbs are considered a match if overlap>=.5. If +% ovrDnm='min', the 'union' in the above formula is replaced with 'min'. +% +% type=='maxg': Similar to 'max', except performs the nms in a greedy +% fashion. Bbs are processed in order of decreasing score, and, unlike in +% 'max' nms, once a bb is suppressed it can no longer suppress other bbs. +% +% type='cover': Perform nms by attempting to choose the smallest subset of +% the bbs such that each remaining bb is within overlap of one of the +% chosen bbs. The above reduces to the weighted set cover problem which is +% NP but greedy optimization yields provably good solutions. The score of +% each bb is set to the sum of the scores of the bbs it covers (the max can +% also be used). In practice similar to 'maxg'. +% +% type=='ms': Mean shift nms of bbs with a variable width kernel. radii is +% a 4 element vector (x,y,w,h) that controls the amount of suppression +% along each dim. Typically the first two elements should be the same, as +% should the last two. Distance between w/h are computed in log2 space (ie +% w and w*2 are 1 unit apart), and the radii should be set accordingly. +% radii may need to change depending on spatial and scale stride of bbs. +% +% Although efficient, nms is O(n^2). To speed things up for large n, can +% divide data into two parts (according to x or y coordinate), run nms on +% each part, combine and run nms on the result. If maxn is specified, will +% split the data in half if n>maxn. Note that this is a heuristic and can +% change the results of nms. Moreover, setting maxn too small will cause an +% increase in overall performance time. +% +% Finally, the bbs are optionally resized before performing nms. 
The +% resizing is important as some detectors return bbs that are padded. For +% example, if a detector returns a bounding box of size 128x64 around +% objects of size 100x43 (as is typical for some pedestrian detectors on +% the INRIA pedestrian database), the resize parameters should be {100/128, +% 43/64, 0}, see bbApply>resize() for more info. +% +% USAGE +% bbs = bbNms( bbs, [varargin] ) +% +% INPUTS +% bbs - original bbs (must be of form [x y w h wt bbType]) +% varargin - additional params (struct or name/value pairs) +% .type - ['max'] 'max', 'maxg', 'ms', 'cover', or 'none' +% .thr - [-inf] threshold below which to discard (0 for 'ms') +% .maxn - [inf] if n>maxn split and run recursively (see above) +% .radii - [.15 .15 1 1] supression radii ('ms' only, see above) +% .overlap - [.5] area of overlap for bbs +% .ovrDnm - ['union'] area of overlap denominator ('union' or 'min') +% .resize - {} parameters for bbApply('resize') +% .separate - [0] run nms separately on each bb type (bbType) +% +% OUTPUTS +% bbs - suppressed bbs +% +% EXAMPLE +% bbs=[0 0 1 1 1; .1 .1 1 1 1.1; 2 2 1 1 1]; +% bbs1 = bbNms(bbs, 'type','max' ) +% bbs2 = bbNms(bbs, 'thr',.5, 'type','ms') +% +% See also bbApply, nonMaxSuprList +% +% Piotr's Computer Vision Matlab Toolbox Version 2.60 +% Copyright 2014 Piotr Dollar. [pdollar-at-gmail.com] +% Licensed under the Simplified BSD License [see external/bsd.txt] + +% get parameters +dfs={'type','max','thr',[],'maxn',inf,'radii',[.15 .15 1 1],... + 'overlap',.5,'ovrDnm','union','resize',{},'separate',0}; +[type,thr,maxn,radii,overlap,ovrDnm,resize,separate] = ... 
+ getPrmDflt(varargin,dfs,1); +if(isempty(thr)), if(strcmp(type,'ms')), thr=0; else thr=-inf; end; end +if(strcmp(ovrDnm,'union')), ovrDnm=1; elseif(strcmp(ovrDnm,'min')), + ovrDnm=0; else assert(false); end +assert(maxn>=2); assert(numel(overlap)==1); + +% discard bbs below threshold and run nms1 +if(isempty(bbs)), bbs=zeros(0,5); end; if(strcmp(type,'none')), return; end +kp=bbs(:,5)>thr; bbs=bbs(kp,:); if(isempty(bbs)), return; end +if(~isempty(resize)), bbs=bbApply('resize',bbs,resize{:}); end +pNms1={type,thr,maxn,radii,overlap,0}; +if(~separate || size(bbs,2)<6), bbs=nms1(bbs,pNms1{:}); else + ts=unique(bbs(:,6)); m=length(ts); bbs1=cell(1,m); + for t=1:m, bbs1{t}=nms1(bbs(bbs(:,6)==ts(t),:),pNms1{:}); end + bbs=cat(1,bbs1{:}); +end + + function bbs = nms1( bbs, type, thr, maxn, radii, overlap, isy ) + % if big split in two, recurse, merge, then run on merged + if( size(bbs,1)>maxn ) + n2=floor(size(bbs,1)/2); [~,ord]=sort(bbs(:,1+isy)+bbs(:,3+isy)/2); + bbs0=nms1(bbs(ord(1:n2),:),type,thr,maxn,radii,overlap,~isy); + bbs1=nms1(bbs(ord(n2+1:end),:),type,thr,maxn,radii,overlap,~isy); + bbs=[bbs0; bbs1]; + end + % run actual nms on given bbs + switch type + case 'max', bbs = nmsMax(bbs,overlap,0,ovrDnm); + case 'maxg', bbs = nmsMax(bbs,overlap,1,ovrDnm); + case 'ms', bbs = nmsMs(bbs,thr,radii); + case 'cover', bbs = nmsCover(bbs,overlap,ovrDnm); + otherwise, error('unknown type: %s',type); + end + end + + function bbs = nmsMax( bbs, overlap, greedy, ovrDnm ) + % for each i suppress all j st j>i and area-overlap>overlap + [~,ord]=sort(bbs(:,5),'descend'); bbs=bbs(ord,:); + n=size(bbs,1); kp=true(1,n); as=bbs(:,3).*bbs(:,4); + xs=bbs(:,1); xe=bbs(:,1)+bbs(:,3); ys=bbs(:,2); ye=bbs(:,2)+bbs(:,4); + for i=1:n, if(greedy && ~kp(i)), continue; end + for j=(i+1):n, if(kp(j)==0), continue; end + iw=min(xe(i),xe(j))-max(xs(i),xs(j)); if(iw<=0), continue; end + ih=min(ye(i),ye(j))-max(ys(i),ys(j)); if(ih<=0), continue; end + o=iw*ih; if(ovrDnm), u=as(i)+as(j)-o; else 
u=min(as(i),as(j)); end + o=o/u; if(o>overlap), kp(j)=0; end + end + end + bbs=bbs(kp>0,:); + end + + function bbs = nmsMs( bbs, thr, radii ) + % position = [x+w/2,y+h/2,log2(w),log2(h)], ws=weights-thr + ws=bbs(:,5)-thr; w=bbs(:,3); h=bbs(:,4); n=length(w); + ps=[bbs(:,1)+w/2 bbs(:,2)+h/2 log2(w) log2(h)]; + % find modes starting from each elt, then merge nodes that are same + ps1=zeros(n,4); ws1=zeros(n,1); stopThr=1e-2; + for i=1:n, [ps1(i,:), ws1(i,:)]=nmsMs1(i); end + [ps,ws] = nonMaxSuprList(ps1,ws1,stopThr*100,[],[],2); + % convert back to bbs format and sort by weight + w=pow2(ps(:,3)); h=pow2(ps(:,4)); + bbs=[ps(:,1)-w/2 ps(:,2)-h/2 w h ws+thr]; + [ws,ord]=sort(ws,'descend'); bbs=bbs(ord,:); + + function [p,w]=nmsMs1(ind) + % variable bandwith kernel (analytically defined) + p=ps(ind,:); [n,m]=size(ps); onesN=ones(n,1); + h = [pow2(ps(:,3)) pow2(ps(:,4)) onesN onesN]; + h = h .* radii(onesN,:); hInv=1./h; + while(1) + % compute (weighted) squared Euclidean distance to each neighbor + d=(ps-p(onesN,:)).*hInv; d=d.*d; d=sum(d,2); + % compute new mode + wMask=ws.*exp(-d); wMask=wMask/sum(wMask); p1=wMask'*ps; + % stopping criteria + diff=sum(abs(p1-p))/m; p=p1; if(diffoverlap), N(i,j)=1; end + end + end + % perform set cover operation (greedily choose next best) + N=N+N'; bbs1=zeros(n,5); n1=n; c=0; + while( n1>0 ), [~,i0]=max(N*bbs(:,5)); + N0=N(:,i0)==1; n1=n1-sum(N0); N(N0,:)=0; N(:,N0)=0; + c=c+1; bbs1(c,1:4)=bbs(i0,1:4); bbs1(c,5)=sum(bbs(N0,5)); + end + bbs=bbs1(1:c,:); + end +end diff --git a/gradientMex.mexw64 b/gradientMex.mexw64 new file mode 100644 index 0000000..fdcb9e6 Binary files /dev/null and b/gradientMex.mexw64 differ diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..9d52972 --- /dev/null +++ b/readme.txt @@ -0,0 +1,59 @@ + + Visual Tracking Using Attention-Modulated Disintegration and Integration + + J. Choi, H. J. Chang, J. Jeong, Y. Demiris, J. Y. 
Choi + CVPR2016 + +________________ +Project webpage: https://sites.google.com/site/jwchoivision/ + +This MATLAB program implements a visual tracker, SCT4. + +It is free for research use. If you find it useful, please acknowledge the paper +above with a reference. + + +__________ +Quickstart + +1. Extract the zip-file somewhere. + +2. Run 'demo.m' + +3. The tracking results of sample video ('Deer') will be shown. + + +__________ +References + +[1] J. Choi, H. J. Chang, J. Jeong, Y. Demiris, J. Y. Choi, "Visual Tracking +Using Attention-Modulated Disintegration and Integration", CVPR, 2016 + +[2] P. Dollar, "Piotr's Computer Vision Matlab Toolbox (PMT)", +http://vision.ucsd.edu/~pdollar/toolbox/doc/index.html. + +[3] J. F. Henriques, R. Caseiro, P. Martins, and J. Batista, "High-Speed Tracking +with Kernelized Correlation Filters", IEEE Transactions on PAMI, 2015 + +[4] Y. Wu, J. Lim, M.-H. Yang, "Online Object Tracking: A Benchmark", CVPR 2013. +Website: http://visual-tracking.net/ + +[5] P. Dollar, S. Belongie, P. Perona, "The Fastest Pedestrian Detector in the +West", BMVC 2010. + + +_____________________________________ +Copyright (c) 2016, Jongwon Choi + +Permission to use, copy, modify, and distribute this software for research +purposes with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ diff --git a/strong/eval_pgrf.m b/strong/eval_pgrf.m new file mode 100644 index 0000000..ebcc3d6 --- /dev/null +++ b/strong/eval_pgrf.m @@ -0,0 +1,49 @@ +function result = eval_pgrf(input, rf) + +feature = reshape(input, size(input,1)*size(input,2), size(input,3)); + +trees = rf.mainTree; +nTrees = size(trees,1); +d = rf.mainProb; + + +% main tree +prob = zeros(size(feature,1),nTrees); +c = zeros(size(feature,1),nTrees); +for i = 1:nTrees + prob(:,i) = d(forestInds(single(feature),trees{i,1}.thrs,trees{i,1}.fids,trees{i,1}.child,1), i); + +end + +%partial trees +leaf2partialTree = rf.leaf2partialTree; +num_partialTree = size(rf.partialTree,1); + +if(num_partialTree > 0) + partialTree = rf.partialTree; + d3 = rf.subProb; + + for j = 1:nTrees + idx = find(leaf2partialTree(:,j) > 0); + k = leaf2partialTree(idx,j); + + for i = 1:length(idx) + featureIdx = find(c(:,j)==idx(i)); + + if(~isempty(featureIdx)) + partialFeature = feature(featureIdx,:); + [hs, ps] = forestApply(single(partialFeature), partialTree{k,1}); + prob(featureIdx,j) = ps(:,2); + end + + end + + end + +end + + +prob = mean(prob,2); +bgDist = 1 - reshape(prob,[size(input,1),size(input,2)]); +result = bgDist; + diff --git a/strong/evaluate_stSaliency.m b/strong/evaluate_stSaliency.m new file mode 100644 index 0000000..da7b92f --- /dev/null +++ b/strong/evaluate_stSaliency.m @@ -0,0 +1,3 @@ +function output = evaluate_stSaliency(feature, rf) + +output = eval_pgrf(feature, rf); \ No newline at end of file diff --git a/strong/init_pgrf.m b/strong/init_pgrf.m new file mode 100644 index 0000000..887ec2b --- /dev/null +++ b/strong/init_pgrf.m @@ -0,0 +1,127 @@ +function [rf, result] = init_pgrf(input, mask, N_TREE, mainTree) + +maxDepth = 10; +maxNNode = 2^(maxDepth); +AMBIGUITY_THRESH = 0.40; + +origin_bgProb = mask(:); + +feature = reshape(input, size(input,1)*size(input,2), size(input,3)); + + +% training initial forest +inputFeature = cell(N_TREE,1); +inputBgProb = cell(N_TREE,1); + +for i = 1:N_TREE + 
idx = 1:size(feature,1); + inputFeature{i,1} = feature(idx,:); + inputBgProb{i,1} = origin_bgProb(idx,:); +end + +%training (50ms) +trees = cell(N_TREE,1); +if(isempty(mainTree)) + for i = 1:N_TREE + params.M = 1; + params.N1 = size(inputFeature{i,1},1); + params.F1 = size(inputFeature{i,1},2); + + trees{i,1} = forestTrain(inputFeature{i,1}, inputBgProb{i,1}+1,params); + + end + + prob = zeros(size(feature,1),N_TREE); + d = zeros(maxNNode,N_TREE); + for i = 1:N_TREE + c = forestInds(single(feature),trees{i,1}.thrs,trees{i,1}.fids,trees{i,1}.child,1); + leafIdx = unique(c); + counts1 = histc(c, leafIdx); + counts2 = histc(c(origin_bgProb==1), leafIdx); + d(leafIdx,i) = counts2 ./ counts1; + prob(:,i) = d(c,i); + end + prob = mean(prob,2); + + leaf2partialTree = zeros(size(d)); + d3 = []; + partialTree = []; + +else + + for i = 1:N_TREE + trees{i,1} = mainTree; + end + prob = zeros(size(feature,1),N_TREE); + d = zeros(maxNNode,N_TREE); + for i = 1:N_TREE + c = forestInds(single(feature),trees{i,1}.thrs,trees{i,1}.fids,trees{i,1}.child,1); + leafIdx = unique(c); + counts1 = histc(c, leafIdx); + counts2 = histc(c(origin_bgProb==1), leafIdx); + d(leafIdx,i) = counts2 ./ counts1; + prob(:,i) = d(c,i); + end + + leaf2partialTree = zeros(size(d)); + num_partialTree = sum(sum(d > AMBIGUITY_THRESH & d < 1-AMBIGUITY_THRESH)); + + if(num_partialTree < 1) + prob = mean(prob,2); + + d3 = []; + partialTree = []; + + else + + partialTree = cell(num_partialTree,1); + k = 1; + d3 = cell(num_partialTree, 1); + for j = 1:N_TREE + idx = find(d(:,j) > AMBIGUITY_THRESH & d(:,j) < 1-AMBIGUITY_THRESH); + for i = 1:length(idx) + featureIdx = find(c(:,j)==idx(i)); + + if(~isempty(featureIdx)) + + partialFeature = feature(featureIdx,:); + + partialLabels = origin_bgProb(featureIdx); + params.M = 1; + params.N1 = size(partialFeature,1); + params.F1 = size(partialFeature,2); + + partialTree{k,1} = forestTrain(partialFeature, partialLabels+1 , params); + + if(partialTree{k,1}.fids > 1) + d3{k,1} 
= partialTree{k,1}.distr(:,1); + [hs, ps] = forestApply(single(partialFeature), partialTree{k,1}); + prob(featureIdx,j) = ps(:,2); + + leaf2partialTree(idx(i),j) = k; + k = k + 1; + + end + + end + + end + end + end + + prob = mean(prob,2); + +end + +rf.mainTree = trees; +rf.leaf2partialTree = leaf2partialTree; +if(size(partialTree,1)<1) + rf.partialTree = []; +else + rf.partialTree = partialTree; +end +rf.mainProb = d; +rf.subProb = d3; + +bgDist = 1 - reshape(prob,[size(input,1),size(input,2)]); +result = bgDist; diff --git a/strong/init_stSaliency.m b/strong/init_stSaliency.m new file mode 100644 index 0000000..2d96335 --- /dev/null +++ b/strong/init_stSaliency.m @@ -0,0 +1,5 @@ +function [rf, result] = init_stSaliency(feature, mask) + +N_TREE = 1; + +[rf, result] = init_pgrf(feature, mask, N_TREE,[]); diff --git a/strong/update_pgrf.m b/strong/update_pgrf.m new file mode 100644 index 0000000..c3b30de --- /dev/null +++ b/strong/update_pgrf.m @@ -0,0 +1,206 @@ +function [rf, result] = update_pgrf(input, mask, rf, N_TREE) + +Beta = 0.15; +G_para = 0.5; + +maxDepth = 10; +maxDepth2 = 5; +maxNNode = 2^(maxDepth); +AMBIGUITY_THRESH = 0.40; + +TOL = 10; + +origin_bgProb = mask(:); + +if(size(input,3) > 1) + input = cat(3, input, rgb2lab(input) / 255 + 0.5); +end +feature = reshape(input, size(input,1)*size(input,2), size(input,3)); + +% training initial forest +inputFeature = cell(N_TREE,1); +inputBgProb = cell(N_TREE,1); + +for i = 1:N_TREE + idx = randsample(size(feature,1),round(0.8*(size(feature,1)))); + inputFeature{i,1} = feature(idx,:); + inputBgProb{i,1} = origin_bgProb(idx,:); +end + + +%training (50ms) +trees = rf.mainTree(1:N_TREE); + +% predict (70ms) +prob = zeros(size(feature,1),N_TREE); +d = zeros(maxNNode,N_TREE); +for i = 1:N_TREE + [yfit, c] = trees{i,1}(feature); + leafIdx = unique(c); + counts1 = histc(c, leafIdx); + counts2 = histc(c(origin_bgProb==1), leafIdx); + d(leafIdx,i) = counts2 ./ counts1; + prob(:,i) = str2num(cell2mat(yfit(:))); +end 
+prob = mean(prob,2); + +bgProb = prob; + +bgDist = 1 - reshape(bgProb,size(mask)); +fgDist = 1 - bgDist; + + +%%% Get the image gradient +gradH = input(:,2:end,:) - input(:,1:end-1,:); +gradV = input(2:end,:,:) - input(1:end-1,:,:); + +gradH = sum(gradH.^2, 3); +gradV = sum(gradV.^2, 3); + +hC = exp(-Beta.*gradH./mean(gradH(:))); +vC = exp(-Beta.*gradV./mean(gradV(:))); + +%%% These matrices will evantually use as inputs to Bagon's code +hC = [hC zeros(size(hC,1),1)]; +vC = [vC ;zeros(1, size(vC,2))]; +sc = [0 G_para;G_para 0]; + +fgDist(mask(:)==1) = max(max(fgDist)); +bgDist(mask(:)==1) = min(min(bgDist)); + +dc = cat(3, exp(bgDist), log(fgDist+1)); +dc = cat(3, (bgDist), (fgDist)); +graphHandle = GraphCut('open', dc , sc, vC, hC); +graphHandle = GraphCut('set', graphHandle, int32(mask == 0)); +[graphHandle currLabel] = GraphCut('expand', graphHandle,1000); +currLabel = 1 - currLabel; +GraphCut('close', graphHandle); + +bgProb = double(currLabel); +prevLabel = currLabel; + +for ii = 1:5 + + ratio = sum(currLabel(:)) / length(currLabel(:)); + + label = bgProb(:); + + + % predict + prob = zeros(size(feature,1),N_TREE); + c = zeros(size(feature,1),N_TREE); + d = zeros(maxNNode,N_TREE); + for i = 1:N_TREE + [yfit, c2] = eval(trees{i,1},feature); + leafIdx = unique(c2); + counts1 = histc(c2, leafIdx); + counts2 = histc(c2(label==1), leafIdx); + if(size(counts1,1) ~= size(counts2,1)) + counts2 = counts2'; + end + d(leafIdx,i) = counts2 ./ counts1; + + prob(:,i) = str2num(cell2mat(yfit(:))); + c(:,i) = c2; + end + + + leaf2partialTree = zeros(size(d)); + num_partialTree = sum(sum(d > AMBIGUITY_THRESH & d < 1-AMBIGUITY_THRESH)); + + if(num_partialTree < 1) + prob = mean(prob,2); + bgProb = prob; + + bgDist = 1 - reshape(bgProb,size(mask)); + + bgDist(mask(:)==1) = min(min(bgDist)); + + d3 = []; + partialTree = []; + break; + end + + partialTree = cell(num_partialTree,1); + k = 1; + d3 = cell(num_partialTree, 1); + for j = 1:N_TREE + idx = find(d(:,j) > AMBIGUITY_THRESH 
& d(:,j) < 1-AMBIGUITY_THRESH); + for i = 1:length(idx) + featureIdx = find(c(:,j)==idx(i)); + + if(~isempty(featureIdx)) + + partialFeature = feature(featureIdx,:); + + partialTree{k,1} = classregtree(partialFeature, bgProb(featureIdx),'maxdepth',maxDepth2,'method','classification'); + + if(partialTree{k,1}.numnodes > 1) + + d3{k,1} = classprob(partialTree{k,1}); + [yfit, c3] = eval(partialTree{k,1}, partialFeature); + + prob(featureIdx,j) = d3{k,1}(c3(:),2); + + leaf2partialTree(idx(i),j) = k; + k = k + 1; + + end + + end + + end + end + + prob = mean(prob,2); + bgProb = prob; + + bgDist = 1 - reshape(bgProb,size(mask)); + fgDist = 1 - bgDist; + + %%% Get the image gradient + gradH = input(:,2:end,:) - input(:,1:end-1,:); + gradV = input(2:end,:,:) - input(1:end-1,:,:); + + gradH = sum(gradH.^2, 3); + gradV = sum(gradV.^2, 3); + + hC2 = exp(-Beta.*gradH./mean(gradH(:))); + vC2 = exp(-Beta.*gradV./mean(gradV(:))); + + %%% These matrices will evantually use as inputs to Bagon's code + hC = [hC2 zeros(size(hC2,1),1)]; + vC = [vC2 ;zeros(1, size(vC2,2))]; + sc = [0 G_para;G_para 0]; + + fgDist(mask(:)==1) = max(max(fgDist)); + bgDist(mask(:)==1) = min(min(bgDist)); + + dc = cat(3, (bgDist), (fgDist)); + graphHandle = GraphCut('open', dc , sc, vC, hC); + graphHandle = GraphCut('set', graphHandle, prevLabel); + [graphHandle, currLabel] = GraphCut('expand', graphHandle, 1000); + currLabel = 1 - currLabel; + GraphCut('close', graphHandle); + + bgProb = double(currLabel); + + if(sum(abs(prevLabel - currLabel)) < TOL) + break; + else + prevLabel = currLabel; + end + +end + +result = bgDist; + +rf.mainTree = trees; +rf.leaf2partialTree = leaf2partialTree; +if(size(partialTree,1)<1) + rf.partialTree = []; +else + rf.partialTree = partialTree; +end +rf.mainProb = d; +rf.subProb = d3; \ No newline at end of file diff --git a/strong/update_stSaliency.m b/strong/update_stSaliency.m new file mode 100644 index 0000000..af81a46 --- /dev/null +++ b/strong/update_stSaliency.m @@ -0,0 
+1,33 @@ +function [new_rf, result] = update_stSaliency(feature, mask, old_rf) + +N_TREE = 1; +N_ADDTREE = 1; +MAX_TREE = 20; + +[rf, ~] = init_pgrf(feature, mask, N_TREE,old_rf.mainTree{1,1}); + +new_rf.mainTree = cat(1, rf.mainTree(1:N_ADDTREE,1), old_rf.mainTree); +aa = max(max(rf.leaf2partialTree(:, 1:N_ADDTREE))); +new_rf.leaf2partialTree = ... + cat(2, rf.leaf2partialTree(:, 1:N_ADDTREE), old_rf.leaf2partialTree+aa*(old_rf.leaf2partialTree>0)); +if(aa > 0) + new_rf.partialTree = cat(1, rf.partialTree(1:aa,1), old_rf.partialTree); + new_rf.subProb = cat(1, rf.subProb(1:aa,1), old_rf.subProb); +else + new_rf.partialTree = old_rf.partialTree; + new_rf.subProb = old_rf.subProb; +end +new_rf.mainProb = cat(2, rf.mainProb(:,1:N_ADDTREE), old_rf.mainProb); + + +if(size(new_rf.mainTree, 1) > MAX_TREE) + new_rf.mainTree = new_rf.mainTree(1:MAX_TREE, 1); + new_rf.leaf2partialTree = new_rf.leaf2partialTree(:,1:MAX_TREE); + if(max(vec(new_rf.leaf2partialTree)) > 0) + new_rf.partialTree = new_rf.partialTree(1:max(vec(new_rf.leaf2partialTree)), 1); + new_rf.subProb = new_rf.subProb(1:size(new_rf.partialTree,1), 1); + end + new_rf.mainProb = new_rf.mainProb(:,1:MAX_TREE); +end + +result = eval_pgrf(feature, new_rf); diff --git a/strong/vec.m b/strong/vec.m new file mode 100644 index 0000000..55c78b7 --- /dev/null +++ b/strong/vec.m @@ -0,0 +1,3 @@ +function output = vec(input) + +output = input(:); \ No newline at end of file