DCFNet 2.0 [train on VID][AUC 66.5% on OTB2013]
foolwood committed Sep 13, 2017
1 parent 606b79a commit d36dc49
Showing 46 changed files with 829 additions and 1,250 deletions.
Binary file removed DCFNet/DCFNet-dataset-3-net-21-loss-1-epoch-50.mat
Binary file added DCFNet/DCFNet-net-7-125-2.mat
85 changes: 41 additions & 44 deletions DCFNet/run_DCFNet.m
@@ -1,18 +1,33 @@
function res = run_DCFNet(subS, rp, bSaveImage, param)
init_rect = subS.init_rect;
img_files = subS.s_frames;
num_frame = numel(img_files);
result = repmat(init_rect,[num_frame, 1]);
if nargin < 4
param = {};
end
vl_setupnn();
im = vl_imreadjpeg(img_files,'numThreads', 12);
function res = run_DCFNet(subS, rp, bSaveImage, varargin)

tracking_env();
cur_path = fileparts(mfilename('fullpath'));
net_file = 'DCFNet-net-7-125-2.mat';
net = load(fullfile(cur_path, net_file));
net = vl_simplenn_tidy(net.net);

param = [];
param.gpu = true;
param.visual = false;
param.net = net;
param.interp_factor = net.meta.interp_factor;
param.scale_penalty = net.meta.scale_penalty;
param.scale_step = net.meta.scale_step;
param.padding = net.meta.padding;
param = vl_argparse(param, varargin) ;

tic;
param.lambda = 1e-4;
init_rect = subS.init_rect; %1-index
im = vl_imreadjpeg(subS.s_frames, 'numThreads', 12);

num_frame = numel(im);
result = repmat(init_rect, [num_frame, 1]);

init_rect(1:2) = init_rect(1:2)-1; %0-index
[state, ~] = DCFNet_initialize(im{1}, init_rect, param);
for frame = 2:num_frame
[state, region] = DCFNet_update(state, im{frame});
[state, region] = DCFNet_update(state,im{frame});
region(1:2) = region(1:2)+1; %1-index
result(frame,:) = region;
end
time = toc;
@@ -26,24 +41,18 @@
state.visual = false;

state.lambda = 1e-4;
state.padding = 1.5;
state.padding = 2.0;
state.output_sigma_factor = 0.1;
state.interp_factor = 0.002;
state.interp_factor = 0.01;

state.num_scale = 3;
state.scale_step = 1.03;
state.scale_step = 1.0275;
state.min_scale_factor = 0.2;
state.max_scale_factor = 5;
state.scale_penalty = 0.9925;
state.net = [];
state.model_path = './';
state.net_name = 'DCFNet-dataset-3-net-21-loss-1-epoch-50';
state = vl_argparse(state, param);

net = load(fullfile(state.model_path,state.net_name));
net = vl_simplenn_tidy(net.net);
state.net = net;

state.scale_factor = state.scale_step.^((1:state.num_scale)-ceil(state.num_scale/2));
state.scale_penalties = ones(1,state.num_scale);
state.scale_penalties((1:state.num_scale)~=ceil(state.num_scale/2)) = state.scale_penalty;
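% off-center scales are multiplied by scale_penalty (<1), so the current scale is slightly preferred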
@@ -79,18 +88,16 @@
target = bsxfun(@minus, patch, state.net_average_image);
res = vl_simplenn(state.net, target);

xf = fft2(bsxfun(@times, res(end).x, state.cos_window));
x = bsxfun(@times, res(end).x, state.cos_window);
xf = fft2(x);
state.numel_xf = numel(xf);
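% linear-kernel auto-correlation and closed-form ridge-regression filter, computed in the Fourier domain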
kf = sum(xf.*conj(xf),3)/state.numel_xf;
state.model_alphaf = state.yf ./ (kf + state.lambda);
state.model_xf = xf;

location = region;
if state.visual
subplot(1,2,1);imshow(uint8(patch));
subplot(1,2,2);imshow(uint8(I));
rectangle('Position',location,'EdgeColor','g');
drawnow;
state.videoPlayer = vision.VideoPlayer('Position', [100 100 [size(I,2), size(I,1)]+30]);
end

end
@@ -102,16 +109,15 @@
search = bsxfun(@minus, patch_crop, state.net_average_image);
res = vl_simplenn(state.net, search);

zf = fft2(bsxfun(@times, res(end).x, state.cos_window));
z = bsxfun(@times, res(end).x, state.cos_window);
zf = fft2(z);
kzf = sum(bsxfun(@times, zf, conj(state.model_xf)),3)/state.numel_xf;

response = squeeze(real(ifft2(bsxfun(@times, state.model_alphaf, kzf))));
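% the peak of this per-scale response map gives the target translation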
[max_response, max_index] = max(reshape(response,[],state.num_scale));
max_response = gather(max_response);
max_index = gather(max_index);
% max_response = max_response.*state.scale_penalty;
% scale_delta = find(max_response == max(max_response),1,'last');
[~,scale_delta] = max(max_response.*state.scale_penalty);
[~,scale_delta] = max(max_response.*state.scale_penalties);
[vert_delta, horiz_delta] = ind2sub(state.net_input_size, max_index(scale_delta));

if vert_delta > state.net_input_size(1) / 2 %wrap around to negative half-space of vertical axis
@@ -129,7 +135,8 @@
target = bsxfun(@minus, patch, state.net_average_image);

res = vl_simplenn(state.net, target);
xf = fft2(bsxfun(@times, res(end).x, state.cos_window));
x = bsxfun(@times, res(end).x, state.cos_window);
xf = fft2(x);
kf = sum(xf .* conj(xf), 3) / state.numel_xf;
alphaf = state.yf ./ (kf + state.lambda); %equation for fast training

@@ -141,20 +148,10 @@
location = double(gather(box));

if state.visual
subplot(1,2,1);im_show_add_response(patch_crop(:,:,:,scale_delta), response(:,:,scale_delta));
subplot(1,2,2);imshow(uint8(I));
rectangle('Position',location,'EdgeColor','g');
drawnow;
im_show = insertShape(uint8(gather(I)), 'Rectangle', location, 'LineWidth', 4, 'Color', 'yellow');
step(state.videoPlayer, im_show);
end
end

function im_show_add_response(im,response)
sz = size(response);
response = circshift(response, floor(sz(1:2) / 2) - 1);

imshow(uint8(gather(im)));hold on;
h = imagesc(response);colormap(jet);
set(h,'AlphaData',gather(response)+0.6);
end

function labels = gaussian_shaped_labels(sigma, sz)
@@ -190,4 +187,4 @@ function im_show_add_response(im,response)
g = reshape(g, 2, output_sz(1), output_sz(2), []);

img_crop = vl_nnbilinearsampler(img, g);
end
end
5 changes: 5 additions & 0 deletions DCFNet/tracking_env.m
@@ -0,0 +1,5 @@
function tracking_env()
addpath('../matconvnet/matlab');
run('vl_setupnn.m') ;
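% 'patient' planning lets FFTW search longer for a fast plan, speeding up the repeated fft2/ifft2 calls during tracking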
fftw('planner','patient');
end
Binary file removed DCFNet_result.zip
Binary file removed EAO_RANK2015.png
Empty file removed data/NUS_PRO/.gitkeep
18 changes: 18 additions & 0 deletions data/README.md
@@ -0,0 +1,18 @@
# ILSVRC2015 VID

You can download the **training data** from the [ILSVRC2015 VID website](http://bvisionweb1.cs.unc.edu/ilsvrc2015/download-videos-3j16.php#vid) and unzip it to the `data/ILSVRC/` folder of the repository.
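
A minimal MATLAB sketch of the unpacking step, assuming the archive was saved as `ILSVRC2015_VID.tar.gz` (the file name is hypothetical; use whatever the download page actually provides):

```matlab
% extract the VID training archive into data/ILSVRC/ (archive name is a placeholder)
untar('ILSVRC2015_VID.tar.gz', fullfile('data', 'ILSVRC'));
```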


```
@article{ILSVRC15,
Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
Title = {{ImageNet Large Scale Visual Recognition Challenge}},
Year = {2015},
journal = {International Journal of Computer Vision (IJCV)},
doi = {10.1007/s11263-015-0816-y},
volume={115},
number={3},
pages={211-252}
}
```

Empty file removed data/Temple-color-128/.gitkeep
Empty file removed data/UAV123/.gitkeep
Binary file removed demo/DCFNet-dataset-3-net-21-loss-1-epoch-50.mat
24 changes: 0 additions & 24 deletions demo/demoDCFNet.m

This file was deleted.

18 changes: 18 additions & 0 deletions demo/demo_DCFNet.m
@@ -0,0 +1,18 @@
function res = demo_DCFNet()
addpath(fullfile('..','DCFNet'));

init_rect = [129,80,64,78];
img_file = dir('./David/img/*.jpg');
img_file = fullfile('./David/img/', {img_file.name});
subS.init_rect = init_rect;
subS.s_frames = img_file;

param = [];
param.gpu = true;
gpuDevice(1);
param.visual = true;

res = run_DCFNet(subS,0,0,param);
disp(['fps: ', num2str(res.fps)]);

end
Binary file removed otb_result.png
77 changes: 41 additions & 36 deletions readme.md
@@ -4,76 +4,81 @@ By Qiang Wang, Jin Gao, Junliang Xing, Mengdan Zhang, Weiming Hu

### Introduction

![DCFNet](DCFNet.png)
![DCFNet](result/DCFNet.png)

> Discriminant Correlation Filters (DCF) based methods have become a dominant approach to online object tracking. The features used in these methods, however, are either hand-crafted features such as HoGs, or convolutional features trained independently on other tasks such as image classification. In this work, we present an *end-to-end lightweight* network architecture, namely **DCFNet**, to learn the convolutional features and perform the correlation tracking process simultaneously.
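
For reference, a minimal MATLAB sketch of the correlation-filter learning and detection steps that run on top of the learned features; it mirrors the formulas in `DCFNet/run_DCFNet.m` shown earlier in this commit, with illustrative variable names and toy data:

```matlab
% toy dimensions standing in for the tracker's feature maps (illustrative only)
H = 125; W = 125; C = 32; lambda = 1e-4;
x = randn(H, W, C);            % features of the target patch in frame t
z = randn(H, W, C);            % features of the search patch in frame t+1
y = zeros(H, W); y(1, 1) = 1;  % desired response (a Gaussian-shaped label in the real tracker)

% --- fast training: closed-form DCF in the Fourier domain ---
xf = fft2(x);                                  % per-channel 2-D DFT of the features
kf = sum(xf .* conj(xf), 3) / numel(xf);       % linear-kernel auto-correlation
alphaf = fft2(y) ./ (kf + lambda);             % learned filter

% --- detection: correlate the filter with the new search features ---
zf = fft2(z);
kzf = sum(zf .* conj(xf), 3) / numel(xf);      % cross-correlation with the stored model
response = real(ifft2(alphaf .* kzf));         % correlation response map
[~, idx] = max(response(:));
[dy, dx] = ind2sub(size(response), idx);       % peak location gives the translation
```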
## Requirements: software
## Contents
1. [Requirements](#requirements)
2. [Tracking](#tracking)
3. [Training](#training)
4. [Results](#results)
5. [Citation](#citing-dcfnet)

## Requirements

```
git clone --depth=1 https://github.com/foolwood/DCFNet.git
```

Requirements for MatConvNet 1.0-beta24 \(see: [MatConvNet](http://www.vlfeat.org/matconvnet/install/)\)

Requirements for MatConvNet 1.0-beta23 \(see: [MatConvNet](http://www.vlfeat.org/matconvnet/install/)\)
1. Downloading MatConvNet

```
cd <DCFNet>
git clone https://github.com/vlfeat/matconvnet.git
```

2. Compiling MatConvNet

Run the following command from the MATLAB command window:
```
run <matconvnet>/matlab/vl_compilenn
cd matconvnet
run matlab/vl_compilenn
```

[Optional]

If you want to reproduce the speed reported in our paper, please follow the [MatConvNet installation guide](http://www.vlfeat.org/matconvnet/install/) to compile the **GPU** version.

## Tracking

```
git clone --depth=1 https://github.com/foolwood/DCFNet.git
```
The file `demo/demoDCFNet.m` is used to test our algorithm.

To verify [**OTB**](http://cvlab.hanyang.ac.kr/tracker_benchmark/index.html) and [**VOT**](http://www.votchallenge.net/) performance, you can simply copy `DCFNet/` into the OTB toolkit and integrate `track4vot/` into the VOT toolkit.
To reproduce the performance on [**OTB**](http://cvlab.hanyang.ac.kr/tracker_benchmark/index.html), you can simply copy `DCFNet/` into the OTB toolkit.

## Training
[Note] Configure MatConvNet path in `tracking_env.m`

1.Download the training data.
## Training

**TColor-128**:[[LINK](http://www.dabi.temple.edu/~hbling/data/TColor-128/Temple-color-128.zip)]
1. Download the training data. ([**VID**](data))

**UAV123**: [[GoogleDrive](https://goo.gl/iQf0Y7)]
2. Data preprocessing in MATLAB.

**NUS_PRO**:[GoogleDrive] ([part1](https://drive.google.com/drive/folders/0B6eYf2Rj8c79Smk4Q1BxU1ROS28))([part2](https://drive.google.com/folderview?id=0BwFzRq8t3gu5VWFRNGp5dlBkSU0&usp=sharing))]
```matlab
cd training/dataPreprocessing
data_preprocessing();
analyze_data();
```

It should have this basic structure
3. Train a DCFNet model.

```
data
|-- NUS_PRO
|-- Temple-color-128
|-- UAV123
```

```
train_DCFNet();
```

2.Run `training/train_cnn_dcf.m` to train a model.


You can choose the network architecture by setting `opts.networkType = 21` (this parameter is 21 by default).

## Results on OTB and VOT2015

:high_brightness: **Raw Results** :high_brightness:

[Raw result files for the OTB and VOT2015 datasets.](DCFNet_result.zip)



**AUC on OTB2013 and OTB2015 (OPE)**

![otb_result](otb_result.png)
## Results

**VOT2015 EAO result**
**DCFNet** obtains significant improvements through:

![vot2015](EAO_RANK2015.png)
- Good training dataset (TC128+UAV123+NUS_PRO -> VID)
- Good learning policy (constant 1e-5 -> logspace(-2,-5,50)); see the sketch below
- Large padding size (1.5 -> 2.0)
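
A minimal sketch of such a logspace learning-rate schedule with MatConvNet-style training options (whether the training script in this commit sets the options exactly this way is an assumption):

```matlab
% decay the learning rate log-uniformly from 1e-2 to 1e-5 over 50 epochs
opts.learningRate = logspace(-2, -5, 50);
opts.numEpochs = numel(opts.learningRate);
```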

The OPE/TRE/SRE results on OTB are available from [BaiduYun](http://pan.baidu.com/s/1boKcXkF) or [GoogleDrive](https://drive.google.com/open?id=0BwWEXCnRCqJ-SHNaYUJwaW81R1E).

![result on OTB](result/OTB.png)

## Citing DCFNet

File renamed without changes
Binary file added result/OTB.png