DCFNet 2.0 [train on VID][AUC 66.5% on OTB2013]
foolwood committed Sep 13, 2017
1 parent 606b79a commit d36dc49
Showing 46 changed files with 829 additions and 1,250 deletions.
Binary file removed DCFNet/DCFNet-dataset-3-net-21-loss-1-epoch-50.mat
Binary file added DCFNet/DCFNet-net-7-125-2.mat
85 changes: 41 additions & 44 deletions DCFNet/run_DCFNet.m
@@ -1,18 +1,33 @@
function res = run_DCFNet(subS, rp, bSaveImage, param)
init_rect = subS.init_rect;
img_files = subS.s_frames;
num_frame = numel(img_files);
result = repmat(init_rect,[num_frame, 1]);
if nargin < 4
param = {};
end
vl_setupnn();
im = vl_imreadjpeg(img_files,'numThreads', 12);
function res = run_DCFNet(subS, rp, bSaveImage, varargin)

tracking_env();
cur_path = fileparts(mfilename('fullpath'));
net_file = 'DCFNet-net-7-125-2.mat';
net = load(fullfile(cur_path, net_file));
net = vl_simplenn_tidy(net.net);

param = [];
param.gpu = true;
param.visual = false;
param.net = net;
param.interp_factor = net.meta.interp_factor;
param.scale_penalty = net.meta.scale_penalty;
param.scale_step = net.meta.scale_step;
param.padding = net.meta.padding;
param = vl_argparse(param, varargin) ;

tic;
param.lambda = 1e-4;
init_rect = subS.init_rect; %1-index
im = vl_imreadjpeg(subS.s_frames, 'numThreads', 12);

num_frame = numel(im);
result = repmat(init_rect, [num_frame, 1]);

init_rect(1:2) = init_rect(1:2)-1; %0-index
[state, ~] = DCFNet_initialize(im{1}, init_rect, param);
for frame = 2:num_frame
[state, region] = DCFNet_update(state, im{frame});
[state, region] = DCFNet_update(state,im{frame});
region(1:2) = region(1:2)+1; %1-index
result(frame,:) = region;
end
time = toc;
@@ -26,24 +41,18 @@
state.visual = false;

state.lambda = 1e-4;
state.padding = 1.5;
state.padding = 2.0;
state.output_sigma_factor = 0.1;
state.interp_factor = 0.002;
state.interp_factor = 0.01;

state.num_scale = 3;
state.scale_step = 1.03;
state.scale_step = 1.0275;
state.min_scale_factor = 0.2;
state.max_scale_factor = 5;
state.scale_penalty = 0.9925;
state.net = [];
state.model_path = './';
state.net_name = 'DCFNet-dataset-3-net-21-loss-1-epoch-50';
state = vl_argparse(state, param);

net = load(fullfile(state.model_path,state.net_name));
net = vl_simplenn_tidy(net.net);
state.net = net;

state.scale_factor = state.scale_step.^((1:state.num_scale)-ceil(state.num_scale/2));
state.scale_penalties = ones(1,state.num_scale);
state.scale_penalties((1:state.num_scale)~=ceil(state.num_scale/2)) = state.scale_penalty;
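% off-center scales are multiplied by scale_penalty (<1), so the current scale is slightly preferred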
@@ -79,18 +88,16 @@
target = bsxfun(@minus, patch, state.net_average_image);
res = vl_simplenn(state.net, target);

xf = fft2(bsxfun(@times, res(end).x, state.cos_window));
x = bsxfun(@times, res(end).x, state.cos_window);
xf = fft2(x);
state.numel_xf = numel(xf);
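% linear-kernel auto-correlation and closed-form ridge-regression filter, computed in the Fourier domain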
kf = sum(xf.*conj(xf),3)/state.numel_xf;
state.model_alphaf = state.yf ./ (kf + state.lambda);
state.model_xf = xf;

location = region;
if state.visual
subplot(1,2,1);imshow(uint8(patch));
subplot(1,2,2);imshow(uint8(I));
rectangle('Position',location,'EdgeColor','g');
drawnow;
state.videoPlayer = vision.VideoPlayer('Position', [100 100 [size(I,2), size(I,1)]+30]);
end

end
@@ -102,16 +109,15 @@
search = bsxfun(@minus, patch_crop, state.net_average_image);
res = vl_simplenn(state.net, search);

zf = fft2(bsxfun(@times, res(end).x, state.cos_window));
z = bsxfun(@times, res(end).x, state.cos_window);
zf = fft2(z);
kzf = sum(bsxfun(@times, zf, conj(state.model_xf)),3)/state.numel_xf;

response = squeeze(real(ifft2(bsxfun(@times, state.model_alphaf, kzf))));
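% the peak of this per-scale response map gives the target translation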
[max_response, max_index] = max(reshape(response,[],state.num_scale));
max_response = gather(max_response);
max_index = gather(max_index);
% max_response = max_response.*state.scale_penalty;
% scale_delta = find(max_response == max(max_response),1,'last');
[~,scale_delta] = max(max_response.*state.scale_penalty);
[~,scale_delta] = max(max_response.*state.scale_penalties);
[vert_delta, horiz_delta] = ind2sub(state.net_input_size, max_index(scale_delta));

if vert_delta > state.net_input_size(1) / 2 %wrap around to negative half-space of vertical axis
@@ -129,7 +135,8 @@
target = bsxfun(@minus, patch, state.net_average_image);

res = vl_simplenn(state.net, target);
xf = fft2(bsxfun(@times, res(end).x, state.cos_window));
x = bsxfun(@times, res(end).x, state.cos_window);
xf = fft2(x);
kf = sum(xf .* conj(xf), 3) / state.numel_xf;
alphaf = state.yf ./ (kf + state.lambda); %equation for fast training

@@ -141,20 +148,10 @@
location = double(gather(box));

if state.visual
subplot(1,2,1);im_show_add_response(patch_crop(:,:,:,scale_delta), response(:,:,scale_delta));
subplot(1,2,2);imshow(uint8(I));
rectangle('Position',location,'EdgeColor','g');
drawnow;
im_show = insertShape(uint8(gather(I)), 'Rectangle', location, 'LineWidth', 4, 'Color', 'yellow');
step(state.videoPlayer, im_show);
end
end

function im_show_add_response(im,response)
sz = size(response);
response = circshift(response, floor(sz(1:2) / 2) - 1);

imshow(uint8(gather(im)));hold on;
h = imagesc(response);colormap(jet);
set(h,'AlphaData',gather(response)+0.6);
end

function labels = gaussian_shaped_labels(sigma, sz)
@@ -190,4 +187,4 @@ function im_show_add_response(im,response)
g = reshape(g, 2, output_sz(1), output_sz(2), []);

img_crop = vl_nnbilinearsampler(img, g);
end
end
5 changes: 5 additions & 0 deletions DCFNet/tracking_env.m
@@ -0,0 +1,5 @@
function tracking_env()
addpath('../matconvnet/matlab');
run('vl_setupnn.m') ;
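% 'patient' planning lets FFTW search longer for a fast plan, speeding up the repeated fft2/ifft2 calls during tracking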
fftw('planner','patient');
end
Binary file removed DCFNet_result.zip
Binary file removed EAO_RANK2015.png
Empty file removed data/NUS_PRO/.gitkeep
18 changes: 18 additions & 0 deletions data/README.md
@@ -0,0 +1,18 @@
# ILSVRC2015 VID

You can download the **training data** from the [ILSVRC2015 VID website](http://bvisionweb1.cs.unc.edu/ilsvrc2015/download-videos-3j16.php#vid) and unzip it to the `data/ILSVRC/` folder of the repository.
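
A minimal MATLAB sketch of the unpacking step, assuming the archive was saved as `ILSVRC2015_VID.tar.gz` (the file name is hypothetical; use whatever the download page actually provides):

```matlab
% extract the VID training archive into data/ILSVRC/ (archive name is a placeholder)
untar('ILSVRC2015_VID.tar.gz', fullfile('data', 'ILSVRC'));
```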


```
@article{ILSVRC15,
Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
Title = {{ImageNet Large Scale Visual Recognition Challenge}},
Year = {2015},
journal = {International Journal of Computer Vision (IJCV)},
doi = {10.1007/s11263-015-0816-y},
volume={115},
number={3},
pages={211-252}
}
```

Empty file removed data/Temple-color-128/.gitkeep
Empty file removed data/UAV123/.gitkeep
Binary file removed demo/DCFNet-dataset-3-net-21-loss-1-epoch-50.mat
24 changes: 0 additions & 24 deletions demo/demoDCFNet.m

This file was deleted.

18 changes: 18 additions & 0 deletions demo/demo_DCFNet.m
@@ -0,0 +1,18 @@
function res = demo_DCFNet()
addpath(fullfile('..','DCFNet'));

init_rect = [129,80,64,78];
img_file = dir('./David/img/*.jpg');
img_file = fullfile('./David/img/', {img_file.name});
subS.init_rect = init_rect;
subS.s_frames = img_file;

param = [];
param.gpu = true;
gpuDevice(1);
param.visual = true;

res = run_DCFNet(subS,0,0,param);
disp(['fps: ', num2str(res.fps)]);

end
Binary file removed otb_result.png
77 changes: 41 additions & 36 deletions readme.md
@@ -4,76 +4,81 @@ By Qiang Wang, Jin Gao, Junliang Xing, Mengdan Zhang, Weiming Hu

### Introduction

![DCFNet](DCFNet.png)
![DCFNet](result/DCFNet.png)

> Discriminant Correlation Filters (DCF) based methods have become a dominant approach to online object tracking. The features used in these methods, however, are either hand-crafted features such as HoGs, or convolutional features trained independently on other tasks such as image classification. In this work, we present an *end-to-end lightweight* network architecture, namely **DCFNet**, to learn the convolutional features and perform the correlation tracking process simultaneously.
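
For reference, a minimal MATLAB sketch of the correlation-filter learning and detection steps that run on top of the learned features; it mirrors the formulas in `DCFNet/run_DCFNet.m` shown earlier in this commit, with illustrative variable names and toy data:

```matlab
% toy dimensions standing in for the tracker's feature maps (illustrative only)
H = 125; W = 125; C = 32; lambda = 1e-4;
x = randn(H, W, C);            % features of the target patch in frame t
z = randn(H, W, C);            % features of the search patch in frame t+1
y = zeros(H, W); y(1, 1) = 1;  % desired response (a Gaussian-shaped label in the real tracker)

% --- fast training: closed-form DCF in the Fourier domain ---
xf = fft2(x);                                  % per-channel 2-D DFT of the features
kf = sum(xf .* conj(xf), 3) / numel(xf);       % linear-kernel auto-correlation
alphaf = fft2(y) ./ (kf + lambda);             % learned filter

% --- detection: correlate the filter with the new search features ---
zf = fft2(z);
kzf = sum(zf .* conj(xf), 3) / numel(xf);      % cross-correlation with the stored model
response = real(ifft2(alphaf .* kzf));         % correlation response map
[~, idx] = max(response(:));
[dy, dx] = ind2sub(size(response), idx);       % peak location gives the translation
```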
## Requirements: software
## Contents
1. [Requirements](#requirements)
2. [Tracking](#tracking)
3. [Training](#training)
4. [Results](#results)
5. [Citation](#citing-dcfnet)

## Requirements

```
git clone --depth=1 https://github.com/foolwood/DCFNet.git
```

Requirements for MatConvNet 1.0-beta24 \(see: [MatConvNet](http://www.vlfeat.org/matconvnet/install/)\)

Requirements for MatConvNet 1.0-beta23 \(see: [MatConvNet](http://www.vlfeat.org/matconvnet/install/)\)
1. Downloading MatConvNet

```
cd <DCFNet>
git clone https://github.com/vlfeat/matconvnet.git
```

2. Compiling MatConvNet

Run the following command from the MATLAB command window:
```
run <matconvnet>/matlab/vl_compilenn
cd matconvnet
run matlab/vl_compilenn
```

[Optional]

If you want to reproduce the speed reported in our paper, please follow the [MatConvNet installation guide](http://www.vlfeat.org/matconvnet/install/) to compile the **GPU** version.

## Tracking

```
git clone --depth=1 https://github.com/foolwood/DCFNet.git
```
The file `demo/demoDCFNet.m` is used to test our algorithm.

To verify [**OTB**](http://cvlab.hanyang.ac.kr/tracker_benchmark/index.html) and [**VOT**](http://www.votchallenge.net/) performance, you can simply copy `DCFNet/` into the OTB toolkit and integrate `track4vot/` into the VOT toolkit.
To reproduce the performance on [**OTB**](http://cvlab.hanyang.ac.kr/tracker_benchmark/index.html), you can simply copy `DCFNet/` into the OTB toolkit.

## Training
[Note] Configure MatConvNet path in `tracking_env.m`

1.Download the training data.
## Training

**TColor-128**:[[LINK](http://www.dabi.temple.edu/~hbling/data/TColor-128/Temple-color-128.zip)]
1. Download the training data. ([**VID**](data))

**UAV123**: [[GoogleDrive](https://goo.gl/iQf0Y7)]
2. Data preprocessing in MATLAB.

**NUS_PRO**:[GoogleDrive] ([part1](https://drive.google.com/drive/folders/0B6eYf2Rj8c79Smk4Q1BxU1ROS28))([part2](https://drive.google.com/folderview?id=0BwFzRq8t3gu5VWFRNGp5dlBkSU0&usp=sharing))]
```matlab
cd training/dataPreprocessing
data_preprocessing();
analyze_data();
```

It should have this basic structure
3. Train a DCFNet model.

```
data
|-- NUS_PRO
|-- Temple-color-128
|-- UAV123
```

```
train_DCFNet();
```

2.Run `training/train_cnn_dcf.m` to train a model.


You can choose the network architecture by setting `opts.networkType = 21` (this parameter is 21 by default).

## Results on OTB and VOT2015

:high_brightness: **Raw Results** :high_brightness:

[Raw result files for the OTB and VOT2015 datasets.](DCFNet_result.zip)



**AUC on OTB2013 and OTB2015 (OPE)**

![otb_result](otb_result.png)
## Results

**VOT2015 EAO result**
**DCFNet** obtains significant improvements through:

![vot2015](EAO_RANK2015.png)
- Good training dataset (TC128+UAV123+NUS_PRO -> VID)
- Good learning policy (constant 1e-5 -> logspace(-2,-5,50)); see the sketch below
- Large padding size (1.5 -> 2.0)
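
A minimal sketch of such a logspace learning-rate schedule with MatConvNet-style training options (whether the training script in this commit sets the options exactly this way is an assumption):

```matlab
% decay the learning rate log-uniformly from 1e-2 to 1e-5 over 50 epochs
opts.learningRate = logspace(-2, -5, 50);
opts.numEpochs = numel(opts.learningRate);
```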

The OPE/TRE/SRE results on OTB are available from [BaiduYun](http://pan.baidu.com/s/1boKcXkF) or [GoogleDrive](https://drive.google.com/open?id=0BwWEXCnRCqJ-SHNaYUJwaW81R1E).

![result on OTB](result/OTB.png)

## Citing DCFNet

File renamed without changes
Binary file added result/OTB.png