Skip to content
This repository has been archived by the owner on Aug 5, 2022. It is now read-only.

Faster-RCNN support #107

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 26 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,29 @@ CONTRIBUTION AGREEMENT
By contributing to the BVLC/caffe repository through pull-request, comment,
or otherwise, the contributor releases their content to the
license and copyright terms herein.

************************************************************************

Faster R-CNN

The MIT License (MIT)

Copyright (c) 2015 Microsoft Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
75 changes: 75 additions & 0 deletions include/caffe/gen_anchors.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
All modification made by Intel Corporation: © 2017 Intel Corporation

All contributions by the University of California:
Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
All rights reserved.

All other contributions:
Copyright (c) 2014, 2015, the respective contributors
All rights reserved.
For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef GEN_ANCHORS
#define GEN_ANCHORS

#include <vector>

using namespace std;

namespace caffe {

/**
 * @brief Axis-aligned box type for a faster-rcnn anchor, stored as the two
 *        corner points (start_x, start_y) - (end_x, end_y).
 */
struct anchor {
    // Zero-initialized by default: the previous version left these floats
    // indeterminate after anchor(), making any read-before-write UB.
    float start_x{0.0f};
    float start_y{0.0f};
    float end_x{0.0f};
    float end_y{0.0f};

    anchor() = default;

    // Construct from explicit corner coordinates.
    anchor(float s_x, float s_y, float e_x, float e_y)
        : start_x(s_x), start_y(s_y), end_x(e_x), end_y(e_y) {}
};


/**
 * @brief Generates a vector of anchors based on a size, list of ratios and list of scales
 *
 * @param base_size  side length of the reference window the anchors are derived from
 * @param ratios     aspect ratios to generate; one anchor shape per entry
 * @param scales     scale multipliers applied to the base size.
 *                   NOTE(review): passed by value while ratios is passed by
 *                   const& -- consider const& here too, but the matching
 *                   definition in the .cpp must be changed in lockstep.
 * @param anchors    [out] caller-allocated array; presumably must hold
 *                   ratios.size() * scales.size() entries -- confirm against
 *                   the implementation before relying on this.
 */
void GenerateAnchors(unsigned int base_size, const vector<float>& ratios, const vector<float> scales, // input
                     anchor *anchors); // output
}

#endif
3 changes: 2 additions & 1 deletion include/caffe/layers/dropout_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,14 @@ class DropoutLayer : public NeuronLayer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

/// when divided by UINT_MAX, the randomly generated values @f$u\sim U(0,1)@f$
/// when divided by UINT_MAX, the randomly generated values @f$u\sim U(0,1)@f$
Blob<unsigned int> rand_vec_;
/// the probability @f$ p @f$ of dropping any input
Dtype threshold_;
/// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$
Dtype scale_;
unsigned int uint_thres_;
bool scale_train_;
};

} // namespace caffe
Expand Down
207 changes: 207 additions & 0 deletions include/caffe/layers/fast_rcnn_layers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// ------------------------------------------------------------------
// Fast R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see LICENSE for details]
// Written by Ross Girshick
// ------------------------------------------------------------------

#ifndef CAFFE_FAST_RCNN_LAYERS_HPP_
#define CAFFE_FAST_RCNN_LAYERS_HPP_

#include <algorithm>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/layer.hpp"
//#include "caffe/loss_layers.hpp"
#include "caffe/layers/accuracy_layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/gen_anchors.hpp"

namespace caffe {

/* ROIPoolingLayer - Region of Interest Pooling Layer
 *
 * Declaration of the ROI pooling layer used by Fast/Faster R-CNN.  Takes
 * exactly two bottom blobs (presumably the convolutional feature map and the
 * list of ROIs -- confirm against the .cpp implementation) and produces
 * exactly one top blob of pooled features.
 */
template <typename Dtype>
class ROIPoolingLayer : public Layer<Dtype> {
 public:
  explicit ROIPoolingLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ROIPooling"; }

  // Min == Max for both: exactly 2 bottoms and exactly 1 top.
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Input feature-map geometry (set in LayerSetUp/Reshape).
  int channels_;
  int height_;
  int width_;
  // Fixed output grid each ROI is pooled down to.
  int pooled_height_;
  int pooled_width_;
  // Scale factor mapping ROI coordinates (image space) onto the feature map.
  Dtype spatial_scale_;
  // Argmax indices recorded in Forward so Backward can route gradients.
  Blob<int> max_idx_;
};

/* SmoothL1LossLayer - the smooth-L1 loss used for bounding-box regression in
 * Fast/Faster R-CNN.  Takes 2 or 3 bottom blobs; the optional third blob
 * provides per-element weights (see has_weights_).
 */
template <typename Dtype>
class SmoothL1LossLayer : public LossLayer<Dtype> {
 public:
  explicit SmoothL1LossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "SmoothL1Loss"; }

  // -1 disables the exact-count check; the Min/Max pair below allows 2 or 3
  // bottoms instead.
  virtual inline int ExactNumBottomBlobs() const { return -1; }
  virtual inline int MinBottomBlobs() const { return 2; }
  virtual inline int MaxBottomBlobs() const { return 3; }

  /**
   * Unlike most loss layers, in the SmoothL1LossLayer we can backpropagate
   * to both inputs -- override to return true and always allow force_backward.
   */
  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // Element-wise difference between the two prediction/target bottoms.
  Blob<Dtype> diff_;
  // Element-wise smooth-L1 error terms.
  Blob<Dtype> errors_;
  // True when a third bottom blob with per-element weights was supplied.
  bool has_weights_;
};

/* SimplerNMSLayer - Faster R-CNN proposal generation layer.
 *
 * NOTE(review): the previous comment called this an "N Mini-batch Sampling
 * Layer"; the members (iou_threshold_, pre/post_nms_topN_, anchors_blob_)
 * show it generates box proposals from anchors and filters them with
 * non-maximum suppression (NMS).
 */
template <typename Dtype>
class SimplerNMSLayer : public Layer<Dtype> {
 public:
  // explicit: prevents implicit LayerParameter -> layer conversion,
  // consistent with ROIPoolingLayer and SmoothL1LossLayer above.
  explicit SimplerNMSLayer(const LayerParameter& param) : Layer<Dtype>(param),
      max_proposals_(500),
      prob_threshold_(0.5f),
      iou_threshold_(0.7f),
      min_bbox_size_(16),
      feat_stride_(16),
      pre_nms_topN_(6000),
      post_nms_topN_(300) {
  }

  ~SimplerNMSLayer() {
  }

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Output is always post_nms_topN_ x 5: one row per kept proposal.
  // post_nms_topN_ is already an int, so no cast is needed here (the
  // previous version carried a redundant C-style cast).
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    top[0]->Reshape(vector<int>{ post_nms_topN_, 5 });
  }

  virtual inline const char* type() const { return "SimplerNMS"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

 private:
  int max_proposals_;       // hard cap on generated proposals
  float prob_threshold_;    // minimum objectness score to keep a proposal
  // TODO: add to proto
  float iou_threshold_;     // NMS overlap threshold
  int min_bbox_size_;       // discard boxes smaller than this
  int feat_stride_;         // total stride of the feature map w.r.t. the input image
  int pre_nms_topN_;        // proposals kept before NMS
  int post_nms_topN_;       // proposals kept after NMS (also the output row count)

  // Anchor boxes, relative to center point.
  Blob<Dtype> anchors_blob_;

  // TODO: clamp is part of std as of c++17...
  // (constexpr implies inline; the redundant `inline` and the meaningless
  //  `const` on the by-value return were dropped.)
  static constexpr Dtype clamp_v(const Dtype v, const Dtype v_min, const Dtype v_max)
  {
    return std::max(v_min, std::min(v, v_max));
  }

  // Axis-aligned box used internally during proposal generation/NMS.
  struct simpler_nms_roi_t
  {
    Dtype x0, y0, x1, y1;

    // Inclusive-pixel area; max(0, ...) guards degenerate (empty) boxes.
    Dtype area() const { return std::max<Dtype>(0, y1 - y0 + 1) * std::max<Dtype>(0, x1 - x0 + 1); }

    // Intersection box of *this and other (may be degenerate if disjoint).
    simpler_nms_roi_t intersect (simpler_nms_roi_t other) const
    {
      return
      {
        std::max(x0, other.x0),
        std::max(y0, other.y0),
        std::min(x1, other.x1),
        std::min(y1, other.y1)
      };
    }

    // Clamps each corner of *this into the bounds given by other.
    simpler_nms_roi_t clamp (simpler_nms_roi_t other) const
    {
      return
      {
        clamp_v(x0, other.x0, other.x1),
        clamp_v(y0, other.y0, other.y1),
        clamp_v(x1, other.x0, other.x1),
        clamp_v(y1, other.y0, other.y1)
      };
    }
  };

  // Box regression deltas: center shift plus log-space width/height scaling.
  struct simpler_nms_delta_t { Dtype shift_x, shift_y, log_w, log_h; };
  // A candidate box with its objectness score and original ordinal.
  struct simpler_nms_proposal_t { simpler_nms_roi_t roi; Dtype confidence; size_t ord; };

  // Greedy NMS over score-sorted proposals; keeps at most top_n survivors.
  static std::vector<simpler_nms_roi_t> simpler_nms_perform_nms(
      const std::vector<simpler_nms_proposal_t>& proposals,
      float iou_threshold,
      size_t top_n);

  // Sorts proposals by confidence and truncates the list to top_n entries.
  static void sort_and_keep_at_most_top_n(
      std::vector<simpler_nms_proposal_t>& proposals,
      size_t top_n);

  // Applies regression deltas to an anchor placed at the given shift,
  // producing the decoded proposal box.
  static simpler_nms_roi_t simpler_nms_gen_bbox(
      const anchor& box,
      const simpler_nms_delta_t& delta,
      int anchor_shift_x,
      int anchor_shift_y);
};

} // namespace caffe

#endif // CAFFE_FAST_RCNN_LAYERS_HPP_