@@ -0,0 +1,192 @@
/*
* yolov1_layer.cu
*
* Created on: 29 Jun, 2018
* Author: gathetaroot
*/

#include "cfloat"
#include "caffe/layers/yolov1_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe{

// Adds val to *address atomically (single-precision overload).
static __device__ __forceinline__ void yolov1_atomic_add(float* address,
    float val) {
  atomicAdd(address, val);
}

// Adds val to *address atomically (double-precision overload).
// Implemented with a compare-and-swap loop because the native
// atomicAdd(double*) is only available on compute capability 6.0+.
static __device__ __forceinline__ void yolov1_atomic_add(double* address,
    double val) {
  unsigned long long int* address_as_ull =
      reinterpret_cast<unsigned long long int*>(address);
  unsigned long long int old = *address_as_ull;
  unsigned long long int assumed;
  do {
    assumed = old;
    old = atomicCAS(address_as_ull, assumed,
        __double_as_longlong(val + __longlong_as_double(assumed)));
  } while (assumed != old);
}

/*
 * Forward loss kernel: one loop iteration per sample i in [0,N).
 *
 * X (predictions) and Gt (ground truth) are indexed as
 *   [sample][cell j in 0..slide)[5*nBox box values, then nClass class values]
 * where each box occupies 5 consecutive values (x, y, w, h, confidence).
 *
 * For every cell the box with the highest predicted confidence contributes
 * its squared centre, size and confidence errors. If any box of the cell has
 * a predicted confidence >= threshold, the squared class errors of that cell
 * are added as well.
 *
 * Precondition: Y[0] must be zero before the launch. Every sample adds its
 * share of the loss to Y[0] atomically, so the kernel never resets it.
 *
 * Fixes over the previous revision:
 *  - Y[0] was zeroed by thread 0 and updated with a non-atomic "+=" by all
 *    threads, which is a data race;
 *  - hasObj and the best-confidence tracker are now per cell (hasObj used to
 *    stay true for all following cells, largestConf was never updated);
 *  - the object-confidence term no longer copies the running noObj total;
 *  - the normalisation terms are skipped when their denominator is zero.
 */
template <typename Dtype>
__global__ void gpu_yolov1_loss_kernel(const int N,
    const Dtype* X, const Dtype* Gt,
    const int slide, const int nClass, const int nBox,
    const float scaleCoord, const float scaleNoObj,
    const float threshold, Dtype* Y) {
  const int boxSlide = 5 * nBox + nClass;   // values per grid cell
  const int blobSlide = slide * boxSlide;   // values per sample
  // Normalisation denominators, kept in Dtype to avoid int overflow.
  const Dtype localN = static_cast<Dtype>(N) * nBox * slide;
  const Dtype classN = static_cast<Dtype>(N) * nClass * slide;

  CUDA_KERNEL_LOOP(i, N) {
    Dtype centerLoss = 0;
    Dtype sizeLoss = 0;
    Dtype objLoss = 0;
    Dtype noObjLoss = 0;
    Dtype clsLoss = 0;

    for (int j = 0; j < slide; j++) {
      const int cell = i * blobSlide + j * boxSlide;
      Dtype cellCenter = 0;
      Dtype cellSize = 0;
      Dtype cellConf = 0;
      Dtype largestConf = -FLT_MAX;
      bool hasObj = false;

      for (int k = 0; k < nBox; k++) {
        const int box = cell + 5 * k;
        const Dtype conf = X[box + 4];
        // ">=" keeps the later box on ties, as the original comparison did.
        if (conf >= largestConf) {
          largestConf = conf;
          const Dtype dx = Gt[box] - X[box];
          const Dtype dy = Gt[box + 1] - X[box + 1];
          const Dtype dw = Gt[box + 2] - X[box + 2];
          const Dtype dh = Gt[box + 3] - X[box + 3];
          const Dtype dstatus = Gt[box + 4] - conf;
          cellCenter = dx * dx + dy * dy;
          cellSize = dw * dw + dh * dh;
          cellConf = dstatus * dstatus;
        }
        if (conf >= threshold) {
          hasObj = true;
        }
      }

      if (hasObj) {
        const int cls = cell + 5 * nBox;
        for (int l = 0; l < nClass; l++) {
          const Dtype dclass = Gt[cls + l] - X[cls + l];
          clsLoss += dclass * dclass;
        }
      }

      centerLoss += cellCenter;
      sizeLoss += cellSize;
      // The confidence error feeds both the obj and the noObj term, which
      // matches the (1+scaleNoObj) factor used by the backward kernel.
      objLoss += cellConf;
      noObjLoss += cellConf;
    }

    Dtype sampleLoss = 0;
    if (localN > 0) {
      sampleLoss += (scaleCoord * centerLoss + scaleCoord * sizeLoss
          + objLoss + scaleNoObj * noObjLoss) / (2 * localN);
    }
    if (classN > 0) {
      sampleLoss += clsLoss / (2 * classN);
    }
    yolov1_atomic_add(Y, sampleLoss);
  }
}

/*
 * Backward kernel: one loop iteration per sample i in [0,N).
 *
 * Writes d(loss)/d(X) into Y using the same
 * [sample][cell][5*nBox box values, nClass class values] indexing as X/Gt.
 * Each squared-error term 0.5*(Gt-X)^2 differentiates to -(Gt-X).
 *
 * Fixes over the previous revision:
 *  - the box outputs were indexed with j*boxSlide added twice, so they
 *    landed in the wrong cell or past the end of the buffer;
 *  - x/y and w/h gradients were swapped and had inconsistent signs;
 *  - the class gradient used -X instead of -(Gt-X);
 *  - hasObj is now evaluated per cell.
 *
 * NOTE(review): gradients are written for every box and are not divided by
 * the normalisation terms used in the forward pass - confirm this is the
 * intended scaling.
 */
template <typename Dtype>
__global__ void gpu_dyolov1_loss_kernel(const int N,
    const Dtype* X, const Dtype* Gt,
    const int slide, const int nClass, const int nBox,
    const float scaleCoord, const float scaleNoObj,
    const float threshold, Dtype* Y) {
  const int boxSlide = 5 * nBox + nClass;   // values per grid cell
  const int blobSlide = slide * boxSlide;   // values per sample

  CUDA_KERNEL_LOOP(i, N) {
    for (int j = 0; j < slide; j++) {
      const int cell = i * blobSlide + j * boxSlide;
      bool hasObj = false;

      for (int k = 0; k < nBox; k++) {
        const int box = cell + 5 * k;
        const Dtype dx = Gt[box] - X[box];
        const Dtype dy = Gt[box + 1] - X[box + 1];
        const Dtype dw = Gt[box + 2] - X[box + 2];
        const Dtype dh = Gt[box + 3] - X[box + 3];
        const Dtype dstatus = Gt[box + 4] - X[box + 4];

        if (X[box + 4] >= threshold) {
          hasObj = true;
        }

        Y[box] = -scaleCoord * dx;
        Y[box + 1] = -scaleCoord * dy;
        Y[box + 2] = -scaleCoord * dw;
        Y[box + 3] = -scaleCoord * dh;
        Y[box + 4] = -(1 + scaleNoObj) * dstatus;
      }

      const int cls = cell + 5 * nBox;
      for (int l = 0; l < nClass; l++) {
        const Dtype dclass = Gt[cls + l] - X[cls + l];
        // Class scores only receive a gradient in cells flagged as occupied.
        Y[cls + l] = hasObj ? -dclass : Dtype(0);
      }
    }
  }
}

/*
 * Host launcher for the forward loss.
 *
 * N is the number of samples; X, Gt and Y are device pointers. Y[0] receives
 * the scalar loss. The accumulator is cleared here because the kernel only
 * adds to it; both operations are issued on the default stream, so the clear
 * completes before the kernel starts.
 */
template <typename Dtype>
void caffe_gpu_yolo1(const int N, const Dtype* X, const Dtype* Gt,
    const int slide, const int nClass, const int nBox,
    const float scaleCoord, const float scaleNoObj,
    const float threshold, Dtype* Y) {
  caffe_gpu_set(1, Dtype(0), Y);
  if (N <= 0) {
    // A grid with zero blocks is an invalid launch configuration.
    return;
  }
  // NOLINT_NEXT_LINE(whitespace/operators)
  gpu_yolov1_loss_kernel<Dtype><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
      N, X, Gt, slide, nClass, nBox, scaleCoord, scaleNoObj, threshold, Y);
  CUDA_POST_KERNEL_CHECK;
}

// Explicit instantiations of the forward launcher for float and double, so
// that other translation units can link against the definitions in this file.
template void caffe_gpu_yolo1<float>(const int N,const float*X,const float*Gt,
const int slide,const int nClass,const int nBox,
const float scaleCoord,const float scaleNoObj,
const float threshold,float*Y);

template void caffe_gpu_yolo1<double>(const int N,const double*X,const double*Gt,
const int slide,const int nClass,const int nBox,
const float scaleCoord,const float scaleNoObj,
const float threshold,double*Y);

/*
 * Host launcher for the backward (gradient) kernel.
 *
 * N is the number of samples; X, Gt and Y are device pointers, Y receives
 * the gradient with respect to X. Does nothing when N <= 0, since a grid
 * with zero blocks is an invalid launch configuration. Launch errors are
 * reported through CUDA_POST_KERNEL_CHECK instead of being dropped.
 */
template <typename Dtype>
void caffe_gpu_dyolo1(const int N, const Dtype* X, const Dtype* Gt,
    const int slide, const int nClass, const int nBox,
    const float scaleCoord, const float scaleNoObj,
    const float threshold, Dtype* Y) {
  if (N <= 0) {
    return;
  }
  // NOLINT_NEXT_LINE(whitespace/operators)
  gpu_dyolov1_loss_kernel<Dtype><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
      N, X, Gt, slide, nClass, nBox, scaleCoord, scaleNoObj, threshold, Y);
  CUDA_POST_KERNEL_CHECK;
}

// Explicit instantiations of the backward launcher for float and double, so
// that other translation units can link against the definitions in this file.
template void caffe_gpu_dyolo1<float>(const int N,const float*X,const float*Gt,
const int slide,const int nClass,const int nBox,
const float scaleCoord,const float scaleNoObj,
const float threshold,float*Y);

template void caffe_gpu_dyolo1<double>(const int N,const double*X,const double*Gt,
const int slide,const int nClass,const int nBox,
const float scaleCoord,const float scaleNoObj,
const float threshold,double*Y);

/*
 * GPU forward pass: computes the scalar loss into top[0].
 *
 * bottom[0] holds the predictions and bottom[1] the ground truth; the first
 * shape entry of bottom[0] is used as the batch size and nGrid*nGrid as the
 * number of cells per sample.
 *
 * The pointers obtained here are device pointers, so the GPU launcher
 * caffe_gpu_yolo1 must be used; the previous call to caffe_yolo1 did not
 * reach the CUDA kernel defined in this file.
 */
template<typename Dtype>
void YoloV1Layer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* x = bottom[0]->gpu_data();
  const Dtype* gt = bottom[1]->gpu_data();
  const int nBatch = bottom[0]->shape()[0];
  const int slide = this->nGrid * this->nGrid;
  Dtype* y = top[0]->mutable_gpu_data();
  caffe_gpu_yolo1(nBatch, x, gt, slide, this->nClass, this->nBox,
      this->scaleXY, this->scaleNoObj, this->threshold, y);
}

/*
 * GPU backward pass: writes the gradient with respect to the predictions
 * into the diff of bottom[0].
 *
 * Skipped when no gradient is requested for bottom[0]. The pointers obtained
 * here are device pointers, so the GPU launcher caffe_gpu_dyolo1 must be
 * used; the previous call to caffe_dyolo1 did not reach the CUDA kernel
 * defined in this file.
 *
 * NOTE(review): the diff of top[0] (the loss weight) is not applied to the
 * gradient - confirm whether this layer is meant to honour loss_weight.
 */
template<typename Dtype>
void YoloV1Layer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagation_down,const vector<Blob<Dtype>*>& bottom) {
  if (!propagation_down.empty() && !propagation_down[0]) {
    return;
  }
  const Dtype* x = bottom[0]->gpu_data();
  const Dtype* gt = bottom[1]->gpu_data();
  const int nBatch = bottom[0]->shape()[0];
  const int slide = this->nGrid * this->nGrid;
  Dtype* y = bottom[0]->mutable_gpu_diff();
  caffe_gpu_dyolo1(nBatch, x, gt, slide, this->nClass, this->nBox,
      this->scaleXY, this->scaleNoObj, this->threshold, y);
}

// Caffe macro; expected to emit the explicit instantiations of
// YoloV1Layer's Forward_gpu/Backward_gpu (defined above) - see its
// definition in the Caffe headers for the exact set of types.
INSTANTIATE_LAYER_GPU_FUNCS(YoloV1Layer);

}


@@ -374,7 +374,7 @@ message LayerParameter {
// engine parameter for selecting the implementation.
// The default for the engine is set by the ENGINE switch at compile-time.
// last ID=149
// Custom last_ID=151
// Custom last_ID=152
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
@@ -425,6 +425,7 @@ message LayerParameter {
optional ThresholdParameter threshold_param = 128;
optional TileParameter tile_param = 138;
optional WindowDataParameter window_data_param = 129;
optional YoloV1Parameter yolo_v1_param = 152;
}

// Message that stores parameters used to apply transformation
@@ -1468,3 +1469,12 @@ message PReLUParameter {
// Whether or not slope parameters are shared across channels.
optional bool channel_shared = 2 [default = false];
}

// Configuration for the YoloV1 layer (referenced from LayerParameter as
// yolo_v1_param).
// NOTE(review): the field meanings below are inferred from the field names;
// confirm against the layer's setup code.
message YoloV1Parameter{
// Presumably the number of grid cells along one side of the image.
optional int32 grid_size = 1 [default = 7];
// Presumably the weight of the box coordinate terms of the loss.
optional float gamma_coord = 2 [default = 5];
// Presumably the weight of the no-object confidence term of the loss.
optional float gamma_noobj = 3 [default = 0.5];
// Presumably the number of object classes predicted per cell.
optional int32 class_num = 4 [default = 2];
// Presumably the number of boxes predicted per cell.
optional int32 box_num = 5 [default = 2];
// Presumably the confidence value from which a cell counts as occupied.
optional float threshold = 6 [default = 0.2];
}