inner_product_layer_binary.cu

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer_binary.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {
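
// BinaryInnerProductLayer: an inner-product layer whose weights are binarized
// on the fly in the forward pass, in the style of Binary-Weight-Networks,
// optionally combined with Stochastic Quantization (SQ), which keeps an
// error-dependent subset of output rows at full precision in each iteration.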
template <typename Dtype>
void BinaryInnerProductLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Initialization
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  Dtype* weight = this->blobs_[0]->mutable_gpu_data();
  const int num = this->N_;   // number of output rows
  const int kel = this->K_;   // elements per row (input dimension)
  const int N = num * kel;
  Dtype* binaryweight = binary_weight_.mutable_gpu_data();
  caffe_copy<Dtype>(N, weight, binaryweight);
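  // Seeding binaryweight with the real-valued weights means that any rows the
  // quantization mask leaves untouched keep their full-precision values
  // (assumed behavior of the project-specific binarize helpers below).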
  // Quantize the weights: write the signs into binaryweight, then scale each
  // output row by its factor alpha_ (every row is quantized at this stage).
  caffe_gpu_binarize<Dtype>(weight, binaryweight, this->all_quantized_.gpu_data(), num, kel);
  caffe_gpu_binary_scaling<Dtype>(weight, binaryweight, this->all_quantized_.gpu_data(), &alpha_, num, kel);
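  // Assuming BWN-style scaling (cf. XNOR-Net), each row n is mapped to
  // Q_n = alpha_n * sign(W_n) with alpha_n = ||W_n||_1 / K_; e.g. the row
  // [0.3, -0.5, 0.2] gives alpha_n = 1/3 and Q_n = [1/3, -1/3, 1/3].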
  // Stochastic Quantization
  if (this->sq_ && (this->ratio_ < 100)) {
    // Roulette selection algorithm; the per-row mask is stored in
    // 'is_quantized_'.
    Roulette();
    // Rebuild the weights as hybrid weights: rows flagged in 'is_quantized_'
    // are binarized, the remaining rows stay real-valued.
    caffe_gpu_binarize<Dtype>(weight, binaryweight, this->is_quantized_.gpu_data(), num, kel);
    caffe_gpu_binary_scaling<Dtype>(weight, binaryweight, this->is_quantized_.gpu_data(), &alpha_, num, kel);
  }
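  // Note: Roulette() measures quantization error against the fully binarized
  // copy built above, which is why all rows are binarized first even though
  // SQ may keep some of them real.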
  // Inner product
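  // Single-image batches (M_ == 1) take a GEMV fast path; larger batches use
  // GEMM, computing top (M_ x N_) from bottom (M_ x K_) and binaryweight.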
  if (M_ == 1) {
    caffe_gpu_gemv<Dtype>(CblasNoTrans, N_, K_, (Dtype)1.,
                          binaryweight, bottom_data, (Dtype)0., top_data);
    if (bias_term_)
      caffe_gpu_axpy<Dtype>(N_, bias_multiplier_.cpu_data()[0],
                            this->blobs_[1]->gpu_data(), top_data);
  } else {
    caffe_gpu_gemm<Dtype>(CblasNoTrans,
                          transpose_ ? CblasNoTrans : CblasTrans,
                          M_, N_, K_, (Dtype)1.,
                          bottom_data, binaryweight, (Dtype)0., top_data);
    if (bias_term_)
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
                            bias_multiplier_.gpu_data(),
                            this->blobs_[1]->gpu_data(), (Dtype)1., top_data);
  }
}
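
// Roulette: decides which output rows stay real-valued under SQ. Each row's
// relative quantization error e_n = ||W_n - Q_n||_1 / ||W_n||_1 serves as its
// roulette-wheel weight, and (1 - ratio_/100) * N_ rows are drawn without
// replacement (a drawn row is marked is_quant[n] = 0 and removed from the
// wheel), so the rows that binarization hurts most are the most likely to
// keep full precision.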
template <typename Dtype>
void BinaryInnerProductLayer<Dtype>::Roulette() {
  const Dtype* weight = this->blobs_[0]->gpu_data();
  const int num = this->N_;
  const int weight_col = this->K_;
  const int N = num * weight_col;
  const Dtype* binaryweight = binary_weight_.gpu_data();
  const float ratio = this->ratio_;
  Dtype* norm = error_norm_.mutable_cpu_data();
  Dtype* ns = sum_norm_.mutable_cpu_data();
  Dtype* wc = weight_copy_.mutable_gpu_data();
  // Calculate the quantization error (||W - Q|| / ||W||) per output row.
  caffe_gpu_sub(N, weight, binaryweight, wc);
  for (int n = 0; n < num; n++) {
    caffe_gpu_asum(weight_col, wc + n * weight_col, norm + n);
    caffe_gpu_asum(weight_col, weight + n * weight_col, ns + n);
  }
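  // caffe_gpu_asum writes each L1 norm back to host memory, so the
  // normalization below can run on the CPU.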
  for (int n = 0; n < num; n++) {
    if (ns[n] == 0) {
      norm[n] = 0;
    } else {
      norm[n] = norm[n] / ns[n];  // relative quantization error of row n
    }
  }
  int* is_quant = is_quantized_.mutable_cpu_data();
  // Roulette-wheel selection.
  Dtype sum = 0;
  for (int n = 0; n < num; n++) {
    sum += norm[n];
    is_quant[n] = 1;  // start with every row marked for quantization
  }
  // Number of rows kept real-valued, e.g. ratio = 50 keeps half of them.
  const int real_num = int((1 - ratio / 100) * num);
  for (int i = 0; i < real_num; i++) {
    // Draw one row to stay real; the hit probability of row n is
    // proportional to its quantization error norm[n].
    Dtype p;
    caffe_rng_uniform(1, Dtype(0), Dtype(1), &p);
    p *= sum;
    Dtype cur_sum = 0;
    for (int n = 0; n < num; n++) {
      if (is_quant[n] == 1) {  // still on the wheel
        if ((p >= cur_sum) && (p < cur_sum + norm[n])) {  // hit
          is_quant[n] = 0;   // keep this row real-valued
          sum -= norm[n];    // remove it from the wheel
          break;
        } else {
          cur_sum += norm[n];
        }
      }
    }
  }
}
template void BinaryInnerProductLayer<float>::Roulette();
template void BinaryInnerProductLayer<double>::Roulette();
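
// Backward: the weight gradient is the standard inner-product gradient,
// accumulated into the full-precision weight blob, while the bottom gradient
// flows through the binarized weights used in the forward pass; for the
// quantized rows this is effectively a straight-through estimator.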
template <typename Dtype>
void BinaryInnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->gpu_diff();
    const Dtype* bottom_data = bottom[0]->gpu_data();
    // Gradient with respect to weight
    if (transpose_) {
      caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
                            K_, N_, M_,
                            (Dtype)1., bottom_data, top_diff,
                            (Dtype)1., this->blobs_[0]->mutable_gpu_diff());
    } else {
      caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
                            N_, K_, M_,
                            (Dtype)1., top_diff, bottom_data,
                            (Dtype)1., this->blobs_[0]->mutable_gpu_diff());
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->gpu_diff();
    // Gradient with respect to bias
    caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
                          bias_multiplier_.gpu_data(), (Dtype)1.,
                          this->blobs_[1]->mutable_gpu_diff());
  }
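  // The bottom gradient uses binary_weight_ rather than the real weight blob,
  // matching the linear map actually applied in Forward_gpu.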
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->gpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
                            M_, K_, N_,
                            (Dtype)1., top_diff, binary_weight_.gpu_data(),
                            (Dtype)0., bottom[0]->mutable_gpu_diff());
    } else {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
                            M_, K_, N_,
                            (Dtype)1., top_diff, binary_weight_.gpu_data(),
                            (Dtype)0., bottom[0]->mutable_gpu_diff());
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(BinaryInnerProductLayer);
} // namespace caffe