-
Notifications
You must be signed in to change notification settings - Fork 199
/
SoftMaxLayer.cpp
344 lines (337 loc) · 14.3 KB
/
SoftMaxLayer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
// Copyright Hugh Perkins 2014 hughperkins at gmail
//
// This Source Code Form is subject to the terms of the Mozilla Public License,
// v. 2.0. If a copy of the MPL was not distributed with this file, You can
// obtain one at http://mozilla.org/MPL/2.0/.
#include "util/StatefulTimer.h"
#include "layer/LayerMaker.h"
#include "loss/SoftMaxLayer.h"
#include <cfloat>
using namespace std;
#undef VIRTUAL
#define VIRTUAL
#undef STATIC
#define STATIC
// Constructs a softmax loss layer on top of previousLayer.
// perPlane == true: one softmax per plane, over that plane's pixels;
// perPlane == false: one softmax per example, across the planes
// (other methods in this layer enforce imageSize == 1 for this mode).
SoftMaxLayer::SoftMaxLayer(Layer *previousLayer, SoftMaxMaker *maker) :
LossLayer(previousLayer, maker),
perPlane(maker->_perPlane),
imageSize(previousLayer->getOutputSize()),
numPlanes(previousLayer->getOutputPlanes()),
imageSizeSquared(previousLayer->getOutputSize() * previousLayer->getOutputSize()),
output(0), // host-side buffers, allocated lazily in setBatchSize()
gradInput(0),
allocatedSize(0), // batch size the buffers were last allocated for
batchSize(0)
{
}
// Releases the host-side output and gradInput buffers.
// delete[] on a null pointer is a well-defined no-op, so the explicit
// null checks the original carried were unnecessary.
VIRTUAL SoftMaxLayer::~SoftMaxLayer() {
    delete[] gradInput;
    delete[] output;
}
// Class name, used for logging and layer identification.
VIRTUAL std::string SoftMaxLayer::getClassName() const {
    return std::string("SoftMaxLayer");
}
// Host-side output buffer; allocated by setBatchSize(), filled by forward().
VIRTUAL float *SoftMaxLayer::getOutput() {
    return this->output;
}
// Host-side gradient wrt this layer's input; filled by the calcGradInput* methods.
VIRTUAL float *SoftMaxLayer::getGradInput() {
    return this->gradInput;
}
// Records the new batch size and (re)allocates the host-side buffers when
// the previously allocated capacity is too small.
// Fix: null the pointers between delete[] and new[] — previously, if a
// new[] threw, the members kept dangling values and the destructor would
// double-free them.
VIRTUAL void SoftMaxLayer::setBatchSize(int batchSize) {
    this->batchSize = batchSize;
    if(batchSize <= this->allocatedSize) {
        return; // existing buffers are big enough; keep them
    }
    delete[] output;
    output = 0;
    delete[] gradInput;
    gradInput = 0;
    output = new float[ getOutputNumElements() ];
    gradInput = new float[ previousLayer->getOutputNumElements() ];
    allocatedSize = batchSize;
}
// Current batch size, as last set via setBatchSize().
VIRTUAL int SoftMaxLayer::getBatchSize() {
    return batchSize;
}
// need to calculate multinomial logistic /cross-entropy loss
VIRTUAL float SoftMaxLayer::calcLossFromLabels(int const *labels) {
    // Negative log-likelihood of the labelled class; output is clamped to
    // FLT_MIN before log so a zero probability cannot produce infinity.
    StatefulTimer::timeCheck("start SoftMaxLayer calcLossfromlabels");
    float lossSum = 0;
    if(perPlane) {
        // one label per (example, plane) pair, indexing into that plane's pixels
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                const int flatPlane = exampleIdx * numPlanes + planeIdx;
                const int target = labels[flatPlane];
                const int base = flatPlane * imageSizeSquared;
                lossSum -= log(std::max(output[base + target], FLT_MIN));
            }
        }
    } else {
        // force imagesize of 1 for now
        if(imageSize != 1) {
            throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
        }
        // one label per example, indexing across the planes
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            const int base = exampleIdx * numPlanes * imageSizeSquared;
            lossSum -= log(std::max(output[base + labels[exampleIdx]], FLT_MIN));
        }
    }
    StatefulTimer::timeCheck("end SoftMaxLayer calcLossfromlabels");
    return lossSum;
}
// need to calculate multinomial logistic /cross-entropy loss
VIRTUAL float SoftMaxLayer::calcLoss(float const *expectedValues) {
    // Cross-entropy against a full target distribution:
    // loss = - sum_i expected[i] * log(max(output[i], FLT_MIN))
    StatefulTimer::timeCheck("start SoftMaxLayer calcLoss");
    float total = 0;
    if(perPlane) {
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                const int base = (exampleIdx * numPlanes + planeIdx) * imageSizeSquared;
                for(int px = 0; px < imageSizeSquared; px++) {
                    const float expected = expectedValues[base + px];
                    // zero targets contribute nothing; skip the log entirely
                    if(expected != 0) {
                        total += - expected * log(std::max(output[base + px], FLT_MIN));
                    }
                }
            }
        }
    } else {
        // force imagesize of 1 for now
        if(imageSize != 1) {
            throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
        }
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            const int base = exampleIdx * numPlanes * imageSizeSquared;
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                total += - expectedValues[base + planeIdx] * log(std::max(output[base + planeIdx], FLT_MIN));
            }
        }
    }
    StatefulTimer::timeCheck("end SoftMaxLayer calcLoss");
    return total;
}
// calculate partial deriv loss wrt our inputs, in other words, product of
// (multinomial cross-entropy) loss derivative wrt our output, and
// derivative of softmax wrt our inputs
VIRTUAL void SoftMaxLayer::calcGradInputFromLabels(int const *labels) {
    // Gradient of softmax + cross-entropy wrt this layer's input:
    // gradInput = output, then subtract 1 at the labelled position.
    // Fix: the perPlane branch now validates the label before using it as an
    // index, matching the non-perPlane branch; previously a bad label caused
    // an out-of-bounds write into gradInput.
    StatefulTimer::timeCheck("start SoftMaxLayer calcGradInputfromlabels");
    if(perPlane) {
        for(int n = 0; n < batchSize; n++) {
            for(int plane = 0; plane < numPlanes; plane++) {
                int imageOffset = (n * numPlanes + plane) * imageSizeSquared;
                int label = labels[n * numPlanes + plane];
                for(int i = 0; i < imageSizeSquared; i++) {
                    float value = output[imageOffset + i];
                    // non-finite output typically means training diverged
                    if (std::isfinite(value) == false)
                        throw runtime_error("Output is a non-finite number, this usually means the learning rate is too high");
                    gradInput[imageOffset + i] = value;
                }
                if(label >= imageSizeSquared) {
                    throw runtime_error("Label " + toString(label) + " exceeds number of softmax outputs " + toString(imageSizeSquared) );
                } else if(label < 0) {
                    throw runtime_error("Label " + toString(label) + " negative");
                }
                gradInput[imageOffset + label] -= 1;
            }
        }
    } else {
        // force imagesize of 1 for now
        if(imageSize != 1) {
            throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
        }
        for(int n = 0; n < batchSize; n++) {
            int imageOffset = n * numPlanes * imageSizeSquared;
            int label = labels[n];
            for(int plane = 0; plane < numPlanes; plane++) {
                float value = output[imageOffset + plane];
                if (std::isfinite(value) == false)
                    throw runtime_error("Output is a non-finite number, this usually means the learning rate is too high");
                gradInput[imageOffset + plane] = value;
            }
            if(label >= numPlanes) {
                throw runtime_error("Label " + toString(label) + " exceeds number of softmax planes " + toString(numPlanes) );
            } else if(label < 0) {
                throw runtime_error("Label " + toString(label) + " negative");
            }
            gradInput[imageOffset + label] -= 1;
        }
    }
    StatefulTimer::timeCheck("end SoftMaxLayer calcGradInputfromlabels");
}
// calculate partial deriv loss wrt our inputs, in other words, product of
// (multinomial cross-entropy) loss derivative wrt our output, and
// derivative of softmax wrt our inputs
VIRTUAL void SoftMaxLayer::calcGradInput(float const *expectedValues) {
    // softmax + cross-entropy gradient simplifies to (output - expected)
    StatefulTimer::timeCheck("start SoftMaxLayer calcGradInput");
    if(perPlane) {
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                const int base = (exampleIdx * numPlanes + planeIdx) * imageSizeSquared;
                for(int px = 0; px < imageSizeSquared; px++) {
                    const int idx = base + px;
                    const float prob = output[idx];
                    // non-finite output typically means training diverged
                    if (std::isfinite(prob) == false)
                        throw runtime_error("Output is a non-finite number, this usually means the learning rate is too high");
                    gradInput[idx] = prob - expectedValues[idx];
                }
            }
        }
    } else {
        // force imagesize of 1 for now
        if(imageSize != 1) {
            throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
        }
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            const int base = exampleIdx * numPlanes * imageSizeSquared;
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                const int idx = base + planeIdx;
                const float prob = output[idx];
                if (std::isfinite(prob) == false)
                    throw runtime_error("Output is a non-finite number, this usually means the learning rate is too high");
                gradInput[idx] = prob - expectedValues[idx];
            }
        }
    }
    StatefulTimer::timeCheck("end SoftMaxLayer calcGradInput");
}
// Number of label entries each example contributes: one per plane in
// perPlane mode, otherwise one per pixel position (imageSizeSquared,
// which this layer forces to 1 elsewhere).
VIRTUAL int SoftMaxLayer::getNumLabelsPerExample() {
    return perPlane ? numPlanes : imageSizeSquared;
}
// Softmax has no weights or biases, so there is nothing to persist.
VIRTUAL int SoftMaxLayer::getPersistSize(int version) const {
return 0;
}
VIRTUAL int SoftMaxLayer::calcNumRightFromLabels(int const*labels) {
    // Counts predictions whose argmax over the softmax output matches the
    // given label: per (example, plane) in perPlane mode, per example otherwise.
    // Ties resolve to the first (lowest-index) maximum.
    StatefulTimer::timeCheck("start SoftMaxLayer calcNumRight");
    int numRight = 0;
    if(perPlane) {
        for(int n = 0; n < batchSize; n++) {
            for(int plane = 0; plane < numPlanes; plane++) {
                int imageOffset = (n * numPlanes + plane) * imageSizeSquared;
                int label = labels[n * numPlanes + plane];
                float thisMax = output[imageOffset + 0];
                int iMax = 0;
                for(int i = 1; i < imageSizeSquared; i++) {
                    if(output[imageOffset + i] > thisMax) {
                        thisMax = output[imageOffset + i];
                        iMax = i;
                    }
                }
                if(label == iMax) {
                    numRight++;
                }
            }
        }
    } else {
        // force imagesize of 1 for now
        if(imageSize != 1) {
            throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
        }
        for(int n = 0; n < batchSize; n++) {
            int imageOffset = n * numPlanes * imageSizeSquared;
            int label = labels[n];
            float thisMax = output[imageOffset + 0];
            int iMax = 0;
            for(int i = 1; i < numPlanes; i++) {
                if(output[imageOffset + i] > thisMax) {
                    thisMax = output[imageOffset + i];
                    iMax = i;
                }
            }
            if(label == iMax) {
                numRight++;
            }
        }
    }
    // fixed: this previously said "start", breaking StatefulTimer's start/end pairing
    StatefulTimer::timeCheck("end SoftMaxLayer calcNumRight");
    return numRight;
}
// for forward, we just need to apply the softmax activation. "just" :-P
VIRTUAL void SoftMaxLayer::forward() {
    // Applies the softmax activation on the host, subtracting the row max
    // before exponentiating for numerical stability.
    StatefulTimer::timeCheck("start SoftMaxLayer forward");
    float *input = previousLayer->getOutput(); // just retrieve as host-side array for now
    if(perPlane) {
        // one softmax per plane, over its imageSizeSquared pixels
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                const int base = (exampleIdx * numPlanes + planeIdx) * imageSizeSquared;
                float rowMax = input[base + 0];
                for(int px = 1; px < imageSizeSquared; px++) {
                    rowMax = std::max(rowMax, input[base + px]);
                }
                float sumExp = 0;
                for(int px = 0; px < imageSizeSquared; px++) {
                    sumExp += exp(input[base + px] - rowMax);
                }
                for(int px = 0; px < imageSizeSquared; px++) {
                    output[base + px] = exp(input[base + px] - rowMax) / sumExp;
                }
            }
        }
    } else {
        // force imagesize of 1 for now
        if(imageSize != 1) {
            throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
        }
        // one softmax per example, across its planes (imageSize is 1)
        for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
            const int base = exampleIdx * numPlanes * imageSizeSquared;
            float rowMax = input[base + 0];
            for(int planeIdx = 1; planeIdx < numPlanes; planeIdx++) {
                rowMax = std::max(rowMax, input[base + planeIdx]);
            }
            float sumExp = 0;
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                sumExp += exp(input[base + planeIdx] - rowMax);
            }
            for(int planeIdx = 0; planeIdx < numPlanes; planeIdx++) {
                output[base + planeIdx] = exp(input[base + planeIdx] - rowMax) / sumExp;
            }
        }
    }
    StatefulTimer::timeCheck("end SoftMaxLayer forward");
}
VIRTUAL void SoftMaxLayer::getLabels(int *labels) { // need to allocate labels array first, and have called 'forward' first
    if(perPlane) {
        throw std::runtime_error("getLabels doesnt work with 'perPlane' option currently, though it wouldnt be hard to add, so ask if you need");
    }
    if(imageSize != 1) {
        throw std::runtime_error("perColumn only supported for imagesize 1 for now. Sit tight :-) (But please raise an issue to highlight your need)");
    }
    // argmax over the planes of each example (ties go to the first plane)
    for(int exampleIdx = 0; exampleIdx < batchSize; exampleIdx++) {
        const float *probs = output + exampleIdx * numPlanes;
        int argMax = 0;
        float maxProb = probs[0];
        for(int planeIdx = 1; planeIdx < numPlanes; planeIdx++) {
            if(probs[planeIdx] > maxProb) {
                maxProb = probs[planeIdx];
                argMax = planeIdx;
            }
        }
        labels[exampleIdx] = argMax;
    }
}
// this seems to be handled by calcGradInput? So, just to a nop?
// (cos this layer kind of combines loss layer and a 'normal' propagation layer)
// certainly, we dont have any weights to update, and we already handled error
// propagation in 'calcGradInput' method above
//VIRTUAL void SoftMaxLayer::backward(float learningRate) {
// cout << "softmaxlayer::backproperrors" << endl;
// nop, do nothing :-)
//}
// Human-readable one-line summary of this layer's configuration.
VIRTUAL std::string SoftMaxLayer::asString() const {
    std::string description = "SoftMaxLayer{ perPlane=";
    description += toString(perPlane);
    description += " numPlanes=" + toString(numPlanes);
    description += " imageSize=" + toString(imageSize);
    description += " }";
    return description;
}