#include "caffe2/operators/softmax_op.h"
#include "caffe2/operators/softmax_shared.h"

namespace caffe2 {

// Implementation for the CPU context.
template <>
bool SoftmaxOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto* Y = Output(0);
  const auto canonical_axis = X.canonical_axis_index(axis_);
  const int N = X.size_to_dim(canonical_axis);
  const int D = X.size_from_dim(canonical_axis);
  Y->ResizeLike(X);
  float* Ydata = Y->mutable_data<float>();
  // Allocate the per-row scale and row-max buffers, and a vector of ones used
  // to broadcast row-wise reductions back to full width.
  if (scale_.size() != N) {
    scale_.Resize(N);
  }
  if (rowmax_.size() != N) {
    rowmax_.Resize(N);
  }
  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }
  SoftmaxCPU(
      context_,
      N,
      D,
      X.data<float>(),
      Ydata,
      scale_.mutable_data<float>(),
      sum_multiplier_.data<float>(),
      false,
      rowmax_.mutable_data<float>());
  return true;
}
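
// For reference: SoftmaxCPU (declared in softmax_shared.h) computes the
// numerically stable row-wise softmax over the N x D view of X. A minimal
// sketch of the same computation in plain C++, without the BLAS-backed
// math:: primitives the real kernel uses (illustrative only, not the shipped
// implementation):
//
//   #include <algorithm>
//   #include <cmath>
//
//   void NaiveSoftmax(int N, int D, const float* x, float* y) {
//     for (int i = 0; i < N; ++i) {
//       const float* xrow = x + i * D;
//       float* yrow = y + i * D;
//       // Subtract the row max before exponentiating to avoid overflow.
//       const float max = *std::max_element(xrow, xrow + D);
//       float sum = 0.f;
//       for (int j = 0; j < D; ++j) {
//         yrow[j] = std::exp(xrow[j] - max);
//         sum += yrow[j];
//       }
//       for (int j = 0; j < D; ++j) {
//         yrow[j] /= sum;
//       }
//     }
//   }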

// Implementation for the CPU context.
template <>
bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);
  auto* dX = Output(0);
  const auto canonical_axis = Y.canonical_axis_index(axis_);
  const int N = Y.size_to_dim(canonical_axis);
  const int D = Y.size_from_dim(canonical_axis);
  // Allocate the per-row scale buffer and the vector of ones used to
  // broadcast row-wise dot products back to full width.
  if (scale_.size() != N) {
    scale_.Resize(N);
  }
  if (sum_multiplier_.size() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }
  dX->ResizeLike(Y);
  const float* Ydata = Y.data<float>();
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // Compute dX = Y * (dY - rowwise_dot(Y, dY)) in three steps.
  context_.Copy<float, CPUContext, CPUContext>(Y.size(), dYdata, dXdata);
  float* scaledata = scale_.mutable_data<float>();
  // Step 1: scale[i] = dot(Y[i, :], dY[i, :]).
  for (int i = 0; i < N; ++i) {
    math::Dot<float, CPUContext>(
        D, Ydata + i * D, dYdata + i * D, scaledata + i, &context_);
  }
  // Step 2: dX -= scale * ones^T; the rank-1 update subtracts each row's dot
  // product from every element of that row.
  math::Gemm<float, CPUContext>(
      CblasNoTrans, CblasNoTrans, N, D, 1, -1, scaledata,
      sum_multiplier_.data<float>(), 1, dXdata, &context_);
  // Step 3: dX *= Y, elementwise.
  math::Mul<float, CPUContext>(Y.size(), dXdata, Ydata, dXdata, &context_);
  return true;
}
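
// Derivation of the gradient above: with y = softmax(x), the Jacobian is
// dy_j/dx_k = y_j * (delta_jk - y_k), so by the chain rule
//
//   dL/dx_j = sum_k dL/dy_k * dy_k/dx_j
//           = y_j * (dL/dy_j - sum_k dL/dy_k * y_k)
//
// which is exactly dX = Y * (dY - rowwise_dot(Y, dY)), applied row by row.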

REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);

OPERATOR_SCHEMA(Softmax)
    .NumInputs(1)
    .NumOutputs(1)
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
The operator computes the softmax normalized values for each layer in the batch
of the given input. The input is a 2-D tensor (Tensor<float>) of size
(batch_size x input_feature_dimensions). The output tensor has the same shape
and contains the softmax normalized values of the corresponding input.

X does not need to be a 2-D tensor explicitly; rather, it is coerced into one.
For an arbitrary n-dimensional tensor X with shape
[a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}], where k is the axis provided,
X is coerced into a 2-dimensional tensor with dimensions
[a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default case of axis=1,
this means X is coerced into a 2-D tensor of dimensions
[a_0, a_1 * ... * a_{n-1}], where a_0 is typically the batch size. In this
situation we must have a_0 = N and a_1 * ... * a_{n-1} = D. Each of these
dimensions must match, or the operator will throw an error.
)DOC")
    .Arg(
        "axis",
        "(int) defaults to 1; describes the axis of the inputs when coerced "
        "to 2D; defaults to one because the 0th axis most likely describes "
        "the batch size")
    .Input(
        0,
        "input",
        "The input tensor that is coerced into a 2D matrix of size (N x D) "
        "as described above.")
    .Output(
        0,
        "output",
        "The softmax normalized output values, with the same shape as the "
        "input tensor.")
    .InheritOnnxSchema("Softmax");
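
// Worked example of the coercion described above (hypothetical shapes): for
// X of shape [2, 3, 4, 5] with the default axis=1, the operator treats X as
// a 2-D matrix with N = 2 rows and D = 3 * 4 * 5 = 60 columns, applies the
// softmax to each of the 2 rows, and returns a tensor of the original shape
// [2, 3, 4, 5].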

// Input: Y, dY. Output: dX.
OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);

class GetSoftmaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
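
// For illustration: given a forward op `Softmax(["X"], ["Y"])` in a net, the
// gradient maker above emits a `SoftmaxGradient` op whose inputs are the
// forward output O(0) and its incoming gradient GO(0), and whose output is
// the gradient of the forward input GI(0) -- roughly
// `SoftmaxGradient(["Y", "Y_grad"], ["X_grad"])`. The blob names here are
// hypothetical; the actual suffixing follows Caffe2's gradient-naming
// convention.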
REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);
REGISTER_GRADIENT(SoftmaxFp16, GetSoftmaxGradient);
} // namespace caffe2