// elu_op_cudnn.cc — cuDNN-backed ELU forward/backward operators.
// Forked from pytorch/pytorch (original: 111 lines, ~3 KB).
#include "caffe2/operators/elu_op.h"
#include "caffe2/operators/activation_ops_cudnn.h"
namespace caffe2 {
template <>
class CuDNNActivationOp<CUDNN_ACTIVATION_ELU> final
    : public CuDNNActivationOpBase {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  // Reads the "alpha" operator argument (default 1.0) and bakes it into the
  // shared cuDNN activation descriptor as the ELU coefficient.
  template <typename... Args>
  explicit CuDNNActivationOp(Args&&... args)
      : CuDNNActivationOpBase(std::forward<Args>(args)...),
        OP_SINGLE_ARG(float, "alpha", alpha_, 1.0f) {
    CUDNN_ENFORCE(cudnnSetActivationDescriptor(
        act_desc_,
        CUDNN_ACTIVATION_ELU,
        CUDNN_PROPAGATE_NAN,
        static_cast<double>(alpha_)));
  }

  // Dispatch on the input dtype; float and half are the supported types.
  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

  // Computes Y = ELU(X) with cudnnActivationForward over the flattened input.
  template <typename T>
  bool DoRunWithType() {
    const auto& X = Input(0);
    auto* Y = Output(0, X.sizes(), at::dtype<T>());
    if (X.numel() == 0) {
      // Still materialize the output buffer so Y is well-formed, then bail.
      Y->template mutable_data<T>();
      return true;
    }
    const T* X_data = X.template data<T>();
    T* Y_data = Y->template mutable_data<T>();
    this->SetTensorDescriptor(cudnnTypeWrapper<T>::type, X.numel());
    CUDNN_ENFORCE(cudnnActivationForward(
        this->cudnn_wrapper_.inline_cudnn_handle(),
        this->act_desc_,
        cudnnTypeWrapper<T>::kOne(),
        this->data_desc_,
        X_data,
        cudnnTypeWrapper<T>::kZero(),
        this->data_desc_,
        Y_data));
    return true;
  }

 private:
  const float alpha_;
};
template <>
class CuDNNActivationGradientOp<CUDNN_ACTIVATION_ELU> final
    : public CuDNNActivationOpBase {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  // Reads the "alpha" operator argument (default 1.0) and bakes it into the
  // shared cuDNN activation descriptor as the ELU coefficient; the gradient
  // must use the same alpha as the forward pass.
  template <typename... Args>
  explicit CuDNNActivationGradientOp(Args&&... args)
      : CuDNNActivationOpBase(std::forward<Args>(args)...),
        OP_SINGLE_ARG(float, "alpha", alpha_, 1.0f) {
    CUDNN_ENFORCE(cudnnSetActivationDescriptor(
        act_desc_,
        CUDNN_ACTIVATION_ELU,
        CUDNN_PROPAGATE_NAN,
        static_cast<double>(alpha_)));
  }

  // Dispatch on the dtype of Y (input 0); float and half are supported.
  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

  // Computes dX from the forward output Y and upstream gradient dY via
  // cudnnActivationBackward.
  template <typename T>
  bool DoRunWithType() {
    const auto& Y = Input(0);
    const auto& dY = Input(1);
    auto* dX = Output(0, Y.sizes(), at::dtype<T>());
    if (Y.numel() == 0) {
      // Still materialize the output buffer so dX is well-formed, then bail.
      dX->template mutable_data<T>();
      return true;
    }
    const T* Y_data = Y.template data<T>();
    const T* dY_data = dY.template data<T>();
    T* dX_data = dX->template mutable_data<T>();
    this->SetTensorDescriptor(cudnnTypeWrapper<T>::type, Y.numel());
    CUDNN_ENFORCE(cudnnActivationBackward(
        this->cudnn_wrapper_.inline_cudnn_handle(),
        this->act_desc_,
        cudnnTypeWrapper<T>::kOne(),
        this->data_desc_,
        Y_data,
        this->data_desc_,
        dY_data,
        this->data_desc_,
        Y_data, // The original X is not kept; Y stands in for the xDesc/x
                // argument here (placeholder, as in the original code).
        cudnnTypeWrapper<T>::kZero(),
        this->data_desc_,
        dX_data));
    return true;
  }

 private:
  const float alpha_;
};
// Register the cuDNN-backed implementations for the Elu and EluGradient
// operators so they are used on CUDA devices when the cuDNN engine is chosen.
REGISTER_CUDNN_OPERATOR(Elu, CuDNNActivationOp<CUDNN_ACTIVATION_ELU>);
REGISTER_CUDNN_OPERATOR(
EluGradient,
CuDNNActivationGradientOp<CUDNN_ACTIVATION_ELU>);
} // namespace caffe2