From 761f8bdde2cd9ca6a9ffc9fd915ccec9b17fc927 Mon Sep 17 00:00:00 2001 From: Yan Li Date: Fri, 12 Aug 2016 14:24:37 +0800 Subject: [PATCH 1/5] DTypeRegressionOutput --- src/operator/regression_output-inl.h | 47 ++++++++++++++++++++++------ src/operator/regression_output.cc | 42 ++++++++++++++++--------- src/operator/regression_output.cu | 33 ++++++++++++------- 3 files changed, 86 insertions(+), 36 deletions(-) diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h index d70066d26fcb..6f63b7ccc329 100644 --- a/src/operator/regression_output-inl.h +++ b/src/operator/regression_output-inl.h @@ -33,7 +33,7 @@ struct RegressionOutputParam : public dmlc::Parameter { // Special Operator to output regression value in forward // And get gradient in calculation. -template +template class RegressionOutputOp : public Operator { public: explicit RegressionOutputOp(RegressionOutputParam param) : param_(param) {} @@ -48,8 +48,8 @@ class RegressionOutputOp : public Operator { CHECK_EQ(in_data.size(), 2) << "RegressionOutputOp Input: [data, label]"; CHECK_EQ(out_data.size(), 1) << "RegressionOutputOp Output: [output]"; Stream *s = ctx.get_stream(); - Tensor data = in_data[reg_enum::kData].FlatTo2D(s); - Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); + Tensor data = in_data[reg_enum::kData].FlatTo2D(s); + Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); Assign(out, req[reg_enum::kOut], F(data)); } @@ -69,11 +69,11 @@ class RegressionOutputOp : public Operator { Stream *s = ctx.get_stream(); real_t num_output = in_data[reg_enum::kLabel].Size()/in_data[reg_enum::kLabel].shape_[0]; - Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); - Tensor grad = in_grad[reg_enum::kData].FlatTo2D(s); - Tensor label = in_data[reg_enum::kLabel] - .get_with_shape(out.shape_, s); - Assign(grad, req[reg_enum::kData], param_.grad_scale/num_output* + Tensor out = out_data[reg_enum::kOut].FlatTo2D(s); + Tensor grad = in_grad[reg_enum::kData].FlatTo2D(s); + Tensor 
label = in_data[reg_enum::kLabel] + .get_with_shape(out.shape_, s); + Assign(grad, req[reg_enum::kData], scalar(param_.grad_scale/num_output)* F(out, reshape(label, grad.shape_))); } @@ -84,7 +84,7 @@ class RegressionOutputOp : public Operator { // Decalre Factory function, used for dispatch specialization template Operator* CreateRegressionOutputOp(reg_enum::RegressionOutputType type, - RegressionOutputParam param); + RegressionOutputParam param, int dtype); #if DMLC_USE_CXX11 template @@ -129,6 +129,27 @@ class RegressionOutputProp : public OperatorProperty { return true; } + bool InferType(std::vector *in_type, + std::vector *out_type, + std::vector *aux_type) const override { + CHECK_EQ(in_type->size(), 2) << "Input:[data, label]"; + int dtype = (*in_type)[0]; + + auto nin = in_type->size(); + in_type->clear(); + in_type->push_back(dtype); + for (index_t i = 1; i < nin; ++i) in_type->push_back(dtype); + + if (dtype == -1) { + LOG(FATAL) << "Input type to regression_output is not specified."; + return false; + } + + out_type->clear(); + out_type->push_back(dtype); + return true; + } + OperatorProperty* Copy() const override { auto ptr = new RegressionOutputProp(); ptr->param_ = param_; @@ -165,7 +186,13 @@ class RegressionOutputProp : public OperatorProperty { return {{in_data[reg_enum::kData], out_data[reg_enum::kOut]}}; } - Operator* CreateOperator(Context ctx) const override; + Operator* CreateOperator(Context ctx) const override { + LOG(FATAL) << "Not Implemented."; + return NULL; + } + + Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, + std::vector *in_type) const override; protected: RegressionOutputParam param_; diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc index ff63c0e00dcf..38b24f85da42 100644 --- a/src/operator/regression_output.cc +++ b/src/operator/regression_output.cc @@ -11,24 +11,38 @@ namespace op { template<> Operator *CreateRegressionOutputOp(reg_enum::RegressionOutputType type, - 
RegressionOutputParam param) { - switch (type) { - case reg_enum::kLinear: - return new RegressionOutputOp(param); - case reg_enum::kLogistic: - return new RegressionOutputOp(param); - case reg_enum::kMAE: - return new RegressionOutputOp(param); - default: - LOG(FATAL) << "unknown activation type " << type; - } - return nullptr; + RegressionOutputParam param, int dtype) { + Operator *op = NULL; + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + switch (type) { + case reg_enum::kLinear: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kLogistic: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kMAE: + op = new RegressionOutputOp + (param); + break; + default: + LOG(FATAL) << "unknown RegressionOutput type " << type; + } + }); + return op; } // DO_BIND_DISPATCH comes from operator_common.h template -Operator *RegressionOutputProp::CreateOperator(Context ctx) const { - DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_); +Operator *RegressionOutputProp::CreateOperatorEx(Context ctx, std::vector *in_shape, + std::vector *in_type) const { + std::vector out_shape, aux_shape; + std::vector out_type, aux_type; + CHECK(InferType(in_type, &out_type, &aux_type)); + CHECK(InferShape(in_shape, &out_shape, &aux_shape)); + DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_, (*in_type)[0]); } DMLC_REGISTER_PARAMETER(RegressionOutputParam); diff --git a/src/operator/regression_output.cu b/src/operator/regression_output.cu index 18e7a1f4184c..f5d44252c534 100644 --- a/src/operator/regression_output.cu +++ b/src/operator/regression_output.cu @@ -11,18 +11,27 @@ namespace op { template<> Operator *CreateRegressionOutputOp(reg_enum::RegressionOutputType type, - RegressionOutputParam param) { - switch (type) { - case reg_enum::kLinear: - return new RegressionOutputOp(param); - case reg_enum::kLogistic: - return new RegressionOutputOp(param); - case reg_enum::kMAE: - return new RegressionOutputOp(param); - default: - LOG(FATAL) << "unknown 
activation type " << type; - } - return NULL; + RegressionOutputParam param, int dtype) { + Operator *op = NULL; + MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { + switch (type) { + case reg_enum::kLinear: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kLogistic: + op = new RegressionOutputOp + (param); + break; + case reg_enum::kMAE: + op = new RegressionOutputOp + (param); + break; + default: + LOG(FATAL) << "unknown RegressionOutput type " << type; + } + }); + return op; } } // namespace op } // namespace mxnet From 8922f70ca26cb8727bd34898013f1b5e2120578e Mon Sep 17 00:00:00 2001 From: Yan Li Date: Sat, 13 Aug 2016 01:52:23 +0800 Subject: [PATCH 2/5] Update DType test for pooling and regression --- tests/python/gpu/test_operator_gpu.py | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index daa60e1779a0..2847fb3e777f 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -207,6 +207,34 @@ def test_embedding_with_type(): {'ctx': mx.cpu(0), 'embedding_data': (2, 10), 'type_dict': {'embedding_data': np.float16}}] check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'}) +def test_pooling_with_type(): + sym= mx.sym.Pooling(name='pooling', kernel=(3, 3), pool_type='avg') + ctx_list = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float16}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}}] + check_consistency(sym, ctx_list) + + sym_3d= mx.sym.Pooling(name='pooling', kernel=(3, 3, 3), 
pool_type='avg') + ctx_list_3d = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}}, + {'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float16}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}}, + {'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}}] + check_consistency(sym_3d, ctx_list_3d) + +def test_regression_with_type(): + sym_logistic = mx.sym.LogisticRegressionOutput(name = 'regression') + sym_linear = mx.sym.LinearRegressionOutput(name = 'regression') + ctx_list = [{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}}, + {'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}}, + {'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float16}}, + {'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}}, + {'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}}] + check_consistency(sym_logistic, ctx_list) + check_consistency(sym_linear, ctx_list) + if __name__ == '__main__': test_batchnorm_with_type() test_convolution_with_type() @@ -220,6 +248,8 @@ def test_embedding_with_type(): test_fullyconnected_with_type() test_activation_with_type() test_embedding_with_type() + test_pooling_with_type() + test_regression_with_type() #test_softmax_with_shape((3,4), mx.gpu()) #test_multi_softmax_with_shape((3,4,5), mx.gpu()) From 32720380db80c54ff4148edbb3680e2a7d52ddea Mon Sep 17 00:00:00 2001 From: Yan Li Date: Mon, 15 Aug 2016 15:57:58 +0800 Subject: [PATCH 3/5] nullptr fix --- src/operator/regression_output.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/src/operator/regression_output.cu b/src/operator/regression_output.cu index f5d44252c534..4917192abc51 100644 --- a/src/operator/regression_output.cu +++ b/src/operator/regression_output.cu @@ -12,7 +12,7 @@ namespace op { template<> Operator *CreateRegressionOutputOp(reg_enum::RegressionOutputType type, RegressionOutputParam param, int dtype) { - Operator *op = NULL; + Operator *op = nullptr; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { switch (type) { case reg_enum::kLinear: From bad103ed9cb3325ba1c3e326a5555076d59df025 Mon Sep 17 00:00:00 2001 From: Yan Li Date: Mon, 15 Aug 2016 16:00:08 +0800 Subject: [PATCH 4/5] fix infershape with {} and nullptr --- src/operator/regression_output-inl.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h index 6f63b7ccc329..273f42987cd0 100644 --- a/src/operator/regression_output-inl.h +++ b/src/operator/regression_output-inl.h @@ -134,17 +134,16 @@ class RegressionOutputProp : public OperatorProperty { std::vector *aux_type) const override { CHECK_EQ(in_type->size(), 2) << "Input:[data, label]"; int dtype = (*in_type)[0]; - auto nin = in_type->size(); in_type->clear(); in_type->push_back(dtype); - for (index_t i = 1; i < nin; ++i) in_type->push_back(dtype); - + for (index_t i = 1; i < nin; ++i) { + in_type->push_back(dtype); + } if (dtype == -1) { LOG(FATAL) << "Input type to regression_output is not specified."; return false; } - out_type->clear(); out_type->push_back(dtype); return true; @@ -188,7 +187,7 @@ class RegressionOutputProp : public OperatorProperty { Operator* CreateOperator(Context ctx) const override { LOG(FATAL) << "Not Implemented."; - return NULL; + return nullptr; } Operator* CreateOperatorEx(Context ctx, std::vector *in_shape, From 63ee12e04a563275cbb4c3a2c7abc173ab28a020 Mon Sep 17 00:00:00 2001 From: Yan Li Date: Mon, 15 Aug 2016 16:00:19 +0800 Subject: [PATCH 5/5] nullptr fix --- 
src/operator/regression_output.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc index 38b24f85da42..06369f8abd88 100644 --- a/src/operator/regression_output.cc +++ b/src/operator/regression_output.cc @@ -12,7 +12,7 @@ namespace op { template<> Operator *CreateRegressionOutputOp(reg_enum::RegressionOutputType type, RegressionOutputParam param, int dtype) { - Operator *op = NULL; + Operator *op = nullptr; MSHADOW_REAL_TYPE_SWITCH(dtype, DType, { switch (type) { case reg_enum::kLinear: