This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

DType regression #3018

Merged
merged 6 commits into from
Aug 16, 2016
Changes from 2 commits
47 changes: 37 additions & 10 deletions src/operator/regression_output-inl.h
@@ -33,7 +33,7 @@ struct RegressionOutputParam : public dmlc::Parameter<RegressionOutputParam> {

// Special Operator to output regression value in forward
// And get gradient in calculation.
template<typename xpu, typename ForwardOp, typename BackwardOp>
template<typename xpu, typename ForwardOp, typename BackwardOp, typename DType>
class RegressionOutputOp : public Operator {
public:
explicit RegressionOutputOp(RegressionOutputParam param) : param_(param) {}
@@ -48,8 +48,8 @@ class RegressionOutputOp : public Operator {
CHECK_EQ(in_data.size(), 2) << "RegressionOutputOp Input: [data, label]";
CHECK_EQ(out_data.size(), 1) << "RegressionOutputOp Output: [output]";
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 2> data = in_data[reg_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> out = out_data[reg_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2, DType> data = in_data[reg_enum::kData].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> out = out_data[reg_enum::kOut].FlatTo2D<xpu, DType>(s);
Assign(out, req[reg_enum::kOut], F<ForwardOp>(data));
}

@@ -69,11 +69,11 @@ class RegressionOutputOp : public Operator {
Stream<xpu> *s = ctx.get_stream<xpu>();
real_t num_output =
in_data[reg_enum::kLabel].Size()/in_data[reg_enum::kLabel].shape_[0];
Tensor<xpu, 2> out = out_data[reg_enum::kOut].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> grad = in_grad[reg_enum::kData].FlatTo2D<xpu, real_t>(s);
Tensor<xpu, 2> label = in_data[reg_enum::kLabel]
.get_with_shape<xpu, 2, real_t>(out.shape_, s);
Assign(grad, req[reg_enum::kData], param_.grad_scale/num_output*
Tensor<xpu, 2, DType> out = out_data[reg_enum::kOut].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> grad = in_grad[reg_enum::kData].FlatTo2D<xpu, DType>(s);
Tensor<xpu, 2, DType> label = in_data[reg_enum::kLabel]
.get_with_shape<xpu, 2, DType>(out.shape_, s);
Assign(grad, req[reg_enum::kData], scalar<DType>(param_.grad_scale/num_output)*
Contributor:
Is this correct? What if DType is int8?

Contributor Author:
Thank you for your review. Which line are you worried about? I think int8 is currently not supported, as we are using MSHADOW_REAL_TYPE_SWITCH to create the operators.

Contributor:
OK, merge as is for now. We have to change a lot for the coming uint8...

Contributor Author:
Yep... I'll propose a PR to fix the nullptr issue in the other operators later. Is there a paper list you could share about uint8 networks?

There are some other issues to fix before DType networks really work, like kvstore and param init. For the LSTM case, an extra states data_type is needed. We need to discuss this DType support further and put together a more detailed todo list.
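
For context on why int8 is out of reach here: MSHADOW_REAL_TYPE_SWITCH only dispatches over the floating-point type enums, so a non-real dtype never reaches this kernel. A simplified sketch of that dispatch (an assumption for illustration, not the actual mshadow macro expansion; CreateOp<DType> is a hypothetical placeholder for the RegressionOutputOp constructions in regression_output.cc below):

// Simplified sketch (assumed): only the real types are covered, so an int8
// type id falls through to LOG(FATAL) before any operator is constructed.
switch (dtype) {
  case mshadow::kFloat32: { typedef float DType;                 op = CreateOp<DType>(param); break; }
  case mshadow::kFloat64: { typedef double DType;                op = CreateOp<DType>(param); break; }
  case mshadow::kFloat16: { typedef mshadow::half::half_t DType; op = CreateOp<DType>(param); break; }
  default: LOG(FATAL) << "unknown dtype enum " << dtype;
}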

F<BackwardOp>(out, reshape(label, grad.shape_)));
}

@@ -84,7 +84,7 @@ class RegressionOutputOp : public Operator {
// Declare factory function, used for dispatch specialization
template<typename xpu>
Operator* CreateRegressionOutputOp(reg_enum::RegressionOutputType type,
RegressionOutputParam param);
RegressionOutputParam param, int dtype);

#if DMLC_USE_CXX11
template<reg_enum::RegressionOutputType type>
@@ -129,6 +129,27 @@ class RegressionOutputProp : public OperatorProperty {
return true;
}

bool InferType(std::vector<int> *in_type,
std::vector<int> *out_type,
std::vector<int> *aux_type) const override {
CHECK_EQ(in_type->size(), 2) << "Input:[data, label]";
int dtype = (*in_type)[0];

auto nin = in_type->size();
in_type->clear();
in_type->push_back(dtype);
for (index_t i = 1; i < nin; ++i) in_type->push_back(dtype);
Contributor:
Please follow the coding convention: use {} to wrap it, and put it on a separate line.
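
For illustration, the braced, one-statement-per-line form the convention asks for would look like this (a sketch only, not part of this diff):

// Braced loop body on its own line, per the reviewer's convention note.
for (index_t i = 1; i < nin; ++i) {
  in_type->push_back(dtype);
}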


if (dtype == -1) {
LOG(FATAL) << "Input type to regression_output is not specified.";
return false;
}

out_type->clear();
out_type->push_back(dtype);
return true;
}

OperatorProperty* Copy() const override {
auto ptr = new RegressionOutputProp<type>();
ptr->param_ = param_;
@@ -165,7 +186,13 @@ class RegressionOutputProp : public OperatorProperty {
return {{in_data[reg_enum::kData], out_data[reg_enum::kOut]}};
}

Operator* CreateOperator(Context ctx) const override;
Operator* CreateOperator(Context ctx) const override {
LOG(FATAL) << "Not Implemented.";
return NULL;
Contributor:
nullptr

}

Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const override;

protected:
RegressionOutputParam param_;
42 changes: 28 additions & 14 deletions src/operator/regression_output.cc
@@ -11,24 +11,38 @@ namespace op {

template<>
Operator *CreateRegressionOutputOp<cpu>(reg_enum::RegressionOutputType type,
RegressionOutputParam param) {
switch (type) {
case reg_enum::kLinear:
return new RegressionOutputOp<cpu, mshadow::op::identity, mshadow::op::minus>(param);
case reg_enum::kLogistic:
return new RegressionOutputOp<cpu, mshadow_op::sigmoid, mshadow::op::minus>(param);
case reg_enum::kMAE:
return new RegressionOutputOp<cpu, mshadow::op::identity, mshadow_op::minus_sign>(param);
default:
LOG(FATAL) << "unknown activation type " << type;
}
return nullptr;
RegressionOutputParam param, int dtype) {
Operator *op = NULL;
Contributor:
use nullptr?

MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (type) {
case reg_enum::kLinear:
op = new RegressionOutputOp
<cpu, mshadow::op::identity, mshadow::op::minus, DType>(param);
break;
case reg_enum::kLogistic:
op = new RegressionOutputOp
<cpu, mshadow_op::sigmoid, mshadow::op::minus, DType>(param);
break;
case reg_enum::kMAE:
op = new RegressionOutputOp
<cpu, mshadow::op::identity, mshadow_op::minus_sign, DType>(param);
break;
default:
LOG(FATAL) << "unknown RegressionOutput type " << type;
}
});
return op;
}

// DO_BIND_DISPATCH comes from operator_common.h
template<reg_enum::RegressionOutputType type>
Operator *RegressionOutputProp<type>::CreateOperator(Context ctx) const {
DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_);
Operator *RegressionOutputProp<type>::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
std::vector<TShape> out_shape, aux_shape;
std::vector<int> out_type, aux_type;
CHECK(InferType(in_type, &out_type, &aux_type));
CHECK(InferShape(in_shape, &out_shape, &aux_shape));
DO_BIND_DISPATCH(CreateRegressionOutputOp, type, param_, (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(RegressionOutputParam);
33 changes: 21 additions & 12 deletions src/operator/regression_output.cu
@@ -11,18 +11,27 @@ namespace op {

template<>
Operator *CreateRegressionOutputOp<gpu>(reg_enum::RegressionOutputType type,
RegressionOutputParam param) {
switch (type) {
case reg_enum::kLinear:
return new RegressionOutputOp<gpu, mshadow::op::identity, mshadow::op::minus>(param);
case reg_enum::kLogistic:
return new RegressionOutputOp<gpu, mshadow_op::sigmoid, mshadow::op::minus>(param);
case reg_enum::kMAE:
return new RegressionOutputOp<gpu, mshadow::op::identity, mshadow_op::minus_sign>(param);
default:
LOG(FATAL) << "unknown activation type " << type;
}
return NULL;
RegressionOutputParam param, int dtype) {
Operator *op = NULL;
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (type) {
case reg_enum::kLinear:
op = new RegressionOutputOp
<gpu, mshadow::op::identity, mshadow::op::minus, DType>(param);
break;
case reg_enum::kLogistic:
op = new RegressionOutputOp
<gpu, mshadow_op::sigmoid, mshadow::op::minus, DType>(param);
break;
case reg_enum::kMAE:
op = new RegressionOutputOp
<gpu, mshadow::op::identity, mshadow_op::minus_sign, DType>(param);
break;
default:
LOG(FATAL) << "unknown RegressionOutput type " << type;
}
});
return op;
}
} // namespace op
} // namespace mxnet
30 changes: 30 additions & 0 deletions tests/python/gpu/test_operator_gpu.py
@@ -207,6 +207,34 @@ def test_embedding_with_type():
{'ctx': mx.cpu(0), 'embedding_data': (2, 10), 'type_dict': {'embedding_data': np.float16}}]
check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'})

def test_pooling_with_type():
sym = mx.sym.Pooling(name='pooling', kernel=(3, 3), pool_type='avg')
ctx_list = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float16}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10), 'type_dict': {'pooling_data': np.float32}}]
check_consistency(sym, ctx_list)

sym_3d = mx.sym.Pooling(name='pooling', kernel=(3, 3, 3), pool_type='avg')
ctx_list_3d = [{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}},
{'ctx': mx.gpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float16}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float64}},
{'ctx': mx.cpu(0), 'pooling_data': (2, 2, 10, 10, 10), 'type_dict': {'pooling_data': np.float32}}]
check_consistency(sym_3d, ctx_list_3d)

def test_regression_with_type():
sym_logistic = mx.sym.LogisticRegressionOutput(name = 'regression')
sym_linear = mx.sym.LinearRegressionOutput(name = 'regression')
ctx_list = [{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}},
{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}},
{'ctx': mx.gpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float16}},
{'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float64}},
{'ctx': mx.cpu(0), 'regression_data': (2, 2, 10, 10), 'type_dict': {'regression_data': np.float32}}]
check_consistency(sym_logistic, ctx_list)
check_consistency(sym_linear, ctx_list)

if __name__ == '__main__':
test_batchnorm_with_type()
test_convolution_with_type()
@@ -220,6 +248,8 @@ def test_embedding_with_type():
test_fullyconnected_with_type()
test_activation_with_type()
test_embedding_with_type()
test_pooling_with_type()
test_regression_with_type()
#test_softmax_with_shape((3,4), mx.gpu())
#test_multi_softmax_with_shape((3,4,5), mx.gpu())