From 708c933dec2ee382b3b06a847c7120f852a124b9 Mon Sep 17 00:00:00 2001 From: reminisce Date: Wed, 6 Mar 2019 11:30:27 -0800 Subject: [PATCH 01/32] [numpy] Shape support scalar tensor (#14315) * Support scalar and zero-size tensors with np.sum * Add sanity check when ndim is set --- include/mxnet/tensor_blob.h | 1 - include/mxnet/tuple.h | 100 +++++++--- python/mxnet/__init__.py | 1 + python/mxnet/base.py | 29 ++- python/mxnet/ndarray/__init__.py | 2 +- python/mxnet/ndarray/numpy.py | 18 ++ python/mxnet/numpy/__init__.py | 20 ++ python/mxnet/symbol/__init__.py | 2 +- python/mxnet/symbol/numpy.py | 18 ++ src/executor/graph_executor.cc | 3 +- src/executor/infer_graph_attr_pass.cc | 6 +- src/nnvm/plan_memory.cc | 6 +- src/operator/numpy/np_broadcast_reduce_op.h | 186 ++++++++++++++++++ .../numpy/np_broadcast_reduce_op_value.cc | 61 ++++++ .../numpy/np_broadcast_reduce_op_value.cu | 36 ++++ src/operator/operator_common.h | 20 +- 16 files changed, 466 insertions(+), 43 deletions(-) create mode 100644 python/mxnet/ndarray/numpy.py create mode 100644 python/mxnet/numpy/__init__.py create mode 100644 python/mxnet/symbol/numpy.py create mode 100644 src/operator/numpy/np_broadcast_reduce_op.h create mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cc create mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cu diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h index 7d059025b03e..45d4c7fda639 100755 --- a/include/mxnet/tensor_blob.h +++ b/include/mxnet/tensor_blob.h @@ -198,7 +198,6 @@ class TBlob { << "Expected: " << type_flag_ << " v.s. given " << mshadow::DataType::kFlag; return mshadow::Tensor(static_cast(dptr_), shape_.FlatTo2D(), - shape_[shape_.ndim() - 1], stream); } /*! diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index 7c1367333630..39c3c185e3c0 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -17,7 +17,7 @@ * under the License. */ /*! 
- * Copyright (c) 2016 by Contributors + * Copyright (c) 2019 by Contributors * \file mxnet/tuple.h * \brief Data structure Tuple and TShape to store dynamic sized shapes. */ @@ -39,11 +39,14 @@ namespace mxnet { /*! * \brief A dynamic sized array data structure that is optimized for storing - * small number of elements with same type. + * small number of elements with same type. * * Data will be stored in stack when number of elements is small. * It is suitable to hold shape of Tensor. * + * The ndim of a valid tuple is an integer in range [0, inf). + * ndim = 0 means the tuple is empty. + * * \tparam ValueType The type of data stored inside tuple. * \sa TShape */ @@ -61,7 +64,11 @@ class Tuple { * \param s the source tuple */ inline Tuple(const Tuple& s) { - this->assign(s.begin(), s.end()); + if (s.ndim() == -1) { + this->SetDim(-1); + } else { + this->assign(s.begin(), s.end()); + } } /*! * \brief constructor from initializer list @@ -106,6 +113,7 @@ class Tuple { inline void assign(RandomAccessIterator begin, RandomAccessIterator end) { this->SetDim(end - begin); + CHECK_GE(ndim(), 0); std::copy(begin, end, this->begin()); } /*! @@ -124,7 +132,11 @@ class Tuple { * \return reference of self */ inline Tuple& operator=(const Tuple& src) { - this->assign(src.begin(), src.end()); + if (src.ndim() == -1) { + this->SetDim(-1); + } else { + this->assign(src.begin(), src.end()); + } return *this; } /*! @@ -151,6 +163,7 @@ class Tuple { */ inline bool operator==(const Tuple &s) const { if (ndim_ != s.ndim_) return false; + if (ndim() == -1) return true; return std::equal(begin(), end(), s.begin()); } /*! @@ -177,7 +190,7 @@ class Tuple { return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_); } /*! \return number of dimension of the tuple */ - inline uint32_t ndim() const { + inline int ndim() const { return ndim_; } /*! 
@@ -185,7 +198,8 @@ class Tuple { * \param i dimension index * \return the corresponding dimension size */ - inline ValueType& operator[](size_t i) { + inline ValueType& operator[](int i) { + CHECK(i >= 0 && i < ndim()); return begin()[i]; } /*! @@ -193,7 +207,8 @@ class Tuple { * \param i dimension index * \return the corresponding dimension size */ - inline const ValueType& operator[](size_t i) const { + inline const ValueType& operator[](int i) const { + CHECK(i >= 0 && i < ndim()); return begin()[i]; } /*! @@ -220,6 +235,10 @@ class Tuple { * \return the ostream */ friend std::ostream &operator<<(std::ostream &os, const Tuple &t) { + if (t.ndim() == -1) { + os << "UNKNOWN_SHAPE"; + return os; + } os << '['; const ValueType* begin = t.begin(); const ValueType* end = t.end(); @@ -316,22 +335,27 @@ class Tuple { protected: // stack cache size - static const uint32_t kStackCache = 4; + static const int kStackCache = 4; /*! \brief number of dimension of the tuple */ - uint32_t ndim_{0}; + int ndim_{0}; /*! \brief number of cells allocated in data_heap_ */ - uint32_t num_heap_allocated_{0}; + int num_heap_allocated_{0}; /*! \brief in stack space used to store shape when it is small */ ValueType data_stack_[kStackCache]; /*! \brief space to store shape when dimension is big*/ ValueType* data_heap_{nullptr}; // internal function to change the dimension - inline void SetDim(uint32_t ndim) { + inline void SetDim(int ndim) { + CHECK_GE(ndim, -1) << "ndim cannot be less than -1, received " << ndim; if (ndim > kStackCache && ndim > num_heap_allocated_) { delete [] data_heap_; data_heap_ = new ValueType[ndim]; num_heap_allocated_ = ndim; + } else if (ndim == -1 && data_heap_ != nullptr) { + delete [] data_heap_; + data_heap_ = nullptr; + num_heap_allocated_ = 0; } ndim_ = ndim; } @@ -339,25 +363,47 @@ class Tuple { /*! * \brief A Shape class that is used to represent shape of each tensor. + * + * The ndim of a valid shape is an integer in range [-1, inf). 
+ * ndim = -1 means the shape information is unknown and need to be inferred. + * ndim = 0 means the tensor with the shape is a scalar. + * + * The dimension size of a valid shape is an integer in range [-1, inf). + * dim_size = -1 means the size of that dimension is unknown and need to be inferred. + * dim_size = 0 means that dimension is empty. + * + * The definition of ndim = 0 and dim_size = 0 is consistent with NumPy. */ class TShape : public Tuple { public: /*! \brief default constructor */ - TShape() = default; + TShape() { + this->SetDim(-1); + } /*! * constructor to construct a shape with all 1. + * TODO(junwu): The value should default to -1. Need to keep 1 for now + * for backward compatibility. Change it to -1 in the future when we can + * break backward compatibility. * \param ndim the number of dimension + * \param value the dimension size for all dims */ - inline TShape(uint32_t ndim) { // NOLINT(*) + inline TShape(int ndim, int value = 1) { // NOLINT(*) this->SetDim(ndim); - std::fill_n(begin(), ndim, 1); + if (ndim > 0) { + std::fill_n(begin(), ndim, value); + } } /*! * \brief copy constructor of TShape * \param s source shape. */ inline TShape(const Tuple& s) { // NOLINT(*) - this->assign(s.begin(), s.end()); + if (s.ndim() == -1) { + this->SetDim(-1); + } else { + this->assign(s.begin(), s.end()); + } } /*! * \brief constructor from initializer list @@ -390,7 +436,11 @@ class TShape : public Tuple { * \return self. */ inline TShape& operator=(const Tuple& src) { - this->assign(src.begin(), src.end()); + if (src.ndim() == -1) { + this->SetDim(-1); + } else { + this->assign(src.begin(), src.end()); + } return *this; } /*! @@ -404,9 +454,11 @@ class TShape : public Tuple { } /*! 
\return total number of elements in the shape */ inline size_t Size() const { + CHECK_GE(this->ndim(), 0) << "Shape is unknown."; dim_t size = 1; const dim_t* start = begin(), *fin = end(); for (const dim_t* it = start; it != fin; ++it) { + CHECK_GE(*it, 0) << "Shape dim size cannot be -1, which means unknown."; size *= *it; } return size; @@ -417,9 +469,11 @@ class TShape : public Tuple { * \param dimend end dimension */ inline size_t ProdShape(int dimstart, int dimend) const { + CHECK_GE(this->ndim(), 0) << "Shape is unknown."; dim_t num = 1; const dim_t *d = this->data(); for (int i = dimstart; i < dimend; ++i) { + CHECK_GE(d[i], 0) << "Shape dim size cannot be -1, which means unknown."; num *= d[i]; } return num; @@ -460,7 +514,7 @@ class TShape : public Tuple { */ template inline mshadow::Shape get() const { - CHECK_EQ(dim, static_cast(ndim())) + CHECK_EQ(dim, ndim()) << "dimension do not match target dimension " << dim << " vs " << ndim(); const dim_t *d = this->data(); mshadow::Shape s; @@ -475,11 +529,12 @@ class TShape : public Tuple { */ inline mshadow::Shape<2> FlatTo2D(void) const { mshadow::Shape<2> s; - if (ndim() == 0) return mshadow::Shape2(0, 0); + CHECK_GE(ndim(), 0); + if (ndim() == 0) return mshadow::Shape2(1, 1); const dim_t *d = this->data(); s.shape_[1] = d[ndim() - 1]; dim_t ymax = 1; - for (size_t i = 1; i < ndim(); ++i) { + for (int i = 1; i < ndim(); ++i) { ymax *= d[i - 1]; } s.shape_[0] = ymax; @@ -494,7 +549,8 @@ class TShape : public Tuple { inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const { CHECK(axis_end >= axis_begin); mshadow::Shape<3> s; - if (ndim() == 0) return mshadow::Shape3(0, 0, 0); + CHECK_GE(ndim(), 0); + if (ndim() == 0) return mshadow::Shape3(1, 1, 1); const dim_t *d = this->data(); s.shape_[0] = 1; s.shape_[1] = 1; @@ -506,7 +562,7 @@ class TShape : public Tuple { for (size_t i = axis_begin; i <= axis_end; ++i) { s.shape_[1] *= d[i]; } - for (size_t i = axis_end + 1; i < ndim(); ++i) { + for 
(int i = axis_end + 1; i < ndim(); ++i) { s.shape_[2] *= d[i]; } return s; @@ -627,7 +683,7 @@ struct hash { size_t operator()(const mxnet::TShape& val) const { std::hash hash_uint; size_t res = hash_uint(val.ndim()); - for (uint32_t i = 0; i < val.ndim(); ++i) { + for (int i = 0; i < val.ndim(); ++i) { res = dmlc::HashCombine(res, val[i]); } return res; diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 374a3b50bbb5..8db83a286157 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -25,6 +25,7 @@ from . import engine from .base import MXNetError from . import base +from . import numpy from . import contrib from . import ndarray from . import ndarray as nd diff --git a/python/mxnet/base.py b/python/mxnet/base.py index feb4d70b6533..7793deacf44c 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -26,7 +26,7 @@ import sys import inspect import platform -import numpy as np +import numpy as _np from . import libinfo @@ -44,8 +44,8 @@ long = int # pylint: enable=pointless-statement -integer_types = (int, long, np.int32, np.int64) -numeric_types = (float, int, long, np.generic) +integer_types = (int, long, _np.int32, _np.int64) +numeric_types = (float, int, long, _np.generic) string_types = basestring, if sys.version_info[0] > 2: @@ -216,7 +216,7 @@ def _load_lib(): mx_uint = ctypes.c_uint mx_float = ctypes.c_float mx_float_p = ctypes.POINTER(mx_float) -mx_real_t = np.float32 +mx_real_t = _np.float32 NDArrayHandle = ctypes.c_void_p FunctionHandle = ctypes.c_void_p OpHandle = ctypes.c_void_p @@ -455,7 +455,7 @@ def ctypes2numpy_shared(cptr, shape): for s in shape: size *= s dbuffer = (mx_float * size).from_address(ctypes.addressof(cptr.contents)) - return np.frombuffer(dbuffer, dtype=np.float32).reshape(shape) + return _np.frombuffer(dbuffer, dtype=_np.float32).reshape(shape) def build_param_doc(arg_names, arg_types, arg_descs, remove_dup=True): @@ -560,7 +560,7 @@ def _as_list(obj): return [obj] -_OP_NAME_PREFIX_LIST = 
['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_'] +_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_', '_numpy_'] def _get_op_name_prefix(op_name): @@ -606,6 +606,13 @@ def _init_op_module(root_namespace, module_name, make_op_func): # use mx.nd.contrib or mx.sym.contrib from now on contrib_module_name_old = "%s.contrib.%s" % (root_namespace, module_name) contrib_module_old = sys.modules[contrib_module_name_old] + # special handling of registering numpy ops + if module_name == 'ndarray': + numpy_module_name = "%s.numpy" % root_namespace + numpy_module = sys.modules[numpy_module_name] + else: + numpy_module_name = None + numpy_module = None submodule_dict = {} for op_name_prefix in _OP_NAME_PREFIX_LIST: submodule_dict[op_name_prefix] =\ @@ -644,6 +651,16 @@ def _init_op_module(root_namespace, module_name, make_op_func): function.__module__ = contrib_module_name_old setattr(contrib_module_old, function.__name__, function) contrib_module_old.__all__.append(function.__name__) + elif op_name_prefix == '_numpy_' and numpy_module_name is not None: + # only register numpy ops under mxnet.numpy in imperative mode + hdl = OpHandle() + check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl))) + # TODO(reminisce): Didn't consider third level module here, e.g. mxnet.numpy.random. + func_name = name[len(op_name_prefix):] + function = make_op_func(hdl, name, func_name) + function.__module__ = numpy_module_name + setattr(numpy_module, function.__name__, function) + numpy_module.__all__.append(function.__name__) def _generate_op_module_signature(root_namespace, module_name, op_code_gen_func): diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py index f09908e894d5..a102399521cc 100644 --- a/python/mxnet/ndarray/__init__.py +++ b/python/mxnet/ndarray/__init__.py @@ -17,7 +17,7 @@ """NDArray API of MXNet.""" -from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray +from . 
import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray, numpy # pylint: disable=wildcard-import, redefined-builtin try: from .gen_op import * # pylint: disable=unused-wildcard-import diff --git a/python/mxnet/ndarray/numpy.py b/python/mxnet/ndarray/numpy.py new file mode 100644 index 000000000000..0826ac8aca7f --- /dev/null +++ b/python/mxnet/ndarray/numpy.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +__all__ = [] diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py new file mode 100644 index 000000000000..b1139a05791d --- /dev/null +++ b/python/mxnet/numpy/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +__all__ = [] diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py index f438e4954aa9..326e4f5aff78 100644 --- a/python/mxnet/symbol/__init__.py +++ b/python/mxnet/symbol/__init__.py @@ -17,7 +17,7 @@ """Symbol API of MXNet.""" -from . import _internal, contrib, linalg, op, random, sparse, image, symbol +from . import _internal, contrib, linalg, op, random, sparse, image, symbol, numpy # pylint: disable=wildcard-import, redefined-builtin try: from .gen_op import * # pylint: disable=unused-wildcard-import diff --git a/python/mxnet/symbol/numpy.py b/python/mxnet/symbol/numpy.py new file mode 100644 index 000000000000..0826ac8aca7f --- /dev/null +++ b/python/mxnet/symbol/numpy.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +__all__ = [] diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index 460cec371bd4..9e1b66d1b286 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -34,6 +34,7 @@ #include "../common/utils.h" #include "../common/exec_utils.h" #include "../operator/subgraph/subgraph_property.h" +#include "../operator/operator_common.h" namespace mxnet { namespace exec { @@ -966,7 +967,7 @@ void GraphExecutor::InitDataEntryMemory(std::vector* shared_pool) { uint32_t oid = head_grad_map_.at(idx[nid].source); uint32_t eid = idx.entry_id(idx.outputs()[oid]); NDArrayStorageType stype = (NDArrayStorageType) vstorage_type[eid]; - CHECK_NE(vshape[eid].ndim(), 0U); + CHECK(mxnet::op::shape_is_known(vshape[eid])); CHECK_NE(vdtype[eid], -1); auto data_eid = idx.entry_id(nid, 0); // initialize based on storage_type diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc index 6a7fde62c2cf..aa72661e78b2 100644 --- a/src/executor/infer_graph_attr_pass.cc +++ b/src/executor/infer_graph_attr_pass.cc @@ -648,14 +648,14 @@ nnvm::Graph InferShape(nnvm::Graph&& graph, std::move(graph), mxnet::TShape(), "FInferShape", "shape_inputs", "shape_attr_key", "shape", "shape_num_unknown_nodes", - [](const mxnet::TShape& s) { return s.ndim() == 0 || s.Size() == 0; }, + [](const mxnet::TShape& s) { return !mxnet::op::shape_is_known(s); }, [](const mxnet::TShape& s) { - if (s.ndim() == 0) { // TODO(reminisce): Usage of ndim + if (s.ndim() == -1) { return static_cast(1); } size_t ret = 0; for (const auto& val : s) { - if (val == 0) { + if (val == -1) { ++ret; } } diff --git a/src/nnvm/plan_memory.cc b/src/nnvm/plan_memory.cc index 2b18f990c845..0dc7e6ddb1d9 100644 --- a/src/nnvm/plan_memory.cc +++ b/src/nnvm/plan_memory.cc @@ -30,6 +30,7 @@ #include #include #include "graph_algorithm.h" +#include "../operator/operator_common.h" namespace nnvm { namespace pass { @@ -75,7 +76,7 @@ class GraphAllocator { // request a free 
storage StorageID Request(int dev_id, int dtype, mxnet::TShape shape, uint32_t node_id) { - if (shape.ndim() == 0) return kBadStorageID; + if (!mxnet::op::shape_is_known(shape)) return kBadStorageID; // search memory block in [size / match_range_, size * match_range_) // TODO(tqchen) add size of the dtype, assume 4 bytes for now size_t size = shape.Size() * 4; @@ -267,8 +268,7 @@ size_t AllocMemory(const Graph& ret, const IndexedGraph& idx, // only request memory for kBadStorageID if (storage[eid] == GraphAllocator::kBadStorageID) { auto &eshape = shape_vec[eid]; - size_t esize = 0; - if (eshape.ndim() != 0) esize = eshape.Size(); + size_t esize = eshape.Size(); eids.insert(std::make_pair(esize, eid)); } } diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h new file mode 100644 index 000000000000..bb2b7fca231c --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2015 by Contributors + * \file broadcast_reduce_op.h + * \brief Function definition of broadcast and reduce operators + */ +#ifndef MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ +#define MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ + +#include "../tensor/broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +struct NumpyReduceAxesParam : public dmlc::Parameter { + dmlc::optional> axis; + dmlc::optional dtype; + bool keepdims; + dmlc::optional initial; + DMLC_DECLARE_PARAMETER(NumpyReduceAxesParam) { + DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional>()) + .describe(R"code()code"); + DMLC_DECLARE_FIELD(dtype).set_default(dmlc::optional()) + .describe(""); + DMLC_DECLARE_FIELD(keepdims).set_default(false) + .describe("If this is set to `True`, the reduced axes are left " + "in the result as dimension with size one."); + } +}; + +inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, + const dmlc::optional>& axis, + bool keepdims) { + // TODO(junwu): improve the logic + // If input is a scalar, output should be a scalar too + if (ishape.ndim() == 0) { + if (axis.has_value()) { + const nnvm::Tuple& axes = axis.value(); + if (axes.ndim() > 0) { + CHECK_EQ(axes.ndim(), 1); + CHECK(axes[0] == 0 || axes[0] == -1); + } + } + return TShape(0); + } + + // axis=None, do global reduction + if (!axis.has_value()) { + if (keepdims) { + return TShape(ishape.ndim(), 1); + } else { + return TShape(0); + } + } + + // axis = (), will return identity(input) + if (axis.value().ndim() == 0) { + return ishape; + } + + // axis has value + nnvm::Tuple axes(axis.value()); + for (index_t i = 0; i < axes.ndim(); i++) { + if (axes[i] < 0) { + axes[i] += ishape.ndim(); + } + } + std::sort(axes.begin(), axes.end()); + + for (index_t i = 1; i < axes.ndim(); i++) { + CHECK_LT(axes[i-1], axes[i]) + << "Reduction axes have duplicates " + << axes; + } + CHECK_LT(axes[axes.ndim()-1], ishape.ndim()) + << "Reduction axis " << axes[axes.ndim()-1] + << " Exceeds input 
dimensions " << ishape; + CHECK_GE(axes[0], 0) + << "Reduction axis " << axis.value() + << " Exceeds input dimensions " << ishape; + + TShape oshape; + if (keepdims) { + oshape = TShape(ishape); + } else { + oshape = TShape(ishape.ndim() - axes.ndim()); + } + + if (keepdims) { + for (index_t i = 0; i < axes.ndim(); ++i) { + oshape[axes[i]] = 1; + } + } else { + for (index_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) { + if (j < axes.ndim() && i == axes[j]) { + ++j; + continue; + } + oshape[k++] = ishape[i]; + } + } + return oshape; +} + +inline bool NumpyReduceAxesShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, + NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims)); + return shape_is_known(out_attrs->at(0)); +} + +template +void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); + if (param.axis.has_value() && param.axis.value().ndim() == 0) { + UnaryOp::IdentityCompute(attrs, ctx, inputs, req, outputs); + } + TShape small; + if (param.keepdims) { + small = outputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true); + } + + ReduceAxesComputeImpl(ctx, inputs, req, outputs, small); +} + +template +inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); + TShape small; + if (param.keepdims) { + small = inputs[0].shape_; + } 
else { + small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true); + } + + BroadcastComputeImpl(attrs, ctx, inputs, req, outputs, small); + if (normalize) { + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { + Tensor igrad = outputs[0].FlatTo1D(s); + igrad /= scalar(outputs[0].Size()/inputs[0].Size()); + }); + } +} + +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc new file mode 100644 index 000000000000..c028e2368737 --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_reduce_op_value.cc + * \brief CPU Implementation of broadcast and reduce functions based on value. 
+ */ + +#include "np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam); + +NNVM_REGISTER_OP(_numpy_sum) +.describe(R"code()code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReduceAxesShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyReduceAxesParam::__FIELDS__()) +.set_attr("FCompute", NumpyReduceAxesCompute) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_numpy_sum"}); + +NNVM_REGISTER_OP(_backward_numpy_sum) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_num_inputs(1) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu new file mode 100644 index 000000000000..c975b18226db --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cu @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_reduce_op_value.cu + * \brief GPU Implementation of reduce functions based on value. + */ +#include "np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { +NNVM_REGISTER_OP(_numpy_sum) +.set_attr("FCompute", NumpyReduceAxesCompute); + +NNVM_REGISTER_OP(_backward_numpy_sum) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index f629534dabd0..a461d2bc4cef 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -108,6 +108,16 @@ inline bool shape_is_none(const mxnet::TShape& x) { return x.ndim() == 0 || x.Size() == 0; } +/*! brief check if shape is known using the NumPy compatible definition. + * zero-dim and zero-size tensors are valid. -1 means unknown.*/ +inline bool shape_is_known(const TShape& x) { + if (x.ndim() == -1) return false; + for (int i = 0; i < x.ndim(); ++i) { + if (x[i] == -1) return false; + } + return true; +} + /*! \brief check if type is none (-1) */ inline bool type_is_none(const int& x) { return x == -1; @@ -159,16 +169,16 @@ inline std::string type_string(const int& x) { * \return whether x and y are compatible. 
*/ inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) { - if (y->ndim() == 0) { + if (y->ndim() == -1) { *y = x; return true; } else if (y->ndim() != x.ndim()) { - return x.ndim() == 0; + return x.ndim() == -1; } else { - for (size_t i = 0; i < y->ndim(); ++i) { - if ((*y)[i] == 0) { + for (int i = 0; i < y->ndim(); ++i) { + if ((*y)[i] == -1) { (*y)[i] = x[i]; - } else if ((*y)[i] != x[i] && x[i] != 0) { + } else if ((*y)[i] != x[i] && x[i] >= 0) { return false; } } From a297a9ab98c4dd56a2d1d7922058207336fdefef Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Fri, 15 Mar 2019 12:56:04 -0700 Subject: [PATCH 02/32] [Numpy] Change semantics of ndim for operators in `src/operator/contrib` (#14409) * Initial commit * Address comments --- src/operator/contrib/adamw-inl.h | 5 +-- .../contrib/adaptive_avg_pooling-inl.h | 6 ++-- src/operator/contrib/bilinear_resize-inl.h | 2 +- src/operator/contrib/boolean_mask.cc | 2 +- src/operator/contrib/bounding_box-inl.h | 4 ++- src/operator/contrib/count_sketch-inl.h | 2 +- .../contrib/deformable_convolution-inl.h | 14 ++++---- src/operator/contrib/dgl_graph.cc | 32 ++++++------------- src/operator/contrib/fft-inl.h | 2 +- src/operator/contrib/ifft-inl.h | 2 +- src/operator/contrib/index_copy-inl.h | 3 +- src/operator/contrib/multi_proposal-inl.h | 2 +- src/operator/contrib/nnvm_to_onnx.cc | 3 +- src/operator/contrib/optimizer_op.cc | 2 +- src/operator/contrib/proposal-inl.h | 2 +- src/operator/contrib/quadratic_op-inl.h | 2 +- src/operator/contrib/sync_batch_norm-inl.h | 2 +- src/operator/contrib/transformer-inl.h | 4 ++- 18 files changed, 41 insertions(+), 50 deletions(-) diff --git a/src/operator/contrib/adamw-inl.h b/src/operator/contrib/adamw-inl.h index 07feaefe87aa..6ae9e46b7def 100644 --- a/src/operator/contrib/adamw-inl.h +++ b/src/operator/contrib/adamw-inl.h @@ -87,8 +87,9 @@ inline bool MPUpdateInferShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 
static_cast(total_in)) << " in operator " << attrs.name; CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << attrs.name; - // rescale_grad.shape = (1,) - SHAPE_ASSIGN_CHECK(*in_attrs, total_in - 1, mshadow::Shape1(1)); + // rescale_grad.shape = () + SHAPE_ASSIGN_CHECK(*in_attrs, total_in - 1, mxnet::TShape()); + // TODO(@reminisce): change "none" behavior in ElemwiseAttr return ElemwiseAttr( attrs, in_attrs, out_attrs, mxnet::TShape()); } diff --git a/src/operator/contrib/adaptive_avg_pooling-inl.h b/src/operator/contrib/adaptive_avg_pooling-inl.h index 0d66de0a5692..eedab78db0c5 100644 --- a/src/operator/contrib/adaptive_avg_pooling-inl.h +++ b/src/operator/contrib/adaptive_avg_pooling-inl.h @@ -48,9 +48,9 @@ namespace mxnet { namespace op { struct AdaptiveAvgPoolParam : public dmlc::Parameter { - mxnet::TShape output_size; + mxnet::Tuple output_size; DMLC_DECLARE_PARAMETER(AdaptiveAvgPoolParam) { - DMLC_DECLARE_FIELD(output_size).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(output_size).set_default(mxnet::Tuple()) .describe("int (output size) or a tuple of int for output (height, width)."); } }; @@ -125,7 +125,7 @@ static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_shape->size(), 1U) << "Output:[data]"; const AdaptiveAvgPoolParam& param = nnvm::get(attrs.parsed); mxnet::TShape dshape(in_shape->at(0)); - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; if (param.output_size.ndim() == 0) { dshape[2] = 1; dshape[3] = 1; diff --git a/src/operator/contrib/bilinear_resize-inl.h b/src/operator/contrib/bilinear_resize-inl.h index 46c8e1aa7c0d..ce9c6c83504c 100644 --- a/src/operator/contrib/bilinear_resize-inl.h +++ b/src/operator/contrib/bilinear_resize-inl.h @@ -134,7 +134,7 @@ static bool BilinearSampleOpInferShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_shape->size(), 1U) << "Output:[data]"; const BilinearSampleParam& param = nnvm::get(attrs.parsed); mxnet::TShape 
dshape(in_shape->at(0)); - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; if (param.scale_height.has_value()) { dshape[2] = static_cast(param.scale_height.value() * in_shape->at(0)[2]); } else { diff --git a/src/operator/contrib/boolean_mask.cc b/src/operator/contrib/boolean_mask.cc index e22c493d5e2c..06d8439e23a0 100644 --- a/src/operator/contrib/boolean_mask.cc +++ b/src/operator/contrib/boolean_mask.cc @@ -121,7 +121,7 @@ inline void BooleanMaskForward(const nnvm::NodeAttrs& attrs, const NDArray &out = outputs[0]; CHECK_EQ(axis, 0) << "Not supported yet"; CHECK_EQ(data.shape()[axis], idx.shape()[0]); - CHECK_EQ(idx.shape().ndim(), 1U); + CHECK_EQ(idx.shape().ndim(), 1U); // idx is required to be 1-d. // count the number of 1s in `idx`, so that we could know the output dimension size_t idx_size = idx.shape()[0]; std::vector prefix_sum(idx_size, 0); diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h index 37c4297ff49d..059327ef8334 100644 --- a/src/operator/contrib/bounding_box-inl.h +++ b/src/operator/contrib/bounding_box-inl.h @@ -94,7 +94,9 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs, const BoxNMSParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 2U); - if (in_attrs->at(0).ndim() == 0U && out_attrs->at(0).ndim() == 0U) { + // TODO(@junrushao1994): verify with Joshua Z. 
Zhang about this operator + if (mxnet::op::shape_is_none(in_attrs->at(0)) + && mxnet::op::shape_is_none(out_attrs->at(0))) { return false; } diff --git a/src/operator/contrib/count_sketch-inl.h b/src/operator/contrib/count_sketch-inl.h index f3a294f6ad46..3ea93e63d6fc 100644 --- a/src/operator/contrib/count_sketch-inl.h +++ b/src/operator/contrib/count_sketch-inl.h @@ -151,7 +151,7 @@ class CountSketchProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 3) <<"Input:[data, h, s]"; const mxnet::TShape &dshape = (*in_shape)[CountSketch::kData]; // require data to be known - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; out_shape->clear(); if (dshape.ndim() == 4) { diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h index f50641fca6d6..3e96cad1c859 100644 --- a/src/operator/contrib/deformable_convolution-inl.h +++ b/src/operator/contrib/deformable_convolution-inl.h @@ -69,11 +69,11 @@ struct DeformableConvolutionParam : public dmlc::Parameter layout; DMLC_DECLARE_PARAMETER(DeformableConvolutionParam) { DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0)) .describe("Convolution stride: (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0)) .describe("Convolution dilate: (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0)) .describe("Zero pad for convolution: (h, w) or (d, h, w). 
Defaults to no padding."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Convolution filter(channel) number"); @@ -347,9 +347,9 @@ class DeformableConvolutionProp : public OperatorProperty { param_.Init(kwargs); if (param_.kernel.ndim() == 2) { param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCHW; - if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); - if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); + if (mxnet::op::shape_is_none(param_.stride)) param_.stride = Shape2(1, 1); + if (mxnet::op::shape_is_none(param_.dilate)) param_.dilate = Shape2(1, 1); + if (mxnet::op::shape_is_none(param_.pad)) param_.pad = Shape2(0, 0); } else { LOG(FATAL) << "not implemented"; } @@ -371,7 +371,7 @@ class DeformableConvolutionProp : public OperatorProperty { out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv::kData]; const mxnet::TShape &oshp = (*in_shape)[conv::kOffset]; - if (dshp.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshp)) return false; if (param_.kernel.ndim() == 2) { // 2d conv CHECK_EQ(dshp.ndim(), 4U) \ diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index f19af84ce9c6..02ef2cee1caa 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -265,9 +265,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, out_shape[0] = params.max_num_vertices + 1; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i, out_shape); - success = success && - out_attrs->at(i).ndim() != 0U && - out_attrs->at(i).Size() != 0U; + success = success && !mxnet::op::shape_is_none(out_attrs->at(i)); } // sub_csr mxnet::TShape out_csr_shape(2); @@ -275,18 +273,14 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, out_csr_shape[1] = in_attrs->at(0)[1]; for (size_t i = 0; i < num_subgraphs; i++) { 
SHAPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, out_csr_shape); - success = success && - out_attrs->at(i + num_subgraphs).ndim() != 0U && - out_attrs->at(i + num_subgraphs).Size() != 0U; + success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs)); } // sub_layer mxnet::TShape out_layer_shape(1); out_layer_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_layer_shape); - success = success && - out_attrs->at(i + 2*num_subgraphs).ndim() != 0U && - out_attrs->at(i + 2*num_subgraphs).Size() != 0U; + success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 2 * num_subgraphs)); } return success; @@ -323,9 +317,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, out_shape[0] = params.max_num_vertices + 1; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i, out_shape); - success = success && - out_attrs->at(i).ndim() != 0U && - out_attrs->at(i).Size() != 0U; + success = success && !mxnet::op::shape_is_none(out_attrs->at(i)); } // sub_csr mxnet::TShape out_csr_shape(2); @@ -333,27 +325,21 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, out_csr_shape[1] = in_attrs->at(0)[1]; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, out_csr_shape); - success = success && - out_attrs->at(i + num_subgraphs).ndim() != 0U && - out_attrs->at(i + num_subgraphs).Size() != 0U; + success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs)); } // sub_probability mxnet::TShape out_prob_shape(1); out_prob_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_prob_shape); - success = success && - out_attrs->at(i + 2*num_subgraphs).ndim() != 0U && - out_attrs->at(i + 2*num_subgraphs).Size() != 0U; + success = success && 
!mxnet::op::shape_is_none(out_attrs->at(i + 2 * num_subgraphs)); } // sub_layer mxnet::TShape out_layer_shape(1); out_layer_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, out_prob_shape); - success = success && - out_attrs->at(i + 3*num_subgraphs).ndim() != 0U && - out_attrs->at(i + 3*num_subgraphs).Size() != 0U; + success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 3 * num_subgraphs)); } return success; @@ -1199,7 +1185,7 @@ inline bool EdgeIDShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(1)); SHAPE_ASSIGN_CHECK(*in_attrs, 1, out_attrs->at(0)); SHAPE_ASSIGN_CHECK(*in_attrs, 2, out_attrs->at(0)); - return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; + return !mxnet::op::shape_is_none(out_attrs->at(0)); } inline bool EdgeIDType(const nnvm::NodeAttrs& attrs, @@ -1357,7 +1343,7 @@ inline bool DGLAdjacencyShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; + return !mxnet::op::shape_is_none(out_attrs->at(0)); } inline bool DGLAdjacencyType(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/contrib/fft-inl.h b/src/operator/contrib/fft-inl.h index 247f6290c02a..a5471b4ba2e2 100644 --- a/src/operator/contrib/fft-inl.h +++ b/src/operator/contrib/fft-inl.h @@ -241,7 +241,7 @@ class FFTProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1) <<"Input:[data]"; const mxnet::TShape &dshape = (*in_shape)[fft::kData]; // require data to be known - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; out_shape->clear(); if (dshape.ndim() == 4) { diff --git a/src/operator/contrib/ifft-inl.h b/src/operator/contrib/ifft-inl.h index e53c0f60fa9e..7d8422e838b1 100644 --- a/src/operator/contrib/ifft-inl.h +++ 
b/src/operator/contrib/ifft-inl.h @@ -231,7 +231,7 @@ class IFFTProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1) <<"Input:[data]"; const mxnet::TShape &dshape = (*in_shape)[ifft::kData]; // require data to be known - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; out_shape->clear(); if (dshape.ndim() == 4) { diff --git a/src/operator/contrib/index_copy-inl.h b/src/operator/contrib/index_copy-inl.h index 903dee13272b..35f88916da20 100644 --- a/src/operator/contrib/index_copy-inl.h +++ b/src/operator/contrib/index_copy-inl.h @@ -76,8 +76,7 @@ inline bool IndexCopyShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->at(1)[0], in_attrs->at(2)[0]); SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return out_attrs->at(0).ndim() != 0U && - out_attrs->at(0).Size() != 0U; + return !mxnet::op::shape_is_none(out_attrs->at(0)); } } // namespace op diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h index 4b9a41c2fa87..a9afb8e4114e 100644 --- a/src/operator/contrib/multi_proposal-inl.h +++ b/src/operator/contrib/multi_proposal-inl.h @@ -108,7 +108,7 @@ class MultiProposalProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]"; const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb); - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; Shape<4> bbox_pred_shape; bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]); SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred, diff --git a/src/operator/contrib/nnvm_to_onnx.cc b/src/operator/contrib/nnvm_to_onnx.cc index 0417a085616a..0c8bd79490e3 100644 --- a/src/operator/contrib/nnvm_to_onnx.cc +++ b/src/operator/contrib/nnvm_to_onnx.cc @@ -417,7 +417,8 @@ std::unordered_map GetPlaceholderShapes( for (uint32_t i = 0; i <
shape_inputs.size(); ++i) { std::string name = ig[ig.input_nodes()[i]].source->attrs.name; mxnet::TShape shp = shape_inputs[i]; - if (shp.ndim() > 0) { + if (!mxnet::op::shape_is_none(shp)) { + // TODO(@reminisce): confirm placeholder_shapes.emplace(name, shp); } } diff --git a/src/operator/contrib/optimizer_op.cc b/src/operator/contrib/optimizer_op.cc index 9f948bad81b6..83bbcdab833d 100644 --- a/src/operator/contrib/optimizer_op.cc +++ b/src/operator/contrib/optimizer_op.cc @@ -45,7 +45,7 @@ inline bool GroupAdagradShape(const nnvm::NodeAttrs &attrs, SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); SHAPE_ASSIGN_CHECK(*in_attrs, 1, out_attrs->at(0)); - return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U && + return !mxnet::op::shape_is_none(out_attrs->at(0)) && (in_attrs->at(0)[0] == in_attrs->at(1)[0]) && (in_attrs->at(0)[0] == in_attrs->at(2)[0]); } diff --git a/src/operator/contrib/proposal-inl.h b/src/operator/contrib/proposal-inl.h index 9908ca96ec5f..21e9fe198e63 100644 --- a/src/operator/contrib/proposal-inl.h +++ b/src/operator/contrib/proposal-inl.h @@ -106,7 +106,7 @@ class ProposalProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]"; const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb); - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; Shape<4> bbox_pred_shape; bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]); SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred, diff --git a/src/operator/contrib/quadratic_op-inl.h b/src/operator/contrib/quadratic_op-inl.h index e679fedc8e57..a7aca63de17a 100644 --- a/src/operator/contrib/quadratic_op-inl.h +++ b/src/operator/contrib/quadratic_op-inl.h @@ -60,7 +60,7 @@ inline bool QuadraticOpShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); - return 
out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U; + return !mxnet::op::shape_is_none(out_attrs->at(0)); } inline bool QuadraticOpType(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/contrib/sync_batch_norm-inl.h b/src/operator/contrib/sync_batch_norm-inl.h index 1e6ab25db0e2..cd1a3285fe06 100644 --- a/src/operator/contrib/sync_batch_norm-inl.h +++ b/src/operator/contrib/sync_batch_norm-inl.h @@ -482,7 +482,7 @@ class SyncBatchNormProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (mxnet::op::shape_is_none(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); diff --git a/src/operator/contrib/transformer-inl.h b/src/operator/contrib/transformer-inl.h index 01faf244aff9..da3d14e33cf4 100644 --- a/src/operator/contrib/transformer-inl.h +++ b/src/operator/contrib/transformer-inl.h @@ -41,7 +41,9 @@ static void DivSqrtDimForward_(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { mshadow::Stream *s = ctx.get_stream(); - double sqrt_dim = std::sqrt(static_cast(inputs[0].shape_[inputs[0].ndim() - 1])); + CHECK_GE(inputs[0].ndim(), 1); + int last_idx = inputs[0].ndim() - 1; + double sqrt_dim = std::sqrt(static_cast(inputs[0].shape_[last_idx])); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { mxnet_op::Kernel, xpu>::Launch( From 1179a5992ab0c5c4ae26b741522856c6eab3c190 Mon Sep 17 00:00:00 2001 From: reminisce Date: Sun, 17 Mar 2019 21:26:12 -0700 Subject: [PATCH 03/32] [WIP] Use new shape definition (#14453) * Init checkin * Fix ndarray alloc bug * Use TShape(0) as default empty tuple params * Fix bugs * Fix TShape init value * Fix infer shape pass shape type and reshape infer shape func --- include/mxnet/c_api.h | 40 +-- 
include/mxnet/ndarray.h | 9 +- include/mxnet/tuple.h | 37 ++- python/mxnet/base.py | 1 + python/mxnet/executor.py | 6 +- python/mxnet/ndarray/ndarray.py | 10 +- python/mxnet/symbol/symbol.py | 20 +- src/c_api/c_api.cc | 15 +- src/c_api/c_api_common.h | 21 +- src/c_api/c_api_executor.cc | 4 +- src/c_api/c_api_symbolic.cc | 28 +- src/common/exec_utils.h | 4 +- src/executor/graph_executor.cc | 2 +- src/executor/infer_graph_attr_pass.cc | 2 +- src/io/image_io.cc | 4 +- src/ndarray/ndarray.cc | 2 +- src/nnvm/plan_memory.cc | 2 +- src/operator/batch_norm_v1-inl.h | 2 +- src/operator/bilinear_sampler-inl.h | 4 +- src/operator/contrib/bounding_box-inl.h | 8 +- .../contrib/deformable_convolution-inl.h | 4 +- src/operator/contrib/dgl_graph.cc | 32 +- src/operator/contrib/multibox_detection-inl.h | 2 +- src/operator/contrib/multibox_prior-inl.h | 4 +- src/operator/control_flow.cc | 112 +++---- src/operator/convolution_v1-inl.h | 8 +- src/operator/image/image_random-inl.h | 4 +- src/operator/image/resize-inl.h | 4 +- src/operator/leaky_relu-inl.h | 2 +- src/operator/loss_binary_op-inl.h | 2 +- src/operator/nn/batch_norm.cc | 2 +- src/operator/nn/concat.cc | 25 +- src/operator/nn/convolution-inl.h | 18 +- src/operator/nn/convolution.cc | 2 +- src/operator/nn/ctc_loss-inl.h | 2 +- src/operator/nn/cudnn/cudnn_batch_norm.cc | 2 +- src/operator/nn/cudnn/cudnn_convolution-inl.h | 4 +- .../nn/cudnn/cudnn_deconvolution-inl.h | 4 +- src/operator/nn/deconvolution-inl.h | 10 +- src/operator/nn/deconvolution.cc | 2 +- src/operator/nn/dropout-inl.h | 2 +- src/operator/nn/dropout.cc | 4 +- src/operator/nn/fully_connected.cc | 4 +- src/operator/nn/im2col.h | 4 +- src/operator/nn/layer_norm-inl.h | 2 +- src/operator/nn/layer_norm.cc | 6 +- src/operator/nn/lrn.cc | 2 +- src/operator/nn/pooling-inl.h | 14 +- src/operator/nn/pooling.cc | 6 +- src/operator/nn/upsampling.cc | 2 +- src/operator/numpy/np_broadcast_reduce_op.h | 6 +- src/operator/operator_common.h | 19 +- 
src/operator/operator_util.cc | 2 +- src/operator/quantization/dequantize-inl.h | 2 +- src/operator/quantization/quantize-inl.h | 2 +- src/operator/quantization/quantized_concat.cc | 2 +- .../quantization/quantized_flatten-inl.h | 6 +- .../quantization/quantized_fully_connected.cc | 2 +- .../quantization/quantized_pooling.cc | 4 +- src/operator/random/multisample_op.h | 2 +- src/operator/random/sample_multinomial_op.h | 18 +- src/operator/random/unique_sample_op.h | 2 +- src/operator/regression_output-inl.h | 2 +- src/operator/sequence_last-inl.h | 2 +- src/operator/slice_channel-inl.h | 4 +- src/operator/softmax_output-inl.h | 12 +- src/operator/softmax_output.cc | 12 +- src/operator/spatial_transformer-inl.h | 4 +- src/operator/subgraph_op_common.cc | 4 +- src/operator/subgraph_op_common.h | 12 +- src/operator/svm_output-inl.h | 6 +- src/operator/swapaxis-inl.h | 4 +- src/operator/tensor/broadcast_reduce_op.h | 58 ++-- src/operator/tensor/diag_op-inl.h | 10 +- src/operator/tensor/dot-inl.h | 12 +- .../tensor/elemwise_binary_broadcast_op.h | 42 +-- .../tensor/elemwise_unary_op_basic.cc | 10 +- src/operator/tensor/histogram-inl.h | 6 +- src/operator/tensor/indexing_op.h | 29 +- src/operator/tensor/init_op.h | 6 +- src/operator/tensor/la_op.h | 2 +- src/operator/tensor/matrix_op-inl.h | 299 +++++++++--------- src/operator/tensor/matrix_op.cc | 4 +- src/operator/tensor/ordering_op-inl.h | 2 +- src/operator/tensor/slice-inl.h | 6 +- tests/python/unittest/test_operator.py | 2 +- 86 files changed, 573 insertions(+), 556 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 2f9d74dc5ba0..cdc20b1e3970 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -775,8 +775,8 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, - mx_uint *out_dim, - const mx_uint **out_pdata); + int *out_dim, + const int **out_pdata); /*! 
* \brief get the content of the data in NDArray * \param handle the handle to the ndarray @@ -1493,16 +1493,16 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const int *arg_shape_data, mx_uint *in_shape_size, - const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const int **in_shape_ndim, + const int ***in_shape_data, mx_uint *out_shape_size, - const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const int **out_shape_ndim, + const int ***out_shape_data, mx_uint *aux_shape_size, - const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const int **aux_shape_ndim, + const int ***aux_shape_data, int *complete); /*! * \brief partially infer shape of unknown input shapes given the known one. @@ -1532,16 +1532,16 @@ MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const int *arg_shape_data, mx_uint *in_shape_size, - const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const int **in_shape_ndim, + const int ***in_shape_data, mx_uint *out_shape_size, - const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const int **out_shape_ndim, + const int ***out_shape_data, mx_uint *aux_shape_size, - const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const int **aux_shape_ndim, + const int ***aux_shape_data, int *complete); /*! 
@@ -1820,7 +1820,7 @@ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle, const char** provided_grad_req_types, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const mx_uint* provided_arg_shape_data, + const int* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, const mx_uint num_provided_arg_dtypes, const char** provided_arg_dtype_names, @@ -1874,7 +1874,7 @@ MXNET_DLL int MXExecutorReshape(int partial_shaping, const int* map_dev_ids, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const mx_uint* provided_arg_shape_data, + const int* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, mx_uint* num_in_args, NDArrayHandle** in_args, @@ -2550,8 +2550,8 @@ MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, * \param dtype data type of NDArray * \param out constructed NDArray */ -MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape, - mx_uint ndim, int dtype, NDArrayHandle *out); +MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape, + int ndim, int dtype, NDArrayHandle *out); /*! * \brief Push an asynchronous operation to the engine. 
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h index d00cb479b92e..2232ebe7be40 100644 --- a/include/mxnet/ndarray.h +++ b/include/mxnet/ndarray.h @@ -859,12 +859,15 @@ class NDArray { Chunk(mxnet::TShape shape, Context ctx_, bool delay_alloc_, int dtype) : static_data(false), delay_alloc(true), ctx(ctx_), storage_ref_(Storage::_GetSharedRef()) { - auto size = shape.Size(); storage_shape = shape; + if (shape_is_known(storage_shape)) { + shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype); + } var = Engine::Get()->NewVariable(); - shandle.size = size * mshadow::mshadow_sizeof(dtype); shandle.ctx = ctx_; - if (!delay_alloc_) this->CheckAndAlloc(); + if (!delay_alloc_) { + this->CheckAndAlloc(); + } } Chunk(const TBlob &data, int dev_id) diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index 39c3c185e3c0..49852f73fbac 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -199,7 +199,7 @@ class Tuple { * \return the corresponding dimension size */ inline ValueType& operator[](int i) { - CHECK(i >= 0 && i < ndim()); + CHECK(i >= 0 && i < ndim()) << "index = " << i << " must be in range [0, " << ndim() << ")"; return begin()[i]; } /*! @@ -208,7 +208,7 @@ class Tuple { * \return the corresponding dimension size */ inline const ValueType& operator[](int i) const { - CHECK(i >= 0 && i < ndim()); + CHECK(i >= 0 && i < ndim()) << "index = " << i << " must be in range [0, " << ndim() << ")"; return begin()[i]; } /*! @@ -271,14 +271,16 @@ class Tuple { if (!isspace(ch)) { is.setstate(std::ios::failbit); return is; + } } - } - // Handle empty tuple + // Handle empty tuple. A tensor whose shape is an empty tuple + // represents a scalar with ndim = 0. 
while (isspace(is.peek())) { is.get(); } if (is.peek() == ')' || is.peek() == ']') { is.get(); + t.SetDim(0); return is; } // Handle non-empty tuple @@ -352,7 +354,7 @@ class Tuple { delete [] data_heap_; data_heap_ = new ValueType[ndim]; num_heap_allocated_ = ndim; - } else if (ndim == -1 && data_heap_ != nullptr) { + } else if (ndim <= 0 && data_heap_ != nullptr) { delete [] data_heap_; data_heap_ = nullptr; num_heap_allocated_ = 0; @@ -381,14 +383,11 @@ class TShape : public Tuple { this->SetDim(-1); } /*! - * constructor to construct a shape with all 1. - * TODO(junwu): The value should default to -1. Need to keep 1 for now - * for backward compatibility. Change it to -1 in the future when we can - * break backward compatibility. + * constructor to construct a shape with all `value`. * \param ndim the number of dimension * \param value the dimension size for all dims */ - inline TShape(int ndim, int value = 1) { // NOLINT(*) + inline TShape(int ndim, int value = -1) { // NOLINT(*) this->SetDim(ndim); if (ndim > 0) { std::fill_n(begin(), ndim, value); @@ -458,7 +457,7 @@ class TShape : public Tuple { dim_t size = 1; const dim_t* start = begin(), *fin = end(); for (const dim_t* it = start; it != fin; ++it) { - CHECK_GE(*it, 0) << "Shape dim size cannot be -1, which means unknown."; + CHECK_GE(*it, 0) << "Shape dim size cannot be a negative value " << *it; size *= *it; } return size; @@ -473,7 +472,7 @@ class TShape : public Tuple { dim_t num = 1; const dim_t *d = this->data(); for (int i = dimstart; i < dimend; ++i) { - CHECK_GE(d[i], 0) << "Shape dim size cannot be -1, which means unknown."; + CHECK_GE(d[i], 0) << "Shape dim size cannot be a negative value " << d[i]; num *= d[i]; } return num; @@ -608,6 +607,16 @@ class TShape : public Tuple { #endif }; +/*! \brief check if shape is known using the NumPy compatible definition. + * zero-dim and zero-size tensors are valid.
-1 means unknown.*/ +inline bool shape_is_known(const TShape& x) { + if (x.ndim() == -1) return false; + for (int i = 0; i < x.ndim(); ++i) { + if (x[i] == -1) return false; + } + return true; +} + /*! \brief helper function to cast type of container elements */ template inline DstIter ShapeTypeCast(const SrcIter begin, @@ -623,7 +632,7 @@ inline DstIter ShapeTypeCast(const SrcIter begin, template inline TShape ShapeTypeCast(const SrcIter begin, const SrcIter end) { size_t ndim = std::distance(begin, end); - TShape res(ndim); + TShape res(ndim, -1); ShapeTypeCast(begin, end, res.begin()); return res; } @@ -669,7 +678,7 @@ struct hash > { size_t operator()(const mxnet::Tuple& val) const { std::hash hash_uint; size_t res = hash_uint(val.ndim()); - for (uint32_t i = 0; i < val.ndim(); ++i) { + for (int i = 0; i < val.ndim(); ++i) { res = dmlc::HashCombine(res, val[i]); } return res; diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 7793deacf44c..fe1dd00f9454 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -213,6 +213,7 @@ def _load_lib(): _LIB = _load_lib() # type definitions +mx_int = ctypes.c_int mx_uint = ctypes.c_uint mx_float = ctypes.c_float mx_float_p = ctypes.POINTER(mx_float) diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py index 7bf867579d6b..53ddc252d6b5 100644 --- a/python/mxnet/executor.py +++ b/python/mxnet/executor.py @@ -25,7 +25,7 @@ import copy import numpy as np from .base import _LIB -from .base import mx_uint, NDArrayHandle, ExecutorHandle, py_str +from .base import mx_uint, NDArrayHandle, ExecutorHandle, py_str, mx_int from .base import check_call, c_handle_array, c_array_buf, c_str_array from .ndarray import NDArray from .ndarray import _ndarray_cls @@ -445,8 +445,8 @@ def reshape(self, partial_shaping=False, allow_up_sizing=False, **kwargs): py_array('i', ctx_map_dev_ids)), mx_uint(len(provided_arg_shape_names)), c_str_array(provided_arg_shape_names), - c_array_buf(mx_uint, - py_array('I', 
provided_arg_shape_data)), + c_array_buf(mx_int, + py_array('i', provided_arg_shape_data)), c_array_buf(mx_uint, py_array('I', provided_arg_shape_idx)), ctypes.byref(num_in_args), diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 87f2712d8a40..1200c4a72c00 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -35,7 +35,7 @@ import numpy as np from ..base import _LIB, numeric_types, integer_types from ..base import c_str, c_array, c_array_buf, c_handle_array, mx_real_t -from ..base import mx_uint, NDArrayHandle, check_call, DLPackHandle +from ..base import mx_uint, NDArrayHandle, check_call, DLPackHandle, mx_int from ..base import ctypes2buffer from ..context import Context, current_context from . import _internal @@ -146,8 +146,8 @@ def _new_from_shared_mem(shared_pid, shared_id, shape, dtype): check_call(_LIB.MXNDArrayCreateFromSharedMem( ctypes.c_int(shared_pid), ctypes.c_int(shared_id), - c_array(mx_uint, shape), - mx_uint(len(shape)), + c_array(mx_int, shape), + mx_int(len(shape)), ctypes.c_int(int(_DTYPE_NP_TO_MX[np.dtype(dtype).type])), ctypes.byref(hdl))) return hdl @@ -1845,8 +1845,8 @@ def shape(self): >>> y.shape (2L, 3L, 4L) """ - ndim = mx_uint() - pdata = ctypes.POINTER(mx_uint)() + ndim = mx_int() + pdata = ctypes.POINTER(mx_int)() check_call(_LIB.MXNDArrayGetShape( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index 91d4ca16df07..b9432d372bc7 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -34,7 +34,7 @@ from ..attribute import AttrScope from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array -from ..base import mx_uint, py_str, string_types, integer_types +from ..base import mx_uint, py_str, string_types, integer_types, mx_int from ..base import 
NDArrayHandle, ExecutorHandle, SymbolHandle from ..base import check_call, MXNetError, NotImplementedForSymbol from ..context import Context, current_context @@ -1174,14 +1174,14 @@ def _infer_shape_impl(self, partial, *args, **kwargs): indptr.append(len(sdata)) keys = c_str_array(str_keys) arg_shape_size = mx_uint() - arg_shape_ndim = ctypes.POINTER(mx_uint)() - arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + arg_shape_ndim = ctypes.POINTER(mx_int)() + arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))() out_shape_size = mx_uint() - out_shape_ndim = ctypes.POINTER(mx_uint)() - out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + out_shape_ndim = ctypes.POINTER(mx_int)() + out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))() aux_shape_size = mx_uint() - aux_shape_ndim = ctypes.POINTER(mx_uint)() - aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))() + aux_shape_ndim = ctypes.POINTER(mx_int)() + aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))() complete = ctypes.c_int() if partial: infer_func = _LIB.MXSymbolInferShapePartial @@ -1192,7 +1192,7 @@ def _infer_shape_impl(self, partial, *args, **kwargs): mx_uint(len(indptr) - 1), keys, c_array_buf(mx_uint, array('I', indptr)), - c_array_buf(mx_uint, array('I', sdata)), + c_array_buf(mx_int, array('i', sdata)), ctypes.byref(arg_shape_size), ctypes.byref(arg_shape_ndim), ctypes.byref(arg_shape_data), @@ -1576,10 +1576,10 @@ def simple_bind(self, ctx, grad_req='write', type_dict=None, stype_dict=None, provided_grad_req_types, mx_uint(len(provided_arg_shape_names)), c_str_array(provided_arg_shape_names), - c_array_buf(mx_uint, + c_array_buf(mx_int, - array('I', provided_arg_shape_data)), + array('i', provided_arg_shape_data)), c_array_buf(mx_uint, array('I', provided_arg_shape_idx)), num_provided_arg_types, provided_arg_type_names, provided_arg_type_data, diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 45197aafe019..614732081a98 100644 --- a/src/c_api/c_api.cc +++
b/src/c_api/c_api.cc @@ -471,7 +471,7 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, NDArray *ptr = new NDArray(); API_BEGIN(); NDArray *arr = static_cast(handle); - nnvm::Tuple shape(dims, dims+ndim); + mxnet::Tuple shape(dims, dims+ndim); CHECK_GT(arr->shape().Size(), 0) << "Source ndarray's shape is undefined. Input shape: " << arr->shape(); mxnet::TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), reverse); @@ -493,17 +493,18 @@ int MXNDArrayGetStorageType(NDArrayHandle handle, } int MXNDArrayGetShape(NDArrayHandle handle, - mx_uint *out_dim, - const mx_uint **out_pdata) { + int *out_dim, + const int **out_pdata) { MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); NDArray *arr = static_cast(handle); if (!arr->is_none()) { const mxnet::TShape &s = arr->shape(); *out_dim = s.ndim(); - std::vector& buffer = ret->arg_shape_buffer; + CHECK_GE(s.ndim(), 0); + std::vector& buffer = ret->arg_shape_buffer; buffer.resize(s.ndim()); - nnvm::ShapeTypeCast(s.begin(), s.end(), buffer.data()); + mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data()); *out_pdata = buffer.data(); } else { *out_dim = 0; @@ -1395,8 +1396,8 @@ int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shar API_END(); } -int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape, - mx_uint ndim, int dtype, NDArrayHandle *out) { +int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape, + int ndim, int dtype, NDArrayHandle *out) { API_BEGIN(); *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype); API_END(); diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index b5adfa37eca9..690a1eae1055 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -74,29 +74,32 @@ struct MXAPIThreadLocalEntry { /*! \brief result holder for returning storage types */ std::vector arg_storage_types, out_storage_types, aux_storage_types; /*! 
\brief result holder for returning shape dimensions */ - std::vector arg_shape_ndim, out_shape_ndim, aux_shape_ndim; + std::vector arg_shape_ndim, out_shape_ndim, aux_shape_ndim; /*! \brief result holder for returning shape pointer */ - std::vector arg_shape_data, out_shape_data, aux_shape_data; + std::vector arg_shape_data, out_shape_data, aux_shape_data; /*! \brief uint32_t buffer for returning shape pointer */ - std::vector arg_shape_buffer, out_shape_buffer, aux_shape_buffer; + std::vector arg_shape_buffer, out_shape_buffer, aux_shape_buffer; /*! \brief bool buffer */ std::vector save_inputs, save_outputs; // helper function to setup return value of shape array inline static void SetupShapeArrayReturnWithBuffer( const mxnet::ShapeVector &shapes, - std::vector *ndim, - std::vector *data, - std::vector *buffer) { + std::vector *ndim, + std::vector *data, + std::vector *buffer) { ndim->resize(shapes.size()); data->resize(shapes.size()); size_t size = 0; - for (const auto& s : shapes) size += s.ndim(); + for (const auto& s : shapes) { + CHECK_GE(s.ndim(), 0); + size += s.ndim(); + } buffer->resize(size); - uint32_t *ptr = buffer->data(); + int *ptr = buffer->data(); for (size_t i = 0; i < shapes.size(); ++i) { ndim->at(i) = shapes[i].ndim(); data->at(i) = ptr; - ptr = nnvm::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr); + ptr = mxnet::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr); } } }; diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc index a2e8bb810e6f..d0006383bdc8 100644 --- a/src/c_api/c_api_executor.cc +++ b/src/c_api/c_api_executor.cc @@ -231,7 +231,7 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, const char** provided_grad_req_types, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const mx_uint* provided_arg_shape_data, + const int* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, const mx_uint num_provided_arg_dtypes, const char** provided_arg_dtype_names, @@ 
-547,7 +547,7 @@ int MXExecutorReshape(int partial_shaping, const int* map_dev_ids, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const mx_uint* provided_arg_shape_data, + const int* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, mx_uint* num_in_args, NDArrayHandle** in_args, diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 545e95f04b79..8b961f46c39e 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -505,16 +505,16 @@ int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const int *arg_shape_data, mx_uint *in_shape_size, - const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const int **in_shape_ndim, + const int ***in_shape_data, mx_uint *out_shape_size, - const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const int **out_shape_ndim, + const int ***out_shape_data, mx_uint *aux_shape_size, - const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const int **aux_shape_ndim, + const int ***aux_shape_data, int *complete) { nnvm::Symbol *s = static_cast(sym); MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); @@ -572,16 +572,16 @@ int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const mx_uint *arg_shape_data, + const int *arg_shape_data, mx_uint *in_shape_size, - const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const int **in_shape_ndim, + const int ***in_shape_data, mx_uint *out_shape_size, - const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const int **out_shape_ndim, + const int ***out_shape_data, mx_uint *aux_shape_size, - const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const int **aux_shape_ndim, + const int ***aux_shape_data, int *complete) { int succ; *complete = 1; diff --git 
a/src/common/exec_utils.h b/src/common/exec_utils.h index 279ecbd67f09..0551b429f17e 100644 --- a/src/common/exec_utils.h +++ b/src/common/exec_utils.h @@ -380,7 +380,7 @@ inline void HandleInferShapeError(const size_t num_forward_inputs, const uint32_t nid = idx.input_nodes().at(i); const uint32_t eid = idx.entry_id(nid, 0); const mxnet::TShape& inferred_shape = inferred_shapes[eid]; - if (inferred_shape.ndim() == 0 || inferred_shape.Size() == 0U) { + if (!shape_is_known(inferred_shape)) { const std::string& arg_name = idx[nid].source->attrs.name; oss << arg_name << ": " << inferred_shape << ", "; if (--cnt == 0) { @@ -390,7 +390,7 @@ inline void HandleInferShapeError(const size_t num_forward_inputs, } } LOG(FATAL) << "InferShape pass cannot decide shapes for the following arguments " - "(0s means unknown dimensions). Please consider providing them as inputs:\n" + "(-1 means unknown dimensions). Please consider providing them as inputs:\n" << oss.str(); } diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc index 9e1b66d1b286..4a4505581920 100644 --- a/src/executor/graph_executor.cc +++ b/src/executor/graph_executor.cc @@ -967,7 +967,7 @@ void GraphExecutor::InitDataEntryMemory(std::vector* shared_pool) { uint32_t oid = head_grad_map_.at(idx[nid].source); uint32_t eid = idx.entry_id(idx.outputs()[oid]); NDArrayStorageType stype = (NDArrayStorageType) vstorage_type[eid]; - CHECK(mxnet::op::shape_is_known(vshape[eid])); + CHECK(mxnet::shape_is_known(vshape[eid])); CHECK_NE(vdtype[eid], -1); auto data_eid = idx.entry_id(nid, 0); // initialize based on storage_type diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc index aa72661e78b2..3a5c5ab9806f 100644 --- a/src/executor/infer_graph_attr_pass.cc +++ b/src/executor/infer_graph_attr_pass.cc @@ -648,7 +648,7 @@ nnvm::Graph InferShape(nnvm::Graph&& graph, std::move(graph), mxnet::TShape(), "FInferShape", "shape_inputs", "shape_attr_key", "shape", 
"shape_num_unknown_nodes", - [](const mxnet::TShape& s) { return !mxnet::op::shape_is_known(s); }, + [](const mxnet::TShape& s) { return !mxnet::shape_is_known(s); }, [](const mxnet::TShape& s) { if (s.ndim() == -1) { return static_cast(1); diff --git a/src/io/image_io.cc b/src/io/image_io.cc index 2196983928bb..965078cb2766 100644 --- a/src/io/image_io.cc +++ b/src/io/image_io.cc @@ -189,7 +189,7 @@ void Imdecode(const nnvm::NodeAttrs& attrs, size_t len = inputs[0].shape().Size(); CHECK(len > 0) << "Input cannot be an empty buffer"; - mxnet::TShape oshape(3); + mxnet::TShape oshape(3, 1); oshape[2] = param.flag == 0 ? 1 : 3; if (get_jpeg_size(str_img, len, &oshape[1], &oshape[0])) { } else if (get_png_size(str_img, len, &oshape[1], &oshape[0])) { @@ -229,7 +229,7 @@ void Imread(const nnvm::NodeAttrs& attrs, CHECK(file.good()) << "Failed reading image file: '" << param.filename << "' " << strerror(errno); - mxnet::TShape oshape(3); + mxnet::TShape oshape(3, 1); oshape[2] = param.flag == 0 ? 
1 : 3; if (get_jpeg_size(buff.get(), fsize, &oshape[1], &oshape[0])) { } else if (get_png_size(buff.get(), fsize, &oshape[1], &oshape[0])) { diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 377bef072b03..04518e0feb77 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -1650,7 +1650,7 @@ bool LegacyTShapeLoad(dmlc::Stream *strm, mxnet::TShape *shape, const uint32_t m default: // meet legacy mxnet::TShape, magic is ndim here uint32_t ndim = magic; - *shape = mxnet::TShape(ndim); + *shape = mxnet::TShape(ndim, -1); std::vector buffer(ndim); size_t nread = ndim * sizeof(uint32_t); if (strm->Read(buffer.data(), nread) != nread) return false; diff --git a/src/nnvm/plan_memory.cc b/src/nnvm/plan_memory.cc index 0dc7e6ddb1d9..41b8559d16c2 100644 --- a/src/nnvm/plan_memory.cc +++ b/src/nnvm/plan_memory.cc @@ -76,7 +76,7 @@ class GraphAllocator { // request a free storage StorageID Request(int dev_id, int dtype, mxnet::TShape shape, uint32_t node_id) { - if (!mxnet::op::shape_is_known(shape)) return kBadStorageID; + if (!mxnet::shape_is_known(shape)) return kBadStorageID; // search memory block in [size / match_range_, size * match_range_) // TODO(tqchen) add size of the dtype, assume 4 bytes for now size_t size = shape.Size() * 4; diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h index f407a5cce61b..8016510090ab 100644 --- a/src/operator/batch_norm_v1-inl.h +++ b/src/operator/batch_norm_v1-inl.h @@ -261,7 +261,7 @@ class BatchNormV1Prop : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); diff --git a/src/operator/bilinear_sampler-inl.h b/src/operator/bilinear_sampler-inl.h index 
8b1ff38709b6..abb4a61dc84c 100644 --- a/src/operator/bilinear_sampler-inl.h +++ b/src/operator/bilinear_sampler-inl.h @@ -149,10 +149,10 @@ class BilinearSamplerProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 2U) << "Input:[data, grid]"; const mxnet::TShape &dshape = (*in_shape)[bs::kData]; const mxnet::TShape &lshape = (*in_shape)[bs::kGrid]; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; CHECK_EQ(dshape.ndim(), 4U) \ << "input data should be 4D in batch-num_filter-y-x"; - if (lshape.ndim() == 0) return false; + if (!shape_is_known(lshape)) return false; CHECK_EQ(lshape.ndim(), 4U) \ << "Sampler grid should be 4D in batch-2-y-x"; CHECK_EQ(dshape[0], lshape[0]); diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h index 059327ef8334..6ea4e8097b6c 100644 --- a/src/operator/contrib/bounding_box-inl.h +++ b/src/operator/contrib/bounding_box-inl.h @@ -558,7 +558,7 @@ inline bool BoxOverlapShape(const nnvm::NodeAttrs& attrs, << rdim << " provided"; // assign output shape - mxnet::TShape oshape(lshape.ndim() + rshape.ndim() - 2); + mxnet::TShape oshape(lshape.ndim() + rshape.ndim() - 2, -1); int idx = 0; for (index_t i = 0; i < lshape.ndim() - 1; ++i) { oshape[idx++] = lshape[i]; @@ -567,7 +567,7 @@ inline bool BoxOverlapShape(const nnvm::NodeAttrs& attrs, oshape[idx++] = rshape[i]; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return shape_is_known(oshape); } struct compute_overlap { @@ -671,14 +671,14 @@ inline bool MatchingShape(const nnvm::NodeAttrs& attrs, << dshape.ndim() << " provided"; // assign output shape - mxnet::TShape oshape(dshape.ndim() - 1); + mxnet::TShape oshape(dshape.ndim() - 1, -1); for (index_t i = 0; i < dshape.ndim() - 1; ++i) { oshape[i] = dshape[i]; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); oshape[oshape.ndim() - 1] = dshape[dshape.ndim() - 1]; SHAPE_ASSIGN_CHECK(*out_attrs, 1, oshape); - return true; + return shape_is_known(oshape); } 
struct bipartite_matching { diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h index 3e96cad1c859..936df7f1bcd4 100644 --- a/src/operator/contrib/deformable_convolution-inl.h +++ b/src/operator/contrib/deformable_convolution-inl.h @@ -127,7 +127,7 @@ class DeformableConvolutionOp : public Operator { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, -1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); for (size_t i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_data[0].shape_[i + 1]; @@ -189,7 +189,7 @@ class DeformableConvolutionOp : public Operator { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, -1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_grad[conv::kData].shape_[i + 1]; diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc index 02ef2cee1caa..313b855f0d2d 100644 --- a/src/operator/contrib/dgl_graph.cc +++ b/src/operator/contrib/dgl_graph.cc @@ -259,7 +259,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, // Output bool success = true; - mxnet::TShape out_shape(1); + mxnet::TShape out_shape(1, -1); // We use the last element to store the actual // number of vertices in the subgraph. 
out_shape[0] = params.max_num_vertices + 1; @@ -268,7 +268,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, success = success && !mxnet::op::shape_is_none(out_attrs->at(i)); } // sub_csr - mxnet::TShape out_csr_shape(2); + mxnet::TShape out_csr_shape(2, -1); out_csr_shape[0] = params.max_num_vertices; out_csr_shape[1] = in_attrs->at(0)[1]; for (size_t i = 0; i < num_subgraphs; i++) { @@ -276,7 +276,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs, success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs)); } // sub_layer - mxnet::TShape out_layer_shape(1); + mxnet::TShape out_layer_shape(1, -1); out_layer_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_layer_shape); @@ -311,7 +311,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, // Output bool success = true; - mxnet::TShape out_shape(1); + mxnet::TShape out_shape(1, -1); // We use the last element to store the actual // number of vertices in the subgraph. 
out_shape[0] = params.max_num_vertices + 1; @@ -320,7 +320,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, success = success && !mxnet::op::shape_is_none(out_attrs->at(i)); } // sub_csr - mxnet::TShape out_csr_shape(2); + mxnet::TShape out_csr_shape(2, -1); out_csr_shape[0] = params.max_num_vertices; out_csr_shape[1] = in_attrs->at(0)[1]; for (size_t i = 0; i < num_subgraphs; i++) { @@ -328,14 +328,14 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs, success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs)); } // sub_probability - mxnet::TShape out_prob_shape(1); + mxnet::TShape out_prob_shape(1, -1); out_prob_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_prob_shape); success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 2 * num_subgraphs)); } // sub_layer - mxnet::TShape out_layer_shape(1); + mxnet::TShape out_layer_shape(1, -1); out_layer_shape[0] = params.max_num_vertices; for (size_t i = 0; i < num_subgraphs; i++) { SHAPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, out_prob_shape); @@ -665,8 +665,8 @@ static void SampleSubgraph(const NDArray &csr, } } // Construct sub_csr_graph - mxnet::TShape shape_1(1); - mxnet::TShape shape_2(1); + mxnet::TShape shape_1(1, -1); + mxnet::TShape shape_2(1, -1); shape_1[0] = num_edges; shape_2[0] = max_num_vertices+1; sub_csr.CheckAndAllocData(shape_1); @@ -946,13 +946,13 @@ static bool DGLSubgraphShape(const nnvm::NodeAttrs& attrs, size_t num_g = params.num_args - 1; for (size_t i = 0; i < num_g; i++) { - mxnet::TShape gshape(2); + mxnet::TShape gshape(2, -1); gshape[0] = in_attrs->at(i + 1)[0]; gshape[1] = in_attrs->at(i + 1)[0]; out_attrs->at(i) = gshape; } for (size_t i = num_g; i < out_attrs->size(); i++) { - mxnet::TShape gshape(2); + mxnet::TShape gshape(2, -1); gshape[0] = in_attrs->at(i - num_g + 1)[0]; gshape[1] = in_attrs->at(i - 
num_g + 1)[0]; out_attrs->at(i) = gshape; @@ -1067,9 +1067,9 @@ static void GetSubgraph(const NDArray &csr_arr, const NDArray &varr, row_idx[i + 1] = col_idx.size(); } - mxnet::TShape nz_shape(1); + mxnet::TShape nz_shape(1, -1); nz_shape[0] = col_idx.size(); - mxnet::TShape indptr_shape(1); + mxnet::TShape indptr_shape(1, -1); indptr_shape[0] = row_idx.size(); // Store the non-zeros in a subgraph with edge attributes of new edge ids. @@ -1446,9 +1446,9 @@ static void CompactSubgraph(const NDArray &csr, const NDArray &vids, CHECK_NE(row_ids[i], -1); } - mxnet::TShape nz_shape(1); + mxnet::TShape nz_shape(1, -1); nz_shape[0] = num_elems; - mxnet::TShape indptr_shape(1); + mxnet::TShape indptr_shape(1, -1); CHECK_EQ(out_csr.shape()[0], graph_size); indptr_shape[0] = graph_size + 1; CHECK_GE(in_ptr_data.shape_[0], indptr_shape[0]); @@ -1526,7 +1526,7 @@ static bool SubgraphCompactShape(const nnvm::NodeAttrs& attrs, } for (size_t i = 0; i < num_g; i++) { - mxnet::TShape gshape(2); + mxnet::TShape gshape(2, -1); gshape[0] = params.graph_sizes[i]; gshape[1] = params.graph_sizes[i]; out_attrs->at(i) = gshape; diff --git a/src/operator/contrib/multibox_detection-inl.h b/src/operator/contrib/multibox_detection-inl.h index 977126ad269d..1ac14e237f0d 100644 --- a/src/operator/contrib/multibox_detection-inl.h +++ b/src/operator/contrib/multibox_detection-inl.h @@ -161,7 +161,7 @@ class MultiBoxDetectionProp : public OperatorProperty { CHECK_EQ(cshape[2] * 4, lshape[1]) << "# anchors mismatch with # loc"; CHECK_GT(ashape[1], 0U) << "Number of anchors must > 0"; CHECK_EQ(ashape[2], 4U); - mxnet::TShape oshape = mxnet::TShape(3); + mxnet::TShape oshape = mxnet::TShape(3, -1); oshape[0] = cshape[0]; oshape[1] = ashape[1]; oshape[2] = 6; // [id, prob, xmin, ymin, xmax, ymax] diff --git a/src/operator/contrib/multibox_prior-inl.h b/src/operator/contrib/multibox_prior-inl.h index 3636a6016bd2..d8929f3deff4 100644 --- a/src/operator/contrib/multibox_prior-inl.h +++ 
b/src/operator/contrib/multibox_prior-inl.h @@ -180,7 +180,7 @@ class MultiBoxPriorProp: public OperatorProperty { int in_width = dshape[3]; CHECK_GT(in_width, 0) << "Input width should > 0"; // since input sizes are same in each batch, we could share MultiBoxPrior - mxnet::TShape oshape = mxnet::TShape(3); + mxnet::TShape oshape = mxnet::TShape(3, -1); int num_sizes = param_.sizes.ndim(); int num_ratios = param_.ratios.ndim(); oshape[0] = 1; @@ -189,7 +189,7 @@ class MultiBoxPriorProp: public OperatorProperty { out_shape->clear(); out_shape->push_back(oshape); CHECK_EQ(param_.steps.ndim(), 2) << "Step ndim must be 2: (step_y, step_x)"; - return true; + return shape_is_known(oshape); } OperatorProperty* Copy() const override { diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc index ac6fea7c143b..9ba3b5471c60 100644 --- a/src/operator/control_flow.cc +++ b/src/operator/control_flow.cc @@ -37,11 +37,11 @@ struct ForeachParam : public dmlc::Parameter { int num_outputs; int num_out_data; // The location of states in the subgraph inputs. - nnvm::Tuple in_state_locs; + mxnet::Tuple in_state_locs; // The location of data arrays in the subgraph inputs. - nnvm::Tuple in_data_locs; + mxnet::Tuple in_data_locs; // The location of remaining arrays in the subgraph inputs. 
- nnvm::Tuple remain_locs; + mxnet::Tuple remain_locs; DMLC_DECLARE_PARAMETER(ForeachParam) { DMLC_DECLARE_FIELD(num_args).set_lower_bound(1) .describe("Number of inputs."); @@ -82,7 +82,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr, CHECK_GT(params.in_data_locs.ndim(), 0); size_t len = inputs[0].shape()[iter_dim]; state.num_iterations = len; - for (size_t i = 1; i < params.in_data_locs.ndim(); i++) + for (int i = 1; i < params.in_data_locs.ndim(); i++) CHECK_EQ(inputs[i].shape()[iter_dim], len); for (size_t i = 0; i < (size_t) params.num_out_data; i++) CHECK_EQ(len, outputs[i].shape()[iter_dim]); @@ -120,7 +120,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr, // and the loop states. std::vector subg_inputs(inputs.size()); // The remaining arrays (other than input data and states) only need to be set once. - for (size_t j = 0; j < params.remain_locs.ndim(); j++) { + for (int j = 0; j < params.remain_locs.ndim(); j++) { CHECK_LT(params.remain_locs[j], subg_inputs.size()); subg_inputs[params.remain_locs[j]] = inputs[j + params.in_data_locs.ndim() + params.in_state_locs.ndim()]; @@ -148,7 +148,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr, // Initialize inputs for the subgraph. // Get a slice from the input data arrays. 
- for (size_t j = 0; j < params.in_data_locs.ndim(); j++) { + for (int j = 0; j < params.in_data_locs.ndim(); j++) { size_t loc = params.in_data_locs[j]; subg_inputs[loc] = inputs[j].At(i); } @@ -161,7 +161,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr, subg_inputs[params.in_state_locs[idx]] = (*subg_out_prev)[j]; } } else { - for (size_t j = 0; j < params.in_state_locs.ndim(); j++) { + for (int j = 0; j < params.in_state_locs.ndim(); j++) { CHECK_LT(params.in_state_locs[j], subg_inputs.size()); subg_inputs[params.in_state_locs[j]] = inputs[j + params.in_data_locs.ndim()]; } @@ -203,7 +203,7 @@ static void ForeachGradComputeExCPU(const OpStatePtr& state_ptr, // [data vars], [loop vars], [remaining vars] // [remaining vars] - for (size_t i = 0; i < params.remain_locs.ndim(); i++) { + for (int i = 0; i < params.remain_locs.ndim(); i++) { size_t loc = params.remain_locs[i]; size_t orig_loc = i + params.in_data_locs.ndim() + params.in_state_locs.ndim(); subg_igrads[loc] = outputs[orig_loc]; @@ -216,20 +216,20 @@ static void ForeachGradComputeExCPU(const OpStatePtr& state_ptr, if (iter_num < len - 1) { // For the rest of the iterations, we should add graidents to the // remaining vars. 
- for (size_t i = 0; i < params.remain_locs.ndim(); i++) { + for (int i = 0; i < params.remain_locs.ndim(); i++) { size_t loc = params.remain_locs[i]; subg_req[loc] = kAddTo; } } // [data vars] - for (size_t i = 0; i < params.in_data_locs.ndim(); i++) { + for (int i = 0; i < params.in_data_locs.ndim(); i++) { size_t loc = params.in_data_locs[i]; subg_igrads[loc] = outputs[i].At(iter_num); subg_req[loc] = req[i]; } // [loop vars] - for (size_t i = 0; i < params.in_state_locs.ndim(); i++) { + for (int i = 0; i < params.in_state_locs.ndim(); i++) { size_t loc = params.in_state_locs[i]; const NDArray &output = outputs[i + params.in_data_locs.ndim()]; if (iter_num != 0) { @@ -258,9 +258,9 @@ static void ForeachGradComputeExCPU(const OpStatePtr& state_ptr, template static void remap(const std::vector &op_in, size_t start, - const nnvm::Tuple &locs, std::vector *subg_in) { + const mxnet::Tuple &locs, std::vector *subg_in) { auto op_in_it = op_in.begin() + start; - for (size_t i = 0; i < locs.ndim(); i++) { + for (int i = 0; i < locs.ndim(); i++) { dim_t loc = locs[i]; subg_in->at(loc) = *(op_in_it + i); } @@ -284,7 +284,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector subg_in_shape(in_shape->size()); // data shape std::vector data_1d(params.in_data_locs.ndim(), false); - for (size_t i = 0; i < params.in_data_locs.ndim(); i++) { + for (int i = 0; i < params.in_data_locs.ndim(); i++) { size_t loc = params.in_data_locs[i]; if (in_shape->at(i).ndim() == 1) data_1d[i] = true; @@ -301,7 +301,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < params.num_out_data; i++) { mxnet::TShape shape = subg_out_shape[i]; // If we don't have shape info, we don't need to do anything. 
- if (shape.ndim() == 0) + if (!shape_is_known(shape)) continue; subg_out_shape[i] = SliceFirstDim(shape); } @@ -317,12 +317,12 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < params.num_out_data; i++) { // If the output shape isn't inferred, we don't need to propogate the info. const auto& g_out_shape = subg_out_shape[i]; - if (g_out_shape.ndim() == 0) + if (!shape_is_known(g_out_shape)) continue; - auto out = mxnet::TShape(g_out_shape.ndim() + 1); + auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1); out[0] = len; - for (size_t i = 1; i < out.ndim(); i++) + for (int i = 1; i < out.ndim(); i++) out[i] = g_out_shape[i - 1]; SHAPE_ASSIGN_CHECK(*out_shape, i, out); } @@ -331,34 +331,34 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_shape, i, subg_out_shape[i]); // For the shape of input data. - for (size_t i = 0; i < params.in_data_locs.ndim(); i++) { + for (int i = 0; i < params.in_data_locs.ndim(); i++) { size_t loc = params.in_data_locs[i]; const auto &shape = subg_in_shape[loc]; // If the input data shape isn't inferred, we don't need to propogate the // info. - if (shape.ndim() == 0) + if (!shape_is_known(shape)) continue; if (data_1d[i]) { - mxnet::TShape s(1); + mxnet::TShape s(1, -1); s[0] = len; SHAPE_ASSIGN_CHECK(*in_shape, i, s); } else { - auto in = mxnet::TShape(shape.ndim() + 1); + auto in = mxnet::TShape(shape.ndim() + 1, -1); in[0] = len; - for (size_t i = 1; i < in.ndim(); i++) + for (int i = 1; i < in.ndim(); i++) in[i] = shape[i - 1]; SHAPE_ASSIGN_CHECK(*in_shape, i, in); } } // For the shape of state. - for (size_t i = 0; i < params.in_state_locs.ndim(); i++) { + for (int i = 0; i < params.in_state_locs.ndim(); i++) { size_t loc = params.in_state_locs[i]; SHAPE_ASSIGN_CHECK(*in_shape, i + params.in_data_locs.ndim(), subg_in_shape[loc]); } // For the shape of remaining data. 
- for (size_t i = 0; i < params.remain_locs.ndim(); i++) { + for (int i = 0; i < params.remain_locs.ndim(); i++) { size_t loc = params.remain_locs[i]; SHAPE_ASSIGN_CHECK(*in_shape, i + params.in_data_locs.ndim() + params.in_state_locs.ndim(), @@ -387,15 +387,15 @@ static bool ForeachType(const nnvm::NodeAttrs& attrs, remap(*in_type, params.in_data_locs.ndim() + params.in_state_locs.ndim(), params.remain_locs, &subg_in_type); bool success = InferSubgraphDataType(*attrs.subgraphs[0], &subg_in_type, out_type); - for (size_t i = 0; i < params.in_data_locs.ndim(); i++) { + for (int i = 0; i < params.in_data_locs.ndim(); i++) { size_t loc = params.in_data_locs[i]; TYPE_ASSIGN_CHECK(*in_type, i, subg_in_type[loc]); } - for (size_t i = 0; i < params.in_state_locs.ndim(); i++) { + for (int i = 0; i < params.in_state_locs.ndim(); i++) { size_t loc = params.in_state_locs[i]; TYPE_ASSIGN_CHECK(*in_type, i + params.in_data_locs.ndim(), subg_in_type[loc]); } - for (size_t i = 0; i < params.remain_locs.ndim(); i++) { + for (int i = 0; i < params.remain_locs.ndim(); i++) { size_t loc = params.remain_locs[i]; TYPE_ASSIGN_CHECK(*in_type, i + params.in_data_locs.ndim() + params.in_state_locs.ndim(), subg_in_type[loc]); @@ -418,16 +418,16 @@ static bool ForeachStorageType(const nnvm::NodeAttrs& attrs, params.remain_locs, &subg_in_attrs); bool success = InferSubgraphStorage(*attrs.subgraphs[0], dev_mask, dispatch_mode, &subg_in_attrs, out_attrs); - for (size_t i = 0; i < params.in_data_locs.ndim(); i++) { + for (int i = 0; i < params.in_data_locs.ndim(); i++) { size_t loc = params.in_data_locs[i]; STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i, subg_in_attrs[loc]); } - for (size_t i = 0; i < params.in_state_locs.ndim(); i++) { + for (int i = 0; i < params.in_state_locs.ndim(); i++) { size_t loc = params.in_state_locs[i]; STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i + params.in_data_locs.ndim(), subg_in_attrs[loc]); } - for (size_t i = 0; i < params.remain_locs.ndim(); i++) { + for (int i = 0; i < 
params.remain_locs.ndim(); i++) { size_t loc = params.remain_locs[i]; STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i + params.in_data_locs.ndim() + params.in_state_locs.ndim(), @@ -488,9 +488,9 @@ struct WhileLoopParam : public dmlc::Parameter { // `cond_input_locs' contains indices of inputs fed to `cond', and // `func_input_locs' contains indices of inputs fed to `func'. // `func_var_locs' are indices in which input "variables" are stored in func's inputs. - nnvm::Tuple cond_input_locs; - nnvm::Tuple func_input_locs; - nnvm::Tuple func_var_locs; + mxnet::Tuple cond_input_locs; + mxnet::Tuple func_input_locs; + mxnet::Tuple func_var_locs; DMLC_DECLARE_PARAMETER(WhileLoopParam) { DMLC_DECLARE_FIELD(num_args).set_lower_bound(2) .describe("Number of input arguments, including cond and func as two symbol inputs."); @@ -538,12 +538,12 @@ class WhileLoopState: public LoopState { n_iterations(0U), cond_op(LoopState::MakeSharedOp(cond)), oi_map(params.func_var_locs.ndim(), -1) { - const nnvm::Tuple &func_input_locs = params.func_input_locs; - const nnvm::Tuple &func_var_locs = params.func_var_locs; - const nnvm::Tuple &cond_input_locs = params.cond_input_locs; - for (size_t i = 0; i < func_var_locs.ndim(); ++i) { + const mxnet::Tuple &func_input_locs = params.func_input_locs; + const mxnet::Tuple &func_var_locs = params.func_var_locs; + const mxnet::Tuple &cond_input_locs = params.cond_input_locs; + for (int i = 0; i < func_var_locs.ndim(); ++i) { dim_t pos_i = func_input_locs[func_var_locs[i]]; - for (size_t j = 0; j < cond_input_locs.ndim(); ++j) { + for (int j = 0; j < cond_input_locs.ndim(); ++j) { dim_t pos_j = cond_input_locs[j]; if (pos_i == pos_j) { this->oi_map[i] = j; @@ -740,7 +740,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, // infer shape for cond and func auto infer_subg = [¶ms, in_shape, out_shape](std::shared_ptr subg, ShapeVector *_subg_out, - const nnvm::Tuple &input_locs, + const mxnet::Tuple &input_locs, int num_out_data, bool fill_out_shape) 
{ // create subg_in @@ -781,7 +781,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, for (size_t i = 0; i < subg_in.size(); ++i) { auto eid = idx.entry_id(input_nids[i], 0); auto g_out_shape = new_shapes[eid]; - if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) { + if (!shape_is_known(g_out_shape)) { // when the shape is not fully inferred continue; } @@ -795,13 +795,13 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < num_out_data; ++i) { auto eid = idx.entry_id(g.outputs[i]); auto g_out_shape = new_shapes[eid]; - if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) { + if (!shape_is_known(g_out_shape)) { // when the shape is not fully inferred continue; } - auto out = mxnet::TShape(g_out_shape.ndim() + 1); + auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1); out[0] = params.max_iterations; - for (size_t i = 1; i < out.ndim(); i++) + for (int i = 1; i < out.ndim(); i++) out[i] = g_out_shape[i - 1]; SHAPE_ASSIGN_CHECK(*out_shape, i, out); } @@ -809,7 +809,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, for (size_t i = num_out_data; i < g.outputs.size(); ++i) { auto eid = idx.entry_id(g.outputs[i]); auto g_out_shape = new_shapes[eid]; - if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) { + if (!shape_is_known(g_out_shape)) { // when the shape is not fully inferred continue; } @@ -817,7 +817,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs, } return g.GetAttr("shape_num_unknown_nodes") == 0; }; - mxnet::ShapeVector cond_out_shape{mxnet::TShape(1U)}; // this means: [(1, )] + mxnet::ShapeVector cond_out_shape{mxnet::TShape(1, 1)}; // this means: [(1, )] mxnet::ShapeVector func_out_shape(params.num_outputs); CHECK(params.sync_in_out(in_shape, out_shape, is_udf)); bool succ_0 = infer_subg(attrs.subgraphs[0], &cond_out_shape, params.cond_input_locs, 0, false); @@ -915,9 +915,9 @@ WhileLoopGradient(const nnvm::NodePtr& n, const std::vector& og struct CondParam : public dmlc::Parameter { 
int num_args; int num_outputs; - nnvm::Tuple cond_input_locs; - nnvm::Tuple then_input_locs; - nnvm::Tuple else_input_locs; + mxnet::Tuple cond_input_locs; + mxnet::Tuple then_input_locs; + mxnet::Tuple else_input_locs; DMLC_DECLARE_PARAMETER(CondParam) { DMLC_DECLARE_FIELD(num_args).set_lower_bound(3) .describe("Number of input arguments, including cond, then and else as three symbol inputs."); @@ -992,7 +992,7 @@ static void CondComputeExCPU(const OpStatePtr& state_ptr, state.cond_op->Forward(nullptr, cond_input_ptr, cond_output_ptr); branch_selection = as_bool_scalar(*cond_output_ptr[0]); // select the right branch - const nnvm::Tuple &func_input_locs = branch_selection + const mxnet::Tuple &func_input_locs = branch_selection ? params.then_input_locs : params.else_input_locs; LoopState &loop_state = branch_selection @@ -1017,7 +1017,7 @@ static void CondGradComputeExCPU(const OpStatePtr& state_ptr, // select the right branch int branch_selection = state.branch_selection; CHECK_NE(branch_selection, -1); - const nnvm::Tuple &func_input_locs = branch_selection + const mxnet::Tuple &func_input_locs = branch_selection ? 
params.then_input_locs : params.else_input_locs; LoopState &loop_state = branch_selection @@ -1048,7 +1048,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, // infer shape for cond, then and else auto infer_subg = [¶ms, in_shape, out_shape](std::shared_ptr subg, ShapeVector *_subg_out, - const nnvm::Tuple &input_locs, + const mxnet::Tuple &input_locs, bool fill_out_shape) { // create subg_in mxnet::ShapeVector subg_in; @@ -1086,7 +1086,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, for (size_t i = 0; i < subg_in.size(); ++i) { auto eid = idx.entry_id(input_nids[i], 0); auto g_out_shape = new_shapes[eid]; - if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) { + if (!shape_is_known(g_out_shape)) { // when the shape is not fully inferred continue; } @@ -1099,7 +1099,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, for (size_t i = 0; i < g.outputs.size(); ++i) { auto eid = idx.entry_id(g.outputs[i]); auto g_out_shape = new_shapes[eid]; - if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) { + if (!shape_is_known(g_out_shape)) { // when the shape is not fully inferred continue; } @@ -1107,7 +1107,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs, } return g.GetAttr("shape_num_unknown_nodes") == 0; }; - ShapeVector cond_out_shape{mxnet::TShape(1U)}; // this means: [(1, )] + ShapeVector cond_out_shape{mxnet::TShape(1, 1)}; // this means: [(1, )] ShapeVector then_out_shape(params.num_outputs); ShapeVector else_out_shape(params.num_outputs); bool succ_0 = infer_subg(attrs.subgraphs[0], &cond_out_shape, \ @@ -1190,7 +1190,7 @@ static bool BackwardCondStorageType(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size() + 3U, (size_t) params.num_args); CHECK_EQ(attrs.subgraphs.size(), 3U); static const std::function is_udf = is_stype_udf; - auto sub_pass = [&](const std::shared_ptr &subg, const nnvm::Tuple &input_locs) { + auto sub_pass = [&](const std::shared_ptr &subg, const mxnet::Tuple &input_locs) { // A. 
first construct subg_in_attrs // need subg_in_attrs as subg_bwd_out (copy), subg_fwd_in (extract), subg_fwd_out (copy) std::vector subg_in_attrs; diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h index ed6748a9c85c..0d6ffd7e895e 100644 --- a/src/operator/convolution_v1-inl.h +++ b/src/operator/convolution_v1-inl.h @@ -64,11 +64,11 @@ struct ConvolutionV1Param : public dmlc::Parameter { dmlc::optional layout; DMLC_DECLARE_PARAMETER(ConvolutionV1Param) { DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) .describe("convolution stride: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0)) .describe("convolution dilate: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) .describe("pad for convolution: (h, w) or (d, h, w)"); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("convolution filter(channel) number"); @@ -405,7 +405,7 @@ class ConvolutionV1Prop : public OperatorProperty { // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData]; - if (dshp.ndim() == 0) return false; + if (!shape_is_known(dshp)) return false; if (param_.kernel.ndim() == 2) { // 2d conv_v1 CHECK_EQ(dshp.ndim(), 4U) \ diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h index c37324678120..182cd682af8b 100644 --- a/src/operator/image/image_random-inl.h +++ b/src/operator/image/image_random-inl.h @@ -93,7 +93,7 @@ inline bool ToTensorShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape &shp = (*in_attrs)[0]; - if (!shp.ndim()) return false; + if (!shape_is_known(shp)) 
return false; CHECK((shp.ndim() == 3) || (shp.ndim() == 4)) << "Input image must have shape (height, width, channels), or " @@ -549,7 +549,7 @@ template void FlipImpl(const mxnet::TShape &shape, DType *src, DType *dst) { int head = 1, mid = shape[axis], tail = 1; for (int i = 0; i < axis; ++i) head *= shape[i]; - for (uint32_t i = axis+1; i < shape.ndim(); ++i) tail *= shape[i]; + for (int i = axis+1; i < shape.ndim(); ++i) tail *= shape[i]; for (int i = 0; i < head; ++i) { for (int j = 0; j < (mid >> 1); ++j) { diff --git a/src/operator/image/resize-inl.h b/src/operator/image/resize-inl.h index de2189838d76..4ebebbfb272c 100644 --- a/src/operator/image/resize-inl.h +++ b/src/operator/image/resize-inl.h @@ -49,12 +49,12 @@ void ResizeImplCUDA(Stream *s, #endif // MXNET_USE_CUDA struct ResizeParam : public dmlc::Parameter { - nnvm::Tuple size; + mxnet::Tuple size; bool keep_ratio; int interp; DMLC_DECLARE_PARAMETER(ResizeParam) { DMLC_DECLARE_FIELD(size) - .set_default(nnvm::Tuple()) + .set_default(mxnet::Tuple()) .describe("Size of new image. Could be (width, height) or (size)"); DMLC_DECLARE_FIELD(keep_ratio) .describe("Whether to resize the short edge or both edges to `size`, " diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h index cfdd1064d6fb..aef990010818 100644 --- a/src/operator/leaky_relu-inl.h +++ b/src/operator/leaky_relu-inl.h @@ -315,7 +315,7 @@ class LeakyReLUOp : public Operator { return a < b ? (a < c ? a : c) : (b < c ? 
b : c); } static inline mxnet::TShape expand_shape(const mxnet::TShape& src, const mxnet::TShape& dst) { - mxnet::TShape result(dst.ndim()); + mxnet::TShape result(dst.ndim(), -1); int s = src.ndim() - 1; for (int i = dst.ndim() - 1; i >= 0; i--) { if (s >= 0 && i <= 1 && (dst[i] == src[s] || src[s] == 1)) { diff --git a/src/operator/loss_binary_op-inl.h b/src/operator/loss_binary_op-inl.h index a3853c56359a..1d71993da515 100644 --- a/src/operator/loss_binary_op-inl.h +++ b/src/operator/loss_binary_op-inl.h @@ -43,7 +43,7 @@ inline bool SoftmaxCrossEntropyShape(const nnvm::NodeAttrs& attrs, << "SoftmaxCrossEntropy only accept 1D label"; CHECK_EQ((*in_attrs)[0][0], (*in_attrs)[1][0]) << "SoftmaxCrossEntropy: data label shape mismatch"; - SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1)); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1)); return true; } diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index 511fe455e946..590b1b428023 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -332,7 +332,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, const int channelCount = dshape[channelAxis]; - if (dshape.ndim() == 0) { + if (!shape_is_known(dshape)) { return false; } diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index fa441c45321e..5435bd815334 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -39,39 +39,40 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, const ConcatParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), static_cast(param_.num_args)); mxnet::TShape dshape; - index_t size = 0; - bool has_zero = false; + dim_t size = 0; + bool has_unknown_dim_size = false; int axis = -1; for (int i = 0; i < param_.num_args; ++i) { mxnet::TShape tmp = (*in_shape)[i]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - has_zero = tmp[axis] == 0 || has_zero; + has_unknown_dim_size = tmp[axis] == 
-1 || has_unknown_dim_size; size += tmp[axis]; - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } } mxnet::TShape tmp = (*out_shape)[0]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } - if (dshape.ndim() == 0) return false; + if (dshape.ndim() == -1) return false; + CHECK_NE(dshape.ndim(), 0) << "zero-dimensional arrays cannot be concatenated"; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) << "Incompatible input shape: expected " << dshape << ", got " << (*in_shape)[i]; } - if (!has_zero) dshape[axis] = size; + if (!has_unknown_dim_size) dshape[axis] = size; CHECK(shape_assign(&(*out_shape)[0], dshape)) << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0]; - return dshape.Size() != 0; + return shape_is_known(dshape); } // Concat for RNN param deals with the reverse shape inference from output @@ -109,7 +110,7 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, shape_assign(&dshape, tmp); } - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) @@ -232,7 +233,7 @@ bool SupportMKLDNNConcat(const std::vector &arrs) { for (auto &arr : arrs) { if (arr.IsView()) return false; if (arr.dtype() != mshadow::kFloat32) return false; - unsigned ndim = arr.shape().ndim(); + int ndim = arr.shape().ndim(); unsigned mkldnn_ndims = static_cast(arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims); if (!(ndim == 2 || ndim == 4) || ndim != mkldnn_ndims) return false; diff --git a/src/operator/nn/convolution-inl.h b/src/operator/nn/convolution-inl.h index 7ae34ae363b4..7d5f7c7d5757 100644 --- a/src/operator/nn/convolution-inl.h +++ b/src/operator/nn/convolution-inl.h @@ -69,11 +69,11 @@ struct ConvolutionParam : public dmlc::Parameter { dmlc::optional layout; 
DMLC_DECLARE_PARAMETER(ConvolutionParam) { DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (w,), (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) .describe("Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0)) .describe("Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) .describe("Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Convolution filter(channel) number"); @@ -209,9 +209,9 @@ class ConvolutionOp { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, 1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); - for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) { + for (int i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_data[0].shape_[i+1]; } // create a column buffer using workspace and col_buffer_shape @@ -295,9 +295,9 @@ class ConvolutionOp { Tensor workspace = ctx.requested[conv::kTempSpace] .get_space_typed(Shape1(col_buffer_size_), s); // calculate the shape of col_buffer - mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1); + mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, 1); col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); - for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) { + for (int i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_grad[conv::kData].shape_[i+1]; } // create a column 
buffer using workspace and col_buffer_shape @@ -342,10 +342,10 @@ class ConvolutionOp { void LayerSetUp(const mxnet::TShape& ishape, const mxnet::TShape& oshape) { channel_axis_ = 1; // hard code channel axis const index_t first_spatial_axis = channel_axis_ + 1; - const index_t num_axes = param_.kernel.ndim() + 2; + const int num_axes = param_.kernel.ndim() + 2; num_spatial_axes_ = num_axes - first_spatial_axis; is_1x1_ = true; - for (index_t i = 0; i < param_.kernel.ndim(); ++i) { + for (int i = 0; i < param_.kernel.ndim(); ++i) { is_1x1_ &= param_.kernel[i] == 1 && param_.stride[i] == 1 && param_.pad[i] == 0; if (!is_1x1_) break; } diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index 527a0073930f..dfbc89de7b0e 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -96,7 +96,7 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv::kData]; - if (dshp.ndim() == 0) return false; + if (!shape_is_known(dshp)) return false; if (param_.kernel.ndim() == 1) { // 1d conv diff --git a/src/operator/nn/ctc_loss-inl.h b/src/operator/nn/ctc_loss-inl.h index 357888dc30f1..8c841dfc24b4 100644 --- a/src/operator/nn/ctc_loss-inl.h +++ b/src/operator/nn/ctc_loss-inl.h @@ -239,7 +239,7 @@ inline bool CTCLossOpShape(const nnvm::NodeAttrs &attrs, "the maximum sequence length of the " "data."; - mxnet::TShape oshape(1); + mxnet::TShape oshape(1, -1); oshape[0] = dshape[1]; // batch size SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); // forward output SHAPE_ASSIGN_CHECK(*out_attrs, 1, dshape); // grad output diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cc b/src/operator/nn/cudnn/cudnn_batch_norm.cc index 5632028dd769..1df888e4b38a 100644 --- a/src/operator/nn/cudnn/cudnn_batch_norm.cc +++ b/src/operator/nn/cudnn/cudnn_batch_norm.cc @@ -37,7 +37,7 @@ static bool BatchNormShape(const 
nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_ using namespace mshadow; CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, moving_mean, moving_var]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(3) = mxnet::TShape(Shape1(dshape[1])); diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 55b263896339..44d1c3c36e99 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -1015,9 +1015,9 @@ class CuDNNConvolutionOp { // e.g. {shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1} template inline Shape Strides(const mxnet::TShape &s) { - uint32_t ndim = s.ndim(); + int ndim = s.ndim(); mxnet::TShape strides(ndim); - for (uint32_t i = 0; i != ndim; ++i) + for (int i = 0; i != ndim; ++i) strides[i] = s.ProdShape(i+1, ndim); return strides.get(); } diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index 47f688c8ab9c..f652dd85bd41 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -933,9 +933,9 @@ class CuDNNDeconvolutionOp { // e.g. 
{shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1} template inline Shape Strides(const mxnet::TShape &s) { - uint32_t ndim = s.ndim(); + int ndim = s.ndim(); mxnet::TShape strides(ndim); - for (uint32_t i = 0; i != ndim; ++i) + for (int i = 0; i != ndim; ++i) strides[i] = s.ProdShape(i+1, ndim); return strides.get(); } diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index 5248c1211ac7..b28e47818392 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -65,13 +65,13 @@ struct DeconvolutionParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(DeconvolutionParam) { DMLC_DECLARE_FIELD(kernel).describe("Deconvolution kernel size: (w,), (h, w) or (d, h, w). " "This is same as the kernel size used for the corresponding convolution"); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) .describe("The stride used for the corresponding convolution: (w,), (h, w) or (d, h, w). " "Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0)) .describe("Dilation factor for each dimension of the input: (w,), (h, w) or (d, h, w). " "Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) .describe("The amount of implicit zero padding added during convolution for each " "dimension of the input: " "(w,), (h, w) or (d, h, w). " @@ -79,11 +79,11 @@ struct DeconvolutionParam : public dmlc::Parameter { "If `target_shape` is set, " "`pad` will be ignored and a padding that will generate the target shape " "will be used. Defaults to no padding."); - DMLC_DECLARE_FIELD(adj).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(adj).set_default(mxnet::TShape(0, 0)) .describe("Adjustment for output shape: (w,), (h, w) or (d, h, w). 
" "If `target_shape` is set, " "`adj` will be ignored and computed accordingly."); - DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape(0, 0)) .describe("Shape of the output tensor: (w,), (h, w) or (d, h, w)."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Number of output filters."); diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index 27928b9b41c3..d8c91f7f96c8 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -54,7 +54,7 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, } out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshape = (*in_shape)[deconv::kData]; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; if (param_.kernel.ndim() == 1) { // 1d conv diff --git a/src/operator/nn/dropout-inl.h b/src/operator/nn/dropout-inl.h index 01611dfce191..a34d2992c8c6 100644 --- a/src/operator/nn/dropout-inl.h +++ b/src/operator/nn/dropout-inl.h @@ -78,7 +78,7 @@ struct DropoutParam : public dmlc::Parameter { .add_enum("always", dropout::kAlways) .set_default(dropout::kTraining) .describe("Whether to only turn on dropout during training or to also turn on for inference."); - DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0, 0)) .describe("Axes for variational dropout kernel."); DMLC_DECLARE_FIELD(cudnn_off).set_default(dmlc::optional(false)) .describe("Whether to turn off cudnn in dropout operator. 
" diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc index 5fdc672d766e..0e4d18b1fda8 100644 --- a/src/operator/nn/dropout.cc +++ b/src/operator/nn/dropout.cc @@ -95,10 +95,10 @@ Example:: CHECK_EQ(in_shape->size(), 1U); const DropoutParam& param = nnvm::get(attrs.parsed); mxnet::TShape dshape(in_shape->at(0)); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; out_shape->clear(); out_shape->push_back(dshape); - for (index_t i = 0; i < param.axes.ndim(); ++i) { + for (int i = 0; i < param.axes.ndim(); ++i) { dshape[param.axes[i]] = 1; } out_shape->push_back(dshape); diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index 2bc321832af6..98277c206dd6 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -52,7 +52,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, mxnet::TShape dshape = (*in_shape)[fullc::kData]; mxnet::TShape oshape = (*out_shape)[0]; // require data to be known - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; index_t num_input; if (!param.flatten) { @@ -75,7 +75,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, } else { SHAPE_ASSIGN_CHECK(*out_shape, 0, Shape2(dshape[0], param.num_hidden)); } - if (oshape.ndim() != 0) { + if (oshape.ndim() > 0) { dshape[0] = oshape[0]; SHAPE_ASSIGN_CHECK(*in_shape, fullc::kData, dshape); } diff --git a/src/operator/nn/im2col.h b/src/operator/nn/im2col.h index 0059a420726d..06a4e1b75b33 100644 --- a/src/operator/nn/im2col.h +++ b/src/operator/nn/im2col.h @@ -152,7 +152,7 @@ inline void im2col_nd_core_cpu(const DType* data_input, const bool im2col, const mxnet::TShape& kernel_shape, const mxnet::TShape& pad, const mxnet::TShape& stride, const mxnet::TShape& dilation, DType* data_output, OpReqType req = mxnet::kWriteTo) { if (mxnet::kNullOp == req) return; - index_t num_spatial_axes = kernel_shape.ndim(); + int num_spatial_axes = 
kernel_shape.ndim(); if (!im2col) { index_t im_size = im_shape[1]; // skip batch dim for (index_t i = 0; i < num_spatial_axes; ++i) { @@ -319,7 +319,7 @@ inline void col2im(mshadow::Stream* s, const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape, const mxnet::TShape& pad, const mxnet::TShape& stride, const mxnet::TShape& dilation, DType* data_im, OpReqType req) { - index_t num_spatial_axes = kernel_shape.ndim(); + int num_spatial_axes = kernel_shape.ndim(); if (2 == num_spatial_axes) { col2im_cpu(data_col, im_shape[1], im_shape[2], im_shape[3], kernel_shape[0], kernel_shape[1], pad[0], pad[1], diff --git a/src/operator/nn/layer_norm-inl.h b/src/operator/nn/layer_norm-inl.h index dc4914bf2457..c7de7d734521 100644 --- a/src/operator/nn/layer_norm-inl.h +++ b/src/operator/nn/layer_norm-inl.h @@ -167,7 +167,7 @@ void LayerNormGradCompute(const nnvm::NodeAttrs& attrs, const LayerNormParam& param = nnvm::get(attrs.parsed); int axis = param.axis; if (axis < 0) { - axis += static_cast(inputs[0].ndim()); + axis += inputs[0].ndim(); } CHECK(axis >= 0 && axis < inputs[0].ndim()) << "Channel axis out of range: " << param.axis; Stream *s = ctx.get_stream(); diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc index d4c308398cb7..1b0e99d8fe87 100644 --- a/src/operator/nn/layer_norm.cc +++ b/src/operator/nn/layer_norm.cc @@ -41,14 +41,14 @@ static bool LayerNormShape(const nnvm::NodeAttrs& attrs, const mxnet::TShape &dshape = in_shape->at(layernorm::kData); int axis = param.axis; if (axis < 0) { - axis += static_cast(dshape.ndim()); + axis += dshape.ndim(); } - CHECK(axis >= 0 && axis < static_cast(dshape.ndim())) + CHECK(axis >= 0 && axis < dshape.ndim()) << "Channel axis out of range: axis=" << param.axis; const int channelCount = dshape[axis]; - if (dshape.ndim() == 0) { + if (!shape_is_known(dshape)) { return false; } diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc index 410bdab667e5..b632e35b57fe 100644 --- 
a/src/operator/nn/lrn.cc +++ b/src/operator/nn/lrn.cc @@ -40,7 +40,7 @@ bool LRNShape(const nnvm::NodeAttrs& attrs, using namespace mshadow; CHECK_EQ(in_shape->size(), 1U) << "Input:[data]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; out_shape->clear(); out_shape->push_back(dshape); out_shape->push_back(dshape); diff --git a/src/operator/nn/pooling-inl.h b/src/operator/nn/pooling-inl.h index 9e1e73bf19e2..03f0fa8edd6c 100644 --- a/src/operator/nn/pooling-inl.h +++ b/src/operator/nn/pooling-inl.h @@ -55,7 +55,7 @@ struct PoolingParam : public dmlc::Parameter { dmlc::optional count_include_pad; dmlc::optional layout; DMLC_DECLARE_PARAMETER(PoolingParam) { - DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape()) // add default value here + DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0, 0)) // add default value here .enforce_nonzero() .describe("Pooling kernel size: (y, x) or (d, y, x)"); @@ -78,11 +78,11 @@ struct PoolingParam : public dmlc::Parameter { .add_enum("same", pool_enum::kSame) .describe("Pooling convention to be applied."); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0)) .enforce_nonzero() .describe("Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0)) .describe("Pad for pooling: (y, x) or (d, y, x). 
Defaults to no padding."); DMLC_DECLARE_FIELD(p_value).set_default(dmlc::optional()) @@ -200,11 +200,11 @@ class PoolingOp { kernel = mxnet::TShape(ishape.data() + 2, ishape.data() + ishape.ndim()); } - padding = mxnet::TShape(ishape.ndim() - 2); + padding = mxnet::TShape(ishape.ndim() - 2, 0); for (index_t i = 0; i < ishape.ndim() - 2; i++) { padding[i] = 0; } - stride = mxnet::TShape(ishape.ndim() - 2); + stride = mxnet::TShape(ishape.ndim() - 2, 1); } const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ? param_.p_value.value() : 1; @@ -257,11 +257,11 @@ class PoolingOp { kernel = mxnet::TShape(ishape.data() + 2, ishape.data() + ishape.ndim()); } - padding = mxnet::TShape(ishape.ndim() - 2); + padding = mxnet::TShape(ishape.ndim() - 2, 0); for (index_t i = 0; i < ishape.ndim() - 2; i++) { padding[i] = 0; } - stride = mxnet::TShape(ishape.ndim() - 2); + stride = mxnet::TShape(ishape.ndim() - 2, 1); } const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ? 
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 2d16604baa20..7c365f5081a1 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -114,11 +114,11 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs, << "Pooling: Input data should be 3D in (batch, channel, x)" << " Or 4D in (batch, channel, y, x) " << " Or 5D in (batch, channel, d, y, x)"; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; int layout = param.GetLayout(dshape.ndim()); if (param.global_pool) { mxnet::TShape oshape = dshape; - size_t c_index = 0; + int c_index = 0; switch (layout) { case mshadow::kNCW: case mshadow::kNCHW: @@ -133,7 +133,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs, default: LOG(FATAL) << "Unsupported tensor layout " << param.layout.value(); } - for (size_t i{1}; i < dshape.ndim(); i++) + for (int i = 1; i < dshape.ndim(); i++) if (i != c_index) oshape[i] = 1; out_shape->clear(); diff --git a/src/operator/nn/upsampling.cc b/src/operator/nn/upsampling.cc index d09017bf713e..ac638162dc6d 100644 --- a/src/operator/nn/upsampling.cc +++ b/src/operator/nn/upsampling.cc @@ -60,7 +60,7 @@ static bool UpSamplingShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; CHECK_EQ(dshape.ndim(), 4U) << \ "UpSamplingBilinear: Input data should be 4D in (batch, channel, y, x)"; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; int kernel = 2 * param_.scale - param_.scale % 2; SHAPE_ASSIGN_CHECK(*in_shape, up_enum::kWeight, diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h index bb2b7fca231c..e0379a040c3f 100644 --- a/src/operator/numpy/np_broadcast_reduce_op.h +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -59,7 +59,7 @@ inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, CHECK(axes[0] == 0 || axes[0] == -1); } } - return TShape(0); + return TShape(0, -1); } // 
axis=None, do global reduction @@ -67,7 +67,7 @@ inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, if (keepdims) { return TShape(ishape.ndim(), 1); } else { - return TShape(0); + return TShape(0, -1); } } @@ -101,7 +101,7 @@ inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, if (keepdims) { oshape = TShape(ishape); } else { - oshape = TShape(ishape.ndim() - axes.ndim()); + oshape = TShape(ishape.ndim() - axes.ndim(), -1); } if (keepdims) { diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index a461d2bc4cef..c95f859a1b5b 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -103,19 +103,10 @@ struct InferStorageTypeError : public dmlc::Error { : dmlc::Error(msg_), msg(msg_), index(index) {} }; -/*! \brief check if shape is empty or contains unknown (0) dim. */ +/*! \brief check if shape is empty or contains unknown (0) dim. + * DEPRECATED. */ inline bool shape_is_none(const mxnet::TShape& x) { - return x.ndim() == 0 || x.Size() == 0; -} - -/*! brief check if shape is known using the NumPy compatible definition. - * zero-dim and zero-size tensors are valid. -1 means unknown.*/ -inline bool shape_is_known(const TShape& x) { - if (x.ndim() == -1) return false; - for (int i = 0; i < x.ndim(); ++i) { - if (x[i] == -1) return false; - } - return true; + return !mxnet::shape_is_known(x); } /*! \brief check if type is none (-1) */ @@ -130,7 +121,7 @@ inline bool storage_type_is_none(const int& x) { /*! \brief check if shape is scalar({1}). */ inline bool shape_is_scalar(const mxnet::TShape& x) { - return x.ndim() == 1 && x.Size() == 1; + return x.ndim() == 0; } /*! 
\brief get string representation of shape */ @@ -573,7 +564,7 @@ class OpSignature { } void AddSign(const mxnet::TShape &shape) { - for (size_t i = 0; i < shape.ndim(); i++) { + for (int i = 0; i < shape.ndim(); i++) { hash = hash * 2 + shape[i]; eles.push_back(shape[i]); } diff --git a/src/operator/operator_util.cc b/src/operator/operator_util.cc index b87428ca2b64..bc097a5b0c1c 100644 --- a/src/operator/operator_util.cc +++ b/src/operator/operator_util.cc @@ -774,7 +774,7 @@ class SimpleUnaryOpProp : public SimpleOpPropBase { using namespace mshadow; CHECK_EQ(in_shape->size(), 1) << "Input:[data]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; out_shape->clear(); if (source->unary_shape_ == nullptr) { out_shape->push_back(dshape); diff --git a/src/operator/quantization/dequantize-inl.h b/src/operator/quantization/dequantize-inl.h index dcda5a8b4bef..88199bc2591d 100644 --- a/src/operator/quantization/dequantize-inl.h +++ b/src/operator/quantization/dequantize-inl.h @@ -103,7 +103,7 @@ inline bool DequantizeShape(const nnvm::NodeAttrs& attrs, } SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); - return !shape_is_none(out_attrs->at(0)); + return shape_is_known(out_attrs->at(0)); } inline bool DequantizeType(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h index 1ad0016c52bc..2c267a76a571 100644 --- a/src/operator/quantization/quantize-inl.h +++ b/src/operator/quantization/quantize-inl.h @@ -126,7 +126,7 @@ inline bool QuantizeShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1}); SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1}); - return !shape_is_none(out_attrs->at(0)); + return shape_is_known(out_attrs->at(0)); } inline bool QuantizeType(const nnvm::NodeAttrs& attrs, diff --git 
a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc index e32bb5a18e1a..f97807424701 100644 --- a/src/operator/quantization/quantized_concat.cc +++ b/src/operator/quantization/quantized_concat.cc @@ -55,7 +55,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha shape_assign(&dshape, tmp); } - if (dshape.ndim() == 0) return false; + if (dshape.ndim() == -1) return false; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) diff --git a/src/operator/quantization/quantized_flatten-inl.h b/src/operator/quantization/quantized_flatten-inl.h index 99a262de19ca..de051b969659 100644 --- a/src/operator/quantization/quantized_flatten-inl.h +++ b/src/operator/quantization/quantized_flatten-inl.h @@ -86,10 +86,10 @@ inline bool QuantizedFlattenShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 3U); const mxnet::TShape &dshape = (*in_attrs)[0]; - if (shape_is_none(dshape)) return false; + if (!shape_is_known(dshape)) return false; - uint32_t target_dim = 1; - for (uint32_t i = 1; i < dshape.ndim(); ++i) { + dim_t target_dim = 1; + for (int i = 1; i < dshape.ndim(); ++i) { target_dim *= dshape[i]; } diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc index 0a04e71b9093..cc4365f818d2 100644 --- a/src/operator/quantization/quantized_fully_connected.cc +++ b/src/operator/quantization/quantized_fully_connected.cc @@ -47,7 +47,7 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_shape->size(), num_inputs * 3); CHECK_EQ(out_shape->size(), 3U); - CHECK(!shape_is_none(in_shape->at(0))) + CHECK(shape_is_known(in_shape->at(0))) << "QuantizedFullyConnectedOp input data shape must be given"; const mxnet::TShape& dshape = in_shape->at(0); index_t num_input; diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc index 
af604080a756..1839e2a29d77 100644 --- a/src/operator/quantization/quantized_pooling.cc +++ b/src/operator/quantization/quantized_pooling.cc @@ -35,7 +35,7 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *out_shape) { const PoolingParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), 3U); - if (shape_is_none(in_shape->at(0))) return false; + if (!shape_is_known(in_shape->at(0))) return false; const mxnet::TShape &dshape = (*in_shape)[0]; CHECK_EQ(dshape.ndim(), 4U) << "quantized_pooling: Input data should be 4D in " @@ -45,7 +45,7 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs, << "QuantizedPoolingOp only supports NCHW layout for now, saw " << layout; // NCHW layout const int N = 0, H = 2, W = 3, C = 1; - mxnet::TShape oshape(4); + mxnet::TShape oshape(4, -1); CHECK_EQ(param.kernel.ndim(), 2) << "QuantizedPoolingOp only supports 2D pooling for now"; CHECK(param.kernel[0] <= dshape[H] + 2 * param.pad[0]) << "kernel size (" << param.kernel[0] diff --git a/src/operator/random/multisample_op.h b/src/operator/random/multisample_op.h index e9f266932e13..7d5e256297ad 100644 --- a/src/operator/random/multisample_op.h +++ b/src/operator/random/multisample_op.h @@ -66,7 +66,7 @@ inline bool MultiSampleOpShape(const nnvm::NodeAttrs& attrs, // Get shape to be sampled for each parameter set. const MultiSampleParam& param = nnvm::get(attrs.parsed); mxnet::TShape sshape = param.shape; - for (size_t i = 0; i < sshape.ndim(); ++i) { + for (int i = 0; i < sshape.ndim(); ++i) { CHECK_GT(sshape[i], 0) << "shape parameter must be non-zero within each dimension"; } // Examine output shape whether it is already defined. 
diff --git a/src/operator/random/sample_multinomial_op.h b/src/operator/random/sample_multinomial_op.h index e76cd646b850..b38aefbc1634 100644 --- a/src/operator/random/sample_multinomial_op.h +++ b/src/operator/random/sample_multinomial_op.h @@ -41,7 +41,7 @@ struct SampleMultinomialParam : public dmlc::Parameter { int dtype; DMLC_DECLARE_PARAMETER(SampleMultinomialParam) { DMLC_DECLARE_FIELD(shape) - .set_default(mxnet::TShape()) + .set_default(mxnet::TShape(0, 1)) .describe("Shape to be sampled from each random distribution."); DMLC_DECLARE_FIELD(get_prob) .set_default(false) @@ -68,7 +68,7 @@ inline bool SampleMultinomialOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), param.get_prob ? 2U : 1U); const mxnet::TShape& ishape = (*in_attrs)[0]; - if (!ishape.ndim()) return false; + if (!shape_is_known(ishape)) return false; MSHADOW_TYPE_SWITCH(param.dtype, DType, { CHECK_LE(ishape[ishape.ndim() - 1], mxnet::common::MaxIntegerValue()) @@ -76,26 +76,26 @@ inline bool SampleMultinomialOpShape(const nnvm::NodeAttrs& attrs, }); if (ishape.ndim() == 1) { - if (param.shape.ndim()) { + if (param.shape.ndim() > 0) { SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape); if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, param.shape); } else { - SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1)); - if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1)); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1)); + if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1, 1)); } return true; } - mxnet::TShape oshape(ishape.ndim() - 1 + param.shape.ndim()); - for (size_t i = 0; i < ishape.ndim() - 1; ++i) { + mxnet::TShape oshape(ishape.ndim() - 1 + param.shape.ndim(), -1); + for (int i = 0; i < ishape.ndim() - 1; ++i) { oshape[i] = ishape[i]; } - for (size_t i = 0; i < param.shape.ndim(); ++i) { + for (int i = 0; i < param.shape.ndim(); ++i) { oshape[i + ishape.ndim() - 1] = param.shape[i]; } 
SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, oshape); - return true; + return shape_is_known(out_attrs->at(0)) && (!param.get_prob || shape_is_known(out_attrs->at(1))); } diff --git a/src/operator/random/unique_sample_op.h b/src/operator/random/unique_sample_op.h index 87998c8f46b1..c97d4fdf7ced 100644 --- a/src/operator/random/unique_sample_op.h +++ b/src/operator/random/unique_sample_op.h @@ -60,7 +60,7 @@ inline bool SampleUniqueShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 2U); // output shape is known - if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == 0) { + if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == -1) { SHAPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::Shape1((*out_attrs)[0][0])); return true; } diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h index 8b63a8a2cff6..d8f102de1675 100644 --- a/src/operator/regression_output-inl.h +++ b/src/operator/regression_output-inl.h @@ -57,7 +57,7 @@ inline bool RegressionOpShape(const nnvm::NodeAttrs& attrs, using namespace mshadow; CHECK_EQ(in_attrs->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_attrs->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; auto &lshape = (*in_attrs)[1]; if (lshape.ndim() == 0) { // special treatment for 1D output, to allow 1D label by default. 
diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h index b4db80bdd721..4c42934f1618 100644 --- a/src/operator/sequence_last-inl.h +++ b/src/operator/sequence_last-inl.h @@ -263,7 +263,7 @@ class SequenceLastProp : public OperatorProperty { SHAPE_ASSIGN_CHECK(*in_shape, seq_last::kSequenceLength, Shape1(sbatch)); // calculate output size - mxnet::TShape shape_o(dshape.ndim() - 1); + mxnet::TShape shape_o(dshape.ndim() - 1, -1); shape_o[0] = sbatch; for (index_t i = 1; i < shape_o.ndim(); ++i) shape_o[i] = dshape[i + 1]; diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h index 6125782d525b..a51b17cd324e 100644 --- a/src/operator/slice_channel-inl.h +++ b/src/operator/slice_channel-inl.h @@ -195,9 +195,9 @@ class SliceChannelProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1U); mxnet::TShape dshape = in_shape->at(slice_enum::kData); mxnet::TShape ishape = in_shape->at(slice_enum::kData); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; if (param_.axis >= 0) { - CHECK_LT(static_cast(param_.axis), dshape.ndim()); + CHECK_LT(param_.axis, dshape.ndim()); } else { CHECK_LT(param_.axis + dshape.ndim(), dshape.ndim()); } diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h index 5dca8bac14a3..f81a232d629a 100644 --- a/src/operator/softmax_output-inl.h +++ b/src/operator/softmax_output-inl.h @@ -337,19 +337,19 @@ class SoftmaxOutputProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; // label.shape == data.shape: use probability as label if (dshape != (*in_shape)[softmaxout_enum::kLabel]) { if (param_.multi_output) { mxnet::TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]); - mxnet::TShape lshape2(dshape.ndim() - 1); + 
mxnet::TShape lshape2(dshape.ndim() - 1, -1); lshape2[0] = dshape[0]; - for (index_t i = 2; i < dshape.ndim(); ++i) + for (int i = 2; i < dshape.ndim(); ++i) lshape2[i-1] = dshape[i]; mxnet::TShape lshape3 = dshape; lshape3[1] = 1; - if (in_shape->at(softmaxout_enum::kLabel).ndim() == 0) { + if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) { in_shape->at(softmaxout_enum::kLabel) = lshape1; } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) { } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) { @@ -361,8 +361,8 @@ class SoftmaxOutputProp : public OperatorProperty { throw InferShapeError(os.str(), softmaxout_enum::kLabel); } } else { - mxnet::TShape label_shape(dshape.ndim() - 1); - for (index_t i = 0; i + 1 < dshape.ndim(); ++i) + mxnet::TShape label_shape(dshape.ndim() - 1, -1); + for (int i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, label_shape); } diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc index b17ef3527297..262242f98004 100644 --- a/src/operator/softmax_output.cc +++ b/src/operator/softmax_output.cc @@ -85,19 +85,19 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, const SoftmaxOutputParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; // label.shape == data.shape: use probability as label if (dshape != (*in_shape)[softmaxout_enum::kLabel]) { if (param.multi_output) { mxnet::TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]); - mxnet::TShape lshape2(dshape.ndim() - 1); + mxnet::TShape lshape2(dshape.ndim() - 1, -1); lshape2[0] = dshape[0]; - for (index_t i = 2; i < dshape.ndim(); ++i) + for (int i = 2; i < dshape.ndim(); ++i) lshape2[i-1] = dshape[i]; mxnet::TShape lshape3 = dshape; lshape3[1] = 1; - if 
(in_shape->at(softmaxout_enum::kLabel).ndim() == 0) { + if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) { in_shape->at(softmaxout_enum::kLabel) = lshape1; } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) { } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) { @@ -109,8 +109,8 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, throw InferShapeError(os.str(), softmaxout_enum::kLabel); } } else { - mxnet::TShape label_shape(dshape.ndim() - 1); - for (index_t i = 0; i + 1 < dshape.ndim(); ++i) + mxnet::TShape label_shape(dshape.ndim() - 1, -1); + for (int i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, label_shape); } diff --git a/src/operator/spatial_transformer-inl.h b/src/operator/spatial_transformer-inl.h index 9e5dee842d0d..660d57d55bab 100644 --- a/src/operator/spatial_transformer-inl.h +++ b/src/operator/spatial_transformer-inl.h @@ -190,10 +190,10 @@ class SpatialTransformerProp : public OperatorProperty { CHECK_EQ(param_.sampler_type, st::kBilinear) << "only supports bilinear sampling currently"; const mxnet::TShape &dshape = (*in_shape)[st::kData]; const mxnet::TShape &lshape = (*in_shape)[st::kLoc]; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; CHECK_EQ(dshape.ndim(), 4U) \ << "input data should be 4D in batch-num_filter-y-x"; - if (lshape.ndim() == 0) return false; + if (!shape_is_known(lshape)) return false; CHECK_EQ(lshape.ndim(), 2U) \ << "locolisation paramter should be 4D in batch-num_hidden"; if (param_.transform_type == st::kAffine) { diff --git a/src/operator/subgraph_op_common.cc b/src/operator/subgraph_op_common.cc index 8934438d428a..e53d911614a0 100644 --- a/src/operator/subgraph_op_common.cc +++ b/src/operator/subgraph_op_common.cc @@ -178,7 +178,7 @@ bool as_bool_scalar(const NDArray &a) { } bool is_shape_udf(const mxnet::TShape &x) { - return x.ndim() == 0 || x.Size() == 0; + return 
!shape_is_known(x); } bool is_stype_udf(const int &x) { @@ -225,7 +225,7 @@ void LoopState::Forward(int iter_no, if (!out_bufs[i].IsSame(coutputs[i])) { // The line below checks whether dynamic shape exists. // If so, re-initialize the shape. - if (coutputs[i].shape().ndim() == 0) { + if (!shape_is_known(coutputs[i].shape())) { const_cast(coutputs[i]).Init(out_bufs[i].shape()); } CopyFromTo(out_bufs[i], coutputs[i]); diff --git a/src/operator/subgraph_op_common.h b/src/operator/subgraph_op_common.h index 91adf576dc07..19528349c0c7 100644 --- a/src/operator/subgraph_op_common.h +++ b/src/operator/subgraph_op_common.h @@ -67,7 +67,7 @@ bool is_type_udf(const int &x); template void extract_by_loc(const std::vector &array, - const nnvm::Tuple input_locs, + const mxnet::Tuple input_locs, std::vector *out) { out->clear(); out->reserve(input_locs.ndim()); @@ -94,11 +94,11 @@ bool fill_value(T *x, T *y, bool x_empty, bool y_empty) { } template -bool sync_in_in(const nnvm::Tuple &input_locs, - std::vector *in, - std::vector *subg_in, - std::function is_empty) { - for (size_t i = 0; i < input_locs.ndim(); ++i) { +bool sync_in_in(const mxnet::Tuple &input_locs, + std::vector *in, + std::vector *subg_in, + std::function is_empty) { + for (int i = 0; i < input_locs.ndim(); ++i) { T &x = in->at(input_locs[i]); T &y = subg_in->at(i); fill_value(&x, &y, is_empty(x), is_empty(y)); diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h index 1609764f0ebe..3d651c13d8ba 100644 --- a/src/operator/svm_output-inl.h +++ b/src/operator/svm_output-inl.h @@ -143,9 +143,9 @@ class SVMOutputProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; - mxnet::TShape label_shape(dshape.ndim() - 1); - for (index_t i = 0; i + 1 < dshape.ndim(); ++i) + if (!shape_is_known(dshape)) return false; + mxnet::TShape label_shape(dshape.ndim() - 
1, -1); + for (int i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; SHAPE_ASSIGN_CHECK(*in_shape, svm_enum::kLabel, label_shape); out_shape->clear(); diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h index ce835084ab32..41cb940d957a 100644 --- a/src/operator/swapaxis-inl.h +++ b/src/operator/swapaxis-inl.h @@ -72,8 +72,8 @@ class SwapAxisOp : public Operator { uint32_t dim1, uint32_t dim2) { using namespace mshadow; using namespace mshadow::expr; - index_t ndim_in = shape.ndim(); - index_t si; + int ndim_in = shape.ndim(); + int si; if (dim1 > dim2) { std::swap(dim1, dim2); diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index 069c8ddb04fb..90afdeea858e 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -139,9 +139,9 @@ struct BroadcastAxesParam : public dmlc::Parameter { mxnet::TShape axis; mxnet::TShape size; DMLC_DECLARE_PARAMETER(BroadcastAxesParam) { - DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape(0)) .describe("The axes to perform the broadcasting."); - DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape(0)) .describe("Target sizes of the broadcasting axes."); } }; @@ -149,7 +149,7 @@ struct BroadcastAxesParam : public dmlc::Parameter { struct BroadcastToParam : public dmlc::Parameter { mxnet::TShape shape; DMLC_DECLARE_PARAMETER(BroadcastToParam) { - DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0)) .describe("The shape of the desired array." " We can set the dim to zero if it's same as the original." 
" E.g `A = broadcast_to(B, shape=(10, 0, 0))` " @@ -175,7 +175,7 @@ inline int CheckAxis(int axis, int ndim) { } inline mxnet::TShape AxisShapeCompact(mxnet::TShape shape, int *axis, bool allow_2d) { - int ndim = static_cast(shape.ndim()); + int ndim = shape.ndim(); index_t leading = 1, trailing = 1, M = shape[*axis]; for (int i = 0; i < *axis; ++i) leading *= shape[i]; for (int i = *axis + 1; i < ndim; ++i) trailing *= shape[i]; @@ -196,7 +196,7 @@ inline mxnet::TShape ReduceAxisShapeImpl(const mxnet::TShape& ishape, bool keepdims) { if (!axis || ishape.ndim() == 1) { if (keepdims) { - return mxnet::TShape(ishape.ndim()); + return mxnet::TShape(ishape.ndim(), 1); } return mshadow::Shape1(1); } @@ -208,7 +208,7 @@ inline mxnet::TShape ReduceAxisShapeImpl(const mxnet::TShape& ishape, return oshape; } - mxnet::TShape oshape(ishape.ndim() - 1); + mxnet::TShape oshape(ishape.ndim() - 1, 1); for (int i = 0; i < new_axis; ++i) oshape[i] = ishape[i]; for (int i = new_axis+1; i < static_cast(ishape.ndim()); ++i) { oshape[i-1] = ishape[i]; @@ -222,7 +222,7 @@ inline bool ReduceAxisShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape& ishape = (*in_attrs)[0]; - if (ishape.ndim() == 0) return false; + if (!shape_is_known(ishape)) return false; const ReduceAxisParam& param = nnvm::get(attrs.parsed); SHAPE_ASSIGN_CHECK(*out_attrs, 0, @@ -233,12 +233,12 @@ inline bool ReduceAxisShape(const nnvm::NodeAttrs& attrs, inline mxnet::TShape ReduceAxesShapeImpl(const mxnet::TShape& ishape, const dmlc::optional& axis, bool keepdims, bool exclude) { - // if axis doesn't have value, treat it same mxnet::TShape(). + // if axis doesn't have value, treat it same mxnet::TShape(0). 
if (!axis.has_value() || axis.value().ndim() == 0) { if (keepdims) { - return mxnet::TShape(ishape.ndim()); + return mxnet::TShape(ishape.ndim(), 1); } else { - return mxnet::TShape(1); + return mxnet::TShape(1, 1); } } // axis has value @@ -266,9 +266,9 @@ inline mxnet::TShape ReduceAxesShapeImpl(const mxnet::TShape& ishape, if (keepdims) { oshape = mxnet::TShape(ishape); } else if (exclude) { - oshape = mxnet::TShape(axes.ndim()); + oshape = mxnet::TShape(axes.ndim(), 1); } else { - oshape = mxnet::TShape(std::max(1, ishape.ndim() - axes.ndim())); + oshape = mxnet::TShape(std::max(1, ishape.ndim() - axes.ndim()), 1); } if (keepdims && exclude) { @@ -304,7 +304,7 @@ inline bool ReduceAxesShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - if ((*in_attrs)[0].ndim() == 0) return false; + if (!shape_is_known((*in_attrs)[0])) return false; const ReduceAxesParam& param = nnvm::get(attrs.parsed); SHAPE_ASSIGN_CHECK(*out_attrs, 0, ReduceAxesShapeImpl((*in_attrs)[0], param.axis, @@ -334,7 +334,7 @@ inline bool NormShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - if ((*in_attrs)[0].ndim() == 0) return false; + if (!shape_is_known((*in_attrs)[0])) return false; const NormParam& param = nnvm::get(attrs.parsed); SHAPE_ASSIGN_CHECK(*out_attrs, 0, ReduceAxesShapeImpl((*in_attrs)[0], param.axis, @@ -347,12 +347,12 @@ inline bool BroadcastAxesShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - if ((*in_attrs)[0].ndim() == 0) return false; + if (!shape_is_known((*in_attrs)[0])) return false; const BroadcastAxesParam& param = nnvm::get(attrs.parsed); CHECK_EQ(param.axis.ndim() , param.size.ndim()); mxnet::TShape &ishape = (*in_attrs)[0]; mxnet::TShape oshape = ishape; - for (index_t i = 0; i < param.axis.ndim(); ++i) { + for (int i 
= 0; i < param.axis.ndim(); ++i) { CHECK_EQ(oshape[param.axis[i]], 1U) << "Broadcasting axis must have size 1"; oshape[param.axis[i]] = param.size[i]; } @@ -366,12 +366,12 @@ inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape& ishape = (*in_attrs)[0]; - if (ishape.ndim() == 0) return false; + if (!shape_is_known(ishape)) return false; const BroadcastToParam& param = nnvm::get(attrs.parsed); CHECK_EQ(ishape.ndim(), param.shape.ndim()) << "Operand of shape " << ishape << " cannot be broadcasted to " << param.shape; mxnet::TShape oshape = param.shape; - for (index_t i = 0; i < ishape.ndim(); ++i) { + for (int i = 0; i < ishape.ndim(); ++i) { if (oshape[i] != 0) { CHECK(ishape[i] == oshape[i] || ishape[i] == 1) << "Array cannot be broadcasted from " << ishape << " to " << param.shape; @@ -391,7 +391,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& lhs_shape = (*in_attrs)[0]; mxnet::TShape& rhs_shape = (*in_attrs)[1]; - if ((lhs_shape.ndim() == 0) || (lhs_shape.ndim() == 0)) { + if (!shape_is_known(lhs_shape) || !shape_is_known(rhs_shape)) { return false; } @@ -404,7 +404,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, << "Operand of shape " << lhs_shape << " cannot be broadcasted to " << rhs_shape; oshape = mxnet::TShape(rhs_shape); - for (index_t i = 0; i < lhs_shape.ndim(); ++i) { + for (int i = 0; i < lhs_shape.ndim(); ++i) { if (rhs_shape[i] != 0) { CHECK(lhs_shape[i] == rhs_shape[i] || lhs_shape[i] == 1) << "Array cannot be broadcasted from " << lhs_shape << " to " << rhs_shape; @@ -423,7 +423,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, << "Empty axes tuple is not allowed"; oshape = mxnet::TShape(lhs_shape); - for (index_t i = 0; i < lhs_axes.ndim(); ++i) { + for (int i = 0; i < lhs_axes.ndim(); ++i) { auto copyfrom = lhs_axes[i]; if (copyfrom < 0) { copyfrom = lhs_shape.ndim() + copyfrom; @@ -450,9 +450,9 
@@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, inline void BroadcastReduceShapeCompact(const mxnet::TShape& big, const mxnet::TShape& small, mxnet::TShape *new_big, mxnet::TShape *new_small) { - index_t idim = std::max(big.ndim(), MXNET_SPECIAL_MAX_NDIM); - *new_big = mxnet::TShape(idim); - *new_small = mxnet::TShape(idim); + const int idim = std::max(big.ndim(), MXNET_SPECIAL_MAX_NDIM); + *new_big = mxnet::TShape(idim, 1); + *new_small = mxnet::TShape(idim, 1); index_t j = 0; if (small.Size() == 1) { (*new_big)[j++] = big.Size(); @@ -478,12 +478,10 @@ inline void BroadcastReduceShapeCompact(const mxnet::TShape& big, const mxnet::T ++j; } } - if (j <= 2) { - new_small->assign(&(*new_small)[0], &(*new_small)[2]); - new_big->assign(&(*new_big)[0], &(*new_big)[2]); - } else if (j <= MXNET_SPECIAL_MAX_NDIM) { - new_small->assign(&(*new_small)[0], &(*new_small)[MXNET_SPECIAL_MAX_NDIM]); - new_big->assign(&(*new_big)[0], &(*new_big)[MXNET_SPECIAL_MAX_NDIM]); + if (j <= MXNET_SPECIAL_MAX_NDIM) { + const int ndim = (j <= 2? 
2 : MXNET_SPECIAL_MAX_NDIM); + new_small->assign(new_small->begin(), new_small->begin() + ndim); + new_big->assign(new_big->begin(), new_big->begin() + ndim); } else { LOG(FATAL) << "Too many reduction axes from " << big << " to " << small; } diff --git a/src/operator/tensor/diag_op-inl.h b/src/operator/tensor/diag_op-inl.h index 1e3c1c9701d4..b90b09a36bd3 100644 --- a/src/operator/tensor/diag_op-inl.h +++ b/src/operator/tensor/diag_op-inl.h @@ -91,12 +91,12 @@ inline mxnet::TShape DiagShapeImpl(const mxnet::TShape& ishape, const int k, std::swap(x1, x2); } - int32_t n_dim = static_cast(ishape.ndim()) - 1; - mxnet::TShape oshape(n_dim); + int32_t n_dim = ishape.ndim() - 1; + mxnet::TShape oshape(n_dim, -1); // remove axis1 and axis2 and append the new axis to the end uint32_t idx = 0; - for (int32_t i = 0; i <= n_dim; ++i) { + for (int i = 0; i <= n_dim; ++i) { if (i != x1 && i != x2) { oshape[idx++] = ishape[i]; } @@ -114,7 +114,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& ishape = (*in_attrs)[0]; - if (ishape.ndim() == 0) { + if (!shape_is_known(ishape)) { return false; } @@ -129,7 +129,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs, } SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return out_attrs->at(0).ndim() != 0U; + return shape_is_known(out_attrs->at(0)); } inline bool DiagOpType(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h index 8a1eda0350b0..318254b26b9f 100644 --- a/src/operator/tensor/dot-inl.h +++ b/src/operator/tensor/dot-inl.h @@ -1241,20 +1241,20 @@ inline bool DotShape(const nnvm::NodeAttrs& attrs, if (Ta) { L[0] = mshadow::Shape1(lshape[0]); L[1] = lshape.ndim() > 1 ? - mxnet::TShape(&lshape[1], &lshape[lshape.ndim()]) : mxnet::TShape(1); + mxnet::TShape(&lshape[1], lshape.end()) : mxnet::TShape(1, 1); } else { L[0] = lshape.ndim() > 1 ? 
- mxnet::TShape(&lshape[0], &lshape[lshape.ndim()-1]) : mxnet::TShape(1); + mxnet::TShape(&lshape[0], &lshape[lshape.ndim()-1]) : mxnet::TShape(1, 1); L[1] = mshadow::Shape1(lshape[lshape.ndim()-1]); } if (Tb) { R[0] = rshape.ndim() > 1 ? - mxnet::TShape(&rshape[0], &rshape[rshape.ndim()-1]) : mxnet::TShape(1); + mxnet::TShape(&rshape[0], &rshape[rshape.ndim()-1]) : mxnet::TShape(1, 1); R[1] = mshadow::Shape1(rshape[rshape.ndim()-1]); } else { R[0] = mshadow::Shape1(rshape[0]); R[1] = rshape.ndim() > 1 ? - mxnet::TShape(&rshape[1], &rshape[rshape.ndim()]) : mxnet::TShape(1); + mxnet::TShape(&rshape[1], rshape.end()) : mxnet::TShape(1, 1); } if (L[!Ta].Size() != 0 && R[Tb].Size() != 0) { @@ -1262,8 +1262,8 @@ inline bool DotShape(const nnvm::NodeAttrs& attrs, << "dot shape error: " << lshape << " X " << rshape; } std::vector buf; - if (lshape.ndim() > 1) buf.insert(buf.end(), &L[Ta][0], &L[Ta][L[Ta].ndim()]); - if (rshape.ndim() > 1) buf.insert(buf.end(), &R[!Tb][0], &R[!Tb][R[!Tb].ndim()]); + if (lshape.ndim() > 1) buf.insert(buf.end(), &L[Ta][0], L[Ta].end()); + if (rshape.ndim() > 1) buf.insert(buf.end(), &R[!Tb][0], R[!Tb].end()); mxnet::TShape oshape(buf.begin(), buf.end()); SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); } diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h index 1d2b7c9c1163..dfb3231a75a9 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op.h +++ b/src/operator/tensor/elemwise_binary_broadcast_op.h @@ -48,21 +48,23 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& rhs = (*in_attrs)[1]; // avoid pre-mature shape inference. 
- if (lhs.ndim() == 0 || rhs.ndim() == 0) return false; + if (lhs.ndim() == -1 || rhs.ndim() == -1) return false; if (lhs == rhs) { SHAPE_ASSIGN_CHECK(*out_attrs, 0, lhs); - return true; + return shape_is_known(lhs); } - mxnet::TShape out(std::max(lhs.ndim(), rhs.ndim())); - index_t bl = out.ndim() - lhs.ndim(); - index_t br = out.ndim() - rhs.ndim(); - for (index_t i = 0; i < out.ndim(); ++i) { - index_t l = 1, r = 1; + mxnet::TShape out(std::max(lhs.ndim(), rhs.ndim()), -1); + const int bl = out.ndim() - lhs.ndim(); + const int br = out.ndim() - rhs.ndim(); + for (int i = 0; i < out.ndim(); ++i) { + int l = 1, r = 1; if (i >= bl) l = lhs[i-bl]; if (i >= br) r = rhs[i-br]; if (l != r) { if (l == 0 || r == 0) { + // TODO(junwu): here is not compatible with NumPy. + // For example, (2, 3) cannot broadcast to (2, 0, 3). out[i] = 0; } else { CHECK(l == 1 || r == 1) @@ -74,7 +76,7 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, } } SHAPE_ASSIGN_CHECK(*out_attrs, 0, out); - return true; + return shape_is_known(lhs) && shape_is_known(rhs) && shape_is_known(out); } inline bool BinaryBroadcastMulStorageType(const nnvm::NodeAttrs& attrs, @@ -146,15 +148,15 @@ inline int BinaryBroadcastShapeCompact(const mxnet::TShape& lshape, const mxnet: const mxnet::TShape& oshape, mxnet::TShape *new_lshape, mxnet::TShape *new_rshape, mxnet::TShape *new_oshape) { if (lshape == rshape) return 0; - index_t odim = std::max(oshape.ndim(), broadcast::MAX_DIM); - *new_lshape = mxnet::TShape(odim); - *new_rshape = mxnet::TShape(odim); - *new_oshape = mxnet::TShape(odim); - index_t bl = oshape.ndim() - lshape.ndim(); - index_t br = oshape.ndim() - rshape.ndim(); - index_t j = 0, lprod = 1, rprod = 1, oprod = 1; - for (index_t i = 0; i < oshape.ndim(); ++i) { - index_t l = 1, r = 1, o = oshape[i]; + const int odim = std::max(oshape.ndim(), broadcast::MAX_DIM); + *new_lshape = mxnet::TShape(odim, 1); + *new_rshape = mxnet::TShape(odim, 1); + *new_oshape = mxnet::TShape(odim, 1); + 
int bl = oshape.ndim() - lshape.ndim(); + int br = oshape.ndim() - rshape.ndim(); + int j = 0, lprod = 1, rprod = 1, oprod = 1; + for (int i = 0; i < oshape.ndim(); ++i) { + int l = 1, r = 1, o = oshape[i]; if (i >= bl) l = lshape[i-bl]; if (i >= br) r = rshape[i-br]; if ((lprod != rprod || l != r) && @@ -176,9 +178,9 @@ inline int BinaryBroadcastShapeCompact(const mxnet::TShape& lshape, const mxnet: } if (j <= broadcast::MAX_DIM) { BROADCAST_NDIM_SWITCH(j, NDim, { - new_lshape->assign(&(*new_lshape)[0], &(*new_lshape)[NDim]); - new_rshape->assign(&(*new_rshape)[0], &(*new_rshape)[NDim]); - new_oshape->assign(&(*new_oshape)[0], &(*new_oshape)[NDim]); + new_lshape->assign(new_lshape->begin(), new_lshape->begin() + NDim); + new_rshape->assign(new_rshape->begin(), new_rshape->begin() + NDim); + new_oshape->assign(new_oshape->begin(), new_oshape->begin() + NDim); }); } else { LOG(FATAL) << "Too many broadcast dimensions with operands " << lshape << " " << rshape; diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 19a9ac8359eb..5114a5d0dbe3 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -413,9 +413,9 @@ bool ReshapeLikeShapeCompute(const nnvm::NodeAttrs &attrs, GetReshapeLikeParams(param, lshape, rshape, &lhs_begin, &lhs_end, &rhs_begin, &rhs_end); - int lhsrank = static_cast(lshape.ndim()); + int lhsrank = lshape.ndim(); int orank = lhsrank + (rhs_end - rhs_begin) - (lhs_end - lhs_begin); - mxnet::TShape oshape(orank); + mxnet::TShape oshape(orank, -1); for (int i = 0; i < lhs_begin; ++i) oshape[i] = lshape[i]; @@ -436,7 +436,7 @@ bool ReshapeLikeShapeCompute(const nnvm::NodeAttrs &attrs, << "shape " << oshape << " because they have different " << "size."; SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return shape_is_known(oshape); } DMLC_REGISTER_PARAMETER(ReshapeLikeParam); @@ -537,7 +537,7 @@ Example:: mxnet::ShapeVector 
*out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - mxnet::TShape target_shape(1); + mxnet::TShape target_shape(1, -1); target_shape[0] = in_attrs->at(0).ndim(); SHAPE_ASSIGN_CHECK(*out_attrs, 0, target_shape); return !shape_is_none(out_attrs->at(0)); @@ -589,7 +589,7 @@ Example:: mxnet::ShapeVector *out_attrs) { CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - SHAPE_ASSIGN_CHECK(*out_attrs, 0, 1U); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1)); return !shape_is_none(out_attrs->at(0)); }) .set_attr("FInferType", diff --git a/src/operator/tensor/histogram-inl.h b/src/operator/tensor/histogram-inl.h index 51d0bdb6c2b6..9cf9c490bba2 100644 --- a/src/operator/tensor/histogram-inl.h +++ b/src/operator/tensor/histogram-inl.h @@ -46,13 +46,13 @@ namespace op { struct HistogramParam : public dmlc::Parameter { dmlc::optional bin_cnt; - dmlc::optional> range; + dmlc::optional> range; DMLC_DECLARE_PARAMETER(HistogramParam) { DMLC_DECLARE_FIELD(bin_cnt) .set_default(dmlc::optional()) .describe("Number of bins for uniform case"); DMLC_DECLARE_FIELD(range) - .set_default(dmlc::optional>()) + .set_default(dmlc::optional>()) .describe("The lower and upper range of the bins. if not provided, " "range is simply (a.min(), a.max()). values outside the " "range are ignored. 
the first element of the range must be " @@ -101,7 +101,7 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1)); } - return !shape_is_none(out_attrs->at(0)) && !shape_is_none(out_attrs->at(1)) && + return shape_is_known(out_attrs->at(0)) && shape_is_known(out_attrs->at(1)) && out_attrs->at(0).Size() == out_attrs->at(1).Size() - 1; } diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h index 8979531fef4e..6469aae17558 100644 --- a/src/operator/tensor/indexing_op.h +++ b/src/operator/tensor/indexing_op.h @@ -145,20 +145,20 @@ inline bool EmbeddingOpShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *out_attrs) { using namespace mshadow; const mxnet::TShape &dshape = (*in_attrs)[embedding::kData]; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; const ParamType& param = nnvm::get(attrs.parsed); SHAPE_ASSIGN_CHECK(*in_attrs, embedding::kWeight, Shape2(param.input_dim, param.output_dim)); out_attrs->clear(); - mxnet::TShape oshape(dshape.ndim()+1); - for (size_t i = 0; i < dshape.ndim(); ++i) { + mxnet::TShape oshape(dshape.ndim()+1, -1); + for (int i = 0; i < dshape.ndim(); ++i) { oshape[i] = dshape[i]; } oshape[dshape.ndim()] = param.output_dim; out_attrs->push_back(oshape); - return true; + return shape_is_known(oshape); } template @@ -682,18 +682,18 @@ inline bool TakeOpShape(const nnvm::NodeAttrs& attrs, using namespace mshadow; const mxnet::TShape &arrshape = (*in_attrs)[take_::kArr]; const mxnet::TShape &idxshape = (*in_attrs)[take_::kIdx]; - if (idxshape.ndim() == 0U || idxshape.Size() == 0U) return false; + if (!shape_is_known(idxshape)) return false; const TakeParam& param = nnvm::get(attrs.parsed); if (param.mode == take_::kRaise) { LOG(FATAL) << "Raise is not supported for the time being..."; } - CHECK(param.axis >= -1 * (int)arrshape.ndim() && param.axis < (int)arrshape.ndim()) + CHECK(param.axis >= -1 * arrshape.ndim() && 
param.axis < arrshape.ndim()) << "Axis should be in the range of [-r, r-1] where r is the rank of input tensor"; out_attrs->clear(); const index_t actual_axis = param.axis + ((param.axis < 0) ? arrshape.ndim() : 0); - mxnet::TShape oshape(idxshape.ndim() + arrshape.ndim() - 1); + mxnet::TShape oshape(idxshape.ndim() + arrshape.ndim() - 1, -1); for (index_t i = 0; i < idxshape.ndim(); ++i) { oshape[i + actual_axis] = idxshape[i]; } @@ -705,7 +705,7 @@ inline bool TakeOpShape(const nnvm::NodeAttrs& attrs, } } out_attrs->push_back(oshape); - return true; + return shape_is_known(oshape); } inline bool TakeOpType(const nnvm::NodeAttrs& attrs, @@ -1170,6 +1170,7 @@ inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); // The shape of indices const mxnet::TShape& ishape = (*in_attrs)[0]; + if (!shape_is_known(ishape)) return false; int depth = 0; double on_value = 1.0; @@ -1177,13 +1178,13 @@ inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs, int dtype = mshadow::kFloat32; GetOneHotParams(param, &depth, &on_value, &off_value, &dtype); - mxnet::TShape oshape(ishape.ndim() + 1); + mxnet::TShape oshape(ishape.ndim() + 1, -1); for (index_t i = 0; i < ishape.ndim(); ++i) { oshape[i] = ishape[i]; } oshape[oshape.ndim()-1] = depth; SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return shape_is_known(oshape); } inline bool OneHotOpType(const nnvm::NodeAttrs& attrs, @@ -1270,15 +1271,15 @@ inline bool GatherNDShape(const nnvm::NodeAttrs& attrs, CHECK_LE(ishape[0], 10) << "gather_nd supports indexing along at most 10 dimensions."; - mxnet::TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0]); + mxnet::TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0], -1); - for (size_t i = 0; i < ishape.ndim() - 1; ++i) oshape[i] = ishape[i+1]; + for (int i = 0; i < ishape.ndim() - 1; ++i) oshape[i] = ishape[i+1]; for (int i = 0; i < dshape.ndim() - ishape[0]; ++i) { oshape[ishape.ndim()-1+i] = dshape[ishape[0] + i]; } 
SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return shape_is_known(oshape); } inline bool GatherNDType(const nnvm::NodeAttrs& attrs, @@ -1370,7 +1371,7 @@ inline bool ScatterNDShape(const nnvm::NodeAttrs& attrs, bool valid = dshape.ndim() == ishape.ndim() - 1 + oshape.ndim() - ishape[0]; - for (size_t i = 0; i < ishape.ndim() - 1; ++i) { + for (int i = 0; i < ishape.ndim() - 1; ++i) { valid = valid && dshape[i] == ishape[i+1]; } for (int i = 0; i < oshape.ndim() - ishape[0]; ++i) { diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h index fe1a1f62954a..3c4d34b3f4a5 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -49,7 +49,7 @@ struct InitOpParam : public dmlc::Parameter { int dtype; DMLC_DECLARE_PARAMETER(InitOpParam) { DMLC_DECLARE_FIELD(shape) - .set_default(mxnet::TShape()) + .set_default(mxnet::TShape(0, 1)) .describe("The shape of the output"); DMLC_DECLARE_FIELD(ctx) .set_default("") @@ -213,8 +213,8 @@ inline bool InitShape(const nnvm::NodeAttrs& attrs, const ParamType& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 1U); - if ((*out_attrs)[0].ndim() != 0 && param.shape.ndim() == 0) return true; - for (unsigned int i=0 ; i < param.shape.ndim() ; ++i) { + if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param.shape)) return true; + for (int i=0 ; i < param.shape.ndim() ; ++i) { if (param.shape[i] < 0U) { LOG(FATAL) << "Shape cannot contain negative values " << param.shape; } diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h index 5e18e0ef5a25..db4607fe9262 100644 --- a/src/operator/tensor/la_op.h +++ b/src/operator/tensor/la_op.h @@ -384,7 +384,7 @@ mshadow::Tensor LaOpFlatten(const TBlob& blob, } // Collapse ranges [0,axis-1] and [axis+1,ndim-2]. 
CHECK_EQ(dim, 4); - mxnet::TShape shape(dim); + mxnet::TShape shape(dim, -1); shape[0] = 1; for (int i = 0; i < axis; ++i) { shape[0] *= blob.shape_[i]; diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index ba62d0e9def7..d61267faefe6 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -49,17 +49,17 @@ namespace op { struct ReshapeParam : public dmlc::Parameter { mxnet::TShape target_shape; bool keep_highest; - nnvm::Tuple shape; + mxnet::Tuple shape; bool reverse; DMLC_DECLARE_PARAMETER(ReshapeParam) { DMLC_DECLARE_FIELD(shape) - .set_default(nnvm::Tuple()) + .set_default(mxnet::Tuple()) .describe("The target shape"); DMLC_DECLARE_FIELD(reverse) .set_default(false) .describe("If true then the special values are inferred from right to left"); DMLC_DECLARE_FIELD(target_shape) - .set_default(mxnet::TShape()) + .set_default(mxnet::TShape(0)) .describe("(Deprecated! Use ``shape`` instead.) " "Target new shape. One and only one dim can be 0, " "in which case it will be inferred from the rest of dims"); @@ -71,11 +71,11 @@ struct ReshapeParam : public dmlc::Parameter { }; template -inline mxnet::TShape InferReshapeShape(const nnvm::Tuple& shape, - const mxnet::TShape& dshape, bool reverse) { +inline mxnet::TShape InferReshapeShape(const mxnet::Tuple& shape, + const mxnet::TShape& dshape, bool reverse) { std::vector dshape_vec; std::vector param_shape_vec(shape.begin(), shape.end()); - for (index_t i = 0; i < dshape.ndim(); ++i) { + for (int i = 0; i < dshape.ndim(); ++i) { dshape_vec.push_back(dshape[i]); } std::vector tmp; @@ -102,28 +102,31 @@ inline mxnet::TShape InferReshapeShape(const nnvm::Tuple& shape, } else if (proposed_dim == -2) { // copy all remaining dims from source while (src_idx < dshape_len) { - size_t dn = dshape_vec[src_idx++]; + const int dn = dshape_vec[src_idx++]; tmp.push_back(dn); } } else if (proposed_dim == -3) { // merge two dims from source CHECK_LT(src_idx, 
dshape_len-1); - size_t d1 = dshape_vec[src_idx++]; - size_t d2 = dshape_vec[src_idx++]; - size_t dn = d1 * d2; - tmp.push_back(dn); + const int d1 = dshape_vec[src_idx++]; + const int d2 = dshape_vec[src_idx++]; + if (d1 == -1 || d2 == -1) { + tmp.push_back(-1); + } else { + tmp.push_back(d1 * d2); + } } else if (proposed_dim == -4) { // split the source dim s into two dims // read the left dim and then the right dim (either can be -1) CHECK_LT(i + 2, params_len); CHECK_LT(src_idx, dshape_len); - size_t d0 = dshape_vec[src_idx++]; + const int d0 = dshape_vec[src_idx++]; IType d1 = param_shape_vec[++i]; IType d2 = param_shape_vec[++i]; CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1."; - if (d1 == -1) d1 = d0 / d2; - if (d2 == -1) d2 = d0 / d1; - CHECK(d1 * d2 == static_cast(d0) || static_cast(d0) == IType(0)) << + if (d1 == -1 && d0 >= 0) d1 = d0 / d2; // d0 must be known to do this + if (d2 == -1 && d0 >= 0) d2 = d0 / d1; // d0 must be known to do this + CHECK(d1 * d2 == static_cast(d0) || static_cast(d0) == IType(-1)) << "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0; tmp.push_back(d1); tmp.push_back(d2); @@ -135,12 +138,12 @@ inline mxnet::TShape InferReshapeShape(const nnvm::Tuple& shape, } if (inf_idx >= 0) { - if (dshape.Size() > 0) { + if (shape_is_known(dshape)) { IType new_size = 1; for (IType x : tmp) new_size *= x; tmp[inf_idx] = dshape.Size() / new_size; } else { - tmp[inf_idx] = 0; + tmp[inf_idx] = -1; } } if (reverse) { @@ -153,24 +156,24 @@ inline mxnet::TShape InferReshapeShape(const nnvm::Tuple& shape, } inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out) { - if (in->Size() && out.Size()) { + if (shape_is_known(*in) && shape_is_known(out)) { return true; - } else if (!out.Size()) { + } else if (!shape_is_known(out)) { return false; } else { int zero_axis = -1; - int non_zero_prod = 1; - for (index_t i = 0; i < in->ndim(); i++) { - if ((*in)[i] == 0) { + int known_dim_size_prod = 
1; + for (int i = 0; i < in->ndim(); i++) { + if ((*in)[i] == -1) { if (zero_axis != -1) return false; // more than 1 zero found. else zero_axis = i; } else { - non_zero_prod *= (*in)[i]; + known_dim_size_prod *= (*in)[i]; } } - (*in)[zero_axis] = out.Size() / non_zero_prod; + (*in)[zero_axis] = out.Size() / known_dim_size_prod; return true; } } @@ -182,11 +185,11 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0) return false; + if (dshape.ndim() == -1) return false; mxnet::TShape oshape; if (param_.shape.ndim() != 0) { oshape = InferReshapeShape(param_.shape, dshape, param_.reverse); - } else if (param_.target_shape.ndim()) { + } else if (param_.target_shape.ndim() != -1) { LOG(INFO) << "Using target_shape will be deprecated."; oshape = param_.target_shape; int neg_count = 0; @@ -195,7 +198,7 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, if (param_.keep_highest) { oshape[0] = dshape[0]; } - for (index_t i = start_idx; i < oshape.ndim(); ++i) { + for (int i = start_idx; i < oshape.ndim(); ++i) { if (oshape[i] == 0) { neg_count++; inf_idx = i; @@ -206,13 +209,15 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, oshape[inf_idx] = dshape.Size() / oshape.Size(); } } else { - return (*out_attrs)[0].ndim() && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); + return shape_is_known((*out_attrs)[0]) && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); } ReverseReshapeInferShape(&dshape, oshape); +#if 0 CHECK_EQ(oshape.Size(), dshape.Size()) << "Target shape size is different to source. 
" << "Target: " << oshape << "\nSource: " << dshape; +#endif SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); return ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); } @@ -223,9 +228,9 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0) return false; - uint32_t target_dim = 1; - for (uint32_t i = 1; i < dshape.ndim(); ++i) { + if (!shape_is_known(dshape)) return false; + int target_dim = 1; + for (int i = 1; i < dshape.ndim(); ++i) { target_dim *= dshape[i]; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape2(dshape[0], target_dim)); @@ -313,9 +318,9 @@ void Transpose(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const TransposeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; - if (param.axes.ndim() == 0) { - mxnet::TShape axes = mxnet::TShape(inputs[0].ndim()); - for (index_t i = 0; i < axes.ndim(); ++i) { + if (param.axes.ndim() == -1) { + mxnet::TShape axes(inputs[0].ndim(), -1); + for (int i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; } TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); @@ -332,20 +337,20 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape& shp = (*in_attrs)[0]; CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions"; - mxnet::TShape ret(shp.ndim()); - if (param.axes.ndim() == 0) { - for (index_t i = 0; i < shp.ndim(); ++i) { + mxnet::TShape ret(shp.ndim(), -1); + if (param.axes.ndim() == -1) { + for (int i = 0; i < shp.ndim(); ++i) { ret[i] = shp[shp.ndim()-1-i]; } } else { CHECK_EQ(shp.ndim(), param.axes.ndim()); - for (size_t i = 0; i < shp.ndim(); ++i) { + for (int i = 0; i < shp.ndim(); ++i) { CHECK(param.axes[i] < static_cast(shp.ndim())); ret[i] = shp[param.axes[i]]; } } SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret); - 
return true; + return shape_is_known(ret); } @@ -366,7 +371,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, const ExpandDimParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - if (in_attrs->at(0).ndim() == 0U && out_attrs->at(0).ndim() == 0U) { + if (!shape_is_known(in_attrs->at(0)) && !shape_is_known(out_attrs->at(0))) { return false; } @@ -374,7 +379,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& oshape = (*out_attrs)[0]; int indim = ishape.ndim(); bool unknown_ishape = false; - if (0 == indim) { + if (-1 == indim) { indim = oshape.ndim() - 1; unknown_ishape = true; } @@ -386,27 +391,27 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, CHECK(axis >= 0 && axis <= indim) << "axis must be in the range [" << -indim << ", " << indim << "] (" << param.axis << " provided)"; - mxnet::TShape ret(indim + 1); + mxnet::TShape ret(indim + 1, -1); for (int i = 0; i < axis; ++i) { - ret[i] = (unknown_ishape? 0 : ishape[i]); + ret[i] = (unknown_ishape? -1 : ishape[i]); } ret[axis] = 1; for (int i = axis+1; i < indim+1; ++i) { - ret[i] = (unknown_ishape? 0 : ishape[i-1]); + ret[i] = (unknown_ishape? 
-1 : ishape[i-1]); } SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret); - ret = mxnet::TShape(indim); + ret = mxnet::TShape(indim, -1); for (int i = 0; i < axis; ++i) ret[i] = oshape[i]; for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i]; SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret); - return true; + return shape_is_known(ret); } // Currently MKLDNN only supports step = 1 or step has no value inline bool SupportMKLDNNSlice(const SliceParam& param) { if (param.step.ndim() == 0U) return true; - for (uint32_t i = 0; i < param.step.ndim(); ++i) { + for (int i = 0; i < param.step.ndim(); ++i) { if (param.step[i].has_value() && param.step[i].value() != 1) return false; } @@ -589,9 +594,9 @@ void SliceCsrImpl(const SliceParam ¶m, const OpContext& ctx, const mxnet::TShape ishape = in.shape(); const mxnet::TShape oshape = out.shape(); - uint32_t N = ishape.ndim(); - mxnet::TShape begin(N), end(N); - for (uint32_t i = 0; i < N; ++i) { + int N = ishape.ndim(); + mxnet::TShape begin(N, -1), end(N, -1); + for (int i = 0; i < N; ++i) { int s = 0; if (param.begin[i]) { s = *param.begin[i]; @@ -634,9 +639,9 @@ void SliceEx(const nnvm::NodeAttrs& attrs, template inline void GetIndexRange(const mxnet::TShape& dshape, - const nnvm::Tuple>& param_begin, - const nnvm::Tuple>& param_end, - const nnvm::Tuple>& param_step, + const mxnet::Tuple>& param_begin, + const mxnet::Tuple>& param_end, + const mxnet::Tuple>& param_step, common::StaticArray* begin, common::StaticArray* end, common::StaticArray* step) { @@ -651,7 +656,7 @@ inline void GetIndexRange(const mxnet::TShape& dshape, << "Static array size=" << ndim << " is not equal to data shape ndim=" << dshape.ndim(); - if (param_step.ndim() != 0U) { + if (param_step.ndim() != 0) { CHECK_EQ(param_step.ndim(), param_begin.ndim()) << "step and begin must have the same length"; } @@ -723,7 +728,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& dshape = 
(*in_attrs)[0]; - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; const SliceParam& param = nnvm::get(attrs.parsed); mxnet::TShape oshape = dshape; @@ -737,7 +742,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, }); SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return !shape_is_none(dshape) && !shape_is_none(oshape); + return shape_is_known(oshape); } template @@ -943,7 +948,7 @@ inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs, MXNET_NDIM_SWITCH(dshape.ndim(), ndim, { common::StaticArray begin, end, step; GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step); - for (index_t i = 0; i < param.begin.ndim(); ++i) { + for (int i = 0; i < param.begin.ndim(); ++i) { const int b = begin[i], e = end[i], s = step[i]; SetSliceOpOutputDimSize(i, b, e, s, &vshape); } @@ -997,8 +1002,8 @@ void SliceAssignOpForward(const nnvm::NodeAttrs& attrs, struct SliceAssignScalarParam : public dmlc::Parameter { double scalar; - nnvm::Tuple> begin, end; - nnvm::Tuple> step; + mxnet::Tuple> begin, end; + mxnet::Tuple> step; DMLC_DECLARE_PARAMETER(SliceAssignScalarParam) { DMLC_DECLARE_FIELD(scalar) .set_default(0) @@ -1008,7 +1013,7 @@ struct SliceAssignScalarParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(end) .describe("ending indices for the slice operation, supports negative indices."); DMLC_DECLARE_FIELD(step) - .set_default(nnvm::Tuple>()) + .set_default(mxnet::Tuple>()) .describe("step for the slice operation, supports negative values."); } }; @@ -1019,7 +1024,7 @@ inline bool SliceAssignScalarOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0U || dshape.Size() == 0U) return false; + if (!shape_is_known(dshape)) return false; SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape); return true; } @@ -1156,8 +1161,8 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, int axis; index_t 
begin, end; GetSliceAxisParams(param, ishape, &axis, &begin, &end); - mxnet::TShape shape(ishape.ndim()); - for (index_t i = 0; i < ishape.ndim(); ++i) { + mxnet::TShape shape(ishape.ndim(), -1); + for (int i = 0; i < ishape.ndim(); ++i) { if (static_cast(i) == axis) { shape[i] = static_cast(end - begin); } else { @@ -1165,7 +1170,7 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, } } SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); - return true; + return shape_is_known(shape); } @@ -1181,7 +1186,7 @@ void SliceAxis(const nnvm::NodeAttrs& attrs, int axis; index_t begin, end; GetSliceAxisParams(param, inputs[0].shape_, &axis, &begin, &end); - int ndim = static_cast(outputs[0].ndim()); + int ndim = outputs[0].ndim(); if (axis + 1 == ndim) { MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { @@ -1252,9 +1257,9 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs, } struct SliceLikeParam : public dmlc::Parameter { - mxnet::TShape axes; + mxnet::Tuple axes; DMLC_DECLARE_PARAMETER(SliceLikeParam) { - DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::Tuple()) .describe("List of axes on which input data will be sliced according to the " "corresponding size of the second input. By default will slice on " "all axes. Negative axes are supported."); @@ -1273,7 +1278,7 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(ishape.ndim(), from_shape.ndim()) << "By default slice_axis performs slice on all axes, but ndim mismatch " "for inputs: " << ishape.ndim() << " vs. 
" << from_shape.ndim(); - for (index_t i = 0; i < ishape.ndim(); ++i) { + for (int i = 0; i < ishape.ndim(); ++i) { CHECK_GE(ishape[i], from_shape[i]) << "Slice axis " << i << " with size " << from_shape[i] << "exceeds limit of input with size " << ishape[i]; @@ -1281,7 +1286,7 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, from_shape); } else { mxnet::TShape shape(ishape); - for (index_t i = 0; i < param.axes.ndim(); ++i) { + for (int i = 0; i < param.axes.ndim(); ++i) { int axis = static_cast(param.axes[i]); if (axis < 0) { axis += static_cast(ishape.ndim()); @@ -1304,21 +1309,21 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, inline void SliceLikeInferRanges(const mxnet::TShape& dshape, const mxnet::TShape& fshape, - const mxnet::TShape& axes, - nnvm::Tuple>* param_begin, - nnvm::Tuple>* param_end, - nnvm::Tuple>* param_step) { + const mxnet::Tuple& axes, + mxnet::Tuple>* param_begin, + mxnet::Tuple>* param_end, + mxnet::Tuple>* param_step) { std::vector> pb(dshape.ndim()); std::vector> pe(dshape.ndim()); std::vector> ps(dshape.ndim()); if (axes.ndim() == 0) { - for (index_t i = 0; i < dshape.ndim(); ++i) { + for (int i = 0; i < dshape.ndim(); ++i) { pb[i] = 0; pe[i] = fshape[i]; ps[i] = 1; } } else { - for (index_t i = 0; i < axes.ndim(); ++i) { + for (int i = 0; i < axes.ndim(); ++i) { int axis = static_cast(axes[i]); if (axis < 0) { axis += static_cast(dshape.ndim()); @@ -1334,9 +1339,9 @@ inline void SliceLikeInferRanges(const mxnet::TShape& dshape, ps[axis] = 1; } } - *param_begin = nnvm::Tuple>(pb.begin(), pb.end()); - *param_end = nnvm::Tuple>(pe.begin(), pe.end()); - *param_step = nnvm::Tuple>(ps.begin(), ps.end()); + *param_begin = mxnet::Tuple>(pb.begin(), pb.end()); + *param_end = mxnet::Tuple>(pe.begin(), pe.end()); + *param_step = mxnet::Tuple>(ps.begin(), ps.end()); } template @@ -1355,9 +1360,9 @@ void SliceLikeForward(const nnvm::NodeAttrs& attrs, const TBlob& out = outputs[0]; const 
mxnet::TShape& ishape = data.shape_; const mxnet::TShape& from_shape = inputs[1].shape_; - nnvm::Tuple> param_begin; - nnvm::Tuple> param_end; - nnvm::Tuple> param_step; + mxnet::Tuple> param_begin; + mxnet::Tuple> param_end; + mxnet::Tuple> param_step; SliceLikeInferRanges(ishape, from_shape, param.axes, ¶m_begin, ¶m_end, ¶m_step); MXNET_NDIM_SWITCH(data.ndim(), ndim, { @@ -1403,9 +1408,9 @@ void SliceLikeBackward(const nnvm::NodeAttrs& attrs, const mxnet::TShape& ishape = ograd.shape_; const mxnet::TShape& from_shape = outputs[1].shape_; - nnvm::Tuple> param_begin; - nnvm::Tuple> param_end; - nnvm::Tuple> param_step; + mxnet::Tuple> param_begin; + mxnet::Tuple> param_end; + mxnet::Tuple> param_step; SliceLikeInferRanges(ishape, from_shape, param.axes, ¶m_begin, ¶m_end, ¶m_step); MXNET_NDIM_SWITCH(ograd.ndim(), ndim, { @@ -1546,7 +1551,7 @@ inline void GetRepeatParams(const RepeatParam& param, const mxnet::TShape& ishap CHECK_GE(*repeats, 0) << "repeats cannot be a negative number"; *axisOpt = param.axis; if (static_cast(*axisOpt)) { - int ndims = static_cast(ishape.ndim()); + int ndims = ishape.ndim(); int axis = axisOpt->value(); if (axis < 0) { axis += ndims; @@ -1565,34 +1570,33 @@ inline bool RepeatOpShape(const nnvm::NodeAttrs& attrs, int repeats = 0; dmlc::optional axisOpt; GetRepeatParams(param, ishape, &repeats, &axisOpt); - // If 0 repeats, return an empty 0 dim array + // If 0 repeats, return an empty 1-dim, 0-size array if (0 == repeats) { - SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape()); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 0)); return true; } // If repeats > 0, multiply the size of the corresponding axis by repeats if (static_cast(axisOpt)) { - int ndims = static_cast(ishape.ndim()); + int ndims = ishape.ndim(); int axis = axisOpt.value(); if (axis < 0) { axis += ndims; } - mxnet::TShape shape(ishape.ndim()); - for (index_t i = 0; i < ishape.ndim(); ++i) { - if (static_cast(i) == axis) { - shape[i] = static_cast(repeats) * 
ishape[i]; + mxnet::TShape shape(ishape.ndim(), -1); + for (int i = 0; i < ishape.ndim(); ++i) { + if (i == axis) { + shape[i] = repeats * ishape[i]; } else { shape[i] = ishape[i]; } } SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); } else { // If axis is not input by user, return a flat 1D array of size = in.size*repeats - mxnet::TShape shape(1); - shape[0] = ishape.Size() * static_cast(repeats); + mxnet::TShape shape(1, ishape.Size() * repeats); SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape); } - return true; + return shape_is_known(out_attrs->at(0)); } inline bool RepeatOpType(const nnvm::NodeAttrs& attrs, @@ -1620,16 +1624,16 @@ inline std::pair ReshapeInputOutputForRepeatOp( const int repeats) { if (static_cast(axisOpt)) { int axis = axisOpt.value(); - int ndim = static_cast(ishape.ndim()); + int ndim = ishape.ndim(); if (axis < 0) { axis += ndim; } - CHECK(axis >= 0 && axis < static_cast(ishape.ndim())) << "Invalid input of axis"; + CHECK(axis >= 0 && axis < ishape.ndim()) << "Invalid input of axis"; // reshape the input tensor by adding a dim at the (axis+1)-th dim - mxnet::TShape rshape(ishape.ndim()+1); + mxnet::TShape rshape(ishape.ndim()+1, 1); // the shape we want to broadcast to - mxnet::TShape bshape(rshape.ndim()); + mxnet::TShape bshape(rshape.ndim(), 1); int i = 0; while (i <= axis) { rshape[i] = bshape[i] = ishape[i]; @@ -1637,7 +1641,7 @@ inline std::pair ReshapeInputOutputForRepeatOp( } rshape[i] = 1; bshape[i] = repeats; - while (i < static_cast(ishape.ndim())) { + while (i < ishape.ndim()) { rshape[i+1] = ishape[i]; bshape[i+1] = ishape[i]; ++i; @@ -1648,11 +1652,11 @@ inline std::pair ReshapeInputOutputForRepeatOp( // reshape the tensor into shape (ishape.Size(), 1) // then add one dim at axis = 1 and broadcast to // shape (ishape.Size(), repeats) - mxnet::TShape rshape(2); + mxnet::TShape rshape(2, 1); rshape[0] = ishape.Size(); rshape[1] = 1; - mxnet::TShape bshape(2); + mxnet::TShape bshape(2, 1); bshape[0] = rshape[0]; bshape[1] = repeats; return 
std::make_pair(rshape, bshape); @@ -1667,7 +1671,7 @@ void RepeatOpForward(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const TBlob& iTBlob = inputs[0]; const mxnet::TShape& ishape = iTBlob.shape_; - if (ishape.ndim() == 0) return; + if (!shape_is_known(ishape)) return; int repeats = 0; dmlc::optional axisOpt; @@ -1711,7 +1715,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs, CHECK_EQ(outputs.size(), 1U); const mxnet::TShape& oshape = outputs[0].shape_; - if (oshape.ndim() == 0) return; + if (!shape_is_known(oshape)) return; int repeats = 0; dmlc::optional axisOpt; @@ -1737,7 +1741,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs, } struct TileParam : public dmlc::Parameter { - mxnet::TShape reps; + mxnet::Tuple reps; DMLC_DECLARE_PARAMETER(TileParam) { DMLC_DECLARE_FIELD(reps) .describe("The number of times for repeating the tensor a. Each dim size of reps" @@ -1755,19 +1759,22 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); const TileParam& param = nnvm::get(attrs.parsed); const mxnet::TShape& ishape = (*in_attrs)[0]; - const mxnet::TShape& reps = param.reps; + if (!shape_is_known(ishape)) { + return false; + } + const mxnet::Tuple& reps = param.reps; // If reps is empty, return a identical input array - if (reps.ndim() == 0 || ishape.ndim() == 0) { + if (reps.ndim() == 0) { SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape); return true; } - for (size_t i = 0; i < reps.ndim(); ++i) { + for (int i = 0; i < reps.ndim(); ++i) { CHECK_GT(reps[i], 0) << "invalid reps=" << i << ", dim size must be greater than zero"; } - mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim())); - int i1 = static_cast(ishape.ndim()) - 1; - int i2 = static_cast(reps.ndim()) - 1; - for (int i = static_cast(oshape.ndim()) - 1; i >= 0; --i) { + mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim()), -1); + int i1 = ishape.ndim() - 1; + int i2 = reps.ndim() - 1; + for (int i = oshape.ndim() - 1; i >= 0; --i) { if (i1 >= 0 && 
i2 >= 0) { oshape[i] = ishape[i1--] * reps[i2--]; } else if (i1 >= 0) { @@ -1777,7 +1784,7 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs, } } SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return shape_is_known(oshape); } inline bool TileOpType(const nnvm::NodeAttrs& attrs, @@ -1801,20 +1808,20 @@ inline bool TileOpType(const nnvm::NodeAttrs& attrs, */ inline std::pair ReshapeInputOutputForTileOp( const mxnet::TShape& ishape, - const mxnet::TShape& reps) { + const mxnet::Tuple& reps) { if (ishape.ndim() == 0 || reps.ndim() == 0) { return std::make_pair(ishape, ishape); } // The shape we want to broadcast to - mxnet::TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2); + mxnet::TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2, 1); // The shape of the input tensor after adding new axes before each dim - mxnet::TShape rshape(bshape.ndim()); + mxnet::TShape rshape(bshape.ndim(), 1); - int i1 = static_cast(ishape.ndim()) - 1; - int i2 = static_cast(reps.ndim()) - 1; - for (int i = static_cast(bshape.ndim()) - 1; i >= 0; --i) { + int i1 = ishape.ndim() - 1; + int i2 = reps.ndim() - 1; + for (int i = bshape.ndim() - 1; i >= 0; --i) { if (0 == (i & 1)) { bshape[i] = (i2 >= 0? 
reps[i2--] : 1); rshape[i] = 1; @@ -1854,10 +1861,10 @@ void TileOpForward(const nnvm::NodeAttrs& attrs, if (inputs[0].Size() == 0) return; const mxnet::TShape& ishape = inputs[0].shape_; - const mxnet::TShape& reps = nnvm::get(attrs.parsed).reps; + const mxnet::Tuple& reps = nnvm::get(attrs.parsed).reps; // If any one of the number in reps is zero, return immediately - for (index_t i = 0; i < reps.ndim(); ++i) { + for (int i = 0; i < reps.ndim(); ++i) { if (0 == reps[i]) return; } @@ -1896,10 +1903,10 @@ void TileOpBackward(const nnvm::NodeAttrs& attrs, if (inputs[0].Size() == 0) return; const mxnet::TShape& oshape = outputs[0].shape_; - const mxnet::TShape& reps = nnvm::get(attrs.parsed).reps; + const mxnet::Tuple& reps = nnvm::get(attrs.parsed).reps; // If any one of the number in reps is zero, return immediately - for (index_t i = 0; i < reps.ndim(); ++i) { + for (int i = 0; i < reps.ndim(); ++i) { if (0 == reps[i]) return; } @@ -1919,7 +1926,7 @@ void TileOpBackward(const nnvm::NodeAttrs& attrs, } struct ReverseParam : public dmlc::Parameter { - nnvm::Tuple axis; + mxnet::Tuple axis; DMLC_DECLARE_PARAMETER(ReverseParam) { DMLC_DECLARE_FIELD(axis) .describe("The axis which to reverse elements."); @@ -1990,10 +1997,10 @@ void ReverseOpForward(const nnvm::NodeAttrs& attrs, std::vector trailing_(param.axis.ndim()); index_t reverse_index = 0; for (int axis : param.axis) { - CHECK_LT(axis, static_cast(ishape.ndim())); + CHECK_LT(axis, ishape.ndim()); stride_[reverse_index] = ishape[axis]; trailing_[reverse_index] = 1; - for (index_t i2 = axis + 1; i2 < ishape.ndim(); ++i2) { + for (int i2 = axis + 1; i2 < ishape.ndim(); ++i2) { trailing_[reverse_index] *= ishape[i2]; } reverse_index++; @@ -2054,9 +2061,9 @@ inline bool StackOpShape(const nnvm::NodeAttrs& attrs, for (const mxnet::TShape& i : (*in_attrs)) { shape_assign(&dshape, i); } - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; - mxnet::TShape oshape(dshape.ndim() + 1); + 
mxnet::TShape oshape(dshape.ndim() + 1, -1); int axis = CheckAxis(param.axis, oshape.ndim()); for (int i = 0; i < axis; ++i) { oshape[i] = dshape[i]; @@ -2067,7 +2074,7 @@ inline bool StackOpShape(const nnvm::NodeAttrs& attrs, } SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); - return true; + return shape_is_known(oshape); } @@ -2140,10 +2147,10 @@ void StackOpBackward(const nnvm::NodeAttrs& attrs, } struct SqueezeParam : public dmlc::Parameter { - dmlc::optional axis; + dmlc::optional> axis; DMLC_DECLARE_PARAMETER(SqueezeParam) { DMLC_DECLARE_FIELD(axis) - .set_default(dmlc::optional()) + .set_default(dmlc::optional>()) .describe("Selects a subset of the single-dimensional entries in the shape." " If an axis is selected with shape entry greater than one, an error is raised."); } @@ -2156,7 +2163,7 @@ struct SqueezeParam : public dmlc::Parameter { inline size_t SqueezeShapeHelper(mxnet::TShape* shape) { CHECK(shape != nullptr); size_t count = 0; - for (size_t i = 0; i < shape->ndim(); ++i) { + for (int i = 0; i < shape->ndim(); ++i) { if ((*shape)[i] == 0) { ++count; } else { @@ -2174,12 +2181,12 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& dshape = in_attrs->at(0); const int dndim = dshape.ndim(); - if (shape_is_none(dshape)) return false; + if (!shape_is_known(dshape)) return false; mxnet::TShape oshape = dshape; if (param.axis.has_value()) { // preprocess axis - mxnet::TShape axes = param.axis.value(); - for (size_t i = 0; i < axes.ndim(); ++i) { + mxnet::Tuple axes = param.axis.value(); + for (int i = 0; i < axes.ndim(); ++i) { if (axes[i] < 0) { axes[i] += dndim; CHECK_GE(axes[i], 0) @@ -2194,7 +2201,7 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, oshape[axes[i]] = 0; } } else { - for (size_t i = 0; i < oshape.ndim(); ++i) { + for (int i = 0; i < oshape.ndim(); ++i) { if (oshape[i] == 1) oshape[i] = 0; } } @@ -2223,7 +2230,7 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& 
attrs, CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Depth To Space requires exactly 4D tensor"; - mxnet::TShape expected_out(4); + mxnet::TShape expected_out(4, -1); mxnet::TShape& in_shape = in_attrs->at(0); int block = param.block_size; @@ -2241,14 +2248,14 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs, expected_out[0] = in_shape[0]; expected_out[1] = in_shape[1] / (block * block); - size_t i = 2; + int i = 2; while (i < expected_out.ndim()) { expected_out[i] = in_shape[i] * block; ++i; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); - return true; + return shape_is_known(expected_out); } inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs, @@ -2387,7 +2394,7 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Space To Depth requires exactly 4D tensor"; - mxnet::TShape expected_out(in_attrs->at(0).ndim()); + mxnet::TShape expected_out(in_attrs->at(0).ndim(), -1); mxnet::TShape& in_shape = in_attrs->at(0); int block = param.block_size; @@ -2408,14 +2415,14 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs, expected_out[0] = in_shape[0]; expected_out[1] = in_shape[1] * block * block; - uint32_t i = 2; + int i = 2; while (i < expected_out.ndim()) { expected_out[i] = in_shape[i] / block; ++i; } SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out); - return true; + return shape_is_known(expected_out); } inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs, @@ -2556,7 +2563,7 @@ struct SplitParam : public dmlc::Parameter { }; // struct SplitParam inline mxnet::TShape GetSplitIndices(const mxnet::TShape& ishape, int axis, int sections) { - mxnet::TShape indices(sections+1); + mxnet::TShape indices(sections+1, -1); indices[0] = 0; int64_t section_size = ishape[axis] / sections; for (int i = 0; i < sections; ++i) { @@ -2588,7 +2595,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, 
CHECK_EQ(in_attrs->size(), 1U); mxnet::TShape dshape = in_attrs->at(split_enum::kData); mxnet::TShape ishape = in_attrs->at(split_enum::kData); - if (dshape.ndim() == 0) return false; + if (!shape_is_known(dshape)) return false; if (param.axis >= 0) { CHECK_LT(static_cast(param.axis), dshape.ndim()); } else { @@ -2603,7 +2610,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, int num_outputs = (param.sections > 0) ? indices.ndim() - 1 : indices.ndim(); // Pre-compute squeezed output shape for future usage mxnet::TShape squeezed_dshape = dshape; - for (int d = real_axis; d < static_cast(squeezed_dshape.ndim()) - 1; ++d) { + for (int d = real_axis; d < squeezed_dshape.ndim() - 1; ++d) { squeezed_dshape[d] = squeezed_dshape[d+1]; } squeezed_dshape = mxnet::TShape(&squeezed_dshape[0], &squeezed_dshape[squeezed_dshape.ndim()-1]); @@ -2635,7 +2642,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, back_calculate_dshape[real_axis] += (*out_attrs)[i][real_axis]; } } - for (int d = real_axis + 1; d < static_cast(ishape.ndim()); ++d) { + for (int d = real_axis + 1; d < ishape.ndim(); ++d) { if (param.squeeze_axis) { back_calculate_dshape[d] = (*out_attrs)[0][d - 1]; } else { diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index 1431fef13594..b80c9a54510f 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -410,8 +410,8 @@ Examples:: "transpose", n, ograds, {}, std::unordered_map()); } else { - mxnet::TShape axes = mxnet::TShape(param.axes.ndim()); - for (index_t i = 0; i < axes.ndim(); ++i) { + mxnet::TShape axes = mxnet::TShape(param.axes.ndim(), -1); + for (int i = 0; i < axes.ndim(); ++i) { axes[param.axes[i]] = i; } std::ostringstream os; diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h index 5a95e05ffb65..1dda90104205 100644 --- a/src/operator/tensor/ordering_op-inl.h +++ b/src/operator/tensor/ordering_op-inl.h @@ -149,7 +149,7 @@ inline void 
ParseTopKParam(const mxnet::TShape& src_shape, const TopKParam& para << src_shape.ndim() << ", found axis=" << *axis; *batch_size = src_shape.Size() / src_shape[*axis]; *element_num = src_shape[*axis]; - if (*axis != static_cast(src_shape.ndim()) - 1) { + if (*axis != src_shape.ndim() - 1) { *do_transpose = true; } } diff --git a/src/operator/tensor/slice-inl.h b/src/operator/tensor/slice-inl.h index 4e94cbeda46c..78a2bd8c7b45 100644 --- a/src/operator/tensor/slice-inl.h +++ b/src/operator/tensor/slice-inl.h @@ -34,15 +34,15 @@ namespace mxnet { namespace op { struct SliceParam : public dmlc::Parameter { - nnvm::Tuple> begin, end; - nnvm::Tuple> step; + mxnet::Tuple> begin, end; + mxnet::Tuple> step; DMLC_DECLARE_PARAMETER(SliceParam) { DMLC_DECLARE_FIELD(begin) .describe("starting indices for the slice operation, supports negative indices."); DMLC_DECLARE_FIELD(end) .describe("ending indices for the slice operation, supports negative indices."); DMLC_DECLARE_FIELD(step) - .set_default(nnvm::Tuple>()) + .set_default(mxnet::Tuple>()) .describe("step for the slice operation, supports negative values."); } bool operator==(const SliceParam& other) const { diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 59d72d4b18b6..1f8fc993b3fc 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -2265,7 +2265,7 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape): for i in range(len(src_shape)): holdout_src_shape = list(src_shape) - holdout_src_shape[i] = 0 + holdout_src_shape[i] = -1 holdout_src_shape = tuple(holdout_src_shape) net = mx.sym.Variable('data') net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape)) From 3449ef8ae9ca1bb7505185926a9ec20ad8cdcbf0 Mon Sep 17 00:00:00 2001 From: reminisce Date: Fri, 22 Mar 2019 13:20:02 -0700 Subject: [PATCH 04/32] [numpy] Fix unit tests after introducing numpy compatible shapes (#14487) * 
Fix infer shape rnn * Fix boolean mask and custom op unit tests * Fix multi proposal * Fix diag * Add global switch for backward compatibility and fix infer shape bugs * Fix slice op infer shape * Fix rnn infer shape * Add util funcs for ndim_is_known and dim_size_is_known * Revert rnn_cell.py --- include/mxnet/c_api.h | 15 +++- include/mxnet/imperative.h | 16 ++++ include/mxnet/tuple.h | 28 +++++- python/mxnet/ndarray/ndarray.py | 5 +- python/mxnet/numpy/__init__.py | 46 ++++++++++ python/mxnet/operator.py | 26 +++--- python/mxnet/symbol/symbol.py | 22 +++-- src/c_api/c_api.cc | 24 ++++-- src/c_api/c_api_common.h | 2 +- src/c_api/c_api_executor.cc | 7 ++ src/c_api/c_api_ndarray.cc | 12 +++ src/c_api/c_api_symbolic.cc | 9 +- src/common/utils.h | 54 ++++++++++++ src/executor/infer_graph_attr_pass.cc | 18 +++- src/imperative/imperative.cc | 4 +- src/imperative/imperative_utils.h | 19 ++++- src/operator/batch_norm_v1-inl.h | 2 +- src/operator/contrib/multi_proposal-inl.h | 2 +- src/operator/control_flow.cc | 6 +- src/operator/convolution_v1-inl.h | 2 +- src/operator/custom/custom.cc | 18 ++-- src/operator/nn/batch_norm.cc | 2 +- src/operator/nn/concat.cc | 25 +++--- src/operator/nn/convolution.cc | 85 +++++++++++-------- src/operator/nn/cudnn/cudnn_batch_norm.cc | 2 +- src/operator/nn/dropout.cc | 2 +- src/operator/nn/fully_connected.cc | 2 +- src/operator/nn/layer_norm.cc | 2 +- src/operator/nn/pooling.cc | 2 +- src/operator/operator_common.h | 6 +- src/operator/pooling_v1-inl.h | 2 +- src/operator/quantization/quantized_concat.cc | 2 +- src/operator/random/unique_sample_op.h | 2 +- src/operator/slice_channel-inl.h | 15 ++-- src/operator/softmax_output-inl.h | 2 +- src/operator/softmax_output.cc | 4 +- src/operator/svm_output-inl.h | 2 +- src/operator/tensor/broadcast_reduce_op.h | 8 +- src/operator/tensor/diag_op-inl.h | 4 +- .../tensor/elemwise_binary_broadcast_op.h | 2 +- src/operator/tensor/init_op.h | 14 +-- src/operator/tensor/matrix_op-inl.h | 24 +++--- 
tests/python/unittest/test_operator.py | 8 +- 43 files changed, 402 insertions(+), 152 deletions(-) diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index cdc20b1e3970..102e4b2c14a2 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -182,7 +182,7 @@ typedef int (*CustomOpFBFunc)(int /*size*/, void** /*ptrs*/, int* /*tags*/, typedef int (*CustomOpDelFunc)(void* /*state*/); typedef int (*CustomOpListFunc)(char*** /*args*/, void* /*state*/); typedef int (*CustomOpInferShapeFunc)(int /*num_input*/, int* /*ndims*/, - unsigned** /*shapes*/, void* /*state*/); + int** /*shapes*/, void* /*state*/); typedef int (*CustomOpInferStorageTypeFunc)(int /*num_input*/, int* /*stypes*/, void* /*state*/); typedef int (*CustomOpBackwardInferStorageTypeFunc)(int /*num_input*/, int * /*stypes*/, @@ -1048,6 +1048,19 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr); * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXAutogradIsTraining(bool* curr); +/*! + * \brief get whether numpy compatibility is on + * \param curr returns the current status + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXIsNumpyCompatible(bool* curr); +/*! + * \brief set numpy compatibility switch + * \param is_np_comp 1 when numpy compatibility is on, 0 when off + * \param prev returns the previous status before this set + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXSetIsNumpyCompatible(int is_np_comp, int* prev); /*! * \brief mark NDArrays as variables to compute gradient for autograd * \param num_var number of variable NDArrays diff --git a/include/mxnet/imperative.h b/include/mxnet/imperative.h index 52cedb2fadd9..ad209913ac53 100644 --- a/include/mxnet/imperative.h +++ b/include/mxnet/imperative.h @@ -97,6 +97,16 @@ class Imperative { is_recording_ = is_recording; return old; } + /*! brief whether numpy compatibility is on. */ + bool is_np_comp() const { + return is_np_comp_; + } + /*! 
brief turn on or turn off numpy compatibility switch. */ + bool set_is_np_comp(bool is_np_comp) { + bool old = is_np_comp_; + is_np_comp_ = is_np_comp; + return old; + } /*! \brief to record operator, return corresponding node. */ void RecordOp(nnvm::NodeAttrs&& attrs, const std::vector& inputs, @@ -165,9 +175,15 @@ class Imperative { #if DMLC_CXX11_THREAD_LOCAL static thread_local bool is_train_; static thread_local bool is_recording_; + // TOOD(junwu): Added numpy compatibility switch for backward compatibility. + // Delete it in the next major release. + static thread_local bool is_np_comp_; #else static MX_THREAD_LOCAL bool is_train_; static MX_THREAD_LOCAL bool is_recording_; + // TOOD(junwu): Added numpy compatibility switch for backward compatibility. + // Delete it in the next major release. + static MX_THREAD_LOCAL bool is_np_comp_; #endif /*! \brief node count used for naming */ std::atomic node_count_{0}; diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index 49852f73fbac..d83e843033e3 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -607,12 +607,36 @@ class TShape : public Tuple { #endif }; +/*! brief check if a shape's ndim is known. */ +inline bool ndim_is_known(const int ndim) { + CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim; + return ndim != -1; +} + +/*! brief check if a shape's ndim is known. */ +inline bool ndim_is_known(const TShape& x) { + return ndim_is_known(x.ndim()); +} + +/*! brief check if a shape's dim size is known. */ +inline bool dim_size_is_known(const int dim_size) { + CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size; + return dim_size != -1; +} + +/*! brief check if a shape's dim size is known. */ +inline bool dim_size_is_known(const TShape& x, const int idx) { + CHECK(idx >= 0 && idx < x.ndim()) + << "idx = " << idx << " exceeds shape dimension range [0, " << x.ndim() << ")"; + return dim_size_is_known(x[idx]); +} + /*! 
brief check if shape is known using the NumPy compatible definition. * zero-dim and zero-size tensors are valid. -1 means unknown.*/ inline bool shape_is_known(const TShape& x) { - if (x.ndim() == -1) return false; + if (!ndim_is_known(x)) return false; for (int i = 0; i < x.ndim(); ++i) { - if (x[i] == -1) return false; + if (!dim_size_is_known(x, i)) return false; } return true; } diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 1200c4a72c00..44334fc7f43e 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -1849,7 +1849,10 @@ def shape(self): pdata = ctypes.POINTER(mx_int)() check_call(_LIB.MXNDArrayGetShape( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) - return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index + if ndim.value == -1: + return None + else: + return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index @property diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py index b1139a05791d..e0dfda10113e 100644 --- a/python/mxnet/numpy/__init__.py +++ b/python/mxnet/numpy/__init__.py @@ -17,4 +17,50 @@ # specific language governing permissions and limitations # under the License. +import ctypes +from ..base import _LIB, check_call + __all__ = [] + + +def set_np_comp(is_np_comp): + prev = ctypes.c_int() + check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(is_np_comp), ctypes.byref(prev))) + return bool(prev.value) + + +def is_np_comp(): + curr = ctypes.c_bool() + check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr))) + return curr.value + + +class _NumpyCompatibilityStateScope(object): + """Scope for managing numpy compatibility state. 
+ + Example:: + + with _NumpyCompatibilityStateScope(True): + y = model(x) + backward([y]) + + """ + def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name + self._enter_is_np_comp = is_np_comp + self._prev_is_np_comp = None + + def __enter__(self): + if self._enter_is_np_comp is not None: + self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp) + + def __exit__(self, ptype, value, trace): + if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp: + set_np_comp(self._prev_is_np_comp) + + +def enable_np_comp(): + return _NumpyCompatibilityStateScope(True) + + +def disable_np_comp(): + return _NumpyCompatibilityStateScope(False) diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py index e8fa571d44db..2c69b9b46521 100644 --- a/python/mxnet/operator.py +++ b/python/mxnet/operator.py @@ -28,7 +28,7 @@ from ctypes import CFUNCTYPE, POINTER, Structure, pointer from ctypes import c_void_p, c_int, c_char, c_char_p, cast, c_bool -from .base import _LIB, check_call, MXCallbackList, c_array, c_array_buf +from .base import _LIB, check_call, MXCallbackList, c_array, c_array_buf, mx_int from .base import c_str, mx_uint, mx_float, ctypes2numpy_shared, NDArrayHandle, py_str from . import symbol, context from .ndarray import NDArray, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP @@ -164,7 +164,7 @@ def get_symbol(self, *args, **kwargs): fb_functype = CFUNCTYPE(None, c_int, POINTER(POINTER(mx_float)), POINTER(c_int), POINTER(POINTER(mx_uint)), POINTER(c_int), c_void_p) infer_functype = CFUNCTYPE(None, c_int, POINTER(c_int), - POINTER(POINTER(mx_uint)), c_void_p) + POINTER(POINTER(mx_int)), c_void_p) list_functype = CFUNCTYPE(None, POINTER(POINTER(POINTER(c_char))), c_void_p) class NumpyOpInfo(Structure): """Structure that holds Callback information. 
Passed to NumpyOpProp""" @@ -214,9 +214,9 @@ def infer_shape_entry(num_tensor, tensor_dims, assert len(ishape) == n_in rshape = list(ishape) + list(oshape) for i in range(n_in+n_out): - tensor_shapes[i] = cast(c_array_buf(mx_uint, - array('I', rshape[i])), - POINTER(mx_uint)) + tensor_shapes[i] = cast(c_array_buf(mx_int, + array('i', rshape[i])), + POINTER(mx_int)) tensor_dims[i] = len(rshape[i]) def list_outputs_entry(out, _): @@ -266,7 +266,7 @@ def __init__(self, need_top_grad=True): def get_symbol(self, *args, **kwargs): fb_functype = CFUNCTYPE(c_bool, c_int, POINTER(c_void_p), POINTER(c_int), c_void_p) infer_functype = CFUNCTYPE(c_bool, c_int, POINTER(c_int), - POINTER(POINTER(mx_uint)), c_void_p) + POINTER(POINTER(mx_int)), c_void_p) list_functype = CFUNCTYPE(c_bool, POINTER(POINTER(POINTER(c_char))), c_void_p) deps_functype = CFUNCTYPE(c_bool, c_int_p, c_int_p, c_int_p, c_int_p, POINTER(c_int_p), c_void_p) @@ -335,9 +335,9 @@ def infer_shape_entry(num_tensor, tensor_dims, assert len(ishape) == n_in rshape = list(ishape) + list(oshape) for i in range(n_in+n_out): - tensor_shapes[i] = cast(c_array_buf(mx_uint, - array('I', rshape[i])), - POINTER(mx_uint)) + tensor_shapes[i] = cast(c_array_buf(mx_int, + array('i', rshape[i])), + POINTER(mx_int)) tensor_dims[i] = len(rshape[i]) except Exception: print('Error in NDArrayOp.infer_shape: %s' % traceback.format_exc()) @@ -698,7 +698,7 @@ def do_register(prop_cls): del_functype = CFUNCTYPE(c_int, c_void_p) infershape_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), - POINTER(POINTER(mx_uint)), c_void_p) + POINTER(POINTER(mx_int)), c_void_p) infertype_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), c_void_p) inferstorage_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), c_void_p) inferstorage_backward_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), \ @@ -747,9 +747,9 @@ def infer_shape_entry(num_tensor, tensor_dims, "shapes, got %d."%(n_aux, len(ashape)) rshape = list(ishape) + list(oshape) + list(ashape) for 
i in range(n_in+n_out+n_aux): - tensor_shapes[i] = cast(c_array_buf(mx_uint, - array('I', rshape[i])), - POINTER(mx_uint)) + tensor_shapes[i] = cast(c_array_buf(mx_int, + array('i', rshape[i])), + POINTER(mx_int)) tensor_dims[i] = len(rshape[i]) infer_shape_entry._ref_holder = [tensor_shapes] diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index b9432d372bc7..b2b8261be51d 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -42,6 +42,7 @@ from ..ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID from ..ndarray import _ndarray_cls from ..executor import Executor +from ..numpy import is_np_comp from . import _internal from . import op from ._internal import SymbolBase, _set_symbol_class @@ -1078,7 +1079,11 @@ def infer_shape(self, *args, **kwargs): arg_names = self.list_arguments() unknowns = [] for name, shape in zip(arg_names, arg_shapes): - if not shape or not _numpy.prod(shape): + if is_np_comp(): + shape_is_none = not shape or -1 in shape + else: + shape_is_none = not shape or 0 in shape + if shape_is_none: if len(unknowns) >= 10: unknowns.append('...') break @@ -1204,12 +1209,15 @@ def _infer_shape_impl(self, partial, *args, **kwargs): ctypes.byref(aux_shape_data), ctypes.byref(complete))) if complete.value != 0: - arg_shapes = [ - tuple(arg_shape_data[i][:arg_shape_ndim[i]]) for i in range(arg_shape_size.value)] - out_shapes = [ - tuple(out_shape_data[i][:out_shape_ndim[i]]) for i in range(out_shape_size.value)] - aux_shapes = [ - tuple(aux_shape_data[i][:aux_shape_ndim[i]]) for i in range(aux_shape_size.value)] + arg_shapes = [tuple(arg_shape_data[i][:arg_shape_ndim[i]]) + if arg_shape_ndim[i] >= 0 else None + for i in range(arg_shape_size.value)] + out_shapes = [tuple(out_shape_data[i][:out_shape_ndim[i]]) + if out_shape_ndim[i] >= 0 else None + for i in range(out_shape_size.value)] + aux_shapes = [tuple(aux_shape_data[i][:aux_shape_ndim[i]]) + if aux_shape_ndim[i] >= 0 else None + for i in 
range(aux_shape_size.value)] return (arg_shapes, out_shapes, aux_shapes) else: return (None, None, None) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 614732081a98..f1571312dceb 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -44,9 +44,11 @@ #include "mxnet/rtc.h" #include "mxnet/storage.h" #include "mxnet/libinfo.h" +#include "mxnet/imperative.h" #include "./c_api_common.h" #include "../operator/custom/custom-inl.h" #include "../operator/tensor/matrix_op-inl.h" +#include "../common/utils.h" using namespace mxnet; @@ -499,15 +501,23 @@ int MXNDArrayGetShape(NDArrayHandle handle, API_BEGIN(); NDArray *arr = static_cast(handle); if (!arr->is_none()) { - const mxnet::TShape &s = arr->shape(); + mxnet::TShape s = arr->shape(); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToLegacyShape(&s); + } *out_dim = s.ndim(); - CHECK_GE(s.ndim(), 0); - std::vector& buffer = ret->arg_shape_buffer; - buffer.resize(s.ndim()); - mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data()); - *out_pdata = buffer.data(); + if (s.ndim() >= 0) { + std::vector &buffer = ret->arg_shape_buffer; + buffer.resize(s.ndim()); + mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data()); + *out_pdata = buffer.data(); + } } else { - *out_dim = 0; + if (Imperative::Get()->is_np_comp()) { + *out_dim = -1; + } else { + *out_dim = 0; + } } API_END(); } diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 690a1eae1055..329dc9adc7cf 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -91,7 +91,7 @@ struct MXAPIThreadLocalEntry { data->resize(shapes.size()); size_t size = 0; for (const auto& s : shapes) { - CHECK_GE(s.ndim(), 0); + if (s.ndim() > 0); size += s.ndim(); } buffer->resize(size); diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc index d0006383bdc8..fc59463ab689 100644 --- a/src/c_api/c_api_executor.cc +++ b/src/c_api/c_api_executor.cc @@ -25,8 +25,10 @@ #include #include #include +#include #include 
"./c_api_common.h" #include "../executor/graph_executor.h" +#include "../common/utils.h" #if MXNET_USE_TENSORRT #include "../executor/trt_graph_executor.h" #endif // MXNET_USE_TENSORRT @@ -416,6 +418,11 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, CHECK(p.second) << "Duplicate shapes are provided for argument " << provided_arg_shape_names[i] << " in simple_bind"; } + if (!Imperative::Get()->is_np_comp()) { + for (auto &kv : arg_shape_map) { + common::ConvertToNumpyShape(&kv.second); + } + } // create para name set for sharing data array memory std::unordered_set shared_arg_name_set(num_shared_arg_names); diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc index 18f6c411e039..0e136b03ecd7 100644 --- a/src/c_api/c_api_ndarray.cc +++ b/src/c_api/c_api_ndarray.cc @@ -276,6 +276,18 @@ int MXAutogradSetIsRecording(int is_recording, int* prev) { API_END(); } +int MXIsNumpyCompatible(bool* curr) { + API_BEGIN(); + *curr = Imperative::Get()->is_np_comp(); + API_END(); +} + +int MXSetIsNumpyCompatible(int is_np_comp, int* prev) { + API_BEGIN(); + *prev = Imperative::Get()->set_is_np_comp(static_cast(is_np_comp)); + API_END(); +} + int MXAutogradMarkVariables(mx_uint num_var, NDArrayHandle *var_handles, mx_uint *reqs_array, diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 8b961f46c39e..98034417abae 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -24,6 +24,7 @@ */ #include "mxnet/base.h" #include "mxnet/c_api.h" +#include "mxnet/imperative.h" #include "nnvm/c_api.h" #include "nnvm/pass.h" #include "nnvm/pass_functions.h" @@ -543,8 +544,14 @@ int MXSymbolInferShape(SymbolHandle sym, throw dmlc::Error(err.msg); } + // if use legacy shape definition, need to convert numpy shape to legacy shape + mxnet::ShapeVector shapes = g.GetAttr("shape"); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToLegacyShape(&shapes); + } + // copy back - CopyAttr(g.indexed_graph(), g.GetAttr("shape"), + 
CopyAttr(g.indexed_graph(), shapes, &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes)); // copy data back diff --git a/src/common/utils.h b/src/common/utils.h index 8e6966952890..f3df2e15ec32 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -734,6 +734,60 @@ inline void ParallelCopy(DType* dst, const DType* src, index_t size) { } } +/*! + * \brief If numpy compatibility is turned off (default), the shapes passed in + * by users follow the legacy shape definition: + * 1. 0 ndim means the shape is completely unknown. + * 2. 0 dim size means the dim size is unknown. + * We need to convert those shapes to use the numpy shape definition: + * 1. 0 ndim means it's a scalar tensor. + * 2. -1 ndim means the shape is unknown. + * 3. 0 dim size means no elements in that dimension. + * 4. -1 dim size means the dimension's size is unknown. + * so that operator's infer shape function can work in backend. + * \param shape to be converted. + */ +inline void ConvertToNumpyShape(mxnet::TShape* shape) { + if (shape->ndim() == 0) { // legacy shape ndim = 0 means unknown + *shape = mxnet::TShape(); // unknown shape ndim = -1 + } else { + for (int j = 0; j < shape->ndim(); ++j) { + CHECK_GE((*shape)[j], 0) << "Legacy shape cannot have dim size < 0"; + if ((*shape)[j] == 0) { // legacy shape dim_size = 0 means unknown + (*shape)[j] = -1; // unknown dim size = -1 + } + } + } +} + +inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) { + for (size_t i = 0; i < shapes->size(); ++i) { + ConvertToNumpyShape(&(shapes->at(i))); + } +} + +/*! + * \brief This is function is used to convert shapes returned by + * the infer shape functions/pass to the legacy shape definition. 
+ */ +inline void ConvertToLegacyShape(mxnet::TShape* shape) { + if (!mxnet::ndim_is_known(*shape)) { + *shape = mxnet::TShape(0); + } else { + for (int j = 0; j < shape->ndim(); ++j) { + if (!mxnet::dim_size_is_known(*shape, j)) { + (*shape)[j] = 0; + } + } + } +} + +inline void ConvertToLegacyShape(mxnet::ShapeVector* shapes) { + for (size_t i = 0; i < shapes->size(); ++i) { + ConvertToLegacyShape(&(shapes->at(i))); + } +} + } // namespace common } // namespace mxnet #endif // MXNET_COMMON_UTILS_H_ diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc index 3a5c5ab9806f..fa7aee518486 100644 --- a/src/executor/infer_graph_attr_pass.cc +++ b/src/executor/infer_graph_attr_pass.cc @@ -24,6 +24,7 @@ #include #include +#include #include "./exec_pass.h" #include "../operator/operator_common.h" #include "../common/exec_utils.h" @@ -467,6 +468,12 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, std::vector ishape, oshape; // whether a shape is dynamic std::vector is_dynamic(rshape.size(), 0); + + // convert to numpy compatible shape to use operator's infer shape function + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&rshape); + } + // inference step function for nid auto infer_step = [&](uint32_t nid, bool last_iter) { const auto& inode = idx[nid]; @@ -483,6 +490,9 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, if (it != inode.source->attrs.dict.end()) { std::istringstream is(it->second); CHECK(is >> rshape[out_ent_id]) << "Invalid attribute"; + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&rshape[out_ent_id]); + } } } // assign a default value to node attribute @@ -546,7 +556,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, bool is_input_dynamic_shape = false; for (uint32_t i = 0; i < ishape.size(); ++i) { ishape[i] = rshape[idx.entry_id(inode.inputs[i])]; - if (ishape[i].ndim() == 0 && is_dynamic[idx.entry_id(inode.inputs[i])]) { + if (!mxnet::ndim_is_known(ishape[i]) && 
is_dynamic[idx.entry_id(inode.inputs[i])]) { is_input_dynamic_shape = true; } if (fis_none(ishape[i])) forward_known = false; @@ -563,7 +573,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret, auto finfer = finfer_shape.get(inode.source->op(), fdefault); if (finfer == nullptr || is_input_dynamic_shape) { for (uint32_t i = 0; i < oshape.size(); ++i) { - if (oshape[i].ndim() == 0) { + if (!mxnet::ndim_is_known(oshape[i].ndim())) { is_dynamic[idx.entry_id(nid, i)] = 1; } } @@ -650,12 +660,12 @@ nnvm::Graph InferShape(nnvm::Graph&& graph, "shape", "shape_num_unknown_nodes", [](const mxnet::TShape& s) { return !mxnet::shape_is_known(s); }, [](const mxnet::TShape& s) { - if (s.ndim() == -1) { + if (!mxnet::ndim_is_known(s)) { return static_cast(1); } size_t ret = 0; for (const auto& val : s) { - if (val == -1) { + if (!mxnet::dim_size_is_known(val)) { ++ret; } } diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 3e5b3987522c..b027de0a0f6f 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -25,9 +25,11 @@ namespace mxnet { #if DMLC_CXX11_THREAD_LOCAL thread_local bool Imperative::is_train_ = false; thread_local bool Imperative::is_recording_ = false; +thread_local bool Imperative::is_np_comp_ = false; #else MX_THREAD_LOCAL bool Imperative::is_train_ = false; MX_THREAD_LOCAL bool Imperative::is_recording_ = false; +MX_THREAD_LOCAL bool Imperative::is_np_comp_ = false; #endif Imperative* Imperative::Get() { @@ -109,7 +111,7 @@ OpStatePtr Imperative::Invoke( OpStatePtr ret = InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode); // the followinng loop is used for finding out the correct shape when some shapes are dynamic for (size_t i = 0; i < outputs.size(); i++) { - if (outputs[i]->shape().ndim() == 0) { + if (!shape_is_known(outputs[i]->shape())) { // the WaitToRead overhead here does not seem to be avoidable outputs[i]->WaitToRead(); outputs[i]->SetShapeFromChunk(); diff --git a/src/imperative/imperative_utils.h 
b/src/imperative/imperative_utils.h index 071f4fa9dd0b..6864428d2559 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -121,7 +121,24 @@ inline void SetShapeType(const Context& ctx, if (!infershape.count(attrs.op)) { is_dynamic_shape_existing = true; } else { - CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes)); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&in_shapes); + common::ConvertToNumpyShape(&out_shapes); + } + const bool success = infershape[attrs.op](attrs, &in_shapes, &out_shapes); + if (!success) { + std::stringstream os; + os << "Operator " << attrs.op->name << " inferring shapes failed.\n"; + os << "input shapes:\n"; + for (auto& nd : inputs) { + os << nd->shape() << '\n'; + } + os << "output shapes:\n"; + for (auto& nd : outputs) { + os << nd->shape() << '\n'; + } + LOG(FATAL) << os.str(); + } CHECK_EQ(out_shapes.size(), outputs.size()); } // infer type diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h index 8016510090ab..89412357ac67 100644 --- a/src/operator/batch_norm_v1-inl.h +++ b/src/operator/batch_norm_v1-inl.h @@ -261,7 +261,7 @@ class BatchNormV1Prop : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); out_shape->clear(); diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h index a9afb8e4114e..4d278fb40645 100644 --- a/src/operator/contrib/multi_proposal-inl.h +++ b/src/operator/contrib/multi_proposal-inl.h @@ -108,7 +108,7 @@ class MultiProposalProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]"; const 
mxnet::TShape &dshape = in_shape->at(proposal::kClsProb); - if (!mxnet::op::shape_is_none(dshape)) return false; + if (mxnet::op::shape_is_none(dshape)) return false; Shape<4> bbox_pred_shape; bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]); SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred, diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc index 9ba3b5471c60..4c0d67bb08f7 100644 --- a/src/operator/control_flow.cc +++ b/src/operator/control_flow.cc @@ -301,7 +301,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < params.num_out_data; i++) { mxnet::TShape shape = subg_out_shape[i]; // If we don't have shape info, we don't need to do anything. - if (!shape_is_known(shape)) + if (!mxnet::ndim_is_known(shape)) continue; subg_out_shape[i] = SliceFirstDim(shape); } @@ -317,7 +317,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < params.num_out_data; i++) { // If the output shape isn't inferred, we don't need to propogate the info. const auto& g_out_shape = subg_out_shape[i]; - if (!shape_is_known(g_out_shape)) + if (!mxnet::ndim_is_known(g_out_shape)) continue; auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1); @@ -336,7 +336,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs, const auto &shape = subg_in_shape[loc]; // If the input data shape isn't inferred, we don't need to propogate the // info. 
- if (!shape_is_known(shape)) + if (!mxnet::ndim_is_known(shape)) continue; if (data_1d[i]) { diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h index 0d6ffd7e895e..080c718dc9bf 100644 --- a/src/operator/convolution_v1-inl.h +++ b/src/operator/convolution_v1-inl.h @@ -405,7 +405,7 @@ class ConvolutionV1Prop : public OperatorProperty { // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData]; - if (!shape_is_known(dshp)) return false; + if (!mxnet::ndim_is_known(dshp)) return false; if (param_.kernel.ndim() == 2) { // 2d conv_v1 CHECK_EQ(dshp.ndim(), 4U) \ diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc index 46249c9bbcc6..412bfa1bc3aa 100644 --- a/src/operator/custom/custom.cc +++ b/src/operator/custom/custom.cc @@ -128,17 +128,21 @@ bool InferShape(const NodeAttrs& attrs, const CustomParam& params = nnvm::get(attrs.parsed); size_t total = params.num_args + params.num_outs + params.num_auxs; - std::vector shapes(total); + std::vector shapes(total); std::vector ndims(total); size_t buff_size = 0; - for (const auto& i : *in_shape) buff_size += i.ndim(); - std::vector buff(buff_size); - uint32_t *ptr = buff.data(); + for (const auto& i : *in_shape) { + if (i.ndim() > 0) { + buff_size += i.ndim(); + } + } + std::vector buff(buff_size); + int *ptr = buff.data(); for (size_t i = 0; i < in_shape->size(); ++i) { shapes[i] = ptr; ndims[i] = (*in_shape)[i].ndim(); - for (size_t j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) { - *ptr = static_cast((*in_shape)[i][j]); + for (int j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) { + *ptr = (*in_shape)[i][j]; } } @@ -263,7 +267,7 @@ OpStatePtr CreateState(const NodeAttrs& attrs, Context ctx, for (size_t i = 0; i < in_shape.size(); ++i) { shapes[i] = ptr; ndims[i] = in_shape[i].ndim(); - for (size_t j = 0; j < in_shape[i].ndim(); ++j, ++ptr) { + for (int j = 0; j < 
in_shape[i].ndim(); ++j, ++ptr) { *ptr = static_cast(in_shape[i][j]); } } diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index 590b1b428023..622952cc4bc5 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -332,7 +332,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, const int channelCount = dshape[channelAxis]; - if (!shape_is_known(dshape)) { + if (!mxnet::ndim_is_known(dshape)) { return false; } diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 5435bd815334..b534ee58e85c 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -46,7 +46,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::TShape tmp = (*in_shape)[i]; if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - has_unknown_dim_size = tmp[axis] == -1 || has_unknown_dim_size; + has_unknown_dim_size = !mxnet::dim_size_is_known(tmp, axis) || has_unknown_dim_size; size += tmp[axis]; tmp[axis] = -1; shape_assign(&dshape, tmp); @@ -91,26 +91,27 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, int axis = -1; for (int i = 0; i < param_.num_args; ++i) { mxnet::TShape tmp = (*in_shape)[i]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - if (tmp[axis] == 0) { + if (!mxnet::dim_size_is_known(tmp, axis)) { zero_indices.emplace_back(i); } else { + CHECK_GE(tmp[axis], 0); size += tmp[axis]; } - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } } mxnet::TShape tmp = (*out_shape)[0]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) @@ -120,21 +121,21 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, if (zero_indices.empty()) 
dshape[axis] = size; CHECK(shape_assign(&(*out_shape)[0], dshape)) << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0]; - if ((*out_shape)[0][axis] != 0 && !zero_indices.empty()) { + if ((*out_shape)[0][axis] != -1 && !zero_indices.empty()) { int residual = (*out_shape)[0][axis] - size; CHECK_GE(residual, 0) << "Input size already exceeds output size. Residual: " << residual; - CHECK(zero_indices.size() <= 2 && zero_indices.size() >= 0) + CHECK(zero_indices.size() <= 2 && zero_indices.size() > 0) << "Expecting 1 or 2 inputs that need shape inference. Got: " << zero_indices.size(); - bool need_infer = !(*out_shape)[0].Size(); + bool need_infer = !shape_is_known((*out_shape)[0]); for (int i : zero_indices) { (*in_shape)[i][axis] = residual / zero_indices.size(); - need_infer = need_infer || !(*in_shape)[i].Size(); + need_infer = need_infer || !shape_is_known((*in_shape)[i]); } return !need_infer; } - return dshape.Size() != 0; + return shape_is_known(dshape); } static bool ConcatType(const nnvm::NodeAttrs& attrs, diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index dfbc89de7b0e..536e9a731171 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -96,24 +96,28 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, // CHECK_EQ(out_shape->size(), 1) << "Output: [output]"; out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshp = (*in_shape)[conv::kData]; - if (!shape_is_known(dshp)) return false; + if (!mxnet::ndim_is_known(dshp)) return false; if (param_.kernel.ndim() == 1) { // 1d conv CHECK_EQ(dshp.ndim(), 3U) << "Input data should be 3D in batch-num_filter-x"; Shape<3> dshape = ConvertLayout(dshp.get<3>(), param_.layout.value(), kNCW); - Shape<3> wshape = Shape3(param_.num_filter / param_.num_group, dshape[1] / param_.num_group, + Shape<3> wshape = Shape3(param_.num_filter / param_.num_group, + mxnet::dim_size_is_known(dshape, 1) ? 
dshape[1] / param_.num_group : -1, param_.kernel[0]); wshape = ConvertLayout(wshape, kNCW, param_.layout.value()); - wshape[0] *= param_.num_group; + if (wshape[0] >= 0) { + wshape[0] *= param_.num_group; + } SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter)); } const index_t dilated_ksize_x = param_.DilatedKernelSize(0); - CHECK_EQ(dshape[1] % param_.num_group, 0U) \ - << "input num_filter must divide group size"; + if (dshape[1] != -1) { + CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size"; + } CHECK_EQ(param_.num_filter % param_.num_group, 0U) \ << "output num_filter must divide group size"; CHECK_GT(param_.kernel.Size(), 0U) \ @@ -125,21 +129,21 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<3> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] ? - (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : 0; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input height/width if the corresponding stride is 1. 
oshape = ConvertLayout((*out_shape)[0].get<3>(), param_.layout.value(), kNCW); dshape[0] = oshape[0]; - if (oshape[2] && param_.stride[0] == 1) { + if (oshape[2] != -1 && param_.stride[0] == 1) { dshape[2] = oshape[2] + dilated_ksize_x - 1 - 2 * param_.pad[0]; } SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, ConvertLayout(dshape, kNCW, param_.layout.value())); // Check whether the kernel sizes are valid - if (dshape[2] != 0) { + if (dshape[2] != -1) { CHECK_LE(dilated_ksize_x, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; } return true; @@ -149,10 +153,12 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, << "Input data should be 4D in batch-num_filter-y-x"; Shape<4> dshape = ConvertLayout(dshp.get<4>(), param_.layout.value(), kNCHW); Shape<4> wshape = Shape4(param_.num_filter / param_.num_group, - dshape[1] / param_.num_group, + mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1, param_.kernel[0], param_.kernel[1]); wshape = ConvertLayout(wshape, kNCHW, param_.layout.value()); - wshape[0] *= param_.num_group; + if (wshape[0] >= 0) { + wshape[0] *= param_.num_group; + } SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter)); @@ -160,8 +166,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, const index_t dilated_ksize_y = param_.DilatedKernelSize(0); const index_t dilated_ksize_x = param_.DilatedKernelSize(1); - CHECK_EQ(dshape[1] % param_.num_group, 0U) \ - << "input num_filter must divide group size"; + if (dshape[1] != -1) { + CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size"; + } CHECK_EQ(param_.num_filter % param_.num_group, 0U) \ << "output num_filter must divide group size"; CHECK_GT(param_.kernel.Size(), 0U) \ @@ -173,29 +180,29 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<4> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] 
= dshape[2] ? - (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : 0; - oshape[3] = dshape[3] ? - (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : 0; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : -1; + oshape[3] = dshape[3] != -1 ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input height/width if the corresponding stride is 1. oshape = ConvertLayout((*out_shape)[0].get<4>(), param_.layout.value(), kNCHW); dshape[0] = oshape[0]; - if (oshape[2] && param_.stride[0] == 1) { + if (oshape[2] != -1 && param_.stride[0] == 1) { dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param_.pad[0]; } - if (oshape[3] && param_.stride[1] == 1) { + if (oshape[3] != -1 && param_.stride[1] == 1) { dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param_.pad[1]; } SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, ConvertLayout(dshape, kNCHW, param_.layout.value())); // Check whether the kernel sizes are valid - if (dshape[2] != 0) { + if (dshape[2] != -1) { CHECK_LE(dilated_ksize_y, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; } - if (dshape[3] != 0) { + if (dshape[3] != -1) { CHECK_LE(dilated_ksize_x, AddPad(dshape[3], param_.pad[1])) << "kernel size exceed input"; } return true; @@ -204,10 +211,13 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(dshp.ndim(), 5U) \ << "Input data should be 5D in batch-num_filter-depth-y-x"; Shape<5> dshape = ConvertLayout(dshp.get<5>(), param_.layout.value(), kNCDHW); - Shape<5> wshape = Shape5(param_.num_filter / param_.num_group, dshape[1] / param_.num_group, + Shape<5> wshape = Shape5(param_.num_filter / 
param_.num_group, + mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1, param_.kernel[0], param_.kernel[1], param_.kernel[2]); wshape = ConvertLayout(wshape, kNCDHW, param_.layout.value()); - wshape[0] *= param_.num_group; + if (wshape[0] >= 0) { + wshape[0] *= param_.num_group; + } SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape); if (!param_.no_bias) { SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter)); @@ -218,8 +228,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, const index_t dilated_ksize_d = param_.DilatedKernelSize(0); const index_t dilated_ksize_y = param_.DilatedKernelSize(1); const index_t dilated_ksize_x = param_.DilatedKernelSize(2); - CHECK_EQ(dshape[1] % param_.num_group, 0U) - << "input num_filter must divide group size"; + if (dshape[1] >= 0) { + CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size"; + } CHECK_EQ(param_.num_filter % param_.num_group, 0U) << "output num_filter must divide group size"; CHECK_GT(param_.kernel.Size(), 0U) \ @@ -233,37 +244,37 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs, Shape<5> oshape; oshape[0] = dshape[0]; oshape[1] = param_.num_filter; - oshape[2] = dshape[2] ? - (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : 0; - oshape[3] = dshape[3] ? - (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : 0; - oshape[4] = dshape[4] ? - (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : 0; + oshape[2] = dshape[2] != -1 ? + (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : -1; + oshape[3] = dshape[3] != -1 ? + (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : -1; + oshape[4] = dshape[4] != -1 ? 
+ (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : -1; SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value())); // Perform incomplete shape inference. Fill in the missing values in data shape. // 1) We can always fill in the batch_size. // 2) We can back-calculate the input depth/height/width if the corresponding stride is 1. oshape = ConvertLayout((*out_shape)[0].get<5>(), param_.layout.value(), kNCDHW); dshape[0] = oshape[0]; - if (oshape[2] && param_.stride[0] == 1) { + if (oshape[2] != -1 && param_.stride[0] == 1) { dshape[2] = oshape[2] + dilated_ksize_d - 1 - 2 * param_.pad[0]; } - if (oshape[3] && param_.stride[1] == 1) { + if (oshape[3] != -1 && param_.stride[1] == 1) { dshape[3] = oshape[3] + dilated_ksize_y - 1 - 2 * param_.pad[1]; } - if (oshape[4] && param_.stride[2] == 1) { + if (oshape[4] != -1 && param_.stride[2] == 1) { dshape[4] = oshape[4] + dilated_ksize_x - 1 - 2 * param_.pad[2]; } SHAPE_ASSIGN_CHECK(*in_shape, conv::kData, ConvertLayout(dshape, kNCDHW, param_.layout.value())); // Check whether the kernel sizes are valid - if (dshape[2] != 0) { + if (dshape[2] != -1) { CHECK_LE(dilated_ksize_d, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input"; } - if (dshape[3] != 0) { + if (dshape[3] != -1) { CHECK_LE(dilated_ksize_y, AddPad(dshape[3], param_.pad[1])) << "kernel size exceed input"; } - if (dshape[4] != 0) { + if (dshape[4] != -1) { CHECK_LE(dilated_ksize_x, AddPad(dshape[4], param_.pad[2])) << "kernel size exceed input"; } return true; diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cc b/src/operator/nn/cudnn/cudnn_batch_norm.cc index 1df888e4b38a..cb35ce170e8e 100644 --- a/src/operator/nn/cudnn/cudnn_batch_norm.cc +++ b/src/operator/nn/cudnn/cudnn_batch_norm.cc @@ -37,7 +37,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_ using namespace mshadow; CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, moving_mean, 
moving_var]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; in_shape->at(1) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(2) = mxnet::TShape(Shape1(dshape[1])); in_shape->at(3) = mxnet::TShape(Shape1(dshape[1])); diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc index 0e4d18b1fda8..afad6fd5cc80 100644 --- a/src/operator/nn/dropout.cc +++ b/src/operator/nn/dropout.cc @@ -95,7 +95,7 @@ Example:: CHECK_EQ(in_shape->size(), 1U); const DropoutParam& param = nnvm::get(attrs.parsed); mxnet::TShape dshape(in_shape->at(0)); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; out_shape->clear(); out_shape->push_back(dshape); for (int i = 0; i < param.axes.ndim(); ++i) { diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index 98277c206dd6..a097357ef5a3 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -52,7 +52,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs, mxnet::TShape dshape = (*in_shape)[fullc::kData]; mxnet::TShape oshape = (*out_shape)[0]; // require data to be known - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; index_t num_input; if (!param.flatten) { diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc index 1b0e99d8fe87..2e47503a3318 100644 --- a/src/operator/nn/layer_norm.cc +++ b/src/operator/nn/layer_norm.cc @@ -48,7 +48,7 @@ static bool LayerNormShape(const nnvm::NodeAttrs& attrs, const int channelCount = dshape[axis]; - if (!shape_is_known(dshape)) { + if (!mxnet::ndim_is_known(dshape)) { return false; } diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 7c365f5081a1..3e081c9a0552 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -114,7 +114,7 @@ static bool PoolingShape(const 
nnvm::NodeAttrs &attrs, << "Pooling: Input data should be 3D in (batch, channel, x)" << " Or 4D in (batch, channel, y, x) " << " Or 5D in (batch, channel, d, y, x)"; - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; int layout = param.GetLayout(dshape.ndim()); if (param.global_pool) { mxnet::TShape oshape = dshape; diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h index c95f859a1b5b..59f572211d0e 100644 --- a/src/operator/operator_common.h +++ b/src/operator/operator_common.h @@ -160,14 +160,14 @@ inline std::string type_string(const int& x) { * \return whether x and y are compatible. */ inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) { - if (y->ndim() == -1) { + if (!mxnet::ndim_is_known(*y)) { *y = x; return true; } else if (y->ndim() != x.ndim()) { - return x.ndim() == -1; + return !mxnet::ndim_is_known(x); } else { for (int i = 0; i < y->ndim(); ++i) { - if ((*y)[i] == -1) { + if (!mxnet::dim_size_is_known(*y, i)) { (*y)[i] = x[i]; } else if ((*y)[i] != x[i] && x[i] >= 0) { return false; diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h index 4e0ccc1caeb9..21ba270c9d42 100644 --- a/src/operator/pooling_v1-inl.h +++ b/src/operator/pooling_v1-inl.h @@ -247,7 +247,7 @@ class PoolingV1Prop : public OperatorProperty { CHECK_LE(dshape.ndim(), 5U) << "Pooling: Input data should be 4D in (batch, channel, y, x) " << "Or 5D in (batch, channel, d, y, x)"; mxnet::TShape oshape = dshape; - if (dshape.ndim() == 0) return false; + if (dshape.ndim() == -1) return false; if (param_.global_pool) { if (dshape.ndim() == 4) { oshape[2] = 1; diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc index f97807424701..2cc2ec9d0374 100644 --- a/src/operator/quantization/quantized_concat.cc +++ b/src/operator/quantization/quantized_concat.cc @@ -55,7 +55,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, 
mxnet::ShapeVector* in_sha shape_assign(&dshape, tmp); } - if (dshape.ndim() == -1) return false; + if (!mxnet::ndim_is_known(dshape)) return false; for (int i = 0; i < param_.num_args; ++i) { CHECK(shape_assign(&(*in_shape)[i], dshape)) diff --git a/src/operator/random/unique_sample_op.h b/src/operator/random/unique_sample_op.h index c97d4fdf7ced..e88b95a8bdd6 100644 --- a/src/operator/random/unique_sample_op.h +++ b/src/operator/random/unique_sample_op.h @@ -60,7 +60,7 @@ inline bool SampleUniqueShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 2U); // output shape is known - if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == -1) { + if ((*out_attrs)[0].ndim() == 2 && !mxnet::ndim_is_known(param.shape)) { SHAPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::Shape1((*out_attrs)[0][0])); return true; } diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h index a51b17cd324e..e37ffdcf1b91 100644 --- a/src/operator/slice_channel-inl.h +++ b/src/operator/slice_channel-inl.h @@ -195,7 +195,7 @@ class SliceChannelProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1U); mxnet::TShape dshape = in_shape->at(slice_enum::kData); mxnet::TShape ishape = in_shape->at(slice_enum::kData); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; if (param_.axis >= 0) { CHECK_LT(param_.axis, dshape.ndim()); } else { @@ -212,15 +212,18 @@ class SliceChannelProp : public OperatorProperty { << " evenly sized chunks, but this is not possible because " << param_.num_outputs << " does not evenly divide " << dshape[real_axis]; - if (param_.squeeze_axis && ishape[real_axis] != 0) { - CHECK_EQ(ishape[real_axis], static_cast(param_.num_outputs)) + if (param_.squeeze_axis && ishape[real_axis] != -1) { + CHECK_EQ(ishape[real_axis], param_.num_outputs) << "If squeeze axis is True, the size of the sliced axis must be the same as num_outputs." 
<< " Input shape=" << ishape << ", axis=" << real_axis << ", num_outputs=" << param_.num_outputs << "."; } - dshape[real_axis] /= param_.num_outputs; - if (param_.squeeze_axis && (dshape[real_axis] == 1 || ishape[real_axis] == 0)) { - for (int d = real_axis; d < static_cast(dshape.ndim()) - 1; ++d) { + if (dshape[real_axis] >= 0) { + dshape[real_axis] /= param_.num_outputs; + } + if (param_.squeeze_axis && (dshape[real_axis] == 1 + || !mxnet::dim_size_is_known(ishape, real_axis))) { + for (int d = real_axis; d < dshape.ndim() - 1; ++d) { dshape[d] = dshape[d+1]; } dshape = mxnet::TShape(&dshape[0], &dshape[dshape.ndim()-1]); diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h index f81a232d629a..80ab40ef6c50 100644 --- a/src/operator/softmax_output-inl.h +++ b/src/operator/softmax_output-inl.h @@ -349,7 +349,7 @@ class SoftmaxOutputProp : public OperatorProperty { lshape2[i-1] = dshape[i]; mxnet::TShape lshape3 = dshape; lshape3[1] = 1; - if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) { + if (!mxnet::ndim_is_known(in_shape->at(softmaxout_enum::kLabel))) { in_shape->at(softmaxout_enum::kLabel) = lshape1; } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) { } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) { diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc index 262242f98004..548225f0496b 100644 --- a/src/operator/softmax_output.cc +++ b/src/operator/softmax_output.cc @@ -85,7 +85,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs, const SoftmaxOutputParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; // label.shape == data.shape: use probability as label if (dshape != (*in_shape)[softmaxout_enum::kLabel]) { @@ -97,7 +97,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& 
attrs, lshape2[i-1] = dshape[i]; mxnet::TShape lshape3 = dshape; lshape3[1] = 1; - if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) { + if (!mxnet::ndim_is_known(in_shape->at(softmaxout_enum::kLabel))) { in_shape->at(softmaxout_enum::kLabel) = lshape1; } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) { } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) { diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h index 3d651c13d8ba..dfe9fa606e95 100644 --- a/src/operator/svm_output-inl.h +++ b/src/operator/svm_output-inl.h @@ -143,7 +143,7 @@ class SVMOutputProp : public OperatorProperty { using namespace mshadow; CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]"; const mxnet::TShape &dshape = in_shape->at(0); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; mxnet::TShape label_shape(dshape.ndim() - 1, -1); for (int i = 0; i + 1 < dshape.ndim(); ++i) label_shape[i] = dshape[i]; diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index 90afdeea858e..ac714c2cfcdd 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -366,13 +366,13 @@ inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape& ishape = (*in_attrs)[0]; - if (!shape_is_known(ishape)) return false; + if (!mxnet::ndim_is_known(ishape)) return false; const BroadcastToParam& param = nnvm::get(attrs.parsed); CHECK_EQ(ishape.ndim(), param.shape.ndim()) << "Operand of shape " << ishape << " cannot be broadcasted to " << param.shape; mxnet::TShape oshape = param.shape; for (int i = 0; i < ishape.ndim(); ++i) { - if (oshape[i] != 0) { + if (oshape[i] != -1) { CHECK(ishape[i] == oshape[i] || ishape[i] == 1) << "Array cannot be broadcasted from " << ishape << " to " << param.shape; } else { @@ -391,7 +391,7 @@ inline bool 
BroadcastLikeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& lhs_shape = (*in_attrs)[0]; mxnet::TShape& rhs_shape = (*in_attrs)[1]; - if (!shape_is_known(lhs_shape) || !shape_is_known(lhs_shape)) { + if (!mxnet::ndim_is_known(lhs_shape) || !mxnet::ndim_is_known(rhs_shape)) { return false; } @@ -405,7 +405,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs, oshape = mxnet::TShape(rhs_shape); for (int i = 0; i < lhs_shape.ndim(); ++i) { - if (rhs_shape[i] != 0) { + if (rhs_shape[i] != -1) { CHECK(lhs_shape[i] == rhs_shape[i] || lhs_shape[i] == 1) << "Array cannot be broadcasted from " << lhs_shape << " to " << rhs_shape; } else { diff --git a/src/operator/tensor/diag_op-inl.h b/src/operator/tensor/diag_op-inl.h index b90b09a36bd3..c95c1ce414f2 100644 --- a/src/operator/tensor/diag_op-inl.h +++ b/src/operator/tensor/diag_op-inl.h @@ -84,7 +84,7 @@ inline mxnet::TShape DiagShapeImpl(const mxnet::TShape& ishape, const int k, auto s = std::min(h, w); if (s < 0) { - s = 0; + s = -1; } if (x1 > x2) { @@ -114,7 +114,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& ishape = (*in_attrs)[0]; - if (!shape_is_known(ishape)) { + if (!mxnet::ndim_is_known(ishape)) { return false; } diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h index dfb3231a75a9..64a4d7cc15ff 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op.h +++ b/src/operator/tensor/elemwise_binary_broadcast_op.h @@ -48,7 +48,7 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& rhs = (*in_attrs)[1]; // avoid pre-mature shape inference. 
- if (lhs.ndim() == -1 || rhs.ndim() == -1) return false; + if (!mxnet::ndim_is_known(lhs) || !mxnet::ndim_is_known(rhs)) return false; if (lhs == rhs) { SHAPE_ASSIGN_CHECK(*out_attrs, 0, lhs); diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h index 3c4d34b3f4a5..bcad602c95c0 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -213,14 +214,13 @@ inline bool InitShape(const nnvm::NodeAttrs& attrs, const ParamType& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 0U); CHECK_EQ(out_attrs->size(), 1U); - if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param.shape)) return true; - for (int i=0 ; i < param.shape.ndim() ; ++i) { - if (param.shape[i] < 0U) { - LOG(FATAL) << "Shape cannot contain negative values " << param.shape; - } + mxnet::TShape param_shape = param.shape; + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(¶m_shape); } - SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape); - return true; + if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param_shape)) return true; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, param_shape); + return shape_is_known(out_attrs->at(0)); } template diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index d61267faefe6..9ce22020fab4 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -110,7 +110,7 @@ inline mxnet::TShape InferReshapeShape(const mxnet::Tuple& shape, CHECK_LT(src_idx, dshape_len-1); const int d1 = dshape_vec[src_idx++]; const int d2 = dshape_vec[src_idx++]; - if (d1 == -1 || d2 == -1) { + if (!mxnet::dim_size_is_known(d1) || !mxnet::dim_size_is_known(d2)) { tmp.push_back(-1); } else { tmp.push_back(d1 * d2); @@ -164,7 +164,7 @@ inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out int zero_axis = -1; int known_dim_size_prod = 1; for (int i = 0; i 
< in->ndim(); i++) { - if ((*in)[i] == -1) { + if (!mxnet::dim_size_is_known(*in, i)) { if (zero_axis != -1) return false; // more than 1 zero found. else @@ -185,7 +185,7 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == -1) return false; + if (!mxnet::ndim_is_known(dshape)) return false; mxnet::TShape oshape; if (param_.shape.ndim() != 0) { oshape = InferReshapeShape(param_.shape, dshape, param_.reverse); @@ -318,7 +318,7 @@ void Transpose(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const TransposeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; - if (param.axes.ndim() == -1) { + if (!mxnet::ndim_is_known(param.axes)) { mxnet::TShape axes(inputs[0].ndim(), -1); for (int i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; @@ -338,7 +338,7 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& shp = (*in_attrs)[0]; CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions"; mxnet::TShape ret(shp.ndim(), -1); - if (param.axes.ndim() == -1) { + if (!mxnet::ndim_is_known(param.axes)) { for (int i = 0; i < shp.ndim(); ++i) { ret[i] = shp[shp.ndim()-1-i]; } @@ -371,7 +371,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, const ExpandDimParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); - if (!shape_is_known(in_attrs->at(0)) && !shape_is_known(out_attrs->at(0))) { + if (!mxnet::ndim_is_known(in_attrs->at(0)) && !mxnet::ndim_is_known(out_attrs->at(0))) { return false; } @@ -405,7 +405,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < axis; ++i) ret[i] = oshape[i]; for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i]; SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret); - return shape_is_known(ret); + return 
shape_is_known(in_attrs->at(0)) && shape_is_known(out_attrs->at(0)); } // Currently MKLDNN only supports step = 1 or step has no value @@ -728,7 +728,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); const mxnet::TShape& dshape = (*in_attrs)[0]; - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; const SliceParam& param = nnvm::get(attrs.parsed); mxnet::TShape oshape = dshape; @@ -1119,9 +1119,9 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape& int* axis, index_t* begin, index_t* end) { *axis = param.axis; if (*axis < 0) { - *axis += static_cast(ishape.ndim()); + *axis += ishape.ndim(); } - CHECK(*axis < static_cast(ishape.ndim()) && *axis >= 0) << + CHECK(*axis < ishape.ndim() && *axis >= 0) << "Transformed axis must be smaller than the source ndim and larger than zero! Recieved axis=" << param.axis << ", src_ndim=" << ishape.ndim() << ", transformed axis=" << *axis; index_t axis_size = static_cast(ishape[*axis]); @@ -1130,7 +1130,7 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape& if (*begin < 0) { *begin += axis_size; } - if (axis_size) { + if (axis_size > 0) { if (!static_cast(param.end)) { *end = axis_size; } else { @@ -2595,7 +2595,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); mxnet::TShape dshape = in_attrs->at(split_enum::kData); mxnet::TShape ishape = in_attrs->at(split_enum::kData); - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; if (param.axis >= 0) { CHECK_LT(static_cast(param.axis), dshape.ndim()); } else { diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 1f8fc993b3fc..d44f1113e788 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -2265,7 +2265,7 @@ def 
test_reshape_new(src_shape, shape_args, reverse, dst_shape): for i in range(len(src_shape)): holdout_src_shape = list(src_shape) - holdout_src_shape[i] = -1 + holdout_src_shape[i] = 0 holdout_src_shape = tuple(holdout_src_shape) net = mx.sym.Variable('data') net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape)) @@ -4403,7 +4403,8 @@ def test_invalid_reps(): assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3)) test_normal_case() - test_empty_tensor() + with mx.numpy.enable_np_comp(): + test_empty_tensor() test_empty_reps() test_tile_backward() test_tile_numeric_gradient() @@ -4463,7 +4464,8 @@ def test_zero_depth(): test_normal_case(index_type=np.float64) test_normal_case(index_type=np.float32) test_normal_case(index_type=np.float16) - test_empty_indices() + with mx.numpy.enable_np_comp(): + test_empty_indices() test_zero_depth() From 098d189cf71b1bcfa611691815a5179cf3b25588 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 25 Mar 2019 21:08:41 -0700 Subject: [PATCH 05/32] Fix a bug to pass the test in test_contrib_rnn (#14520) * fix. * remove type conversion. * remove type cast. 
--- src/common/utils.h | 1 - src/ndarray/ndarray_function.cc | 6 ++++-- src/operator/nn/mkldnn/mkldnn_base-inl.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/common/utils.h b/src/common/utils.h index f3df2e15ec32..4843d7e06b7b 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -752,7 +752,6 @@ inline void ConvertToNumpyShape(mxnet::TShape* shape) { *shape = mxnet::TShape(); // unknown shape ndim = -1 } else { for (int j = 0; j < shape->ndim(); ++j) { - CHECK_GE((*shape)[j], 0) << "Legacy shape cannot have dim size < 0"; if ((*shape)[j] == 0) { // legacy shape dim_size = 0 means unknown (*shape)[j] = -1; // unknown dim size = -1 } diff --git a/src/ndarray/ndarray_function.cc b/src/ndarray/ndarray_function.cc index a613d5a3decc..8f72bc259afc 100644 --- a/src/ndarray/ndarray_function.cc +++ b/src/ndarray/ndarray_function.cc @@ -210,8 +210,6 @@ void ElementwiseSumContainsDnsImpl(mshadow::Stream* s, Kernel::Launch(s, out_data.Size(), out_data.dptr()); for (size_t i = 0; i < nds.size(); ++i) { const NDArray& nd = nds[i]; - const nnvm::dim_t num_rows = nd.shape()[0]; - const nnvm::dim_t num_cols = nd.shape()[1]; const TBlob& nd_data = nd.data(); if (i == 0) { @@ -234,6 +232,8 @@ void ElementwiseSumContainsDnsImpl(mshadow::Stream* s, case kCSRStorage: { const TBlob& nd_indices = nd.aux_data(csr::kIdx); const TBlob& nd_indptr = nd.aux_data(csr::kIndPtr); + const nnvm::dim_t num_rows = nd.shape()[0]; + const nnvm::dim_t num_cols = nd.shape()[1]; MSHADOW_IDX_TYPE_SWITCH(nd_indices.type_flag_, IType, { // indices type MSHADOW_IDX_TYPE_SWITCH(nd_indptr.type_flag_, CType, { // indptr type if (nd.storage_initialized()) { @@ -248,6 +248,8 @@ void ElementwiseSumContainsDnsImpl(mshadow::Stream* s, } case kRowSparseStorage: { const TBlob& nd_indices = nd.aux_data(rowsparse::kIdx); + const nnvm::dim_t num_rows = nd.shape()[0]; + const nnvm::dim_t num_cols = nd.shape()[1]; MSHADOW_IDX_TYPE_SWITCH(nd_indices.type_flag_, IType, { // indices type if 
(nd.storage_initialized()) { const nnvm::dim_t nz_rows = nd_indices.Size(); diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h index a460e33fa548..3da3f23d7683 100644 --- a/src/operator/nn/mkldnn/mkldnn_base-inl.h +++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h @@ -464,7 +464,7 @@ mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc p mkldnn_memory_format_t format); inline bool same_shape(const mxnet::TShape &shape, const mkldnn_dims_t dims, int ndims) { - if (shape.ndim() != (size_t)ndims) + if (shape.ndim() != ndims) return false; for (int i = 0; i < ndims; i++) if (shape[i] != dims[i]) From 9c77e3e4cbf35c7e96abeb0d1fc08894e61ed4fd Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Tue, 26 Mar 2019 17:07:13 -0700 Subject: [PATCH 06/32] [numpy] Fix test_dynamic_shape.test_dynamic_shape (#14538) * Initial commit * Address comments from Jun --- src/c_api/c_api_common.h | 5 +++-- src/imperative/imperative_utils.cc | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 329dc9adc7cf..55608b950866 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -91,8 +91,9 @@ struct MXAPIThreadLocalEntry { data->resize(shapes.size()); size_t size = 0; for (const auto& s : shapes) { - if (s.ndim() > 0); - size += s.ndim(); + if (s.ndim() > 0) { + size += s.ndim(); + } } buffer->resize(size); int *ptr = buffer->data(); diff --git a/src/imperative/imperative_utils.cc b/src/imperative/imperative_utils.cc index 6cb4a70324b5..733a47bfe6c1 100644 --- a/src/imperative/imperative_utils.cc +++ b/src/imperative/imperative_utils.cc @@ -19,6 +19,7 @@ #include "./imperative_utils.h" #include "./cached_op.h" +#include "../operator/operator_common.h" namespace mxnet { namespace imperative { @@ -186,7 +187,7 @@ void NaiveRunGraph( Imperative::Get()->InvokeOp(ctx, node.source->attrs, ndinputs, ndoutputs, req, dispatch_mode, state); for 
(size_t j = 0; j < ndoutputs.size(); ++j) { - if (ndoutputs[j]->shape().ndim() == 0) { + if (mxnet::op::shape_is_none(ndoutputs[j]->shape())) { ndoutputs[j]->WaitToRead(); ndoutputs[j]->SetShapeFromChunk(); } From ebc7d4da78d932e681788a83755762a36ff8e154 Mon Sep 17 00:00:00 2001 From: reminisce Date: Wed, 27 Mar 2019 13:49:59 -0700 Subject: [PATCH 07/32] [numpy] Fix numpy import in python2 (#14537) * Fix several test failures * Fix subgraph op infer shape * Fix sparse slice * Fix deconv infer shape * Fix numpy import compatibility problem in python2 --- python/mxnet/ndarray/_internal.py | 2 -- python/mxnet/ndarray/contrib.py | 1 + python/mxnet/ndarray/register.py | 7 ++-- python/mxnet/symbol/_internal.py | 2 -- python/mxnet/symbol/register.py | 7 ++-- src/common/utils.h | 5 +++ src/operator/leaky_relu-inl.h | 4 +-- src/operator/nn/deconvolution-inl.h | 12 ++++--- src/operator/nn/deconvolution.cc | 50 ++++++++++++++++++++------- src/operator/tensor/matrix_op-inl.h | 2 +- tests/python/unittest/test_ndarray.py | 6 +++- 11 files changed, 66 insertions(+), 32 deletions(-) diff --git a/python/mxnet/ndarray/_internal.py b/python/mxnet/ndarray/_internal.py index 5f3ce976dbc5..8045d9bd2b14 100644 --- a/python/mxnet/ndarray/_internal.py +++ b/python/mxnet/ndarray/_internal.py @@ -20,8 +20,6 @@ import os as _os import sys as _sys -import numpy as np - try: if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0: from .._ctypes.ndarray import NDArrayBase, CachedOp diff --git a/python/mxnet/ndarray/contrib.py b/python/mxnet/ndarray/contrib.py index 74c355dc1288..1718a2c68d13 100644 --- a/python/mxnet/ndarray/contrib.py +++ b/python/mxnet/ndarray/contrib.py @@ -18,6 +18,7 @@ # coding: utf-8 # pylint: disable=wildcard-import, unused-wildcard-import,redefined-outer-name """Contrib NDArray API of MXNet.""" +from __future__ import absolute_import import math import numpy as np from ..context import current_context diff --git a/python/mxnet/ndarray/register.py 
b/python/mxnet/ndarray/register.py index 05d7f17a8fc1..1ccf228698ba 100644 --- a/python/mxnet/ndarray/register.py +++ b/python/mxnet/ndarray/register.py @@ -16,9 +16,10 @@ # under the License. """Register backend ops in mxnet.ndarray namespace""" +from __future__ import absolute_import import os as _os import ctypes -import numpy as np # pylint: disable=unused-import +import numpy as _np # pylint: disable=unused-import from ._internal import NDArrayBase, _imperative_invoke # pylint: disable=unused-import from ..ndarray_doc import _build_doc @@ -103,7 +104,7 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name)) if dtype_name is not None: code.append(""" if '%s' in kwargs: - kwargs['%s'] = np.dtype(kwargs['%s']).name"""%( + kwargs['%s'] = _np.dtype(kwargs['%s']).name"""%( dtype_name, dtype_name, dtype_name)) code.append(""" _ = kwargs.pop('name', None) @@ -136,7 +137,7 @@ def %s(%s):"""%(func_name, ', '.join(signature))) code.append(""" if %s is not _Null: keys.append('%s') - vals.append(np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name)) + vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name)) if not signature_only: code.append(""" diff --git a/python/mxnet/symbol/_internal.py b/python/mxnet/symbol/_internal.py index 53fc684008cf..7e9787e32b1c 100644 --- a/python/mxnet/symbol/_internal.py +++ b/python/mxnet/symbol/_internal.py @@ -22,8 +22,6 @@ import sys as _sys import os as _os -import numpy as np - try: if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0: from .._ctypes.symbol import SymbolBase, _set_symbol_class diff --git a/python/mxnet/symbol/register.py b/python/mxnet/symbol/register.py index 15c8e5e1fa68..ac59f8b97f15 100644 --- a/python/mxnet/symbol/register.py +++ b/python/mxnet/symbol/register.py @@ -17,9 +17,10 @@ # pylint: disable=unused-import """Register backend ops in mxnet.symbol namespace.""" +from __future__ import absolute_import import os as _os import ctypes -import numpy as np +import numpy as _np from . 
import _internal from ._internal import SymbolBase, _symbol_creator @@ -109,7 +110,7 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name)) if dtype_name is not None: code.append(""" if '%s' in kwargs: - kwargs['%s'] = np.dtype(kwargs['%s']).name"""%( + kwargs['%s'] = _np.dtype(kwargs['%s']).name"""%( dtype_name, dtype_name, dtype_name)) code.append(""" attr = kwargs.pop('attr', None) @@ -175,7 +176,7 @@ def %s(%s):"""%(func_name, ', '.join(signature))) code.append(""" if %s is not _Null: _keys.append('%s') - _vals.append(np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name)) + _vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name)) code.append(""" if not hasattr(NameManager._current, "value"): diff --git a/src/common/utils.h b/src/common/utils.h index 4843d7e06b7b..4fb398d883a6 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -746,6 +746,11 @@ inline void ParallelCopy(DType* dst, const DType* src, index_t size) { * 4. -1 dim size means the dimension's size is unknown. * so that operator's infer shape function can work in backend. * \param shape to be converted. + * Note: It is possible that the shape to be converted is already + * numpy compatible. For example, when a subgraph operator's infer + * shape function is called from the infer shape pass of the whole + * graph, its input/output shapes have been converted to numpy + * compatible shapes. 
*/ inline void ConvertToNumpyShape(mxnet::TShape* shape) { if (shape->ndim() == 0) { // legacy shape ndim = 0 means unknown diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h index aef990010818..7f8638630145 100644 --- a/src/operator/leaky_relu-inl.h +++ b/src/operator/leaky_relu-inl.h @@ -355,10 +355,10 @@ class LeakyReLUProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1U) << "Input:[data]"; } const mxnet::TShape &dshape = in_shape->at(leakyrelu::kData); - if (dshape.ndim() == 0) return false; + if (!mxnet::ndim_is_known(dshape)) return false; if (param_.act_type == leakyrelu::kPReLU) { const mxnet::TShape &gshape = in_shape->at(leakyrelu::kGamma); - if (gshape.ndim() == 0) { + if (!mxnet::ndim_is_known(gshape)) { in_shape->at(leakyrelu::kGamma) = mxnet::TShape(Shape1(dshape[1])); } if (dshape == gshape) { diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index b28e47818392..5f3137f0dcb8 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -134,11 +134,13 @@ struct DeconvolutionParam : public dmlc::Parameter { for (size_t i = 0; i < ndim; i++) { // input.ndim() can be larger than ndim, in case that the complete input // shape was passed and not only the ndim last ones - o_pad[i] = stride[i] * (input[(input_ndim - ndim) + i] - 1) + DilatedKernelSize(i); - CHECK_GE(o_pad[i], target_shape[i]) << "too big target shape"; - o_pad[i] -= target_shape[i]; - o_adj[i] = o_pad[i] % 2; - o_pad[i] = (o_pad[i] + 1) / 2; + if (mxnet::dim_size_is_known(input, input_ndim - ndim + i)) { + o_pad[i] = stride[i] * (input[(input_ndim - ndim) + i] - 1) + DilatedKernelSize(i); + CHECK_GE(o_pad[i], target_shape[i]) << "too big target shape"; + o_pad[i] -= target_shape[i]; + o_adj[i] = o_pad[i] % 2; + o_pad[i] = (o_pad[i] + 1) / 2; + } } } else { for (size_t i = 0; i < ndim; i++) { diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index 
d8c91f7f96c8..09b255d009e0 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -54,7 +54,7 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, } out_shape->resize(1, mxnet::TShape()); const mxnet::TShape &dshape = (*in_shape)[deconv::kData]; - if (!shape_is_known(dshape)) return false; + if (!mxnet::ndim_is_known(dshape)) return false; if (param_.kernel.ndim() == 1) { // 1d conv @@ -90,8 +90,12 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, Shape<3> oshape; oshape[0] = dshape_ncw[0]; oshape[1] = param_.num_filter; - oshape[2] = param_.stride[0] * (dshape_ncw[2] - 1) + - dilated_ksize_x - 2 * o_pad[0] + o_adj[0]; + if (mxnet::dim_size_is_known(dshape_ncw[2])) { + oshape[2] = param_.stride[0] * (dshape_ncw[2] - 1) + + dilated_ksize_x - 2 * o_pad[0] + o_adj[0]; + } else { + oshape[2] = -1; + } if (param_.target_shape.ndim() > 0) { if (param_.target_shape[0] > 0) { @@ -141,10 +145,18 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, Shape<4> oshape; oshape[0] = dshape_nchw[0]; oshape[1] = param_.num_filter; - oshape[2] = param_.stride[0] * (dshape_nchw[2] - 1) + - dilated_ksize_y - 2 * o_pad[0] + o_adj[0]; - oshape[3] = param_.stride[1] * (dshape_nchw[3] - 1) + - dilated_ksize_x - 2 * o_pad[1] + o_adj[1]; + if (mxnet::dim_size_is_known(dshape_nchw[2])) { + oshape[2] = param_.stride[0] * (dshape_nchw[2] - 1) + + dilated_ksize_y - 2 * o_pad[0] + o_adj[0]; + } else { + oshape[2] = -1; + } + if (mxnet::dim_size_is_known(dshape_nchw[3])) { + oshape[3] = param_.stride[1] * (dshape_nchw[3] - 1) + + dilated_ksize_x - 2 * o_pad[1] + o_adj[1]; + } else { + oshape[3] = -1; + } if (param_.target_shape.ndim() > 1) { if (param_.target_shape[0] > 0) { @@ -203,12 +215,24 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs, Shape<5> oshape; oshape[0] = dshape_ncdhw[0]; oshape[1] = param_.num_filter; - oshape[2] = param_.stride[0] * (dshape_ncdhw[2] - 1) + - dilated_ksize_d - 2 * o_pad[0] + 
o_adj[0]; - oshape[3] = param_.stride[1] * (dshape_ncdhw[3] - 1) + - dilated_ksize_y - 2 * o_pad[1] + o_adj[1]; - oshape[4] = param_.stride[2] * (dshape_ncdhw[4] - 1) + - dilated_ksize_x - 2 * o_pad[2] + o_adj[2]; + if (mxnet::dim_size_is_known(dshape_ncdhw[2])) { + oshape[2] = param_.stride[0] * (dshape_ncdhw[2] - 1) + + dilated_ksize_d - 2 * o_pad[0] + o_adj[0]; + } else { + oshape[2] = -1; + } + if (mxnet::dim_size_is_known(dshape_ncdhw[3])) { + oshape[3] = param_.stride[1] * (dshape_ncdhw[3] - 1) + + dilated_ksize_y - 2 * o_pad[1] + o_adj[1]; + } else { + oshape[3] = -1; + } + if (mxnet::dim_size_is_known(dshape_ncdhw[4])) { + oshape[4] = param_.stride[2] * (dshape_ncdhw[4] - 1) + + dilated_ksize_x - 2 * o_pad[2] + o_adj[2]; + } else { + oshape[4] = -1; + } if (param_.target_shape.ndim() > 2) { if (param_.target_shape[0] > 0) { diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 9ce22020fab4..e91f904debb9 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -598,7 +598,7 @@ void SliceCsrImpl(const SliceParam ¶m, const OpContext& ctx, mxnet::TShape begin(N, -1), end(N, -1); for (int i = 0; i < N; ++i) { int s = 0; - if (param.begin[i]) { + if (i < param.begin.ndim() && param.begin[i]) { s = *param.begin[i]; if (s < 0) s += ishape[i]; } diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index 2446107ad466..1bdb7d51df67 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -122,7 +122,11 @@ def test_ndarray_setitem(): # numpy assignment for empty axis for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]: - x = mx.nd.zeros(trivial_shape) + if trivial_shape == tuple(): + with mx.numpy.enable_np_comp(): + x = mx.nd.zeros(trivial_shape) + else: + x = mx.nd.zeros(trivial_shape) x[:] = np.ones(trivial_shape) x_np = np.ones(trivial_shape, dtype=x.dtype) assert x.shape == trivial_shape From 
c63555e5e39c9eaad53e2d8b9f99edfcf3c65d0c Mon Sep 17 00:00:00 2001 From: Tao Lv Date: Fri, 29 Mar 2019 10:50:41 +0800 Subject: [PATCH 08/32] fix concat and slice (#14549) --- src/operator/nn/mkldnn/mkldnn_concat.cc | 12 ++++++------ src/operator/nn/mkldnn/mkldnn_slice.cc | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc index 8e2b57781a18..7b266efc2a14 100644 --- a/src/operator/nn/mkldnn/mkldnn_concat.cc +++ b/src/operator/nn/mkldnn/mkldnn_concat.cc @@ -92,13 +92,13 @@ void MKLDNNConcatBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx, auto gz_mem = inputs[0].GetMKLDNNData(); mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc(); /* init the offset */ - mkldnn::memory::dims offsets = {0, 0, 0, 0}; + mkldnn::memory::dims offsets(outputs[0].shape().ndim()); + for (auto &v : offsets) { + v = 0; + } + for (int i = 0; i < num_in_data; i++) { - mkldnn::memory::dims diff_src_tz - = {static_cast(outputs[i].shape()[0]), - static_cast(outputs[i].shape()[1]), - static_cast(outputs[i].shape()[2]), - static_cast(outputs[i].shape()[3])}; + mkldnn::memory::dims diff_src_tz(outputs[i].shape().begin(), outputs[i].shape().end()); auto diff_src_mpd = outputs[i].GetMKLDNNData()->get_primitive_desc(); auto gradi_mem_ = CreateMKLDNNMem(outputs[i], diff_src_mpd, req[i]); // create view from gy to gxs[i] diff --git a/src/operator/nn/mkldnn/mkldnn_slice.cc b/src/operator/nn/mkldnn/mkldnn_slice.cc index 3f3d82020598..96a8afdab6e2 100644 --- a/src/operator/nn/mkldnn/mkldnn_slice.cc +++ b/src/operator/nn/mkldnn/mkldnn_slice.cc @@ -42,7 +42,7 @@ MKLDNNSliceFwd::MKLDNNSliceFwd(const SliceParam ¶m, mkldnn::memory::dims offsets(N); for (uint32_t i = 0; i < N; ++i) { int s = 0; - if (param.begin[i]) { + if (i < param.begin.ndim() && param.begin[i]) { s = *param.begin[i]; if (s < 0) s += ishape[i]; } From 48cf65949fa5ce4cf0c2b1a317eca42ffdc831f2 Mon Sep 17 00:00:00 2001 From: 
Tong He Date: Tue, 2 Apr 2019 21:10:54 -0700 Subject: [PATCH 09/32] fix R-package (#14536) --- R-package/src/ndarray.cc | 4 ++-- R-package/src/symbol.cc | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R-package/src/ndarray.cc b/R-package/src/ndarray.cc index 94d24f3fb46b..d08671a3f026 100644 --- a/R-package/src/ndarray.cc +++ b/R-package/src/ndarray.cc @@ -179,8 +179,8 @@ Rcpp::RObject NDArrayPacker::CreateNDArrayPacker() { } Rcpp::Dimension NDArray::dim() const { - mx_uint ndim; - const mx_uint *pshape; + int ndim; + const int *pshape; MX_CALL(MXNDArrayGetShape( ptr_->handle, &ndim, &pshape)); Rcpp::IntegerVector dat(pshape, pshape + ndim); diff --git a/R-package/src/symbol.cc b/R-package/src/symbol.cc index 031c9a254019..90ff9ef1dd67 100644 --- a/R-package/src/symbol.cc +++ b/R-package/src/symbol.cc @@ -167,8 +167,8 @@ Symbol::RObjectType Symbol::GetOutput(mx_uint index) const { // helper function to convert shape into Rcpp vector inline Rcpp::List BuildShapeData(mx_uint shape_size, - const mx_uint *shape_ndim, - const mx_uint **shape_data, + const int *shape_ndim, + const int **shape_data, const std::vector &names) { Rcpp::List ret(shape_size); for (mx_uint i = 0; i < shape_size; ++i) { @@ -185,7 +185,7 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const { << "Need to pass parameters in key=value style.\n"; std::vector keys = kwargs.names(); std::vector arg_ind_ptr(1, 0); - std::vector arg_shape_data; + std::vector arg_shape_data; for (size_t i = 0; i < kwargs.size(); ++i) { RCHECK(keys[i].length() != 0) @@ -197,14 +197,14 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const { std::vector c_keys = CKeys(keys); mx_uint in_shape_size; - const mx_uint *in_shape_ndim; - const mx_uint **in_shape_data; + const int *in_shape_ndim; + const int **in_shape_data; mx_uint out_shape_size; - const mx_uint *out_shape_ndim; - const mx_uint **out_shape_data; + const int *out_shape_ndim; + const int **out_shape_data; mx_uint 
aux_shape_size; - const mx_uint *aux_shape_ndim; - const mx_uint **aux_shape_data; + const int *aux_shape_ndim; + const int **aux_shape_data; int complete; MX_CALL(MXSymbolInferShape( From b165b35c932c23e94556a2eecb712015d1e53736 Mon Sep 17 00:00:00 2001 From: reminisce Date: Wed, 3 Apr 2019 13:40:11 -0700 Subject: [PATCH 10/32] Fix cpp package build after using new shape definition (#14554) --- cpp-package/include/mxnet-cpp/ndarray.hpp | 6 +++--- cpp-package/include/mxnet-cpp/symbol.hpp | 20 ++++++++++---------- include/mxnet/tensor_blob.h | 2 ++ include/mxnet/tuple.h | 6 +++++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/cpp-package/include/mxnet-cpp/ndarray.hpp b/cpp-package/include/mxnet-cpp/ndarray.hpp index b667542bffb5..bf1d82ca33b4 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.hpp +++ b/cpp-package/include/mxnet-cpp/ndarray.hpp @@ -397,11 +397,11 @@ inline size_t NDArray::Size() const { } inline std::vector NDArray::GetShape() const { - const mx_uint *out_pdata; - mx_uint out_dim; + const int *out_pdata; + int out_dim; MXNDArrayGetShape(blob_ptr_->handle_, &out_dim, &out_pdata); std::vector ret; - for (mx_uint i = 0; i < out_dim; ++i) { + for (int i = 0; i < out_dim; ++i) { ret.push_back(out_pdata[i]); } return ret; diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp index aed963949060..d82b7abaf614 100644 --- a/cpp-package/include/mxnet-cpp/symbol.hpp +++ b/cpp-package/include/mxnet-cpp/symbol.hpp @@ -188,7 +188,7 @@ inline void Symbol::InferShape( std::vector keys; std::vector arg_ind_ptr; - std::vector arg_shape_data; + std::vector arg_shape_data; for (const auto &arg : arg_shapes) { keys.push_back(arg.first.c_str()); @@ -200,14 +200,14 @@ inline void Symbol::InferShape( arg_ind_ptr.push_back(arg_shape_data.size()); mx_uint in_shape_size; - const mx_uint *in_shape_ndim; - const mx_uint **in_shape_data; + const int*in_shape_ndim; + const int **in_shape_data; mx_uint out_shape_size; - 
const mx_uint *out_shape_ndim; - const mx_uint **out_shape_data; + const int *out_shape_ndim; + const int **out_shape_data; mx_uint aux_shape_size; - const mx_uint *aux_shape_ndim; - const mx_uint **aux_shape_data; + const int *aux_shape_ndim; + const int **aux_shape_data; int complete; CHECK_EQ(MXSymbolInferShape(GetHandle(), keys.size(), keys.data(), @@ -221,19 +221,19 @@ inline void Symbol::InferShape( if (complete) { for (mx_uint i = 0; i < in_shape_size; ++i) { in_shape->push_back(std::vector()); - for (mx_uint j = 0; j < in_shape_ndim[i]; ++j) { + for (int j = 0; j < in_shape_ndim[i]; ++j) { (*in_shape)[i].push_back(in_shape_data[i][j]); } } for (mx_uint i = 0; i < aux_shape_size; ++i) { aux_shape->push_back(std::vector()); - for (mx_uint j = 0; j < aux_shape_ndim[i]; ++j) { + for (int j = 0; j < aux_shape_ndim[i]; ++j) { (*aux_shape)[i].push_back(aux_shape_data[i][j]); } } for (mx_uint i = 0; i < out_shape_size; ++i) { out_shape->push_back(std::vector()); - for (mx_uint j = 0; j < out_shape_ndim[i]; ++j) { + for (int j = 0; j < out_shape_ndim[i]; ++j) { (*out_shape)[i].push_back(out_shape_data[i][j]); } } diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h index 45d4c7fda639..a7a57266dab8 100755 --- a/include/mxnet/tensor_blob.h +++ b/include/mxnet/tensor_blob.h @@ -418,6 +418,8 @@ class TBlob { namespace dmlc { // Add a few patches to support mxnet::TShape in dmlc/parameter. 
DMLC_DECLARE_TYPE_NAME(mxnet::TShape, "Shape(tuple)"); +DMLC_DECLARE_TYPE_NAME(mxnet::Tuple, "Shape(tuple)"); +DMLC_DECLARE_TYPE_NAME(mxnet::Tuple>, "Shape(tuple)"); DMLC_DECLARE_TYPE_NAME(nnvm::Tuple, "Shape(tuple)"); DMLC_DECLARE_TYPE_NAME(nnvm::Tuple>, "Shape(tuple)"); diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index d83e843033e3..c5a358628ccd 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -236,7 +236,10 @@ class Tuple { */ friend std::ostream &operator<<(std::ostream &os, const Tuple &t) { if (t.ndim() == -1) { - os << "UNKNOWN_SHAPE"; + // If t is an unknown shape, return string "None". + // This is consistent with returning unknown shape in Python and generating + // C++ operator APIs by OpWrapperGenerator.py (defaultString) in cpp-package. + os << "None"; return os; } os << '['; @@ -727,6 +730,7 @@ struct hash { namespace dmlc { /*! \brief description for optional TShape */ DMLC_DECLARE_TYPE_NAME(optional, "Shape or None"); +DMLC_DECLARE_TYPE_NAME(optional>, "Shape or None"); // avoid low version of MSVC #if !defined(_MSC_VER) template From bff0bdce6ff83e252ad055e168661f34895d053e Mon Sep 17 00:00:00 2001 From: reminisce Date: Wed, 3 Apr 2019 20:17:23 -0700 Subject: [PATCH 11/32] Fix pooling_v1 and deformable_convolution param initialization (#14577) * Fix pooling_v1 param initialization * Fix deformable_convolution param initialization --- .../contrib/deformable_convolution-inl.h | 8 ++--- src/operator/pooling_v1-inl.h | 33 ++++++++++--------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h index 936df7f1bcd4..a7e22f548151 100644 --- a/src/operator/contrib/deformable_convolution-inl.h +++ b/src/operator/contrib/deformable_convolution-inl.h @@ -129,7 +129,7 @@ class DeformableConvolutionOp : public Operator { // calculate the shape of col_buffer mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, -1); 
col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size(); - for (size_t i = 1; i < col_buffer_shape.ndim(); ++i) { + for (int i = 1; i < col_buffer_shape.ndim(); ++i) { col_buffer_shape[i] = out_data[0].shape_[i + 1]; } // create a column buffer using workspace and col_buffer_shape @@ -347,9 +347,9 @@ class DeformableConvolutionProp : public OperatorProperty { param_.Init(kwargs); if (param_.kernel.ndim() == 2) { param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCHW; - if (mxnet::op::shape_is_none(param_.stride)) param_.stride = Shape2(1, 1); - if (mxnet::op::shape_is_none(param_.dilate)) param_.dilate = Shape2(1, 1); - if (mxnet::op::shape_is_none(param_.pad)) param_.pad = Shape2(0, 0); + if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); + if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1); + if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); } else { LOG(FATAL) << "not implemented"; } diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h index 21ba270c9d42..22a166cbb6cc 100644 --- a/src/operator/pooling_v1-inl.h +++ b/src/operator/pooling_v1-inl.h @@ -55,7 +55,7 @@ struct PoolingV1Param : public dmlc::Parameter { int pooling_convention; bool global_pool; DMLC_DECLARE_PARAMETER(PoolingV1Param) { - DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0)) .enforce_nonzero() .describe("pooling kernel size: (y, x) or (d, y, x)"); @@ -73,11 +73,11 @@ struct PoolingV1Param : public dmlc::Parameter { .add_enum("valid", pool_v1_enum::kValid) .describe("Pooling convention to be applied."); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0)) .enforce_nonzero() .describe("stride: for pooling (y, x) or (d, y, x)"); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0)) .describe("pad for pooling: (y, x) or (d, y, x)"); } }; @@ 
-217,19 +217,20 @@ class PoolingV1Prop : public OperatorProperty { void Init(const std::vector >& kwargs) override { using namespace mshadow; param_.Init(kwargs); - if (!param_.global_pool) { - if (param_.kernel.ndim() == 2) { - if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); - } else { - CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim() << "D pooling not supported"; - if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1); - if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0); - } - CHECK_EQ(param_.stride.ndim(), param_.kernel.ndim()) - << "stride and kernel should have the same length"; - CHECK_EQ(param_.pad.ndim(), param_.kernel.ndim()) - << "pad and kernel should have the same length"; + if (param_.kernel.ndim() == 1) { + if (param_.stride.ndim() == 0) param_.stride = Shape1(1); + if (param_.pad.ndim() == 0) param_.pad = Shape1(0); + } else if (param_.kernel.ndim() == 2) { + if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1); + if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0); + } else { + // ignore kernel size only if global_pool not assigned false + if (param_.global_pool == false) { + CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim() + << "D pooling not supported"; + } + if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1); + if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0); } } From ebecad1a8107f7dd7df0571b3558ae2b9cc4ff67 Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Wed, 3 Apr 2019 20:17:48 -0700 Subject: [PATCH 12/32] [Numpy] Misc fix (#14612) * [Numpy] Misc Fix * fix build * !shape_is_none => shape_is_known * Address comments * Fix --- include/mxnet/ndarray.h | 2 +- src/c_api/c_predict_api.cc | 1 + src/imperative/imperative_utils.h | 3 ++- src/kvstore/gradient_compression.cc | 10 +++++----- src/ndarray/ndarray.cc | 11 +++++++---- src/ndarray/ndarray_function.h | 2 +- 6 files changed, 17 insertions(+), 12 deletions(-) 
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h index 2232ebe7be40..13fb42ce521e 100644 --- a/include/mxnet/ndarray.h +++ b/include/mxnet/ndarray.h @@ -956,7 +956,7 @@ class NDArray { /*! \brief set the shape for ith aux data, and update storage shape if necessary */ inline void set_aux_shape(const size_t i, const mxnet::TShape& shape) { aux_shapes[i] = shape; - if (storage_shape.ndim() > 0) { + if (storage_shape.ndim() >= 0) { if (storage_type == kRowSparseStorage && i == rowsparse::kIdx) { storage_shape[0] = shape[0]; } else if (storage_type == kCSRStorage && i == csr::kIdx) { diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc index 3b9f43d86079..7de23ef935ef 100644 --- a/src/c_api/c_predict_api.cc +++ b/src/c_api/c_predict_api.cc @@ -436,6 +436,7 @@ int MXPredGetOutputShape(PredictorHandle handle, << "Index exceed number of outputs"; const mxnet::TShape& s = p->out_shapes[out_index]; + CHECK_GE(s.ndim(), 0); p->out_shapes_buffer.resize(s.ndim()); nnvm::ShapeTypeCast(s.begin(), s.end(), p->out_shapes_buffer.data()); *shape_data = p->out_shapes_buffer.data(); diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index 6864428d2559..d058df4b3806 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -31,6 +31,7 @@ #include "../common/utils.h" #include "../common/exec_utils.h" #include "../operator/nn/mkldnn/mkldnn_base-inl.h" +#include "../operator/operator_common.h" #ifndef MXNET_IMPERATIVE_IMPERATIVE_UTILS_H_ #define MXNET_IMPERATIVE_IMPERATIVE_UTILS_H_ @@ -196,7 +197,7 @@ inline void SetShapeType(const Context& ctx, for (size_t i = 0; i < outputs.size(); ++i) { NDArrayStorageType storage_type = static_cast(out_storage_types[i]); - if (outputs[i]->is_none() || outputs[i]->shape().ndim() == 0) { + if (outputs[i]->is_none() || mxnet::op::shape_is_none(outputs[i]->shape())) { if (is_dynamic_shape_existing) { // once there is dynamic shape somewhere, we could not 
pre-determine the shape. *outputs[i] = NDArray(ctx, out_types[i]); diff --git a/src/kvstore/gradient_compression.cc b/src/kvstore/gradient_compression.cc index e4a06fa9a1f2..30aaec91e27f 100644 --- a/src/kvstore/gradient_compression.cc +++ b/src/kvstore/gradient_compression.cc @@ -100,9 +100,9 @@ int64_t GradientCompression::GetCompressedSize(const int64_t original_size) { void GradientCompression::Quantize(const mxnet::NDArray &from, mxnet::NDArray *to, mxnet::NDArray *residual, const int priority) { - CHECK(from.shape().ndim() != 0) << "source operand has zero dimension shape"; - CHECK(to->shape().ndim() != 0) << "destination operand has zero dimension shape"; - CHECK(residual->shape().ndim() != 0) << "residual operand has zero dimension shape"; + CHECK(shape_is_known(from.shape())) << "source operand has undefined shape"; + CHECK(shape_is_known(to->shape())) << "destination operand has undefined shape"; + CHECK(shape_is_known(residual->shape())) << "residual operand has undefined shape"; const int a = from.ctx().dev_mask(); const int b = to->ctx().dev_mask(); const float threshold = threshold_; @@ -137,8 +137,8 @@ void GradientCompression::Quantize(const mxnet::NDArray &from, mxnet::NDArray *t void GradientCompression::Dequantize(const mxnet::NDArray &from, mxnet::NDArray *to, const int priority) { - CHECK(from.shape().ndim() != 0) << "source operands has zero dimension shape"; - CHECK(to->shape().ndim() != 0) << "destination operand has zero dimension shape"; + CHECK(shape_is_known(from.shape())) << "source operand has undefined shape"; + CHECK(shape_is_known(to->shape())) << "destination operand has undefined shape"; const int a = from.ctx().dev_mask(); const int b = to->ctx().dev_mask(); const float threshold = threshold_; diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 04518e0feb77..604000028bf1 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -1191,8 +1191,8 @@ void CopyFromTo(const NDArray& from, const NDArray& to, int 
priority, bool is_op CHECK(from.shape() == to.shape()) << "operands shape mismatch" << "from.shape = " << from.shape() << " to.shape=" << to.shape(); - CHECK(from.shape().ndim() != 0) - << "source operands have zero dimension shape"; + CHECK(!mxnet::op::shape_is_none(from.shape())) + << "source operands have undefined shape"; // important: callback must always capture by value const Context from_ctx = from.ctx(); const int a = from_ctx.dev_mask(); @@ -1663,7 +1663,7 @@ bool NDArray::LegacyLoad(dmlc::Stream *strm, const uint32_t magic) { // load shape mxnet::TShape shape; if (!LegacyTShapeLoad(strm, &shape, magic)) return false; - if (shape.ndim() == 0) { + if (mxnet::op::shape_is_none(shape)) { *this = NDArray(); return true; } // load context @@ -1711,7 +1711,10 @@ bool NDArray::Load(dmlc::Stream *strm) { // load shape mxnet::TShape shape; if (!shape.Load(strm)) return false; - if (shape.ndim() == 0) { + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToNumpyShape(&shape); + } + if (mxnet::op::shape_is_none(shape)) { *this = NDArray(); return true; } diff --git a/src/ndarray/ndarray_function.h b/src/ndarray/ndarray_function.h index 70b626dbb9b7..505bd205a8d5 100644 --- a/src/ndarray/ndarray_function.h +++ b/src/ndarray/ndarray_function.h @@ -40,7 +40,7 @@ namespace ndarray { struct BinaryBase { inline static mxnet::TShape GetShape(const mxnet::TShape &lshape, const mxnet::TShape &rshape) { CHECK(lshape == rshape) << "operands shape mismatch"; - CHECK(lshape.ndim() != 0) << "source operand have zero dimension shape"; + CHECK(!mxnet::op::shape_is_none(lshape)) << "source operand have zero dimension shape"; return lshape; } }; From 4c35ade5d1247a82db2c25cf7ff4f09b8ae1e031 Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Wed, 3 Apr 2019 20:20:07 -0700 Subject: [PATCH 13/32] [Numpy] fix test_operator_gpu.test_upsampling_bilinear_with_type (#14557) * Fix test_operator_gpu.test_upsampling_bilinear_with_type * Address comments --- 
src/operator/nn/deconvolution-inl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index 5f3137f0dcb8..e82a073ea08d 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -143,9 +143,9 @@ struct DeconvolutionParam : public dmlc::Parameter { } } } else { - for (size_t i = 0; i < ndim; i++) { - o_pad[i] = pad[i]; - o_adj[i] = adj[i]; + for (int i = 0; i < (int) ndim; i++) { + o_pad[i] = i < pad.ndim() ? pad[i] : 0; + o_adj[i] = i < adj.ndim() ? adj[i] : 0; } } } From cdc9023b4e167794452dd5229cc37aa7dd0d39a8 Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Fri, 5 Apr 2019 10:21:55 -0700 Subject: [PATCH 14/32] [Numpy] Java/Scala modification (#14625) * modify jni to support 0 dim/shape * fix transpose axes default value --- .../scala/org/apache/mxnet/Executor.scala | 142 ++++------- .../main/scala/org/apache/mxnet/LibInfo.scala | 32 ++- .../main/scala/org/apache/mxnet/NDArray.scala | 10 +- .../scala/org/apache/mxnet/NumpyScope.scala | 58 +++++ .../main/scala/org/apache/mxnet/Symbol.scala | 38 ++- .../org/apache/mxnet/NumpyScopeSuite.scala | 34 +++ .../apache/mxnet/utils/CToScalaUtils.scala | 3 +- .../native/org_apache_mxnet_native_c_api.cc | 239 +++++++++++++++--- .../native/org_apache_mxnet_native_c_api.h | 32 +++ src/operator/tensor/matrix_op-inl.h | 6 +- 10 files changed, 452 insertions(+), 142 deletions(-) create mode 100644 scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala create mode 100644 scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala index aec44023a5d3..f51424b7edf6 100644 --- a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala +++ b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala @@ -61,10 +61,6 @@ class 
Executor private[mxnet](private[mxnet] val handle: ExecutorHandle, protected var monitorCallback: MXMonitorCallback = null private val logger: Logger = LoggerFactory.getLogger(classOf[Executor]) - private[mxnet] var ownsArgArrays = false - private[mxnet] var ownsGradArrays = false - private[mxnet] var ownsAuxArrays = false - override def nativeAddress: CPtrAddress = handle override def nativeDeAllocator: (CPtrAddress => Int) = _LIB.mxExecutorFree // cannot determine the off-heap size of this object @@ -75,17 +71,12 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle, if (!super.isDisposed) { super.dispose() outputs.foreach(o => o.dispose()) - // Symbol.bind clones symbol when creating the executor so we need to dispose of the clone - symbol.dispose() - if (ownsArgArrays && argArrays != null) {argArrays.foreach(a => a.dispose())} - if (ownsGradArrays && gradArrays != null) {gradArrays.foreach( + if (argArrays != null) {argArrays.foreach(a => a.dispose())} + if (gradArrays != null) {gradArrays.foreach( // Symbol will sometimes fill this with nulls so we've got to check the elements too a => if (a != null) {a.dispose()}) } - if (ownsAuxArrays && auxArrays != null) {auxArrays.foreach(a => a.dispose())} - if (_argDict != null) {_argDict.foreach(a => a._2.dispose())} - if (_gradDict != null) {_gradDict.foreach(a => a._2.dispose())} - if (_auxDict != null) {_auxDict.foreach(a => a._2.dispose())} + if (auxArrays != null) {auxArrays.foreach(a => a.dispose())} } } @@ -104,95 +95,58 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle, */ def reshape(partialShaping: Boolean = false, allowUpSizing: Boolean = false, kwargs: Map[String, Shape]): Executor = { - var setArgOwner = false - var setAuxOwner = false - var setGradOwner = false - val (argShapes, _, auxShapes) = this.symbol.inferShape(kwargs) - // TODO: more precise error message should be provided by backend - require(argShapes != null, "Shape inference failed." 
+ - s"Known shapes are $kwargs for symbol arguments ${symbol.listArguments()} " + - s"and aux states ${symbol.listAuxiliaryStates()}") - - var newArgDict = Map[String, NDArray]() - var newGradDict = Map[String, NDArray]() - this.symbol.listArguments().zipWithIndex.foreach { case (name, i) => - val newShape = argShapes(i) - val arr = this.argArrays(i) - val dArr = if (this.gradArrays == null) null else this.gradArrays(i) - if (partialShaping || kwargs.contains(name) || newShape.equals(arr.shape)) { - if (newShape.product > arr.shape.product) { - require(allowUpSizing, s"New shape of arg:$name larger than original. " + - "First making a big executor and then down sizing it " + - "is more efficient than the reverse." + - "If you really want to up size, set allowUpSizing = true " + - "to enable allocation of new arrays.") - newArgDict = newArgDict + (name -> NDArray.empty(newShape, arr.context, arr.dtype)) - setArgOwner = true - if (dArr != null) { - newGradDict = newGradDict + (name -> NDArray.empty(newShape, dArr.context, dArr.dtype)) - setGradOwner = true - } - } else { - newArgDict = newArgDict + (name -> arr.reshape(newShape.toArray)) - if (dArr != null) { - newGradDict = newGradDict + (name -> dArr.reshape(newShape.toArray)) - } - } - } else { - throw new AssertionError(s"Shape of unspecified array arg:$name changed." + - "This can cause the new executor to not share parameters " + - "with the old one. Please check for error in network." 
+ - "If this is intended, set partialShaping = true to suppress this warning.") - } - } + val providedArgShapeNames = kwargs.keys + val providedArgShapeData = kwargs.values.flatMap(_.toVector) + val providedArgShapeIdx = kwargs.values.scanLeft(0)((sum, shape) => sum + shape.size) - var newAuxDict = Map[String, NDArray]() - val zip3 = (this.symbol.listAuxiliaryStates(), auxShapes, this.auxArrays).zipped - zip3.foreach { case (name, newShape, arr) => - if (partialShaping || newShape.equals(arr.shape)) { - if (newShape.product > arr.shape.product) { - require(allowUpSizing, s"New shape of aux:$name larger than original. " + - "First making a big executor and then down sizing it " + - "is more efficient than the reverse." + - "If you really want to up size, set allowUpSizing = true " + - "to enable allocation of new arrays.") - newAuxDict = newAuxDict + (name -> NDArray.empty(newShape, arr.context)) - setAuxOwner = true - } else { - newAuxDict = newAuxDict + (name -> arr.reshape(newShape.toArray)) - } - } else { - throw new AssertionError(s"Shape of unspecified array aux:$name changed." + - "This can cause the new executor to not share parameters " + - "with the old one. Please check for error in network." 
+ - "If this is intended, set partialShaping = true to suppress this warning.") - } + val ctxMapKeys = if (_group2ctx != null) _group2ctx.keys.toArray else Array.empty[String] + val ctxMapDevTypes = if (_group2ctx != null) { + _group2ctx.values.map(_.deviceTypeid).toArray + } else { + Array.empty[Int] } - val reshapedExecutor = if (this._gradsReq.isInstanceOf[Seq[_]]) { - this.symbol.bind(this._ctx, - newArgDict, - newGradDict, - this._gradsReq.asInstanceOf[Seq[String]], - newAuxDict, - this._group2ctx, - this) + val ctxMapDevIds = if (_group2ctx != null) { + _group2ctx.values.map(_.deviceId).toArray } else { - this.symbol.bind(this._ctx, - newArgDict, - newGradDict, - this._gradsReq.asInstanceOf[Map[String, String]], - newAuxDict, - this._group2ctx, - this) + Array.empty[Int] } - // This method has created new NDArrays that will need to be managed by the new Executor - if (setArgOwner) reshapedExecutor.ownsArgArrays = true - if (setGradOwner) reshapedExecutor.ownsGradArrays = true - if (setAuxOwner) reshapedExecutor.ownsAuxArrays = true + val inArgs = ArrayBuffer.empty[NDArrayHandle] + val argGrads = ArrayBuffer.empty[NDArrayHandle] + val auxStates = ArrayBuffer.empty[NDArrayHandle] + val outHandle = new ExecutorHandleRef() + + checkCall(_LIB.mxExecutorReshape( + if (partialShaping) 1 else 0, + if (allowUpSizing) 1 else 0, + _ctx.deviceTypeid, + _ctx.deviceId, + ctxMapKeys.toArray, + ctxMapDevTypes.toArray, + ctxMapDevIds.toArray, + providedArgShapeNames.toArray, + providedArgShapeData.toArray, + providedArgShapeIdx.toArray, + inArgs, + argGrads, + auxStates, + this.handle, + outHandle)) + + val argArrays = inArgs.map(new NDArray(_)).toArray + val gradArrays = argGrads.map(handle => + if (handle == 0) null else new NDArray(handle)).toArray + val auxArrays = auxStates.map(new NDArray(_)).toArray - reshapedExecutor + val executor = new Executor(outHandle.value, this.symbol) + executor._ctx = this._ctx + executor._gradsReq = this._gradsReq + executor._group2ctx = 
this._group2ctx + executor.argArrays = argArrays + executor.gradArrays = gradArrays + executor.auxArrays = auxArrays + executor } /** diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala b/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala index 40fc0951e885..aba618540141 100644 --- a/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala +++ b/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala @@ -188,6 +188,23 @@ private[mxnet] class LibInfo { grads: Array[NDArrayHandle]): Int @native def mxExecutorPrint(handle: ExecutorHandle, debugStr: RefString): Int @native def mxExecutorSetMonitorCallback(handle: ExecutorHandle, callback: MXMonitorCallback): Int + // scalastyle:off parameterNum + @native def mxExecutorReshape(partialShaping: Int, + allowUpSizing: Int, + devType: Int, + devId: Int, + mapKeys: Array[String], + mapDevTypes: Array[Int], + mapDevIds: Array[Int], + providedArgShapeNames: Array[String], + providedArgShapeData: Array[Int], + providedArgShapeIdx: Array[Int], + inArgs: ArrayBuffer[NDArrayHandle], + argGrads: ArrayBuffer[NDArrayHandle], + auxStates: ArrayBuffer[NDArrayHandle], + sharedExec: ExecutorHandle, + out: ExecutorHandleRef): Int + // scalastyle:on parameterNum // Symbols @native def mxSymbolListAtomicSymbolCreators(symbolList: ListBuffer[SymbolHandle]): Int @@ -240,11 +257,20 @@ private[mxnet] class LibInfo { numArgs: MXUint, keys: Array[String], argIndPtr: Array[MXUint], - argShapeData: Array[MXUint], + argShapeData: Array[Int], inShapeData: ListBuffer[Array[Int]], outShapeData: ListBuffer[Array[Int]], auxShapeData: ListBuffer[Array[Int]], complete: RefInt): Int + @native def mxSymbolInferShapePartial(handle: SymbolHandle, + numArgs: MXUint, + keys: Array[String], + argIndPtr: Array[MXUint], + argShapeData: Array[Int], + inShapeData: ListBuffer[Array[Int]], + outShapeData: ListBuffer[Array[Int]], + auxShapeData: ListBuffer[Array[Int]], + complete: RefInt): Int @native def 
mxSymbolGetOutput(handle: SymbolHandle, index: Int, out: SymbolHandleRef): Int @native def mxSymbolSaveToJSON(handle: SymbolHandle, out: RefString): Int @native def mxSymbolCreateFromJSON(json: String, handle: SymbolHandleRef): Int @@ -322,4 +348,8 @@ private[mxnet] class LibInfo { @native def mxSetProfilerConfig(keys: Array[String], vals: Array[String]): Int @native def mxSetProfilerState(state: Int): Int @native def mxDumpProfile(finished: Int): Int + + // Numpy + @native def mxIsNumpyCompatible(compatible: RefInt): Int + @native def mxSetIsNumpyCompatible(isNpComp: Int, prev: RefInt): Int } diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala b/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala index ab42265ae102..849f4566f528 100644 --- a/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala +++ b/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala @@ -1274,11 +1274,15 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle, * @return an array representing shape of current ndarray */ def shape: Shape = { - val ndim = new MXUintRef + val ndim = new RefInt val data = ArrayBuffer[Int]() checkCall(_LIB.mxNDArrayGetShape(handle, ndim, data)) - require(ndim.value == data.length, s"ndim=$ndim, while len(data)=${data.length}") - Shape(data) + if (ndim.value == -1) { + null + } else { + require(ndim.value == data.length, s"ndim=$ndim, while len(data)=${data.length}") + Shape(data) + } } // Get size of current NDArray. diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala new file mode 100644 index 000000000000..ec366ea4029d --- /dev/null +++ b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mxnet + +import org.apache.mxnet.Base._ + +object NumpyScope { + def setNumpyCompatible(isNpComp: Boolean): Boolean = { + val prev = new RefInt() + checkCall(_LIB.mxSetIsNumpyCompatible(if (isNpComp) 1 else 0, prev)) + if (prev.value != 0) true else false + } + + def isNumpyCompatible: Boolean = { + val curr = new RefInt + checkCall(_LIB.mxIsNumpyCompatible(curr)) + if (curr.value != 0) true else false + } + + def enableNumpyCompatible: NumpyScope = { + new NumpyScope(true) + } + + + def disableNumpyCompatible: NumpyScope = { + new NumpyScope(false) + } +} + +class NumpyScope(var isCompatible: Boolean) { + private var prev: Boolean = false + + def withScope[T](body: => T): T = { + prev = NumpyScope.setNumpyCompatible(isCompatible) + try { + body + } finally { + if (prev != isCompatible) { + NumpyScope.setNumpyCompatible(prev) + } + } + } +} diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala b/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala index 821e04f08df2..808a23a8c945 100644 --- a/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala +++ b/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala @@ -260,17 +260,45 @@ class Symbol private(private[mxnet] val handle: SymbolHandle) extends 
NativeReso def inferShape(keys: Array[String], indPtr: Array[Int], values: Array[Int]) : (IndexedSeq[Shape], IndexedSeq[Shape], IndexedSeq[Shape]) = { + val res = inferShapeImpl(partial = false, keys, indPtr, values) + if (res._2 == null) { + val (argShapes, _, _) = inferShapeImpl(partial = true, keys, indPtr, values) + val argNames = listArguments() + val unknown = (argNames zip argShapes).map { case (name, shape) => + val shapeIsNone = if (NumpyScope.isNumpyCompatible) { + shape == null || shape.toVector.contains(-1) + } else { + shape == null || shape.toVector.contains(0) + } + if (shapeIsNone) s"$name: $shape" else "" + } + logger.warn("Cannot decide shape for the following arguments. " + + "Consider providing them as input: \n\t{}", + unknown.filter(_ != "").mkString("\n\t")) + } + res + } + + private def inferShapeImpl(partial: Boolean, + keys: Array[String], + indPtr: Array[Int], + values: Array[Int]) + : (IndexedSeq[Shape], IndexedSeq[Shape], IndexedSeq[Shape]) = { val argShapeData = ListBuffer.empty[Array[Int]] val outShapeData = ListBuffer.empty[Array[Int]] val auxShapeData = ListBuffer.empty[Array[Int]] val complete = new RefInt - - checkCall(_LIB.mxSymbolInferShape(handle, indPtr.length - 1, keys, indPtr, values, - argShapeData, outShapeData, auxShapeData, complete)) + if (partial) { + checkCall(_LIB.mxSymbolInferShapePartial(handle, indPtr.length - 1, keys, indPtr, values, + argShapeData, outShapeData, auxShapeData, complete)) + } else { + checkCall(_LIB.mxSymbolInferShape(handle, indPtr.length - 1, keys, indPtr, values, + argShapeData, outShapeData, auxShapeData, complete)) + } if (complete.value != 0) { (argShapeData.map(s => Shape(s)).toIndexedSeq, - outShapeData.map(s => Shape(s)).toIndexedSeq, - auxShapeData.map(s => Shape(s)).toIndexedSeq) + outShapeData.map(s => Shape(s)).toIndexedSeq, + auxShapeData.map(s => Shape(s)).toIndexedSeq) } else { (null, null, null) } diff --git 
a/scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala b/scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala new file mode 100644 index 000000000000..bf6627ac7e91 --- /dev/null +++ b/scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mxnet + +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +class NumpyScopeSuite extends FunSuite with BeforeAndAfterAll { + test("compatible") { + NumpyScope.enableNumpyCompatible.withScope { + assert(NumpyScope.isNumpyCompatible === true) + } + } + + test("incompatible") { + NumpyScope.disableNumpyCompatible.withScope { + assert(NumpyScope.isNumpyCompatible === false) + } + } +} diff --git a/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala b/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala index 57c4cfba10b7..12d797f9b100 100644 --- a/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala +++ b/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala @@ -47,7 +47,8 @@ private[mxnet] object CToScalaUtils { case "double" | "doubleorNone" => types("double") case "string" => "String" case "boolean" | "booleanorNone" => types("bool") - case "tupleof" | "tupleof" | "tupleof<>" | "ptr" | "" => "Any" + case "tupleof" | "tupleof" | "tupleof" | "tupleof" | + "tupleof<>" | "ptr" | "" => "Any" case default => throw new IllegalArgumentException( s"Invalid type for args: $default\nString argType: $argType\nargName: $argName") } diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc index 33e4cca99b3a..678dfc12afac 100644 --- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc +++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc @@ -354,8 +354,8 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayLoadFromRawBytes JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape (JNIEnv *env, jobject obj, jlong ndArrayPtr, jobject ndimRef, jobject dataBuf) { - mx_uint ndim; - const mx_uint *pdata; + int ndim; + const int *pdata; int ret = MXNDArrayGetShape(reinterpret_cast(ndArrayPtr), 
&ndim, &pdata); // fill dataBuf @@ -365,7 +365,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape jclass arrayClass = env->FindClass("scala/collection/mutable/ArrayBuffer"); jmethodID arrayAppend = env->GetMethodID(arrayClass, "$plus$eq", "(Ljava/lang/Object;)Lscala/collection/mutable/ArrayBuffer;"); - for (size_t i = 0; i < ndim; ++i) { + for (int i = 0; i < ndim; ++i) { jobject data = env->NewObject(integerClass, newInteger, pdata[i]); env->CallObjectMethod(dataBuf, arrayAppend, data); env->DeleteLocalRef(data); @@ -892,6 +892,118 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorBackward return ret; } +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape + (JNIEnv * env, jobject obj, + jint partialReshaping, jint allowUpSizing, jint devType, jint devId, + jobjectArray jmapKeys, jintArray jmapDevTypes, jintArray jmapDevIds, + jobjectArray jprovidedArgShapeNames, jintArray jprovidedArgShapeData, jintArray jprovidedArgShapeIdx, + jobject jrefInArgs, jobject jrefArgGrads, jobject jrefAuxStates, + jlong jsharedExec, jobject jrefOut) { + CHECK(jmapKeys != NULL); + CHECK(jprovidedArgShapeNames != NULL); + + int numMapKeys = env->GetArrayLength(jmapKeys); + jint *mapDevTypes = env->GetIntArrayElements(jmapDevTypes, NULL); + jint *mapDevIds = env->GetIntArrayElements(jmapDevIds, NULL); + const char **mapKeys = NULL; + if (numMapKeys > 0) { + mapKeys = new const char*[numMapKeys]; + for (int i = 0; i < numMapKeys; ++i) { + jstring jkey = reinterpret_cast(env->GetObjectArrayElement(jmapKeys, i)); + mapKeys[i] = env->GetStringUTFChars(jkey, 0); + env->DeleteLocalRef(jkey); + } + } + + int numProvidedArgShapes = env->GetArrayLength(jprovidedArgShapeNames); + jint *providedArgShapeData = env->GetIntArrayElements(jprovidedArgShapeData, NULL); + jint *providedArgShapeIdx = env->GetIntArrayElements(jprovidedArgShapeIdx, NULL); + const char **providedArgShapeNames = NULL; + if (numProvidedArgShapes > 0) { + 
providedArgShapeNames = new const char*[numProvidedArgShapes]; + for (int i = 0; i < numProvidedArgShapes; ++i) { + jstring jkey = reinterpret_cast(env->GetObjectArrayElement(jprovidedArgShapeNames, i)); + providedArgShapeNames[i] = env->GetStringUTFChars(jkey, 0); + env->DeleteLocalRef(jkey); + } + } + + mx_uint numInArgs = 0; + NDArrayHandle *inArgs; + NDArrayHandle *argGrads; + + mx_uint numAuxStates = 0; + NDArrayHandle *auxStates; + + ExecutorHandle out; + + int ret = MXExecutorReshape(partialReshaping, + allowUpSizing, + devType, + devId, + static_cast(numMapKeys), + mapKeys, + static_cast(mapDevTypes), + static_cast(mapDevIds), + static_cast(numProvidedArgShapes), + providedArgShapeNames, + static_cast(providedArgShapeData), + reinterpret_cast(providedArgShapeIdx), + &numInArgs, + &inArgs, + &argGrads, + &numAuxStates, + &auxStates, + reinterpret_cast(jsharedExec), + &out); + + jclass longCls = env->FindClass("java/lang/Long"); + jmethodID newLong = env->GetMethodID(longCls, "", "(J)V"); + + jclass arrayClass = env->FindClass("scala/collection/mutable/ArrayBuffer"); + jmethodID arrayAppend = env->GetMethodID(arrayClass, + "$plus$eq", "(Ljava/lang/Object;)Lscala/collection/mutable/ArrayBuffer;"); + + for (size_t i = 0; i < numInArgs; ++i) { + jobject inArg = env->NewObject(longCls, newLong, inArgs[i]); + env->CallObjectMethod(jrefInArgs, arrayAppend, inArg); + env->DeleteLocalRef(inArg); + + jobject argGrad = env->NewObject(longCls, newLong, argGrads[i]); + env->CallObjectMethod(jrefArgGrads, arrayAppend, argGrad); + env->DeleteLocalRef(argGrad); + } + + for (size_t i = 0; i < numAuxStates; ++i) { + jobject auxState = env->NewObject(longCls, newLong, auxStates[i]); + env->CallObjectMethod(jrefAuxStates, arrayAppend, auxState); + env->DeleteLocalRef(auxState); + } + + SetLongField(env, jrefOut, reinterpret_cast(out)); + + // release allocated memory + for (int i = 0; i < numMapKeys; i++) { + jstring jkey = reinterpret_cast(env->GetObjectArrayElement(jmapKeys, 
i)); + env->ReleaseStringUTFChars(jkey, mapKeys[i]); + env->DeleteLocalRef(jkey); + } + if (mapKeys != NULL) { + delete[] mapKeys; + } + + for (int i = 0; i < numProvidedArgShapes; i++) { + jstring jkey = reinterpret_cast(env->GetObjectArrayElement(jprovidedArgShapeNames, i)); + env->ReleaseStringUTFChars(jkey, providedArgShapeNames[i]); + env->DeleteLocalRef(jkey); + } + if (providedArgShapeNames != NULL) { + delete[] providedArgShapeNames; + } + + return ret; +} + JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorPrint (JNIEnv * env, jobject obj, jlong ptr, jobject debugStr) { const char *retDebugStr; @@ -1530,23 +1642,26 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolCreateFromFile int FillSymbolInferShape (JNIEnv *env, jmethodID listAppend, jobject joutData, - mx_uint shapeSize, const mx_uint *shapeNdim, const mx_uint **shapeData) { - for (size_t i = 0; i < shapeSize; ++i) { - jintArray jshape = env->NewIntArray(shapeNdim[i]); - if (jshape == NULL) { - // TODO(Yizhi): out of memory error thrown, return a specific error code ? - return -1; + int shapeSize, const int *shapeNdim, const int **shapeData) { + for (int i = 0; i < shapeSize; ++i) { + jintArray jshape = NULL; + if (shapeNdim[i] >= 0) { + jshape = env->NewIntArray(shapeNdim[i]); + if (jshape == NULL) { + // TODO(Yizhi): out of memory error thrown, return a specific error code ? 
+ return -1; + } + env->SetIntArrayRegion(jshape, 0, shapeNdim[i], reinterpret_cast(shapeData[i])); } - env->SetIntArrayRegion(jshape, 0, shapeNdim[i], reinterpret_cast(shapeData[i])); env->CallObjectMethod(joutData, listAppend, jshape); env->DeleteLocalRef(jshape); } return 0; } -JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape - (JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys, - jintArray jargIndPtr, jintArray jargShapeData, - jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData, jobject jcomplete) { + +int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys, + jintArray jargIndPtr, jintArray jargShapeData, jobject jinShapeData, + jobject joutShapeData, jobject jauxShapeData, jobject jcomplete, bool partial) { const char **keys = NULL; if (jkeys != NULL) { keys = new const char *[jnumArgs]; @@ -1559,36 +1674,55 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape } mx_uint inShapeSize; - const mx_uint *inShapeNdim; - const mx_uint **inShapeData; + const int *inShapeNdim; + const int **inShapeData; mx_uint outShapeSize; - const mx_uint *outShapeNdim; - const mx_uint **outShapeData; + const int *outShapeNdim; + const int **outShapeData; mx_uint auxShapeSize; - const mx_uint *auxShapeNdim; - const mx_uint **auxShapeData; + const int *auxShapeNdim; + const int **auxShapeData; int complete; jint *argIndPtr = env->GetIntArrayElements(jargIndPtr, NULL); jint *argShapeData = env->GetIntArrayElements(jargShapeData, NULL); - int ret = MXSymbolInferShape(reinterpret_cast(symbolPtr), - static_cast(jnumArgs), - keys, - reinterpret_cast(argIndPtr), - reinterpret_cast(argShapeData), - &inShapeSize, - &inShapeNdim, - &inShapeData, - &outShapeSize, - &outShapeNdim, - &outShapeData, - &auxShapeSize, - &auxShapeNdim, - &auxShapeData, - &complete); + int ret; + if (!partial) { + ret = MXSymbolInferShape(reinterpret_cast(symbolPtr), + 
static_cast(jnumArgs), + keys, + reinterpret_cast(argIndPtr), + reinterpret_cast(argShapeData), + &inShapeSize, + &inShapeNdim, + &inShapeData, + &outShapeSize, + &outShapeNdim, + &outShapeData, + &auxShapeSize, + &auxShapeNdim, + &auxShapeData, + &complete); + } else { + ret = MXSymbolInferShapePartial(reinterpret_cast(symbolPtr), + static_cast(jnumArgs), + keys, + reinterpret_cast(argIndPtr), + reinterpret_cast(argShapeData), + &inShapeSize, + &inShapeNdim, + &inShapeData, + &outShapeSize, + &outShapeNdim, + &outShapeData, + &auxShapeSize, + &auxShapeNdim, + &auxShapeData, + &complete); + } env->ReleaseIntArrayElements(jargShapeData, argShapeData, 0); env->ReleaseIntArrayElements(jargIndPtr, argIndPtr, 0); @@ -1629,6 +1763,24 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape return ret; } +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape + (JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys, + jintArray jargIndPtr, jintArray jargShapeData, + jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData, jobject jcomplete) { + + return SymbolInferShapeHelper(env, obj, symbolPtr, jnumArgs, jkeys, jargIndPtr, jargShapeData, + jinShapeData, joutShapeData, jauxShapeData, jcomplete, false); +} + +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShapePartial + (JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys, + jintArray jargIndPtr, jintArray jargShapeData, + jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData, jobject jcomplete) { + + return SymbolInferShapeHelper(env, obj, symbolPtr, jnumArgs, jkeys, jargIndPtr, jargShapeData, + jinShapeData, joutShapeData, jauxShapeData, jcomplete, true); +} + JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorBindX (JNIEnv *env, jobject obj, jlong symbolPtr, jint deviceTypeId, jint deviceID, jint numCtx, jobjectArray jctxMapKeys, jintArray jctxMapDevTypes, jintArray jctxMapDevIDs, 
jint numArgs, @@ -2551,3 +2703,20 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxDumpProfile (JNIEnv *env, jobject obj, jint finished) { return MXDumpProfile(finished); } + +// Numpy +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxIsNumpyCompatible + (JNIEnv *env, jobject obj, jobject compatibleRef) { + bool isCompatible; + int ret = MXIsNumpyCompatible(&isCompatible); + SetIntField(env, compatibleRef, static_cast(isCompatible)); + return ret; +} + +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetIsNumpyCompatible + (JNIEnv *env, jobject obj, jint isNpComp, jobject prevRef) { + int prev; + int ret = MXSetIsNumpyCompatible(isNpComp, &prev); + SetIntField(env, prevRef, prev); + return ret; +} \ No newline at end of file diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h index b8a9b3b9e64f..467272cea9cf 100644 --- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h +++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h @@ -511,6 +511,14 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorPrint JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorSetMonitorCallback (JNIEnv *, jobject, jlong, jobject); +/* + * Class: org_apache_mxnet_LibInfo + * Method: mxExecutorReshape + * Signature: (IIII[Ljava/lang/String;[I[I[Ljava/lang/String;[I[ILscala/collection/mutable/ArrayBuffer;Lscala/collection/mutable/ArrayBuffer;Lscala/collection/mutable/ArrayBuffer;JLorg/apache/mxnet/Base/RefLong;)I + */ +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape + (JNIEnv *, jobject, jint, jint, jint, jint, jobjectArray, jintArray, jintArray, jobjectArray, jintArray, jintArray, jobject, jobject, jobject, jlong, jobject); + /* * Class: org_apache_mxnet_LibInfo * Method: mxSymbolListAtomicSymbolCreators @@ -655,6 +663,14 @@ JNIEXPORT jint JNICALL 
Java_org_apache_mxnet_LibInfo_mxSymbolInferType JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape (JNIEnv *, jobject, jlong, jint, jobjectArray, jintArray, jintArray, jobject, jobject, jobject, jobject); +/* + * Class: org_apache_mxnet_LibInfo + * Method: mxSymbolInferShapePartial + * Signature: (JI[Ljava/lang/String;[I[ILscala/collection/mutable/ListBuffer;Lscala/collection/mutable/ListBuffer;Lscala/collection/mutable/ListBuffer;Lorg/apache/mxnet/Base/RefInt;)I + */ +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShapePartial + (JNIEnv *, jobject, jlong, jint, jobjectArray, jintArray, jintArray, jobject, jobject, jobject, jobject); + /* * Class: org_apache_mxnet_LibInfo * Method: mxSymbolGetOutput @@ -855,6 +871,22 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetProfilerState JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxDumpProfile (JNIEnv *, jobject, jint); +/* + * Class: org_apache_mxnet_LibInfo + * Method: mxIsNumpyCompatible + * Signature: (Lorg/apache/mxnet/Base/RefInt;)I + */ +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxIsNumpyCompatible + (JNIEnv *, jobject, jobject); + +/* + * Class: org_apache_mxnet_LibInfo + * Method: mxSetIsNumpyCompatible + * Signature: (ILorg/apache/mxnet/Base/RefInt;)I + */ +JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetIsNumpyCompatible + (JNIEnv *, jobject, jint, jobject); + #ifdef __cplusplus } #endif diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index e91f904debb9..87e911a56800 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -240,7 +240,7 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs, struct TransposeParam : public dmlc::Parameter { mxnet::TShape axes; DMLC_DECLARE_PARAMETER(TransposeParam) { - DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape()) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0)) .describe("Target axis order. 
By default the axes will be inverted."); } @@ -318,7 +318,7 @@ void Transpose(const nnvm::NodeAttrs& attrs, const std::vector& outputs) { const TransposeParam& param = nnvm::get(attrs.parsed); CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; - if (!mxnet::ndim_is_known(param.axes)) { + if (param.axes.ndim() == 0) { mxnet::TShape axes(inputs[0].ndim(), -1); for (int i = 0; i < axes.ndim(); ++i) { axes[i] = axes.ndim() - 1 - i; @@ -338,7 +338,7 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs, mxnet::TShape& shp = (*in_attrs)[0]; CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions"; mxnet::TShape ret(shp.ndim(), -1); - if (!mxnet::ndim_is_known(param.axes)) { + if (param.axes.ndim() == 0) { for (int i = 0; i < shp.ndim(); ++i) { ret[i] = shp[shp.ndim()-1-i]; } From c7f4ebd91b9662038281cd9977548c07a0f0af6f Mon Sep 17 00:00:00 2001 From: Haibin Lin Date: Fri, 5 Apr 2019 10:30:20 -0700 Subject: [PATCH 15/32] fix shape index bug (#14630) --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f1571312dceb..533f18fa9e0b 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -802,7 +802,7 @@ int MXDataIterGetLabel(DataIterHandle handle, NDArrayHandle *out) { // temp hack to make label 1D // TODO(tianjun) make label 1D when label_width=0 mxnet::TShape shape = db.data[1].shape(); - if (shape[1] == 1) { + if (shape.ndim() > 1 && shape[1] == 1) { *pndarray = db.data[1].Reshape(mshadow::Shape1(shape[0])); } else { *pndarray = db.data[1]; From c526ac4718cbd1478390f2f31604226f0837cbee Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Fri, 5 Apr 2019 13:12:11 -0700 Subject: [PATCH 16/32] fix jni lint (#14634) --- .../native/org_apache_mxnet_native_c_api.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc 
b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc index 678dfc12afac..ae01c8a7c05e 100644 --- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc +++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc @@ -896,9 +896,9 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape (JNIEnv * env, jobject obj, jint partialReshaping, jint allowUpSizing, jint devType, jint devId, jobjectArray jmapKeys, jintArray jmapDevTypes, jintArray jmapDevIds, - jobjectArray jprovidedArgShapeNames, jintArray jprovidedArgShapeData, jintArray jprovidedArgShapeIdx, - jobject jrefInArgs, jobject jrefArgGrads, jobject jrefAuxStates, - jlong jsharedExec, jobject jrefOut) { + jobjectArray jprovidedArgShapeNames, jintArray jprovidedArgShapeData, + jintArray jprovidedArgShapeIdx, jobject jrefInArgs, jobject jrefArgGrads, + jobject jrefAuxStates, jlong jsharedExec, jobject jrefOut) { CHECK(jmapKeys != NULL); CHECK(jprovidedArgShapeNames != NULL); @@ -922,7 +922,8 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape if (numProvidedArgShapes > 0) { providedArgShapeNames = new const char*[numProvidedArgShapes]; for (int i = 0; i < numProvidedArgShapes; ++i) { - jstring jkey = reinterpret_cast(env->GetObjectArrayElement(jprovidedArgShapeNames, i)); + jstring jkey = reinterpret_cast( + env->GetObjectArrayElement(jprovidedArgShapeNames, i)); providedArgShapeNames[i] = env->GetStringUTFChars(jkey, 0); env->DeleteLocalRef(jkey); } @@ -1659,9 +1660,10 @@ int FillSymbolInferShape return 0; } -int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys, - jintArray jargIndPtr, jintArray jargShapeData, jobject jinShapeData, - jobject joutShapeData, jobject jauxShapeData, jobject jcomplete, bool partial) { +int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, + jobjectArray jkeys, jintArray jargIndPtr, jintArray jargShapeData, + jobject 
jinShapeData, jobject joutShapeData, jobject jauxShapeData, + jobject jcomplete, bool partial) { const char **keys = NULL; if (jkeys != NULL) { keys = new const char *[jnumArgs]; @@ -2719,4 +2721,4 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetIsNumpyCompatible int ret = MXSetIsNumpyCompatible(isNpComp, &prev); SetIntField(env, prevRef, prev); return ret; -} \ No newline at end of file +} From d12a1fab4bbdf58e6627e41debee8e7ba7f62d3c Mon Sep 17 00:00:00 2001 From: reminisce Date: Mon, 8 Apr 2019 11:29:00 -0700 Subject: [PATCH 17/32] [numpy] Fix numpy branch failing tests in CI (#14639) * Remove numpy namespaces for operator registration * Fix bug when shape is compeltely unknown * Fix singed/unsigned compare warning * Fix CI * Fix pylint * Avoid launching gpu kernels for zero-size output tensors * Fix test_ndarray * Fix binary broadcast with zero-size tensors * Better error message for infer shape failure in imperative * Fix TShape constructor ambiguity on certain platforms * Fix mkldnn build failure * Fix build failure in gpu and cpp test * Fix gpu cpp test build with mkldnn * Fix mkldnn cpp test * Fix concatenating zero-size tensors * Avoid letting mkldnn handle zero-size tensors in concat * Fix quantized_concat infer shape * Try to fix perl c api --- include/mxnet/tuple.h | 13 +- perl-package/AI-MXNetCAPI/mxnet.i | 84 ++++---- python/mxnet/__init__.py | 3 +- python/mxnet/base.py | 135 +++++++++++-- python/mxnet/ndarray/__init__.py | 2 +- python/mxnet/ndarray/numpy.py | 18 -- python/mxnet/numpy/__init__.py | 66 ------- python/mxnet/symbol/__init__.py | 2 +- python/mxnet/symbol/numpy.py | 18 -- python/mxnet/symbol/symbol.py | 3 +- src/c_api/c_api_common.h | 4 +- src/common/utils.h | 2 +- src/imperative/imperative_utils.h | 12 +- src/ndarray/ndarray.cc | 4 +- src/operator/channel_op_common.h | 4 + src/operator/contrib/bounding_box-inl.h | 3 +- .../contrib/deformable_convolution-inl.h | 6 +- src/operator/contrib/index_copy-inl.h | 2 +- 
src/operator/mxnet_op.h | 2 + src/operator/nn/concat.cc | 5 +- src/operator/nn/cudnn/cudnn_convolution-inl.h | 2 +- .../nn/cudnn/cudnn_deconvolution-inl.h | 2 +- src/operator/nn/deconvolution-inl.h | 2 +- src/operator/nn/mkldnn/mkldnn_slice.cc | 4 +- src/operator/numpy/np_broadcast_reduce_op.h | 186 ------------------ .../numpy/np_broadcast_reduce_op_value.cc | 61 ------ .../numpy/np_broadcast_reduce_op_value.cu | 36 ---- src/operator/pad-inl.h | 2 +- src/operator/pooling_v1-inl.h | 6 +- src/operator/quantization/dequantize-inl.h | 2 +- .../mkldnn/mkldnn_requantize-inl.h | 2 +- src/operator/quantization/quantize-inl.h | 2 +- src/operator/quantization/quantize_v2-inl.h | 2 +- src/operator/quantization/quantized_concat.cc | 16 +- src/operator/quantization/quantized_conv.cc | 4 +- .../quantization/quantized_fully_connected.cc | 4 +- src/operator/quantization/requantize-inl.h | 2 +- src/operator/swapaxis-inl.h | 2 +- src/operator/tensor/broadcast_reduce_op.h | 6 +- .../tensor/elemwise_binary_broadcast_op.h | 15 +- src/operator/tensor/histogram-inl.h | 8 +- src/operator/tensor/init_op.h | 2 + src/operator/tensor/matrix_op-inl.h | 12 +- tests/cpp/include/test_mkldnn.h | 18 +- tests/cpp/include/test_util.h | 4 +- tests/cpp/misc/serialization.cc | 2 +- tests/cpp/operator/batchnorm_test.cc | 4 +- tests/cpp/operator/mkldnn_operator_test.cc | 6 +- tests/python/gpu/test_operator_gpu.py | 22 ++- tests/python/unittest/test_infer_shape.py | 16 ++ tests/python/unittest/test_ndarray.py | 2 +- tests/python/unittest/test_operator.py | 54 ++++- 52 files changed, 349 insertions(+), 547 deletions(-) delete mode 100644 python/mxnet/ndarray/numpy.py delete mode 100644 python/mxnet/numpy/__init__.py delete mode 100644 python/mxnet/symbol/numpy.py delete mode 100644 src/operator/numpy/np_broadcast_reduce_op.h delete mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cc delete mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cu diff --git a/include/mxnet/tuple.h 
b/include/mxnet/tuple.h index c5a358628ccd..c5c0ccd548df 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -390,7 +390,7 @@ class TShape : public Tuple { * \param ndim the number of dimension * \param value the dimension size for all dims */ - inline TShape(int ndim, int value = -1) { // NOLINT(*) + inline TShape(const int ndim, const dim_t value) { // NOLINT(*) this->SetDim(ndim); if (ndim > 0) { std::fill_n(begin(), ndim, value); @@ -422,12 +422,17 @@ class TShape : public Tuple { this->swap(s); } /*! - * \brief construct the Tuple from content of iterator + * \brief construct the Tuple from content of iterator. + * This function is enforced with template arguments of random access iterator types. + * This is necessary to distinguish from another constructor: TShape(const int, const dim_t). * \param begin the beginning of iterator * \param end end the end of the iterator * \tparam RandomAccessIterator iterator type */ - template + template::iterator_category, + std::random_access_iterator_tag>::value, int>::type = 0> inline TShape(RandomAccessIterator begin, RandomAccessIterator end) { this->assign(begin, end); @@ -622,7 +627,7 @@ inline bool ndim_is_known(const TShape& x) { } /*! brief check if a shape's dim size is known. */ -inline bool dim_size_is_known(const int dim_size) { +inline bool dim_size_is_known(const dim_t dim_size) { CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size; return dim_size != -1; } diff --git a/perl-package/AI-MXNetCAPI/mxnet.i b/perl-package/AI-MXNetCAPI/mxnet.i index 0e6a05ea9695..0ecf5b3a9cc3 100644 --- a/perl-package/AI-MXNetCAPI/mxnet.i +++ b/perl-package/AI-MXNetCAPI/mxnet.i @@ -641,8 +641,8 @@ int MXNDArrayReshape64(NDArrayHandle handle, * \return 0 when success, -1 when failure happens */ int MXNDArrayGetShape(NDArrayHandle handle, - mx_uint *out_dim, - const mx_uint **out_pdata); + int *out_dim, + const int **out_pdata); /*! 
* \brief get the content of the data in NDArray * \param handle the handle to the ndarray @@ -1290,20 +1290,20 @@ int MXSymbolGrad(SymbolHandle sym, * \return 0 when success, -1 when failure happens */ int MXSymbolInferShape(SymbolHandle sym, - mx_uint num_args, - const char** in, - const mx_uint *in, - const mx_uint *in, - mx_uint *in_shape_size, - const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, - mx_uint *out_shape_size, - const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, - mx_uint *aux_shape_size, - const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, - int *out); + mx_uint num_args, + const char** in, + const mx_uint *in, + const int *in, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *out); /*! * \brief partially infer shape of unknown input shapes given the known one. * @@ -1332,16 +1332,16 @@ int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** in, const mx_uint *in, - const mx_uint *in, + const int *in, mx_uint *in_shape_size, - const mx_uint **in_shape_ndim, - const mx_uint ***in_shape_data, + const int **in_shape_ndim, + const int ***in_shape_data, mx_uint *out_shape_size, - const mx_uint **out_shape_ndim, - const mx_uint ***out_shape_data, + const int **out_shape_ndim, + const int ***out_shape_data, mx_uint *aux_shape_size, - const mx_uint **aux_shape_ndim, - const mx_uint ***aux_shape_data, + const int **aux_shape_ndim, + const int ***aux_shape_data, int *out); /*! 
@@ -1547,7 +1547,7 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, const char** in, // provided_grad_req_types, const mx_uint num_provided_arg_shapes, const char** in, // provided_arg_shape_names, - const mx_uint* in, // provided_arg_shape_data, + const int* in, // provided_arg_shape_data, const mx_uint* in, // provided_arg_shape_idx, const mx_uint num_provided_arg_dtypes, const char** in, // provided_arg_dtype_names, @@ -1593,24 +1593,24 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, * \return a new executor */ int MXExecutorReshape(int partial_shaping, - int allow_up_sizing, - int dev_type, - int dev_id, - mx_uint num_map_keys, - const char** in, - const int* in, - const int* in, - const mx_uint num_provided_arg_shapes, - const char** in, - const mx_uint* in, - const mx_uint* in, - mx_uint* couple_out_size, - NDArrayHandle** out_first_array, - NDArrayHandle** out_second_array, - mx_uint* out_size, - NDArrayHandle** out_array, - ExecutorHandle shared_exec, - ExecutorHandle *out); + int allow_up_sizing, + int dev_type, + int dev_id, + mx_uint num_map_keys, + const char** in, + const int* in, + const int* in, + const mx_uint num_provided_arg_shapes, + const char** in, + const int* in, + const mx_uint* in, + mx_uint* couple_out_size, + NDArrayHandle** out_first_array, + NDArrayHandle** out_second_array, + mx_uint* out_size, + NDArrayHandle** out_array, + ExecutorHandle shared_exec, + ExecutorHandle *out); /*! * \brief set a call back to notify the completion of operation diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 8db83a286157..5f4f9b393e41 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -23,9 +23,8 @@ from .context import Context, current_context, cpu, gpu, cpu_pinned from . import engine -from .base import MXNetError +from .base import MXNetError, is_np_comp, set_np_comp, enable_np_comp, disable_np_comp from . import base -from . import numpy from . import contrib from . import ndarray from . 
import ndarray as nd diff --git a/python/mxnet/base.py b/python/mxnet/base.py index fe1dd00f9454..916e74182f94 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -561,7 +561,7 @@ def _as_list(obj): return [obj] -_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_', '_numpy_'] +_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_'] def _get_op_name_prefix(op_name): @@ -607,13 +607,6 @@ def _init_op_module(root_namespace, module_name, make_op_func): # use mx.nd.contrib or mx.sym.contrib from now on contrib_module_name_old = "%s.contrib.%s" % (root_namespace, module_name) contrib_module_old = sys.modules[contrib_module_name_old] - # special handling of registering numpy ops - if module_name == 'ndarray': - numpy_module_name = "%s.numpy" % root_namespace - numpy_module = sys.modules[numpy_module_name] - else: - numpy_module_name = None - numpy_module = None submodule_dict = {} for op_name_prefix in _OP_NAME_PREFIX_LIST: submodule_dict[op_name_prefix] =\ @@ -652,16 +645,6 @@ def _init_op_module(root_namespace, module_name, make_op_func): function.__module__ = contrib_module_name_old setattr(contrib_module_old, function.__name__, function) contrib_module_old.__all__.append(function.__name__) - elif op_name_prefix == '_numpy_' and numpy_module_name is not None: - # only register numpy ops under mxnet.numpy in imperative mode - hdl = OpHandle() - check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl))) - # TODO(reminisce): Didn't consider third level module here, e.g. mxnet.numpy.random. 
- func_name = name[len(op_name_prefix):] - function = make_op_func(hdl, name, func_name) - function.__module__ = numpy_module_name - setattr(numpy_module, function.__name__, function) - numpy_module.__all__.append(function.__name__) def _generate_op_module_signature(root_namespace, module_name, op_code_gen_func): @@ -751,3 +734,119 @@ def write_all_str(module_file, module_all_list): ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p + + +def set_np_comp(flag): + """ + Turns on/off NumPy compatibility. NumPy-compatibility is turned off by default in backend. + + Parameters + ---------- + flag : bool + Indicates whether to turn on/off NumPy compatibility. + + Returns + ------- + A bool value indicating the previous state of NumPy compatibility. + """ + prev = ctypes.c_int() + check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(flag), ctypes.byref(prev))) + return bool(prev.value) + + +def is_np_comp(): + """ + Checks whether the NumPy compatibility is currently turned on. + NumPy-compatibility is turned off by default in backend. + + Returns + ------- + A bool value indicating whether the NumPy compatibility is currently on. + """ + curr = ctypes.c_bool() + check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr))) + return curr.value + + +class _NumpyCompatibilityStateScope(object): + """Scope for managing numpy compatibility state. 
+ + Example:: + + with _NumpyCompatibilityStateScope(True): + y = model(x) + backward([y]) + + """ + def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name + self._enter_is_np_comp = is_np_comp + self._prev_is_np_comp = None + + def __enter__(self): + if self._enter_is_np_comp is not None: + self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp) + + def __exit__(self, ptype, value, trace): + if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp: + set_np_comp(self._prev_is_np_comp) + + +def enable_np_comp(): + """Returns a NumPy compatibility state scope to be used in 'with' statement + and captures code that needs the compatibility. + + Example:: + + with mx.enable_np_comp(): + # A scalar tensor's shape is `()`, whose `ndim` is `0`. + scalar = mx.nd.ones(shape=()) + assert scalar.shape == () + + # In NumPy compatible mode, 0 in a shape means that dimension contains zero elements. + data = mx.sym.var("data", shape=(0, 2, 3)) + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape() + assert arg_shapes[0] == (0, 2, 3) + assert out_shapes[0] == (0, 2, 3) + + # -1 means unknown shape dimension size in the new NumPy-compatible shape definition + data = mx.sym.var("data", shape=(-1, 2, 3)) + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape_partial() + assert arg_shapes[0] == (-1, 2, 3) + assert out_shapes[0] == (-1, 2, 3) + + # When a shape is completely unknown in NumPy-compatible mode, it is + # represented as `None` in Python. + data = mx.sym.var("data") + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape_partial() + assert arg_shapes[0] is None + assert out_shapes[0] is None + """ + return _NumpyCompatibilityStateScope(True) + + +def disable_np_comp(): + """Returns a state scope with NumPy-compatibility disabled to be used in 'with' statement + and captures code that does not need the compatibility. 
+ + Example:: + + with mx.disable_np_comp(): + # 0 means unknown shape dimension size in the legacy shape definition. + data = mx.sym.var("data", shape=(0, 2, 3)) + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape_partial() + assert arg_shapes[0] == (0, 2, 3) + assert out_shapes[0] == (0, 2, 3) + + # When a shape is completely unknown in the legacy mode (default), its ndim is + # equal to 0 and it is represented as `()` in Python. + data = mx.sym.var("data") + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape_partial() + assert arg_shapes[0] == () + assert out_shapes[0] == () + """ + return _NumpyCompatibilityStateScope(False) diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py index a102399521cc..f09908e894d5 100644 --- a/python/mxnet/ndarray/__init__.py +++ b/python/mxnet/ndarray/__init__.py @@ -17,7 +17,7 @@ """NDArray API of MXNet.""" -from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray, numpy +from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray # pylint: disable=wildcard-import, redefined-builtin try: from .gen_op import * # pylint: disable=unused-wildcard-import diff --git a/python/mxnet/ndarray/numpy.py b/python/mxnet/ndarray/numpy.py deleted file mode 100644 index 0826ac8aca7f..000000000000 --- a/python/mxnet/ndarray/numpy.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -__all__ = [] diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py deleted file mode 100644 index e0dfda10113e..000000000000 --- a/python/mxnet/numpy/__init__.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import ctypes -from ..base import _LIB, check_call - -__all__ = [] - - -def set_np_comp(is_np_comp): - prev = ctypes.c_int() - check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(is_np_comp), ctypes.byref(prev))) - return bool(prev.value) - - -def is_np_comp(): - curr = ctypes.c_bool() - check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr))) - return curr.value - - -class _NumpyCompatibilityStateScope(object): - """Scope for managing numpy compatibility state. 
- - Example:: - - with _NumpyCompatibilityStateScope(True): - y = model(x) - backward([y]) - - """ - def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name - self._enter_is_np_comp = is_np_comp - self._prev_is_np_comp = None - - def __enter__(self): - if self._enter_is_np_comp is not None: - self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp) - - def __exit__(self, ptype, value, trace): - if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp: - set_np_comp(self._prev_is_np_comp) - - -def enable_np_comp(): - return _NumpyCompatibilityStateScope(True) - - -def disable_np_comp(): - return _NumpyCompatibilityStateScope(False) diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py index 326e4f5aff78..f438e4954aa9 100644 --- a/python/mxnet/symbol/__init__.py +++ b/python/mxnet/symbol/__init__.py @@ -17,7 +17,7 @@ """Symbol API of MXNet.""" -from . import _internal, contrib, linalg, op, random, sparse, image, symbol, numpy +from . import _internal, contrib, linalg, op, random, sparse, image, symbol # pylint: disable=wildcard-import, redefined-builtin try: from .gen_op import * # pylint: disable=unused-wildcard-import diff --git a/python/mxnet/symbol/numpy.py b/python/mxnet/symbol/numpy.py deleted file mode 100644 index 0826ac8aca7f..000000000000 --- a/python/mxnet/symbol/numpy.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -__all__ = [] diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index b2b8261be51d..2de4954cae81 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -34,7 +34,7 @@ from ..attribute import AttrScope from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array -from ..base import mx_uint, py_str, string_types, integer_types, mx_int +from ..base import mx_uint, py_str, string_types, integer_types, mx_int, is_np_comp from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle from ..base import check_call, MXNetError, NotImplementedForSymbol from ..context import Context, current_context @@ -42,7 +42,6 @@ from ..ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID from ..ndarray import _ndarray_cls from ..executor import Executor -from ..numpy import is_np_comp from . import _internal from . 
import op from ._internal import SymbolBase, _set_symbol_class diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 55608b950866..8be192b22c53 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -100,7 +100,9 @@ struct MXAPIThreadLocalEntry { for (size_t i = 0; i < shapes.size(); ++i) { ndim->at(i) = shapes[i].ndim(); data->at(i) = ptr; - ptr = mxnet::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr); + if (shapes[i].ndim() > 0) { + ptr = mxnet::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr); + } } } }; diff --git a/src/common/utils.h b/src/common/utils.h index 4fb398d883a6..6cdb869ff9ae 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -776,7 +776,7 @@ inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) { */ inline void ConvertToLegacyShape(mxnet::TShape* shape) { if (!mxnet::ndim_is_known(*shape)) { - *shape = mxnet::TShape(0); + *shape = mxnet::TShape(0, -1); } else { for (int j = 0; j < shape->ndim(); ++j) { if (!mxnet::dim_size_is_known(*shape, j)) { diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index d058df4b3806..24e6c49d5edb 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -131,12 +131,16 @@ inline void SetShapeType(const Context& ctx, std::stringstream os; os << "Operator " << attrs.op->name << " inferring shapes failed.\n"; os << "input shapes:\n"; - for (auto& nd : inputs) { - os << nd->shape() << '\n'; + for (const auto& s : in_shapes) { + os << s << '\n'; } os << "output shapes:\n"; - for (auto& nd : outputs) { - os << nd->shape() << '\n'; + for (const auto& s : out_shapes) { + os << s << '\n'; + } + os << "operator attributes:\n"; + for (const auto& kv : attrs.dict) { + os << kv.first << " : " << kv.second << '\n'; } LOG(FATAL) << os.str(); } diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index 604000028bf1..f5aac36a48eb 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ 
-549,7 +549,7 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder( // If they have different shapes, we need to reshape the array first. // Since this method will only be used inside an operator, we can call // MKLDNNDataReshape to reshape an array. - mxnet::TShape required_shape(desc2.data.ndims); + mxnet::TShape required_shape(desc2.data.ndims, -1); for (int i = 0; i < desc2.data.ndims; i++) required_shape[i] = desc2.data.dims[i]; NDArray reshaped = MKLDNNDataReshape(required_shape); @@ -575,7 +575,7 @@ NDArray NDArray::Reorder2Default() const { // create new ndarray from mkldnn layout mkldnn::memory::desc from_desc = ptr_->mkl_mem_->GetPrimitiveDesc().desc(); - mxnet::TShape tshape(from_desc.data.ndims); + mxnet::TShape tshape(from_desc.data.ndims, -1); for (int i = 0; i < from_desc.data.ndims; i++) tshape[i] = from_desc.data.dims[i]; NDArray ret(tshape, ctx(), false, dtype()); mkldnn::memory::primitive_desc def_pd = ptr_->mkl_mem_->GetPrimitiveDesc(format); diff --git a/src/operator/channel_op_common.h b/src/operator/channel_op_common.h index 1afc13ad2594..43f689d2defa 100644 --- a/src/operator/channel_op_common.h +++ b/src/operator/channel_op_common.h @@ -45,6 +45,8 @@ inline void concatenate_helper(const std::vector(out, begin, end), req, input[i]); begin = end; @@ -80,6 +82,8 @@ void split_helper(const mshadow::Tensor &input, size_t size = out.size(); index_t begin = 0; for (size_t i = 0; i < size; ++i) { + // If out[i] is a zero-size tensor, do nothing. 
+ if (out[i].shape_.Size() == 0) continue; index_t end = begin + out[i].size(cdim); Assign(out[i], req[i], slice(input, begin, end)); begin = end; diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h index 6ea4e8097b6c..686f1666a310 100644 --- a/src/operator/contrib/bounding_box-inl.h +++ b/src/operator/contrib/bounding_box-inl.h @@ -94,9 +94,8 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs, const BoxNMSParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 2U); - // TODO(@junrushao1994): verify with Joshua Z. Zhang about this operator if (mxnet::op::shape_is_none(in_attrs->at(0)) - && mxnet::op::shape_is_none(out_attrs->at(0))) { + && mxnet::op::shape_is_none(out_attrs->at(0))) { return false; } diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h index a7e22f548151..000d703066d7 100644 --- a/src/operator/contrib/deformable_convolution-inl.h +++ b/src/operator/contrib/deformable_convolution-inl.h @@ -69,11 +69,11 @@ struct DeformableConvolutionParam : public dmlc::Parameter layout; DMLC_DECLARE_PARAMETER(DeformableConvolutionParam) { DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (h, w) or (d, h, w)"); - DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, -1)) .describe("Convolution stride: (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, -1)) .describe("Convolution dilate: (h, w) or (d, h, w). Defaults to 1 for each dimension."); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, -1)) .describe("Zero pad for convolution: (h, w) or (d, h, w). 
Defaults to no padding."); DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000) .describe("Convolution filter(channel) number"); diff --git a/src/operator/contrib/index_copy-inl.h b/src/operator/contrib/index_copy-inl.h index 35f88916da20..9f78f0593ed1 100644 --- a/src/operator/contrib/index_copy-inl.h +++ b/src/operator/contrib/index_copy-inl.h @@ -64,7 +64,7 @@ inline bool IndexCopyShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->at(1).ndim(), 1); // Shape matching CHECK_EQ(in_attrs->at(0).ndim(), in_attrs->at(2).ndim()); - for (size_t i = 0; i < in_attrs->at(0).ndim(); ++i) { + for (int i = 0; i < in_attrs->at(0).ndim(); ++i) { if (i == 0) { CHECK_GE(in_attrs->at(0)[i], in_attrs->at(2)[i]); } else { diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h index a937f839c9bb..e331255c2e50 100644 --- a/src/operator/mxnet_op.h +++ b/src/operator/mxnet_op.h @@ -781,6 +781,7 @@ struct Kernel { /*! \brief Launch GPU kernel */ template inline static void Launch(mshadow::Stream *s, int N, Args... args) { + if (0 == N) return; using namespace mshadow::cuda; int ngrid = std::min(kMaxGridNum, (N + kBaseThreadNum - 1) / kBaseThreadNum); mxnet_generic_kernel @@ -791,6 +792,7 @@ struct Kernel { template inline static void LaunchEx(mshadow::Stream *s, const int N, Args... args) { + if (0 == N) return; using namespace mshadow::cuda; int ngrid = std::min(kMaxGridNum, (N + kBaseThreadNum - 1) / kBaseThreadNum); mxnet_generic_kernel_ex diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index b534ee58e85c..8fb229889332 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -234,9 +234,10 @@ bool SupportMKLDNNConcat(const std::vector &arrs) { for (auto &arr : arrs) { if (arr.IsView()) return false; if (arr.dtype() != mshadow::kFloat32) return false; + // DO not support zero-size tensors. 
+ if (arr.shape().Size() == 0) return false; int ndim = arr.shape().ndim(); - unsigned mkldnn_ndims = - static_cast(arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims); + const int mkldnn_ndims = arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims; if (!(ndim == 2 || ndim == 4) || ndim != mkldnn_ndims) return false; } return true; diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h index 44d1c3c36e99..679e0cd1057b 100644 --- a/src/operator/nn/cudnn/cudnn_convolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h @@ -1016,7 +1016,7 @@ class CuDNNConvolutionOp { template inline Shape Strides(const mxnet::TShape &s) { int ndim = s.ndim(); - mxnet::TShape strides(ndim); + mxnet::TShape strides(ndim, -1); for (int i = 0; i != ndim; ++i) strides[i] = s.ProdShape(i+1, ndim); return strides.get(); diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h index f652dd85bd41..adb6caf1c028 100644 --- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h +++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h @@ -934,7 +934,7 @@ class CuDNNDeconvolutionOp { template inline Shape Strides(const mxnet::TShape &s) { int ndim = s.ndim(); - mxnet::TShape strides(ndim); + mxnet::TShape strides(ndim, -1); for (int i = 0; i != ndim; ++i) strides[i] = s.ProdShape(i+1, ndim); return strides.get(); diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h index e82a073ea08d..1eeccb02e030 100644 --- a/src/operator/nn/deconvolution-inl.h +++ b/src/operator/nn/deconvolution-inl.h @@ -143,7 +143,7 @@ struct DeconvolutionParam : public dmlc::Parameter { } } } else { - for (int i = 0; i < (int) ndim; i++) { + for (int i = 0; i < static_cast(ndim); i++) { o_pad[i] = i < pad.ndim() ? pad[i] : 0; o_adj[i] = i < adj.ndim() ? 
adj[i] : 0; } diff --git a/src/operator/nn/mkldnn/mkldnn_slice.cc b/src/operator/nn/mkldnn/mkldnn_slice.cc index 96a8afdab6e2..2a817a25a5b8 100644 --- a/src/operator/nn/mkldnn/mkldnn_slice.cc +++ b/src/operator/nn/mkldnn/mkldnn_slice.cc @@ -37,10 +37,10 @@ MKLDNNSliceFwd::MKLDNNSliceFwd(const SliceParam ¶m, const NDArray &out) { const mxnet::TShape ishape = in.shape(); const mxnet::TShape oshape = out.shape(); - uint32_t N = ishape.ndim(); + const int N = ishape.ndim(); mkldnn::memory::dims dims(N); mkldnn::memory::dims offsets(N); - for (uint32_t i = 0; i < N; ++i) { + for (int i = 0; i < N; ++i) { int s = 0; if (i < param.begin.ndim() && param.begin[i]) { s = *param.begin[i]; diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h deleted file mode 100644 index e0379a040c3f..000000000000 --- a/src/operator/numpy/np_broadcast_reduce_op.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * Copyright (c) 2015 by Contributors - * \file broadcast_reduce_op.h - * \brief Function definition of broadcast and reduce operators - */ -#ifndef MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ -#define MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ - -#include "../tensor/broadcast_reduce_op.h" - -namespace mxnet { -namespace op { - -struct NumpyReduceAxesParam : public dmlc::Parameter { - dmlc::optional> axis; - dmlc::optional dtype; - bool keepdims; - dmlc::optional initial; - DMLC_DECLARE_PARAMETER(NumpyReduceAxesParam) { - DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional>()) - .describe(R"code()code"); - DMLC_DECLARE_FIELD(dtype).set_default(dmlc::optional()) - .describe(""); - DMLC_DECLARE_FIELD(keepdims).set_default(false) - .describe("If this is set to `True`, the reduced axes are left " - "in the result as dimension with size one."); - } -}; - -inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, - const dmlc::optional>& axis, - bool keepdims) { - // TODO(junwu): improve the logic - // If input is a scalar, output should be a scalar too - if (ishape.ndim() == 0) { - if (axis.has_value()) { - const nnvm::Tuple& axes = axis.value(); - if (axes.ndim() > 0) { - CHECK_EQ(axes.ndim(), 1); - CHECK(axes[0] == 0 || axes[0] == -1); - } - } - return TShape(0, -1); - } - - // axis=None, do global reduction - if (!axis.has_value()) { - if (keepdims) { - return TShape(ishape.ndim(), 1); - } else { - return TShape(0, -1); - } - } - - // axis = (), will return identity(input) - if (axis.value().ndim() == 0) { - return ishape; - } - - // axis has value - nnvm::Tuple axes(axis.value()); - for (index_t i = 0; i < axes.ndim(); i++) { - if (axes[i] < 0) { - axes[i] += ishape.ndim(); - } - } - std::sort(axes.begin(), axes.end()); - - for (index_t i = 1; i < axes.ndim(); i++) { - CHECK_LT(axes[i-1], axes[i]) - << "Reduction axes have duplicates " - << axes; - } - CHECK_LT(axes[axes.ndim()-1], ishape.ndim()) - << "Reduction axis " << axes[axes.ndim()-1] - << " 
Exceeds input dimensions " << ishape; - CHECK_GE(axes[0], 0) - << "Reduction axis " << axis.value() - << " Exceeds input dimensions " << ishape; - - TShape oshape; - if (keepdims) { - oshape = TShape(ishape); - } else { - oshape = TShape(ishape.ndim() - axes.ndim(), -1); - } - - if (keepdims) { - for (index_t i = 0; i < axes.ndim(); ++i) { - oshape[axes[i]] = 1; - } - } else { - for (index_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) { - if (j < axes.ndim() && i == axes[j]) { - ++j; - continue; - } - oshape[k++] = ishape[i]; - } - } - return oshape; -} - -inline bool NumpyReduceAxesShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - if (!shape_is_known(in_attrs->at(0))) { - return false; - } - const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); - SHAPE_ASSIGN_CHECK(*out_attrs, 0, - NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims)); - return shape_is_known(out_attrs->at(0)); -} - -template -void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); - if (param.axis.has_value() && param.axis.value().ndim() == 0) { - UnaryOp::IdentityCompute(attrs, ctx, inputs, req, outputs); - } - TShape small; - if (param.keepdims) { - small = outputs[0].shape_; - } else { - small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true); - } - - ReduceAxesComputeImpl(ctx, inputs, req, outputs, small); -} - -template -inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - using namespace mshadow; - using namespace mshadow::expr; - const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); - TShape small; - if (param.keepdims) { - small = 
inputs[0].shape_; - } else { - small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true); - } - - BroadcastComputeImpl(attrs, ctx, inputs, req, outputs, small); - if (normalize) { - Stream *s = ctx.get_stream(); - MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { - Tensor igrad = outputs[0].FlatTo1D(s); - igrad /= scalar(outputs[0].Size()/inputs[0].Size()); - }); - } -} - -} // namespace op -} // namespace mxnet -#endif // MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc deleted file mode 100644 index c028e2368737..000000000000 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cc +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2019 by Contributors - * \file np_reduce_op_value.cc - * \brief CPU Implementation of broadcast and reduce functions based on value. 
- */ - -#include "np_broadcast_reduce_op.h" - -namespace mxnet { -namespace op { - -DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam); - -NNVM_REGISTER_OP(_numpy_sum) -.describe(R"code()code" ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FInferShape", NumpyReduceAxesShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FListInputNames", - [](const NodeAttrs& attrs) { - return std::vector{"a"}; - }) -.add_argument("a", "NDArray-or-Symbol", "The input") -.add_arguments(NumpyReduceAxesParam::__FIELDS__()) -.set_attr("FCompute", NumpyReduceAxesCompute) -.set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -.set_attr("FGradient", ElemwiseGradUseNone{"_backward_numpy_sum"}); - -NNVM_REGISTER_OP(_backward_numpy_sum) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("TIsBackward", true) -.set_num_inputs(1) -.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); - -} // namespace op -} // namespace mxnet diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu deleted file mode 100644 index c975b18226db..000000000000 --- a/src/operator/numpy/np_broadcast_reduce_op_value.cu +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * Copyright (c) 2019 by Contributors - * \file np_reduce_op_value.cu - * \brief GPU Implementation of reduce functions based on value. - */ -#include "np_broadcast_reduce_op.h" - -namespace mxnet { -namespace op { -NNVM_REGISTER_OP(_numpy_sum) -.set_attr("FCompute", NumpyReduceAxesCompute); - -NNVM_REGISTER_OP(_backward_numpy_sum) -.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); - -} // namespace op -} // namespace mxnet diff --git a/src/operator/pad-inl.h b/src/operator/pad-inl.h index 140d7099e817..89b0ab7780b6 100644 --- a/src/operator/pad-inl.h +++ b/src/operator/pad-inl.h @@ -230,7 +230,7 @@ class PadProp : public OperatorProperty { } } mxnet::TShape oshape = dshape; - for (size_t i = 0; i < dshape.ndim(); ++i) { + for (int i = 0; i < dshape.ndim(); ++i) { oshape[i] = param_.pad_width[2 * i] + param_.pad_width[2 * i + 1] + dshape[i]; } diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h index 22a166cbb6cc..4241b08a0c5e 100644 --- a/src/operator/pooling_v1-inl.h +++ b/src/operator/pooling_v1-inl.h @@ -55,7 +55,7 @@ struct PoolingV1Param : public dmlc::Parameter { int pooling_convention; bool global_pool; DMLC_DECLARE_PARAMETER(PoolingV1Param) { - DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0, -1)) .enforce_nonzero() .describe("pooling kernel size: (y, x) or (d, y, x)"); @@ -73,11 +73,11 @@ struct PoolingV1Param : public dmlc::Parameter { .add_enum("valid", pool_v1_enum::kValid) .describe("Pooling convention to be applied."); - 
DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, -1)) .enforce_nonzero() .describe("stride: for pooling (y, x) or (d, y, x)"); - DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, -1)) .describe("pad for pooling: (y, x) or (d, y, x)"); } }; diff --git a/src/operator/quantization/dequantize-inl.h b/src/operator/quantization/dequantize-inl.h index 88199bc2591d..7c91ad507fd9 100644 --- a/src/operator/quantization/dequantize-inl.h +++ b/src/operator/quantization/dequantize-inl.h @@ -99,7 +99,7 @@ inline bool DequantizeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 1U); for (size_t i = 1; i < 3; ++i) { - SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape({1})); + SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape(1, 1)); } SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); diff --git a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h index 45713589dd48..ac414c72d51a 100644 --- a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h +++ b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h @@ -115,7 +115,7 @@ static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs, const size_t actual_float_size = sizeof(float); const size_t actual_quantized_size = sizeof(SrcDType); const size_t temp_reduce_size = ConfigReduce(s, - inputs[0].shape(), mxnet::TShape({1}), &src_shape, &dst_shape); + inputs[0].shape(), mxnet::TShape(1, 1), &src_shape, &dst_shape); Tensor temp_space = ctx.requested[0].get_space_typed( Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s); diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h index 2c267a76a571..7b856579a7b5 100644 --- a/src/operator/quantization/quantize-inl.h +++ b/src/operator/quantization/quantize-inl.h @@ -120,7 +120,7 @@ inline bool QuantizeShape(const 
nnvm::NodeAttrs& attrs, CHECK_EQ(out_attrs->size(), 3U); for (size_t i = 1; i < 3; ++i) { - SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape({1})); + SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape(1, 1)); } SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h index 02ace6c39fac..9ebb645e1ba6 100644 --- a/src/operator/quantization/quantize_v2-inl.h +++ b/src/operator/quantization/quantize_v2-inl.h @@ -175,7 +175,7 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx, mxnet::TShape src_shape, dst_shape; const size_t actual_float_size = sizeof(float); const size_t temp_reduce_size = ConfigReduce( - s, inputs[0].shape_, mxnet::TShape({1}), &src_shape, &dst_shape); + s, inputs[0].shape_, mxnet::TShape(1, 1), &src_shape, &dst_shape); Tensor temp_space = ctx.requested[0].get_space_typed( Shape1(2 * actual_float_size + temp_reduce_size), s); const int dev_id = ctx.run_ctx.ctx.dev_id; diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc index 2cc2ec9d0374..d6aeb41da1f8 100644 --- a/src/operator/quantization/quantized_concat.cc +++ b/src/operator/quantization/quantized_concat.cc @@ -35,23 +35,23 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha CHECK_EQ(out_shape->size(), 3U); mxnet::TShape dshape; index_t size = 0; - bool has_zero = false; + bool has_unknown_dim_size = false; int axis = -1; for (int i = 0; i < param_.num_args; ++i) { mxnet::TShape tmp = (*in_shape)[i]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = CheckAxis(param_.dim, tmp.ndim()); - has_zero = tmp[axis] == 0 || has_zero; + has_unknown_dim_size = !mxnet::dim_size_is_known(tmp, axis) || has_unknown_dim_size; size += tmp[axis]; - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } } mxnet::TShape tmp = (*out_shape)[0]; - if (tmp.ndim()) { + if (tmp.ndim() > 0) { axis = 
CheckAxis(param_.dim, tmp.ndim()); - tmp[axis] = 0; + tmp[axis] = -1; shape_assign(&dshape, tmp); } @@ -62,7 +62,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha << "Incompatible input shape: expected " << dshape << ", got " << (*in_shape)[i]; } - if (!has_zero) dshape[axis] = size; + if (!has_unknown_dim_size) dshape[axis] = size; CHECK(shape_assign(&(*out_shape)[0], dshape)) << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0]; @@ -71,7 +71,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha } SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape{1}); SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape{1}); - return dshape.Size() != 0; + return shape_is_known(dshape); } static bool ConcatType(const nnvm::NodeAttrs& attrs, std::vector* in_type, diff --git a/src/operator/quantization/quantized_conv.cc b/src/operator/quantization/quantized_conv.cc index 1a801ee50744..aa3f5ce1ad61 100644 --- a/src/operator/quantization/quantized_conv.cc +++ b/src/operator/quantization/quantized_conv.cc @@ -78,8 +78,8 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs, oshape[W] = (AddPad(dshape[W], param.pad[1]) - wshape[W]) / param.stride[1] + 1; SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape); - SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape({1})); - SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape({1})); + SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1)); return true; } diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc index cc4365f818d2..e42ea3020352 100644 --- a/src/operator/quantization/quantized_fully_connected.cc +++ b/src/operator/quantization/quantized_fully_connected.cc @@ -75,8 +75,8 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs, } else { SHAPE_ASSIGN_CHECK(*out_shape, 0, Shape2(dshape[0], param.num_hidden)); } - 
SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape({1})); - SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape({1})); + SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1)); + SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1)); return true; } diff --git a/src/operator/quantization/requantize-inl.h b/src/operator/quantization/requantize-inl.h index 21d58d4607eb..9106c7fe4716 100644 --- a/src/operator/quantization/requantize-inl.h +++ b/src/operator/quantization/requantize-inl.h @@ -111,7 +111,7 @@ void RequantizeForward(const nnvm::NodeAttrs& attrs, const size_t actual_float_size = sizeof(float); const size_t actual_quantized_size = sizeof(SrcDType); const size_t temp_reduce_size = ConfigReduce( - s, inputs[0].shape_, mxnet::TShape({1}), &src_shape, &dst_shape); + s, inputs[0].shape_, mxnet::TShape(1, 1), &src_shape, &dst_shape); Tensor temp_space = ctx.requested[0].get_space_typed( Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s); diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h index 41cb940d957a..7335daa48392 100644 --- a/src/operator/swapaxis-inl.h +++ b/src/operator/swapaxis-inl.h @@ -69,7 +69,7 @@ class SwapAxisOp : public Operator { void Reshape2Five(mshadow::Shape<5> *inter_shape, const mxnet::TShape &shape, - uint32_t dim1, uint32_t dim2) { + int dim1, int dim2) { using namespace mshadow; using namespace mshadow::expr; int ndim_in = shape.ndim(); diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index ac714c2cfcdd..9392d4129406 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -139,9 +139,9 @@ struct BroadcastAxesParam : public dmlc::Parameter { mxnet::TShape axis; mxnet::TShape size; DMLC_DECLARE_PARAMETER(BroadcastAxesParam) { - DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape(0, -1)) .describe("The axes to perform the broadcasting."); - 
DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape(0, -1)) .describe("Target sizes of the broadcasting axes."); } }; @@ -149,7 +149,7 @@ struct BroadcastAxesParam : public dmlc::Parameter { struct BroadcastToParam : public dmlc::Parameter { mxnet::TShape shape; DMLC_DECLARE_PARAMETER(BroadcastToParam) { - DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0, -1)) .describe("The shape of the desired array." " We can set the dim to zero if it's same as the original." " E.g `A = broadcast_to(B, shape=(10, 0, 0))` " diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h index 64a4d7cc15ff..73019fa8389b 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op.h +++ b/src/operator/tensor/elemwise_binary_broadcast_op.h @@ -61,16 +61,13 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, int l = 1, r = 1; if (i >= bl) l = lhs[i-bl]; if (i >= br) r = rhs[i-br]; + if (!mxnet::dim_size_is_known(l) || !mxnet::dim_size_is_known(r)) continue; if (l != r) { - if (l == 0 || r == 0) { - // TODO(junwu): here is not compatible with NumPy. - // For example, (2, 3) cannot broadcast to (2, 0, 3). - out[i] = 0; - } else { - CHECK(l == 1 || r == 1) - << "operands could not be broadcast together with shapes " << lhs << " " << rhs; - out[i] = std::max(l, r); - } + // Make it compatible with NumPy. + // For example, (2, 3) cannot broadcast to (2, 0, 3), but (1, 3) can broadcast to (2, 0, 3). + CHECK(l == 1 || r == 1) + << "operands could not be broadcast together with shapes " << lhs << " " << rhs; + out[i] = (l == 1 ? 
r : l); } else { out[i] = l; } diff --git a/src/operator/tensor/histogram-inl.h b/src/operator/tensor/histogram-inl.h index 9cf9c490bba2..7194445d7b52 100644 --- a/src/operator/tensor/histogram-inl.h +++ b/src/operator/tensor/histogram-inl.h @@ -86,9 +86,9 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, if (has_cnt) { // if cnt is specified, the output histogram has shape (cnt,) // while output bins has shape (cnt+1,) - const int bin_cnt = param.bin_cnt.value(); - SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({bin_cnt})); - SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape({bin_cnt + 1})); + const dim_t bin_cnt = param.bin_cnt.value(); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, bin_cnt)); + SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1, bin_cnt + 1)); } else { // if cnt is not specified, the output histogram has shape (bins.Size() - 1) // while output bins has same shape as input bins @@ -97,7 +97,7 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(oshape.ndim(), 1U) << "bins argument should be an 1D vector"; CHECK_GE(oshape.Size(), 2U) << "number of bounds should be >= 2"; - SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({(oshape[0] - 1)})); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, oshape[0] - 1)); SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1)); } diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h index bcad602c95c0..b2e3830064ae 100644 --- a/src/operator/tensor/init_op.h +++ b/src/operator/tensor/init_op.h @@ -278,6 +278,8 @@ inline bool InitStorageType(const nnvm::NodeAttrs& attrs, */ template void Fill(mshadow::Stream *s, const TBlob& b, const OpReqType req, ValueType val) { + // If b is a zero-size tensor, do nothing. 
+ if (b.Size() == 0) return; if (req != kNullOp) { const size_t size = b.Size(); if (val == 0) { diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h index 87e911a56800..0e7f66240926 100644 --- a/src/operator/tensor/matrix_op-inl.h +++ b/src/operator/tensor/matrix_op-inl.h @@ -59,7 +59,7 @@ struct ReshapeParam : public dmlc::Parameter { .set_default(false) .describe("If true then the special values are inferred from right to left"); DMLC_DECLARE_FIELD(target_shape) - .set_default(mxnet::TShape(0)) + .set_default(mxnet::TShape(0, -1)) .describe("(Deprecated! Use ``shape`` instead.) " "Target new shape. One and only one dim can be 0, " "in which case it will be inferred from the rest of dims"); @@ -209,7 +209,8 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs, oshape[inf_idx] = dshape.Size() / oshape.Size(); } } else { - return shape_is_known((*out_attrs)[0]) && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); + return shape_is_known((*out_attrs)[0]) + && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]); } ReverseReshapeInferShape(&dshape, oshape); #if 0 @@ -240,7 +241,7 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs, struct TransposeParam : public dmlc::Parameter { mxnet::TShape axes; DMLC_DECLARE_PARAMETER(TransposeParam) { - DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0)) + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0, -1)) .describe("Target axis order. 
By default the axes will be inverted."); } @@ -1158,9 +1159,14 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(in_attrs->size(), 1U); CHECK_EQ(out_attrs->size(), 1U); mxnet::TShape& ishape = (*in_attrs)[0]; + if (!mxnet::ndim_is_known(ishape)) return false; int axis; index_t begin, end; GetSliceAxisParams(param, ishape, &axis, &begin, &end); + if (!mxnet::dim_size_is_known(ishape, axis)) { + SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape); + return false; + } mxnet::TShape shape(ishape.ndim(), -1); for (int i = 0; i < ishape.ndim(); ++i) { if (static_cast(i) == axis) { diff --git a/tests/cpp/include/test_mkldnn.h b/tests/cpp/include/test_mkldnn.h index a379dab7bf90..f1682772a14a 100644 --- a/tests/cpp/include/test_mkldnn.h +++ b/tests/cpp/include/test_mkldnn.h @@ -49,7 +49,7 @@ inline static mkldnn::memory::primitive_desc GetMemPD(const mxnet::TShape s, int inline static mkldnn::memory::primitive_desc GetExpandedMemPD( mkldnn::memory::primitive_desc pd, float scale, int dim = 0) { CHECK(dim < pd.desc().data.ndims) << "dimension cannot be larger than total dimensions of input"; - mxnet::TShape s(pd.desc().data.ndims); + mxnet::TShape s(pd.desc().data.ndims, -1); for (size_t i = 0; i < pd.desc().data.ndims; i++) s[i] = pd.desc().data.dims[i]; s[dim] = static_cast(s[dim] * scale); @@ -165,7 +165,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals std::vector pds; { // 1D - mxnet::TShape s(1); + mxnet::TShape s(1, -1); s[0] = 279936; shapes.push_back(s); pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x)); @@ -175,7 +175,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals } { // 2D - mxnet::TShape s(2); + mxnet::TShape s(2, -1); s[0] = 96; s[1] = 2916; shapes.push_back(s); @@ -187,12 +187,12 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals } { // 4D - mxnet::TShape s1(4); + mxnet::TShape s1(4, -1); s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 
54; shapes.push_back(s1); pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw)); - mxnet::TShape s2(4); + mxnet::TShape s2(4, -1); s2[0] = 96; s2[1] = 3; s2[2] = 11; s2[3] = 11; shapes.push_back(s2); pds.push_back(GetMemPD(s2, dtype, mkldnn::memory::format::oihw)); @@ -204,7 +204,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals } { // 5D - mxnet::TShape s(5); + mxnet::TShape s(5, -1); s[0] = 96; s[1] = 1; s[2] = 3; s[3] = 11; s[4] = 11; shapes.push_back(s); pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::goihw)); @@ -259,7 +259,7 @@ enum ArrayTypes { inline NDArray CreateKernelNDArray(mxnet::TShape kernel, int num_filters, mxnet::TShape input, bool is_deconv = false) { CHECK_EQ(kernel.ndim(), 2) << "mkldnn only supports 2d filters on 4d inputs"; - mxnet::TShape target_shape(4); + mxnet::TShape target_shape(4, -1); target_shape[0] = is_deconv ? input[1] : num_filters; target_shape[1] = is_deconv ? num_filters : input[1]; target_shape[2] = kernel[0]; @@ -470,7 +470,7 @@ inline std::vector GetTestOutputArrays( in_arrs.emplace_back(arr0.Slice(1, shape[0] + 1), "Reshaped NDArray"); } - mxnet::TShape s(1); + mxnet::TShape s(1, -1); if (types & ArrayTypes::NormalReused) { // Type 5. // Get a reused version. @@ -528,7 +528,7 @@ inline std::vector GetTestOutputArrays( // Type 8, 9. // Get a reused version. 
- mxnet::TShape s(1); + mxnet::TShape s(1, -1); s[0] = shape.Size(); NDArray arr = NDArray(s, Context()); arr = arr.AsArray(shape, arr.dtype()); diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h index e0caddbcd027..b0114e1721ef 100644 --- a/tests/cpp/include/test_util.h +++ b/tests/cpp/include/test_util.h @@ -353,14 +353,14 @@ inline StreamType& print_blob_(const RunContext& ctx, if (dim == 1) { // probably a 1d tensor (mshadow::Tensor is deprecated) - TBlob changed(blob.dptr(), mxnet::TShape(3), blob.dev_mask(), blob.dev_id()); + TBlob changed(blob.dptr(), mxnet::TShape(3, -1), blob.dev_mask(), blob.dev_id()); changed.shape_[0] = 1; changed.shape_[1] = 1; changed.shape_[2] = blob.shape_[0]; return print_blob_(ctx, &os, changed, false, false, add_endl); } else if (dim == 2) { // probably a 2d tensor (mshadow::Tensor is deprecated) - TBlob changed(blob.dptr(), mxnet::TShape(4), blob.dev_mask(), blob.dev_id()); + TBlob changed(blob.dptr(), mxnet::TShape(4, -1), blob.dev_mask(), blob.dev_id()); changed.shape_[0] = 1; changed.shape_[1] = 1; changed.shape_[2] = blob.shape_[0]; diff --git a/tests/cpp/misc/serialization.cc b/tests/cpp/misc/serialization.cc index 77014238c2fa..2509a43c27ee 100644 --- a/tests/cpp/misc/serialization.cc +++ b/tests/cpp/misc/serialization.cc @@ -48,7 +48,7 @@ TEST(SerializerTest, OutputMapCorrect) { std::map > output_map; output_map.emplace("output_0", std::make_tuple(1, mxnet::TShape({23, 12, 63, 432}), 0, 1)); output_map.emplace("another_output", std::make_tuple(2, mxnet::TShape({23, 123}), 14, -23)); - output_map.emplace("last_output", std::make_tuple(0, mxnet::TShape({0}), -1, 0)); + output_map.emplace("last_output", std::make_tuple(0, mxnet::TShape(1, 0), -1, 0)); std::string serialized_data; common::Serialize(output_map, &serialized_data); std::map > deserialized_output_map; diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc index d74493a0f7fb..ed0e70b831f1 100644 --- 
a/tests/cpp/operator/batchnorm_test.cc +++ b/tests/cpp/operator/batchnorm_test.cc @@ -1266,7 +1266,7 @@ static void testSaveAndLoad(const std::vector& dims, ChannelAxisTestData data; data.channel_data_ = inputChannelData; - mxnet::TShape shape(dims.size()); + mxnet::TShape shape(dims.size(), -1); for (size_t i = 0, n = dims.size(); i < n; ++i) { shape[i] = index_t(dims[i]); } @@ -1322,7 +1322,7 @@ static mxnet::TShape MakeShape(const std::vector& shape, } CHECK_LT(channelAxis, shape.size() + 1); const index_t dim = index_t(shape.size()) + 1; - mxnet::TShape newShape(dim); + mxnet::TShape newShape(dim, -1); for (size_t x = 0; x < static_cast(channelAxis); ++x) { newShape[x] = index_t(shape[x]); } diff --git a/tests/cpp/operator/mkldnn_operator_test.cc b/tests/cpp/operator/mkldnn_operator_test.cc index 559ab5da0ccc..961785dcfc87 100644 --- a/tests/cpp/operator/mkldnn_operator_test.cc +++ b/tests/cpp/operator/mkldnn_operator_test.cc @@ -916,13 +916,13 @@ void TestFullyConnectedOp(const OpAttrs &forward_attrs, const OpAttrs &backwards if (in_shape.ndim() < 2) continue; - mxnet::TShape wt_shape(2); + mxnet::TShape wt_shape(2, -1); wt_shape[0] = num_hid; wt_shape[1] = GetFCWeightDim2(in_shape); NDArray weights(wt_shape, Context()); InitDefaultArray(&weights, false); - mxnet::TShape bias_shape(1); + mxnet::TShape bias_shape(1, -1); bias_shape[0] = num_hid; NDArray bias(bias_shape, Context()); InitDefaultArray(&bias, false); @@ -931,7 +931,7 @@ void TestFullyConnectedOp(const OpAttrs &forward_attrs, const OpAttrs &backwards inputs[1] = &weights; inputs[2] = &bias; - mxnet::TShape out_shape(2); + mxnet::TShape out_shape(2, -1); out_shape[0] = in_shape[0]; out_shape[1] = num_hid; diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index fbbfc53a9a5e..f8ebe9517bac 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -1963,19 +1963,21 @@ def check_proposal_consistency(op, batch_size, 
with_nms=False): # The following 2 functions launch 0-thread kernels, an error that should be caught and signaled. def kernel_error_check_imperative(): os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' - a = mx.nd.array([1,2,3],ctx=mx.gpu(0)) - b = mx.nd.array([],ctx=mx.gpu(0)) - c = (a / b).asnumpy() + with mx.enable_np_comp(): + a = mx.nd.array([1,2,3],ctx=mx.gpu(0)) + b = mx.nd.array([],ctx=mx.gpu(0)) + c = (a / b).asnumpy() def kernel_error_check_symbolic(): os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' - a = mx.sym.Variable('a') - b = mx.sym.Variable('b') - c = a / b - f = c.bind(mx.gpu(0), { 'a':mx.nd.array([1,2,3],ctx=mx.gpu(0)), - 'b':mx.nd.array([],ctx=mx.gpu(0))}) - f.forward() - g = f.outputs[0].asnumpy() + with mx.enable_np_comp(): + a = mx.sym.Variable('a') + b = mx.sym.Variable('b') + c = a / b + f = c.bind(mx.gpu(0), { 'a':mx.nd.array([1,2,3],ctx=mx.gpu(0)), + 'b':mx.nd.array([],ctx=mx.gpu(0))}) + f.forward() + g = f.outputs[0].asnumpy() def test_kernel_error_checking(): # Running tests that may throw exceptions out of worker threads will stop CI testing diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py index 73654a604135..e0b4d35ea9aa 100644 --- a/tests/python/unittest/test_infer_shape.py +++ b/tests/python/unittest/test_infer_shape.py @@ -147,6 +147,21 @@ def test_fc_infer_type(): assert arg_type_dict[k] == v +def test_shape_completely_unknown(): + data = mx.sym.var("data") + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape_partial() + assert arg_shapes[0] == () + assert out_shapes[0] == () + + with mx.enable_np_comp(): + data = mx.sym.var("data") + ret = mx.sym.sin(data) + arg_shapes, out_shapes, _ = ret.infer_shape_partial() + assert arg_shapes[0] is None + assert out_shapes[0] is None + + if __name__ == "__main__": test_mlp2_infer_shape() test_mlp2_infer_error() @@ -156,3 +171,4 @@ def test_fc_infer_type(): test_incomplete_infer_slicechannel() 
test_incomplete_infer_convolution() test_incomplete_infer_concat() + test_shape_completely_unknown() diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index 1bdb7d51df67..f8f52c2cee24 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -123,7 +123,7 @@ def test_ndarray_setitem(): # numpy assignment for empty axis for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]: if trivial_shape == tuple(): - with mx.numpy.enable_np_comp(): + with mx.enable_np_comp(): x = mx.nd.zeros(trivial_shape) else: x = mx.nd.zeros(trivial_shape) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index d44f1113e788..b82b0e9b5746 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4403,7 +4403,7 @@ def test_invalid_reps(): assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3)) test_normal_case() - with mx.numpy.enable_np_comp(): + with mx.enable_np_comp(): test_empty_tensor() test_empty_reps() test_tile_backward() @@ -4464,7 +4464,7 @@ def test_zero_depth(): test_normal_case(index_type=np.float64) test_normal_case(index_type=np.float32) test_normal_case(index_type=np.float16) - with mx.numpy.enable_np_comp(): + with mx.enable_np_comp(): test_empty_indices() test_zero_depth() @@ -6850,6 +6850,20 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape): check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0)) check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0)) + with mx.enable_np_comp(): + var1 = mx.sym.var(name="data", shape=(-1, 20)) + check_slice_partial_infer(var1, (None, None), (None, 10), [], (-1, 10)) + check_slice_partial_infer(var1, (None, None), (None, 10), (None, 2), (-1, 5)) + check_slice_partial_infer(var1, (None, 3), (None, 10), [], (-1, 7)) + check_slice_partial_infer(var1, (None, 3), (5, 10), [], (-1, 7)) + check_slice_partial_infer(var1, (2, 3), (None, 10), [], (-1, 7)) + 
check_slice_partial_infer(var1, (2, 3), (None, 10), (None, 1), (-1, 7)) + check_slice_partial_infer(var1, (2, 3), (None, 10), (3, 3), (-1, 3)) + + var1 = mx.sym.var(name='data', shape=(10, -1)) + check_slice_axis_partial_infer(var1, 0, 0, 5, (5, -1)) + check_slice_axis_partial_infer(var1, 1, 0, 5, (10, -1)) + @with_seed() def test_float16_min_max(): @@ -7898,6 +7912,42 @@ def test_image_normalize(): check_numeric_gradient(img_norm_sym, [data_in_4d], atol=0.001) +@with_seed() +def test_scalar_tensor_creation(): + assertRaises(MXNetError, mx.nd.zeros, shape=()) + assertRaises(MXNetError, mx.nd.ones, shape=()) + with mx.enable_np_comp(): + data_mx = mx.nd.ones(shape=()) + data_np = np.ones((), dtype=data_mx.dtype) + assert same(data_mx.asnumpy(), data_np) + + +@with_seed() +def test_zero_size_tensor_creation(): + assertRaises(MXNetError, mx.nd.zeros, shape=(0, 1, 3, 0)) + assertRaises(MXNetError, mx.nd.ones, shape=(0, 1, 3, 0)) + with mx.enable_np_comp(): + data_mx = mx.nd.ones(shape=(0, 1, 0, 4)) + data_np = np.ones(shape=data_mx.shape, dtype=data_mx.dtype) + assert same(data_mx.asnumpy(), data_np) + + +@with_seed() +def test_concat_with_zero_size_tensor(): + with mx.enable_np_comp(): + data1 = mx.nd.ones((0, 8, 12)) + data2 = mx.nd.ones((3, 8, 12)) + data3 = mx.nd.ones((0, 8, 12)) + ret = mx.nd.Concat(data1, data2, data3, dim=0) + assert ret.shape == (3, 8, 12) + + data1 = mx.nd.ones((0, 3, 10)) + data2 = mx.nd.ones((0, 4, 10)) + data3 = mx.nd.ones((0, 5, 10)) + ret = mx.nd.Concat(data1, data2, data3, dim=1) + assert ret.shape == (0, 12, 10) + + if __name__ == '__main__': import nose nose.runmodule() From 7a83953eef93902a397937b3cb7ff6d8b516fd1d Mon Sep 17 00:00:00 2001 From: Yizhi Liu Date: Tue, 9 Apr 2019 19:59:09 -0700 Subject: [PATCH 18/32] fix invalid ndarray dispose (#14657) --- .../core/src/main/scala/org/apache/mxnet/Executor.scala | 9 ++++++--- src/common/utils.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git 
a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala index f51424b7edf6..b0fae0f9d58d 100644 --- a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala +++ b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala @@ -61,6 +61,8 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle, protected var monitorCallback: MXMonitorCallback = null private val logger: Logger = LoggerFactory.getLogger(classOf[Executor]) + private var reshaped = false + override def nativeAddress: CPtrAddress = handle override def nativeDeAllocator: (CPtrAddress => Int) = _LIB.mxExecutorFree // cannot determine the off-heap size of this object @@ -71,12 +73,12 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle, if (!super.isDisposed) { super.dispose() outputs.foreach(o => o.dispose()) - if (argArrays != null) {argArrays.foreach(a => a.dispose())} - if (gradArrays != null) {gradArrays.foreach( + if (reshaped && argArrays != null) {argArrays.foreach(a => a.dispose())} + if (reshaped && gradArrays != null) {gradArrays.foreach( // Symbol will sometimes fill this with nulls so we've got to check the elements too a => if (a != null) {a.dispose()}) } - if (auxArrays != null) {auxArrays.foreach(a => a.dispose())} + if (reshaped && auxArrays != null) {auxArrays.foreach(a => a.dispose())} } } @@ -146,6 +148,7 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle, executor.argArrays = argArrays executor.gradArrays = gradArrays executor.auxArrays = auxArrays + executor.reshaped = true executor } diff --git a/src/common/utils.h b/src/common/utils.h index 6cdb869ff9ae..251a8fe3c190 100644 --- a/src/common/utils.h +++ b/src/common/utils.h @@ -654,7 +654,7 @@ FCompType GetFCompute(const nnvm::Op* op, const std::string& name, } else if (ctx.dev_mask() == gpu::kDevMask) { return fcompute_gpu.get(op, nullptr); } else { - LOG(FATAL) << "Unknown 
device mask"; + LOG(FATAL) << "Unknown device mask " << ctx.dev_mask(); return nullptr; } } From 3eab56abc9890ecb5d104b58527885e5d81fcfac Mon Sep 17 00:00:00 2001 From: Sergey Kolychev Date: Tue, 9 Apr 2019 23:19:01 -0700 Subject: [PATCH 19/32] swig fixes for the changes in c_api.h (#14655) --- perl-package/AI-MXNetCAPI/mxnet_typemaps.i | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/perl-package/AI-MXNetCAPI/mxnet_typemaps.i b/perl-package/AI-MXNetCAPI/mxnet_typemaps.i index 50296c2aaba5..3ec9f95ea9c3 100644 --- a/perl-package/AI-MXNetCAPI/mxnet_typemaps.i +++ b/perl-package/AI-MXNetCAPI/mxnet_typemaps.i @@ -524,13 +524,13 @@ } } -%typemap(in,numinputs=0) (mx_uint *out_dim, const mx_uint **out_pdata) (mx_uint temp_dim, mx_uint *temp_pdata) +%typemap(in,numinputs=0) (int *out_dim, const int **out_pdata) (int temp_dim, int *temp_pdata) { $1 = &temp_dim; $2 = &temp_pdata; } -%typemap(argout) (mx_uint *out_dim, const mx_uint **out_pdata) +%typemap(argout) (int *out_dim, const int **out_pdata) { if(!result) { @@ -956,12 +956,12 @@ } } -%typemap(in,numinputs=0) (mx_uint *in_shape_size, const mx_uint **in_shape_ndim, const mx_uint ***in_shape_data) - (mx_uint temp1, mx_uint *temp2, mx_uint **temp3), - (mx_uint *out_shape_size, const mx_uint **out_shape_ndim, const mx_uint ***out_shape_data) - (mx_uint temp1, mx_uint *temp2, mx_uint **temp3), - (mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, const mx_uint ***aux_shape_data) - (mx_uint temp1, mx_uint *temp2, mx_uint **temp3) +%typemap(in,numinputs=0) (mx_uint *in_shape_size, const int **in_shape_ndim, const int ***in_shape_data) + (mx_uint temp1, int *temp2, int **temp3), + (mx_uint *out_shape_size, const int **out_shape_ndim, const int ***out_shape_data) + (mx_uint temp1, int *temp2, int **temp3), + (mx_uint *aux_shape_size, const int **aux_shape_ndim, const int ***aux_shape_data) + (mx_uint temp1, int *temp2, int **temp3) { $1 = &temp1; $2 = &temp2; @@ -969,9 +969,9 @@ 
*$1 = 0; } -%typemap(argout) (mx_uint *in_shape_size, const mx_uint **in_shape_ndim, const mx_uint ***in_shape_data), - (mx_uint *out_shape_size, const mx_uint **out_shape_ndim, const mx_uint ***out_shape_data), - (mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, const mx_uint ***aux_shape_data) +%typemap(argout) (mx_uint *in_shape_size, const int **in_shape_ndim, const int ***in_shape_data), + (mx_uint *out_shape_size, const int **out_shape_ndim, const int ***out_shape_data), + (mx_uint *aux_shape_size, const int **aux_shape_ndim, const int ***aux_shape_data) { if(!result && *arg15) { From 4b2244aec99912bde5fe5ed6a7370065970c4d4f Mon Sep 17 00:00:00 2001 From: reminisce Date: Wed, 10 Apr 2019 12:48:32 -0700 Subject: [PATCH 20/32] Rename np_comp to np_compat for readability --- python/mxnet/base.py | 26 +++++++++++------------ python/mxnet/symbol/symbol.py | 4 ++-- tests/python/gpu/test_operator_gpu.py | 4 ++-- tests/python/unittest/test_infer_shape.py | 2 +- tests/python/unittest/test_ndarray.py | 2 +- tests/python/unittest/test_operator.py | 12 +++++------ 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 916e74182f94..0532430ef031 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -736,7 +736,7 @@ def write_all_str(module_file, module_all_list): ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p -def set_np_comp(flag): +def set_np_compat(flag): """ Turns on/off NumPy compatibility. NumPy-compatibility is turned off by default in backend. @@ -754,7 +754,7 @@ def set_np_comp(flag): return bool(prev.value) -def is_np_comp(): +def is_np_compat(): """ Checks whether the NumPy compatibility is currently turned on. NumPy-compatibility is turned off by default in backend. 
@@ -778,26 +778,26 @@ class _NumpyCompatibilityStateScope(object): backward([y]) """ - def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name - self._enter_is_np_comp = is_np_comp - self._prev_is_np_comp = None + def __init__(self, is_np_compat): #pylint: disable=redefined-outer-name + self._enter_is_np_compat = is_np_compat + self._prev_is_np_compat = None def __enter__(self): - if self._enter_is_np_comp is not None: - self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp) + if self._enter_is_np_compat is not None: + self._prev_is_np_compat = set_np_compat(self._enter_is_np_compat) def __exit__(self, ptype, value, trace): - if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp: - set_np_comp(self._prev_is_np_comp) + if self._enter_is_np_compat is not None and self._prev_is_np_compat != self._enter_is_np_compat: + set_np_compat(self._prev_is_np_compat) -def enable_np_comp(): +def enable_np_compat(): """Returns a NumPy compatibility state scope to be used in 'with' statement and captures code that needs the compatibility. Example:: - with mx.enable_np_comp(): + with mx.enable_np_compat(): # A scalar tensor's shape is `()`, whose `ndim` is `0`. scalar = mx.nd.ones(shape=()) assert scalar.shape == () @@ -827,13 +827,13 @@ def enable_np_comp(): return _NumpyCompatibilityStateScope(True) -def disable_np_comp(): +def disable_np_compat(): """Returns a state scope with NumPy-compatibility disabled to be used in 'with' statement and captures code that does not need the compatibility. Example:: - with mx.disable_np_comp(): + with mx.disable_np_compat(): # 0 means unknown shape dimension size in the legacy shape definition. 
data = mx.sym.var("data", shape=(0, 2, 3)) ret = mx.sym.sin(data) diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index 2de4954cae81..a187f67d608a 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -34,7 +34,7 @@ from ..attribute import AttrScope from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array -from ..base import mx_uint, py_str, string_types, integer_types, mx_int, is_np_comp +from ..base import mx_uint, py_str, string_types, integer_types, mx_int, is_np_compat from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle from ..base import check_call, MXNetError, NotImplementedForSymbol from ..context import Context, current_context @@ -1078,7 +1078,7 @@ def infer_shape(self, *args, **kwargs): arg_names = self.list_arguments() unknowns = [] for name, shape in zip(arg_names, arg_shapes): - if is_np_comp(): + if is_np_compat(): shape_is_none = not shape or -1 in shape else: shape_is_none = not shape or 0 in shape diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index f8ebe9517bac..cb1248e46a85 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -1963,14 +1963,14 @@ def check_proposal_consistency(op, batch_size, with_nms=False): # The following 2 functions launch 0-thread kernels, an error that should be caught and signaled. 
def kernel_error_check_imperative(): os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' - with mx.enable_np_comp(): + with mx.enable_np_compat(): a = mx.nd.array([1,2,3],ctx=mx.gpu(0)) b = mx.nd.array([],ctx=mx.gpu(0)) c = (a / b).asnumpy() def kernel_error_check_symbolic(): os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' - with mx.enable_np_comp(): + with mx.enable_np_compat(): a = mx.sym.Variable('a') b = mx.sym.Variable('b') c = a / b diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py index e0b4d35ea9aa..e4a3e5a3d86f 100644 --- a/tests/python/unittest/test_infer_shape.py +++ b/tests/python/unittest/test_infer_shape.py @@ -154,7 +154,7 @@ def test_shape_completely_unknown(): assert arg_shapes[0] == () assert out_shapes[0] == () - with mx.enable_np_comp(): + with mx.enable_np_compat(): data = mx.sym.var("data") ret = mx.sym.sin(data) arg_shapes, out_shapes, _ = ret.infer_shape_partial() diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index f8f52c2cee24..fb0cac737ee0 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -123,7 +123,7 @@ def test_ndarray_setitem(): # numpy assignment for empty axis for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]: if trivial_shape == tuple(): - with mx.enable_np_comp(): + with mx.enable_np_compat(): x = mx.nd.zeros(trivial_shape) else: x = mx.nd.zeros(trivial_shape) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index b82b0e9b5746..c5523c5b2268 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4403,7 +4403,7 @@ def test_invalid_reps(): assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3)) test_normal_case() - with mx.enable_np_comp(): + with mx.enable_np_compat(): test_empty_tensor() test_empty_reps() test_tile_backward() @@ -4464,7 +4464,7 @@ def test_zero_depth(): 
test_normal_case(index_type=np.float64) test_normal_case(index_type=np.float32) test_normal_case(index_type=np.float16) - with mx.enable_np_comp(): + with mx.enable_np_compat(): test_empty_indices() test_zero_depth() @@ -6850,7 +6850,7 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape): check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0)) check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0)) - with mx.enable_np_comp(): + with mx.enable_np_compat(): var1 = mx.sym.var(name="data", shape=(-1, 20)) check_slice_partial_infer(var1, (None, None), (None, 10), [], (-1, 10)) check_slice_partial_infer(var1, (None, None), (None, 10), (None, 2), (-1, 5)) @@ -7916,7 +7916,7 @@ def test_image_normalize(): def test_scalar_tensor_creation(): assertRaises(MXNetError, mx.nd.zeros, shape=()) assertRaises(MXNetError, mx.nd.ones, shape=()) - with mx.enable_np_comp(): + with mx.enable_np_compat(): data_mx = mx.nd.ones(shape=()) data_np = np.ones((), dtype=data_mx.dtype) assert same(data_mx.asnumpy(), data_np) @@ -7926,7 +7926,7 @@ def test_scalar_tensor_creation(): def test_zero_size_tensor_creation(): assertRaises(MXNetError, mx.nd.zeros, shape=(0, 1, 3, 0)) assertRaises(MXNetError, mx.nd.ones, shape=(0, 1, 3, 0)) - with mx.enable_np_comp(): + with mx.enable_np_compat(): data_mx = mx.nd.ones(shape=(0, 1, 0, 4)) data_np = np.ones(shape=data_mx.shape, dtype=data_mx.dtype) assert same(data_mx.asnumpy(), data_np) @@ -7934,7 +7934,7 @@ def test_zero_size_tensor_creation(): @with_seed() def test_concat_with_zero_size_tensor(): - with mx.enable_np_comp(): + with mx.enable_np_compat(): data1 = mx.nd.ones((0, 8, 12)) data2 = mx.nd.ones((3, 8, 12)) data3 = mx.nd.ones((0, 8, 12)) From 04ad0871b74c86c6efc1045d92194115b9c6813c Mon Sep 17 00:00:00 2001 From: reminisce Date: Wed, 10 Apr 2019 12:52:09 -0700 Subject: [PATCH 21/32] Fix import error --- python/mxnet/__init__.py | 2 +- python/mxnet/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 5f4f9b393e41..99f565ad309a 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -23,7 +23,7 @@ from .context import Context, current_context, cpu, gpu, cpu_pinned from . import engine -from .base import MXNetError, is_np_comp, set_np_comp, enable_np_comp, disable_np_comp +from .base import MXNetError, is_np_compat, set_np_compat, enable_np_compat, disable_np_compat from . import base from . import contrib from . import ndarray diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 0532430ef031..b056b73d308e 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -30,7 +30,7 @@ from . import libinfo -__all__ = ['MXNetError'] +__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'enable_np_compat', 'disable_np_compat'] #---------------------------- # library loading #---------------------------- From 8a023eb440adefab718e6aeca10cc1f54a4e5b48 Mon Sep 17 00:00:00 2001 From: reminisce Date: Thu, 11 Apr 2019 15:41:10 -0700 Subject: [PATCH 22/32] Keep old c apis unchanged --- R-package/src/ndarray.cc | 2 +- cpp-package/include/mxnet-cpp/ndarray.hpp | 2 +- cpp-package/include/mxnet-cpp/symbol.hpp | 12 +- include/mxnet/c_api.h | 235 ++++++++- perl-package/AI-MXNetCAPI/mxnet.i | 168 +++---- python/mxnet/executor.py | 46 +- python/mxnet/ndarray/ndarray.py | 4 +- python/mxnet/symbol/symbol.py | 76 +-- .../native/org_apache_mxnet_native_c_api.cc | 100 ++-- src/c_api/c_api.cc | 36 +- src/c_api/c_api_common.h | 31 +- src/c_api/c_api_executor.cc | 457 +++++++++++++++++- src/c_api/c_api_symbolic.cc | 126 ++++- 13 files changed, 1039 insertions(+), 256 deletions(-) diff --git a/R-package/src/ndarray.cc b/R-package/src/ndarray.cc index d08671a3f026..0409d3ba8887 100644 --- a/R-package/src/ndarray.cc +++ b/R-package/src/ndarray.cc @@ -181,7 +181,7 @@ Rcpp::RObject NDArrayPacker::CreateNDArrayPacker() { Rcpp::Dimension NDArray::dim() const { int ndim; const int *pshape; - 
MX_CALL(MXNDArrayGetShape( + MX_CALL(MXNDArrayGetShapeEx( ptr_->handle, &ndim, &pshape)); Rcpp::IntegerVector dat(pshape, pshape + ndim); std::reverse(dat.begin(), dat.end()); diff --git a/cpp-package/include/mxnet-cpp/ndarray.hpp b/cpp-package/include/mxnet-cpp/ndarray.hpp index bf1d82ca33b4..d0438305a62e 100644 --- a/cpp-package/include/mxnet-cpp/ndarray.hpp +++ b/cpp-package/include/mxnet-cpp/ndarray.hpp @@ -399,7 +399,7 @@ inline size_t NDArray::Size() const { inline std::vector NDArray::GetShape() const { const int *out_pdata; int out_dim; - MXNDArrayGetShape(blob_ptr_->handle_, &out_dim, &out_pdata); + MXNDArrayGetShapeEx(blob_ptr_->handle_, &out_dim, &out_pdata); std::vector ret; for (int i = 0; i < out_dim; ++i) { ret.push_back(out_pdata[i]); diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp index d82b7abaf614..7bdbe52ddc00 100644 --- a/cpp-package/include/mxnet-cpp/symbol.hpp +++ b/cpp-package/include/mxnet-cpp/symbol.hpp @@ -210,12 +210,12 @@ inline void Symbol::InferShape( const int **aux_shape_data; int complete; - CHECK_EQ(MXSymbolInferShape(GetHandle(), keys.size(), keys.data(), - arg_ind_ptr.data(), arg_shape_data.data(), - &in_shape_size, &in_shape_ndim, &in_shape_data, - &out_shape_size, &out_shape_ndim, &out_shape_data, - &aux_shape_size, &aux_shape_ndim, &aux_shape_data, - &complete), + CHECK_EQ(MXSymbolInferShapeEx(GetHandle(), keys.size(), keys.data(), + arg_ind_ptr.data(), arg_shape_data.data(), + &in_shape_size, &in_shape_ndim, &in_shape_data, + &out_shape_size, &out_shape_ndim, &out_shape_data, + &aux_shape_size, &aux_shape_ndim, &aux_shape_data, + &complete), 0); if (complete) { diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 102e4b2c14a2..7907265a68a9 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -768,15 +768,26 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle, bool reverse, NDArrayHandle *out); /*! 
- * \brief get the shape of the array + * \brief DEPRECATED. Use MXNDArrayGetShapeEx instead. + * get the shape of the array * \param handle the handle to the narray * \param out_dim the output dimension * \param out_pdata pointer holder to get data pointer of the shape * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, - int *out_dim, - const int **out_pdata); + mx_uint *out_dim, + const mx_uint **out_pdata); +/*! + * \brief get the shape of the array + * \param handle the handle to the narray + * \param out_dim the output dimension + * \param out_pdata pointer holder to get data pointer of the shape + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXNDArrayGetShapeEx(NDArrayHandle handle, + int *out_dim, + const int **out_pdata); /*! * \brief get the content of the data in NDArray * \param handle the handle to the ndarray @@ -1481,7 +1492,8 @@ MXNET_DLL int MXSymbolGrad(SymbolHandle sym, const char** wrt, SymbolHandle* out); /*! - * \brief infer shape of unknown input shapes given the known one. + * \brief DEPRECATED. Use MXSymbolInferShapeEx instead. + * infer shape of unknown input shapes given the known one. * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data * The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional. 
* @@ -1506,19 +1518,58 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const int *arg_shape_data, + const mx_uint *arg_shape_data, mx_uint *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, + const mx_uint **in_shape_ndim, + const mx_uint ***in_shape_data, mx_uint *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, + const mx_uint **out_shape_ndim, + const mx_uint ***out_shape_data, mx_uint *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, + const mx_uint **aux_shape_ndim, + const mx_uint ***aux_shape_data, int *complete); + /*! - * \brief partially infer shape of unknown input shapes given the known one. + * \brief infer shape of unknown input shapes given the known one. + * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data + * The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional. + * + * \param sym symbol handle + * \param num_args numbe of input arguments. + * \param keys the key of keyword args (optional) + * \param arg_ind_ptr the head pointer of the rows in CSR + * \param arg_shape_data the content of the CSR + * \param in_shape_size sizeof the returning array of in_shapes + * \param in_shape_ndim returning array of shape dimensions of eachs input shape. + * \param in_shape_data returning array of pointers to head of the input shape. + * \param out_shape_size sizeof the returning array of out_shapes + * \param out_shape_ndim returning array of shape dimensions of eachs input shape. + * \param out_shape_data returning array of pointers to head of the input shape. + * \param aux_shape_size sizeof the returning array of aux_shapes + * \param aux_shape_ndim returning array of shape dimensions of eachs auxiliary shape. + * \param aux_shape_data returning array of pointers to head of the auxiliary shape. 
+ * \param complete whether infer shape completes or more information is needed. + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXSymbolInferShapeEx(SymbolHandle sym, + mx_uint num_args, + const char** keys, + const mx_uint *arg_ind_ptr, + const int *arg_shape_data, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *complete); +/*! + * \brief DEPRECATED. Use MXSymbolInferShapePartialEx instead. + * partially infer shape of unknown input shapes given the known one. * * Return partially inferred results if not all shapes could be inferred. * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data @@ -1545,18 +1596,59 @@ MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const int *arg_shape_data, + const mx_uint *arg_shape_data, mx_uint *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, + const mx_uint **in_shape_ndim, + const mx_uint ***in_shape_data, mx_uint *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, + const mx_uint **out_shape_ndim, + const mx_uint ***out_shape_data, mx_uint *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, + const mx_uint **aux_shape_ndim, + const mx_uint ***aux_shape_data, int *complete); + +/*! + * \brief partially infer shape of unknown input shapes given the known one. + * + * Return partially inferred results if not all shapes could be inferred. + * The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data + * The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional. + * + * \param sym symbol handle + * \param num_args number of input arguments.
+ * \param keys the key of keyword args (optional) + * \param arg_ind_ptr the head pointer of the rows in CSR + * \param arg_shape_data the content of the CSR + * \param in_shape_size size of the returning array of in_shapes + * \param in_shape_ndim returning array of shape dimensions of each input shape. + * \param in_shape_data returning array of pointers to head of the input shape. + * \param out_shape_size size of the returning array of out_shapes + * \param out_shape_ndim returning array of shape dimensions of each output shape. + * \param out_shape_data returning array of pointers to head of the output shape. + * \param aux_shape_size size of the returning array of aux_shapes + * \param aux_shape_ndim returning array of shape dimensions of each auxiliary shape. + * \param aux_shape_data returning array of pointers to head of the auxiliary shape. + * \param complete whether infer shape completes or more information is needed. + * \return 0 when success, -1 when failure happens + */ +MXNET_DLL int MXSymbolInferShapePartialEx(SymbolHandle sym, + mx_uint num_args, + const char** keys, + const mx_uint *arg_ind_ptr, + const int *arg_shape_data, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *complete); + /*! * \brief infer type of unknown input types given the known one. * The types are packed into a CSR matrix represented by arg_ind_ptr and arg_type_data @@ -1820,7 +1912,8 @@ MXNET_DLL int MXExecutorBindEX(SymbolHandle symbol_handle, NDArrayHandle *aux_states, ExecutorHandle shared_exec, ExecutorHandle *out); - +/*! \brief DEPRECATED. Use MXExecutorSimpleBindEx instead.
+ */ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle, int dev_type, int dev_id, @@ -1833,7 +1926,7 @@ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle, const char** provided_grad_req_types, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, const mx_uint num_provided_arg_dtypes, const char** provided_arg_dtype_names, @@ -1856,8 +1949,44 @@ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle, ExecutorHandle shared_exec_handle, ExecutorHandle* out); -/*! - * \brief Return a new executor with the same symbol and shared memory, + +MXNET_DLL int MXExecutorSimpleBindEx(SymbolHandle symbol_handle, + int dev_type, + int dev_id, + const mx_uint num_g2c_keys, + const char** g2c_keys, + const int* g2c_dev_types, + const int* g2c_dev_ids, + const mx_uint provided_grad_req_list_len, + const char** provided_grad_req_names, + const char** provided_grad_req_types, + const mx_uint num_provided_arg_shapes, + const char** provided_arg_shape_names, + const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_idx, + const mx_uint num_provided_arg_dtypes, + const char** provided_arg_dtype_names, + const int* provided_arg_dtypes, + const mx_uint num_provided_arg_stypes, + const char** provided_arg_stype_names, + const int* provided_arg_stypes, + const mx_uint num_shared_arg_names, + const char** shared_arg_name_list, + int* shared_buffer_len, + const char** shared_buffer_name_list, + NDArrayHandle* shared_buffer_handle_list, + const char*** updated_shared_buffer_name_list, + NDArrayHandle** updated_shared_buffer_handle_list, + mx_uint* num_in_args, + NDArrayHandle** in_args, + NDArrayHandle** arg_grads, + mx_uint* num_aux_states, + NDArrayHandle** aux_states, + ExecutorHandle shared_exec_handle, + ExecutorHandle* out); +/*! + * \brief DEPRECATED. Use MXExecutorReshapeEx instead. 
+ * Return a new executor with the same symbol and shared memory, * but different input/output shapes. * * \param partial_shaping Whether to allow changing the shape of unspecified arguments. @@ -1887,7 +2016,7 @@ MXNET_DLL int MXExecutorReshape(int partial_shaping, const int* map_dev_ids, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, mx_uint* num_in_args, NDArrayHandle** in_args, @@ -1896,6 +2025,46 @@ MXNET_DLL int MXExecutorReshape(int partial_shaping, NDArrayHandle** aux_states, ExecutorHandle shared_exec, ExecutorHandle *out); +/*! + * \brief Return a new executor with the same symbol and shared memory, + * but different input/output shapes. + * + * \param partial_shaping Whether to allow changing the shape of unspecified arguments. + * \param allow_up_sizing Whether to allow allocating new ndarrays that's larger than the original. + * \param dev_type device type of default context + * \param dev_id device id of default context + * \param num_map_keys size of group2ctx map + * \param map_keys keys of group2ctx map + * \param map_dev_types device type of group2ctx map + * \param map_dev_ids device id of group2ctx map + * \param num_in_args length of in_args + * \param in_args in args array + * \param arg_grads arg grads handle array + * \param num_aux_states length of auxiliary states + * \param aux_states auxiliary states array + * \param shared_exec input executor handle for memory sharing + * \param out output executor handle + * \return a new executor + */ +MXNET_DLL int MXExecutorReshapeEx(int partial_shaping, + int allow_up_sizing, + int dev_type, + int dev_id, + mx_uint num_map_keys, + const char** map_keys, + const int* map_dev_types, + const int* map_dev_ids, + const mx_uint num_provided_arg_shapes, + const char** provided_arg_shape_names, + const int* provided_arg_shape_data, + const mx_uint* 
provided_arg_shape_idx, + mx_uint* num_in_args, + NDArrayHandle** in_args, + NDArrayHandle** arg_grads, + mx_uint* num_aux_states, + NDArrayHandle** aux_states, + ExecutorHandle shared_exec, + ExecutorHandle *out); /*! * \brief get optimized graph from graph executor @@ -2554,6 +2723,20 @@ MXNET_DLL int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** ar */ MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shared_id); +/*! + * \brief DEPRECATED. Use MXNDArrayCreateFromSharedMemEx instead. + * Reconstruct NDArray from shared memory handle + * \param shared_pid shared PID + * \param shared_id shared memory id + * \param shape pointer to NDArray dimensions + * \param ndim number of NDArray dimensions + * \param dtype data type of NDArray + * \param out constructed NDArray + */ +MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape, + mx_uint ndim, int dtype, NDArrayHandle *out); + + /*! * \brief Reconstruct NDArray from shared memory handle * \param shared_pid shared PID @@ -2563,8 +2746,8 @@ MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, * \param dtype data type of NDArray * \param out constructed NDArray */ -MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape, - int ndim, int dtype, NDArrayHandle *out); +MXNET_DLL int MXNDArrayCreateFromSharedMemEx(int shared_pid, int shared_id, const int *shape, + int ndim, int dtype, NDArrayHandle *out); /*! * \brief Push an asynchronous operation to the engine. 
diff --git a/perl-package/AI-MXNetCAPI/mxnet.i b/perl-package/AI-MXNetCAPI/mxnet.i index 0ecf5b3a9cc3..e38402c56100 100644 --- a/perl-package/AI-MXNetCAPI/mxnet.i +++ b/perl-package/AI-MXNetCAPI/mxnet.i @@ -640,7 +640,7 @@ int MXNDArrayReshape64(NDArrayHandle handle, * \param out_pdata pointer holder to get data pointer of the shape * \return 0 when success, -1 when failure happens */ -int MXNDArrayGetShape(NDArrayHandle handle, +int MXNDArrayGetShapeEx(NDArrayHandle handle, int *out_dim, const int **out_pdata); /*! @@ -1289,21 +1289,21 @@ int MXSymbolGrad(SymbolHandle sym, * \param complete whether infer shape completes or more information is needed. * \return 0 when success, -1 when failure happens */ -int MXSymbolInferShape(SymbolHandle sym, - mx_uint num_args, - const char** in, - const mx_uint *in, - const int *in, - mx_uint *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, - mx_uint *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, - mx_uint *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, - int *out); +int MXSymbolInferShapeEx(SymbolHandle sym, + mx_uint num_args, + const char** in, + const mx_uint *in, + const int *in, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *out); /*! * \brief partially infer shape of unknown input shapes given the known one. * @@ -1328,21 +1328,21 @@ int MXSymbolInferShape(SymbolHandle sym, * \param complete whether infer shape completes or more information is needed. 
* \return 0 when success, -1 when failure happens */ -int MXSymbolInferShapePartial(SymbolHandle sym, - mx_uint num_args, - const char** in, - const mx_uint *in, - const int *in, - mx_uint *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, - mx_uint *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, - mx_uint *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, - int *out); +int MXSymbolInferShapePartialEx(SymbolHandle sym, + mx_uint num_args, + const char** in, + const mx_uint *in, + const int *in, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *out); /*! * \brief infer type of unknown input types given the known one. @@ -1535,40 +1535,40 @@ int MXExecutorBindEX(SymbolHandle symbol_handle, ExecutorHandle shared_exec, ExecutorHandle *out); -int MXExecutorSimpleBind(SymbolHandle symbol_handle, - int dev_type, - int dev_id, - const mx_uint num_g2c_keys, - const char** in, // g2c_keys, - const int* in, // g2c_dev_types, - const int* in, // g2c_dev_ids, - const mx_uint provided_grad_req_list_len, - const char** in, // provided_grad_req_names, - const char** in, // provided_grad_req_types, - const mx_uint num_provided_arg_shapes, - const char** in, // provided_arg_shape_names, - const int* in, // provided_arg_shape_data, - const mx_uint* in, // provided_arg_shape_idx, - const mx_uint num_provided_arg_dtypes, - const char** in, // provided_arg_dtype_names, - const int* in, // provided_arg_dtypes, - const mx_uint num_provided_arg_stypes, - const char** in, // provided_arg_stype_names, - const int* in, // provided_arg_stypes, - const mx_uint num_shared_arg_names, - const char** in, // shared_arg_name_list, - int* shared_buffer_len, - const char** shared_buffer_name_list, - 
NDArrayHandle* shared_buffer_handle_list, - const char*** updated_shared_buffer_name_list, - NDArrayHandle** updated_shared_buffer_handle_list, - mx_uint* num_in_args, - NDArrayHandle** in_args, - NDArrayHandle** arg_grads, - mx_uint* num_aux_states, - NDArrayHandle** aux_states, - ExecutorHandle shared_exec_handle, - ExecutorHandle* out +int MXExecutorSimpleBindEx(SymbolHandle symbol_handle, + int dev_type, + int dev_id, + const mx_uint num_g2c_keys, + const char** in, // g2c_keys, + const int* in, // g2c_dev_types, + const int* in, // g2c_dev_ids, + const mx_uint provided_grad_req_list_len, + const char** in, // provided_grad_req_names, + const char** in, // provided_grad_req_types, + const mx_uint num_provided_arg_shapes, + const char** in, // provided_arg_shape_names, + const int* in, // provided_arg_shape_data, + const mx_uint* in, // provided_arg_shape_idx, + const mx_uint num_provided_arg_dtypes, + const char** in, // provided_arg_dtype_names, + const int* in, // provided_arg_dtypes, + const mx_uint num_provided_arg_stypes, + const char** in, // provided_arg_stype_names, + const int* in, // provided_arg_stypes, + const mx_uint num_shared_arg_names, + const char** in, // shared_arg_name_list, + int* shared_buffer_len, + const char** shared_buffer_name_list, + NDArrayHandle* shared_buffer_handle_list, + const char*** updated_shared_buffer_name_list, + NDArrayHandle** updated_shared_buffer_handle_list, + mx_uint* num_in_args, + NDArrayHandle** in_args, + NDArrayHandle** arg_grads, + mx_uint* num_aux_states, + NDArrayHandle** aux_states, + ExecutorHandle shared_exec_handle, + ExecutorHandle* out ); /*! 
@@ -1592,25 +1592,25 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, * \param out output executor handle * \return a new executor */ -int MXExecutorReshape(int partial_shaping, - int allow_up_sizing, - int dev_type, - int dev_id, - mx_uint num_map_keys, - const char** in, - const int* in, - const int* in, - const mx_uint num_provided_arg_shapes, - const char** in, - const int* in, - const mx_uint* in, - mx_uint* couple_out_size, - NDArrayHandle** out_first_array, - NDArrayHandle** out_second_array, - mx_uint* out_size, - NDArrayHandle** out_array, - ExecutorHandle shared_exec, - ExecutorHandle *out); +int MXExecutorReshapeEx(int partial_shaping, + int allow_up_sizing, + int dev_type, + int dev_id, + mx_uint num_map_keys, + const char** in, + const int* in, + const int* in, + const mx_uint num_provided_arg_shapes, + const char** in, + const int* in, + const mx_uint* in, + mx_uint* couple_out_size, + NDArrayHandle** out_first_array, + NDArrayHandle** out_second_array, + mx_uint* out_size, + NDArrayHandle** out_array, + ExecutorHandle shared_exec, + ExecutorHandle *out); /*! 
* \brief set a call back to notify the completion of operation diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py index 53ddc252d6b5..9dfe63682f86 100644 --- a/python/mxnet/executor.py +++ b/python/mxnet/executor.py @@ -433,29 +433,29 @@ def reshape(self, partial_shaping=False, allow_up_sizing=False, **kwargs): num_aux_states = ctypes.c_uint() aux_state_handles = ctypes.POINTER(NDArrayHandle)() - check_call(_LIB.MXExecutorReshape(ctypes.c_int(int(partial_shaping)), - ctypes.c_int(int(allow_up_sizing)), - ctypes.c_int(self._ctx.device_typeid), - ctypes.c_int(self._ctx.device_id), - mx_uint(len(ctx_map_keys)), - c_str_array(ctx_map_keys), - c_array_buf(ctypes.c_int, - py_array('i', ctx_map_dev_types)), - c_array_buf(ctypes.c_int, - py_array('i', ctx_map_dev_ids)), - mx_uint(len(provided_arg_shape_names)), - c_str_array(provided_arg_shape_names), - c_array_buf(mx_int, - py_array('i', provided_arg_shape_data)), - c_array_buf(mx_uint, - py_array('I', provided_arg_shape_idx)), - ctypes.byref(num_in_args), - ctypes.byref(in_arg_handles), - ctypes.byref(arg_grad_handles), - ctypes.byref(num_aux_states), - ctypes.byref(aux_state_handles), - shared_handle, - ctypes.byref(handle))) + check_call(_LIB.MXExecutorReshapeEx(ctypes.c_int(int(partial_shaping)), + ctypes.c_int(int(allow_up_sizing)), + ctypes.c_int(self._ctx.device_typeid), + ctypes.c_int(self._ctx.device_id), + mx_uint(len(ctx_map_keys)), + c_str_array(ctx_map_keys), + c_array_buf(ctypes.c_int, + py_array('i', ctx_map_dev_types)), + c_array_buf(ctypes.c_int, + py_array('i', ctx_map_dev_ids)), + mx_uint(len(provided_arg_shape_names)), + c_str_array(provided_arg_shape_names), + c_array_buf(mx_int, + py_array('i', provided_arg_shape_data)), + c_array_buf(mx_uint, + py_array('I', provided_arg_shape_idx)), + ctypes.byref(num_in_args), + ctypes.byref(in_arg_handles), + ctypes.byref(arg_grad_handles), + ctypes.byref(num_aux_states), + ctypes.byref(aux_state_handles), + shared_handle, + ctypes.byref(handle))) 
arg_arrays = [_ndarray_cls(NDArrayHandle(in_arg_handles[i])) for i in range(num_in_args.value)] diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 44334fc7f43e..97cfd827c7fe 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -143,7 +143,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): def _new_from_shared_mem(shared_pid, shared_id, shape, dtype): hdl = NDArrayHandle() - check_call(_LIB.MXNDArrayCreateFromSharedMem( + check_call(_LIB.MXNDArrayCreateFromSharedMemEx( ctypes.c_int(shared_pid), ctypes.c_int(shared_id), c_array(mx_int, shape), @@ -1847,7 +1847,7 @@ def shape(self): """ ndim = mx_int() pdata = ctypes.POINTER(mx_int)() - check_call(_LIB.MXNDArrayGetShape( + check_call(_LIB.MXNDArrayGetShapeEx( self.handle, ctypes.byref(ndim), ctypes.byref(pdata))) if ndim.value == -1: return None diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index a187f67d608a..4bf60a6a1fcd 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -1188,9 +1188,9 @@ def _infer_shape_impl(self, partial, *args, **kwargs): aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))() complete = ctypes.c_int() if partial: - infer_func = _LIB.MXSymbolInferShapePartial + infer_func = _LIB.MXSymbolInferShapePartialEx else: - infer_func = _LIB.MXSymbolInferShape + infer_func = _LIB.MXSymbolInferShapeEx check_call(infer_func( self.handle, mx_uint(len(indptr) - 1), @@ -1571,42 +1571,42 @@ def simple_bind(self, ctx, grad_req='write', type_dict=None, stype_dict=None, aux_state_handles = ctypes.POINTER(NDArrayHandle)() try: - check_call(_LIB.MXExecutorSimpleBind(self.handle, - ctypes.c_int(ctx.device_typeid), - ctypes.c_int(ctx.device_id), - num_ctx_map_keys, - ctx_map_keys, - ctx_map_dev_types, - ctx_map_dev_ids, - mx_uint(provided_req_type_list_len), - provided_grad_req_names, - provided_grad_req_types, - mx_uint(len(provided_arg_shape_names)), - 
c_str_array(provided_arg_shape_names), - c_array_buf(mx_int, - array('I', provided_arg_shape_data)), - c_array_buf(mx_uint, - array('i', provided_arg_shape_idx)), - num_provided_arg_types, - provided_arg_type_names, - provided_arg_type_data, - num_provided_arg_stypes, - provided_arg_stype_names, - provided_arg_stype_data, - mx_uint(len(shared_arg_name_list)), - c_str_array(shared_arg_name_list), - ctypes.byref(shared_buffer_len), - shared_buffer_names, - shared_buffer_handles, - ctypes.byref(updated_shared_buffer_names), - ctypes.byref(updated_shared_buffer_handles), - ctypes.byref(num_in_args), - ctypes.byref(in_arg_handles), - ctypes.byref(arg_grad_handles), - ctypes.byref(num_aux_states), - ctypes.byref(aux_state_handles), - shared_exec_handle, - ctypes.byref(exe_handle))) + check_call(_LIB.MXExecutorSimpleBindEx(self.handle, + ctypes.c_int(ctx.device_typeid), + ctypes.c_int(ctx.device_id), + num_ctx_map_keys, + ctx_map_keys, + ctx_map_dev_types, + ctx_map_dev_ids, + mx_uint(provided_req_type_list_len), + provided_grad_req_names, + provided_grad_req_types, + mx_uint(len(provided_arg_shape_names)), + c_str_array(provided_arg_shape_names), + c_array_buf(mx_int, + array('I', provided_arg_shape_data)), + c_array_buf(mx_uint, + array('i', provided_arg_shape_idx)), + num_provided_arg_types, + provided_arg_type_names, + provided_arg_type_data, + num_provided_arg_stypes, + provided_arg_stype_names, + provided_arg_stype_data, + mx_uint(len(shared_arg_name_list)), + c_str_array(shared_arg_name_list), + ctypes.byref(shared_buffer_len), + shared_buffer_names, + shared_buffer_handles, + ctypes.byref(updated_shared_buffer_names), + ctypes.byref(updated_shared_buffer_handles), + ctypes.byref(num_in_args), + ctypes.byref(in_arg_handles), + ctypes.byref(arg_grad_handles), + ctypes.byref(num_aux_states), + ctypes.byref(aux_state_handles), + shared_exec_handle, + ctypes.byref(exe_handle))) except MXNetError as e: error_msg = "simple_bind error. 
Arguments:\n" for k, v in kwargs.items(): diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc index ae01c8a7c05e..7323d23ac556 100644 --- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc +++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc @@ -356,7 +356,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape (JNIEnv *env, jobject obj, jlong ndArrayPtr, jobject ndimRef, jobject dataBuf) { int ndim; const int *pdata; - int ret = MXNDArrayGetShape(reinterpret_cast(ndArrayPtr), &ndim, &pdata); + int ret = MXNDArrayGetShapeEx(reinterpret_cast(ndArrayPtr), &ndim, &pdata); // fill dataBuf jclass integerClass = env->FindClass("java/lang/Integer"); @@ -938,25 +938,25 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape ExecutorHandle out; - int ret = MXExecutorReshape(partialReshaping, - allowUpSizing, - devType, - devId, - static_cast(numMapKeys), - mapKeys, - static_cast(mapDevTypes), - static_cast(mapDevIds), - static_cast(numProvidedArgShapes), - providedArgShapeNames, - static_cast(providedArgShapeData), - reinterpret_cast(providedArgShapeIdx), - &numInArgs, - &inArgs, - &argGrads, - &numAuxStates, - &auxStates, - reinterpret_cast(jsharedExec), - &out); + int ret = MXExecutorReshapeEx(partialReshaping, + allowUpSizing, + devType, + devId, + static_cast(numMapKeys), + mapKeys, + static_cast(mapDevTypes), + static_cast(mapDevIds), + static_cast(numProvidedArgShapes), + providedArgShapeNames, + static_cast(providedArgShapeData), + reinterpret_cast(providedArgShapeIdx), + &numInArgs, + &inArgs, + &argGrads, + &numAuxStates, + &auxStates, + reinterpret_cast(jsharedExec), + &out); jclass longCls = env->FindClass("java/lang/Long"); jmethodID newLong = env->GetMethodID(longCls, "", "(J)V"); @@ -1693,37 +1693,37 @@ int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint 
jnumA jint *argShapeData = env->GetIntArrayElements(jargShapeData, NULL); int ret; if (!partial) { - ret = MXSymbolInferShape(reinterpret_cast(symbolPtr), - static_cast(jnumArgs), - keys, - reinterpret_cast(argIndPtr), - reinterpret_cast(argShapeData), - &inShapeSize, - &inShapeNdim, - &inShapeData, - &outShapeSize, - &outShapeNdim, - &outShapeData, - &auxShapeSize, - &auxShapeNdim, - &auxShapeData, - &complete); + ret = MXSymbolInferShapeEx(reinterpret_cast(symbolPtr), + static_cast(jnumArgs), + keys, + reinterpret_cast(argIndPtr), + reinterpret_cast(argShapeData), + &inShapeSize, + &inShapeNdim, + &inShapeData, + &outShapeSize, + &outShapeNdim, + &outShapeData, + &auxShapeSize, + &auxShapeNdim, + &auxShapeData, + &complete); } else { - ret = MXSymbolInferShapePartial(reinterpret_cast(symbolPtr), - static_cast(jnumArgs), - keys, - reinterpret_cast(argIndPtr), - reinterpret_cast(argShapeData), - &inShapeSize, - &inShapeNdim, - &inShapeData, - &outShapeSize, - &outShapeNdim, - &outShapeData, - &auxShapeSize, - &auxShapeNdim, - &auxShapeData, - &complete); + ret = MXSymbolInferShapePartialEx(reinterpret_cast(symbolPtr), + static_cast(jnumArgs), + keys, + reinterpret_cast(argIndPtr), + reinterpret_cast(argShapeData), + &inShapeSize, + &inShapeNdim, + &inShapeData, + &outShapeSize, + &outShapeNdim, + &outShapeData, + &auxShapeSize, + &auxShapeNdim, + &auxShapeData, + &complete); } env->ReleaseIntArrayElements(jargShapeData, argShapeData, 0); env->ReleaseIntArrayElements(jargIndPtr, argIndPtr, 0); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 533f18fa9e0b..f549ddd13994 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -495,8 +495,27 @@ int MXNDArrayGetStorageType(NDArrayHandle handle, } int MXNDArrayGetShape(NDArrayHandle handle, - int *out_dim, - const int **out_pdata) { + mx_uint *out_dim, + const mx_uint **out_pdata) { + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); + API_BEGIN(); + NDArray *arr = static_cast(handle); + if 
(!arr->is_none()) { + const mxnet::TShape &s = arr->shape(); + *out_dim = s.ndim(); + std::vector& buffer = ret->arg_shape_buffer; + buffer.resize(s.ndim()); + nnvm::ShapeTypeCast(s.begin(), s.end(), buffer.data()); + *out_pdata = buffer.data(); + } else { + *out_dim = 0; + } + API_END(); +} + +int MXNDArrayGetShapeEx(NDArrayHandle handle, + int *out_dim, + const int **out_pdata) { MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); API_BEGIN(); NDArray *arr = static_cast(handle); @@ -507,7 +526,7 @@ int MXNDArrayGetShape(NDArrayHandle handle, } *out_dim = s.ndim(); if (s.ndim() >= 0) { - std::vector &buffer = ret->arg_shape_buffer; + std::vector &buffer = ret->arg_shape_buffer_ex; buffer.resize(s.ndim()); mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data()); *out_pdata = buffer.data(); @@ -1406,8 +1425,15 @@ int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shar API_END(); } -int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape, - int ndim, int dtype, NDArrayHandle *out) { +int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape, + mx_uint ndim, int dtype, NDArrayHandle *out) { + API_BEGIN(); + *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype); + API_END(); +} + +int MXNDArrayCreateFromSharedMemEx(int shared_pid, int shared_id, const int *shape, + int ndim, int dtype, NDArrayHandle *out) { API_BEGIN(); *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype); API_END(); diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 8be192b22c53..013ecab93da8 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -74,15 +74,40 @@ struct MXAPIThreadLocalEntry { /*! \brief result holder for returning storage types */ std::vector arg_storage_types, out_storage_types, aux_storage_types; /*! 
\brief result holder for returning shape dimensions */ - std::vector arg_shape_ndim, out_shape_ndim, aux_shape_ndim; + std::vector arg_shape_ndim, out_shape_ndim, aux_shape_ndim; + /*! \brief result holder for returning shape dimensions */ + std::vector arg_shape_ndim_ex, out_shape_ndim_ex, aux_shape_ndim_ex; + /*! \brief result holder for returning shape pointer */ + std::vector arg_shape_data, out_shape_data, aux_shape_data; /*! \brief result holder for returning shape pointer */ - std::vector arg_shape_data, out_shape_data, aux_shape_data; + std::vector arg_shape_data_ex, out_shape_data_ex, aux_shape_data_ex; /*! \brief uint32_t buffer for returning shape pointer */ - std::vector arg_shape_buffer, out_shape_buffer, aux_shape_buffer; + std::vector arg_shape_buffer, out_shape_buffer, aux_shape_buffer; + /*! \brief int buffer for returning shape pointer */ + std::vector arg_shape_buffer_ex, out_shape_buffer_ex, aux_shape_buffer_ex; /*! \brief bool buffer */ std::vector save_inputs, save_outputs; + // DEPRECATED. Use SetupShapeArrayReturnWithBufferEx instead.
// helper function to setup return value of shape array inline static void SetupShapeArrayReturnWithBuffer( + const mxnet::ShapeVector &shapes, + std::vector *ndim, + std::vector *data, + std::vector *buffer) { + ndim->resize(shapes.size()); + data->resize(shapes.size()); + size_t size = 0; + for (const auto& s : shapes) size += s.ndim(); + buffer->resize(size); + uint32_t *ptr = buffer->data(); + for (size_t i = 0; i < shapes.size(); ++i) { + ndim->at(i) = shapes[i].ndim(); + data->at(i) = ptr; + ptr = nnvm::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr); + } + } + // helper function to setup return value of shape array + inline static void SetupShapeArrayReturnWithBufferEx( const mxnet::ShapeVector &shapes, std::vector *ndim, std::vector *data, diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc index fc59463ab689..5352fcfe0951 100644 --- a/src/c_api/c_api_executor.cc +++ b/src/c_api/c_api_executor.cc @@ -185,7 +185,7 @@ int MXExecutorBindEX(SymbolHandle symbol_handle, } /*! - * \brief + * \brief DEPRECATED. Use MXExecutorSimpleBindEx instead. * \param symbol_handle symbol handle * \param dev_type default device type * \param dev_id default device id @@ -233,7 +233,7 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, const char** provided_grad_req_types, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, const mx_uint num_provided_arg_dtypes, const char** provided_arg_dtype_names, @@ -544,6 +544,366 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle, API_END(); } +/*! 
+ * \brief + * \param symbol_handle symbol handle + * \param dev_type default device type + * \param dev_id default device id + * \param num_g2c_keys number of group2ctx keys + * \param g2c_keys key list of group2ctx + * \param g2c_dev_types device type list of group2ctx + * \param g2c_dev_ids id list of group2ctx + * \param provided_grad_req_list_len grad_req length provided by users in front-end + * \param provided_grad_req_names grad_req names provided by users in front-end + * \param provided_grad_req_types req types provided by users in front-end + * \param num_provided_arg_shapes number of user provided in_arg and aux_state shapes + * \param provided_arg_shape_names name list of provided shapes + * \param provided_arg_shape_data provided shape data + * \param provided_arg_shape_idx provided shape data index + * \param num_provided_arg_dtypes number of user provided in_arg and axu_state dtypes + * \param provided_arg_dtype_names argument name list of provided dtypes + * \param provided_arg_dtypes data of provided dtypes + * \param num_provided_arg_stypes number of user provided in_arg and axu_state storage types + * \param provided_arg_stype_names argument name list of provided storage types + * \param provided_arg_stypes data of provided storage types + * \param num_shared_arg_names number of parameter names passed from _bind_ith_exec + * \param shared_arg_name_list parameter name list passed from _bind_ith_exec + * \param shared_buffer_len number of shared data arrays passed from _bind_ith_exec + * \param shared_buffer_name_list shared data array names passed from _bind_ith_exec + * \param shared_buffer_handle_list shared data array handles passed from _bind_ith_exec + * \param updated_shared_buffer_name_list updated shared data array names after binding + * \param updated_shared_buffer_handle_list updated shared data arrays after binding + * \param num_in_args number of input arguments of this sym + * \param in_args list_arguments associated with the 
current executor + * \param arg_grads list of gradients of in_args associated with the current executor + * \param num_aux_states number of aux states of this sym + * \param aux_states list_auxiliary_states associated with the current executor + * \param shared_exec_handle shared excutor handle passed from _bind_ith_exec + * \param out the handle of the executor to be created + */ +int MXExecutorSimpleBindEx(SymbolHandle symbol_handle, + int dev_type, + int dev_id, + const mx_uint num_g2c_keys, + const char** g2c_keys, + const int* g2c_dev_types, + const int* g2c_dev_ids, + const mx_uint provided_grad_req_list_len, + const char** provided_grad_req_names, + const char** provided_grad_req_types, + const mx_uint num_provided_arg_shapes, + const char** provided_arg_shape_names, + const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_idx, + const mx_uint num_provided_arg_dtypes, + const char** provided_arg_dtype_names, + const int* provided_arg_dtypes, + const mx_uint num_provided_arg_stypes, + const char** provided_arg_stype_names, + const int* provided_arg_stypes, + const mx_uint num_shared_arg_names, + const char** shared_arg_name_list, + int* shared_buffer_len, + const char** shared_buffer_name_list, + NDArrayHandle* shared_buffer_handle_list, + const char*** updated_shared_buffer_name_list, + NDArrayHandle** updated_shared_buffer_handle_list, + mx_uint* num_in_args, + NDArrayHandle** in_args, + NDArrayHandle** arg_grads, + mx_uint* num_aux_states, + NDArrayHandle** aux_states, + ExecutorHandle shared_exec_handle, + ExecutorHandle* out) { + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); + API_BEGIN(); + nnvm::Symbol *sym = static_cast(symbol_handle); + + // get in_arg names + std::vector in_arg_names = sym->ListInputNames(nnvm::Symbol::kReadOnlyArgs); + std::vector aux_state_names = sym->ListInputNames(nnvm::Symbol::kAuxiliaryStates); + + // attr_dict for setting up type_dict and arg/aux ctx + std::unordered_map> attr_dict; + if 
(nullptr == provided_arg_dtypes || nullptr != g2c_keys || nullptr == provided_arg_stypes) { + std::vector> attrs = + sym->ListAttrsRecursive(); + attr_dict.reserve(attrs.size()); + for (const auto& tp : attrs) { + attr_dict[std::get<0>(tp)][std::get<1>(tp)] = std::get<2>(tp); + } + } + + // setup arg_dtype_map + std::unordered_map arg_dtype_map; + if (nullptr == provided_arg_dtypes) { // use attr_dict + for (const auto& arg_name : in_arg_names) { + const auto it = attr_dict.find(arg_name); + if (it == attr_dict.end() || !it->second.count("__dtype__")) { + arg_dtype_map[arg_name] = mshadow::kFloat32; + } + } + } else { // use user input type_dict + // create dtype map for in_args and aux_states + arg_dtype_map.reserve(num_provided_arg_dtypes); + for (mx_uint i = 0; i < num_provided_arg_dtypes; ++i) { + arg_dtype_map[provided_arg_dtype_names[i]] = provided_arg_dtypes[i]; + } + } + + // setup arg_stype_map + std::unordered_map arg_stype_map; + if (nullptr == provided_arg_stypes) { // use attr_dict + for (const auto& arg_name : in_arg_names) { + const auto it = attr_dict.find(arg_name); + if (it == attr_dict.end() || !it->second.count("__storage_type__")) { + arg_stype_map[arg_name] = kDefaultStorage; + } + } + } else { // use user input type_dict + // create stype map for in_args and aux_states + arg_stype_map.reserve(num_provided_arg_stypes); + for (mx_uint i = 0; i < num_provided_arg_stypes; ++i) { + arg_stype_map[provided_arg_stype_names[i]] = provided_arg_stypes[i]; + } + } + + // create default ctx + Context ctx = Context::Create(static_cast(dev_type), dev_id); + // create ctx map + std::map ctx_map; + std::vector in_arg_ctx_vec(in_arg_names.size(), ctx); + std::vector aux_state_ctx_vec(aux_state_names.size(), ctx); + if (nullptr != g2c_keys) { // use user input group2ctx dict + for (mx_uint i = 0; i < num_g2c_keys; ++i) { + ctx_map[g2c_keys[i]] = Context::Create( + static_cast(g2c_dev_types[i]), g2c_dev_ids[i]); + } + + // initialize in_arg_ctx_vec using 
group2ctx if there are any + for (size_t i = 0; i < in_arg_ctx_vec.size(); ++i) { + const auto it1 = attr_dict.find(in_arg_names[i]); + if (it1 != attr_dict.end()) { + const auto it2 = it1->second.find("__ctx_group__"); + if (it2 != it1->second.end()) { + const auto it3 = ctx_map.find(it2->second); + if (it3 != ctx_map.end()) { + in_arg_ctx_vec[i] = it3->second; + } + } + } + } + + // initialize aux_state_ctx_vec using group2ctx if there are any + for (size_t i = 0; i < aux_state_ctx_vec.size(); ++i) { + const auto it1 = attr_dict.find(aux_state_names[i]); + if (it1 != attr_dict.end()) { + const auto it2 = it1->second.find("__ctx_group__"); + if (it2 != it1->second.end()) { + const auto it3 = ctx_map.find(it2->second); + if (it3 != ctx_map.end()) { + aux_state_ctx_vec[i] = it3->second; + } + } + } + } + } + + // create provided_grad_req_map + const std::map req_map = + {{"null", kNullOp}, {"write", kWriteTo}, {"add", kAddTo}}; + std::unordered_map provided_grad_req_map; + std::string grad_req_type; + if (0 == provided_grad_req_list_len + && nullptr == provided_grad_req_names + && nullptr != provided_grad_req_types) { // string, grad_req='write' + CHECK_EQ(req_map.count(provided_grad_req_types[0]), 1U) + << "grad_req=" << provided_grad_req_types[0] << " is not a valid input in simple_bind; " + "only \'null\', \'write\', and \'add\' are supported"; + grad_req_type = "string"; + } else if (provided_grad_req_list_len > 0 + && nullptr == provided_grad_req_names + && nullptr != provided_grad_req_types) { // list, grad_req=['null', 'write'] + grad_req_type = "list"; + CHECK_EQ(provided_grad_req_list_len, in_arg_names.size()) + << "The length of grad_req list does not match the number of input arguments in simple_bind, " + "expected " << in_arg_names.size() << ", provided " << provided_grad_req_list_len; + } else if (provided_grad_req_list_len > 0 + && nullptr != provided_grad_req_names + && nullptr != provided_grad_req_types) { // dict, grad_req=['lhs': 'null', 'rhs': 
'write'] + grad_req_type = "dict"; + provided_grad_req_map.reserve(provided_grad_req_list_len); + for (mx_uint i = 0; i < provided_grad_req_list_len; ++i) { + CHECK_EQ(req_map.count(provided_grad_req_types[i]), 1U) + << "grad_req=" << provided_grad_req_types[i] << " is not a valid input in simple_bind; " + "only \'null\', \'write\', and \'add\' are supported"; + provided_grad_req_map[provided_grad_req_names[i]] = provided_grad_req_types[i]; + } + } else { // grad_req is None + grad_req_type = "none"; + } + + // initialize arg_grad_ctx_vec and grad_req_type_vec + std::vector arg_grad_ctx_vec(in_arg_names.size(), ctx); + std::vector grad_req_type_vec(in_arg_names.size(), kNullOp); + if ("none" != grad_req_type) { + for (size_t i = 0; i < in_arg_names.size(); ++i) { + OpReqType cur_req = kNullOp; + if ("string" == grad_req_type) { + cur_req = req_map.at(provided_grad_req_types[0]); + } else if ("list" == grad_req_type) { + CHECK_EQ(req_map.count(provided_grad_req_types[i]), 1U) + << "grad_req=" << provided_grad_req_types[i] << " is not a valid input in simple_bind; " + "only \'null\', \'write\', and \'add\' are supported"; + cur_req = req_map.at(provided_grad_req_types[i]); + } else if ("dict" == grad_req_type) { + const auto it = provided_grad_req_map.find(in_arg_names[i]); + if (it != provided_grad_req_map.end()) { + cur_req = req_map.at(it->second); + } + } + if (kNullOp != cur_req) { + arg_grad_ctx_vec[i] = in_arg_ctx_vec[i]; + grad_req_type_vec[i] = static_cast(cur_req); + } + } + } + + // create shape map for in_args and aux_states + std::unordered_map arg_shape_map(num_provided_arg_shapes); + for (mx_uint i = 0; i < num_provided_arg_shapes; ++i) { + auto p = arg_shape_map.emplace(provided_arg_shape_names[i], + mxnet::TShape(provided_arg_shape_data+provided_arg_shape_idx[i], + provided_arg_shape_data+provided_arg_shape_idx[i+1])); + CHECK(p.second) << "Duplicate shapes are provided for argument " + << provided_arg_shape_names[i] << " in simple_bind"; + } + if 
(!Imperative::Get()->is_np_comp()) { + for (auto &kv : arg_shape_map) { + common::ConvertToNumpyShape(&kv.second); + } + } + + // create para name set for sharing data array memory + std::unordered_set shared_arg_name_set(num_shared_arg_names); + for (mx_uint i = 0; i < num_shared_arg_names; ++i) { + shared_arg_name_set.insert(shared_arg_name_list[i]); + } + + // create shared_buffer_map + std::unordered_map shared_buffer_map; + bool use_shared_buffer = (*shared_buffer_len >= 0); + if (*shared_buffer_len > 0) { + // create shared_buffer_map + shared_buffer_map.reserve(*shared_buffer_len); + NDArray** shared_buffer_ptrs = + reinterpret_cast(shared_buffer_handle_list); + for (int i = 0; i < *shared_buffer_len; ++i) { + shared_buffer_map[shared_buffer_name_list[i]] = *(shared_buffer_ptrs[i]); + } + } + + // create temporary place holders for the initialized NDArrays + // to be passed back to front end + std::vector in_arg_vec; + std::vector arg_grad_vec; + std::vector aux_state_vec; +#if MXNET_USE_TENSORRT + // If we've built with TensorRT support we by default return an TRTExecutor. + // Users can override this behaviour via env var, which is useful for example for A/B + // performance testing. + if (dmlc::GetEnv("MXNET_USE_TENSORRT", false)) { + *out = exec::TrtGraphExecutor::TensorRTBind(*sym, ctx, ctx_map, &in_arg_ctx_vec, + &arg_grad_ctx_vec, &aux_state_ctx_vec, + &arg_shape_map, &arg_dtype_map, &arg_stype_map, + &grad_req_type_vec, shared_arg_name_set, + &in_arg_vec, &arg_grad_vec, &aux_state_vec, + use_shared_buffer ? &shared_buffer_map : nullptr, + reinterpret_cast(shared_exec_handle)); + } else { + // Checks to see if this env var has been set to true or false by the user. + // If the user is using a TensorRT build, but has not enabled TRT at inference time, warn + // them and describe further steps. 
+ const int unset_indicator = std::numeric_limits::quiet_NaN(); + if (dmlc::GetEnv("MXNET_USE_TENSORRT", unset_indicator) == unset_indicator) { + LOG(INFO) << "TensorRT not enabled by default. Please set the MXNET_USE_TENSORRT " + "environment variable to 1 or call mx.contrib.tensorrt.set_use_tensorrt(True) " + "to enable."; + } +#endif // MXNET_USE_TENSORRT + *out = Executor::SimpleBind(*sym, ctx, ctx_map, in_arg_ctx_vec, arg_grad_ctx_vec, + aux_state_ctx_vec, arg_shape_map, arg_dtype_map, arg_stype_map, + grad_req_type_vec, shared_arg_name_set, &in_arg_vec, + &arg_grad_vec, &aux_state_vec, + use_shared_buffer ? &shared_buffer_map : nullptr, + reinterpret_cast(shared_exec_handle)); +#if MXNET_USE_TENSORRT + } +#endif // MXNET_USE_TENSORRT + + // copy ndarray ptrs to ret->handles so that front end + // can access them + ret->ret_handles.clear(); + ret->ret_handles.reserve(in_arg_vec.size()+arg_grad_vec.size()+aux_state_vec.size() + +shared_buffer_map.size()); + size_t nd_idx = 0; + for (const auto& nd : in_arg_vec) { + if (nd.is_none()) { + LOG(FATAL) << "Input argument NDArray cannot be un-allocated"; + } + ret->ret_handles.push_back(new NDArray(nd)); + } + if (in_arg_vec.size() > 0) { + *num_in_args = in_arg_vec.size(); + *in_args = &(ret->ret_handles[nd_idx]); + nd_idx = ret->ret_handles.size(); + } + + for (const auto& nd : arg_grad_vec) { + if (nd.is_none()) { + ret->ret_handles.push_back(nullptr); + } else { + ret->ret_handles.push_back(new NDArray(nd)); + } + } + if (arg_grad_vec.size() > 0) { + *arg_grads = &(ret->ret_handles[nd_idx]); + nd_idx = ret->ret_handles.size(); + } + + for (const auto& nd : aux_state_vec) { + if (nd.is_none()) { + LOG(FATAL) << "Auxiliary argument NDArray cannot be un-allocated"; + } + ret->ret_handles.push_back(new NDArray(nd)); + } + if (aux_state_vec.size() > 0) { + *num_aux_states = aux_state_vec.size(); + *aux_states = &(ret->ret_handles[nd_idx]); + nd_idx = ret->ret_handles.size(); + } + + if (use_shared_buffer) { + 
ret->ret_vec_str.clear(); + ret->ret_vec_str.reserve(shared_buffer_map.size()); + ret->ret_vec_charp.clear(); + ret->ret_vec_charp.reserve(shared_buffer_map.size()); + for (const auto& kv : shared_buffer_map) { + if (kv.second.is_none()) { + LOG(FATAL) << "Shared data NDArray cannot be un-allocated"; + } + ret->ret_handles.push_back(new NDArray(kv.second)); + ret->ret_vec_str.emplace_back(kv.first); + ret->ret_vec_charp.push_back(ret->ret_vec_str.back().c_str()); + } + *shared_buffer_len = shared_buffer_map.size(); + *updated_shared_buffer_handle_list = &(ret->ret_handles[nd_idx]); + *updated_shared_buffer_name_list = &(ret->ret_vec_charp[0]); + } + + API_END(); +} + int MXExecutorReshape(int partial_shaping, int allow_up_sizing, int dev_type, @@ -554,7 +914,7 @@ int MXExecutorReshape(int partial_shaping, const int* map_dev_ids, const mx_uint num_provided_arg_shapes, const char** provided_arg_shape_names, - const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_data, const mx_uint* provided_arg_shape_idx, mx_uint* num_in_args, NDArrayHandle** in_args, @@ -635,6 +995,97 @@ int MXExecutorReshape(int partial_shaping, API_END_HANDLE_ERROR(delete new_exec); } +int MXExecutorReshapeEx(int partial_shaping, + int allow_up_sizing, + int dev_type, + int dev_id, + mx_uint num_map_keys, + const char** map_keys, + const int* map_dev_types, + const int* map_dev_ids, + const mx_uint num_provided_arg_shapes, + const char** provided_arg_shape_names, + const int* provided_arg_shape_data, + const mx_uint* provided_arg_shape_idx, + mx_uint* num_in_args, + NDArrayHandle** in_args, + NDArrayHandle** arg_grads, + mx_uint* num_aux_states, + NDArrayHandle** aux_states, + ExecutorHandle shared_exec, + ExecutorHandle *out) { + Executor* new_exec = nullptr; + + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); + API_BEGIN(); + *out = nullptr; // ensure we can know whether to free executor on early abort + // create shape map for in_args and aux_states + 
std::unordered_map kwargs(num_provided_arg_shapes); + for (mx_uint i = 0; i < num_provided_arg_shapes; ++i) { + auto p = kwargs.emplace(provided_arg_shape_names[i], + mxnet::TShape(provided_arg_shape_data+provided_arg_shape_idx[i], + provided_arg_shape_data+provided_arg_shape_idx[i+1])); + CHECK(p.second) << "Duplicate shapes are provided for argument " + << provided_arg_shape_names[i] << " in reshape of executor"; + } + + Context ctx = Context::Create(static_cast(dev_type), dev_id); + std::map ctx_map; + for (mx_uint i = 0; i < num_map_keys; ++i) { + ctx_map[std::string(map_keys[i])] = Context::Create( + static_cast(map_dev_types[i]), map_dev_ids[i]); + } + std::vector in_arg_vec; + std::vector arg_grad_vec; + std::vector aux_state_vec; + + Executor* exec = static_cast(shared_exec); + new_exec = exec->Reshape(partial_shaping, allow_up_sizing, ctx, ctx_map, kwargs, + &in_arg_vec, &arg_grad_vec, &aux_state_vec); + *out = new_exec; + + ret->ret_handles.clear(); + ret->ret_handles.reserve(in_arg_vec.size()+arg_grad_vec.size()+aux_state_vec.size()); + + size_t nd_idx = 0; + for (const auto& nd : in_arg_vec) { + if (nd.is_none()) { + LOG(FATAL) << "Input argument NDArray cannot be un-allocated"; + } + ret->ret_handles.push_back(new NDArray(nd)); + } + if (in_arg_vec.size() > 0) { + *num_in_args = in_arg_vec.size(); + *in_args = &(ret->ret_handles[nd_idx]); + nd_idx = ret->ret_handles.size(); + } + + for (const auto& nd : arg_grad_vec) { + if (nd.is_none()) { + ret->ret_handles.push_back(nullptr); + } else { + ret->ret_handles.push_back(new NDArray(nd)); + } + } + if (arg_grad_vec.size() > 0) { + *arg_grads = &(ret->ret_handles[nd_idx]); + nd_idx = ret->ret_handles.size(); + } + + for (const auto& nd : aux_state_vec) { + if (nd.is_none()) { + LOG(FATAL) << "Auxiliary argument NDArray cannot be un-allocated"; + } + ret->ret_handles.push_back(new NDArray(nd)); + } + if (aux_state_vec.size() > 0) { + *num_aux_states = aux_state_vec.size(); + *aux_states = 
&(ret->ret_handles[nd_idx]); + nd_idx = ret->ret_handles.size(); + } + API_END_HANDLE_ERROR(delete new_exec); +} + int MXExecutorGetOptimizedSymbol(ExecutorHandle handle, SymbolHandle *out) { auto s = new nnvm::Symbol(); diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 98034417abae..24a88520376f 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -506,16 +506,16 @@ int MXSymbolInferShape(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const int *arg_shape_data, + const mx_uint *arg_shape_data, mx_uint *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, + const mx_uint **in_shape_ndim, + const mx_uint ***in_shape_data, mx_uint *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, + const mx_uint **out_shape_ndim, + const mx_uint ***out_shape_data, mx_uint *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, + const mx_uint **aux_shape_ndim, + const mx_uint ***aux_shape_data, int *complete) { nnvm::Symbol *s = static_cast(sym); MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); @@ -575,20 +575,93 @@ int MXSymbolInferShape(SymbolHandle sym, API_END(); } +int MXSymbolInferShapeEx(SymbolHandle sym, + mx_uint num_args, + const char** keys, + const mx_uint *arg_ind_ptr, + const int *arg_shape_data, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *complete) { + nnvm::Symbol *s = static_cast(sym); + MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get(); + API_BEGIN(); + nnvm::Graph g = Symbol2Graph(*s); + mxnet::ShapeVector arg_shapes(g.indexed_graph().input_nodes().size(), mxnet::TShape()); + if (keys == nullptr && num_args != 0) { + std::vector read_only_args = 
mxnet::ReadOnlyArgIndices(g.indexed_graph()); + CHECK_LE(num_args, read_only_args.size()); + for (mx_uint i = 0; i < num_args; ++i) { + arg_shapes[read_only_args[i]] = mxnet::ShapeTypeCast( + arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]); + } + } else { + std::unordered_map kwargs; + for (mx_uint i = 0; i < num_args; ++i) { + kwargs[keys[i]] = mxnet::ShapeTypeCast( + arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]); + } + mxnet::MatchArguments(g.indexed_graph(), kwargs, &arg_shapes, "InferShape"); + } + + try { + g = mxnet::exec::InferShape(std::move(g), std::move(arg_shapes), "__shape__"); + } catch (const mxnet::op::InferShapeError &err) { + throw dmlc::Error(err.msg); + } + + // if use legacy shape definition, need to convert numpy shape to legacy shape + mxnet::ShapeVector shapes = g.GetAttr("shape"); + if (!Imperative::Get()->is_np_comp()) { + common::ConvertToLegacyShape(&shapes); + } + + // copy back + CopyAttr(g.indexed_graph(), shapes, + &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes)); + + // copy data back + MXAPIThreadLocalEntry::SetupShapeArrayReturnWithBufferEx(ret->arg_shapes, + &(ret->arg_shape_ndim_ex), &(ret->arg_shape_data_ex), &(ret->arg_shape_buffer_ex)); + MXAPIThreadLocalEntry::SetupShapeArrayReturnWithBufferEx(ret->out_shapes, + &(ret->out_shape_ndim_ex), &(ret->out_shape_data_ex), &(ret->out_shape_buffer_ex)); + MXAPIThreadLocalEntry::SetupShapeArrayReturnWithBufferEx(ret->aux_shapes, + &(ret->aux_shape_ndim_ex), &(ret->aux_shape_data_ex), &(ret->aux_shape_buffer_ex)); + *in_shape_size = static_cast(ret->arg_shapes.size()); + *in_shape_ndim = dmlc::BeginPtr(ret->arg_shape_ndim_ex); + *in_shape_data = dmlc::BeginPtr(ret->arg_shape_data_ex); + *out_shape_size = static_cast(ret->out_shapes.size()); + *out_shape_ndim = dmlc::BeginPtr(ret->out_shape_ndim_ex); + *out_shape_data = dmlc::BeginPtr(ret->out_shape_data_ex); + *aux_shape_size = static_cast(ret->aux_shapes.size()); + *aux_shape_ndim = 
dmlc::BeginPtr(ret->aux_shape_ndim_ex); + *aux_shape_data = dmlc::BeginPtr(ret->aux_shape_data_ex); + // mark complete + *complete = (g.GetAttr("shape_num_unknown_nodes") == 0); + API_END(); +} + int MXSymbolInferShapePartial(SymbolHandle sym, mx_uint num_args, const char** keys, const mx_uint *arg_ind_ptr, - const int *arg_shape_data, + const mx_uint *arg_shape_data, mx_uint *in_shape_size, - const int **in_shape_ndim, - const int ***in_shape_data, + const mx_uint **in_shape_ndim, + const mx_uint ***in_shape_data, mx_uint *out_shape_size, - const int **out_shape_ndim, - const int ***out_shape_data, + const mx_uint **out_shape_ndim, + const mx_uint ***out_shape_data, mx_uint *aux_shape_size, - const int **aux_shape_ndim, - const int ***aux_shape_data, + const mx_uint **aux_shape_ndim, + const mx_uint ***aux_shape_data, int *complete) { int succ; *complete = 1; @@ -600,6 +673,31 @@ int MXSymbolInferShapePartial(SymbolHandle sym, &succ); } +int MXSymbolInferShapePartialEx(SymbolHandle sym, + mx_uint num_args, + const char** keys, + const mx_uint *arg_ind_ptr, + const int *arg_shape_data, + mx_uint *in_shape_size, + const int **in_shape_ndim, + const int ***in_shape_data, + mx_uint *out_shape_size, + const int **out_shape_ndim, + const int ***out_shape_data, + mx_uint *aux_shape_size, + const int **aux_shape_ndim, + const int ***aux_shape_data, + int *complete) { + int succ; + *complete = 1; + return MXSymbolInferShapeEx(sym, num_args, keys, + arg_ind_ptr, arg_shape_data, + in_shape_size, in_shape_ndim, in_shape_data, + out_shape_size, out_shape_ndim, out_shape_data, + aux_shape_size, aux_shape_ndim, aux_shape_data, + &succ); +} + int MXSymbolInferType(SymbolHandle sym, mx_uint num_args, const char** keys, From c8d2cce8faa11d71620988209fc6f0e95579254d Mon Sep 17 00:00:00 2001 From: reminisce Date: Thu, 11 Apr 2019 15:52:18 -0700 Subject: [PATCH 23/32] Fix lint --- include/mxnet/c_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 7907265a68a9..0acfde0686d4 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -786,8 +786,8 @@ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle, * \return 0 when success, -1 when failure happens */ MXNET_DLL int MXNDArrayGetShapeEx(NDArrayHandle handle, - int *out_dim, - const int **out_pdata); + int *out_dim, + const int **out_pdata); /*! * \brief get the content of the data in NDArray * \param handle the handle to the ndarray From a4a2841bf62b6634525cb1c3f707f71ac75a667d Mon Sep 17 00:00:00 2001 From: reminisce Date: Thu, 11 Apr 2019 16:26:29 -0700 Subject: [PATCH 24/32] Rebase and fix build --- src/operator/nn/mkldnn/mkldnn_transpose.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operator/nn/mkldnn/mkldnn_transpose.cc b/src/operator/nn/mkldnn/mkldnn_transpose.cc index 0986d0616f75..eec19bababb7 100644 --- a/src/operator/nn/mkldnn/mkldnn_transpose.cc +++ b/src/operator/nn/mkldnn/mkldnn_transpose.cc @@ -55,9 +55,9 @@ class MKLDNNTransposeForward { auto shape = data.shape(); auto data_ndim = shape.ndim(); auto axes_ndim = param.axes.ndim(); - auto axes = mxnet::TShape(data_ndim); + auto axes = mxnet::TShape(data_ndim, -1); if (axes_ndim == 0) { - for (size_t i = 0; i < data_ndim; i++) { + for (int i = 0; i < data_ndim; i++) { axes[i] = data_ndim - i - 1; } } else { @@ -79,7 +79,7 @@ class MKLDNNTransposeForward { dst_fmt.data_type = mkldnn_f32; dst_fmt.format = mkldnn_blocked; - for (size_t i = 0; i < data_ndim; i++) + for (int i = 0; i < data_ndim; i++) dst_fmt.dims[i] = shape[i]; unsigned int total_stride = 1; From a07bf8418f556d15109ebd52eb4de58a6e4a6e30 Mon Sep 17 00:00:00 2001 From: reminisce Date: Thu, 11 Apr 2019 20:58:57 -0700 Subject: [PATCH 25/32] Fix R build failure --- R-package/src/symbol.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/src/symbol.cc b/R-package/src/symbol.cc index 90ff9ef1dd67..317e82568012 100644 --- 
a/R-package/src/symbol.cc +++ b/R-package/src/symbol.cc @@ -207,7 +207,7 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const { const int **aux_shape_data; int complete; - MX_CALL(MXSymbolInferShape( + MX_CALL(MXSymbolInferShapeEx( handle_, static_cast(kwargs.size()), dmlc::BeginPtr(c_keys), dmlc::BeginPtr(arg_ind_ptr), dmlc::BeginPtr(arg_shape_data), &in_shape_size, &in_shape_ndim, &in_shape_data, From c204cd5727ab51887d7457a3fceb3f45a4bb7caf Mon Sep 17 00:00:00 2001 From: reminisce Date: Thu, 11 Apr 2019 20:59:41 -0700 Subject: [PATCH 26/32] Fix Perl build failure --- perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm | 2 +- perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm | 4 ++-- perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm index 573abbf588f2..5844302fce16 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm @@ -471,7 +471,7 @@ method reshape(HashRef[Shape] $kwargs, Int :$partial_shaping=0, Int :$allow_up_s my $shared_handle = $self->handle; my ($in_args_and_grad_handles, $aux_state_handles, $handle) = check_call( - AI::MXNetCAPI::ExecutorReshape( + AI::MXNetCAPI::ExecutorReshapeEx( $partial_shaping, $allow_up_sizing, $self->_ctx->device_type_id, diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm index 72f6cc772178..f466aaa11a3d 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm @@ -535,7 +535,7 @@ method wait_to_read() method shape() { - return scalar(check_call(AI::MXNetCAPI::NDArrayGetShape($self->handle))); + return scalar(check_call(AI::MXNetCAPI::NDArrayGetShapeEx($self->handle))); } =head2 size @@ -1460,7 +1460,7 @@ func _new_alloc_handle($shape, $ctx, $delay_alloc, $dtype) method _new_from_shared_mem($shared_pid, 
$shared_id, $shape, $dtype) { my $hdl = check_call( - AI::MXNetCAPI::NDArrayCreateFromSharedMem( + AI::MXNetCAPI::NDArrayCreateFromSharedMemEx( $shared_pid, $shared_id, $shape, diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm index 04dd1cbfc441..e4953f17031a 100644 --- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm +++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm @@ -662,7 +662,7 @@ method _infer_shape_impl(Maybe[Str|Shape] @args) push @{ $indptr }, scalar(@{ $sdata }); } } - my $infer_func = $partial ? \&AI::MXNetCAPI::SymbolInferShapePartial : \&AI::MXNetCAPI::SymbolInferShape; + my $infer_func = $partial ? \&AI::MXNetCAPI::SymbolInferShapePartialEx : \&AI::MXNetCAPI::SymbolInferShapeEx; my ($arg_shapes, $out_shapes, $aux_shapes, $complete) = check_call( $infer_func->( $self->handle, @@ -937,7 +937,7 @@ method simple_bind( ($updated_shared_data, $in_arg_handles, $arg_grad_handles, $aux_state_handles, $exe_handle) = check_call( - AI::MXNetCAPI::ExecutorSimpleBind( + AI::MXNetCAPI::ExecutorSimpleBindEx( $self->handle, $ctx->device_type_id, $ctx->device_id, From c876bc2413d743e9c926acc8e48ca3011a757bb7 Mon Sep 17 00:00:00 2001 From: reminisce Date: Sat, 13 Apr 2019 14:01:16 -0700 Subject: [PATCH 27/32] Rebase with master --- src/operator/rnn.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 74c563afceb1..7012a3c22f50 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -50,7 +50,7 @@ static bool RNNShape(const nnvm::NodeAttrs& attrs, "Needed input:[data, parameters, state], got in_shape->size(): " << in_shape->size(); } const TShape &dshape = (*in_shape)[rnn_enum::kData]; - if (dshape.ndim() == 0) return false; + if (!mxnet::ndim_is_known(dshape)) return false; CHECK_EQ(dshape.ndim(), 3U) \ << "Input data should be rank-3 tensor of dim [sequence length, batch size, input size]"; // data: [sequence len, batch, input dimension] From 
b864c4bdbb02a41694c882c814367ee31bb5acbd Mon Sep 17 00:00:00 2001 From: reminisce Date: Mon, 15 Apr 2019 10:30:22 -0700 Subject: [PATCH 28/32] Address cr comments --- cpp-package/include/mxnet-cpp/symbol.hpp | 2 +- include/mxnet/tuple.h | 40 +++++++++++++----------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp index 7bdbe52ddc00..2e3fb7a2d5de 100644 --- a/cpp-package/include/mxnet-cpp/symbol.hpp +++ b/cpp-package/include/mxnet-cpp/symbol.hpp @@ -200,7 +200,7 @@ inline void Symbol::InferShape( arg_ind_ptr.push_back(arg_shape_data.size()); mx_uint in_shape_size; - const int*in_shape_ndim; + const int *in_shape_ndim; const int **in_shape_data; mx_uint out_shape_size; const int *out_shape_ndim; diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index c5c0ccd548df..8431bbb23b96 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -366,6 +366,19 @@ class Tuple { } }; + +/*! brief check if a shape's ndim is known. */ +inline bool ndim_is_known(const int ndim) { + CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim; + return ndim != -1; +} + +/*! brief check if a shape's dim size is known. */ +inline bool dim_size_is_known(const dim_t dim_size) { + CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size; + return dim_size != -1; +} + /*! * \brief A Shape class that is used to represent shape of each tensor. * @@ -461,11 +474,11 @@ class TShape : public Tuple { } /*! 
\return total number of elements in the shape */ inline size_t Size() const { - CHECK_GE(this->ndim(), 0) << "Shape is unknown."; + CHECK(ndim_is_known(this->ndim())) << "Shape is unknown."; dim_t size = 1; const dim_t* start = begin(), *fin = end(); for (const dim_t* it = start; it != fin; ++it) { - CHECK_GE(*it, 0) << "Shape dim size cannot be a negative value " << *it; + CHECK(dim_size_is_known(*it)) << "Shape dim size cannot be a negative value " << *it; size *= *it; } return size; @@ -476,11 +489,14 @@ class TShape : public Tuple { * \param dimend end dimension */ inline size_t ProdShape(int dimstart, int dimend) const { - CHECK_GE(this->ndim(), 0) << "Shape is unknown."; + CHECK(ndim_is_known(this->ndim())) << "Shape is unknown."; + CHECK_GE(dimstart, 0) << "dimstart must be >= 0, while received " << dimstart; + CHECK_LE(dimend, this->ndim()) << "dimend must be <= " << this->ndim() + << ", while received " << dimend; dim_t num = 1; const dim_t *d = this->data(); for (int i = dimstart; i < dimend; ++i) { - CHECK_GE(d[i], 0) << "Shape dim size cannot be a negative value " << d[i]; + CHECK(dim_size_is_known(d[i])) << "Shape dim size must be known, while received " << d[i]; num *= d[i]; } return num; @@ -536,7 +552,7 @@ class TShape : public Tuple { */ inline mshadow::Shape<2> FlatTo2D(void) const { mshadow::Shape<2> s; - CHECK_GE(ndim(), 0); + CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim"; if (ndim() == 0) return mshadow::Shape2(1, 1); const dim_t *d = this->data(); s.shape_[1] = d[ndim() - 1]; @@ -556,7 +572,7 @@ class TShape : public Tuple { inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const { CHECK(axis_end >= axis_begin); mshadow::Shape<3> s; - CHECK_GE(ndim(), 0); + CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim"; if (ndim() == 0) return mshadow::Shape3(1, 1, 1); const dim_t *d = this->data(); s.shape_[0] = 1; @@ -615,23 +631,11 @@ class TShape : public Tuple { #endif }; -/*! 
brief check if a shape's ndim is known. */ -inline bool ndim_is_known(const int ndim) { - CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim; - return ndim != -1; -} - /*! brief check if a shape's ndim is known. */ inline bool ndim_is_known(const TShape& x) { return ndim_is_known(x.ndim()); } -/*! brief check if a shape's dim size is known. */ -inline bool dim_size_is_known(const dim_t dim_size) { - CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size; - return dim_size != -1; -} - /*! brief check if a shape's dim size is known. */ inline bool dim_size_is_known(const TShape& x, const int idx) { CHECK(idx >= 0 && idx < x.ndim()) From 69791f107c11e9b9cea7acad06c57c525444583b Mon Sep 17 00:00:00 2001 From: reminisce Date: Mon, 15 Apr 2019 10:53:53 -0700 Subject: [PATCH 29/32] Use just one scope to represent numpy compatibility --- python/mxnet/__init__.py | 2 +- python/mxnet/base.py | 19 +++++-------------- tests/python/gpu/test_operator_gpu.py | 4 ++-- tests/python/unittest/test_infer_shape.py | 2 +- tests/python/unittest/test_ndarray.py | 2 +- tests/python/unittest/test_operator.py | 12 ++++++------ 6 files changed, 16 insertions(+), 25 deletions(-) diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 99f565ad309a..f4884d62fcd2 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -23,7 +23,7 @@ from .context import Context, current_context, cpu, gpu, cpu_pinned from . import engine -from .base import MXNetError, is_np_compat, set_np_compat, enable_np_compat, disable_np_compat +from .base import MXNetError, is_np_compat, set_np_compat, np_compat from . import base from . import contrib from . import ndarray diff --git a/python/mxnet/base.py b/python/mxnet/base.py index b056b73d308e..bd1eceacac66 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -30,7 +30,7 @@ from . 
import libinfo -__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'enable_np_compat', 'disable_np_compat'] +__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'np_compat'] #---------------------------- # library loading #---------------------------- @@ -791,13 +791,13 @@ def __exit__(self, ptype, value, trace): set_np_compat(self._prev_is_np_compat) -def enable_np_compat(): +def np_compat(active=True): """Returns a NumPy compatibility state scope to be used in 'with' statement and captures code that needs the compatibility. Example:: - with mx.enable_np_compat(): + with mx.np_compat(active=True): # A scalar tensor's shape is `()`, whose `ndim` is `0`. scalar = mx.nd.ones(shape=()) assert scalar.shape == () @@ -823,17 +823,8 @@ def enable_np_compat(): arg_shapes, out_shapes, _ = ret.infer_shape_partial() assert arg_shapes[0] is None assert out_shapes[0] is None - """ - return _NumpyCompatibilityStateScope(True) - - -def disable_np_compat(): - """Returns a state scope with NumPy-compatibility disabled to be used in 'with' statement - and captures code that does not need the compatibility. - - Example:: - with mx.disable_np_compat(): + with mx.np_compat(active=False): # 0 means unknown shape dimension size in the legacy shape definition. data = mx.sym.var("data", shape=(0, 2, 3)) ret = mx.sym.sin(data) @@ -849,4 +840,4 @@ def disable_np_compat(): assert arg_shapes[0] == () assert out_shapes[0] == () """ - return _NumpyCompatibilityStateScope(False) + return _NumpyCompatibilityStateScope(active) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index cb1248e46a85..19fc1eca89ce 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -1963,14 +1963,14 @@ def check_proposal_consistency(op, batch_size, with_nms=False): # The following 2 functions launch 0-thread kernels, an error that should be caught and signaled. 
def kernel_error_check_imperative(): os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' - with mx.enable_np_compat(): + with mx.np_compat(active=True): a = mx.nd.array([1,2,3],ctx=mx.gpu(0)) b = mx.nd.array([],ctx=mx.gpu(0)) c = (a / b).asnumpy() def kernel_error_check_symbolic(): os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' - with mx.enable_np_compat(): + with mx.np_compat(active=True): a = mx.sym.Variable('a') b = mx.sym.Variable('b') c = a / b diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py index e4a3e5a3d86f..612861bd8303 100644 --- a/tests/python/unittest/test_infer_shape.py +++ b/tests/python/unittest/test_infer_shape.py @@ -154,7 +154,7 @@ def test_shape_completely_unknown(): assert arg_shapes[0] == () assert out_shapes[0] == () - with mx.enable_np_compat(): + with mx.np_compat(): data = mx.sym.var("data") ret = mx.sym.sin(data) arg_shapes, out_shapes, _ = ret.infer_shape_partial() diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index fb0cac737ee0..94777677354d 100644 --- a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -123,7 +123,7 @@ def test_ndarray_setitem(): # numpy assignment for empty axis for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]: if trivial_shape == tuple(): - with mx.enable_np_compat(): + with mx.np_compat(): x = mx.nd.zeros(trivial_shape) else: x = mx.nd.zeros(trivial_shape) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index c5523c5b2268..191f3eef6aeb 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -4403,7 +4403,7 @@ def test_invalid_reps(): assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3)) test_normal_case() - with mx.enable_np_compat(): + with mx.np_compat(): test_empty_tensor() test_empty_reps() test_tile_backward() @@ -4464,7 +4464,7 @@ def test_zero_depth(): test_normal_case(index_type=np.float64) 
test_normal_case(index_type=np.float32) test_normal_case(index_type=np.float16) - with mx.enable_np_compat(): + with mx.np_compat(): test_empty_indices() test_zero_depth() @@ -6850,7 +6850,7 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape): check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0)) check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0)) - with mx.enable_np_compat(): + with mx.np_compat(): var1 = mx.sym.var(name="data", shape=(-1, 20)) check_slice_partial_infer(var1, (None, None), (None, 10), [], (-1, 10)) check_slice_partial_infer(var1, (None, None), (None, 10), (None, 2), (-1, 5)) @@ -7916,7 +7916,7 @@ def test_image_normalize(): def test_scalar_tensor_creation(): assertRaises(MXNetError, mx.nd.zeros, shape=()) assertRaises(MXNetError, mx.nd.ones, shape=()) - with mx.enable_np_compat(): + with mx.np_compat(): data_mx = mx.nd.ones(shape=()) data_np = np.ones((), dtype=data_mx.dtype) assert same(data_mx.asnumpy(), data_np) @@ -7926,7 +7926,7 @@ def test_scalar_tensor_creation(): def test_zero_size_tensor_creation(): assertRaises(MXNetError, mx.nd.zeros, shape=(0, 1, 3, 0)) assertRaises(MXNetError, mx.nd.ones, shape=(0, 1, 3, 0)) - with mx.enable_np_compat(): + with mx.np_compat(): data_mx = mx.nd.ones(shape=(0, 1, 0, 4)) data_np = np.ones(shape=data_mx.shape, dtype=data_mx.dtype) assert same(data_mx.asnumpy(), data_np) @@ -7934,7 +7934,7 @@ def test_zero_size_tensor_creation(): @with_seed() def test_concat_with_zero_size_tensor(): - with mx.enable_np_compat(): + with mx.np_compat(): data1 = mx.nd.ones((0, 8, 12)) data2 = mx.nd.ones((3, 8, 12)) data3 = mx.nd.ones((0, 8, 12)) From ab08f35271c99586a4a2553dfb219218513752ad Mon Sep 17 00:00:00 2001 From: reminisce Date: Mon, 15 Apr 2019 11:29:51 -0700 Subject: [PATCH 30/32] Add code comment to NumpyScope object in Scala --- .../core/src/main/scala/org/apache/mxnet/NumpyScope.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala index ec366ea4029d..d3e76f1044a7 100644 --- a/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala +++ b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala @@ -19,6 +19,11 @@ package org.apache.mxnet import org.apache.mxnet.Base._ +/** + * NumpyScope object provides util functions for turning on/off NumPy compatibility + * and checking whether NumPy compatibility has been turned on/off. NumPy compatibility + * is introduced first to support zero-dim and zero-size tensors as in NumPy. + */ object NumpyScope { def setNumpyCompatible(isNpComp: Boolean): Boolean = { val prev = new RefInt() From b0cb2f18cfd7f14dc37630e0625d5228c8a3cc17 Mon Sep 17 00:00:00 2001 From: reminisce Date: Mon, 15 Apr 2019 14:51:38 -0700 Subject: [PATCH 31/32] Add use_np_compat decorator --- python/mxnet/__init__.py | 2 +- python/mxnet/base.py | 41 ++++++++++++++++++++--- src/operator/tensor/broadcast_reduce_op.h | 4 +-- tests/python/unittest/test_operator.py | 32 ++++++++++++++++++ 4 files changed, 71 insertions(+), 8 deletions(-) diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index f4884d62fcd2..79eb1f10f427 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -23,7 +23,7 @@ from .context import Context, current_context, cpu, gpu, cpu_pinned from . import engine -from .base import MXNetError, is_np_compat, set_np_compat, np_compat +from .base import MXNetError, is_np_compat, set_np_compat, np_compat, use_np_compat from . import base from . import contrib from . import ndarray diff --git a/python/mxnet/base.py b/python/mxnet/base.py index bd1eceacac66..6799c44e72ac 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -27,10 +27,11 @@ import inspect import platform import numpy as _np +from functools import wraps from . 
import libinfo -__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'np_compat'] +__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'np_compat', 'use_np_compat'] #---------------------------- # library loading #---------------------------- @@ -736,13 +737,13 @@ def write_all_str(module_file, module_all_list): ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p -def set_np_compat(flag): +def set_np_compat(active): """ Turns on/off NumPy compatibility. NumPy-compatibility is turned off by default in backend. Parameters ---------- - flag : bool + active : bool Indicates whether to turn on/off NumPy compatibility. Returns @@ -750,7 +751,7 @@ def set_np_compat(flag): A bool value indicating the previous state of NumPy compatibility. """ prev = ctypes.c_int() - check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(flag), ctypes.byref(prev))) + check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(active), ctypes.byref(prev))) return bool(prev.value) @@ -770,6 +771,7 @@ def is_np_compat(): class _NumpyCompatibilityStateScope(object): """Scope for managing numpy compatibility state. + Do not use this class directly. Use `np_compat(active)` instead. Example:: @@ -792,7 +794,7 @@ def __exit__(self, ptype, value, trace): def np_compat(active=True): - """Returns a NumPy compatibility state scope to be used in 'with' statement + """Returns an activated/deactivated NumPy compatibility state scope to be used in 'with' statement and captures code that needs the compatibility. Example:: @@ -841,3 +843,32 @@ def np_compat(active=True): assert out_shapes[0] == () """ return _NumpyCompatibilityStateScope(active) + + +def use_np_compat(func): + """Wraps a function with an activated NumPy-compatibility scope. This ensures + that the execution of the function is guaranteed with NumPy compatible semantics, + such as zero-dim and zero size tensors. 
+ + Example:: + import mxnet as mx + @mx.use_np_compat + def scalar_one(): + return mx.nd.ones(()) + print(scalar_one()) + + Parameters + ---------- + func : a user-provided callable function to be scoped by the NumPy compatibility state. + + Returns + ------- + Function + A function for wrapping the user functions in the NumPy compatibility scope. + """ + @wraps(func) + def _with_np_compat(*args, **kwargs): + with np_compat(active=True): + return func(*args, **kwargs) + + return _with_np_compat diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index 9392d4129406..fc51d8af0f01 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -887,7 +887,7 @@ void ReduceAxesBackwardUseInOutImpl(const OpContext& ctx, MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, OType, { mshadow::Shape<5> in_shape; mshadow::Shape<5> out_shape; - for (uint32_t i = 0; i < 5; ++i) { + for (int i = 0; i < 5; ++i) { if (i < dst_shape.ndim()) { in_shape[i] = src_shape[i]; out_shape[i] = dst_shape[i]; @@ -1227,7 +1227,7 @@ void LpNormGradCompute(const nnvm::NodeAttrs& attrs, Stream *s = ctx.get_stream(); mshadow::Shape<5> in_shape; mshadow::Shape<5> out_shape; - for (uint32_t i = 0; i < 5; ++i) { + for (int i = 0; i < 5; ++i) { if (i < dst_shape.ndim()) { in_shape[i] = src_shape[i]; out_shape[i] = dst_shape[i]; diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 191f3eef6aeb..a1754d1de544 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -7948,6 +7948,38 @@ def test_concat_with_zero_size_tensor(): assert ret.shape == (0, 12, 10) +@with_seed() +def test_np_compat_decorator(): + @mx.use_np_compat + def check_scalar_one(): + """Generate scalar one tensor""" + return mx.nd.ones(shape=()) + assert check_scalar_one.__name__ == "check_scalar_one" + assert check_scalar_one.__doc__ == "Generate scalar one tensor" + 
assert check_scalar_one().shape == () + for active in [True, False]: + with mx.np_compat(active=active): + assert check_scalar_one.__name__ == "check_scalar_one" + assert check_scalar_one.__doc__ == "Generate scalar one tensor" + assert check_scalar_one().shape == () + + @mx.use_np_compat + def check_concat(shape1, shape2, axis): + data1 = mx.nd.ones(shape1) + data2 = mx.nd.ones(shape2) + ret = mx.nd.Concat(data1, data2, dim=axis) + expected_ret = np.concatenate((data1.asnumpy(), data2.asnumpy()), axis=axis) + assert ret.shape == expected_ret.shape + + check_concat((0, 3, 4), (5, 3, 4), 0) + check_concat((8, 0, 5), (8, 7, 5), 1) + check_concat((8, 0, 0), (8, 0, 0), 2) + for active in [True, False]: + check_concat((0, 3, 4), (5, 3, 4), 0) + check_concat((8, 0, 5), (8, 7, 5), 1) + check_concat((8, 0, 0), (8, 0, 0), 2) + + if __name__ == '__main__': import nose nose.runmodule() From bd7c4fb8b7fc166a6a41d1095e03d42045af7d27 Mon Sep 17 00:00:00 2001 From: reminisce Date: Mon, 15 Apr 2019 15:18:02 -0700 Subject: [PATCH 32/32] Fix pylint --- python/mxnet/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 6799c44e72ac..58f222dc1e85 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -20,6 +20,7 @@ """ctypes library of mxnet and helper functions.""" from __future__ import absolute_import +from functools import wraps import atexit import ctypes import os @@ -27,7 +28,6 @@ import inspect import platform import numpy as _np -from functools import wraps from . import libinfo