From 708c933dec2ee382b3b06a847c7120f852a124b9 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Wed, 6 Mar 2019 11:30:27 -0800
Subject: [PATCH 01/32] [numpy] Shape support scalar tensor (#14315)

* Support scalar and zero-size tensors with np.sum

* Add sanity check when ndim is set
---
 include/mxnet/tensor_blob.h                   |   1 -
 include/mxnet/tuple.h                         | 100 +++++++---
 python/mxnet/__init__.py                      |   1 +
 python/mxnet/base.py                          |  29 ++-
 python/mxnet/ndarray/__init__.py              |   2 +-
 python/mxnet/ndarray/numpy.py                 |  18 ++
 python/mxnet/numpy/__init__.py                |  20 ++
 python/mxnet/symbol/__init__.py               |   2 +-
 python/mxnet/symbol/numpy.py                  |  18 ++
 src/executor/graph_executor.cc                |   3 +-
 src/executor/infer_graph_attr_pass.cc         |   6 +-
 src/nnvm/plan_memory.cc                       |   6 +-
 src/operator/numpy/np_broadcast_reduce_op.h   | 186 ++++++++++++++++++
 .../numpy/np_broadcast_reduce_op_value.cc     |  61 ++++++
 .../numpy/np_broadcast_reduce_op_value.cu     |  36 ++++
 src/operator/operator_common.h                |  20 +-
 16 files changed, 466 insertions(+), 43 deletions(-)
 create mode 100644 python/mxnet/ndarray/numpy.py
 create mode 100644 python/mxnet/numpy/__init__.py
 create mode 100644 python/mxnet/symbol/numpy.py
 create mode 100644 src/operator/numpy/np_broadcast_reduce_op.h
 create mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cc
 create mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cu
diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h
index 7d059025b03e..45d4c7fda639 100755
--- a/include/mxnet/tensor_blob.h
+++ b/include/mxnet/tensor_blob.h
@@ -198,7 +198,6 @@ class TBlob {
       << "Expected: " << type_flag_ << " v.s. given " << mshadow::DataType<DType>::kFlag;
     return mshadow::Tensor<Device, 2, DType>(static_cast<DType*>(dptr_),
                                              shape_.FlatTo2D(),
-                                             shape_[shape_.ndim() - 1],
                                              stream);
   }
   /*!
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index 7c1367333630..39c3c185e3c0 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -17,7 +17,7 @@
  * under the License.
  */
 /*!
- *  Copyright (c) 2016 by Contributors
+ *  Copyright (c) 2019 by Contributors
  * \file mxnet/tuple.h
  * \brief Data structure Tuple and TShape to store dynamic sized shapes.
  */
@@ -39,11 +39,14 @@ namespace mxnet {
 
 /*!
  * \brief A dynamic sized array data structure that is optimized for storing
- *        small number of elements with same type.
+ * small number of elements with same type.
  *
  *  Data will be stored in stack when number of elements is small.
  *  It is suitable to hold shape of Tensor.
  *
+ *  The ndim of a valid tuple is an integer in range [0, inf).
+ *  ndim = 0 means the tuple is empty.
+ *
  * \tparam ValueType The type of data stored inside tuple.
  * \sa TShape
  */
@@ -61,7 +64,11 @@ class Tuple {
    * \param s the source tuple
    */
   inline Tuple(const Tuple<ValueType>& s) {
-    this->assign(s.begin(), s.end());
+    if (s.ndim() == -1) {
+      this->SetDim(-1);
+    } else {
+      this->assign(s.begin(), s.end());
+    }
   }
   /*!
    * \brief constructor from initializer list
@@ -106,6 +113,7 @@ class Tuple {
   inline void assign(RandomAccessIterator begin,
                      RandomAccessIterator end) {
     this->SetDim(end - begin);
+    CHECK_GE(ndim(), 0);
     std::copy(begin, end, this->begin());
   }
   /*!
@@ -124,7 +132,11 @@ class Tuple {
    * \return reference of self
    */
   inline Tuple<ValueType>& operator=(const Tuple<ValueType>& src) {
-    this->assign(src.begin(), src.end());
+    if (src.ndim() == -1) {
+      this->SetDim(-1);
+    } else {
+      this->assign(src.begin(), src.end());
+    }
     return *this;
   }
   /*!
@@ -151,6 +163,7 @@ class Tuple {
    */
   inline bool operator==(const Tuple<ValueType> &s) const {
     if (ndim_ != s.ndim_) return false;
+    if (ndim() == -1) return true;
     return std::equal(begin(), end(), s.begin());
   }
   /*!
@@ -177,7 +190,7 @@ class Tuple {
     return ndim_ <= kStackCache ? (data_stack_ + ndim_): (data_heap_ + ndim_);
   }
   /*! \return number of dimension of the tuple */
-  inline uint32_t ndim() const {
+  inline int ndim() const {
     return ndim_;
   }
   /*!
@@ -185,7 +198,8 @@ class Tuple {
    * \param i dimension index
    * \return the corresponding dimension size
    */
-  inline ValueType& operator[](size_t i) {
+  inline ValueType& operator[](int i) {
+    CHECK(i >= 0 && i < ndim());
     return begin()[i];
   }
   /*!
@@ -193,7 +207,8 @@ class Tuple {
    * \param i dimension index
    * \return the corresponding dimension size
    */
-  inline const ValueType& operator[](size_t i) const {
+  inline const ValueType& operator[](int i) const {
+    CHECK(i >= 0 && i < ndim());
     return begin()[i];
   }
   /*!
@@ -220,6 +235,10 @@ class Tuple {
    * \return the ostream
    */
   friend std::ostream &operator<<(std::ostream &os, const Tuple<ValueType> &t) {
+    if (t.ndim() == -1) {
+      os << "UNKNOWN_SHAPE";
+      return os;
+    }
     os << '[';
     const ValueType* begin = t.begin();
     const ValueType* end = t.end();
@@ -316,22 +335,27 @@ class Tuple {
 
  protected:
   // stack cache size
-  static const uint32_t kStackCache = 4;
+  static const int kStackCache = 4;
   /*! \brief number of dimension of the tuple */
-  uint32_t ndim_{0};
+  int ndim_{0};
   /*! \brief number of cells allocated in data_heap_ */
-  uint32_t num_heap_allocated_{0};
+  int num_heap_allocated_{0};
   /*! \brief in stack space used to store shape when it is small */
   ValueType data_stack_[kStackCache];
   /*! \brief space to store shape when dimension is big*/
   ValueType* data_heap_{nullptr};
   // internal function to change the dimension
-  inline void SetDim(uint32_t ndim) {
+  inline void SetDim(int ndim) {
+    CHECK_GE(ndim, -1) << "ndim cannot be less than -1, received " << ndim;
     if (ndim > kStackCache &&
         ndim > num_heap_allocated_) {
       delete [] data_heap_;
       data_heap_ = new ValueType[ndim];
       num_heap_allocated_ = ndim;
+    } else if (ndim == -1 && data_heap_ != nullptr) {
+      delete [] data_heap_;
+      data_heap_ = nullptr;
+      num_heap_allocated_ = 0;
     }
     ndim_ = ndim;
   }
@@ -339,25 +363,47 @@ class Tuple {
 
 /*!
  * \brief A Shape class that is used to represent shape of each tensor.
+ *
+ * The ndim of a valid shape is an integer in range [-1, inf).
+ * ndim = -1 means the shape information is unknown and need to be inferred.
+ * ndim = 0 means the tensor with the shape is a scalar.
+ *
+ * The dimension size of a valid shape is an integer in range [-1, inf).
+ * dim_size = -1 means the size of that dimension is unknown and need to be inferred.
+ * dim_size = 0 means that dimension is empty.
+ *
+ * The definition of ndim = 0 and dim_size = 0 is consistent with NumPy.
  */
 class TShape : public Tuple<dim_t> {
  public:
   /*! \brief default constructor */
-  TShape() = default;
+  TShape() {
+    this->SetDim(-1);
+  }
   /*!
    * constructor to construct a shape with all 1.
+   * TODO(junwu): The value should default to -1. Need to keep 1 for now
+   * for backward compatibility. Change it to -1 in the future when we can
+   * break backward compatibility.
    * \param ndim the number of dimension
+   * \param value the dimension size for all dims
    */
-  inline TShape(uint32_t ndim) {  // NOLINT(*)
+  inline TShape(int ndim, int value = 1) {  // NOLINT(*)
     this->SetDim(ndim);
-    std::fill_n(begin(), ndim, 1);
+    if (ndim > 0) {
+      std::fill_n(begin(), ndim, value);
+    }
   }
   /*!
    * \brief copy constructor of TShape
    * \param s source shape.
    */
   inline TShape(const Tuple<dim_t>& s) { // NOLINT(*)
-    this->assign(s.begin(), s.end());
+    if (s.ndim() == -1) {
+      this->SetDim(-1);
+    } else {
+      this->assign(s.begin(), s.end());
+    }
   }
   /*!
    * \brief constructor from initializer list
@@ -390,7 +436,11 @@ class TShape : public Tuple<dim_t> {
    * \return self.
    */
   inline TShape& operator=(const Tuple<dim_t>& src) {
-    this->assign(src.begin(), src.end());
+    if (src.ndim() == -1) {
+      this->SetDim(-1);
+    } else {
+      this->assign(src.begin(), src.end());
+    }
     return *this;
   }
   /*!
@@ -404,9 +454,11 @@ class TShape : public Tuple<dim_t> {
   }
   /*! \return total number of elements in the shape */
   inline size_t Size() const {
+    CHECK_GE(this->ndim(), 0) << "Shape is unknown.";
     dim_t size = 1;
     const dim_t* start = begin(), *fin = end();
     for (const dim_t* it = start; it != fin; ++it) {
+      CHECK_GE(*it, 0) << "Shape dim size cannot be -1, which means unknown.";
       size *= *it;
     }
     return size;
@@ -417,9 +469,11 @@ class TShape : public Tuple<dim_t> {
    * \param dimend end dimension
    */
   inline size_t ProdShape(int dimstart, int dimend) const {
+    CHECK_GE(this->ndim(), 0) << "Shape is unknown.";
     dim_t num = 1;
     const dim_t *d = this->data();
     for (int i = dimstart; i < dimend; ++i) {
+      CHECK_GE(d[i], 0) << "Shape dim size cannot be -1, which means unknown.";
       num *= d[i];
     }
     return num;
@@ -460,7 +514,7 @@ class TShape : public Tuple<dim_t> {
    */
   template<int dim>
   inline mshadow::Shape<dim> get() const {
-    CHECK_EQ(dim, static_cast<int>(ndim()))
+    CHECK_EQ(dim, ndim())
         << "dimension do not match target dimension " << dim << " vs " << ndim();
     const dim_t *d = this->data();
     mshadow::Shape<dim> s;
@@ -475,11 +529,12 @@ class TShape : public Tuple<dim_t> {
    */
   inline mshadow::Shape<2> FlatTo2D(void) const {
     mshadow::Shape<2> s;
-    if (ndim() == 0) return mshadow::Shape2(0, 0);
+    CHECK_GE(ndim(), 0);
+    if (ndim() == 0) return mshadow::Shape2(1, 1);
     const dim_t *d = this->data();
     s.shape_[1] = d[ndim() - 1];
     dim_t ymax = 1;
-    for (size_t i = 1; i < ndim(); ++i) {
+    for (int i = 1; i < ndim(); ++i) {
       ymax *= d[i - 1];
     }
     s.shape_[0] = ymax;
@@ -494,7 +549,8 @@ class TShape : public Tuple<dim_t> {
   inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const {
     CHECK(axis_end >= axis_begin);
     mshadow::Shape<3> s;
-    if (ndim() == 0) return mshadow::Shape3(0, 0, 0);
+    CHECK_GE(ndim(), 0);
+    if (ndim() == 0) return mshadow::Shape3(1, 1, 1);
     const dim_t *d = this->data();
     s.shape_[0] = 1;
     s.shape_[1] = 1;
@@ -506,7 +562,7 @@ class TShape : public Tuple<dim_t> {
     for (size_t i = axis_begin; i <= axis_end; ++i) {
       s.shape_[1] *= d[i];
     }
-    for (size_t i = axis_end + 1; i < ndim(); ++i) {
+    for (int i = axis_end + 1; i < ndim(); ++i) {
       s.shape_[2] *= d[i];
     }
     return s;
@@ -627,7 +683,7 @@ struct hash<mxnet::TShape> {
   size_t operator()(const mxnet::TShape& val) const {
     std::hash<uint32_t> hash_uint;
     size_t res = hash_uint(val.ndim());
-    for (uint32_t i = 0; i < val.ndim(); ++i) {
+    for (int i = 0; i < val.ndim(); ++i) {
       res = dmlc::HashCombine(res, val[i]);
     }
     return res;
diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index 374a3b50bbb5..8db83a286157 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -25,6 +25,7 @@
 from . import engine
 from .base import MXNetError
 from . import base
+from . import numpy
 from . import contrib
 from . import ndarray
 from . import ndarray as nd
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index feb4d70b6533..7793deacf44c 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -26,7 +26,7 @@
 import sys
 import inspect
 import platform
-import numpy as np
+import numpy as _np
 
 from . import libinfo
 
@@ -44,8 +44,8 @@
     long = int
 # pylint: enable=pointless-statement
 
-integer_types = (int, long, np.int32, np.int64)
-numeric_types = (float, int, long, np.generic)
+integer_types = (int, long, _np.int32, _np.int64)
+numeric_types = (float, int, long, _np.generic)
 string_types = basestring,
 
 if sys.version_info[0] > 2:
@@ -216,7 +216,7 @@ def _load_lib():
 mx_uint = ctypes.c_uint
 mx_float = ctypes.c_float
 mx_float_p = ctypes.POINTER(mx_float)
-mx_real_t = np.float32
+mx_real_t = _np.float32
 NDArrayHandle = ctypes.c_void_p
 FunctionHandle = ctypes.c_void_p
 OpHandle = ctypes.c_void_p
@@ -455,7 +455,7 @@ def ctypes2numpy_shared(cptr, shape):
     for s in shape:
         size *= s
     dbuffer = (mx_float * size).from_address(ctypes.addressof(cptr.contents))
-    return np.frombuffer(dbuffer, dtype=np.float32).reshape(shape)
+    return _np.frombuffer(dbuffer, dtype=_np.float32).reshape(shape)
 
 
 def build_param_doc(arg_names, arg_types, arg_descs, remove_dup=True):
@@ -560,7 +560,7 @@ def _as_list(obj):
         return [obj]
 
 
-_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_']
+_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_', '_numpy_']
 
 
 def _get_op_name_prefix(op_name):
@@ -606,6 +606,13 @@ def _init_op_module(root_namespace, module_name, make_op_func):
     # use mx.nd.contrib or mx.sym.contrib from now on
     contrib_module_name_old = "%s.contrib.%s" % (root_namespace, module_name)
     contrib_module_old = sys.modules[contrib_module_name_old]
+    # special handling of registering numpy ops
+    if module_name == 'ndarray':
+        numpy_module_name = "%s.numpy" % root_namespace
+        numpy_module = sys.modules[numpy_module_name]
+    else:
+        numpy_module_name = None
+        numpy_module = None
     submodule_dict = {}
     for op_name_prefix in _OP_NAME_PREFIX_LIST:
         submodule_dict[op_name_prefix] =\
@@ -644,6 +651,16 @@ def _init_op_module(root_namespace, module_name, make_op_func):
             function.__module__ = contrib_module_name_old
             setattr(contrib_module_old, function.__name__, function)
             contrib_module_old.__all__.append(function.__name__)
+        elif op_name_prefix == '_numpy_' and numpy_module_name is not None:
+            # only register numpy ops under mxnet.numpy in imperative mode
+            hdl = OpHandle()
+            check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
+            # TODO(reminisce): Didn't consider third level module here, e.g. mxnet.numpy.random.
+            func_name = name[len(op_name_prefix):]
+            function = make_op_func(hdl, name, func_name)
+            function.__module__ = numpy_module_name
+            setattr(numpy_module, function.__name__, function)
+            numpy_module.__all__.append(function.__name__)
 
 
 def _generate_op_module_signature(root_namespace, module_name, op_code_gen_func):
diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py
index f09908e894d5..a102399521cc 100644
--- a/python/mxnet/ndarray/__init__.py
+++ b/python/mxnet/ndarray/__init__.py
@@ -17,7 +17,7 @@
 
 """NDArray API of MXNet."""
 
-from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray
+from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray, numpy
 # pylint: disable=wildcard-import, redefined-builtin
 try:
     from .gen_op import * # pylint: disable=unused-wildcard-import
diff --git a/python/mxnet/ndarray/numpy.py b/python/mxnet/ndarray/numpy.py
new file mode 100644
index 000000000000..0826ac8aca7f
--- /dev/null
+++ b/python/mxnet/ndarray/numpy.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__all__ = []
diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py
new file mode 100644
index 000000000000..b1139a05791d
--- /dev/null
+++ b/python/mxnet/numpy/__init__.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__all__ = []
diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py
index f438e4954aa9..326e4f5aff78 100644
--- a/python/mxnet/symbol/__init__.py
+++ b/python/mxnet/symbol/__init__.py
@@ -17,7 +17,7 @@
 
 """Symbol API of MXNet."""
 
-from . import _internal, contrib, linalg, op, random, sparse, image, symbol
+from . import _internal, contrib, linalg, op, random, sparse, image, symbol, numpy
 # pylint: disable=wildcard-import, redefined-builtin
 try:
     from .gen_op import * # pylint: disable=unused-wildcard-import
diff --git a/python/mxnet/symbol/numpy.py b/python/mxnet/symbol/numpy.py
new file mode 100644
index 000000000000..0826ac8aca7f
--- /dev/null
+++ b/python/mxnet/symbol/numpy.py
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+__all__ = []
diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc
index 460cec371bd4..9e1b66d1b286 100644
--- a/src/executor/graph_executor.cc
+++ b/src/executor/graph_executor.cc
@@ -34,6 +34,7 @@
 #include "../common/utils.h"
 #include "../common/exec_utils.h"
 #include "../operator/subgraph/subgraph_property.h"
+#include "../operator/operator_common.h"
 
 namespace mxnet {
 namespace exec {
@@ -966,7 +967,7 @@ void GraphExecutor::InitDataEntryMemory(std::vector<NDArray>* shared_pool) {
     uint32_t oid = head_grad_map_.at(idx[nid].source);
     uint32_t eid = idx.entry_id(idx.outputs()[oid]);
     NDArrayStorageType stype = (NDArrayStorageType) vstorage_type[eid];
-    CHECK_NE(vshape[eid].ndim(), 0U);
+    CHECK(mxnet::op::shape_is_known(vshape[eid]));
     CHECK_NE(vdtype[eid], -1);
     auto data_eid = idx.entry_id(nid, 0);
     // initialize based on storage_type
diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc
index 6a7fde62c2cf..aa72661e78b2 100644
--- a/src/executor/infer_graph_attr_pass.cc
+++ b/src/executor/infer_graph_attr_pass.cc
@@ -648,14 +648,14 @@ nnvm::Graph InferShape(nnvm::Graph&& graph,
       std::move(graph), mxnet::TShape(),
       "FInferShape", "shape_inputs", "shape_attr_key",
       "shape", "shape_num_unknown_nodes",
-      [](const mxnet::TShape& s) { return s.ndim() == 0 || s.Size() == 0; },
+      [](const mxnet::TShape& s) { return !mxnet::op::shape_is_known(s); },
       [](const mxnet::TShape& s) {
-        if (s.ndim() == 0) {  // TODO(reminisce): Usage of ndim
+        if (s.ndim() == -1) {
           return static_cast<size_t>(1);
         }
         size_t ret = 0;
         for (const auto& val : s) {
-          if (val == 0) {
+          if (val == -1) {
             ++ret;
           }
         }
diff --git a/src/nnvm/plan_memory.cc b/src/nnvm/plan_memory.cc
index 2b18f990c845..0dc7e6ddb1d9 100644
--- a/src/nnvm/plan_memory.cc
+++ b/src/nnvm/plan_memory.cc
@@ -30,6 +30,7 @@
 #include <mxnet/base.h>
 #include <memory>
 #include "graph_algorithm.h"
+#include "../operator/operator_common.h"
 
 namespace nnvm {
 namespace pass {
@@ -75,7 +76,7 @@ class GraphAllocator {
 
   // request a free storage
   StorageID Request(int dev_id, int dtype, mxnet::TShape shape, uint32_t node_id) {
-    if (shape.ndim() == 0) return kBadStorageID;
+    if (!mxnet::op::shape_is_known(shape)) return kBadStorageID;
     // search memory block in [size / match_range_, size * match_range_)
     // TODO(tqchen) add size of the dtype, assume 4 bytes for now
     size_t size = shape.Size() * 4;
@@ -267,8 +268,7 @@ size_t AllocMemory(const Graph& ret, const IndexedGraph& idx,
       // only request memory for kBadStorageID
       if (storage[eid] == GraphAllocator::kBadStorageID) {
         auto &eshape = shape_vec[eid];
-        size_t esize = 0;
-        if (eshape.ndim() != 0) esize = eshape.Size();
+        size_t esize = eshape.Size();
         eids.insert(std::make_pair(esize, eid));
       }
     }
diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h
new file mode 100644
index 000000000000..bb2b7fca231c
--- /dev/null
+++ b/src/operator/numpy/np_broadcast_reduce_op.h
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2015 by Contributors
+ * \file broadcast_reduce_op.h
+ * \brief Function definition of broadcast and reduce operators
+ */
+#ifndef MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
+#define MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
+
+#include "../tensor/broadcast_reduce_op.h"
+
+namespace mxnet {
+namespace op {
+
+struct NumpyReduceAxesParam : public dmlc::Parameter<NumpyReduceAxesParam> {
+  dmlc::optional<nnvm::Tuple<int>> axis;
+  dmlc::optional<int> dtype;
+  bool keepdims;
+  dmlc::optional<double> initial;
+  DMLC_DECLARE_PARAMETER(NumpyReduceAxesParam) {
+    DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional<nnvm::Tuple<int>>())
+      .describe(R"code()code");
+    DMLC_DECLARE_FIELD(dtype).set_default(dmlc::optional<int>())
+      .describe("");
+    DMLC_DECLARE_FIELD(keepdims).set_default(false)
+      .describe("If this is set to `True`, the reduced axes are left "
+                "in the result as dimension with size one.");
+  }
+};
+
+inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape,
+                                       const dmlc::optional<nnvm::Tuple<int>>& axis,
+                                       bool keepdims) {
+  // TODO(junwu): improve the logic
+  // If input is a scalar, output should be a scalar too
+  if (ishape.ndim() == 0) {
+    if (axis.has_value()) {
+      const nnvm::Tuple<int>& axes = axis.value();
+      if (axes.ndim() > 0) {
+        CHECK_EQ(axes.ndim(), 1);
+        CHECK(axes[0] == 0 || axes[0] == -1);
+      }
+    }
+    return TShape(0);
+  }
+
+  // axis=None, do global reduction
+  if (!axis.has_value()) {
+    if (keepdims) {
+      return TShape(ishape.ndim(), 1);
+    } else {
+      return TShape(0);
+    }
+  }
+
+  // axis = (), will return identity(input)
+  if (axis.value().ndim() == 0) {
+    return ishape;
+  }
+
+  // axis has value
+  nnvm::Tuple<int> axes(axis.value());
+  for (index_t i = 0; i < axes.ndim(); i++) {
+    if (axes[i] < 0) {
+      axes[i] += ishape.ndim();
+    }
+  }
+  std::sort(axes.begin(), axes.end());
+
+  for (index_t i = 1; i < axes.ndim(); i++) {
+    CHECK_LT(axes[i-1], axes[i])
+        << "Reduction axes have duplicates "
+        << axes;
+  }
+  CHECK_LT(axes[axes.ndim()-1], ishape.ndim())
+      << "Reduction axis " << axes[axes.ndim()-1]
+      << " Exceeds input dimensions " << ishape;
+  CHECK_GE(axes[0], 0)
+      << "Reduction axis " << axis.value()
+      << " Exceeds input dimensions " << ishape;
+
+  TShape oshape;
+  if (keepdims) {
+    oshape = TShape(ishape);
+  } else {
+    oshape = TShape(ishape.ndim() - axes.ndim());
+  }
+
+  if (keepdims) {
+    for (index_t i = 0; i < axes.ndim(); ++i) {
+      oshape[axes[i]] = 1;
+    }
+  } else {
+    for (index_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) {
+      if (j < axes.ndim() && i == axes[j]) {
+        ++j;
+        continue;
+      }
+      oshape[k++] = ishape[i];
+    }
+  }
+  return oshape;
+}
+
+inline bool NumpyReduceAxesShape(const nnvm::NodeAttrs& attrs,
+                                 std::vector<TShape> *in_attrs,
+                                 std::vector<TShape> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  if (!shape_is_known(in_attrs->at(0))) {
+    return false;
+  }
+  const NumpyReduceAxesParam& param = nnvm::get<NumpyReduceAxesParam>(attrs.parsed);
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0,
+                     NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims));
+  return shape_is_known(out_attrs->at(0));
+}
+
+template<typename xpu, typename reducer, bool normalize = false,
+         typename OP = op::mshadow_op::identity>
+void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs,
+                            const OpContext& ctx,
+                            const std::vector<TBlob>& inputs,
+                            const std::vector<OpReqType>& req,
+                            const std::vector<TBlob>& outputs) {
+  const NumpyReduceAxesParam& param = nnvm::get<NumpyReduceAxesParam>(attrs.parsed);
+  if (param.axis.has_value() && param.axis.value().ndim() == 0) {
+    UnaryOp::IdentityCompute<xpu>(attrs, ctx, inputs, req, outputs);
+  }
+  TShape small;
+  if (param.keepdims) {
+    small = outputs[0].shape_;
+  } else {
+    small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true);
+  }
+
+  ReduceAxesComputeImpl<xpu, reducer, normalize, OP>(ctx, inputs, req, outputs, small);
+}
+
+template<typename xpu, bool normalize = false>
+inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs,
+                                           const OpContext& ctx,
+                                           const std::vector<TBlob>& inputs,
+                                           const std::vector<OpReqType>& req,
+                                           const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  const NumpyReduceAxesParam& param = nnvm::get<NumpyReduceAxesParam>(attrs.parsed);
+  TShape small;
+  if (param.keepdims) {
+    small = inputs[0].shape_;
+  } else {
+    small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true);
+  }
+
+  BroadcastComputeImpl<xpu>(attrs, ctx, inputs, req, outputs, small);
+  if (normalize) {
+    Stream<xpu> *s = ctx.get_stream<xpu>();
+    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+      Tensor<xpu, 1, DType> igrad = outputs[0].FlatTo1D<xpu, DType>(s);
+      igrad /= scalar<DType>(outputs[0].Size()/inputs[0].Size());
+    });
+  }
+}
+
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc
new file mode 100644
index 000000000000..c028e2368737
--- /dev/null
+++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_reduce_op_value.cc
+ * \brief CPU Implementation of broadcast and reduce functions based on value.
+ */
+
+#include "np_broadcast_reduce_op.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam);
+
+NNVM_REGISTER_OP(_numpy_sum)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<NumpyReduceAxesParam>)
+.set_attr<mxnet::FInferShape>("FInferShape", NumpyReduceAxesShape)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<nnvm::FListInputNames>("FListInputNames",
+  [](const NodeAttrs& attrs) {
+    return std::vector<std::string>{"a"};
+  })
+.add_argument("a", "NDArray-or-Symbol", "The input")
+.add_arguments(NumpyReduceAxesParam::__FIELDS__())
+.set_attr<FCompute>("FCompute<cpu>", NumpyReduceAxesCompute<cpu, mshadow::red::sum>)
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_numpy_sum"});
+
+NNVM_REGISTER_OP(_backward_numpy_sum)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<NumpyReduceAxesParam>)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_num_inputs(1)
+.set_attr<FCompute>("FCompute<cpu>", NumpyReduceAxesBackwardUseNone<cpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu
new file mode 100644
index 000000000000..c975b18226db
--- /dev/null
+++ b/src/operator/numpy/np_broadcast_reduce_op_value.cu
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_reduce_op_value.cu
+ * \brief GPU Implementation of reduce functions based on value.
+ */
+#include "np_broadcast_reduce_op.h"
+
+namespace mxnet {
+namespace op {
+NNVM_REGISTER_OP(_numpy_sum)
+.set_attr<FCompute>("FCompute<gpu>", NumpyReduceAxesCompute<gpu, mshadow::red::sum>);
+
+NNVM_REGISTER_OP(_backward_numpy_sum)
+.set_attr<FCompute>("FCompute<gpu>", NumpyReduceAxesBackwardUseNone<gpu>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index f629534dabd0..a461d2bc4cef 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -108,6 +108,16 @@ inline bool shape_is_none(const mxnet::TShape& x) {
   return x.ndim() == 0 || x.Size() == 0;
 }
 
+/*! brief check if shape is known using the NumPy compatible definition.
+ * zero-dim and zero-size tensors are valid. -1 means unknown.*/
+inline bool shape_is_known(const TShape& x) {
+  if (x.ndim() == -1) return false;
+  for (int i = 0; i < x.ndim(); ++i) {
+    if (x[i] == -1) return false;
+  }
+  return true;
+}
+
 /*! \brief check if type is none (-1) */
 inline bool type_is_none(const int& x) {
   return x == -1;
@@ -159,16 +169,16 @@ inline std::string type_string(const int& x) {
  * \return whether x and y are compatible.
  */
 inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) {
-  if (y->ndim() == 0) {
+  if (y->ndim() == -1) {
     *y = x;
     return true;
   } else if (y->ndim() != x.ndim()) {
-    return x.ndim() == 0;
+    return x.ndim() == -1;
   } else {
-    for (size_t i = 0; i < y->ndim(); ++i) {
-      if ((*y)[i] == 0) {
+    for (int i = 0; i < y->ndim(); ++i) {
+      if ((*y)[i] == -1) {
         (*y)[i] = x[i];
-      } else if ((*y)[i] != x[i] && x[i] != 0) {
+      } else if ((*y)[i] != x[i] && x[i] >= 0) {
         return false;
       }
     }

From a297a9ab98c4dd56a2d1d7922058207336fdefef Mon Sep 17 00:00:00 2001
From: Junru Shao <junrushao1994@gmail.com>
Date: Fri, 15 Mar 2019 12:56:04 -0700
Subject: [PATCH 02/32] [Numpy] Change semantics of ndim for operators in
 `src/operator/contrib` (#14409)

* Initial commit

* Address comments
---
 src/operator/contrib/adamw-inl.h              |  5 +--
 .../contrib/adaptive_avg_pooling-inl.h        |  6 ++--
 src/operator/contrib/bilinear_resize-inl.h    |  2 +-
 src/operator/contrib/boolean_mask.cc          |  2 +-
 src/operator/contrib/bounding_box-inl.h       |  4 ++-
 src/operator/contrib/count_sketch-inl.h       |  2 +-
 .../contrib/deformable_convolution-inl.h      | 14 ++++----
 src/operator/contrib/dgl_graph.cc             | 32 ++++++-------------
 src/operator/contrib/fft-inl.h                |  2 +-
 src/operator/contrib/ifft-inl.h               |  2 +-
 src/operator/contrib/index_copy-inl.h         |  3 +-
 src/operator/contrib/multi_proposal-inl.h     |  2 +-
 src/operator/contrib/nnvm_to_onnx.cc          |  3 +-
 src/operator/contrib/optimizer_op.cc          |  2 +-
 src/operator/contrib/proposal-inl.h           |  2 +-
 src/operator/contrib/quadratic_op-inl.h       |  2 +-
 src/operator/contrib/sync_batch_norm-inl.h    |  2 +-
 src/operator/contrib/transformer-inl.h        |  4 ++-
 18 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/src/operator/contrib/adamw-inl.h b/src/operator/contrib/adamw-inl.h
index 07feaefe87aa..6ae9e46b7def 100644
--- a/src/operator/contrib/adamw-inl.h
+++ b/src/operator/contrib/adamw-inl.h
@@ -87,8 +87,9 @@ inline bool MPUpdateInferShape(const nnvm::NodeAttrs& attrs,
                                mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), static_cast<size_t>(total_in)) << " in operator " << attrs.name;
   CHECK_EQ(out_attrs->size(), static_cast<size_t>(n_out)) << " in operator " << attrs.name;
-  // rescale_grad.shape = (1,)
-  SHAPE_ASSIGN_CHECK(*in_attrs, total_in - 1, mshadow::Shape1(1));
+  // rescale_grad.shape = ()
+  SHAPE_ASSIGN_CHECK(*in_attrs, total_in - 1, mxnet::TShape());
+  // TODO(@reminisce): change "none" behavior in ElemwiseAttr
   return ElemwiseAttr<mxnet::TShape, shape_is_none, shape_assign, true, shape_string, n_in, n_out>(
       attrs, in_attrs, out_attrs, mxnet::TShape());
 }
diff --git a/src/operator/contrib/adaptive_avg_pooling-inl.h b/src/operator/contrib/adaptive_avg_pooling-inl.h
index 0d66de0a5692..eedab78db0c5 100644
--- a/src/operator/contrib/adaptive_avg_pooling-inl.h
+++ b/src/operator/contrib/adaptive_avg_pooling-inl.h
@@ -48,9 +48,9 @@ namespace mxnet {
 namespace op {
 
 struct AdaptiveAvgPoolParam : public dmlc::Parameter<AdaptiveAvgPoolParam> {
-  mxnet::TShape output_size;
+  mxnet::Tuple<int> output_size;
   DMLC_DECLARE_PARAMETER(AdaptiveAvgPoolParam) {
-    DMLC_DECLARE_FIELD(output_size).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(output_size).set_default(mxnet::Tuple<int>())
     .describe("int (output size) or a tuple of int for output (height, width).");
   }
 };
@@ -125,7 +125,7 @@ static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_shape->size(), 1U) << "Output:[data]";
   const AdaptiveAvgPoolParam& param = nnvm::get<AdaptiveAvgPoolParam>(attrs.parsed);
   mxnet::TShape dshape(in_shape->at(0));
-  if (dshape.ndim() == 0) return false;
+  if (mxnet::op::shape_is_none(dshape)) return false;
   if (param.output_size.ndim() == 0) {
     dshape[2] = 1;
     dshape[3] = 1;
diff --git a/src/operator/contrib/bilinear_resize-inl.h b/src/operator/contrib/bilinear_resize-inl.h
index 46c8e1aa7c0d..ce9c6c83504c 100644
--- a/src/operator/contrib/bilinear_resize-inl.h
+++ b/src/operator/contrib/bilinear_resize-inl.h
@@ -134,7 +134,7 @@ static bool BilinearSampleOpInferShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_shape->size(), 1U) << "Output:[data]";
   const BilinearSampleParam& param = nnvm::get<BilinearSampleParam>(attrs.parsed);
   mxnet::TShape dshape(in_shape->at(0));
-  if (dshape.ndim() == 0) return false;
+  if (mxnet::op::shape_is_none(dshape)) return false;
   if (param.scale_height.has_value()) {
     dshape[2] = static_cast<int>(param.scale_height.value() * in_shape->at(0)[2]);
   } else {
diff --git a/src/operator/contrib/boolean_mask.cc b/src/operator/contrib/boolean_mask.cc
index e22c493d5e2c..06d8439e23a0 100644
--- a/src/operator/contrib/boolean_mask.cc
+++ b/src/operator/contrib/boolean_mask.cc
@@ -121,7 +121,7 @@ inline void BooleanMaskForward<cpu>(const nnvm::NodeAttrs& attrs,
   const NDArray &out = outputs[0];
   CHECK_EQ(axis, 0) << "Not supported yet";
   CHECK_EQ(data.shape()[axis], idx.shape()[0]);
-  CHECK_EQ(idx.shape().ndim(), 1U);
+  CHECK_EQ(idx.shape().ndim(), 1U);  // idx is required to be 1-d.
   // count the number of 1s in `idx`, so that we could know the output dimension
   size_t idx_size = idx.shape()[0];
   std::vector<int32_t> prefix_sum(idx_size, 0);
diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h
index 37c4297ff49d..059327ef8334 100644
--- a/src/operator/contrib/bounding_box-inl.h
+++ b/src/operator/contrib/bounding_box-inl.h
@@ -94,7 +94,9 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs,
   const BoxNMSParam& param = nnvm::get<BoxNMSParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 2U);
-  if (in_attrs->at(0).ndim() == 0U && out_attrs->at(0).ndim() == 0U) {
+  // TODO(@junrushao1994): verify with Joshua Z. Zhang about this operator
+  if (mxnet::op::shape_is_none(in_attrs->at(0))
+   && mxnet::op::shape_is_none(out_attrs->at(0))) {
     return false;
   }
 
diff --git a/src/operator/contrib/count_sketch-inl.h b/src/operator/contrib/count_sketch-inl.h
index f3a294f6ad46..3ea93e63d6fc 100644
--- a/src/operator/contrib/count_sketch-inl.h
+++ b/src/operator/contrib/count_sketch-inl.h
@@ -151,7 +151,7 @@ class CountSketchProp : public OperatorProperty {
     CHECK_EQ(in_shape->size(), 3) <<"Input:[data, h, s]";
     const mxnet::TShape &dshape = (*in_shape)[CountSketch::kData];
     // require data to be known
-    if (dshape.ndim() == 0) return false;
+    if (mxnet::op::shape_is_none(dshape)) return false;
 
     out_shape->clear();
     if (dshape.ndim() == 4) {
diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h
index f50641fca6d6..3e96cad1c859 100644
--- a/src/operator/contrib/deformable_convolution-inl.h
+++ b/src/operator/contrib/deformable_convolution-inl.h
@@ -69,11 +69,11 @@ struct DeformableConvolutionParam : public dmlc::Parameter<DeformableConvolution
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(DeformableConvolutionParam) {
     DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0))
       .describe("Convolution stride: (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0))
       .describe("Convolution dilate: (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0))
       .describe("Zero pad for convolution: (h, w) or (d, h, w). Defaults to no padding.");
     DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
       .describe("Convolution filter(channel) number");
@@ -347,9 +347,9 @@ class DeformableConvolutionProp : public OperatorProperty {
     param_.Init(kwargs);
     if (param_.kernel.ndim() == 2) {
       param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCHW;
-      if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1);
-      if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1);
-      if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0);
+      if (mxnet::op::shape_is_none(param_.stride)) param_.stride = Shape2(1, 1);
+      if (mxnet::op::shape_is_none(param_.dilate)) param_.dilate = Shape2(1, 1);
+      if (mxnet::op::shape_is_none(param_.pad)) param_.pad = Shape2(0, 0);
     } else {
       LOG(FATAL) << "not implemented";
     }
@@ -371,7 +371,7 @@ class DeformableConvolutionProp : public OperatorProperty {
     out_shape->resize(1, mxnet::TShape());
     const mxnet::TShape &dshp = (*in_shape)[conv::kData];
     const mxnet::TShape &oshp = (*in_shape)[conv::kOffset];
-    if (dshp.ndim() == 0) return false;
+    if (mxnet::op::shape_is_none(dshp)) return false;
     if (param_.kernel.ndim() == 2) {
       // 2d conv
       CHECK_EQ(dshp.ndim(), 4U) \
diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc
index f19af84ce9c6..02ef2cee1caa 100644
--- a/src/operator/contrib/dgl_graph.cc
+++ b/src/operator/contrib/dgl_graph.cc
@@ -265,9 +265,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs,
   out_shape[0] = params.max_num_vertices + 1;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i, out_shape);
-    success = success &&
-              out_attrs->at(i).ndim() != 0U &&
-              out_attrs->at(i).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i));
   }
   // sub_csr
   mxnet::TShape out_csr_shape(2);
@@ -275,18 +273,14 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs,
   out_csr_shape[1] = in_attrs->at(0)[1];
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, out_csr_shape);
-    success = success &&
-              out_attrs->at(i + num_subgraphs).ndim() != 0U &&
-              out_attrs->at(i + num_subgraphs).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs));
   }
   // sub_layer
   mxnet::TShape out_layer_shape(1);
   out_layer_shape[0] = params.max_num_vertices;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_layer_shape);
-    success = success &&
-              out_attrs->at(i + 2*num_subgraphs).ndim() != 0U &&
-              out_attrs->at(i + 2*num_subgraphs).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 2 * num_subgraphs));
   }
 
   return success;
@@ -323,9 +317,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs,
   out_shape[0] = params.max_num_vertices + 1;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i, out_shape);
-    success = success &&
-              out_attrs->at(i).ndim() != 0U &&
-              out_attrs->at(i).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i));
   }
   // sub_csr
   mxnet::TShape out_csr_shape(2);
@@ -333,27 +325,21 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs,
   out_csr_shape[1] = in_attrs->at(0)[1];
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + num_subgraphs, out_csr_shape);
-    success = success &&
-              out_attrs->at(i + num_subgraphs).ndim() != 0U &&
-              out_attrs->at(i + num_subgraphs).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs));
   }
   // sub_probability
   mxnet::TShape out_prob_shape(1);
   out_prob_shape[0] = params.max_num_vertices;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_prob_shape);
-    success = success &&
-              out_attrs->at(i + 2*num_subgraphs).ndim() != 0U &&
-              out_attrs->at(i + 2*num_subgraphs).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 2 * num_subgraphs));
   }
   // sub_layer
   mxnet::TShape out_layer_shape(1);
   out_layer_shape[0] = params.max_num_vertices;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, out_prob_shape);
-    success = success &&
-              out_attrs->at(i + 3*num_subgraphs).ndim() != 0U &&
-              out_attrs->at(i + 3*num_subgraphs).Size() != 0U;
+    success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 3 * num_subgraphs));
   }
 
   return success;
@@ -1199,7 +1185,7 @@ inline bool EdgeIDShape(const nnvm::NodeAttrs& attrs,
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(1));
   SHAPE_ASSIGN_CHECK(*in_attrs, 1, out_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*in_attrs, 2, out_attrs->at(0));
-  return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U;
+  return !mxnet::op::shape_is_none(out_attrs->at(0));
 }
 
 inline bool EdgeIDType(const nnvm::NodeAttrs& attrs,
@@ -1357,7 +1343,7 @@ inline bool DGLAdjacencyShape(const nnvm::NodeAttrs& attrs,
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
-  return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U;
+  return !mxnet::op::shape_is_none(out_attrs->at(0));
 }
 
 inline bool DGLAdjacencyType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/contrib/fft-inl.h b/src/operator/contrib/fft-inl.h
index 247f6290c02a..a5471b4ba2e2 100644
--- a/src/operator/contrib/fft-inl.h
+++ b/src/operator/contrib/fft-inl.h
@@ -241,7 +241,7 @@ class FFTProp : public OperatorProperty {
     CHECK_EQ(in_shape->size(), 1) <<"Input:[data]";
     const mxnet::TShape &dshape = (*in_shape)[fft::kData];
     // require data to be known
-    if (dshape.ndim() == 0) return false;
+    if (mxnet::op::shape_is_none(dshape)) return false;
 
     out_shape->clear();
     if (dshape.ndim() == 4) {
diff --git a/src/operator/contrib/ifft-inl.h b/src/operator/contrib/ifft-inl.h
index e53c0f60fa9e..7d8422e838b1 100644
--- a/src/operator/contrib/ifft-inl.h
+++ b/src/operator/contrib/ifft-inl.h
@@ -231,7 +231,7 @@ class IFFTProp : public OperatorProperty {
     CHECK_EQ(in_shape->size(), 1) <<"Input:[data]";
     const mxnet::TShape &dshape = (*in_shape)[ifft::kData];
     // require data to be known
-    if (dshape.ndim() == 0) return false;
+    if (mxnet::op::shape_is_none(dshape)) return false;
 
     out_shape->clear();
     if (dshape.ndim() == 4) {
diff --git a/src/operator/contrib/index_copy-inl.h b/src/operator/contrib/index_copy-inl.h
index 903dee13272b..35f88916da20 100644
--- a/src/operator/contrib/index_copy-inl.h
+++ b/src/operator/contrib/index_copy-inl.h
@@ -76,8 +76,7 @@ inline bool IndexCopyShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->at(1)[0], in_attrs->at(2)[0]);
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
-  return out_attrs->at(0).ndim() != 0U &&
-         out_attrs->at(0).Size() != 0U;
+  return !mxnet::op::shape_is_none(out_attrs->at(0));
 }
 
 }  // namespace op
diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h
index 4b9a41c2fa87..a9afb8e4114e 100644
--- a/src/operator/contrib/multi_proposal-inl.h
+++ b/src/operator/contrib/multi_proposal-inl.h
@@ -108,7 +108,7 @@ class MultiProposalProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]";
     const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb);
-    if (dshape.ndim() == 0) return false;
+    if (!mxnet::op::shape_is_none(dshape)) return false;
     Shape<4> bbox_pred_shape;
     bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]);
     SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred,
diff --git a/src/operator/contrib/nnvm_to_onnx.cc b/src/operator/contrib/nnvm_to_onnx.cc
index 0417a085616a..0c8bd79490e3 100644
--- a/src/operator/contrib/nnvm_to_onnx.cc
+++ b/src/operator/contrib/nnvm_to_onnx.cc
@@ -417,7 +417,8 @@ std::unordered_map<std::string, mxnet::TShape> GetPlaceholderShapes(
   for (uint32_t i = 0; i < shape_inputs.size(); ++i) {
     std::string name = ig[ig.input_nodes()[i]].source->attrs.name;
     mxnet::TShape shp = shape_inputs[i];
-    if (shp.ndim() > 0) {
+    if (!mxnet::op::shape_is_none(shp)) {
+      // TODO(@reminisce): confirm
       placeholder_shapes.emplace(name, shp);
     }
   }
diff --git a/src/operator/contrib/optimizer_op.cc b/src/operator/contrib/optimizer_op.cc
index 9f948bad81b6..83bbcdab833d 100644
--- a/src/operator/contrib/optimizer_op.cc
+++ b/src/operator/contrib/optimizer_op.cc
@@ -45,7 +45,7 @@ inline bool GroupAdagradShape(const nnvm::NodeAttrs &attrs,
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*in_attrs, 1, out_attrs->at(0));
 
-  return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U &&
+  return !mxnet::op::shape_is_none(out_attrs->at(0)) &&
          (in_attrs->at(0)[0] == in_attrs->at(1)[0]) &&
          (in_attrs->at(0)[0] == in_attrs->at(2)[0]);
 }
diff --git a/src/operator/contrib/proposal-inl.h b/src/operator/contrib/proposal-inl.h
index 9908ca96ec5f..21e9fe198e63 100644
--- a/src/operator/contrib/proposal-inl.h
+++ b/src/operator/contrib/proposal-inl.h
@@ -106,7 +106,7 @@ class ProposalProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]";
     const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb);
-    if (dshape.ndim() == 0) return false;
+    if (mxnet::op::shape_is_none(dshape)) return false;
     Shape<4> bbox_pred_shape;
     bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]);
     SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred,
diff --git a/src/operator/contrib/quadratic_op-inl.h b/src/operator/contrib/quadratic_op-inl.h
index e679fedc8e57..a7aca63de17a 100644
--- a/src/operator/contrib/quadratic_op-inl.h
+++ b/src/operator/contrib/quadratic_op-inl.h
@@ -60,7 +60,7 @@ inline bool QuadraticOpShape(const nnvm::NodeAttrs& attrs,
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
-  return out_attrs->at(0).ndim() != 0U && out_attrs->at(0).Size() != 0U;
+  return !mxnet::op::shape_is_none(out_attrs->at(0));
 }
 
 inline bool QuadraticOpType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/contrib/sync_batch_norm-inl.h b/src/operator/contrib/sync_batch_norm-inl.h
index 1e6ab25db0e2..cd1a3285fe06 100644
--- a/src/operator/contrib/sync_batch_norm-inl.h
+++ b/src/operator/contrib/sync_batch_norm-inl.h
@@ -482,7 +482,7 @@ class SyncBatchNormProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (dshape.ndim() == 0) return false;
+    if (mxnet::op::shape_is_none(dshape)) return false;
     in_shape->at(1) = mxnet::TShape(Shape1(dshape[1]));
     in_shape->at(2) = mxnet::TShape(Shape1(dshape[1]));
     out_shape->clear();
diff --git a/src/operator/contrib/transformer-inl.h b/src/operator/contrib/transformer-inl.h
index 01faf244aff9..da3d14e33cf4 100644
--- a/src/operator/contrib/transformer-inl.h
+++ b/src/operator/contrib/transformer-inl.h
@@ -41,7 +41,9 @@ static void DivSqrtDimForward_(const nnvm::NodeAttrs& attrs,
                   const std::vector<OpReqType>& req,
                   const std::vector<TBlob>& outputs) {
   mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-  double sqrt_dim = std::sqrt(static_cast<double>(inputs[0].shape_[inputs[0].ndim() - 1]));
+  CHECK_GE(inputs[0].ndim(), 1);
+  int last_idx = inputs[0].ndim() - 1;
+  double sqrt_dim = std::sqrt(static_cast<double>(inputs[0].shape_[last_idx]));
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
       mxnet_op::Kernel<mxnet_op::op_with_req<mshadow_op::div, Req>, xpu>::Launch(

From 1179a5992ab0c5c4ae26b741522856c6eab3c190 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Sun, 17 Mar 2019 21:26:12 -0700
Subject: [PATCH 03/32] [WIP] Use new shape definition (#14453)

* Init checkin

* Fix ndarray alloc bug

* Use TShape(0) as default empty tuple params

* Fix bugs

* Fix TShape init value

* Fix infer shape pass shape type and reshape infer shape func
---
 include/mxnet/c_api.h                         |  40 +--
 include/mxnet/ndarray.h                       |   9 +-
 include/mxnet/tuple.h                         |  37 ++-
 python/mxnet/base.py                          |   1 +
 python/mxnet/executor.py                      |   6 +-
 python/mxnet/ndarray/ndarray.py               |  10 +-
 python/mxnet/symbol/symbol.py                 |  20 +-
 src/c_api/c_api.cc                            |  15 +-
 src/c_api/c_api_common.h                      |  21 +-
 src/c_api/c_api_executor.cc                   |   4 +-
 src/c_api/c_api_symbolic.cc                   |  28 +-
 src/common/exec_utils.h                       |   4 +-
 src/executor/graph_executor.cc                |   2 +-
 src/executor/infer_graph_attr_pass.cc         |   2 +-
 src/io/image_io.cc                            |   4 +-
 src/ndarray/ndarray.cc                        |   2 +-
 src/nnvm/plan_memory.cc                       |   2 +-
 src/operator/batch_norm_v1-inl.h              |   2 +-
 src/operator/bilinear_sampler-inl.h           |   4 +-
 src/operator/contrib/bounding_box-inl.h       |   8 +-
 .../contrib/deformable_convolution-inl.h      |   4 +-
 src/operator/contrib/dgl_graph.cc             |  32 +-
 src/operator/contrib/multibox_detection-inl.h |   2 +-
 src/operator/contrib/multibox_prior-inl.h     |   4 +-
 src/operator/control_flow.cc                  | 112 +++----
 src/operator/convolution_v1-inl.h             |   8 +-
 src/operator/image/image_random-inl.h         |   4 +-
 src/operator/image/resize-inl.h               |   4 +-
 src/operator/leaky_relu-inl.h                 |   2 +-
 src/operator/loss_binary_op-inl.h             |   2 +-
 src/operator/nn/batch_norm.cc                 |   2 +-
 src/operator/nn/concat.cc                     |  25 +-
 src/operator/nn/convolution-inl.h             |  18 +-
 src/operator/nn/convolution.cc                |   2 +-
 src/operator/nn/ctc_loss-inl.h                |   2 +-
 src/operator/nn/cudnn/cudnn_batch_norm.cc     |   2 +-
 src/operator/nn/cudnn/cudnn_convolution-inl.h |   4 +-
 .../nn/cudnn/cudnn_deconvolution-inl.h        |   4 +-
 src/operator/nn/deconvolution-inl.h           |  10 +-
 src/operator/nn/deconvolution.cc              |   2 +-
 src/operator/nn/dropout-inl.h                 |   2 +-
 src/operator/nn/dropout.cc                    |   4 +-
 src/operator/nn/fully_connected.cc            |   4 +-
 src/operator/nn/im2col.h                      |   4 +-
 src/operator/nn/layer_norm-inl.h              |   2 +-
 src/operator/nn/layer_norm.cc                 |   6 +-
 src/operator/nn/lrn.cc                        |   2 +-
 src/operator/nn/pooling-inl.h                 |  14 +-
 src/operator/nn/pooling.cc                    |   6 +-
 src/operator/nn/upsampling.cc                 |   2 +-
 src/operator/numpy/np_broadcast_reduce_op.h   |   6 +-
 src/operator/operator_common.h                |  19 +-
 src/operator/operator_util.cc                 |   2 +-
 src/operator/quantization/dequantize-inl.h    |   2 +-
 src/operator/quantization/quantize-inl.h      |   2 +-
 src/operator/quantization/quantized_concat.cc |   2 +-
 .../quantization/quantized_flatten-inl.h      |   6 +-
 .../quantization/quantized_fully_connected.cc |   2 +-
 .../quantization/quantized_pooling.cc         |   4 +-
 src/operator/random/multisample_op.h          |   2 +-
 src/operator/random/sample_multinomial_op.h   |  18 +-
 src/operator/random/unique_sample_op.h        |   2 +-
 src/operator/regression_output-inl.h          |   2 +-
 src/operator/sequence_last-inl.h              |   2 +-
 src/operator/slice_channel-inl.h              |   4 +-
 src/operator/softmax_output-inl.h             |  12 +-
 src/operator/softmax_output.cc                |  12 +-
 src/operator/spatial_transformer-inl.h        |   4 +-
 src/operator/subgraph_op_common.cc            |   4 +-
 src/operator/subgraph_op_common.h             |  12 +-
 src/operator/svm_output-inl.h                 |   6 +-
 src/operator/swapaxis-inl.h                   |   4 +-
 src/operator/tensor/broadcast_reduce_op.h     |  58 ++--
 src/operator/tensor/diag_op-inl.h             |  10 +-
 src/operator/tensor/dot-inl.h                 |  12 +-
 .../tensor/elemwise_binary_broadcast_op.h     |  42 +--
 .../tensor/elemwise_unary_op_basic.cc         |  10 +-
 src/operator/tensor/histogram-inl.h           |   6 +-
 src/operator/tensor/indexing_op.h             |  29 +-
 src/operator/tensor/init_op.h                 |   6 +-
 src/operator/tensor/la_op.h                   |   2 +-
 src/operator/tensor/matrix_op-inl.h           | 299 +++++++++---------
 src/operator/tensor/matrix_op.cc              |   4 +-
 src/operator/tensor/ordering_op-inl.h         |   2 +-
 src/operator/tensor/slice-inl.h               |   6 +-
 tests/python/unittest/test_operator.py        |   2 +-
 86 files changed, 573 insertions(+), 556 deletions(-)

diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 2f9d74dc5ba0..cdc20b1e3970 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -775,8 +775,8 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle,
  * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle,
-                                mx_uint *out_dim,
-                                const mx_uint **out_pdata);
+                                int *out_dim,
+                                const int **out_pdata);
 /*!
  * \brief get the content of the data in NDArray
  * \param handle the handle to the ndarray
@@ -1493,16 +1493,16 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym,
                                  mx_uint num_args,
                                  const char** keys,
                                  const mx_uint *arg_ind_ptr,
-                                 const mx_uint *arg_shape_data,
+                                 const int *arg_shape_data,
                                  mx_uint *in_shape_size,
-                                 const mx_uint **in_shape_ndim,
-                                 const mx_uint ***in_shape_data,
+                                 const int **in_shape_ndim,
+                                 const int ***in_shape_data,
                                  mx_uint *out_shape_size,
-                                 const mx_uint **out_shape_ndim,
-                                 const mx_uint ***out_shape_data,
+                                 const int **out_shape_ndim,
+                                 const int ***out_shape_data,
                                  mx_uint *aux_shape_size,
-                                 const mx_uint **aux_shape_ndim,
-                                 const mx_uint ***aux_shape_data,
+                                 const int **aux_shape_ndim,
+                                 const int ***aux_shape_data,
                                  int *complete);
 /*!
  * \brief partially infer shape of unknown input shapes given the known one.
@@ -1532,16 +1532,16 @@ MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym,
                                         mx_uint num_args,
                                         const char** keys,
                                         const mx_uint *arg_ind_ptr,
-                                        const mx_uint *arg_shape_data,
+                                        const int *arg_shape_data,
                                         mx_uint *in_shape_size,
-                                        const mx_uint **in_shape_ndim,
-                                        const mx_uint ***in_shape_data,
+                                        const int **in_shape_ndim,
+                                        const int ***in_shape_data,
                                         mx_uint *out_shape_size,
-                                        const mx_uint **out_shape_ndim,
-                                        const mx_uint ***out_shape_data,
+                                        const int **out_shape_ndim,
+                                        const int ***out_shape_data,
                                         mx_uint *aux_shape_size,
-                                        const mx_uint **aux_shape_ndim,
-                                        const mx_uint ***aux_shape_data,
+                                        const int **aux_shape_ndim,
+                                        const int ***aux_shape_data,
                                         int *complete);
 
 /*!
@@ -1820,7 +1820,7 @@ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                                    const char** provided_grad_req_types,
                                    const mx_uint num_provided_arg_shapes,
                                    const char** provided_arg_shape_names,
-                                   const mx_uint* provided_arg_shape_data,
+                                   const int* provided_arg_shape_data,
                                    const mx_uint* provided_arg_shape_idx,
                                    const mx_uint num_provided_arg_dtypes,
                                    const char** provided_arg_dtype_names,
@@ -1874,7 +1874,7 @@ MXNET_DLL int MXExecutorReshape(int partial_shaping,
                                 const int* map_dev_ids,
                                 const mx_uint num_provided_arg_shapes,
                                 const char** provided_arg_shape_names,
-                                const mx_uint* provided_arg_shape_data,
+                                const int* provided_arg_shape_data,
                                 const mx_uint* provided_arg_shape_idx,
                                 mx_uint* num_in_args,
                                 NDArrayHandle** in_args,
@@ -2550,8 +2550,8 @@ MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid,
  * \param dtype data type of NDArray
  * \param out constructed NDArray
  */
-MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape,
-                                           mx_uint ndim, int dtype, NDArrayHandle *out);
+MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape,
+                                           int ndim, int dtype, NDArrayHandle *out);
 
 /*!
   * \brief Push an asynchronous operation to the engine.
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index d00cb479b92e..2232ebe7be40 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -859,12 +859,15 @@ class NDArray {
     Chunk(mxnet::TShape shape, Context ctx_, bool delay_alloc_, int dtype)
         : static_data(false), delay_alloc(true), ctx(ctx_),
           storage_ref_(Storage::_GetSharedRef()) {
-      auto size = shape.Size();
       storage_shape = shape;
+      if (shape_is_known(storage_shape)) {
+        shandle.size = shape.Size() * mshadow::mshadow_sizeof(dtype);
+      }
       var = Engine::Get()->NewVariable();
-      shandle.size = size * mshadow::mshadow_sizeof(dtype);
       shandle.ctx = ctx_;
-      if (!delay_alloc_) this->CheckAndAlloc();
+      if (!delay_alloc_) {
+        this->CheckAndAlloc();
+      }
     }
 
     Chunk(const TBlob &data, int dev_id)
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index 39c3c185e3c0..49852f73fbac 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -199,7 +199,7 @@ class Tuple {
    * \return the corresponding dimension size
    */
   inline ValueType& operator[](int i) {
-    CHECK(i >= 0 && i < ndim());
+    CHECK(i >= 0 && i < ndim()) << "index = " << i << " must be in range [0, " << ndim() << ")";
     return begin()[i];
   }
   /*!
@@ -208,7 +208,7 @@ class Tuple {
    * \return the corresponding dimension size
    */
   inline const ValueType& operator[](int i) const {
-    CHECK(i >= 0 && i < ndim());
+    CHECK(i >= 0 && i < ndim()) << "index = " << i << " must be in range [0, " << ndim() << ")";
     return begin()[i];
   }
   /*!
@@ -271,14 +271,16 @@ class Tuple {
       if (!isspace(ch)) {
         is.setstate(std::ios::failbit);
         return is;
+      }
     }
-    }
-    // Handle empty tuple
+    // Handle empty tuple. A tensor whose shape is an empty tuple
+    // represents a scalar with ndim = 0.
     while (isspace(is.peek())) {
       is.get();
     }
     if (is.peek() == ')' || is.peek() == ']') {
       is.get();
+      t.SetDim(0);
       return is;
     }
     // Handle non-empty tuple
@@ -352,7 +354,7 @@ class Tuple {
       delete [] data_heap_;
       data_heap_ = new ValueType[ndim];
       num_heap_allocated_ = ndim;
-    } else if (ndim == -1 && data_heap_ != nullptr) {
+    } else if (ndim <= 0 && data_heap_ != nullptr) {
       delete [] data_heap_;
       data_heap_ = nullptr;
       num_heap_allocated_ = 0;
@@ -381,14 +383,11 @@ class TShape : public Tuple<dim_t> {
     this->SetDim(-1);
   }
   /*!
-   * constructor to construct a shape with all 1.
-   * TODO(junwu): The value should default to -1. Need to keep 1 for now
-   * for backward compatibility. Change it to -1 in the future when we can
-   * break backward compatibility.
+   * constructor to construct a shape with all `value`.
    * \param ndim the number of dimension
    * \param value the dimension size for all dims
    */
-  inline TShape(int ndim, int value = 1) {  // NOLINT(*)
+  inline TShape(int ndim, int value = -1) {  // NOLINT(*)
     this->SetDim(ndim);
     if (ndim > 0) {
       std::fill_n(begin(), ndim, value);
@@ -458,7 +457,7 @@ class TShape : public Tuple<dim_t> {
     dim_t size = 1;
     const dim_t* start = begin(), *fin = end();
     for (const dim_t* it = start; it != fin; ++it) {
-      CHECK_GE(*it, 0) << "Shape dim size cannot be -1, which means unknown.";
+      CHECK_GE(*it, 0) << "Shape dim size cannot be a negative value " << *it;
       size *= *it;
     }
     return size;
@@ -473,7 +472,7 @@ class TShape : public Tuple<dim_t> {
     dim_t num = 1;
     const dim_t *d = this->data();
     for (int i = dimstart; i < dimend; ++i) {
-      CHECK_GE(d[i], 0) << "Shape dim size cannot be -1, which means unknown.";
+      CHECK_GE(d[i], 0) << "Shape dim size cannot be a negative value " << d[i];
       num *= d[i];
     }
     return num;
@@ -608,6 +607,16 @@ class TShape : public Tuple<dim_t> {
 #endif
 };
 
+/*! brief check if shape is known using the NumPy compatible definition.
+ * zero-dim and zero-size tensors are valid. -1 means unknown.*/
+inline bool shape_is_known(const TShape& x) {
+  if (x.ndim() == -1) return false;
+  for (int i = 0; i < x.ndim(); ++i) {
+    if (x[i] == -1) return false;
+  }
+  return true;
+}
+
 /*! \brief helper function to cast type of container elements */
 template<typename SrcIter, typename DstIter>
 inline DstIter ShapeTypeCast(const SrcIter begin,
@@ -623,7 +632,7 @@ inline DstIter ShapeTypeCast(const SrcIter begin,
 template<typename SrcIter>
 inline TShape ShapeTypeCast(const SrcIter begin, const SrcIter end) {
   size_t ndim = std::distance(begin, end);
-  TShape res(ndim);
+  TShape res(ndim, -1);
   ShapeTypeCast(begin, end, res.begin());
   return res;
 }
@@ -669,7 +678,7 @@ struct hash<mxnet::Tuple<T> > {
   size_t operator()(const mxnet::Tuple<T>& val) const {
     std::hash<uint32_t> hash_uint;
     size_t res = hash_uint(val.ndim());
-    for (uint32_t i = 0; i < val.ndim(); ++i) {
+    for (int i = 0; i < val.ndim(); ++i) {
       res = dmlc::HashCombine(res, val[i]);
     }
     return res;
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index 7793deacf44c..fe1dd00f9454 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -213,6 +213,7 @@ def _load_lib():
 _LIB = _load_lib()
 
 # type definitions
+mx_int = ctypes.c_int
 mx_uint = ctypes.c_uint
 mx_float = ctypes.c_float
 mx_float_p = ctypes.POINTER(mx_float)
diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py
index 7bf867579d6b..53ddc252d6b5 100644
--- a/python/mxnet/executor.py
+++ b/python/mxnet/executor.py
@@ -25,7 +25,7 @@
 import copy
 import numpy as np
 from .base import _LIB
-from .base import mx_uint, NDArrayHandle, ExecutorHandle, py_str
+from .base import mx_uint, NDArrayHandle, ExecutorHandle, py_str, mx_int
 from .base import check_call, c_handle_array, c_array_buf, c_str_array
 from .ndarray import NDArray
 from .ndarray import _ndarray_cls
@@ -445,8 +445,8 @@ def reshape(self, partial_shaping=False, allow_up_sizing=False, **kwargs):
                                                       py_array('i', ctx_map_dev_ids)),
                                           mx_uint(len(provided_arg_shape_names)),
                                           c_str_array(provided_arg_shape_names),
-                                          c_array_buf(mx_uint,
-                                                      py_array('I', provided_arg_shape_data)),
+                                          c_array_buf(mx_int,
+                                                      py_array('i', provided_arg_shape_data)),
                                           c_array_buf(mx_uint,
                                                       py_array('I', provided_arg_shape_idx)),
                                           ctypes.byref(num_in_args),
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 87f2712d8a40..1200c4a72c00 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -35,7 +35,7 @@
 import numpy as np
 from ..base import _LIB, numeric_types, integer_types
 from ..base import c_str, c_array, c_array_buf, c_handle_array, mx_real_t
-from ..base import mx_uint, NDArrayHandle, check_call, DLPackHandle
+from ..base import mx_uint, NDArrayHandle, check_call, DLPackHandle, mx_int
 from ..base import ctypes2buffer
 from ..context import Context, current_context
 from . import _internal
@@ -146,8 +146,8 @@ def _new_from_shared_mem(shared_pid, shared_id, shape, dtype):
     check_call(_LIB.MXNDArrayCreateFromSharedMem(
         ctypes.c_int(shared_pid),
         ctypes.c_int(shared_id),
-        c_array(mx_uint, shape),
-        mx_uint(len(shape)),
+        c_array(mx_int, shape),
+        mx_int(len(shape)),
         ctypes.c_int(int(_DTYPE_NP_TO_MX[np.dtype(dtype).type])),
         ctypes.byref(hdl)))
     return hdl
@@ -1845,8 +1845,8 @@ def shape(self):
         >>> y.shape
         (2L, 3L, 4L)
         """
-        ndim = mx_uint()
-        pdata = ctypes.POINTER(mx_uint)()
+        ndim = mx_int()
+        pdata = ctypes.POINTER(mx_int)()
         check_call(_LIB.MXNDArrayGetShape(
             self.handle, ctypes.byref(ndim), ctypes.byref(pdata)))
         return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 91d4ca16df07..b9432d372bc7 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -34,7 +34,7 @@
 
 from ..attribute import AttrScope
 from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array
-from ..base import mx_uint, py_str, string_types, integer_types
+from ..base import mx_uint, py_str, string_types, integer_types, mx_int
 from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle
 from ..base import check_call, MXNetError, NotImplementedForSymbol
 from ..context import Context, current_context
@@ -1174,14 +1174,14 @@ def _infer_shape_impl(self, partial, *args, **kwargs):
                 indptr.append(len(sdata))
             keys = c_str_array(str_keys)
         arg_shape_size = mx_uint()
-        arg_shape_ndim = ctypes.POINTER(mx_uint)()
-        arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))()
+        arg_shape_ndim = ctypes.POINTER(mx_int)()
+        arg_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))()
         out_shape_size = mx_uint()
-        out_shape_ndim = ctypes.POINTER(mx_uint)()
-        out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))()
+        out_shape_ndim = ctypes.POINTER(mx_int)()
+        out_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))()
         aux_shape_size = mx_uint()
-        aux_shape_ndim = ctypes.POINTER(mx_uint)()
-        aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_uint))()
+        aux_shape_ndim = ctypes.POINTER(mx_int)()
+        aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))()
         complete = ctypes.c_int()
         if partial:
             infer_func = _LIB.MXSymbolInferShapePartial
@@ -1192,7 +1192,7 @@ def _infer_shape_impl(self, partial, *args, **kwargs):
             mx_uint(len(indptr) - 1),
             keys,
             c_array_buf(mx_uint, array('I', indptr)),
-            c_array_buf(mx_uint, array('I', sdata)),
+            c_array_buf(mx_int, array('i', sdata)),
             ctypes.byref(arg_shape_size),
             ctypes.byref(arg_shape_ndim),
             ctypes.byref(arg_shape_data),
@@ -1576,10 +1576,10 @@ def simple_bind(self, ctx, grad_req='write', type_dict=None, stype_dict=None,
                                                  provided_grad_req_types,
                                                  mx_uint(len(provided_arg_shape_names)),
                                                  c_str_array(provided_arg_shape_names),
-                                                 c_array_buf(mx_uint,
+                                                 c_array_buf(mx_int,
                                                              array('I', provided_arg_shape_data)),
                                                  c_array_buf(mx_uint,
-                                                             array('I', provided_arg_shape_idx)),
+                                                             array('i', provided_arg_shape_idx)),
                                                  num_provided_arg_types,
                                                  provided_arg_type_names,
                                                  provided_arg_type_data,
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 45197aafe019..614732081a98 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -471,7 +471,7 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle,
   NDArray *ptr = new NDArray();
   API_BEGIN();
   NDArray *arr = static_cast<NDArray*>(handle);
-  nnvm::Tuple<dim_t> shape(dims, dims+ndim);
+  mxnet::Tuple<dim_t> shape(dims, dims+ndim);
   CHECK_GT(arr->shape().Size(), 0) << "Source ndarray's shape is undefined. Input shape: "
     << arr->shape();
   mxnet::TShape new_shape = mxnet::op::InferReshapeShape(shape, arr->shape(), reverse);
@@ -493,17 +493,18 @@ int MXNDArrayGetStorageType(NDArrayHandle handle,
 }
 
 int MXNDArrayGetShape(NDArrayHandle handle,
-                      mx_uint *out_dim,
-                      const mx_uint **out_pdata) {
+                      int *out_dim,
+                      const int **out_pdata) {
   MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
   API_BEGIN();
   NDArray *arr = static_cast<NDArray*>(handle);
   if (!arr->is_none()) {
     const mxnet::TShape &s = arr->shape();
     *out_dim = s.ndim();
-    std::vector<uint32_t>& buffer = ret->arg_shape_buffer;
+    CHECK_GE(s.ndim(), 0);
+    std::vector<int>& buffer = ret->arg_shape_buffer;
     buffer.resize(s.ndim());
-    nnvm::ShapeTypeCast(s.begin(), s.end(), buffer.data());
+    mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data());
     *out_pdata = buffer.data();
   } else {
     *out_dim = 0;
@@ -1395,8 +1396,8 @@ int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shar
   API_END();
 }
 
-int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape,
-                                 mx_uint ndim, int dtype, NDArrayHandle *out) {
+int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape,
+                                 int ndim, int dtype, NDArrayHandle *out) {
   API_BEGIN();
   *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype);
   API_END();
diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h
index b5adfa37eca9..690a1eae1055 100644
--- a/src/c_api/c_api_common.h
+++ b/src/c_api/c_api_common.h
@@ -74,29 +74,32 @@ struct MXAPIThreadLocalEntry {
   /*! \brief result holder for returning storage types */
   std::vector<int> arg_storage_types, out_storage_types, aux_storage_types;
   /*! \brief result holder for returning shape dimensions */
-  std::vector<mx_uint> arg_shape_ndim, out_shape_ndim, aux_shape_ndim;
+  std::vector<int> arg_shape_ndim, out_shape_ndim, aux_shape_ndim;
   /*! \brief result holder for returning shape pointer */
-  std::vector<const mx_uint*> arg_shape_data, out_shape_data, aux_shape_data;
+  std::vector<const int*> arg_shape_data, out_shape_data, aux_shape_data;
   /*! \brief uint32_t buffer for returning shape pointer */
-  std::vector<uint32_t> arg_shape_buffer, out_shape_buffer, aux_shape_buffer;
+  std::vector<int> arg_shape_buffer, out_shape_buffer, aux_shape_buffer;
   /*! \brief bool buffer */
   std::vector<bool> save_inputs, save_outputs;
   // helper function to setup return value of shape array
   inline static void SetupShapeArrayReturnWithBuffer(
       const mxnet::ShapeVector &shapes,
-      std::vector<mx_uint> *ndim,
-      std::vector<const mx_uint*> *data,
-      std::vector<uint32_t> *buffer) {
+      std::vector<int> *ndim,
+      std::vector<const int*> *data,
+      std::vector<int> *buffer) {
     ndim->resize(shapes.size());
     data->resize(shapes.size());
     size_t size = 0;
-    for (const auto& s : shapes) size += s.ndim();
+    for (const auto& s : shapes) {
+      CHECK_GE(s.ndim(), 0);
+      size += s.ndim();
+    }
     buffer->resize(size);
-    uint32_t *ptr = buffer->data();
+    int *ptr = buffer->data();
     for (size_t i = 0; i < shapes.size(); ++i) {
       ndim->at(i) = shapes[i].ndim();
       data->at(i) = ptr;
-      ptr = nnvm::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr);
+      ptr = mxnet::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr);
     }
   }
 };
diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc
index a2e8bb810e6f..d0006383bdc8 100644
--- a/src/c_api/c_api_executor.cc
+++ b/src/c_api/c_api_executor.cc
@@ -231,7 +231,7 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                          const char** provided_grad_req_types,
                          const mx_uint num_provided_arg_shapes,
                          const char** provided_arg_shape_names,
-                         const mx_uint* provided_arg_shape_data,
+                         const int* provided_arg_shape_data,
                          const mx_uint* provided_arg_shape_idx,
                          const mx_uint num_provided_arg_dtypes,
                          const char** provided_arg_dtype_names,
@@ -547,7 +547,7 @@ int MXExecutorReshape(int partial_shaping,
                       const int* map_dev_ids,
                       const mx_uint num_provided_arg_shapes,
                       const char** provided_arg_shape_names,
-                      const mx_uint* provided_arg_shape_data,
+                      const int* provided_arg_shape_data,
                       const mx_uint* provided_arg_shape_idx,
                       mx_uint* num_in_args,
                       NDArrayHandle** in_args,
diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc
index 545e95f04b79..8b961f46c39e 100644
--- a/src/c_api/c_api_symbolic.cc
+++ b/src/c_api/c_api_symbolic.cc
@@ -505,16 +505,16 @@ int MXSymbolInferShape(SymbolHandle sym,
                        mx_uint num_args,
                        const char** keys,
                        const mx_uint *arg_ind_ptr,
-                       const mx_uint *arg_shape_data,
+                       const int *arg_shape_data,
                        mx_uint *in_shape_size,
-                       const mx_uint **in_shape_ndim,
-                       const mx_uint ***in_shape_data,
+                       const int **in_shape_ndim,
+                       const int ***in_shape_data,
                        mx_uint *out_shape_size,
-                       const mx_uint **out_shape_ndim,
-                       const mx_uint ***out_shape_data,
+                       const int **out_shape_ndim,
+                       const int ***out_shape_data,
                        mx_uint *aux_shape_size,
-                       const mx_uint **aux_shape_ndim,
-                       const mx_uint ***aux_shape_data,
+                       const int **aux_shape_ndim,
+                       const int ***aux_shape_data,
                        int *complete) {
   nnvm::Symbol *s = static_cast<nnvm::Symbol*>(sym);
   MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
@@ -572,16 +572,16 @@ int MXSymbolInferShapePartial(SymbolHandle sym,
                               mx_uint num_args,
                               const char** keys,
                               const mx_uint *arg_ind_ptr,
-                              const mx_uint *arg_shape_data,
+                              const int *arg_shape_data,
                               mx_uint *in_shape_size,
-                              const mx_uint **in_shape_ndim,
-                              const mx_uint ***in_shape_data,
+                              const int **in_shape_ndim,
+                              const int ***in_shape_data,
                               mx_uint *out_shape_size,
-                              const mx_uint **out_shape_ndim,
-                              const mx_uint ***out_shape_data,
+                              const int **out_shape_ndim,
+                              const int ***out_shape_data,
                               mx_uint *aux_shape_size,
-                              const mx_uint **aux_shape_ndim,
-                              const mx_uint ***aux_shape_data,
+                              const int **aux_shape_ndim,
+                              const int ***aux_shape_data,
                               int *complete) {
   int succ;
   *complete = 1;
diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 279ecbd67f09..0551b429f17e 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -380,7 +380,7 @@ inline void HandleInferShapeError(const size_t num_forward_inputs,
     const uint32_t nid = idx.input_nodes().at(i);
     const uint32_t eid = idx.entry_id(nid, 0);
     const mxnet::TShape& inferred_shape = inferred_shapes[eid];
-    if (inferred_shape.ndim() == 0 || inferred_shape.Size() == 0U) {
+    if (!shape_is_known(inferred_shape)) {
       const std::string& arg_name = idx[nid].source->attrs.name;
       oss << arg_name << ": " << inferred_shape << ", ";
       if (--cnt == 0) {
@@ -390,7 +390,7 @@ inline void HandleInferShapeError(const size_t num_forward_inputs,
     }
   }
   LOG(FATAL) << "InferShape pass cannot decide shapes for the following arguments "
-                "(0s means unknown dimensions). Please consider providing them as inputs:\n"
+                "(-1 means unknown dimensions). Please consider providing them as inputs:\n"
              << oss.str();
 }
 
diff --git a/src/executor/graph_executor.cc b/src/executor/graph_executor.cc
index 9e1b66d1b286..4a4505581920 100644
--- a/src/executor/graph_executor.cc
+++ b/src/executor/graph_executor.cc
@@ -967,7 +967,7 @@ void GraphExecutor::InitDataEntryMemory(std::vector<NDArray>* shared_pool) {
     uint32_t oid = head_grad_map_.at(idx[nid].source);
     uint32_t eid = idx.entry_id(idx.outputs()[oid]);
     NDArrayStorageType stype = (NDArrayStorageType) vstorage_type[eid];
-    CHECK(mxnet::op::shape_is_known(vshape[eid]));
+    CHECK(mxnet::shape_is_known(vshape[eid]));
     CHECK_NE(vdtype[eid], -1);
     auto data_eid = idx.entry_id(nid, 0);
     // initialize based on storage_type
diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc
index aa72661e78b2..3a5c5ab9806f 100644
--- a/src/executor/infer_graph_attr_pass.cc
+++ b/src/executor/infer_graph_attr_pass.cc
@@ -648,7 +648,7 @@ nnvm::Graph InferShape(nnvm::Graph&& graph,
       std::move(graph), mxnet::TShape(),
       "FInferShape", "shape_inputs", "shape_attr_key",
       "shape", "shape_num_unknown_nodes",
-      [](const mxnet::TShape& s) { return !mxnet::op::shape_is_known(s); },
+      [](const mxnet::TShape& s) { return !mxnet::shape_is_known(s); },
       [](const mxnet::TShape& s) {
         if (s.ndim() == -1) {
           return static_cast<size_t>(1);
diff --git a/src/io/image_io.cc b/src/io/image_io.cc
index 2196983928bb..965078cb2766 100644
--- a/src/io/image_io.cc
+++ b/src/io/image_io.cc
@@ -189,7 +189,7 @@ void Imdecode(const nnvm::NodeAttrs& attrs,
   size_t len = inputs[0].shape().Size();
   CHECK(len > 0) << "Input cannot be an empty buffer";
 
-  mxnet::TShape oshape(3);
+  mxnet::TShape oshape(3, 1);
   oshape[2] = param.flag == 0 ? 1 : 3;
   if (get_jpeg_size(str_img, len, &oshape[1], &oshape[0])) {
   } else if (get_png_size(str_img, len, &oshape[1], &oshape[0])) {
@@ -229,7 +229,7 @@ void Imread(const nnvm::NodeAttrs& attrs,
   CHECK(file.good()) << "Failed reading image file: '" << param.filename << "' "
             << strerror(errno);
 
-  mxnet::TShape oshape(3);
+  mxnet::TShape oshape(3, 1);
   oshape[2] = param.flag == 0 ? 1 : 3;
   if (get_jpeg_size(buff.get(), fsize, &oshape[1], &oshape[0])) {
   } else if (get_png_size(buff.get(), fsize, &oshape[1], &oshape[0])) {
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 377bef072b03..04518e0feb77 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1650,7 +1650,7 @@ bool LegacyTShapeLoad(dmlc::Stream *strm, mxnet::TShape *shape, const uint32_t m
     default:
       // meet legacy mxnet::TShape, magic is ndim here
       uint32_t ndim = magic;
-      *shape = mxnet::TShape(ndim);
+      *shape = mxnet::TShape(ndim, -1);
       std::vector<uint32_t> buffer(ndim);
       size_t nread = ndim * sizeof(uint32_t);
       if (strm->Read(buffer.data(), nread) != nread) return false;
diff --git a/src/nnvm/plan_memory.cc b/src/nnvm/plan_memory.cc
index 0dc7e6ddb1d9..41b8559d16c2 100644
--- a/src/nnvm/plan_memory.cc
+++ b/src/nnvm/plan_memory.cc
@@ -76,7 +76,7 @@ class GraphAllocator {
 
   // request a free storage
   StorageID Request(int dev_id, int dtype, mxnet::TShape shape, uint32_t node_id) {
-    if (!mxnet::op::shape_is_known(shape)) return kBadStorageID;
+    if (!mxnet::shape_is_known(shape)) return kBadStorageID;
     // search memory block in [size / match_range_, size * match_range_)
     // TODO(tqchen) add size of the dtype, assume 4 bytes for now
     size_t size = shape.Size() * 4;
diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h
index f407a5cce61b..8016510090ab 100644
--- a/src/operator/batch_norm_v1-inl.h
+++ b/src/operator/batch_norm_v1-inl.h
@@ -261,7 +261,7 @@ class BatchNormV1Prop : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (dshape.ndim() == 0) return false;
+    if (!shape_is_known(dshape)) return false;
     in_shape->at(1) = mxnet::TShape(Shape1(dshape[1]));
     in_shape->at(2) = mxnet::TShape(Shape1(dshape[1]));
     out_shape->clear();
diff --git a/src/operator/bilinear_sampler-inl.h b/src/operator/bilinear_sampler-inl.h
index 8b1ff38709b6..abb4a61dc84c 100644
--- a/src/operator/bilinear_sampler-inl.h
+++ b/src/operator/bilinear_sampler-inl.h
@@ -149,10 +149,10 @@ class BilinearSamplerProp : public OperatorProperty {
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, grid]";
     const mxnet::TShape &dshape = (*in_shape)[bs::kData];
     const mxnet::TShape &lshape = (*in_shape)[bs::kGrid];
-    if (dshape.ndim() == 0) return false;
+    if (!shape_is_known(dshape)) return false;
     CHECK_EQ(dshape.ndim(), 4U) \
         << "input data should be 4D in batch-num_filter-y-x";
-    if (lshape.ndim() ==  0) return false;
+    if (!shape_is_known(lshape)) return false;
     CHECK_EQ(lshape.ndim(), 4U) \
       << "Sampler grid should be 4D in batch-2-y-x";
     CHECK_EQ(dshape[0], lshape[0]);
diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h
index 059327ef8334..6ea4e8097b6c 100644
--- a/src/operator/contrib/bounding_box-inl.h
+++ b/src/operator/contrib/bounding_box-inl.h
@@ -558,7 +558,7 @@ inline bool BoxOverlapShape(const nnvm::NodeAttrs& attrs,
     << rdim << " provided";
 
   // assign output shape
-  mxnet::TShape oshape(lshape.ndim() + rshape.ndim() - 2);
+  mxnet::TShape oshape(lshape.ndim() + rshape.ndim() - 2, -1);
   int idx = 0;
   for (index_t i = 0; i < lshape.ndim() - 1; ++i) {
     oshape[idx++] = lshape[i];
@@ -567,7 +567,7 @@ inline bool BoxOverlapShape(const nnvm::NodeAttrs& attrs,
     oshape[idx++] = rshape[i];
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 struct compute_overlap {
@@ -671,14 +671,14 @@ inline bool MatchingShape(const nnvm::NodeAttrs& attrs,
     << dshape.ndim() << " provided";
 
   // assign output shape
-  mxnet::TShape oshape(dshape.ndim() - 1);
+  mxnet::TShape oshape(dshape.ndim() - 1, -1);
   for (index_t i = 0; i < dshape.ndim() - 1; ++i) {
     oshape[i] = dshape[i];
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
   oshape[oshape.ndim() - 1] = dshape[dshape.ndim() - 1];
   SHAPE_ASSIGN_CHECK(*out_attrs, 1, oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 struct bipartite_matching {
diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h
index 3e96cad1c859..936df7f1bcd4 100644
--- a/src/operator/contrib/deformable_convolution-inl.h
+++ b/src/operator/contrib/deformable_convolution-inl.h
@@ -127,7 +127,7 @@ class DeformableConvolutionOp : public Operator {
     Tensor<xpu, 1, DType> workspace = ctx.requested[conv::kTempSpace]
       .get_space_typed<xpu, 1, DType>(Shape1(col_buffer_size_), s);
     // calculate the shape of col_buffer
-    mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1);
+    mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, -1);
     col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size();
     for (size_t i = 1; i < col_buffer_shape.ndim(); ++i) {
       col_buffer_shape[i] = out_data[0].shape_[i + 1];
@@ -189,7 +189,7 @@ class DeformableConvolutionOp : public Operator {
     Tensor<xpu, 1, DType> workspace = ctx.requested[conv::kTempSpace]
       .get_space_typed<xpu, 1, DType>(Shape1(col_buffer_size_), s);
     // calculate the shape of col_buffer
-    mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1);
+    mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, -1);
     col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size();
     for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) {
       col_buffer_shape[i] = out_grad[conv::kData].shape_[i + 1];
diff --git a/src/operator/contrib/dgl_graph.cc b/src/operator/contrib/dgl_graph.cc
index 02ef2cee1caa..313b855f0d2d 100644
--- a/src/operator/contrib/dgl_graph.cc
+++ b/src/operator/contrib/dgl_graph.cc
@@ -259,7 +259,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs,
 
   // Output
   bool success = true;
-  mxnet::TShape out_shape(1);
+  mxnet::TShape out_shape(1, -1);
   // We use the last element to store the actual
   // number of vertices in the subgraph.
   out_shape[0] = params.max_num_vertices + 1;
@@ -268,7 +268,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs,
     success = success && !mxnet::op::shape_is_none(out_attrs->at(i));
   }
   // sub_csr
-  mxnet::TShape out_csr_shape(2);
+  mxnet::TShape out_csr_shape(2, -1);
   out_csr_shape[0] = params.max_num_vertices;
   out_csr_shape[1] = in_attrs->at(0)[1];
   for (size_t i = 0; i < num_subgraphs; i++) {
@@ -276,7 +276,7 @@ static bool CSRNeighborUniformSampleShape(const nnvm::NodeAttrs& attrs,
     success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs));
   }
   // sub_layer
-  mxnet::TShape out_layer_shape(1);
+  mxnet::TShape out_layer_shape(1, -1);
   out_layer_shape[0] = params.max_num_vertices;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_layer_shape);
@@ -311,7 +311,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs,
 
   // Output
   bool success = true;
-  mxnet::TShape out_shape(1);
+  mxnet::TShape out_shape(1, -1);
   // We use the last element to store the actual
   // number of vertices in the subgraph.
   out_shape[0] = params.max_num_vertices + 1;
@@ -320,7 +320,7 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs,
     success = success && !mxnet::op::shape_is_none(out_attrs->at(i));
   }
   // sub_csr
-  mxnet::TShape out_csr_shape(2);
+  mxnet::TShape out_csr_shape(2, -1);
   out_csr_shape[0] = params.max_num_vertices;
   out_csr_shape[1] = in_attrs->at(0)[1];
   for (size_t i = 0; i < num_subgraphs; i++) {
@@ -328,14 +328,14 @@ static bool CSRNeighborNonUniformSampleShape(const nnvm::NodeAttrs& attrs,
     success = success && !mxnet::op::shape_is_none(out_attrs->at(i + num_subgraphs));
   }
   // sub_probability
-  mxnet::TShape out_prob_shape(1);
+  mxnet::TShape out_prob_shape(1, -1);
   out_prob_shape[0] = params.max_num_vertices;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + 2*num_subgraphs, out_prob_shape);
     success = success && !mxnet::op::shape_is_none(out_attrs->at(i + 2 * num_subgraphs));
   }
   // sub_layer
-  mxnet::TShape out_layer_shape(1);
+  mxnet::TShape out_layer_shape(1, -1);
   out_layer_shape[0] = params.max_num_vertices;
   for (size_t i = 0; i < num_subgraphs; i++) {
     SHAPE_ASSIGN_CHECK(*out_attrs, i + 3*num_subgraphs, out_prob_shape);
@@ -665,8 +665,8 @@ static void SampleSubgraph(const NDArray &csr,
     }
   }
   // Construct sub_csr_graph
-  mxnet::TShape shape_1(1);
-  mxnet::TShape shape_2(1);
+  mxnet::TShape shape_1(1, -1);
+  mxnet::TShape shape_2(1, -1);
   shape_1[0] = num_edges;
   shape_2[0] = max_num_vertices+1;
   sub_csr.CheckAndAllocData(shape_1);
@@ -946,13 +946,13 @@ static bool DGLSubgraphShape(const nnvm::NodeAttrs& attrs,
 
   size_t num_g = params.num_args - 1;
   for (size_t i = 0; i < num_g; i++) {
-    mxnet::TShape gshape(2);
+    mxnet::TShape gshape(2, -1);
     gshape[0] = in_attrs->at(i + 1)[0];
     gshape[1] = in_attrs->at(i + 1)[0];
     out_attrs->at(i) = gshape;
   }
   for (size_t i = num_g; i < out_attrs->size(); i++) {
-    mxnet::TShape gshape(2);
+    mxnet::TShape gshape(2, -1);
     gshape[0] = in_attrs->at(i - num_g + 1)[0];
     gshape[1] = in_attrs->at(i - num_g + 1)[0];
     out_attrs->at(i) = gshape;
@@ -1067,9 +1067,9 @@ static void GetSubgraph(const NDArray &csr_arr, const NDArray &varr,
     row_idx[i + 1] = col_idx.size();
   }
 
-  mxnet::TShape nz_shape(1);
+  mxnet::TShape nz_shape(1, -1);
   nz_shape[0] = col_idx.size();
-  mxnet::TShape indptr_shape(1);
+  mxnet::TShape indptr_shape(1, -1);
   indptr_shape[0] = row_idx.size();
 
   // Store the non-zeros in a subgraph with edge attributes of new edge ids.
@@ -1446,9 +1446,9 @@ static void CompactSubgraph(const NDArray &csr, const NDArray &vids,
     CHECK_NE(row_ids[i], -1);
   }
 
-  mxnet::TShape nz_shape(1);
+  mxnet::TShape nz_shape(1, -1);
   nz_shape[0] = num_elems;
-  mxnet::TShape indptr_shape(1);
+  mxnet::TShape indptr_shape(1, -1);
   CHECK_EQ(out_csr.shape()[0], graph_size);
   indptr_shape[0] = graph_size + 1;
   CHECK_GE(in_ptr_data.shape_[0], indptr_shape[0]);
@@ -1526,7 +1526,7 @@ static bool SubgraphCompactShape(const nnvm::NodeAttrs& attrs,
   }
 
   for (size_t i = 0; i < num_g; i++) {
-    mxnet::TShape gshape(2);
+    mxnet::TShape gshape(2, -1);
     gshape[0] = params.graph_sizes[i];
     gshape[1] = params.graph_sizes[i];
     out_attrs->at(i) = gshape;
diff --git a/src/operator/contrib/multibox_detection-inl.h b/src/operator/contrib/multibox_detection-inl.h
index 977126ad269d..1ac14e237f0d 100644
--- a/src/operator/contrib/multibox_detection-inl.h
+++ b/src/operator/contrib/multibox_detection-inl.h
@@ -161,7 +161,7 @@ class MultiBoxDetectionProp : public OperatorProperty {
     CHECK_EQ(cshape[2] * 4, lshape[1]) << "# anchors mismatch with # loc";
     CHECK_GT(ashape[1], 0U) << "Number of anchors must > 0";
     CHECK_EQ(ashape[2], 4U);
-    mxnet::TShape oshape = mxnet::TShape(3);
+    mxnet::TShape oshape = mxnet::TShape(3, -1);
     oshape[0] = cshape[0];
     oshape[1] = ashape[1];
     oshape[2] = 6;  // [id, prob, xmin, ymin, xmax, ymax]
diff --git a/src/operator/contrib/multibox_prior-inl.h b/src/operator/contrib/multibox_prior-inl.h
index 3636a6016bd2..d8929f3deff4 100644
--- a/src/operator/contrib/multibox_prior-inl.h
+++ b/src/operator/contrib/multibox_prior-inl.h
@@ -180,7 +180,7 @@ class MultiBoxPriorProp: public OperatorProperty {
     int in_width = dshape[3];
     CHECK_GT(in_width, 0) << "Input width should > 0";
     // since input sizes are same in each batch, we could share MultiBoxPrior
-    mxnet::TShape oshape = mxnet::TShape(3);
+    mxnet::TShape oshape = mxnet::TShape(3, -1);
     int num_sizes = param_.sizes.ndim();
     int num_ratios = param_.ratios.ndim();
     oshape[0] = 1;
@@ -189,7 +189,7 @@ class MultiBoxPriorProp: public OperatorProperty {
     out_shape->clear();
     out_shape->push_back(oshape);
     CHECK_EQ(param_.steps.ndim(), 2) << "Step ndim must be 2: (step_y, step_x)";
-    return true;
+    return shape_is_known(oshape);
   }
 
   OperatorProperty* Copy() const override {
diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc
index ac6fea7c143b..9ba3b5471c60 100644
--- a/src/operator/control_flow.cc
+++ b/src/operator/control_flow.cc
@@ -37,11 +37,11 @@ struct ForeachParam : public dmlc::Parameter<ForeachParam> {
   int num_outputs;
   int num_out_data;
   // The location of states in the subgraph inputs.
-  nnvm::Tuple<dim_t> in_state_locs;
+  mxnet::Tuple<dim_t> in_state_locs;
   // The location of data arrays in the subgraph inputs.
-  nnvm::Tuple<dim_t> in_data_locs;
+  mxnet::Tuple<dim_t> in_data_locs;
   // The location of remaining arrays in the subgraph inputs.
-  nnvm::Tuple<dim_t> remain_locs;
+  mxnet::Tuple<dim_t> remain_locs;
   DMLC_DECLARE_PARAMETER(ForeachParam) {
     DMLC_DECLARE_FIELD(num_args).set_lower_bound(1)
     .describe("Number of inputs.");
@@ -82,7 +82,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr,
   CHECK_GT(params.in_data_locs.ndim(), 0);
   size_t len = inputs[0].shape()[iter_dim];
   state.num_iterations = len;
-  for (size_t i = 1; i < params.in_data_locs.ndim(); i++)
+  for (int i = 1; i < params.in_data_locs.ndim(); i++)
     CHECK_EQ(inputs[i].shape()[iter_dim], len);
   for (size_t i = 0; i < (size_t) params.num_out_data; i++)
     CHECK_EQ(len, outputs[i].shape()[iter_dim]);
@@ -120,7 +120,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr,
   // and the loop states.
   std::vector<NDArray> subg_inputs(inputs.size());
   // The remaining arrays (other than input data and states) only need to be set once.
-  for (size_t j = 0; j < params.remain_locs.ndim(); j++) {
+  for (int j = 0; j < params.remain_locs.ndim(); j++) {
     CHECK_LT(params.remain_locs[j], subg_inputs.size());
     subg_inputs[params.remain_locs[j]] = inputs[j + params.in_data_locs.ndim()
         + params.in_state_locs.ndim()];
@@ -148,7 +148,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr,
 
     // Initialize inputs for the subgraph.
     // Get a slice from the input data arrays.
-    for (size_t j = 0; j < params.in_data_locs.ndim(); j++) {
+    for (int j = 0; j < params.in_data_locs.ndim(); j++) {
       size_t loc = params.in_data_locs[j];
       subg_inputs[loc] = inputs[j].At(i);
     }
@@ -161,7 +161,7 @@ static void ForeachComputeExCPU(const OpStatePtr& state_ptr,
         subg_inputs[params.in_state_locs[idx]] = (*subg_out_prev)[j];
       }
     } else {
-      for (size_t j = 0; j < params.in_state_locs.ndim(); j++) {
+      for (int j = 0; j < params.in_state_locs.ndim(); j++) {
         CHECK_LT(params.in_state_locs[j], subg_inputs.size());
         subg_inputs[params.in_state_locs[j]] = inputs[j + params.in_data_locs.ndim()];
       }
@@ -203,7 +203,7 @@ static void ForeachGradComputeExCPU(const OpStatePtr& state_ptr,
   // [data vars], [loop vars], [remaining vars]
 
   // [remaining vars]
-  for (size_t i = 0; i < params.remain_locs.ndim(); i++) {
+  for (int i = 0; i < params.remain_locs.ndim(); i++) {
     size_t loc = params.remain_locs[i];
     size_t orig_loc = i + params.in_data_locs.ndim() + params.in_state_locs.ndim();
     subg_igrads[loc] = outputs[orig_loc];
@@ -216,20 +216,20 @@ static void ForeachGradComputeExCPU(const OpStatePtr& state_ptr,
     if (iter_num < len - 1) {
       // For the rest of the iterations, we should add graidents to the
       // remaining vars.
-      for (size_t i = 0; i < params.remain_locs.ndim(); i++) {
+      for (int i = 0; i < params.remain_locs.ndim(); i++) {
         size_t loc = params.remain_locs[i];
         subg_req[loc] = kAddTo;
       }
     }
 
     // [data vars]
-    for (size_t i = 0; i < params.in_data_locs.ndim(); i++) {
+    for (int i = 0; i < params.in_data_locs.ndim(); i++) {
       size_t loc = params.in_data_locs[i];
       subg_igrads[loc] = outputs[i].At(iter_num);
       subg_req[loc] = req[i];
     }
     // [loop vars]
-    for (size_t i = 0; i < params.in_state_locs.ndim(); i++) {
+    for (int i = 0; i < params.in_state_locs.ndim(); i++) {
       size_t loc = params.in_state_locs[i];
       const NDArray &output = outputs[i + params.in_data_locs.ndim()];
       if (iter_num != 0) {
@@ -258,9 +258,9 @@ static void ForeachGradComputeExCPU(const OpStatePtr& state_ptr,
 
 template<typename T>
 static void remap(const std::vector<T> &op_in, size_t start,
-                  const nnvm::Tuple<dim_t> &locs, std::vector<T> *subg_in) {
+                  const mxnet::Tuple<dim_t> &locs, std::vector<T> *subg_in) {
   auto op_in_it = op_in.begin() + start;
-  for (size_t i = 0; i < locs.ndim(); i++) {
+  for (int i = 0; i < locs.ndim(); i++) {
     dim_t loc = locs[i];
     subg_in->at(loc) = *(op_in_it + i);
   }
@@ -284,7 +284,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
   mxnet::ShapeVector subg_in_shape(in_shape->size());
   // data shape
   std::vector<bool> data_1d(params.in_data_locs.ndim(), false);
-  for (size_t i = 0; i < params.in_data_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_data_locs.ndim(); i++) {
     size_t loc = params.in_data_locs[i];
     if (in_shape->at(i).ndim() == 1)
       data_1d[i] = true;
@@ -301,7 +301,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
   for (int i = 0; i < params.num_out_data; i++) {
     mxnet::TShape shape = subg_out_shape[i];
     // If we don't have shape info, we don't need to do anything.
-    if (shape.ndim() == 0)
+    if (!shape_is_known(shape))
       continue;
     subg_out_shape[i] = SliceFirstDim(shape);
   }
@@ -317,12 +317,12 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
   for (int i = 0; i < params.num_out_data; i++) {
     // If the output shape isn't inferred, we don't need to propogate the info.
     const auto& g_out_shape = subg_out_shape[i];
-    if (g_out_shape.ndim() == 0)
+    if (!shape_is_known(g_out_shape))
       continue;
 
-    auto out = mxnet::TShape(g_out_shape.ndim() + 1);
+    auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1);
     out[0] = len;
-    for (size_t i = 1; i < out.ndim(); i++)
+    for (int i = 1; i < out.ndim(); i++)
       out[i] = g_out_shape[i - 1];
     SHAPE_ASSIGN_CHECK(*out_shape, i, out);
   }
@@ -331,34 +331,34 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
     SHAPE_ASSIGN_CHECK(*out_shape, i, subg_out_shape[i]);
 
   // For the shape of input data.
-  for (size_t i = 0; i < params.in_data_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_data_locs.ndim(); i++) {
     size_t loc = params.in_data_locs[i];
     const auto &shape = subg_in_shape[loc];
     // If the input data shape isn't inferred, we don't need to propogate the
     // info.
-    if (shape.ndim() == 0)
+    if (!shape_is_known(shape))
       continue;
 
     if (data_1d[i]) {
-      mxnet::TShape s(1);
+      mxnet::TShape s(1, -1);
       s[0] = len;
       SHAPE_ASSIGN_CHECK(*in_shape, i, s);
     } else {
-      auto in = mxnet::TShape(shape.ndim() + 1);
+      auto in = mxnet::TShape(shape.ndim() + 1, -1);
       in[0] = len;
-      for (size_t i = 1; i < in.ndim(); i++)
+      for (int i = 1; i < in.ndim(); i++)
         in[i] = shape[i - 1];
       SHAPE_ASSIGN_CHECK(*in_shape, i, in);
     }
   }
   // For the shape of state.
-  for (size_t i = 0; i < params.in_state_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_state_locs.ndim(); i++) {
     size_t loc = params.in_state_locs[i];
     SHAPE_ASSIGN_CHECK(*in_shape, i + params.in_data_locs.ndim(),
                        subg_in_shape[loc]);
   }
   // For the shape of remaining data.
-  for (size_t i = 0; i < params.remain_locs.ndim(); i++) {
+  for (int i = 0; i < params.remain_locs.ndim(); i++) {
     size_t loc = params.remain_locs[i];
     SHAPE_ASSIGN_CHECK(*in_shape,
                        i + params.in_data_locs.ndim() + params.in_state_locs.ndim(),
@@ -387,15 +387,15 @@ static bool ForeachType(const nnvm::NodeAttrs& attrs,
   remap(*in_type, params.in_data_locs.ndim() + params.in_state_locs.ndim(),
         params.remain_locs, &subg_in_type);
   bool success = InferSubgraphDataType(*attrs.subgraphs[0], &subg_in_type, out_type);
-  for (size_t i = 0; i < params.in_data_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_data_locs.ndim(); i++) {
     size_t loc = params.in_data_locs[i];
     TYPE_ASSIGN_CHECK(*in_type, i, subg_in_type[loc]);
   }
-  for (size_t i = 0; i < params.in_state_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_state_locs.ndim(); i++) {
     size_t loc = params.in_state_locs[i];
     TYPE_ASSIGN_CHECK(*in_type, i + params.in_data_locs.ndim(), subg_in_type[loc]);
   }
-  for (size_t i = 0; i < params.remain_locs.ndim(); i++) {
+  for (int i = 0; i < params.remain_locs.ndim(); i++) {
     size_t loc = params.remain_locs[i];
     TYPE_ASSIGN_CHECK(*in_type, i + params.in_data_locs.ndim() + params.in_state_locs.ndim(),
                       subg_in_type[loc]);
@@ -418,16 +418,16 @@ static bool ForeachStorageType(const nnvm::NodeAttrs& attrs,
         params.remain_locs, &subg_in_attrs);
   bool success = InferSubgraphStorage(*attrs.subgraphs[0], dev_mask,
                                       dispatch_mode, &subg_in_attrs, out_attrs);
-  for (size_t i = 0; i < params.in_data_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_data_locs.ndim(); i++) {
     size_t loc = params.in_data_locs[i];
     STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i, subg_in_attrs[loc]);
   }
-  for (size_t i = 0; i < params.in_state_locs.ndim(); i++) {
+  for (int i = 0; i < params.in_state_locs.ndim(); i++) {
     size_t loc = params.in_state_locs[i];
     STORAGE_TYPE_ASSIGN_CHECK(*in_attrs, i + params.in_data_locs.ndim(),
                               subg_in_attrs[loc]);
   }
-  for (size_t i = 0; i < params.remain_locs.ndim(); i++) {
+  for (int i = 0; i < params.remain_locs.ndim(); i++) {
     size_t loc = params.remain_locs[i];
     STORAGE_TYPE_ASSIGN_CHECK(*in_attrs,
                               i + params.in_data_locs.ndim() + params.in_state_locs.ndim(),
@@ -488,9 +488,9 @@ struct WhileLoopParam : public dmlc::Parameter<WhileLoopParam> {
   // `cond_input_locs' contains indices of inputs fed to `cond', and
   // `func_input_locs' contains indices of inputs fed to `func'.
   // `func_var_locs' are indices in which input "variables" are stored in func's inputs.
-  nnvm::Tuple<dim_t> cond_input_locs;
-  nnvm::Tuple<dim_t> func_input_locs;
-  nnvm::Tuple<dim_t> func_var_locs;
+  mxnet::Tuple<dim_t> cond_input_locs;
+  mxnet::Tuple<dim_t> func_input_locs;
+  mxnet::Tuple<dim_t> func_var_locs;
   DMLC_DECLARE_PARAMETER(WhileLoopParam) {
     DMLC_DECLARE_FIELD(num_args).set_lower_bound(2)
     .describe("Number of input arguments, including cond and func as two symbol inputs.");
@@ -538,12 +538,12 @@ class WhileLoopState: public LoopState {
                  n_iterations(0U),
                  cond_op(LoopState::MakeSharedOp(cond)),
                  oi_map(params.func_var_locs.ndim(), -1) {
-    const nnvm::Tuple<dim_t> &func_input_locs = params.func_input_locs;
-    const nnvm::Tuple<dim_t> &func_var_locs = params.func_var_locs;
-    const nnvm::Tuple<dim_t> &cond_input_locs = params.cond_input_locs;
-    for (size_t i = 0; i < func_var_locs.ndim(); ++i) {
+    const mxnet::Tuple<dim_t> &func_input_locs = params.func_input_locs;
+    const mxnet::Tuple<dim_t> &func_var_locs = params.func_var_locs;
+    const mxnet::Tuple<dim_t> &cond_input_locs = params.cond_input_locs;
+    for (int i = 0; i < func_var_locs.ndim(); ++i) {
       dim_t pos_i = func_input_locs[func_var_locs[i]];
-      for (size_t j = 0; j < cond_input_locs.ndim(); ++j) {
+      for (int j = 0; j < cond_input_locs.ndim(); ++j) {
         dim_t pos_j = cond_input_locs[j];
         if (pos_i == pos_j) {
           this->oi_map[i] = j;
@@ -740,7 +740,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs,
   // infer shape for cond and func
   auto infer_subg = [&params, in_shape, out_shape](std::shared_ptr<Symbol> subg,
                                                    ShapeVector *_subg_out,
-                                                   const nnvm::Tuple<dim_t> &input_locs,
+                                                   const mxnet::Tuple<dim_t> &input_locs,
                                                    int num_out_data,
                                                    bool fill_out_shape) {
     // create subg_in
@@ -781,7 +781,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs,
     for (size_t i = 0; i < subg_in.size(); ++i) {
       auto eid = idx.entry_id(input_nids[i], 0);
       auto g_out_shape = new_shapes[eid];
-      if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) {
+      if (!shape_is_known(g_out_shape)) {
         // when the shape is not fully inferred
         continue;
       }
@@ -795,13 +795,13 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs,
     for (int i = 0; i < num_out_data; ++i) {
       auto eid = idx.entry_id(g.outputs[i]);
       auto g_out_shape = new_shapes[eid];
-      if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) {
+      if (!shape_is_known(g_out_shape)) {
         // when the shape is not fully inferred
         continue;
       }
-      auto out = mxnet::TShape(g_out_shape.ndim() + 1);
+      auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1);
       out[0] = params.max_iterations;
-      for (size_t i = 1; i < out.ndim(); i++)
+      for (int i = 1; i < out.ndim(); i++)
         out[i] = g_out_shape[i - 1];
       SHAPE_ASSIGN_CHECK(*out_shape, i, out);
     }
@@ -809,7 +809,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs,
     for (size_t i = num_out_data; i < g.outputs.size(); ++i) {
       auto eid = idx.entry_id(g.outputs[i]);
       auto g_out_shape = new_shapes[eid];
-      if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) {
+      if (!shape_is_known(g_out_shape)) {
         // when the shape is not fully inferred
         continue;
       }
@@ -817,7 +817,7 @@ static bool WhileLoopShape(const nnvm::NodeAttrs& attrs,
     }
     return g.GetAttr<size_t>("shape_num_unknown_nodes") == 0;
   };
-  mxnet::ShapeVector cond_out_shape{mxnet::TShape(1U)};  // this means: [(1, )]
+  mxnet::ShapeVector cond_out_shape{mxnet::TShape(1, 1)};  // this means: [(1, )]
   mxnet::ShapeVector func_out_shape(params.num_outputs);
   CHECK(params.sync_in_out(in_shape, out_shape, is_udf));
   bool succ_0 = infer_subg(attrs.subgraphs[0], &cond_out_shape, params.cond_input_locs, 0, false);
@@ -915,9 +915,9 @@ WhileLoopGradient(const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& og
 struct CondParam : public dmlc::Parameter<CondParam> {
   int num_args;
   int num_outputs;
-  nnvm::Tuple<dim_t> cond_input_locs;
-  nnvm::Tuple<dim_t> then_input_locs;
-  nnvm::Tuple<dim_t> else_input_locs;
+  mxnet::Tuple<dim_t> cond_input_locs;
+  mxnet::Tuple<dim_t> then_input_locs;
+  mxnet::Tuple<dim_t> else_input_locs;
   DMLC_DECLARE_PARAMETER(CondParam) {
     DMLC_DECLARE_FIELD(num_args).set_lower_bound(3)
     .describe("Number of input arguments, including cond, then and else as three symbol inputs.");
@@ -992,7 +992,7 @@ static void CondComputeExCPU(const OpStatePtr& state_ptr,
   state.cond_op->Forward(nullptr, cond_input_ptr, cond_output_ptr);
   branch_selection = as_bool_scalar(*cond_output_ptr[0]);
   // select the right branch
-  const nnvm::Tuple<dim_t> &func_input_locs = branch_selection
+  const mxnet::Tuple<dim_t> &func_input_locs = branch_selection
                                             ? params.then_input_locs
                                             : params.else_input_locs;
   LoopState &loop_state = branch_selection
@@ -1017,7 +1017,7 @@ static void CondGradComputeExCPU(const OpStatePtr& state_ptr,
   // select the right branch
   int branch_selection = state.branch_selection;
   CHECK_NE(branch_selection, -1);
-  const nnvm::Tuple<dim_t> &func_input_locs = branch_selection
+  const mxnet::Tuple<dim_t> &func_input_locs = branch_selection
                                             ? params.then_input_locs
                                             : params.else_input_locs;
   LoopState &loop_state = branch_selection
@@ -1048,7 +1048,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs,
   // infer shape for cond, then and else
   auto infer_subg = [&params, in_shape, out_shape](std::shared_ptr<Symbol> subg,
                                                    ShapeVector *_subg_out,
-                                                   const nnvm::Tuple<dim_t> &input_locs,
+                                                   const mxnet::Tuple<dim_t> &input_locs,
                                                    bool fill_out_shape) {
     // create subg_in
     mxnet::ShapeVector subg_in;
@@ -1086,7 +1086,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs,
     for (size_t i = 0; i < subg_in.size(); ++i) {
       auto eid = idx.entry_id(input_nids[i], 0);
       auto g_out_shape = new_shapes[eid];
-      if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) {
+      if (!shape_is_known(g_out_shape)) {
         // when the shape is not fully inferred
         continue;
       }
@@ -1099,7 +1099,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs,
     for (size_t i = 0; i < g.outputs.size(); ++i) {
       auto eid = idx.entry_id(g.outputs[i]);
       auto g_out_shape = new_shapes[eid];
-      if (g_out_shape.ndim() == 0 || g_out_shape.Size() == 0) {
+      if (!shape_is_known(g_out_shape)) {
         // when the shape is not fully inferred
         continue;
       }
@@ -1107,7 +1107,7 @@ static bool CondShape(const nnvm::NodeAttrs& attrs,
     }
     return g.GetAttr<size_t>("shape_num_unknown_nodes") == 0;
   };
-  ShapeVector cond_out_shape{mxnet::TShape(1U)};  // this means: [(1, )]
+  ShapeVector cond_out_shape{mxnet::TShape(1, 1)};  // this means: [(1, )]
   ShapeVector then_out_shape(params.num_outputs);
   ShapeVector else_out_shape(params.num_outputs);
   bool succ_0 = infer_subg(attrs.subgraphs[0], &cond_out_shape, \
@@ -1190,7 +1190,7 @@ static bool BackwardCondStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size() + 3U, (size_t) params.num_args);
   CHECK_EQ(attrs.subgraphs.size(), 3U);
   static const std::function<bool(const int &)> is_udf = is_stype_udf;
-  auto sub_pass = [&](const std::shared_ptr<Symbol> &subg, const nnvm::Tuple<dim_t> &input_locs) {
+  auto sub_pass = [&](const std::shared_ptr<Symbol> &subg, const mxnet::Tuple<dim_t> &input_locs) {
     // A. first construct subg_in_attrs
     // need subg_in_attrs as subg_bwd_out (copy), subg_fwd_in (extract), subg_fwd_out (copy)
     std::vector<int> subg_in_attrs;
diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h
index ed6748a9c85c..0d6ffd7e895e 100644
--- a/src/operator/convolution_v1-inl.h
+++ b/src/operator/convolution_v1-inl.h
@@ -64,11 +64,11 @@ struct ConvolutionV1Param : public dmlc::Parameter<ConvolutionV1Param> {
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(ConvolutionV1Param) {
     DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0))
     .describe("convolution stride: (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0))
     .describe("convolution dilate: (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0))
     .describe("pad for convolution: (h, w) or (d, h, w)");
     DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
     .describe("convolution filter(channel) number");
@@ -405,7 +405,7 @@ class ConvolutionV1Prop : public OperatorProperty {
     // CHECK_EQ(out_shape->size(), 1) << "Output: [output]";
     out_shape->resize(1, mxnet::TShape());
     const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData];
-    if (dshp.ndim() ==  0) return false;
+    if (!shape_is_known(dshp)) return false;
     if (param_.kernel.ndim() == 2) {
       // 2d conv_v1
       CHECK_EQ(dshp.ndim(), 4U) \
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index c37324678120..182cd682af8b 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -93,7 +93,7 @@ inline bool ToTensorShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
 
   mxnet::TShape &shp = (*in_attrs)[0];
-  if (!shp.ndim()) return false;
+  if (!shape_is_known(shp)) return false;
 
   CHECK((shp.ndim() == 3) || (shp.ndim() == 4))
       << "Input image must have shape (height, width, channels), or "
@@ -549,7 +549,7 @@ template<typename DType, int axis>
 void FlipImpl(const mxnet::TShape &shape, DType *src, DType *dst) {
   int head = 1, mid = shape[axis], tail = 1;
   for (int i = 0; i < axis; ++i) head *= shape[i];
-  for (uint32_t i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];
+  for (int i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];
 
   for (int i = 0; i < head; ++i) {
     for (int j = 0; j < (mid >> 1); ++j) {
diff --git a/src/operator/image/resize-inl.h b/src/operator/image/resize-inl.h
index de2189838d76..4ebebbfb272c 100644
--- a/src/operator/image/resize-inl.h
+++ b/src/operator/image/resize-inl.h
@@ -49,12 +49,12 @@ void ResizeImplCUDA(Stream<gpu> *s,
 #endif  // MXNET_USE_CUDA
 
 struct ResizeParam : public dmlc::Parameter<ResizeParam> {
-  nnvm::Tuple<int> size;
+  mxnet::Tuple<int> size;
   bool keep_ratio;
   int interp;
   DMLC_DECLARE_PARAMETER(ResizeParam) {
     DMLC_DECLARE_FIELD(size)
-    .set_default(nnvm::Tuple<int>())
+    .set_default(mxnet::Tuple<int>())
     .describe("Size of new image. Could be (width, height) or (size)");
     DMLC_DECLARE_FIELD(keep_ratio)
     .describe("Whether to resize the short edge or both edges to `size`, "
diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index cfdd1064d6fb..aef990010818 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -315,7 +315,7 @@ class LeakyReLUOp : public Operator {
     return a < b ? (a < c ? a : c) : (b < c ? b : c);
   }
   static inline mxnet::TShape expand_shape(const mxnet::TShape& src, const mxnet::TShape& dst) {
-    mxnet::TShape result(dst.ndim());
+    mxnet::TShape result(dst.ndim(), -1);
     int s = src.ndim() - 1;
     for (int i = dst.ndim() - 1; i >= 0; i--) {
       if (s >= 0 && i <= 1 && (dst[i] == src[s] || src[s] == 1)) {
diff --git a/src/operator/loss_binary_op-inl.h b/src/operator/loss_binary_op-inl.h
index a3853c56359a..1d71993da515 100644
--- a/src/operator/loss_binary_op-inl.h
+++ b/src/operator/loss_binary_op-inl.h
@@ -43,7 +43,7 @@ inline bool SoftmaxCrossEntropyShape(const nnvm::NodeAttrs& attrs,
       << "SoftmaxCrossEntropy only accept 1D label";
   CHECK_EQ((*in_attrs)[0][0], (*in_attrs)[1][0])
       << "SoftmaxCrossEntropy: data label shape mismatch";
-  SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1));
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1));
   return true;
 }
 
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 511fe455e946..590b1b428023 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -332,7 +332,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs,
 
   const int channelCount = dshape[channelAxis];
 
-  if (dshape.ndim() == 0) {
+  if (!shape_is_known(dshape)) {
     return false;
   }
 
diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc
index fa441c45321e..5435bd815334 100644
--- a/src/operator/nn/concat.cc
+++ b/src/operator/nn/concat.cc
@@ -39,39 +39,40 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs,
   const ConcatParam& param_ = nnvm::get<ConcatParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), static_cast<size_t>(param_.num_args));
   mxnet::TShape dshape;
-  index_t size = 0;
-  bool has_zero = false;
+  dim_t size = 0;
+  bool has_unknown_dim_size = false;
   int axis = -1;
   for (int i = 0; i < param_.num_args; ++i) {
     mxnet::TShape tmp = (*in_shape)[i];
-    if (tmp.ndim()) {
+    if (tmp.ndim() > 0) {
       axis = CheckAxis(param_.dim, tmp.ndim());
-      has_zero = tmp[axis] == 0 || has_zero;
+      has_unknown_dim_size = tmp[axis] == -1 || has_unknown_dim_size;
       size += tmp[axis];
-      tmp[axis] = 0;
+      tmp[axis] = -1;
       shape_assign(&dshape, tmp);
     }
   }
 
   mxnet::TShape tmp = (*out_shape)[0];
-  if (tmp.ndim()) {
+  if (tmp.ndim() > 0) {
     axis = CheckAxis(param_.dim, tmp.ndim());
-    tmp[axis] = 0;
+    tmp[axis] = -1;
     shape_assign(&dshape, tmp);
   }
 
-  if (dshape.ndim() == 0) return false;
+  if (dshape.ndim() == -1) return false;
+  CHECK_NE(dshape.ndim(), 0) << "zero-dimensional arrays cannot be concatenated";
 
   for (int i = 0; i < param_.num_args; ++i) {
     CHECK(shape_assign(&(*in_shape)[i], dshape))
         << "Incompatible input shape: expected " << dshape << ", got " << (*in_shape)[i];
   }
 
-  if (!has_zero) dshape[axis] = size;
+  if (!has_unknown_dim_size) dshape[axis] = size;
   CHECK(shape_assign(&(*out_shape)[0], dshape))
       << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0];
 
-  return dshape.Size() != 0;
+  return shape_is_known(dshape);
 }
 
 // Concat for RNN param deals with the reverse shape inference from output
@@ -109,7 +110,7 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs,
     shape_assign(&dshape, tmp);
   }
 
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
 
   for (int i = 0; i < param_.num_args; ++i) {
     CHECK(shape_assign(&(*in_shape)[i], dshape))
@@ -232,7 +233,7 @@ bool SupportMKLDNNConcat(const std::vector<NDArray> &arrs) {
   for (auto &arr : arrs) {
     if (arr.IsView()) return false;
     if (arr.dtype() != mshadow::kFloat32) return false;
-    unsigned ndim = arr.shape().ndim();
+    int ndim = arr.shape().ndim();
     unsigned mkldnn_ndims =
         static_cast<unsigned>(arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims);
     if (!(ndim == 2 || ndim == 4) || ndim != mkldnn_ndims) return false;
diff --git a/src/operator/nn/convolution-inl.h b/src/operator/nn/convolution-inl.h
index 7ae34ae363b4..7d5f7c7d5757 100644
--- a/src/operator/nn/convolution-inl.h
+++ b/src/operator/nn/convolution-inl.h
@@ -69,11 +69,11 @@ struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(ConvolutionParam) {
     DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (w,), (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0))
     .describe("Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0))
     .describe("Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0))
     .describe("Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.");
     DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
     .describe("Convolution filter(channel) number");
@@ -209,9 +209,9 @@ class ConvolutionOp {
       Tensor<xpu, 1, DType> workspace = ctx.requested[conv::kTempSpace]
         .get_space_typed<xpu, 1, DType>(Shape1(col_buffer_size_), s);
       // calculate the shape of col_buffer
-      mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1);
+      mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, 1);
       col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size();
-      for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) {
+      for (int i = 1; i < col_buffer_shape.ndim(); ++i) {
         col_buffer_shape[i] = out_data[0].shape_[i+1];
       }
       // create a column buffer using workspace and col_buffer_shape
@@ -295,9 +295,9 @@ class ConvolutionOp {
       Tensor<xpu, 1, DType> workspace = ctx.requested[conv::kTempSpace]
         .get_space_typed<xpu, 1, DType>(Shape1(col_buffer_size_), s);
       // calculate the shape of col_buffer
-      mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1);
+      mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, 1);
       col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size();
-      for (index_t i = 1; i < col_buffer_shape.ndim(); ++i) {
+      for (int i = 1; i < col_buffer_shape.ndim(); ++i) {
         col_buffer_shape[i] = out_grad[conv::kData].shape_[i+1];
       }
       // create a column buffer using workspace and col_buffer_shape
@@ -342,10 +342,10 @@ class ConvolutionOp {
   void LayerSetUp(const mxnet::TShape& ishape, const mxnet::TShape& oshape) {
     channel_axis_ = 1;  // hard code channel axis
     const index_t first_spatial_axis = channel_axis_ + 1;
-    const index_t num_axes = param_.kernel.ndim() + 2;
+    const int num_axes = param_.kernel.ndim() + 2;
     num_spatial_axes_ = num_axes - first_spatial_axis;
     is_1x1_ = true;
-    for (index_t i = 0; i < param_.kernel.ndim(); ++i) {
+    for (int i = 0; i < param_.kernel.ndim(); ++i) {
       is_1x1_ &= param_.kernel[i] == 1 && param_.stride[i] == 1 && param_.pad[i] == 0;
       if (!is_1x1_) break;
     }
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 527a0073930f..dfbc89de7b0e 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -96,7 +96,7 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
   // CHECK_EQ(out_shape->size(), 1) << "Output: [output]";
   out_shape->resize(1, mxnet::TShape());
   const mxnet::TShape &dshp = (*in_shape)[conv::kData];
-  if (dshp.ndim() ==  0) return false;
+  if (!shape_is_known(dshp)) return false;
 
   if (param_.kernel.ndim() == 1) {
     // 1d conv
diff --git a/src/operator/nn/ctc_loss-inl.h b/src/operator/nn/ctc_loss-inl.h
index 357888dc30f1..8c841dfc24b4 100644
--- a/src/operator/nn/ctc_loss-inl.h
+++ b/src/operator/nn/ctc_loss-inl.h
@@ -239,7 +239,7 @@ inline bool CTCLossOpShape(const nnvm::NodeAttrs &attrs,
                                       "the maximum sequence length of the "
                                       "data.";
 
-    mxnet::TShape oshape(1);
+    mxnet::TShape oshape(1, -1);
     oshape[0] = dshape[1];  // batch size
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);  // forward output
     SHAPE_ASSIGN_CHECK(*out_attrs, 1, dshape);  // grad output
diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cc b/src/operator/nn/cudnn/cudnn_batch_norm.cc
index 5632028dd769..1df888e4b38a 100644
--- a/src/operator/nn/cudnn/cudnn_batch_norm.cc
+++ b/src/operator/nn/cudnn/cudnn_batch_norm.cc
@@ -37,7 +37,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, moving_mean, moving_var]";
   const mxnet::TShape &dshape = in_shape->at(0);
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   in_shape->at(1) = mxnet::TShape(Shape1(dshape[1]));
   in_shape->at(2) = mxnet::TShape(Shape1(dshape[1]));
   in_shape->at(3) = mxnet::TShape(Shape1(dshape[1]));
diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index 55b263896339..44d1c3c36e99 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -1015,9 +1015,9 @@ class CuDNNConvolutionOp {
   // e.g. {shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1}
   template <int dim>
   inline Shape<dim> Strides(const mxnet::TShape &s) {
-    uint32_t ndim = s.ndim();
+    int ndim = s.ndim();
     mxnet::TShape strides(ndim);
-    for (uint32_t i = 0; i != ndim; ++i)
+    for (int i = 0; i != ndim; ++i)
       strides[i] = s.ProdShape(i+1, ndim);
     return strides.get<dim>();
   }
diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
index 47f688c8ab9c..f652dd85bd41 100644
--- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
@@ -933,9 +933,9 @@ class CuDNNDeconvolutionOp {
   // e.g. {shape[0], shape[1], shape[2]} -> {shape[1]*shape[2], shape[2], 1}
   template <int dim>
   inline Shape<dim> Strides(const mxnet::TShape &s) {
-    uint32_t ndim = s.ndim();
+    int ndim = s.ndim();
     mxnet::TShape strides(ndim);
-    for (uint32_t i = 0; i != ndim; ++i)
+    for (int i = 0; i != ndim; ++i)
       strides[i] = s.ProdShape(i+1, ndim);
     return strides.get<dim>();
   }
diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
index 5248c1211ac7..b28e47818392 100644
--- a/src/operator/nn/deconvolution-inl.h
+++ b/src/operator/nn/deconvolution-inl.h
@@ -65,13 +65,13 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
   DMLC_DECLARE_PARAMETER(DeconvolutionParam) {
     DMLC_DECLARE_FIELD(kernel).describe("Deconvolution kernel size: (w,), (h, w) or (d, h, w). "
                   "This is same as the kernel size used for the corresponding convolution");
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0))
         .describe("The stride used for the corresponding convolution: (w,), (h, w) or (d, h, w). "
                   "Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0))
         .describe("Dilation factor for each dimension of the input: (w,), (h, w) or (d, h, w). "
                   "Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0))
         .describe("The amount of implicit zero padding added during convolution for each "
                   "dimension of the input: "
                   "(w,), (h, w) or (d, h, w). "
@@ -79,11 +79,11 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
                   "If `target_shape` is set, "
                   "`pad` will be ignored and a padding that will generate the target shape "
                   "will be used. Defaults to no padding.");
-    DMLC_DECLARE_FIELD(adj).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(adj).set_default(mxnet::TShape(0, 0))
         .describe("Adjustment for output shape: (w,), (h, w) or (d, h, w). "
                   "If `target_shape` is set, "
                   "`adj` will be ignored and computed accordingly.");
-    DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(target_shape).set_default(mxnet::TShape(0, 0))
         .describe("Shape of the output tensor: (w,), (h, w) or (d, h, w).");
     DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
         .describe("Number of output filters.");
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 27928b9b41c3..d8c91f7f96c8 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -54,7 +54,7 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs,
   }
   out_shape->resize(1, mxnet::TShape());
   const mxnet::TShape &dshape = (*in_shape)[deconv::kData];
-  if (dshape.ndim() ==  0) return false;
+  if (!shape_is_known(dshape)) return false;
 
   if (param_.kernel.ndim() == 1) {
     // 1d conv
diff --git a/src/operator/nn/dropout-inl.h b/src/operator/nn/dropout-inl.h
index 01611dfce191..a34d2992c8c6 100644
--- a/src/operator/nn/dropout-inl.h
+++ b/src/operator/nn/dropout-inl.h
@@ -78,7 +78,7 @@ struct DropoutParam : public dmlc::Parameter<DropoutParam> {
     .add_enum("always", dropout::kAlways)
     .set_default(dropout::kTraining)
     .describe("Whether to only turn on dropout during training or to also turn on for inference.");
-    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0, 0))
     .describe("Axes for variational dropout kernel.");
     DMLC_DECLARE_FIELD(cudnn_off).set_default(dmlc::optional<bool>(false))
     .describe("Whether to turn off cudnn in dropout operator. "
diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc
index 5fdc672d766e..0e4d18b1fda8 100644
--- a/src/operator/nn/dropout.cc
+++ b/src/operator/nn/dropout.cc
@@ -95,10 +95,10 @@ Example::
   CHECK_EQ(in_shape->size(), 1U);
   const DropoutParam& param = nnvm::get<DropoutParam>(attrs.parsed);
   mxnet::TShape dshape(in_shape->at(0));
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   out_shape->clear();
   out_shape->push_back(dshape);
-  for (index_t i = 0; i < param.axes.ndim(); ++i) {
+  for (int i = 0; i < param.axes.ndim(); ++i) {
     dshape[param.axes[i]] = 1;
   }
   out_shape->push_back(dshape);
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index 2bc321832af6..98277c206dd6 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -52,7 +52,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape dshape = (*in_shape)[fullc::kData];
   mxnet::TShape oshape = (*out_shape)[0];
   // require data to be known
-  if (dshape.ndim() ==  0) return false;
+  if (!shape_is_known(dshape)) return false;
 
   index_t num_input;
   if (!param.flatten) {
@@ -75,7 +75,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs,
   } else {
     SHAPE_ASSIGN_CHECK(*out_shape, 0, Shape2(dshape[0], param.num_hidden));
   }
-  if (oshape.ndim() != 0) {
+  if (oshape.ndim() > 0) {
     dshape[0] = oshape[0];
     SHAPE_ASSIGN_CHECK(*in_shape, fullc::kData, dshape);
   }
diff --git a/src/operator/nn/im2col.h b/src/operator/nn/im2col.h
index 0059a420726d..06a4e1b75b33 100644
--- a/src/operator/nn/im2col.h
+++ b/src/operator/nn/im2col.h
@@ -152,7 +152,7 @@ inline void im2col_nd_core_cpu(const DType* data_input, const bool im2col,
     const mxnet::TShape& kernel_shape, const mxnet::TShape& pad, const mxnet::TShape& stride,
     const mxnet::TShape& dilation, DType* data_output, OpReqType req = mxnet::kWriteTo) {
   if (mxnet::kNullOp == req) return;
-  index_t num_spatial_axes = kernel_shape.ndim();
+  int num_spatial_axes = kernel_shape.ndim();
   if (!im2col) {
     index_t im_size = im_shape[1];  // skip batch dim
     for (index_t i = 0; i < num_spatial_axes; ++i) {
@@ -319,7 +319,7 @@ inline void col2im(mshadow::Stream<cpu>* s,
                    const mxnet::TShape& col_shape, const mxnet::TShape& kernel_shape,
                    const mxnet::TShape& pad, const mxnet::TShape& stride,
                    const mxnet::TShape& dilation, DType* data_im, OpReqType req) {
-  index_t num_spatial_axes = kernel_shape.ndim();
+  int num_spatial_axes = kernel_shape.ndim();
   if (2 == num_spatial_axes) {
     col2im_cpu(data_col, im_shape[1], im_shape[2], im_shape[3],
                kernel_shape[0], kernel_shape[1], pad[0], pad[1],
diff --git a/src/operator/nn/layer_norm-inl.h b/src/operator/nn/layer_norm-inl.h
index dc4914bf2457..c7de7d734521 100644
--- a/src/operator/nn/layer_norm-inl.h
+++ b/src/operator/nn/layer_norm-inl.h
@@ -167,7 +167,7 @@ void LayerNormGradCompute(const nnvm::NodeAttrs& attrs,
   const LayerNormParam& param = nnvm::get<LayerNormParam>(attrs.parsed);
   int axis = param.axis;
   if (axis < 0) {
-    axis += static_cast<int>(inputs[0].ndim());
+    axis += inputs[0].ndim();
   }
   CHECK(axis >= 0 && axis < inputs[0].ndim()) << "Channel axis out of range: " << param.axis;
   Stream<xpu> *s = ctx.get_stream<xpu>();
diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc
index d4c308398cb7..1b0e99d8fe87 100644
--- a/src/operator/nn/layer_norm.cc
+++ b/src/operator/nn/layer_norm.cc
@@ -41,14 +41,14 @@ static bool LayerNormShape(const nnvm::NodeAttrs& attrs,
   const mxnet::TShape &dshape = in_shape->at(layernorm::kData);
   int axis = param.axis;
   if (axis < 0) {
-    axis += static_cast<int>(dshape.ndim());
+    axis += dshape.ndim();
   }
-  CHECK(axis >= 0 && axis < static_cast<int>(dshape.ndim()))
+  CHECK(axis >= 0 && axis < dshape.ndim())
     << "Channel axis out of range: axis=" << param.axis;
 
   const int channelCount = dshape[axis];
 
-  if (dshape.ndim() == 0) {
+  if (!shape_is_known(dshape)) {
     return false;
   }
 
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 410bdab667e5..b632e35b57fe 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -40,7 +40,7 @@ bool LRNShape(const nnvm::NodeAttrs& attrs,
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
   const mxnet::TShape &dshape = in_shape->at(0);
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   out_shape->clear();
   out_shape->push_back(dshape);
   out_shape->push_back(dshape);
diff --git a/src/operator/nn/pooling-inl.h b/src/operator/nn/pooling-inl.h
index 9e1e73bf19e2..03f0fa8edd6c 100644
--- a/src/operator/nn/pooling-inl.h
+++ b/src/operator/nn/pooling-inl.h
@@ -55,7 +55,7 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
   dmlc::optional<bool> count_include_pad;
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(PoolingParam) {
-    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape())  // add default value here
+    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0, 0))  // add default value here
     .enforce_nonzero()
     .describe("Pooling kernel size: (y, x) or (d, y, x)");
 
@@ -78,11 +78,11 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
     .add_enum("same", pool_enum::kSame)
     .describe("Pooling convention to be applied.");
 
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0))
     .enforce_nonzero()
     .describe("Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension.");
 
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0))
     .describe("Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.");
 
     DMLC_DECLARE_FIELD(p_value).set_default(dmlc::optional<int>())
@@ -200,11 +200,11 @@ class PoolingOp {
         kernel = mxnet::TShape(ishape.data() + 2,
                         ishape.data() + ishape.ndim());
       }
-      padding = mxnet::TShape(ishape.ndim() - 2);
+      padding = mxnet::TShape(ishape.ndim() - 2, 0);
       for (index_t i = 0; i < ishape.ndim() - 2; i++) {
         padding[i] = 0;
       }
-      stride = mxnet::TShape(ishape.ndim() - 2);
+      stride = mxnet::TShape(ishape.ndim() - 2, 1);
     }
     const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ?
                         param_.p_value.value() : 1;
@@ -257,11 +257,11 @@ class PoolingOp {
         kernel = mxnet::TShape(ishape.data() + 2,
                         ishape.data() + ishape.ndim());
       }
-      padding = mxnet::TShape(ishape.ndim() - 2);
+      padding = mxnet::TShape(ishape.ndim() - 2, 0);
       for (index_t i = 0; i < ishape.ndim() - 2; i++) {
         padding[i] = 0;
       }
-      stride = mxnet::TShape(ishape.ndim() - 2);
+      stride = mxnet::TShape(ishape.ndim() - 2, 1);
     }
 
     const int p_value = (param_.pool_type == pool_enum::kLpPooling && param_.p_value.has_value()) ?
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 2d16604baa20..7c365f5081a1 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -114,11 +114,11 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
       << "Pooling: Input data should be  3D in (batch, channel, x)"
       << " Or 4D in (batch, channel, y, x) "
       << " Or 5D in (batch, channel, d, y, x)";
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   int layout = param.GetLayout(dshape.ndim());
   if (param.global_pool) {
     mxnet::TShape oshape = dshape;
-    size_t c_index = 0;
+    int c_index = 0;
     switch (layout) {
       case mshadow::kNCW:
       case mshadow::kNCHW:
@@ -133,7 +133,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
       default:
         LOG(FATAL) << "Unsupported tensor layout " << param.layout.value();
     }
-    for (size_t i{1}; i < dshape.ndim(); i++)
+    for (int i = 1; i < dshape.ndim(); i++)
       if (i != c_index)
         oshape[i] = 1;
     out_shape->clear();
diff --git a/src/operator/nn/upsampling.cc b/src/operator/nn/upsampling.cc
index d09017bf713e..ac638162dc6d 100644
--- a/src/operator/nn/upsampling.cc
+++ b/src/operator/nn/upsampling.cc
@@ -60,7 +60,7 @@ static bool UpSamplingShape(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
     CHECK_EQ(dshape.ndim(), 4U) << \
       "UpSamplingBilinear: Input data should be 4D in (batch, channel, y, x)";
-    if (dshape.ndim() ==  0) return false;
+    if (!shape_is_known(dshape)) return false;
     int kernel = 2 * param_.scale - param_.scale % 2;
     SHAPE_ASSIGN_CHECK(*in_shape,
         up_enum::kWeight,
diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h
index bb2b7fca231c..e0379a040c3f 100644
--- a/src/operator/numpy/np_broadcast_reduce_op.h
+++ b/src/operator/numpy/np_broadcast_reduce_op.h
@@ -59,7 +59,7 @@ inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape,
         CHECK(axes[0] == 0 || axes[0] == -1);
       }
     }
-    return TShape(0);
+    return TShape(0, -1);
   }
 
   // axis=None, do global reduction
@@ -67,7 +67,7 @@ inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape,
     if (keepdims) {
       return TShape(ishape.ndim(), 1);
     } else {
-      return TShape(0);
+      return TShape(0, -1);
     }
   }
 
@@ -101,7 +101,7 @@ inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape,
   if (keepdims) {
     oshape = TShape(ishape);
   } else {
-    oshape = TShape(ishape.ndim() - axes.ndim());
+    oshape = TShape(ishape.ndim() - axes.ndim(), -1);
   }
 
   if (keepdims) {
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index a461d2bc4cef..c95f859a1b5b 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -103,19 +103,10 @@ struct InferStorageTypeError : public dmlc::Error {
     : dmlc::Error(msg_), msg(msg_), index(index) {}
 };
 
-/*! \brief check if shape is empty or contains unknown (0) dim. */
+/*! \brief check if shape is empty or contains unknown (0) dim.
+ * DEPRECATED. */
 inline bool shape_is_none(const mxnet::TShape& x) {
-  return x.ndim() == 0 || x.Size() == 0;
-}
-
-/*! brief check if shape is known using the NumPy compatible definition.
- * zero-dim and zero-size tensors are valid. -1 means unknown.*/
-inline bool shape_is_known(const TShape& x) {
-  if (x.ndim() == -1) return false;
-  for (int i = 0; i < x.ndim(); ++i) {
-    if (x[i] == -1) return false;
-  }
-  return true;
+  return !mxnet::shape_is_known(x);
 }
 
 /*! \brief check if type is none (-1) */
@@ -130,7 +121,7 @@ inline bool storage_type_is_none(const int& x) {
 
 /*! \brief check if shape is scalar({1}). */
 inline bool shape_is_scalar(const mxnet::TShape& x) {
-  return x.ndim() == 1 && x.Size() == 1;
+  return x.ndim() == 0;
 }
 
 /*! \brief get string representation of shape */
@@ -573,7 +564,7 @@ class OpSignature {
   }
 
   void AddSign(const mxnet::TShape &shape) {
-    for (size_t i = 0; i < shape.ndim(); i++) {
+    for (int i = 0; i < shape.ndim(); i++) {
       hash = hash * 2 + shape[i];
       eles.push_back(shape[i]);
     }
diff --git a/src/operator/operator_util.cc b/src/operator/operator_util.cc
index b87428ca2b64..bc097a5b0c1c 100644
--- a/src/operator/operator_util.cc
+++ b/src/operator/operator_util.cc
@@ -774,7 +774,7 @@ class SimpleUnaryOpProp : public SimpleOpPropBase {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 1) << "Input:[data]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (dshape.ndim() == 0) return false;
+    if (!shape_is_known(dshape)) return false;
     out_shape->clear();
     if (source->unary_shape_ == nullptr) {
       out_shape->push_back(dshape);
diff --git a/src/operator/quantization/dequantize-inl.h b/src/operator/quantization/dequantize-inl.h
index dcda5a8b4bef..88199bc2591d 100644
--- a/src/operator/quantization/dequantize-inl.h
+++ b/src/operator/quantization/dequantize-inl.h
@@ -103,7 +103,7 @@ inline bool DequantizeShape(const nnvm::NodeAttrs& attrs,
   }
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
-  return !shape_is_none(out_attrs->at(0));
+  return shape_is_known(out_attrs->at(0));
 }
 
 inline bool DequantizeType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 1ad0016c52bc..2c267a76a571 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -126,7 +126,7 @@ inline bool QuantizeShape(const nnvm::NodeAttrs& attrs,
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
   SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape{1});
   SHAPE_ASSIGN_CHECK(*out_attrs, 2, mxnet::TShape{1});
-  return !shape_is_none(out_attrs->at(0));
+  return shape_is_known(out_attrs->at(0));
 }
 
 inline bool QuantizeType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc
index e32bb5a18e1a..f97807424701 100644
--- a/src/operator/quantization/quantized_concat.cc
+++ b/src/operator/quantization/quantized_concat.cc
@@ -55,7 +55,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha
     shape_assign(&dshape, tmp);
   }
 
-  if (dshape.ndim() == 0) return false;
+  if (dshape.ndim() == -1) return false;
 
   for (int i = 0; i < param_.num_args; ++i) {
     CHECK(shape_assign(&(*in_shape)[i], dshape))
diff --git a/src/operator/quantization/quantized_flatten-inl.h b/src/operator/quantization/quantized_flatten-inl.h
index 99a262de19ca..de051b969659 100644
--- a/src/operator/quantization/quantized_flatten-inl.h
+++ b/src/operator/quantization/quantized_flatten-inl.h
@@ -86,10 +86,10 @@ inline bool QuantizedFlattenShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 3U);
 
   const mxnet::TShape &dshape = (*in_attrs)[0];
-  if (shape_is_none(dshape)) return false;
+  if (!shape_is_known(dshape)) return false;
 
-  uint32_t target_dim = 1;
-  for (uint32_t i = 1; i < dshape.ndim(); ++i) {
+  dim_t target_dim = 1;
+  for (int i = 1; i < dshape.ndim(); ++i) {
     target_dim *= dshape[i];
   }
 
diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index 0a04e71b9093..cc4365f818d2 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -47,7 +47,7 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_shape->size(), num_inputs * 3);
   CHECK_EQ(out_shape->size(), 3U);
 
-  CHECK(!shape_is_none(in_shape->at(0)))
+  CHECK(shape_is_known(in_shape->at(0)))
     << "QuantizedFullyConnectedOp input data shape must be given";
   const mxnet::TShape& dshape = in_shape->at(0);
   index_t num_input;
diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc
index af604080a756..1839e2a29d77 100644
--- a/src/operator/quantization/quantized_pooling.cc
+++ b/src/operator/quantization/quantized_pooling.cc
@@ -35,7 +35,7 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
                            mxnet::ShapeVector *out_shape) {
   const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), 3U);
-  if (shape_is_none(in_shape->at(0))) return false;
+  if (!shape_is_known(in_shape->at(0))) return false;
   const mxnet::TShape &dshape = (*in_shape)[0];
   CHECK_EQ(dshape.ndim(), 4U)
       << "quantized_pooling: Input data should be 4D in "
@@ -45,7 +45,7 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
       << "QuantizedPoolingOp only supports NCHW layout for now, saw " << layout;
   // NCHW layout
   const int N = 0, H = 2, W = 3, C = 1;
-  mxnet::TShape oshape(4);
+  mxnet::TShape oshape(4, -1);
   CHECK_EQ(param.kernel.ndim(), 2) << "QuantizedPoolingOp only supports 2D pooling for now";
   CHECK(param.kernel[0] <= dshape[H] + 2 * param.pad[0])
       << "kernel size (" << param.kernel[0]
diff --git a/src/operator/random/multisample_op.h b/src/operator/random/multisample_op.h
index e9f266932e13..7d5e256297ad 100644
--- a/src/operator/random/multisample_op.h
+++ b/src/operator/random/multisample_op.h
@@ -66,7 +66,7 @@ inline bool MultiSampleOpShape(const nnvm::NodeAttrs& attrs,
   // Get shape to be sampled for each parameter set.
   const MultiSampleParam& param = nnvm::get<MultiSampleParam>(attrs.parsed);
   mxnet::TShape sshape = param.shape;
-  for (size_t i = 0; i < sshape.ndim(); ++i) {
+  for (int i = 0; i < sshape.ndim(); ++i) {
     CHECK_GT(sshape[i], 0) << "shape parameter must be non-zero within each dimension";
   }
   // Examine output shape whether it is already defined.
diff --git a/src/operator/random/sample_multinomial_op.h b/src/operator/random/sample_multinomial_op.h
index e76cd646b850..b38aefbc1634 100644
--- a/src/operator/random/sample_multinomial_op.h
+++ b/src/operator/random/sample_multinomial_op.h
@@ -41,7 +41,7 @@ struct SampleMultinomialParam : public dmlc::Parameter<SampleMultinomialParam> {
   int dtype;
   DMLC_DECLARE_PARAMETER(SampleMultinomialParam) {
     DMLC_DECLARE_FIELD(shape)
-      .set_default(mxnet::TShape())
+      .set_default(mxnet::TShape(0, 1))
       .describe("Shape to be sampled from each random distribution.");
     DMLC_DECLARE_FIELD(get_prob)
     .set_default(false)
@@ -68,7 +68,7 @@ inline bool SampleMultinomialOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), param.get_prob ? 2U : 1U);
   const mxnet::TShape& ishape = (*in_attrs)[0];
-  if (!ishape.ndim()) return false;
+  if (!shape_is_known(ishape)) return false;
 
   MSHADOW_TYPE_SWITCH(param.dtype, DType, {
     CHECK_LE(ishape[ishape.ndim() - 1], mxnet::common::MaxIntegerValue<DType>())
@@ -76,26 +76,26 @@ inline bool SampleMultinomialOpShape(const nnvm::NodeAttrs& attrs,
   });
 
   if (ishape.ndim() == 1) {
-    if (param.shape.ndim()) {
+    if (param.shape.ndim() > 0) {
       SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape);
       if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, param.shape);
     } else {
-      SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1));
-      if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1));
+      SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1));
+      if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1, 1));
     }
     return true;
   }
 
-  mxnet::TShape oshape(ishape.ndim() - 1 + param.shape.ndim());
-  for (size_t i = 0; i < ishape.ndim() - 1; ++i) {
+  mxnet::TShape oshape(ishape.ndim() - 1 + param.shape.ndim(), -1);
+  for (int i = 0; i < ishape.ndim() - 1; ++i) {
     oshape[i] = ishape[i];
   }
-  for (size_t i = 0; i < param.shape.ndim(); ++i) {
+  for (int i = 0; i < param.shape.ndim(); ++i) {
     oshape[i + ishape.ndim() - 1] = param.shape[i];
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
   if (param.get_prob) SHAPE_ASSIGN_CHECK(*out_attrs, 1, oshape);
-  return true;
+  return shape_is_known(out_attrs->at(0)) && shape_is_known(out_attrs->at(1));
 }
 
 
diff --git a/src/operator/random/unique_sample_op.h b/src/operator/random/unique_sample_op.h
index 87998c8f46b1..c97d4fdf7ced 100644
--- a/src/operator/random/unique_sample_op.h
+++ b/src/operator/random/unique_sample_op.h
@@ -60,7 +60,7 @@ inline bool SampleUniqueShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 0U);
   CHECK_EQ(out_attrs->size(), 2U);
   // output shape is known
-  if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == 0) {
+  if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == -1) {
     SHAPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::Shape1((*out_attrs)[0][0]));
     return true;
   }
diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h
index 8b63a8a2cff6..d8f102de1675 100644
--- a/src/operator/regression_output-inl.h
+++ b/src/operator/regression_output-inl.h
@@ -57,7 +57,7 @@ inline bool RegressionOpShape(const nnvm::NodeAttrs& attrs,
   using namespace mshadow;
   CHECK_EQ(in_attrs->size(), 2U) << "Input:[data, label]";
   const mxnet::TShape &dshape = in_attrs->at(0);
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   auto &lshape = (*in_attrs)[1];
   if (lshape.ndim() == 0) {
     // special treatment for 1D output, to allow 1D label by default.
diff --git a/src/operator/sequence_last-inl.h b/src/operator/sequence_last-inl.h
index b4db80bdd721..4c42934f1618 100644
--- a/src/operator/sequence_last-inl.h
+++ b/src/operator/sequence_last-inl.h
@@ -263,7 +263,7 @@ class SequenceLastProp : public OperatorProperty {
       SHAPE_ASSIGN_CHECK(*in_shape, seq_last::kSequenceLength, Shape1(sbatch));
 
     // calculate output size
-    mxnet::TShape shape_o(dshape.ndim() - 1);
+    mxnet::TShape shape_o(dshape.ndim() - 1, -1);
     shape_o[0] = sbatch;
     for (index_t i = 1; i < shape_o.ndim(); ++i) shape_o[i] = dshape[i + 1];
 
diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h
index 6125782d525b..a51b17cd324e 100644
--- a/src/operator/slice_channel-inl.h
+++ b/src/operator/slice_channel-inl.h
@@ -195,9 +195,9 @@ class SliceChannelProp : public OperatorProperty {
     CHECK_EQ(in_shape->size(), 1U);
     mxnet::TShape dshape = in_shape->at(slice_enum::kData);
     mxnet::TShape ishape = in_shape->at(slice_enum::kData);
-    if (dshape.ndim() == 0) return false;
+    if (!shape_is_known(dshape)) return false;
     if (param_.axis >= 0) {
-      CHECK_LT(static_cast<size_t>(param_.axis), dshape.ndim());
+      CHECK_LT(param_.axis, dshape.ndim());
     } else {
       CHECK_LT(param_.axis + dshape.ndim(), dshape.ndim());
     }
diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index 5dca8bac14a3..f81a232d629a 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -337,19 +337,19 @@ class SoftmaxOutputProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (dshape.ndim() == 0) return false;
+    if (!shape_is_known(dshape)) return false;
 
     // label.shape == data.shape: use probability as label
     if (dshape != (*in_shape)[softmaxout_enum::kLabel]) {
       if (param_.multi_output) {
         mxnet::TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]);
-        mxnet::TShape lshape2(dshape.ndim() - 1);
+        mxnet::TShape lshape2(dshape.ndim() - 1, -1);
         lshape2[0] = dshape[0];
-        for (index_t i = 2; i < dshape.ndim(); ++i)
+        for (int i = 2; i < dshape.ndim(); ++i)
           lshape2[i-1] = dshape[i];
         mxnet::TShape lshape3 = dshape;
         lshape3[1] = 1;
-        if (in_shape->at(softmaxout_enum::kLabel).ndim() == 0) {
+        if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) {
           in_shape->at(softmaxout_enum::kLabel) = lshape1;
         } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) {
         } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) {
@@ -361,8 +361,8 @@ class SoftmaxOutputProp : public OperatorProperty {
           throw InferShapeError(os.str(), softmaxout_enum::kLabel);
         }
       } else {
-        mxnet::TShape label_shape(dshape.ndim() - 1);
-        for (index_t i = 0; i + 1 < dshape.ndim(); ++i)
+        mxnet::TShape label_shape(dshape.ndim() - 1, -1);
+        for (int i = 0; i + 1 < dshape.ndim(); ++i)
           label_shape[i] = dshape[i];
         SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, label_shape);
       }
diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc
index b17ef3527297..262242f98004 100644
--- a/src/operator/softmax_output.cc
+++ b/src/operator/softmax_output.cc
@@ -85,19 +85,19 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs,
   const SoftmaxOutputParam& param = nnvm::get<SoftmaxOutputParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]";
   const mxnet::TShape &dshape = in_shape->at(0);
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
 
   // label.shape == data.shape: use probability as label
   if (dshape != (*in_shape)[softmaxout_enum::kLabel]) {
     if (param.multi_output) {
       mxnet::TShape lshape1 = Shape2(dshape[0], dshape.Size()/dshape[0]/dshape[1]);
-      mxnet::TShape lshape2(dshape.ndim() - 1);
+      mxnet::TShape lshape2(dshape.ndim() - 1, -1);
       lshape2[0] = dshape[0];
-      for (index_t i = 2; i < dshape.ndim(); ++i)
+      for (int i = 2; i < dshape.ndim(); ++i)
         lshape2[i-1] = dshape[i];
       mxnet::TShape lshape3 = dshape;
       lshape3[1] = 1;
-      if (in_shape->at(softmaxout_enum::kLabel).ndim() == 0) {
+      if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) {
         in_shape->at(softmaxout_enum::kLabel) = lshape1;
       } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) {
       } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) {
@@ -109,8 +109,8 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs,
         throw InferShapeError(os.str(), softmaxout_enum::kLabel);
       }
     } else {
-      mxnet::TShape label_shape(dshape.ndim() - 1);
-      for (index_t i = 0; i + 1 < dshape.ndim(); ++i)
+      mxnet::TShape label_shape(dshape.ndim() - 1, -1);
+      for (int i = 0; i + 1 < dshape.ndim(); ++i)
         label_shape[i] = dshape[i];
       SHAPE_ASSIGN_CHECK(*in_shape, softmaxout_enum::kLabel, label_shape);
     }
diff --git a/src/operator/spatial_transformer-inl.h b/src/operator/spatial_transformer-inl.h
index 9e5dee842d0d..660d57d55bab 100644
--- a/src/operator/spatial_transformer-inl.h
+++ b/src/operator/spatial_transformer-inl.h
@@ -190,10 +190,10 @@ class SpatialTransformerProp : public OperatorProperty {
     CHECK_EQ(param_.sampler_type, st::kBilinear) << "only supports bilinear sampling currently";
     const mxnet::TShape &dshape = (*in_shape)[st::kData];
     const mxnet::TShape &lshape = (*in_shape)[st::kLoc];
-    if (dshape.ndim() ==  0) return false;
+    if (!shape_is_known(dshape)) return false;
     CHECK_EQ(dshape.ndim(), 4U) \
         << "input data should be 4D in batch-num_filter-y-x";
-    if (lshape.ndim() ==  0) return false;
+    if (!shape_is_known(lshape)) return false;
     CHECK_EQ(lshape.ndim(), 2U) \
         << "locolisation paramter should be 4D in batch-num_hidden";
     if (param_.transform_type == st::kAffine) {
diff --git a/src/operator/subgraph_op_common.cc b/src/operator/subgraph_op_common.cc
index 8934438d428a..e53d911614a0 100644
--- a/src/operator/subgraph_op_common.cc
+++ b/src/operator/subgraph_op_common.cc
@@ -178,7 +178,7 @@ bool as_bool_scalar(const NDArray &a) {
 }
 
 bool is_shape_udf(const mxnet::TShape &x) {
-  return x.ndim() == 0 || x.Size() == 0;
+  return !shape_is_known(x);
 }
 
 bool is_stype_udf(const int &x) {
@@ -225,7 +225,7 @@ void LoopState::Forward(int iter_no,
     if (!out_bufs[i].IsSame(coutputs[i])) {
       // The line below checks whether dynamic shape exists.
       // If so, re-initialize the shape.
-      if (coutputs[i].shape().ndim() == 0) {
+      if (!shape_is_known(coutputs[i].shape())) {
         const_cast<NDArray &>(coutputs[i]).Init(out_bufs[i].shape());
       }
       CopyFromTo(out_bufs[i], coutputs[i]);
diff --git a/src/operator/subgraph_op_common.h b/src/operator/subgraph_op_common.h
index 91adf576dc07..19528349c0c7 100644
--- a/src/operator/subgraph_op_common.h
+++ b/src/operator/subgraph_op_common.h
@@ -67,7 +67,7 @@ bool is_type_udf(const int &x);
 
 template <typename T>
 void extract_by_loc(const std::vector<T> &array,
-                    const nnvm::Tuple<dim_t> input_locs,
+                    const mxnet::Tuple<dim_t> input_locs,
                     std::vector<T> *out) {
   out->clear();
   out->reserve(input_locs.ndim());
@@ -94,11 +94,11 @@ bool fill_value(T *x, T *y, bool x_empty, bool y_empty) {
 }
 
 template <typename T>
-bool sync_in_in(const nnvm::Tuple<dim_t> &input_locs,
-                         std::vector<T> *in,
-                         std::vector<T> *subg_in,
-                         std::function<bool(const T &)> is_empty) {
-  for (size_t i = 0; i < input_locs.ndim(); ++i) {
+bool sync_in_in(const mxnet::Tuple<dim_t> &input_locs,
+                std::vector<T> *in,
+                std::vector<T> *subg_in,
+                std::function<bool(const T &)> is_empty) {
+  for (int i = 0; i < input_locs.ndim(); ++i) {
     T &x = in->at(input_locs[i]);
     T &y = subg_in->at(i);
     fill_value(&x, &y, is_empty(x), is_empty(y));
diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h
index 1609764f0ebe..3d651c13d8ba 100644
--- a/src/operator/svm_output-inl.h
+++ b/src/operator/svm_output-inl.h
@@ -143,9 +143,9 @@ class SVMOutputProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (dshape.ndim() == 0) return false;
-    mxnet::TShape label_shape(dshape.ndim() - 1);
-    for (index_t i = 0; i + 1 < dshape.ndim(); ++i)
+    if (!shape_is_known(dshape)) return false;
+    mxnet::TShape label_shape(dshape.ndim() - 1, -1);
+    for (int i = 0; i + 1 < dshape.ndim(); ++i)
       label_shape[i] = dshape[i];
     SHAPE_ASSIGN_CHECK(*in_shape, svm_enum::kLabel, label_shape);
     out_shape->clear();
diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h
index ce835084ab32..41cb940d957a 100644
--- a/src/operator/swapaxis-inl.h
+++ b/src/operator/swapaxis-inl.h
@@ -72,8 +72,8 @@ class SwapAxisOp : public Operator {
                     uint32_t dim1, uint32_t dim2) {
     using namespace mshadow;
     using namespace mshadow::expr;
-    index_t ndim_in = shape.ndim();
-    index_t si;
+    int ndim_in = shape.ndim();
+    int si;
 
     if (dim1 > dim2) {
       std::swap(dim1, dim2);
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 069c8ddb04fb..90afdeea858e 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -139,9 +139,9 @@ struct BroadcastAxesParam : public dmlc::Parameter<BroadcastAxesParam> {
   mxnet::TShape axis;
   mxnet::TShape size;
   DMLC_DECLARE_PARAMETER(BroadcastAxesParam) {
-    DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape(0))
       .describe("The axes to perform the broadcasting.");
-    DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape(0))
       .describe("Target sizes of the broadcasting axes.");
   }
 };
@@ -149,7 +149,7 @@ struct BroadcastAxesParam : public dmlc::Parameter<BroadcastAxesParam> {
 struct BroadcastToParam : public dmlc::Parameter<BroadcastToParam> {
   mxnet::TShape shape;
   DMLC_DECLARE_PARAMETER(BroadcastToParam) {
-    DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0))
       .describe("The shape of the desired array."
                 " We can set the dim to zero if it's same as the original."
                 " E.g `A = broadcast_to(B, shape=(10, 0, 0))` "
@@ -175,7 +175,7 @@ inline int CheckAxis(int axis, int ndim) {
 }
 
 inline mxnet::TShape AxisShapeCompact(mxnet::TShape shape, int *axis, bool allow_2d) {
-  int ndim = static_cast<int>(shape.ndim());
+  int ndim = shape.ndim();
   index_t leading = 1, trailing = 1, M = shape[*axis];
   for (int i = 0; i < *axis; ++i) leading *= shape[i];
   for (int i = *axis + 1; i < ndim; ++i) trailing *= shape[i];
@@ -196,7 +196,7 @@ inline mxnet::TShape ReduceAxisShapeImpl(const mxnet::TShape& ishape,
                                          bool keepdims) {
   if (!axis || ishape.ndim() == 1) {
     if (keepdims) {
-      return mxnet::TShape(ishape.ndim());
+      return mxnet::TShape(ishape.ndim(), 1);
     }
     return mshadow::Shape1(1);
   }
@@ -208,7 +208,7 @@ inline mxnet::TShape ReduceAxisShapeImpl(const mxnet::TShape& ishape,
     return oshape;
   }
 
-  mxnet::TShape oshape(ishape.ndim() - 1);
+  mxnet::TShape oshape(ishape.ndim() - 1, 1);
   for (int i = 0; i < new_axis; ++i) oshape[i] = ishape[i];
   for (int i = new_axis+1; i < static_cast<int>(ishape.ndim()); ++i) {
     oshape[i-1] = ishape[i];
@@ -222,7 +222,7 @@ inline bool ReduceAxisShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape& ishape = (*in_attrs)[0];
-  if (ishape.ndim() == 0) return false;
+  if (!shape_is_known(ishape)) return false;
 
   const ReduceAxisParam& param = nnvm::get<ReduceAxisParam>(attrs.parsed);
   SHAPE_ASSIGN_CHECK(*out_attrs, 0,
@@ -233,12 +233,12 @@ inline bool ReduceAxisShape(const nnvm::NodeAttrs& attrs,
 inline mxnet::TShape ReduceAxesShapeImpl(const mxnet::TShape& ishape,
                                          const dmlc::optional<mxnet::TShape>& axis,
                                          bool keepdims, bool exclude) {
-  // if axis doesn't have value, treat it same mxnet::TShape().
+  // if axis doesn't have value, treat it same mxnet::TShape(0).
   if (!axis.has_value() || axis.value().ndim() == 0) {
     if (keepdims) {
-      return mxnet::TShape(ishape.ndim());
+      return mxnet::TShape(ishape.ndim(), 1);
     } else {
-      return mxnet::TShape(1);
+      return mxnet::TShape(1, 1);
     }
   }
   // axis has value
@@ -266,9 +266,9 @@ inline mxnet::TShape ReduceAxesShapeImpl(const mxnet::TShape& ishape,
   if (keepdims) {
     oshape = mxnet::TShape(ishape);
   } else if (exclude) {
-    oshape = mxnet::TShape(axes.ndim());
+    oshape = mxnet::TShape(axes.ndim(), 1);
   } else {
-    oshape = mxnet::TShape(std::max<index_t>(1, ishape.ndim() - axes.ndim()));
+    oshape = mxnet::TShape(std::max(1, ishape.ndim() - axes.ndim()), 1);
   }
 
   if (keepdims && exclude) {
@@ -304,7 +304,7 @@ inline bool ReduceAxesShape(const nnvm::NodeAttrs& attrs,
                             mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if ((*in_attrs)[0].ndim() == 0) return false;
+  if (!shape_is_known((*in_attrs)[0])) return false;
   const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
   SHAPE_ASSIGN_CHECK(*out_attrs, 0,
                      ReduceAxesShapeImpl((*in_attrs)[0], param.axis,
@@ -334,7 +334,7 @@ inline bool NormShape(const nnvm::NodeAttrs& attrs,
                       mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if ((*in_attrs)[0].ndim() == 0) return false;
+  if (!shape_is_known((*in_attrs)[0])) return false;
   const NormParam& param = nnvm::get<NormParam>(attrs.parsed);
   SHAPE_ASSIGN_CHECK(*out_attrs, 0,
                      ReduceAxesShapeImpl((*in_attrs)[0], param.axis,
@@ -347,12 +347,12 @@ inline bool BroadcastAxesShape(const nnvm::NodeAttrs& attrs,
                                mxnet::ShapeVector *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if ((*in_attrs)[0].ndim() == 0) return false;
+  if (!shape_is_known((*in_attrs)[0])) return false;
   const BroadcastAxesParam& param = nnvm::get<BroadcastAxesParam>(attrs.parsed);
   CHECK_EQ(param.axis.ndim() , param.size.ndim());
   mxnet::TShape &ishape = (*in_attrs)[0];
   mxnet::TShape oshape = ishape;
-  for (index_t i = 0; i < param.axis.ndim(); ++i) {
+  for (int i = 0; i < param.axis.ndim(); ++i) {
     CHECK_EQ(oshape[param.axis[i]], 1U) << "Broadcasting axis must have size 1";
     oshape[param.axis[i]] = param.size[i];
   }
@@ -366,12 +366,12 @@ inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape& ishape = (*in_attrs)[0];
-  if (ishape.ndim() == 0) return false;
+  if (!shape_is_known(ishape)) return false;
   const BroadcastToParam& param = nnvm::get<BroadcastToParam>(attrs.parsed);
   CHECK_EQ(ishape.ndim(), param.shape.ndim())
     << "Operand of shape " << ishape << " cannot be broadcasted to " << param.shape;
   mxnet::TShape oshape = param.shape;
-  for (index_t i = 0; i < ishape.ndim(); ++i) {
+  for (int i = 0; i < ishape.ndim(); ++i) {
     if (oshape[i] != 0) {
       CHECK(ishape[i] == oshape[i] || ishape[i] == 1)
         << "Array cannot be broadcasted from " << ishape << " to " << param.shape;
@@ -391,7 +391,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& lhs_shape = (*in_attrs)[0];
   mxnet::TShape& rhs_shape = (*in_attrs)[1];
 
-  if ((lhs_shape.ndim() == 0) || (lhs_shape.ndim() == 0)) {
+  if (!shape_is_known(lhs_shape) || !shape_is_known(lhs_shape)) {
     return false;
   }
 
@@ -404,7 +404,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs,
       << "Operand of shape " << lhs_shape << " cannot be broadcasted to " << rhs_shape;
 
     oshape = mxnet::TShape(rhs_shape);
-    for (index_t i = 0; i < lhs_shape.ndim(); ++i) {
+    for (int i = 0; i < lhs_shape.ndim(); ++i) {
       if (rhs_shape[i] != 0) {
         CHECK(lhs_shape[i] == rhs_shape[i] || lhs_shape[i] == 1)
           << "Array cannot be broadcasted from " << lhs_shape << " to " << rhs_shape;
@@ -423,7 +423,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs,
       << "Empty axes tuple is not allowed";
 
     oshape = mxnet::TShape(lhs_shape);
-    for (index_t i = 0; i < lhs_axes.ndim(); ++i) {
+    for (int i = 0; i < lhs_axes.ndim(); ++i) {
       auto copyfrom = lhs_axes[i];
       if (copyfrom < 0) {
         copyfrom =  lhs_shape.ndim() + copyfrom;
@@ -450,9 +450,9 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs,
 
 inline void BroadcastReduceShapeCompact(const mxnet::TShape& big, const mxnet::TShape& small,
                                         mxnet::TShape *new_big, mxnet::TShape *new_small) {
-  index_t idim = std::max<index_t>(big.ndim(), MXNET_SPECIAL_MAX_NDIM);
-  *new_big = mxnet::TShape(idim);
-  *new_small = mxnet::TShape(idim);
+  const int idim = std::max(big.ndim(), MXNET_SPECIAL_MAX_NDIM);
+  *new_big = mxnet::TShape(idim, 1);
+  *new_small = mxnet::TShape(idim, 1);
   index_t j = 0;
   if (small.Size() == 1) {
     (*new_big)[j++] = big.Size();
@@ -478,12 +478,10 @@ inline void BroadcastReduceShapeCompact(const mxnet::TShape& big, const mxnet::T
       ++j;
     }
   }
-  if (j <= 2) {
-    new_small->assign(&(*new_small)[0], &(*new_small)[2]);
-    new_big->assign(&(*new_big)[0], &(*new_big)[2]);
-  } else if (j <= MXNET_SPECIAL_MAX_NDIM) {
-    new_small->assign(&(*new_small)[0], &(*new_small)[MXNET_SPECIAL_MAX_NDIM]);
-    new_big->assign(&(*new_big)[0], &(*new_big)[MXNET_SPECIAL_MAX_NDIM]);
+  if (j <= MXNET_SPECIAL_MAX_NDIM) {
+    const int ndim = (j <= 2? 2 : MXNET_SPECIAL_MAX_NDIM);
+    new_small->assign(new_small->begin(), new_small->begin() + ndim);
+    new_big->assign(new_big->begin(), new_big->begin() + ndim);
   } else {
     LOG(FATAL) << "Too many reduction axes from " << big << " to " << small;
   }
diff --git a/src/operator/tensor/diag_op-inl.h b/src/operator/tensor/diag_op-inl.h
index 1e3c1c9701d4..b90b09a36bd3 100644
--- a/src/operator/tensor/diag_op-inl.h
+++ b/src/operator/tensor/diag_op-inl.h
@@ -91,12 +91,12 @@ inline mxnet::TShape DiagShapeImpl(const mxnet::TShape& ishape, const int k,
     std::swap(x1, x2);
   }
 
-  int32_t n_dim = static_cast<int32_t>(ishape.ndim()) - 1;
-  mxnet::TShape oshape(n_dim);
+  int32_t n_dim = ishape.ndim() - 1;
+  mxnet::TShape oshape(n_dim, -1);
 
   // remove axis1 and axis2 and append the new axis to the end
   uint32_t idx = 0;
-  for (int32_t i = 0; i <= n_dim; ++i) {
+  for (int i = 0; i <= n_dim; ++i) {
     if (i != x1 && i != x2) {
       oshape[idx++] = ishape[i];
     }
@@ -114,7 +114,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(out_attrs->size(), 1U);
 
     const mxnet::TShape& ishape = (*in_attrs)[0];
-    if (ishape.ndim() == 0) {
+    if (!shape_is_known(ishape)) {
       return false;
     }
 
@@ -129,7 +129,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs,
     }
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
 
-    return out_attrs->at(0).ndim() != 0U;
+    return shape_is_known(out_attrs->at(0));
 }
 
 inline bool DiagOpType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h
index 8a1eda0350b0..318254b26b9f 100644
--- a/src/operator/tensor/dot-inl.h
+++ b/src/operator/tensor/dot-inl.h
@@ -1241,20 +1241,20 @@ inline bool DotShape(const nnvm::NodeAttrs& attrs,
     if (Ta) {
       L[0] = mshadow::Shape1(lshape[0]);
       L[1] = lshape.ndim() > 1 ?
-             mxnet::TShape(&lshape[1], &lshape[lshape.ndim()]) : mxnet::TShape(1);
+             mxnet::TShape(&lshape[1], lshape.end()) : mxnet::TShape(1, 1);
     } else {
       L[0] = lshape.ndim() > 1 ?
-             mxnet::TShape(&lshape[0], &lshape[lshape.ndim()-1]) : mxnet::TShape(1);
+             mxnet::TShape(&lshape[0], &lshape[lshape.ndim()-1]) : mxnet::TShape(1, 1);
       L[1] = mshadow::Shape1(lshape[lshape.ndim()-1]);
     }
     if (Tb) {
       R[0] = rshape.ndim() > 1 ?
-             mxnet::TShape(&rshape[0], &rshape[rshape.ndim()-1]) : mxnet::TShape(1);
+             mxnet::TShape(&rshape[0], &rshape[rshape.ndim()-1]) : mxnet::TShape(1, 1);
       R[1] = mshadow::Shape1(rshape[rshape.ndim()-1]);
     } else {
       R[0] = mshadow::Shape1(rshape[0]);
       R[1] = rshape.ndim() > 1 ?
-             mxnet::TShape(&rshape[1], &rshape[rshape.ndim()]) : mxnet::TShape(1);
+             mxnet::TShape(&rshape[1], rshape.end()) : mxnet::TShape(1, 1);
     }
 
     if (L[!Ta].Size() != 0 && R[Tb].Size() != 0) {
@@ -1262,8 +1262,8 @@ inline bool DotShape(const nnvm::NodeAttrs& attrs,
         << "dot shape error: " << lshape << " X " << rshape;
     }
     std::vector<index_t> buf;
-    if (lshape.ndim() > 1) buf.insert(buf.end(), &L[Ta][0], &L[Ta][L[Ta].ndim()]);
-    if (rshape.ndim() > 1) buf.insert(buf.end(), &R[!Tb][0], &R[!Tb][R[!Tb].ndim()]);
+    if (lshape.ndim() > 1) buf.insert(buf.end(), &L[Ta][0], L[Ta].end());
+    if (rshape.ndim() > 1) buf.insert(buf.end(), &R[!Tb][0], R[!Tb].end());
     mxnet::TShape oshape(buf.begin(), buf.end());
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
   }
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index 1d2b7c9c1163..dfb3231a75a9 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -48,21 +48,23 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& rhs = (*in_attrs)[1];
 
   // avoid pre-mature shape inference.
-  if (lhs.ndim() == 0 || rhs.ndim() == 0) return false;
+  if (lhs.ndim() == -1 || rhs.ndim() == -1) return false;
 
   if (lhs == rhs) {
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, lhs);
-    return true;
+    return shape_is_known(lhs);
   }
-  mxnet::TShape out(std::max(lhs.ndim(), rhs.ndim()));
-  index_t bl = out.ndim() - lhs.ndim();
-  index_t br = out.ndim() - rhs.ndim();
-  for (index_t i = 0; i < out.ndim(); ++i) {
-    index_t l = 1, r = 1;
+  mxnet::TShape out(std::max(lhs.ndim(), rhs.ndim()), -1);
+  const int bl = out.ndim() - lhs.ndim();
+  const int br = out.ndim() - rhs.ndim();
+  for (int i = 0; i < out.ndim(); ++i) {
+    int l = 1, r = 1;
     if (i >= bl) l = lhs[i-bl];
     if (i >= br) r = rhs[i-br];
     if (l != r) {
       if (l == 0 || r == 0) {
+        // TODO(junwu): here is not compatible with NumPy.
+        // For example, (2, 3) cannot broadcast to (2, 0, 3).
         out[i] = 0;
       } else {
         CHECK(l == 1 || r == 1)
@@ -74,7 +76,7 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs,
     }
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, out);
-  return true;
+  return shape_is_known(lhs) && shape_is_known(rhs) && shape_is_known(out);
 }
 
 inline bool BinaryBroadcastMulStorageType(const nnvm::NodeAttrs& attrs,
@@ -146,15 +148,15 @@ inline int BinaryBroadcastShapeCompact(const mxnet::TShape& lshape, const mxnet:
                                        const mxnet::TShape& oshape, mxnet::TShape *new_lshape,
                                        mxnet::TShape *new_rshape, mxnet::TShape *new_oshape) {
   if (lshape == rshape) return 0;
-  index_t odim = std::max<index_t>(oshape.ndim(), broadcast::MAX_DIM);
-  *new_lshape = mxnet::TShape(odim);
-  *new_rshape = mxnet::TShape(odim);
-  *new_oshape = mxnet::TShape(odim);
-  index_t bl = oshape.ndim() - lshape.ndim();
-  index_t br = oshape.ndim() - rshape.ndim();
-  index_t j = 0, lprod = 1, rprod = 1, oprod = 1;
-  for (index_t i = 0; i < oshape.ndim(); ++i) {
-    index_t l = 1, r = 1, o = oshape[i];
+  const int odim = std::max(oshape.ndim(), broadcast::MAX_DIM);
+  *new_lshape = mxnet::TShape(odim, 1);
+  *new_rshape = mxnet::TShape(odim, 1);
+  *new_oshape = mxnet::TShape(odim, 1);
+  int bl = oshape.ndim() - lshape.ndim();
+  int br = oshape.ndim() - rshape.ndim();
+  int j = 0, lprod = 1, rprod = 1, oprod = 1;
+  for (int i = 0; i < oshape.ndim(); ++i) {
+    int l = 1, r = 1, o = oshape[i];
     if (i >= bl) l = lshape[i-bl];
     if (i >= br) r = rshape[i-br];
     if ((lprod != rprod || l != r) &&
@@ -176,9 +178,9 @@ inline int BinaryBroadcastShapeCompact(const mxnet::TShape& lshape, const mxnet:
   }
   if (j <= broadcast::MAX_DIM) {
     BROADCAST_NDIM_SWITCH(j, NDim, {
-      new_lshape->assign(&(*new_lshape)[0], &(*new_lshape)[NDim]);
-      new_rshape->assign(&(*new_rshape)[0], &(*new_rshape)[NDim]);
-      new_oshape->assign(&(*new_oshape)[0], &(*new_oshape)[NDim]);
+      new_lshape->assign(new_lshape->begin(), new_lshape->begin() + NDim);
+      new_rshape->assign(new_rshape->begin(), new_rshape->begin() + NDim);
+      new_oshape->assign(new_oshape->begin(), new_oshape->begin() + NDim);
     });
   } else {
     LOG(FATAL) << "Too many broadcast dimensions with operands " << lshape << " " << rshape;
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index 19a9ac8359eb..5114a5d0dbe3 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -413,9 +413,9 @@ bool ReshapeLikeShapeCompute(const nnvm::NodeAttrs &attrs,
   GetReshapeLikeParams(param, lshape, rshape, &lhs_begin, &lhs_end, &rhs_begin,
                        &rhs_end);
 
-  int lhsrank = static_cast<int>(lshape.ndim());
+  int lhsrank = lshape.ndim();
   int orank = lhsrank + (rhs_end - rhs_begin) - (lhs_end - lhs_begin);
-  mxnet::TShape oshape(orank);
+  mxnet::TShape oshape(orank, -1);
 
   for (int i = 0; i < lhs_begin; ++i)
     oshape[i] = lshape[i];
@@ -436,7 +436,7 @@ bool ReshapeLikeShapeCompute(const nnvm::NodeAttrs &attrs,
       << "shape " << oshape << " because they have different "
       << "size.";
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 DMLC_REGISTER_PARAMETER(ReshapeLikeParam);
@@ -537,7 +537,7 @@ Example::
      mxnet::ShapeVector *out_attrs) {
     CHECK_EQ(in_attrs->size(), 1U);
     CHECK_EQ(out_attrs->size(), 1U);
-    mxnet::TShape target_shape(1);
+    mxnet::TShape target_shape(1, -1);
     target_shape[0] = in_attrs->at(0).ndim();
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, target_shape);
     return !shape_is_none(out_attrs->at(0));
@@ -589,7 +589,7 @@ Example::
      mxnet::ShapeVector *out_attrs) {
     CHECK_EQ(in_attrs->size(), 1U);
     CHECK_EQ(out_attrs->size(), 1U);
-    SHAPE_ASSIGN_CHECK(*out_attrs, 0, 1U);
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 1));
     return !shape_is_none(out_attrs->at(0));
   })
 .set_attr<nnvm::FInferType>("FInferType",
diff --git a/src/operator/tensor/histogram-inl.h b/src/operator/tensor/histogram-inl.h
index 51d0bdb6c2b6..9cf9c490bba2 100644
--- a/src/operator/tensor/histogram-inl.h
+++ b/src/operator/tensor/histogram-inl.h
@@ -46,13 +46,13 @@ namespace op {
 
 struct HistogramParam : public dmlc::Parameter<HistogramParam> {
     dmlc::optional<int> bin_cnt;
-    dmlc::optional<nnvm::Tuple<double>> range;
+    dmlc::optional<mxnet::Tuple<double>> range;
     DMLC_DECLARE_PARAMETER(HistogramParam) {
       DMLC_DECLARE_FIELD(bin_cnt)
         .set_default(dmlc::optional<int>())
         .describe("Number of bins for uniform case");
       DMLC_DECLARE_FIELD(range)
-        .set_default(dmlc::optional<nnvm::Tuple<double>>())
+        .set_default(dmlc::optional<mxnet::Tuple<double>>())
         .describe("The lower and upper range of the bins. if not provided, "
                   "range is simply (a.min(), a.max()). values outside the "
                   "range are ignored. the first element of the range must be "
@@ -101,7 +101,7 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs,
     SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1));
   }
 
-  return !shape_is_none(out_attrs->at(0)) && !shape_is_none(out_attrs->at(1)) &&
+  return shape_is_known(out_attrs->at(0)) && shape_is_known(out_attrs->at(1)) &&
          out_attrs->at(0).Size() == out_attrs->at(1).Size() - 1;
 }
 
diff --git a/src/operator/tensor/indexing_op.h b/src/operator/tensor/indexing_op.h
index 8979531fef4e..6469aae17558 100644
--- a/src/operator/tensor/indexing_op.h
+++ b/src/operator/tensor/indexing_op.h
@@ -145,20 +145,20 @@ inline bool EmbeddingOpShape(const nnvm::NodeAttrs& attrs,
                              mxnet::ShapeVector *out_attrs) {
   using namespace mshadow;
   const mxnet::TShape &dshape = (*in_attrs)[embedding::kData];
-  if (dshape.ndim() ==  0) return false;
+  if (!shape_is_known(dshape)) return false;
   const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
   SHAPE_ASSIGN_CHECK(*in_attrs, embedding::kWeight, Shape2(param.input_dim,
                                                            param.output_dim));
   out_attrs->clear();
 
-  mxnet::TShape oshape(dshape.ndim()+1);
-  for (size_t i = 0; i < dshape.ndim(); ++i) {
+  mxnet::TShape oshape(dshape.ndim()+1, -1);
+  for (int i = 0; i < dshape.ndim(); ++i) {
     oshape[i] = dshape[i];
   }
   oshape[dshape.ndim()] = param.output_dim;
 
   out_attrs->push_back(oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 template<typename ParamType>
@@ -682,18 +682,18 @@ inline bool TakeOpShape(const nnvm::NodeAttrs& attrs,
   using namespace mshadow;
   const mxnet::TShape &arrshape = (*in_attrs)[take_::kArr];
   const mxnet::TShape &idxshape = (*in_attrs)[take_::kIdx];
-  if (idxshape.ndim() == 0U || idxshape.Size() == 0U) return false;
+  if (!shape_is_known(idxshape)) return false;
   const TakeParam& param = nnvm::get<TakeParam>(attrs.parsed);
   if (param.mode == take_::kRaise) {
     LOG(FATAL) << "Raise is not supported for the time being...";
   }
-  CHECK(param.axis >= -1 * (int)arrshape.ndim() && param.axis < (int)arrshape.ndim())
+  CHECK(param.axis >= -1 * arrshape.ndim() && param.axis < arrshape.ndim())
     << "Axis should be in the range of [-r, r-1] where r is the rank of input tensor";
 
   out_attrs->clear();
 
   const index_t actual_axis = param.axis + ((param.axis < 0) ? arrshape.ndim() : 0);
-  mxnet::TShape oshape(idxshape.ndim() + arrshape.ndim() - 1);
+  mxnet::TShape oshape(idxshape.ndim() + arrshape.ndim() - 1, -1);
   for (index_t i = 0; i < idxshape.ndim(); ++i) {
     oshape[i + actual_axis] = idxshape[i];
   }
@@ -705,7 +705,7 @@ inline bool TakeOpShape(const nnvm::NodeAttrs& attrs,
     }
   }
   out_attrs->push_back(oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 inline bool TakeOpType(const nnvm::NodeAttrs& attrs,
@@ -1170,6 +1170,7 @@ inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   // The shape of indices
   const mxnet::TShape& ishape = (*in_attrs)[0];
+  if (!shape_is_known(ishape)) return false;
 
   int depth = 0;
   double on_value = 1.0;
@@ -1177,13 +1178,13 @@ inline bool OneHotOpShape(const nnvm::NodeAttrs& attrs,
   int dtype = mshadow::kFloat32;
   GetOneHotParams(param, &depth, &on_value, &off_value, &dtype);
 
-  mxnet::TShape oshape(ishape.ndim() + 1);
+  mxnet::TShape oshape(ishape.ndim() + 1, -1);
   for (index_t i = 0; i < ishape.ndim(); ++i) {
     oshape[i] = ishape[i];
   }
   oshape[oshape.ndim()-1] = depth;
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 inline bool OneHotOpType(const nnvm::NodeAttrs& attrs,
@@ -1270,15 +1271,15 @@ inline bool GatherNDShape(const nnvm::NodeAttrs& attrs,
   CHECK_LE(ishape[0], 10)
     << "gather_nd supports indexing along at most 10 dimensions.";
 
-  mxnet::TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0]);
+  mxnet::TShape oshape(ishape.ndim() - 1 + dshape.ndim() - ishape[0], -1);
 
-  for (size_t i = 0; i < ishape.ndim() - 1; ++i) oshape[i] = ishape[i+1];
+  for (int i = 0; i < ishape.ndim() - 1; ++i) oshape[i] = ishape[i+1];
   for (int i = 0; i < dshape.ndim() - ishape[0]; ++i) {
     oshape[ishape.ndim()-1+i] = dshape[ishape[0] + i];
   }
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 inline bool GatherNDType(const nnvm::NodeAttrs& attrs,
@@ -1370,7 +1371,7 @@ inline bool ScatterNDShape(const nnvm::NodeAttrs& attrs,
 
   bool valid = dshape.ndim() == ishape.ndim() - 1 + oshape.ndim() - ishape[0];
 
-  for (size_t i = 0; i < ishape.ndim() - 1; ++i) {
+  for (int i = 0; i < ishape.ndim() - 1; ++i) {
     valid = valid && dshape[i] == ishape[i+1];
   }
   for (int i = 0; i < oshape.ndim() - ishape[0]; ++i) {
diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h
index fe1a1f62954a..3c4d34b3f4a5 100644
--- a/src/operator/tensor/init_op.h
+++ b/src/operator/tensor/init_op.h
@@ -49,7 +49,7 @@ struct InitOpParam : public dmlc::Parameter<InitOpParam> {
   int dtype;
   DMLC_DECLARE_PARAMETER(InitOpParam) {
     DMLC_DECLARE_FIELD(shape)
-    .set_default(mxnet::TShape())
+    .set_default(mxnet::TShape(0, 1))
     .describe("The shape of the output");
     DMLC_DECLARE_FIELD(ctx)
     .set_default("")
@@ -213,8 +213,8 @@ inline bool InitShape(const nnvm::NodeAttrs& attrs,
   const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 0U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if ((*out_attrs)[0].ndim() != 0 && param.shape.ndim() == 0) return true;
-  for (unsigned int i=0 ; i < param.shape.ndim() ; ++i) {
+  if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param.shape)) return true;
+  for (int i=0 ; i < param.shape.ndim() ; ++i) {
     if (param.shape[i] < 0U) {
       LOG(FATAL) << "Shape cannot contain negative values " << param.shape;
     }
diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h
index 5e18e0ef5a25..db4607fe9262 100644
--- a/src/operator/tensor/la_op.h
+++ b/src/operator/tensor/la_op.h
@@ -384,7 +384,7 @@ mshadow::Tensor<xpu, dim, DType> LaOpFlatten(const TBlob& blob,
   }
   // Collapse ranges [0,axis-1] and [axis+1,ndim-2].
   CHECK_EQ(dim, 4);
-  mxnet::TShape shape(dim);
+  mxnet::TShape shape(dim, -1);
   shape[0] = 1;
   for (int i = 0; i < axis; ++i) {
     shape[0] *= blob.shape_[i];
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index ba62d0e9def7..d61267faefe6 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -49,17 +49,17 @@ namespace op {
 struct ReshapeParam : public dmlc::Parameter<ReshapeParam> {
   mxnet::TShape target_shape;
   bool keep_highest;
-  nnvm::Tuple<int> shape;
+  mxnet::Tuple<int> shape;
   bool reverse;
   DMLC_DECLARE_PARAMETER(ReshapeParam) {
     DMLC_DECLARE_FIELD(shape)
-    .set_default(nnvm::Tuple<int>())
+    .set_default(mxnet::Tuple<int>())
     .describe("The target shape");
     DMLC_DECLARE_FIELD(reverse)
     .set_default(false)
     .describe("If true then the special values are inferred from right to left");
     DMLC_DECLARE_FIELD(target_shape)
-    .set_default(mxnet::TShape())
+    .set_default(mxnet::TShape(0))
     .describe("(Deprecated! Use ``shape`` instead.) "
               "Target new shape. One and only one dim can be 0, "
               "in which case it will be inferred from the rest of dims");
@@ -71,11 +71,11 @@ struct ReshapeParam : public dmlc::Parameter<ReshapeParam> {
 };
 
 template<typename IType>
-inline mxnet::TShape InferReshapeShape(const nnvm::Tuple<IType>& shape,
-                                const mxnet::TShape& dshape, bool reverse) {
+inline mxnet::TShape InferReshapeShape(const mxnet::Tuple<IType>& shape,
+                                       const mxnet::TShape& dshape, bool reverse) {
   std::vector<IType> dshape_vec;
   std::vector<IType> param_shape_vec(shape.begin(), shape.end());
-  for (index_t i = 0; i < dshape.ndim(); ++i) {
+  for (int i = 0; i < dshape.ndim(); ++i) {
     dshape_vec.push_back(dshape[i]);
   }
   std::vector<IType> tmp;
@@ -102,28 +102,31 @@ inline mxnet::TShape InferReshapeShape(const nnvm::Tuple<IType>& shape,
     } else if (proposed_dim == -2) {
       // copy all remaining dims from source
       while (src_idx < dshape_len) {
-        size_t dn = dshape_vec[src_idx++];
+        const int dn = dshape_vec[src_idx++];
         tmp.push_back(dn);
       }
     } else if (proposed_dim == -3) {
       // merge two dims from source
       CHECK_LT(src_idx, dshape_len-1);
-      size_t d1 = dshape_vec[src_idx++];
-      size_t d2 = dshape_vec[src_idx++];
-      size_t dn = d1 * d2;
-      tmp.push_back(dn);
+      const int d1 = dshape_vec[src_idx++];
+      const int d2 = dshape_vec[src_idx++];
+      if (d1 == -1 || d2 == -1) {
+        tmp.push_back(-1);
+      } else {
+        tmp.push_back(d1 * d2);
+      }
     } else if (proposed_dim == -4) {
       // split the source dim s into two dims
       // read the left dim and then the right dim (either can be -1)
       CHECK_LT(i + 2, params_len);
       CHECK_LT(src_idx, dshape_len);
-      size_t d0 = dshape_vec[src_idx++];
+      const int d0 = dshape_vec[src_idx++];
       IType d1 = param_shape_vec[++i];
       IType d2 = param_shape_vec[++i];
       CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1.";
-      if (d1 == -1) d1 = d0 / d2;
-      if (d2 == -1) d2 = d0 / d1;
-      CHECK(d1 * d2 == static_cast<IType>(d0) || static_cast<IType>(d0) == IType(0)) <<
+      if (d1 == -1 && d0 >= 0) d1 = d0 / d2;  // d0 must be known to do this
+      if (d2 == -1 && d0 >= 0) d2 = d0 / d1;  // d0 must be known to do this
+      CHECK(d1 * d2 == static_cast<IType>(d0) || static_cast<IType>(d0) == IType(-1)) <<
         "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0;
       tmp.push_back(d1);
       tmp.push_back(d2);
@@ -135,12 +138,12 @@ inline mxnet::TShape InferReshapeShape(const nnvm::Tuple<IType>& shape,
   }
 
   if (inf_idx >= 0) {
-    if (dshape.Size() > 0) {
+    if (shape_is_known(dshape)) {
       IType new_size = 1;
       for (IType x : tmp) new_size *= x;
       tmp[inf_idx] = dshape.Size() / new_size;
     } else {
-      tmp[inf_idx] = 0;
+      tmp[inf_idx] = -1;
     }
   }
   if (reverse) {
@@ -153,24 +156,24 @@ inline mxnet::TShape InferReshapeShape(const nnvm::Tuple<IType>& shape,
 }
 
 inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out) {
-  if (in->Size() && out.Size()) {
+  if (shape_is_known(*in) && shape_is_known(out)) {
     return true;
-  } else if (!out.Size()) {
+  } else if (!shape_is_known(out)) {
     return false;
   } else {
     int zero_axis = -1;
-    int non_zero_prod = 1;
-    for (index_t i = 0; i < in->ndim(); i++) {
-      if ((*in)[i] == 0) {
+    int known_dim_size_prod = 1;
+    for (int i = 0; i < in->ndim(); i++) {
+      if ((*in)[i] == -1) {
         if (zero_axis != -1)
           return false;  // more than 1 zero found.
         else
           zero_axis = i;
       } else {
-        non_zero_prod *= (*in)[i];
+        known_dim_size_prod *= (*in)[i];
       }
     }
-    (*in)[zero_axis] = out.Size() / non_zero_prod;
+    (*in)[zero_axis] = out.Size() / known_dim_size_prod;
     return true;
   }
 }
@@ -182,11 +185,11 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape &dshape = (*in_attrs)[0];
-  if (dshape.ndim() == 0) return false;
+  if (dshape.ndim() == -1) return false;
   mxnet::TShape oshape;
   if (param_.shape.ndim() != 0) {
     oshape = InferReshapeShape(param_.shape, dshape, param_.reverse);
-  } else if (param_.target_shape.ndim()) {
+  } else if (param_.target_shape.ndim() != -1) {
     LOG(INFO) << "Using target_shape will be deprecated.";
     oshape = param_.target_shape;
     int neg_count = 0;
@@ -195,7 +198,7 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs,
     if (param_.keep_highest) {
       oshape[0] = dshape[0];
     }
-    for (index_t i = start_idx; i < oshape.ndim(); ++i) {
+    for (int i = start_idx; i < oshape.ndim(); ++i) {
       if (oshape[i] == 0) {
         neg_count++;
         inf_idx = i;
@@ -206,13 +209,15 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs,
       oshape[inf_idx] = dshape.Size() / oshape.Size();
     }
   } else {
-    return (*out_attrs)[0].ndim() && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]);
+    return shape_is_known((*out_attrs)[0]) && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]);
   }
   ReverseReshapeInferShape(&dshape, oshape);
+#if 0
   CHECK_EQ(oshape.Size(), dshape.Size())
     << "Target shape size is different to source. "
     << "Target: " << oshape
     << "\nSource: " << dshape;
+#endif
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
   return ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]);
 }
@@ -223,9 +228,9 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
   CHECK_EQ(out_attrs->size(), 1U);
   const mxnet::TShape &dshape = (*in_attrs)[0];
-  if (dshape.ndim() == 0) return false;
-  uint32_t target_dim = 1;
-  for (uint32_t i = 1; i < dshape.ndim(); ++i) {
+  if (!shape_is_known(dshape)) return false;
+  int target_dim = 1;
+  for (int i = 1; i < dshape.ndim(); ++i) {
     target_dim *= dshape[i];
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape2(dshape[0], target_dim));
@@ -313,9 +318,9 @@ void Transpose(const nnvm::NodeAttrs& attrs,
                const std::vector<TBlob>& outputs) {
   const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
   CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace";
-  if (param.axes.ndim() == 0) {
-    mxnet::TShape axes = mxnet::TShape(inputs[0].ndim());
-    for (index_t i = 0; i < axes.ndim(); ++i) {
+  if (param.axes.ndim() == -1) {
+    mxnet::TShape axes(inputs[0].ndim(), -1);
+    for (int i = 0; i < axes.ndim(); ++i) {
       axes[i] = axes.ndim() - 1 - i;
     }
     TransposeImpl<xpu>(ctx.run_ctx, inputs[0], outputs[0], axes);
@@ -332,20 +337,20 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape& shp = (*in_attrs)[0];
   CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions";
-  mxnet::TShape ret(shp.ndim());
-  if (param.axes.ndim() == 0) {
-    for (index_t i = 0; i < shp.ndim(); ++i) {
+  mxnet::TShape ret(shp.ndim(), -1);
+  if (param.axes.ndim() == -1) {
+    for (int i = 0; i < shp.ndim(); ++i) {
       ret[i] = shp[shp.ndim()-1-i];
     }
   } else {
     CHECK_EQ(shp.ndim(), param.axes.ndim());
-    for (size_t i = 0; i < shp.ndim(); ++i) {
+    for (int i = 0; i < shp.ndim(); ++i) {
       CHECK(param.axes[i] < static_cast<int64_t>(shp.ndim()));
       ret[i] = shp[param.axes[i]];
     }
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
-  return true;
+  return shape_is_known(ret);
 }
 
 
@@ -366,7 +371,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
   const ExpandDimParam& param = nnvm::get<ExpandDimParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if (in_attrs->at(0).ndim() == 0U && out_attrs->at(0).ndim() == 0U) {
+  if (!shape_is_known(in_attrs->at(0)) && !shape_is_known(out_attrs->at(0))) {
     return false;
   }
 
@@ -374,7 +379,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& oshape = (*out_attrs)[0];
   int indim = ishape.ndim();
   bool unknown_ishape = false;
-  if (0 == indim) {
+  if (-1 == indim) {
     indim = oshape.ndim() - 1;
     unknown_ishape = true;
   }
@@ -386,27 +391,27 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
   CHECK(axis >= 0 && axis <= indim)
       << "axis must be in the range [" << -indim << ", " << indim << "] ("
       << param.axis << " provided)";
-  mxnet::TShape ret(indim + 1);
+  mxnet::TShape ret(indim + 1, -1);
   for (int i = 0; i < axis; ++i) {
-    ret[i] = (unknown_ishape? 0 : ishape[i]);
+    ret[i] = (unknown_ishape? -1 : ishape[i]);
   }
   ret[axis] = 1;
   for (int i = axis+1; i < indim+1; ++i) {
-    ret[i] = (unknown_ishape? 0 : ishape[i-1]);
+    ret[i] = (unknown_ishape? -1 : ishape[i-1]);
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
 
-  ret = mxnet::TShape(indim);
+  ret = mxnet::TShape(indim, -1);
   for (int i = 0; i < axis; ++i) ret[i] = oshape[i];
   for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i];
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret);
-  return true;
+  return shape_is_known(ret);
 }
 
 // Currently MKLDNN only supports step = 1 or step has no value
 inline bool SupportMKLDNNSlice(const SliceParam& param) {
   if (param.step.ndim() == 0U) return true;
-  for (uint32_t i = 0; i < param.step.ndim(); ++i) {
+  for (int i = 0; i < param.step.ndim(); ++i) {
     if (param.step[i].has_value() && param.step[i].value() != 1)
       return false;
   }
@@ -589,9 +594,9 @@ void SliceCsrImpl(const SliceParam &param, const OpContext& ctx,
   const mxnet::TShape ishape = in.shape();
   const mxnet::TShape oshape = out.shape();
 
-  uint32_t N = ishape.ndim();
-  mxnet::TShape begin(N), end(N);
-  for (uint32_t i = 0; i < N; ++i) {
+  int N = ishape.ndim();
+  mxnet::TShape begin(N, -1), end(N, -1);
+  for (int i = 0; i < N; ++i) {
     int s = 0;
     if (param.begin[i]) {
       s = *param.begin[i];
@@ -634,9 +639,9 @@ void SliceEx(const nnvm::NodeAttrs& attrs,
 
 template<int ndim>
 inline void GetIndexRange(const mxnet::TShape& dshape,
-                          const nnvm::Tuple<dmlc::optional<int>>& param_begin,
-                          const nnvm::Tuple<dmlc::optional<int>>& param_end,
-                          const nnvm::Tuple<dmlc::optional<int>>& param_step,
+                          const mxnet::Tuple<dmlc::optional<int>>& param_begin,
+                          const mxnet::Tuple<dmlc::optional<int>>& param_end,
+                          const mxnet::Tuple<dmlc::optional<int>>& param_step,
                           common::StaticArray<index_t, ndim>* begin,
                           common::StaticArray<index_t, ndim>* end,
                           common::StaticArray<index_t, ndim>* step) {
@@ -651,7 +656,7 @@ inline void GetIndexRange(const mxnet::TShape& dshape,
     << "Static array size=" << ndim
     << " is not equal to data shape ndim=" << dshape.ndim();
 
-  if (param_step.ndim() != 0U) {
+  if (param_step.ndim() != 0) {
     CHECK_EQ(param_step.ndim(), param_begin.ndim())
       << "step and begin must have the same length";
   }
@@ -723,7 +728,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   const mxnet::TShape& dshape = (*in_attrs)[0];
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed);
   mxnet::TShape oshape = dshape;
 
@@ -737,7 +742,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs,
   });
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
-  return !shape_is_none(dshape) && !shape_is_none(oshape);
+  return shape_is_known(oshape);
 }
 
 template<int ndim, int req, typename xpu>
@@ -943,7 +948,7 @@ inline bool SliceAssignOpShape(const nnvm::NodeAttrs& attrs,
   MXNET_NDIM_SWITCH(dshape.ndim(), ndim, {
     common::StaticArray<index_t, ndim> begin, end, step;
     GetIndexRange(dshape, param.begin, param.end, param.step, &begin, &end, &step);
-    for (index_t i = 0; i < param.begin.ndim(); ++i) {
+    for (int i = 0; i < param.begin.ndim(); ++i) {
       const int b = begin[i], e = end[i], s = step[i];
       SetSliceOpOutputDimSize(i, b, e, s, &vshape);
     }
@@ -997,8 +1002,8 @@ void SliceAssignOpForward(const nnvm::NodeAttrs& attrs,
 
 struct SliceAssignScalarParam : public dmlc::Parameter<SliceAssignScalarParam> {
   double scalar;
-  nnvm::Tuple<dmlc::optional<int>> begin, end;
-  nnvm::Tuple<dmlc::optional<int>> step;
+  mxnet::Tuple<dmlc::optional<int>> begin, end;
+  mxnet::Tuple<dmlc::optional<int>> step;
   DMLC_DECLARE_PARAMETER(SliceAssignScalarParam) {
     DMLC_DECLARE_FIELD(scalar)
     .set_default(0)
@@ -1008,7 +1013,7 @@ struct SliceAssignScalarParam : public dmlc::Parameter<SliceAssignScalarParam> {
     DMLC_DECLARE_FIELD(end)
     .describe("ending indices for the slice operation, supports negative indices.");
     DMLC_DECLARE_FIELD(step)
-    .set_default(nnvm::Tuple<dmlc::optional<int>>())
+    .set_default(mxnet::Tuple<dmlc::optional<int>>())
     .describe("step for the slice operation, supports negative values.");
   }
 };
@@ -1019,7 +1024,7 @@ inline bool SliceAssignScalarOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   const mxnet::TShape& dshape = (*in_attrs)[0];
-  if (dshape.ndim() == 0U || dshape.Size() == 0U) return false;
+  if (!shape_is_known(dshape)) return false;
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
   return true;
 }
@@ -1156,8 +1161,8 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs,
   int axis;
   index_t begin, end;
   GetSliceAxisParams(param, ishape, &axis, &begin, &end);
-  mxnet::TShape shape(ishape.ndim());
-  for (index_t i = 0; i < ishape.ndim(); ++i) {
+  mxnet::TShape shape(ishape.ndim(), -1);
+  for (int i = 0; i < ishape.ndim(); ++i) {
     if (static_cast<int>(i) == axis) {
       shape[i] = static_cast<index_t>(end - begin);
     } else {
@@ -1165,7 +1170,7 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs,
     }
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape);
-  return true;
+  return shape_is_known(shape);
 }
 
 
@@ -1181,7 +1186,7 @@ void SliceAxis(const nnvm::NodeAttrs& attrs,
   int axis;
   index_t begin, end;
   GetSliceAxisParams(param, inputs[0].shape_, &axis, &begin, &end);
-  int ndim = static_cast<int>(outputs[0].ndim());
+  int ndim = outputs[0].ndim();
 
   if (axis + 1 == ndim) {
     MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
@@ -1252,9 +1257,9 @@ void SliceAxisGrad_(const nnvm::NodeAttrs& attrs,
 }
 
 struct SliceLikeParam : public dmlc::Parameter<SliceLikeParam> {
-  mxnet::TShape axes;
+  mxnet::Tuple<int> axes;
   DMLC_DECLARE_PARAMETER(SliceLikeParam) {
-    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(axes).set_default(mxnet::Tuple<int>())
     .describe("List of axes on which input data will be sliced according to the "
               "corresponding size of the second input. By default will slice on "
               "all axes. Negative axes are supported.");
@@ -1273,7 +1278,7 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(ishape.ndim(), from_shape.ndim())
       << "By default slice_axis performs slice on all axes, but ndim mismatch "
          "for inputs: " << ishape.ndim() << " vs. " << from_shape.ndim();
-    for (index_t i = 0; i < ishape.ndim(); ++i) {
+    for (int i = 0; i < ishape.ndim(); ++i) {
       CHECK_GE(ishape[i], from_shape[i])
         << "Slice axis " << i << " with size " << from_shape[i]
         << "exceeds limit of input with size " << ishape[i];
@@ -1281,7 +1286,7 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs,
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, from_shape);
   } else {
     mxnet::TShape shape(ishape);
-    for (index_t i = 0; i < param.axes.ndim(); ++i) {
+    for (int i = 0; i < param.axes.ndim(); ++i) {
       int axis = static_cast<int>(param.axes[i]);
       if (axis < 0) {
         axis += static_cast<int>(ishape.ndim());
@@ -1304,21 +1309,21 @@ inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs,
 
 inline void SliceLikeInferRanges(const mxnet::TShape& dshape,
                                  const mxnet::TShape& fshape,
-                                 const mxnet::TShape& axes,
-                                 nnvm::Tuple<dmlc::optional<int>>* param_begin,
-                                 nnvm::Tuple<dmlc::optional<int>>* param_end,
-                                 nnvm::Tuple<dmlc::optional<int>>* param_step) {
+                                 const mxnet::Tuple<int>& axes,
+                                 mxnet::Tuple<dmlc::optional<int>>* param_begin,
+                                 mxnet::Tuple<dmlc::optional<int>>* param_end,
+                                 mxnet::Tuple<dmlc::optional<int>>* param_step) {
   std::vector<dmlc::optional<int>> pb(dshape.ndim());
   std::vector<dmlc::optional<int>> pe(dshape.ndim());
   std::vector<dmlc::optional<int>> ps(dshape.ndim());
   if (axes.ndim() == 0) {
-    for (index_t i = 0; i < dshape.ndim(); ++i) {
+    for (int i = 0; i < dshape.ndim(); ++i) {
       pb[i] = 0;
       pe[i] = fshape[i];
       ps[i] = 1;
     }
   } else {
-    for (index_t i = 0; i < axes.ndim(); ++i) {
+    for (int i = 0; i < axes.ndim(); ++i) {
       int axis = static_cast<int>(axes[i]);
       if (axis < 0) {
         axis += static_cast<int>(dshape.ndim());
@@ -1334,9 +1339,9 @@ inline void SliceLikeInferRanges(const mxnet::TShape& dshape,
       ps[axis] = 1;
     }
   }
-  *param_begin = nnvm::Tuple<dmlc::optional<int>>(pb.begin(), pb.end());
-  *param_end = nnvm::Tuple<dmlc::optional<int>>(pe.begin(), pe.end());
-  *param_step = nnvm::Tuple<dmlc::optional<int>>(ps.begin(), ps.end());
+  *param_begin = mxnet::Tuple<dmlc::optional<int>>(pb.begin(), pb.end());
+  *param_end = mxnet::Tuple<dmlc::optional<int>>(pe.begin(), pe.end());
+  *param_step = mxnet::Tuple<dmlc::optional<int>>(ps.begin(), ps.end());
 }
 
 template<typename xpu>
@@ -1355,9 +1360,9 @@ void SliceLikeForward(const nnvm::NodeAttrs& attrs,
   const TBlob& out = outputs[0];
   const mxnet::TShape& ishape = data.shape_;
   const mxnet::TShape& from_shape = inputs[1].shape_;
-  nnvm::Tuple<dmlc::optional<int>> param_begin;
-  nnvm::Tuple<dmlc::optional<int>> param_end;
-  nnvm::Tuple<dmlc::optional<int>> param_step;
+  mxnet::Tuple<dmlc::optional<int>> param_begin;
+  mxnet::Tuple<dmlc::optional<int>> param_end;
+  mxnet::Tuple<dmlc::optional<int>> param_step;
   SliceLikeInferRanges(ishape, from_shape, param.axes, &param_begin, &param_end, &param_step);
 
   MXNET_NDIM_SWITCH(data.ndim(), ndim, {
@@ -1403,9 +1408,9 @@ void SliceLikeBackward(const nnvm::NodeAttrs& attrs,
 
   const mxnet::TShape& ishape = ograd.shape_;
   const mxnet::TShape& from_shape = outputs[1].shape_;
-  nnvm::Tuple<dmlc::optional<int>> param_begin;
-  nnvm::Tuple<dmlc::optional<int>> param_end;
-  nnvm::Tuple<dmlc::optional<int>> param_step;
+  mxnet::Tuple<dmlc::optional<int>> param_begin;
+  mxnet::Tuple<dmlc::optional<int>> param_end;
+  mxnet::Tuple<dmlc::optional<int>> param_step;
   SliceLikeInferRanges(ishape, from_shape, param.axes, &param_begin, &param_end, &param_step);
 
   MXNET_NDIM_SWITCH(ograd.ndim(), ndim, {
@@ -1546,7 +1551,7 @@ inline void GetRepeatParams(const RepeatParam& param, const mxnet::TShape& ishap
   CHECK_GE(*repeats, 0) << "repeats cannot be a negative number";
   *axisOpt = param.axis;
   if (static_cast<bool>(*axisOpt)) {
-    int ndims = static_cast<int>(ishape.ndim());
+    int ndims = ishape.ndim();
     int axis = axisOpt->value();
     if (axis < 0) {
       axis += ndims;
@@ -1565,34 +1570,33 @@ inline bool RepeatOpShape(const nnvm::NodeAttrs& attrs,
   int repeats = 0;
   dmlc::optional<int> axisOpt;
   GetRepeatParams(param, ishape, &repeats, &axisOpt);
-  // If 0 repeats, return an empty 0 dim array
+  // If 0 repeats, return an empty 1-dim, 0-size array
   if (0 == repeats) {
-    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape());
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, 0));
     return true;
   }
 
   // If repeats > 0, multiply the size of the corresponding axis by repeats
   if (static_cast<bool>(axisOpt)) {
-    int ndims = static_cast<int>(ishape.ndim());
+    int ndims = ishape.ndim();
     int axis = axisOpt.value();
     if (axis < 0) {
       axis += ndims;
     }
-    mxnet::TShape shape(ishape.ndim());
-    for (index_t i = 0; i < ishape.ndim(); ++i) {
-      if (static_cast<int>(i) == axis) {
-        shape[i] = static_cast<index_t>(repeats) * ishape[i];
+    mxnet::TShape shape(ishape.ndim(), -1);
+    for (int i = 0; i < ishape.ndim(); ++i) {
+      if (i == axis) {
+        shape[i] = repeats * ishape[i];
       } else {
         shape[i] = ishape[i];
       }
     }
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape);
   } else {  // If axis is not input by user, return a flat 1D array of size = in.size*repeats
-    mxnet::TShape shape(1);
-    shape[0] = ishape.Size() * static_cast<index_t>(repeats);
+    mxnet::TShape shape(1, ishape.Size() * repeats);
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape);
   }
-  return true;
+  return shape_is_known(out_attrs->at(0));
 }
 
 inline bool RepeatOpType(const nnvm::NodeAttrs& attrs,
@@ -1620,16 +1624,16 @@ inline std::pair<mxnet::TShape, mxnet::TShape> ReshapeInputOutputForRepeatOp(
   const int repeats) {
   if (static_cast<bool>(axisOpt)) {
     int axis = axisOpt.value();
-    int ndim = static_cast<int>(ishape.ndim());
+    int ndim = ishape.ndim();
     if (axis < 0)  {
       axis += ndim;
     }
-    CHECK(axis >= 0 && axis < static_cast<int>(ishape.ndim())) << "Invalid input of axis";
+    CHECK(axis >= 0 && axis < ishape.ndim()) << "Invalid input of axis";
 
     // reshape the input tensor by adding a dim at the (axis+1)-th dim
-    mxnet::TShape rshape(ishape.ndim()+1);
+    mxnet::TShape rshape(ishape.ndim()+1, 1);
     // the shape we want to broadcast to
-    mxnet::TShape bshape(rshape.ndim());
+    mxnet::TShape bshape(rshape.ndim(), 1);
     int i = 0;
     while (i <= axis) {
       rshape[i] = bshape[i] = ishape[i];
@@ -1637,7 +1641,7 @@ inline std::pair<mxnet::TShape, mxnet::TShape> ReshapeInputOutputForRepeatOp(
     }
     rshape[i] = 1;
     bshape[i] = repeats;
-    while (i < static_cast<int>(ishape.ndim())) {
+    while (i < ishape.ndim()) {
       rshape[i+1] = ishape[i];
       bshape[i+1] = ishape[i];
       ++i;
@@ -1648,11 +1652,11 @@ inline std::pair<mxnet::TShape, mxnet::TShape> ReshapeInputOutputForRepeatOp(
     // reshape the tensor into shape (ishape.Size(), 1)
     // then add one dim at axis = 1 and broadcast to
     // shape (ishape.Size(), repeats)
-    mxnet::TShape rshape(2);
+    mxnet::TShape rshape(2, 1);
     rshape[0] = ishape.Size();
     rshape[1] = 1;
 
-    mxnet::TShape bshape(2);
+    mxnet::TShape bshape(2, 1);
     bshape[0] = rshape[0];
     bshape[1] = repeats;
     return std::make_pair(rshape, bshape);
@@ -1667,7 +1671,7 @@ void RepeatOpForward(const nnvm::NodeAttrs& attrs,
                      const std::vector<TBlob>& outputs) {
   const TBlob& iTBlob = inputs[0];
   const mxnet::TShape& ishape = iTBlob.shape_;
-  if (ishape.ndim() == 0) return;
+  if (!shape_is_known(ishape)) return;
 
   int repeats = 0;
   dmlc::optional<int> axisOpt;
@@ -1711,7 +1715,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(outputs.size(), 1U);
 
   const mxnet::TShape& oshape = outputs[0].shape_;
-  if (oshape.ndim() == 0) return;
+  if (!shape_is_known(oshape)) return;
 
   int repeats = 0;
   dmlc::optional<int> axisOpt;
@@ -1737,7 +1741,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs,
 }
 
 struct TileParam : public dmlc::Parameter<TileParam> {
-  mxnet::TShape reps;
+  mxnet::Tuple<int> reps;
   DMLC_DECLARE_PARAMETER(TileParam) {
     DMLC_DECLARE_FIELD(reps)
       .describe("The number of times for repeating the tensor a. Each dim size of reps"
@@ -1755,19 +1759,22 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   const TileParam& param = nnvm::get<TileParam>(attrs.parsed);
   const mxnet::TShape& ishape = (*in_attrs)[0];
-  const mxnet::TShape& reps = param.reps;
+  if (!shape_is_known(ishape)) {
+    return false;
+  }
+  const mxnet::Tuple<int>& reps = param.reps;
   // If reps is empty, return a identical input array
-  if (reps.ndim() == 0 || ishape.ndim() == 0) {
+  if (reps.ndim() == 0) {
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape);
     return true;
   }
-  for (size_t i = 0; i < reps.ndim(); ++i) {
+  for (int i = 0; i < reps.ndim(); ++i) {
     CHECK_GT(reps[i], 0) << "invalid reps=" << i << ", dim size must be greater than zero";
   }
-  mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim()));
-  int i1 = static_cast<int>(ishape.ndim()) - 1;
-  int i2 = static_cast<int>(reps.ndim()) - 1;
-  for (int i = static_cast<int>(oshape.ndim()) - 1; i >= 0; --i) {
+  mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim()), -1);
+  int i1 = ishape.ndim() - 1;
+  int i2 = reps.ndim() - 1;
+  for (int i = oshape.ndim() - 1; i >= 0; --i) {
     if (i1 >= 0 && i2 >= 0) {
       oshape[i] = ishape[i1--] * reps[i2--];
     } else if (i1 >= 0) {
@@ -1777,7 +1784,7 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs,
     }
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
-  return true;
+  return shape_is_known(oshape);
 }
 
 inline bool TileOpType(const nnvm::NodeAttrs& attrs,
@@ -1801,20 +1808,20 @@ inline bool TileOpType(const nnvm::NodeAttrs& attrs,
  */
 inline std::pair<mxnet::TShape, mxnet::TShape> ReshapeInputOutputForTileOp(
   const mxnet::TShape& ishape,
-  const mxnet::TShape& reps) {
+  const mxnet::Tuple<int>& reps) {
   if (ishape.ndim() == 0 || reps.ndim() == 0) {
     return std::make_pair(ishape, ishape);
   }
 
   // The shape we want to broadcast to
-  mxnet::TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2);
+  mxnet::TShape bshape(std::max(ishape.ndim(), reps.ndim()) * 2, 1);
 
   // The shape of the input tensor after adding new axes before each dim
-  mxnet::TShape rshape(bshape.ndim());
+  mxnet::TShape rshape(bshape.ndim(), 1);
 
-  int i1 = static_cast<int>(ishape.ndim()) - 1;
-  int i2 = static_cast<int>(reps.ndim()) - 1;
-  for (int i = static_cast<int>(bshape.ndim()) - 1; i >= 0; --i) {
+  int i1 = ishape.ndim() - 1;
+  int i2 = reps.ndim() - 1;
+  for (int i = bshape.ndim() - 1; i >= 0; --i) {
     if (0 == (i & 1)) {
       bshape[i] = (i2 >= 0? reps[i2--] : 1);
       rshape[i] = 1;
@@ -1854,10 +1861,10 @@ void TileOpForward(const nnvm::NodeAttrs& attrs,
 
   if (inputs[0].Size() == 0) return;
   const mxnet::TShape& ishape = inputs[0].shape_;
-  const mxnet::TShape& reps = nnvm::get<TileParam>(attrs.parsed).reps;
+  const mxnet::Tuple<int>& reps = nnvm::get<TileParam>(attrs.parsed).reps;
 
   // If any one of the number in reps is zero, return immediately
-  for (index_t i = 0; i < reps.ndim(); ++i) {
+  for (int i = 0; i < reps.ndim(); ++i) {
     if (0 == reps[i]) return;
   }
 
@@ -1896,10 +1903,10 @@ void TileOpBackward(const nnvm::NodeAttrs& attrs,
 
   if (inputs[0].Size() == 0) return;
   const mxnet::TShape& oshape = outputs[0].shape_;
-  const mxnet::TShape& reps = nnvm::get<TileParam>(attrs.parsed).reps;
+  const mxnet::Tuple<int>& reps = nnvm::get<TileParam>(attrs.parsed).reps;
 
   // If any one of the number in reps is zero, return immediately
-  for (index_t i = 0; i < reps.ndim(); ++i) {
+  for (int i = 0; i < reps.ndim(); ++i) {
     if (0 == reps[i]) return;
   }
 
@@ -1919,7 +1926,7 @@ void TileOpBackward(const nnvm::NodeAttrs& attrs,
 }
 
 struct ReverseParam : public dmlc::Parameter<ReverseParam> {
-  nnvm::Tuple<int> axis;
+  mxnet::Tuple<int> axis;
   DMLC_DECLARE_PARAMETER(ReverseParam) {
     DMLC_DECLARE_FIELD(axis)
     .describe("The axis which to reverse elements.");
@@ -1990,10 +1997,10 @@ void ReverseOpForward(const nnvm::NodeAttrs& attrs,
   std::vector<index_t>  trailing_(param.axis.ndim());
   index_t reverse_index = 0;
   for (int axis : param.axis) {
-    CHECK_LT(axis, static_cast<int>(ishape.ndim()));
+    CHECK_LT(axis, ishape.ndim());
     stride_[reverse_index] = ishape[axis];
     trailing_[reverse_index] = 1;
-    for (index_t i2 = axis + 1; i2 < ishape.ndim(); ++i2) {
+    for (int i2 = axis + 1; i2 < ishape.ndim(); ++i2) {
       trailing_[reverse_index] *= ishape[i2];
     }
     reverse_index++;
@@ -2054,9 +2061,9 @@ inline bool StackOpShape(const nnvm::NodeAttrs& attrs,
   for (const mxnet::TShape& i : (*in_attrs)) {
     shape_assign(&dshape, i);
   }
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
 
-  mxnet::TShape oshape(dshape.ndim() + 1);
+  mxnet::TShape oshape(dshape.ndim() + 1, -1);
   int axis = CheckAxis(param.axis, oshape.ndim());
   for (int i = 0; i < axis; ++i) {
     oshape[i] = dshape[i];
@@ -2067,7 +2074,7 @@ inline bool StackOpShape(const nnvm::NodeAttrs& attrs,
   }
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape);
 
-  return true;
+  return shape_is_known(oshape);
 }
 
 
@@ -2140,10 +2147,10 @@ void StackOpBackward(const nnvm::NodeAttrs& attrs,
 }
 
 struct SqueezeParam : public dmlc::Parameter<SqueezeParam> {
-  dmlc::optional<mxnet::TShape> axis;
+  dmlc::optional<mxnet::Tuple<int>> axis;
   DMLC_DECLARE_PARAMETER(SqueezeParam) {
     DMLC_DECLARE_FIELD(axis)
-    .set_default(dmlc::optional<mxnet::TShape>())
+    .set_default(dmlc::optional<mxnet::Tuple<int>>())
     .describe("Selects a subset of the single-dimensional entries in the shape."
               " If an axis is selected with shape entry greater than one, an error is raised.");
   }
@@ -2156,7 +2163,7 @@ struct SqueezeParam : public dmlc::Parameter<SqueezeParam> {
 inline size_t SqueezeShapeHelper(mxnet::TShape* shape) {
   CHECK(shape != nullptr);
   size_t count = 0;
-  for (size_t i = 0; i < shape->ndim(); ++i) {
+  for (int i = 0; i < shape->ndim(); ++i) {
     if ((*shape)[i] == 0) {
       ++count;
     } else {
@@ -2174,12 +2181,12 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   const mxnet::TShape& dshape = in_attrs->at(0);
   const int dndim = dshape.ndim();
-  if (shape_is_none(dshape)) return false;
+  if (!shape_is_known(dshape)) return false;
   mxnet::TShape oshape = dshape;
   if (param.axis.has_value()) {
     // preprocess axis
-    mxnet::TShape axes = param.axis.value();
-    for (size_t i = 0; i < axes.ndim(); ++i) {
+    mxnet::Tuple<int> axes = param.axis.value();
+    for (int i = 0; i < axes.ndim(); ++i) {
       if (axes[i] < 0) {
         axes[i] += dndim;
         CHECK_GE(axes[i], 0)
@@ -2194,7 +2201,7 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs,
       oshape[axes[i]] = 0;
     }
   } else {
-    for (size_t i = 0; i < oshape.ndim(); ++i) {
+    for (int i = 0; i < oshape.ndim(); ++i) {
       if (oshape[i] == 1) oshape[i] = 0;
     }
   }
@@ -2223,7 +2230,7 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Depth To Space requires exactly 4D tensor";
 
-  mxnet::TShape expected_out(4);
+  mxnet::TShape expected_out(4, -1);
 
   mxnet::TShape& in_shape = in_attrs->at(0);
   int block = param.block_size;
@@ -2241,14 +2248,14 @@ inline bool DepthToSpaceOpShape(const nnvm::NodeAttrs& attrs,
 
   expected_out[0] = in_shape[0];
   expected_out[1] = in_shape[1] / (block * block);
-  size_t i = 2;
+  int i = 2;
   while (i < expected_out.ndim()) {
     expected_out[i] = in_shape[i] * block;
     ++i;
   }
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out);
-  return true;
+  return shape_is_known(expected_out);
 }
 
 inline bool DepthToSpaceOpType(const nnvm::NodeAttrs& attrs,
@@ -2387,7 +2394,7 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   CHECK_EQ(in_attrs->at(0).ndim(), 4) << "Operation Space To Depth requires exactly 4D tensor";
 
-  mxnet::TShape expected_out(in_attrs->at(0).ndim());
+  mxnet::TShape expected_out(in_attrs->at(0).ndim(), -1);
 
   mxnet::TShape& in_shape = in_attrs->at(0);
   int block = param.block_size;
@@ -2408,14 +2415,14 @@ inline bool SpaceToDepthOpShape(const nnvm::NodeAttrs& attrs,
 
   expected_out[0] = in_shape[0];
   expected_out[1] = in_shape[1] * block * block;
-  uint32_t i = 2;
+  int i = 2;
   while (i < expected_out.ndim()) {
     expected_out[i] = in_shape[i] / block;
     ++i;
   }
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, expected_out);
-  return true;
+  return shape_is_known(expected_out);
 }
 
 inline bool SpaceToDepthOpType(const nnvm::NodeAttrs& attrs,
@@ -2556,7 +2563,7 @@ struct SplitParam : public dmlc::Parameter<SplitParam> {
 };  // struct SplitParam
 
 inline mxnet::TShape GetSplitIndices(const mxnet::TShape& ishape, int axis, int sections) {
-  mxnet::TShape indices(sections+1);
+  mxnet::TShape indices(sections+1, -1);
   indices[0] = 0;
   int64_t section_size = ishape[axis] / sections;
   for (int i = 0; i < sections; ++i) {
@@ -2588,7 +2595,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   mxnet::TShape dshape = in_attrs->at(split_enum::kData);
   mxnet::TShape ishape = in_attrs->at(split_enum::kData);
-  if (dshape.ndim() == 0) return false;
+  if (!shape_is_known(dshape)) return false;
   if (param.axis >= 0) {
     CHECK_LT(static_cast<size_t>(param.axis), dshape.ndim());
   } else {
@@ -2603,7 +2610,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs,
   int num_outputs = (param.sections > 0) ? indices.ndim() - 1 : indices.ndim();
   // Pre-compute squeezed output shape for future usage
   mxnet::TShape squeezed_dshape = dshape;
-  for (int d = real_axis; d < static_cast<int>(squeezed_dshape.ndim()) - 1; ++d) {
+  for (int d = real_axis; d < squeezed_dshape.ndim() - 1; ++d) {
     squeezed_dshape[d] = squeezed_dshape[d+1];
   }
   squeezed_dshape = mxnet::TShape(&squeezed_dshape[0], &squeezed_dshape[squeezed_dshape.ndim()-1]);
@@ -2635,7 +2642,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs,
       back_calculate_dshape[real_axis] += (*out_attrs)[i][real_axis];
     }
   }
-  for (int d = real_axis + 1; d < static_cast<int>(ishape.ndim()); ++d) {
+  for (int d = real_axis + 1; d < ishape.ndim(); ++d) {
     if (param.squeeze_axis) {
       back_calculate_dshape[d] = (*out_attrs)[0][d - 1];
     } else {
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index 1431fef13594..b80c9a54510f 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -410,8 +410,8 @@ Examples::
           "transpose", n, ograds, {},
           std::unordered_map<std::string, std::string>());
     } else {
-      mxnet::TShape axes = mxnet::TShape(param.axes.ndim());
-      for (index_t i = 0; i < axes.ndim(); ++i) {
+      mxnet::TShape axes = mxnet::TShape(param.axes.ndim(), -1);
+      for (int i = 0; i < axes.ndim(); ++i) {
         axes[param.axes[i]] = i;
       }
       std::ostringstream os;
diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h
index 5a95e05ffb65..1dda90104205 100644
--- a/src/operator/tensor/ordering_op-inl.h
+++ b/src/operator/tensor/ordering_op-inl.h
@@ -149,7 +149,7 @@ inline void ParseTopKParam(const mxnet::TShape& src_shape, const TopKParam& para
                                                   << src_shape.ndim() << ", found axis=" << *axis;
     *batch_size = src_shape.Size() / src_shape[*axis];
     *element_num = src_shape[*axis];
-    if (*axis != static_cast<int>(src_shape.ndim()) - 1) {
+    if (*axis != src_shape.ndim() - 1) {
       *do_transpose = true;
     }
   }
diff --git a/src/operator/tensor/slice-inl.h b/src/operator/tensor/slice-inl.h
index 4e94cbeda46c..78a2bd8c7b45 100644
--- a/src/operator/tensor/slice-inl.h
+++ b/src/operator/tensor/slice-inl.h
@@ -34,15 +34,15 @@ namespace mxnet {
 namespace op {
 
 struct SliceParam : public dmlc::Parameter<SliceParam> {
-  nnvm::Tuple<dmlc::optional<int>> begin, end;
-  nnvm::Tuple<dmlc::optional<int>> step;
+  mxnet::Tuple<dmlc::optional<int>> begin, end;
+  mxnet::Tuple<dmlc::optional<int>> step;
   DMLC_DECLARE_PARAMETER(SliceParam) {
     DMLC_DECLARE_FIELD(begin)
     .describe("starting indices for the slice operation, supports negative indices.");
     DMLC_DECLARE_FIELD(end)
     .describe("ending indices for the slice operation, supports negative indices.");
     DMLC_DECLARE_FIELD(step)
-    .set_default(nnvm::Tuple<dmlc::optional<int>>())
+    .set_default(mxnet::Tuple<dmlc::optional<int>>())
     .describe("step for the slice operation, supports negative values.");
   }
   bool operator==(const SliceParam& other) const {
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 59d72d4b18b6..1f8fc993b3fc 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -2265,7 +2265,7 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape):
 
         for i in range(len(src_shape)):
             holdout_src_shape = list(src_shape)
-            holdout_src_shape[i] = 0
+            holdout_src_shape[i] = -1
             holdout_src_shape = tuple(holdout_src_shape)
             net = mx.sym.Variable('data')
             net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape))

From 3449ef8ae9ca1bb7505185926a9ec20ad8cdcbf0 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Fri, 22 Mar 2019 13:20:02 -0700
Subject: [PATCH 04/32] [numpy] Fix unit tests after introducing numpy
 compatible shapes (#14487)

* Fix infer shape rnn

* Fix boolean mask and custom op unit tests

* Fix multi proposal

* Fix diag

* Add global switch for backward compatibility and fix infer shape bugs

* Fix slice op infer shape

* Fix rnn infer shape

* Add util funcs for ndim_is_known and dim_size_is_known

* Revert rnn_cell.py
---
 include/mxnet/c_api.h                         | 15 +++-
 include/mxnet/imperative.h                    | 16 ++++
 include/mxnet/tuple.h                         | 28 +++++-
 python/mxnet/ndarray/ndarray.py               |  5 +-
 python/mxnet/numpy/__init__.py                | 46 ++++++++++
 python/mxnet/operator.py                      | 26 +++---
 python/mxnet/symbol/symbol.py                 | 22 +++--
 src/c_api/c_api.cc                            | 24 ++++--
 src/c_api/c_api_common.h                      |  2 +-
 src/c_api/c_api_executor.cc                   |  7 ++
 src/c_api/c_api_ndarray.cc                    | 12 +++
 src/c_api/c_api_symbolic.cc                   |  9 +-
 src/common/utils.h                            | 54 ++++++++++++
 src/executor/infer_graph_attr_pass.cc         | 18 +++-
 src/imperative/imperative.cc                  |  4 +-
 src/imperative/imperative_utils.h             | 19 ++++-
 src/operator/batch_norm_v1-inl.h              |  2 +-
 src/operator/contrib/multi_proposal-inl.h     |  2 +-
 src/operator/control_flow.cc                  |  6 +-
 src/operator/convolution_v1-inl.h             |  2 +-
 src/operator/custom/custom.cc                 | 18 ++--
 src/operator/nn/batch_norm.cc                 |  2 +-
 src/operator/nn/concat.cc                     | 25 +++---
 src/operator/nn/convolution.cc                | 85 +++++++++++--------
 src/operator/nn/cudnn/cudnn_batch_norm.cc     |  2 +-
 src/operator/nn/dropout.cc                    |  2 +-
 src/operator/nn/fully_connected.cc            |  2 +-
 src/operator/nn/layer_norm.cc                 |  2 +-
 src/operator/nn/pooling.cc                    |  2 +-
 src/operator/operator_common.h                |  6 +-
 src/operator/pooling_v1-inl.h                 |  2 +-
 src/operator/quantization/quantized_concat.cc |  2 +-
 src/operator/random/unique_sample_op.h        |  2 +-
 src/operator/slice_channel-inl.h              | 15 ++--
 src/operator/softmax_output-inl.h             |  2 +-
 src/operator/softmax_output.cc                |  4 +-
 src/operator/svm_output-inl.h                 |  2 +-
 src/operator/tensor/broadcast_reduce_op.h     |  8 +-
 src/operator/tensor/diag_op-inl.h             |  4 +-
 .../tensor/elemwise_binary_broadcast_op.h     |  2 +-
 src/operator/tensor/init_op.h                 | 14 +--
 src/operator/tensor/matrix_op-inl.h           | 24 +++---
 tests/python/unittest/test_operator.py        |  8 +-
 43 files changed, 402 insertions(+), 152 deletions(-)

diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index cdc20b1e3970..102e4b2c14a2 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -182,7 +182,7 @@ typedef int (*CustomOpFBFunc)(int /*size*/, void** /*ptrs*/, int* /*tags*/,
 typedef int (*CustomOpDelFunc)(void* /*state*/);
 typedef int (*CustomOpListFunc)(char*** /*args*/, void* /*state*/);
 typedef int (*CustomOpInferShapeFunc)(int /*num_input*/, int* /*ndims*/,
-                                      unsigned** /*shapes*/, void* /*state*/);
+                                      int** /*shapes*/, void* /*state*/);
 typedef int (*CustomOpInferStorageTypeFunc)(int /*num_input*/, int* /*stypes*/, void* /*state*/);
 typedef int (*CustomOpBackwardInferStorageTypeFunc)(int /*num_input*/,
                                                     int * /*stypes*/,
@@ -1048,6 +1048,19 @@ MXNET_DLL int MXAutogradIsRecording(bool* curr);
  * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXAutogradIsTraining(bool* curr);
+/*!
+ * \brief get whether numpy compatibility is on
+ * \param curr returns the current status
+ * \return 0 when success, -1 when failure happens
+ */
+MXNET_DLL int MXIsNumpyCompatible(bool* curr);
+/*!
+ * \brief set numpy compatibility switch
+ * \param is_np_comp 1 when numpy compatibility is on, 0 when off
+ * \param prev returns the previous status before this set
+ * \return 0 when success, -1 when failure happens
+ */
+MXNET_DLL int MXSetIsNumpyCompatible(int is_np_comp, int* prev);
 /*!
  * \brief mark NDArrays as variables to compute gradient for autograd
  * \param num_var number of variable NDArrays
diff --git a/include/mxnet/imperative.h b/include/mxnet/imperative.h
index 52cedb2fadd9..ad209913ac53 100644
--- a/include/mxnet/imperative.h
+++ b/include/mxnet/imperative.h
@@ -97,6 +97,16 @@ class Imperative {
       is_recording_ = is_recording;
       return old;
   }
+  /*! brief whether numpy compatibility is on. */
+  bool is_np_comp() const {
+    return is_np_comp_;
+  }
+  /*! brief turn on or turn off numpy compatibility switch. */
+  bool set_is_np_comp(bool is_np_comp) {
+    bool old = is_np_comp_;
+    is_np_comp_ = is_np_comp;
+    return old;
+  }
   /*! \brief to record operator, return corresponding node. */
   void RecordOp(nnvm::NodeAttrs&& attrs,
                 const std::vector<NDArray*>& inputs,
@@ -165,9 +175,15 @@ class Imperative {
 #if DMLC_CXX11_THREAD_LOCAL
   static thread_local bool is_train_;
   static thread_local bool is_recording_;
+  // TOOD(junwu): Added numpy compatibility switch for backward compatibility.
+  // Delete it in the next major release.
+  static thread_local bool is_np_comp_;
 #else
   static MX_THREAD_LOCAL bool is_train_;
   static MX_THREAD_LOCAL bool is_recording_;
+  // TOOD(junwu): Added numpy compatibility switch for backward compatibility.
+  // Delete it in the next major release.
+  static MX_THREAD_LOCAL bool is_np_comp_;
 #endif
   /*! \brief node count used for naming */
   std::atomic<uint64_t> node_count_{0};
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index 49852f73fbac..d83e843033e3 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -607,12 +607,36 @@ class TShape : public Tuple<dim_t> {
 #endif
 };
 
+/*! brief check if a shape's ndim is known. */
+inline bool ndim_is_known(const int ndim) {
+  CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim;
+  return ndim != -1;
+}
+
+/*! brief check if a shape's ndim is known. */
+inline bool ndim_is_known(const TShape& x) {
+  return ndim_is_known(x.ndim());
+}
+
+/*! brief check if a shape's dim size is known. */
+inline bool dim_size_is_known(const int dim_size) {
+  CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size;
+  return dim_size != -1;
+}
+
+/*! brief check if a shape's dim size is known. */
+inline bool dim_size_is_known(const TShape& x, const int idx) {
+  CHECK(idx >= 0 && idx < x.ndim())
+      << "idx = " << idx << " exceeds shape dimension range [0, " << x.ndim() << ")";
+  return dim_size_is_known(x[idx]);
+}
+
 /*! brief check if shape is known using the NumPy compatible definition.
  * zero-dim and zero-size tensors are valid. -1 means unknown.*/
 inline bool shape_is_known(const TShape& x) {
-  if (x.ndim() == -1) return false;
+  if (!ndim_is_known(x)) return false;
   for (int i = 0; i < x.ndim(); ++i) {
-    if (x[i] == -1) return false;
+    if (!dim_size_is_known(x, i)) return false;
   }
   return true;
 }
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 1200c4a72c00..44334fc7f43e 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -1849,7 +1849,10 @@ def shape(self):
         pdata = ctypes.POINTER(mx_int)()
         check_call(_LIB.MXNDArrayGetShape(
             self.handle, ctypes.byref(ndim), ctypes.byref(pdata)))
-        return tuple(pdata[:ndim.value]) # pylint: disable=invalid-slice-index
+        if ndim.value == -1:
+            return None
+        else:
+            return tuple(pdata[:ndim.value])  # pylint: disable=invalid-slice-index
 
 
     @property
diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py
index b1139a05791d..e0dfda10113e 100644
--- a/python/mxnet/numpy/__init__.py
+++ b/python/mxnet/numpy/__init__.py
@@ -17,4 +17,50 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import ctypes
+from ..base import _LIB, check_call
+
 __all__ = []
+
+
+def set_np_comp(is_np_comp):
+    prev = ctypes.c_int()
+    check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(is_np_comp), ctypes.byref(prev)))
+    return bool(prev.value)
+
+
+def is_np_comp():
+    curr = ctypes.c_bool()
+    check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr)))
+    return curr.value
+
+
+class _NumpyCompatibilityStateScope(object):
+    """Scope for managing numpy compatibility state.
+
+    Example::
+
+        with _NumpyCompatibilityStateScope(True):
+            y = model(x)
+            backward([y])
+
+    """
+    def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name
+        self._enter_is_np_comp = is_np_comp
+        self._prev_is_np_comp = None
+
+    def __enter__(self):
+        if self._enter_is_np_comp is not None:
+            self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp)
+
+    def __exit__(self, ptype, value, trace):
+        if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp:
+            set_np_comp(self._prev_is_np_comp)
+
+
+def enable_np_comp():
+    return _NumpyCompatibilityStateScope(True)
+
+
+def disable_np_comp():
+    return _NumpyCompatibilityStateScope(False)
diff --git a/python/mxnet/operator.py b/python/mxnet/operator.py
index e8fa571d44db..2c69b9b46521 100644
--- a/python/mxnet/operator.py
+++ b/python/mxnet/operator.py
@@ -28,7 +28,7 @@
 from ctypes import CFUNCTYPE, POINTER, Structure, pointer
 from ctypes import c_void_p, c_int, c_char, c_char_p, cast, c_bool
 
-from .base import _LIB, check_call, MXCallbackList, c_array, c_array_buf
+from .base import _LIB, check_call, MXCallbackList, c_array, c_array_buf, mx_int
 from .base import c_str, mx_uint, mx_float, ctypes2numpy_shared, NDArrayHandle, py_str
 from . import symbol, context
 from .ndarray import NDArray, _DTYPE_NP_TO_MX, _DTYPE_MX_TO_NP
@@ -164,7 +164,7 @@ def get_symbol(self, *args, **kwargs):
         fb_functype = CFUNCTYPE(None, c_int, POINTER(POINTER(mx_float)), POINTER(c_int),
                                 POINTER(POINTER(mx_uint)), POINTER(c_int), c_void_p)
         infer_functype = CFUNCTYPE(None, c_int, POINTER(c_int),
-                                   POINTER(POINTER(mx_uint)), c_void_p)
+                                   POINTER(POINTER(mx_int)), c_void_p)
         list_functype = CFUNCTYPE(None, POINTER(POINTER(POINTER(c_char))), c_void_p)
         class NumpyOpInfo(Structure):
             """Structure that holds Callback information. Passed to NumpyOpProp"""
@@ -214,9 +214,9 @@ def infer_shape_entry(num_tensor, tensor_dims,
             assert len(ishape) == n_in
             rshape = list(ishape) + list(oshape)
             for i in range(n_in+n_out):
-                tensor_shapes[i] = cast(c_array_buf(mx_uint,
-                                                    array('I', rshape[i])),
-                                        POINTER(mx_uint))
+                tensor_shapes[i] = cast(c_array_buf(mx_int,
+                                                    array('i', rshape[i])),
+                                        POINTER(mx_int))
                 tensor_dims[i] = len(rshape[i])
 
         def list_outputs_entry(out, _):
@@ -266,7 +266,7 @@ def __init__(self, need_top_grad=True):
     def get_symbol(self, *args, **kwargs):
         fb_functype = CFUNCTYPE(c_bool, c_int, POINTER(c_void_p), POINTER(c_int), c_void_p)
         infer_functype = CFUNCTYPE(c_bool, c_int, POINTER(c_int),
-                                   POINTER(POINTER(mx_uint)), c_void_p)
+                                   POINTER(POINTER(mx_int)), c_void_p)
         list_functype = CFUNCTYPE(c_bool, POINTER(POINTER(POINTER(c_char))), c_void_p)
         deps_functype = CFUNCTYPE(c_bool, c_int_p, c_int_p, c_int_p,
                                   c_int_p, POINTER(c_int_p), c_void_p)
@@ -335,9 +335,9 @@ def infer_shape_entry(num_tensor, tensor_dims,
                 assert len(ishape) == n_in
                 rshape = list(ishape) + list(oshape)
                 for i in range(n_in+n_out):
-                    tensor_shapes[i] = cast(c_array_buf(mx_uint,
-                                                        array('I', rshape[i])),
-                                            POINTER(mx_uint))
+                    tensor_shapes[i] = cast(c_array_buf(mx_int,
+                                                        array('i', rshape[i])),
+                                            POINTER(mx_int))
                     tensor_dims[i] = len(rshape[i])
             except Exception:
                 print('Error in NDArrayOp.infer_shape: %s' % traceback.format_exc())
@@ -698,7 +698,7 @@ def do_register(prop_cls):
         del_functype = CFUNCTYPE(c_int, c_void_p)
 
         infershape_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int),
-                                        POINTER(POINTER(mx_uint)), c_void_p)
+                                        POINTER(POINTER(mx_int)), c_void_p)
         infertype_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), c_void_p)
         inferstorage_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), c_void_p)
         inferstorage_backward_functype = CFUNCTYPE(c_int, c_int, POINTER(c_int), \
@@ -747,9 +747,9 @@ def infer_shape_entry(num_tensor, tensor_dims,
                         "shapes, got %d."%(n_aux, len(ashape))
                     rshape = list(ishape) + list(oshape) + list(ashape)
                     for i in range(n_in+n_out+n_aux):
-                        tensor_shapes[i] = cast(c_array_buf(mx_uint,
-                                                            array('I', rshape[i])),
-                                                POINTER(mx_uint))
+                        tensor_shapes[i] = cast(c_array_buf(mx_int,
+                                                            array('i', rshape[i])),
+                                                POINTER(mx_int))
                         tensor_dims[i] = len(rshape[i])
 
                     infer_shape_entry._ref_holder = [tensor_shapes]
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index b9432d372bc7..b2b8261be51d 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -42,6 +42,7 @@
 from ..ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID
 from ..ndarray import _ndarray_cls
 from ..executor import Executor
+from ..numpy import is_np_comp
 from . import _internal
 from . import op
 from ._internal import SymbolBase, _set_symbol_class
@@ -1078,7 +1079,11 @@ def infer_shape(self, *args, **kwargs):
                 arg_names = self.list_arguments()
                 unknowns = []
                 for name, shape in zip(arg_names, arg_shapes):
-                    if not shape or not _numpy.prod(shape):
+                    if is_np_comp():
+                        shape_is_none = not shape or -1 in shape
+                    else:
+                        shape_is_none = not shape or 0 in shape
+                    if shape_is_none:
                         if len(unknowns) >= 10:
                             unknowns.append('...')
                             break
@@ -1204,12 +1209,15 @@ def _infer_shape_impl(self, partial, *args, **kwargs):
             ctypes.byref(aux_shape_data),
             ctypes.byref(complete)))
         if complete.value != 0:
-            arg_shapes = [
-                tuple(arg_shape_data[i][:arg_shape_ndim[i]]) for i in range(arg_shape_size.value)]
-            out_shapes = [
-                tuple(out_shape_data[i][:out_shape_ndim[i]]) for i in range(out_shape_size.value)]
-            aux_shapes = [
-                tuple(aux_shape_data[i][:aux_shape_ndim[i]]) for i in range(aux_shape_size.value)]
+            arg_shapes = [tuple(arg_shape_data[i][:arg_shape_ndim[i]])
+                          if arg_shape_ndim[i] >= 0 else None
+                          for i in range(arg_shape_size.value)]
+            out_shapes = [tuple(out_shape_data[i][:out_shape_ndim[i]])
+                          if out_shape_ndim[i] >= 0 else None
+                          for i in range(out_shape_size.value)]
+            aux_shapes = [tuple(aux_shape_data[i][:aux_shape_ndim[i]])
+                          if aux_shape_ndim[i] >= 0 else None
+                          for i in range(aux_shape_size.value)]
             return (arg_shapes, out_shapes, aux_shapes)
         else:
             return (None, None, None)
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 614732081a98..f1571312dceb 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -44,9 +44,11 @@
 #include "mxnet/rtc.h"
 #include "mxnet/storage.h"
 #include "mxnet/libinfo.h"
+#include "mxnet/imperative.h"
 #include "./c_api_common.h"
 #include "../operator/custom/custom-inl.h"
 #include "../operator/tensor/matrix_op-inl.h"
+#include "../common/utils.h"
 
 using namespace mxnet;
 
@@ -499,15 +501,23 @@ int MXNDArrayGetShape(NDArrayHandle handle,
   API_BEGIN();
   NDArray *arr = static_cast<NDArray*>(handle);
   if (!arr->is_none()) {
-    const mxnet::TShape &s = arr->shape();
+    mxnet::TShape s = arr->shape();
+    if (!Imperative::Get()->is_np_comp()) {
+      common::ConvertToLegacyShape(&s);
+    }
     *out_dim = s.ndim();
-    CHECK_GE(s.ndim(), 0);
-    std::vector<int>& buffer = ret->arg_shape_buffer;
-    buffer.resize(s.ndim());
-    mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data());
-    *out_pdata = buffer.data();
+    if (s.ndim() >= 0) {
+      std::vector<int> &buffer = ret->arg_shape_buffer;
+      buffer.resize(s.ndim());
+      mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data());
+      *out_pdata = buffer.data();
+    }
   } else {
-    *out_dim = 0;
+    if (Imperative::Get()->is_np_comp()) {
+      *out_dim = -1;
+    } else {
+      *out_dim = 0;
+    }
   }
   API_END();
 }
diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h
index 690a1eae1055..329dc9adc7cf 100644
--- a/src/c_api/c_api_common.h
+++ b/src/c_api/c_api_common.h
@@ -91,7 +91,7 @@ struct MXAPIThreadLocalEntry {
     data->resize(shapes.size());
     size_t size = 0;
     for (const auto& s : shapes) {
-      CHECK_GE(s.ndim(), 0);
+      if (s.ndim() > 0);
       size += s.ndim();
     }
     buffer->resize(size);
diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc
index d0006383bdc8..fc59463ab689 100644
--- a/src/c_api/c_api_executor.cc
+++ b/src/c_api/c_api_executor.cc
@@ -25,8 +25,10 @@
 #include <mxnet/base.h>
 #include <mxnet/c_api.h>
 #include <mxnet/executor.h>
+#include <mxnet/imperative.h>
 #include "./c_api_common.h"
 #include "../executor/graph_executor.h"
+#include "../common/utils.h"
 #if MXNET_USE_TENSORRT
 #include "../executor/trt_graph_executor.h"
 #endif  // MXNET_USE_TENSORRT
@@ -416,6 +418,11 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
     CHECK(p.second) << "Duplicate shapes are provided for argument "
       << provided_arg_shape_names[i] << " in simple_bind";
   }
+  if (!Imperative::Get()->is_np_comp()) {
+    for (auto &kv : arg_shape_map) {
+      common::ConvertToNumpyShape(&kv.second);
+    }
+  }
 
   // create para name set for sharing data array memory
   std::unordered_set<std::string> shared_arg_name_set(num_shared_arg_names);
diff --git a/src/c_api/c_api_ndarray.cc b/src/c_api/c_api_ndarray.cc
index 18f6c411e039..0e136b03ecd7 100644
--- a/src/c_api/c_api_ndarray.cc
+++ b/src/c_api/c_api_ndarray.cc
@@ -276,6 +276,18 @@ int MXAutogradSetIsRecording(int is_recording, int* prev) {
   API_END();
 }
 
+int MXIsNumpyCompatible(bool* curr) {
+  API_BEGIN();
+  *curr = Imperative::Get()->is_np_comp();
+  API_END();
+}
+
+int MXSetIsNumpyCompatible(int is_np_comp, int* prev) {
+  API_BEGIN();
+  *prev = Imperative::Get()->set_is_np_comp(static_cast<bool>(is_np_comp));
+  API_END();
+}
+
 int MXAutogradMarkVariables(mx_uint num_var,
                             NDArrayHandle *var_handles,
                             mx_uint *reqs_array,
diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc
index 8b961f46c39e..98034417abae 100644
--- a/src/c_api/c_api_symbolic.cc
+++ b/src/c_api/c_api_symbolic.cc
@@ -24,6 +24,7 @@
  */
 #include "mxnet/base.h"
 #include "mxnet/c_api.h"
+#include "mxnet/imperative.h"
 #include "nnvm/c_api.h"
 #include "nnvm/pass.h"
 #include "nnvm/pass_functions.h"
@@ -543,8 +544,14 @@ int MXSymbolInferShape(SymbolHandle sym,
     throw dmlc::Error(err.msg);
   }
 
+  // if use legacy shape definition, need to convert numpy shape to legacy shape
+  mxnet::ShapeVector shapes = g.GetAttr<mxnet::ShapeVector>("shape");
+  if (!Imperative::Get()->is_np_comp()) {
+    common::ConvertToLegacyShape(&shapes);
+  }
+
   // copy back
-  CopyAttr(g.indexed_graph(), g.GetAttr<mxnet::ShapeVector>("shape"),
+  CopyAttr(g.indexed_graph(), shapes,
            &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes));
 
   // copy data back
diff --git a/src/common/utils.h b/src/common/utils.h
index 8e6966952890..f3df2e15ec32 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -734,6 +734,60 @@ inline void ParallelCopy(DType* dst, const DType* src, index_t size) {
   }
 }
 
+/*!
+ * \brief If numpy compatibility is turned off (default), the shapes passed in
+ * by users follow the legacy shape definition:
+ * 1. 0 ndim means the shape is completely unknown.
+ * 2. 0 dim size means the dim size is unknown.
+ * We need to convert those shapes to use the numpy shape definition:
+ * 1. 0 ndim means it's a scalar tensor.
+ * 2. -1 ndim means the shape is unknown.
+ * 3. 0 dim size means no elements in that dimension.
+ * 4. -1 dim size means the dimension's size is unknown.
+ * so that operator's infer shape function can work in backend.
+ * \param shape to be converted.
+ */
+inline void ConvertToNumpyShape(mxnet::TShape* shape) {
+  if (shape->ndim() == 0) {  // legacy shape ndim = 0 means unknown
+    *shape = mxnet::TShape();  // unknown shape ndim = -1
+  } else {
+    for (int j = 0; j < shape->ndim(); ++j) {
+      CHECK_GE((*shape)[j], 0) << "Legacy shape cannot have dim size < 0";
+      if ((*shape)[j] == 0) {  // legacy shape dim_size = 0 means unknown
+        (*shape)[j] = -1;  // unknown dim size = -1
+      }
+    }
+  }
+}
+
+inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) {
+  for (size_t i = 0; i < shapes->size(); ++i) {
+    ConvertToNumpyShape(&(shapes->at(i)));
+  }
+}
+
+/*!
+ * \brief This is function is used to convert shapes returned by
+ * the infer shape functions/pass to the legacy shape definition.
+ */
+inline void ConvertToLegacyShape(mxnet::TShape* shape) {
+  if (!mxnet::ndim_is_known(*shape)) {
+    *shape = mxnet::TShape(0);
+  } else {
+    for (int j = 0; j < shape->ndim(); ++j) {
+      if (!mxnet::dim_size_is_known(*shape, j)) {
+        (*shape)[j] = 0;
+      }
+    }
+  }
+}
+
+inline void ConvertToLegacyShape(mxnet::ShapeVector* shapes) {
+  for (size_t i = 0; i < shapes->size(); ++i) {
+    ConvertToLegacyShape(&(shapes->at(i)));
+  }
+}
+
 }  // namespace common
 }  // namespace mxnet
 #endif  // MXNET_COMMON_UTILS_H_
diff --git a/src/executor/infer_graph_attr_pass.cc b/src/executor/infer_graph_attr_pass.cc
index 3a5c5ab9806f..fa7aee518486 100644
--- a/src/executor/infer_graph_attr_pass.cc
+++ b/src/executor/infer_graph_attr_pass.cc
@@ -24,6 +24,7 @@
 
 #include <mxnet/op_attr_types.h>
 #include <mxnet/graph_attr_types.h>
+#include <mxnet/imperative.h>
 #include "./exec_pass.h"
 #include "../operator/operator_common.h"
 #include "../common/exec_utils.h"
@@ -467,6 +468,12 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret,
   std::vector<AttrType> ishape, oshape;
   // whether a shape is dynamic
   std::vector<int> is_dynamic(rshape.size(), 0);
+
+  // convert to numpy compatible shape to use operator's infer shape function
+  if (!Imperative::Get()->is_np_comp()) {
+    common::ConvertToNumpyShape(&rshape);
+  }
+
   // inference step function for nid
   auto infer_step = [&](uint32_t nid, bool last_iter) {
     const auto& inode = idx[nid];
@@ -483,6 +490,9 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret,
         if (it != inode.source->attrs.dict.end()) {
           std::istringstream is(it->second);
           CHECK(is >> rshape[out_ent_id]) << "Invalid attribute";
+          if (!Imperative::Get()->is_np_comp()) {
+            common::ConvertToNumpyShape(&rshape[out_ent_id]);
+          }
         }
       }
       // assign a default value to node attribute
@@ -546,7 +556,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret,
       bool is_input_dynamic_shape = false;
       for (uint32_t i = 0; i < ishape.size(); ++i) {
         ishape[i] = rshape[idx.entry_id(inode.inputs[i])];
-        if (ishape[i].ndim() == 0 && is_dynamic[idx.entry_id(inode.inputs[i])]) {
+        if (!mxnet::ndim_is_known(ishape[i]) && is_dynamic[idx.entry_id(inode.inputs[i])]) {
           is_input_dynamic_shape = true;
         }
         if (fis_none(ishape[i])) forward_known = false;
@@ -563,7 +573,7 @@ nnvm::Graph InferShapeAttr(nnvm::Graph &&ret,
       auto finfer = finfer_shape.get(inode.source->op(), fdefault);
       if (finfer == nullptr || is_input_dynamic_shape) {
         for (uint32_t i = 0; i < oshape.size(); ++i) {
-          if (oshape[i].ndim() == 0) {
+          if (!mxnet::ndim_is_known(oshape[i].ndim())) {
             is_dynamic[idx.entry_id(nid, i)] = 1;
           }
         }
@@ -650,12 +660,12 @@ nnvm::Graph InferShape(nnvm::Graph&& graph,
       "shape", "shape_num_unknown_nodes",
       [](const mxnet::TShape& s) { return !mxnet::shape_is_known(s); },
       [](const mxnet::TShape& s) {
-        if (s.ndim() == -1) {
+        if (!mxnet::ndim_is_known(s)) {
           return static_cast<size_t>(1);
         }
         size_t ret = 0;
         for (const auto& val : s) {
-          if (val == -1) {
+          if (!mxnet::dim_size_is_known(val)) {
             ++ret;
           }
         }
diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc
index 3e5b3987522c..b027de0a0f6f 100644
--- a/src/imperative/imperative.cc
+++ b/src/imperative/imperative.cc
@@ -25,9 +25,11 @@ namespace mxnet {
 #if DMLC_CXX11_THREAD_LOCAL
 thread_local bool Imperative::is_train_ = false;
 thread_local bool Imperative::is_recording_ = false;
+thread_local bool Imperative::is_np_comp_ = false;
 #else
 MX_THREAD_LOCAL bool Imperative::is_train_ = false;
 MX_THREAD_LOCAL bool Imperative::is_recording_ = false;
+MX_THREAD_LOCAL bool Imperative::is_np_comp_ = false;
 #endif
 
 Imperative* Imperative::Get() {
@@ -109,7 +111,7 @@ OpStatePtr Imperative::Invoke(
   OpStatePtr ret = InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode);
   // the followinng loop is used for finding out the correct shape when some shapes are dynamic
   for (size_t i = 0; i < outputs.size(); i++) {
-    if (outputs[i]->shape().ndim() == 0) {
+    if (!shape_is_known(outputs[i]->shape())) {
       // the WaitToRead overhead here does not seem to be avoidable
       outputs[i]->WaitToRead();
       outputs[i]->SetShapeFromChunk();
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 071f4fa9dd0b..6864428d2559 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -121,7 +121,24 @@ inline void SetShapeType(const Context& ctx,
   if (!infershape.count(attrs.op)) {
     is_dynamic_shape_existing = true;
   } else {
-    CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes));
+    if (!Imperative::Get()->is_np_comp()) {
+      common::ConvertToNumpyShape(&in_shapes);
+      common::ConvertToNumpyShape(&out_shapes);
+    }
+    const bool success = infershape[attrs.op](attrs, &in_shapes, &out_shapes);
+    if (!success) {
+      std::stringstream os;
+      os << "Operator " << attrs.op->name << " inferring shapes failed.\n";
+      os << "input shapes:\n";
+      for (auto& nd : inputs) {
+        os << nd->shape() << '\n';
+      }
+      os << "output shapes:\n";
+      for (auto& nd : outputs) {
+        os << nd->shape() << '\n';
+      }
+      LOG(FATAL) << os.str();
+    }
     CHECK_EQ(out_shapes.size(), outputs.size());
   }
   // infer type
diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h
index 8016510090ab..89412357ac67 100644
--- a/src/operator/batch_norm_v1-inl.h
+++ b/src/operator/batch_norm_v1-inl.h
@@ -261,7 +261,7 @@ class BatchNormV1Prop : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3U) << "Input:[data, gamma, beta]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (!shape_is_known(dshape)) return false;
+    if (!mxnet::ndim_is_known(dshape)) return false;
     in_shape->at(1) = mxnet::TShape(Shape1(dshape[1]));
     in_shape->at(2) = mxnet::TShape(Shape1(dshape[1]));
     out_shape->clear();
diff --git a/src/operator/contrib/multi_proposal-inl.h b/src/operator/contrib/multi_proposal-inl.h
index a9afb8e4114e..4d278fb40645 100644
--- a/src/operator/contrib/multi_proposal-inl.h
+++ b/src/operator/contrib/multi_proposal-inl.h
@@ -108,7 +108,7 @@ class MultiProposalProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 3) << "Input:[cls_prob, bbox_pred, im_info]";
     const mxnet::TShape &dshape = in_shape->at(proposal::kClsProb);
-    if (!mxnet::op::shape_is_none(dshape)) return false;
+    if (mxnet::op::shape_is_none(dshape)) return false;
     Shape<4> bbox_pred_shape;
     bbox_pred_shape = Shape4(dshape[0], dshape[1] * 2, dshape[2], dshape[3]);
     SHAPE_ASSIGN_CHECK(*in_shape, proposal::kBBoxPred,
diff --git a/src/operator/control_flow.cc b/src/operator/control_flow.cc
index 9ba3b5471c60..4c0d67bb08f7 100644
--- a/src/operator/control_flow.cc
+++ b/src/operator/control_flow.cc
@@ -301,7 +301,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
   for (int i = 0; i < params.num_out_data; i++) {
     mxnet::TShape shape = subg_out_shape[i];
     // If we don't have shape info, we don't need to do anything.
-    if (!shape_is_known(shape))
+    if (!mxnet::ndim_is_known(shape))
       continue;
     subg_out_shape[i] = SliceFirstDim(shape);
   }
@@ -317,7 +317,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
   for (int i = 0; i < params.num_out_data; i++) {
     // If the output shape isn't inferred, we don't need to propogate the info.
     const auto& g_out_shape = subg_out_shape[i];
-    if (!shape_is_known(g_out_shape))
+    if (!mxnet::ndim_is_known(g_out_shape))
       continue;
 
     auto out = mxnet::TShape(g_out_shape.ndim() + 1, -1);
@@ -336,7 +336,7 @@ static bool ForeachShape(const nnvm::NodeAttrs& attrs,
     const auto &shape = subg_in_shape[loc];
     // If the input data shape isn't inferred, we don't need to propogate the
     // info.
-    if (!shape_is_known(shape))
+    if (!mxnet::ndim_is_known(shape))
       continue;
 
     if (data_1d[i]) {
diff --git a/src/operator/convolution_v1-inl.h b/src/operator/convolution_v1-inl.h
index 0d6ffd7e895e..080c718dc9bf 100644
--- a/src/operator/convolution_v1-inl.h
+++ b/src/operator/convolution_v1-inl.h
@@ -405,7 +405,7 @@ class ConvolutionV1Prop : public OperatorProperty {
     // CHECK_EQ(out_shape->size(), 1) << "Output: [output]";
     out_shape->resize(1, mxnet::TShape());
     const mxnet::TShape &dshp = (*in_shape)[conv_v1::kData];
-    if (!shape_is_known(dshp)) return false;
+    if (!mxnet::ndim_is_known(dshp)) return false;
     if (param_.kernel.ndim() == 2) {
       // 2d conv_v1
       CHECK_EQ(dshp.ndim(), 4U) \
diff --git a/src/operator/custom/custom.cc b/src/operator/custom/custom.cc
index 46249c9bbcc6..412bfa1bc3aa 100644
--- a/src/operator/custom/custom.cc
+++ b/src/operator/custom/custom.cc
@@ -128,17 +128,21 @@ bool InferShape(const NodeAttrs& attrs,
   const CustomParam& params = nnvm::get<CustomParam>(attrs.parsed);
 
   size_t total = params.num_args + params.num_outs + params.num_auxs;
-  std::vector<uint32_t*> shapes(total);
+  std::vector<int*> shapes(total);
   std::vector<int> ndims(total);
   size_t buff_size = 0;
-  for (const auto& i : *in_shape) buff_size += i.ndim();
-  std::vector<uint32_t> buff(buff_size);
-  uint32_t *ptr = buff.data();
+  for (const auto& i : *in_shape) {
+    if (i.ndim() > 0) {
+      buff_size += i.ndim();
+    }
+  }
+  std::vector<int> buff(buff_size);
+  int *ptr = buff.data();
   for (size_t i = 0; i < in_shape->size(); ++i) {
     shapes[i] = ptr;
     ndims[i] = (*in_shape)[i].ndim();
-    for (size_t j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) {
-      *ptr = static_cast<uint32_t>((*in_shape)[i][j]);
+    for (int j = 0; j < (*in_shape)[i].ndim(); ++j, ++ptr) {
+      *ptr = (*in_shape)[i][j];
     }
   }
 
@@ -263,7 +267,7 @@ OpStatePtr CreateState(const NodeAttrs& attrs, Context ctx,
   for (size_t i = 0; i < in_shape.size(); ++i) {
     shapes[i] = ptr;
     ndims[i] = in_shape[i].ndim();
-    for (size_t j = 0; j < in_shape[i].ndim(); ++j, ++ptr) {
+    for (int j = 0; j < in_shape[i].ndim(); ++j, ++ptr) {
       *ptr = static_cast<uint32_t>(in_shape[i][j]);
     }
   }
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 590b1b428023..622952cc4bc5 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -332,7 +332,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs,
 
   const int channelCount = dshape[channelAxis];
 
-  if (!shape_is_known(dshape)) {
+  if (!mxnet::ndim_is_known(dshape)) {
     return false;
   }
 
diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc
index 5435bd815334..b534ee58e85c 100644
--- a/src/operator/nn/concat.cc
+++ b/src/operator/nn/concat.cc
@@ -46,7 +46,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs,
     mxnet::TShape tmp = (*in_shape)[i];
     if (tmp.ndim() > 0) {
       axis = CheckAxis(param_.dim, tmp.ndim());
-      has_unknown_dim_size = tmp[axis] == -1 || has_unknown_dim_size;
+      has_unknown_dim_size = !mxnet::dim_size_is_known(tmp, axis) || has_unknown_dim_size;
       size += tmp[axis];
       tmp[axis] = -1;
       shape_assign(&dshape, tmp);
@@ -91,26 +91,27 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs,
   int axis = -1;
   for (int i = 0; i < param_.num_args; ++i) {
     mxnet::TShape tmp = (*in_shape)[i];
-    if (tmp.ndim()) {
+    if (tmp.ndim() > 0) {
       axis = CheckAxis(param_.dim, tmp.ndim());
-      if (tmp[axis] == 0) {
+      if (!mxnet::dim_size_is_known(tmp, axis)) {
         zero_indices.emplace_back(i);
       } else {
+        CHECK_GE(tmp[axis], 0);
         size += tmp[axis];
       }
-      tmp[axis] = 0;
+      tmp[axis] = -1;
       shape_assign(&dshape, tmp);
     }
   }
 
   mxnet::TShape tmp = (*out_shape)[0];
-  if (tmp.ndim()) {
+  if (tmp.ndim() > 0) {
     axis = CheckAxis(param_.dim, tmp.ndim());
-    tmp[axis] = 0;
+    tmp[axis] = -1;
     shape_assign(&dshape, tmp);
   }
 
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
 
   for (int i = 0; i < param_.num_args; ++i) {
     CHECK(shape_assign(&(*in_shape)[i], dshape))
@@ -120,21 +121,21 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs,
   if (zero_indices.empty()) dshape[axis] = size;
   CHECK(shape_assign(&(*out_shape)[0], dshape))
       << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0];
-  if ((*out_shape)[0][axis] != 0 && !zero_indices.empty()) {
+  if ((*out_shape)[0][axis] != -1 && !zero_indices.empty()) {
     int residual = (*out_shape)[0][axis] - size;
     CHECK_GE(residual, 0)
         << "Input size already exceeds output size. Residual: " << residual;
-    CHECK(zero_indices.size() <= 2 && zero_indices.size() >= 0)
+    CHECK(zero_indices.size() <= 2 && zero_indices.size() > 0)
         << "Expecting 1 or 2 inputs that need shape inference. Got: " << zero_indices.size();
-    bool need_infer = !(*out_shape)[0].Size();
+    bool need_infer = !shape_is_known((*out_shape)[0]);
     for (int i : zero_indices) {
       (*in_shape)[i][axis] = residual / zero_indices.size();
-      need_infer = need_infer || !(*in_shape)[i].Size();
+      need_infer = need_infer || !shape_is_known((*in_shape)[i]);
     }
     return !need_infer;
   }
 
-  return dshape.Size() != 0;
+  return shape_is_known(dshape);
 }
 
 static bool ConcatType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index dfbc89de7b0e..536e9a731171 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -96,24 +96,28 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
   // CHECK_EQ(out_shape->size(), 1) << "Output: [output]";
   out_shape->resize(1, mxnet::TShape());
   const mxnet::TShape &dshp = (*in_shape)[conv::kData];
-  if (!shape_is_known(dshp)) return false;
+  if (!mxnet::ndim_is_known(dshp)) return false;
 
   if (param_.kernel.ndim() == 1) {
     // 1d conv
     CHECK_EQ(dshp.ndim(), 3U) << "Input data should be 3D in batch-num_filter-x";
     Shape<3> dshape = ConvertLayout(dshp.get<3>(), param_.layout.value(), kNCW);
-    Shape<3> wshape = Shape3(param_.num_filter / param_.num_group, dshape[1] / param_.num_group,
+    Shape<3> wshape = Shape3(param_.num_filter / param_.num_group,
+        mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1,
         param_.kernel[0]);
     wshape = ConvertLayout(wshape, kNCW, param_.layout.value());
-    wshape[0] *= param_.num_group;
+    if (wshape[0] >= 0) {
+      wshape[0] *= param_.num_group;
+    }
     SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape);
     if (!param_.no_bias) {
       SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter));
     }
 
     const index_t dilated_ksize_x = param_.DilatedKernelSize(0);
-    CHECK_EQ(dshape[1] % param_.num_group, 0U) \
-      << "input num_filter must divide group size";
+    if (dshape[1] != -1) {
+      CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size";
+    }
     CHECK_EQ(param_.num_filter % param_.num_group, 0U) \
       << "output num_filter must divide group size";
     CHECK_GT(param_.kernel.Size(), 0U) \
@@ -125,21 +129,21 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
     Shape<3> oshape;
     oshape[0] = dshape[0];
     oshape[1] = param_.num_filter;
-    oshape[2] = dshape[2] ?
-      (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : 0;
+    oshape[2] = dshape[2] != -1 ?
+      (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_x) / param_.stride[0] + 1 : -1;
     SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCW, param_.layout.value()));
     // Perform incomplete shape inference. Fill in the missing values in data shape.
     // 1) We can always fill in the batch_size.
     // 2) We can back-calculate the input height/width if the corresponding stride is 1.
     oshape = ConvertLayout((*out_shape)[0].get<3>(), param_.layout.value(), kNCW);
     dshape[0] = oshape[0];
-    if (oshape[2] && param_.stride[0] == 1) {
+    if (oshape[2] != -1 && param_.stride[0] == 1) {
       dshape[2] = oshape[2] + dilated_ksize_x - 1 - 2 * param_.pad[0];
     }
     SHAPE_ASSIGN_CHECK(*in_shape, conv::kData,
         ConvertLayout(dshape, kNCW, param_.layout.value()));
     // Check whether the kernel sizes are valid
-    if (dshape[2] != 0) {
+    if (dshape[2] != -1) {
       CHECK_LE(dilated_ksize_x, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input";
     }
     return true;
@@ -149,10 +153,12 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
       << "Input data should be 4D in batch-num_filter-y-x";
     Shape<4> dshape = ConvertLayout(dshp.get<4>(), param_.layout.value(), kNCHW);
     Shape<4> wshape = Shape4(param_.num_filter / param_.num_group,
-        dshape[1] / param_.num_group,
+        mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1,
         param_.kernel[0], param_.kernel[1]);
     wshape = ConvertLayout(wshape, kNCHW, param_.layout.value());
-    wshape[0] *= param_.num_group;
+    if (wshape[0] >= 0) {
+      wshape[0] *= param_.num_group;
+    }
     SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape);
     if (!param_.no_bias) {
       SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter));
@@ -160,8 +166,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
 
     const index_t dilated_ksize_y = param_.DilatedKernelSize(0);
     const index_t dilated_ksize_x = param_.DilatedKernelSize(1);
-    CHECK_EQ(dshape[1] % param_.num_group, 0U) \
-      << "input num_filter must divide group size";
+    if (dshape[1] != -1) {
+      CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size";
+    }
     CHECK_EQ(param_.num_filter % param_.num_group, 0U) \
       << "output num_filter must divide group size";
     CHECK_GT(param_.kernel.Size(), 0U) \
@@ -173,29 +180,29 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
     Shape<4> oshape;
     oshape[0] = dshape[0];
     oshape[1] = param_.num_filter;
-    oshape[2] = dshape[2] ?
-      (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : 0;
-    oshape[3] = dshape[3] ?
-      (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : 0;
+    oshape[2] = dshape[2] != -1 ?
+      (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_y) / param_.stride[0] + 1 : -1;
+    oshape[3] = dshape[3] != -1 ?
+      (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_x) / param_.stride[1] + 1 : -1;
     SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value()));
     // Perform incomplete shape inference. Fill in the missing values in data shape.
     // 1) We can always fill in the batch_size.
     // 2) We can back-calculate the input height/width if the corresponding stride is 1.
     oshape = ConvertLayout((*out_shape)[0].get<4>(), param_.layout.value(), kNCHW);
     dshape[0] = oshape[0];
-    if (oshape[2] && param_.stride[0] == 1) {
+    if (oshape[2] != -1 && param_.stride[0] == 1) {
       dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param_.pad[0];
     }
-    if (oshape[3] && param_.stride[1] == 1) {
+    if (oshape[3] != -1 && param_.stride[1] == 1) {
       dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param_.pad[1];
     }
     SHAPE_ASSIGN_CHECK(*in_shape, conv::kData,
         ConvertLayout(dshape, kNCHW, param_.layout.value()));
     // Check whether the kernel sizes are valid
-    if (dshape[2] != 0) {
+    if (dshape[2] != -1) {
       CHECK_LE(dilated_ksize_y, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input";
     }
-    if (dshape[3] != 0) {
+    if (dshape[3] != -1) {
       CHECK_LE(dilated_ksize_x, AddPad(dshape[3], param_.pad[1])) << "kernel size exceed input";
     }
     return true;
@@ -204,10 +211,13 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(dshp.ndim(), 5U) \
       << "Input data should be 5D in batch-num_filter-depth-y-x";
     Shape<5> dshape = ConvertLayout(dshp.get<5>(), param_.layout.value(), kNCDHW);
-    Shape<5> wshape = Shape5(param_.num_filter / param_.num_group, dshape[1] / param_.num_group,
+    Shape<5> wshape = Shape5(param_.num_filter / param_.num_group,
+        mxnet::dim_size_is_known(dshape, 1) ? dshape[1] / param_.num_group : -1,
         param_.kernel[0], param_.kernel[1], param_.kernel[2]);
     wshape = ConvertLayout(wshape, kNCDHW, param_.layout.value());
-    wshape[0] *= param_.num_group;
+    if (wshape[0] >= 0) {
+      wshape[0] *= param_.num_group;
+    }
     SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape);
     if (!param_.no_bias) {
       SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter));
@@ -218,8 +228,9 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
     const index_t dilated_ksize_d = param_.DilatedKernelSize(0);
     const index_t dilated_ksize_y = param_.DilatedKernelSize(1);
     const index_t dilated_ksize_x = param_.DilatedKernelSize(2);
-    CHECK_EQ(dshape[1] % param_.num_group, 0U)
-      << "input num_filter must divide group size";
+    if (dshape[1] >= 0) {
+      CHECK_EQ(dshape[1] % param_.num_group, 0U) << "input num_filter must divide group size";
+    }
     CHECK_EQ(param_.num_filter % param_.num_group, 0U)
       << "output num_filter must divide group size";
     CHECK_GT(param_.kernel.Size(), 0U) \
@@ -233,37 +244,37 @@ static bool ConvolutionShape(const nnvm::NodeAttrs& attrs,
     Shape<5> oshape;
     oshape[0] = dshape[0];
     oshape[1] = param_.num_filter;
-    oshape[2] = dshape[2] ?
-      (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : 0;
-    oshape[3] = dshape[3] ?
-      (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : 0;
-    oshape[4] = dshape[4] ?
-      (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : 0;
+    oshape[2] = dshape[2] != -1 ?
+      (AddPad(dshape[2], param_.pad[0]) - dilated_ksize_d) / param_.stride[0] + 1 : -1;
+    oshape[3] = dshape[3] != -1 ?
+      (AddPad(dshape[3], param_.pad[1]) - dilated_ksize_y) / param_.stride[1] + 1 : -1;
+    oshape[4] = dshape[4] != -1 ?
+      (AddPad(dshape[4], param_.pad[2]) - dilated_ksize_x) / param_.stride[2] + 1 : -1;
     SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value()));
     // Perform incomplete shape inference. Fill in the missing values in data shape.
     // 1) We can always fill in the batch_size.
     // 2) We can back-calculate the input depth/height/width if the corresponding stride is 1.
     oshape = ConvertLayout((*out_shape)[0].get<5>(), param_.layout.value(), kNCDHW);
     dshape[0] = oshape[0];
-    if (oshape[2] && param_.stride[0] == 1) {
+    if (oshape[2] != -1 && param_.stride[0] == 1) {
       dshape[2] = oshape[2] + dilated_ksize_d - 1 - 2 * param_.pad[0];
     }
-    if (oshape[3] && param_.stride[1] == 1) {
+    if (oshape[3] != -1 && param_.stride[1] == 1) {
       dshape[3] = oshape[3] + dilated_ksize_y - 1 - 2 * param_.pad[1];
     }
-    if (oshape[4] && param_.stride[2] == 1) {
+    if (oshape[4] != -1 && param_.stride[2] == 1) {
       dshape[4] = oshape[4] + dilated_ksize_x - 1 - 2 * param_.pad[2];
     }
     SHAPE_ASSIGN_CHECK(*in_shape, conv::kData,
         ConvertLayout(dshape, kNCDHW, param_.layout.value()));
     // Check whether the kernel sizes are valid
-    if (dshape[2] != 0) {
+    if (dshape[2] != -1) {
       CHECK_LE(dilated_ksize_d, AddPad(dshape[2], param_.pad[0])) << "kernel size exceed input";
     }
-    if (dshape[3] != 0) {
+    if (dshape[3] != -1) {
       CHECK_LE(dilated_ksize_y, AddPad(dshape[3], param_.pad[1])) << "kernel size exceed input";
     }
-    if (dshape[4] != 0) {
+    if (dshape[4] != -1) {
       CHECK_LE(dilated_ksize_x, AddPad(dshape[4], param_.pad[2])) << "kernel size exceed input";
     }
     return true;
diff --git a/src/operator/nn/cudnn/cudnn_batch_norm.cc b/src/operator/nn/cudnn/cudnn_batch_norm.cc
index 1df888e4b38a..cb35ce170e8e 100644
--- a/src/operator/nn/cudnn/cudnn_batch_norm.cc
+++ b/src/operator/nn/cudnn/cudnn_batch_norm.cc
@@ -37,7 +37,7 @@ static bool BatchNormShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_
   using namespace mshadow;
   CHECK_EQ(in_shape->size(), 5U) << "Input:[data, gamma, beta, moving_mean, moving_var]";
   const mxnet::TShape &dshape = in_shape->at(0);
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   in_shape->at(1) = mxnet::TShape(Shape1(dshape[1]));
   in_shape->at(2) = mxnet::TShape(Shape1(dshape[1]));
   in_shape->at(3) = mxnet::TShape(Shape1(dshape[1]));
diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc
index 0e4d18b1fda8..afad6fd5cc80 100644
--- a/src/operator/nn/dropout.cc
+++ b/src/operator/nn/dropout.cc
@@ -95,7 +95,7 @@ Example::
   CHECK_EQ(in_shape->size(), 1U);
   const DropoutParam& param = nnvm::get<DropoutParam>(attrs.parsed);
   mxnet::TShape dshape(in_shape->at(0));
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   out_shape->clear();
   out_shape->push_back(dshape);
   for (int i = 0; i < param.axes.ndim(); ++i) {
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index 98277c206dd6..a097357ef5a3 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -52,7 +52,7 @@ static bool FullyConnectedShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape dshape = (*in_shape)[fullc::kData];
   mxnet::TShape oshape = (*out_shape)[0];
   // require data to be known
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
 
   index_t num_input;
   if (!param.flatten) {
diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc
index 1b0e99d8fe87..2e47503a3318 100644
--- a/src/operator/nn/layer_norm.cc
+++ b/src/operator/nn/layer_norm.cc
@@ -48,7 +48,7 @@ static bool LayerNormShape(const nnvm::NodeAttrs& attrs,
 
   const int channelCount = dshape[axis];
 
-  if (!shape_is_known(dshape)) {
+  if (!mxnet::ndim_is_known(dshape)) {
     return false;
   }
 
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 7c365f5081a1..3e081c9a0552 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -114,7 +114,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
       << "Pooling: Input data should be  3D in (batch, channel, x)"
       << " Or 4D in (batch, channel, y, x) "
       << " Or 5D in (batch, channel, d, y, x)";
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   int layout = param.GetLayout(dshape.ndim());
   if (param.global_pool) {
     mxnet::TShape oshape = dshape;
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index c95f859a1b5b..59f572211d0e 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -160,14 +160,14 @@ inline std::string type_string(const int& x) {
  * \return whether x and y are compatible.
  */
 inline bool shape_assign(mxnet::TShape *y, const mxnet::TShape& x) {
-  if (y->ndim() == -1) {
+  if (!mxnet::ndim_is_known(*y)) {
     *y = x;
     return true;
   } else if (y->ndim() != x.ndim()) {
-    return x.ndim() == -1;
+    return !mxnet::ndim_is_known(x);
   } else {
     for (int i = 0; i < y->ndim(); ++i) {
-      if ((*y)[i] == -1) {
+      if (!mxnet::dim_size_is_known(*y, i)) {
         (*y)[i] = x[i];
       } else if ((*y)[i] != x[i] && x[i] >= 0) {
         return false;
diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h
index 4e0ccc1caeb9..21ba270c9d42 100644
--- a/src/operator/pooling_v1-inl.h
+++ b/src/operator/pooling_v1-inl.h
@@ -247,7 +247,7 @@ class PoolingV1Prop : public OperatorProperty {
     CHECK_LE(dshape.ndim(), 5U) << "Pooling: Input data should be 4D in (batch, channel, y, x) "
                                << "Or 5D in (batch, channel, d, y, x)";
     mxnet::TShape oshape = dshape;
-    if (dshape.ndim() ==  0) return false;
+    if (dshape.ndim() ==  -1) return false;
     if (param_.global_pool) {
       if (dshape.ndim() == 4) {
         oshape[2] = 1;
diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc
index f97807424701..2cc2ec9d0374 100644
--- a/src/operator/quantization/quantized_concat.cc
+++ b/src/operator/quantization/quantized_concat.cc
@@ -55,7 +55,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha
     shape_assign(&dshape, tmp);
   }
 
-  if (dshape.ndim() == -1) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
 
   for (int i = 0; i < param_.num_args; ++i) {
     CHECK(shape_assign(&(*in_shape)[i], dshape))
diff --git a/src/operator/random/unique_sample_op.h b/src/operator/random/unique_sample_op.h
index c97d4fdf7ced..e88b95a8bdd6 100644
--- a/src/operator/random/unique_sample_op.h
+++ b/src/operator/random/unique_sample_op.h
@@ -60,7 +60,7 @@ inline bool SampleUniqueShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 0U);
   CHECK_EQ(out_attrs->size(), 2U);
   // output shape is known
-  if ((*out_attrs)[0].ndim() == 2 && param.shape.ndim() == -1) {
+  if ((*out_attrs)[0].ndim() == 2 && !mxnet::ndim_is_known(param.shape)) {
     SHAPE_ASSIGN_CHECK(*out_attrs, 1, mshadow::Shape1((*out_attrs)[0][0]));
     return true;
   }
diff --git a/src/operator/slice_channel-inl.h b/src/operator/slice_channel-inl.h
index a51b17cd324e..e37ffdcf1b91 100644
--- a/src/operator/slice_channel-inl.h
+++ b/src/operator/slice_channel-inl.h
@@ -195,7 +195,7 @@ class SliceChannelProp : public OperatorProperty {
     CHECK_EQ(in_shape->size(), 1U);
     mxnet::TShape dshape = in_shape->at(slice_enum::kData);
     mxnet::TShape ishape = in_shape->at(slice_enum::kData);
-    if (!shape_is_known(dshape)) return false;
+    if (!mxnet::ndim_is_known(dshape)) return false;
     if (param_.axis >= 0) {
       CHECK_LT(param_.axis, dshape.ndim());
     } else {
@@ -212,15 +212,18 @@ class SliceChannelProp : public OperatorProperty {
       << " evenly sized chunks, but this is not possible because "
       << param_.num_outputs << " does not evenly divide "
       << dshape[real_axis];
-    if (param_.squeeze_axis && ishape[real_axis] != 0) {
-      CHECK_EQ(ishape[real_axis], static_cast<size_t>(param_.num_outputs))
+    if (param_.squeeze_axis && ishape[real_axis] != -1) {
+      CHECK_EQ(ishape[real_axis], param_.num_outputs)
         << "If squeeze axis is True, the size of the sliced axis must be the same as num_outputs."
         << " Input shape=" << ishape << ", axis=" << real_axis
         << ", num_outputs=" << param_.num_outputs << ".";
     }
-    dshape[real_axis] /= param_.num_outputs;
-    if (param_.squeeze_axis && (dshape[real_axis] == 1 || ishape[real_axis] == 0)) {
-      for (int d = real_axis; d < static_cast<int>(dshape.ndim()) - 1; ++d) {
+    if (dshape[real_axis] >= 0) {
+      dshape[real_axis] /= param_.num_outputs;
+    }
+    if (param_.squeeze_axis && (dshape[real_axis] == 1
+        || !mxnet::dim_size_is_known(ishape, real_axis))) {
+      for (int d = real_axis; d < dshape.ndim() - 1; ++d) {
         dshape[d] = dshape[d+1];
       }
       dshape = mxnet::TShape(&dshape[0], &dshape[dshape.ndim()-1]);
diff --git a/src/operator/softmax_output-inl.h b/src/operator/softmax_output-inl.h
index f81a232d629a..80ab40ef6c50 100644
--- a/src/operator/softmax_output-inl.h
+++ b/src/operator/softmax_output-inl.h
@@ -349,7 +349,7 @@ class SoftmaxOutputProp : public OperatorProperty {
           lshape2[i-1] = dshape[i];
         mxnet::TShape lshape3 = dshape;
         lshape3[1] = 1;
-        if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) {
+        if (!mxnet::ndim_is_known(in_shape->at(softmaxout_enum::kLabel))) {
           in_shape->at(softmaxout_enum::kLabel) = lshape1;
         } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) {
         } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) {
diff --git a/src/operator/softmax_output.cc b/src/operator/softmax_output.cc
index 262242f98004..548225f0496b 100644
--- a/src/operator/softmax_output.cc
+++ b/src/operator/softmax_output.cc
@@ -85,7 +85,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs,
   const SoftmaxOutputParam& param = nnvm::get<SoftmaxOutputParam>(attrs.parsed);
   CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]";
   const mxnet::TShape &dshape = in_shape->at(0);
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
 
   // label.shape == data.shape: use probability as label
   if (dshape != (*in_shape)[softmaxout_enum::kLabel]) {
@@ -97,7 +97,7 @@ static bool SoftmaxOutputShape(const nnvm::NodeAttrs& attrs,
         lshape2[i-1] = dshape[i];
       mxnet::TShape lshape3 = dshape;
       lshape3[1] = 1;
-      if (in_shape->at(softmaxout_enum::kLabel).ndim() == -1) {
+      if (!mxnet::ndim_is_known(in_shape->at(softmaxout_enum::kLabel))) {
         in_shape->at(softmaxout_enum::kLabel) = lshape1;
       } else if (in_shape->at(softmaxout_enum::kLabel) == lshape1) {
       } else if (in_shape->at(softmaxout_enum::kLabel) == lshape2) {
diff --git a/src/operator/svm_output-inl.h b/src/operator/svm_output-inl.h
index 3d651c13d8ba..dfe9fa606e95 100644
--- a/src/operator/svm_output-inl.h
+++ b/src/operator/svm_output-inl.h
@@ -143,7 +143,7 @@ class SVMOutputProp : public OperatorProperty {
     using namespace mshadow;
     CHECK_EQ(in_shape->size(), 2U) << "Input:[data, label]";
     const mxnet::TShape &dshape = in_shape->at(0);
-    if (!shape_is_known(dshape)) return false;
+    if (!mxnet::ndim_is_known(dshape)) return false;
     mxnet::TShape label_shape(dshape.ndim() - 1, -1);
     for (int i = 0; i + 1 < dshape.ndim(); ++i)
       label_shape[i] = dshape[i];
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 90afdeea858e..ac714c2cfcdd 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -366,13 +366,13 @@ inline bool BroadcastToShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape& ishape = (*in_attrs)[0];
-  if (!shape_is_known(ishape)) return false;
+  if (!mxnet::ndim_is_known(ishape)) return false;
   const BroadcastToParam& param = nnvm::get<BroadcastToParam>(attrs.parsed);
   CHECK_EQ(ishape.ndim(), param.shape.ndim())
     << "Operand of shape " << ishape << " cannot be broadcasted to " << param.shape;
   mxnet::TShape oshape = param.shape;
   for (int i = 0; i < ishape.ndim(); ++i) {
-    if (oshape[i] != 0) {
+    if (oshape[i] != -1) {
       CHECK(ishape[i] == oshape[i] || ishape[i] == 1)
         << "Array cannot be broadcasted from " << ishape << " to " << param.shape;
     } else {
@@ -391,7 +391,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& lhs_shape = (*in_attrs)[0];
   mxnet::TShape& rhs_shape = (*in_attrs)[1];
 
-  if (!shape_is_known(lhs_shape) || !shape_is_known(lhs_shape)) {
+  if (!mxnet::ndim_is_known(lhs_shape) || !mxnet::ndim_is_known(rhs_shape)) {
     return false;
   }
 
@@ -405,7 +405,7 @@ inline bool BroadcastLikeShape(const nnvm::NodeAttrs& attrs,
 
     oshape = mxnet::TShape(rhs_shape);
     for (int i = 0; i < lhs_shape.ndim(); ++i) {
-      if (rhs_shape[i] != 0) {
+      if (rhs_shape[i] != -1) {
         CHECK(lhs_shape[i] == rhs_shape[i] || lhs_shape[i] == 1)
           << "Array cannot be broadcasted from " << lhs_shape << " to " << rhs_shape;
       } else {
diff --git a/src/operator/tensor/diag_op-inl.h b/src/operator/tensor/diag_op-inl.h
index b90b09a36bd3..c95c1ce414f2 100644
--- a/src/operator/tensor/diag_op-inl.h
+++ b/src/operator/tensor/diag_op-inl.h
@@ -84,7 +84,7 @@ inline mxnet::TShape DiagShapeImpl(const mxnet::TShape& ishape, const int k,
 
   auto s = std::min(h, w);
   if (s < 0) {
-    s = 0;
+    s = -1;
   }
 
   if (x1 > x2) {
@@ -114,7 +114,7 @@ inline bool DiagOpShape(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(out_attrs->size(), 1U);
 
     const mxnet::TShape& ishape = (*in_attrs)[0];
-    if (!shape_is_known(ishape)) {
+    if (!mxnet::ndim_is_known(ishape)) {
       return false;
     }
 
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index dfb3231a75a9..64a4d7cc15ff 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -48,7 +48,7 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& rhs = (*in_attrs)[1];
 
   // avoid pre-mature shape inference.
-  if (lhs.ndim() == -1 || rhs.ndim() == -1) return false;
+  if (!mxnet::ndim_is_known(lhs) || !mxnet::ndim_is_known(rhs)) return false;
 
   if (lhs == rhs) {
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, lhs);
diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h
index 3c4d34b3f4a5..bcad602c95c0 100644
--- a/src/operator/tensor/init_op.h
+++ b/src/operator/tensor/init_op.h
@@ -28,6 +28,7 @@
 #include <mxnet/base.h>
 #include <mxnet/operator_util.h>
 #include <mxnet/op_attr_types.h>
+#include <mxnet/imperative.h>
 #include <dmlc/parameter.h>
 #include <dmlc/optional.h>
 #include <vector>
@@ -213,14 +214,13 @@ inline bool InitShape(const nnvm::NodeAttrs& attrs,
   const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 0U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param.shape)) return true;
-  for (int i=0 ; i < param.shape.ndim() ; ++i) {
-    if (param.shape[i] < 0U) {
-      LOG(FATAL) << "Shape cannot contain negative values " << param.shape;
-    }
+  mxnet::TShape param_shape = param.shape;
+  if (!Imperative::Get()->is_np_comp()) {
+    common::ConvertToNumpyShape(&param_shape);
   }
-  SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape);
-  return true;
+  if (shape_is_known((*out_attrs)[0]) && !shape_is_known(param_shape)) return true;
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, param_shape);
+  return shape_is_known(out_attrs->at(0));
 }
 
 template<typename ParamType>
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index d61267faefe6..9ce22020fab4 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -110,7 +110,7 @@ inline mxnet::TShape InferReshapeShape(const mxnet::Tuple<IType>& shape,
       CHECK_LT(src_idx, dshape_len-1);
       const int d1 = dshape_vec[src_idx++];
       const int d2 = dshape_vec[src_idx++];
-      if (d1 == -1 || d2 == -1) {
+      if (!mxnet::dim_size_is_known(d1) || !mxnet::dim_size_is_known(d2)) {
         tmp.push_back(-1);
       } else {
         tmp.push_back(d1 * d2);
@@ -164,7 +164,7 @@ inline bool ReverseReshapeInferShape(mxnet::TShape *in, const mxnet::TShape& out
     int zero_axis = -1;
     int known_dim_size_prod = 1;
     for (int i = 0; i < in->ndim(); i++) {
-      if ((*in)[i] == -1) {
+      if (!mxnet::dim_size_is_known(*in, i)) {
         if (zero_axis != -1)
           return false;  // more than 1 zero found.
         else
@@ -185,7 +185,7 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape &dshape = (*in_attrs)[0];
-  if (dshape.ndim() == -1) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   mxnet::TShape oshape;
   if (param_.shape.ndim() != 0) {
     oshape = InferReshapeShape(param_.shape, dshape, param_.reverse);
@@ -318,7 +318,7 @@ void Transpose(const nnvm::NodeAttrs& attrs,
                const std::vector<TBlob>& outputs) {
   const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
   CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace";
-  if (param.axes.ndim() == -1) {
+  if (!mxnet::ndim_is_known(param.axes)) {
     mxnet::TShape axes(inputs[0].ndim(), -1);
     for (int i = 0; i < axes.ndim(); ++i) {
       axes[i] = axes.ndim() - 1 - i;
@@ -338,7 +338,7 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& shp = (*in_attrs)[0];
   CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions";
   mxnet::TShape ret(shp.ndim(), -1);
-  if (param.axes.ndim() == -1) {
+  if (!mxnet::ndim_is_known(param.axes)) {
     for (int i = 0; i < shp.ndim(); ++i) {
       ret[i] = shp[shp.ndim()-1-i];
     }
@@ -371,7 +371,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
   const ExpandDimParam& param = nnvm::get<ExpandDimParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
-  if (!shape_is_known(in_attrs->at(0)) && !shape_is_known(out_attrs->at(0))) {
+  if (!mxnet::ndim_is_known(in_attrs->at(0)) && !mxnet::ndim_is_known(out_attrs->at(0))) {
     return false;
   }
 
@@ -405,7 +405,7 @@ inline bool ExpandDimShape(const nnvm::NodeAttrs& attrs,
   for (int i = 0; i < axis; ++i) ret[i] = oshape[i];
   for (int i = axis+1; i < indim+1; ++i) ret[i-1] = oshape[i];
   SHAPE_ASSIGN_CHECK(*in_attrs, 0, ret);
-  return shape_is_known(ret);
+  return shape_is_known(in_attrs->at(0)) && shape_is_known(out_attrs->at(0));
 }
 
 // Currently MKLDNN only supports step = 1 or step has no value
@@ -728,7 +728,7 @@ inline bool SliceOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   const mxnet::TShape& dshape = (*in_attrs)[0];
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   const SliceParam& param = nnvm::get<SliceParam>(attrs.parsed);
   mxnet::TShape oshape = dshape;
 
@@ -1119,9 +1119,9 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape&
                            int* axis, index_t* begin, index_t* end) {
   *axis = param.axis;
   if (*axis < 0) {
-    *axis += static_cast<int>(ishape.ndim());
+    *axis += ishape.ndim();
   }
-  CHECK(*axis < static_cast<int>(ishape.ndim()) && *axis >= 0) <<
+  CHECK(*axis < ishape.ndim() && *axis >= 0) <<
     "Transformed axis must be smaller than the source ndim and larger than zero! Recieved axis=" <<
     param.axis << ", src_ndim=" << ishape.ndim() << ", transformed axis=" << *axis;
   index_t axis_size = static_cast<index_t>(ishape[*axis]);
@@ -1130,7 +1130,7 @@ inline void GetSliceAxisParams(const SliceAxisParam& param, const mxnet::TShape&
   if (*begin < 0) {
     *begin += axis_size;
   }
-  if (axis_size) {
+  if (axis_size > 0) {
     if (!static_cast<bool>(param.end)) {
       *end = axis_size;
     } else {
@@ -2595,7 +2595,7 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   mxnet::TShape dshape = in_attrs->at(split_enum::kData);
   mxnet::TShape ishape = in_attrs->at(split_enum::kData);
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   if (param.axis >= 0) {
     CHECK_LT(static_cast<size_t>(param.axis), dshape.ndim());
   } else {
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 1f8fc993b3fc..d44f1113e788 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -2265,7 +2265,7 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape):
 
         for i in range(len(src_shape)):
             holdout_src_shape = list(src_shape)
-            holdout_src_shape[i] = -1
+            holdout_src_shape[i] = 0
             holdout_src_shape = tuple(holdout_src_shape)
             net = mx.sym.Variable('data')
             net = mx.sym.elemwise_add(net.reshape(shape_args, reverse=reverse), mx.sym.ones(shape=dst_shape))
@@ -4403,7 +4403,8 @@ def test_invalid_reps():
         assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3))
 
     test_normal_case()
-    test_empty_tensor()
+    with mx.numpy.enable_np_comp():
+        test_empty_tensor()
     test_empty_reps()
     test_tile_backward()
     test_tile_numeric_gradient()
@@ -4463,7 +4464,8 @@ def test_zero_depth():
     test_normal_case(index_type=np.float64)
     test_normal_case(index_type=np.float32)
     test_normal_case(index_type=np.float16)
-    test_empty_indices()
+    with mx.numpy.enable_np_comp():
+        test_empty_indices()
     test_zero_depth()
 
 

From 098d189cf71b1bcfa611691815a5179cf3b25588 Mon Sep 17 00:00:00 2001
From: Da Zheng <zhengda1936@gmail.com>
Date: Mon, 25 Mar 2019 21:08:41 -0700
Subject: [PATCH 05/32] Fix a bug to pass the test in test_contrib_rnn (#14520)

* fix.

* remove type conversion.

* remove type cast.
---
 src/common/utils.h                       | 1 -
 src/ndarray/ndarray_function.cc          | 6 ++++--
 src/operator/nn/mkldnn/mkldnn_base-inl.h | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/common/utils.h b/src/common/utils.h
index f3df2e15ec32..4843d7e06b7b 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -752,7 +752,6 @@ inline void ConvertToNumpyShape(mxnet::TShape* shape) {
     *shape = mxnet::TShape();  // unknown shape ndim = -1
   } else {
     for (int j = 0; j < shape->ndim(); ++j) {
-      CHECK_GE((*shape)[j], 0) << "Legacy shape cannot have dim size < 0";
       if ((*shape)[j] == 0) {  // legacy shape dim_size = 0 means unknown
         (*shape)[j] = -1;  // unknown dim size = -1
       }
diff --git a/src/ndarray/ndarray_function.cc b/src/ndarray/ndarray_function.cc
index a613d5a3decc..8f72bc259afc 100644
--- a/src/ndarray/ndarray_function.cc
+++ b/src/ndarray/ndarray_function.cc
@@ -210,8 +210,6 @@ void ElementwiseSumContainsDnsImpl(mshadow::Stream<cpu>* s,
     Kernel<set_zero, cpu>::Launch(s, out_data.Size(), out_data.dptr<DType>());
     for (size_t i = 0; i < nds.size(); ++i) {
       const NDArray& nd = nds[i];
-      const nnvm::dim_t num_rows = nd.shape()[0];
-      const nnvm::dim_t num_cols = nd.shape()[1];
       const TBlob& nd_data = nd.data();
 
       if (i == 0) {
@@ -234,6 +232,8 @@ void ElementwiseSumContainsDnsImpl(mshadow::Stream<cpu>* s,
         case kCSRStorage: {
           const TBlob& nd_indices = nd.aux_data(csr::kIdx);
           const TBlob& nd_indptr = nd.aux_data(csr::kIndPtr);
+          const nnvm::dim_t num_rows = nd.shape()[0];
+          const nnvm::dim_t num_cols = nd.shape()[1];
           MSHADOW_IDX_TYPE_SWITCH(nd_indices.type_flag_, IType, {  // indices type
             MSHADOW_IDX_TYPE_SWITCH(nd_indptr.type_flag_, CType, {  // indptr type
               if (nd.storage_initialized()) {
@@ -248,6 +248,8 @@ void ElementwiseSumContainsDnsImpl(mshadow::Stream<cpu>* s,
         }
         case kRowSparseStorage: {
           const TBlob& nd_indices = nd.aux_data(rowsparse::kIdx);
+          const nnvm::dim_t num_rows = nd.shape()[0];
+          const nnvm::dim_t num_cols = nd.shape()[1];
           MSHADOW_IDX_TYPE_SWITCH(nd_indices.type_flag_, IType, {  // indices type
             if (nd.storage_initialized()) {
               const nnvm::dim_t nz_rows = nd_indices.Size();
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index a460e33fa548..3da3f23d7683 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -464,7 +464,7 @@ mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc p
                                                 mkldnn_memory_format_t format);
 
 inline bool same_shape(const mxnet::TShape &shape, const mkldnn_dims_t dims, int ndims) {
-  if (shape.ndim() != (size_t)ndims)
+  if (shape.ndim() != ndims)
     return false;
   for (int i = 0; i < ndims; i++)
     if (shape[i] != dims[i])

From 9c77e3e4cbf35c7e96abeb0d1fc08894e61ed4fd Mon Sep 17 00:00:00 2001
From: Junru Shao <junrushao1994@gmail.com>
Date: Tue, 26 Mar 2019 17:07:13 -0700
Subject: [PATCH 06/32] [numpy] Fix test_dynamic_shape.test_dynamic_shape
 (#14538)

* Initial commit

* Address comments from Jun
---
 src/c_api/c_api_common.h           | 5 +++--
 src/imperative/imperative_utils.cc | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h
index 329dc9adc7cf..55608b950866 100644
--- a/src/c_api/c_api_common.h
+++ b/src/c_api/c_api_common.h
@@ -91,8 +91,9 @@ struct MXAPIThreadLocalEntry {
     data->resize(shapes.size());
     size_t size = 0;
     for (const auto& s : shapes) {
-      if (s.ndim() > 0);
-      size += s.ndim();
+      if (s.ndim() > 0) {
+        size += s.ndim();
+      }
     }
     buffer->resize(size);
     int *ptr = buffer->data();
diff --git a/src/imperative/imperative_utils.cc b/src/imperative/imperative_utils.cc
index 6cb4a70324b5..733a47bfe6c1 100644
--- a/src/imperative/imperative_utils.cc
+++ b/src/imperative/imperative_utils.cc
@@ -19,6 +19,7 @@
 
 #include "./imperative_utils.h"
 #include "./cached_op.h"
+#include "../operator/operator_common.h"
 
 namespace mxnet {
 namespace imperative {
@@ -186,7 +187,7 @@ void NaiveRunGraph(
       Imperative::Get()->InvokeOp(ctx, node.source->attrs, ndinputs, ndoutputs,
                                   req, dispatch_mode, state);
       for (size_t j = 0; j < ndoutputs.size(); ++j) {
-        if (ndoutputs[j]->shape().ndim() == 0) {
+        if (mxnet::op::shape_is_none(ndoutputs[j]->shape())) {
           ndoutputs[j]->WaitToRead();
           ndoutputs[j]->SetShapeFromChunk();
         }

From ebc7d4da78d932e681788a83755762a36ff8e154 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Wed, 27 Mar 2019 13:49:59 -0700
Subject: [PATCH 07/32] [numpy] Fix numpy import in python2 (#14537)

* Fix several test failures

* Fix subgraph op infer shape

* Fix sparse slice

* Fix deconv infer shape

* Fix numpy import compatibility problem in python2
---
 python/mxnet/ndarray/_internal.py     |  2 --
 python/mxnet/ndarray/contrib.py       |  1 +
 python/mxnet/ndarray/register.py      |  7 ++--
 python/mxnet/symbol/_internal.py      |  2 --
 python/mxnet/symbol/register.py       |  7 ++--
 src/common/utils.h                    |  5 +++
 src/operator/leaky_relu-inl.h         |  4 +--
 src/operator/nn/deconvolution-inl.h   | 12 ++++---
 src/operator/nn/deconvolution.cc      | 50 ++++++++++++++++++++-------
 src/operator/tensor/matrix_op-inl.h   |  2 +-
 tests/python/unittest/test_ndarray.py |  6 +++-
 11 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/python/mxnet/ndarray/_internal.py b/python/mxnet/ndarray/_internal.py
index 5f3ce976dbc5..8045d9bd2b14 100644
--- a/python/mxnet/ndarray/_internal.py
+++ b/python/mxnet/ndarray/_internal.py
@@ -20,8 +20,6 @@
 import os as _os
 import sys as _sys
 
-import numpy as np
-
 try:
     if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0:
         from .._ctypes.ndarray import NDArrayBase, CachedOp
diff --git a/python/mxnet/ndarray/contrib.py b/python/mxnet/ndarray/contrib.py
index 74c355dc1288..1718a2c68d13 100644
--- a/python/mxnet/ndarray/contrib.py
+++ b/python/mxnet/ndarray/contrib.py
@@ -18,6 +18,7 @@
 # coding: utf-8
 # pylint: disable=wildcard-import, unused-wildcard-import,redefined-outer-name
 """Contrib NDArray API of MXNet."""
+from __future__ import absolute_import
 import math
 import numpy as np
 from ..context import current_context
diff --git a/python/mxnet/ndarray/register.py b/python/mxnet/ndarray/register.py
index 05d7f17a8fc1..1ccf228698ba 100644
--- a/python/mxnet/ndarray/register.py
+++ b/python/mxnet/ndarray/register.py
@@ -16,9 +16,10 @@
 # under the License.
 
 """Register backend ops in mxnet.ndarray namespace"""
+from __future__ import absolute_import
 import os as _os
 import ctypes
-import numpy as np  # pylint: disable=unused-import
+import numpy as _np  # pylint: disable=unused-import
 
 from ._internal import NDArrayBase, _imperative_invoke # pylint: disable=unused-import
 from ..ndarray_doc import _build_doc
@@ -103,7 +104,7 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name))
             if dtype_name is not None:
                 code.append("""
     if '%s' in kwargs:
-        kwargs['%s'] = np.dtype(kwargs['%s']).name"""%(
+        kwargs['%s'] = _np.dtype(kwargs['%s']).name"""%(
             dtype_name, dtype_name, dtype_name))
             code.append("""
     _ = kwargs.pop('name', None)
@@ -136,7 +137,7 @@ def %s(%s):"""%(func_name, ', '.join(signature)))
                 code.append("""
     if %s is not _Null:
         keys.append('%s')
-        vals.append(np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name))
+        vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name))
 
     if not signature_only:
         code.append("""
diff --git a/python/mxnet/symbol/_internal.py b/python/mxnet/symbol/_internal.py
index 53fc684008cf..7e9787e32b1c 100644
--- a/python/mxnet/symbol/_internal.py
+++ b/python/mxnet/symbol/_internal.py
@@ -22,8 +22,6 @@
 import sys as _sys
 import os as _os
 
-import numpy as np
-
 try:
     if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0:
         from .._ctypes.symbol import SymbolBase, _set_symbol_class
diff --git a/python/mxnet/symbol/register.py b/python/mxnet/symbol/register.py
index 15c8e5e1fa68..ac59f8b97f15 100644
--- a/python/mxnet/symbol/register.py
+++ b/python/mxnet/symbol/register.py
@@ -17,9 +17,10 @@
 
 # pylint: disable=unused-import
 """Register backend ops in mxnet.symbol namespace."""
+from __future__ import absolute_import
 import os as _os
 import ctypes
-import numpy as np
+import numpy as _np
 
 from . import _internal
 from ._internal import SymbolBase, _symbol_creator
@@ -109,7 +110,7 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name))
             if dtype_name is not None:
                 code.append("""
     if '%s' in kwargs:
-        kwargs['%s'] = np.dtype(kwargs['%s']).name"""%(
+        kwargs['%s'] = _np.dtype(kwargs['%s']).name"""%(
             dtype_name, dtype_name, dtype_name))
             code.append("""
     attr = kwargs.pop('attr', None)
@@ -175,7 +176,7 @@ def %s(%s):"""%(func_name, ', '.join(signature)))
                 code.append("""
     if %s is not _Null:
         _keys.append('%s')
-        _vals.append(np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name))
+        _vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name))
 
             code.append("""
     if not hasattr(NameManager._current, "value"):
diff --git a/src/common/utils.h b/src/common/utils.h
index 4843d7e06b7b..4fb398d883a6 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -746,6 +746,11 @@ inline void ParallelCopy(DType* dst, const DType* src, index_t size) {
  * 4. -1 dim size means the dimension's size is unknown.
  * so that operator's infer shape function can work in backend.
  * \param shape to be converted.
+ * Note: It is possible that the shape to be converted is already
+ * numpy compatible. For example, when a subgraph operator's infer
+ * shape function is called from the infer shape pass of the whole
+ * graph, its input/output shapes have been converted to numpy
+ * compatible shapes.
  */
 inline void ConvertToNumpyShape(mxnet::TShape* shape) {
   if (shape->ndim() == 0) {  // legacy shape ndim = 0 means unknown
diff --git a/src/operator/leaky_relu-inl.h b/src/operator/leaky_relu-inl.h
index aef990010818..7f8638630145 100644
--- a/src/operator/leaky_relu-inl.h
+++ b/src/operator/leaky_relu-inl.h
@@ -355,10 +355,10 @@ class LeakyReLUProp : public OperatorProperty {
       CHECK_EQ(in_shape->size(), 1U) << "Input:[data]";
     }
     const mxnet::TShape &dshape = in_shape->at(leakyrelu::kData);
-    if (dshape.ndim() == 0) return false;
+    if (!mxnet::ndim_is_known(dshape)) return false;
     if (param_.act_type == leakyrelu::kPReLU) {
       const mxnet::TShape &gshape = in_shape->at(leakyrelu::kGamma);
-      if (gshape.ndim() == 0) {
+      if (!mxnet::ndim_is_known(gshape)) {
         in_shape->at(leakyrelu::kGamma) = mxnet::TShape(Shape1(dshape[1]));
       }
       if (dshape == gshape) {
diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
index b28e47818392..5f3137f0dcb8 100644
--- a/src/operator/nn/deconvolution-inl.h
+++ b/src/operator/nn/deconvolution-inl.h
@@ -134,11 +134,13 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
       for (size_t i = 0; i < ndim; i++) {
         // input.ndim() can be larger than ndim, in case that the complete input
         // shape was passed and not only the ndim last ones
-        o_pad[i] = stride[i] * (input[(input_ndim - ndim) + i] - 1) + DilatedKernelSize(i);
-        CHECK_GE(o_pad[i], target_shape[i]) << "too big target shape";
-        o_pad[i] -= target_shape[i];
-        o_adj[i] = o_pad[i] % 2;
-        o_pad[i] = (o_pad[i] + 1) / 2;
+        if (mxnet::dim_size_is_known(input, input_ndim - ndim + i)) {
+          o_pad[i] = stride[i] * (input[(input_ndim - ndim) + i] - 1) + DilatedKernelSize(i);
+          CHECK_GE(o_pad[i], target_shape[i]) << "too big target shape";
+          o_pad[i] -= target_shape[i];
+          o_adj[i] = o_pad[i] % 2;
+          o_pad[i] = (o_pad[i] + 1) / 2;
+        }
       }
     } else {
       for (size_t i = 0; i < ndim; i++) {
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index d8c91f7f96c8..09b255d009e0 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -54,7 +54,7 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs,
   }
   out_shape->resize(1, mxnet::TShape());
   const mxnet::TShape &dshape = (*in_shape)[deconv::kData];
-  if (!shape_is_known(dshape)) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
 
   if (param_.kernel.ndim() == 1) {
     // 1d conv
@@ -90,8 +90,12 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs,
     Shape<3> oshape;
     oshape[0] = dshape_ncw[0];
     oshape[1] = param_.num_filter;
-    oshape[2] = param_.stride[0] * (dshape_ncw[2] - 1) +
-      dilated_ksize_x - 2 * o_pad[0] + o_adj[0];
+    if (mxnet::dim_size_is_known(dshape_ncw[2])) {
+      oshape[2] = param_.stride[0] * (dshape_ncw[2] - 1) +
+          dilated_ksize_x - 2 * o_pad[0] + o_adj[0];
+    } else {
+      oshape[2] = -1;
+    }
 
     if (param_.target_shape.ndim() > 0) {
       if (param_.target_shape[0] > 0) {
@@ -141,10 +145,18 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs,
     Shape<4> oshape;
     oshape[0] = dshape_nchw[0];
     oshape[1] = param_.num_filter;
-    oshape[2] = param_.stride[0] * (dshape_nchw[2] - 1) +
-      dilated_ksize_y - 2 * o_pad[0] + o_adj[0];
-    oshape[3] = param_.stride[1] * (dshape_nchw[3] - 1) +
-      dilated_ksize_x - 2 * o_pad[1] + o_adj[1];
+    if (mxnet::dim_size_is_known(dshape_nchw[2])) {
+      oshape[2] = param_.stride[0] * (dshape_nchw[2] - 1) +
+          dilated_ksize_y - 2 * o_pad[0] + o_adj[0];
+    } else {
+      oshape[2] = -1;
+    }
+    if (mxnet::dim_size_is_known(dshape_nchw[3])) {
+      oshape[3] = param_.stride[1] * (dshape_nchw[3] - 1) +
+          dilated_ksize_x - 2 * o_pad[1] + o_adj[1];
+    } else {
+      oshape[3] = -1;
+    }
 
     if (param_.target_shape.ndim() > 1) {
       if (param_.target_shape[0] > 0) {
@@ -203,12 +215,24 @@ static bool DeconvolutionShape(const nnvm::NodeAttrs& attrs,
     Shape<5> oshape;
     oshape[0] = dshape_ncdhw[0];
     oshape[1] = param_.num_filter;
-    oshape[2] = param_.stride[0] * (dshape_ncdhw[2] - 1) +
-      dilated_ksize_d - 2 * o_pad[0] + o_adj[0];
-    oshape[3] = param_.stride[1] * (dshape_ncdhw[3] - 1) +
-      dilated_ksize_y - 2 * o_pad[1] + o_adj[1];
-    oshape[4] = param_.stride[2] * (dshape_ncdhw[4] - 1) +
-      dilated_ksize_x - 2 * o_pad[2] + o_adj[2];
+    if (mxnet::dim_size_is_known(dshape_ncdhw[2])) {
+      oshape[2] = param_.stride[0] * (dshape_ncdhw[2] - 1) +
+          dilated_ksize_d - 2 * o_pad[0] + o_adj[0];
+    } else {
+      oshape[2] = -1;
+    }
+    if (mxnet::dim_size_is_known(dshape_ncdhw[3])) {
+      oshape[3] = param_.stride[1] * (dshape_ncdhw[3] - 1) +
+          dilated_ksize_y - 2 * o_pad[1] + o_adj[1];
+    } else {
+      oshape[3] = -1;
+    }
+    if (mxnet::dim_size_is_known(dshape_ncdhw[4])) {
+      oshape[4] = param_.stride[2] * (dshape_ncdhw[4] - 1) +
+          dilated_ksize_x - 2 * o_pad[2] + o_adj[2];
+    } else {
+      oshape[4] = -1;
+    }
 
     if (param_.target_shape.ndim() > 2) {
       if (param_.target_shape[0] > 0) {
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 9ce22020fab4..e91f904debb9 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -598,7 +598,7 @@ void SliceCsrImpl(const SliceParam &param, const OpContext& ctx,
   mxnet::TShape begin(N, -1), end(N, -1);
   for (int i = 0; i < N; ++i) {
     int s = 0;
-    if (param.begin[i]) {
+    if (i < param.begin.ndim() && param.begin[i]) {
       s = *param.begin[i];
       if (s < 0) s += ishape[i];
     }
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index 2446107ad466..1bdb7d51df67 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -122,7 +122,11 @@ def test_ndarray_setitem():
 
     # numpy assignment for empty axis
     for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]:
-        x = mx.nd.zeros(trivial_shape)
+        if trivial_shape == tuple():
+            with mx.numpy.enable_np_comp():
+                x = mx.nd.zeros(trivial_shape)
+        else:
+            x = mx.nd.zeros(trivial_shape)
         x[:] = np.ones(trivial_shape)
         x_np = np.ones(trivial_shape, dtype=x.dtype)
         assert x.shape == trivial_shape

From c63555e5e39c9eaad53e2d8b9f99edfcf3c65d0c Mon Sep 17 00:00:00 2001
From: Tao Lv <tao.a.lv@intel.com>
Date: Fri, 29 Mar 2019 10:50:41 +0800
Subject: [PATCH 08/32] fix concat and slice (#14549)

---
 src/operator/nn/mkldnn/mkldnn_concat.cc | 12 ++++++------
 src/operator/nn/mkldnn/mkldnn_slice.cc  |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc
index 8e2b57781a18..7b266efc2a14 100644
--- a/src/operator/nn/mkldnn/mkldnn_concat.cc
+++ b/src/operator/nn/mkldnn/mkldnn_concat.cc
@@ -92,13 +92,13 @@ void MKLDNNConcatBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   auto gz_mem = inputs[0].GetMKLDNNData();
   mkldnn::memory::primitive_desc gz_pd = gz_mem->get_primitive_desc();
   /* init the offset */
-  mkldnn::memory::dims offsets = {0, 0, 0, 0};
+  mkldnn::memory::dims offsets(outputs[0].shape().ndim());
+  for (auto &v : offsets) {
+    v = 0;
+  }
+
   for (int i = 0; i < num_in_data; i++) {
-    mkldnn::memory::dims diff_src_tz
-        = {static_cast<int>(outputs[i].shape()[0]),
-          static_cast<int>(outputs[i].shape()[1]),
-          static_cast<int>(outputs[i].shape()[2]),
-          static_cast<int>(outputs[i].shape()[3])};
+    mkldnn::memory::dims diff_src_tz(outputs[i].shape().begin(), outputs[i].shape().end());
     auto diff_src_mpd = outputs[i].GetMKLDNNData()->get_primitive_desc();
     auto gradi_mem_ = CreateMKLDNNMem(outputs[i], diff_src_mpd, req[i]);
     // create view from gy to gxs[i]
diff --git a/src/operator/nn/mkldnn/mkldnn_slice.cc b/src/operator/nn/mkldnn/mkldnn_slice.cc
index 3f3d82020598..96a8afdab6e2 100644
--- a/src/operator/nn/mkldnn/mkldnn_slice.cc
+++ b/src/operator/nn/mkldnn/mkldnn_slice.cc
@@ -42,7 +42,7 @@ MKLDNNSliceFwd::MKLDNNSliceFwd(const SliceParam &param,
   mkldnn::memory::dims offsets(N);
   for (uint32_t i = 0; i < N; ++i) {
     int s = 0;
-    if (param.begin[i]) {
+    if (i < param.begin.ndim() &&  param.begin[i]) {
       s = *param.begin[i];
       if (s < 0) s += ishape[i];
     }

From 48cf65949fa5ce4cf0c2b1a317eca42ffdc831f2 Mon Sep 17 00:00:00 2001
From: Tong He <hetong007@gmail.com>
Date: Tue, 2 Apr 2019 21:10:54 -0700
Subject: [PATCH 09/32] fix R-package (#14536)

---
 R-package/src/ndarray.cc |  4 ++--
 R-package/src/symbol.cc  | 18 +++++++++---------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/R-package/src/ndarray.cc b/R-package/src/ndarray.cc
index 94d24f3fb46b..d08671a3f026 100644
--- a/R-package/src/ndarray.cc
+++ b/R-package/src/ndarray.cc
@@ -179,8 +179,8 @@ Rcpp::RObject NDArrayPacker::CreateNDArrayPacker() {
 }
 
 Rcpp::Dimension NDArray::dim() const {
-  mx_uint ndim;
-  const mx_uint *pshape;
+  int ndim;
+  const int *pshape;
   MX_CALL(MXNDArrayGetShape(
       ptr_->handle, &ndim, &pshape));
   Rcpp::IntegerVector dat(pshape, pshape + ndim);
diff --git a/R-package/src/symbol.cc b/R-package/src/symbol.cc
index 031c9a254019..90ff9ef1dd67 100644
--- a/R-package/src/symbol.cc
+++ b/R-package/src/symbol.cc
@@ -167,8 +167,8 @@ Symbol::RObjectType Symbol::GetOutput(mx_uint index) const {
 
 // helper function to convert shape into Rcpp vector
 inline Rcpp::List BuildShapeData(mx_uint shape_size,
-                                 const mx_uint *shape_ndim,
-                                 const mx_uint **shape_data,
+                                 const int *shape_ndim,
+                                 const int **shape_data,
                                  const std::vector<std::string> &names) {
   Rcpp::List ret(shape_size);
   for (mx_uint i = 0; i < shape_size; ++i) {
@@ -185,7 +185,7 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const {
       << "Need to pass parameters in key=value style.\n";
   std::vector<std::string> keys = kwargs.names();
   std::vector<mx_uint> arg_ind_ptr(1, 0);
-  std::vector<mx_uint> arg_shape_data;
+  std::vector<int> arg_shape_data;
 
   for (size_t i = 0; i < kwargs.size(); ++i) {
     RCHECK(keys[i].length() != 0)
@@ -197,14 +197,14 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const {
   std::vector<const char*> c_keys = CKeys(keys);
 
   mx_uint in_shape_size;
-  const mx_uint *in_shape_ndim;
-  const mx_uint **in_shape_data;
+  const int *in_shape_ndim;
+  const int **in_shape_data;
   mx_uint out_shape_size;
-  const mx_uint *out_shape_ndim;
-  const mx_uint **out_shape_data;
+  const int *out_shape_ndim;
+  const int **out_shape_data;
   mx_uint aux_shape_size;
-  const mx_uint *aux_shape_ndim;
-  const mx_uint **aux_shape_data;
+  const int *aux_shape_ndim;
+  const int **aux_shape_data;
   int complete;
 
   MX_CALL(MXSymbolInferShape(

From b165b35c932c23e94556a2eecb712015d1e53736 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Wed, 3 Apr 2019 13:40:11 -0700
Subject: [PATCH 10/32] Fix cpp package build after using new shape definition
 (#14554)

---
 cpp-package/include/mxnet-cpp/ndarray.hpp |  6 +++---
 cpp-package/include/mxnet-cpp/symbol.hpp  | 20 ++++++++++----------
 include/mxnet/tensor_blob.h               |  2 ++
 include/mxnet/tuple.h                     |  6 +++++-
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/cpp-package/include/mxnet-cpp/ndarray.hpp b/cpp-package/include/mxnet-cpp/ndarray.hpp
index b667542bffb5..bf1d82ca33b4 100644
--- a/cpp-package/include/mxnet-cpp/ndarray.hpp
+++ b/cpp-package/include/mxnet-cpp/ndarray.hpp
@@ -397,11 +397,11 @@ inline size_t NDArray::Size() const {
 }
 
 inline std::vector<mx_uint> NDArray::GetShape() const {
-  const mx_uint *out_pdata;
-  mx_uint out_dim;
+  const int *out_pdata;
+  int out_dim;
   MXNDArrayGetShape(blob_ptr_->handle_, &out_dim, &out_pdata);
   std::vector<mx_uint> ret;
-  for (mx_uint i = 0; i < out_dim; ++i) {
+  for (int i = 0; i < out_dim; ++i) {
     ret.push_back(out_pdata[i]);
   }
   return ret;
diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp
index aed963949060..d82b7abaf614 100644
--- a/cpp-package/include/mxnet-cpp/symbol.hpp
+++ b/cpp-package/include/mxnet-cpp/symbol.hpp
@@ -188,7 +188,7 @@ inline void Symbol::InferShape(
 
   std::vector<const char *> keys;
   std::vector<mx_uint> arg_ind_ptr;
-  std::vector<mx_uint> arg_shape_data;
+  std::vector<int> arg_shape_data;
 
   for (const auto &arg : arg_shapes) {
     keys.push_back(arg.first.c_str());
@@ -200,14 +200,14 @@ inline void Symbol::InferShape(
   arg_ind_ptr.push_back(arg_shape_data.size());
 
   mx_uint in_shape_size;
-  const mx_uint *in_shape_ndim;
-  const mx_uint **in_shape_data;
+  const int*in_shape_ndim;
+  const int **in_shape_data;
   mx_uint out_shape_size;
-  const mx_uint *out_shape_ndim;
-  const mx_uint **out_shape_data;
+  const int *out_shape_ndim;
+  const int **out_shape_data;
   mx_uint aux_shape_size;
-  const mx_uint *aux_shape_ndim;
-  const mx_uint **aux_shape_data;
+  const int *aux_shape_ndim;
+  const int **aux_shape_data;
   int complete;
 
   CHECK_EQ(MXSymbolInferShape(GetHandle(), keys.size(), keys.data(),
@@ -221,19 +221,19 @@ inline void Symbol::InferShape(
   if (complete) {
     for (mx_uint i = 0; i < in_shape_size; ++i) {
       in_shape->push_back(std::vector<mx_uint>());
-      for (mx_uint j = 0; j < in_shape_ndim[i]; ++j) {
+      for (int j = 0; j < in_shape_ndim[i]; ++j) {
         (*in_shape)[i].push_back(in_shape_data[i][j]);
       }
     }
     for (mx_uint i = 0; i < aux_shape_size; ++i) {
       aux_shape->push_back(std::vector<mx_uint>());
-      for (mx_uint j = 0; j < aux_shape_ndim[i]; ++j) {
+      for (int j = 0; j < aux_shape_ndim[i]; ++j) {
         (*aux_shape)[i].push_back(aux_shape_data[i][j]);
       }
     }
     for (mx_uint i = 0; i < out_shape_size; ++i) {
       out_shape->push_back(std::vector<mx_uint>());
-      for (mx_uint j = 0; j < out_shape_ndim[i]; ++j) {
+      for (int j = 0; j < out_shape_ndim[i]; ++j) {
         (*out_shape)[i].push_back(out_shape_data[i][j]);
       }
     }
diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h
index 45d4c7fda639..a7a57266dab8 100755
--- a/include/mxnet/tensor_blob.h
+++ b/include/mxnet/tensor_blob.h
@@ -418,6 +418,8 @@ class TBlob {
 namespace dmlc {
 // Add a few patches to support mxnet::TShape in dmlc/parameter.
 DMLC_DECLARE_TYPE_NAME(mxnet::TShape, "Shape(tuple)");
+DMLC_DECLARE_TYPE_NAME(mxnet::Tuple<int>, "Shape(tuple)");
+DMLC_DECLARE_TYPE_NAME(mxnet::Tuple<dmlc::optional<int>>, "Shape(tuple)");
 DMLC_DECLARE_TYPE_NAME(nnvm::Tuple<int>, "Shape(tuple)");
 DMLC_DECLARE_TYPE_NAME(nnvm::Tuple<dmlc::optional<int>>, "Shape(tuple)");
 
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index d83e843033e3..c5a358628ccd 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -236,7 +236,10 @@ class Tuple {
    */
   friend std::ostream &operator<<(std::ostream &os, const Tuple<ValueType> &t) {
     if (t.ndim() == -1) {
-      os << "UNKNOWN_SHAPE";
+      // If t is an unknown shape, return string "None".
+      // This is consistent with returning unknown shape in Python and generating
+      // C++ operator APIs by OpWrapperGenerator.py (defaultString) in cpp-package.
+      os << "None";
       return os;
     }
     os << '[';
@@ -727,6 +730,7 @@ struct hash<mxnet::TShape> {
 namespace dmlc {
 /*! \brief description for optional TShape */
 DMLC_DECLARE_TYPE_NAME(optional<mxnet::TShape>, "Shape or None");
+DMLC_DECLARE_TYPE_NAME(optional<mxnet::Tuple<int>>, "Shape or None");
 // avoid low version of MSVC
 #if !defined(_MSC_VER)
 template<typename T>

From bff0bdce6ff83e252ad055e168661f34895d053e Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Wed, 3 Apr 2019 20:17:23 -0700
Subject: [PATCH 11/32] Fix pooling_v1 and deformable_convolution param
 initialization (#14577)

* Fix pooling_v1 param initialization

* Fix deformable_convolution param initialization
---
 .../contrib/deformable_convolution-inl.h      |  8 ++---
 src/operator/pooling_v1-inl.h                 | 33 ++++++++++---------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h
index 936df7f1bcd4..a7e22f548151 100644
--- a/src/operator/contrib/deformable_convolution-inl.h
+++ b/src/operator/contrib/deformable_convolution-inl.h
@@ -129,7 +129,7 @@ class DeformableConvolutionOp : public Operator {
     // calculate the shape of col_buffer
     mxnet::TShape col_buffer_shape(num_spatial_axes_ + 1, -1);
     col_buffer_shape[0] = conv_in_channels_ * param_.kernel.Size();
-    for (size_t i = 1; i < col_buffer_shape.ndim(); ++i) {
+    for (int i = 1; i < col_buffer_shape.ndim(); ++i) {
       col_buffer_shape[i] = out_data[0].shape_[i + 1];
     }
     // create a column buffer using workspace and col_buffer_shape
@@ -347,9 +347,9 @@ class DeformableConvolutionProp : public OperatorProperty {
     param_.Init(kwargs);
     if (param_.kernel.ndim() == 2) {
       param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCHW;
-      if (mxnet::op::shape_is_none(param_.stride)) param_.stride = Shape2(1, 1);
-      if (mxnet::op::shape_is_none(param_.dilate)) param_.dilate = Shape2(1, 1);
-      if (mxnet::op::shape_is_none(param_.pad)) param_.pad = Shape2(0, 0);
+      if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1);
+      if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1);
+      if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0);
     } else {
       LOG(FATAL) << "not implemented";
     }
diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h
index 21ba270c9d42..22a166cbb6cc 100644
--- a/src/operator/pooling_v1-inl.h
+++ b/src/operator/pooling_v1-inl.h
@@ -55,7 +55,7 @@ struct PoolingV1Param : public dmlc::Parameter<PoolingV1Param> {
   int pooling_convention;
   bool global_pool;
   DMLC_DECLARE_PARAMETER(PoolingV1Param) {
-    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0))
     .enforce_nonzero()
     .describe("pooling kernel size: (y, x) or (d, y, x)");
 
@@ -73,11 +73,11 @@ struct PoolingV1Param : public dmlc::Parameter<PoolingV1Param> {
     .add_enum("valid", pool_v1_enum::kValid)
     .describe("Pooling convention to be applied.");
 
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0))
     .enforce_nonzero()
     .describe("stride: for pooling (y, x) or (d, y, x)");
 
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0))
     .describe("pad for pooling: (y, x) or (d, y, x)");
   }
 };
@@ -217,19 +217,20 @@ class PoolingV1Prop : public OperatorProperty {
   void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
     using namespace mshadow;
     param_.Init(kwargs);
-    if (!param_.global_pool) {
-      if (param_.kernel.ndim() == 2) {
-        if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1);
-        if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0);
-      } else {
-        CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim() << "D pooling not supported";
-        if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1);
-        if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0);
-      }
-      CHECK_EQ(param_.stride.ndim(), param_.kernel.ndim())
-        << "stride and kernel should have the same length";
-      CHECK_EQ(param_.pad.ndim(), param_.kernel.ndim())
-        << "pad and kernel should have the same length";
+    if (param_.kernel.ndim() == 1) {
+      if (param_.stride.ndim() == 0) param_.stride = Shape1(1);
+      if (param_.pad.ndim() == 0) param_.pad = Shape1(0);
+    } else if (param_.kernel.ndim() == 2) {
+      if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1);
+      if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0);
+    } else {
+        // ignore kernel size only if global_pool not assigned false
+        if (param_.global_pool == false) {
+          CHECK_EQ(param_.kernel.ndim(), 3U) << param_.kernel.ndim()
+              << "D pooling not supported";
+        }
+      if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1);
+      if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0);
     }
   }
 

From ebecad1a8107f7dd7df0571b3558ae2b9cc4ff67 Mon Sep 17 00:00:00 2001
From: Junru Shao <junrushao1994@gmail.com>
Date: Wed, 3 Apr 2019 20:17:48 -0700
Subject: [PATCH 12/32] [Numpy] Misc fix (#14612)

* [Numpy] Misc Fix

* fix build

* !shape_is_none => shape_is_known

* Address comments

* Fix
---
 include/mxnet/ndarray.h             |  2 +-
 src/c_api/c_predict_api.cc          |  1 +
 src/imperative/imperative_utils.h   |  3 ++-
 src/kvstore/gradient_compression.cc | 10 +++++-----
 src/ndarray/ndarray.cc              | 11 +++++++----
 src/ndarray/ndarray_function.h      |  2 +-
 6 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 2232ebe7be40..13fb42ce521e 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -956,7 +956,7 @@ class NDArray {
     /*! \brief set the shape for ith aux data, and update storage shape if necessary */
     inline void set_aux_shape(const size_t i, const mxnet::TShape& shape) {
       aux_shapes[i] = shape;
-      if (storage_shape.ndim() > 0) {
+      if (storage_shape.ndim() >= 0) {
         if (storage_type == kRowSparseStorage && i == rowsparse::kIdx) {
           storage_shape[0] = shape[0];
         } else if (storage_type == kCSRStorage && i == csr::kIdx) {
diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc
index 3b9f43d86079..7de23ef935ef 100644
--- a/src/c_api/c_predict_api.cc
+++ b/src/c_api/c_predict_api.cc
@@ -436,6 +436,7 @@ int MXPredGetOutputShape(PredictorHandle handle,
       << "Index exceed number of outputs";
 
   const mxnet::TShape& s = p->out_shapes[out_index];
+  CHECK_GE(s.ndim(), 0);
   p->out_shapes_buffer.resize(s.ndim());
   nnvm::ShapeTypeCast(s.begin(), s.end(), p->out_shapes_buffer.data());
   *shape_data = p->out_shapes_buffer.data();
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 6864428d2559..d058df4b3806 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -31,6 +31,7 @@
 #include "../common/utils.h"
 #include "../common/exec_utils.h"
 #include "../operator/nn/mkldnn/mkldnn_base-inl.h"
+#include "../operator/operator_common.h"
 
 #ifndef MXNET_IMPERATIVE_IMPERATIVE_UTILS_H_
 #define MXNET_IMPERATIVE_IMPERATIVE_UTILS_H_
@@ -196,7 +197,7 @@ inline void SetShapeType(const Context& ctx,
 
   for (size_t i = 0; i < outputs.size(); ++i) {
     NDArrayStorageType storage_type = static_cast<NDArrayStorageType>(out_storage_types[i]);
-    if (outputs[i]->is_none() || outputs[i]->shape().ndim() == 0) {
+    if (outputs[i]->is_none() || mxnet::op::shape_is_none(outputs[i]->shape())) {
       if (is_dynamic_shape_existing) {
         // once there is dynamic shape somewhere, we could not pre-determine the shape.
         *outputs[i] = NDArray(ctx, out_types[i]);
diff --git a/src/kvstore/gradient_compression.cc b/src/kvstore/gradient_compression.cc
index e4a06fa9a1f2..30aaec91e27f 100644
--- a/src/kvstore/gradient_compression.cc
+++ b/src/kvstore/gradient_compression.cc
@@ -100,9 +100,9 @@ int64_t GradientCompression::GetCompressedSize(const int64_t original_size) {
 
 void GradientCompression::Quantize(const mxnet::NDArray &from, mxnet::NDArray *to,
                   mxnet::NDArray *residual, const int priority) {
-  CHECK(from.shape().ndim() != 0) << "source operand has zero dimension shape";
-  CHECK(to->shape().ndim() != 0) << "destination operand has zero dimension shape";
-  CHECK(residual->shape().ndim() != 0) << "residual operand has zero dimension shape";
+  CHECK(shape_is_known(from.shape())) << "source operand has undefined shape";
+  CHECK(shape_is_known(to->shape())) << "destination operand has undefined shape";
+  CHECK(shape_is_known(residual->shape())) << "residual operand has undefined shape";
   const int a = from.ctx().dev_mask();
   const int b = to->ctx().dev_mask();
   const float threshold = threshold_;
@@ -137,8 +137,8 @@ void GradientCompression::Quantize(const mxnet::NDArray &from, mxnet::NDArray *t
 
 void GradientCompression::Dequantize(const mxnet::NDArray &from, mxnet::NDArray *to,
                                      const int priority) {
-  CHECK(from.shape().ndim() != 0) << "source operands has zero dimension shape";
-  CHECK(to->shape().ndim() != 0) << "destination operand has zero dimension shape";
+  CHECK(shape_is_known(from.shape())) << "source operand has undefined shape";
+  CHECK(shape_is_known(to->shape())) << "destination operand has undefined shape";
   const int a = from.ctx().dev_mask();
   const int b = to->ctx().dev_mask();
   const float threshold = threshold_;
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 04518e0feb77..604000028bf1 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1191,8 +1191,8 @@ void CopyFromTo(const NDArray& from, const NDArray& to, int priority, bool is_op
   CHECK(from.shape() == to.shape())
       << "operands shape mismatch"
       << "from.shape = " << from.shape() << " to.shape=" << to.shape();
-  CHECK(from.shape().ndim() != 0)
-      << "source operands have zero dimension shape";
+  CHECK(!mxnet::op::shape_is_none(from.shape()))
+      << "source operands have undefined shape";
   // important: callback must always capture by value
   const Context from_ctx = from.ctx();
   const int a = from_ctx.dev_mask();
@@ -1663,7 +1663,7 @@ bool NDArray::LegacyLoad(dmlc::Stream *strm, const uint32_t magic) {
   // load shape
   mxnet::TShape shape;
   if (!LegacyTShapeLoad(strm, &shape, magic)) return false;
-  if (shape.ndim() == 0) {
+  if (mxnet::op::shape_is_none(shape)) {
     *this = NDArray(); return true;
   }
   // load context
@@ -1711,7 +1711,10 @@ bool NDArray::Load(dmlc::Stream *strm) {
   // load shape
   mxnet::TShape shape;
   if (!shape.Load(strm)) return false;
-  if (shape.ndim() == 0) {
+  if (!Imperative::Get()->is_np_comp()) {
+    common::ConvertToNumpyShape(&shape);
+  }
+  if (mxnet::op::shape_is_none(shape)) {
     *this = NDArray(); return true;
   }
 
diff --git a/src/ndarray/ndarray_function.h b/src/ndarray/ndarray_function.h
index 70b626dbb9b7..505bd205a8d5 100644
--- a/src/ndarray/ndarray_function.h
+++ b/src/ndarray/ndarray_function.h
@@ -40,7 +40,7 @@ namespace ndarray {
 struct BinaryBase {
   inline static mxnet::TShape GetShape(const mxnet::TShape &lshape, const mxnet::TShape &rshape) {
     CHECK(lshape == rshape) << "operands shape mismatch";
-    CHECK(lshape.ndim() != 0) << "source operand have zero dimension shape";
+    CHECK(!mxnet::op::shape_is_none(lshape)) << "source operand have zero dimension shape";
     return lshape;
   }
 };

From 4c35ade5d1247a82db2c25cf7ff4f09b8ae1e031 Mon Sep 17 00:00:00 2001
From: Junru Shao <junrushao1994@gmail.com>
Date: Wed, 3 Apr 2019 20:20:07 -0700
Subject: [PATCH 13/32] [Numpy] fix
 test_operator_gpu.test_upsampling_bilinear_with_type (#14557)

* Fix test_operator_gpu.test_upsampling_bilinear_with_type

* Address comments
---
 src/operator/nn/deconvolution-inl.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
index 5f3137f0dcb8..e82a073ea08d 100644
--- a/src/operator/nn/deconvolution-inl.h
+++ b/src/operator/nn/deconvolution-inl.h
@@ -143,9 +143,9 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
         }
       }
     } else {
-      for (size_t i = 0; i < ndim; i++) {
-        o_pad[i] = pad[i];
-        o_adj[i] = adj[i];
+      for (int i = 0; i < (int) ndim; i++) {
+        o_pad[i] = i < pad.ndim() ? pad[i] : 0;
+        o_adj[i] = i < adj.ndim() ? adj[i] : 0;
       }
     }
   }

From cdc9023b4e167794452dd5229cc37aa7dd0d39a8 Mon Sep 17 00:00:00 2001
From: Yizhi Liu <liuyizhi@apache.org>
Date: Fri, 5 Apr 2019 10:21:55 -0700
Subject: [PATCH 14/32] [Numpy] Java/Scala modification (#14625)

* modify jni to support 0 dim/shape

* fix transpose axes default value
---
 .../scala/org/apache/mxnet/Executor.scala     | 142 ++++-------
 .../main/scala/org/apache/mxnet/LibInfo.scala |  32 ++-
 .../main/scala/org/apache/mxnet/NDArray.scala |  10 +-
 .../scala/org/apache/mxnet/NumpyScope.scala   |  58 +++++
 .../main/scala/org/apache/mxnet/Symbol.scala  |  38 ++-
 .../org/apache/mxnet/NumpyScopeSuite.scala    |  34 +++
 .../apache/mxnet/utils/CToScalaUtils.scala    |   3 +-
 .../native/org_apache_mxnet_native_c_api.cc   | 239 +++++++++++++++---
 .../native/org_apache_mxnet_native_c_api.h    |  32 +++
 src/operator/tensor/matrix_op-inl.h           |   6 +-
 10 files changed, 452 insertions(+), 142 deletions(-)
 create mode 100644 scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
 create mode 100644 scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala

diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala
index aec44023a5d3..f51424b7edf6 100644
--- a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala
@@ -61,10 +61,6 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle,
   protected var monitorCallback: MXMonitorCallback = null
   private val logger: Logger = LoggerFactory.getLogger(classOf[Executor])
 
-  private[mxnet] var ownsArgArrays = false
-  private[mxnet] var ownsGradArrays = false
-  private[mxnet] var ownsAuxArrays = false
-
   override def nativeAddress: CPtrAddress = handle
   override def nativeDeAllocator: (CPtrAddress => Int) = _LIB.mxExecutorFree
   // cannot determine the off-heap size of this object
@@ -75,17 +71,12 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle,
     if (!super.isDisposed) {
       super.dispose()
       outputs.foreach(o => o.dispose())
-      // Symbol.bind clones symbol when creating the executor so we need to dispose of the clone
-      symbol.dispose()
-      if (ownsArgArrays && argArrays != null) {argArrays.foreach(a => a.dispose())}
-      if (ownsGradArrays && gradArrays != null) {gradArrays.foreach(
+      if (argArrays != null) {argArrays.foreach(a => a.dispose())}
+      if (gradArrays != null) {gradArrays.foreach(
         // Symbol will sometimes fill this with nulls so we've got to check the elements too
         a => if (a != null) {a.dispose()})
       }
-      if (ownsAuxArrays && auxArrays != null) {auxArrays.foreach(a => a.dispose())}
-      if (_argDict != null) {_argDict.foreach(a => a._2.dispose())}
-      if (_gradDict != null) {_gradDict.foreach(a => a._2.dispose())}
-      if (_auxDict != null) {_auxDict.foreach(a => a._2.dispose())}
+      if (auxArrays != null) {auxArrays.foreach(a => a.dispose())}
     }
   }
 
@@ -104,95 +95,58 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle,
    */
   def reshape(partialShaping: Boolean = false, allowUpSizing: Boolean = false,
     kwargs: Map[String, Shape]): Executor = {
-    var setArgOwner = false
-    var setAuxOwner = false
-    var setGradOwner = false
-     val (argShapes, _, auxShapes) = this.symbol.inferShape(kwargs)
-    // TODO: more precise error message should be provided by backend
-    require(argShapes != null, "Shape inference failed." +
-      s"Known shapes are $kwargs for symbol arguments ${symbol.listArguments()} " +
-      s"and aux states ${symbol.listAuxiliaryStates()}")
-
-    var newArgDict = Map[String, NDArray]()
-    var newGradDict = Map[String, NDArray]()
 
-    this.symbol.listArguments().zipWithIndex.foreach { case (name, i) =>
-      val newShape = argShapes(i)
-      val arr = this.argArrays(i)
-      val dArr = if (this.gradArrays == null) null else this.gradArrays(i)
-      if (partialShaping || kwargs.contains(name) || newShape.equals(arr.shape)) {
-        if (newShape.product > arr.shape.product) {
-          require(allowUpSizing, s"New shape of arg:$name larger than original. " +
-                        "First making a big executor and then down sizing it " +
-                        "is more efficient than the reverse." +
-                        "If you really want to up size, set allowUpSizing = true " +
-                        "to enable allocation of new arrays.")
-          newArgDict = newArgDict + (name -> NDArray.empty(newShape, arr.context, arr.dtype))
-          setArgOwner = true
-          if (dArr != null) {
-            newGradDict = newGradDict + (name -> NDArray.empty(newShape, dArr.context, dArr.dtype))
-            setGradOwner = true
-          }
-        } else {
-          newArgDict = newArgDict + (name -> arr.reshape(newShape.toArray))
-          if (dArr != null) {
-            newGradDict = newGradDict + (name -> dArr.reshape(newShape.toArray))
-          }
-        }
-      } else {
-        throw new  AssertionError(s"Shape of unspecified array arg:$name changed." +
-                    "This can cause the new executor to not share parameters " +
-                    "with the old one. Please check for error in network." +
-                    "If this is intended, set partialShaping = true to suppress this warning.")
-      }
-    }
+    val providedArgShapeNames = kwargs.keys
+    val providedArgShapeData = kwargs.values.flatMap(_.toVector)
+    val providedArgShapeIdx = kwargs.values.scanLeft(0)((sum, shape) => sum + shape.size)
 
-    var newAuxDict = Map[String, NDArray]()
-    val zip3 = (this.symbol.listAuxiliaryStates(), auxShapes, this.auxArrays).zipped
-    zip3.foreach { case (name, newShape, arr) =>
-      if (partialShaping || newShape.equals(arr.shape)) {
-        if (newShape.product > arr.shape.product) {
-          require(allowUpSizing, s"New shape of aux:$name larger than original. " +
-                        "First making a big executor and then down sizing it " +
-                        "is more efficient than the reverse." +
-                        "If you really want to up size, set allowUpSizing = true " +
-                        "to enable allocation of new arrays.")
-          newAuxDict = newAuxDict + (name -> NDArray.empty(newShape, arr.context))
-          setAuxOwner = true
-        } else {
-          newAuxDict = newAuxDict + (name -> arr.reshape(newShape.toArray))
-        }
-      } else {
-        throw new  AssertionError(s"Shape of unspecified array aux:$name changed." +
-                  "This can cause the new executor to not share parameters " +
-                  "with the old one. Please check for error in network." +
-                  "If this is intended, set partialShaping = true to suppress this warning.")
-      }
+    val ctxMapKeys = if (_group2ctx != null) _group2ctx.keys.toArray else Array.empty[String]
+    val ctxMapDevTypes = if (_group2ctx != null) {
+      _group2ctx.values.map(_.deviceTypeid).toArray
+    } else {
+      Array.empty[Int]
     }
-    val reshapedExecutor = if (this._gradsReq.isInstanceOf[Seq[_]]) {
-      this.symbol.bind(this._ctx,
-                          newArgDict,
-                          newGradDict,
-                          this._gradsReq.asInstanceOf[Seq[String]],
-                          newAuxDict,
-                          this._group2ctx,
-                          this)
+    val ctxMapDevIds = if (_group2ctx != null) {
+      _group2ctx.values.map(_.deviceId).toArray
     } else {
-      this.symbol.bind(this._ctx,
-                          newArgDict,
-                          newGradDict,
-                          this._gradsReq.asInstanceOf[Map[String, String]],
-                          newAuxDict,
-                          this._group2ctx,
-                          this)
+      Array.empty[Int]
     }
 
-    // This method has created new NDArrays that will need to be managed by the new Executor
-    if (setArgOwner) reshapedExecutor.ownsArgArrays = true
-    if (setGradOwner) reshapedExecutor.ownsGradArrays = true
-    if (setAuxOwner) reshapedExecutor.ownsAuxArrays = true
+    val inArgs = ArrayBuffer.empty[NDArrayHandle]
+    val argGrads = ArrayBuffer.empty[NDArrayHandle]
+    val auxStates = ArrayBuffer.empty[NDArrayHandle]
+    val outHandle = new ExecutorHandleRef()
+
+    checkCall(_LIB.mxExecutorReshape(
+              if (partialShaping) 1 else 0,
+              if (allowUpSizing) 1 else 0,
+              _ctx.deviceTypeid,
+              _ctx.deviceId,
+              ctxMapKeys.toArray,
+              ctxMapDevTypes.toArray,
+              ctxMapDevIds.toArray,
+              providedArgShapeNames.toArray,
+              providedArgShapeData.toArray,
+              providedArgShapeIdx.toArray,
+              inArgs,
+              argGrads,
+              auxStates,
+              this.handle,
+              outHandle))
+
+    val argArrays = inArgs.map(new NDArray(_)).toArray
+    val gradArrays = argGrads.map(handle =>
+      if (handle == 0) null else new NDArray(handle)).toArray
+    val auxArrays = auxStates.map(new NDArray(_)).toArray
 
-    reshapedExecutor
+    val executor = new Executor(outHandle.value, this.symbol)
+    executor._ctx = this._ctx
+    executor._gradsReq = this._gradsReq
+    executor._group2ctx = this._group2ctx
+    executor.argArrays = argArrays
+    executor.gradArrays = gradArrays
+    executor.auxArrays = auxArrays
+    executor
   }
 
   /**
diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala b/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala
index 40fc0951e885..aba618540141 100644
--- a/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/LibInfo.scala
@@ -188,6 +188,23 @@ private[mxnet] class LibInfo {
                                  grads: Array[NDArrayHandle]): Int
   @native def mxExecutorPrint(handle: ExecutorHandle, debugStr: RefString): Int
   @native def mxExecutorSetMonitorCallback(handle: ExecutorHandle, callback: MXMonitorCallback): Int
+  // scalastyle:off parameterNum
+  @native def mxExecutorReshape(partialShaping: Int,
+                                allowUpSizing: Int,
+                                devType: Int,
+                                devId: Int,
+                                mapKeys: Array[String],
+                                mapDevTypes: Array[Int],
+                                mapDevIds: Array[Int],
+                                providedArgShapeNames: Array[String],
+                                providedArgShapeData: Array[Int],
+                                providedArgShapeIdx: Array[Int],
+                                inArgs: ArrayBuffer[NDArrayHandle],
+                                argGrads: ArrayBuffer[NDArrayHandle],
+                                auxStates: ArrayBuffer[NDArrayHandle],
+                                sharedExec: ExecutorHandle,
+                                out: ExecutorHandleRef): Int
+  // scalastyle:on parameterNum
 
   // Symbols
   @native def mxSymbolListAtomicSymbolCreators(symbolList: ListBuffer[SymbolHandle]): Int
@@ -240,11 +257,20 @@ private[mxnet] class LibInfo {
                                  numArgs: MXUint,
                                  keys: Array[String],
                                  argIndPtr: Array[MXUint],
-                                 argShapeData: Array[MXUint],
+                                 argShapeData: Array[Int],
                                  inShapeData: ListBuffer[Array[Int]],
                                  outShapeData: ListBuffer[Array[Int]],
                                  auxShapeData: ListBuffer[Array[Int]],
                                  complete: RefInt): Int
+  @native def mxSymbolInferShapePartial(handle: SymbolHandle,
+                                        numArgs: MXUint,
+                                        keys: Array[String],
+                                        argIndPtr: Array[MXUint],
+                                        argShapeData: Array[Int],
+                                        inShapeData: ListBuffer[Array[Int]],
+                                        outShapeData: ListBuffer[Array[Int]],
+                                        auxShapeData: ListBuffer[Array[Int]],
+                                        complete: RefInt): Int
   @native def mxSymbolGetOutput(handle: SymbolHandle, index: Int, out: SymbolHandleRef): Int
   @native def mxSymbolSaveToJSON(handle: SymbolHandle, out: RefString): Int
   @native def mxSymbolCreateFromJSON(json: String, handle: SymbolHandleRef): Int
@@ -322,4 +348,8 @@ private[mxnet] class LibInfo {
   @native def mxSetProfilerConfig(keys: Array[String], vals: Array[String]): Int
   @native def mxSetProfilerState(state: Int): Int
   @native def mxDumpProfile(finished: Int): Int
+
+  // Numpy
+  @native def mxIsNumpyCompatible(compatible: RefInt): Int
+  @native def mxSetIsNumpyCompatible(isNpComp: Int, prev: RefInt): Int
 }
diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala b/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala
index ab42265ae102..849f4566f528 100644
--- a/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/NDArray.scala
@@ -1274,11 +1274,15 @@ class NDArray private[mxnet](private[mxnet] val handle: NDArrayHandle,
    * @return an array representing shape of current ndarray
    */
   def shape: Shape = {
-    val ndim = new MXUintRef
+    val ndim = new RefInt
     val data = ArrayBuffer[Int]()
     checkCall(_LIB.mxNDArrayGetShape(handle, ndim, data))
-    require(ndim.value == data.length, s"ndim=$ndim, while len(data)=${data.length}")
-    Shape(data)
+    if (ndim.value == -1) {
+      null
+    } else {
+      require(ndim.value == data.length, s"ndim=$ndim, while len(data)=${data.length}")
+      Shape(data)
+    }
   }
 
   // Get size of current NDArray.
diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
new file mode 100644
index 000000000000..ec366ea4029d
--- /dev/null
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mxnet
+
+import org.apache.mxnet.Base._
+
+object NumpyScope {
+  def setNumpyCompatible(isNpComp: Boolean): Boolean = {
+    val prev = new RefInt()
+    checkCall(_LIB.mxSetIsNumpyCompatible(if (isNpComp) 1 else 0, prev))
+    if (prev.value != 0) true else false
+  }
+
+  def isNumpyCompatible: Boolean = {
+    val curr = new RefInt
+    checkCall(_LIB.mxIsNumpyCompatible(curr))
+    if (curr.value != 0) true else false
+  }
+
+  def enableNumpyCompatible: NumpyScope = {
+    new NumpyScope(true)
+  }
+
+
+  def disableNumpyCompatible: NumpyScope = {
+    new NumpyScope(false)
+  }
+}
+
+class NumpyScope(var isCompatible: Boolean) {
+  private var prev: Boolean = false
+
+  def withScope[T](body: => T): T = {
+    prev = NumpyScope.setNumpyCompatible(isCompatible)
+    try {
+      body
+    } finally {
+      if (prev != isCompatible) {
+        NumpyScope.setNumpyCompatible(prev)
+      }
+    }
+  }
+}
diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala b/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala
index 821e04f08df2..808a23a8c945 100644
--- a/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/Symbol.scala
@@ -260,17 +260,45 @@ class Symbol private(private[mxnet] val handle: SymbolHandle) extends NativeReso
 
   def inferShape(keys: Array[String], indPtr: Array[Int], values: Array[Int])
     : (IndexedSeq[Shape], IndexedSeq[Shape], IndexedSeq[Shape]) = {
+    val res = inferShapeImpl(partial = false, keys, indPtr, values)
+    if (res._2 == null) {
+      val (argShapes, _, _) = inferShapeImpl(partial = true, keys, indPtr, values)
+      val argNames = listArguments()
+      val unknown = (argNames zip argShapes).map { case (name, shape) =>
+        val shapeIsNone = if (NumpyScope.isNumpyCompatible) {
+          shape == null || shape.toVector.contains(-1)
+        } else {
+          shape == null || shape.toVector.contains(0)
+        }
+        if (shapeIsNone) s"$name: $shape" else ""
+      }
+      logger.warn("Cannot decide shape for the following arguments. " +
+        "Consider providing them as input: \n\t{}",
+        unknown.filter(_ != "").mkString("\n\t"))
+    }
+    res
+  }
+
+  private def inferShapeImpl(partial: Boolean,
+                             keys: Array[String],
+                             indPtr: Array[Int],
+                             values: Array[Int])
+    : (IndexedSeq[Shape], IndexedSeq[Shape], IndexedSeq[Shape]) = {
     val argShapeData = ListBuffer.empty[Array[Int]]
     val outShapeData = ListBuffer.empty[Array[Int]]
     val auxShapeData = ListBuffer.empty[Array[Int]]
     val complete = new RefInt
-
-    checkCall(_LIB.mxSymbolInferShape(handle, indPtr.length - 1, keys, indPtr, values,
-      argShapeData, outShapeData, auxShapeData, complete))
+    if (partial) {
+      checkCall(_LIB.mxSymbolInferShapePartial(handle, indPtr.length - 1, keys, indPtr, values,
+        argShapeData, outShapeData, auxShapeData, complete))
+    } else {
+      checkCall(_LIB.mxSymbolInferShape(handle, indPtr.length - 1, keys, indPtr, values,
+        argShapeData, outShapeData, auxShapeData, complete))
+    }
     if (complete.value != 0) {
       (argShapeData.map(s => Shape(s)).toIndexedSeq,
-       outShapeData.map(s => Shape(s)).toIndexedSeq,
-       auxShapeData.map(s => Shape(s)).toIndexedSeq)
+        outShapeData.map(s => Shape(s)).toIndexedSeq,
+        auxShapeData.map(s => Shape(s)).toIndexedSeq)
     } else {
       (null, null, null)
     }
diff --git a/scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala b/scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala
new file mode 100644
index 000000000000..bf6627ac7e91
--- /dev/null
+++ b/scala-package/core/src/test/scala/org/apache/mxnet/NumpyScopeSuite.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mxnet
+
+import org.scalatest.{BeforeAndAfterAll, FunSuite}
+
+class NumpyScopeSuite extends FunSuite with BeforeAndAfterAll {
+  test("compatible") {
+    NumpyScope.enableNumpyCompatible.withScope {
+      assert(NumpyScope.isNumpyCompatible === true)
+    }
+  }
+
+  test("incompatible") {
+    NumpyScope.disableNumpyCompatible.withScope {
+      assert(NumpyScope.isNumpyCompatible === false)
+    }
+  }
+}
diff --git a/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala b/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala
index 57c4cfba10b7..12d797f9b100 100644
--- a/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala
+++ b/scala-package/macros/src/main/scala/org/apache/mxnet/utils/CToScalaUtils.scala
@@ -47,7 +47,8 @@ private[mxnet] object CToScalaUtils {
       case "double" | "doubleorNone" => types("double")
       case "string" => "String"
       case "boolean" | "booleanorNone" => types("bool")
-      case "tupleof<float>" | "tupleof<double>" | "tupleof<>" | "ptr" | "" => "Any"
+      case "tupleof<int>" | "tupleof<float>" | "tupleof<double>" | "tupleof<intorNone>" |
+           "tupleof<>" | "ptr" | "" => "Any"
       case default => throw new IllegalArgumentException(
         s"Invalid type for args: $default\nString argType: $argType\nargName: $argName")
     }
diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
index 33e4cca99b3a..678dfc12afac 100644
--- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
+++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
@@ -354,8 +354,8 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayLoadFromRawBytes
 
 JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape
   (JNIEnv *env, jobject obj, jlong ndArrayPtr, jobject ndimRef, jobject dataBuf) {
-  mx_uint ndim;
-  const mx_uint *pdata;
+  int ndim;
+  const int *pdata;
   int ret = MXNDArrayGetShape(reinterpret_cast<NDArrayHandle>(ndArrayPtr), &ndim, &pdata);
 
   // fill dataBuf
@@ -365,7 +365,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape
   jclass arrayClass = env->FindClass("scala/collection/mutable/ArrayBuffer");
   jmethodID arrayAppend = env->GetMethodID(arrayClass,
     "$plus$eq", "(Ljava/lang/Object;)Lscala/collection/mutable/ArrayBuffer;");
-  for (size_t i = 0; i < ndim; ++i) {
+  for (int i = 0; i < ndim; ++i) {
     jobject data = env->NewObject(integerClass, newInteger, pdata[i]);
     env->CallObjectMethod(dataBuf, arrayAppend, data);
     env->DeleteLocalRef(data);
@@ -892,6 +892,118 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorBackward
   return ret;
 }
 
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape
+  (JNIEnv * env, jobject obj,
+    jint partialReshaping, jint allowUpSizing, jint devType, jint devId,
+    jobjectArray jmapKeys, jintArray jmapDevTypes, jintArray jmapDevIds,
+    jobjectArray jprovidedArgShapeNames, jintArray jprovidedArgShapeData, jintArray jprovidedArgShapeIdx,
+    jobject jrefInArgs, jobject jrefArgGrads, jobject jrefAuxStates,
+    jlong jsharedExec, jobject jrefOut) {
+  CHECK(jmapKeys != NULL);
+  CHECK(jprovidedArgShapeNames != NULL);
+
+  int numMapKeys = env->GetArrayLength(jmapKeys);
+  jint *mapDevTypes = env->GetIntArrayElements(jmapDevTypes, NULL);
+  jint *mapDevIds = env->GetIntArrayElements(jmapDevIds, NULL);
+  const char **mapKeys = NULL;
+  if (numMapKeys > 0) {
+    mapKeys = new const char*[numMapKeys];
+    for (int i = 0; i < numMapKeys; ++i) {
+      jstring jkey = reinterpret_cast<jstring>(env->GetObjectArrayElement(jmapKeys, i));
+      mapKeys[i] = env->GetStringUTFChars(jkey, 0);
+      env->DeleteLocalRef(jkey);
+    }
+  }
+
+  int numProvidedArgShapes = env->GetArrayLength(jprovidedArgShapeNames);
+  jint *providedArgShapeData = env->GetIntArrayElements(jprovidedArgShapeData, NULL);
+  jint *providedArgShapeIdx = env->GetIntArrayElements(jprovidedArgShapeIdx, NULL);
+  const char **providedArgShapeNames = NULL;
+  if (numProvidedArgShapes > 0) {
+    providedArgShapeNames = new const char*[numProvidedArgShapes];
+    for (int i = 0; i < numProvidedArgShapes; ++i) {
+      jstring jkey = reinterpret_cast<jstring>(env->GetObjectArrayElement(jprovidedArgShapeNames, i));
+      providedArgShapeNames[i] = env->GetStringUTFChars(jkey, 0);
+      env->DeleteLocalRef(jkey);
+    }
+  }
+
+  mx_uint numInArgs = 0;
+  NDArrayHandle *inArgs;
+  NDArrayHandle *argGrads;
+
+  mx_uint numAuxStates = 0;
+  NDArrayHandle *auxStates;
+
+  ExecutorHandle out;
+
+  int ret = MXExecutorReshape(partialReshaping,
+                              allowUpSizing,
+                              devType,
+                              devId,
+                              static_cast<mx_uint>(numMapKeys),
+                              mapKeys,
+                              static_cast<const int*>(mapDevTypes),
+                              static_cast<const int*>(mapDevIds),
+                              static_cast<const mx_uint>(numProvidedArgShapes),
+                              providedArgShapeNames,
+                              static_cast<const int*>(providedArgShapeData),
+                              reinterpret_cast<const mx_uint*>(providedArgShapeIdx),
+                              &numInArgs,
+                              &inArgs,
+                              &argGrads,
+                              &numAuxStates,
+                              &auxStates,
+                              reinterpret_cast<ExecutorHandle>(jsharedExec),
+                              &out);
+
+  jclass longCls = env->FindClass("java/lang/Long");
+  jmethodID newLong = env->GetMethodID(longCls, "<init>", "(J)V");
+
+  jclass arrayClass = env->FindClass("scala/collection/mutable/ArrayBuffer");
+  jmethodID arrayAppend = env->GetMethodID(arrayClass,
+    "$plus$eq", "(Ljava/lang/Object;)Lscala/collection/mutable/ArrayBuffer;");
+
+  for (size_t i = 0; i < numInArgs; ++i) {
+    jobject inArg = env->NewObject(longCls, newLong, inArgs[i]);
+    env->CallObjectMethod(jrefInArgs, arrayAppend, inArg);
+    env->DeleteLocalRef(inArg);
+
+    jobject argGrad = env->NewObject(longCls, newLong, argGrads[i]);
+    env->CallObjectMethod(jrefArgGrads, arrayAppend, argGrad);
+    env->DeleteLocalRef(argGrad);
+  }
+
+  for (size_t i = 0; i < numAuxStates; ++i) {
+    jobject auxState = env->NewObject(longCls, newLong, auxStates[i]);
+    env->CallObjectMethod(jrefAuxStates, arrayAppend, auxState);
+    env->DeleteLocalRef(auxState);
+  }
+
+  SetLongField(env, jrefOut, reinterpret_cast<jlong>(out));
+
+  // release allocated memory
+  for (int i = 0; i < numMapKeys; i++) {
+    jstring jkey = reinterpret_cast<jstring>(env->GetObjectArrayElement(jmapKeys, i));
+    env->ReleaseStringUTFChars(jkey, mapKeys[i]);
+    env->DeleteLocalRef(jkey);
+  }
+  if (mapKeys != NULL) {
+    delete[] mapKeys;
+  }
+
+  for (int i = 0; i < numProvidedArgShapes; i++) {
+    jstring jkey = reinterpret_cast<jstring>(env->GetObjectArrayElement(jprovidedArgShapeNames, i));
+    env->ReleaseStringUTFChars(jkey, providedArgShapeNames[i]);
+    env->DeleteLocalRef(jkey);
+  }
+  if (providedArgShapeNames != NULL) {
+    delete[] providedArgShapeNames;
+  }
+
+  return ret;
+}
+
 JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorPrint
   (JNIEnv * env, jobject obj, jlong ptr, jobject debugStr) {
   const char *retDebugStr;
@@ -1530,23 +1642,26 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolCreateFromFile
 
 int FillSymbolInferShape
   (JNIEnv *env, jmethodID listAppend, jobject joutData,
-    mx_uint shapeSize, const mx_uint *shapeNdim, const mx_uint **shapeData) {
-  for (size_t i = 0; i < shapeSize; ++i) {
-    jintArray jshape = env->NewIntArray(shapeNdim[i]);
-    if (jshape == NULL) {
-      // TODO(Yizhi): out of memory error thrown, return a specific error code ?
-      return -1;
+    int shapeSize, const int *shapeNdim, const int **shapeData) {
+  for (int i = 0; i < shapeSize; ++i) {
+    jintArray jshape = NULL;
+    if (shapeNdim[i] >= 0) {
+      jshape = env->NewIntArray(shapeNdim[i]);
+      if (jshape == NULL) {
+        // TODO(Yizhi): out of memory error thrown, return a specific error code ?
+        return -1;
+      }
+      env->SetIntArrayRegion(jshape, 0, shapeNdim[i], reinterpret_cast<const jint *>(shapeData[i]));
     }
-    env->SetIntArrayRegion(jshape, 0, shapeNdim[i], reinterpret_cast<const jint *>(shapeData[i]));
     env->CallObjectMethod(joutData, listAppend, jshape);
     env->DeleteLocalRef(jshape);
   }
   return 0;
 }
-JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape
-  (JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys,
-    jintArray jargIndPtr, jintArray jargShapeData,
-    jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData, jobject jcomplete) {
+
+int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys,
+                            jintArray jargIndPtr, jintArray jargShapeData, jobject jinShapeData,
+                            jobject joutShapeData, jobject jauxShapeData, jobject jcomplete, bool partial) {
   const char **keys = NULL;
   if (jkeys != NULL) {
     keys = new const char *[jnumArgs];
@@ -1559,36 +1674,55 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape
   }
 
   mx_uint inShapeSize;
-  const mx_uint *inShapeNdim;
-  const mx_uint **inShapeData;
+  const int *inShapeNdim;
+  const int **inShapeData;
 
   mx_uint outShapeSize;
-  const mx_uint *outShapeNdim;
-  const mx_uint **outShapeData;
+  const int *outShapeNdim;
+  const int **outShapeData;
 
   mx_uint auxShapeSize;
-  const mx_uint *auxShapeNdim;
-  const mx_uint **auxShapeData;
+  const int *auxShapeNdim;
+  const int **auxShapeData;
 
   int complete;
 
   jint *argIndPtr = env->GetIntArrayElements(jargIndPtr, NULL);
   jint *argShapeData = env->GetIntArrayElements(jargShapeData, NULL);
-  int ret = MXSymbolInferShape(reinterpret_cast<SymbolHandle>(symbolPtr),
-                               static_cast<mx_uint>(jnumArgs),
-                               keys,
-                               reinterpret_cast<const mx_uint *>(argIndPtr),
-                               reinterpret_cast<const mx_uint *>(argShapeData),
-                               &inShapeSize,
-                               &inShapeNdim,
-                               &inShapeData,
-                               &outShapeSize,
-                               &outShapeNdim,
-                               &outShapeData,
-                               &auxShapeSize,
-                               &auxShapeNdim,
-                               &auxShapeData,
-                               &complete);
+  int ret;
+  if (!partial) {
+    ret = MXSymbolInferShape(reinterpret_cast<SymbolHandle>(symbolPtr),
+                              static_cast<mx_uint>(jnumArgs),
+                              keys,
+                              reinterpret_cast<mx_uint *>(argIndPtr),
+                              reinterpret_cast<const int *>(argShapeData),
+                              &inShapeSize,
+                              &inShapeNdim,
+                              &inShapeData,
+                              &outShapeSize,
+                              &outShapeNdim,
+                              &outShapeData,
+                              &auxShapeSize,
+                              &auxShapeNdim,
+                              &auxShapeData,
+                              &complete);
+  } else {
+    ret = MXSymbolInferShapePartial(reinterpret_cast<SymbolHandle>(symbolPtr),
+                                    static_cast<mx_uint>(jnumArgs),
+                                    keys,
+                                    reinterpret_cast<mx_uint *>(argIndPtr),
+                                    reinterpret_cast<const int *>(argShapeData),
+                                    &inShapeSize,
+                                    &inShapeNdim,
+                                    &inShapeData,
+                                    &outShapeSize,
+                                    &outShapeNdim,
+                                    &outShapeData,
+                                    &auxShapeSize,
+                                    &auxShapeNdim,
+                                    &auxShapeData,
+                                    &complete);
+  }
   env->ReleaseIntArrayElements(jargShapeData, argShapeData, 0);
   env->ReleaseIntArrayElements(jargIndPtr, argIndPtr, 0);
 
@@ -1629,6 +1763,24 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape
   return ret;
 }
 
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape
+  (JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys,
+    jintArray jargIndPtr, jintArray jargShapeData,
+    jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData, jobject jcomplete) {
+
+  return SymbolInferShapeHelper(env, obj, symbolPtr, jnumArgs, jkeys, jargIndPtr, jargShapeData,
+                                jinShapeData, joutShapeData, jauxShapeData, jcomplete, false);
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShapePartial
+  (JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys,
+    jintArray jargIndPtr, jintArray jargShapeData,
+    jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData, jobject jcomplete) {
+
+  return SymbolInferShapeHelper(env, obj, symbolPtr, jnumArgs, jkeys, jargIndPtr, jargShapeData,
+                                jinShapeData, joutShapeData, jauxShapeData, jcomplete, true);
+}
+
 JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorBindX
   (JNIEnv *env, jobject obj, jlong symbolPtr, jint deviceTypeId, jint deviceID, jint numCtx,
     jobjectArray jctxMapKeys, jintArray jctxMapDevTypes, jintArray jctxMapDevIDs, jint numArgs,
@@ -2551,3 +2703,20 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxDumpProfile
   (JNIEnv *env, jobject obj, jint finished) {
   return MXDumpProfile(finished);
 }
+
+// Numpy
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxIsNumpyCompatible
+  (JNIEnv *env, jobject obj, jobject compatibleRef) {
+  bool isCompatible;
+  int ret = MXIsNumpyCompatible(&isCompatible);
+  SetIntField(env, compatibleRef, static_cast<int>(isCompatible));
+  return ret;
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetIsNumpyCompatible
+  (JNIEnv *env, jobject obj, jint isNpComp, jobject prevRef) {
+  int prev;
+  int ret = MXSetIsNumpyCompatible(isNpComp, &prev);
+  SetIntField(env, prevRef, prev);
+  return ret;
+}
\ No newline at end of file
diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h
index b8a9b3b9e64f..467272cea9cf 100644
--- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h
+++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.h
@@ -511,6 +511,14 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorPrint
 JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorSetMonitorCallback
   (JNIEnv *, jobject, jlong, jobject);
 
+/*
+ * Class:     org_apache_mxnet_LibInfo
+ * Method:    mxExecutorReshape
+ * Signature: (IIII[Ljava/lang/String;[I[I[Ljava/lang/String;[I[ILscala/collection/mutable/ArrayBuffer;Lscala/collection/mutable/ArrayBuffer;Lscala/collection/mutable/ArrayBuffer;JLorg/apache/mxnet/Base/RefLong;)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape
+  (JNIEnv *, jobject, jint, jint, jint, jint, jobjectArray, jintArray, jintArray, jobjectArray, jintArray, jintArray, jobject, jobject, jobject, jlong, jobject);
+
 /*
  * Class:     org_apache_mxnet_LibInfo
  * Method:    mxSymbolListAtomicSymbolCreators
@@ -655,6 +663,14 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferType
 JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShape
   (JNIEnv *, jobject, jlong, jint, jobjectArray, jintArray, jintArray, jobject, jobject, jobject, jobject);
 
+/*
+ * Class:     org_apache_mxnet_LibInfo
+ * Method:    mxSymbolInferShapePartial
+ * Signature: (JI[Ljava/lang/String;[I[ILscala/collection/mutable/ListBuffer;Lscala/collection/mutable/ListBuffer;Lscala/collection/mutable/ListBuffer;Lorg/apache/mxnet/Base/RefInt;)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSymbolInferShapePartial
+  (JNIEnv *, jobject, jlong, jint, jobjectArray, jintArray, jintArray, jobject, jobject, jobject, jobject);
+
 /*
  * Class:     org_apache_mxnet_LibInfo
  * Method:    mxSymbolGetOutput
@@ -855,6 +871,22 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetProfilerState
 JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxDumpProfile
   (JNIEnv *, jobject, jint);
 
+/*
+ * Class:     org_apache_mxnet_LibInfo
+ * Method:    mxIsNumpyCompatible
+ * Signature: (Lorg/apache/mxnet/Base/RefInt;)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxIsNumpyCompatible
+  (JNIEnv *, jobject, jobject);
+
+/*
+ * Class:     org_apache_mxnet_LibInfo
+ * Method:    mxSetIsNumpyCompatible
+ * Signature: (ILorg/apache/mxnet/Base/RefInt;)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetIsNumpyCompatible
+  (JNIEnv *, jobject, jint, jobject);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index e91f904debb9..87e911a56800 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -240,7 +240,7 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs,
 struct TransposeParam : public dmlc::Parameter<TransposeParam> {
   mxnet::TShape axes;
   DMLC_DECLARE_PARAMETER(TransposeParam) {
-    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape())
+    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0))
     .describe("Target axis order. By default the axes will be inverted.");
   }
 
@@ -318,7 +318,7 @@ void Transpose(const nnvm::NodeAttrs& attrs,
                const std::vector<TBlob>& outputs) {
   const TransposeParam& param = nnvm::get<TransposeParam>(attrs.parsed);
   CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace";
-  if (!mxnet::ndim_is_known(param.axes)) {
+  if (param.axes.ndim() == 0) {
     mxnet::TShape axes(inputs[0].ndim(), -1);
     for (int i = 0; i < axes.ndim(); ++i) {
       axes[i] = axes.ndim() - 1 - i;
@@ -338,7 +338,7 @@ inline bool TransposeShape(const nnvm::NodeAttrs& attrs,
   mxnet::TShape& shp = (*in_attrs)[0];
   CHECK_LE(shp.ndim(), 6U) << "Transpose support at most 6 dimensions";
   mxnet::TShape ret(shp.ndim(), -1);
-  if (!mxnet::ndim_is_known(param.axes)) {
+  if (param.axes.ndim() == 0) {
     for (int i = 0; i < shp.ndim(); ++i) {
       ret[i] = shp[shp.ndim()-1-i];
     }

From c7f4ebd91b9662038281cd9977548c07a0f0af6f Mon Sep 17 00:00:00 2001
From: Haibin Lin <linhaibin.eric@gmail.com>
Date: Fri, 5 Apr 2019 10:30:20 -0700
Subject: [PATCH 15/32] fix shape index bug (#14630)

---
 src/c_api/c_api.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index f1571312dceb..533f18fa9e0b 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -802,7 +802,7 @@ int MXDataIterGetLabel(DataIterHandle handle, NDArrayHandle *out) {
   // temp hack to make label 1D
   // TODO(tianjun) make label 1D when label_width=0
   mxnet::TShape shape = db.data[1].shape();
-  if (shape[1] == 1) {
+  if (shape.ndim() > 1 && shape[1] == 1) {
     *pndarray = db.data[1].Reshape(mshadow::Shape1(shape[0]));
   } else {
     *pndarray = db.data[1];

From c526ac4718cbd1478390f2f31604226f0837cbee Mon Sep 17 00:00:00 2001
From: Yizhi Liu <liuyizhi@apache.org>
Date: Fri, 5 Apr 2019 13:12:11 -0700
Subject: [PATCH 16/32] fix jni lint (#14634)

---
 .../native/org_apache_mxnet_native_c_api.cc    | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
index 678dfc12afac..ae01c8a7c05e 100644
--- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
+++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
@@ -896,9 +896,9 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape
   (JNIEnv * env, jobject obj,
     jint partialReshaping, jint allowUpSizing, jint devType, jint devId,
     jobjectArray jmapKeys, jintArray jmapDevTypes, jintArray jmapDevIds,
-    jobjectArray jprovidedArgShapeNames, jintArray jprovidedArgShapeData, jintArray jprovidedArgShapeIdx,
-    jobject jrefInArgs, jobject jrefArgGrads, jobject jrefAuxStates,
-    jlong jsharedExec, jobject jrefOut) {
+    jobjectArray jprovidedArgShapeNames, jintArray jprovidedArgShapeData,
+    jintArray jprovidedArgShapeIdx, jobject jrefInArgs, jobject jrefArgGrads,
+    jobject jrefAuxStates, jlong jsharedExec, jobject jrefOut) {
   CHECK(jmapKeys != NULL);
   CHECK(jprovidedArgShapeNames != NULL);
 
@@ -922,7 +922,8 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape
   if (numProvidedArgShapes > 0) {
     providedArgShapeNames = new const char*[numProvidedArgShapes];
     for (int i = 0; i < numProvidedArgShapes; ++i) {
-      jstring jkey = reinterpret_cast<jstring>(env->GetObjectArrayElement(jprovidedArgShapeNames, i));
+      jstring jkey = reinterpret_cast<jstring>(
+          env->GetObjectArrayElement(jprovidedArgShapeNames, i));
       providedArgShapeNames[i] = env->GetStringUTFChars(jkey, 0);
       env->DeleteLocalRef(jkey);
     }
@@ -1659,9 +1660,10 @@ int FillSymbolInferShape
   return 0;
 }
 
-int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs, jobjectArray jkeys,
-                            jintArray jargIndPtr, jintArray jargShapeData, jobject jinShapeData,
-                            jobject joutShapeData, jobject jauxShapeData, jobject jcomplete, bool partial) {
+int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumArgs,
+                            jobjectArray jkeys, jintArray jargIndPtr, jintArray jargShapeData,
+                            jobject jinShapeData, jobject joutShapeData, jobject jauxShapeData,
+                            jobject jcomplete, bool partial) {
   const char **keys = NULL;
   if (jkeys != NULL) {
     keys = new const char *[jnumArgs];
@@ -2719,4 +2721,4 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxSetIsNumpyCompatible
   int ret = MXSetIsNumpyCompatible(isNpComp, &prev);
   SetIntField(env, prevRef, prev);
   return ret;
-}
\ No newline at end of file
+}

From d12a1fab4bbdf58e6627e41debee8e7ba7f62d3c Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Mon, 8 Apr 2019 11:29:00 -0700
Subject: [PATCH 17/32] [numpy] Fix numpy branch failing tests in CI (#14639)

* Remove numpy namespaces for operator registration

* Fix bug when shape is compeltely unknown

* Fix singed/unsigned compare warning

* Fix CI

* Fix pylint

* Avoid launching gpu kernels for zero-size output tensors

* Fix test_ndarray

* Fix binary broadcast with zero-size tensors

* Better error message for infer shape failure in imperative

* Fix TShape constructor ambiguity on certain platforms

* Fix mkldnn build failure

* Fix build failure in gpu and cpp test

* Fix gpu cpp test build with mkldnn

* Fix mkldnn cpp test

* Fix concatenating zero-size tensors

* Avoid letting mkldnn handle zero-size tensors in concat

* Fix quantized_concat infer shape

* Try to fix perl c api
---
 include/mxnet/tuple.h                         |  13 +-
 perl-package/AI-MXNetCAPI/mxnet.i             |  84 ++++----
 python/mxnet/__init__.py                      |   3 +-
 python/mxnet/base.py                          | 135 +++++++++++--
 python/mxnet/ndarray/__init__.py              |   2 +-
 python/mxnet/ndarray/numpy.py                 |  18 --
 python/mxnet/numpy/__init__.py                |  66 -------
 python/mxnet/symbol/__init__.py               |   2 +-
 python/mxnet/symbol/numpy.py                  |  18 --
 python/mxnet/symbol/symbol.py                 |   3 +-
 src/c_api/c_api_common.h                      |   4 +-
 src/common/utils.h                            |   2 +-
 src/imperative/imperative_utils.h             |  12 +-
 src/ndarray/ndarray.cc                        |   4 +-
 src/operator/channel_op_common.h              |   4 +
 src/operator/contrib/bounding_box-inl.h       |   3 +-
 .../contrib/deformable_convolution-inl.h      |   6 +-
 src/operator/contrib/index_copy-inl.h         |   2 +-
 src/operator/mxnet_op.h                       |   2 +
 src/operator/nn/concat.cc                     |   5 +-
 src/operator/nn/cudnn/cudnn_convolution-inl.h |   2 +-
 .../nn/cudnn/cudnn_deconvolution-inl.h        |   2 +-
 src/operator/nn/deconvolution-inl.h           |   2 +-
 src/operator/nn/mkldnn/mkldnn_slice.cc        |   4 +-
 src/operator/numpy/np_broadcast_reduce_op.h   | 186 ------------------
 .../numpy/np_broadcast_reduce_op_value.cc     |  61 ------
 .../numpy/np_broadcast_reduce_op_value.cu     |  36 ----
 src/operator/pad-inl.h                        |   2 +-
 src/operator/pooling_v1-inl.h                 |   6 +-
 src/operator/quantization/dequantize-inl.h    |   2 +-
 .../mkldnn/mkldnn_requantize-inl.h            |   2 +-
 src/operator/quantization/quantize-inl.h      |   2 +-
 src/operator/quantization/quantize_v2-inl.h   |   2 +-
 src/operator/quantization/quantized_concat.cc |  16 +-
 src/operator/quantization/quantized_conv.cc   |   4 +-
 .../quantization/quantized_fully_connected.cc |   4 +-
 src/operator/quantization/requantize-inl.h    |   2 +-
 src/operator/swapaxis-inl.h                   |   2 +-
 src/operator/tensor/broadcast_reduce_op.h     |   6 +-
 .../tensor/elemwise_binary_broadcast_op.h     |  15 +-
 src/operator/tensor/histogram-inl.h           |   8 +-
 src/operator/tensor/init_op.h                 |   2 +
 src/operator/tensor/matrix_op-inl.h           |  12 +-
 tests/cpp/include/test_mkldnn.h               |  18 +-
 tests/cpp/include/test_util.h                 |   4 +-
 tests/cpp/misc/serialization.cc               |   2 +-
 tests/cpp/operator/batchnorm_test.cc          |   4 +-
 tests/cpp/operator/mkldnn_operator_test.cc    |   6 +-
 tests/python/gpu/test_operator_gpu.py         |  22 ++-
 tests/python/unittest/test_infer_shape.py     |  16 ++
 tests/python/unittest/test_ndarray.py         |   2 +-
 tests/python/unittest/test_operator.py        |  54 ++++-
 52 files changed, 349 insertions(+), 547 deletions(-)
 delete mode 100644 python/mxnet/ndarray/numpy.py
 delete mode 100644 python/mxnet/numpy/__init__.py
 delete mode 100644 python/mxnet/symbol/numpy.py
 delete mode 100644 src/operator/numpy/np_broadcast_reduce_op.h
 delete mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cc
 delete mode 100644 src/operator/numpy/np_broadcast_reduce_op_value.cu

diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index c5a358628ccd..c5c0ccd548df 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -390,7 +390,7 @@ class TShape : public Tuple<dim_t> {
    * \param ndim the number of dimension
    * \param value the dimension size for all dims
    */
-  inline TShape(int ndim, int value = -1) {  // NOLINT(*)
+  inline TShape(const int ndim, const dim_t value) {  // NOLINT(*)
     this->SetDim(ndim);
     if (ndim > 0) {
       std::fill_n(begin(), ndim, value);
@@ -422,12 +422,17 @@ class TShape : public Tuple<dim_t> {
     this->swap(s);
   }
   /*!
-   * \brief construct the Tuple from content of iterator
+   * \brief construct the Tuple from content of iterator.
+   * This function is enforced with template arguments of random access iterator types.
+   * This is necessary to distinguish from another constructor: TShape(const int, const dim_t).
    * \param begin the beginning of iterator
    * \param end end the end of the iterator
    * \tparam RandomAccessIterator iterator type
    */
-  template<typename RandomAccessIterator>
+  template<typename RandomAccessIterator,
+           typename std::enable_if<
+               std::is_same<typename std::iterator_traits<RandomAccessIterator>::iterator_category,
+                            std::random_access_iterator_tag>::value, int>::type = 0>
   inline TShape(RandomAccessIterator begin,
                 RandomAccessIterator end) {
     this->assign(begin, end);
@@ -622,7 +627,7 @@ inline bool ndim_is_known(const TShape& x) {
 }
 
 /*! brief check if a shape's dim size is known. */
-inline bool dim_size_is_known(const int dim_size) {
+inline bool dim_size_is_known(const dim_t dim_size) {
   CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size;
   return dim_size != -1;
 }
diff --git a/perl-package/AI-MXNetCAPI/mxnet.i b/perl-package/AI-MXNetCAPI/mxnet.i
index 0e6a05ea9695..0ecf5b3a9cc3 100644
--- a/perl-package/AI-MXNetCAPI/mxnet.i
+++ b/perl-package/AI-MXNetCAPI/mxnet.i
@@ -641,8 +641,8 @@ int MXNDArrayReshape64(NDArrayHandle handle,
  * \return 0 when success, -1 when failure happens
  */
 int MXNDArrayGetShape(NDArrayHandle handle,
-                                mx_uint *out_dim,
-                                const mx_uint **out_pdata);
+                                int *out_dim,
+                                const int **out_pdata);
 /*!
  * \brief get the content of the data in NDArray
  * \param handle the handle to the ndarray
@@ -1290,20 +1290,20 @@ int MXSymbolGrad(SymbolHandle sym,
  * \return 0 when success, -1 when failure happens
  */
 int MXSymbolInferShape(SymbolHandle sym,
-                                 mx_uint num_args,
-                                 const char** in,
-                                 const mx_uint *in,
-                                 const mx_uint *in,
-                                 mx_uint *in_shape_size,
-                                 const mx_uint **in_shape_ndim,
-                                 const mx_uint ***in_shape_data,
-                                 mx_uint *out_shape_size,
-                                 const mx_uint **out_shape_ndim,
-                                 const mx_uint ***out_shape_data,
-                                 mx_uint *aux_shape_size,
-                                 const mx_uint **aux_shape_ndim,
-                                 const mx_uint ***aux_shape_data,
-                                 int *out);
+                       mx_uint num_args,
+                       const char** in,
+                       const mx_uint *in,
+                       const int *in,
+                       mx_uint *in_shape_size,
+                       const int **in_shape_ndim,
+                       const int ***in_shape_data,
+                       mx_uint *out_shape_size,
+                       const int **out_shape_ndim,
+                       const int ***out_shape_data,
+                       mx_uint *aux_shape_size,
+                       const int **aux_shape_ndim,
+                       const int ***aux_shape_data,
+                       int *out);
 /*!
  * \brief partially infer shape of unknown input shapes given the known one.
  *
@@ -1332,16 +1332,16 @@ int MXSymbolInferShapePartial(SymbolHandle sym,
                                  mx_uint num_args,
                                  const char** in,
                                  const mx_uint *in,
-                                 const mx_uint *in,
+                                 const int *in,
                                  mx_uint *in_shape_size,
-                                 const mx_uint **in_shape_ndim,
-                                 const mx_uint ***in_shape_data,
+                                 const int **in_shape_ndim,
+                                 const int ***in_shape_data,
                                  mx_uint *out_shape_size,
-                                 const mx_uint **out_shape_ndim,
-                                 const mx_uint ***out_shape_data,
+                                 const int **out_shape_ndim,
+                                 const int ***out_shape_data,
                                  mx_uint *aux_shape_size,
-                                 const mx_uint **aux_shape_ndim,
-                                 const mx_uint ***aux_shape_data,
+                                 const int **aux_shape_ndim,
+                                 const int ***aux_shape_data,
                                  int *out);
 
 /*!
@@ -1547,7 +1547,7 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                          const char** in, // provided_grad_req_types,
                          const mx_uint num_provided_arg_shapes,
                          const char** in, // provided_arg_shape_names,
-                         const mx_uint* in, // provided_arg_shape_data,
+                         const int* in, // provided_arg_shape_data,
                          const mx_uint* in, // provided_arg_shape_idx,
                          const mx_uint num_provided_arg_dtypes,
                          const char** in, // provided_arg_dtype_names,
@@ -1593,24 +1593,24 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
  * \return a new executor
  */
 int MXExecutorReshape(int partial_shaping,
-                                int allow_up_sizing,
-                                int dev_type,
-                                int dev_id,
-                                mx_uint num_map_keys,
-                                const char** in,
-                                const int* in,
-                                const int* in,
-                                const mx_uint num_provided_arg_shapes,
-                                const char** in,
-                                const mx_uint* in,
-                                const mx_uint* in,
-                                mx_uint* couple_out_size,
-                                NDArrayHandle** out_first_array,
-                                NDArrayHandle** out_second_array,
-                                mx_uint* out_size,
-                                NDArrayHandle** out_array,
-                                ExecutorHandle shared_exec,
-                                ExecutorHandle *out);
+                      int allow_up_sizing,
+                      int dev_type,
+                      int dev_id,
+                      mx_uint num_map_keys,
+                      const char** in,
+                      const int* in,
+                      const int* in,
+                      const mx_uint num_provided_arg_shapes,
+                      const char** in,
+                      const int* in,
+                      const mx_uint* in,
+                      mx_uint* couple_out_size,
+                      NDArrayHandle** out_first_array,
+                      NDArrayHandle** out_second_array,
+                      mx_uint* out_size,
+                      NDArrayHandle** out_array,
+                      ExecutorHandle shared_exec,
+                      ExecutorHandle *out);
 
 /*!
  * \brief set a call back to notify the completion of operation
diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index 8db83a286157..5f4f9b393e41 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -23,9 +23,8 @@
 
 from .context import Context, current_context, cpu, gpu, cpu_pinned
 from . import engine
-from .base import MXNetError
+from .base import MXNetError, is_np_comp, set_np_comp, enable_np_comp, disable_np_comp
 from . import base
-from . import numpy
 from . import contrib
 from . import ndarray
 from . import ndarray as nd
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index fe1dd00f9454..916e74182f94 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -561,7 +561,7 @@ def _as_list(obj):
         return [obj]
 
 
-_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_', '_numpy_']
+_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_', '_random_']
 
 
 def _get_op_name_prefix(op_name):
@@ -607,13 +607,6 @@ def _init_op_module(root_namespace, module_name, make_op_func):
     # use mx.nd.contrib or mx.sym.contrib from now on
     contrib_module_name_old = "%s.contrib.%s" % (root_namespace, module_name)
     contrib_module_old = sys.modules[contrib_module_name_old]
-    # special handling of registering numpy ops
-    if module_name == 'ndarray':
-        numpy_module_name = "%s.numpy" % root_namespace
-        numpy_module = sys.modules[numpy_module_name]
-    else:
-        numpy_module_name = None
-        numpy_module = None
     submodule_dict = {}
     for op_name_prefix in _OP_NAME_PREFIX_LIST:
         submodule_dict[op_name_prefix] =\
@@ -652,16 +645,6 @@ def _init_op_module(root_namespace, module_name, make_op_func):
             function.__module__ = contrib_module_name_old
             setattr(contrib_module_old, function.__name__, function)
             contrib_module_old.__all__.append(function.__name__)
-        elif op_name_prefix == '_numpy_' and numpy_module_name is not None:
-            # only register numpy ops under mxnet.numpy in imperative mode
-            hdl = OpHandle()
-            check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
-            # TODO(reminisce): Didn't consider third level module here, e.g. mxnet.numpy.random.
-            func_name = name[len(op_name_prefix):]
-            function = make_op_func(hdl, name, func_name)
-            function.__module__ = numpy_module_name
-            setattr(numpy_module, function.__name__, function)
-            numpy_module.__all__.append(function.__name__)
 
 
 def _generate_op_module_signature(root_namespace, module_name, op_code_gen_func):
@@ -751,3 +734,119 @@ def write_all_str(module_file, module_all_list):
 
 ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object
 ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
+
+
+def set_np_comp(flag):
+    """
+    Turns on/off NumPy compatibility. NumPy-compatibility is turned off by default in backend.
+
+    Parameters
+    ----------
+    flag : bool
+        Indicates whether to turn on/off NumPy compatibility.
+
+    Returns
+    -------
+        A bool value indicating the previous state of NumPy compatibility.
+    """
+    prev = ctypes.c_int()
+    check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(flag), ctypes.byref(prev)))
+    return bool(prev.value)
+
+
+def is_np_comp():
+    """
+    Checks whether the NumPy compatibility is currently turned on.
+    NumPy-compatibility is turned off by default in backend.
+
+    Returns
+    -------
+        A bool value indicating whether the NumPy compatibility is currently on.
+    """
+    curr = ctypes.c_bool()
+    check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr)))
+    return curr.value
+
+
+class _NumpyCompatibilityStateScope(object):
+    """Scope for managing numpy compatibility state.
+
+    Example::
+
+        with _NumpyCompatibilityStateScope(True):
+            y = model(x)
+            backward([y])
+
+    """
+    def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name
+        self._enter_is_np_comp = is_np_comp
+        self._prev_is_np_comp = None
+
+    def __enter__(self):
+        if self._enter_is_np_comp is not None:
+            self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp)
+
+    def __exit__(self, ptype, value, trace):
+        if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp:
+            set_np_comp(self._prev_is_np_comp)
+
+
+def enable_np_comp():
+    """Returns a NumPy compatibility state scope to be used in 'with' statement
+    and captures code that needs the compatibility.
+
+    Example::
+
+        with mx.enable_np_comp():
+            # A scalar tensor's shape is `()`, whose `ndim` is `0`.
+            scalar = mx.nd.ones(shape=())
+            assert scalar.shape == ()
+
+            # In NumPy compatible mode, 0 in a shape means that dimension contains zero elements.
+            data = mx.sym.var("data", shape=(0, 2, 3))
+            ret = mx.sym.sin(data)
+            arg_shapes, out_shapes, _ = ret.infer_shape()
+            assert arg_shapes[0] == (0, 2, 3)
+            assert out_shapes[0] == (0, 2, 3)
+
+            # -1 means unknown shape dimension size in the new NumPy-compatible shape definition
+            data = mx.sym.var("data", shape=(-1, 2, 3))
+            ret = mx.sym.sin(data)
+            arg_shapes, out_shapes, _ = ret.infer_shape_partial()
+            assert arg_shapes[0] == (-1, 2, 3)
+            assert out_shapes[0] == (-1, 2, 3)
+
+            # When a shape is completely unknown in NumPy-compatible mode, it is
+            # represented as `None` in Python.
+            data = mx.sym.var("data")
+            ret = mx.sym.sin(data)
+            arg_shapes, out_shapes, _ = ret.infer_shape_partial()
+            assert arg_shapes[0] is None
+            assert out_shapes[0] is None
+    """
+    return _NumpyCompatibilityStateScope(True)
+
+
+def disable_np_comp():
+    """Returns a state scope with NumPy-compatibility disabled to be used in 'with' statement
+    and captures code that does not need the compatibility.
+
+    Example::
+
+        with mx.disable_np_comp():
+            # 0 means unknown shape dimension size in the legacy shape definition.
+            data = mx.sym.var("data", shape=(0, 2, 3))
+            ret = mx.sym.sin(data)
+            arg_shapes, out_shapes, _ = ret.infer_shape_partial()
+            assert arg_shapes[0] == (0, 2, 3)
+            assert out_shapes[0] == (0, 2, 3)
+
+            # When a shape is completely unknown in the legacy mode (default), its ndim is
+            # equal to 0 and it is represented as `()` in Python.
+            data = mx.sym.var("data")
+            ret = mx.sym.sin(data)
+            arg_shapes, out_shapes, _ = ret.infer_shape_partial()
+            assert arg_shapes[0] == ()
+            assert out_shapes[0] == ()
+    """
+    return _NumpyCompatibilityStateScope(False)
diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py
index a102399521cc..f09908e894d5 100644
--- a/python/mxnet/ndarray/__init__.py
+++ b/python/mxnet/ndarray/__init__.py
@@ -17,7 +17,7 @@
 
 """NDArray API of MXNet."""
 
-from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray, numpy
+from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray
 # pylint: disable=wildcard-import, redefined-builtin
 try:
     from .gen_op import * # pylint: disable=unused-wildcard-import
diff --git a/python/mxnet/ndarray/numpy.py b/python/mxnet/ndarray/numpy.py
deleted file mode 100644
index 0826ac8aca7f..000000000000
--- a/python/mxnet/ndarray/numpy.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-__all__ = []
diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py
deleted file mode 100644
index e0dfda10113e..000000000000
--- a/python/mxnet/numpy/__init__.py
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/usr/bin/env python
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import ctypes
-from ..base import _LIB, check_call
-
-__all__ = []
-
-
-def set_np_comp(is_np_comp):
-    prev = ctypes.c_int()
-    check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(is_np_comp), ctypes.byref(prev)))
-    return bool(prev.value)
-
-
-def is_np_comp():
-    curr = ctypes.c_bool()
-    check_call(_LIB.MXIsNumpyCompatible(ctypes.byref(curr)))
-    return curr.value
-
-
-class _NumpyCompatibilityStateScope(object):
-    """Scope for managing numpy compatibility state.
-
-    Example::
-
-        with _NumpyCompatibilityStateScope(True):
-            y = model(x)
-            backward([y])
-
-    """
-    def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name
-        self._enter_is_np_comp = is_np_comp
-        self._prev_is_np_comp = None
-
-    def __enter__(self):
-        if self._enter_is_np_comp is not None:
-            self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp)
-
-    def __exit__(self, ptype, value, trace):
-        if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp:
-            set_np_comp(self._prev_is_np_comp)
-
-
-def enable_np_comp():
-    return _NumpyCompatibilityStateScope(True)
-
-
-def disable_np_comp():
-    return _NumpyCompatibilityStateScope(False)
diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py
index 326e4f5aff78..f438e4954aa9 100644
--- a/python/mxnet/symbol/__init__.py
+++ b/python/mxnet/symbol/__init__.py
@@ -17,7 +17,7 @@
 
 """Symbol API of MXNet."""
 
-from . import _internal, contrib, linalg, op, random, sparse, image, symbol, numpy
+from . import _internal, contrib, linalg, op, random, sparse, image, symbol
 # pylint: disable=wildcard-import, redefined-builtin
 try:
     from .gen_op import * # pylint: disable=unused-wildcard-import
diff --git a/python/mxnet/symbol/numpy.py b/python/mxnet/symbol/numpy.py
deleted file mode 100644
index 0826ac8aca7f..000000000000
--- a/python/mxnet/symbol/numpy.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-__all__ = []
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index b2b8261be51d..2de4954cae81 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -34,7 +34,7 @@
 
 from ..attribute import AttrScope
 from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array
-from ..base import mx_uint, py_str, string_types, integer_types, mx_int
+from ..base import mx_uint, py_str, string_types, integer_types, mx_int, is_np_comp
 from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle
 from ..base import check_call, MXNetError, NotImplementedForSymbol
 from ..context import Context, current_context
@@ -42,7 +42,6 @@
 from ..ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID
 from ..ndarray import _ndarray_cls
 from ..executor import Executor
-from ..numpy import is_np_comp
 from . import _internal
 from . import op
 from ._internal import SymbolBase, _set_symbol_class
diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h
index 55608b950866..8be192b22c53 100644
--- a/src/c_api/c_api_common.h
+++ b/src/c_api/c_api_common.h
@@ -100,7 +100,9 @@ struct MXAPIThreadLocalEntry {
     for (size_t i = 0; i < shapes.size(); ++i) {
       ndim->at(i) = shapes[i].ndim();
       data->at(i) = ptr;
-      ptr = mxnet::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr);
+      if (shapes[i].ndim() > 0) {
+        ptr = mxnet::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr);
+      }
     }
   }
 };
diff --git a/src/common/utils.h b/src/common/utils.h
index 4fb398d883a6..6cdb869ff9ae 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -776,7 +776,7 @@ inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) {
  */
 inline void ConvertToLegacyShape(mxnet::TShape* shape) {
   if (!mxnet::ndim_is_known(*shape)) {
-    *shape = mxnet::TShape(0);
+    *shape = mxnet::TShape(0, -1);
   } else {
     for (int j = 0; j < shape->ndim(); ++j) {
       if (!mxnet::dim_size_is_known(*shape, j)) {
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index d058df4b3806..24e6c49d5edb 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -131,12 +131,16 @@ inline void SetShapeType(const Context& ctx,
       std::stringstream os;
       os << "Operator " << attrs.op->name << " inferring shapes failed.\n";
       os << "input shapes:\n";
-      for (auto& nd : inputs) {
-        os << nd->shape() << '\n';
+      for (const auto& s : in_shapes) {
+        os << s << '\n';
       }
       os << "output shapes:\n";
-      for (auto& nd : outputs) {
-        os << nd->shape() << '\n';
+      for (const auto& s : out_shapes) {
+        os << s << '\n';
+      }
+      os << "operator attributes:\n";
+      for (const auto& kv : attrs.dict) {
+        os << kv.first << " : " << kv.second << '\n';
       }
       LOG(FATAL) << os.str();
     }
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 604000028bf1..f5aac36a48eb 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -549,7 +549,7 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
     // If they have different shapes, we need to reshape the array first.
     // Since this method will only be used inside an operator, we can call
     // MKLDNNDataReshape to reshape an array.
-    mxnet::TShape required_shape(desc2.data.ndims);
+    mxnet::TShape required_shape(desc2.data.ndims, -1);
     for (int i = 0; i < desc2.data.ndims; i++)
       required_shape[i] = desc2.data.dims[i];
     NDArray reshaped = MKLDNNDataReshape(required_shape);
@@ -575,7 +575,7 @@ NDArray NDArray::Reorder2Default() const {
 
   // create new ndarray from  mkldnn layout
   mkldnn::memory::desc from_desc = ptr_->mkl_mem_->GetPrimitiveDesc().desc();
-  mxnet::TShape tshape(from_desc.data.ndims);
+  mxnet::TShape tshape(from_desc.data.ndims, -1);
   for (int i = 0; i < from_desc.data.ndims; i++) tshape[i] = from_desc.data.dims[i];
   NDArray ret(tshape, ctx(), false, dtype());
   mkldnn::memory::primitive_desc def_pd = ptr_->mkl_mem_->GetPrimitiveDesc(format);
diff --git a/src/operator/channel_op_common.h b/src/operator/channel_op_common.h
index 1afc13ad2594..43f689d2defa 100644
--- a/src/operator/channel_op_common.h
+++ b/src/operator/channel_op_common.h
@@ -45,6 +45,8 @@ inline void concatenate_helper(const std::vector<mshadow::Tensor<xpu, dim, DType
     size_t size = input.size();
     index_t begin = 0;
     for (size_t i = 0; i < size; ++i) {
+      // If input[i] is a zero-size tensor, do nothing.
+      if (input[i].shape_.Size() == 0) continue;
       index_t end = begin + input[i].size(cdim);
       Assign(slice<cdim>(out, begin, end), req, input[i]);
       begin = end;
@@ -80,6 +82,8 @@ void split_helper(const mshadow::Tensor<xpu, dim, DType> &input,
     size_t size = out.size();
     index_t begin = 0;
     for (size_t i = 0; i < size; ++i) {
+      // If out[i] is a zero-size tensor, do nothing.
+      if (out[i].shape_.Size() == 0) continue;
       index_t end = begin + out[i].size(cdim);
       Assign(out[i], req[i], slice<cdim>(input, begin, end));
       begin = end;
diff --git a/src/operator/contrib/bounding_box-inl.h b/src/operator/contrib/bounding_box-inl.h
index 6ea4e8097b6c..686f1666a310 100644
--- a/src/operator/contrib/bounding_box-inl.h
+++ b/src/operator/contrib/bounding_box-inl.h
@@ -94,9 +94,8 @@ inline bool BoxNMSShape(const nnvm::NodeAttrs& attrs,
   const BoxNMSParam& param = nnvm::get<BoxNMSParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 2U);
-  // TODO(@junrushao1994): verify with Joshua Z. Zhang about this operator
   if (mxnet::op::shape_is_none(in_attrs->at(0))
-   && mxnet::op::shape_is_none(out_attrs->at(0))) {
+    && mxnet::op::shape_is_none(out_attrs->at(0))) {
     return false;
   }
 
diff --git a/src/operator/contrib/deformable_convolution-inl.h b/src/operator/contrib/deformable_convolution-inl.h
index a7e22f548151..000d703066d7 100644
--- a/src/operator/contrib/deformable_convolution-inl.h
+++ b/src/operator/contrib/deformable_convolution-inl.h
@@ -69,11 +69,11 @@ struct DeformableConvolutionParam : public dmlc::Parameter<DeformableConvolution
   dmlc::optional<int> layout;
   DMLC_DECLARE_PARAMETER(DeformableConvolutionParam) {
     DMLC_DECLARE_FIELD(kernel).describe("Convolution kernel size: (h, w) or (d, h, w)");
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, -1))
       .describe("Convolution stride: (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, -1))
       .describe("Convolution dilate: (h, w) or (d, h, w). Defaults to 1 for each dimension.");
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, -1))
       .describe("Zero pad for convolution: (h, w) or (d, h, w). Defaults to no padding.");
     DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
       .describe("Convolution filter(channel) number");
diff --git a/src/operator/contrib/index_copy-inl.h b/src/operator/contrib/index_copy-inl.h
index 35f88916da20..9f78f0593ed1 100644
--- a/src/operator/contrib/index_copy-inl.h
+++ b/src/operator/contrib/index_copy-inl.h
@@ -64,7 +64,7 @@ inline bool IndexCopyShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->at(1).ndim(), 1);
   // Shape matching
   CHECK_EQ(in_attrs->at(0).ndim(), in_attrs->at(2).ndim());
-  for (size_t i = 0; i < in_attrs->at(0).ndim(); ++i) {
+  for (int i = 0; i < in_attrs->at(0).ndim(); ++i) {
     if (i == 0) {
       CHECK_GE(in_attrs->at(0)[i], in_attrs->at(2)[i]);
     } else {
diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index a937f839c9bb..e331255c2e50 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -781,6 +781,7 @@ struct Kernel<OP, gpu> {
   /*! \brief Launch GPU kernel */
   template<typename ...Args>
   inline static void Launch(mshadow::Stream<gpu> *s, int N, Args... args) {
+    if (0 == N) return;
     using namespace mshadow::cuda;
     int ngrid = std::min(kMaxGridNum, (N + kBaseThreadNum - 1) / kBaseThreadNum);
     mxnet_generic_kernel<OP, Args...>
@@ -791,6 +792,7 @@ struct Kernel<OP, gpu> {
 
   template<typename ...Args>
   inline static void LaunchEx(mshadow::Stream<gpu> *s, const int N, Args... args) {
+    if (0 == N) return;
     using namespace mshadow::cuda;
     int ngrid = std::min(kMaxGridNum, (N + kBaseThreadNum - 1) / kBaseThreadNum);
     mxnet_generic_kernel_ex<OP, Args...>
diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc
index b534ee58e85c..8fb229889332 100644
--- a/src/operator/nn/concat.cc
+++ b/src/operator/nn/concat.cc
@@ -234,9 +234,10 @@ bool SupportMKLDNNConcat(const std::vector<NDArray> &arrs) {
   for (auto &arr : arrs) {
     if (arr.IsView()) return false;
     if (arr.dtype() != mshadow::kFloat32) return false;
+    // DO not support zero-size tensors.
+    if (arr.shape().Size() == 0) return false;
     int ndim = arr.shape().ndim();
-    unsigned mkldnn_ndims =
-        static_cast<unsigned>(arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims);
+    const int mkldnn_ndims = arr.GetMKLDNNData()->get_primitive_desc().desc().data.ndims;
     if (!(ndim == 2 || ndim == 4) || ndim != mkldnn_ndims) return false;
   }
   return true;
diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index 44d1c3c36e99..679e0cd1057b 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -1016,7 +1016,7 @@ class CuDNNConvolutionOp {
   template <int dim>
   inline Shape<dim> Strides(const mxnet::TShape &s) {
     int ndim = s.ndim();
-    mxnet::TShape strides(ndim);
+    mxnet::TShape strides(ndim, -1);
     for (int i = 0; i != ndim; ++i)
       strides[i] = s.ProdShape(i+1, ndim);
     return strides.get<dim>();
diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
index f652dd85bd41..adb6caf1c028 100644
--- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
@@ -934,7 +934,7 @@ class CuDNNDeconvolutionOp {
   template <int dim>
   inline Shape<dim> Strides(const mxnet::TShape &s) {
     int ndim = s.ndim();
-    mxnet::TShape strides(ndim);
+    mxnet::TShape strides(ndim, -1);
     for (int i = 0; i != ndim; ++i)
       strides[i] = s.ProdShape(i+1, ndim);
     return strides.get<dim>();
diff --git a/src/operator/nn/deconvolution-inl.h b/src/operator/nn/deconvolution-inl.h
index e82a073ea08d..1eeccb02e030 100644
--- a/src/operator/nn/deconvolution-inl.h
+++ b/src/operator/nn/deconvolution-inl.h
@@ -143,7 +143,7 @@ struct DeconvolutionParam : public dmlc::Parameter<DeconvolutionParam> {
         }
       }
     } else {
-      for (int i = 0; i < (int) ndim; i++) {
+      for (int i = 0; i < static_cast<int>(ndim); i++) {
         o_pad[i] = i < pad.ndim() ? pad[i] : 0;
         o_adj[i] = i < adj.ndim() ? adj[i] : 0;
       }
diff --git a/src/operator/nn/mkldnn/mkldnn_slice.cc b/src/operator/nn/mkldnn/mkldnn_slice.cc
index 96a8afdab6e2..2a817a25a5b8 100644
--- a/src/operator/nn/mkldnn/mkldnn_slice.cc
+++ b/src/operator/nn/mkldnn/mkldnn_slice.cc
@@ -37,10 +37,10 @@ MKLDNNSliceFwd::MKLDNNSliceFwd(const SliceParam &param,
                                const NDArray &out) {
   const mxnet::TShape ishape = in.shape();
   const mxnet::TShape oshape = out.shape();
-  uint32_t N = ishape.ndim();
+  const int N = ishape.ndim();
   mkldnn::memory::dims dims(N);
   mkldnn::memory::dims offsets(N);
-  for (uint32_t i = 0; i < N; ++i) {
+  for (int i = 0; i < N; ++i) {
     int s = 0;
     if (i < param.begin.ndim() &&  param.begin[i]) {
       s = *param.begin[i];
diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h
deleted file mode 100644
index e0379a040c3f..000000000000
--- a/src/operator/numpy/np_broadcast_reduce_op.h
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *  Copyright (c) 2015 by Contributors
- * \file broadcast_reduce_op.h
- * \brief Function definition of broadcast and reduce operators
- */
-#ifndef MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
-#define MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
-
-#include "../tensor/broadcast_reduce_op.h"
-
-namespace mxnet {
-namespace op {
-
-struct NumpyReduceAxesParam : public dmlc::Parameter<NumpyReduceAxesParam> {
-  dmlc::optional<nnvm::Tuple<int>> axis;
-  dmlc::optional<int> dtype;
-  bool keepdims;
-  dmlc::optional<double> initial;
-  DMLC_DECLARE_PARAMETER(NumpyReduceAxesParam) {
-    DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional<nnvm::Tuple<int>>())
-      .describe(R"code()code");
-    DMLC_DECLARE_FIELD(dtype).set_default(dmlc::optional<int>())
-      .describe("");
-    DMLC_DECLARE_FIELD(keepdims).set_default(false)
-      .describe("If this is set to `True`, the reduced axes are left "
-                "in the result as dimension with size one.");
-  }
-};
-
-inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape,
-                                       const dmlc::optional<nnvm::Tuple<int>>& axis,
-                                       bool keepdims) {
-  // TODO(junwu): improve the logic
-  // If input is a scalar, output should be a scalar too
-  if (ishape.ndim() == 0) {
-    if (axis.has_value()) {
-      const nnvm::Tuple<int>& axes = axis.value();
-      if (axes.ndim() > 0) {
-        CHECK_EQ(axes.ndim(), 1);
-        CHECK(axes[0] == 0 || axes[0] == -1);
-      }
-    }
-    return TShape(0, -1);
-  }
-
-  // axis=None, do global reduction
-  if (!axis.has_value()) {
-    if (keepdims) {
-      return TShape(ishape.ndim(), 1);
-    } else {
-      return TShape(0, -1);
-    }
-  }
-
-  // axis = (), will return identity(input)
-  if (axis.value().ndim() == 0) {
-    return ishape;
-  }
-
-  // axis has value
-  nnvm::Tuple<int> axes(axis.value());
-  for (index_t i = 0; i < axes.ndim(); i++) {
-    if (axes[i] < 0) {
-      axes[i] += ishape.ndim();
-    }
-  }
-  std::sort(axes.begin(), axes.end());
-
-  for (index_t i = 1; i < axes.ndim(); i++) {
-    CHECK_LT(axes[i-1], axes[i])
-        << "Reduction axes have duplicates "
-        << axes;
-  }
-  CHECK_LT(axes[axes.ndim()-1], ishape.ndim())
-      << "Reduction axis " << axes[axes.ndim()-1]
-      << " Exceeds input dimensions " << ishape;
-  CHECK_GE(axes[0], 0)
-      << "Reduction axis " << axis.value()
-      << " Exceeds input dimensions " << ishape;
-
-  TShape oshape;
-  if (keepdims) {
-    oshape = TShape(ishape);
-  } else {
-    oshape = TShape(ishape.ndim() - axes.ndim(), -1);
-  }
-
-  if (keepdims) {
-    for (index_t i = 0; i < axes.ndim(); ++i) {
-      oshape[axes[i]] = 1;
-    }
-  } else {
-    for (index_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) {
-      if (j < axes.ndim() && i == axes[j]) {
-        ++j;
-        continue;
-      }
-      oshape[k++] = ishape[i];
-    }
-  }
-  return oshape;
-}
-
-inline bool NumpyReduceAxesShape(const nnvm::NodeAttrs& attrs,
-                                 std::vector<TShape> *in_attrs,
-                                 std::vector<TShape> *out_attrs) {
-  CHECK_EQ(in_attrs->size(), 1U);
-  CHECK_EQ(out_attrs->size(), 1U);
-  if (!shape_is_known(in_attrs->at(0))) {
-    return false;
-  }
-  const NumpyReduceAxesParam& param = nnvm::get<NumpyReduceAxesParam>(attrs.parsed);
-  SHAPE_ASSIGN_CHECK(*out_attrs, 0,
-                     NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims));
-  return shape_is_known(out_attrs->at(0));
-}
-
-template<typename xpu, typename reducer, bool normalize = false,
-         typename OP = op::mshadow_op::identity>
-void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs,
-                            const OpContext& ctx,
-                            const std::vector<TBlob>& inputs,
-                            const std::vector<OpReqType>& req,
-                            const std::vector<TBlob>& outputs) {
-  const NumpyReduceAxesParam& param = nnvm::get<NumpyReduceAxesParam>(attrs.parsed);
-  if (param.axis.has_value() && param.axis.value().ndim() == 0) {
-    UnaryOp::IdentityCompute<xpu>(attrs, ctx, inputs, req, outputs);
-  }
-  TShape small;
-  if (param.keepdims) {
-    small = outputs[0].shape_;
-  } else {
-    small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true);
-  }
-
-  ReduceAxesComputeImpl<xpu, reducer, normalize, OP>(ctx, inputs, req, outputs, small);
-}
-
-template<typename xpu, bool normalize = false>
-inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs,
-                                           const OpContext& ctx,
-                                           const std::vector<TBlob>& inputs,
-                                           const std::vector<OpReqType>& req,
-                                           const std::vector<TBlob>& outputs) {
-  using namespace mshadow;
-  using namespace mshadow::expr;
-  const NumpyReduceAxesParam& param = nnvm::get<NumpyReduceAxesParam>(attrs.parsed);
-  TShape small;
-  if (param.keepdims) {
-    small = inputs[0].shape_;
-  } else {
-    small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true);
-  }
-
-  BroadcastComputeImpl<xpu>(attrs, ctx, inputs, req, outputs, small);
-  if (normalize) {
-    Stream<xpu> *s = ctx.get_stream<xpu>();
-    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-      Tensor<xpu, 1, DType> igrad = outputs[0].FlatTo1D<xpu, DType>(s);
-      igrad /= scalar<DType>(outputs[0].Size()/inputs[0].Size());
-    });
-  }
-}
-
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_
diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc
deleted file mode 100644
index c028e2368737..000000000000
--- a/src/operator/numpy/np_broadcast_reduce_op_value.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *  Copyright (c) 2019 by Contributors
- * \file np_reduce_op_value.cc
- * \brief CPU Implementation of broadcast and reduce functions based on value.
- */
-
-#include "np_broadcast_reduce_op.h"
-
-namespace mxnet {
-namespace op {
-
-DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam);
-
-NNVM_REGISTER_OP(_numpy_sum)
-.describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<NumpyReduceAxesParam>)
-.set_attr<mxnet::FInferShape>("FInferShape", NumpyReduceAxesShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FListInputNames>("FListInputNames",
-  [](const NodeAttrs& attrs) {
-    return std::vector<std::string>{"a"};
-  })
-.add_argument("a", "NDArray-or-Symbol", "The input")
-.add_arguments(NumpyReduceAxesParam::__FIELDS__())
-.set_attr<FCompute>("FCompute<cpu>", NumpyReduceAxesCompute<cpu, mshadow::red::sum>)
-.set_attr<FResourceRequest>("FResourceRequest",
-  [](const NodeAttrs& attrs) {
-    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-  })
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_numpy_sum"});
-
-NNVM_REGISTER_OP(_backward_numpy_sum)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<NumpyReduceAxesParam>)
-.set_attr<nnvm::TIsBackward>("TIsBackward", true)
-.set_num_inputs(1)
-.set_attr<FCompute>("FCompute<cpu>", NumpyReduceAxesBackwardUseNone<cpu>);
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu
deleted file mode 100644
index c975b18226db..000000000000
--- a/src/operator/numpy/np_broadcast_reduce_op_value.cu
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *  Copyright (c) 2019 by Contributors
- * \file np_reduce_op_value.cu
- * \brief GPU Implementation of reduce functions based on value.
- */
-#include "np_broadcast_reduce_op.h"
-
-namespace mxnet {
-namespace op {
-NNVM_REGISTER_OP(_numpy_sum)
-.set_attr<FCompute>("FCompute<gpu>", NumpyReduceAxesCompute<gpu, mshadow::red::sum>);
-
-NNVM_REGISTER_OP(_backward_numpy_sum)
-.set_attr<FCompute>("FCompute<gpu>", NumpyReduceAxesBackwardUseNone<gpu>);
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/src/operator/pad-inl.h b/src/operator/pad-inl.h
index 140d7099e817..89b0ab7780b6 100644
--- a/src/operator/pad-inl.h
+++ b/src/operator/pad-inl.h
@@ -230,7 +230,7 @@ class PadProp : public OperatorProperty {
       }
     }
     mxnet::TShape oshape = dshape;
-    for (size_t i = 0; i < dshape.ndim(); ++i) {
+    for (int i = 0; i < dshape.ndim(); ++i) {
       oshape[i] =
           param_.pad_width[2 * i] + param_.pad_width[2 * i + 1] + dshape[i];
     }
diff --git a/src/operator/pooling_v1-inl.h b/src/operator/pooling_v1-inl.h
index 22a166cbb6cc..4241b08a0c5e 100644
--- a/src/operator/pooling_v1-inl.h
+++ b/src/operator/pooling_v1-inl.h
@@ -55,7 +55,7 @@ struct PoolingV1Param : public dmlc::Parameter<PoolingV1Param> {
   int pooling_convention;
   bool global_pool;
   DMLC_DECLARE_PARAMETER(PoolingV1Param) {
-    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(kernel).set_default(mxnet::TShape(0, -1))
     .enforce_nonzero()
     .describe("pooling kernel size: (y, x) or (d, y, x)");
 
@@ -73,11 +73,11 @@ struct PoolingV1Param : public dmlc::Parameter<PoolingV1Param> {
     .add_enum("valid", pool_v1_enum::kValid)
     .describe("Pooling convention to be applied.");
 
-    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, -1))
     .enforce_nonzero()
     .describe("stride: for pooling (y, x) or (d, y, x)");
 
-    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, -1))
     .describe("pad for pooling: (y, x) or (d, y, x)");
   }
 };
diff --git a/src/operator/quantization/dequantize-inl.h b/src/operator/quantization/dequantize-inl.h
index 88199bc2591d..7c91ad507fd9 100644
--- a/src/operator/quantization/dequantize-inl.h
+++ b/src/operator/quantization/dequantize-inl.h
@@ -99,7 +99,7 @@ inline bool DequantizeShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
 
   for (size_t i = 1; i < 3; ++i) {
-    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape({1}));
+    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape(1, 1));
   }
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
diff --git a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
index 45713589dd48..ac414c72d51a 100644
--- a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
@@ -115,7 +115,7 @@ static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
     const size_t actual_float_size = sizeof(float);
     const size_t actual_quantized_size = sizeof(SrcDType);
     const size_t temp_reduce_size = ConfigReduce<cpu, SrcDType>(s,
-                         inputs[0].shape(), mxnet::TShape({1}), &src_shape, &dst_shape);
+                         inputs[0].shape(), mxnet::TShape(1, 1), &src_shape, &dst_shape);
     Tensor<cpu, 1, char> temp_space =
       ctx.requested[0].get_space_typed<cpu, 1, char>(
       Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s);
diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 2c267a76a571..7b856579a7b5 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -120,7 +120,7 @@ inline bool QuantizeShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 3U);
 
   for (size_t i = 1; i < 3; ++i) {
-    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape({1}));
+    SHAPE_ASSIGN_CHECK(*in_attrs, i, mxnet::TShape(1, 1));
   }
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h
index 02ace6c39fac..9ebb645e1ba6 100644
--- a/src/operator/quantization/quantize_v2-inl.h
+++ b/src/operator/quantization/quantize_v2-inl.h
@@ -175,7 +175,7 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
       mxnet::TShape src_shape, dst_shape;
       const size_t actual_float_size = sizeof(float);
       const size_t temp_reduce_size = ConfigReduce<xpu, SrcDType>(
-          s, inputs[0].shape_, mxnet::TShape({1}), &src_shape, &dst_shape);
+          s, inputs[0].shape_, mxnet::TShape(1, 1), &src_shape, &dst_shape);
       Tensor<xpu, 1, char> temp_space = ctx.requested[0].get_space_typed<xpu, 1, char>(
           Shape1(2 * actual_float_size + temp_reduce_size), s);
       const int dev_id = ctx.run_ctx.ctx.dev_id;
diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc
index 2cc2ec9d0374..d6aeb41da1f8 100644
--- a/src/operator/quantization/quantized_concat.cc
+++ b/src/operator/quantization/quantized_concat.cc
@@ -35,23 +35,23 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha
   CHECK_EQ(out_shape->size(), 3U);
   mxnet::TShape dshape;
   index_t size = 0;
-  bool has_zero = false;
+  bool has_unknown_dim_size = false;
   int axis = -1;
   for (int i = 0; i < param_.num_args; ++i) {
     mxnet::TShape tmp = (*in_shape)[i];
-    if (tmp.ndim()) {
+    if (tmp.ndim() > 0) {
       axis = CheckAxis(param_.dim, tmp.ndim());
-      has_zero = tmp[axis] == 0 || has_zero;
+      has_unknown_dim_size = !mxnet::dim_size_is_known(tmp, axis) || has_unknown_dim_size;
       size += tmp[axis];
-      tmp[axis] = 0;
+      tmp[axis] = -1;
       shape_assign(&dshape, tmp);
     }
   }
 
   mxnet::TShape tmp = (*out_shape)[0];
-  if (tmp.ndim()) {
+  if (tmp.ndim() > 0) {
     axis = CheckAxis(param_.dim, tmp.ndim());
-    tmp[axis] = 0;
+    tmp[axis] = -1;
     shape_assign(&dshape, tmp);
   }
 
@@ -62,7 +62,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha
         << "Incompatible input shape: expected " << dshape << ", got " << (*in_shape)[i];
   }
 
-  if (!has_zero) dshape[axis] = size;
+  if (!has_unknown_dim_size) dshape[axis] = size;
   CHECK(shape_assign(&(*out_shape)[0], dshape))
       << "Incompatible output shape: expected " << dshape << ", got " << (*out_shape)[0];
 
@@ -71,7 +71,7 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha
   }
   SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape{1});
   SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape{1});
-  return dshape.Size() != 0;
+  return shape_is_known(dshape);
 }
 
 static bool ConcatType(const nnvm::NodeAttrs& attrs, std::vector<int>* in_type,
diff --git a/src/operator/quantization/quantized_conv.cc b/src/operator/quantization/quantized_conv.cc
index 1a801ee50744..aa3f5ce1ad61 100644
--- a/src/operator/quantization/quantized_conv.cc
+++ b/src/operator/quantization/quantized_conv.cc
@@ -78,8 +78,8 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs,
   oshape[W] = (AddPad(dshape[W], param.pad[1]) - wshape[W]) / param.stride[1] + 1;
 
   SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape);
-  SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape({1}));
-  SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape({1}));
+  SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1));
+  SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1));
   return true;
 }
 
diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index cc4365f818d2..e42ea3020352 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -75,8 +75,8 @@ bool QuantizedFullyConnectedShape(const nnvm::NodeAttrs& attrs,
   } else {
     SHAPE_ASSIGN_CHECK(*out_shape, 0, Shape2(dshape[0], param.num_hidden));
   }
-  SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape({1}));
-  SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape({1}));
+  SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1));
+  SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1));
   return true;
 }
 
diff --git a/src/operator/quantization/requantize-inl.h b/src/operator/quantization/requantize-inl.h
index 21d58d4607eb..9106c7fe4716 100644
--- a/src/operator/quantization/requantize-inl.h
+++ b/src/operator/quantization/requantize-inl.h
@@ -111,7 +111,7 @@ void RequantizeForward(const nnvm::NodeAttrs& attrs,
     const size_t actual_float_size = sizeof(float);
     const size_t actual_quantized_size = sizeof(SrcDType);
     const size_t temp_reduce_size = ConfigReduce<xpu, SrcDType>(
-        s, inputs[0].shape_, mxnet::TShape({1}), &src_shape, &dst_shape);
+        s, inputs[0].shape_, mxnet::TShape(1, 1), &src_shape, &dst_shape);
     Tensor<xpu, 1, char> temp_space =
       ctx.requested[0].get_space_typed<xpu, 1, char>(
           Shape1(2*actual_float_size+2*actual_quantized_size+temp_reduce_size), s);
diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h
index 41cb940d957a..7335daa48392 100644
--- a/src/operator/swapaxis-inl.h
+++ b/src/operator/swapaxis-inl.h
@@ -69,7 +69,7 @@ class SwapAxisOp : public Operator {
 
   void Reshape2Five(mshadow::Shape<5> *inter_shape,
                     const mxnet::TShape &shape,
-                    uint32_t dim1, uint32_t dim2) {
+                    int dim1, int dim2) {
     using namespace mshadow;
     using namespace mshadow::expr;
     int ndim_in = shape.ndim();
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index ac714c2cfcdd..9392d4129406 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -139,9 +139,9 @@ struct BroadcastAxesParam : public dmlc::Parameter<BroadcastAxesParam> {
   mxnet::TShape axis;
   mxnet::TShape size;
   DMLC_DECLARE_PARAMETER(BroadcastAxesParam) {
-    DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(axis).set_default(mxnet::TShape(0, -1))
       .describe("The axes to perform the broadcasting.");
-    DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(size).set_default(mxnet::TShape(0, -1))
       .describe("Target sizes of the broadcasting axes.");
   }
 };
@@ -149,7 +149,7 @@ struct BroadcastAxesParam : public dmlc::Parameter<BroadcastAxesParam> {
 struct BroadcastToParam : public dmlc::Parameter<BroadcastToParam> {
   mxnet::TShape shape;
   DMLC_DECLARE_PARAMETER(BroadcastToParam) {
-    DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(shape).set_default(mxnet::TShape(0, -1))
       .describe("The shape of the desired array."
                 " We can set the dim to zero if it's same as the original."
                 " E.g `A = broadcast_to(B, shape=(10, 0, 0))` "
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h
index 64a4d7cc15ff..73019fa8389b 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op.h
+++ b/src/operator/tensor/elemwise_binary_broadcast_op.h
@@ -61,16 +61,13 @@ inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs,
     int l = 1, r = 1;
     if (i >= bl) l = lhs[i-bl];
     if (i >= br) r = rhs[i-br];
+    if (!mxnet::dim_size_is_known(l) || !mxnet::dim_size_is_known(r)) continue;
     if (l != r) {
-      if (l == 0 || r == 0) {
-        // TODO(junwu): here is not compatible with NumPy.
-        // For example, (2, 3) cannot broadcast to (2, 0, 3).
-        out[i] = 0;
-      } else {
-        CHECK(l == 1 || r == 1)
-          << "operands could not be broadcast together with shapes " << lhs << " " << rhs;
-        out[i] = std::max(l, r);
-      }
+      // Make it compatible with NumPy.
+      // For example, (2, 3) cannot broadcast to (2, 0, 3), but (1, 3) can broadcast to (2, 0, 3).
+      CHECK(l == 1 || r == 1)
+        << "operands could not be broadcast together with shapes " << lhs << " " << rhs;
+      out[i] = (l == 1 ? r : l);
     } else {
       out[i] = l;
     }
diff --git a/src/operator/tensor/histogram-inl.h b/src/operator/tensor/histogram-inl.h
index 9cf9c490bba2..7194445d7b52 100644
--- a/src/operator/tensor/histogram-inl.h
+++ b/src/operator/tensor/histogram-inl.h
@@ -86,9 +86,9 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs,
   if (has_cnt) {
     // if cnt is specified, the output histogram has shape (cnt,)
     // while output bins has shape (cnt+1,)
-    const int bin_cnt = param.bin_cnt.value();
-    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({bin_cnt}));
-    SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape({bin_cnt + 1}));
+    const dim_t bin_cnt = param.bin_cnt.value();
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, bin_cnt));
+    SHAPE_ASSIGN_CHECK(*out_attrs, 1, mxnet::TShape(1, bin_cnt + 1));
   } else {
     // if cnt is not specified, the output histogram has shape (bins.Size() - 1)
     // while output bins has same shape as input bins
@@ -97,7 +97,7 @@ inline bool HistogramOpShape(const nnvm::NodeAttrs& attrs,
     CHECK_EQ(oshape.ndim(), 1U) << "bins argument should be an 1D vector";
     CHECK_GE(oshape.Size(), 2U) << "number of bounds should be >= 2";
 
-    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape({(oshape[0] - 1)}));
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(1, oshape[0] - 1));
     SHAPE_ASSIGN_CHECK(*out_attrs, 1, in_attrs->at(1));
   }
 
diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h
index bcad602c95c0..b2e3830064ae 100644
--- a/src/operator/tensor/init_op.h
+++ b/src/operator/tensor/init_op.h
@@ -278,6 +278,8 @@ inline bool InitStorageType(const nnvm::NodeAttrs& attrs,
  */
 template <bool is_integer = false, typename ValueType, typename xpu>
 void Fill(mshadow::Stream<xpu> *s, const TBlob& b, const OpReqType req, ValueType val) {
+  // If b is a zero-size tensor, do nothing.
+  if (b.Size() == 0) return;
   if (req != kNullOp) {
     const size_t size = b.Size();
     if (val == 0) {
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 87e911a56800..0e7f66240926 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -59,7 +59,7 @@ struct ReshapeParam : public dmlc::Parameter<ReshapeParam> {
     .set_default(false)
     .describe("If true then the special values are inferred from right to left");
     DMLC_DECLARE_FIELD(target_shape)
-    .set_default(mxnet::TShape(0))
+    .set_default(mxnet::TShape(0, -1))
     .describe("(Deprecated! Use ``shape`` instead.) "
               "Target new shape. One and only one dim can be 0, "
               "in which case it will be inferred from the rest of dims");
@@ -209,7 +209,8 @@ inline bool ReshapeShape(const nnvm::NodeAttrs& attrs,
       oshape[inf_idx] = dshape.Size() / oshape.Size();
     }
   } else {
-    return shape_is_known((*out_attrs)[0]) && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]);
+    return shape_is_known((*out_attrs)[0])
+           && ReverseReshapeInferShape(&(*in_attrs)[0], (*out_attrs)[0]);
   }
   ReverseReshapeInferShape(&dshape, oshape);
 #if 0
@@ -240,7 +241,7 @@ inline bool FlattenShape(const nnvm::NodeAttrs& attrs,
 struct TransposeParam : public dmlc::Parameter<TransposeParam> {
   mxnet::TShape axes;
   DMLC_DECLARE_PARAMETER(TransposeParam) {
-    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0))
+    DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(0, -1))
     .describe("Target axis order. By default the axes will be inverted.");
   }
 
@@ -1158,9 +1159,14 @@ inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
   mxnet::TShape& ishape = (*in_attrs)[0];
+  if (!mxnet::ndim_is_known(ishape)) return false;
   int axis;
   index_t begin, end;
   GetSliceAxisParams(param, ishape, &axis, &begin, &end);
+  if (!mxnet::dim_size_is_known(ishape, axis)) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape);
+    return false;
+  }
   mxnet::TShape shape(ishape.ndim(), -1);
   for (int i = 0; i < ishape.ndim(); ++i) {
     if (static_cast<int>(i) == axis) {
diff --git a/tests/cpp/include/test_mkldnn.h b/tests/cpp/include/test_mkldnn.h
index a379dab7bf90..f1682772a14a 100644
--- a/tests/cpp/include/test_mkldnn.h
+++ b/tests/cpp/include/test_mkldnn.h
@@ -49,7 +49,7 @@ inline static mkldnn::memory::primitive_desc GetMemPD(const mxnet::TShape s, int
 inline static mkldnn::memory::primitive_desc GetExpandedMemPD(
     mkldnn::memory::primitive_desc pd, float scale, int dim = 0) {
   CHECK(dim < pd.desc().data.ndims) << "dimension cannot be larger than total dimensions of input";
-  mxnet::TShape s(pd.desc().data.ndims);
+  mxnet::TShape s(pd.desc().data.ndims, -1);
   for (size_t i = 0; i < pd.desc().data.ndims; i++)
     s[i] = pd.desc().data.dims[i];
   s[dim] = static_cast<int>(s[dim] * scale);
@@ -165,7 +165,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals
   std::vector<mkldnn::memory::primitive_desc> pds;
   {
     // 1D
-    mxnet::TShape s(1);
+    mxnet::TShape s(1, -1);
     s[0] = 279936;
     shapes.push_back(s);
     pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x));
@@ -175,7 +175,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals
   }
   {
     // 2D
-    mxnet::TShape s(2);
+    mxnet::TShape s(2, -1);
     s[0] = 96;
     s[1] = 2916;
     shapes.push_back(s);
@@ -187,12 +187,12 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals
   }
   {
     // 4D
-    mxnet::TShape s1(4);
+    mxnet::TShape s1(4, -1);
     s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 54;
     shapes.push_back(s1);
     pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw));
 
-    mxnet::TShape s2(4);
+    mxnet::TShape s2(4, -1);
     s2[0] = 96; s2[1] = 3; s2[2] = 11; s2[3] = 11;
     shapes.push_back(s2);
     pds.push_back(GetMemPD(s2, dtype, mkldnn::memory::format::oihw));
@@ -204,7 +204,7 @@ inline static TestArrayShapes GetTestArrayShapes(bool spatial_data_format = fals
   }
   {
     // 5D
-    mxnet::TShape s(5);
+    mxnet::TShape s(5, -1);
     s[0] = 96; s[1] = 1; s[2] = 3; s[3] = 11; s[4] = 11;
     shapes.push_back(s);
     pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::goihw));
@@ -259,7 +259,7 @@ enum ArrayTypes {
 inline NDArray CreateKernelNDArray(mxnet::TShape kernel, int num_filters, mxnet::TShape input,
     bool is_deconv = false) {
   CHECK_EQ(kernel.ndim(), 2) << "mkldnn only supports 2d filters on 4d inputs";
-  mxnet::TShape target_shape(4);
+  mxnet::TShape target_shape(4, -1);
   target_shape[0] = is_deconv ? input[1] : num_filters;
   target_shape[1] = is_deconv ? num_filters : input[1];
   target_shape[2] = kernel[0];
@@ -470,7 +470,7 @@ inline std::vector<NDArrayAttrs> GetTestOutputArrays(
     in_arrs.emplace_back(arr0.Slice(1, shape[0] + 1), "Reshaped NDArray");
   }
 
-  mxnet::TShape s(1);
+  mxnet::TShape s(1, -1);
   if (types & ArrayTypes::NormalReused) {
     // Type 5.
     // Get a reused version.
@@ -528,7 +528,7 @@ inline std::vector<NDArrayAttrs> GetTestOutputArrays(
 
     // Type 8, 9.
     // Get a reused version.
-    mxnet::TShape s(1);
+    mxnet::TShape s(1, -1);
     s[0] = shape.Size();
     NDArray arr = NDArray(s, Context());
     arr = arr.AsArray(shape, arr.dtype());
diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h
index e0caddbcd027..b0114e1721ef 100644
--- a/tests/cpp/include/test_util.h
+++ b/tests/cpp/include/test_util.h
@@ -353,14 +353,14 @@ inline StreamType& print_blob_(const RunContext& ctx,
 
   if (dim == 1) {
     // probably a 1d tensor (mshadow::Tensor is deprecated)
-    TBlob changed(blob.dptr<DType>(), mxnet::TShape(3), blob.dev_mask(), blob.dev_id());
+    TBlob changed(blob.dptr<DType>(), mxnet::TShape(3, -1), blob.dev_mask(), blob.dev_id());
     changed.shape_[0] = 1;
     changed.shape_[1] = 1;
     changed.shape_[2] = blob.shape_[0];
     return print_blob_<DType>(ctx, &os, changed, false, false, add_endl);
   } else if (dim == 2) {
     // probably a 2d tensor (mshadow::Tensor is deprecated)
-    TBlob changed(blob.dptr<DType>(), mxnet::TShape(4), blob.dev_mask(), blob.dev_id());
+    TBlob changed(blob.dptr<DType>(), mxnet::TShape(4, -1), blob.dev_mask(), blob.dev_id());
     changed.shape_[0] = 1;
     changed.shape_[1] = 1;
     changed.shape_[2] = blob.shape_[0];
diff --git a/tests/cpp/misc/serialization.cc b/tests/cpp/misc/serialization.cc
index 77014238c2fa..2509a43c27ee 100644
--- a/tests/cpp/misc/serialization.cc
+++ b/tests/cpp/misc/serialization.cc
@@ -48,7 +48,7 @@ TEST(SerializerTest, OutputMapCorrect) {
     std::map<std::string, std::tuple<uint32_t, mxnet::TShape, int, int> > output_map;
     output_map.emplace("output_0", std::make_tuple(1, mxnet::TShape({23, 12, 63, 432}), 0, 1));
     output_map.emplace("another_output", std::make_tuple(2, mxnet::TShape({23, 123}), 14, -23));
-    output_map.emplace("last_output", std::make_tuple(0, mxnet::TShape({0}), -1, 0));
+    output_map.emplace("last_output", std::make_tuple(0, mxnet::TShape(1, 0), -1, 0));
     std::string serialized_data;
     common::Serialize(output_map, &serialized_data);
     std::map<std::string, std::tuple<uint32_t, mxnet::TShape, int, int> > deserialized_output_map;
diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc
index d74493a0f7fb..ed0e70b831f1 100644
--- a/tests/cpp/operator/batchnorm_test.cc
+++ b/tests/cpp/operator/batchnorm_test.cc
@@ -1266,7 +1266,7 @@ static void testSaveAndLoad(const std::vector<size_t>& dims,
   ChannelAxisTestData<DType> data;
   data.channel_data_ = inputChannelData;
 
-  mxnet::TShape shape(dims.size());
+  mxnet::TShape shape(dims.size(), -1);
   for (size_t i = 0, n = dims.size(); i < n; ++i) {
     shape[i] = index_t(dims[i]);
   }
@@ -1322,7 +1322,7 @@ static mxnet::TShape MakeShape(const std::vector<index_t>& shape,
   }
   CHECK_LT(channelAxis, shape.size() + 1);
   const index_t dim = index_t(shape.size()) + 1;
-  mxnet::TShape newShape(dim);
+  mxnet::TShape newShape(dim, -1);
   for (size_t x = 0; x < static_cast<size_t>(channelAxis); ++x) {
     newShape[x] = index_t(shape[x]);
   }
diff --git a/tests/cpp/operator/mkldnn_operator_test.cc b/tests/cpp/operator/mkldnn_operator_test.cc
index 559ab5da0ccc..961785dcfc87 100644
--- a/tests/cpp/operator/mkldnn_operator_test.cc
+++ b/tests/cpp/operator/mkldnn_operator_test.cc
@@ -916,13 +916,13 @@ void TestFullyConnectedOp(const OpAttrs &forward_attrs, const OpAttrs &backwards
       if (in_shape.ndim() < 2)
         continue;
 
-      mxnet::TShape wt_shape(2);
+      mxnet::TShape wt_shape(2, -1);
       wt_shape[0] = num_hid;
       wt_shape[1] = GetFCWeightDim2(in_shape);
       NDArray weights(wt_shape, Context());
       InitDefaultArray(&weights, false);
 
-      mxnet::TShape bias_shape(1);
+      mxnet::TShape bias_shape(1, -1);
       bias_shape[0] = num_hid;
       NDArray bias(bias_shape, Context());
       InitDefaultArray(&bias, false);
@@ -931,7 +931,7 @@ void TestFullyConnectedOp(const OpAttrs &forward_attrs, const OpAttrs &backwards
       inputs[1] = &weights;
       inputs[2] = &bias;
 
-      mxnet::TShape out_shape(2);
+      mxnet::TShape out_shape(2, -1);
       out_shape[0] = in_shape[0];
       out_shape[1] = num_hid;
 
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index fbbfc53a9a5e..f8ebe9517bac 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1963,19 +1963,21 @@ def check_proposal_consistency(op, batch_size, with_nms=False):
 # The following 2 functions launch 0-thread kernels, an error that should be caught and signaled.
 def kernel_error_check_imperative():
     os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
-    a = mx.nd.array([1,2,3],ctx=mx.gpu(0))
-    b = mx.nd.array([],ctx=mx.gpu(0))
-    c = (a / b).asnumpy()
+    with mx.enable_np_comp():
+        a = mx.nd.array([1,2,3],ctx=mx.gpu(0))
+        b = mx.nd.array([],ctx=mx.gpu(0))
+        c = (a / b).asnumpy()
 
 def kernel_error_check_symbolic():
     os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
-    a = mx.sym.Variable('a')
-    b = mx.sym.Variable('b')
-    c = a / b
-    f = c.bind(mx.gpu(0), { 'a':mx.nd.array([1,2,3],ctx=mx.gpu(0)),
-                            'b':mx.nd.array([],ctx=mx.gpu(0))})
-    f.forward()
-    g = f.outputs[0].asnumpy()
+    with mx.enable_np_comp():
+        a = mx.sym.Variable('a')
+        b = mx.sym.Variable('b')
+        c = a / b
+        f = c.bind(mx.gpu(0), { 'a':mx.nd.array([1,2,3],ctx=mx.gpu(0)),
+                                'b':mx.nd.array([],ctx=mx.gpu(0))})
+        f.forward()
+        g = f.outputs[0].asnumpy()
 
 def test_kernel_error_checking():
     # Running tests that may throw exceptions out of worker threads will stop CI testing
diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py
index 73654a604135..e0b4d35ea9aa 100644
--- a/tests/python/unittest/test_infer_shape.py
+++ b/tests/python/unittest/test_infer_shape.py
@@ -147,6 +147,21 @@ def test_fc_infer_type():
         assert arg_type_dict[k] == v
 
 
+def test_shape_completely_unknown():
+    data = mx.sym.var("data")
+    ret = mx.sym.sin(data)
+    arg_shapes, out_shapes, _ = ret.infer_shape_partial()
+    assert arg_shapes[0] == ()
+    assert out_shapes[0] == ()
+
+    with mx.enable_np_comp():
+        data = mx.sym.var("data")
+        ret = mx.sym.sin(data)
+        arg_shapes, out_shapes, _ = ret.infer_shape_partial()
+        assert arg_shapes[0] is None
+        assert out_shapes[0] is None
+
+
 if __name__ == "__main__":
     test_mlp2_infer_shape()
     test_mlp2_infer_error()
@@ -156,3 +171,4 @@ def test_fc_infer_type():
     test_incomplete_infer_slicechannel()
     test_incomplete_infer_convolution()
     test_incomplete_infer_concat()
+    test_shape_completely_unknown()
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index 1bdb7d51df67..f8f52c2cee24 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -123,7 +123,7 @@ def test_ndarray_setitem():
     # numpy assignment for empty axis
     for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]:
         if trivial_shape == tuple():
-            with mx.numpy.enable_np_comp():
+            with mx.enable_np_comp():
                 x = mx.nd.zeros(trivial_shape)
         else:
             x = mx.nd.zeros(trivial_shape)
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index d44f1113e788..b82b0e9b5746 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4403,7 +4403,7 @@ def test_invalid_reps():
         assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3))
 
     test_normal_case()
-    with mx.numpy.enable_np_comp():
+    with mx.enable_np_comp():
         test_empty_tensor()
     test_empty_reps()
     test_tile_backward()
@@ -4464,7 +4464,7 @@ def test_zero_depth():
     test_normal_case(index_type=np.float64)
     test_normal_case(index_type=np.float32)
     test_normal_case(index_type=np.float16)
-    with mx.numpy.enable_np_comp():
+    with mx.enable_np_comp():
         test_empty_indices()
     test_zero_depth()
 
@@ -6850,6 +6850,20 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape):
     check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0))
     check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0))
 
+    with mx.enable_np_comp():
+        var1 = mx.sym.var(name="data", shape=(-1, 20))
+        check_slice_partial_infer(var1, (None, None), (None, 10), [], (-1, 10))
+        check_slice_partial_infer(var1, (None, None), (None, 10), (None, 2), (-1, 5))
+        check_slice_partial_infer(var1, (None, 3), (None, 10), [], (-1, 7))
+        check_slice_partial_infer(var1, (None, 3), (5, 10), [], (-1, 7))
+        check_slice_partial_infer(var1, (2, 3), (None, 10), [], (-1, 7))
+        check_slice_partial_infer(var1, (2, 3), (None, 10), (None, 1), (-1, 7))
+        check_slice_partial_infer(var1, (2, 3), (None, 10), (3, 3), (-1, 3))
+
+        var1 = mx.sym.var(name='data', shape=(10, -1))
+        check_slice_axis_partial_infer(var1, 0, 0, 5, (5, -1))
+        check_slice_axis_partial_infer(var1, 1, 0, 5, (10, -1))
+
 
 @with_seed()
 def test_float16_min_max():
@@ -7898,6 +7912,42 @@ def test_image_normalize():
     check_numeric_gradient(img_norm_sym, [data_in_4d], atol=0.001)
 
 
+@with_seed()
+def test_scalar_tensor_creation():
+    assertRaises(MXNetError, mx.nd.zeros, shape=())
+    assertRaises(MXNetError, mx.nd.ones, shape=())
+    with mx.enable_np_comp():
+        data_mx = mx.nd.ones(shape=())
+        data_np = np.ones((), dtype=data_mx.dtype)
+        assert same(data_mx.asnumpy(), data_np)
+
+
+@with_seed()
+def test_zero_size_tensor_creation():
+    assertRaises(MXNetError, mx.nd.zeros, shape=(0, 1, 3, 0))
+    assertRaises(MXNetError, mx.nd.ones, shape=(0, 1, 3, 0))
+    with mx.enable_np_comp():
+        data_mx = mx.nd.ones(shape=(0, 1, 0, 4))
+        data_np = np.ones(shape=data_mx.shape, dtype=data_mx.dtype)
+        assert same(data_mx.asnumpy(), data_np)
+
+
+@with_seed()
+def test_concat_with_zero_size_tensor():
+    with mx.enable_np_comp():
+        data1 = mx.nd.ones((0, 8, 12))
+        data2 = mx.nd.ones((3, 8, 12))
+        data3 = mx.nd.ones((0, 8, 12))
+        ret = mx.nd.Concat(data1, data2, data3, dim=0)
+        assert ret.shape == (3, 8, 12)
+
+        data1 = mx.nd.ones((0, 3, 10))
+        data2 = mx.nd.ones((0, 4, 10))
+        data3 = mx.nd.ones((0, 5, 10))
+        ret = mx.nd.Concat(data1, data2, data3, dim=1)
+        assert ret.shape == (0, 12, 10)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()

From 7a83953eef93902a397937b3cb7ff6d8b516fd1d Mon Sep 17 00:00:00 2001
From: Yizhi Liu <liuyizhi@apache.org>
Date: Tue, 9 Apr 2019 19:59:09 -0700
Subject: [PATCH 18/32] fix invalid ndarray dispose (#14657)

---
 .../core/src/main/scala/org/apache/mxnet/Executor.scala  | 9 ++++++---
 src/common/utils.h                                       | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala
index f51424b7edf6..b0fae0f9d58d 100644
--- a/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/Executor.scala
@@ -61,6 +61,8 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle,
   protected var monitorCallback: MXMonitorCallback = null
   private val logger: Logger = LoggerFactory.getLogger(classOf[Executor])
 
+  private var reshaped = false
+
   override def nativeAddress: CPtrAddress = handle
   override def nativeDeAllocator: (CPtrAddress => Int) = _LIB.mxExecutorFree
   // cannot determine the off-heap size of this object
@@ -71,12 +73,12 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle,
     if (!super.isDisposed) {
       super.dispose()
       outputs.foreach(o => o.dispose())
-      if (argArrays != null) {argArrays.foreach(a => a.dispose())}
-      if (gradArrays != null) {gradArrays.foreach(
+      if (reshaped && argArrays != null) {argArrays.foreach(a => a.dispose())}
+      if (reshaped && gradArrays != null) {gradArrays.foreach(
         // Symbol will sometimes fill this with nulls so we've got to check the elements too
         a => if (a != null) {a.dispose()})
       }
-      if (auxArrays != null) {auxArrays.foreach(a => a.dispose())}
+      if (reshaped && auxArrays != null) {auxArrays.foreach(a => a.dispose())}
     }
   }
 
@@ -146,6 +148,7 @@ class Executor private[mxnet](private[mxnet] val handle: ExecutorHandle,
     executor.argArrays = argArrays
     executor.gradArrays = gradArrays
     executor.auxArrays = auxArrays
+    executor.reshaped = true
     executor
   }
 
diff --git a/src/common/utils.h b/src/common/utils.h
index 6cdb869ff9ae..251a8fe3c190 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -654,7 +654,7 @@ FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
   } else if (ctx.dev_mask() == gpu::kDevMask) {
     return fcompute_gpu.get(op, nullptr);
   } else {
-    LOG(FATAL) << "Unknown device mask";
+    LOG(FATAL) << "Unknown device mask " << ctx.dev_mask();
     return nullptr;
   }
 }

From 3eab56abc9890ecb5d104b58527885e5d81fcfac Mon Sep 17 00:00:00 2001
From: Sergey Kolychev <sergeykolychev.github@gmail.com>
Date: Tue, 9 Apr 2019 23:19:01 -0700
Subject: [PATCH 19/32] swig fixes for the changes in c_api.h (#14655)

---
 perl-package/AI-MXNetCAPI/mxnet_typemaps.i | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/perl-package/AI-MXNetCAPI/mxnet_typemaps.i b/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
index 50296c2aaba5..3ec9f95ea9c3 100644
--- a/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
+++ b/perl-package/AI-MXNetCAPI/mxnet_typemaps.i
@@ -524,13 +524,13 @@
     }
 }
 
-%typemap(in,numinputs=0) (mx_uint *out_dim, const mx_uint **out_pdata) (mx_uint temp_dim, mx_uint *temp_pdata)
+%typemap(in,numinputs=0) (int *out_dim, const int **out_pdata) (int temp_dim, int *temp_pdata)
 {
     $1 = &temp_dim;
     $2 = &temp_pdata;
 }
 
-%typemap(argout) (mx_uint *out_dim, const mx_uint **out_pdata)
+%typemap(argout) (int *out_dim, const int **out_pdata)
 {
     if(!result)
     {
@@ -956,12 +956,12 @@
     }
 }
 
-%typemap(in,numinputs=0) (mx_uint *in_shape_size, const mx_uint **in_shape_ndim, const mx_uint ***in_shape_data) 
-                         (mx_uint temp1, mx_uint *temp2, mx_uint **temp3),
-                         (mx_uint *out_shape_size, const mx_uint **out_shape_ndim, const mx_uint ***out_shape_data) 
-                         (mx_uint temp1, mx_uint *temp2, mx_uint **temp3),
-                         (mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, const mx_uint ***aux_shape_data) 
-                         (mx_uint temp1, mx_uint *temp2, mx_uint **temp3)
+%typemap(in,numinputs=0) (mx_uint *in_shape_size, const int **in_shape_ndim, const int ***in_shape_data) 
+                         (mx_uint temp1, int *temp2, int **temp3),
+                         (mx_uint *out_shape_size, const int **out_shape_ndim, const int ***out_shape_data) 
+                         (mx_uint temp1, int *temp2, int **temp3),
+                         (mx_uint *aux_shape_size, const int **aux_shape_ndim, const int ***aux_shape_data) 
+                         (mx_uint temp1, int *temp2, int **temp3)
 {
     $1 = &temp1;
     $2 = &temp2;
@@ -969,9 +969,9 @@
     *$1 = 0;
 }
 
-%typemap(argout) (mx_uint *in_shape_size, const mx_uint **in_shape_ndim, const mx_uint ***in_shape_data),
-                 (mx_uint *out_shape_size, const mx_uint **out_shape_ndim, const mx_uint ***out_shape_data),
-                 (mx_uint *aux_shape_size, const mx_uint **aux_shape_ndim, const mx_uint ***aux_shape_data)
+%typemap(argout) (mx_uint *in_shape_size, const int **in_shape_ndim, const int ***in_shape_data),
+                 (mx_uint *out_shape_size, const int **out_shape_ndim, const int ***out_shape_data),
+                 (mx_uint *aux_shape_size, const int **aux_shape_ndim, const int ***aux_shape_data)
 {
     if(!result && *arg15)
     {

From 4b2244aec99912bde5fe5ed6a7370065970c4d4f Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Wed, 10 Apr 2019 12:48:32 -0700
Subject: [PATCH 20/32] Rename np_comp to np_compat for readability

---
 python/mxnet/base.py                      | 26 +++++++++++------------
 python/mxnet/symbol/symbol.py             |  4 ++--
 tests/python/gpu/test_operator_gpu.py     |  4 ++--
 tests/python/unittest/test_infer_shape.py |  2 +-
 tests/python/unittest/test_ndarray.py     |  2 +-
 tests/python/unittest/test_operator.py    | 12 +++++------
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index 916e74182f94..0532430ef031 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -736,7 +736,7 @@ def write_all_str(module_file, module_all_list):
 ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
 
 
-def set_np_comp(flag):
+def set_np_compat(flag):
     """
     Turns on/off NumPy compatibility. NumPy-compatibility is turned off by default in backend.
 
@@ -754,7 +754,7 @@ def set_np_comp(flag):
     return bool(prev.value)
 
 
-def is_np_comp():
+def is_np_compat():
     """
     Checks whether the NumPy compatibility is currently turned on.
     NumPy-compatibility is turned off by default in backend.
@@ -778,26 +778,26 @@ class _NumpyCompatibilityStateScope(object):
             backward([y])
 
     """
-    def __init__(self, is_np_comp): #pylint: disable=redefined-outer-name
-        self._enter_is_np_comp = is_np_comp
-        self._prev_is_np_comp = None
+    def __init__(self, is_np_compat):  #pylint: disable=redefined-outer-name
+        self._enter_is_np_compat = is_np_compat
+        self._prev_is_np_compat = None
 
     def __enter__(self):
-        if self._enter_is_np_comp is not None:
-            self._prev_is_np_comp = set_np_comp(self._enter_is_np_comp)
+        if self._enter_is_np_compat is not None:
+            self._prev_is_np_compat = set_np_compat(self._enter_is_np_compat)
 
     def __exit__(self, ptype, value, trace):
-        if self._enter_is_np_comp is not None and self._prev_is_np_comp != self._enter_is_np_comp:
-            set_np_comp(self._prev_is_np_comp)
+        if self._enter_is_np_compat is not None and self._prev_is_np_compat != self._enter_is_np_compat:
+            set_np_compat(self._prev_is_np_compat)
 
 
-def enable_np_comp():
+def enable_np_compat():
     """Returns a NumPy compatibility state scope to be used in 'with' statement
     and captures code that needs the compatibility.
 
     Example::
 
-        with mx.enable_np_comp():
+        with mx.enable_np_compat():
             # A scalar tensor's shape is `()`, whose `ndim` is `0`.
             scalar = mx.nd.ones(shape=())
             assert scalar.shape == ()
@@ -827,13 +827,13 @@ def enable_np_comp():
     return _NumpyCompatibilityStateScope(True)
 
 
-def disable_np_comp():
+def disable_np_compat():
     """Returns a state scope with NumPy-compatibility disabled to be used in 'with' statement
     and captures code that does not need the compatibility.
 
     Example::
 
-        with mx.disable_np_comp():
+        with mx.disable_np_compat():
             # 0 means unknown shape dimension size in the legacy shape definition.
             data = mx.sym.var("data", shape=(0, 2, 3))
             ret = mx.sym.sin(data)
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index 2de4954cae81..a187f67d608a 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -34,7 +34,7 @@
 
 from ..attribute import AttrScope
 from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array
-from ..base import mx_uint, py_str, string_types, integer_types, mx_int, is_np_comp
+from ..base import mx_uint, py_str, string_types, integer_types, mx_int, is_np_compat
 from ..base import NDArrayHandle, ExecutorHandle, SymbolHandle
 from ..base import check_call, MXNetError, NotImplementedForSymbol
 from ..context import Context, current_context
@@ -1078,7 +1078,7 @@ def infer_shape(self, *args, **kwargs):
                 arg_names = self.list_arguments()
                 unknowns = []
                 for name, shape in zip(arg_names, arg_shapes):
-                    if is_np_comp():
+                    if is_np_compat():
                         shape_is_none = not shape or -1 in shape
                     else:
                         shape_is_none = not shape or 0 in shape
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index f8ebe9517bac..cb1248e46a85 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1963,14 +1963,14 @@ def check_proposal_consistency(op, batch_size, with_nms=False):
 # The following 2 functions launch 0-thread kernels, an error that should be caught and signaled.
 def kernel_error_check_imperative():
     os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         a = mx.nd.array([1,2,3],ctx=mx.gpu(0))
         b = mx.nd.array([],ctx=mx.gpu(0))
         c = (a / b).asnumpy()
 
 def kernel_error_check_symbolic():
     os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         a = mx.sym.Variable('a')
         b = mx.sym.Variable('b')
         c = a / b
diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py
index e0b4d35ea9aa..e4a3e5a3d86f 100644
--- a/tests/python/unittest/test_infer_shape.py
+++ b/tests/python/unittest/test_infer_shape.py
@@ -154,7 +154,7 @@ def test_shape_completely_unknown():
     assert arg_shapes[0] == ()
     assert out_shapes[0] == ()
 
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         data = mx.sym.var("data")
         ret = mx.sym.sin(data)
         arg_shapes, out_shapes, _ = ret.infer_shape_partial()
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index f8f52c2cee24..fb0cac737ee0 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -123,7 +123,7 @@ def test_ndarray_setitem():
     # numpy assignment for empty axis
     for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]:
         if trivial_shape == tuple():
-            with mx.enable_np_comp():
+            with mx.enable_np_compat():
                 x = mx.nd.zeros(trivial_shape)
         else:
             x = mx.nd.zeros(trivial_shape)
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index b82b0e9b5746..c5523c5b2268 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4403,7 +4403,7 @@ def test_invalid_reps():
         assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3))
 
     test_normal_case()
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         test_empty_tensor()
     test_empty_reps()
     test_tile_backward()
@@ -4464,7 +4464,7 @@ def test_zero_depth():
     test_normal_case(index_type=np.float64)
     test_normal_case(index_type=np.float32)
     test_normal_case(index_type=np.float16)
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         test_empty_indices()
     test_zero_depth()
 
@@ -6850,7 +6850,7 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape):
     check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0))
     check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0))
 
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         var1 = mx.sym.var(name="data", shape=(-1, 20))
         check_slice_partial_infer(var1, (None, None), (None, 10), [], (-1, 10))
         check_slice_partial_infer(var1, (None, None), (None, 10), (None, 2), (-1, 5))
@@ -7916,7 +7916,7 @@ def test_image_normalize():
 def test_scalar_tensor_creation():
     assertRaises(MXNetError, mx.nd.zeros, shape=())
     assertRaises(MXNetError, mx.nd.ones, shape=())
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         data_mx = mx.nd.ones(shape=())
         data_np = np.ones((), dtype=data_mx.dtype)
         assert same(data_mx.asnumpy(), data_np)
@@ -7926,7 +7926,7 @@ def test_scalar_tensor_creation():
 def test_zero_size_tensor_creation():
     assertRaises(MXNetError, mx.nd.zeros, shape=(0, 1, 3, 0))
     assertRaises(MXNetError, mx.nd.ones, shape=(0, 1, 3, 0))
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         data_mx = mx.nd.ones(shape=(0, 1, 0, 4))
         data_np = np.ones(shape=data_mx.shape, dtype=data_mx.dtype)
         assert same(data_mx.asnumpy(), data_np)
@@ -7934,7 +7934,7 @@ def test_zero_size_tensor_creation():
 
 @with_seed()
 def test_concat_with_zero_size_tensor():
-    with mx.enable_np_comp():
+    with mx.enable_np_compat():
         data1 = mx.nd.ones((0, 8, 12))
         data2 = mx.nd.ones((3, 8, 12))
         data3 = mx.nd.ones((0, 8, 12))

From 04ad0871b74c86c6efc1045d92194115b9c6813c Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Wed, 10 Apr 2019 12:52:09 -0700
Subject: [PATCH 21/32] Fix import error

---
 python/mxnet/__init__.py | 2 +-
 python/mxnet/base.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index 5f4f9b393e41..99f565ad309a 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -23,7 +23,7 @@
 
 from .context import Context, current_context, cpu, gpu, cpu_pinned
 from . import engine
-from .base import MXNetError, is_np_comp, set_np_comp, enable_np_comp, disable_np_comp
+from .base import MXNetError, is_np_compat, set_np_compat, enable_np_compat, disable_np_compat
 from . import base
 from . import contrib
 from . import ndarray
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index 0532430ef031..b056b73d308e 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -30,7 +30,7 @@
 
 from . import libinfo
 
-__all__ = ['MXNetError']
+__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'enable_np_compat', 'disable_np_compat']
 #----------------------------
 # library loading
 #----------------------------

From 8a023eb440adefab718e6aeca10cc1f54a4e5b48 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Thu, 11 Apr 2019 15:41:10 -0700
Subject: [PATCH 22/32] Keep old c apis unchanged

---
 R-package/src/ndarray.cc                      |   2 +-
 cpp-package/include/mxnet-cpp/ndarray.hpp     |   2 +-
 cpp-package/include/mxnet-cpp/symbol.hpp      |  12 +-
 include/mxnet/c_api.h                         | 235 ++++++++-
 perl-package/AI-MXNetCAPI/mxnet.i             | 168 +++----
 python/mxnet/executor.py                      |  46 +-
 python/mxnet/ndarray/ndarray.py               |   4 +-
 python/mxnet/symbol/symbol.py                 |  76 +--
 .../native/org_apache_mxnet_native_c_api.cc   | 100 ++--
 src/c_api/c_api.cc                            |  36 +-
 src/c_api/c_api_common.h                      |  31 +-
 src/c_api/c_api_executor.cc                   | 457 +++++++++++++++++-
 src/c_api/c_api_symbolic.cc                   | 126 ++++-
 13 files changed, 1039 insertions(+), 256 deletions(-)

diff --git a/R-package/src/ndarray.cc b/R-package/src/ndarray.cc
index d08671a3f026..0409d3ba8887 100644
--- a/R-package/src/ndarray.cc
+++ b/R-package/src/ndarray.cc
@@ -181,7 +181,7 @@ Rcpp::RObject NDArrayPacker::CreateNDArrayPacker() {
 Rcpp::Dimension NDArray::dim() const {
   int ndim;
   const int *pshape;
-  MX_CALL(MXNDArrayGetShape(
+  MX_CALL(MXNDArrayGetShapeEx(
       ptr_->handle, &ndim, &pshape));
   Rcpp::IntegerVector dat(pshape, pshape + ndim);
   std::reverse(dat.begin(), dat.end());
diff --git a/cpp-package/include/mxnet-cpp/ndarray.hpp b/cpp-package/include/mxnet-cpp/ndarray.hpp
index bf1d82ca33b4..d0438305a62e 100644
--- a/cpp-package/include/mxnet-cpp/ndarray.hpp
+++ b/cpp-package/include/mxnet-cpp/ndarray.hpp
@@ -399,7 +399,7 @@ inline size_t NDArray::Size() const {
 inline std::vector<mx_uint> NDArray::GetShape() const {
   const int *out_pdata;
   int out_dim;
-  MXNDArrayGetShape(blob_ptr_->handle_, &out_dim, &out_pdata);
+  MXNDArrayGetShapeEx(blob_ptr_->handle_, &out_dim, &out_pdata);
   std::vector<mx_uint> ret;
   for (int i = 0; i < out_dim; ++i) {
     ret.push_back(out_pdata[i]);
diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp
index d82b7abaf614..7bdbe52ddc00 100644
--- a/cpp-package/include/mxnet-cpp/symbol.hpp
+++ b/cpp-package/include/mxnet-cpp/symbol.hpp
@@ -210,12 +210,12 @@ inline void Symbol::InferShape(
   const int **aux_shape_data;
   int complete;
 
-  CHECK_EQ(MXSymbolInferShape(GetHandle(), keys.size(), keys.data(),
-                              arg_ind_ptr.data(), arg_shape_data.data(),
-                              &in_shape_size, &in_shape_ndim, &in_shape_data,
-                              &out_shape_size, &out_shape_ndim, &out_shape_data,
-                              &aux_shape_size, &aux_shape_ndim, &aux_shape_data,
-                              &complete),
+  CHECK_EQ(MXSymbolInferShapeEx(GetHandle(), keys.size(), keys.data(),
+                                arg_ind_ptr.data(), arg_shape_data.data(),
+                                &in_shape_size, &in_shape_ndim, &in_shape_data,
+                                &out_shape_size, &out_shape_ndim, &out_shape_data,
+                                &aux_shape_size, &aux_shape_ndim, &aux_shape_data,
+                                &complete),
            0);
 
   if (complete) {
diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 102e4b2c14a2..7907265a68a9 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -768,15 +768,26 @@ MXNET_DLL int MXNDArrayReshape64(NDArrayHandle handle,
                                  bool reverse,
                                  NDArrayHandle *out);
 /*!
- * \brief get the shape of the array
+ * \brief DEPRECATED. Use MXNDArrayGetShapeEx instead.
+ * get the shape of the array
  * \param handle the handle to the narray
  * \param out_dim the output dimension
  * \param out_pdata pointer holder to get data pointer of the shape
  * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle,
-                                int *out_dim,
-                                const int **out_pdata);
+                                mx_uint *out_dim,
+                                const mx_uint **out_pdata);
+/*!
+ * \brief get the shape of the array
+ * \param handle the handle to the narray
+ * \param out_dim the output dimension
+ * \param out_pdata pointer holder to get data pointer of the shape
+ * \return 0 when success, -1 when failure happens
+ */
+MXNET_DLL int MXNDArrayGetShapeEx(NDArrayHandle handle,
+																	int *out_dim,
+																	const int **out_pdata);
 /*!
  * \brief get the content of the data in NDArray
  * \param handle the handle to the ndarray
@@ -1481,7 +1492,8 @@ MXNET_DLL int MXSymbolGrad(SymbolHandle sym,
                            const char** wrt,
                            SymbolHandle* out);
 /*!
- * \brief infer shape of unknown input shapes given the known one.
+ * \brief DEPRECATED. Use MXSymbolInferShapeEx instead.
+ * infer shape of unknown input shapes given the known one.
  *  The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data
  *  The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional.
  *
@@ -1506,19 +1518,58 @@ MXNET_DLL int MXSymbolInferShape(SymbolHandle sym,
                                  mx_uint num_args,
                                  const char** keys,
                                  const mx_uint *arg_ind_ptr,
-                                 const int *arg_shape_data,
+                                 const mx_uint *arg_shape_data,
                                  mx_uint *in_shape_size,
-                                 const int **in_shape_ndim,
-                                 const int ***in_shape_data,
+                                 const mx_uint **in_shape_ndim,
+                                 const mx_uint ***in_shape_data,
                                  mx_uint *out_shape_size,
-                                 const int **out_shape_ndim,
-                                 const int ***out_shape_data,
+                                 const mx_uint **out_shape_ndim,
+                                 const mx_uint ***out_shape_data,
                                  mx_uint *aux_shape_size,
-                                 const int **aux_shape_ndim,
-                                 const int ***aux_shape_data,
+                                 const mx_uint **aux_shape_ndim,
+                                 const mx_uint ***aux_shape_data,
                                  int *complete);
+
 /*!
- * \brief partially infer shape of unknown input shapes given the known one.
+ * \brief infer shape of unknown input shapes given the known one.
+ *  The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data
+ *  The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional.
+ *
+ * \param sym symbol handle
+ * \param num_args numbe of input arguments.
+ * \param keys the key of keyword args (optional)
+ * \param arg_ind_ptr the head pointer of the rows in CSR
+ * \param arg_shape_data the content of the CSR
+ * \param in_shape_size sizeof the returning array of in_shapes
+ * \param in_shape_ndim returning array of shape dimensions of eachs input shape.
+ * \param in_shape_data returning array of pointers to head of the input shape.
+ * \param out_shape_size sizeof the returning array of out_shapes
+ * \param out_shape_ndim returning array of shape dimensions of eachs input shape.
+ * \param out_shape_data returning array of pointers to head of the input shape.
+ * \param aux_shape_size sizeof the returning array of aux_shapes
+ * \param aux_shape_ndim returning array of shape dimensions of eachs auxiliary shape.
+ * \param aux_shape_data returning array of pointers to head of the auxiliary shape.
+ * \param complete whether infer shape completes or more information is needed.
+ * \return 0 when success, -1 when failure happens
+ */
+MXNET_DLL int MXSymbolInferShapeEx(SymbolHandle sym,
+                                   mx_uint num_args,
+                                   const char** keys,
+                                   const mx_uint *arg_ind_ptr,
+                                   const int *arg_shape_data,
+                                   mx_uint *in_shape_size,
+                                   const int **in_shape_ndim,
+                                   const int ***in_shape_data,
+                                   mx_uint *out_shape_size,
+                                   const int **out_shape_ndim,
+                                   const int ***out_shape_data,
+                                   mx_uint *aux_shape_size,
+                                   const int **aux_shape_ndim,
+                                   const int ***aux_shape_data,
+                                   int *complete);
+/*!
+ * \brief DEPRECATED. Use MXSymbolInferShapePartialEx instead.
+ * partially infer shape of unknown input shapes given the known one.
  *
  *  Return partially inferred results if not all shapes could be inferred.
  *  The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data
@@ -1545,18 +1596,59 @@ MXNET_DLL int MXSymbolInferShapePartial(SymbolHandle sym,
                                         mx_uint num_args,
                                         const char** keys,
                                         const mx_uint *arg_ind_ptr,
-                                        const int *arg_shape_data,
+                                        const mx_uint *arg_shape_data,
                                         mx_uint *in_shape_size,
-                                        const int **in_shape_ndim,
-                                        const int ***in_shape_data,
+                                        const mx_uint **in_shape_ndim,
+                                        const mx_uint ***in_shape_data,
                                         mx_uint *out_shape_size,
-                                        const int **out_shape_ndim,
-                                        const int ***out_shape_data,
+                                        const mx_uint **out_shape_ndim,
+                                        const mx_uint ***out_shape_data,
                                         mx_uint *aux_shape_size,
-                                        const int **aux_shape_ndim,
-                                        const int ***aux_shape_data,
+                                        const mx_uint **aux_shape_ndim,
+                                        const mx_uint ***aux_shape_data,
                                         int *complete);
 
+
+/*!
+ * \brief partially infer shape of unknown input shapes given the known one.
+ *
+ *  Return partially inferred results if not all shapes could be inferred.
+ *  The shapes are packed into a CSR matrix represented by arg_ind_ptr and arg_shape_data
+ *  The call will be treated as a kwargs call if key != nullptr or num_args==0, otherwise it is positional.
+ *
+ * \param sym symbol handle
+ * \param num_args numbe of input arguments.
+ * \param keys the key of keyword args (optional)
+ * \param arg_ind_ptr the head pointer of the rows in CSR
+ * \param arg_shape_data the content of the CSR
+ * \param in_shape_size sizeof the returning array of in_shapes
+ * \param in_shape_ndim returning array of shape dimensions of eachs input shape.
+ * \param in_shape_data returning array of pointers to head of the input shape.
+ * \param out_shape_size sizeof the returning array of out_shapes
+ * \param out_shape_ndim returning array of shape dimensions of eachs input shape.
+ * \param out_shape_data returning array of pointers to head of the input shape.
+ * \param aux_shape_size sizeof the returning array of aux_shapes
+ * \param aux_shape_ndim returning array of shape dimensions of eachs auxiliary shape.
+ * \param aux_shape_data returning array of pointers to head of the auxiliary shape.
+ * \param complete whether infer shape completes or more information is needed.
+ * \return 0 when success, -1 when failure happens
+ */
+MXNET_DLL int MXSymbolInferShapePartialEx(SymbolHandle sym,
+                                          mx_uint num_args,
+                                          const char** keys,
+                                          const mx_uint *arg_ind_ptr,
+                                          const int *arg_shape_data,
+                                          mx_uint *in_shape_size,
+                                          const int **in_shape_ndim,
+                                          const int ***in_shape_data,
+                                          mx_uint *out_shape_size,
+                                          const int **out_shape_ndim,
+                                          const int ***out_shape_data,
+                                          mx_uint *aux_shape_size,
+                                          const int **aux_shape_ndim,
+                                          const int ***aux_shape_data,
+                                          int *complete);
+
 /*!
  * \brief infer type of unknown input types given the known one.
  *  The types are packed into a CSR matrix represented by arg_ind_ptr and arg_type_data
@@ -1820,7 +1912,8 @@ MXNET_DLL int MXExecutorBindEX(SymbolHandle symbol_handle,
                                NDArrayHandle *aux_states,
                                ExecutorHandle shared_exec,
                                ExecutorHandle *out);
-
+/*! \brief DEPRECATED. Use MXExecutorSimpleBindEx instead.
+ */
 MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                                    int dev_type,
                                    int dev_id,
@@ -1833,7 +1926,7 @@ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                                    const char** provided_grad_req_types,
                                    const mx_uint num_provided_arg_shapes,
                                    const char** provided_arg_shape_names,
-                                   const int* provided_arg_shape_data,
+                                   const mx_uint* provided_arg_shape_data,
                                    const mx_uint* provided_arg_shape_idx,
                                    const mx_uint num_provided_arg_dtypes,
                                    const char** provided_arg_dtype_names,
@@ -1856,8 +1949,44 @@ MXNET_DLL int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                                    ExecutorHandle shared_exec_handle,
                                    ExecutorHandle* out);
 
-/*!
- * \brief Return a new executor with the same symbol and shared memory,
+
+MXNET_DLL int MXExecutorSimpleBindEx(SymbolHandle symbol_handle,
+                                     int dev_type,
+                                     int dev_id,
+                                     const mx_uint num_g2c_keys,
+                                     const char** g2c_keys,
+                                     const int* g2c_dev_types,
+                                     const int* g2c_dev_ids,
+                                     const mx_uint provided_grad_req_list_len,
+                                     const char** provided_grad_req_names,
+                                     const char** provided_grad_req_types,
+                                     const mx_uint num_provided_arg_shapes,
+                                     const char** provided_arg_shape_names,
+                                     const int* provided_arg_shape_data,
+                                     const mx_uint* provided_arg_shape_idx,
+                                     const mx_uint num_provided_arg_dtypes,
+                                     const char** provided_arg_dtype_names,
+                                     const int* provided_arg_dtypes,
+                                     const mx_uint num_provided_arg_stypes,
+                                     const char** provided_arg_stype_names,
+                                     const int* provided_arg_stypes,
+                                     const mx_uint num_shared_arg_names,
+                                     const char** shared_arg_name_list,
+                                     int* shared_buffer_len,
+                                     const char** shared_buffer_name_list,
+                                     NDArrayHandle* shared_buffer_handle_list,
+                                     const char*** updated_shared_buffer_name_list,
+                                     NDArrayHandle** updated_shared_buffer_handle_list,
+                                     mx_uint* num_in_args,
+                                     NDArrayHandle** in_args,
+                                     NDArrayHandle** arg_grads,
+                                     mx_uint* num_aux_states,
+                                     NDArrayHandle** aux_states,
+                                     ExecutorHandle shared_exec_handle,
+                                     ExecutorHandle* out);
+/*!
+ * \brief DEPRECATED. Use MXExecutorReshapeEx instead.
+ * Return a new executor with the same symbol and shared memory,
  * but different input/output shapes.
  *
  * \param partial_shaping Whether to allow changing the shape of unspecified arguments.
@@ -1887,7 +2016,7 @@ MXNET_DLL int MXExecutorReshape(int partial_shaping,
                                 const int* map_dev_ids,
                                 const mx_uint num_provided_arg_shapes,
                                 const char** provided_arg_shape_names,
-                                const int* provided_arg_shape_data,
+                                const mx_uint* provided_arg_shape_data,
                                 const mx_uint* provided_arg_shape_idx,
                                 mx_uint* num_in_args,
                                 NDArrayHandle** in_args,
@@ -1896,6 +2025,46 @@ MXNET_DLL int MXExecutorReshape(int partial_shaping,
                                 NDArrayHandle** aux_states,
                                 ExecutorHandle shared_exec,
                                 ExecutorHandle *out);
+/*!
+ * \brief Return a new executor with the same symbol and shared memory,
+ * but different input/output shapes.
+ *
+ * \param partial_shaping Whether to allow changing the shape of unspecified arguments.
+ * \param allow_up_sizing Whether to allow allocating new ndarrays that's larger than the original.
+ * \param dev_type device type of default context
+ * \param dev_id device id of default context
+ * \param num_map_keys size of group2ctx map
+ * \param map_keys keys of group2ctx map
+ * \param map_dev_types device type of group2ctx map
+ * \param map_dev_ids device id of group2ctx map
+ * \param num_in_args length of in_args
+ * \param in_args in args array
+ * \param arg_grads arg grads handle array
+ * \param num_aux_states length of auxiliary states
+ * \param aux_states auxiliary states array
+ * \param shared_exec input executor handle for memory sharing
+ * \param out output executor handle
+ * \return a new executor
+ */
+MXNET_DLL int MXExecutorReshapeEx(int partial_shaping,
+                                  int allow_up_sizing,
+                                  int dev_type,
+                                  int dev_id,
+                                  mx_uint num_map_keys,
+                                  const char** map_keys,
+                                  const int* map_dev_types,
+                                  const int* map_dev_ids,
+                                  const mx_uint num_provided_arg_shapes,
+                                  const char** provided_arg_shape_names,
+                                  const int* provided_arg_shape_data,
+                                  const mx_uint* provided_arg_shape_idx,
+                                  mx_uint* num_in_args,
+                                  NDArrayHandle** in_args,
+                                  NDArrayHandle** arg_grads,
+                                  mx_uint* num_aux_states,
+                                  NDArrayHandle** aux_states,
+                                  ExecutorHandle shared_exec,
+                                  ExecutorHandle *out);
 
 /*!
  * \brief get optimized graph from graph executor
@@ -2554,6 +2723,20 @@ MXNET_DLL int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** ar
  */
 MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid,
                                           int* shared_id);
+/*!
+ * \brief DEPRECATED. Use MXNDArrayCreateFromSharedMemEx instead.
+ * Reconstruct NDArray from shared memory handle
+ * \param shared_pid shared PID
+ * \param shared_id shared memory id
+ * \param shape pointer to NDArray dimensions
+ * \param ndim number of NDArray dimensions
+ * \param dtype data type of NDArray
+ * \param out constructed NDArray
+ */
+MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape,
+                                           mx_uint ndim, int dtype, NDArrayHandle *out);
+
+
 /*!
  * \brief Reconstruct NDArray from shared memory handle
  * \param shared_pid shared PID
@@ -2563,8 +2746,8 @@ MXNET_DLL int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid,
  * \param dtype data type of NDArray
  * \param out constructed NDArray
  */
-MXNET_DLL int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape,
-                                           int ndim, int dtype, NDArrayHandle *out);
+MXNET_DLL int MXNDArrayCreateFromSharedMemEx(int shared_pid, int shared_id, const int *shape,
+                                             int ndim, int dtype, NDArrayHandle *out);
 
 /*!
   * \brief Push an asynchronous operation to the engine.
diff --git a/perl-package/AI-MXNetCAPI/mxnet.i b/perl-package/AI-MXNetCAPI/mxnet.i
index 0ecf5b3a9cc3..e38402c56100 100644
--- a/perl-package/AI-MXNetCAPI/mxnet.i
+++ b/perl-package/AI-MXNetCAPI/mxnet.i
@@ -640,7 +640,7 @@ int MXNDArrayReshape64(NDArrayHandle handle,
  * \param out_pdata pointer holder to get data pointer of the shape
  * \return 0 when success, -1 when failure happens
  */
-int MXNDArrayGetShape(NDArrayHandle handle,
+int MXNDArrayGetShapeEx(NDArrayHandle handle,
                                 int *out_dim,
                                 const int **out_pdata);
 /*!
@@ -1289,21 +1289,21 @@ int MXSymbolGrad(SymbolHandle sym,
  * \param complete whether infer shape completes or more information is needed.
  * \return 0 when success, -1 when failure happens
  */
-int MXSymbolInferShape(SymbolHandle sym,
-                       mx_uint num_args,
-                       const char** in,
-                       const mx_uint *in,
-                       const int *in,
-                       mx_uint *in_shape_size,
-                       const int **in_shape_ndim,
-                       const int ***in_shape_data,
-                       mx_uint *out_shape_size,
-                       const int **out_shape_ndim,
-                       const int ***out_shape_data,
-                       mx_uint *aux_shape_size,
-                       const int **aux_shape_ndim,
-                       const int ***aux_shape_data,
-                       int *out);
+int MXSymbolInferShapeEx(SymbolHandle sym,
+                         mx_uint num_args,
+                         const char** in,
+                         const mx_uint *in,
+                         const int *in,
+                         mx_uint *in_shape_size,
+                         const int **in_shape_ndim,
+                         const int ***in_shape_data,
+                         mx_uint *out_shape_size,
+                         const int **out_shape_ndim,
+                         const int ***out_shape_data,
+                         mx_uint *aux_shape_size,
+                         const int **aux_shape_ndim,
+                         const int ***aux_shape_data,
+                         int *out);
 /*!
  * \brief partially infer shape of unknown input shapes given the known one.
  *
@@ -1328,21 +1328,21 @@ int MXSymbolInferShape(SymbolHandle sym,
  * \param complete whether infer shape completes or more information is needed.
  * \return 0 when success, -1 when failure happens
  */
-int MXSymbolInferShapePartial(SymbolHandle sym,
-                                 mx_uint num_args,
-                                 const char** in,
-                                 const mx_uint *in,
-                                 const int *in,
-                                 mx_uint *in_shape_size,
-                                 const int **in_shape_ndim,
-                                 const int ***in_shape_data,
-                                 mx_uint *out_shape_size,
-                                 const int **out_shape_ndim,
-                                 const int ***out_shape_data,
-                                 mx_uint *aux_shape_size,
-                                 const int **aux_shape_ndim,
-                                 const int ***aux_shape_data,
-                                 int *out);
+int MXSymbolInferShapePartialEx(SymbolHandle sym,
+                                mx_uint num_args,
+                                const char** in,
+                                const mx_uint *in,
+                                const int *in,
+                                mx_uint *in_shape_size,
+                                const int **in_shape_ndim,
+                                const int ***in_shape_data,
+                                mx_uint *out_shape_size,
+                                const int **out_shape_ndim,
+                                const int ***out_shape_data,
+                                mx_uint *aux_shape_size,
+                                const int **aux_shape_ndim,
+                                const int ***aux_shape_data,
+                                int *out);
 
 /*!
  * \brief infer type of unknown input types given the known one.
@@ -1535,40 +1535,40 @@ int MXExecutorBindEX(SymbolHandle symbol_handle,
                                ExecutorHandle shared_exec,
                                ExecutorHandle *out);
 
-int MXExecutorSimpleBind(SymbolHandle symbol_handle,
-                         int dev_type,
-                         int dev_id,
-                         const mx_uint num_g2c_keys,
-                         const char** in, // g2c_keys,
-                         const int* in, // g2c_dev_types,
-                         const int* in, // g2c_dev_ids,
-                         const mx_uint provided_grad_req_list_len,
-                         const char** in, // provided_grad_req_names,
-                         const char** in, // provided_grad_req_types,
-                         const mx_uint num_provided_arg_shapes,
-                         const char** in, // provided_arg_shape_names,
-                         const int* in, // provided_arg_shape_data,
-                         const mx_uint* in, // provided_arg_shape_idx,
-                         const mx_uint num_provided_arg_dtypes,
-                         const char** in, // provided_arg_dtype_names,
-                         const int* in, // provided_arg_dtypes,
-                         const mx_uint num_provided_arg_stypes,
-                         const char** in, // provided_arg_stype_names,
-                         const int* in, // provided_arg_stypes,
-                         const mx_uint num_shared_arg_names,
-                         const char** in, // shared_arg_name_list,
-                         int* shared_buffer_len,
-                         const char** shared_buffer_name_list,
-                         NDArrayHandle* shared_buffer_handle_list,
-                         const char*** updated_shared_buffer_name_list,
-                         NDArrayHandle** updated_shared_buffer_handle_list,
-                         mx_uint* num_in_args,
-                         NDArrayHandle** in_args,
-                         NDArrayHandle** arg_grads,
-                         mx_uint* num_aux_states,
-                         NDArrayHandle** aux_states,
-                         ExecutorHandle shared_exec_handle,
-                         ExecutorHandle* out
+int MXExecutorSimpleBindEx(SymbolHandle symbol_handle,
+                           int dev_type,
+                           int dev_id,
+                           const mx_uint num_g2c_keys,
+                           const char** in, // g2c_keys,
+                           const int* in, // g2c_dev_types,
+                           const int* in, // g2c_dev_ids,
+                           const mx_uint provided_grad_req_list_len,
+                           const char** in, // provided_grad_req_names,
+                           const char** in, // provided_grad_req_types,
+                           const mx_uint num_provided_arg_shapes,
+                           const char** in, // provided_arg_shape_names,
+                           const int* in, // provided_arg_shape_data,
+                           const mx_uint* in, // provided_arg_shape_idx,
+                           const mx_uint num_provided_arg_dtypes,
+                           const char** in, // provided_arg_dtype_names,
+                           const int* in, // provided_arg_dtypes,
+                           const mx_uint num_provided_arg_stypes,
+                           const char** in, // provided_arg_stype_names,
+                           const int* in, // provided_arg_stypes,
+                           const mx_uint num_shared_arg_names,
+                           const char** in, // shared_arg_name_list,
+                           int* shared_buffer_len,
+                           const char** shared_buffer_name_list,
+                           NDArrayHandle* shared_buffer_handle_list,
+                           const char*** updated_shared_buffer_name_list,
+                           NDArrayHandle** updated_shared_buffer_handle_list,
+                           mx_uint* num_in_args,
+                           NDArrayHandle** in_args,
+                           NDArrayHandle** arg_grads,
+                           mx_uint* num_aux_states,
+                           NDArrayHandle** aux_states,
+                           ExecutorHandle shared_exec_handle,
+                           ExecutorHandle* out
 );
 
 /*!
@@ -1592,25 +1592,25 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
  * \param out output executor handle
  * \return a new executor
  */
-int MXExecutorReshape(int partial_shaping,
-                      int allow_up_sizing,
-                      int dev_type,
-                      int dev_id,
-                      mx_uint num_map_keys,
-                      const char** in,
-                      const int* in,
-                      const int* in,
-                      const mx_uint num_provided_arg_shapes,
-                      const char** in,
-                      const int* in,
-                      const mx_uint* in,
-                      mx_uint* couple_out_size,
-                      NDArrayHandle** out_first_array,
-                      NDArrayHandle** out_second_array,
-                      mx_uint* out_size,
-                      NDArrayHandle** out_array,
-                      ExecutorHandle shared_exec,
-                      ExecutorHandle *out);
+int MXExecutorReshapeEx(int partial_shaping,
+                        int allow_up_sizing,
+                        int dev_type,
+                        int dev_id,
+                        mx_uint num_map_keys,
+                        const char** in,
+                        const int* in,
+                        const int* in,
+                        const mx_uint num_provided_arg_shapes,
+                        const char** in,
+                        const int* in,
+                        const mx_uint* in,
+                        mx_uint* couple_out_size,
+                        NDArrayHandle** out_first_array,
+                        NDArrayHandle** out_second_array,
+                        mx_uint* out_size,
+                        NDArrayHandle** out_array,
+                        ExecutorHandle shared_exec,
+                        ExecutorHandle *out);
 
 /*!
  * \brief set a call back to notify the completion of operation
diff --git a/python/mxnet/executor.py b/python/mxnet/executor.py
index 53ddc252d6b5..9dfe63682f86 100644
--- a/python/mxnet/executor.py
+++ b/python/mxnet/executor.py
@@ -433,29 +433,29 @@ def reshape(self, partial_shaping=False, allow_up_sizing=False, **kwargs):
         num_aux_states = ctypes.c_uint()
         aux_state_handles = ctypes.POINTER(NDArrayHandle)()
 
-        check_call(_LIB.MXExecutorReshape(ctypes.c_int(int(partial_shaping)),
-                                          ctypes.c_int(int(allow_up_sizing)),
-                                          ctypes.c_int(self._ctx.device_typeid),
-                                          ctypes.c_int(self._ctx.device_id),
-                                          mx_uint(len(ctx_map_keys)),
-                                          c_str_array(ctx_map_keys),
-                                          c_array_buf(ctypes.c_int,
-                                                      py_array('i', ctx_map_dev_types)),
-                                          c_array_buf(ctypes.c_int,
-                                                      py_array('i', ctx_map_dev_ids)),
-                                          mx_uint(len(provided_arg_shape_names)),
-                                          c_str_array(provided_arg_shape_names),
-                                          c_array_buf(mx_int,
-                                                      py_array('i', provided_arg_shape_data)),
-                                          c_array_buf(mx_uint,
-                                                      py_array('I', provided_arg_shape_idx)),
-                                          ctypes.byref(num_in_args),
-                                          ctypes.byref(in_arg_handles),
-                                          ctypes.byref(arg_grad_handles),
-                                          ctypes.byref(num_aux_states),
-                                          ctypes.byref(aux_state_handles),
-                                          shared_handle,
-                                          ctypes.byref(handle)))
+        check_call(_LIB.MXExecutorReshapeEx(ctypes.c_int(int(partial_shaping)),
+                                            ctypes.c_int(int(allow_up_sizing)),
+                                            ctypes.c_int(self._ctx.device_typeid),
+                                            ctypes.c_int(self._ctx.device_id),
+                                            mx_uint(len(ctx_map_keys)),
+                                            c_str_array(ctx_map_keys),
+                                            c_array_buf(ctypes.c_int,
+                                                        py_array('i', ctx_map_dev_types)),
+                                            c_array_buf(ctypes.c_int,
+                                                        py_array('i', ctx_map_dev_ids)),
+                                            mx_uint(len(provided_arg_shape_names)),
+                                            c_str_array(provided_arg_shape_names),
+                                            c_array_buf(mx_int,
+                                                        py_array('i', provided_arg_shape_data)),
+                                            c_array_buf(mx_uint,
+                                                        py_array('I', provided_arg_shape_idx)),
+                                            ctypes.byref(num_in_args),
+                                            ctypes.byref(in_arg_handles),
+                                            ctypes.byref(arg_grad_handles),
+                                            ctypes.byref(num_aux_states),
+                                            ctypes.byref(aux_state_handles),
+                                            shared_handle,
+                                            ctypes.byref(handle)))
 
         arg_arrays = [_ndarray_cls(NDArrayHandle(in_arg_handles[i]))
                       for i in range(num_in_args.value)]
diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py
index 44334fc7f43e..97cfd827c7fe 100644
--- a/python/mxnet/ndarray/ndarray.py
+++ b/python/mxnet/ndarray/ndarray.py
@@ -143,7 +143,7 @@ def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t):
 
 def _new_from_shared_mem(shared_pid, shared_id, shape, dtype):
     hdl = NDArrayHandle()
-    check_call(_LIB.MXNDArrayCreateFromSharedMem(
+    check_call(_LIB.MXNDArrayCreateFromSharedMemEx(
         ctypes.c_int(shared_pid),
         ctypes.c_int(shared_id),
         c_array(mx_int, shape),
@@ -1847,7 +1847,7 @@ def shape(self):
         """
         ndim = mx_int()
         pdata = ctypes.POINTER(mx_int)()
-        check_call(_LIB.MXNDArrayGetShape(
+        check_call(_LIB.MXNDArrayGetShapeEx(
             self.handle, ctypes.byref(ndim), ctypes.byref(pdata)))
         if ndim.value == -1:
             return None
diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py
index a187f67d608a..4bf60a6a1fcd 100644
--- a/python/mxnet/symbol/symbol.py
+++ b/python/mxnet/symbol/symbol.py
@@ -1188,9 +1188,9 @@ def _infer_shape_impl(self, partial, *args, **kwargs):
         aux_shape_data = ctypes.POINTER(ctypes.POINTER(mx_int))()
         complete = ctypes.c_int()
         if partial:
-            infer_func = _LIB.MXSymbolInferShapePartial
+            infer_func = _LIB.MXSymbolInferShapePartialEx
         else:
-            infer_func = _LIB.MXSymbolInferShape
+            infer_func = _LIB.MXSymbolInferShapeEx
         check_call(infer_func(
             self.handle,
             mx_uint(len(indptr) - 1),
@@ -1571,42 +1571,42 @@ def simple_bind(self, ctx, grad_req='write', type_dict=None, stype_dict=None,
         aux_state_handles = ctypes.POINTER(NDArrayHandle)()
 
         try:
-            check_call(_LIB.MXExecutorSimpleBind(self.handle,
-                                                 ctypes.c_int(ctx.device_typeid),
-                                                 ctypes.c_int(ctx.device_id),
-                                                 num_ctx_map_keys,
-                                                 ctx_map_keys,
-                                                 ctx_map_dev_types,
-                                                 ctx_map_dev_ids,
-                                                 mx_uint(provided_req_type_list_len),
-                                                 provided_grad_req_names,
-                                                 provided_grad_req_types,
-                                                 mx_uint(len(provided_arg_shape_names)),
-                                                 c_str_array(provided_arg_shape_names),
-                                                 c_array_buf(mx_int,
-                                                             array('I', provided_arg_shape_data)),
-                                                 c_array_buf(mx_uint,
-                                                             array('i', provided_arg_shape_idx)),
-                                                 num_provided_arg_types,
-                                                 provided_arg_type_names,
-                                                 provided_arg_type_data,
-                                                 num_provided_arg_stypes,
-                                                 provided_arg_stype_names,
-                                                 provided_arg_stype_data,
-                                                 mx_uint(len(shared_arg_name_list)),
-                                                 c_str_array(shared_arg_name_list),
-                                                 ctypes.byref(shared_buffer_len),
-                                                 shared_buffer_names,
-                                                 shared_buffer_handles,
-                                                 ctypes.byref(updated_shared_buffer_names),
-                                                 ctypes.byref(updated_shared_buffer_handles),
-                                                 ctypes.byref(num_in_args),
-                                                 ctypes.byref(in_arg_handles),
-                                                 ctypes.byref(arg_grad_handles),
-                                                 ctypes.byref(num_aux_states),
-                                                 ctypes.byref(aux_state_handles),
-                                                 shared_exec_handle,
-                                                 ctypes.byref(exe_handle)))
+            check_call(_LIB.MXExecutorSimpleBindEx(self.handle,
+                                                   ctypes.c_int(ctx.device_typeid),
+                                                   ctypes.c_int(ctx.device_id),
+                                                   num_ctx_map_keys,
+                                                   ctx_map_keys,
+                                                   ctx_map_dev_types,
+                                                   ctx_map_dev_ids,
+                                                   mx_uint(provided_req_type_list_len),
+                                                   provided_grad_req_names,
+                                                   provided_grad_req_types,
+                                                   mx_uint(len(provided_arg_shape_names)),
+                                                   c_str_array(provided_arg_shape_names),
+                                                   c_array_buf(mx_int,
+                                                               array('I', provided_arg_shape_data)),
+                                                   c_array_buf(mx_uint,
+                                                               array('i', provided_arg_shape_idx)),
+                                                   num_provided_arg_types,
+                                                   provided_arg_type_names,
+                                                   provided_arg_type_data,
+                                                   num_provided_arg_stypes,
+                                                   provided_arg_stype_names,
+                                                   provided_arg_stype_data,
+                                                   mx_uint(len(shared_arg_name_list)),
+                                                   c_str_array(shared_arg_name_list),
+                                                   ctypes.byref(shared_buffer_len),
+                                                   shared_buffer_names,
+                                                   shared_buffer_handles,
+                                                   ctypes.byref(updated_shared_buffer_names),
+                                                   ctypes.byref(updated_shared_buffer_handles),
+                                                   ctypes.byref(num_in_args),
+                                                   ctypes.byref(in_arg_handles),
+                                                   ctypes.byref(arg_grad_handles),
+                                                   ctypes.byref(num_aux_states),
+                                                   ctypes.byref(aux_state_handles),
+                                                   shared_exec_handle,
+                                                   ctypes.byref(exe_handle)))
         except MXNetError as e:
             error_msg = "simple_bind error. Arguments:\n"
             for k, v in kwargs.items():
diff --git a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
index ae01c8a7c05e..7323d23ac556 100644
--- a/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
+++ b/scala-package/native/src/main/native/org_apache_mxnet_native_c_api.cc
@@ -356,7 +356,7 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxNDArrayGetShape
   (JNIEnv *env, jobject obj, jlong ndArrayPtr, jobject ndimRef, jobject dataBuf) {
   int ndim;
   const int *pdata;
-  int ret = MXNDArrayGetShape(reinterpret_cast<NDArrayHandle>(ndArrayPtr), &ndim, &pdata);
+  int ret = MXNDArrayGetShapeEx(reinterpret_cast<NDArrayHandle>(ndArrayPtr), &ndim, &pdata);
 
   // fill dataBuf
   jclass integerClass = env->FindClass("java/lang/Integer");
@@ -938,25 +938,25 @@ JNIEXPORT jint JNICALL Java_org_apache_mxnet_LibInfo_mxExecutorReshape
 
   ExecutorHandle out;
 
-  int ret = MXExecutorReshape(partialReshaping,
-                              allowUpSizing,
-                              devType,
-                              devId,
-                              static_cast<mx_uint>(numMapKeys),
-                              mapKeys,
-                              static_cast<const int*>(mapDevTypes),
-                              static_cast<const int*>(mapDevIds),
-                              static_cast<const mx_uint>(numProvidedArgShapes),
-                              providedArgShapeNames,
-                              static_cast<const int*>(providedArgShapeData),
-                              reinterpret_cast<const mx_uint*>(providedArgShapeIdx),
-                              &numInArgs,
-                              &inArgs,
-                              &argGrads,
-                              &numAuxStates,
-                              &auxStates,
-                              reinterpret_cast<ExecutorHandle>(jsharedExec),
-                              &out);
+  int ret = MXExecutorReshapeEx(partialReshaping,
+                                allowUpSizing,
+                                devType,
+                                devId,
+                                static_cast<mx_uint>(numMapKeys),
+                                mapKeys,
+                                static_cast<const int*>(mapDevTypes),
+                                static_cast<const int*>(mapDevIds),
+                                static_cast<const mx_uint>(numProvidedArgShapes),
+                                providedArgShapeNames,
+                                static_cast<const int*>(providedArgShapeData),
+                                reinterpret_cast<const mx_uint*>(providedArgShapeIdx),
+                                &numInArgs,
+                                &inArgs,
+                                &argGrads,
+                                &numAuxStates,
+                                &auxStates,
+                                reinterpret_cast<ExecutorHandle>(jsharedExec),
+                                &out);
 
   jclass longCls = env->FindClass("java/lang/Long");
   jmethodID newLong = env->GetMethodID(longCls, "<init>", "(J)V");
@@ -1693,37 +1693,37 @@ int SymbolInferShapeHelper(JNIEnv *env, jobject obj, jlong symbolPtr, jint jnumA
   jint *argShapeData = env->GetIntArrayElements(jargShapeData, NULL);
   int ret;
   if (!partial) {
-    ret = MXSymbolInferShape(reinterpret_cast<SymbolHandle>(symbolPtr),
-                              static_cast<mx_uint>(jnumArgs),
-                              keys,
-                              reinterpret_cast<mx_uint *>(argIndPtr),
-                              reinterpret_cast<const int *>(argShapeData),
-                              &inShapeSize,
-                              &inShapeNdim,
-                              &inShapeData,
-                              &outShapeSize,
-                              &outShapeNdim,
-                              &outShapeData,
-                              &auxShapeSize,
-                              &auxShapeNdim,
-                              &auxShapeData,
-                              &complete);
+    ret = MXSymbolInferShapeEx(reinterpret_cast<SymbolHandle>(symbolPtr),
+                               static_cast<mx_uint>(jnumArgs),
+                               keys,
+                               reinterpret_cast<mx_uint *>(argIndPtr),
+                               reinterpret_cast<const int *>(argShapeData),
+                               &inShapeSize,
+                               &inShapeNdim,
+                               &inShapeData,
+                               &outShapeSize,
+                               &outShapeNdim,
+                               &outShapeData,
+                               &auxShapeSize,
+                               &auxShapeNdim,
+                               &auxShapeData,
+                               &complete);
   } else {
-    ret = MXSymbolInferShapePartial(reinterpret_cast<SymbolHandle>(symbolPtr),
-                                    static_cast<mx_uint>(jnumArgs),
-                                    keys,
-                                    reinterpret_cast<mx_uint *>(argIndPtr),
-                                    reinterpret_cast<const int *>(argShapeData),
-                                    &inShapeSize,
-                                    &inShapeNdim,
-                                    &inShapeData,
-                                    &outShapeSize,
-                                    &outShapeNdim,
-                                    &outShapeData,
-                                    &auxShapeSize,
-                                    &auxShapeNdim,
-                                    &auxShapeData,
-                                    &complete);
+    ret = MXSymbolInferShapePartialEx(reinterpret_cast<SymbolHandle>(symbolPtr),
+                                      static_cast<mx_uint>(jnumArgs),
+                                      keys,
+                                      reinterpret_cast<mx_uint *>(argIndPtr),
+                                      reinterpret_cast<const int *>(argShapeData),
+                                      &inShapeSize,
+                                      &inShapeNdim,
+                                      &inShapeData,
+                                      &outShapeSize,
+                                      &outShapeNdim,
+                                      &outShapeData,
+                                      &auxShapeSize,
+                                      &auxShapeNdim,
+                                      &auxShapeData,
+                                      &complete);
   }
   env->ReleaseIntArrayElements(jargShapeData, argShapeData, 0);
   env->ReleaseIntArrayElements(jargIndPtr, argIndPtr, 0);
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index 533f18fa9e0b..f549ddd13994 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -495,8 +495,27 @@ int MXNDArrayGetStorageType(NDArrayHandle handle,
 }
 
 int MXNDArrayGetShape(NDArrayHandle handle,
-                      int *out_dim,
-                      const int **out_pdata) {
+                      mx_uint *out_dim,
+                      const mx_uint **out_pdata) {
+  MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
+  API_BEGIN();
+  NDArray *arr = static_cast<NDArray*>(handle);
+  if (!arr->is_none()) {
+    const mxnet::TShape &s = arr->shape();
+    *out_dim = s.ndim();
+    std::vector<uint32_t>& buffer = ret->arg_shape_buffer;
+    buffer.resize(s.ndim());
+    nnvm::ShapeTypeCast(s.begin(), s.end(), buffer.data());
+    *out_pdata = buffer.data();
+  } else {
+    *out_dim = 0;
+  }
+  API_END();
+}
+
+int MXNDArrayGetShapeEx(NDArrayHandle handle,
+                        int *out_dim,
+                        const int **out_pdata) {
   MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
   API_BEGIN();
   NDArray *arr = static_cast<NDArray*>(handle);
@@ -507,7 +526,7 @@ int MXNDArrayGetShape(NDArrayHandle handle,
     }
     *out_dim = s.ndim();
     if (s.ndim() >= 0) {
-      std::vector<int> &buffer = ret->arg_shape_buffer;
+      std::vector<int> &buffer = ret->arg_shape_buffer_ex;
       buffer.resize(s.ndim());
       mxnet::ShapeTypeCast(s.begin(), s.end(), buffer.data());
       *out_pdata = buffer.data();
@@ -1406,8 +1425,15 @@ int MXNDArrayGetSharedMemHandle(NDArrayHandle handle, int* shared_pid, int* shar
   API_END();
 }
 
-int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const int *shape,
-                                 int ndim, int dtype, NDArrayHandle *out) {
+int MXNDArrayCreateFromSharedMem(int shared_pid, int shared_id, const mx_uint *shape,
+                                 mx_uint ndim, int dtype, NDArrayHandle *out) {
+  API_BEGIN();
+  *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype);
+  API_END();
+}
+
+int MXNDArrayCreateFromSharedMemEx(int shared_pid, int shared_id, const int *shape,
+                                   int ndim, int dtype, NDArrayHandle *out) {
   API_BEGIN();
   *out = new NDArray(shared_pid, shared_id, mxnet::TShape(shape, shape + ndim), dtype);
   API_END();
diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h
index 8be192b22c53..013ecab93da8 100644
--- a/src/c_api/c_api_common.h
+++ b/src/c_api/c_api_common.h
@@ -74,15 +74,40 @@ struct MXAPIThreadLocalEntry {
   /*! \brief result holder for returning storage types */
   std::vector<int> arg_storage_types, out_storage_types, aux_storage_types;
   /*! \brief result holder for returning shape dimensions */
-  std::vector<int> arg_shape_ndim, out_shape_ndim, aux_shape_ndim;
+  std::vector<mx_uint> arg_shape_ndim, out_shape_ndim, aux_shape_ndim;
+  /*! \brief result holder for returning shape dimensions */
+  std::vector<int> arg_shape_ndim_ex, out_shape_ndim_ex, aux_shape_ndim_ex;
+  /*! \brief result holder for returning shape pointer */
+  std::vector<const mx_uint*> arg_shape_data, out_shape_data, aux_shape_data;
   /*! \brief result holder for returning shape pointer */
-  std::vector<const int*> arg_shape_data, out_shape_data, aux_shape_data;
+  std::vector<const int*> arg_shape_data_ex, out_shape_data_ex, aux_shape_data_ex;
   /*! \brief uint32_t buffer for returning shape pointer */
-  std::vector<int> arg_shape_buffer, out_shape_buffer, aux_shape_buffer;
+  std::vector<uint32_t> arg_shape_buffer, out_shape_buffer, aux_shape_buffer;
+  /*! \brief uint32_t buffer for returning shape pointer */
+  std::vector<int> arg_shape_buffer_ex, out_shape_buffer_ex, aux_shape_buffer_ex;
   /*! \brief bool buffer */
   std::vector<bool> save_inputs, save_outputs;
+  // DEPRECATED. Use SetupShapeArrayReturnWithBufferEx instead.
   // helper function to setup return value of shape array
   inline static void SetupShapeArrayReturnWithBuffer(
+      const mxnet::ShapeVector &shapes,
+      std::vector<mx_uint> *ndim,
+      std::vector<const mx_uint*> *data,
+      std::vector<uint32_t> *buffer) {
+    ndim->resize(shapes.size());
+    data->resize(shapes.size());
+    size_t size = 0;
+    for (const auto& s : shapes) size += s.ndim();
+    buffer->resize(size);
+    uint32_t *ptr = buffer->data();
+    for (size_t i = 0; i < shapes.size(); ++i) {
+      ndim->at(i) = shapes[i].ndim();
+      data->at(i) = ptr;
+      ptr = nnvm::ShapeTypeCast(shapes[i].begin(), shapes[i].end(), ptr);
+    }
+  }
+  // helper function to setup return value of shape array
+  inline static void SetupShapeArrayReturnWithBufferEx(
       const mxnet::ShapeVector &shapes,
       std::vector<int> *ndim,
       std::vector<const int*> *data,
diff --git a/src/c_api/c_api_executor.cc b/src/c_api/c_api_executor.cc
index fc59463ab689..5352fcfe0951 100644
--- a/src/c_api/c_api_executor.cc
+++ b/src/c_api/c_api_executor.cc
@@ -185,7 +185,7 @@ int MXExecutorBindEX(SymbolHandle symbol_handle,
 }
 
 /*!
- * \brief
+ * \brief DEPRECATED. Use MXExecutorSimpleBindEx instead.
  * \param symbol_handle symbol handle
  * \param dev_type default device type
  * \param dev_id default device id
@@ -233,7 +233,7 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
                          const char** provided_grad_req_types,
                          const mx_uint num_provided_arg_shapes,
                          const char** provided_arg_shape_names,
-                         const int* provided_arg_shape_data,
+                         const mx_uint* provided_arg_shape_data,
                          const mx_uint* provided_arg_shape_idx,
                          const mx_uint num_provided_arg_dtypes,
                          const char** provided_arg_dtype_names,
@@ -544,6 +544,366 @@ int MXExecutorSimpleBind(SymbolHandle symbol_handle,
   API_END();
 }
 
+/*!
+ * \brief
+ * \param symbol_handle symbol handle
+ * \param dev_type default device type
+ * \param dev_id default device id
+ * \param num_g2c_keys number of group2ctx keys
+ * \param g2c_keys key list of group2ctx
+ * \param g2c_dev_types device type list of group2ctx
+ * \param g2c_dev_ids id list of group2ctx
+ * \param provided_grad_req_list_len grad_req length provided by users in front-end
+ * \param provided_grad_req_names grad_req names provided by users in front-end
+ * \param provided_grad_req_types req types provided by users in front-end
+ * \param num_provided_arg_shapes number of user provided in_arg and aux_state shapes
+ * \param provided_arg_shape_names name list of provided shapes
+ * \param provided_arg_shape_data provided shape data
+ * \param provided_arg_shape_idx provided shape data index
+ * \param num_provided_arg_dtypes number of user provided in_arg and axu_state dtypes
+ * \param provided_arg_dtype_names argument name list of provided dtypes
+ * \param provided_arg_dtypes data of provided dtypes
+ * \param num_provided_arg_stypes number of user provided in_arg and axu_state storage types
+ * \param provided_arg_stype_names argument name list of provided storage types
+ * \param provided_arg_stypes data of provided storage types
+ * \param num_shared_arg_names number of parameter names passed from _bind_ith_exec
+ * \param shared_arg_name_list parameter name list passed from _bind_ith_exec
+ * \param shared_buffer_len number of shared data arrays passed from _bind_ith_exec
+ * \param shared_buffer_name_list shared data array names passed from _bind_ith_exec
+ * \param shared_buffer_handle_list shared data array handles passed from _bind_ith_exec
+ * \param updated_shared_buffer_name_list updated shared data array names after binding
+ * \param updated_shared_buffer_handle_list updated shared data arrays after binding
+ * \param num_in_args number of input arguments of this sym
+ * \param in_args list_arguments associated with the current executor
+ * \param arg_grads list of gradients of in_args associated with the current executor
+ * \param num_aux_states number of aux states of this sym
+ * \param aux_states list_auxiliary_states associated with the current executor
+ * \param shared_exec_handle shared excutor handle passed from _bind_ith_exec
+ * \param out the handle of the executor to be created
+ */
+int MXExecutorSimpleBindEx(SymbolHandle symbol_handle,
+                           int dev_type,
+                           int dev_id,
+                           const mx_uint num_g2c_keys,
+                           const char** g2c_keys,
+                           const int* g2c_dev_types,
+                           const int* g2c_dev_ids,
+                           const mx_uint provided_grad_req_list_len,
+                           const char** provided_grad_req_names,
+                           const char** provided_grad_req_types,
+                           const mx_uint num_provided_arg_shapes,
+                           const char** provided_arg_shape_names,
+                           const int* provided_arg_shape_data,
+                           const mx_uint* provided_arg_shape_idx,
+                           const mx_uint num_provided_arg_dtypes,
+                           const char** provided_arg_dtype_names,
+                           const int* provided_arg_dtypes,
+                           const mx_uint num_provided_arg_stypes,
+                           const char** provided_arg_stype_names,
+                           const int* provided_arg_stypes,
+                           const mx_uint num_shared_arg_names,
+                           const char** shared_arg_name_list,
+                           int* shared_buffer_len,
+                           const char** shared_buffer_name_list,
+                           NDArrayHandle* shared_buffer_handle_list,
+                           const char*** updated_shared_buffer_name_list,
+                           NDArrayHandle** updated_shared_buffer_handle_list,
+                           mx_uint* num_in_args,
+                           NDArrayHandle** in_args,
+                           NDArrayHandle** arg_grads,
+                           mx_uint* num_aux_states,
+                           NDArrayHandle** aux_states,
+                           ExecutorHandle shared_exec_handle,
+                           ExecutorHandle* out) {
+  MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
+  API_BEGIN();
+  nnvm::Symbol *sym = static_cast<nnvm::Symbol*>(symbol_handle);
+
+  // get in_arg names
+  std::vector<std::string> in_arg_names = sym->ListInputNames(nnvm::Symbol::kReadOnlyArgs);
+  std::vector<std::string> aux_state_names = sym->ListInputNames(nnvm::Symbol::kAuxiliaryStates);
+
+  // attr_dict for setting up type_dict and arg/aux ctx
+  std::unordered_map<std::string, std::unordered_map<std::string, std::string>> attr_dict;
+  if (nullptr == provided_arg_dtypes || nullptr != g2c_keys || nullptr == provided_arg_stypes) {
+    std::vector<std::tuple<std::string, std::string, std::string>> attrs =
+      sym->ListAttrsRecursive();
+    attr_dict.reserve(attrs.size());
+    for (const auto& tp : attrs) {
+      attr_dict[std::get<0>(tp)][std::get<1>(tp)] = std::get<2>(tp);
+    }
+  }
+
+  // setup arg_dtype_map
+  std::unordered_map<std::string, int> arg_dtype_map;
+  if (nullptr == provided_arg_dtypes) {  // use attr_dict
+    for (const auto& arg_name : in_arg_names) {
+      const auto it = attr_dict.find(arg_name);
+      if (it == attr_dict.end() || !it->second.count("__dtype__")) {
+        arg_dtype_map[arg_name] = mshadow::kFloat32;
+      }
+    }
+  } else {  // use user input type_dict
+    // create dtype map for in_args and aux_states
+    arg_dtype_map.reserve(num_provided_arg_dtypes);
+    for (mx_uint i = 0; i < num_provided_arg_dtypes; ++i) {
+      arg_dtype_map[provided_arg_dtype_names[i]] = provided_arg_dtypes[i];
+    }
+  }
+
+  // setup arg_stype_map
+  std::unordered_map<std::string, int> arg_stype_map;
+  if (nullptr == provided_arg_stypes) {  // use attr_dict
+    for (const auto& arg_name : in_arg_names) {
+      const auto it = attr_dict.find(arg_name);
+      if (it == attr_dict.end() || !it->second.count("__storage_type__")) {
+        arg_stype_map[arg_name] = kDefaultStorage;
+      }
+    }
+  } else {  // use user input type_dict
+    // create stype map for in_args and aux_states
+    arg_stype_map.reserve(num_provided_arg_stypes);
+    for (mx_uint i = 0; i < num_provided_arg_stypes; ++i) {
+      arg_stype_map[provided_arg_stype_names[i]] = provided_arg_stypes[i];
+    }
+  }
+
+  // create default ctx
+  Context ctx = Context::Create(static_cast<Context::DeviceType>(dev_type), dev_id);
+  // create ctx map
+  std::map<std::string, Context> ctx_map;
+  std::vector<Context> in_arg_ctx_vec(in_arg_names.size(), ctx);
+  std::vector<Context> aux_state_ctx_vec(aux_state_names.size(), ctx);
+  if (nullptr != g2c_keys) {  // use user input group2ctx dict
+    for (mx_uint i = 0; i < num_g2c_keys; ++i) {
+      ctx_map[g2c_keys[i]] = Context::Create(
+          static_cast<Context::DeviceType>(g2c_dev_types[i]), g2c_dev_ids[i]);
+    }
+
+    // initialize in_arg_ctx_vec using group2ctx if there are any
+    for (size_t i = 0; i < in_arg_ctx_vec.size(); ++i) {
+      const auto it1 = attr_dict.find(in_arg_names[i]);
+      if (it1 != attr_dict.end()) {
+        const auto it2 = it1->second.find("__ctx_group__");
+        if (it2 != it1->second.end()) {
+          const auto it3 = ctx_map.find(it2->second);
+          if (it3 != ctx_map.end()) {
+            in_arg_ctx_vec[i] = it3->second;
+          }
+        }
+      }
+    }
+
+    // initialize aux_state_ctx_vec using group2ctx if there are any
+    for (size_t i = 0; i < aux_state_ctx_vec.size(); ++i) {
+      const auto it1 = attr_dict.find(aux_state_names[i]);
+      if (it1 != attr_dict.end()) {
+        const auto it2 = it1->second.find("__ctx_group__");
+        if (it2 != it1->second.end()) {
+          const auto it3 = ctx_map.find(it2->second);
+          if (it3 != ctx_map.end()) {
+            aux_state_ctx_vec[i] = it3->second;
+          }
+        }
+      }
+    }
+  }
+
+  // create provided_grad_req_map
+  const std::map<std::string, OpReqType> req_map =
+    {{"null", kNullOp}, {"write", kWriteTo}, {"add", kAddTo}};
+  std::unordered_map<std::string, std::string> provided_grad_req_map;
+  std::string grad_req_type;
+  if (0 == provided_grad_req_list_len
+      && nullptr == provided_grad_req_names
+      && nullptr != provided_grad_req_types) {  // string, grad_req='write'
+    CHECK_EQ(req_map.count(provided_grad_req_types[0]), 1U)
+      << "grad_req=" << provided_grad_req_types[0] << " is not a valid input in simple_bind; "
+      "only \'null\', \'write\', and \'add\' are supported";
+    grad_req_type = "string";
+  } else if (provided_grad_req_list_len > 0
+      && nullptr == provided_grad_req_names
+      && nullptr != provided_grad_req_types) {  // list, grad_req=['null', 'write']
+    grad_req_type = "list";
+    CHECK_EQ(provided_grad_req_list_len, in_arg_names.size())
+      << "The length of grad_req list does not match the number of input arguments in simple_bind, "
+      "expected " << in_arg_names.size() << ", provided " << provided_grad_req_list_len;
+  } else if (provided_grad_req_list_len > 0
+      && nullptr != provided_grad_req_names
+      && nullptr != provided_grad_req_types) {  // dict, grad_req=['lhs': 'null', 'rhs': 'write']
+    grad_req_type = "dict";
+    provided_grad_req_map.reserve(provided_grad_req_list_len);
+    for (mx_uint i = 0; i < provided_grad_req_list_len; ++i) {
+      CHECK_EQ(req_map.count(provided_grad_req_types[i]), 1U)
+        << "grad_req=" << provided_grad_req_types[i] << " is not a valid input in simple_bind; "
+        "only \'null\', \'write\', and \'add\' are supported";
+      provided_grad_req_map[provided_grad_req_names[i]] = provided_grad_req_types[i];
+    }
+  } else {  // grad_req is None
+    grad_req_type = "none";
+  }
+
+  // initialize arg_grad_ctx_vec and grad_req_type_vec
+  std::vector<Context> arg_grad_ctx_vec(in_arg_names.size(), ctx);
+  std::vector<OpReqType> grad_req_type_vec(in_arg_names.size(), kNullOp);
+  if ("none" != grad_req_type) {
+    for (size_t i = 0; i < in_arg_names.size(); ++i) {
+      OpReqType cur_req = kNullOp;
+      if ("string" == grad_req_type) {
+        cur_req = req_map.at(provided_grad_req_types[0]);
+      } else if ("list" == grad_req_type) {
+        CHECK_EQ(req_map.count(provided_grad_req_types[i]), 1U)
+          << "grad_req=" << provided_grad_req_types[i] << " is not a valid input in simple_bind; "
+          "only \'null\', \'write\', and \'add\' are supported";
+        cur_req = req_map.at(provided_grad_req_types[i]);
+      } else if ("dict" == grad_req_type) {
+        const auto it = provided_grad_req_map.find(in_arg_names[i]);
+        if (it != provided_grad_req_map.end()) {
+          cur_req = req_map.at(it->second);
+        }
+      }
+      if (kNullOp != cur_req) {
+        arg_grad_ctx_vec[i] = in_arg_ctx_vec[i];
+        grad_req_type_vec[i] = static_cast<OpReqType>(cur_req);
+      }
+    }
+  }
+
+  // create shape map for in_args and aux_states
+  std::unordered_map<std::string, mxnet::TShape> arg_shape_map(num_provided_arg_shapes);
+  for (mx_uint i = 0; i < num_provided_arg_shapes; ++i) {
+    auto p = arg_shape_map.emplace(provided_arg_shape_names[i],
+        mxnet::TShape(provided_arg_shape_data+provided_arg_shape_idx[i],
+          provided_arg_shape_data+provided_arg_shape_idx[i+1]));
+    CHECK(p.second) << "Duplicate shapes are provided for argument "
+      << provided_arg_shape_names[i] << " in simple_bind";
+  }
+  if (!Imperative::Get()->is_np_comp()) {
+    for (auto &kv : arg_shape_map) {
+      common::ConvertToNumpyShape(&kv.second);
+    }
+  }
+
+  // create para name set for sharing data array memory
+  std::unordered_set<std::string> shared_arg_name_set(num_shared_arg_names);
+  for (mx_uint i = 0; i < num_shared_arg_names; ++i) {
+    shared_arg_name_set.insert(shared_arg_name_list[i]);
+  }
+
+  // create shared_buffer_map
+  std::unordered_map<std::string, NDArray> shared_buffer_map;
+  bool use_shared_buffer = (*shared_buffer_len >= 0);
+  if (*shared_buffer_len > 0) {
+    // create shared_buffer_map
+    shared_buffer_map.reserve(*shared_buffer_len);
+    NDArray** shared_buffer_ptrs =
+      reinterpret_cast<NDArray**>(shared_buffer_handle_list);
+    for (int i = 0; i < *shared_buffer_len; ++i) {
+      shared_buffer_map[shared_buffer_name_list[i]] = *(shared_buffer_ptrs[i]);
+    }
+  }
+
+  // create temporary place holders for the initialized NDArrays
+  // to be passed back to front end
+  std::vector<NDArray> in_arg_vec;
+  std::vector<NDArray> arg_grad_vec;
+  std::vector<NDArray> aux_state_vec;
+#if MXNET_USE_TENSORRT
+  // If we've built with TensorRT support we by default return an TRTExecutor.
+  // Users can override this behaviour via env var, which is useful for example for A/B
+  // performance testing.
+  if (dmlc::GetEnv("MXNET_USE_TENSORRT", false)) {
+    *out = exec::TrtGraphExecutor::TensorRTBind(*sym, ctx, ctx_map, &in_arg_ctx_vec,
+                                                &arg_grad_ctx_vec, &aux_state_ctx_vec,
+                                                &arg_shape_map, &arg_dtype_map, &arg_stype_map,
+                                                &grad_req_type_vec, shared_arg_name_set,
+                                                &in_arg_vec, &arg_grad_vec, &aux_state_vec,
+                                                use_shared_buffer ? &shared_buffer_map : nullptr,
+                                                reinterpret_cast<Executor*>(shared_exec_handle));
+  } else {
+    // Checks to see if this env var has been set to true or false by the user.
+    // If the user is using a TensorRT build, but has not enabled TRT at inference time, warn
+    // them and describe further steps.
+    const int unset_indicator =  std::numeric_limits<int>::quiet_NaN();
+    if (dmlc::GetEnv("MXNET_USE_TENSORRT", unset_indicator) == unset_indicator) {
+      LOG(INFO) << "TensorRT not enabled by default.  Please set the MXNET_USE_TENSORRT "
+                   "environment variable to 1 or call mx.contrib.tensorrt.set_use_tensorrt(True) "
+                   "to enable.";
+    }
+#endif  // MXNET_USE_TENSORRT
+    *out = Executor::SimpleBind(*sym, ctx, ctx_map, in_arg_ctx_vec, arg_grad_ctx_vec,
+                                aux_state_ctx_vec, arg_shape_map, arg_dtype_map, arg_stype_map,
+                                grad_req_type_vec, shared_arg_name_set, &in_arg_vec,
+                                &arg_grad_vec, &aux_state_vec,
+                                use_shared_buffer ? &shared_buffer_map : nullptr,
+                                reinterpret_cast<Executor*>(shared_exec_handle));
+#if MXNET_USE_TENSORRT
+  }
+#endif  // MXNET_USE_TENSORRT
+
+  // copy ndarray ptrs to ret->handles so that front end
+  // can access them
+  ret->ret_handles.clear();
+  ret->ret_handles.reserve(in_arg_vec.size()+arg_grad_vec.size()+aux_state_vec.size()
+                           +shared_buffer_map.size());
+  size_t nd_idx = 0;
+  for (const auto& nd : in_arg_vec) {
+    if (nd.is_none()) {
+      LOG(FATAL) << "Input argument NDArray cannot be un-allocated";
+    }
+    ret->ret_handles.push_back(new NDArray(nd));
+  }
+  if (in_arg_vec.size() > 0) {
+    *num_in_args = in_arg_vec.size();
+    *in_args = &(ret->ret_handles[nd_idx]);
+    nd_idx = ret->ret_handles.size();
+  }
+
+  for (const auto& nd : arg_grad_vec) {
+    if (nd.is_none()) {
+      ret->ret_handles.push_back(nullptr);
+    } else {
+      ret->ret_handles.push_back(new NDArray(nd));
+    }
+  }
+  if (arg_grad_vec.size() > 0) {
+    *arg_grads = &(ret->ret_handles[nd_idx]);
+    nd_idx = ret->ret_handles.size();
+  }
+
+  for (const auto& nd : aux_state_vec) {
+    if (nd.is_none()) {
+      LOG(FATAL) << "Auxiliary argument NDArray cannot be un-allocated";
+    }
+    ret->ret_handles.push_back(new NDArray(nd));
+  }
+  if (aux_state_vec.size() > 0) {
+    *num_aux_states = aux_state_vec.size();
+    *aux_states = &(ret->ret_handles[nd_idx]);
+    nd_idx = ret->ret_handles.size();
+  }
+
+  if (use_shared_buffer) {
+    ret->ret_vec_str.clear();
+    ret->ret_vec_str.reserve(shared_buffer_map.size());
+    ret->ret_vec_charp.clear();
+    ret->ret_vec_charp.reserve(shared_buffer_map.size());
+    for (const auto& kv : shared_buffer_map) {
+      if (kv.second.is_none()) {
+        LOG(FATAL) << "Shared data NDArray cannot be un-allocated";
+      }
+      ret->ret_handles.push_back(new NDArray(kv.second));
+      ret->ret_vec_str.emplace_back(kv.first);
+      ret->ret_vec_charp.push_back(ret->ret_vec_str.back().c_str());
+    }
+    *shared_buffer_len = shared_buffer_map.size();
+    *updated_shared_buffer_handle_list = &(ret->ret_handles[nd_idx]);
+    *updated_shared_buffer_name_list = &(ret->ret_vec_charp[0]);
+  }
+
+  API_END();
+}
+
 int MXExecutorReshape(int partial_shaping,
                       int allow_up_sizing,
                       int dev_type,
@@ -554,7 +914,7 @@ int MXExecutorReshape(int partial_shaping,
                       const int* map_dev_ids,
                       const mx_uint num_provided_arg_shapes,
                       const char** provided_arg_shape_names,
-                      const int* provided_arg_shape_data,
+                      const mx_uint* provided_arg_shape_data,
                       const mx_uint* provided_arg_shape_idx,
                       mx_uint* num_in_args,
                       NDArrayHandle** in_args,
@@ -635,6 +995,97 @@ int MXExecutorReshape(int partial_shaping,
   API_END_HANDLE_ERROR(delete new_exec);
 }
 
+int MXExecutorReshapeEx(int partial_shaping,
+                        int allow_up_sizing,
+                        int dev_type,
+                        int dev_id,
+                        mx_uint num_map_keys,
+                        const char** map_keys,
+                        const int* map_dev_types,
+                        const int* map_dev_ids,
+                        const mx_uint num_provided_arg_shapes,
+                        const char** provided_arg_shape_names,
+                        const int* provided_arg_shape_data,
+                        const mx_uint* provided_arg_shape_idx,
+                        mx_uint* num_in_args,
+                        NDArrayHandle** in_args,
+                        NDArrayHandle** arg_grads,
+                        mx_uint* num_aux_states,
+                        NDArrayHandle** aux_states,
+                        ExecutorHandle shared_exec,
+                        ExecutorHandle *out) {
+  Executor* new_exec = nullptr;
+
+  MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
+  API_BEGIN();
+  *out = nullptr;  // ensure we can know whether to free executor on early abort
+  // create shape map for in_args and aux_states
+  std::unordered_map<std::string, mxnet::TShape> kwargs(num_provided_arg_shapes);
+  for (mx_uint i = 0; i < num_provided_arg_shapes; ++i) {
+    auto p = kwargs.emplace(provided_arg_shape_names[i],
+        mxnet::TShape(provided_arg_shape_data+provided_arg_shape_idx[i],
+          provided_arg_shape_data+provided_arg_shape_idx[i+1]));
+    CHECK(p.second) << "Duplicate shapes are provided for argument "
+      << provided_arg_shape_names[i] << " in reshape of executor";
+  }
+
+  Context ctx = Context::Create(static_cast<Context::DeviceType>(dev_type), dev_id);
+  std::map<std::string, Context> ctx_map;
+  for (mx_uint i = 0; i < num_map_keys; ++i) {
+    ctx_map[std::string(map_keys[i])] = Context::Create(
+        static_cast<Context::DeviceType>(map_dev_types[i]), map_dev_ids[i]);
+  }
+  std::vector<NDArray> in_arg_vec;
+  std::vector<NDArray> arg_grad_vec;
+  std::vector<NDArray> aux_state_vec;
+
+  Executor* exec = static_cast<Executor*>(shared_exec);
+  new_exec = exec->Reshape(partial_shaping, allow_up_sizing, ctx, ctx_map, kwargs,
+                       &in_arg_vec, &arg_grad_vec, &aux_state_vec);
+  *out = new_exec;
+
+  ret->ret_handles.clear();
+  ret->ret_handles.reserve(in_arg_vec.size()+arg_grad_vec.size()+aux_state_vec.size());
+
+  size_t nd_idx = 0;
+  for (const auto& nd : in_arg_vec) {
+    if (nd.is_none()) {
+      LOG(FATAL) << "Input argument NDArray cannot be un-allocated";
+    }
+    ret->ret_handles.push_back(new NDArray(nd));
+  }
+  if (in_arg_vec.size() > 0) {
+    *num_in_args = in_arg_vec.size();
+    *in_args = &(ret->ret_handles[nd_idx]);
+    nd_idx = ret->ret_handles.size();
+  }
+
+  for (const auto& nd : arg_grad_vec) {
+    if (nd.is_none()) {
+      ret->ret_handles.push_back(nullptr);
+    } else {
+      ret->ret_handles.push_back(new NDArray(nd));
+    }
+  }
+  if (arg_grad_vec.size() > 0) {
+    *arg_grads = &(ret->ret_handles[nd_idx]);
+    nd_idx = ret->ret_handles.size();
+  }
+
+  for (const auto& nd : aux_state_vec) {
+    if (nd.is_none()) {
+      LOG(FATAL) << "Auxiliary argument NDArray cannot be un-allocated";
+    }
+    ret->ret_handles.push_back(new NDArray(nd));
+  }
+  if (aux_state_vec.size() > 0) {
+    *num_aux_states = aux_state_vec.size();
+    *aux_states = &(ret->ret_handles[nd_idx]);
+    nd_idx = ret->ret_handles.size();
+  }
+  API_END_HANDLE_ERROR(delete new_exec);
+}
+
 int MXExecutorGetOptimizedSymbol(ExecutorHandle handle,
                                  SymbolHandle *out) {
   auto s = new nnvm::Symbol();
diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc
index 98034417abae..24a88520376f 100644
--- a/src/c_api/c_api_symbolic.cc
+++ b/src/c_api/c_api_symbolic.cc
@@ -506,16 +506,16 @@ int MXSymbolInferShape(SymbolHandle sym,
                        mx_uint num_args,
                        const char** keys,
                        const mx_uint *arg_ind_ptr,
-                       const int *arg_shape_data,
+                       const mx_uint *arg_shape_data,
                        mx_uint *in_shape_size,
-                       const int **in_shape_ndim,
-                       const int ***in_shape_data,
+                       const mx_uint **in_shape_ndim,
+                       const mx_uint ***in_shape_data,
                        mx_uint *out_shape_size,
-                       const int **out_shape_ndim,
-                       const int ***out_shape_data,
+                       const mx_uint **out_shape_ndim,
+                       const mx_uint ***out_shape_data,
                        mx_uint *aux_shape_size,
-                       const int **aux_shape_ndim,
-                       const int ***aux_shape_data,
+                       const mx_uint **aux_shape_ndim,
+                       const mx_uint ***aux_shape_data,
                        int *complete) {
   nnvm::Symbol *s = static_cast<nnvm::Symbol*>(sym);
   MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
@@ -575,20 +575,93 @@ int MXSymbolInferShape(SymbolHandle sym,
   API_END();
 }
 
+int MXSymbolInferShapeEx(SymbolHandle sym,
+                         mx_uint num_args,
+                         const char** keys,
+                         const mx_uint *arg_ind_ptr,
+                         const int *arg_shape_data,
+                         mx_uint *in_shape_size,
+                         const int **in_shape_ndim,
+                         const int ***in_shape_data,
+                         mx_uint *out_shape_size,
+                         const int **out_shape_ndim,
+                         const int ***out_shape_data,
+                         mx_uint *aux_shape_size,
+                         const int **aux_shape_ndim,
+                         const int ***aux_shape_data,
+                         int *complete) {
+  nnvm::Symbol *s = static_cast<nnvm::Symbol*>(sym);
+  MXAPIThreadLocalEntry *ret = MXAPIThreadLocalStore::Get();
+  API_BEGIN();
+  nnvm::Graph g = Symbol2Graph(*s);
+  mxnet::ShapeVector arg_shapes(g.indexed_graph().input_nodes().size(), mxnet::TShape());
+  if (keys == nullptr && num_args != 0) {
+    std::vector<uint32_t> read_only_args = mxnet::ReadOnlyArgIndices(g.indexed_graph());
+    CHECK_LE(num_args, read_only_args.size());
+    for (mx_uint i = 0; i < num_args; ++i) {
+      arg_shapes[read_only_args[i]] = mxnet::ShapeTypeCast(
+          arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]);
+    }
+  } else {
+    std::unordered_map<std::string, mxnet::TShape> kwargs;
+    for (mx_uint i = 0; i < num_args; ++i) {
+      kwargs[keys[i]] = mxnet::ShapeTypeCast(
+          arg_shape_data + arg_ind_ptr[i], arg_shape_data + arg_ind_ptr[i+1]);
+    }
+    mxnet::MatchArguments(g.indexed_graph(), kwargs, &arg_shapes, "InferShape");
+  }
+
+  try {
+    g = mxnet::exec::InferShape(std::move(g), std::move(arg_shapes), "__shape__");
+  } catch (const mxnet::op::InferShapeError &err) {
+    throw dmlc::Error(err.msg);
+  }
+
+  // if use legacy shape definition, need to convert numpy shape to legacy shape
+  mxnet::ShapeVector shapes = g.GetAttr<mxnet::ShapeVector>("shape");
+  if (!Imperative::Get()->is_np_comp()) {
+    common::ConvertToLegacyShape(&shapes);
+  }
+
+  // copy back
+  CopyAttr(g.indexed_graph(), shapes,
+           &(ret->arg_shapes), &(ret->out_shapes), &(ret->aux_shapes));
+
+  // copy data back
+  MXAPIThreadLocalEntry::SetupShapeArrayReturnWithBufferEx(ret->arg_shapes,
+      &(ret->arg_shape_ndim_ex), &(ret->arg_shape_data_ex), &(ret->arg_shape_buffer_ex));
+  MXAPIThreadLocalEntry::SetupShapeArrayReturnWithBufferEx(ret->out_shapes,
+      &(ret->out_shape_ndim_ex), &(ret->out_shape_data_ex), &(ret->out_shape_buffer_ex));
+  MXAPIThreadLocalEntry::SetupShapeArrayReturnWithBufferEx(ret->aux_shapes,
+      &(ret->aux_shape_ndim_ex), &(ret->aux_shape_data_ex), &(ret->aux_shape_buffer_ex));
+  *in_shape_size = static_cast<mx_uint>(ret->arg_shapes.size());
+  *in_shape_ndim = dmlc::BeginPtr(ret->arg_shape_ndim_ex);
+  *in_shape_data = dmlc::BeginPtr(ret->arg_shape_data_ex);
+  *out_shape_size = static_cast<mx_uint>(ret->out_shapes.size());
+  *out_shape_ndim = dmlc::BeginPtr(ret->out_shape_ndim_ex);
+  *out_shape_data = dmlc::BeginPtr(ret->out_shape_data_ex);
+  *aux_shape_size = static_cast<mx_uint>(ret->aux_shapes.size());
+  *aux_shape_ndim = dmlc::BeginPtr(ret->aux_shape_ndim_ex);
+  *aux_shape_data = dmlc::BeginPtr(ret->aux_shape_data_ex);
+  // mark complete
+  *complete = (g.GetAttr<size_t>("shape_num_unknown_nodes") == 0);
+  API_END();
+}
+
 int MXSymbolInferShapePartial(SymbolHandle sym,
                               mx_uint num_args,
                               const char** keys,
                               const mx_uint *arg_ind_ptr,
-                              const int *arg_shape_data,
+                              const mx_uint *arg_shape_data,
                               mx_uint *in_shape_size,
-                              const int **in_shape_ndim,
-                              const int ***in_shape_data,
+                              const mx_uint **in_shape_ndim,
+                              const mx_uint ***in_shape_data,
                               mx_uint *out_shape_size,
-                              const int **out_shape_ndim,
-                              const int ***out_shape_data,
+                              const mx_uint **out_shape_ndim,
+                              const mx_uint ***out_shape_data,
                               mx_uint *aux_shape_size,
-                              const int **aux_shape_ndim,
-                              const int ***aux_shape_data,
+                              const mx_uint **aux_shape_ndim,
+                              const mx_uint ***aux_shape_data,
                               int *complete) {
   int succ;
   *complete = 1;
@@ -600,6 +673,31 @@ int MXSymbolInferShapePartial(SymbolHandle sym,
                             &succ);
 }
 
+int MXSymbolInferShapePartialEx(SymbolHandle sym,
+                                mx_uint num_args,
+                                const char** keys,
+                                const mx_uint *arg_ind_ptr,
+                                const int *arg_shape_data,
+                                mx_uint *in_shape_size,
+                                const int **in_shape_ndim,
+                                const int ***in_shape_data,
+                                mx_uint *out_shape_size,
+                                const int **out_shape_ndim,
+                                const int ***out_shape_data,
+                                mx_uint *aux_shape_size,
+                                const int **aux_shape_ndim,
+                                const int ***aux_shape_data,
+                                int *complete) {
+  int succ;
+  *complete = 1;
+  return MXSymbolInferShapeEx(sym, num_args, keys,
+                              arg_ind_ptr, arg_shape_data,
+                              in_shape_size, in_shape_ndim, in_shape_data,
+                              out_shape_size, out_shape_ndim, out_shape_data,
+                              aux_shape_size, aux_shape_ndim, aux_shape_data,
+                              &succ);
+}
+
 int MXSymbolInferType(SymbolHandle sym,
                       mx_uint num_args,
                       const char** keys,

From c8d2cce8faa11d71620988209fc6f0e95579254d Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Thu, 11 Apr 2019 15:52:18 -0700
Subject: [PATCH 23/32] Fix lint

---
 include/mxnet/c_api.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h
index 7907265a68a9..0acfde0686d4 100644
--- a/include/mxnet/c_api.h
+++ b/include/mxnet/c_api.h
@@ -786,8 +786,8 @@ MXNET_DLL int MXNDArrayGetShape(NDArrayHandle handle,
  * \return 0 when success, -1 when failure happens
  */
 MXNET_DLL int MXNDArrayGetShapeEx(NDArrayHandle handle,
-																	int *out_dim,
-																	const int **out_pdata);
+                                  int *out_dim,
+                                  const int **out_pdata);
 /*!
  * \brief get the content of the data in NDArray
  * \param handle the handle to the ndarray

From a4a2841bf62b6634525cb1c3f707f71ac75a667d Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Thu, 11 Apr 2019 16:26:29 -0700
Subject: [PATCH 24/32] Rebase and fix build

---
 src/operator/nn/mkldnn/mkldnn_transpose.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_transpose.cc b/src/operator/nn/mkldnn/mkldnn_transpose.cc
index 0986d0616f75..eec19bababb7 100644
--- a/src/operator/nn/mkldnn/mkldnn_transpose.cc
+++ b/src/operator/nn/mkldnn/mkldnn_transpose.cc
@@ -55,9 +55,9 @@ class MKLDNNTransposeForward {
     auto shape = data.shape();
     auto data_ndim = shape.ndim();
     auto axes_ndim = param.axes.ndim();
-    auto axes = mxnet::TShape(data_ndim);
+    auto axes = mxnet::TShape(data_ndim, -1);
     if (axes_ndim == 0) {
-      for (size_t i = 0; i < data_ndim; i++) {
+      for (int i = 0; i < data_ndim; i++) {
         axes[i] = data_ndim - i - 1;
       }
     } else {
@@ -79,7 +79,7 @@ class MKLDNNTransposeForward {
     dst_fmt.data_type = mkldnn_f32;
     dst_fmt.format = mkldnn_blocked;
 
-    for (size_t i = 0; i < data_ndim; i++)
+    for (int i = 0; i < data_ndim; i++)
       dst_fmt.dims[i] = shape[i];
 
     unsigned int total_stride = 1;

From a07bf8418f556d15109ebd52eb4de58a6e4a6e30 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Thu, 11 Apr 2019 20:58:57 -0700
Subject: [PATCH 25/32] Fix R build failure

---
 R-package/src/symbol.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/src/symbol.cc b/R-package/src/symbol.cc
index 90ff9ef1dd67..317e82568012 100644
--- a/R-package/src/symbol.cc
+++ b/R-package/src/symbol.cc
@@ -207,7 +207,7 @@ SEXP Symbol::InferShape(const Rcpp::List& kwargs) const {
   const int **aux_shape_data;
   int complete;
 
-  MX_CALL(MXSymbolInferShape(
+  MX_CALL(MXSymbolInferShapeEx(
       handle_, static_cast<mx_uint>(kwargs.size()), dmlc::BeginPtr(c_keys),
       dmlc::BeginPtr(arg_ind_ptr), dmlc::BeginPtr(arg_shape_data),
       &in_shape_size, &in_shape_ndim, &in_shape_data,

From c204cd5727ab51887d7457a3fceb3f45a4bb7caf Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Thu, 11 Apr 2019 20:59:41 -0700
Subject: [PATCH 26/32] Fix Perl build failure

---
 perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm | 2 +-
 perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm  | 4 ++--
 perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm   | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm
index 573abbf588f2..5844302fce16 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Executor.pm
@@ -471,7 +471,7 @@ method reshape(HashRef[Shape] $kwargs, Int :$partial_shaping=0, Int :$allow_up_s
     my $shared_handle = $self->handle;
 
     my ($in_args_and_grad_handles, $aux_state_handles, $handle) = check_call(
-        AI::MXNetCAPI::ExecutorReshape(
+        AI::MXNetCAPI::ExecutorReshapeEx(
             $partial_shaping,
             $allow_up_sizing,
             $self->_ctx->device_type_id,
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
index 72f6cc772178..f466aaa11a3d 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/NDArray.pm
@@ -535,7 +535,7 @@ method wait_to_read()
 
 method shape()
 {
-    return scalar(check_call(AI::MXNetCAPI::NDArrayGetShape($self->handle)));
+    return scalar(check_call(AI::MXNetCAPI::NDArrayGetShapeEx($self->handle)));
 }
 
 =head2 size
@@ -1460,7 +1460,7 @@ func _new_alloc_handle($shape, $ctx, $delay_alloc, $dtype)
 method _new_from_shared_mem($shared_pid, $shared_id, $shape, $dtype)
 {
     my $hdl = check_call(
-        AI::MXNetCAPI::NDArrayCreateFromSharedMem(
+        AI::MXNetCAPI::NDArrayCreateFromSharedMemEx(
             $shared_pid,
             $shared_id,
             $shape,
diff --git a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
index 04dd1cbfc441..e4953f17031a 100644
--- a/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
+++ b/perl-package/AI-MXNet/lib/AI/MXNet/Symbol.pm
@@ -662,7 +662,7 @@ method _infer_shape_impl(Maybe[Str|Shape] @args)
             push @{ $indptr }, scalar(@{ $sdata });
         }
     }
-    my $infer_func = $partial ? \&AI::MXNetCAPI::SymbolInferShapePartial : \&AI::MXNetCAPI::SymbolInferShape;
+    my $infer_func = $partial ? \&AI::MXNetCAPI::SymbolInferShapePartialEx : \&AI::MXNetCAPI::SymbolInferShapeEx;
     my ($arg_shapes, $out_shapes, $aux_shapes, $complete) = check_call(
         $infer_func->(
             $self->handle,
@@ -937,7 +937,7 @@ method simple_bind(
         ($updated_shared_data, $in_arg_handles, $arg_grad_handles, $aux_state_handles, $exe_handle)
             =
         check_call(
-            AI::MXNetCAPI::ExecutorSimpleBind(
+            AI::MXNetCAPI::ExecutorSimpleBindEx(
                 $self->handle,
                 $ctx->device_type_id,
                 $ctx->device_id,

From c876bc2413d743e9c926acc8e48ca3011a757bb7 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Sat, 13 Apr 2019 14:01:16 -0700
Subject: [PATCH 27/32] Rebase with master

---
 src/operator/rnn.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc
index 74c563afceb1..7012a3c22f50 100644
--- a/src/operator/rnn.cc
+++ b/src/operator/rnn.cc
@@ -50,7 +50,7 @@ static bool RNNShape(const nnvm::NodeAttrs& attrs,
       "Needed input:[data, parameters, state], got in_shape->size(): " << in_shape->size();
   }
   const TShape &dshape = (*in_shape)[rnn_enum::kData];
-  if (dshape.ndim() ==  0) return false;
+  if (!mxnet::ndim_is_known(dshape)) return false;
   CHECK_EQ(dshape.ndim(), 3U) \
       << "Input data should be rank-3 tensor of dim [sequence length, batch size, input size]";
   // data: [sequence len, batch, input dimension]

From b864c4bdbb02a41694c882c814367ee31bb5acbd Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Mon, 15 Apr 2019 10:30:22 -0700
Subject: [PATCH 28/32] Address cr comments

---
 cpp-package/include/mxnet-cpp/symbol.hpp |  2 +-
 include/mxnet/tuple.h                    | 40 +++++++++++++-----------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/cpp-package/include/mxnet-cpp/symbol.hpp b/cpp-package/include/mxnet-cpp/symbol.hpp
index 7bdbe52ddc00..2e3fb7a2d5de 100644
--- a/cpp-package/include/mxnet-cpp/symbol.hpp
+++ b/cpp-package/include/mxnet-cpp/symbol.hpp
@@ -200,7 +200,7 @@ inline void Symbol::InferShape(
   arg_ind_ptr.push_back(arg_shape_data.size());
 
   mx_uint in_shape_size;
-  const int*in_shape_ndim;
+  const int *in_shape_ndim;
   const int **in_shape_data;
   mx_uint out_shape_size;
   const int *out_shape_ndim;
diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h
index c5c0ccd548df..8431bbb23b96 100644
--- a/include/mxnet/tuple.h
+++ b/include/mxnet/tuple.h
@@ -366,6 +366,19 @@ class Tuple {
   }
 };
 
+
+/*! brief check if a shape's ndim is known. */
+inline bool ndim_is_known(const int ndim) {
+  CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim;
+  return ndim != -1;
+}
+
+/*! brief check if a shape's dim size is known. */
+inline bool dim_size_is_known(const dim_t dim_size) {
+  CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size;
+  return dim_size != -1;
+}
+
 /*!
  * \brief A Shape class that is used to represent shape of each tensor.
  *
@@ -461,11 +474,11 @@ class TShape : public Tuple<dim_t> {
   }
   /*! \return total number of elements in the shape */
   inline size_t Size() const {
-    CHECK_GE(this->ndim(), 0) << "Shape is unknown.";
+    CHECK(ndim_is_known(this->ndim())) << "Shape is unknown.";
     dim_t size = 1;
     const dim_t* start = begin(), *fin = end();
     for (const dim_t* it = start; it != fin; ++it) {
-      CHECK_GE(*it, 0) << "Shape dim size cannot be a negative value " << *it;
+      CHECK(dim_size_is_known(*it)) << "Shape dim size cannot be a negative value " << *it;
       size *= *it;
     }
     return size;
@@ -476,11 +489,14 @@ class TShape : public Tuple<dim_t> {
    * \param dimend end dimension
    */
   inline size_t ProdShape(int dimstart, int dimend) const {
-    CHECK_GE(this->ndim(), 0) << "Shape is unknown.";
+    CHECK(ndim_is_known(this->ndim())) << "Shape is unknown.";
+    CHECK_GE(dimstart, 0) << "dimstart must be >= 0, while received " << dimstart;
+    CHECK_LE(dimend, this->ndim()) << "dimend must be <= " << this->ndim()
+                                   << ", while received " << dimend;
     dim_t num = 1;
     const dim_t *d = this->data();
     for (int i = dimstart; i < dimend; ++i) {
-      CHECK_GE(d[i], 0) << "Shape dim size cannot be a negative value " << d[i];
+      CHECK(dim_size_is_known(d[i])) << "Shape dim size must be known, while received " << d[i];
       num *= d[i];
     }
     return num;
@@ -536,7 +552,7 @@ class TShape : public Tuple<dim_t> {
    */
   inline mshadow::Shape<2> FlatTo2D(void) const {
     mshadow::Shape<2> s;
-    CHECK_GE(ndim(), 0);
+    CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim";
     if (ndim() == 0) return mshadow::Shape2(1, 1);
     const dim_t *d = this->data();
     s.shape_[1] = d[ndim() - 1];
@@ -556,7 +572,7 @@ class TShape : public Tuple<dim_t> {
   inline mshadow::Shape<3> FlatTo3D(size_t axis_begin, size_t axis_end) const {
     CHECK(axis_end >= axis_begin);
     mshadow::Shape<3> s;
-    CHECK_GE(ndim(), 0);
+    CHECK(ndim_is_known(ndim())) << "shape must have a valid ndim";
     if (ndim() == 0) return mshadow::Shape3(1, 1, 1);
     const dim_t *d = this->data();
     s.shape_[0] = 1;
@@ -615,23 +631,11 @@ class TShape : public Tuple<dim_t> {
 #endif
 };
 
-/*! brief check if a shape's ndim is known. */
-inline bool ndim_is_known(const int ndim) {
-  CHECK_GE(ndim, -1) << "shape ndim must be >= -1, while received " << ndim;
-  return ndim != -1;
-}
-
 /*! brief check if a shape's ndim is known. */
 inline bool ndim_is_known(const TShape& x) {
   return ndim_is_known(x.ndim());
 }
 
-/*! brief check if a shape's dim size is known. */
-inline bool dim_size_is_known(const dim_t dim_size) {
-  CHECK_GE(dim_size, -1) << "shape dim size must be >= -1, while received " << dim_size;
-  return dim_size != -1;
-}
-
 /*! brief check if a shape's dim size is known. */
 inline bool dim_size_is_known(const TShape& x, const int idx) {
   CHECK(idx >= 0 && idx < x.ndim())

From 69791f107c11e9b9cea7acad06c57c525444583b Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Mon, 15 Apr 2019 10:53:53 -0700
Subject: [PATCH 29/32] Use just one scope to represent numpy compatibility

---
 python/mxnet/__init__.py                  |  2 +-
 python/mxnet/base.py                      | 19 +++++--------------
 tests/python/gpu/test_operator_gpu.py     |  4 ++--
 tests/python/unittest/test_infer_shape.py |  2 +-
 tests/python/unittest/test_ndarray.py     |  2 +-
 tests/python/unittest/test_operator.py    | 12 ++++++------
 6 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index 99f565ad309a..f4884d62fcd2 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -23,7 +23,7 @@
 
 from .context import Context, current_context, cpu, gpu, cpu_pinned
 from . import engine
-from .base import MXNetError, is_np_compat, set_np_compat, enable_np_compat, disable_np_compat
+from .base import MXNetError, is_np_compat, set_np_compat, np_compat
 from . import base
 from . import contrib
 from . import ndarray
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index b056b73d308e..bd1eceacac66 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -30,7 +30,7 @@
 
 from . import libinfo
 
-__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'enable_np_compat', 'disable_np_compat']
+__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'np_compat']
 #----------------------------
 # library loading
 #----------------------------
@@ -791,13 +791,13 @@ def __exit__(self, ptype, value, trace):
             set_np_compat(self._prev_is_np_compat)
 
 
-def enable_np_compat():
+def np_compat(active=True):
     """Returns a NumPy compatibility state scope to be used in 'with' statement
     and captures code that needs the compatibility.
 
     Example::
 
-        with mx.enable_np_compat():
+        with mx.np_compat(active=True):
             # A scalar tensor's shape is `()`, whose `ndim` is `0`.
             scalar = mx.nd.ones(shape=())
             assert scalar.shape == ()
@@ -823,17 +823,8 @@ def enable_np_compat():
             arg_shapes, out_shapes, _ = ret.infer_shape_partial()
             assert arg_shapes[0] is None
             assert out_shapes[0] is None
-    """
-    return _NumpyCompatibilityStateScope(True)
-
-
-def disable_np_compat():
-    """Returns a state scope with NumPy-compatibility disabled to be used in 'with' statement
-    and captures code that does not need the compatibility.
-
-    Example::
 
-        with mx.disable_np_compat():
+        with mx.np_compat(active=False):
             # 0 means unknown shape dimension size in the legacy shape definition.
             data = mx.sym.var("data", shape=(0, 2, 3))
             ret = mx.sym.sin(data)
@@ -849,4 +840,4 @@ def disable_np_compat():
             assert arg_shapes[0] == ()
             assert out_shapes[0] == ()
     """
-    return _NumpyCompatibilityStateScope(False)
+    return _NumpyCompatibilityStateScope(active)
diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index cb1248e46a85..19fc1eca89ce 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -1963,14 +1963,14 @@ def check_proposal_consistency(op, batch_size, with_nms=False):
 # The following 2 functions launch 0-thread kernels, an error that should be caught and signaled.
 def kernel_error_check_imperative():
     os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
-    with mx.enable_np_compat():
+    with mx.np_compat(active=True):
         a = mx.nd.array([1,2,3],ctx=mx.gpu(0))
         b = mx.nd.array([],ctx=mx.gpu(0))
         c = (a / b).asnumpy()
 
 def kernel_error_check_symbolic():
     os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
-    with mx.enable_np_compat():
+    with mx.np_compat(active=True):
         a = mx.sym.Variable('a')
         b = mx.sym.Variable('b')
         c = a / b
diff --git a/tests/python/unittest/test_infer_shape.py b/tests/python/unittest/test_infer_shape.py
index e4a3e5a3d86f..612861bd8303 100644
--- a/tests/python/unittest/test_infer_shape.py
+++ b/tests/python/unittest/test_infer_shape.py
@@ -154,7 +154,7 @@ def test_shape_completely_unknown():
     assert arg_shapes[0] == ()
     assert out_shapes[0] == ()
 
-    with mx.enable_np_compat():
+    with mx.np_compat():
         data = mx.sym.var("data")
         ret = mx.sym.sin(data)
         arg_shapes, out_shapes, _ = ret.infer_shape_partial()
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index fb0cac737ee0..94777677354d 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -123,7 +123,7 @@ def test_ndarray_setitem():
     # numpy assignment for empty axis
     for trivial_shape in [(), (1,), (1, 1), (1, 1, 1)]:
         if trivial_shape == tuple():
-            with mx.enable_np_compat():
+            with mx.np_compat():
                 x = mx.nd.zeros(trivial_shape)
         else:
             x = mx.nd.zeros(trivial_shape)
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index c5523c5b2268..191f3eef6aeb 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4403,7 +4403,7 @@ def test_invalid_reps():
         assert_exception(mx.nd.tile, MXNetError, data, (1, 0, 3))
 
     test_normal_case()
-    with mx.enable_np_compat():
+    with mx.np_compat():
         test_empty_tensor()
     test_empty_reps()
     test_tile_backward()
@@ -4464,7 +4464,7 @@ def test_zero_depth():
     test_normal_case(index_type=np.float64)
     test_normal_case(index_type=np.float32)
     test_normal_case(index_type=np.float16)
-    with mx.enable_np_compat():
+    with mx.np_compat():
         test_empty_indices()
     test_zero_depth()
 
@@ -6850,7 +6850,7 @@ def check_slice_axis_partial_infer(data, axis, begin, end, expected_out_shape):
     check_slice_axis_partial_infer(var1, 0, 0, 5, (5, 0))
     check_slice_axis_partial_infer(var1, 1, 0, 5, (10, 0))
 
-    with mx.enable_np_compat():
+    with mx.np_compat():
         var1 = mx.sym.var(name="data", shape=(-1, 20))
         check_slice_partial_infer(var1, (None, None), (None, 10), [], (-1, 10))
         check_slice_partial_infer(var1, (None, None), (None, 10), (None, 2), (-1, 5))
@@ -7916,7 +7916,7 @@ def test_image_normalize():
 def test_scalar_tensor_creation():
     assertRaises(MXNetError, mx.nd.zeros, shape=())
     assertRaises(MXNetError, mx.nd.ones, shape=())
-    with mx.enable_np_compat():
+    with mx.np_compat():
         data_mx = mx.nd.ones(shape=())
         data_np = np.ones((), dtype=data_mx.dtype)
         assert same(data_mx.asnumpy(), data_np)
@@ -7926,7 +7926,7 @@ def test_scalar_tensor_creation():
 def test_zero_size_tensor_creation():
     assertRaises(MXNetError, mx.nd.zeros, shape=(0, 1, 3, 0))
     assertRaises(MXNetError, mx.nd.ones, shape=(0, 1, 3, 0))
-    with mx.enable_np_compat():
+    with mx.np_compat():
         data_mx = mx.nd.ones(shape=(0, 1, 0, 4))
         data_np = np.ones(shape=data_mx.shape, dtype=data_mx.dtype)
         assert same(data_mx.asnumpy(), data_np)
@@ -7934,7 +7934,7 @@ def test_zero_size_tensor_creation():
 
 @with_seed()
 def test_concat_with_zero_size_tensor():
-    with mx.enable_np_compat():
+    with mx.np_compat():
         data1 = mx.nd.ones((0, 8, 12))
         data2 = mx.nd.ones((3, 8, 12))
         data3 = mx.nd.ones((0, 8, 12))

From ab08f35271c99586a4a2553dfb219218513752ad Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Mon, 15 Apr 2019 11:29:51 -0700
Subject: [PATCH 30/32] Add code comment to NumpyScope object in Scala

---
 .../core/src/main/scala/org/apache/mxnet/NumpyScope.scala    | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
index ec366ea4029d..d3e76f1044a7 100644
--- a/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
+++ b/scala-package/core/src/main/scala/org/apache/mxnet/NumpyScope.scala
@@ -19,6 +19,11 @@ package org.apache.mxnet
 
 import org.apache.mxnet.Base._
 
+/**
+  * NumpyScope object provides util functions for turning on/off NumPy compatibility
+  * and checking whether NumPy compatibility has been turned on/off. NumPy compatibility
+  * is introduced first to support zero-dim and zero-size tensors as in NumPy.
+  */
 object NumpyScope {
   def setNumpyCompatible(isNpComp: Boolean): Boolean = {
     val prev = new RefInt()

From b0cb2f18cfd7f14dc37630e0625d5228c8a3cc17 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Mon, 15 Apr 2019 14:51:38 -0700
Subject: [PATCH 31/32] Add use_np_compat decorator

---
 python/mxnet/__init__.py                  |  2 +-
 python/mxnet/base.py                      | 41 ++++++++++++++++++++---
 src/operator/tensor/broadcast_reduce_op.h |  4 +--
 tests/python/unittest/test_operator.py    | 32 ++++++++++++++++++
 4 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index f4884d62fcd2..79eb1f10f427 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -23,7 +23,7 @@
 
 from .context import Context, current_context, cpu, gpu, cpu_pinned
 from . import engine
-from .base import MXNetError, is_np_compat, set_np_compat, np_compat
+from .base import MXNetError, is_np_compat, set_np_compat, np_compat, use_np_compat
 from . import base
 from . import contrib
 from . import ndarray
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index bd1eceacac66..6799c44e72ac 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -27,10 +27,11 @@
 import inspect
 import platform
 import numpy as _np
+from functools import wraps
 
 from . import libinfo
 
-__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'np_compat']
+__all__ = ['MXNetError', 'is_np_compat', 'set_np_compat', 'np_compat', 'use_np_compat']
 #----------------------------
 # library loading
 #----------------------------
@@ -736,13 +737,13 @@ def write_all_str(module_file, module_all_list):
 ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
 
 
-def set_np_compat(flag):
+def set_np_compat(active):
     """
     Turns on/off NumPy compatibility. NumPy-compatibility is turned off by default in backend.
 
     Parameters
     ----------
-    flag : bool
+    active : bool
         Indicates whether to turn on/off NumPy compatibility.
 
     Returns
@@ -750,7 +751,7 @@ def set_np_compat(flag):
         A bool value indicating the previous state of NumPy compatibility.
     """
     prev = ctypes.c_int()
-    check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(flag), ctypes.byref(prev)))
+    check_call(_LIB.MXSetIsNumpyCompatible(ctypes.c_int(active), ctypes.byref(prev)))
     return bool(prev.value)
 
 
@@ -770,6 +771,7 @@ def is_np_compat():
 
 class _NumpyCompatibilityStateScope(object):
     """Scope for managing numpy compatibility state.
+    Do not use this class directly. Use `np_compat(active)` instead.
 
     Example::
 
@@ -792,7 +794,7 @@ def __exit__(self, ptype, value, trace):
 
 
 def np_compat(active=True):
-    """Returns a NumPy compatibility state scope to be used in 'with' statement
+    """Returns an activated/deactivated NumPy compatibility state scope to be used in 'with' statement
     and captures code that needs the compatibility.
 
     Example::
@@ -841,3 +843,32 @@ def np_compat(active=True):
             assert out_shapes[0] == ()
     """
     return _NumpyCompatibilityStateScope(active)
+
+
+def use_np_compat(func):
+    """Wraps a function with an activated NumPy-compatibility scope. This ensures
+    that the execution of the function is guaranteed with NumPy compatible semantics,
+    such as zero-dim and zero size tensors.
+
+    Example::
+        import mxnet as mx
+        @mx.use_np_compat
+        def scalar_one():
+            return mx.nd.ones(())
+        print(scalar_one())
+
+    Parameters
+    ----------
+    func : a user-provided callable function to be scoped by the NumPy compatibility state.
+
+    Returns
+    -------
+    Function
+        A function for wrapping the user functions in the NumPy compatibility scope.
+    """
+    @wraps(func)
+    def _with_np_compat(*args, **kwargs):
+        with np_compat(active=True):
+            return func(*args, **kwargs)
+
+    return _with_np_compat
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index 9392d4129406..fc51d8af0f01 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -887,7 +887,7 @@ void ReduceAxesBackwardUseInOutImpl(const OpContext& ctx,
     MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, OType, {
       mshadow::Shape<5> in_shape;
       mshadow::Shape<5> out_shape;
-      for (uint32_t i = 0; i < 5; ++i) {
+      for (int i = 0; i < 5; ++i) {
         if (i < dst_shape.ndim()) {
           in_shape[i] = src_shape[i];
           out_shape[i] = dst_shape[i];
@@ -1227,7 +1227,7 @@ void LpNormGradCompute(const nnvm::NodeAttrs& attrs,
     Stream<xpu> *s = ctx.get_stream<xpu>();
     mshadow::Shape<5> in_shape;
     mshadow::Shape<5> out_shape;
-    for (uint32_t i = 0; i < 5; ++i) {
+    for (int i = 0; i < 5; ++i) {
       if (i < dst_shape.ndim()) {
         in_shape[i] = src_shape[i];
         out_shape[i] = dst_shape[i];
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 191f3eef6aeb..a1754d1de544 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -7948,6 +7948,38 @@ def test_concat_with_zero_size_tensor():
         assert ret.shape == (0, 12, 10)
 
 
+@with_seed()
+def test_np_compat_decorator():
+    @mx.use_np_compat
+    def check_scalar_one():
+        """Generate scalar one tensor"""
+        return mx.nd.ones(shape=())
+    assert check_scalar_one.__name__ == "check_scalar_one"
+    assert check_scalar_one.__doc__ == "Generate scalar one tensor"
+    assert check_scalar_one().shape == ()
+    for active in [True, False]:
+        with mx.np_compat(active=active):
+            assert check_scalar_one.__name__ == "check_scalar_one"
+            assert check_scalar_one.__doc__ == "Generate scalar one tensor"
+            assert check_scalar_one().shape == ()
+
+    @mx.use_np_compat
+    def check_concat(shape1, shape2, axis):
+        data1 = mx.nd.ones(shape1)
+        data2 = mx.nd.ones(shape2)
+        ret = mx.nd.Concat(data1, data2, dim=axis)
+        expected_ret = np.concatenate((data1.asnumpy(), data2.asnumpy()), axis=axis)
+        assert ret.shape == expected_ret.shape
+
+    check_concat((0, 3, 4), (5, 3, 4), 0)
+    check_concat((8, 0, 5), (8, 7, 5), 1)
+    check_concat((8, 0, 0), (8, 0, 0), 2)
+    for active in [True, False]:
+        check_concat((0, 3, 4), (5, 3, 4), 0)
+        check_concat((8, 0, 5), (8, 7, 5), 1)
+        check_concat((8, 0, 0), (8, 0, 0), 2)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()

From bd7c4fb8b7fc166a6a41d1095e03d42045af7d27 Mon Sep 17 00:00:00 2001
From: reminisce <wujun.nju@gmail.com>
Date: Mon, 15 Apr 2019 15:18:02 -0700
Subject: [PATCH 32/32] Fix pylint

---
 python/mxnet/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index 6799c44e72ac..58f222dc1e85 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -20,6 +20,7 @@
 """ctypes library of mxnet and helper functions."""
 from __future__ import absolute_import
 
+from functools import wraps
 import atexit
 import ctypes
 import os
@@ -27,7 +28,6 @@
 import inspect
 import platform
 import numpy as _np
-from functools import wraps
 
 from . import libinfo