From 8da42c26a829616c7f717d9697eb90271df4da14 Mon Sep 17 00:00:00 2001
From: eric-haibin-lin <linhaibin.eric@gmail.com>
Date: Wed, 9 Aug 2017 18:52:39 +0000
Subject: [PATCH 1/5] add support for initializer with row_sparse output

---
 src/operator/random/sample_op.cc             |   9 +-
 src/operator/random/sample_op.h              | 109 ++++++++++++++++---
 src/operator/tensor/init_op.h                |  20 ++++
 tests/python/unittest/test_sparse_ndarray.py |  14 +++
 4 files changed, 136 insertions(+), 16 deletions(-)
diff --git a/src/operator/random/sample_op.cc b/src/operator/random/sample_op.cc
index 8d87d2b99d14..363163cbc697 100644
--- a/src/operator/random/sample_op.cc
+++ b/src/operator/random/sample_op.cc
@@ -61,7 +61,8 @@ Example::
                                                  [ 0.54488319,  0.84725171]]
 
 )code" ADD_FILELINE)
-.set_attr<FCompute>("FCompute<cpu>", SampleUniform_<cpu>);
+.set_attr<FCompute>("FCompute<cpu>", SampleUniform_<cpu>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", SampleUniformEx_<cpu>);
 
 // Add "normal" alias for backward compatibility
 MXNET_OPERATOR_REGISTER_SAMPLE(random_normal, SampleNormalParam)
@@ -78,7 +79,8 @@ Example::
    random_normal(loc=0, scale=1, shape=(2,2)) = [[ 1.89171135, -1.16881478],
                                                  [-1.23474145,  1.55807114]]
 )code" ADD_FILELINE)
-.set_attr<FCompute>("FCompute<cpu>", SampleNormal_<cpu>);
+.set_attr<FCompute>("FCompute<cpu>", SampleNormal_<cpu>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", SampleNormalEx_<cpu>);
 
 MXNET_OPERATOR_REGISTER_SAMPLE(random_gamma, SampleGammaParam)
 .add_alias("_sample_gamma")
@@ -91,7 +93,8 @@ Example::
    random_gamma(alpha=9, beta=0.5, shape=(2,2)) = [[ 7.10486984,  3.37695289],
                                                    [ 3.91697288,  3.65933681]]
 )code" ADD_FILELINE)
-.set_attr<FCompute>("FCompute<cpu>", SampleGamma_<cpu>);
+.set_attr<FCompute>("FCompute<cpu>", SampleGamma_<cpu>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", SampleGammaEx_<cpu>);
 
 MXNET_OPERATOR_REGISTER_SAMPLE(random_exponential, SampleExponentialParam)
 .add_alias("_sample_exponential")
diff --git a/src/operator/random/sample_op.h b/src/operator/random/sample_op.h
index a1a6a2345b1b..0cd3f6bc2efb 100644
--- a/src/operator/random/sample_op.h
+++ b/src/operator/random/sample_op.h
@@ -232,29 +232,75 @@ struct SampleGenNegBinomialParam : public dmlc::Parameter<SampleGenNegBinomialPa
   }
 };
 
+using FSampleCompute = std::function<void (const nnvm::NodeAttrs& attrs,
+                                           const OpContext& ctx,
+                                           const OpReqType& req,
+                                           TBlob* outputs)>;
+
 template<typename xpu>
-void SampleUniform_(const nnvm::NodeAttrs& attrs,
-                    const OpContext& ctx,
-                    const std::vector<TBlob>& inputs,
-                    const std::vector<OpReqType>& req,
-                    const std::vector<TBlob>& outputs) {
+void SampleComputeEx_(const nnvm::NodeAttrs& attrs,
+                      const OpContext& ctx,
+                      const std::vector<NDArray>& inputs,
+                      const std::vector<OpReqType>& req,
+                      const std::vector<NDArray>& outputs,
+                      FSampleCompute fcomp) {
+  NDArray output = outputs[0];
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  if (output.storage_type() == kRowSparseStorage) {
+    // indices
+    nnvm::dim_t nnr = output.shape()[0];
+    output.CheckAndAlloc({mshadow::Shape1(nnr)});
+    PopulateFullIdxRspImpl(s, &output);
+    // data
+    TBlob out_blob = output.data();
+    fcomp(attrs, ctx, req[0], &out_blob);
+  } else {
+    LOG(FATAL) << "Unexpected storage type for SampleComputeEx_: "
+               << output.storage_type();
+  }
+}
+
+template<typename xpu>
+void SampleUniformDnsImpl(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const OpReqType& req,
+                          TBlob* output) {
   using namespace mxnet::op;
   using namespace mshadow::expr;
   mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
   const SampleUniformParam& param = nnvm::get<SampleUniformParam>(attrs.parsed);
-  MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+  MSHADOW_REAL_TYPE_SWITCH(output->type_flag_, DType, {
     mshadow::Random<xpu, DType> *prnd = ctx.requested[0].get_random<xpu, DType>(s);
-    mshadow::Tensor<xpu, 2, DType> out = outputs[0].FlatTo2D<xpu, DType>(s);
+    mshadow::Tensor<xpu, 2, DType> out = output->FlatTo2D<xpu, DType>(s);
     prnd->SampleUniform(&out, param.low, param.high);
   });
 }
 
 template<typename xpu>
-void SampleNormal_(const nnvm::NodeAttrs& attrs,
-                   const OpContext& ctx,
-                   const std::vector<TBlob>& inputs,
-                   const std::vector<OpReqType>& req,
-                   const std::vector<TBlob>& outputs) {
+void SampleUniform_(const nnvm::NodeAttrs& attrs,
+                    const OpContext& ctx,
+                    const std::vector<TBlob>& inputs,
+                    const std::vector<OpReqType>& req,
+                    const std::vector<TBlob>& outputs) {
+  TBlob out = outputs[0];
+  SampleUniformDnsImpl<xpu>(attrs, ctx, req[0], &out);
+}
+
+
+template<typename xpu>
+void SampleUniformEx_(const nnvm::NodeAttrs& attrs,
+                      const OpContext& ctx,
+                      const std::vector<NDArray>& inputs,
+                      const std::vector<OpReqType>& req,
+                      const std::vector<NDArray>& outputs) {
+  SampleComputeEx_<xpu>(attrs, ctx, inputs, req, outputs, SampleUniformDnsImpl<xpu>);
+}
+
+template<typename xpu>
+void SampleNormalDnsImpl(const nnvm::NodeAttrs& attrs,
+                         const OpContext& ctx,
+                         const OpReqType& req,
+                         TBlob* outputs) {
   using namespace mxnet::op;
   using namespace mshadow::expr;
   mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
@@ -268,11 +314,29 @@ void SampleNormal_(const nnvm::NodeAttrs& attrs,
 }
 
 template<typename xpu>
-void SampleGamma_(const nnvm::NodeAttrs& attrs,
+void SampleNormal_(const nnvm::NodeAttrs& attrs,
                    const OpContext& ctx,
                    const std::vector<TBlob>& inputs,
                    const std::vector<OpReqType>& req,
                    const std::vector<TBlob>& outputs) {
+  TBlob out = outputs[0];
+  SampleNormalDnsImpl<xpu>(attrs, ctx, req[0], &out);
+}
+
+template<typename xpu>
+void SampleNormalEx_(const nnvm::NodeAttrs& attrs,
+                      const OpContext& ctx,
+                      const std::vector<NDArray>& inputs,
+                      const std::vector<OpReqType>& req,
+                      const std::vector<NDArray>& outputs) {
+  SampleComputeEx_<xpu>(attrs, ctx, inputs, req, outputs, SampleNormalDnsImpl<xpu>);
+}
+
+template<typename xpu>
+void SampleGammaDnsImpl(const nnvm::NodeAttrs& attrs,
+                        const OpContext& ctx,
+                        const OpReqType& req,
+                        TBlob* outputs) {
   using namespace mxnet::op;
   using namespace mshadow::expr;
   mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
@@ -286,6 +350,25 @@ void SampleGamma_(const nnvm::NodeAttrs& attrs,
   });
 }
 
+template<typename xpu>
+void SampleGamma_(const nnvm::NodeAttrs& attrs,
+                  const OpContext& ctx,
+                  const std::vector<TBlob>& inputs,
+                  const std::vector<OpReqType>& req,
+                  const std::vector<TBlob>& outputs) {
+  TBlob out = outputs[0];
+  SampleGammaDnsImpl<xpu>(attrs, ctx, req[0], &out);
+}
+
+template<typename xpu>
+void SampleGammaEx_(const nnvm::NodeAttrs& attrs,
+                   const OpContext& ctx,
+                   const std::vector<NDArray>& inputs,
+                   const std::vector<OpReqType>& req,
+                   const std::vector<NDArray>& outputs) {
+  SampleComputeEx_<xpu>(attrs, ctx, inputs, req, outputs, SampleGammaDnsImpl<xpu>);
+}
+
 template<typename xpu>
 void SampleExponential_(const nnvm::NodeAttrs& attrs,
                    const OpContext& ctx,
diff --git a/src/operator/tensor/init_op.h b/src/operator/tensor/init_op.h
index 1b244251fca1..1ac933ddaef5 100644
--- a/src/operator/tensor/init_op.h
+++ b/src/operator/tensor/init_op.h
@@ -167,6 +167,26 @@ inline void FillDnsZerosRspImpl(mshadow::Stream<xpu> *s, NDArray *dst) {
   });
 }
 
+struct PopulateFullIdxRspKernel {
+  template<typename IType>
+  MSHADOW_XINLINE static void Map(int i, IType* out) {
+    KERNEL_ASSIGN(out[i], kWriteTo, i);
+  }
+};
+
+// Populate the row-index aux array of a rsp NDArray with the full range [0, shape[0]).
+template<typename xpu>
+void PopulateFullIdxRspImpl(mshadow::Stream<xpu> *s, NDArray *dst) {
+  using namespace rowsparse;
+  CHECK_EQ(dst->storage_type(), kRowSparseStorage);
+  nnvm::dim_t nnr = dst->shape()[0];
+  dst->CheckAndAllocAuxData(kIdx, mshadow::Shape1(nnr));
+  MSHADOW_IDX_TYPE_SWITCH(dst->aux_type(kIdx), IType, {
+    IType* idx = dst->aux_data(kIdx).dptr<IType>();
+    mxnet_op::Kernel<PopulateFullIdxRspKernel, xpu>::Launch(s, nnr, idx);
+  });
+}
+
 // Fill a rsp NDArray with zeros by updating the aux shape.
 template<typename xpu>
 void FillZerosRspImpl(mshadow::Stream<xpu> *s, NDArray *dst) {
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index 63113fd03f01..cbce588549a1 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -351,6 +351,20 @@ def test_sparse_nd_output_fallback():
     mx.nd.random_normal(shape=shape, out=out)
     assert(np.sum(out.asnumpy()) != 0)
 
+def test_sparse_nd_random():
+    shape = (100, 100)
+    fns = [mx.nd.random_uniform, mx.nd.random_normal, mx.nd.random_gamma]
+    for fn in fns:
+        rsp_out = mx.nd.zeros(shape=shape, stype='row_sparse')
+        dns_out = mx.nd.zeros(shape=shape, stype='default')
+        mx.random.seed(0)
+        np.random.seed(0)
+        fn(shape=shape, out=dns_out)
+        mx.random.seed(0)
+        np.random.seed(0)
+        fn(shape=shape, out=rsp_out)
+        assert_almost_equal(dns_out.asnumpy(), rsp_out.asnumpy())
+
 
 def test_sparse_nd_astype():
     stypes = ['row_sparse', 'csr']

From 8aef7a56c44038f67bbec93811977ea2f9fa3c30 Mon Sep 17 00:00:00 2001
From: eric-haibin-lin <linhaibin.eric@gmail.com>
Date: Wed, 9 Aug 2017 20:10:13 +0000
Subject: [PATCH 2/5] add scalar assignment to row_sparse

---
 python/mxnet/ndarray/sparse_ndarray.py       | 11 ++++--
 src/ndarray/ndarray.cc                       | 35 +++++++++++++++++---
 tests/python/unittest/test_sparse_ndarray.py |  7 +++-
 3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/python/mxnet/ndarray/sparse_ndarray.py b/python/mxnet/ndarray/sparse_ndarray.py
index 9248a43e9627..6c8bb44b0ae3 100644
--- a/python/mxnet/ndarray/sparse_ndarray.py
+++ b/python/mxnet/ndarray/sparse_ndarray.py
@@ -545,7 +545,7 @@ def __setitem__(self, key, value):
         ----------
         key : slice
             The indexing key.
-        value : NDArray or numpy.ndarray
+        value : scalar, NDArray or numpy.ndarray
             The value to set.
 
         Examples
@@ -568,6 +568,12 @@ def __setitem__(self, key, value):
         array([[ 1.,  1.,  1.],
                [ 1.,  1.,  1.],
                [ 1.,  1.,  1.]], dtype=float32)
+        >>> # assign scalar to RowSparseNDArray
+        >>> x[:] = 7
+        >>> x.asnumpy()
+        array([[ 7.,  7.,  7.],
+               [ 7.,  7.,  7.],
+               [ 7.,  7.,  7.]], dtype=float32)
         """
         if not self.writable:
             raise ValueError('Failed to assign to a readonly RowSparseNDArray')
@@ -580,8 +586,7 @@ def __setitem__(self, key, value):
                 if value.handle is not self.handle:
                     value.copyto(self)
             elif isinstance(value, numeric_types):
-                raise ValueError("Assigning numeric types to RowSparseNDArray " \
-                                 "is not implemented yet.")
+                _internal._set_value(float(value), out=self)
             elif isinstance(value, (np.ndarray, np.generic)):
                 warnings.warn('Assigning non-NDArray object to RowSparseNDArray is not efficient',
                               RuntimeWarning)
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 0d2968626d79..9f5b7bab820a 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -295,9 +295,21 @@ void SetValueOp(const real_t &rhs, NDArray *out) {
   switch (ret.ctx().dev_mask()) {
     case cpu::kDevMask: {
       Engine::Get()->PushSync([rhs, ret](RunContext ctx) {
-          CHECK(ret.storage_type() == kDefaultStorage);
-          TBlob tmp = ret.data();
-          ndarray::Eval<cpu>(rhs, &tmp, ctx);
+          auto ret_stype = ret.storage_type();
+          mshadow::Stream<cpu> *s = ctx.get_stream<cpu>();
+          if (ret_stype == kRowSparseStorage) {
+            NDArray out = ret;
+            // indices
+            nnvm::dim_t nnr = ret.shape()[0];
+            out.CheckAndAlloc({mshadow::Shape1(nnr)});
+            op::PopulateFullIdxRspImpl(s, &out);
+            // data
+            TBlob tmp = out.data();
+            ndarray::Eval<cpu>(rhs, &tmp, ctx);
+          } else {
+            TBlob tmp = ret.data();
+            ndarray::Eval<cpu>(rhs, &tmp, ctx);
+          }
         }, ret.ctx(), {}, {ret.var()},
         FnProperty::kNormal, 0, PROFILER_MESSAGE_FUNCNAME);
       break;
@@ -305,8 +317,21 @@ void SetValueOp(const real_t &rhs, NDArray *out) {
 #if MXNET_USE_CUDA
     case gpu::kDevMask: {
       Engine::Get()->PushSync([rhs, ret](RunContext ctx) {
-          TBlob tmp = ret.data();
-          ndarray::Eval<gpu>(rhs, &tmp, ctx);
+          auto ret_stype = ret.storage_type();
+          mshadow::Stream<gpu> *s = ctx.get_stream<gpu>();
+          if (ret_stype == kRowSparseStorage) {
+            NDArray out = ret;
+            // indices
+            nnvm::dim_t nnr = ret.shape()[0];
+            out.CheckAndAlloc({mshadow::Shape1(nnr)});
+            op::PopulateFullIdxRspImpl(s, &out);
+            // data
+            TBlob tmp = out.data();
+            ndarray::Eval<gpu>(rhs, &tmp, ctx);
+          } else {
+            TBlob tmp = ret.data();
+            ndarray::Eval<gpu>(rhs, &tmp, ctx);
+          }
           // Wait GPU kernel to complete
           ctx.get_stream<gpu>()->Wait();
         }, ret.ctx(), {}, {ret.var()},
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index cbce588549a1..6c4fb01978d4 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -102,7 +102,8 @@ def test_sparse_nd_setitem():
     def check_sparse_nd_setitem(stype, shape, dst):
         x = mx.nd.zeros(shape=shape, stype=stype)
         x[:] = dst
-        dst_nd = mx.nd.array(dst) if isinstance(dst, (np.ndarray, np.generic)) else dst
+        dst_nd = mx.nd.zeros(shape=shape)
+        dst_nd[:] = dst
         assert same(x.asnumpy(), dst_nd.asnumpy())
 
     shape = rand_shape_2d()
@@ -112,6 +113,10 @@ def check_sparse_nd_setitem(stype, shape, dst):
         check_sparse_nd_setitem(stype, shape, rand_ndarray(shape, stype))
         # numpy assignment
         check_sparse_nd_setitem(stype, shape, np.ones(shape))
+        if stype == 'row_sparse':
+            # scalar assignment
+            check_sparse_nd_setitem(stype, shape, 0)
+            check_sparse_nd_setitem(stype, shape, 1)
 
 
 def test_sparse_nd_slice():

From 3b969ac0980e8d7166a1cf46878ed2bd457986ed Mon Sep 17 00:00:00 2001
From: eric-haibin-lin <linhaibin.eric@gmail.com>
Date: Wed, 9 Aug 2017 20:12:19 +0000
Subject: [PATCH 3/5] add setitem test to gpu

---
 tests/python/gpu/test_operator_gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index 6af9a0f33d48..bb18d5d21c87 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -33,7 +33,7 @@
 from test_gluon_rnn import *
 from test_sparse_operator import test_cast_storage_ex, test_sparse_dot
 from test_sparse_operator import test_sparse_nd_zeros, test_sparse_retain
-from test_sparse_ndarray import test_create_csr, test_create_row_sparse
+from test_sparse_ndarray import test_create_csr, test_create_row_sparse, test_sparse_nd_setitem
 
 set_default_context(mx.gpu(0))
 del test_support_vector_machine_l1_svm

From 90f4bd218dc001f8ee74f9ececd4ee7395b09c67 Mon Sep 17 00:00:00 2001
From: eric-haibin-lin <linhaibin.eric@gmail.com>
Date: Wed, 9 Aug 2017 22:57:06 +0000
Subject: [PATCH 4/5] Revert "add scalar assignment to row_sparse"

This reverts commit 8aef7a56c44038f67bbec93811977ea2f9fa3c30.
---
 python/mxnet/ndarray/sparse_ndarray.py       | 11 ++----
 src/ndarray/ndarray.cc                       | 35 +++-----------------
 tests/python/unittest/test_sparse_ndarray.py |  7 +---
 3 files changed, 9 insertions(+), 44 deletions(-)

diff --git a/python/mxnet/ndarray/sparse_ndarray.py b/python/mxnet/ndarray/sparse_ndarray.py
index 6c8bb44b0ae3..9248a43e9627 100644
--- a/python/mxnet/ndarray/sparse_ndarray.py
+++ b/python/mxnet/ndarray/sparse_ndarray.py
@@ -545,7 +545,7 @@ def __setitem__(self, key, value):
         ----------
         key : slice
             The indexing key.
-        value : scalar, NDArray or numpy.ndarray
+        value : NDArray or numpy.ndarray
             The value to set.
 
         Examples
@@ -568,12 +568,6 @@ def __setitem__(self, key, value):
         array([[ 1.,  1.,  1.],
                [ 1.,  1.,  1.],
                [ 1.,  1.,  1.]], dtype=float32)
-        >>> # assign scalar to RowSparseNDArray
-        >>> x[:] = 7
-        >>> x.asnumpy()
-        array([[ 7.,  7.,  7.],
-               [ 7.,  7.,  7.],
-               [ 7.,  7.,  7.]], dtype=float32)
         """
         if not self.writable:
             raise ValueError('Failed to assign to a readonly RowSparseNDArray')
@@ -586,7 +580,8 @@ def __setitem__(self, key, value):
                 if value.handle is not self.handle:
                     value.copyto(self)
             elif isinstance(value, numeric_types):
-                _internal._set_value(float(value), out=self)
+                raise ValueError("Assigning numeric types to RowSparseNDArray " \
+                                 "is not implemented yet.")
             elif isinstance(value, (np.ndarray, np.generic)):
                 warnings.warn('Assigning non-NDArray object to RowSparseNDArray is not efficient',
                               RuntimeWarning)
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 9f5b7bab820a..0d2968626d79 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -295,21 +295,9 @@ void SetValueOp(const real_t &rhs, NDArray *out) {
   switch (ret.ctx().dev_mask()) {
     case cpu::kDevMask: {
       Engine::Get()->PushSync([rhs, ret](RunContext ctx) {
-          auto ret_stype = ret.storage_type();
-          mshadow::Stream<cpu> *s = ctx.get_stream<cpu>();
-          if (ret_stype == kRowSparseStorage) {
-            NDArray out = ret;
-            // indices
-            nnvm::dim_t nnr = ret.shape()[0];
-            out.CheckAndAlloc({mshadow::Shape1(nnr)});
-            op::PopulateFullIdxRspImpl(s, &out);
-            // data
-            TBlob tmp = out.data();
-            ndarray::Eval<cpu>(rhs, &tmp, ctx);
-          } else {
-            TBlob tmp = ret.data();
-            ndarray::Eval<cpu>(rhs, &tmp, ctx);
-          }
+          CHECK(ret.storage_type() == kDefaultStorage);
+          TBlob tmp = ret.data();
+          ndarray::Eval<cpu>(rhs, &tmp, ctx);
         }, ret.ctx(), {}, {ret.var()},
         FnProperty::kNormal, 0, PROFILER_MESSAGE_FUNCNAME);
       break;
@@ -317,21 +305,8 @@ void SetValueOp(const real_t &rhs, NDArray *out) {
 #if MXNET_USE_CUDA
     case gpu::kDevMask: {
       Engine::Get()->PushSync([rhs, ret](RunContext ctx) {
-          auto ret_stype = ret.storage_type();
-          mshadow::Stream<gpu> *s = ctx.get_stream<gpu>();
-          if (ret_stype == kRowSparseStorage) {
-            NDArray out = ret;
-            // indices
-            nnvm::dim_t nnr = ret.shape()[0];
-            out.CheckAndAlloc({mshadow::Shape1(nnr)});
-            op::PopulateFullIdxRspImpl(s, &out);
-            // data
-            TBlob tmp = out.data();
-            ndarray::Eval<gpu>(rhs, &tmp, ctx);
-          } else {
-            TBlob tmp = ret.data();
-            ndarray::Eval<gpu>(rhs, &tmp, ctx);
-          }
+          TBlob tmp = ret.data();
+          ndarray::Eval<gpu>(rhs, &tmp, ctx);
           // Wait GPU kernel to complete
           ctx.get_stream<gpu>()->Wait();
         }, ret.ctx(), {}, {ret.var()},
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index 6c4fb01978d4..cbce588549a1 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -102,8 +102,7 @@ def test_sparse_nd_setitem():
     def check_sparse_nd_setitem(stype, shape, dst):
         x = mx.nd.zeros(shape=shape, stype=stype)
         x[:] = dst
-        dst_nd = mx.nd.zeros(shape=shape)
-        dst_nd[:] = dst
+        dst_nd = mx.nd.array(dst) if isinstance(dst, (np.ndarray, np.generic)) else dst
         assert same(x.asnumpy(), dst_nd.asnumpy())
 
     shape = rand_shape_2d()
@@ -113,10 +112,6 @@ def check_sparse_nd_setitem(stype, shape, dst):
         check_sparse_nd_setitem(stype, shape, rand_ndarray(shape, stype))
         # numpy assignment
         check_sparse_nd_setitem(stype, shape, np.ones(shape))
-        if stype == 'row_sparse':
-            # scalar assignment
-            check_sparse_nd_setitem(stype, shape, 0)
-            check_sparse_nd_setitem(stype, shape, 1)
 
 
 def test_sparse_nd_slice():

From 0dc5611086bfac86a63dbfdc80ca5fe39c52c68f Mon Sep 17 00:00:00 2001
From: eric-haibin-lin <linhaibin.eric@gmail.com>
Date: Wed, 9 Aug 2017 22:57:14 +0000
Subject: [PATCH 5/5] Revert "add setitem test to gpu"

This reverts commit 3b969ac0980e8d7166a1cf46878ed2bd457986ed.
---
 tests/python/gpu/test_operator_gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py
index bb18d5d21c87..6af9a0f33d48 100644
--- a/tests/python/gpu/test_operator_gpu.py
+++ b/tests/python/gpu/test_operator_gpu.py
@@ -33,7 +33,7 @@
 from test_gluon_rnn import *
 from test_sparse_operator import test_cast_storage_ex, test_sparse_dot
 from test_sparse_operator import test_sparse_nd_zeros, test_sparse_retain
-from test_sparse_ndarray import test_create_csr, test_create_row_sparse, test_sparse_nd_setitem
+from test_sparse_ndarray import test_create_csr, test_create_row_sparse
 
 set_default_context(mx.gpu(0))
 del test_support_vector_machine_l1_svm