Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions include/mxnet/ndarray.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class NDArray {
* \return the data TBlob
*/
inline TBlob data() const {
CheckAndAlloc();
TBlob res;
MSHADOW_TYPE_SWITCH(dtype_, DType, {
res = TBlob(static_cast<DType*>(ptr_->shandle.dptr)
Expand All @@ -116,6 +117,7 @@ class NDArray {
* \return a chunk of raw data in TBlob
*/
inline TBlob raw_data(index_t offset, index_t length) const {
CheckAndAlloc();
TBlob res;
TShape raw_shape(1);
raw_shape[0] = length;
Expand Down
2 changes: 0 additions & 2 deletions src/c_api/c_api_ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ void PushFCompute(const FCompute& fn,
input_blobs.push_back(i.data());
}
for (auto& i : ndoutputs) {
i.CheckAndAlloc();
output_blobs.push_back(i.data());
}
OpContext opctx{is_train, rctx,
Expand Down Expand Up @@ -296,7 +295,6 @@ void PushOperator(std::shared_ptr<Operator> opr,
}
}
for (auto& i : ndoutputs) {
i.CheckAndAlloc();
output_blobs.push_back(i.data());
}
Capture* capture = new Capture({on_complete, opr});
Expand Down
1 change: 0 additions & 1 deletion src/io/image_io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ void Imdecode(const nnvm::NodeAttrs& attrs,

NDArray ndout(oshape, Context::CPU(), true, mshadow::kUint8);
Engine::Get()->PushSync([ndin, ndout, param](RunContext ctx){
ndout.CheckAndAlloc();
cv::Mat buf(1, ndin.shape().Size(), CV_8U, ndin.data().dptr_);
cv::Mat dst(ndout.shape()[0], ndout.shape()[1],
param.flag == 0 ? CV_8U : CV_8UC3,
Expand Down
20 changes: 0 additions & 20 deletions src/ndarray/ndarray.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ void TernaryOp(const NDArray &lhs,
switch (lhs.ctx().dev_mask()) {
case cpu::kDevMask: {
Engine::Get()->PushSync([lhs, mhs, rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<cpu, OP>(lhs.data(), mhs.data(), rhs.data(), &tmp, ctx);
}, lhs.ctx(), const_vars, { ret.var() },
Expand All @@ -73,7 +72,6 @@ void TernaryOp(const NDArray &lhs,
#if MXNET_USE_CUDA
case gpu::kDevMask: {
Engine::Get()->PushSync([lhs, mhs, rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<gpu, OP>(lhs.data(), mhs.data(), rhs.data(), &tmp, ctx);
// Wait for the GPU kernel to complete
Expand Down Expand Up @@ -125,7 +123,6 @@ void BinaryOp(const NDArray &lhs,
switch (lhs.ctx().dev_mask()) {
case cpu::kDevMask: {
Engine::Get()->PushSync([lhs, rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<cpu, OP>(lhs.data(), rhs.data(), &tmp, ctx);
}, lhs.ctx(), const_vars, {ret.var()},
Expand All @@ -135,7 +132,6 @@ void BinaryOp(const NDArray &lhs,
#if MXNET_USE_CUDA
case gpu::kDevMask: {
Engine::Get()->PushSync([lhs, rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<gpu, OP>(lhs.data(), rhs.data(), &tmp, ctx);
// Wait for the GPU kernel to complete
Expand All @@ -156,7 +152,6 @@ void SetValueOp(const real_t &rhs, NDArray *out) {
switch (ret.ctx().dev_mask()) {
case cpu::kDevMask: {
Engine::Get()->PushSync([rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<cpu>(rhs, &tmp, ctx);
}, ret.ctx(), {}, {ret.var()},
Expand All @@ -166,7 +161,6 @@ void SetValueOp(const real_t &rhs, NDArray *out) {
#if MXNET_USE_CUDA
case gpu::kDevMask: {
Engine::Get()->PushSync([rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<gpu>(rhs, &tmp, ctx);
// Wait for the GPU kernel to complete
Expand Down Expand Up @@ -207,7 +201,6 @@ void ScalarOp(const NDArray &lhs,
switch (lhs.ctx().dev_mask()) {
case cpu::kDevMask: {
Engine::Get()->PushSync([lhs, rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<cpu, OP, reverse>(lhs.data(), rhs, &tmp, ctx);
}, lhs.ctx(), const_vars, {ret.var()},
Expand All @@ -217,7 +210,6 @@ void ScalarOp(const NDArray &lhs,
#if MXNET_USE_CUDA
case gpu::kDevMask: {
Engine::Get()->PushSync([lhs, rhs, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Eval<gpu, OP, reverse>(lhs.data(), rhs, &tmp, ctx);
// Wait for the GPU kernel to complete
Expand Down Expand Up @@ -251,7 +243,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) {

if (a == cpu::kDevMask && b == cpu::kDevMask) {
Engine::Get()->PushSync([from, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Copy<cpu, cpu>(from.data(), &tmp,
from.ctx(), ret.ctx(), ctx);
Expand All @@ -261,7 +252,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) {
#if MXNET_USE_CUDA
if (a == cpu::kDevMask && b == gpu::kDevMask) {
Engine::Get()->PushSync([from, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Copy<cpu, gpu>(from.data(), &tmp,
from.ctx(), ret.ctx(), ctx);
Expand All @@ -271,7 +261,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) {
FnProperty::kCopyToGPU, priority, PROFILER_MESSAGE("CopyCPU2GPU"));
} else if (a == gpu::kDevMask && b == cpu::kDevMask) {
Engine::Get()->PushSync([from, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Copy<gpu, cpu>(from.data(), &tmp,
from.ctx(), ret.ctx(), ctx);
Expand All @@ -281,7 +270,6 @@ void CopyFromTo(const NDArray &from, NDArray *to, int priority) {
FnProperty::kCopyFromGPU, priority, PROFILER_MESSAGE("CopyGPU2CPU"));
} else if (a == gpu::kDevMask && b == gpu::kDevMask) {
Engine::Get()->PushSync([from, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::Copy<gpu, gpu>(from.data(), &tmp,
from.ctx(), ret.ctx(), ctx);
Expand Down Expand Up @@ -326,7 +314,6 @@ void ElementwiseSum(const std::vector<NDArray> &source, NDArray *out, int priori
for (size_t i = 0; i < source.size(); ++i) {
source_tblob[i] = source[i].data();
}
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::ElementwiseSum<cpu>(source_tblob, &tmp, ctx);
}, out->ctx(), const_vars, {ret.var()},
Expand All @@ -340,7 +327,6 @@ void ElementwiseSum(const std::vector<NDArray> &source, NDArray *out, int priori
for (size_t i = 0; i < source.size(); ++i) {
source_tblob[i] = source[i].data();
}
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::ElementwiseSum<gpu>(source_tblob, &tmp, ctx);
// Wait for the GPU kernel to complete
Expand Down Expand Up @@ -369,7 +355,6 @@ void ClipOp(const NDArray &src,
switch (src.ctx().dev_mask()) {
case cpu::kDevMask: {
Engine::Get()->PushSync([src, a_min, a_max, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::EvalClip<cpu>(src.data(), a_min, a_max, &tmp, ctx);
}, src.ctx(), const_vars, {ret.var()},
Expand All @@ -379,7 +364,6 @@ void ClipOp(const NDArray &src,
#if MXNET_USE_CUDA
case gpu::kDevMask: {
Engine::Get()->PushSync([src, a_min, a_max, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::EvalClip<gpu>(src.data(), a_min, a_max, &tmp, ctx);
}, src.ctx(), const_vars, {ret.var()},
Expand Down Expand Up @@ -408,7 +392,6 @@ void SampleOP(const real_t &a,
switch (out->ctx().dev_mask()) {
case cpu::kDevMask: {
Engine::Get()->PushSync([a, b, resource, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::EvalRandom<cpu, Distribution>(a, b, resource, &tmp, ctx);
}, out->ctx(), {}, {ret.var(), resource.var},
Expand All @@ -418,7 +401,6 @@ void SampleOP(const real_t &a,
#if MXNET_USE_CUDA
case gpu::kDevMask: {
Engine::Get()->PushSync([a, b, resource, ret](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
ndarray::EvalRandom<gpu, Distribution>(a, b, resource, &tmp, ctx);
// Wait for the GPU kernel to complete
Expand Down Expand Up @@ -721,15 +703,13 @@ void NDArray::SyncCopyFromCPU(const void *data, size_t size) const {

if (this->ctx().dev_mask() == cpu::kDevMask) {
this->WaitToWrite();
this->CheckAndAlloc();
RunContext rctx;
rctx.stream = nullptr;
TBlob dst = this->data();
ndarray::Copy<cpu, cpu>(src, &dst, Context::CPU(), Context::CPU(), rctx);
} else {
#if MXNET_USE_CUDA
Engine::Get()->PushSync([&](RunContext rctx) {
this->CheckAndAlloc();
TBlob dst = this->data();
ndarray::Copy<cpu, gpu>(src, &dst,
Context::CPU(), this->ctx(), rctx);
Expand Down
3 changes: 0 additions & 3 deletions src/operator/operator_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,6 @@ void SimpleOpRegEntryImpl::RegisterSourceImperative() {
OpReqType req = kWriteTo;

Engine::Get()->PushSync([ret, fun, dev_mask, req, env](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
(*fun)(env, &tmp, req, ctx);
#if MXNET_USE_CUDA
Expand Down Expand Up @@ -664,7 +663,6 @@ void SimpleOpRegEntryImpl::RegisterUnaryImperative() {
}

Engine::Get()->PushSync([src, ret, fun, dev_mask, req, env](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
(*fun)(src.data(), env, &tmp, req, ctx);
#if MXNET_USE_CUDA
Expand Down Expand Up @@ -939,7 +937,6 @@ void SimpleOpRegEntryImpl::RegisterBinaryImperative() {
}

Engine::Get()->PushSync([lhs, rhs, ret, fun, dev_mask, req, env](RunContext ctx) {
ret.CheckAndAlloc();
TBlob tmp = ret.data();
(*fun)(lhs.data(), rhs.data(), env, &tmp, req, ctx);
#if MXNET_USE_CUDA
Expand Down