From e95bd3e8fa33133cfdc1d61e05868c8848eb8e5b Mon Sep 17 00:00:00 2001
From: tqchen
Date: Tue, 10 Jul 2018 14:36:29 -0700
Subject: [PATCH 1/2] [RUNTIME] Simple NDArray container API in c++

---
 include/tvm/runtime/c_runtime_api.h  |   6 +-
 include/tvm/runtime/ndarray.h        | 268 ++++++++++++++++++++++++++++
 include/tvm/runtime/packed_func.h    |  50 +++++-
 python/tvm/_ffi/_ctypes/function.py  |   4 +-
 python/tvm/_ffi/_cython/base.pxi     |   1 +
 python/tvm/_ffi/_cython/function.pxi |   5 +-
 python/tvm/_ffi/runtime_ctypes.py    |   1 +
 src/pass/make_api.cc                 |   1 +
 src/runtime/c_runtime_api.cc         | 152 -----------------
 src/runtime/ndarray.cc               | 244 ++++++++++++++++++++++++++
 src/runtime/rpc/rpc_session.cc       |   8 +-
 tests/cpp/packed_func_test.cc        |  25 +++
 12 files changed, 601 insertions(+), 164 deletions(-)
 create mode 100644 include/tvm/runtime/ndarray.h
 create mode 100644 src/runtime/ndarray.cc

diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index 7f3a11d9ddab..6fb7a0f3f8b3 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -87,6 +87,7 @@ typedef enum {
   kFuncHandle = 10U,
   kStr = 11U,
   kBytes = 12U,
+  kNDArrayContainer = 13U,
   // Extension codes for other frameworks to integrate TVM PackedFunc.
   // To make sure each framework's id do not conflict, use first and
   // last sections to mark ranges.
@@ -121,6 +122,9 @@ typedef DLContext TVMContext;
  */
 typedef DLTensor TVMArray;
 
+/*! \brief the array handle */
+typedef TVMArray* TVMArrayHandle;
+
 /*!
  * \brief Union type of values
  * being passed through API and function calls.
@@ -149,8 +153,6 @@ typedef void* TVMModuleHandle;
 typedef void* TVMFunctionHandle;
 /*! \brief Handle to hold return value. */
 typedef void* TVMRetValueHandle;
-/*! \brief the array handle */
-typedef TVMArray* TVMArrayHandle;
 /*!
  * \brief The stream that is specific to device
  * can be NULL, which indicates the default one.
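The new kNDArrayContainer code deliberately reuses the payload layout of kArrayHandle: in both cases TVMValue.v_handle can be read as a DLTensor*, because the container type introduced below begins with a DLTensor field. A minimal dispatch sketch, not part of the patch (InspectArg is a hypothetical helper):

    // Sketch: both type codes carry a DLTensor-compatible pointer.
    void InspectArg(TVMValue value, int tcode) {
      if (tcode == kArrayHandle || tcode == kNDArrayContainer) {
        DLTensor* t = static_cast<DLTensor*>(value.v_handle);
        // inspect t->ndim, t->shape, t->dtype, ...
      }
    }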
diff --git a/include/tvm/runtime/ndarray.h b/include/tvm/runtime/ndarray.h
new file mode 100644
index 000000000000..708a0519b153
--- /dev/null
+++ b/include/tvm/runtime/ndarray.h
@@ -0,0 +1,268 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file tvm/runtime/ndarray.h
+ * \brief Managed NDArray container API.
+ */
+#ifndef TVM_RUNTIME_NDARRAY_H_
+#define TVM_RUNTIME_NDARRAY_H_
+
+#include <atomic>
+#include <vector>
+#include <utility>
+#include "./c_runtime_api.h"
+
+namespace tvm {
+namespace runtime {
+/*!
+ * \brief Managed NDArray.
+ *  The array is backed by reference counted blocks.
+ */
+class NDArray {
+ public:
+  // internal container type
+  struct Container;
+  /*! \brief default constructor */
+  NDArray() {}
+  /*!
+   * \brief construct an NDArray that refers to data
+   * \param data The data this NDArray refers to
+   */
+  explicit inline NDArray(Container* data);
+  /*!
+   * \brief copy constructor
+   * \param other The value to be copied
+   */
+  inline NDArray(const NDArray& other);  // NOLINT(*)
+  /*!
+   * \brief move constructor
+   * \param other The value to be moved
+   */
+  NDArray(NDArray&& other)  // NOLINT(*)
+      : data_(other.data_) {
+    other.data_ = nullptr;
+  }
+  /*! \brief destructor */
+  ~NDArray() {
+    this->reset();
+  }
+  /*!
+   * \brief Swap this array with another NDArray
+   * \param other The other NDArray
+   */
+  void swap(NDArray& other) {  // NOLINT(*)
+    std::swap(data_, other.data_);
+  }
+  /*!
+   * \brief copy assignment
+   * \param other The value to be assigned.
+   * \return reference to self.
+   */
+  NDArray& operator=(const NDArray& other) {  // NOLINT(*)
+    // copy-and-swap idiom
+    NDArray(other).swap(*this);  // NOLINT(*)
+    return *this;
+  }
+  /*!
+   * \brief move assignment
+   * \param other The value to be assigned.
+   * \return reference to self.
+   */
+  NDArray& operator=(NDArray&& other) {  // NOLINT(*)
+    // copy-and-swap idiom
+    NDArray(std::move(other)).swap(*this);  // NOLINT(*)
+    return *this;
+  }
+  /*! \return If NDArray is defined */
+  bool defined() const {
+    return data_ != nullptr;
+  }
+  /*! \return If both NDArrays reference the same container */
+  bool same_as(const NDArray& other) const {
+    return data_ == other.data_;
+  }
+  /*! \brief reset the content of NDArray to be nullptr */
+  inline void reset();
+  /*!
+   * \brief Copy data content from another array.
+   * \param other The source array to be copied from.
+   * \note The copy may happen asynchronously if it involves a GPU context.
+   *       TVMSynchronize is necessary.
+   */
+  inline void copyfrom(DLTensor* other);
+  inline void copyfrom(const NDArray& other);
+  /*!
+   * \brief Copy data content into another array.
+   * \param other The target array to be copied into.
+   * \note The copy may happen asynchronously if it involves a GPU context.
+   *       TVMSynchronize is necessary.
+   */
+  inline void copyto(DLTensor* other);
+  inline void copyto(const NDArray& other);
+  /*!
+   * \return the reference counter
+   * \note this number is approximate in multi-threaded setting.
+   */
+  inline int use_count() const;
+  /*! \return Pointer to content of DLTensor */
+  inline const DLTensor* operator->() const;
+
+  /*!
+   * \brief Create an NDArray that shares the data memory with the current one.
+   * \param shape The shape of the new array.
+   * \param dtype The data type of the new array.
+   * \note The memory size of the new array must not exceed that of the current one.
+   */
+  TVM_DLL NDArray create_view(
+      std::vector<int64_t> shape, DLDataType dtype);
+  /*!
+   * \brief Create a reference view of the NDArray that
+   *  is represented as a DLManagedTensor.
+   * \return A DLManagedTensor
+   */
+  TVM_DLL DLManagedTensor* to_dlpack() const;
+  /*!
+   * \brief Create an empty NDArray.
+   * \param shape The shape of the new array.
+   * \param dtype The data type of the new array.
+   * \param ctx The context of the Array.
+   * \return The created Array
+   */
+  TVM_DLL static NDArray empty(std::vector<int64_t> shape,
+                               DLDataType dtype,
+                               DLContext ctx);
+  /*!
+   * \brief Create a NDArray backed by dlpack tensor.
+   * \return The created NDArray view.
+   */
+  TVM_DLL static NDArray from_dlpack(DLManagedTensor* tensor);
+  /*!
+   * \brief Function to copy data from one array to another.
+   * \param from The source array.
+   * \param to The target array.
+   * \param stream The stream used in copy.
+   */
+  TVM_DLL static void CopyFromTo(
+      DLTensor* from, DLTensor* to, TVMStreamHandle stream = nullptr);
+
+  // internal namespace
+  struct Internal;
+ private:
+  /*! \brief Internal Data content */
+  Container* data_{nullptr};
+  // enable internal functions
+  friend struct Internal;
+  friend class TVMRetValue;
+  friend class TVMArgsSetter;
+};
+
+/*!
+ * \brief Reference counted Container object used to back NDArray.
+ *
+ *  This object is DLTensor compatible:
+ *    the pointer to the NDArrayContainer can be directly
+ *    interpreted as a DLTensor*
+ *
+ * \note do not use this structure directly, use NDArray.
+ */
+struct NDArray::Container {
+ public:
+  // NOTE: the first part of this structure is the same as
+  // DLManagedTensor, note that, however, the deleter
+  // is only called when the reference counter goes to 0
+  /*!
+   * \brief The corresponding dl_tensor field.
+   * \note it is important that the first field is DLTensor
+   *  So that this data structure is DLTensor compatible.
+   *  The head ptr of this struct can be viewed as DLTensor*.
+   */
+  DLTensor dl_tensor;
+  /*! \brief additional context, reserved for recycling */
+  void* manager_ctx{nullptr};
+  /*! \brief Customized deleter */
+  void (*deleter)(Container* self) = nullptr;
+  /*! \brief default constructor */
+  Container() {
+    dl_tensor.data = nullptr;
+    dl_tensor.ndim = 0;
+    dl_tensor.shape = nullptr;
+    dl_tensor.strides = nullptr;
+    dl_tensor.byte_offset = 0;
+  }
+  /*! \brief developer function, increases reference counter */
+  void IncRef() {
+    ref_counter_.fetch_add(1, std::memory_order_relaxed);
+  }
+  /*! \brief developer function, decrease reference counter */
+  void DecRef() {
+    if (ref_counter_.fetch_sub(1, std::memory_order_release) == 1) {
+      std::atomic_thread_fence(std::memory_order_acquire);
+      if (this->deleter != nullptr) {
+        (*this->deleter)(this);
+      }
+    }
+  }
+
+ private:
+  friend class NDArray;
+  /*!
+   * \brief The shape container,
+   *  can be used for shape data.
+   */
+  std::vector<int64_t> shape_;
+  /*! \brief The reference counter */
+  std::atomic<int> ref_counter_{0};
+};
+
+// implementations
+inline NDArray::NDArray(Container* data)
+    : data_(data) {
+  data_->IncRef();
+}
+
+inline NDArray::NDArray(const NDArray& other)
+    : data_(other.data_) {
+  data_->IncRef();
+}
+
+inline void NDArray::reset() {
+  if (data_ != nullptr) {
+    data_->DecRef();
+    data_ = nullptr;
+  }
+}
+
+inline void NDArray::copyfrom(DLTensor* other) {
+  CHECK(data_ != nullptr);
+  CopyFromTo(other, &(data_->dl_tensor));
+}
+
+inline void NDArray::copyfrom(const NDArray& other) {
+  CHECK(data_ != nullptr);
+  CHECK(other.data_ != nullptr);
+  CopyFromTo(&(other.data_->dl_tensor), &(data_->dl_tensor));
+}
+
+inline void NDArray::copyto(DLTensor* other) {
+  CHECK(data_ != nullptr);
+  CopyFromTo(&(data_->dl_tensor), other);
+}
+
+inline void NDArray::copyto(const NDArray& other) {
+  CHECK(data_ != nullptr);
+  CHECK(other.data_ != nullptr);
+  CopyFromTo(&(data_->dl_tensor), &(other.data_->dl_tensor));
+}
+
+inline int NDArray::use_count() const {
+  if (data_ == nullptr) return 0;
+  return data_->ref_counter_.load(std::memory_order_relaxed);
+}
+
+inline const DLTensor* NDArray::operator->() const {
+  return &(data_->dl_tensor);
+}
+
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // TVM_RUNTIME_NDARRAY_H_
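A short usage sketch of the value semantics defined above, not part of the patch (patch-1 spellings empty/create_view; String2TVMType comes from packed_func.h, shown next):

    using tvm::runtime::NDArray;

    void Example() {
      NDArray a = NDArray::empty({2, 3}, String2TVMType("float32"),
                                 DLContext{kDLCPU, 0});
      NDArray b = a;                             // copy: a.use_count() == 2
      NDArray v = a.create_view({6}, a->dtype);  // same data, new shape
      b.reset();                                 // drops one reference only
    }                                            // last owner frees the data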
diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h
index 16266b4f99c9..6d8df4a5e3d6 100644
--- a/include/tvm/runtime/packed_func.h
+++ b/include/tvm/runtime/packed_func.h
@@ -16,6 +16,7 @@
 #include <type_traits>
 #include "./c_runtime_api.h"
 #include "./module.h"
+#include "./ndarray.h"
 
 namespace HalideIR {
 // Forward declare type for extensions
@@ -249,10 +250,22 @@ class TVMPODValue_ {
     TVM_CHECK_TYPE_CODE(type_code_, kHandle);
     return value_.v_handle;
   }
-  operator TVMArray*() const {
-    if (type_code_ == kNull) return nullptr;
-    TVM_CHECK_TYPE_CODE(type_code_, kArrayHandle);
-    return static_cast<TVMArray*>(value_.v_handle);
+  operator DLTensor*() const {
+    if (type_code_ == kArrayHandle ||
+        type_code_ == kNDArrayContainer) {
+      return static_cast<DLTensor*>(value_.v_handle);
+    } else {
+      if (type_code_ == kNull) return nullptr;
+      LOG(FATAL) << "Expected "
+                 << "DLTensor* or NDArray but got "
+                 << TypeCode2Str(type_code_);
+      return nullptr;
+    }
+  }
+  operator NDArray() const {
+    if (type_code_ == kNull) return NDArray();
+    TVM_CHECK_TYPE_CODE(type_code_, kNDArrayContainer);
+    return NDArray(static_cast<NDArray::Container*>(value_.v_handle));
   }
   operator TVMContext() const {
     TVM_CHECK_TYPE_CODE(type_code_, kTVMContext);
@@ -312,8 +325,10 @@ class TVMArgValue : public TVMPODValue_ {
   using TVMPODValue_::operator int;
   using TVMPODValue_::operator bool;
   using TVMPODValue_::operator void*;
-  using TVMPODValue_::operator TVMArray*;
+  using TVMPODValue_::operator DLTensor*;
+  using TVMPODValue_::operator NDArray;
   using TVMPODValue_::operator TVMContext;
+
   // conversion operator.
   operator std::string() const {
     if (type_code_ == kTVMType) {
@@ -394,8 +409,9 @@ class TVMRetValue : public TVMPODValue_ {
   using TVMPODValue_::operator int;
   using TVMPODValue_::operator bool;
   using TVMPODValue_::operator void*;
-  using TVMPODValue_::operator TVMArray*;
+  using TVMPODValue_::operator DLTensor*;
   using TVMPODValue_::operator TVMContext;
+  using TVMPODValue_::operator NDArray;
   // Disable copy and assign from another value, but allow move.
   TVMRetValue(const TVMRetValue& other) {
     this->Assign(other);
@@ -477,6 +493,13 @@ class TVMRetValue : public TVMPODValue_ {
     this->SwitchToClass(kBytes, std::string(value.data, value.size));
     return *this;
   }
+  TVMRetValue& operator=(NDArray other) {
+    this->Clear();
+    type_code_ = kNDArrayContainer;
+    value_.v_handle = other.data_;
+    other.data_ = nullptr;
+    return *this;
+  }
   TVMRetValue& operator=(PackedFunc f) {
     this->SwitchToClass(kFuncHandle, f);
     return *this;
@@ -559,6 +582,10 @@ class TVMRetValue : public TVMPODValue_ {
         SwitchToClass<Module>(kModuleHandle, other);
         break;
       }
+      case kNDArrayContainer: {
+        *this = other.operator NDArray();
+        break;
+      }
       case kNodeHandle: {
         SwitchToClass<std::shared_ptr<Node> >(
             kNodeHandle, *other.template ptr<std::shared_ptr<Node> >());
@@ -607,6 +634,10 @@ class TVMRetValue : public TVMPODValue_ {
       case kFuncHandle: delete ptr<PackedFunc>(); break;
      case kModuleHandle: delete ptr<Module>(); break;
      case kNodeHandle: delete ptr<std::shared_ptr<Node> >(); break;
+      case kNDArrayContainer: {
+        static_cast<NDArray::Container*>(value_.v_handle)->DecRef();
+        break;
+      }
     }
     if (type_code_ > kExtBegin) {
 #if TVM_RUNTIME_HEADER_ONLY
@@ -635,6 +666,7 @@ inline const char* TypeCode2Str(int type_code) {
     case kTVMContext: return "TVMContext";
     case kFuncHandle: return "FunctionHandle";
     case kModuleHandle: return "ModuleHandle";
+    case kNDArrayContainer: return "NDArrayContainer";
     default: LOG(FATAL) << "unknown type_code="
                         << static_cast<int>(type_code); return "";
   }
@@ -776,7 +808,7 @@ class TVMArgsSetter {
     values_[i].v_handle = value;
     type_codes_[i] = kHandle;
   }
-  void operator()(size_t i, TVMArray* value) const {
+  void operator()(size_t i, DLTensor* value) const {
     values_[i].v_handle = value;
     type_codes_[i] = kArrayHandle;
   }
@@ -811,6 +843,10 @@ class TVMArgsSetter {
     values_[i].v_handle = const_cast<Module*>(&value);
     type_codes_[i] = kModuleHandle;
   }
+  void operator()(size_t i, const NDArray& value) const {  // NOLINT(*)
+    values_[i].v_handle = value.data_;
+    type_codes_[i] = kNDArrayContainer;
+  }
   void operator()(size_t i, const TVMRetValue& value) const {  // NOLINT(*)
     if (value.type_code() == kStr) {
       values_[i].v_str = value.ptr<std::string>()->c_str();
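With these operators a PackedFunc body can take either an owned or a borrowed view of the same argument. A sketch, not from the patch:

    using namespace tvm::runtime;

    PackedFunc f([](TVMArgs args, TVMRetValue* rv) {
      NDArray nd = args[0];     // shares ownership: IncRef on the container
      DLTensor* raw = args[0];  // borrowed pointer, no refcount change
      *rv = nd;                 // the return value holds its own reference
    });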
+ << "DLTensor* or NDArray but get " + << TypeCode2Str(type_code_); + return nullptr; + } + } + operator NDArray() const { + if (type_code_ == kNull) return NDArray(); + TVM_CHECK_TYPE_CODE(type_code_, kNDArrayContainer); + return NDArray(static_cast(value_.v_handle)); } operator TVMContext() const { TVM_CHECK_TYPE_CODE(type_code_, kTVMContext); @@ -312,8 +325,10 @@ class TVMArgValue : public TVMPODValue_ { using TVMPODValue_::operator int; using TVMPODValue_::operator bool; using TVMPODValue_::operator void*; - using TVMPODValue_::operator TVMArray*; + using TVMPODValue_::operator DLTensor*; + using TVMPODValue_::operator NDArray; using TVMPODValue_::operator TVMContext; + // conversion operator. operator std::string() const { if (type_code_ == kTVMType) { @@ -394,8 +409,9 @@ class TVMRetValue : public TVMPODValue_ { using TVMPODValue_::operator int; using TVMPODValue_::operator bool; using TVMPODValue_::operator void*; - using TVMPODValue_::operator TVMArray*; + using TVMPODValue_::operator DLTensor*; using TVMPODValue_::operator TVMContext; + using TVMPODValue_::operator NDArray; // Disable copy and assign from another value, but allow move. TVMRetValue(const TVMRetValue& other) { this->Assign(other); @@ -477,6 +493,13 @@ class TVMRetValue : public TVMPODValue_ { this->SwitchToClass(kBytes, std::string(value.data, value.size)); return *this; } + TVMRetValue& operator=(NDArray other) { + this->Clear(); + type_code_ = kNDArrayContainer; + value_.v_handle = other.data_; + other.data_ = nullptr; + return *this; + } TVMRetValue& operator=(PackedFunc f) { this->SwitchToClass(kFuncHandle, f); return *this; @@ -559,6 +582,10 @@ class TVMRetValue : public TVMPODValue_ { SwitchToClass(kModuleHandle, other); break; } + case kNDArrayContainer: { + *this = other.operator NDArray(); + break; + } case kNodeHandle: { SwitchToClass >( kNodeHandle, *other.template ptr >()); @@ -607,6 +634,10 @@ class TVMRetValue : public TVMPODValue_ { case kFuncHandle: delete ptr(); break; case kModuleHandle: delete ptr(); break; case kNodeHandle: delete ptr >(); break; + case kNDArrayContainer: { + static_cast(value_.v_handle)->DecRef(); + break; + } } if (type_code_ > kExtBegin) { #if TVM_RUNTIME_HEADER_ONLY @@ -635,6 +666,7 @@ inline const char* TypeCode2Str(int type_code) { case kTVMContext: return "TVMContext"; case kFuncHandle: return "FunctionHandle"; case kModuleHandle: return "ModuleHandle"; + case kNDArrayContainer: return "NDArrayContainer"; default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); return ""; } @@ -776,7 +808,7 @@ class TVMArgsSetter { values_[i].v_handle = value; type_codes_[i] = kHandle; } - void operator()(size_t i, TVMArray* value) const { + void operator()(size_t i, DLTensor* value) const { values_[i].v_handle = value; type_codes_[i] = kArrayHandle; } @@ -811,6 +843,10 @@ class TVMArgsSetter { values_[i].v_handle = const_cast(&value); type_codes_[i] = kModuleHandle; } + void operator()(size_t i, const NDArray& value) const { // NOLINT(*) + values_[i].v_handle = value.data_; + type_codes_[i] = kNDArrayContainer; + } void operator()(size_t i, const TVMRetValue& value) const { // NOLINT(*) if (value.type_code() == kStr) { values_[i].v_str = value.ptr()->c_str(); diff --git a/python/tvm/_ffi/_ctypes/function.py b/python/tvm/_ffi/_ctypes/function.py index 189d9964baf5..602af3ef858b 100644 --- a/python/tvm/_ffi/_ctypes/function.py +++ b/python/tvm/_ffi/_ctypes/function.py @@ -94,7 +94,8 @@ def _make_tvm_args(args, temp_args): type_codes[i] = TypeCode.NULL elif isinstance(arg, 
diff --git a/python/tvm/_ffi/_cython/base.pxi b/python/tvm/_ffi/_cython/base.pxi
index c027c723de08..50a99245f793 100644
--- a/python/tvm/_ffi/_cython/base.pxi
+++ b/python/tvm/_ffi/_cython/base.pxi
@@ -18,6 +18,7 @@ cdef enum TVMTypeCode:
     kFuncHandle = 10
     kStr = 11
     kBytes = 12
+    kNDArrayContainer = 13
    kExtBegin = 15
 
 cdef extern from "tvm/runtime/c_runtime_api.h":
diff --git a/python/tvm/_ffi/_cython/function.pxi b/python/tvm/_ffi/_cython/function.pxi
index 06cda82624b9..989f5b8e7b47 100644
--- a/python/tvm/_ffi/_cython/function.pxi
+++ b/python/tvm/_ffi/_cython/function.pxi
@@ -84,7 +84,8 @@ cdef inline int make_arg(object arg,
         tcode[0] = kNodeHandle
     elif isinstance(arg, NDArrayBase):
         value[0].v_handle = (<NDArrayBase>arg).chandle
-        tcode[0] = kArrayHandle
+        tcode[0] = (kNDArrayContainer if
+                    not (<NDArrayBase>arg).c_is_view else kArrayHandle)
     elif isinstance(arg, _TVM_COMPATS):
         ptr = arg._tvm_handle
         value[0].v_handle = <void*>(ptr)
@@ -173,6 +174,8 @@ cdef inline object make_ret(TVMValue value, int tcode):
         return value.v_int64
     elif tcode == kFloat:
         return value.v_float64
+    elif tcode == kNDArrayContainer:
+        return c_make_array(value.v_handle, False)
     elif tcode == kStr:
         return py_str(value.v_str)
     elif tcode == kBytes:
diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
index 9609f867576b..612b54649d74 100644
--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -25,6 +25,7 @@ class TypeCode(object):
     FUNC_HANDLE = 10
     STR = 11
     BYTES = 12
+    NDARRAY_CONTAINER = 13
     EXT_BEGIN = 15
 
 class TVMByteArray(ctypes.Structure):
diff --git a/src/pass/make_api.cc b/src/pass/make_api.cc
index 6290f63e611d..206bd95010ce 100644
--- a/src/pass/make_api.cc
+++ b/src/pass/make_api.cc
@@ -102,6 +102,7 @@ LoweredFunc MakeAPI(Stmt body,
         msg << name << ": Expect arg[" << i << "] to be pointer";
         seq_check.emplace_back(
             AssertStmt::make(tcode == kHandle ||
+                             tcode == kNDArrayContainer ||
                              tcode == kArrayHandle ||
                              tcode == kNull, msg.str(), nop));
       } else if (t.is_int() || t.is_uint()) {
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index 5eb39abcc71a..7a7d7ab9f4db 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -124,54 +124,6 @@ void DeviceAPI::SyncStreamFromTo(TVMContext ctx,
                                  TVMStreamHandle event_dst) {
   LOG(FATAL) << "Device does not support stream api.";
 }
-
-inline TVMArray* TVMArrayCreate_() {
-  TVMArray* arr = new TVMArray();
-  arr->shape = nullptr;
-  arr->strides = nullptr;
-  arr->ndim = 0;
-  arr->data = nullptr;
-  return arr;
-}
-
-inline void TVMArrayFree_(TVMArray* arr) {
-  if (arr != nullptr) {
-    // ok to delete nullptr
-    delete[] arr->shape;
-    delete[] arr->strides;
-    if (arr->data != nullptr) {
-      DeviceAPIManager::Get(arr->ctx)->FreeDataSpace(
-          arr->ctx, arr->data);
-    }
-  }
-  delete arr;
-}
-
-inline void VerifyType(int dtype_code, int dtype_bits, int dtype_lanes) {
-  CHECK_GE(dtype_lanes, 1);
-  if (dtype_code == kDLFloat) {
-    CHECK_EQ(dtype_bits % 8, 0);
-  } else {
-    CHECK_EQ(dtype_bits % 8, 0);
-  }
-  CHECK_EQ(dtype_bits & (dtype_bits - 1), 0);
-}
-
-inline size_t GetDataSize(TVMArray* arr) {
-  size_t size = 1;
-  for (tvm_index_t i = 0; i < arr->ndim; ++i) {
-    size *= arr->shape[i];
-  }
-  size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8;
-  return size;
-}
-
-inline size_t GetDataAlignment(TVMArray* arr) {
-  size_t align = (arr->dtype.bits / 8) * arr->dtype.lanes;
-  if (align < kAllocAlignment) return kAllocAlignment;
-  return align;
-}
 }  // namespace runtime
 }  // namespace tvm
 
@@ -370,110 +322,6 @@ int TVMFuncCreateFromCFunc(TVMPackedCFunc func,
   API_END();
 }
 
-int TVMArrayAlloc(const tvm_index_t* shape,
-                  int ndim,
-                  int dtype_code,
-                  int dtype_bits,
-                  int dtype_lanes,
-                  int device_type,
-                  int device_id,
-                  TVMArrayHandle* out) {
-  TVMArray* arr = nullptr;
-  API_BEGIN();
-  // shape
-  arr = TVMArrayCreate_();
-  // ndim
-  arr->ndim = ndim;
-  // dtype
-  VerifyType(dtype_code, dtype_bits, dtype_lanes);
-  arr->dtype.code = static_cast<uint8_t>(dtype_code);
-  arr->dtype.bits = static_cast<uint8_t>(dtype_bits);
-  arr->dtype.lanes = static_cast<uint16_t>(dtype_lanes);
-  if (ndim != 0) {
-    tvm_index_t* shape_copy = new tvm_index_t[ndim];
-    std::copy(shape, shape + ndim, shape_copy);
-    arr->shape = shape_copy;
-  } else {
-    arr->shape = nullptr;
-  }
-  // ctx
-  arr->ctx.device_type = static_cast<DLDeviceType>(device_type);
-  arr->ctx.device_id = device_id;
-  size_t size = GetDataSize(arr);
-  size_t alignment = GetDataAlignment(arr);
-  arr->data = DeviceAPIManager::Get(arr->ctx)->AllocDataSpace(
-      arr->ctx, size, alignment, arr->dtype);
-  *out = arr;
-  API_END_HANDLE_ERROR(TVMArrayFree_(arr));
-}
-
-int TVMArrayFree(TVMArrayHandle handle) {
-  API_BEGIN();
-  TVMArray* arr = handle;
-  TVMArrayFree_(arr);
-  API_END();
-}
-
-int TVMArrayCopyFromTo(TVMArrayHandle from,
-                       TVMArrayHandle to,
-                       TVMStreamHandle stream) {
-  API_BEGIN();
-  size_t from_size = GetDataSize(from);
-  size_t to_size = GetDataSize(to);
-  CHECK_EQ(from_size, to_size)
-      << "TVMArrayCopyFromTo: The size must exactly match";
-
-  CHECK(from->ctx.device_type == to->ctx.device_type
-        || from->ctx.device_type == kDLCPU
-        || to->ctx.device_type == kDLCPU)
-      << "Can not copy across different ctx types directly";
-
-  // Use the context that is *not* a cpu context to get the correct device
-  // api manager.
-  TVMContext ctx = from->ctx.device_type != kDLCPU ? from->ctx : to->ctx;
-
-  DeviceAPIManager::Get(ctx)->CopyDataFromTo(
-      from->data, static_cast<size_t>(from->byte_offset),
-      to->data, static_cast<size_t>(to->byte_offset),
-      from_size, from->ctx, to->ctx, from->dtype, stream);
-
-  API_END();
-}
-
-int TVMArrayCopyFromBytes(TVMArrayHandle handle,
-                          void* data,
-                          size_t nbytes) {
-  API_BEGIN();
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
-  size_t arr_size = GetDataSize(handle);
-  CHECK_EQ(arr_size, nbytes)
-      << "TVMArrayCopyFromBytes: size mismatch";
-  DeviceAPIManager::Get(handle->ctx)->CopyDataFromTo(
-      data, 0,
-      handle->data, static_cast<size_t>(handle->byte_offset),
-      nbytes, cpu_ctx, handle->ctx, handle->dtype, nullptr);
-  API_END();
-}
-
-int TVMArrayCopyToBytes(TVMArrayHandle handle,
-                        void* data,
-                        size_t nbytes) {
-  API_BEGIN();
-  TVMContext cpu_ctx;
-  cpu_ctx.device_type = kDLCPU;
-  cpu_ctx.device_id = 0;
-  size_t arr_size = GetDataSize(handle);
-  CHECK_EQ(arr_size, nbytes)
-      << "TVMArrayCopyToBytes: size mismatch";
-  DeviceAPIManager::Get(handle->ctx)->CopyDataFromTo(
-      handle->data, static_cast<size_t>(handle->byte_offset),
-      data, 0,
-      nbytes, handle->ctx, cpu_ctx, handle->dtype, nullptr);
-  API_END();
-}
-
 int TVMStreamCreate(int device_type, int device_id, TVMStreamHandle* out) {
   API_BEGIN();
   TVMContext ctx;
diff --git a/src/runtime/ndarray.cc b/src/runtime/ndarray.cc
new file mode 100644
index 000000000000..6f10d0feea9f
--- /dev/null
+++ b/src/runtime/ndarray.cc
@@ -0,0 +1,244 @@
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file ndarray.cc
+ * \brief NDArray container infrastructure.
+ */
+#include <dmlc/logging.h>
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/device_api.h>
+#include "./runtime_base.h"
+
+// deleter for arrays used by DLPack exporter
+extern "C" void NDArrayDLPackDeleter(DLManagedTensor* tensor);
+
+namespace tvm {
+namespace runtime {
+
+inline void VerifyDataType(DLDataType dtype) {
+  CHECK_GE(dtype.lanes, 1);
+  if (dtype.code == kDLFloat) {
+    CHECK_EQ(dtype.bits % 8, 0);
+  } else {
+    CHECK_EQ(dtype.bits % 8, 0);
+  }
+  CHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
+}
+
+inline size_t GetDataSize(const DLTensor& arr) {
+  size_t size = 1;
+  for (tvm_index_t i = 0; i < arr.ndim; ++i) {
+    size *= arr.shape[i];
+  }
+  size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8;
+  return size;
+}
+
+inline size_t GetDataAlignment(const DLTensor& arr) {
+  size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes;
+  if (align < kAllocAlignment) return kAllocAlignment;
+  return align;
+}
+
+struct NDArray::Internal {
+  // Default deleter for the container
+  static void DefaultDeleter(NDArray::Container* ptr) {
+    using tvm::runtime::NDArray;
+    if (ptr->manager_ctx != nullptr) {
+      static_cast<NDArray::Container*>(ptr->manager_ctx)->DecRef();
+    } else if (ptr->dl_tensor.data != nullptr) {
+      tvm::runtime::DeviceAPI::Get(ptr->dl_tensor.ctx)->FreeDataSpace(
+          ptr->dl_tensor.ctx, ptr->dl_tensor.data);
+    }
+    delete ptr;
+  }
+  // Deleter for NDArray converted from DLPack
+  static void DLPackDeleter(NDArray::Container* ptr) {
+    DLManagedTensor* tensor = static_cast<DLManagedTensor*>(ptr->manager_ctx);
+    if (tensor->deleter != nullptr) {
+      (*tensor->deleter)(tensor);
+    }
+    delete ptr;
+  }
+  // local create function that
+  // allocates everything except for the space
+  static NDArray Create(std::vector<int64_t> shape,
+                        DLDataType dtype,
+                        DLContext ctx) {
+    VerifyDataType(dtype);
+    // critical zone
+    NDArray::Container* data = new NDArray::Container();
+    data->deleter = DefaultDeleter;
+    NDArray ret(data);
+    ret.data_ = data;
+    // RAII now in effect
+    // setup shape
+    data->shape_ = std::move(shape);
+    data->dl_tensor.shape = dmlc::BeginPtr(data->shape_);
+    data->dl_tensor.ndim = static_cast<int>(data->shape_.size());
+    // setup dtype
+    data->dl_tensor.dtype = dtype;
+    // setup ctx
+    data->dl_tensor.ctx = ctx;
+    return ret;
+  }
+  // Implementation of API function
+  static DLTensor* MoveAsDLTensor(NDArray arr) {
+    DLTensor* tensor = const_cast<DLTensor*>(arr.operator->());
+    CHECK(reinterpret_cast<DLTensor*>(arr.data_) == tensor);
+    arr.data_ = nullptr;
+    return tensor;
+  }
+};
+
+NDArray NDArray::create_view(std::vector<int64_t> shape,
+                             DLDataType dtype) {
+  CHECK(data_ != nullptr);
+  CHECK(data_->dl_tensor.strides == nullptr)
+      << "Can only create view for compact tensor";
+  NDArray ret = Internal::Create(shape, dtype, data_->dl_tensor.ctx);
+  ret.data_->dl_tensor.byte_offset =
+      this->data_->dl_tensor.byte_offset;
+  size_t curr_size = GetDataSize(this->data_->dl_tensor);
+  size_t view_size = GetDataSize(ret.data_->dl_tensor);
+  CHECK_LE(view_size, curr_size)
+      << "Cannot create a view that requires more memory than the current one";
+  // increase ref count
+  this->data_->IncRef();
+  ret.data_->manager_ctx = this->data_;
+  ret.data_->dl_tensor.data = this->data_->dl_tensor.data;
+  return ret;
+}
+
+DLManagedTensor* NDArray::to_dlpack() const {
+  CHECK(data_ != nullptr);
+  DLManagedTensor* ret = new DLManagedTensor();
+  ret->dl_tensor = data_->dl_tensor;
+  ret->manager_ctx = data_;
+  data_->IncRef();
+  ret->deleter = NDArrayDLPackDeleter;
+  return ret;
+}
+
+NDArray NDArray::empty(std::vector<int64_t> shape,
+                       DLDataType dtype,
+                       DLContext ctx) {
+  NDArray ret = Internal::Create(shape, dtype, ctx);
+  // setup memory content
+  size_t size = GetDataSize(ret.data_->dl_tensor);
+  size_t alignment = GetDataAlignment(ret.data_->dl_tensor);
+  ret.data_->dl_tensor.data =
+      DeviceAPI::Get(ret->ctx)->AllocDataSpace(
+          ret->ctx, size, alignment, ret->dtype);
+  return ret;
+}
+
+NDArray NDArray::from_dlpack(DLManagedTensor* tensor) {
+  NDArray::Container* data = new NDArray::Container();
+  data->deleter = Internal::DLPackDeleter;
+  data->manager_ctx = tensor;
+  data->dl_tensor = tensor->dl_tensor;
+  return NDArray(data);
+}
+
+void NDArray::CopyFromTo(DLTensor* from,
+                         DLTensor* to,
+                         TVMStreamHandle stream) {
+  size_t from_size = GetDataSize(*from);
+  size_t to_size = GetDataSize(*to);
+  CHECK_EQ(from_size, to_size)
+      << "TVMArrayCopyFromTo: The size must exactly match";
+
+  CHECK(from->ctx.device_type == to->ctx.device_type
+        || from->ctx.device_type == kDLCPU
+        || to->ctx.device_type == kDLCPU)
+      << "Can not copy across different ctx types directly";
+
+  // Use the context that is *not* a cpu context to get the correct device
+  // api manager.
+  TVMContext ctx = from->ctx.device_type != kDLCPU ? from->ctx : to->ctx;
+
+  DeviceAPI::Get(ctx)->CopyDataFromTo(
+      from->data, static_cast<size_t>(from->byte_offset),
+      to->data, static_cast<size_t>(to->byte_offset),
+      from_size, from->ctx, to->ctx, from->dtype, stream);
+}
+
+}  // namespace runtime
+}  // namespace tvm
+
+using namespace tvm::runtime;
+
+void NDArrayDLPackDeleter(DLManagedTensor* tensor) {
+  static_cast<NDArray::Container*>(tensor->manager_ctx)->DecRef();
+  delete tensor;
+}
+
+int TVMArrayAlloc(const tvm_index_t* shape,
+                  int ndim,
+                  int dtype_code,
+                  int dtype_bits,
+                  int dtype_lanes,
+                  int device_type,
+                  int device_id,
+                  TVMArrayHandle* out) {
+  API_BEGIN();
+  DLDataType dtype;
+  dtype.code = static_cast<uint8_t>(dtype_code);
+  dtype.bits = static_cast<uint8_t>(dtype_bits);
+  dtype.lanes = static_cast<uint16_t>(dtype_lanes);
+  DLContext ctx;
+  ctx.device_type = static_cast<DLDeviceType>(device_type);
+  ctx.device_id = device_id;
+  *out = NDArray::Internal::MoveAsDLTensor(
+      NDArray::empty(std::vector<int64_t>(shape, shape + ndim), dtype, ctx));
+  API_END();
+}
+
+int TVMArrayFree(TVMArrayHandle handle) {
+  API_BEGIN();
+  reinterpret_cast<NDArray::Container*>(handle)->DecRef();
+  API_END();
+}
+
+int TVMArrayCopyFromTo(TVMArrayHandle from,
+                       TVMArrayHandle to,
+                       TVMStreamHandle stream) {
+  API_BEGIN();
+  NDArray::CopyFromTo(from, to, stream);
+  API_END();
+}
+
+int TVMArrayCopyFromBytes(TVMArrayHandle handle,
+                          void* data,
+                          size_t nbytes) {
+  API_BEGIN();
+  TVMContext cpu_ctx;
+  cpu_ctx.device_type = kDLCPU;
+  cpu_ctx.device_id = 0;
+  size_t arr_size = GetDataSize(*handle);
+  CHECK_EQ(arr_size, nbytes)
+      << "TVMArrayCopyFromBytes: size mismatch";
+  DeviceAPI::Get(handle->ctx)->CopyDataFromTo(
+      data, 0,
+      handle->data, static_cast<size_t>(handle->byte_offset),
+      nbytes, cpu_ctx, handle->ctx, handle->dtype, nullptr);
+  API_END();
+}
+
+int TVMArrayCopyToBytes(TVMArrayHandle handle,
+                        void* data,
+                        size_t nbytes) {
+  API_BEGIN();
+  TVMContext cpu_ctx;
+  cpu_ctx.device_type = kDLCPU;
+  cpu_ctx.device_id = 0;
+  size_t arr_size = GetDataSize(*handle);
+  CHECK_EQ(arr_size, nbytes)
+      << "TVMArrayCopyToBytes: size mismatch";
+  DeviceAPI::Get(handle->ctx)->CopyDataFromTo(
+      handle->data, static_cast<size_t>(handle->byte_offset),
+      data, 0,
+      nbytes, handle->ctx, cpu_ctx, handle->dtype, nullptr);
+  API_END();
+}
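The deleters above compose into a DLPack round trip: exporting takes one extra reference on the source container, and the importing NDArray releases it through DLPackDeleter when it dies. A sketch, not part of the patch:

    using tvm::runtime::NDArray;

    void RoundTrip(NDArray a) {
      DLManagedTensor* dlm = a.to_dlpack();   // IncRef on a's container
      NDArray b = NDArray::from_dlpack(dlm);  // b now owns dlm
      // a and b share one buffer; destroying b runs DLPackDeleter,
      // which invokes dlm->deleter and releases the extra reference.
    }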
diff --git a/src/runtime/rpc/rpc_session.cc b/src/runtime/rpc/rpc_session.cc
index 2f181e7edf9a..21fff7b29882 100644
--- a/src/runtime/rpc/rpc_session.cc
+++ b/src/runtime/rpc/rpc_session.cc
@@ -175,7 +175,12 @@ class RPCSession::EventHandler : public dmlc::Stream {
   // send Packed sequence to writer.
   void SendPackedSeq(const TVMValue* arg_values, const int* type_codes, int n) {
     this->Write(n);
-    this->WriteArray(type_codes, n);
+    // NDArray arguments are sent to the remote side as plain DLTensors.
+    for (int i = 0; i < n; ++i) {
+      int tcode = type_codes[i];
+      if (tcode == kNDArrayContainer) tcode = kArrayHandle;
+      this->Write(tcode);
+    }
     // Argument packing.
     for (int i = 0; i < n; ++i) {
       int tcode = type_codes[i];
@@ -207,6 +212,7 @@ class RPCSession::EventHandler : public dmlc::Stream {
           this->Write(handle);
           break;
         }
+        case kNDArrayContainer:
         case kArrayHandle: {
           DLTensor* arr = static_cast<DLTensor*>(value.v_handle);
           TVMContext ctx = StripSessMask(arr->ctx);
diff --git a/tests/cpp/packed_func_test.cc b/tests/cpp/packed_func_test.cc
index 00e428f258a9..14f8a8cd1acf 100644
--- a/tests/cpp/packed_func_test.cc
+++ b/tests/cpp/packed_func_test.cc
@@ -38,6 +38,31 @@ TEST(PackedFunc, Node) {
   CHECK(t.same_as(x));
 }
 
+TEST(PackedFunc, NDArray) {
+  using namespace tvm;
+  using namespace tvm::runtime;
+  auto x = NDArray::empty(
+      {}, String2TVMType("float32"),
+      TVMContext{kDLCPU, 0});
+  reinterpret_cast<float*>(x->data)[0] = 10.0f;
+  CHECK(x.use_count() == 1);
+
+  PackedFunc forward([&](TVMArgs args, TVMRetValue* rv) {
+    *rv = args[0];
+  });
+
+  NDArray ret = PackedFunc([&](TVMArgs args, TVMRetValue* rv) {
+    NDArray y = args[0];
+    DLTensor* ptr = args[0];
+    CHECK(ptr == x.operator->());
+    CHECK(x.same_as(y));
+    CHECK(x.use_count() == 2);
+    *rv = forward(y);
+  })(x);
+  CHECK(ret.use_count() == 2);
+  CHECK(ret.same_as(x));
+}
+
 TEST(PackedFunc, str) {
   using namespace tvm;
   using namespace tvm::runtime;
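The C entry points keep their old signatures but are now thin wrappers over NDArray, so C callers are unaffected. A caller-side sketch (error checking elided; not part of the patch):

    tvm_index_t shape[2] = {2, 3};
    TVMArrayHandle arr = nullptr;
    // internally NDArray::empty(...) + Internal::MoveAsDLTensor
    TVMArrayAlloc(shape, 2, kDLFloat, 32, 1, kDLCPU, 0, &arr);
    // ... use arr->data as a 2x3 float32 buffer ...
    TVMArrayFree(arr);  // DecRef; the container's deleter frees the space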
From 74bd284f88d65a3901bc6c05578e31bbddb38d6b Mon Sep 17 00:00:00 2001
From: tqchen
Date: Wed, 11 Jul 2018 17:22:09 -0700
Subject: [PATCH 2/2] Address review comment

---
 include/tvm/runtime/ndarray.h | 66 ++++++++++++++++++++++-------------
 src/runtime/ndarray.cc        | 24 +++++++------
 tests/cpp/packed_func_test.cc |  2 +-
 3 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/include/tvm/runtime/ndarray.h b/include/tvm/runtime/ndarray.h
index 708a0519b153..dfb06255381a 100644
--- a/include/tvm/runtime/ndarray.h
+++ b/include/tvm/runtime/ndarray.h
@@ -82,44 +82,43 @@ class NDArray {
   }
   /*! \brief reset the content of NDArray to be nullptr */
   inline void reset();
+  /*!
+   * \return the reference counter
+   * \note this number is approximate in multi-threaded setting.
+   */
+  inline int use_count() const;
+  /*! \return Pointer to content of DLTensor */
+  inline const DLTensor* operator->() const;
   /*!
    * \brief Copy data content from another array.
    * \param other The source array to be copied from.
    * \note The copy may happen asynchronously if it involves a GPU context.
    *       TVMSynchronize is necessary.
    */
-  inline void copyfrom(DLTensor* other);
-  inline void copyfrom(const NDArray& other);
+  inline void CopyFrom(DLTensor* other);
+  inline void CopyFrom(const NDArray& other);
   /*!
    * \brief Copy data content into another array.
    * \param other The target array to be copied into.
    * \note The copy may happen asynchronously if it involves a GPU context.
    *       TVMSynchronize is necessary.
   */
-  inline void copyto(DLTensor* other);
-  inline void copyto(const NDArray& other);
-  /*!
-   * \return the reference counter
-   * \note this number is approximate in multi-threaded setting.
-   */
-  inline int use_count() const;
-  /*! \return Pointer to content of DLTensor */
-  inline const DLTensor* operator->() const;
-
+  inline void CopyTo(DLTensor* other);
+  inline void CopyTo(const NDArray& other);
   /*!
    * \brief Create an NDArray that shares the data memory with the current one.
    * \param shape The shape of the new array.
    * \param dtype The data type of the new array.
    * \note The memory size of the new array must not exceed that of the current one.
   */
-  TVM_DLL NDArray create_view(
+  TVM_DLL NDArray CreateView(
       std::vector<int64_t> shape, DLDataType dtype);
   /*!
    * \brief Create a reference view of the NDArray that
    *  is represented as a DLManagedTensor.
    * \return A DLManagedTensor
    */
-  TVM_DLL DLManagedTensor* to_dlpack() const;
+  TVM_DLL DLManagedTensor* ToDLPack() const;
   /*!
    * \brief Create an empty NDArray.
    * \param shape The shape of the new array.
@@ -127,14 +126,21 @@ class NDArray {
    * \param dtype The data type of the new array.
    * \param ctx The context of the Array.
    * \return The created Array
    */
-  TVM_DLL static NDArray empty(std::vector<int64_t> shape,
+  TVM_DLL static NDArray Empty(std::vector<int64_t> shape,
                                DLDataType dtype,
                                DLContext ctx);
   /*!
-   * \brief Create a NDArray backed by dlpack tensor.
+   * \brief Create an NDArray backed by a dlpack tensor.
+   *
+   * This allows us to create an NDArray using the memory
+   * allocated by an external deep learning framework
+   * that is DLPack compatible.
+   *
+   * The memory is retained until the NDArray goes out of scope.
+   *
    * \return The created NDArray view.
    */
-  TVM_DLL static NDArray from_dlpack(DLManagedTensor* tensor);
+  TVM_DLL static NDArray FromDLPack(DLManagedTensor* tensor);
   /*!
    * \brief Function to copy data from one array to another.
    * \param from The source array.
@@ -176,9 +182,20 @@ struct NDArray::Container {
    * The head ptr of this struct can be viewed as DLTensor*.
    */
   DLTensor dl_tensor;
-  /*! \brief additional context, reserved for recycling */
+  /*!
+   * \brief additional context, reserved for recycling
+   * \note We can attach additional content here
+   *  which the current container depends on
+   *  (e.g. reference to original memory when creating views).
+   */
   void* manager_ctx{nullptr};
-  /*! \brief Customized deleter */
+  /*!
+   * \brief Customized deleter
+   *
+   * \note The customized deleter is helpful to enable
+   *  different ways of memory allocation that are not
+   *  currently defined by the system.
+   */
   void (*deleter)(Container* self) = nullptr;
   /*! \brief default constructor */
   Container() {
@@ -213,7 +230,8 @@ struct NDArray::Container {
   std::atomic<int> ref_counter_{0};
 };
 
-// implementations
+// implementations of inline functions
+// the usage of these functions is documented in place.
 inline NDArray::NDArray(Container* data)
     : data_(data) {
   data_->IncRef();
@@ -231,23 +249,23 @@ inline void NDArray::reset() {
   }
 }
 
-inline void NDArray::copyfrom(DLTensor* other) {
+inline void NDArray::CopyFrom(DLTensor* other) {
   CHECK(data_ != nullptr);
   CopyFromTo(other, &(data_->dl_tensor));
 }
 
-inline void NDArray::copyfrom(const NDArray& other) {
+inline void NDArray::CopyFrom(const NDArray& other) {
   CHECK(data_ != nullptr);
   CHECK(other.data_ != nullptr);
   CopyFromTo(&(other.data_->dl_tensor), &(data_->dl_tensor));
 }
 
-inline void NDArray::copyto(DLTensor* other) {
+inline void NDArray::CopyTo(DLTensor* other) {
   CHECK(data_ != nullptr);
   CopyFromTo(&(data_->dl_tensor), other);
 }
 
-inline void NDArray::copyto(const NDArray& other) {
+inline void NDArray::CopyTo(const NDArray& other) {
   CHECK(data_ != nullptr);
   CHECK(other.data_ != nullptr);
   CopyFromTo(&(data_->dl_tensor), &(other.data_->dl_tensor));
 }
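After the rename the copy helpers follow the CamelCase convention; as the notes above say, a device copy may be asynchronous, so the host must synchronize before reading. A sketch, not part of the patch (assumes a 1-D array on some device):

    using tvm::runtime::NDArray;

    void CopyBack(NDArray dev) {
      NDArray host = NDArray::Empty({dev->shape[0]}, dev->dtype,
                                    DLContext{kDLCPU, 0});
      dev.CopyTo(host);  // may be asynchronous on a GPU context
      TVMSynchronize(dev->ctx.device_type, dev->ctx.device_id, nullptr);
    }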
diff --git a/src/runtime/ndarray.cc b/src/runtime/ndarray.cc
index 6f10d0feea9f..f862f32f6e99 100644
--- a/src/runtime/ndarray.cc
+++ b/src/runtime/ndarray.cc
@@ -53,6 +53,10 @@ struct NDArray::Internal {
     delete ptr;
   }
   // Deleter for NDArray converted from DLPack
+  // This is used for data passed in from an external DLPack (DLManagedTensor)
+  // that was not allocated inside of TVM.
+  // This enables us to create an NDArray from memory allocated by other
+  // frameworks that are DLPack compatible.
   static void DLPackDeleter(NDArray::Container* ptr) {
     DLManagedTensor* tensor = static_cast<DLManagedTensor*>(ptr->manager_ctx);
     if (tensor->deleter != nullptr) {
@@ -60,8 +64,8 @@ struct NDArray::Internal {
     }
     delete ptr;
   }
-  // local create function that
-  // allocates everything except for the space
+  // Local create function which allocates tensor metadata
+  // but does not allocate space for the data.
   static NDArray Create(std::vector<int64_t> shape,
                         DLDataType dtype,
                         DLContext ctx) {
@@ -91,8 +95,8 @@ struct NDArray::Internal {
   }
 };
 
-NDArray NDArray::create_view(std::vector<int64_t> shape,
-                             DLDataType dtype) {
+NDArray NDArray::CreateView(std::vector<int64_t> shape,
+                            DLDataType dtype) {
   CHECK(data_ != nullptr);
   CHECK(data_->dl_tensor.strides == nullptr)
       << "Can only create view for compact tensor";
@@ -110,7 +114,7 @@ NDArray NDArray::CreateView(std::vector<int64_t> shape,
   return ret;
 }
 
-DLManagedTensor* NDArray::to_dlpack() const {
+DLManagedTensor* NDArray::ToDLPack() const {
   CHECK(data_ != nullptr);
   DLManagedTensor* ret = new DLManagedTensor();
   ret->dl_tensor = data_->dl_tensor;
@@ -120,9 +124,9 @@ DLManagedTensor* NDArray::ToDLPack() const {
   return ret;
 }
 
-NDArray NDArray::empty(std::vector<int64_t> shape,
-                       DLDataType dtype,
-                       DLContext ctx) {
+NDArray NDArray::Empty(std::vector<int64_t> shape,
+                       DLDataType dtype,
+                       DLContext ctx) {
   NDArray ret = Internal::Create(shape, dtype, ctx);
   // setup memory content
   size_t size = GetDataSize(ret.data_->dl_tensor);
@@ -133,7 +137,7 @@ NDArray NDArray::Empty(std::vector<int64_t> shape,
   return ret;
 }
 
-NDArray NDArray::from_dlpack(DLManagedTensor* tensor) {
+NDArray NDArray::FromDLPack(DLManagedTensor* tensor) {
   NDArray::Container* data = new NDArray::Container();
   data->deleter = Internal::DLPackDeleter;
   data->manager_ctx = tensor;
@@ -191,7 +195,7 @@ int TVMArrayAlloc(const tvm_index_t* shape,
   ctx.device_type = static_cast<DLDeviceType>(device_type);
   ctx.device_id = device_id;
   *out = NDArray::Internal::MoveAsDLTensor(
-      NDArray::empty(std::vector<int64_t>(shape, shape + ndim), dtype, ctx));
+      NDArray::Empty(std::vector<int64_t>(shape, shape + ndim), dtype, ctx));
   API_END();
 }
diff --git a/tests/cpp/packed_func_test.cc b/tests/cpp/packed_func_test.cc
index 14f8a8cd1acf..9b2f1df73731 100644
--- a/tests/cpp/packed_func_test.cc
+++ b/tests/cpp/packed_func_test.cc
@@ -41,7 +41,7 @@ TEST(PackedFunc, Node) {
 TEST(PackedFunc, NDArray) {
   using namespace tvm;
   using namespace tvm::runtime;
-  auto x = NDArray::empty(
+  auto x = NDArray::Empty(
       {}, String2TVMType("float32"),
       TVMContext{kDLCPU, 0});
   reinterpret_cast<float*>(x->data)[0] = 10.0f;