diff --git a/src/runtime/micro/micro_session.cc b/src/runtime/micro/micro_session.cc index a9efa0f56771..cdf373b2654d 100644 --- a/src/runtime/micro/micro_session.cc +++ b/src/runtime/micro/micro_session.cc @@ -398,8 +398,8 @@ std::tuple MicroSession::EncoderAppend(TargetDataLayoutEnc const int* type_codes = args.type_codes; int num_args = args.num_args; - auto tvm_vals_slot = encoder->Alloc(num_args); - auto type_codes_slot = encoder->Alloc(num_args); + auto tvm_vals_alloc = encoder->Alloc(num_args); + auto type_codes_alloc = encoder->Alloc(num_args); for (int i = 0; i < num_args; i++) { switch (type_codes[i]) { @@ -425,7 +425,7 @@ std::tuple MicroSession::EncoderAppend(TargetDataLayoutEnc TVMValue val; val.v_handle = arr_ptr; - tvm_vals_slot.WriteValue(val); + tvm_vals_alloc->WriteValue(val); break; } // TODO(weberlo): Implement `double` and `int64` case. @@ -437,25 +437,24 @@ std::tuple MicroSession::EncoderAppend(TargetDataLayoutEnc break; } } - type_codes_slot.WriteArray(type_codes, num_args); - return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr()); + type_codes_alloc->WriteArray(type_codes, num_args); + encoder->CheckUnfilledAllocs(); + return std::make_tuple(tvm_vals_alloc->start_addr(), type_codes_alloc->start_addr()); } template TargetPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const DLTensor& arr) { - auto tvm_arr_slot = encoder->Alloc(); - auto shape_slot = encoder->Alloc(arr.ndim); - // `shape` and `strides` are stored on the host, so we need to write them to // the device first. The `data` field is already allocated on the device and // is a device pointer, so we don't need to write it. 
- shape_slot.WriteArray(arr.shape, arr.ndim); - TargetPtr shape_dev_addr = shape_slot.start_addr(); + auto shape_alloc = encoder->Alloc(arr.ndim); + shape_alloc->WriteArray(arr.shape, arr.ndim); + TargetPtr shape_dev_addr = shape_alloc->start_addr(); TargetPtr strides_dev_addr = TargetPtr(word_size_, nullptr); if (arr.strides != nullptr) { - auto stride_slot = encoder->Alloc(arr.ndim); - stride_slot.WriteArray(arr.strides, arr.ndim); - strides_dev_addr = stride_slot.start_addr(); + auto stride_alloc = encoder->Alloc(arr.ndim); + stride_alloc->WriteArray(arr.strides, arr.ndim); + strides_dev_addr = stride_alloc->start_addr(); } T dev_arr(TargetVal{word_size_.bits(), reinterpret_cast(arr.data)}, arr.ctx, arr.ndim, @@ -466,8 +465,10 @@ TargetPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const DL // Update the device type to CPU, because from the microcontroller's // perspective, it is. dev_arr.ctx.device_type = DLDeviceType::kDLCPU; - tvm_arr_slot.WriteValue(dev_arr); - return tvm_arr_slot.start_addr(); + + auto tvm_arr_alloc = encoder->Alloc(); + tvm_arr_alloc->WriteValue(dev_arr); + return tvm_arr_alloc->start_addr(); } // TODO(weberlo): switch over entirely to error codes that expand to error diff --git a/src/runtime/micro/micro_session.h b/src/runtime/micro/micro_session.h index ab3afcc5bce8..0b65317d82f1 100644 --- a/src/runtime/micro/micro_session.h +++ b/src/runtime/micro/micro_session.h @@ -315,16 +315,13 @@ struct MicroDevSpace { struct TVMArray32 { TVMArray32(TargetVal data, DLContext ctx, int32_t ndim, DLDataType dtype, TargetVal shape, TargetVal strides, TargetVal byte_offset) - : data(data.uint32()), - ctx(ctx), - ndim(ndim), - pad0(0), - dtype(dtype), - shape(shape.uint32()), - strides(strides.uint32()), - pad1(0), - byte_offset(byte_offset.uint32()), - pad2(0) {} + : data{data.uint32()}, + ctx{ctx}, + ndim{ndim}, + dtype{dtype}, + shape{shape.uint32()}, + strides{strides.uint32()}, + byte_offset{byte_offset.uint32()} {} /*! 
* \brief The opaque data pointer points to the allocated data. @@ -336,8 +333,6 @@ struct TVMArray32 { DLContext ctx; /*! \brief Number of dimensions */ int32_t ndim; - /*! \brief Padding to enforce struct alignment */ - uint32_t pad0; /*! \brief The data type of the pointer */ DLDataType dtype; /*! \brief The shape of the tensor */ @@ -347,12 +342,8 @@ struct TVMArray32 { * can be NULL, indicating tensor is compact. */ uint32_t strides; - /*! \brief Padding to enforce struct alignment */ - uint32_t pad1; /*! \brief The offset in bytes to the beginning pointer to data */ uint32_t byte_offset; - /*! \brief Padding to enforce struct alignment */ - uint32_t pad2; }; /*! \brief TVM array for serialization to 64-bit devices */ @@ -362,7 +353,6 @@ struct TVMArray64 { : data(data.uint64()), ctx(ctx), ndim(ndim), - pad0(0), dtype(dtype), shape(shape.uint64()), strides(strides.uint64()), @@ -377,8 +367,6 @@ struct TVMArray64 { DLContext ctx; /*! \brief Number of dimensions */ int32_t ndim; - /*! \brief Padding to enforce struct alignment */ - uint32_t pad0; /*! \brief The data type of the pointer */ DLDataType dtype; /*! \brief The shape of the tensor */ diff --git a/src/runtime/micro/target_data_layout_encoder.cc b/src/runtime/micro/target_data_layout_encoder.cc new file mode 100644 index 000000000000..4a87a8f35721 --- /dev/null +++ b/src/runtime/micro/target_data_layout_encoder.cc @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "target_data_layout_encoder.h" + +namespace tvm { +namespace runtime { + +TargetDataLayoutEncoder::Alloc::Alloc(TargetDataLayoutEncoder* parent, size_t start_offset, + size_t size, TargetPtr start_addr) + : parent_(parent), + start_offset_(start_offset), + curr_offset_(0), + size_(size), + start_addr_(start_addr) { + parent_->live_unchecked_allocs_.insert(this); +} + +TargetDataLayoutEncoder::Alloc::~Alloc() { + auto it = parent_->live_unchecked_allocs_.find(this); + if (it != parent_->live_unchecked_allocs_.end()) { + // alloc was not already checked + parent_->live_unchecked_allocs_.erase(it); + if (curr_offset_ != size_) { + parent_->unchecked_alloc_start_offsets_.push_back(start_addr_.value().uint64()); + } + } +} + +void TargetDataLayoutEncoder::Alloc::CheckUnfilled() { + CHECK(curr_offset_ == size_) << "unwritten space in alloc 0x" << std::hex + << start_addr_.value().uint64() << "; curr_offset=0x" << curr_offset_ + << ", size=0x" << size_; +} + +TargetPtr TargetDataLayoutEncoder::Alloc::start_addr() { return start_addr_; } + +size_t TargetDataLayoutEncoder::Alloc::size() { return size_; } + +void TargetDataLayoutEncoder::CheckUnfilledAllocs() { + CHECK(live_unchecked_allocs_.size() > 0) << "No allocs to check"; + if (unchecked_alloc_start_offsets_.size() > 0) { + LOG(ERROR) << "Unchecked allocs were found:"; + for (size_t alloc_start_addr : unchecked_alloc_start_offsets_) { + LOG(ERROR) << " * 0x" << std::hex << alloc_start_addr; + } + CHECK(false) << "Unchecked allocs found during CheckUnfilledAllocs"; + } + + for (class 
Alloc* s : live_unchecked_allocs_) { + s->CheckUnfilled(); + } + live_unchecked_allocs_.clear(); +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/micro/target_data_layout_encoder.h b/src/runtime/micro/target_data_layout_encoder.h index 0744dd1755cf..81587755e3b3 100644 --- a/src/runtime/micro/target_data_layout_encoder.h +++ b/src/runtime/micro/target_data_layout_encoder.h @@ -24,9 +24,12 @@ #ifndef TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ #define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_ +#include +#include #include #include "host_driven/utvm_runtime_enum.h" +#include "micro_common.h" namespace tvm { namespace runtime { @@ -41,55 +44,60 @@ class TargetDataLayoutEncoder { /*! * \brief helper class for writing into `TargetDataLayoutEncoder` */ - template - class Slot { + class Alloc { public: /*! * \brief constructor * \param parent pointer to parent encoder - * \param start_offset start byte offset of the slot in the backing buffer - * \param size size (in bytes) of the memory region allocated for this slot - * \param start_addr start address of the slot in the device's memory + * \param start_offset start byte offset of the alloc in the backing buffer + * \param size size (in bytes) of the memory region allocated for this alloc + * \param start_addr start address of the alloc in the device's memory */ - Slot(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, TargetPtr start_addr); + Alloc(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, TargetPtr start_addr); - ~Slot(); + ~Alloc(); /*! * \brief writes `sizeof(T) * num_elems` bytes of data from `arr` * \param arr array to be read from * \param num_elems number of elements in array */ + template void WriteArray(const T* arr, size_t num_elems); /*! * \brief writes `val` * \param val value to be written */ + template void WriteValue(const T& val); /*! 
- * \brief returns start address of the slot in device memory + * \brief returns start address of the alloc in device memory * \return device start address */ TargetPtr start_addr(); /*! - * \brief returns number of bytes allocated for this slot - * \return size of this slot + * \brief returns number of bytes allocated for this alloc + * \return size of this alloc */ size_t size(); + size_t curr_offset() const { return curr_offset_; } + + void CheckUnfilled(); + private: /*! \brief pointer to parent encoder */ TargetDataLayoutEncoder* parent_; - /*! \brief start offset of the slot in the parent's backing parent_buffer */ + /*! \brief start offset of the alloc in the parent's backing parent_buffer */ size_t start_offset_; - /*! \brief current offset relative to the start offset of this slot */ + /*! \brief current offset relative to the start offset of this alloc */ size_t curr_offset_; - /*! \brief size (in bytes) of the memory region allocated for this slot */ + /*! \brief size (in bytes) of the memory region allocated for this alloc */ size_t size_; - /*! \brief start address of the slot in the device's memory */ + /*! \brief start address of the alloc in the device's memory */ TargetPtr start_addr_; }; @@ -105,21 +113,23 @@ class TargetDataLayoutEncoder { word_size_(word_size) {} /*! 
- * \brief allocates a slot for `sizeof(T) * num_elems` bytes of data + * \brief allocates an alloc for `sizeof(T) * num_elems` bytes of data * \param num_elems number of elements of type `T` being allocated (defaults to 1) - * \return slot of size `sizeof(T) * num_elems` bytes + * \return alloc of size `sizeof(T) * num_elems` bytes */ template - Slot Alloc(size_t num_elems = 1) { + std::unique_ptr Alloc(size_t num_elems = 1) { curr_offset_ = UpperAlignValue(curr_offset_, word_size_.bytes()); size_t size = sizeof(T) * num_elems; if (curr_offset_ + size > buf_.size()) { buf_.resize(curr_offset_ + size); } CHECK(buf_.size() < capacity_) << "out of space in data encoder"; - size_t slot_start_offset = curr_offset_; + size_t alloc_start_offset = curr_offset_; curr_offset_ += size; - return Slot(this, slot_start_offset, size, start_addr() + slot_start_offset); + class Alloc* alloc = + new class Alloc(this, alloc_start_offset, size, start_addr() + alloc_start_offset); + return std::unique_ptr(alloc); } void Clear() { @@ -150,6 +160,8 @@ class TargetDataLayoutEncoder { TargetPtr(word_size_, UpperAlignValue(start_addr.value().uint64(), word_size_.bytes())); } + void CheckUnfilledAllocs(); + private: /*! \brief in-memory backing buffer */ std::vector buf_; @@ -161,50 +173,28 @@ class TargetDataLayoutEncoder { size_t capacity_; /*! \brief number of bytes in a word on the target device */ TargetWordSize word_size_; + /*! \brief Alloc instances allocated now but not yet checked by CheckUnfilledAllocs */ + std::set live_unchecked_allocs_; + /*! 
\brief start offsets of Alloc instances that were deallocated before CheckUnfilledAllocs ran */ + std::vector unchecked_alloc_start_offsets_; + friend Alloc::~Alloc(); }; template -TargetDataLayoutEncoder::Slot::Slot(TargetDataLayoutEncoder* parent, size_t start_offset, - size_t size, TargetPtr start_addr) - : parent_(parent), - start_offset_(start_offset), - curr_offset_(0), - size_(size), - start_addr_(start_addr) {} - -template -TargetDataLayoutEncoder::Slot::~Slot() { - // TODO(weberlo, areusch): this can mask the exception thrown by slot allocation... even though - // that doesn't make sense. - CHECK(curr_offset_ == size_) << "unwritten space in slot; curr_offset=" << curr_offset_ - << ", size=" << size_; -} - -template -void TargetDataLayoutEncoder::Slot::WriteArray(const T* arr, size_t num_elems) { +void TargetDataLayoutEncoder::Alloc::WriteArray(const T* arr, size_t num_elems) { if (num_elems == 0) return; size_t size = sizeof(T) * num_elems; - CHECK(curr_offset_ + size <= size_) << "not enough space in slot"; + CHECK(curr_offset_ + size <= size_) << "not enough space in alloc"; uint8_t* curr_ptr = &(parent_->data())[start_offset_ + curr_offset_]; std::memcpy(curr_ptr, arr, size); curr_offset_ += size; } template -void TargetDataLayoutEncoder::Slot::WriteValue(const T& val) { +void TargetDataLayoutEncoder::Alloc::WriteValue(const T& val) { WriteArray(&val, 1); } -template -TargetPtr TargetDataLayoutEncoder::Slot::start_addr() { - return start_addr_; -} - -template -size_t TargetDataLayoutEncoder::Slot::size() { - return size_; -} - } // namespace runtime } // namespace tvm #endif // TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_