Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix TVMArray layout on device #5599

Merged
merged 4 commits into from May 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 16 additions & 15 deletions src/runtime/micro/micro_session.cc
Expand Up @@ -398,8 +398,8 @@ std::tuple<TargetPtr, TargetPtr> MicroSession::EncoderAppend(TargetDataLayoutEnc
const int* type_codes = args.type_codes;
int num_args = args.num_args;

auto tvm_vals_slot = encoder->Alloc<TVMValue>(num_args);
auto type_codes_slot = encoder->Alloc<const int>(num_args);
auto tvm_vals_alloc = encoder->Alloc<TVMValue>(num_args);
auto type_codes_alloc = encoder->Alloc<const int>(num_args);

for (int i = 0; i < num_args; i++) {
switch (type_codes[i]) {
Expand All @@ -425,7 +425,7 @@ std::tuple<TargetPtr, TargetPtr> MicroSession::EncoderAppend(TargetDataLayoutEnc

TVMValue val;
val.v_handle = arr_ptr;
tvm_vals_slot.WriteValue(val);
tvm_vals_alloc->WriteValue(val);
break;
}
// TODO(weberlo): Implement `double` and `int64` case.
Expand All @@ -437,25 +437,24 @@ std::tuple<TargetPtr, TargetPtr> MicroSession::EncoderAppend(TargetDataLayoutEnc
break;
}
}
type_codes_slot.WriteArray(type_codes, num_args);
return std::make_tuple(tvm_vals_slot.start_addr(), type_codes_slot.start_addr());
type_codes_alloc->WriteArray(type_codes, num_args);
encoder->CheckUnfilledAllocs();
return std::make_tuple(tvm_vals_alloc->start_addr(), type_codes_alloc->start_addr());
}

template <typename T>
TargetPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const DLTensor& arr) {
auto tvm_arr_slot = encoder->Alloc<T>();
auto shape_slot = encoder->Alloc<int64_t>(arr.ndim);

// `shape` and `strides` are stored on the host, so we need to write them to
// the device first. The `data` field is already allocated on the device and
// is a device pointer, so we don't need to write it.
shape_slot.WriteArray(arr.shape, arr.ndim);
TargetPtr shape_dev_addr = shape_slot.start_addr();
auto shape_alloc = encoder->Alloc<int64_t>(arr.ndim);
shape_alloc->WriteArray(arr.shape, arr.ndim);
TargetPtr shape_dev_addr = shape_alloc->start_addr();
TargetPtr strides_dev_addr = TargetPtr(word_size_, nullptr);
if (arr.strides != nullptr) {
auto stride_slot = encoder->Alloc<int64_t>(arr.ndim);
stride_slot.WriteArray(arr.strides, arr.ndim);
strides_dev_addr = stride_slot.start_addr();
auto stride_alloc = encoder->Alloc<int64_t>(arr.ndim);
stride_alloc->WriteArray(arr.strides, arr.ndim);
strides_dev_addr = stride_alloc->start_addr();
}

T dev_arr(TargetVal{word_size_.bits(), reinterpret_cast<uint64_t>(arr.data)}, arr.ctx, arr.ndim,
Expand All @@ -466,8 +465,10 @@ TargetPtr MicroSession::EncoderAppend(TargetDataLayoutEncoder* encoder, const DL
// Update the device type to CPU, because from the microcontroller's
// perspective, it is.
dev_arr.ctx.device_type = DLDeviceType::kDLCPU;
tvm_arr_slot.WriteValue(dev_arr);
return tvm_arr_slot.start_addr();

auto tvm_arr_alloc = encoder->Alloc<T>();
tvm_arr_alloc->WriteValue(dev_arr);
return tvm_arr_alloc->start_addr();
}

// TODO(weberlo): switch over entirely to error codes that expand to error
Expand Down
26 changes: 7 additions & 19 deletions src/runtime/micro/micro_session.h
Expand Up @@ -315,16 +315,13 @@ struct MicroDevSpace {
struct TVMArray32 {
TVMArray32(TargetVal data, DLContext ctx, int32_t ndim, DLDataType dtype, TargetVal shape,
TargetVal strides, TargetVal byte_offset)
: data(data.uint32()),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.uint32()),
strides(strides.uint32()),
pad1(0),
byte_offset(byte_offset.uint32()),
pad2(0) {}
: data{data.uint32()},
ctx{ctx},
ndim{ndim},
dtype{dtype},
shape{shape.uint32()},
strides{strides.uint32()},
byte_offset{byte_offset.uint32()} {}

/*!
* \brief The opaque data pointer points to the allocated data.
Expand All @@ -336,8 +333,6 @@ struct TVMArray32 {
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
Expand All @@ -347,12 +342,8 @@ struct TVMArray32 {
* can be NULL, indicating tensor is compact.
*/
uint32_t strides;
/*! \brief Padding to enforce struct alignment */
uint32_t pad1;
/*! \brief The offset in bytes to the beginning pointer to data */
uint32_t byte_offset;
/*! \brief Padding to enforce struct alignment */
uint32_t pad2;
};

/*! \brief TVM array for serialization to 64-bit devices */
Expand All @@ -362,7 +353,6 @@ struct TVMArray64 {
: data(data.uint64()),
ctx(ctx),
ndim(ndim),
pad0(0),
dtype(dtype),
shape(shape.uint64()),
strides(strides.uint64()),
Expand All @@ -377,8 +367,6 @@ struct TVMArray64 {
DLContext ctx;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief Padding to enforce struct alignment */
uint32_t pad0;
/*! \brief The data type of the pointer */
DLDataType dtype;
/*! \brief The shape of the tensor */
Expand Down
73 changes: 73 additions & 0 deletions src/runtime/micro/target_data_layout_encoder.cc
@@ -0,0 +1,73 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "target_data_layout_encoder.h"

namespace tvm {
namespace runtime {

TargetDataLayoutEncoder::Alloc::Alloc(TargetDataLayoutEncoder* parent, size_t start_offset,
size_t size, TargetPtr start_addr)
: parent_(parent),
start_offset_(start_offset),
curr_offset_(0),
size_(size),
start_addr_(start_addr) {
parent_->live_unchecked_allocs_.insert(this);
}

/*!
 * \brief unregisters the alloc from its parent encoder
 *
 * If the alloc is still registered as unchecked and has not been completely
 * written, its device start address is recorded on the parent so that the
 * next CheckUnfilledAllocs call can report it.
 */
TargetDataLayoutEncoder::Alloc::~Alloc() {
  // erase-by-key returns the number of removed entries; zero means this alloc
  // is no longer tracked (CheckUnfilledAllocs empties the set after checking).
  const bool was_unchecked = parent_->live_unchecked_allocs_.erase(this) > 0;
  if (!was_unchecked) {
    return;
  }
  if (curr_offset_ == size_) {
    // Fully written: nothing to report.
    return;
  }
  parent_->unchecked_alloc_start_offsets_.push_back(start_addr_.value().uint64());
}

/*!
 * \brief checks that this alloc has been completely written
 *
 * Fails a CHECK unless the current write offset equals the size of the alloc.
 * The failure message reports the alloc's device start address, its current
 * offset and its size.
 */
void TargetDataLayoutEncoder::Alloc::CheckUnfilled() {
// `std::hex` stays in effect for the rest of the message, so the address,
// offset and size are all printed in hexadecimal (hence the "0x" prefixes).
CHECK(curr_offset_ == size_) << "unwritten space in alloc 0x" << std::hex
<< start_addr_.value().uint64() << "; curr_offset=0x" << curr_offset_
<< ", size=0x" << size_;
}

/*!
 * \brief returns start address of the alloc in device memory
 * \return device start address
 */
TargetPtr TargetDataLayoutEncoder::Alloc::start_addr() {
  return this->start_addr_;
}

size_t TargetDataLayoutEncoder::Alloc::size() { return size_; }

void TargetDataLayoutEncoder::CheckUnfilledAllocs() {
CHECK(live_unchecked_allocs_.size() > 0) << "No allocs to check";
if (unchecked_alloc_start_offsets_.size() > 0) {
LOG(ERROR) << "Unchecked allocs were found:";
for (size_t alloc_start_addr : unchecked_alloc_start_offsets_) {
LOG(ERROR) << " * 0x" << std::hex << alloc_start_addr;
}
CHECK(false) << "Unchecked allocs found during CheckUnfilledAllocs";
}

for (class Alloc* s : live_unchecked_allocs_) {
s->CheckUnfilled();
}
live_unchecked_allocs_.clear();
}

} // namespace runtime
} // namespace tvm
88 changes: 39 additions & 49 deletions src/runtime/micro/target_data_layout_encoder.h
Expand Up @@ -24,9 +24,12 @@
#ifndef TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_
#define TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_

#include <memory>
#include <set>
#include <vector>

#include "host_driven/utvm_runtime_enum.h"
#include "micro_common.h"

namespace tvm {
namespace runtime {
Expand All @@ -41,55 +44,60 @@ class TargetDataLayoutEncoder {
/*!
* \brief helper class for writing into `TargetDataLayoutEncoder`
*/
template <typename T>
class Slot {
class Alloc {
public:
/*!
* \brief constructor
* \param parent pointer to parent encoder
* \param start_offset start byte offset of the slot in the backing buffer
* \param size size (in bytes) of the memory region allocated for this slot
* \param start_addr start address of the slot in the device's memory
* \param start_offset start byte offset of the alloc in the backing buffer
* \param size size (in bytes) of the memory region allocated for this alloc
* \param start_addr start address of the alloc in the device's memory
*/
Slot(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, TargetPtr start_addr);
Alloc(TargetDataLayoutEncoder* parent, size_t start_offset, size_t size, TargetPtr start_addr);

~Slot();
~Alloc();

/*!
* \brief writes `sizeof(T) * num_elems` bytes of data from `arr`
* \param arr array to be read from
* \param num_elems number of elements in array
*/
template <typename T>
void WriteArray(const T* arr, size_t num_elems);

/*!
* \brief writes `val`
* \param val value to be written
*/
template <typename T>
void WriteValue(const T& val);

/*!
* \brief returns start address of the slot in device memory
* \brief returns start address of the alloc in device memory
* \return device start address
*/
TargetPtr start_addr();

/*!
* \brief returns number of bytes allocated for this slot
* \return size of this slot
* \brief returns number of bytes allocated for this alloc
* \return size of this alloc
*/
size_t size();

size_t curr_offset() const { return curr_offset_; }

void CheckUnfilled();

private:
/*! \brief pointer to parent encoder */
TargetDataLayoutEncoder* parent_;
/*! \brief start offset of the slot in the parent's backing parent_buffer */
/*! \brief start offset of the alloc in the parent's backing parent_buffer */
size_t start_offset_;
/*! \brief current offset relative to the start offset of this slot */
/*! \brief current offset relative to the start offset of this alloc */
size_t curr_offset_;
/*! \brief size (in bytes) of the memory region allocated for this slot */
/*! \brief size (in bytes) of the memory region allocated for this alloc */
size_t size_;
/*! \brief start address of the slot in the device's memory */
/*! \brief start address of the alloc in the device's memory */
TargetPtr start_addr_;
};

Expand All @@ -105,21 +113,23 @@ class TargetDataLayoutEncoder {
word_size_(word_size) {}

/*!
* \brief allocates a slot for `sizeof(T) * num_elems` bytes of data
* \brief allocates an alloc for `sizeof(T) * num_elems` bytes of data
* \param num_elems number of elements of type `T` being allocated (defaults to 1)
* \return slot of size `sizeof(T) * num_elems` bytes
* \return alloc of size `sizeof(T) * num_elems` bytes
*/
template <typename T>
Slot<T> Alloc(size_t num_elems = 1) {
std::unique_ptr<class Alloc> Alloc(size_t num_elems = 1) {
curr_offset_ = UpperAlignValue(curr_offset_, word_size_.bytes());
size_t size = sizeof(T) * num_elems;
if (curr_offset_ + size > buf_.size()) {
buf_.resize(curr_offset_ + size);
}
CHECK(buf_.size() < capacity_) << "out of space in data encoder";
size_t slot_start_offset = curr_offset_;
size_t alloc_start_offset = curr_offset_;
curr_offset_ += size;
return Slot<T>(this, slot_start_offset, size, start_addr() + slot_start_offset);
class Alloc* alloc =
new class Alloc(this, alloc_start_offset, size, start_addr() + alloc_start_offset);
return std::unique_ptr<class Alloc>(alloc);
}

void Clear() {
Expand Down Expand Up @@ -150,6 +160,8 @@ class TargetDataLayoutEncoder {
TargetPtr(word_size_, UpperAlignValue(start_addr.value().uint64(), word_size_.bytes()));
}

void CheckUnfilledAllocs();

private:
/*! \brief in-memory backing buffer */
std::vector<uint8_t> buf_;
Expand All @@ -161,50 +173,28 @@ class TargetDataLayoutEncoder {
size_t capacity_;
/*! \brief number of bytes in a word on the target device */
TargetWordSize word_size_;
/*! \brief Alloc instances allocated now but not yet checked by CheckUnfilledAllocs */
std::set<class Alloc*> live_unchecked_allocs_;
/*! \brief device start addresses of Alloc instances that were deallocated, without being fully written, before CheckUnfilledAllocs ran */
std::vector<size_t> unchecked_alloc_start_offsets_;
friend Alloc::~Alloc();
};

template <typename T>
TargetDataLayoutEncoder::Slot<T>::Slot(TargetDataLayoutEncoder* parent, size_t start_offset,
size_t size, TargetPtr start_addr)
: parent_(parent),
start_offset_(start_offset),
curr_offset_(0),
size_(size),
start_addr_(start_addr) {}

template <typename T>
TargetDataLayoutEncoder::Slot<T>::~Slot() {
// TODO(weberlo, areusch): this can mask the exception thrown by slot allocation... even though
// that doesn't make sense.
CHECK(curr_offset_ == size_) << "unwritten space in slot; curr_offset=" << curr_offset_
<< ", size=" << size_;
}

template <typename T>
void TargetDataLayoutEncoder::Slot<T>::WriteArray(const T* arr, size_t num_elems) {
void TargetDataLayoutEncoder::Alloc::WriteArray(const T* arr, size_t num_elems) {
if (num_elems == 0) return;
size_t size = sizeof(T) * num_elems;
CHECK(curr_offset_ + size <= size_) << "not enough space in slot";
CHECK(curr_offset_ + size <= size_) << "not enough space in alloc";
uint8_t* curr_ptr = &(parent_->data())[start_offset_ + curr_offset_];
std::memcpy(curr_ptr, arr, size);
curr_offset_ += size;
}

template <typename T>
void TargetDataLayoutEncoder::Slot<T>::WriteValue(const T& val) {
void TargetDataLayoutEncoder::Alloc::WriteValue(const T& val) {
WriteArray(&val, 1);
}

template <typename T>
TargetPtr TargetDataLayoutEncoder::Slot<T>::start_addr() {
return start_addr_;
}

template <typename T>
size_t TargetDataLayoutEncoder::Slot<T>::size() {
return size_;
}

} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_MICRO_TARGET_DATA_LAYOUT_ENCODER_H_