Fix sending more tensors than the batch size from the C++ API
Fix a bug in the xmodel worker when a request contains more tensors than the batch size.
varunsh-xilinx committed Feb 9, 2022
1 parent 9e2a0e7 commit d1ad81d
Showing 4 changed files with 23 additions and 62 deletions.
8 changes: 4 additions & 4 deletions examples/cpp/custom_processing.cpp
@@ -170,7 +170,7 @@ int main() {
std::string root = root_env;

// +user variables: update as needed!
- const auto batch_size = 4;
+ const auto request_num = 8;
const auto* path_to_xmodel =
"${AKS_XMODEL_ROOT}/artifacts/u200_u250/resnet_v1_50_tf/"
"resnet_v1_50_tf.xmodel";
@@ -192,9 +192,9 @@ int main() {

// +prepare images:
std::vector<std::string> paths;
- paths.reserve(batch_size);
+ paths.reserve(request_num);

- for (auto i = 0; i < batch_size; i++) {
+ for (auto i = 0; i < request_num; i++) {
paths.emplace_back(path_to_image);
}

@@ -206,7 +206,7 @@
std::queue<proteus::InferenceResponseFuture> queue;

proteus::InferenceRequest request;
- for (auto i = 0; i < batch_size; i++) {
+ for (auto i = 0; i < request_num; i++) {
request.addInputTensor(static_cast<void*>(images[i].data()), shape,
proteus::types::DataType::INT8);
}
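The example change above renames the request-side count to request_num and doubles it to 8 while the worker still batches 4 tensors at a time, so a single request now spans two batches. Below is a standalone sketch of that arithmetic; the batch boundaries are an assumption about how the server splits the request, used only for illustration:

// Standalone sketch, not part of the example above: it only models how
// request_num = 8 input tensors map onto server-side batches of 4, which is
// the situation this commit fixes.
#include <cstdio>

int main() {
  const int batch_size = 4;   // what the xmodel worker processes at once
  const int request_num = 8;  // tensors added to a single InferenceRequest

  for (int i = 0; i < request_num; i++) {
    const int batch_index = i / batch_size;    // which batch serves tensor i
    const int slot_in_batch = i % batch_size;  // position inside that batch
    std::printf("tensor %d -> batch %d, slot %d\n", i, batch_index,
                slot_in_batch);
  }
  // Eight tensors with a batch size of four need two batches; the rest of the
  // commit fixes the bookkeeping that tracks this split on the server side.
  return 0;
}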
2 changes: 1 addition & 1 deletion src/proteus/batching/batcher.cpp
@@ -65,7 +65,7 @@ CppNativeApi::CppNativeApi(InferenceRequest request)
this->promise_ = std::make_unique<std::promise<proteus::InferenceResponse>>();
}

- size_t CppNativeApi::getInputSize() { return 1; }
+ size_t CppNativeApi::getInputSize() { return this->request_.getInputSize(); }

std::promise<proteus::InferenceResponse> *CppNativeApi::getPromise() {
return this->promise_.get();
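With the hard-coded return value of 1, the batcher under-counted requests that carry several input tensors; returning request_.getInputSize() lets it account for every tensor. The following is a toy model of that accounting, not the actual proteus batcher — Request, num_tensors, and the ceiling division are illustrative assumptions:

// Toy model: shows why getInputSize() has to report the real tensor count.
#include <cstdio>

struct Request {
  int num_tensors;
  // Previously this effectively returned 1; after the fix, the real count.
  int getInputSize() const { return num_tensors; }
};

int main() {
  const int batch_size = 4;
  const Request req{8};  // one request carrying 8 input tensors

  const int claimed_old = 1;                   // old hard-coded value
  const int claimed_new = req.getInputSize();  // 8 after the fix

  // Number of batches the batcher would reserve for this request:
  const int batches_old = (claimed_old + batch_size - 1) / batch_size;  // 1
  const int batches_new = (claimed_new + batch_size - 1) / batch_size;  // 2

  std::printf("old accounting: %d batch, fixed accounting: %d batches\n",
              batches_old, batches_new);
  return 0;
}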
59 changes: 8 additions & 51 deletions src/proteus/core/predict_api_internal.cpp
@@ -267,7 +267,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
auto buffers = input_buffers[buffer_index];
for (size_t i = 0; i < buffers.size(); i++) {
auto &buffer = buffers[i];
- auto &offset = input_offsets[i];
+ auto &offset = input_offsets[buffer_index];

request->inputs_.push_back(std::move(
InferenceRequestInputBuilder::fromInput(input, buffer, offset)));
@@ -280,7 +280,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
if (batch_offset == batch_size) {
batch_offset = 0;
buffer_index++;
- std::fill(input_offsets.begin(), input_offsets.end(), 0);
+ // std::fill(input_offsets.begin(), input_offsets.end(), 0);
}
}

@@ -294,7 +294,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
auto buffers = output_buffers[buffer_index];
for (size_t i = 0; i < buffers.size(); i++) {
auto &buffer = buffers[i];
- auto &offset = output_offsets[i];
+ auto &offset = output_offsets[buffer_index];

request->outputs_.emplace_back(output);
request->outputs_.back().setData(
@@ -317,7 +317,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
auto buffers = output_buffers[buffer_index];
for (size_t j = 0; j < buffers.size(); j++) {
auto &buffer = buffers[j];
- const auto &offset = output_offsets[j];
+ const auto &offset = output_offsets[buffer_index];

request->outputs_.emplace_back();
request->outputs_.back().setData(
@@ -335,49 +335,6 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
}
}

- // try {
- //   auto buffers = input_buffers[buffer_index];
- //   for (size_t i = 0; i < buffers.size(); i++) {
- //     auto &buffer = buffers[i];
- //     auto &offset = input_offsets[i];
-
- //     request->inputs_.push_back(std::move(InferenceRequestInputBuilder::fromInput(req,
- //     buffer, offset))); offset += request->inputs_.back().getSize();
- //   }
- // } catch (const std::invalid_argument &e) {
- //   throw;
- // }
-
- // try {
- //   auto buffers = output_buffers[buffer_index];
- //   for (size_t i = 0; i < buffers.size(); i++) {
- //     auto &buffer = buffers[i];
- //     const auto &offset = output_offsets[i];
-
- //     request->outputs_.emplace_back();
- //     request->outputs_.back().setData(static_cast<std::byte
- //     *>(buffer->data()) +
- //                                      offset);
- //     // TODO(varunsh): output_offset is currently ignored! The size of the
- //     // output needs to come from the worker but we have no such
- //     information.
- //   }
- // } catch (const std::invalid_argument &e) {
- //   throw;
- // }
-
- // batch_offset++;
- // // FIXME(varunsh): this was intended to support multiple input tensors but
- // it
- // // creates a bug where the batch_offset gets reset to zero too early
- // (void)batch_size;
- // // if (batch_offset == batch_size) {
- // //   batch_offset = 0;
- // //   buffer_index++;
- // //   std::fill(input_offsets.begin(), input_offsets.end(), 0);
- // //   std::fill(output_offsets.begin(), output_offsets.end(), 0);
- // // }
-
return request;
}

@@ -423,7 +380,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
auto &buffers = input_buffers[buffer_index];
for (size_t j = 0; j < buffers.size(); j++) {
auto &buffer = buffers[j];
- auto &offset = input_offsets[j];
+ auto &offset = input_offsets[buffer_index];

auto input = InferenceRequestInputBuilder::fromJson(
std::make_shared<Json::Value>(i), buffer, offset);
@@ -438,7 +395,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
if (batch_offset == batch_size) {
batch_offset = 0;
buffer_index++;
- std::fill(input_offsets.begin(), input_offsets.end(), 0);
+ // std::fill(input_offsets.begin(), input_offsets.end(), 0);
}
}

@@ -453,7 +410,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
auto buffers = output_buffers[buffer_index];
for (size_t j = 0; j < buffers.size(); j++) {
auto &buffer = buffers[j];
- auto &offset = output_offsets[j];
+ auto &offset = output_offsets[buffer_index];

auto output = InferenceRequestOutputBuilder::fromJson(
std::make_shared<Json::Value>(i));
@@ -472,7 +429,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
auto buffers = output_buffers[buffer_index];
for (size_t j = 0; j < buffers.size(); j++) {
auto &buffer = buffers[j];
- const auto &offset = output_offsets[j];
+ const auto &offset = output_offsets[buffer_index];

request->outputs_.emplace_back();
request->outputs_.back().setData(
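The builder changes above switch the offset lookup from the per-tensor loop index to buffer_index and stop resetting the offsets when the batch rolls over, so each backing buffer keeps its own running offset. Below is a simplified, self-contained sketch of that bookkeeping; buffer layout and tensor sizes are assumptions made purely for illustration:

// Simplified sketch: one running offset per buffer, indexed by buffer_index,
// with nothing reset when the batch rolls over to the next buffer.
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::size_t batch_size = 4;
  const std::size_t num_tensors = 8;      // more tensors than one batch holds
  const std::size_t tensor_bytes = 1024;  // illustrative per-tensor size

  // One running offset per buffer; sized generously for the example.
  std::vector<std::size_t> input_offsets(num_tensors / batch_size + 1, 0);
  std::size_t buffer_index = 0;
  std::size_t batch_offset = 0;

  for (std::size_t i = 0; i < num_tensors; i++) {
    // Offset is looked up per buffer (buffer_index), not per loop index.
    std::size_t& offset = input_offsets[buffer_index];
    std::printf("tensor %zu -> buffer %zu at offset %zu\n", i, buffer_index,
                offset);
    offset += tensor_bytes;

    batch_offset++;
    if (batch_offset == batch_size) {
      batch_offset = 0;
      buffer_index++;  // next buffer; its own offset is still zero
    }
  }
  return 0;
}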
16 changes: 10 additions & 6 deletions src/proteus/workers/xmodel.cpp
@@ -287,7 +287,6 @@ void XModel::doRun(BatchPtrQueue* input_queue) {
std::vector<InferenceResponse> responses;
responses.reserve(batch->requests->size());

- int tensor_count = 0;
for (auto& req : *(batch->requests)) {
auto& resp = responses.emplace_back();
resp.setID(req->getID());
@@ -308,18 +307,18 @@ void XModel::doRun(BatchPtrQueue* input_queue) {
}
}

- // auto output_index = outputsPtr[0]->data().first;
- // TODO(varunsh): assuming 1 output tensor (per 1 input) and single batch!
- auto* output_index = (*batch->output_buffers)[0][0]->data();
-
- tensor_count = 0;
for (unsigned int k = 0; k < batch->requests->size(); k++) {
auto req = (*batch->requests)[k];
auto inputs = req->getInputs();
auto outputs = req->getOutputs();
auto& resp = responses[k];

+ auto tensor_count = 0U;
+ auto buffer_index = 0;
for (unsigned int i = 0; i < inputs.size(); i++) {
+ // TODO(varunsh): assuming 1 output tensor (per 1 input)!
+ auto* output_index =
+     (*batch->output_buffers)[buffer_index][0]->data();
InferenceResponseOutput output;
auto output_tensors = getRunner()->get_output_tensors();
auto output_shape =
@@ -352,6 +351,11 @@

resp.addOutput(output);
tensor_count++;
+ if (tensor_count == this->batch_size_) {
+   tensor_count = 0;
+   buffer_index++;
+   // std::fill(input_offsets.begin(), input_offsets.end(), 0);
+ }
}

#ifdef PROTEUS_ENABLE_TRACING
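In the worker, tensor_count is now tracked per request and buffer_index advances whenever a full batch of outputs has been consumed, so outputs beyond the batch size are read from the next output buffer instead of always from the first one. The sketch below is a simplified standalone model of that indexing, not the real XModel::doRun (which also builds InferenceResponseOutput objects):

// Simplified model: advance buffer_index every time tensor_count wraps at
// batch_size, so each tensor's output comes from the buffer that served it.
#include <cstdio>

int main() {
  const unsigned int batch_size = 4;  // stands in for this->batch_size_
  const unsigned int num_inputs = 8;  // tensors in one request

  unsigned int tensor_count = 0;
  unsigned int buffer_index = 0;

  for (unsigned int i = 0; i < num_inputs; i++) {
    std::printf("output for tensor %u read from output_buffers[%u]\n", i,
                buffer_index);
    tensor_count++;
    if (tensor_count == batch_size) {  // batch exhausted: move to next buffer
      tensor_count = 0;
      buffer_index++;
    }
  }
  return 0;
}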