Commit d1ad81d

Fix sending more tensors from C++ API
Fix bug in xmodel worker if tensors > batch size
1 parent 9e2a0e7 commit d1ad81d

4 files changed: +23 -62 lines changed

examples/cpp/custom_processing.cpp

Lines changed: 4 additions & 4 deletions
@@ -170,7 +170,7 @@ int main() {
   std::string root = root_env;
 
   // +user variables: update as needed!
-  const auto batch_size = 4;
+  const auto request_num = 8;
   const auto* path_to_xmodel =
     "${AKS_XMODEL_ROOT}/artifacts/u200_u250/resnet_v1_50_tf/"
     "resnet_v1_50_tf.xmodel";
@@ -192,9 +192,9 @@ int main() {
 
   // +prepare images:
   std::vector<std::string> paths;
-  paths.reserve(batch_size);
+  paths.reserve(request_num);
 
-  for (auto i = 0; i < batch_size; i++) {
+  for (auto i = 0; i < request_num; i++) {
     paths.emplace_back(path_to_image);
   }
@@ -206,7 +206,7 @@ int main() {
   std::queue<proteus::InferenceResponseFuture> queue;
 
   proteus::InferenceRequest request;
-  for (auto i = 0; i < batch_size; i++) {
+  for (auto i = 0; i < request_num; i++) {
     request.addInputTensor(static_cast<void*>(images[i].data()), shape,
                            proteus::types::DataType::INT8);
   }
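
The example change above makes a single native request carry eight input tensors instead of four, exercising the "more tensors than the batch size" path that the rest of this commit fixes. A minimal sketch of the updated usage follows; the image buffers, the 224x224x3 size, and the element type of shape are placeholders, while addInputTensor and proteus::types::DataType::INT8 come from the diff itself.

// Sketch only: placeholder data and shape; not a drop-in replacement for the example.
const auto request_num = 8;  // more tensors than the previous batch_size of 4
std::vector<std::vector<int8_t>> images(
  request_num, std::vector<int8_t>(224 * 224 * 3));  // placeholder image data
std::vector<uint64_t> shape = {224, 224, 3};          // element type assumed

proteus::InferenceRequest request;
for (auto i = 0; i < request_num; i++) {
  // each iteration appends one more input tensor to the same request
  request.addInputTensor(static_cast<void*>(images[i].data()), shape,
                         proteus::types::DataType::INT8);
}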

src/proteus/batching/batcher.cpp

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ CppNativeApi::CppNativeApi(InferenceRequest request)
   this->promise_ = std::make_unique<std::promise<proteus::InferenceResponse>>();
 }
 
-size_t CppNativeApi::getInputSize() { return 1; }
+size_t CppNativeApi::getInputSize() { return this->request_.getInputSize(); }
 
 std::promise<proteus::InferenceResponse> *CppNativeApi::getPromise() {
   return this->promise_.get();

src/proteus/core/predict_api_internal.cpp

Lines changed: 8 additions & 51 deletions
@@ -267,7 +267,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
     auto buffers = input_buffers[buffer_index];
     for (size_t i = 0; i < buffers.size(); i++) {
       auto &buffer = buffers[i];
-      auto &offset = input_offsets[i];
+      auto &offset = input_offsets[buffer_index];
 
       request->inputs_.push_back(std::move(
         InferenceRequestInputBuilder::fromInput(input, buffer, offset)));
@@ -280,7 +280,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
     if (batch_offset == batch_size) {
       batch_offset = 0;
       buffer_index++;
-      std::fill(input_offsets.begin(), input_offsets.end(), 0);
+      // std::fill(input_offsets.begin(), input_offsets.end(), 0);
     }
   }
 
@@ -294,7 +294,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
     auto buffers = output_buffers[buffer_index];
     for (size_t i = 0; i < buffers.size(); i++) {
       auto &buffer = buffers[i];
-      auto &offset = output_offsets[i];
+      auto &offset = output_offsets[buffer_index];
 
       request->outputs_.emplace_back(output);
       request->outputs_.back().setData(
@@ -317,7 +317,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
     auto buffers = output_buffers[buffer_index];
     for (size_t j = 0; j < buffers.size(); j++) {
       auto &buffer = buffers[j];
-      const auto &offset = output_offsets[j];
+      const auto &offset = output_offsets[buffer_index];
 
       request->outputs_.emplace_back();
       request->outputs_.back().setData(
@@ -335,49 +335,6 @@ InferenceRequestPtr InferenceRequestBuilder::fromInput(
     }
   }
 
-  // try {
-  //   auto buffers = input_buffers[buffer_index];
-  //   for (size_t i = 0; i < buffers.size(); i++) {
-  //     auto &buffer = buffers[i];
-  //     auto &offset = input_offsets[i];
-
-  //     request->inputs_.push_back(std::move(InferenceRequestInputBuilder::fromInput(req,
-  //     buffer, offset))); offset += request->inputs_.back().getSize();
-  //   }
-  // } catch (const std::invalid_argument &e) {
-  //   throw;
-  // }
-
-  // try {
-  //   auto buffers = output_buffers[buffer_index];
-  //   for (size_t i = 0; i < buffers.size(); i++) {
-  //     auto &buffer = buffers[i];
-  //     const auto &offset = output_offsets[i];
-
-  //     request->outputs_.emplace_back();
-  //     request->outputs_.back().setData(static_cast<std::byte
-  //     *>(buffer->data()) +
-  //                                      offset);
-  //     // TODO(varunsh): output_offset is currently ignored! The size of the
-  //     // output needs to come from the worker but we have no such
-  //     information.
-  //   }
-  // } catch (const std::invalid_argument &e) {
-  //   throw;
-  // }
-
-  // batch_offset++;
-  // // FIXME(varunsh): this was intended to support multiple input tensors but
-  // it
-  // // creates a bug where the batch_offset gets reset to zero too early
-  // (void)batch_size;
-  // // if (batch_offset == batch_size) {
-  // //   batch_offset = 0;
-  // //   buffer_index++;
-  // //   std::fill(input_offsets.begin(), input_offsets.end(), 0);
-  // //   std::fill(output_offsets.begin(), output_offsets.end(), 0);
-  // // }
-
   return request;
 }
 
@@ -423,7 +380,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
     auto &buffers = input_buffers[buffer_index];
     for (size_t j = 0; j < buffers.size(); j++) {
       auto &buffer = buffers[j];
-      auto &offset = input_offsets[j];
+      auto &offset = input_offsets[buffer_index];
 
       auto input = InferenceRequestInputBuilder::fromJson(
         std::make_shared<Json::Value>(i), buffer, offset);
@@ -438,7 +395,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
     if (batch_offset == batch_size) {
      batch_offset = 0;
      buffer_index++;
-      std::fill(input_offsets.begin(), input_offsets.end(), 0);
+      // std::fill(input_offsets.begin(), input_offsets.end(), 0);
     }
   }
 
@@ -453,7 +410,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
     auto buffers = output_buffers[buffer_index];
     for (size_t j = 0; j < buffers.size(); j++) {
       auto &buffer = buffers[j];
-      auto &offset = output_offsets[j];
+      auto &offset = output_offsets[buffer_index];
 
       auto output = InferenceRequestOutputBuilder::fromJson(
         std::make_shared<Json::Value>(i));
@@ -472,7 +429,7 @@ InferenceRequestPtr InferenceRequestBuilder::fromJson(
     auto buffers = output_buffers[buffer_index];
     for (size_t j = 0; j < buffers.size(); j++) {
      auto &buffer = buffers[j];
-      const auto &offset = output_offsets[j];
+      const auto &offset = output_offsets[buffer_index];
 
      request->outputs_.emplace_back();
      request->outputs_.back().setData(
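
In effect, the builder now keeps one running write offset per hardware-batch buffer (indexed by buffer_index) rather than one per tensor, so the old reset of all offsets on batch rollover becomes unnecessary: the next buffer's offset entry is still zero. A standalone sketch of that bookkeeping with illustrative names and fixed-size tensors (this is not the server's actual buffer API):

#include <algorithm>
#include <cstddef>
#include <vector>

// Illustration only: copy tensors into per-batch buffers, advancing one
// offset per buffer instead of resetting per-tensor offsets on rollover.
void fillBuffers(const std::vector<std::vector<std::byte>>& tensors,
                 std::vector<std::vector<std::byte>>& buffers,
                 size_t batch_size) {
  std::vector<size_t> offsets(buffers.size(), 0);  // one offset per buffer
  size_t buffer_index = 0;
  size_t batch_offset = 0;
  for (const auto& tensor : tensors) {
    auto& offset = offsets[buffer_index];  // indexed by buffer, not by tensor
    std::copy(tensor.begin(), tensor.end(),
              buffers[buffer_index].begin() + offset);
    offset += tensor.size();
    if (++batch_offset == batch_size) {
      batch_offset = 0;
      buffer_index++;  // next batch fills the next buffer; its offset is already 0
    }
  }
}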

src/proteus/workers/xmodel.cpp

Lines changed: 10 additions & 6 deletions
@@ -287,7 +287,6 @@ void XModel::doRun(BatchPtrQueue* input_queue) {
   std::vector<InferenceResponse> responses;
   responses.reserve(batch->requests->size());
 
-  int tensor_count = 0;
   for (auto& req : *(batch->requests)) {
     auto& resp = responses.emplace_back();
     resp.setID(req->getID());
@@ -308,18 +307,18 @@
     }
   }
 
-  // auto output_index = outputsPtr[0]->data().first;
-  // TODO(varunsh): assuming 1 output tensor (per 1 input) and single batch!
-  auto* output_index = (*batch->output_buffers)[0][0]->data();
-
-  tensor_count = 0;
   for (unsigned int k = 0; k < batch->requests->size(); k++) {
     auto req = (*batch->requests)[k];
     auto inputs = req->getInputs();
     auto outputs = req->getOutputs();
     auto& resp = responses[k];
 
+    auto tensor_count = 0U;
+    auto buffer_index = 0;
     for (unsigned int i = 0; i < inputs.size(); i++) {
+      // TODO(varunsh): assuming 1 output tensor (per 1 input)!
+      auto* output_index =
+        (*batch->output_buffers)[buffer_index][0]->data();
       InferenceResponseOutput output;
       auto output_tensors = getRunner()->get_output_tensors();
       auto output_shape =
@@ -352,6 +351,11 @@
 
      resp.addOutput(output);
      tensor_count++;
+      if (tensor_count == this->batch_size_) {
+        tensor_count = 0;
+        buffer_index++;
+        // std::fill(input_offsets.begin(), input_offsets.end(), 0);
+      }
     }
 
 #ifdef PROTEUS_ENABLE_TRACING
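
The xmodel worker applies the same rollover idea per request: tensor_count counts the output tensors written into the current buffer, and buffer_index moves to the next output buffer once batch_size_ tensors have been emitted, instead of always reading buffer 0. A toy, self-contained version of just that counting logic (the values are illustrative):

#include <cstdio>

// Illustration only: map num_tensors outputs onto buffers of batch_size each.
int main() {
  const unsigned int num_tensors = 8;  // e.g. request_num from the example
  const unsigned int batch_size = 4;   // hardware batch size of the worker

  unsigned int tensor_count = 0;
  unsigned int buffer_index = 0;
  for (unsigned int i = 0; i < num_tensors; i++) {
    std::printf("output tensor %u -> buffer %u\n", i, buffer_index);
    tensor_count++;
    if (tensor_count == batch_size) {  // current buffer is full
      tensor_count = 0;
      buffer_index++;                  // start filling the next buffer
    }
  }
  return 0;
}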
