address code review comments
kpedro88 committed May 25, 2021
1 parent 20ab568 commit b5e9749
Showing 9 changed files with 23 additions and 26 deletions.
HeterogeneousCore/SonicTriton/interface/TritonClient.h (2 changes: 1 addition & 1 deletion)
@@ -13,7 +13,7 @@
 #include <exception>
 #include <unordered_map>
 
-#include "HeterogeneousCore/SonicTriton/interface/grpc_client_gpu.h"
+#include "grpc_client.h"
 #include "grpc_service.pb.h"
 
 class TritonClient : public SonicClient<TritonInputMap, TritonOutputMap> {
HeterogeneousCore/SonicTriton/interface/TritonData.h (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@
 #include <memory>
 #include <atomic>
 
-#include "HeterogeneousCore/SonicTriton/interface/grpc_client_gpu.h"
+#include "grpc_client.h"
 #include "grpc_service.pb.h"
 
 //forward declaration
HeterogeneousCore/SonicTriton/interface/TritonMemResource.h (8 changes: 5 additions & 3 deletions)
@@ -4,13 +4,13 @@
 #include <string>
 #include <memory>
 
-#include "HeterogeneousCore/SonicTriton/interface/grpc_client_gpu.h"
+#include "grpc_client.h"
 
-#include "cuda_runtime_api.h"
-
 //forward declaration
 template <typename IO>
 class TritonData;
+struct cudaIpcMemHandle_st;
+typedef cudaIpcMemHandle_st cudaIpcMemHandle_t;
 
 //base class for memory operations
 template <typename IO>
@@ -21,7 +21,9 @@ class TritonMemResource {
   uint8_t* addr() { return addr_; }
   size_t size() const { return size_; }
   bool status() const { return status_; }
+  //used for input
   virtual void copy(const void* values, size_t offset) {}
+  //used for output
   virtual void copy(const uint8_t** values) {}
   virtual bool set(bool canThrow);
 
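The header above stops including cuda_runtime_api.h and instead forward-declares the opaque cudaIpcMemHandle_st struct, so only the implementation file needs the CUDA headers. A minimal sketch of that pattern with a hypothetical Widget class (the file names and class are illustrative, not from this package):

// widget.h (hypothetical): no cuda_runtime_api.h include needed here
#pragma once
#include <memory>

struct cudaIpcMemHandle_st;
typedef cudaIpcMemHandle_st cudaIpcMemHandle_t;

class Widget {
public:
  Widget();
  ~Widget();  // defined in widget.cc, where cudaIpcMemHandle_t is a complete type

private:
  std::shared_ptr<cudaIpcMemHandle_t> handle_;  // the declaration above is enough for the member
};

// widget.cc (hypothetical): only this file pulls in the full CUDA definition
// #include "widget.h"
// #include "cuda_runtime_api.h"
// Widget::Widget() : handle_(std::make_shared<cudaIpcMemHandle_t>()) {}
// Widget::~Widget() = default;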
HeterogeneousCore/SonicTriton/interface/grpc_client_gpu.h (7 changes: 0 additions & 7 deletions)

This file was deleted.

HeterogeneousCore/SonicTriton/interface/triton_utils.h (2 changes: 1 addition & 1 deletion)
@@ -8,7 +8,7 @@
 #include <vector>
 #include <unordered_set>
 
-#include "HeterogeneousCore/SonicTriton/interface/grpc_client_gpu.h"
+#include "grpc_client.h"
 
 #include "cuda_runtime_api.h"
 
HeterogeneousCore/SonicTriton/src/TritonData.cc (1 change: 1 addition & 0 deletions)
@@ -139,6 +139,7 @@ bool TritonData<IO>::updateMem(size_t size, bool canThrow) {
   bool status = true;
   if (!memResource_ or size > memResource_->size()) {
     if (useShm_ and client_->serverType() == TritonServerType::LocalCPU) {
+      //need to destroy before constructing new instance because shared memory key will be reused
       memResource_.reset();
       memResource_ = std::make_shared<TritonCpuShmResource<IO>>(this, shmName_, size, canThrow);
     } else if (useShm_ and client_->serverType() == TritonServerType::LocalGPU) {
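The comment added above records why memResource_.reset() must run before the replacement is built: shared_ptr assignment constructs the new object before releasing the old one, and both objects claim the same shared-memory key. A minimal standalone sketch of that ordering issue, using a hypothetical ShmSegment wrapper rather than the real TritonCpuShmResource:

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <memory>
#include <stdexcept>
#include <string>

// Hypothetical RAII wrapper: owns a named POSIX shared-memory segment.
class ShmSegment {
public:
  ShmSegment(const std::string& name, size_t size) : name_(name) {
    // O_EXCL makes the collision explicit: creation fails if the key still exists
    fd_ = shm_open(name_.c_str(), O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
    if (fd_ < 0)
      throw std::runtime_error("shm_open failed for " + name_);
    if (ftruncate(fd_, static_cast<off_t>(size)) < 0)
      throw std::runtime_error("ftruncate failed for " + name_);
  }
  ~ShmSegment() {
    close(fd_);
    shm_unlink(name_.c_str());  // releases the key for reuse
  }

private:
  std::string name_;
  int fd_;
};

int main() {
  auto res = std::make_shared<ShmSegment>("/demo_key", 1 << 20);
  // Wrong: would construct the new segment while the old one still holds "/demo_key".
  // res = std::make_shared<ShmSegment>("/demo_key", 2 << 20);
  res.reset();                                               // destroy first, freeing the key
  res = std::make_shared<ShmSegment>("/demo_key", 2 << 20);  // then build the replacement
}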
HeterogeneousCore/SonicTriton/src/TritonMemResource.cc (12 changes: 5 additions & 7 deletions)
@@ -3,9 +3,7 @@
 #include "HeterogeneousCore/SonicTriton/interface/TritonMemResource.h"
 #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
 
-#include "HeterogeneousCore/SonicTriton/interface/grpc_client_gpu.h"
-
-#include "cuda_runtime_api.h"
+#include "grpc_client.h"
 
 #include <cstring>
 #include <fcntl.h>
Expand Down Expand Up @@ -121,11 +119,11 @@ void TritonOutputCpuShmResource::copy(const uint8_t** values) {
template <typename IO>
TritonGpuShmResource<IO>::TritonGpuShmResource(TritonData<IO>* data, const std::string& name, size_t size, bool canThrow)
: TritonMemResource<IO>(data, name, size, canThrow), deviceId_(0), handle_(std::make_shared<cudaIpcMemHandle_t>()) {
this->status_ &= triton_utils::cudaCheck(
cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_, canThrow);
//todo: get server device id somehow?
this->status_ &= triton_utils::cudaCheck(
cudaSetDevice(deviceId_), "unable to set device ID to " + std::to_string(deviceId_), canThrow);
this->status_ &= triton_utils::cudaCheck(
cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_, canThrow);
this->status_ &= triton_utils::cudaCheck(
cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_, canThrow);
this->status_ &= triton_utils::warnOrThrowIfError(
@@ -144,7 +142,7 @@ TritonGpuShmResource<IO>::~TritonGpuShmResource<IO>() {
 template <>
 void TritonInputGpuShmResource::copy(const void* values, size_t offset) {
   triton_utils::cudaCheck(
-      cudaMemcpy((void*)(addr_ + offset), values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
+      cudaMemcpy(addr_ + offset, values, data_->byteSizePerBatch_, cudaMemcpyHostToDevice),
       data_->name_ + " toServer(): unable to memcpy " + std::to_string(data_->byteSizePerBatch_) + " bytes to GPU",
       true);
 }
@@ -154,7 +152,7 @@ void TritonOutputGpuShmResource::copy(const uint8_t** values) {
   //copy back from gpu, keep in scope
   auto ptr = std::make_shared<std::vector<uint8_t>>(data_->totalByteSize_);
   triton_utils::cudaCheck(
-      cudaMemcpy((void*)(ptr->data()), (void*)(addr_), data_->totalByteSize_, cudaMemcpyDeviceToHost),
+      cudaMemcpy(ptr->data(), addr_, data_->totalByteSize_, cudaMemcpyDeviceToHost),
       data_->name_ + " fromServer(): unable to memcpy " + std::to_string(data_->totalByteSize_) + " bytes from GPU",
       true);
   *values = ptr->data();
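The constructor in the hunks above allocates device memory and exports it with cudaIpcGetMemHandle so that the local Triton server process can map the same buffer. The receiving side is not part of this commit; below is a rough sketch of what a consumer process does with the handle, assuming the raw handle bytes have already been shipped across the process boundary (the client hands them to the server through Triton's CUDA shared-memory registration call). The openPeerAllocation helper is illustrative, not taken from the Triton server code:

#include <cstdio>
#include <cstring>

#include "cuda_runtime_api.h"

// Illustrative helper: map a GPU allocation exported by another process.
void* openPeerAllocation(const void* handleBytes, int deviceId) {
  cudaIpcMemHandle_t handle;
  std::memcpy(&handle, handleBytes, sizeof(handle));  // raw handle bytes received over IPC/gRPC
  void* devPtr = nullptr;
  if (cudaSetDevice(deviceId) != cudaSuccess ||
      cudaIpcOpenMemHandle(&devPtr, handle, cudaIpcMemLazyEnablePeerAccess) != cudaSuccess) {
    std::fprintf(stderr, "failed to open CUDA IPC handle\n");
    return nullptr;
  }
  return devPtr;  // unmap later with cudaIpcCloseMemHandle(devPtr); the exporting process still owns the memory
}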
HeterogeneousCore/SonicTriton/src/TritonService.cc (12 changes: 8 additions & 4 deletions)
@@ -179,10 +179,14 @@ std::pair<std::string, TritonServerType> TritonService::serverAddress(const std:
 
   //todo: use some algorithm to select server rather than just picking arbitrarily
   const auto& serverInfo(servers_.find(serverName)->second);
-  return std::make_pair(serverInfo.url,
-                        serverInfo.isFallback
-                            ? fallbackOpts_.useGPU ? TritonServerType::LocalGPU : TritonServerType::LocalCPU
-                            : TritonServerType::Remote);
+  auto serverType = TritonServerType::Remote;
+  if (serverInfo.isFallback) {
+    if (fallbackOpts_.useGPU)
+      serverType = TritonServerType::LocalGPU;
+    else
+      serverType = TritonServerType::LocalCPU;
+  }
+  return std::make_pair(serverInfo.url, serverType);
 }
 
 void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::ProcessContext const&) {
HeterogeneousCore/SonicTriton/test/TritonGraphModules.cc (3 changes: 1 addition & 2 deletions)
@@ -64,15 +64,14 @@ class TritonGraphHelper {
     if (brief_)
       edm::LogInfo(debugName) << "output shape: " << output1.shape()[0] << ", " << output1.shape()[1];
     else {
-      std::stringstream msg;
+      edm::LogInfo msg(debugName);
       for (int i = 0; i < output1.shape()[0]; ++i) {
         msg << "output " << i << ": ";
         for (int j = 0; j < output1.shape()[1]; ++j) {
          msg << tmp[0][output1.shape()[1] * i + j] << " ";
         }
         msg << "\n";
       }
-      edm::LogInfo(debugName) << msg.str();
     }
   }
   static void fillPSetDescription(edm::ParameterSetDescription& desc) {
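The last hunk drops the intermediate std::stringstream and streams directly into an edm::LogInfo object, which buffers whatever is sent to it and emits one message when it goes out of scope. A small self-contained sketch of that scoped-logger pattern, using a stand-in ScopedLog class rather than the CMSSW MessageLogger:

#include <iostream>
#include <sstream>
#include <string>
#include <utility>

// Stand-in for a message-logger object: collects streamed output and
// prints it as a single message when destroyed at the end of the scope.
class ScopedLog {
public:
  explicit ScopedLog(std::string category) : category_(std::move(category)) {}
  ~ScopedLog() { std::cout << "[" << category_ << "] " << buffer_.str() << std::endl; }

  template <typename T>
  ScopedLog& operator<<(const T& value) {
    buffer_ << value;
    return *this;
  }

private:
  std::string category_;
  std::ostringstream buffer_;
};

int main() {
  ScopedLog msg("TritonGraphProducer");
  for (int i = 0; i < 3; ++i)
    msg << "output " << i << " ";
  // the whole message is emitted once, when msg goes out of scope here
}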
