diff --git a/include/singa/core/device.h b/include/singa/core/device.h index 7a5dda116..8d93335ff 100644 --- a/include/singa/core/device.h +++ b/include/singa/core/device.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "singa/singa_config.h" #include "singa/core/common.h" @@ -295,7 +296,8 @@ class Platform { /// Create a set of CudaGPU Device using given GPU IDs. static const std::vector> CreateCudaGPUsOn(const std::vector &devices, size_t init_size = 0); - + + static std::vector > UsedDevice; /// This function is implementd by Caffe (http://caffe.berkeleyvision.org/). /// This function checks the availability of GPU #device_id. /// It attempts to create a context on the device by calling cudaFree(0). @@ -311,6 +313,7 @@ class Platform { /// the permission. cudaFree(0) is one of those with no side effect, /// except the context initialization. static bool CheckDevice(const int device_id); + static std::mutex mtx_; #endif // USE_CUDA #ifdef USE_OPENCL diff --git a/include/singa/core/memory.h b/include/singa/core/memory.h index f664f95ce..4ab316813 100644 --- a/include/singa/core/memory.h +++ b/include/singa/core/memory.h @@ -43,6 +43,9 @@ class DeviceMemPool { virtual std::pair GetMemUsage() { return std::make_pair(0u, 0u); } + virtual std::pair GetMemUsage(int id) { + return std::make_pair(0u, 0u); + } virtual ~DeviceMemPool(){}; protected: @@ -62,6 +65,7 @@ class CnMemPool : public DeviceMemPool { void Free(void* ptr); std::pair GetMemUsage() override; + std::pair GetMemUsage(int id) override; // release all memory and set cnmem manager to unintialized ~CnMemPool(); @@ -78,7 +82,6 @@ class CnMemPool : public DeviceMemPool { // lock on the initialized variable std::mutex mtx_; - static std::atomic pool_count; }; class CudaMemPool : public DeviceMemPool { diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc index 8ae15f860..e154d50e1 100644 --- a/src/core/device/platform.cc +++ b/src/core/device/platform.cc @@ -20,11 +20,14 @@ #include "singa/core/device.h" #include "singa/singa_config.h" #include "singa/utils/opencl_utils.h" - +#include +using namespace std; namespace singa { #ifdef USE_CUDA - + +std::vector > Platform::UsedDevice; +std::mutex Platform::mtx_; int Platform::GetNumGPUs() { int count; CUDA_CHECK(cudaGetDeviceCount(&count)); @@ -118,23 +121,28 @@ Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) { return CreateCudaGPUsOn(use_gpus, init_size); } -const vector> -Platform::CreateCudaGPUsOn(const vector &devices, size_t init_size) { +const vector > Platform::CreateCudaGPUsOn( + const vector& devices, size_t init_size) { MemPoolConf conf; - if (init_size > 0) - conf.set_init_size(init_size); + if (init_size > 0) conf.set_init_size(init_size); size_t bytes = conf.init_size() << 20; for (auto device : devices) { conf.add_device(device); CHECK_LE(bytes, Platform::GetGPUMemSize(device).first); } + mtx_.lock(); + if (UsedDevice.size() == 0) { + int count = Platform::GetNumGPUs(); + for (int i = 0; i < count; i++) UsedDevice.push_back(nullptr); + } auto pool = std::make_shared(conf); - vector > ret; - for (auto device : devices) { - auto dev = std::make_shared(device, pool); - ret.push_back(dev); + for (size_t i = 0; i < devices.size(); i++) { + if (UsedDevice[devices[i]] == nullptr) + UsedDevice[devices[i]] = std::make_shared(devices[i], pool); + ret.push_back(UsedDevice[devices[i]]); } + mtx_.unlock(); return ret; } diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc index 0fb851125..73115d193 100644 --- a/src/core/memory/memory.cc +++ b/src/core/memory/memory.cc @@ -25,7 +25,6 @@ #ifdef USE_CUDA namespace singa { -std::atomic CnMemPool::pool_count(0); std::pair CnMemPool::GetMemUsage() { size_t free, total; auto status = cnmemMemGetInfo(&free, &total, NULL); @@ -33,18 +32,24 @@ std::pair CnMemPool::GetMemUsage() { << cnmemGetErrorString(status); return std::make_pair(free, total); } +std::pair CnMemPool::GetMemUsage(int id) { + CHECK_EQ(cudaSetDevice(id), cudaError_t::cudaSuccess); + size_t free, total; + auto status = cnmemMemGetInfo(&free, &total, NULL); + CHECK_EQ(status, cnmemStatus_t::CNMEM_STATUS_SUCCESS) + << cnmemGetErrorString(status); + return std::make_pair(free, total); +} CnMemPool::CnMemPool(int numDevices, size_t init_size, size_t max_size) { for (int i = 0; i < numDevices; i++) conf_.add_device(i); conf_.set_init_size(init_size); conf_.set_max_size(max_size); - CHECK_LT(++pool_count, 2) << "CnMemPool must be used as a singleton."; } CnMemPool::CnMemPool(const MemPoolConf &conf) { conf_ = conf; - CHECK_LT(++pool_count, 2) << "CnMemPool must be used as a singleton."; } void CnMemPool::Init() { @@ -79,7 +84,6 @@ CnMemPool::~CnMemPool() { CHECK_EQ(status, cnmemStatus_t::CNMEM_STATUS_SUCCESS) << " " << cnmemGetErrorString(status); initialized_ = false; - --pool_count; } mtx_.unlock(); } diff --git a/test/python/test_memoryPool.py b/test/python/test_memoryPool.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/singa/test_platform.cc b/test/singa/test_platform.cc index f50c97851..e002265dd 100644 --- a/test/singa/test_platform.cc +++ b/test/singa/test_platform.cc @@ -20,9 +20,22 @@ #include "gtest/gtest.h" #include "singa/core/device.h" #include "singa/core/tensor.h" - +#include +using namespace std; #ifdef USE_CUDA using singa::Platform; + +TEST(Platform, CreateMultDevice) { + int n = Platform::GetNumGPUs(); + auto devs = Platform::CreateCudaGPUs(n); + for (int i= 0;i < devs.size();i++) { + auto b = devs[i]->NewBlock(512+512*(2-i)); + EXPECT_EQ(512+512*(2-i), devs[i]->GetAllocatedMem()); + devs[i]->FreeBlock(b); + } +} + + TEST(Platform, NumGPUs) { int n = Platform::GetNumGPUs(); EXPECT_GE(n, 0); @@ -68,15 +81,7 @@ TEST(Platform, CreateDevice) { } } -TEST(Platform, CreateMultDevice) { - int n = Platform::GetNumGPUs(); - auto devs = Platform::CreateCudaGPUs(n); - for (auto dev : devs) { - auto b = dev->NewBlock(32); - EXPECT_LE(32u, dev->GetAllocatedMem()); - dev->FreeBlock(b); - } -} + TEST(Platform, CreatTensor) { auto cuda = Platform::CreateCudaGPUs(1)[0];