From 16e2cc3b00e6f49370ec9726c24f8484f8c60d7f Mon Sep 17 00:00:00 2001
From: ShichengChen <c337134154@gmail.com>
Date: Mon, 29 Jul 2019 16:12:53 +0800
Subject: [PATCH 1/5] SINGA-477 memory pool

SINGA-477 memory pool: add a mutex (Platform::mtx_) that serializes access to the shared UsedDevice list in CreateCudaGPUsOn

SINGA-477 memory pool google format
---
 include/singa/core/device.h |  5 ++++-
 src/core/device/platform.cc | 29 ++++++++++++++++++++---------
 src/core/memory/memory.cc   |  2 --
 3 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 7a5dda116..8d93335ff 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -24,6 +24,7 @@
 #include <string>
 #include <functional>
 #include <memory>
+#include <mutex>
 
 #include "singa/singa_config.h"
 #include "singa/core/common.h"
@@ -295,7 +296,8 @@ class Platform {
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
-  
+
+  static std::vector<std::shared_ptr<Device> > UsedDevice;
   /// This function is implementd by Caffe (http://caffe.berkeleyvision.org/).
   /// This function checks the availability of GPU #device_id.
   /// It attempts to create a context on the device by calling cudaFree(0).
@@ -311,6 +313,7 @@ class Platform {
   /// the permission. cudaFree(0) is one of those with no side effect,
   /// except the context initialization.
   static bool CheckDevice(const int device_id);
+  static std::mutex mtx_;
 #endif // USE_CUDA
 
 #ifdef USE_OPENCL
diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc
index 8ae15f860..f9a40a267 100644
--- a/src/core/device/platform.cc
+++ b/src/core/device/platform.cc
@@ -20,11 +20,14 @@
 #include "singa/core/device.h"
 #include "singa/singa_config.h"
 #include "singa/utils/opencl_utils.h"
-
+#include <iostream>
+using namespace std;
 namespace singa {
 
 #ifdef USE_CUDA
 
+std::vector<std::shared_ptr<Device> > Platform::UsedDevice;
+std::mutex Platform::mtx_;
 int Platform::GetNumGPUs() {
   int count;
   CUDA_CHECK(cudaGetDeviceCount(&count));
@@ -118,23 +121,31 @@ Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) {
   return CreateCudaGPUsOn(use_gpus, init_size);
 }
 
-const vector<shared_ptr<Device>>
-Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
+const vector<shared_ptr<Device> > Platform::CreateCudaGPUsOn(
+    const vector<int>& devices, size_t init_size) {
   MemPoolConf conf;
-  if (init_size > 0)
-    conf.set_init_size(init_size);
+  if (init_size > 0) conf.set_init_size(init_size);
   size_t bytes = conf.init_size() << 20;
   for (auto device : devices) {
     conf.add_device(device);
     CHECK_LE(bytes, Platform::GetGPUMemSize(device).first);
   }
+  mtx_.lock();
+  if (UsedDevice.size() == 0) {
+    int count = Platform::GetNumGPUs();
+    for (int i = 0; i < count; i++) UsedDevice.push_back(nullptr);
+  }
   auto pool = std::make_shared<CnMemPool>(conf);
-
+  for (size_t i = 0; i < devices.size(); i++) {
+    if (UsedDevice[devices[i]] == nullptr)
+      UsedDevice[devices[i]] = std::make_shared<CudaGPU>(devices[i], pool);
+  }
   vector<shared_ptr<Device> > ret;
-  for (auto device : devices) {
-    auto dev = std::make_shared<CudaGPU>(device, pool);
-    ret.push_back(dev);
+  for (size_t i = 0; i < devices.size(); i++) {
+    int count = Platform::GetNumGPUs();
+    ret.push_back(UsedDevice[devices[i]]);
   }
+  mtx_.unlock();
   return ret;
 }
 
diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc
index 0fb851125..506f94661 100644
--- a/src/core/memory/memory.cc
+++ b/src/core/memory/memory.cc
@@ -39,12 +39,10 @@ CnMemPool::CnMemPool(int numDevices, size_t init_size, size_t max_size) {
     conf_.add_device(i);
   conf_.set_init_size(init_size);
   conf_.set_max_size(max_size);
-  CHECK_LT(++pool_count, 2) << "CnMemPool must be used as a singleton.";
 }
 
 CnMemPool::CnMemPool(const MemPoolConf &conf) {
   conf_ = conf;
-  CHECK_LT(++pool_count, 2) << "CnMemPool must be used as a singleton.";
 }
 
 void CnMemPool::Init() {

From a60579747ae5dea9282fd2559ef751c21592b233 Mon Sep 17 00:00:00 2001
From: ShichengChen <c337134154@gmail.com>
Date: Sat, 3 Aug 2019 22:35:41 +0800
Subject: [PATCH 2/5] SINGA-477 memory pool google format

---
 src/core/device/platform.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc
index f9a40a267..eb3815f9a 100644
--- a/src/core/device/platform.cc
+++ b/src/core/device/platform.cc
@@ -25,7 +25,7 @@ using namespace std;
 namespace singa {
 
 #ifdef USE_CUDA
-
+ 
 std::vector<std::shared_ptr<Device> > Platform::UsedDevice;
 std::mutex Platform::mtx_;
 int Platform::GetNumGPUs() {

From 6b94b150e01c7725f40bdb093a591017091b4ef2 Mon Sep 17 00:00:00 2001
From: ShichengChen <c337134154@gmail.com>
Date: Mon, 5 Aug 2019 13:59:50 +0800
Subject: [PATCH 3/5] SINGA-477 memory pool: merge the two loops in CreateCudaGPUsOn and drop the unused GetNumGPUs() call

---
 src/core/device/platform.cc | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc
index eb3815f9a..e154d50e1 100644
--- a/src/core/device/platform.cc
+++ b/src/core/device/platform.cc
@@ -136,13 +136,10 @@ const vector<shared_ptr<Device> > Platform::CreateCudaGPUsOn(
     for (int i = 0; i < count; i++) UsedDevice.push_back(nullptr);
   }
   auto pool = std::make_shared<CnMemPool>(conf);
+  vector<shared_ptr<Device> > ret;
   for (size_t i = 0; i < devices.size(); i++) {
     if (UsedDevice[devices[i]] == nullptr)
       UsedDevice[devices[i]] = std::make_shared<CudaGPU>(devices[i], pool);
-  }
-  vector<shared_ptr<Device> > ret;
-  for (size_t i = 0; i < devices.size(); i++) {
-    int count = Platform::GetNumGPUs();
     ret.push_back(UsedDevice[devices[i]]);
   }
   mtx_.unlock();

From e20571af5bec03a60371a7bc0bd8d43ef3bc018e Mon Sep 17 00:00:00 2001
From: ShichengChen <c337134154@gmail.com>
Date: Wed, 7 Aug 2019 19:33:07 +0800
Subject: [PATCH 4/5] SINGA-477 add two memory pool test

---
 include/singa/core/memory.h    |  5 ++++-
 src/core/memory/memory.cc      | 12 +++++++++---
 test/python/test_memoryPool.py |  0
 test/singa/test_platform.cc    | 25 +++++++++++++++----------
 4 files changed, 28 insertions(+), 14 deletions(-)
 create mode 100644 test/python/test_memoryPool.py

diff --git a/include/singa/core/memory.h b/include/singa/core/memory.h
index f664f95ce..4ab316813 100644
--- a/include/singa/core/memory.h
+++ b/include/singa/core/memory.h
@@ -43,6 +43,9 @@ class DeviceMemPool {
   virtual std::pair<size_t, size_t> GetMemUsage() {
     return std::make_pair(0u, 0u);
   }
+  virtual std::pair<size_t, size_t> GetMemUsage(int id) {
+    return std::make_pair(0u, 0u);
+  }
   virtual ~DeviceMemPool(){};
 
  protected:
@@ -62,6 +65,7 @@ class CnMemPool : public DeviceMemPool {
   void Free(void* ptr);
 
   std::pair<size_t, size_t> GetMemUsage() override;
+  std::pair<size_t, size_t> GetMemUsage(int id) override;
 
   // release all memory and set cnmem manager to unintialized
   ~CnMemPool();
@@ -78,7 +82,6 @@ class CnMemPool : public DeviceMemPool {
   // lock on the initialized variable
   std::mutex mtx_;
 
-  static std::atomic<int> pool_count;
 };
 
 class CudaMemPool : public DeviceMemPool {
diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc
index 506f94661..0f5793f60 100644
--- a/src/core/memory/memory.cc
+++ b/src/core/memory/memory.cc
@@ -25,7 +25,6 @@
 #ifdef USE_CUDA
 
 namespace singa {
-std::atomic<int> CnMemPool::pool_count(0);
 std::pair<size_t, size_t> CnMemPool::GetMemUsage() {
   size_t free, total;
   auto status = cnmemMemGetInfo(&free, &total, NULL);
@@ -33,6 +32,14 @@ std::pair<size_t, size_t> CnMemPool::GetMemUsage() {
     << cnmemGetErrorString(status);
   return std::make_pair(free, total);
 }
+std::pair<size_t, size_t> CnMemPool::GetMemUsage(int id) {
+  CHECK_EQ(cudaSetDevice(id), cudaError_t::cudaSuccess);
+  size_t free, total;
+  auto status = cnmemMemGetInfo(&free, &total, NULL);
+  CHECK_EQ(status, cnmemStatus_t::CNMEM_STATUS_SUCCESS)
+    << cnmemGetErrorString(status);
+  return std::make_pair(free, total);
+}
 
 CnMemPool::CnMemPool(int numDevices, size_t init_size, size_t max_size) {
   for (int i = 0; i < numDevices; i++)
@@ -48,7 +55,7 @@ CnMemPool::CnMemPool(const MemPoolConf &conf) {
 void CnMemPool::Init() {
   mtx_.lock();
   if (!initialized_) {
-    const size_t kNBytesPerMB = (1u << 20);
+    const size_t kNBytesPerMB = (1u << 10);
     CHECK_GE(conf_.device_size(), 1);
     cnmemDevice_t *settingPtr = new cnmemDevice_t[conf_.device_size()];
     CHECK_GT(conf_.init_size(), 0u);
@@ -77,7 +84,6 @@ CnMemPool::~CnMemPool() {
     CHECK_EQ(status, cnmemStatus_t::CNMEM_STATUS_SUCCESS)
         << " " << cnmemGetErrorString(status);
     initialized_ = false;
-    --pool_count;
   }
   mtx_.unlock();
 }
diff --git a/test/python/test_memoryPool.py b/test/python/test_memoryPool.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/singa/test_platform.cc b/test/singa/test_platform.cc
index f50c97851..e002265dd 100644
--- a/test/singa/test_platform.cc
+++ b/test/singa/test_platform.cc
@@ -20,9 +20,22 @@
 #include "gtest/gtest.h"
 #include "singa/core/device.h"
 #include "singa/core/tensor.h"
-
+#include <iostream>
+using namespace std;
 #ifdef USE_CUDA
 using singa::Platform;
+
+TEST(Platform, CreateMultDevice) {
+  int n = Platform::GetNumGPUs();
+  auto devs = Platform::CreateCudaGPUs(n);
+  for (int i= 0;i < devs.size();i++) {
+    auto b = devs[i]->NewBlock(512+512*(2-i));
+    EXPECT_EQ(512+512*(2-i), devs[i]->GetAllocatedMem());
+    devs[i]->FreeBlock(b);
+  }
+}
+
+
 TEST(Platform, NumGPUs) {
   int n = Platform::GetNumGPUs();
   EXPECT_GE(n, 0);
@@ -68,15 +81,7 @@ TEST(Platform, CreateDevice) {
   }
 }
 
-TEST(Platform, CreateMultDevice) {
-  int n = Platform::GetNumGPUs();
-  auto devs = Platform::CreateCudaGPUs(n);
-  for (auto dev : devs) {
-    auto b = dev->NewBlock(32);
-    EXPECT_LE(32u, dev->GetAllocatedMem());
-    dev->FreeBlock(b);
-  }
-}
+
 
 TEST(Platform, CreatTensor) {
   auto cuda = Platform::CreateCudaGPUs(1)[0];

From 697315221391127f68d202cb76f1ddb45a48d4ff Mon Sep 17 00:00:00 2001
From: ShichengChen <c337134154@gmail.com>
Date: Wed, 7 Aug 2019 19:36:18 +0800
Subject: [PATCH 5/5] SINGA-477 memory pool: restore kNBytesPerMB to 1u << 20 in CnMemPool::Init

---
 src/core/memory/memory.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/core/memory/memory.cc b/src/core/memory/memory.cc
index 0f5793f60..73115d193 100644
--- a/src/core/memory/memory.cc
+++ b/src/core/memory/memory.cc
@@ -55,7 +55,7 @@ CnMemPool::CnMemPool(const MemPoolConf &conf) {
 void CnMemPool::Init() {
   mtx_.lock();
   if (!initialized_) {
-    const size_t kNBytesPerMB = (1u << 10);
+    const size_t kNBytesPerMB = (1u << 20);
     CHECK_GE(conf_.device_size(), 1);
     cnmemDevice_t *settingPtr = new cnmemDevice_t[conf_.device_size()];
     CHECK_GT(conf_.init_size(), 0u);