This repository has been archived by the owner on Oct 15, 2019. It is now read-only.

[googlenet-perf-fix] Tests passing (except 1)
hotpxl committed Mar 11, 2015
1 parent d77cac9 commit 2e886a4
Showing 20 changed files with 96 additions and 90 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -6,7 +6,7 @@ message(STATUS "cmake generator: ${CMAKE_GENERATOR}")
message(STATUS "cmake build tool: ${CMAKE_BUILD_TOOL}")
message(STATUS "cmake build type: ${CMAKE_BUILD_TYPE}")

set(COMPILER_FLAGS "-Wall -g -pthread -m64 -fPIC -flto")
set(COMPILER_FLAGS "-Wall -g -pthread -m64 -fPIC")
set(GENERAL_FLAGS_DEBUG "${COMPILER_FLAGS} -O0 -march=native")
set(GENERAL_FLAGS_RELEASE "${COMPILER_FLAGS} -O2 -march=native -mtune=native -mssse3 -ftree-vectorize -funswitch-loops -ftree-vectorizer-verbose=0 -DNDEBUG")
set(CMAKE_CXX_FLAGS_DEBUG "${GENERAL_FLAGS_DEBUG} -std=c++11")
25 changes: 25 additions & 0 deletions apps/main.cpp
@@ -0,0 +1,25 @@
#include <minerva.h>
#include <iostream>
#include <cstdint>

using namespace std;
using namespace minerva;

int main(int argc, char** argv) {
  MinervaSystem::Initialize(&argc, &argv);
  auto& ms = MinervaSystem::Instance();
  auto gpu_device = ms.device_manager().CreateGpuDevice(0);
  ms.current_device_id_ = gpu_device;
  vector<NArray> narrs;
  for (int i = 0; i < 1000; ++i) {
    narrs.push_back(NArray::Constant({10, 10}, i));
  }
  for (int i = 0; i < narrs.size(); ++i) {
    narrs[i] = narrs[i] * 100 + 1;
  }
  for (int i = 0; i < narrs.size(); ++i) {
    narrs[i].Wait();
  }
  cout << ms.physical_dag().ToDotString() << endl;
  MinervaSystem::Finalize();
}
File renamed without changes.
File renamed without changes.
Empty file modified apps/mnist_mlp.cpp.tmp 100755 → 100644
Empty file.
29 changes: 10 additions & 19 deletions minerva/CMakeLists.txt
@@ -7,28 +7,19 @@ include_directories(
file(GLOB_RECURSE src_file_list "*.cpp")
file(GLOB_RECURSE cuda_src_file_list "*.cu")

if (NOT HAS_PS)
if (CUDA_FOUND)
CUDA_ADD_LIBRARY(minerva SHARED ${src_file_list} ${cuda_src_file_list})
CUDA_ADD_CUBLAS_TO_TARGET(minerva)
else (CUDA_FOUND)
add_library(minerva SHARED ${src_file_list})
endif (CUDA_FOUND)
else (NOT HAS_PS)
if (CUDA_FOUND)
CUDA_ADD_LIBRARY(minerva STATIC ${src_file_list} ${cuda_src_file_list})
CUDA_ADD_CUBLAS_TO_TARGET(minerva)
else (CUDA_FOUND)
add_library(minerva STATIC ${src_file_list})
endif (CUDA_FOUND)

target_link_libraries(minerva minervaps)
endif (NOT HAS_PS)

target_link_libraries(minerva glog gflags boost_thread boost_system)
if (CUDA_FOUND)
CUDA_ADD_LIBRARY(minerva SHARED ${src_file_list} ${cuda_src_file_list})
CUDA_ADD_CUBLAS_TO_TARGET(minerva)
target_link_libraries(minerva cudnn ${CUDA_curand_LIBRARY})
else (CUDA_FOUND)
add_library(minerva SHARED ${src_file_list})
endif (CUDA_FOUND)
target_link_libraries(minerva glog gflags boost_thread boost_system)

if (HAS_PS)
target_link_libraries(minerva minervaps)
endif ()

set(LIBRARY_OUTPUT_PATH ${Minerva_BINARY_DIR}/lib)

install(TARGETS minerva DESTINATION lib)
5 changes: 2 additions & 3 deletions minerva/backend/dag/dag_chunk.cpp
@@ -5,9 +5,8 @@
namespace minerva {

static void ExternRCUpdate(PhysicalDataNode* node, int delta) {
auto& ms = MinervaSystem::Instance();
if (ms.IsAlive()) {
dynamic_cast<DagScheduler&>(ms.backend()).ExternRCUpdate(node, delta);
if (MinervaSystem::IsAlive()) {
dynamic_cast<DagScheduler&>(MinervaSystem::Instance().backend()).ExternRCUpdate(node, delta);
}
}

6 changes: 3 additions & 3 deletions minerva/backend/dag/dag_scheduler.cpp
@@ -40,7 +40,7 @@ vector<BackendChunk*> DagScheduler::Create(const vector<BackendChunk*>& params,
{
MultiNodeLock lock(dag_, param_data_nodes);
auto op_node = dag_->NewOpNode(param_data_nodes, rst_data_nodes, {fn, current_device_id});
DLOG(INFO) << "create new nodes on device #" << current_device_id;
DLOG(INFO) << "create new node #" << op_node->node_id_ << " on device #" << current_device_id;
OnCreateNode(op_node);
Iter(param_data_nodes, [&](PhysicalDataNode* n) {
OnCreateEdge(n, op_node);
@@ -222,9 +222,9 @@ void DagScheduler::DispatcherRoutine() {
}
}
} else if (task.first == TaskType::kToDelete) {
DLOG(INFO) << "delete node #" << node_id;
DLOG(INFO) << "dispatcher ready to delete node #" << node_id;
OnDeleteNode(node);
to_delete = dag_->RemoveNodeFromDag(node_id);
OnDeleteNode(to_delete);
} else {
LOG(FATAL) << "illegal task state";
}
16 changes: 3 additions & 13 deletions minerva/backend/dag/multi_node_lock.h
@@ -11,8 +11,7 @@ class MultiNodeLock {
public:
MultiNodeLock() = default;
template<typename T> MultiNodeLock(PhysicalDag*, const std::vector<T*>&);
template<typename T> MultiNodeLock(PhysicalDag*, const std::unordered_set<T*>&);
template<typename T> MultiNodeLock(PhysicalDag*, T*);
MultiNodeLock(PhysicalDag*, DagNode*);
DISALLOW_COPY_AND_ASSIGN(MultiNodeLock);
~MultiNodeLock() = default;

@@ -28,24 +27,15 @@ MultiNodeLock::MultiNodeLock(PhysicalDag* dag, const std::vector<T*>& nodes) {
});
}

template<typename T>
MultiNodeLock::MultiNodeLock(PhysicalDag* dag, const std::unordered_set<T*>& nodes) {
std::lock_guard<std::mutex> l(dag->m_);
Iter(nodes, [this](PhysicalDataNode* node) {
locks_.emplace_front(node->m_);
});
}

template<typename T>
MultiNodeLock::MultiNodeLock(PhysicalDag* dag, T* node) {
MultiNodeLock::MultiNodeLock(PhysicalDag* dag, DagNode* node) {
std::lock_guard<std::mutex> l(dag->m_);
locks_.emplace_front(node->m_);
Iter(node->successors_, [this](DagNode* n) {
locks_.emplace_front(n->m_);
});
Iter(node->predecessors_, [this](DagNode* n) {
locks_.emplace_front(n->m_);
});
locks_.emplace_front(node->m_);
}

} // namespace minerva
29 changes: 16 additions & 13 deletions minerva/backend/dag/priority_dispatcher_queue.cpp
@@ -1,42 +1,45 @@
#include "priority_dispatcher_queue.h"
#include <thread>
#include <glog/logging.h>

using namespace std;

namespace minerva {

PriorityDispatcherQueue::PriorityDispatcherQueue() : exit_now_(false), tasks_(static_cast<size_t>(TaskType::kEnd)) {
PriorityDispatcherQueue::PriorityDispatcherQueue() :
exit_now_(false),
tasks_(static_cast<size_t>(TaskType::kEnd)),
total_(0) {
}

void PriorityDispatcherQueue::Push(const TaskPair& task_pair) {
unique_lock<mutex> lock(m_);
tasks_.at(static_cast<size_t>(task_pair.first)).push_back(task_pair.second);
if (++total_ == 1) {
cv_.notify_all();
}
}

bool PriorityDispatcherQueue::Pop(TaskPair& task_pair) {
unique_lock<mutex> lock(m_);
int available_index = -1;
for (size_t i = 0; i < tasks_.size(); ++i) {
if (!tasks_[i].empty()) {
available_index = i;
break;
}
}
while (available_index == -1 && !exit_now_.Read()) {
while (total_ == 0 && !exit_now_.Read()) {
cv_.wait(lock);
}
if (exit_now_.Read()) {
return true;
} else {
int available_index = -1;
for (size_t i = 0; i < tasks_.size(); ++i) {
if (!tasks_[i].empty()) {
available_index = i;
break;
}
}
}
if (exit_now_.Read()) {
return true;
} else {
CHECK_NE(available_index, -1) << "empty task queue woken up";
task_pair.first = static_cast<TaskType>(available_index);
task_pair.second = tasks_[available_index].front();
tasks_[available_index].pop_front();
--total_;
return false;
}
}
1 change: 1 addition & 0 deletions minerva/backend/dag/priority_dispatcher_queue.h
@@ -25,6 +25,7 @@ class PriorityDispatcherQueue {
std::condition_variable cv_;
BoolFlag exit_now_;
std::vector<std::list<uint64_t>> tasks_;
int total_;
};

} // namespace minerva
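
Taken together, the queue changes above replace Pop's scan-then-wait logic with a running element count: Push bumps total_ and wakes waiters only on the empty-to-non-empty transition, and Pop now sleeps while total_ == 0 before scanning the per-priority lists. A minimal consumer loop over this interface might look like the sketch below, assuming TaskPair is the (TaskType, node id) pair used by Push and Pop in this file; run_task is a hypothetical stand-in for the dispatcher's real work.

// Sketch only: a dispatcher thread draining PriorityDispatcherQueue.
// Pop() blocks while the queue is empty and returns true once the exit
// flag is raised (how that flag is set is not shown in this diff).
#include <cstdint>
#include <functional>
#include "priority_dispatcher_queue.h"

namespace minerva {

void DispatcherLoop(PriorityDispatcherQueue& queue,
                    const std::function<void(TaskType, uint64_t)>& run_task) {
  TaskPair task;
  while (!queue.Pop(task)) {
    // task.first is the highest-priority non-empty TaskType;
    // task.second is the node id that was pushed by the scheduler.
    run_task(task.first, task.second);
  }
}

}  // namespace minerva
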
11 changes: 4 additions & 7 deletions minerva/backend/dag/runtime_info_map.cpp
@@ -21,22 +21,19 @@ RuntimeInfo::RuntimeInfo() : num_triggers_needed(0), reference_count(0), state(N
}

void RuntimeInfoMap::AddNode(uint64_t id) {
if (info_.find(id) != info_.end()) {
LOG(FATAL) << "node #" << id << " already existed in runtime info map";
}
info_[id];
CHECK(info_.Insert(make_pair(id, RuntimeInfo()))) << "node #" << id << " already existed in runtime info map";
}

void RuntimeInfoMap::RemoveNode(uint64_t id) {
CHECK_EQ(info_.erase(id), 1);
CHECK_EQ(info_.Erase(id), 1);
}

RuntimeInfo& RuntimeInfoMap::At(uint64_t id) {
return info_.at(id);
return info_.At(id);
}

NodeState RuntimeInfoMap::GetState(uint64_t id) {
return info_.at(id).state;
return info_.At(id).state;
}

} // namespace minerva
3 changes: 2 additions & 1 deletion minerva/backend/dag/runtime_info_map.h
@@ -4,6 +4,7 @@
#include <unordered_map>
#include <atomic>
#include "common/common.h"
#include "common/concurrent_unordered_map.h"

namespace minerva {

@@ -32,7 +33,7 @@ class RuntimeInfoMap {
void KillNode(uint64_t);

private:
std::unordered_map<uint64_t, RuntimeInfo> info_;
ConcurrentUnorderedMap<uint64_t, RuntimeInfo> info_;
};

} // namespace minerva
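
The RuntimeInfoMap change swaps the plain std::unordered_map for a ConcurrentUnorderedMap from common/concurrent_unordered_map.h, which is not included in this commit. Judging from the call sites above, it exposes Insert (returns whether the key was newly added), Erase (returns the number of elements removed), and At (returns a reference to the mapped value). A minimal mutex-guarded wrapper with that shape is sketched below; the real header may differ in locking granularity and interface.

// Sketch only: the interface implied by the RuntimeInfoMap call sites,
// not the actual common/concurrent_unordered_map.h implementation.
#include <cstddef>
#include <mutex>
#include <unordered_map>
#include <utility>

template<typename K, typename V>
class ConcurrentUnorderedMap {
 public:
  bool Insert(const std::pair<K, V>& kv) {
    std::lock_guard<std::mutex> lock(m_);
    return map_.insert(kv).second;
  }

  std::size_t Erase(const K& key) {
    std::lock_guard<std::mutex> lock(m_);
    return map_.erase(key);
  }

  // The returned reference outlives the lock, so callers must ensure the
  // entry is not erased concurrently while the reference is in use.
  V& At(const K& key) {
    std::lock_guard<std::mutex> lock(m_);
    return map_.at(key);
  }

 private:
  std::mutex m_;
  std::unordered_map<K, V> map_;
};
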
5 changes: 2 additions & 3 deletions minerva/dag/dag.h
@@ -48,11 +48,10 @@ class Dag {

template<typename D, typename O>
Dag<D, O>::~Dag() {
index_to_node_.LockRead();
for (auto i : index_to_node_.VolatilePayload()) {
auto index_to_node_cp = index_to_node_.VolatilePayload();
for (auto i : index_to_node_cp) {
delete RemoveNodeFromDag(i.first);
}
index_to_node_.UnlockRead();
}

template<typename D, typename O>
2 changes: 1 addition & 1 deletion minerva/device/device.h
@@ -73,7 +73,7 @@ class GpuDevice : public ThreadedDevice {
std::string Name() const override;

private:
static const size_t kParallelism = 4;
static const size_t kParallelism = 1; // TODO change me
const int device_;
cudaStream_t stream_[kParallelism];
cublasHandle_t cublas_handle_[kParallelism];
8 changes: 5 additions & 3 deletions tests/CMakeLists.txt
@@ -6,11 +6,13 @@ include_directories(

enable_testing()

add_library(unittest_main STATIC unittest_main.cpp)
target_link_libraries(unittest_main minerva gtest)
if (CUDA_FOUND)
CUDA_ADD_LIBRARY(unittest_main SHARED unittest_main.cpp)
CUDA_ADD_CUBLAS_TO_TARGET(unittest_main)
endif (CUDA_FOUND)
else ()
add_library(unittest_main SHARED unittest_main.cpp)
endif ()
target_link_libraries(unittest_main minerva gtest)

set(unittest_list "")
set(test_list "")
1 change: 1 addition & 0 deletions tests/unittest_activation.cpp
@@ -3,6 +3,7 @@

using namespace std;
using namespace minerva;

#ifdef HAS_CUDA
TEST(Activation, GpuSigmoidForward) {
auto& ms = MinervaSystem::Instance();
20 changes: 10 additions & 10 deletions tests/unittest_gc.cpp
@@ -10,7 +10,7 @@ TEST(GCCorrectness, EvalInLoop) {
for(int i = 0; i < 10; ++i) {
narr += 1;
//cout << ms.physical_dag().PrintDag() << endl;
narr.WaitForEval();
narr.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 1) << "wrong #physical_nodes in iter#" << i;
//EXPECT_EQ(ms.data_store().GetTotalBytes(DataStore::CPU), 320) << "wrong memory usage in iter#" << i;
cout << "iter #" << i << " succeed!" << endl;
@@ -27,7 +27,7 @@ TEST(GCCorrectness, EvalPartial) {
for(int i = 0; i < 10; ++i)
arr.push_back(a + 1);
for(size_t i = 0; i < arr.size(); ++i) {
arr[i].WaitForEval();
arr[i].Wait();
ASSERT_EQ(ms.physical_dag().NumNodes(), 20 - i);
cout << "Eval #" << i << " succeed!" << endl;
}
@@ -37,14 +37,14 @@
TEST(GCCorrectness, ChangeInternRCAfterEval) {
MinervaSystem& ms = MinervaSystem::Instance();
NArray a = NArray::Constant({10, 8}, 0.0);
a.WaitForEval();
a.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 1);
//EXPECT_EQ(ms.data_store().GetTotalBytes(DataStore::CPU), 320);
NArray b = a + 1;
NArray c = a + 1;
b.WaitForEval();
b.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 4);
c.WaitForEval();
c.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 3);
}

@@ -53,10 +53,10 @@ TEST(GCCorrectness, ChangeExternRCAfterEval) {
NArray a = NArray::Constant({10, 8}, 0.0);
{
NArray b = NArray::Constant({10, 8}, 0.0);
b.WaitForEval();
b.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 3);
}
a.WaitForEval();
a.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 1);
}

@@ -65,14 +65,14 @@ TEST(GCCorrectness, ChangeBothRCAfterEval) {
NArray a, b;
{
NArray c = NArray::Constant({10, 8}, 0.0);
c.WaitForEval();
c.Wait();
a = c + 1;
b = c + 2;
}
a.WaitForEval();
a.Wait();
//cout << ms.logical_dag().PrintDag() << endl;
EXPECT_EQ(ms.physical_dag().NumNodes(), 4);
b.WaitForEval();
b.Wait();
EXPECT_EQ(ms.physical_dag().NumNodes(), 2);
// check correctness
shared_ptr<float> aptr = a.Get();
