Skip to content

Commit

Permalink
Merge pull request #45 from bcumming/gpu
Browse files Browse the repository at this point in the history
Part 1 of the big GPU merge
  • Loading branch information
Sam Yates committed Nov 14, 2016
2 parents 0e4970d + 6cf11d3 commit 1bc18ea
Show file tree
Hide file tree
Showing 213 changed files with 6,391 additions and 37,620 deletions.
15 changes: 3 additions & 12 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,9 @@ Makefile
# mechanism implementations generated by modparser
include/mechanisms

# external build stuff
external/bin
external/modparser-build
external/modparser-configure
external/modparser-done
external/modparser-download
external/modparser-install
external/modparser-mkdir
external/modparser-patch
external/modparser-update
external/tmp
mechanisms/*.hpp
# mechanisms generated from .mod files
mechanisms/multicore/*.hpp
mechanisms/gpu/*.hpp

# build path
build*
Expand Down
Empty file removed .gitmodules
Empty file.
16 changes: 6 additions & 10 deletions .ycm_extra_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,13 @@
'external',
'-I',
'miniapp',
# '-isystem',
# '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk/usr/include/c++/4.2.1',
# '-I',
# '/usr/include/c++/4.9.2',
# '-isystem',
# '/usr/lib/gcc/x86_64-unknown-linux-gnu/4.9.2/include'
# '-isystem',
# '/usr/local/include',
'-I',
'modcc',
'-I',
'/cm/shared/apps/cuda/8.0.44/include',
'-DWITH_CUDA'
]



# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
Expand Down
61 changes: 54 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,61 @@ if(WITH_TRACE)
add_definitions("-DWITH_TRACE")
endif()

# TBB support
set(WITH_TBB OFF CACHE BOOL "use TBB for on-node threading" )
if(WITH_TBB)
# list of libraries to be linked against targets
set(EXTERNAL_LIBRARIES "")

# Threading model selection.
# A string cache entry (not option()) because there are three choices; the
# STRINGS property gives ccmake/cmake-gui a drop-down of the valid values.
set(THREADING_MODEL "serial" CACHE STRING "set the threading model, one of serial/tbb/omp")
set_property(CACHE THREADING_MODEL PROPERTY STRINGS serial tbb omp)

# Use STREQUAL, not MATCHES: MATCHES is a regex/substring test, so a value
# such as "tbbx" would silently enable TBB instead of reaching the error.
if(THREADING_MODEL STREQUAL "tbb")
    # TBB support
    find_package(TBB REQUIRED)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TBB_DEFINITIONS}")
    add_definitions(-DWITH_TBB)
    list(APPEND EXTERNAL_LIBRARIES ${TBB_LIBRARIES})

elseif(THREADING_MODEL STREQUAL "omp")
    # OpenMP support
    find_package(OpenMP REQUIRED)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    add_definitions(-DWITH_OMP)

elseif(THREADING_MODEL STREQUAL "serial")
    # serial back-end: no extra packages, definitions or libraries required

else()
    # message() already prefixes its output; no need for a literal "-- "
    message(FATAL_ERROR "Threading model '${THREADING_MODEL}' not supported, use one of serial/tbb/omp")

endif()

# libunwind for pretty printing stack traces
# Optional dependency: no REQUIRED on find_package, so builds proceed
# without it and stack traces are simply not symbolized.
find_package(Unwind)
if(UNWIND_FOUND)
add_definitions(-DWITH_UNWIND)
include_directories(${UNWIND_INCLUDE_DIR})
# EXTERNAL_LIBRARIES is the project-wide list of link dependencies
list(APPEND EXTERNAL_LIBRARIES ${UNWIND_LIBRARIES})
endif()

# CUDA support
set(WITH_CUDA OFF CACHE BOOL "use CUDA for GPU offload" )
if(WITH_CUDA)
find_package(CUDA REQUIRED)

# Turn off annoying and incorrect warnings generated in the JSON file.
# We also work around the same issue with the intel compiler.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-Xcudafe \"--diag_suppress=not_used_in_template_function_params\";-Xcudafe \"--diag_suppress=cast_to_qualified_type\")

# set the CUDA target-specific flags
# code regions protected by WITH_CUDA should only be available to the CUDA
# compiler, while regions protected by WITH_GPU are visible to both host
# and device compiler when targeting GPU.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DWITH_CUDA)
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DWITH_GPU)
# default device architecture is sm_35; swap for the commented-out
# sm_60 line below to target newer hardware
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_35)
#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-arch=sm_60)

add_definitions(-DWITH_GPU)
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
list(APPEND EXTERNAL_LIBRARIES ${CUDA_LIBRARIES})
endif()

# MPI support
Expand All @@ -50,7 +99,7 @@ if(WITH_MPI)
set_property(DIRECTORY APPEND_STRING PROPERTY COMPILE_OPTIONS "${MPI_C_COMPILE_FLAGS}")
endif()

# Profiler support
# Internal profiler support
set(WITH_PROFILING OFF CACHE BOOL "use built-in profiling of miniapp" )
if(WITH_PROFILING)
add_definitions(-DWITH_PROFILING)
Expand Down Expand Up @@ -115,9 +164,7 @@ else()
set(BUILD_NRN_VALIDATION_DATA TRUE)
endif()


include_directories(${CMAKE_SOURCE_DIR}/tclap/include)
include_directories(${CMAKE_SOURCE_DIR}/vector)
include_directories(${CMAKE_SOURCE_DIR}/tclap)
include_directories(${CMAKE_SOURCE_DIR}/include)
include_directories(${CMAKE_SOURCE_DIR}/src)
include_directories(${CMAKE_SOURCE_DIR}/miniapp)
Expand Down
48 changes: 48 additions & 0 deletions cmake/FindUnwind.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Find the libunwind library
#
# UNWIND_FOUND - True if libunwind was found
# UNWIND_LIBRARIES - The libraries needed to use libunwind
# UNWIND_INCLUDE_DIR - Location of unwind.h and libunwind.h
#
# The environment and cmake variables UNWIND_ROOT and UNWIND_ROOT_DIR
# respectively can be used to help CMake finding the library if it
# is not installed in any of the usual locations.

if(NOT UNWIND_FOUND)
    set(UNWIND_SEARCH_DIR ${UNWIND_ROOT_DIR} $ENV{UNWIND_ROOT})

    find_path(UNWIND_INCLUDE_DIR libunwind.h
        HINTS ${UNWIND_SEARCH_DIR}
        PATH_SUFFIXES include
    )

    # libunwind requires that we link against both libunwind.so/a and a
    # target-specific library libunwind-<target>.so/a.
    # This code sets the "<target>" string in libunwind_arch.
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
        set(libunwind_arch "arm")
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
        set(libunwind_arch "x86_64")
    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$")
        set(libunwind_arch "x86")
    endif()

    find_library(unwind_library_generic unwind
        HINTS ${UNWIND_SEARCH_DIR}
        PATH_SUFFIXES lib64 lib
    )

    find_library(unwind_library_target unwind-${libunwind_arch}
        HINTS ${UNWIND_SEARCH_DIR}
        PATH_SUFFIXES lib64 lib
    )

    # Sets UNWIND_FOUND when the header and both libraries were located,
    # and prints the standard found/not-found status message.
    # (The original module never set UNWIND_FOUND, so callers testing
    # if(UNWIND_FOUND) could never see a successful result.)
    include(FindPackageHandleStandardArgs)
    find_package_handle_standard_args(Unwind DEFAULT_MSG
        UNWIND_INCLUDE_DIR unwind_library_generic unwind_library_target)

    if(UNWIND_FOUND)
        set(UNWIND_LIBRARIES ${unwind_library_generic} ${unwind_library_target})
    endif()

    # Only cache entries can be marked advanced; UNWIND_LIBRARIES is a
    # plain variable, so mark the underlying find_* cache entries instead.
    mark_as_advanced(UNWIND_INCLUDE_DIR unwind_library_generic unwind_library_target)

    # clear scratch variables (note: the search dir is UNWIND_SEARCH_DIR,
    # upper case — the original unset the wrong, lower-case name)
    unset(UNWIND_SEARCH_DIR)
    unset(unwind_library_generic)
    unset(unwind_library_target)
    unset(libunwind_arch)
endif()
2 changes: 1 addition & 1 deletion data/test.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ NEURON {
}

STATE {
h
h (nA)
m r
}

Expand Down
38 changes: 37 additions & 1 deletion mechanisms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@ set(mechanisms pas hh expsyn exp2syn)
# set the flags for the modcc compiler that converts NMODL
# files to C++/CUDA source.
set(modcc_flags "-t cpu")

if(USE_OPTIMIZED_KERNELS) # generate optimized kernels
set(modcc_flags ${modcc_flags} -O)
endif()

# generate source for each mechanism
foreach(mech ${mechanisms})
set(mod "${CMAKE_CURRENT_SOURCE_DIR}/mod/${mech}.mod")
set(hpp "${CMAKE_CURRENT_SOURCE_DIR}/${mech}.hpp")
set(hpp "${CMAKE_CURRENT_SOURCE_DIR}/multicore/${mech}.hpp")
if(use_external_modcc)
add_custom_command(
OUTPUT "${hpp}"
Expand All @@ -33,3 +34,38 @@ endforeach()
# Fake target to always trigger .mod -> .hpp dependencies because wtf CMake
add_custom_target(build_all_mods DEPENDS ${all_mod_hpps} modcc)

# Repeat the generation steps above to also produce CUDA kernels from the
# same NMODL sources when we are targeting the GPU. The generated headers
# land in gpu/ instead of multicore/.
if(WITH_CUDA)
set(modcc_flags "-t gpu")

# optionally ask modcc for optimized kernels, mirroring the CPU path
if(USE_OPTIMIZED_KERNELS)
set(modcc_flags ${modcc_flags} -O)
endif()

# generate source for each mechanism
foreach(mech ${mechanisms})
set(mod "${CMAKE_CURRENT_SOURCE_DIR}/mod/${mech}.mod")
set(hpp "${CMAKE_CURRENT_SOURCE_DIR}/gpu/${mech}.hpp")
# use_external_modcc selects a pre-installed modcc; otherwise the
# in-tree modparser target must be built first (hence DEPENDS below)
if(use_external_modcc)
add_custom_command(
OUTPUT "${hpp}"
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMAND ${modcc} ${modcc_flags} ${mod} -o ${hpp}
)
else()
add_custom_command(
OUTPUT "${hpp}"
DEPENDS modparser "${mod}"
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
COMMAND ${modcc} ${modcc_flags} ${mod} -o ${hpp}
)
endif()
# mark as GENERATED so CMake does not require the file at configure time
set_source_files_properties("${hpp}" PROPERTIES GENERATED TRUE)
list(APPEND all_gpu_mod_hpps "${hpp}")
endforeach()

# Aggregate target so that depending on build_all_gpu_mods triggers all
# .mod -> .hpp custom commands (custom-command outputs alone do not chain
# across directories).
add_custom_target(build_all_gpu_mods DEPENDS ${all_gpu_mod_hpps} modcc)
endif()
10 changes: 0 additions & 10 deletions mechanisms/generate.sh

This file was deleted.

18 changes: 14 additions & 4 deletions miniapp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
set(HEADERS
)
set(MINIAPP_SOURCES
io.cpp
miniapp.cpp
io.cpp
miniapp_recipes.cpp
)
# Sources for the CUDA build: miniapp.cu is a thin wrapper that includes
# miniapp.cpp so the same code is compiled by nvcc.
set(MINIAPP_SOURCES_CUDA
miniapp.cu
io.cpp
miniapp_recipes.cpp
)

# Build the miniapp executable, via cuda_add_executable when GPU support
# is enabled so that .cu files are compiled by nvcc.
if(WITH_CUDA)
cuda_add_executable(miniapp.exe ${MINIAPP_SOURCES_CUDA} ${HEADERS})
target_link_libraries(miniapp.exe LINK_PUBLIC gpu)
else()
add_executable(miniapp.exe ${MINIAPP_SOURCES} ${HEADERS})
endif()

target_link_libraries(miniapp.exe LINK_PUBLIC cellalgo)
target_link_libraries(miniapp.exe LINK_PUBLIC ${TBB_LIBRARIES})
target_link_libraries(miniapp.exe LINK_PUBLIC nestmc)
target_link_libraries(miniapp.exe LINK_PUBLIC ${EXTERNAL_LIBRARIES})

if(WITH_MPI)
target_link_libraries(miniapp.exe LINK_PUBLIC ${MPI_C_LIBRARIES})
Expand Down
28 changes: 18 additions & 10 deletions miniapp/miniapp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@

#include <json/json.hpp>

#include <backends/fvm.hpp>
#include <common_types.hpp>
#include <cell.hpp>
#include <communication/communicator.hpp>
#include <communication/global_policy.hpp>
#include <cell.hpp>
#include <fvm_multicell.hpp>
#include <io/exporter_spike_file.hpp>
#include <mechanism_catalogue.hpp>
#include <model.hpp>
#include <profiling/profiler.hpp>
#include <threading/threading.hpp>
#include <util/debug.hpp>
#include <util/ioutil.hpp>
#include <util/nop.hpp>
#include <util/optional.hpp>
Expand All @@ -28,17 +29,19 @@
using namespace nest::mc;

using global_policy = communication::global_policy;
using lowered_cell = fvm::fvm_multicell<double, cell_local_size_type>;
//using lowered_cell = fvm::fvm_cell<double, cell_local_size_type>;
#ifdef WITH_CUDA
using lowered_cell = fvm::fvm_multicell<gpu::backend>;
#else
using lowered_cell = fvm::fvm_multicell<multicore::backend>;
#endif
using model_type = model<lowered_cell>;
using time_type = model_type::time_type;
using sample_trace_type = sample_trace<time_type, model_type::value_type>;
using file_export_type = io::exporter_spike_file<time_type, global_policy>;
using sample_trace_type = sample_trace<model_type::time_type, model_type::value_type>;
using file_export_type = io::exporter_spike_file<model_type::time_type, global_policy>;
void banner();
std::unique_ptr<recipe> make_recipe(const io::cl_options&, const probe_distribution&);
std::unique_ptr<sample_trace_type> make_trace(cell_member_type probe_id, probe_spec probe);
std::pair<cell_gid_type, cell_gid_type> distribute_cells(cell_size_type ncells);
using communicator_type = communication::communicator<time_type, communication::global_policy>;
using communicator_type = communication::communicator<model_type::time_type, communication::global_policy>;
using spike_type = typename communicator_type::spike_type;

void write_trace_json(const sample_trace_type& trace, const std::string& prefix = "trace_");
Expand Down Expand Up @@ -84,7 +87,7 @@ int main(int argc, char** argv) {
options.file_extension, options.over_write);
};

// File output is depending on the input arguments
// File output depends on the input arguments
std::unique_ptr<file_export_type> file_exporter;
if (options.spike_file_output) {
if (options.single_file_per_rank) {
Expand Down Expand Up @@ -128,7 +131,7 @@ int main(int argc, char** argv) {

// reset the model
m.reset();
// rest the source spikes
// reset the source spikes
for (auto source : local_sources) {
m.add_artificial_spike({source, 0});
}
Expand Down Expand Up @@ -178,6 +181,11 @@ void banner() {
std::cout << " starting miniapp\n";
std::cout << " - " << threading::description() << " threading support\n";
std::cout << " - communication policy: " << global_policy::name() << "\n";
#ifdef WITH_CUDA
std::cout << " - gpu support: on\n";
#else
std::cout << " - gpu support: off\n";
#endif
std::cout << "====================\n";
}

Expand Down
1 change: 1 addition & 0 deletions miniapp/miniapp.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "miniapp.cpp"
Loading

0 comments on commit 1bc18ea

Please sign in to comment.