Permalink
Browse files

Fix multiply defined symbols.

- Fix multiply defined symbols by moving global variable definitions
  from device_wrapper.txx to device_wrapper.cpp.
- Change configure script to use ARFLAGS="cru -qoffload-build" and
  AR="xiar" when compiling library for Phi.
- Add template parameter to specify device synchronization. Fixes an
  issue that caused linking errors when using a different option than
  the one used to compile the library.
- Reorganize includes, making sure no circular dependencies exist.
  • Loading branch information...
dmalhotra committed Jul 31, 2014
1 parent 0143096 commit 8c2b816f952246d5e92c3732dbfa916943809584
Showing with 422 additions and 263 deletions.
  1. +6 −0 .gitignore
  2. +2 −0 Makefile.am
  3. +8 −2 configure.ac
  4. +6 −6 examples/Makefile
  5. +4 −1 examples/src/example1.cpp
  6. +5 −4 examples/src/fmm_cheb.cpp
  7. +7 −6 include/cheb_node.hpp
  8. +6 −4 include/cheb_node.txx
  9. +5 −4 include/cheb_utils.hpp
  10. +10 −3 include/cheb_utils.txx
  11. +8 −6 include/device_wrapper.hpp
  12. +14 −13 include/device_wrapper.txx
  13. +3 −3 include/dtypes.h
  14. +9 −4 include/fft_wrapper.hpp
  15. +9 −6 include/fmm_cheb.hpp
  16. +11 −3 include/fmm_cheb.txx
  17. +5 −2 include/fmm_gll.hpp
  18. +7 −5 include/fmm_node.hpp
  19. +5 −0 include/fmm_node.txx
  20. +15 −6 include/fmm_pts.hpp
  21. +22 −12 include/fmm_pts.txx
  22. +8 −5 include/fmm_tree.hpp
  23. +9 −1 include/fmm_tree.txx
  24. +5 −3 include/interac_list.hpp
  25. +4 −4 include/interac_list.txx
  26. +28 −38 include/kernel.hpp
  27. +24 −9 include/kernel.txx
  28. +3 −2 include/legendre_rule.hpp
  29. +2 −2 include/mat_utils.hpp
  30. +6 −4 include/mat_utils.txx
  31. +5 −5 include/matrix.hpp
  32. +8 −2 include/matrix.txx
  33. +12 −10 include/mem_mgr.hpp
  34. +2 −7 include/mem_utils.hpp
  35. +1 −2 include/mem_utils.txx
  36. +5 −5 include/mortonid.hpp
  37. +2 −0 include/mortonid.txx
  38. +6 −3 include/mpi_node.hpp
  39. +5 −3 include/mpi_node.txx
  40. +5 −4 include/mpi_tree.hpp
  41. +15 −4 include/mpi_tree.txx
  42. +3 −3 include/ompUtils.txx
  43. +4 −4 include/parUtils.h
  44. +6 −4 include/parUtils.txx
  45. +5 −3 include/precomp_mat.hpp
  46. +9 −1 include/precomp_mat.txx
  47. +4 −4 include/profile.hpp
  48. +12 −7 include/pvfmm.hpp
  49. +6 −2 include/pvfmm_common.hpp
  50. +2 −4 include/quad_utils.hpp
  51. +4 −2 include/quad_utils.txx
  52. +6 −3 include/tree.hpp
  53. +1 −4 include/tree.txx
  54. +2 −4 include/tree_node.hpp
  55. +5 −4 include/vector.hpp
  56. +4 −4 include/vector.txx
  57. +6 −6 m4/ac_check_intel_offload.m4
  58. +19 −0 src/device_wrapper.cpp
  59. +2 −1 src/fmm_gll.cpp
View
@@ -22,3 +22,9 @@
/src/*.o
/stamp-h1
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
libtool
View
@@ -100,6 +100,7 @@ lib_libfmm_a_HEADERS = \
# the sources to add to the library and to add to the source distribution
lib_libpvfmm_a_SOURCES = \
$(lib_libpvfmm_a_HEADERS) \
src/device_wrapper.cpp \
src/fmm_gll.cpp \
src/legendre_rule.cpp \
src/mortonid.cpp \
@@ -216,6 +217,7 @@ clean-local: clean-doxygen
cd $(EX_DIR) && $(MAKE) clean;
$(RM) -r $(RESULT_DIR)/*
$(RM) *~ */*~ */*/*~
$(RM) ./lib/*
#------------------------------------------------------------------------------
View
@@ -25,8 +25,14 @@ AC_PROG_INSTALL
AC_PROG_MKDIR_P
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_PROG_RANLIB
#AM_PROG_AR
#AC_PROG_RANLIB
##AM_PROG_AR
# automake 1.12 seems to require AM_PROG_AR, but automake 1.11 doesn't
# recognize it
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
LT_INIT
AC_PROG_LIBTOOL
# Check for CUDA
AX_CHECK_CUDA
View
@@ -27,15 +27,15 @@ ifeq ($(INTEL_OFFLOAD_OK),yes)
$(BINDIR)/%: $(OBJDIR)/%.o
-@$(MKDIRS) $(dir $@)
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload $^ $(LDFLAGS_PVFMM) -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) $^_mic $(LDFLAGS_PVFMM) -o $@_mic
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__MIC_ASYNCH__ $^_async $(LDFLAGS_PVFMM) -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload $^ $(LDFLAGS_PVFMM) -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) $^_async $(LDFLAGS_PVFMM) -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__DEVICE_SYNC__=1 $^_mic $(LDFLAGS_PVFMM) -o $@_mic
$(OBJDIR)/%.o: $(SRCDIR)/%.cpp
-@$(MKDIRS) $(dir $@)
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload -I$(INCDIR) -c $^ -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -I$(INCDIR) -c $^ -o $@_mic
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__MIC_ASYNCH__ -I$(INCDIR) -c $^ -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload -I$(INCDIR) -c $^ -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -I$(INCDIR) -c $^ -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__DEVICE_SYNC__=1 -I$(INCDIR) -c $^ -o $@_mic
else
@@ -76,12 +76,15 @@ void fmm_test(size_t N, int mult_order, MPI_Comm comm){
for(size_t i=0;i< src_value.size();i++) src_value[i]=drand48();
for(size_t i=0;i<surf_value.size();i++) surf_value[i]=drand48();
// Create memory-manager (optional)
pvfmm::mem::MemoryManager mem_mgr(10000000);
// Construct tree.
size_t max_pts=300;
pvfmm::PtFMM_Tree* tree=PtFMM_CreateTree(src_coord, src_value, surf_coord, surf_value, trg_coord, comm, max_pts, pvfmm::FreeSpace);
// Load matrices.
pvfmm::PtFMM matrices;
pvfmm::PtFMM matrices(&mem_mgr);
matrices.Initialize(mult_order, comm, &kernel_fn, &kernel_fn_aux);
// FMM Setup
@@ -9,6 +9,7 @@
#include <fmm_cheb.hpp>
#include <fmm_node.hpp>
#include <fmm_tree.hpp>
#include <cheb_node.hpp>
#include <utils.hpp>
//////////////////////////////////////////////////////////////////////////////
@@ -229,16 +230,16 @@ void fmm_test(int test_case, size_t N, size_t M, bool unif, int mult_order, int
fn_input_=fn_input_t1<Real_t>;
fn_poten_=fn_poten_t1<Real_t>;
fn_grad_ =fn_grad_t1<Real_t>;
mykernel =pvfmm::LaplaceKernel<Real_t>::potn_ker;
//mykernel_grad=pvfmm::LaplaceKernel<Real_t>::grad_ker;
mykernel =&pvfmm::LaplaceKernel<Real_t>::potn_ker();
//mykernel_grad=&pvfmm::LaplaceKernel<Real_t>::grad_ker();
bndry=pvfmm::Periodic;
break;
case 2:
fn_input_=fn_input_t2<Real_t>;
fn_poten_=fn_poten_t2<Real_t>;
fn_grad_ =fn_grad_t2<Real_t>;
mykernel =pvfmm::LaplaceKernel<Real_t>::potn_ker;
//mykernel_grad=pvfmm::LaplaceKernel<Real_t>::grad_ker;
mykernel =&pvfmm::LaplaceKernel<Real_t>::potn_ker();
//mykernel_grad=&pvfmm::LaplaceKernel<Real_t>::grad_ker();
bndry=pvfmm::FreeSpace;
break;
case 3:
View
@@ -5,15 +5,16 @@
* \brief This is a derived cheb class of MPI_Node.
*/
#ifndef _PVFMM_CHEB_NODE_HPP_
#define _PVFMM_CHEB_NODE_HPP_
#include <vector>
#include <stdint.h>
#include <pvfmm_common.hpp>
#include <assert.h>
#include <vector.hpp>
#include <tree_node.hpp>
#include <mpi_node.hpp>
#include <mortonid.hpp>
#include <cheb_utils.hpp>
#include <vector.hpp>
#ifndef _PVFMM_CHEB_NODE_HPP_
#define _PVFMM_CHEB_NODE_HPP_
namespace pvfmm{
View
@@ -1,14 +1,16 @@
/**
* \file cheb_node.cpp
* \file cheb_node.txx
* \author Dhairya Malhotra, dhairya.malhotra@gmail.com
* \date 1-22-2010
* \brief This file contains the implementation of the class Cheb_Node.
*/
#include <iostream>
#include <matrix.hpp>
#include <omp.h>
#include <cmath>
#include <cassert>
#include <algorithm>
#include <cheb_utils.hpp>
#include <matrix.hpp>
namespace pvfmm{
View
@@ -5,13 +5,14 @@
* \brief This file contains chebyshev related functions.
*/
#ifndef _PVFMM_CHEB_UTILS_HPP_
#define _PVFMM_CHEB_UTILS_HPP_
#include <vector>
#include <pvfmm_common.hpp>
#include <vector>
#include <kernel.hpp>
#include <vector.hpp>
#include <kernel.hpp>
#ifndef _PVFMM_CHEB_UTILS_HPP_
#define _PVFMM_CHEB_UTILS_HPP_
namespace pvfmm{
View
@@ -5,11 +5,18 @@
* \brief This file contains chebyshev related functions.
*/
#include <assert.h>
#include <omp.h>
#include <cmath>
#include <cassert>
#include <iostream>
#include <algorithm>
#include <matrix.hpp>
#include <mem_mgr.hpp>
#include <legendre_rule.hpp>
#include <mem_utils.hpp>
#include <mat_utils.hpp>
#include <mem_mgr.hpp>
#include <matrix.hpp>
#include <profile.hpp>
namespace pvfmm{
View
@@ -5,13 +5,13 @@
* \brief This file contains definition of DeviceWrapper.
*/
#ifndef _PVFMM_DEVICE_WRAPPER_HPP_
#define _PVFMM_DEVICE_WRAPPER_HPP_
#include <cstdlib>
#include <cassert>
#include <stdint.h>
#include <pvfmm_common.hpp>
#include <vector.hpp>
#ifndef _PVFMM_DEVICE_WRAPPER_HPP_
#define _PVFMM_DEVICE_WRAPPER_HPP_
#ifdef __INTEL_OFFLOAD
#pragma offload_attribute(push,target(mic))
@@ -24,8 +24,10 @@ namespace DeviceWrapper{
void free_device(char* dev_handle, uintptr_t dev_ptr);
template <int SYNC=__DEVICE_SYNC__>
int host2device(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len);
template <int SYNC=__DEVICE_SYNC__>
int device2host(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len);
void wait(int lock_idx);
@@ -51,7 +53,7 @@ Note: Any MIC offload section should look like this:
MIC_Lock::release_lock(lock_idx);
}
#ifdef __MIC_ASYNCH__
#ifdef __DEVICE_SYNC__
MIC_Lock::wait_lock(lock_idx);
#endif
View
@@ -5,15 +5,20 @@
* \brief This file contains implementation of DeviceWrapper.
*/
#include <vector.hpp>
#include <device_wrapper.hpp>
#include <omp.h>
#include <cassert>
#include <cstdlib>
namespace pvfmm{
namespace DeviceWrapper{
// MIC functions
#define ALLOC alloc_if(1) free_if(0)
#define FREE alloc_if(0) free_if(1)
#define REUSE alloc_if(0) free_if(0)
inline uintptr_t alloc_device_mic(char* dev_handle, size_t len){
assert(dev_handle!=NULL);
uintptr_t dev_ptr=(uintptr_t)NULL;
@@ -52,10 +57,6 @@ namespace DeviceWrapper{
MIC_Lock::release_lock(lock_idx);
}
}
#ifndef __MIC_ASYNCH__ // Wait
#pragma offload target(mic:0)
{MIC_Lock::wait_lock(lock_idx);}
#endif
return lock_idx;
#endif
return -1;
@@ -80,9 +81,6 @@ namespace DeviceWrapper{
MIC_Lock::release_lock(lock_idx);
}
}
#ifndef __MIC_ASYNCH__ // Wait
MIC_Lock::wait_lock(lock_idx);
#endif
return lock_idx;
#endif
return -1;
@@ -116,20 +114,27 @@ namespace DeviceWrapper{
#endif
}
/**
 * \brief Host-side entry point that forwards a transfer request to
 * host2device_mic() and optionally blocks until it has completed.
 *
 * The default value of SYNC is supplied by the declaration in
 * device_wrapper.hpp; it must not be repeated on this definition, since a
 * template parameter may be given a default argument by only one declaration
 * in a scope.
 *
 * \tparam SYNC When non-zero, wait on the returned lock (inside an offload
 * section on mic:0) before returning; when zero, return immediately.
 * \param[in] host_ptr Source buffer, passed through to host2device_mic().
 * \param[in] dev_handle Device handle, passed through to host2device_mic().
 * \param[in] dev_ptr Device address, passed through to host2device_mic().
 * \param[in] len Length, passed through to host2device_mic()
 * (presumably a byte count -- confirm against host2device_mic).
 * \return The lock index returned by host2device_mic(), or -1 when the code
 * is built without __INTEL_OFFLOAD (no transfer is performed in that case).
 */
template <int SYNC>
inline int host2device(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len){
  int lock_idx=-1;
#ifdef __INTEL_OFFLOAD
  lock_idx=host2device_mic(host_ptr,dev_handle,dev_ptr,len);
  if(SYNC){ // Synchronous mode: wait on the device side for the transfer lock.
    #pragma offload target(mic:0)
    {MIC_Lock::wait_lock(lock_idx);}
  }
#endif
  return lock_idx;
}
template <int SYNC=__DEVICE_SYNC__>
inline int device2host(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len){
int lock_idx=-1;
#ifdef __INTEL_OFFLOAD
lock_idx=device2host_mic(dev_handle,dev_ptr, host_ptr, len);
if(SYNC) MIC_Lock::wait_lock(lock_idx);
#else
;
#endif
@@ -230,8 +235,4 @@ namespace DeviceWrapper{
#endif
}
Vector<char> MIC_Lock::lock_vec;
Vector<char>::Device MIC_Lock::lock_vec_;
int MIC_Lock::lock_idx;
}//end namespace
View
@@ -1,9 +1,9 @@
#ifndef __PVFMM_DTYPES_H_
#define __PVFMM_DTYPES_H_
#include <mpi.h>
#include <complex>
#ifndef __PVFMM_DTYPES_H_
#define __PVFMM_DTYPES_H_
/**
* \file dtypes.h
* \brief Traits to determine MPI_DATATYPE from a C++ datatype
View
@@ -1,20 +1,25 @@
/**
* \file mat_utils.hpp
* \file fft_wrapper.hpp
* \author Dhairya Malhotra, dhairya.malhotra@gmail.com
* \date 2-11-2011
* \brief This file contains FFTW3 wrapper functions.
*/
#ifndef _PVFMM_FFT_WRAPPER_
#define _PVFMM_FFT_WRAPPER_
#include <cmath>
#include <cassert>
#include <vector>
#include <fftw3.h>
#ifdef FFTW3_MKL
#include <fftw3_mkl.h>
#endif
#include <pvfmm_common.hpp>
#include <mem_utils.hpp>
#include <matrix.hpp>
#ifndef _PVFMM_FFT_WRAPPER_
#define _PVFMM_FFT_WRAPPER_
namespace pvfmm{
template<class T>
View
@@ -6,16 +6,19 @@
* This handles all the translations through matrix multiplications.
*/
#ifndef _PVFMM_FMM_CHEB_HPP_
#define _PVFMM_FMM_CHEB_HPP_
#include <mpi.h>
#include <vector>
#include <pvfmm_common.hpp>
#include <mpi.h>
#include <matrix.hpp>
#include <precomp_mat.hpp>
#include <cheb_utils.hpp>
#include <cheb_node.hpp>
#include <mem_mgr.hpp>
#include <fmm_pts.hpp>
#include <vector.hpp>
#include <matrix.hpp>
#include <kernel.hpp>
#ifndef _PVFMM_FMM_CHEB_HPP_
#define _PVFMM_FMM_CHEB_HPP_
namespace pvfmm{
Oops, something went wrong.

0 comments on commit 8c2b816

Please sign in to comment.