Skip to content

Commit

Permalink
Fix multiply defined symbols.
Browse files Browse the repository at this point in the history
- Fix multiply defined symbols by moving global variable definitions
  from device_wrapper.txx to device_wrapper.cpp.
- Change configure script to use ARFLAGS="cru -qoffload-build" and
  AR="xiar" when compiling library for Phi.
- Add template parameter to specify device synchronization. Fixes an
  issue that caused linking errors when using a different option than
  the one used to compile the library.
- Reorganize includes, making sure no circular dependencies exist.
  • Loading branch information
dmalhotra committed Jul 31, 2014
1 parent 0143096 commit 8c2b816
Show file tree
Hide file tree
Showing 59 changed files with 422 additions and 263 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Expand Up @@ -22,3 +22,9 @@
/src/*.o
/stamp-h1

m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
libtool
2 changes: 2 additions & 0 deletions Makefile.am
Expand Up @@ -100,6 +100,7 @@ lib_libfmm_a_HEADERS = \
# the sources to add to the library and to add to the source distribution
lib_libpvfmm_a_SOURCES = \
$(lib_libpvfmm_a_HEADERS) \
src/device_wrapper.cpp \
src/fmm_gll.cpp \
src/legendre_rule.cpp \
src/mortonid.cpp \
Expand Down Expand Up @@ -216,6 +217,7 @@ clean-local: clean-doxygen
cd $(EX_DIR) && $(MAKE) clean;
$(RM) -r $(RESULT_DIR)/*
$(RM) *~ */*~ */*/*~
$(RM) ./lib/*

#------------------------------------------------------------------------------

10 changes: 8 additions & 2 deletions configure.ac
Expand Up @@ -25,8 +25,14 @@ AC_PROG_INSTALL
AC_PROG_MKDIR_P
AC_PROG_LN_S
AC_PROG_MAKE_SET
AC_PROG_RANLIB
#AM_PROG_AR
#AC_PROG_RANLIB
##AM_PROG_AR

# automake 1.12 seems to require AM_PROG_AR, but automake 1.11 doesn't
# recognize it
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
LT_INIT
AC_PROG_LIBTOOL

# Check for CUDA
AX_CHECK_CUDA
Expand Down
12 changes: 6 additions & 6 deletions examples/Makefile
Expand Up @@ -27,15 +27,15 @@ ifeq ($(INTEL_OFFLOAD_OK),yes)

$(BINDIR)/%: $(OBJDIR)/%.o
-@$(MKDIRS) $(dir $@)
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload $^ $(LDFLAGS_PVFMM) -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) $^_mic $(LDFLAGS_PVFMM) -o $@_mic
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__MIC_ASYNCH__ $^_async $(LDFLAGS_PVFMM) -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload $^ $(LDFLAGS_PVFMM) -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) $^_async $(LDFLAGS_PVFMM) -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__DEVICE_SYNC__=1 $^_mic $(LDFLAGS_PVFMM) -o $@_mic

$(OBJDIR)/%.o: $(SRCDIR)/%.cpp
-@$(MKDIRS) $(dir $@)
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload -I$(INCDIR) -c $^ -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -I$(INCDIR) -c $^ -o $@_mic
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__MIC_ASYNCH__ -I$(INCDIR) -c $^ -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -no-offload -I$(INCDIR) -c $^ -o $@
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -I$(INCDIR) -c $^ -o $@_async
$(CXX_PVFMM) $(CXXFLAGS_PVFMM) -D__DEVICE_SYNC__=1 -I$(INCDIR) -c $^ -o $@_mic

else

Expand Down
5 changes: 4 additions & 1 deletion examples/src/example1.cpp
Expand Up @@ -76,12 +76,15 @@ void fmm_test(size_t N, int mult_order, MPI_Comm comm){
for(size_t i=0;i< src_value.size();i++) src_value[i]=drand48();
for(size_t i=0;i<surf_value.size();i++) surf_value[i]=drand48();

// Create memory-manager (optional)
pvfmm::mem::MemoryManager mem_mgr(10000000);

// Construct tree.
size_t max_pts=300;
pvfmm::PtFMM_Tree* tree=PtFMM_CreateTree(src_coord, src_value, surf_coord, surf_value, trg_coord, comm, max_pts, pvfmm::FreeSpace);

// Load matrices.
pvfmm::PtFMM matrices;
pvfmm::PtFMM matrices(&mem_mgr);
matrices.Initialize(mult_order, comm, &kernel_fn, &kernel_fn_aux);

// FMM Setup
Expand Down
9 changes: 5 additions & 4 deletions examples/src/fmm_cheb.cpp
Expand Up @@ -9,6 +9,7 @@
#include <fmm_cheb.hpp>
#include <fmm_node.hpp>
#include <fmm_tree.hpp>
#include <cheb_node.hpp>
#include <utils.hpp>

//////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -229,16 +230,16 @@ void fmm_test(int test_case, size_t N, size_t M, bool unif, int mult_order, int
fn_input_=fn_input_t1<Real_t>;
fn_poten_=fn_poten_t1<Real_t>;
fn_grad_ =fn_grad_t1<Real_t>;
mykernel =pvfmm::LaplaceKernel<Real_t>::potn_ker;
//mykernel_grad=pvfmm::LaplaceKernel<Real_t>::grad_ker;
mykernel =&pvfmm::LaplaceKernel<Real_t>::potn_ker();
//mykernel_grad=&pvfmm::LaplaceKernel<Real_t>::grad_ker();
bndry=pvfmm::Periodic;
break;
case 2:
fn_input_=fn_input_t2<Real_t>;
fn_poten_=fn_poten_t2<Real_t>;
fn_grad_ =fn_grad_t2<Real_t>;
mykernel =pvfmm::LaplaceKernel<Real_t>::potn_ker;
//mykernel_grad=pvfmm::LaplaceKernel<Real_t>::grad_ker;
mykernel =&pvfmm::LaplaceKernel<Real_t>::potn_ker();
//mykernel_grad=&pvfmm::LaplaceKernel<Real_t>::grad_ker();
bndry=pvfmm::FreeSpace;
break;
case 3:
Expand Down
13 changes: 7 additions & 6 deletions include/cheb_node.hpp
Expand Up @@ -5,15 +5,16 @@
* \brief This is a derived cheb class of MPI_Node.
*/

#ifndef _PVFMM_CHEB_NODE_HPP_
#define _PVFMM_CHEB_NODE_HPP_
#include <vector>
#include <stdint.h>

#include <pvfmm_common.hpp>
#include <assert.h>
#include <vector.hpp>
#include <tree_node.hpp>
#include <mpi_node.hpp>
#include <mortonid.hpp>
#include <cheb_utils.hpp>
#include <vector.hpp>

#ifndef _PVFMM_CHEB_NODE_HPP_
#define _PVFMM_CHEB_NODE_HPP_

namespace pvfmm{

Expand Down
10 changes: 6 additions & 4 deletions include/cheb_node.txx
@@ -1,14 +1,16 @@
/**
* \file cheb_node.cpp
* \file cheb_node.txx
* \author Dhairya Malhotra, dhairya.malhotra@gmail.com
* \date 1-22-2010
* \brief This file contains the implementation of the class Cheb_Node.
*/

#include <iostream>
#include <matrix.hpp>
#include <omp.h>
#include <cmath>
#include <cassert>
#include <algorithm>

#include <cheb_utils.hpp>
#include <matrix.hpp>

namespace pvfmm{

Expand Down
9 changes: 5 additions & 4 deletions include/cheb_utils.hpp
Expand Up @@ -5,13 +5,14 @@
* \brief This file contains chebyshev related functions.
*/

#ifndef _PVFMM_CHEB_UTILS_HPP_
#define _PVFMM_CHEB_UTILS_HPP_
#include <vector>

#include <pvfmm_common.hpp>
#include <vector>
#include <kernel.hpp>
#include <vector.hpp>
#include <kernel.hpp>

#ifndef _PVFMM_CHEB_UTILS_HPP_
#define _PVFMM_CHEB_UTILS_HPP_

namespace pvfmm{

Expand Down
13 changes: 10 additions & 3 deletions include/cheb_utils.txx
Expand Up @@ -5,11 +5,18 @@
* \brief This file contains chebyshev related functions.
*/

#include <assert.h>
#include <omp.h>
#include <cmath>
#include <cassert>
#include <iostream>
#include <algorithm>
#include <matrix.hpp>
#include <mem_mgr.hpp>

#include <legendre_rule.hpp>
#include <mem_utils.hpp>
#include <mat_utils.hpp>
#include <mem_mgr.hpp>
#include <matrix.hpp>
#include <profile.hpp>

namespace pvfmm{

Expand Down
14 changes: 8 additions & 6 deletions include/device_wrapper.hpp
Expand Up @@ -5,13 +5,13 @@
* \brief This file contains definition of DeviceWrapper.
*/

#ifndef _PVFMM_DEVICE_WRAPPER_HPP_
#define _PVFMM_DEVICE_WRAPPER_HPP_

#include <cstdlib>
#include <cassert>
#include <stdint.h>

#include <pvfmm_common.hpp>
#include <vector.hpp>

#ifndef _PVFMM_DEVICE_WRAPPER_HPP_
#define _PVFMM_DEVICE_WRAPPER_HPP_

#ifdef __INTEL_OFFLOAD
#pragma offload_attribute(push,target(mic))
Expand All @@ -24,8 +24,10 @@ namespace DeviceWrapper{

void free_device(char* dev_handle, uintptr_t dev_ptr);

template <int SYNC=__DEVICE_SYNC__>
int host2device(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len);

template <int SYNC=__DEVICE_SYNC__>
int device2host(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len);

void wait(int lock_idx);
Expand All @@ -51,7 +53,7 @@ Note: Any MIC offload section should look like this:
MIC_Lock::release_lock(lock_idx);
}
#ifdef __MIC_ASYNCH__
#ifdef __DEVICE_SYNC__
MIC_Lock::wait_lock(lock_idx);
#endif
Expand Down
27 changes: 14 additions & 13 deletions include/device_wrapper.txx
Expand Up @@ -5,15 +5,20 @@
* \brief This file contains implementation of DeviceWrapper.
*/

#include <vector.hpp>
#include <device_wrapper.hpp>
#include <omp.h>
#include <cassert>
#include <cstdlib>

namespace pvfmm{

namespace DeviceWrapper{

// MIC functions

#define ALLOC alloc_if(1) free_if(0)
#define FREE alloc_if(0) free_if(1)
#define REUSE alloc_if(0) free_if(0)

inline uintptr_t alloc_device_mic(char* dev_handle, size_t len){
assert(dev_handle!=NULL);
uintptr_t dev_ptr=(uintptr_t)NULL;
Expand Down Expand Up @@ -52,10 +57,6 @@ namespace DeviceWrapper{
MIC_Lock::release_lock(lock_idx);
}
}
#ifndef __MIC_ASYNCH__ // Wait
#pragma offload target(mic:0)
{MIC_Lock::wait_lock(lock_idx);}
#endif
return lock_idx;
#endif
return -1;
Expand All @@ -80,9 +81,6 @@ namespace DeviceWrapper{
MIC_Lock::release_lock(lock_idx);
}
}
#ifndef __MIC_ASYNCH__ // Wait
MIC_Lock::wait_lock(lock_idx);
#endif
return lock_idx;
#endif
return -1;
Expand Down Expand Up @@ -116,20 +114,27 @@ namespace DeviceWrapper{
#endif
}

/**
 * \brief Start a host-to-device transfer and optionally wait for it.
 *
 * When __INTEL_OFFLOAD is defined the call is forwarded to
 * host2device_mic() and the lock index it returns is handed back to the
 * caller. Without __INTEL_OFFLOAD nothing is transferred and -1 is returned.
 *
 * \tparam SYNC When non-zero, wait on the returned lock before returning.
 * The default value (__DEVICE_SYNC__) is supplied by the declaration in
 * device_wrapper.hpp and must not be repeated here: C++ does not allow a
 * template parameter to be given a default argument by two declarations
 * in the same scope.
 * \param[in] host_ptr Forwarded unchanged to host2device_mic().
 * \param[in] dev_handle Forwarded unchanged to host2device_mic().
 * \param[in] dev_ptr Forwarded unchanged to host2device_mic().
 * \param[in] len Forwarded unchanged to host2device_mic().
 * \return The lock index from host2device_mic(), or -1 when built without
 * __INTEL_OFFLOAD.
 */
template <int SYNC>
inline int host2device(char* host_ptr, char* dev_handle, uintptr_t dev_ptr, size_t len){
  int lock_idx=-1;
  #ifdef __INTEL_OFFLOAD
  lock_idx=host2device_mic(host_ptr,dev_handle,dev_ptr,len);
  if(SYNC){
    // The wait for this direction is issued inside an offload section.
    #pragma offload target(mic:0)
    {MIC_Lock::wait_lock(lock_idx);}
  }
  #endif
  return lock_idx;
}

template <int SYNC=__DEVICE_SYNC__>
inline int device2host(char* dev_handle, uintptr_t dev_ptr, char* host_ptr, size_t len){
int lock_idx=-1;
#ifdef __INTEL_OFFLOAD
lock_idx=device2host_mic(dev_handle,dev_ptr, host_ptr, len);
if(SYNC) MIC_Lock::wait_lock(lock_idx);
#else
;
#endif
Expand Down Expand Up @@ -230,8 +235,4 @@ namespace DeviceWrapper{
#endif
}

Vector<char> MIC_Lock::lock_vec;
Vector<char>::Device MIC_Lock::lock_vec_;
int MIC_Lock::lock_idx;

}//end namespace
6 changes: 3 additions & 3 deletions include/dtypes.h
@@ -1,9 +1,9 @@
#ifndef __PVFMM_DTYPES_H_
#define __PVFMM_DTYPES_H_

#include <mpi.h>
#include <complex>

#ifndef __PVFMM_DTYPES_H_
#define __PVFMM_DTYPES_H_

/**
* \file dtypes.h
* \brief Traits to determine MPI_DATATYPE from a C++ datatype
Expand Down
13 changes: 9 additions & 4 deletions include/fft_wrapper.hpp
@@ -1,20 +1,25 @@
/**
* \file mat_utils.hpp
* \file fft_wrapper.hpp
* \author Dhairya Malhotra, dhairya.malhotra@gmail.com
* \date 2-11-2011
* \brief This file contains FFTW3 wrapper functions.
*/

#ifndef _PVFMM_FFT_WRAPPER_
#define _PVFMM_FFT_WRAPPER_

#include <cmath>
#include <cassert>
#include <vector>
#include <fftw3.h>
#ifdef FFTW3_MKL
#include <fftw3_mkl.h>
#endif

#include <pvfmm_common.hpp>
#include <mem_utils.hpp>
#include <matrix.hpp>

#ifndef _PVFMM_FFT_WRAPPER_
#define _PVFMM_FFT_WRAPPER_

namespace pvfmm{

template<class T>
Expand Down
15 changes: 9 additions & 6 deletions include/fmm_cheb.hpp
Expand Up @@ -6,16 +6,19 @@
* This handles all the translations through matrix multiplications.
*/

#ifndef _PVFMM_FMM_CHEB_HPP_
#define _PVFMM_FMM_CHEB_HPP_
#include <mpi.h>
#include <vector>

#include <pvfmm_common.hpp>
#include <mpi.h>
#include <matrix.hpp>
#include <precomp_mat.hpp>
#include <cheb_utils.hpp>
#include <cheb_node.hpp>
#include <mem_mgr.hpp>
#include <fmm_pts.hpp>
#include <vector.hpp>
#include <matrix.hpp>
#include <kernel.hpp>

#ifndef _PVFMM_FMM_CHEB_HPP_
#define _PVFMM_FMM_CHEB_HPP_

namespace pvfmm{

Expand Down

0 comments on commit 8c2b816

Please sign in to comment.