Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Solved multiple definition problem

Inlined some functions, changed implementation of builtin functions.
closes #3
  • Loading branch information...
commit 47a5736f5c364ca565077ef54da7fe247da7dc52 1 parent e9bf6e0
@ddemidov authored
View
22 vexcl/devlist.hpp
@@ -37,7 +37,9 @@ THE SOFTWARE.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <vector>
#include <string>
@@ -53,7 +55,9 @@ namespace Filter {
bool operator()(const cl::Device &d) const {
return true;
}
- } All;
+ };
+
+ const AllFilter All;
/// Selects devices whose vendor name match given value.
struct Vendor {
@@ -116,7 +120,9 @@ namespace Filter {
ext.find("cl_amd_fp64") != std::string::npos
);
}
- } DoublePrecision;
+ };
+
+ const DoublePrecisionFilter DoublePrecision;
/// Selects no more than given number of devices.
/**
@@ -201,7 +207,9 @@ namespace Filter {
const char *name;
const char *maxdev;
mutable int count;
- } Env;
+ };
+
+ const EnvFilter Env;
/// Negation of a filter.
template <class Flt>
@@ -393,7 +401,7 @@ class Context {
/// Output list of devices to stream.
-std::ostream& operator<<(std::ostream &os, const std::vector<cl::Device> &device) {
+inline std::ostream& operator<<(std::ostream &os, const std::vector<cl::Device> &device) {
uint p = 1;
for(auto d = device.begin(); d != device.end(); d++)
@@ -403,7 +411,7 @@ std::ostream& operator<<(std::ostream &os, const std::vector<cl::Device> &device
}
/// Output list of devices to stream.
-std::ostream& operator<<(std::ostream &os, const std::vector<cl::CommandQueue> &queue) {
+inline std::ostream& operator<<(std::ostream &os, const std::vector<cl::CommandQueue> &queue) {
uint p = 1;
for(auto q = queue.begin(); q != queue.end(); q++)
@@ -415,7 +423,7 @@ std::ostream& operator<<(std::ostream &os, const std::vector<cl::CommandQueue> &
}
/// Output list of devices to stream.
-std::ostream& operator<<(std::ostream &os, const vex::Context &ctx) {
+inline std::ostream& operator<<(std::ostream &os, const vex::Context &ctx) {
return os << ctx.queue();
}
View
6 vexcl/profiler.hpp
@@ -37,7 +37,9 @@ THE SOFTWARE.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <iostream>
#include <iomanip>
@@ -290,7 +292,7 @@ class profiler {
};
-std::ostream& operator<<(std::ostream &os, profiler &prof) {
+inline std::ostream& operator<<(std::ostream &os, profiler &prof) {
prof.print(os);
return os;
}
View
4 vexcl/reduce.hpp
@@ -37,7 +37,9 @@ THE SOFTWARE.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <vector>
#include <sstream>
View
8 vexcl/spmat.hpp
@@ -37,7 +37,9 @@ THE SOFTWARE.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <vector>
#include <set>
@@ -1500,7 +1502,7 @@ void SpMatCCSR<real,column_t>::mul(
}
/// Returns device weight after spmv test
-double device_spmv_perf(
+inline double device_spmv_perf(
const cl::Context &context, const cl::Device &device,
size_t test_size = 64U
)
@@ -1596,7 +1598,7 @@ double device_spmv_perf(
* where a, b and c are device vectors. Each device gets portion of the vector
* proportional to the performance of this operation.
*/
-std::vector<size_t> partition_by_spmv_perf(
+inline std::vector<size_t> partition_by_spmv_perf(
size_t n, const std::vector<cl::CommandQueue> &queue)
{
View
4 vexcl/stencil.hpp
@@ -36,7 +36,9 @@ THE SOFTWARE.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <vector>
#include <map>
View
64 vexcl/util.hpp
@@ -37,7 +37,9 @@ THE SOFTWARE.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <iostream>
#include <sstream>
@@ -54,28 +56,28 @@ typedef unsigned char uchar;
namespace vex {
/// Convert typename to string.
-template <class T> std::string type_name() { return "undefined_type"; }
-template <> std::string type_name<float>() { return "float"; }
-template <> std::string type_name<double>() { return "double"; }
-template <> std::string type_name<int>() { return "int"; }
-template <> std::string type_name<char>() { return "char"; }
-template <> std::string type_name<bool>() { return "bool"; }
-template <> std::string type_name<uint>() { return "unsigned int"; }
-template <> std::string type_name<uchar>() { return "unsigned char"; }
-
-template <> std::string type_name<size_t>() {
+template <class T> inline std::string type_name() { return "undefined_type"; }
+template <> inline std::string type_name<float>() { return "float"; }
+template <> inline std::string type_name<double>() { return "double"; }
+template <> inline std::string type_name<int>() { return "int"; }
+template <> inline std::string type_name<char>() { return "char"; }
+template <> inline std::string type_name<bool>() { return "bool"; }
+template <> inline std::string type_name<uint>() { return "unsigned int"; }
+template <> inline std::string type_name<uchar>() { return "unsigned char"; }
+
+template <> inline std::string type_name<size_t>() {
static_assert(sizeof(size_t) == 4 || sizeof(size_t) == 8,
"Only 32bit or 64bit architectures are supported");
return sizeof(size_t) == 4 ? "uint" : "ulong";
}
-template <> std::string type_name<ptrdiff_t>() {
+template <> inline std::string type_name<ptrdiff_t>() {
static_assert(sizeof(size_t) == 4 || sizeof(size_t) == 8,
"Only 32bit or 64bit architectures are supported");
return sizeof(size_t) == 4 ? "int" : "long";
}
-std::string standard_kernel_header = std::string(
+const std::string standard_kernel_header = std::string(
"#if defined(cl_khr_fp64)\n"
"# pragma OPENCL EXTENSION cl_khr_fp64: enable\n"
"#elif defined(cl_amd_fp64)\n"
@@ -84,7 +86,7 @@ std::string standard_kernel_header = std::string(
);
/// Return next power of 2.
-size_t nextpow2(size_t x) {
+inline size_t nextpow2(size_t x) {
--x;
x |= x >> 1U;
x |= x >> 2U;
@@ -95,7 +97,7 @@ size_t nextpow2(size_t x) {
}
/// Align n to the next multiple of m.
-size_t alignup(size_t n, size_t m = 16U) {
+inline size_t alignup(size_t n, size_t m = 16U) {
return n % m ? n - n % m + m : n;
}
@@ -108,7 +110,7 @@ size_t alignup(size_t n, size_t m = 16U) {
* where a, b and c are device vectors. Each device gets portion of the vector
* proportional to the performance of this operation.
*/
-std::vector<size_t> partition_by_vector_perf(
+inline std::vector<size_t> partition_by_vector_perf(
size_t n, const std::vector<cl::CommandQueue> &queue);
/// Partitions vector with respect to spmv performance of devices.
@@ -121,11 +123,11 @@ std::vector<size_t> partition_by_vector_perf(
* domain. Each device gets portion of the vector proportional to the
* performance of this operation.
*/
-std::vector<size_t> partition_by_spmv_perf(
+inline std::vector<size_t> partition_by_spmv_perf(
size_t n, const std::vector<cl::CommandQueue> &queue);
/// Partitions vector equally.
-std::vector<size_t> partition_equally(
+inline static std::vector<size_t> partition_equally(
size_t n, const std::vector<cl::CommandQueue> &queue)
{
size_t m = queue.size();
@@ -150,6 +152,7 @@ std::vector<size_t> partition_equally(
* Otherwise the default partitioning function (partition_by_vector_perf) is
* selected.
*/
+template <bool dummy = true>
struct partitioning_scheme {
typedef std::function<
std::vector<size_t>(size_t, const std::vector<cl::CommandQueue>&)
@@ -167,8 +170,8 @@ struct partitioning_scheme {
}
}
- std::vector<size_t> operator()(size_t n,
- const std::vector<cl::CommandQueue> &queue) const
+ static std::vector<size_t> get(size_t n,
+ const std::vector<cl::CommandQueue> &queue)
{
if (!is_set) {
pfun = partition_by_vector_perf;
@@ -180,13 +183,22 @@ struct partitioning_scheme {
private:
static bool is_set;
static function_type pfun;
-} partition;
+};
+
+template <bool dummy>
+bool partitioning_scheme<dummy>::is_set = false;
+
+template <bool dummy>
+typename partitioning_scheme<dummy>::function_type partitioning_scheme<dummy>::pfun;
-bool partitioning_scheme::is_set = false;
-partitioning_scheme::function_type partitioning_scheme::pfun;
+inline std::vector<size_t> partition(size_t n,
+ const std::vector<cl::CommandQueue> &queue)
+{
+ return partitioning_scheme<true>::get(n, queue);
+}
/// Create and build a program from source string.
-cl::Program build_sources(
+inline cl::Program build_sources(
const cl::Context &context, const std::string &source
)
{
@@ -210,7 +222,7 @@ cl::Program build_sources(
}
/// Get maximum possible workgroup size for given kernel.
-uint kernel_workgroup_size(
+inline uint kernel_workgroup_size(
const cl::Kernel &kernel,
const cl::Device &device
)
@@ -224,7 +236,7 @@ uint kernel_workgroup_size(
}
/// Output description of an OpenCL error to a stream.
-std::ostream& operator<<(std::ostream &os, const cl::Error &e) {
+inline std::ostream& operator<<(std::ostream &os, const cl::Error &e) {
os << e.what() << "(";
switch (e.err()) {
View
60 vexcl/vector.hpp
@@ -41,7 +41,9 @@ THE SOFTWARE.
# define VEXCL_VARIADIC_TEMPLATES
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <array>
#include <vector>
@@ -1357,7 +1359,7 @@ DEFINE_BINARY_OP(binop::LeftShift, <<)
/// \cond INTERNAL
/// Builtin function call.
-template <const char *func_name, class... Expr>
+template <class func_name, class... Expr>
class BuiltinFunction : public expression {
public:
BuiltinFunction(const Expr&... expr) : expr(expr...) {}
@@ -1367,7 +1369,7 @@ class BuiltinFunction : public expression {
}
std::string kernel_name() const {
- return std::string(func_name) + build_kernel_name<0>();
+ return std::string(func_name::value()) + build_kernel_name<0>();
}
void kernel_prm(std::ostream &os, std::string name) const {
@@ -1379,7 +1381,7 @@ class BuiltinFunction : public expression {
}
void kernel_expr(std::ostream &os, std::string name) const {
- os << func_name << "(";
+ os << func_name::value() << "(";
build_kernel_expr<0>(os, name);
os << ")";
}
@@ -1489,32 +1491,36 @@ struct All<Head, Tail...>
{};
#define DEFINE_BUILTIN_FUNCTION(name) \
-extern const char name##_fun[] = #name; \
+struct name##_name { \
+ static const char* value() { \
+ return #name; \
+ } \
+}; \
template <class... Expr> \
-typename std::enable_if< \
+inline typename std::enable_if< \
All<All<valid_expr<Expr>...>, Not<All<std::is_arithmetic<Expr>...>>>::value,\
- BuiltinFunction<name##_fun, Expr...>>::type \
+ BuiltinFunction<name##_name, Expr...>>::type \
name(const Expr&... expr) { \
-return BuiltinFunction<name##_fun, Expr...>(expr...); \
+return BuiltinFunction<name##_name, Expr...>(expr...); \
} \
template <class... MultiEx> \
-typename std::enable_if< \
+inline typename std::enable_if< \
All<valid_multiex<MultiEx>..., Not<All<std::is_arithmetic<MultiEx>...>>>::value, \
MultiExpression< \
- BuiltinFunction<name##_fun, typename multiex_traits<MultiEx>::subtype...>, multiex_dim<MultiEx...>::dim \
+ BuiltinFunction<name##_name, typename multiex_traits<MultiEx>::subtype...>, multiex_dim<MultiEx...>::dim \
>>::type \
name(const MultiEx&... multiexpr) { \
std::array< \
std::unique_ptr< \
- BuiltinFunction<name##_fun, typename multiex_traits<MultiEx>::subtype...> \
+ BuiltinFunction<name##_name, typename multiex_traits<MultiEx>::subtype...> \
>, \
multiex_dim<MultiEx...>::dim> ex; \
for(uint i = 0; i < multiex_dim<MultiEx...>::dim; i++) \
ex[i].reset( \
- new BuiltinFunction<name##_fun, typename multiex_traits<MultiEx>::subtype...>(extract_component(multiexpr, i)...) \
+ new BuiltinFunction<name##_name, typename multiex_traits<MultiEx>::subtype...>(extract_component(multiexpr, i)...) \
); \
return MultiExpression< \
- BuiltinFunction<name##_fun, typename multiex_traits<MultiEx>::subtype...>, multiex_dim<MultiEx...>::dim \
+ BuiltinFunction<name##_name, typename multiex_traits<MultiEx>::subtype...>, multiex_dim<MultiEx...>::dim \
>(ex); \
}
@@ -1599,7 +1605,7 @@ DEFINE_BUILTIN_FUNCTION(trunc)
/// \cond INTERNAL
/// Builtin function call.
-template <const char *func_name, class Expr>
+template <class func_name, class Expr>
struct BuiltinFunction : public expression {
BuiltinFunction(const Expr &expr) : expr(expr) {}
@@ -1608,11 +1614,11 @@ struct BuiltinFunction : public expression {
}
std::string kernel_name() const {
- return func_name + expr.kernel_name();
+ return func_name::value() + expr.kernel_name();
}
void kernel_expr(std::ostream &os, std::string name) const {
- os << func_name << "(";
+ os << func_name::value() << "(";
expr.kernel_expr(os, name);
os << ")";
}
@@ -1634,28 +1640,32 @@ struct BuiltinFunction : public expression {
};
#define DEFINE_BUILTIN_FUNCTION(name) \
-extern const char name##_fun[] = #name; \
+struct name##_name { \
+ static const char* value() { \
+ return #name; \
+ } \
+}; \
template <class Expr> \
typename std::enable_if<Expr::is_expr, \
-BuiltinFunction<name##_fun, Expr>>::type \
+BuiltinFunction<name##_name, Expr>>::type \
name(const Expr &expr) { \
-return BuiltinFunction<name##_fun, Expr>(expr); \
+return BuiltinFunction<name##_name, Expr>(expr); \
} \
template <class MultiEx> \
typename std::enable_if<MultiEx::is_multiex, \
MultiExpression< \
- BuiltinFunction<name##_fun, typename MultiEx::subtype>, MultiEx::dim \
+ BuiltinFunction<name##_name, typename MultiEx::subtype>, MultiEx::dim \
>>::type \
name(const MultiEx& multiexpr) { \
std::array<std::unique_ptr< \
- BuiltinFunction<name##_fun, typename MultiEx::subtype>>, MultiEx::dim \
+ BuiltinFunction<name##_name, typename MultiEx::subtype>>, MultiEx::dim \
> ex; \
for(uint i = 0; i < MultiEx::dim; i++) \
ex[i].reset( \
- new BuiltinFunction<name##_fun, typename MultiEx::subtype>(multiexpr(i)) \
+ new BuiltinFunction<name##_name, typename MultiEx::subtype>(multiexpr(i)) \
); \
return MultiExpression< \
- BuiltinFunction<name##_fun, typename MultiEx::subtype>, MultiEx::dim>(ex); \
+ BuiltinFunction<name##_name, typename MultiEx::subtype>, MultiEx::dim>(ex); \
}
/// \endcond
@@ -1934,7 +1944,7 @@ struct UserFunction<body, RetType(ArgType...)> {
/// \cond INTERNAL
/// Returns device weight after simple bandwidth test
-double device_vector_perf(
+inline double device_vector_perf(
const cl::Context &context, const cl::Device &device,
size_t test_size = 1024U * 1024U
)
@@ -1981,7 +1991,7 @@ double device_vector_perf(
* where a, b and c are device vectors. Each device gets portion of the vector
* proportional to the performance of this operation.
*/
-std::vector<size_t> partition_by_vector_perf(
+inline std::vector<size_t> partition_by_vector_perf(
size_t n, const std::vector<cl::CommandQueue> &queue)
{
View
4 vexcl/vexcl.hpp
@@ -337,7 +337,9 @@ user functions are not available at all.
# define NOMINMAX
#endif
-#define __CL_ENABLE_EXCEPTIONS
+#ifndef __CL_ENABLE_EXCEPTIONS
+# define __CL_ENABLE_EXCEPTIONS
+#endif
#include <CL/cl.hpp>
#include <iostream>
Please sign in to comment.
Something went wrong with that request. Please try again.