Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
  • 5 commits
  • 6 files changed
  • 0 commit comments
  • 2 contributors
View
22 configure
@@ -2878,7 +2878,7 @@ elif test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'amd*.icd' -print -qu
else
tmp_amdapp_sdk=0;
fi;
-if test $tmp_amdapp_sdk = 1; then
+#if test $tmp_amdapp_sdk = 1; then
for d in /opt/AMDAPP /usr/local/atistream
do
if test -e $d/include/CL/cl.h; then
@@ -2889,11 +2889,14 @@ if test $tmp_amdapp_sdk = 1; then
fi;
fi;
done;
-fi;
+#fi;
# find best known Nvidia paths
if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'nvidia*.icd' -print -quit)"; then
tmp_nvidia_sdk=1;
+else
+ tmp_nvidia_sdk=0;
+fi;
for d in /usr/local/cuda
do
if test -e $d/OpenCL/common/inc/CL/cl.h; then
@@ -2918,9 +2921,9 @@ if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'nvidia*.icd' -print -q
fi;
fi;
done;
-else
- tmp_nvidia_sdk=0;
-fi;
+#else
+# tmp_nvidia_sdk=0;
+#fi;
# set COPRTHR paths for bootstrap
if test x$enable_libocl = xyes; then
@@ -2936,6 +2939,9 @@ fi;
# find best known Intel paths
if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'intel*.icd' -print -quit)"; then
tmp_intel_sdk=1;
+else
+ tmp_intel_sdk=0;
+fi;
for d in /usr/local
do
if test -e $d/include/CL/cl.h; then
@@ -2950,9 +2956,9 @@ if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'intel*.icd' -print -qu
fi;
fi;
done;
-else
- tmp_intel_sdk=0;
-fi;
+#else
+# tmp_intel_sdk=0;
+#fi;
### step 2)
View
22 configure.in
@@ -124,7 +124,7 @@ elif test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'amd*.icd' -print -qu
else
tmp_amdapp_sdk=0;
fi;
-if test $tmp_amdapp_sdk = 1; then
+#if test $tmp_amdapp_sdk = 1; then
for d in /opt/AMDAPP /usr/local/atistream
do
if test -e $d/include/CL/cl.h; then
@@ -135,11 +135,14 @@ if test $tmp_amdapp_sdk = 1; then
fi;
fi;
done;
-fi;
+#fi;
# find best known Nvidia paths
if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'nvidia*.icd' -print -quit)"; then
tmp_nvidia_sdk=1;
+else
+ tmp_nvidia_sdk=0;
+fi;
for d in /usr/local/cuda
do
if test -e $d/OpenCL/common/inc/CL/cl.h; then
@@ -164,9 +167,9 @@ if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'nvidia*.icd' -print -q
fi;
fi;
done;
-else
- tmp_nvidia_sdk=0;
-fi;
+#else
+# tmp_nvidia_sdk=0;
+#fi;
# set COPRTHR paths for bootstrap
if test x$enable_libocl = xyes; then
@@ -182,6 +185,9 @@ fi;
# find best known Intel paths
if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'intel*.icd' -print -quit)"; then
tmp_intel_sdk=1;
+else
+ tmp_intel_sdk=0;
+fi;
for d in /usr/local
do
if test -e $d/include/CL/cl.h; then
@@ -196,9 +202,9 @@ if test -n "$(find /etc/OpenCL/vendors -maxdepth 1 -name 'intel*.icd' -print -qu
fi;
fi;
done;
-else
- tmp_intel_sdk=0;
-fi;
+#else
+# tmp_intel_sdk=0;
+#fi;
### step 2)
View
2  examples/bdt_nbody/nbody.h
@@ -31,7 +31,7 @@
#define DEFAULT_NSTEP 200
#define DEFAULT_NBURST 2
-#define DEFAULT_NTHREAD 256
+#define DEFAULT_NTHREAD 64
#define DEFAULT_NBLOCK 1
#define G_CONSTANT 1.0
View
18 src/libcoprthr/__libcoprthr.h-no-template
@@ -700,15 +700,15 @@ typedef int sampler_t;
/*** builtin extensions for initializing vector data types [non-standard] ***/
-#define __builtin_vector_int2(x,y) _int2(x,y)
-#define __builtin_vector_int4(x,y,z,w) _int4(x,y,z,w)
-#define __builtin_vector_long2(x,y) _long2(x,y)
-#define __builtin_vector_uint2(x,y) _uint2(x,y)
-#define __builtin_vector_uint4(x,y,z,w) _uint4(x,y,z,w)
-#define __builtin_vector_ulong2(x,y) _ulong2(x,y)
-#define __builtin_vector_float2(x,y) _float2(x,y)
-#define __builtin_vector_float4(x,y,z,w) (_float4){x,y,z,w}
-#define __builtin_vector_double2(x,y) _double2(x,y)
+#define __builtin_vector_int2(x,y) vector_int2(x,y)
+#define __builtin_vector_int4(x,y,z,w) vector_int4(x,y,z,w)
+#define __builtin_vector_long2(x,y) vector_long2(x,y)
+#define __builtin_vector_uint2(x,y) vector_uint2(x,y)
+#define __builtin_vector_uint4(x,y,z,w) vector_uint4(x,y,z,w)
+#define __builtin_vector_ulong2(x,y) vector_ulong2(x,y)
+#define __builtin_vector_float2(x,y) vector_float2(x,y)
+#define __builtin_vector_float4(x,y,z,w) vector_float4(x,y,z,w)
+#define __builtin_vector_double2(x,y) vector_double2(x,y)
#else
View
4 src/libcoprthr/compiler_x86_64.c
@@ -58,13 +58,15 @@
" -falign-functions -falign-jumps -falign-loops -falign-labels " \
" -ftree-vrp -ftree-pre"
+#define XXX_GCC_HACK_FLAG " -fschedule-insns -fschedule-insns2"
+
/* XXX note that most flags supposedly enabled by -O2 are added explicitly
* XXX for CCFLAGS_OCL because this inexplicably improves performance by 2%.
* XXX the primary issue seems to be -fschedule-insns -fschedule-insns2 .
* XXX also, do not raise CCFLAGS_KCALL, effect is to break everything. -DAR */
//#define CCFLAGS_OCL " -O2 -msse3 " CCFLAGS_OCL_O2
-#define CCFLAGS_OCL " -O3 -msse3 -funsafe-math-optimizations -fno-math-errno -funsafe-math-optimizations "
+#define CCFLAGS_OCL " -O3 -msse3 -funsafe-math-optimizations -fno-math-errno -funsafe-math-optimizations " XXX_GCC_HACK_FLAG
#define CCFLAGS_KCALL " -O0 "
#define CCFLAGS_LINK
View
5 test/test_stdcl/Makefile.in
@@ -21,15 +21,14 @@ CLSRC = test_arg_int.cl test_arg_int4.cl \
TOPDIR ?= ../
+LIBS += -lm -lssl $(LIBELF_LIB) -lclelf
+
#INCS = -I/usr/local/browndeer/include
#LIBS = -lm -L/usr/local/browndeer/lib -lstdcl
#LIBS = -lm -L/usr/local/browndeer/lib -lstdcl_d -locl
INCS += $(USE_OPENCL_INCS) $(USE_STDCL_INCS)
LIBS += $(USE_OPENCL_LIBS) $(USE_STDCL_LIBS)
-#LIBS += -lm -lssl -lelf -lclelf
-LIBS += -lm -lssl $(LIBELF_LIB) -lclelf
-
SRC_TESTS = $(addprefix test_stdcl_,$(addsuffix .c,$(TESTS)))
#CLSRC = $(addprefix test_,$(addsuffix .cl,$(TESTS)))
PRG_TESTS = $(addprefix test_stdcl_,$(addsuffix .x,$(TESTS)))

No commit comments for this range

Something went wrong with that request. Please try again.