273 changes: 136 additions & 137 deletions src/client/openCL.misc.cpp
Expand Up @@ -27,6 +27,8 @@
#include "clFFT.h"
#include "openCL.misc.h"



void prettyPrintPlatformInfo( const cl_platform_id& pId )
{
size_t platformProfileSize = 0;
Expand Down Expand Up @@ -209,6 +211,23 @@ void prettyPrintDeviceInfo( const cl_device_id& dId )
std::cout << std::right << std::endl;
}

void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms,
std::vector< std::vector< cl_device_id > >& devices)
{
for (unsigned int i = 0; i < platforms.size(); ++i)
{
std::cout << "OpenCL platform [ " << i << " ]:" << std::endl;
prettyPrintPlatformInfo(platforms[i]);

for (unsigned int n = 0; n < devices[i].size(); ++n)
{
std::cout << "OpenCL platform [ " << i << " ], device [ " << n << " ]:" << std::endl;
prettyPrintDeviceInfo((devices[i])[n]);
}
}

}

// Verify a failed condition; return true on fail
inline cl_bool OPENCL_V_FAIL( cl_int res )
{
Expand Down Expand Up @@ -334,145 +353,125 @@ std::string prettyPrintclFFTStatus( const cl_int& status )
}
}

std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
cl_uint deviceGpuList,
cl_context& context,
bool printclInfo )
{
cl_int status = 0;

/*
* Have a look at the available platforms and pick either
* the AMD one if available or a reasonable default.
*/

cl_uint numPlatforms = 0;
cl_platform_id platform = NULL;
OPENCL_V_THROW( ::clGetPlatformIDs( 0, NULL, &numPlatforms ),
"Getting number of platforms( ::clGetPlatformsIDs() )" );

if( numPlatforms > 0 )
{
std::vector< cl_platform_id > platforms( numPlatforms );
OPENCL_V_THROW( ::clGetPlatformIDs( numPlatforms, &platforms[ 0 ], NULL ),
"Getting Platform Id's ( ::clGetPlatformsIDs() )" );

// TODO: How should we determine what platform to choose? We are just defaulting to the last one reported, as we
// print out the info
for( unsigned int i=0; i < numPlatforms; ++i )
{
if( printclInfo )
{
std::cout << "OpenCL platform [ " << i << " ]:" << std::endl;
prettyPrintPlatformInfo( platforms[i] );
}

platform = platforms[i];
}
}

if( NULL == platform )
{
throw std::runtime_error( "No appropriate OpenCL platform could be found" );
}

/*
* If we could find our platform, use it. Otherwise use just available platform.
*/

// Get the device list for this type.
//
cl_uint num_devices = 0;
OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, 0, NULL, &num_devices ),
"Getting OpenCL devices ( ::clGetDeviceIDs() )" );
if( 0 == num_devices )
{
OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available");
}

std::vector< cl_device_id > deviceIDs( num_devices );
OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, num_devices, &deviceIDs[0], NULL),
"Getting OpenCL deviceIDs ( ::clGetDeviceIDs() )" );

if( (CL_DEVICE_TYPE_GPU == deviceType) && (~cl_uint(0) != deviceGpuList) )
{
// The command line options specify to user certain gpu(s)
//
for( unsigned u = (unsigned) deviceIDs.size(); u-- > 0; )
{
if( 0 != (deviceGpuList & (1<<u) ) )
continue;

// Remove this GPU from the list
deviceIDs[u] = deviceIDs.back();
deviceIDs.pop_back();
}
}

if( 0 == deviceIDs.size( ) )
{
OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available");
}

cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

/////////////////////////////////////////////////////////////////
// Create an OpenCL context
/////////////////////////////////////////////////////////////////
context = clCreateContext( cps,
(cl_uint) deviceIDs.size(),
& deviceIDs[0],
NULL,
NULL,
&status);
OPENCL_V_THROW( status, "Creating Context ( ::clCreateContextFromType() )" );

/* First, get the size of device list data */
size_t deviceListSize;
OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize ),
"Getting device array size ( ::clGetContextInfo() )" );

/////////////////////////////////////////////////////////////////
// Detect OpenCL devices
/////////////////////////////////////////////////////////////////
std::vector< cl_device_id > devices( deviceListSize/sizeof( cl_device_id ) );

/* Now, get the device list data */
OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize, &devices[ 0 ], NULL ),
"Getting device array ( ::clGetContextInfo() )" );

if( printclInfo )
{
cl_uint cContextDevices = 0;

size_t deviceVersionSize = 0;
OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ),
"Getting CL_DEVICE_VERSION Platform Info string size ( ::clGetDeviceInfo() )" );

std::vector< char > szDeviceVersion( deviceVersionSize );
OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ),
"Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" );

char openclstr[11]="OpenCL 1.0";

if (!strncmp((const char*)&szDeviceVersion[ 0 ], openclstr, 10))
{
cContextDevices = 1;
}
else
{
OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_NUM_DEVICES, sizeof( cContextDevices ), &cContextDevices, NULL ),
"Getting number of context devices ( ::clGetContextInfo() )" );
}

for( cl_uint i = 0; i < cContextDevices; ++i )
{
std::cout << "OpenCL devices [ " << i << " ]:" << std::endl;
prettyPrintDeviceInfo( devices[i] );
}
}
// Enumerate all OpenCL platforms on this system and, per platform, the devices of the
// requested type.
//
// deviceType  Device type filter handed to ::clGetDeviceIDs().
// platforms   [out] Replaced with one entry per discovered platform.
// devices     [out] Replaced with one entry per platform; devices[ i ] lists the devices
//             of `deviceType` found on platforms[ i ] and is empty when there are none.
//
// Returns 0. Failures of the platform queries, or of the second device query, are passed
// to OPENCL_V_THROW; a std::runtime_error is thrown when the first reported platform id
// is NULL.
int discoverCLPlatforms( cl_device_type deviceType,
    std::vector< cl_platform_id >& platforms,
    std::vector< std::vector< cl_device_id > >& devices )
{
    cl_uint numPlatforms = 0;
    OPENCL_V_THROW(::clGetPlatformIDs(0, NULL, &numPlatforms),
        "Getting number of platforms( ::clGetPlatformsIDs() )");

    // Rebuild both outputs from scratch so that entries left over from an earlier call
    // (for example a device list of a platform that now reports no devices) cannot survive.
    platforms.assign( numPlatforms, cl_platform_id( ) );
    devices.assign( numPlatforms, std::vector< cl_device_id >( ) );

    if (0 == numPlatforms)
    {
        return 0;
    }

    OPENCL_V_THROW(::clGetPlatformIDs(numPlatforms, &platforms[0], NULL),
        "Getting Platform Id's ( ::clGetPlatformsIDs() )");

    if (NULL == platforms[0])
    {
        throw std::runtime_error("No appropriate OpenCL platform could be found");
    }

    // For each platform collect all available devices matching deviceType.
    for (cl_uint i = 0; i < numPlatforms; ++i)
    {
        // First query only asks for the device count. A platform without a matching
        // device is a normal outcome during discovery, so this is a warning, not an error.
        cl_uint numDevices = 0;
        OPENCL_V_WARN(::clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &numDevices),
            "Getting OpenCL devices ( ::clGetDeviceIDs() )");
        if (0 == numDevices)
        {
            continue;
        }

        devices[i].resize(numDevices);
        OPENCL_V_THROW(::clGetDeviceIDs(platforms[i], deviceType, numDevices, &devices[i][0], NULL),
            "Getting OpenCL deviceIDs ( ::clGetDeviceIDs() )");
    }

    return 0;
}

return devices;
// Select one OpenCL device and create a context for it.
//
// deviceType   Device type filter used while discovering devices.
// deviceId     Zero-based index of the device within the selected platform's device list.
// platformId   Zero-based index of the platform within the discovered platform list.
// context      [out] Receives the newly created context.
// printclInfo  When true, the selected platform and device are pretty-printed.
//
// Returns a vector holding exactly the one selected device id.
//
// Throws std::runtime_error when platformId does not name a usable platform. An unusable
// deviceId and a failed context creation are reported through OPENCL_V_THROW.
std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
    cl_int deviceId,
    cl_int platformId,
    cl_context& context,
    bool printclInfo )
{
    // Have a look at all the available platforms on this system
    std::vector< cl_platform_id > platformInfos;
    std::vector< std::vector< cl_device_id > > deviceInfos;
    discoverCLPlatforms( deviceType, platformInfos, deviceInfos );

    // Validate the platform first so that a bad platform id is reported as such instead
    // of being mistaken for a missing device. Negative ids are rejected before the
    // conversion to an unsigned index.
    if( platformId < 0 || static_cast< size_t >( platformId ) >= platformInfos.size( ) )
    {
        throw std::runtime_error( "No appropriate OpenCL platform could be found" );
    }

    const size_t platformIndex = static_cast< size_t >( platformId );
    const cl_platform_id platform = platformInfos[ platformIndex ];
    if( NULL == platform )
    {
        throw std::runtime_error( "No appropriate OpenCL platform could be found" );
    }

    // The result always has one slot; it stays NULL until a valid device is found.
    std::vector< cl_device_id > devices( 1 );
    if( deviceId >= 0 && platformIndex < deviceInfos.size( ) )
    {
        const std::vector< cl_device_id >& candidates = deviceInfos[ platformIndex ];
        const size_t deviceIndex = static_cast< size_t >( deviceId );
        if( deviceIndex < candidates.size( ) )
        {
            devices[ 0 ] = candidates[ deviceIndex ];
        }
    }

    if( NULL == devices[ 0 ] )
    {
        OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available" );
    }

    if( printclInfo )
    {
        prettyPrintPlatformInfo( platform );
        prettyPrintDeviceInfo( devices[ 0 ] );
    }

    // Create an OpenCL context
    cl_int status = 0;
    cl_context_properties cps[ 3 ] =
        { CL_CONTEXT_PLATFORM, reinterpret_cast< cl_context_properties >( platform ), 0 };
    context = clCreateContext( cps,
        static_cast< cl_uint >( devices.size( ) ),
        &devices[ 0 ],
        NULL,
        NULL,
        &status );
    OPENCL_V_THROW( status, "Creating Context ( ::clCreateContext() )" );

    return devices;
}

int cleanupCL( cl_context* context, cl_command_queue* commandQueue,
Expand Down
43 changes: 42 additions & 1 deletion src/client/openCL.misc.h
Expand Up @@ -29,14 +29,27 @@
#define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) )
#endif

/*
* \brief OpenCL platform and device discovery
* Creates a list of OpenCL platforms
* and their associated devices
*
* \param deviceType Device type passed to ::clGetDeviceIDs() for every platform
* \param platforms  [out] Resized to the number of platforms and filled with their ids
* \param devices    [out] Resized to the number of platforms; devices[ i ] receives the
*                   devices of deviceType reported for platforms[ i ]
* \return 0; failing OpenCL queries are reported through OPENCL_V_THROW / OPENCL_V_WARN
*/
int discoverCLPlatforms( cl_device_type deviceType,
std::vector< cl_platform_id >& platforms,
std::vector< std::vector< cl_device_id > >& devices );

/*
* \brief Pretty-print platforms and devices
* Prints each platform in platforms followed by every device in the
* matching devices[ i ] entry
*/
void prettyPrintCLPlatforms(std::vector< cl_platform_id >& platforms,
std::vector< std::vector< cl_device_id > >& devices);

/*
* \brief OpenCL related initialization
* Create Context, Device list
* Load CL file, compile, link CL source
* Build program and kernel objects
*/
std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
cl_uint deviceGpuList,
cl_int deviceId,
cl_int platformId,
cl_context& context,
bool printclInfo );

Expand Down Expand Up @@ -102,6 +115,34 @@ inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno
}
#define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__)

/*
 * \brief Report a failed OpenCL/clFFT status code without interrupting execution
 * Prints "OPENCL_V_WARN< status > (line): message" to std::cout for every status
 * other than CL_SUCCESS and CL_DEVICE_NOT_FOUND, then hands the status back.
 *
 * \param res    Status code to inspect
 * \param msg    Context message appended to the warning
 * \param lineno Source line reported in the warning
 * \return res, unchanged, so the call can be used inside an expression
 */
inline cl_int OpenCL_V_Warn(cl_int res, const std::string& msg, size_t lineno)
{
    // CL_DEVICE_NOT_FOUND happens all the time when discovering the OpenCL capabilities
    // of the system, so it is deliberately silent, just like success.
    const bool silent = ( res == CL_SUCCESS ) || ( res == CL_DEVICE_NOT_FOUND );

    if( !silent )
    {
        // Compose the text in a private stream so that formatting flags currently set
        // on std::cout do not affect how the line number is rendered.
        std::stringstream warning;
        warning << "OPENCL_V_WARN< " << prettyPrintclFFTStatus( res ) << " > ("
                << lineno << "): " << msg;
        std::cout << warning.str( ) << std::endl;
    }

    return res;
}
// No trailing semicolon (consistent with OPENCL_V_THROW): the caller supplies it, which
// keeps the macro usable inside expressions and unbraced if/else statements.
#define OPENCL_V_WARN(_status,_message) OpenCL_V_Warn (_status, _message, __LINE__)

/*
* \brief Release OpenCL resources (Context, Memory etc.)
*/
Expand Down
52 changes: 52 additions & 0 deletions src/examples/CMakeLists.txt
@@ -0,0 +1,52 @@
# ########################################################################
# Copyright 2013 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ########################################################################

# Header search path shared by all examples: this directory, the OpenCL headers and the
# clFFT public headers from both the source tree and the build tree.
INCLUDE_DIRECTORIES(
"${CMAKE_CURRENT_SOURCE_DIR}"
"${OPENCL_INCLUDE_DIRS}"
"${PROJECT_SOURCE_DIR}/include"
"${PROJECT_BINARY_DIR}/include"
)

LINK_DIRECTORIES("${PROJECT_BINARY_DIR}/package/lib${SUFFIX_LIB}")

# Every *.c file in this directory becomes its own example executable.
FILE(GLOB FILES "*.c")

FOREACH(FILE ${FILES})

# MSVC_VERSION below 1800 selects the older Microsoft compilers; the examples are built
# as C++ there (presumably because those compilers cannot build the C sources as C).
if( MSVC )
if( MSVC_VERSION LESS 1800 )
# Use C++ with Microsoft compiler
SET_SOURCE_FILES_PROPERTIES( ${FILE} PROPERTIES LANGUAGE CXX)
endif ()
endif( )

# EXAMPLE  = file name without directory and extension
# DIR_NAME = name of the directory that contains the file
# The target name combines both; the produced binary is called just ${EXAMPLE}.
GET_FILENAME_COMPONENT(EXAMPLE ${FILE} NAME_WE)
GET_FILENAME_COMPONENT(FULL_DIR_NAME ${FILE} PATH)
GET_FILENAME_COMPONENT(DIR_NAME ${FULL_DIR_NAME} NAME)
SET(EXAMPLE_NAME example_${DIR_NAME}_${EXAMPLE})
ADD_EXECUTABLE(${EXAMPLE_NAME} ${FILE})

TARGET_LINK_LIBRARIES(${EXAMPLE_NAME} clFFT ${OPENCL_LIBRARIES})

SET_TARGET_PROPERTIES(${EXAMPLE_NAME}
PROPERTIES
OUTPUT_NAME ${EXAMPLE}
RUNTIME_OUTPUT_DIRECTORY ${DIR_NAME})

# Installed examples go into an "examples" sub-directory of the binary directory.
INSTALL(TARGETS ${EXAMPLE_NAME}
RUNTIME DESTINATION "bin${SUFFIX_BIN}/examples")
ENDFOREACH()
136 changes: 136 additions & 0 deletions src/examples/fft1d.c
@@ -0,0 +1,136 @@
/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/

#include <stdio.h>
#include <stdlib.h>

/* No need to explicitly include the OpenCL headers */
#include <clFFT.h>

/* Print a diagnostic for a failed OpenCL/clFFT call.
 * Returns non-zero when err is anything other than CL_SUCCESS. */
static int call_failed( cl_int err, const char *what )
{
    if( err == CL_SUCCESS )
        return 0;

    fprintf( stderr, "%s failed with error code %d\n", what, (int)err );
    return 1;
}

/* Minimal clFFT example: in-place forward complex FFT of a 1D array of N elements.
 * Returns 0 on success and 1 as soon as any OpenCL/clFFT call or allocation fails.
 *
 * All locals are declared up front so that the "goto cleanup" error path never jumps
 * over an initialisation (this file may also be compiled as C++). */
int main( void )
{
    cl_int err;
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = NULL;
    cl_command_queue queue = NULL;
    cl_mem bufX = NULL;
    float *X = NULL;
    int ret = 1;            /* becomes 0 only after the whole pipeline succeeded */
    int fft_ready = 0;      /* clfftSetup() succeeded, clfftTeardown() is due */
    int plan_ready = 0;     /* a plan exists and has to be destroyed */
    size_t N = 16;
    size_t i;
    size_t buffer_size;
    char platform_name[128] = "";
    char device_name[128] = "";

    /* FFT library related declarations */
    clfftSetupData fftSetup;
    clfftPlanHandle planHandle = 0;
    clfftDim dim = CLFFT_1D;
    size_t clLengths[1] = {N};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );
    if( call_failed( err, "clGetPlatformIDs" ) )
        goto cleanup;

    err = clGetPlatformInfo( platform, CL_PLATFORM_NAME,
                             sizeof(platform_name), platform_name, NULL );
    if( call_failed( err, "clGetPlatformInfo" ) )
        goto cleanup;
    printf("Platform found: %s\n", platform_name);

    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );
    if( call_failed( err, "clGetDeviceIDs" ) )
        goto cleanup;

    err = clGetDeviceInfo( device, CL_DEVICE_NAME,
                           sizeof(device_name), device_name, NULL );
    if( call_failed( err, "clGetDeviceInfo" ) )
        goto cleanup;
    printf("Device found on the above platform: %s\n", device_name);

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    if( call_failed( err, "clCreateContext" ) )
        goto cleanup;

    queue = clCreateCommandQueue( ctx, device, 0, &err );
    if( call_failed( err, "clCreateCommandQueue" ) )
        goto cleanup;

    /* Setup clFFT. */
    if( call_failed( clfftInitSetupData( &fftSetup ), "clfftInitSetupData" ) )
        goto cleanup;
    if( call_failed( clfftSetup( &fftSetup ), "clfftSetup" ) )
        goto cleanup;
    fft_ready = 1;

    /* Allocate host memory: N complex values, stored as interleaved (re, im) floats. */
    buffer_size = N * 2 * sizeof(*X);
    X = (float *)malloc( buffer_size );
    if( X == NULL )
    {
        fprintf( stderr, "Failed to allocate %lu bytes of host memory\n",
                 (unsigned long)buffer_size );
        goto cleanup;
    }

    /* Fill and print the input array. */
    printf("\nPerforming fft on an one dimensional array of size N = %lu\n", (unsigned long)N);
    for( i = 0; i < N; ++i )
    {
        float x = (float)i;
        float y = (float)(i * 3);
        X[2*i  ] = x;
        X[2*i+1] = y;
        printf("(%f, %f) ", x, y);
    }

    /* Prepare OpenCL memory objects and place data inside them. */
    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, buffer_size, NULL, &err );
    if( call_failed( err, "clCreateBuffer" ) )
        goto cleanup;

    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
    if( call_failed( err, "clEnqueueWriteBuffer" ) )
        goto cleanup;

    /* Create a default plan for a complex FFT. */
    if( call_failed( clfftCreateDefaultPlan( &planHandle, ctx, dim, clLengths ),
                     "clfftCreateDefaultPlan" ) )
        goto cleanup;
    plan_ready = 1;

    /* Set plan parameters. */
    if( call_failed( clfftSetPlanPrecision( planHandle, CLFFT_SINGLE ),
                     "clfftSetPlanPrecision" ) )
        goto cleanup;
    if( call_failed( clfftSetLayout( planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED ),
                     "clfftSetLayout" ) )
        goto cleanup;
    if( call_failed( clfftSetResultLocation( planHandle, CLFFT_INPLACE ),
                     "clfftSetResultLocation" ) )
        goto cleanup;

    /* Bake the plan. */
    if( call_failed( clfftBakePlan( planHandle, 1, &queue, NULL, NULL ), "clfftBakePlan" ) )
        goto cleanup;

    /* Execute the plan. */
    if( call_failed( clfftEnqueueTransform( planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL ),
                     "clfftEnqueueTransform" ) )
        goto cleanup;

    /* Wait for calculations to be finished. */
    if( call_failed( clFinish( queue ), "clFinish" ) )
        goto cleanup;

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
    if( call_failed( err, "clEnqueueReadBuffer" ) )
        goto cleanup;

    /* Print the output array. */
    printf("\n\nfft result: \n");
    for( i = 0; i < N; ++i )
    {
        printf("(%f, %f) ", X[2*i], X[2*i+1]);
    }
    printf("\n");

    ret = 0;

cleanup:
    /* Release OpenCL memory objects. */
    if( bufX != NULL )
        clReleaseMemObject( bufX );

    free(X);

    /* Release the plan. */
    if( plan_ready )
        clfftDestroyPlan( &planHandle );

    /* Release clFFT library. */
    if( fft_ready )
        clfftTeardown( );

    /* Release OpenCL working objects. */
    if( queue != NULL )
        clReleaseCommandQueue( queue );
    if( ctx != NULL )
        clReleaseContext( ctx );

    return ret;
}
145 changes: 145 additions & 0 deletions src/examples/fft2d.c
@@ -0,0 +1,145 @@
/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/

#include <stdio.h>
#include <stdlib.h>

/* No need to explicitly include the OpenCL headers */
#include <clFFT.h>

/* Print a diagnostic for a failed OpenCL/clFFT call.
 * Returns non-zero when err is anything other than CL_SUCCESS. */
static int call_failed( cl_int err, const char *what )
{
    if( err == CL_SUCCESS )
        return 0;

    fprintf( stderr, "%s failed with error code %d\n", what, (int)err );
    return 1;
}

/* Minimal clFFT example: in-place forward complex FFT of an N0 x N1 array.
 * Returns 0 on success and 1 as soon as any OpenCL/clFFT call or allocation fails.
 *
 * Element (x, y) lives at index x + y*N0, i.e. the dimension described by clLengths[0]
 * is the contiguous one; the loops below iterate accordingly.
 *
 * All locals are declared up front so that the "goto cleanup" error path never jumps
 * over an initialisation (this file may also be compiled as C++). */
int main( void )
{
    cl_int err;
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = NULL;
    cl_command_queue queue = NULL;
    cl_mem bufX = NULL;
    float *X = NULL;
    int ret = 1;            /* becomes 0 only after the whole pipeline succeeded */
    int fft_ready = 0;      /* clfftSetup() succeeded, clfftTeardown() is due */
    int plan_ready = 0;     /* a plan exists and has to be destroyed */

    const size_t N0 = 8, N1 = 8;
    size_t x, y;
    size_t buffer_size;
    char platform_name[128] = "";
    char device_name[128] = "";

    /* FFT library related declarations */
    clfftSetupData fftSetup;
    clfftPlanHandle planHandle = 0;
    clfftDim dim = CLFFT_2D;
    size_t clLengths[2] = {N0, N1};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );
    if( call_failed( err, "clGetPlatformIDs" ) )
        goto cleanup;

    err = clGetPlatformInfo( platform, CL_PLATFORM_NAME,
                             sizeof(platform_name), platform_name, NULL );
    if( call_failed( err, "clGetPlatformInfo" ) )
        goto cleanup;
    printf("Platform found: %s\n", platform_name);

    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );
    if( call_failed( err, "clGetDeviceIDs" ) )
        goto cleanup;

    err = clGetDeviceInfo( device, CL_DEVICE_NAME,
                           sizeof(device_name), device_name, NULL );
    if( call_failed( err, "clGetDeviceInfo" ) )
        goto cleanup;
    printf("Device found on the above platform: %s\n", device_name);

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    if( call_failed( err, "clCreateContext" ) )
        goto cleanup;

    queue = clCreateCommandQueue( ctx, device, 0, &err );
    if( call_failed( err, "clCreateCommandQueue" ) )
        goto cleanup;

    /* Setup clFFT. */
    if( call_failed( clfftInitSetupData( &fftSetup ), "clfftInitSetupData" ) )
        goto cleanup;
    if( call_failed( clfftSetup( &fftSetup ), "clfftSetup" ) )
        goto cleanup;
    fft_ready = 1;

    /* Allocate host memory: N0*N1 complex values, stored as interleaved (re, im) floats. */
    buffer_size = N0 * N1 * 2 * sizeof(*X);
    X = (float *)malloc( buffer_size );
    if( X == NULL )
    {
        fprintf( stderr, "Failed to allocate %lu bytes of host memory\n",
                 (unsigned long)buffer_size );
        goto cleanup;
    }

    /* Fill the array with a constant value and print it, one row per line. */
    printf("\nPerforming fft on an two dimensional array of size N0 x N1 : %lu x %lu\n",
           (unsigned long)N0, (unsigned long)N1);
    for( y = 0; y < N1; ++y )
    {
        for( x = 0; x < N0; ++x )
        {
            const size_t idx = 2 * (x + y * N0);
            X[idx]   = 0.5f;
            X[idx+1] = 0.5f;
            printf("(%f, %f) ", X[idx], X[idx+1]);
        }
        printf("\n");
    }

    /* Prepare OpenCL memory objects and place data inside them. */
    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, buffer_size, NULL, &err );
    if( call_failed( err, "clCreateBuffer" ) )
        goto cleanup;

    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
    if( call_failed( err, "clEnqueueWriteBuffer" ) )
        goto cleanup;

    /* Create a default plan for a complex FFT. */
    if( call_failed( clfftCreateDefaultPlan( &planHandle, ctx, dim, clLengths ),
                     "clfftCreateDefaultPlan" ) )
        goto cleanup;
    plan_ready = 1;

    /* Set plan parameters. */
    if( call_failed( clfftSetPlanPrecision( planHandle, CLFFT_SINGLE ),
                     "clfftSetPlanPrecision" ) )
        goto cleanup;
    if( call_failed( clfftSetLayout( planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED ),
                     "clfftSetLayout" ) )
        goto cleanup;
    if( call_failed( clfftSetResultLocation( planHandle, CLFFT_INPLACE ),
                     "clfftSetResultLocation" ) )
        goto cleanup;

    /* Bake the plan. */
    if( call_failed( clfftBakePlan( planHandle, 1, &queue, NULL, NULL ), "clfftBakePlan" ) )
        goto cleanup;

    /* Execute the plan. */
    if( call_failed( clfftEnqueueTransform( planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL ),
                     "clfftEnqueueTransform" ) )
        goto cleanup;

    /* Wait for calculations to be finished. */
    if( call_failed( clFinish( queue ), "clFinish" ) )
        goto cleanup;

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
    if( call_failed( err, "clEnqueueReadBuffer" ) )
        goto cleanup;

    /* Print the output array, one row per line. */
    printf("\n\nfft result: \n");
    for( y = 0; y < N1; ++y )
    {
        for( x = 0; x < N0; ++x )
        {
            const size_t idx = 2 * (x + y * N0);
            printf("(%f, %f) ", X[idx], X[idx+1]);
        }
        printf("\n");
    }
    printf("\n");

    ret = 0;

cleanup:
    /* Release OpenCL memory objects. */
    if( bufX != NULL )
        clReleaseMemObject( bufX );

    free(X);

    /* Release the plan. */
    if( plan_ready )
        clfftDestroyPlan( &planHandle );

    /* Release clFFT library. */
    if( fft_ready )
        clfftTeardown( );

    /* Release OpenCL working objects. */
    if( queue != NULL )
        clReleaseCommandQueue( queue );
    if( ctx != NULL )
        clReleaseContext( ctx );

    return ret;
}
154 changes: 154 additions & 0 deletions src/examples/fft3d.c
@@ -0,0 +1,154 @@
/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/

#include <stdio.h>
#include <stdlib.h>

/* No need to explicitly include the OpenCL headers */
#include <clFFT.h>

/* Print a diagnostic for a failed OpenCL/clFFT call.
 * Returns non-zero when err is anything other than CL_SUCCESS. */
static int call_failed( cl_int err, const char *what )
{
    if( err == CL_SUCCESS )
        return 0;

    fprintf( stderr, "%s failed with error code %d\n", what, (int)err );
    return 1;
}

/* Minimal clFFT example: in-place forward complex FFT of an N0 x N1 x N2 array holding
 * a single impulse at the origin.
 * Returns 0 on success and 1 as soon as any OpenCL/clFFT call or allocation fails.
 *
 * Element (x, y, z) lives at index x + y*N0 + z*N0*N1, i.e. the dimension described by
 * clLengths[0] is the contiguous one; the loops below iterate accordingly.
 *
 * All locals are declared up front so that the "goto cleanup" error path never jumps
 * over an initialisation (this file may also be compiled as C++). */
int main( void )
{
    cl_int err;
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = NULL;
    cl_command_queue queue = NULL;
    cl_mem bufX = NULL;
    float *X = NULL;
    int ret = 1;            /* becomes 0 only after the whole pipeline succeeded */
    int fft_ready = 0;      /* clfftSetup() succeeded, clfftTeardown() is due */
    int plan_ready = 0;     /* a plan exists and has to be destroyed */

    const size_t N0 = 4, N1 = 4, N2 = 4;
    size_t x, y, z;
    size_t buffer_size;
    char platform_name[128] = "";
    char device_name[128] = "";

    /* FFT library related declarations */
    clfftSetupData fftSetup;
    clfftPlanHandle planHandle = 0;
    clfftDim dim = CLFFT_3D;
    size_t clLengths[3] = {N0, N1, N2};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );
    if( call_failed( err, "clGetPlatformIDs" ) )
        goto cleanup;

    err = clGetPlatformInfo( platform, CL_PLATFORM_NAME,
                             sizeof(platform_name), platform_name, NULL );
    if( call_failed( err, "clGetPlatformInfo" ) )
        goto cleanup;
    printf("Platform found: %s\n", platform_name);

    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );
    if( call_failed( err, "clGetDeviceIDs" ) )
        goto cleanup;

    err = clGetDeviceInfo( device, CL_DEVICE_NAME,
                           sizeof(device_name), device_name, NULL );
    if( call_failed( err, "clGetDeviceInfo" ) )
        goto cleanup;
    printf("Device found on the above platform: %s\n", device_name);

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    if( call_failed( err, "clCreateContext" ) )
        goto cleanup;

    queue = clCreateCommandQueue( ctx, device, 0, &err );
    if( call_failed( err, "clCreateCommandQueue" ) )
        goto cleanup;

    /* Setup clFFT. */
    if( call_failed( clfftInitSetupData( &fftSetup ), "clfftInitSetupData" ) )
        goto cleanup;
    if( call_failed( clfftSetup( &fftSetup ), "clfftSetup" ) )
        goto cleanup;
    fft_ready = 1;

    /* Allocate host memory: N0*N1*N2 complex values, stored as interleaved (re, im) floats. */
    buffer_size = N0 * N1 * N2 * 2 * sizeof(*X);
    X = (float *)malloc( buffer_size );
    if( X == NULL )
    {
        fprintf( stderr, "Failed to allocate %lu bytes of host memory\n",
                 (unsigned long)buffer_size );
        goto cleanup;
    }

    /* Fill the array (zero everywhere except the origin) and print it,
     * one row per line and one blank line between slices. */
    printf("\nPerforming fft on a three dimensional array of size N0 x N1 x N2 : %lu x %lu x %lu\n",
           (unsigned long)N0, (unsigned long)N1, (unsigned long)N2);
    for( z = 0; z < N2; ++z )
    {
        for( y = 0; y < N1; ++y )
        {
            for( x = 0; x < N0; ++x )
            {
                const size_t idx = 2 * (x + y * N0 + z * N0 * N1);
                const float value = ( x == 0 && y == 0 && z == 0 ) ? 0.5f : 0.0f;
                X[idx]   = value;
                X[idx+1] = value;
                printf("(%f, %f) ", X[idx], X[idx+1]);
            }
            printf("\n");
        }
        printf("\n");
    }

    /* Prepare OpenCL memory objects and place data inside them. */
    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, buffer_size, NULL, &err );
    if( call_failed( err, "clCreateBuffer" ) )
        goto cleanup;

    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
    if( call_failed( err, "clEnqueueWriteBuffer" ) )
        goto cleanup;

    /* Create a default plan for a complex FFT. */
    if( call_failed( clfftCreateDefaultPlan( &planHandle, ctx, dim, clLengths ),
                     "clfftCreateDefaultPlan" ) )
        goto cleanup;
    plan_ready = 1;

    /* Set plan parameters. */
    if( call_failed( clfftSetPlanPrecision( planHandle, CLFFT_SINGLE ),
                     "clfftSetPlanPrecision" ) )
        goto cleanup;
    if( call_failed( clfftSetLayout( planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED ),
                     "clfftSetLayout" ) )
        goto cleanup;
    if( call_failed( clfftSetResultLocation( planHandle, CLFFT_INPLACE ),
                     "clfftSetResultLocation" ) )
        goto cleanup;

    /* Bake the plan. */
    if( call_failed( clfftBakePlan( planHandle, 1, &queue, NULL, NULL ), "clfftBakePlan" ) )
        goto cleanup;

    /* Execute the plan. */
    if( call_failed( clfftEnqueueTransform( planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL ),
                     "clfftEnqueueTransform" ) )
        goto cleanup;

    /* Wait for calculations to be finished. */
    if( call_failed( clFinish( queue ), "clFinish" ) )
        goto cleanup;

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );
    if( call_failed( err, "clEnqueueReadBuffer" ) )
        goto cleanup;

    /* Print the output array in the same layout as the input. */
    printf("\n\nfft result: \n");
    for( z = 0; z < N2; ++z )
    {
        for( y = 0; y < N1; ++y )
        {
            for( x = 0; x < N0; ++x )
            {
                const size_t idx = 2 * (x + y * N0 + z * N0 * N1);
                printf("(%f, %f) ", X[idx], X[idx+1]);
            }
            printf("\n");
        }
        printf("\n");
    }
    printf("\n");

    ret = 0;

cleanup:
    /* Release OpenCL memory objects. */
    if( bufX != NULL )
        clReleaseMemObject( bufX );

    free(X);

    /* Release the plan. */
    if( plan_ready )
        clfftDestroyPlan( &planHandle );

    /* Release clFFT library. */
    if( fft_ready )
        clfftTeardown( );

    /* Release OpenCL working objects. */
    if( queue != NULL )
        clReleaseCommandQueue( queue );
    if( ctx != NULL )
        clReleaseContext( ctx );

    return ret;
}
79 changes: 79 additions & 0 deletions src/gtest.cmake
@@ -0,0 +1,79 @@

# Locate or build Google Test.
# Outputs: GTEST_FOUND, GTEST_LIBRARIES and GTEST_INCLUDE_DIRS, set either by
# find_package( GTest ) or by the local external build below.
option(USE_SYSTEM_GTEST "Use system installed gtest when set to ON, or build gtest locally when set to OFF" OFF)

if(USE_SYSTEM_GTEST)
if( (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 2.8) )
message( STATUS "Cmake version 2.8 or greater needed to use GTest" )
else()
# This will define GTEST_FOUND
find_package( GTest )
endif()
else()
if(CMAKE_VERSION VERSION_LESS 3.2 AND CMAKE_GENERATOR MATCHES "Ninja")
message(WARNING "Building GTest with Ninja has known issues with CMake older than 3.2")
endif()

include(ExternalProject)

set(GTEST_LIBRARIES gtest gtest_main)
# the binary dir must be known before creating the external project in order
# to pass the byproducts
set(prefix "${CMAKE_CURRENT_BINARY_DIR}/gtest-external-prefix")
set(binary_dir "${prefix}/src/gtest-external-build")

# For every gtest library, remember where its static archive will be written
# (<lib>_location) and collect these paths as the byproducts of the external build.
set(byproducts)
foreach(lib ${GTEST_LIBRARIES})
set(${lib}_location
${binary_dir}/${CMAKE_CFG_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}${lib}${CMAKE_STATIC_LIBRARY_SUFFIX})
list(APPEND byproducts ${${lib}_location})
endforeach()

# EXTRA_FLAG is prepended to the per-configuration C++ flags handed to the gtest build;
# it is only non-empty for MSVC_VERSION below 1800.
if( MSVC )
if( MSVC_VERSION LESS 1800 )
set(EXTRA_FLAG "/D_VARIADIC_MAX=10 ")
else()
set(EXTRA_FLAG "")
endif()
else()
set(EXTRA_FLAG "")
endif()

# Download gtest 1.7.0 and build it with the same compilers, flags and build type as
# this project. Nothing is installed (INSTALL_COMMAND is empty).
# NOTE(review): the download URL points at Google Code hosting - verify that it is
# still reachable.
ExternalProject_Add(
gtest-external
URL http://googletest.googlecode.com/files/gtest-1.7.0.zip
URL_MD5 2d6ec8ccdf5c46b05ba54a9fd1d130d7
PREFIX ${prefix}
BINARY_DIR ${binary_dir}
CMAKE_CACHE_ARGS
-DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
-DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_DEBUG:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_CXX_FLAGS_MINSIZEREL:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_MINSIZEREL}
-DCMAKE_CXX_FLAGS_RELEASE:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_RELWITHDEBINFO:STRING=${EXTRA_FLAG}${CMAKE_CXX_FLAGS_RELWITHDEBINFO}
-DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER}
-DCMAKE_C_FLAGS:STRING=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG:STRING=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_MINSIZEREL:STRING=${CMAKE_C_FLAGS_MINSIZEREL}
-DCMAKE_C_FLAGS_RELEASE:STRING=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_C_FLAGS_RELWITHDEBINFO:STRING=${CMAKE_C_FLAGS_RELWITHDEBINFO}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-Dgtest_force_shared_crt:BOOL=ON
BUILD_BYPRODUCTS ${byproducts}
INSTALL_COMMAND "")

# Expose each gtest library as an imported static target that depends on the external
# build and points at the archive location computed above.
foreach(lib ${GTEST_LIBRARIES})
add_library(${lib} IMPORTED STATIC)
add_dependencies(${lib} gtest-external)
set_target_properties(${lib} PROPERTIES IMPORTED_LOCATION ${${lib}_location})
endforeach()

# The headers are taken straight from the downloaded source tree.
ExternalProject_Get_Property(gtest-external source_dir)
set(GTEST_INCLUDE_DIRS ${source_dir}/include)
set(GTEST_FOUND ON)
endif()

# Hack to get googletest v1.6 to work with vs2012
if( MSVC11 )
add_definitions( "/D_VARIADIC_MAX=10" )
endif( )
16 changes: 13 additions & 3 deletions src/include/sharedLibrary.h
Expand Up @@ -29,21 +29,31 @@
#include <dlfcn.h>
#endif

inline void* LoadSharedLibrary( std::string linuxPrefix, std::string libraryName, bool quiet )
inline void* LoadSharedLibrary( std::string unixPrefix, std::string libraryName, bool quiet )
{
#if defined( _WIN32 )
libraryName += ".dll";

// HMODULE is actually the load address; function returns NULL if it cannot find the shared library
HMODULE fileHandle = ::LoadLibraryExA( libraryName.c_str( ), NULL, NULL );
#else
tstring linuxName = linuxPrefix;
#elif defined(__linux__)
tstring linuxName = unixPrefix;
linuxName += libraryName += ".so";
void* fileHandle = ::dlopen( linuxName.c_str( ), RTLD_NOW );
if( !quiet && !fileHandle )
{
std::cerr << ::dlerror( ) << std::endl;
}
#elif defined(__APPLE__)
tstring appleName = unixPrefix;
appleName += libraryName += ".dylib";
void* fileHandle = ::dlopen( appleName.c_str( ), RTLD_NOW );
if( !quiet && !fileHandle )
{
std::cerr << ::dlerror( ) << std::endl;
}
#else
#error "unsupported platform""
#endif

return fileHandle;
Expand Down
1 change: 1 addition & 0 deletions src/library/CMakeLists.txt
Expand Up @@ -103,6 +103,7 @@ endif( )

# CPack configuration; include the executable into the package
install( TARGETS clFFT
EXPORT Library
RUNTIME DESTINATION bin${SUFFIX_BIN}
LIBRARY DESTINATION lib${SUFFIX_LIB}
ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
Expand Down
2 changes: 1 addition & 1 deletion src/library/mainpage.h
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
* ************************************************************************/

/*! @file clFFT.mainpage.h
/*! @file mainpage.h
This file contains all documentation, no code, in the form of comment text. Its purpose is to provide
chapter 1 of the documentation we produce with doxygen. This includes the title page, installation instructions
Expand Down
7 changes: 5 additions & 2 deletions src/scripts/perf/CMakeLists.txt
Expand Up @@ -20,5 +20,8 @@ set(GRAPHING_SCRIPTS measurePerformance.py
errorHandler.py
performanceUtility.py
)

install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${SUFFIX_BIN} )
if( WIN32 )
install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${SUFFIX_BIN} )
else ( )
install( FILES ${GRAPHING_SCRIPTS} DESTINATION share/clFFT )
endif( )
1 change: 1 addition & 0 deletions src/statTimer/CMakeLists.txt
Expand Up @@ -80,6 +80,7 @@ endif( )

# CPack configuration; include the executable into the package
install( TARGETS StatTimer
EXPORT Library
RUNTIME DESTINATION bin${SUFFIX_BIN}
LIBRARY DESTINATION lib${SUFFIX_LIB}
ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
Expand Down
2 changes: 1 addition & 1 deletion src/tests/buffer.h
Expand Up @@ -245,7 +245,7 @@ class buffer {
preinitialize_strides_to_1_1_1();

// we need to calculate the strides if tightly packed
if( strides_in == tightly_packed) {
if( strides_in == nullptr ) {
_strides[dimx] = 1;
for( size_t i = 1; i < _number_of_dimensions; ++i )
{
Expand Down
7 changes: 4 additions & 3 deletions src/tests/cl_transform.h
Expand Up @@ -263,8 +263,9 @@ class clfft {
{
cl_context tempContext = NULL;
device_id = initializeCL(
device_type,
device_gpu_list,
g_device_type,
g_device_id,
g_platform_id,
tempContext,
printInfo
);
Expand Down Expand Up @@ -646,7 +647,7 @@ class clfft {

// we don't want to bog down the CPU with ginormous problem sizes
// so we chop the global memory way down to keep things manageable
if( device_type == CL_DEVICE_TYPE_CPU )
if( g_device_type == CL_DEVICE_TYPE_CPU )
{
global_memory_size /= 8;
}
Expand Down
34 changes: 23 additions & 11 deletions src/tests/gtest_main.cpp
Expand Up @@ -57,8 +57,10 @@ void inline BSF (unsigned long * index, size_t & mask) {
bool suppress_output = false;

// Globals that user can set on the command line, that need to be passed down to unit tests
cl_device_type device_type = CL_DEVICE_TYPE_GPU;
cl_uint device_gpu_list = ~0x0;
cl_device_type g_device_type = CL_DEVICE_TYPE_ALL;
cl_int g_device_id = 0;
cl_int g_platform_id = 0;

bool comparison_type = root_mean_square;

int main( int argc, char **argv )
Expand Down Expand Up @@ -103,7 +105,10 @@ int main( int argc, char **argv )
( "noVersion", "Don't print version information from the clFFT library" )
( "noInfoCL", "Don't print information from the OpenCL runtime" )
( "cpu,c", "Run tests on a CPU device" )
( "gpu,g", "Run tests on a GPU device (default)" )
( "gpu,g", "Run tests on a GPU device" )
( "all,a", "Run tests on any device type (default)" )
( "platform", po::value< cl_int >( &g_platform_id )->default_value( 0 ), "Select a specific OpenCL platform id as it is reported by clinfo" )
( "device", po::value< cl_int >( &g_device_id )->default_value( 0 ), "Select a specific OpenCL device id as it is reported by clinfo" )
( "pointwise,p", "Do a pointwise comparison to determine test correctness (default: use root mean square)" )
( "tolerance,t", po::value< float >( &tolerance )->default_value( 0.001f ), "tolerance level to use when determining test pass/fail" )
( "numRandom,r", po::value< size_t >( &number_of_random_tests )->default_value( 2000 ), "number of random tests to run" )
Expand All @@ -125,25 +130,32 @@ int main( int argc, char **argv )
std::cout << std::endl;

size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0)
| ((vm.count( "cpu" ) > 0) ? 2 : 0);
| ((vm.count( "cpu" ) > 0) ? 2 : 0)
| ((vm.count( "all" ) > 0) ? 4 : 0);
if ((mutex & (mutex-1)) != 0) {
terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl;
if (vm.count ( "cpu" ) > 0) terr << _T(" cpu, c Run tests on a CPU device" ) << std::endl;
if (vm.count ( "gpu" ) > 0) terr << _T(" gpu, g Run tests on a GPU device" ) << std::endl;
if (vm.count ( "gpu" ) > 0) terr << _T(" gpu,g Force selection of OpenCL GPU devices only" ) << std::endl;
if (vm.count ( "cpu" ) > 0) terr << _T(" cpu,c Force selection of OpenCL CPU devices only" ) << std::endl;
if (vm.count ( "all" ) > 0) terr << _T(" all,a Force selection of all OpenCL devices (default)" ) << std::endl;
return 1;
}

if( vm.count( "gpu" ) )
{
g_device_type = CL_DEVICE_TYPE_GPU;
}

if( vm.count( "cpu" ) )
{
device_type = CL_DEVICE_TYPE_CPU;
g_device_type = CL_DEVICE_TYPE_CPU;
}

if( vm.count( "gpu" ) )
if( vm.count( "all" ) )
{
device_type = CL_DEVICE_TYPE_GPU;
device_gpu_list = ~0;
g_device_type = CL_DEVICE_TYPE_ALL;
}


// Print version by default
if( !vm.count( "noVersion" ) )
{
Expand All @@ -168,7 +180,7 @@ int main( int argc, char **argv )
cl_context tempContext = NULL;
cl_command_queue tempQueue = NULL;
cl_event tempEvent = NULL;
std::vector< cl_device_id > device_id = ::initializeCL( device_type, device_gpu_list, tempContext, true );
::initializeCL(g_device_type, g_device_id, g_platform_id, tempContext, true);
::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent );
}

Expand Down
5 changes: 3 additions & 2 deletions src/tests/test_constants.cpp
Expand Up @@ -87,8 +87,9 @@ size_t max_mem_available_on_cl_device(size_t device_index) {
std::vector< cl_device_id > device_id;
cl_context tempContext = NULL;
device_id = initializeCL(
device_type,
device_gpu_list,
g_device_type,
(cl_int)device_index,
g_platform_id,
tempContext,
false
);
Expand Down
5 changes: 3 additions & 2 deletions src/tests/test_constants.h
Expand Up @@ -61,8 +61,9 @@ const double magnitude_lower_limit = 1.0E-100;

extern float tolerance;

extern cl_device_type device_type;
extern cl_uint device_gpu_list;
extern cl_device_type g_device_type;
extern cl_int g_device_id;
extern cl_int g_platform_id;

extern size_t number_of_random_tests;
extern time_t random_test_parameter_seed;
Expand Down
5 changes: 3 additions & 2 deletions src/tests/unit_test.cpp
Expand Up @@ -19,6 +19,7 @@
#include <complex>
#include "clFFT.h"
#include "../client/openCL.misc.h"
#include "test_constants.h"

class clfft_UnitTest : public ::testing::Test {
protected:
Expand All @@ -29,12 +30,12 @@ class clfft_UnitTest : public ::testing::Test {
lengths[ 0 ] = 32;
lengths[ 1 ] = 32;
lengths[ 2 ] = 32;
cl_uint deviceGpuList = ~0; // a bitmap set

commandQueueFlags = 0;

size_t memSizeBytes = lengths[ 0 ] * lengths[ 1 ] * lengths[ 2 ] * sizeof( std::complex< float > );

device_id = initializeCL( CL_DEVICE_TYPE_CPU, deviceGpuList, context, printInfo );
device_id = initializeCL( g_device_type, g_device_id, g_platform_id, context, printInfo );
createOpenCLCommandQueue( context,
commandQueueFlags,
queue,
Expand Down