You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I am trying to use the mergeSort function on large arrays (i.e. 10^8 elements), but the output array that I get back is not sorted. I tried compiling my file with both CUDA 8 and CUDA 6.5. I am using a GTX Titan X (compute capability 5.2). I attach the code that I'm using; I took it from /app/simpleCUDPP/simpleCUDPP.cu and adapted it to call the merge sort algorithm. I might be doing something wrong, but I cannot find what is wrong in the code.
Thanks!
`// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------
/*
This is a basic example of how to use the CUDPP library.
*/
switch (type) {
case 'A':
for (int i = 0; i < count-1; i++) {
if (arr[i] > arr[i+1]) {
check = false;
break;
}
}
break;
case 'D':
for (int i = 0; i < count-1; i++) {
if (arr[i] < arr[i+1]) {
check = false;
break;
}
}
break;
default:
printf("error wrong type of sorting");
break;
}
return check;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Entry point: hands the unmodified command line to runTest() and reports
// success to the caller.
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0;
}
////////////////////////////////////////////////////////////////////////////////
//! Print the CUDA error string and terminate if a runtime call failed.
//! @param result  status code returned by a CUDA runtime API call
////////////////////////////////////////////////////////////////////////////////
static void
exitOnCudaError( cudaError_t result)
{
    if (result != cudaSuccess) {
        printf("Error: %s\n", cudaGetErrorString(result));
        exit(-1);
    }
}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA: key/value merge sort through CUDPP.
//!
//! Selects a device (optionally from an argv[1] of the form "...device=N"),
//! fills a key array with rand() values and a value array with each element's
//! index, sorts both on the device with cudppMergeSort(), copies the result
//! back and prints whether the keys are in ascending order.
//! Any CUDA or CUDPP failure prints a message and terminates the process.
//!
//! @param argc  number of command-line arguments
//! @param argv  command-line arguments; only argv[1] is inspected
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    int deviceCount = 0;
    // A failed query leaves deviceCount meaningless, so treat it like "no device".
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount == 0) {
        fprintf(stderr, "error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    int dev = 0;
    if (argc > 1) {
        std::string arg = argv[1];
        size_t pos = arg.find("=");
        // find() returns a position, not a bool: 0 when the argument starts
        // with "device" and npos (non-zero) when it is absent, so both results
        // must be compared against npos.
        if (arg.find("device") != std::string::npos && pos != std::string::npos) {
            dev = atoi(arg.c_str() + (pos + 1));
        }
    }
    // Clamp the requested index into the range of available devices.
    if (dev < 0) dev = 0;
    if (dev > deviceCount-1) dev = deviceCount - 1;
    exitOnCudaError(cudaSetDevice(dev));

    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, dev) == cudaSuccess)
    {
        printf("Using device %d:\n", dev);
        // totalGlobalMem is a size_t; printing it through an int cast
        // truncates the value on devices with more than 2 GiB of memory.
        printf("%s; global mem: %lluB; compute v%d.%d; clock: %d kHz\n",
               prop.name, (unsigned long long)prop.totalGlobalMem, (int)prop.major,
               (int)prop.minor, (int)prop.clockRate);
    }

    unsigned int numElements = 100000000; //32768; //10;
    // Compute the byte count in size_t so larger element counts cannot wrap
    // a 32-bit product.
    size_t memSize = sizeof( uint) * (size_t)numElements;

    // allocate host memory
    uint* h_idata = (uint*) malloc( memSize);
    uint* h_values = (uint*) malloc( memSize);
    if (h_idata == NULL || h_values == NULL) {
        printf("Error: host allocation of %llu bytes failed\n",
               (unsigned long long)memSize);
        exit(-1);
    }

    // initialize the memory: random keys, each value is its original index
    for (unsigned int i = 0; i < numElements; ++i)
    {
        h_idata[i] = rand();
        h_values[i] = i;
    }

    // allocate device memory
    uint* d_idata = NULL;
    uint* d_values = NULL;
    exitOnCudaError(cudaMalloc( (void**) &d_idata, memSize));
    exitOnCudaError(cudaMalloc( (void**) &d_values, memSize));

    // copy host memory to device
    exitOnCudaError(cudaMemcpy( d_idata, h_idata, memSize, cudaMemcpyHostToDevice));
    exitOnCudaError(cudaMemcpy( d_values, h_values, memSize, cudaMemcpyHostToDevice));

    // Initialize the CUDPP Library
    CUDPPHandle theCudpp;
    CUDPPResult res = cudppCreate(&theCudpp);
    if (CUDPP_SUCCESS != res)
    {
        printf("Error initializing CUDPP Library\n");
        exit(-1);
    }

    CUDPPConfiguration config;
    // NOTE(review): op is presumably ignored by sort plans (confirm against
    // the CUDPP docs); it is set only so the struct is not passed by value
    // with an uninitialized field.
    config.op = CUDPP_ADD;
    config.datatype = CUDPP_UINT;
    config.algorithm = CUDPP_SORT_MERGE;
    config.options = CUDPP_OPTION_KEY_VALUE_PAIRS;

    CUDPPHandle mergesort_plan;
    res = cudppPlan(theCudpp, &mergesort_plan, config, numElements, 1, 0);
    if (CUDPP_SUCCESS != res)
    {
        printf("Error creating CUDPPPlan\n");
        exit(-1);
    }

    // Run the merge sort (keys in d_idata, values in d_values)
    res = cudppMergeSort(mergesort_plan, d_idata, d_values, numElements);
    if (CUDPP_SUCCESS != res)
    {
        printf("Error in cudppMergeSort()\n");
        exit(-1);
    }

    // allocate mem for the result on host side
    uint* h_odata = (uint*) malloc( memSize);
    uint* h_ovalues = (uint*) malloc( memSize);
    if (h_odata == NULL || h_ovalues == NULL) {
        printf("Error: host allocation of %llu bytes failed\n",
               (unsigned long long)memSize);
        exit(-1);
    }

    // copy result from device to host
    exitOnCudaError(cudaMemcpy( h_odata, d_idata, memSize, cudaMemcpyDeviceToHost));
    exitOnCudaError(cudaMemcpy( h_ovalues, d_values, memSize, cudaMemcpyDeviceToHost));

    if(!isSorted(h_odata, numElements, 'A'))
        printf("Data NOT sorted\n");
    else
        printf("Data sorted\n");

    res = cudppDestroyPlan(mergesort_plan);
    if (CUDPP_SUCCESS != res)
    {
        printf("Error destroying CUDPPPlan\n");
        exit(-1);
    }

    // shut down the CUDPP library
    res = cudppDestroy(theCudpp);
    if (CUDPP_SUCCESS != res)
    {
        printf("Error shutting down CUDPP Library\n");
        exit(-1);
    }

    // release every host and device buffer, including the result buffers
    free(h_idata);
    free(h_values);
    free(h_odata);
    free(h_ovalues);
    exitOnCudaError(cudaFree(d_idata));
    exitOnCudaError(cudaFree(d_values));
}
`
The text was updated successfully, but these errors were encountered:
I am trying to use the mergeSort function on large arrays (i.e. 10^8 elements), but the output array that I get back is not sorted. I tried compiling my file with both CUDA 8 and CUDA 6.5. I am using a GTX Titan X (compute capability 5.2). I attach the code that I'm using; I took it from /app/simpleCUDPP/simpleCUDPP.cu and adapted it to call the merge sort algorithm. I might be doing something wrong, but I cannot find what is wrong in the code.
Thanks!
`// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------
/*
*/
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <string>
// includes, project
#include "cudpp.h"
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
// Forward declaration so that main() can call runTest() before its
// definition appears further down the file.
void runTest( int argc, char** argv);
// Checks whether the first `count` elements of `arr` are ordered.
//
// arr    array of keys to inspect (read only)
// count  number of elements to inspect; 0 or 1 elements are trivially ordered
// type   'A' for ascending (non-decreasing) order,
//        'D' for descending (non-increasing) order
//
// Returns true when no adjacent pair violates the requested order. For any
// other `type` a message is printed and false is returned, so an unknown
// order code can never be reported as "sorted".
bool isSorted(uint *arr, int count, char type)
{
    bool check = true;
    switch (type) {
    case 'A':
        for (int i = 1; i < count; i++) {
            if (arr[i-1] > arr[i]) {
                check = false;
                break;
            }
        }
        break;
    case 'D':
        for (int i = 1; i < count; i++) {
            if (arr[i-1] < arr[i]) {
                check = false;
                break;
            }
        }
        break;
    default:
        printf("error wrong type of sorting");
        check = false;
        break;
    }
    // The function is non-void, so every path must reach this return.
    return check;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
// Entry point: hands the unmodified command line to runTest() and reports
// success to the caller.
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0;
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA: select the device to use.
//!
//! Defaults to device 0; an argv[1] of the form "...device=N" requests device
//! N, which is clamped into the range of available devices. Terminates the
//! process when no CUDA device can be found or the device cannot be selected.
//!
//! @param argc  number of command-line arguments
//! @param argv  command-line arguments; only argv[1] is inspected
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    int deviceCount = 0;
    // A failed query leaves deviceCount meaningless, so treat it like "no device".
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount == 0) {
        fprintf(stderr, "error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    int dev = 0;
    if (argc > 1) {
        std::string arg = argv[1];
        size_t pos = arg.find("=");
        // find() returns a position, not a bool: 0 when the argument starts
        // with "device" and npos (non-zero) when it is absent, so both results
        // must be compared against npos.
        if (arg.find("device") != std::string::npos && pos != std::string::npos) {
            dev = atoi(arg.c_str() + (pos + 1));
        }
    }
    // Clamp the requested index into the range of available devices.
    if (dev < 0) dev = 0;
    if (dev > deviceCount-1) dev = deviceCount - 1;

    cudaError_t result = cudaSetDevice(dev);
    if (result != cudaSuccess) {
        printf("Error: %s\n", cudaGetErrorString(result));
        exit(-1);
    }
}
`
The text was updated successfully, but these errors were encountered: