MergeSort returns unsorted array for large arrays #174

vcampmany · 2017-06-24T17:36:54Z

I am trying to use the mergeSort function on large arrays (e.g. 10^8 elements), but the output array that I get back is not sorted. I compiled my file with both CUDA 8 and CUDA 6.5, and I am using a GTX Titan X (compute capability 5.2). I attach the code that I'm using: I took it from /app/simpleCUDPP/simpleCUDPP.cu and adapted it to call the merge sort algorithm. I might be doing something wrong, but I cannot find what is wrong in the code.

Thanks!

`// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------

/*

This is a basic example of how to use the CUDPP library.
*/

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include "cudpp.h"

#include <string>

////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest( int argc, char** argv);

// Checks whether the first `count` elements of `arr` are in the requested order.
//
// arr   - array to inspect; it is only read
// count - number of elements to examine; 0 or 1 elements are trivially ordered
// type  - 'A' for ascending (no element greater than its successor),
//         'D' for descending (no element smaller than its successor)
//
// Returns true when no adjacent pair violates the requested order. An
// unrecognized `type` prints an error and returns false, so a bad argument is
// never reported as "sorted".
bool isSorted(uint *arr, int count, char type)
{
    if (type != 'A' && type != 'D') {
        printf("error wrong type of sorting");
        return false;
    }

    const bool ascending = (type == 'A');

    // Compare each element with its successor; equal neighbours are allowed
    // in both directions.
    for (int i = 0; i + 1 < count; i++) {
        const bool outOfOrder = ascending ? (arr[i] > arr[i+1])
                                          : (arr[i] < arr[i+1]);
        if (outOfOrder) {
            return false;
        }
    }
    return true;
}

////////////////////////////////////////////////////////////////////////////////
// Entry point: forwards the command line to runTest() and reports success.
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    runTest(argc, argv);
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
//! Abort the program with a diagnostic when a CUDA runtime call fails.
//! @param status  result returned by the CUDA runtime call
//! @param what    short description of the call, included in the message
////////////////////////////////////////////////////////////////////////////////
static void
checkCuda( cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "Error: %s: %s\n", what, cudaGetErrorString(status));
        exit(-1);
    }
}

////////////////////////////////////////////////////////////////////////////////
//! Sort random key/value pairs on the GPU with cudppMergeSort and print
//! whether the keys copied back to the host are in ascending order.
//!
//! The device can be chosen with a first argument containing "device" and
//! "=<index>" (e.g. "--device=1"); the index is clamped to the valid range.
//! Any allocation, CUDA or CUDPP failure prints a message and exits.
//!
//! @param argc  number of command-line arguments
//! @param argv  command-line arguments; only argv[1] is inspected
//!
//! NOTE(review): the sorted values are copied back but never verified
//! against the keys; only the key order is checked.
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
    int deviceCount = 0;
    if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount == 0) {
        fprintf(stderr, "error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }

    int dev = 0;
    if (argc > 1) {
        std::string arg = argv[1];
        size_t pos = arg.find("=");
        // find() returns npos (non-zero) when the text is absent and 0 when it
        // matches at the start, so it must be compared against npos rather
        // than used directly as a boolean.
        if (arg.find("device") != std::string::npos && pos != std::string::npos) {
            dev = atoi(arg.c_str() + (pos + 1));
        }
    }
    if (dev < 0) dev = 0;
    if (dev > deviceCount-1) dev = deviceCount - 1;
    checkCuda(cudaSetDevice(dev), "cudaSetDevice");

    cudaDeviceProp prop;
    if (cudaGetDeviceProperties(&prop, dev) == cudaSuccess)
    {
        printf("Using device %d:\n", dev);
        // totalGlobalMem is a size_t; print it at full width so that cards
        // with more than 2 GB do not overflow an int.
        printf("%s; global mem: %lluB; compute v%d.%d; clock: %d kHz\n",
               prop.name, (unsigned long long)prop.totalGlobalMem,
               (int)prop.major, (int)prop.minor, (int)prop.clockRate);
    }

    const unsigned int numElements = 100000000; //32768; //10;
    // Compute the byte count in size_t so larger element counts cannot wrap.
    const size_t memSize = sizeof( uint) * (size_t)numElements;

    // allocate host memory for the inputs and for the results copied back
    uint* h_idata   = (uint*) malloc( memSize);
    uint* h_values  = (uint*) malloc( memSize);
    uint* h_odata   = (uint*) malloc( memSize);
    uint* h_ovalues = (uint*) malloc( memSize);
    if (h_idata == NULL || h_values == NULL ||
        h_odata == NULL || h_ovalues == NULL) {
        fprintf(stderr, "Error: could not allocate %llu bytes of host memory\n",
                (unsigned long long)memSize);
        exit(-1);
    }

    // initialize the memory: random keys, values record the original index
    for (unsigned int i = 0; i < numElements; ++i)
    {
        h_idata[i] = rand();
        h_values[i] = i;
    }

    // allocate device memory
    uint* d_idata = NULL;
    uint* d_values = NULL;
    checkCuda(cudaMalloc( (void**) &d_idata, memSize), "cudaMalloc(keys)");
    checkCuda(cudaMalloc( (void**) &d_values, memSize), "cudaMalloc(values)");

    // copy host memory to device
    checkCuda(cudaMemcpy( d_idata, h_idata, memSize, cudaMemcpyHostToDevice),
              "cudaMemcpy(keys, host to device)");
    checkCuda(cudaMemcpy( d_values, h_values, memSize, cudaMemcpyHostToDevice),
              "cudaMemcpy(values, host to device)");

    // Initialize the CUDPP Library
    CUDPPHandle theCudpp;
    CUDPPResult res = cudppCreate(&theCudpp);
    if (CUDPP_SUCCESS != res)
    {
        fprintf(stderr, "Error initializing CUDPP Library\n");
        exit(-1);
    }

    CUDPPConfiguration config;
    config.datatype = CUDPP_UINT;
    config.algorithm = CUDPP_SORT_MERGE;
    config.options = CUDPP_OPTION_KEY_VALUE_PAIRS;

    CUDPPHandle mergesort_plan;
    res = cudppPlan(theCudpp, &mergesort_plan, config, numElements, 1, 0);
    if (CUDPP_SUCCESS != res)
    {
        fprintf(stderr, "Error creating CUDPPPlan\n");
        exit(-1);
    }

    // Run the merge sort
    res = cudppMergeSort(mergesort_plan, d_idata, d_values, numElements);
    if (CUDPP_SUCCESS != res)
    {
        fprintf(stderr, "Error in cudppMergeSort()\n");
        exit(-1);
    }
    // Wait for the device so that a failure during the sort is reported here
    // instead of being attributed to the copies below.
    checkCuda(cudaDeviceSynchronize(), "cudppMergeSort execution");

    // copy result from device to host
    checkCuda(cudaMemcpy( h_odata, d_idata, memSize, cudaMemcpyDeviceToHost),
              "cudaMemcpy(keys, device to host)");
    checkCuda(cudaMemcpy( h_ovalues, d_values, memSize, cudaMemcpyDeviceToHost),
              "cudaMemcpy(values, device to host)");

    if (!isSorted(h_odata, (int)numElements, 'A'))
        printf("Data NOT sorted\n");
    else
        printf("Data sorted\n");

    res = cudppDestroyPlan(mergesort_plan);
    if (CUDPP_SUCCESS != res)
    {
        fprintf(stderr, "Error destroying CUDPPPlan\n");
        exit(-1);
    }

    // shut down the CUDPP library
    res = cudppDestroy(theCudpp);
    if (CUDPP_SUCCESS != res)
    {
        fprintf(stderr, "Error shutting down CUDPP Library\n");
        exit(-1);
    }

    // release every host and device buffer, including the result buffers
    free(h_idata);
    free(h_values);
    free(h_odata);
    free(h_ovalues);
    checkCuda(cudaFree(d_idata), "cudaFree(keys)");
    checkCuda(cudaFree(d_values), "cudaFree(values)");
}
`

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

MergeSort returns unsorted array for large arrays #174

MergeSort returns unsorted array for large arrays #174

vcampmany commented Jun 24, 2017

MergeSort returns unsorted array for large arrays #174

MergeSort returns unsorted array for large arrays #174

Comments

vcampmany commented Jun 24, 2017