Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MergeSort returns unsorted array for large arrays #174

Open
vcampmany opened this issue Jun 24, 2017 · 0 comments
Open

MergeSort returns unsorted array for large arrays #174

vcampmany opened this issue Jun 24, 2017 · 0 comments

Comments

@vcampmany
Copy link

I am trying to use the mergeSort function on large arrays (e.g. 10^8 elements), but the output array that I get turns out not to be sorted. I tried compiling my file with both CUDA 8 and CUDA 6.5. I am using a GTX Titan X (compute capability 5.2). I attach the code that I'm using; I took the code from /app/simpleCUDPP/simpleCUDPP.cu and adapted it to call the mergesort algorithm. I might be doing something wrong, but I cannot figure out what is wrong in the code.

Thanks!

`// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision$
// $Date$
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt in
// the root directory of this source distribution.
// -------------------------------------------------------------

/*
 * This is a basic example of how to use the CUDPP library.
 */

// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include "cudpp.h"

#include <string>

////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest( int argc, char** argv);

/**
 * Checks whether the first `count` elements of `arr` are ordered.
 *
 * @param arr    array of unsigned keys to inspect (only read, never written)
 * @param count  number of elements to inspect; 0 or 1 elements are
 *               trivially ordered
 * @param type   'A' to test for ascending (non-decreasing) order,
 *               'D' to test for descending (non-increasing) order
 * @return true if the elements are in the requested order; false if an
 *         out-of-order adjacent pair is found or if `type` is not
 *         'A' or 'D'
 */
bool isSorted(uint *arr, int count, char type)
{
    bool check = true;

    switch (type) {
    case 'A':
        // Ascending: fail on the first pair where the left element is larger.
        for (int i = 0; i < count - 1; i++) {
            if (arr[i] > arr[i + 1]) {
                check = false;
                break;
            }
        }
        break;
    case 'D':
        // Descending: fail on the first pair where the left element is smaller.
        for (int i = 0; i < count - 1; i++) {
            if (arr[i] < arr[i + 1]) {
                check = false;
                break;
            }
        }
        break;
    default:
        // An unknown order cannot be verified, so do not report success.
        printf("error wrong type of sorting");
        check = false;
        break;
    }
    return check;
}

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    // Hand the command line straight to the test driver.
    runTest(argc, argv);
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runTest( int argc, char** argv)
{
int deviceCount;
cudaGetDeviceCount(&deviceCount);
if (deviceCount == 0) {
fprintf(stderr, "error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
int dev = 0;
if (argc > 1) {
std::string arg = argv[1];
size_t pos = arg.find("=");
if (arg.find("device") && pos != std::string::npos) {
dev = atoi(arg.c_str() + (pos + 1));
}
}
if (dev < 0) dev = 0;
if (dev > deviceCount-1) dev = deviceCount - 1;
cudaSetDevice(dev);

cudaDeviceProp prop;
if (cudaGetDeviceProperties(&prop, dev) == cudaSuccess)
{
    printf("Using device %d:\n", dev);
    printf("%s; global mem: %dB; compute v%d.%d; clock: %d kHz\n",
           prop.name, (int)prop.totalGlobalMem, (int)prop.major, 
           (int)prop.minor, (int)prop.clockRate);
}

unsigned int numElements = 100000000; //32768; //10;
unsigned int memSize = sizeof( uint) * numElements;

// allocate host memory
uint* h_idata = (uint*) malloc( memSize);
uint* h_values = (uint*) malloc(memSize); 
// initalize the memory
for (unsigned int i = 0; i < numElements; ++i) 
{
    h_idata[i] = rand();
    h_values[i] = i;
}

// allocate device memory
uint* d_idata;
uint* d_values;
cudaError_t result = cudaMalloc( (void**) &d_idata, memSize);
if (result != cudaSuccess) {
    printf("Error: %s\n", cudaGetErrorString(result));
    exit(-1);
}
result = cudaMalloc( (void**) &d_values, memSize);
if (result != cudaSuccess) {
    printf("Error: %s\n", cudaGetErrorString(result));
    exit(-1);
}

// copy host memory to device
result = cudaMemcpy( d_idata, h_idata, memSize, cudaMemcpyHostToDevice);
if (result != cudaSuccess) {
    printf("Error: %s\n", cudaGetErrorString(result));
    exit(-1);
}
result = cudaMemcpy( d_values, h_values, memSize, cudaMemcpyHostToDevice);
if (result != cudaSuccess) {
    printf("Error: %s\n", cudaGetErrorString(result));
    exit(-1);
}

// Initialize the CUDPP Library
CUDPPHandle theCudpp;
cudppCreate(&theCudpp);

CUDPPConfiguration config;
//config.op = CUDPP_MAX; //CUDPP_ADD;
config.datatype = CUDPP_UINT;
config.algorithm = CUDPP_SORT_MERGE;
config.options = CUDPP_OPTION_KEY_VALUE_PAIRS;

CUDPPHandle mergesort_plan;
CUDPPResult res = cudppPlan(theCudpp, &mergesort_plan, config, numElements, 1, 0);  

if (CUDPP_SUCCESS != res)
{
    printf("Error creating CUDPPPlan\n");
    exit(-1);
}

// Run the merge sort
res = cudppMergeSort(mergesort_plan, d_idata, d_values, numElements);
if (CUDPP_SUCCESS != res)
{
    printf("Error in cudppScan()\n");
    exit(-1);
}

// allocate mem for the result on host side
uint* h_odata = (uint*) malloc( memSize);
uint* h_ovalues = (uint*) malloc (memSize);
// copy result from device to host
result = cudaMemcpy( h_odata, d_idata, memSize, cudaMemcpyDeviceToHost);
if (result != cudaSuccess) {
    printf("Error: %s\n", cudaGetErrorString(result));
    exit(-1);
}
result = cudaMemcpy( h_ovalues, d_values, memSize, cudaMemcpyDeviceToHost);
if (result != cudaSuccess) {
    printf("Error: %s\n", cudaGetErrorString(result));
    exit(-1);
}

if(!isSorted(h_odata, numElements, 'A'))
	printf("Data NOT sorted\n");
else
	printf("Data sorted\n");

res = cudppDestroyPlan(mergesort_plan);
if (CUDPP_SUCCESS != res)
{
    printf("Error destroying CUDPPPlan\n");
    exit(-1);
}

// shut down the CUDPP library
cudppDestroy(theCudpp);

free(h_idata);
free(h_values);
cudaFree(d_idata);
cudaFree(d_values);

}
`

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant