From 6ba1b57ac458639dd4d3639feb3d5fabc4cfacdf Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Tue, 2 Jun 2015 16:11:56 -0700 Subject: [PATCH 01/36] small makefile fix for simple_example --- simple_example/Makefile | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/simple_example/Makefile b/simple_example/Makefile index 9762bce03..842ba0e19 100644 --- a/simple_example/Makefile +++ b/simple_example/Makefile @@ -94,7 +94,7 @@ else ARCH = -m64 endif -NVCCFLAGS = -Xptxas -v -Xcudafe -\# +NVCCFLAGS = -Xptxas -v -Xcudafe -\# -lineinfo ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500 @@ -123,15 +123,15 @@ endif # Dependency Lists #------------------------------------------------------------------------------- -DEPS = ./Makefile \ - $(wildcard ../gunrock/util/*.cuh) \ - $(wildcard ../gunrock/util/**/*.cuh) \ - $(wildcard ../gunrock/*.cuh) \ - $(wildcard ../gunrock/graphio/*.cuh) \ - $(wildcard ../gunrock/oprtr/*.cuh) \ - $(wildcard ../gunrock/oprtr/**/*.cuh) \ - $(wildcard ../gunrock/app/*.cuh) \ - $(wildcard ../gunrock/app/**/*.cuh) +DEPS = ./Makefile \ + $(wildcard ../gunrock/util/*.cuh) \ + (wildcard ../gunrock/util/**/*.cuh) \ + $(wildcard ../gunrock/*.cuh) \ + $(wildcard ../gunrock/graphio/*.cuh) \ + $(wildcard ../gunrock/oprtr/*.cuh) \ + $(wildcard ../gunrock/oprtr/**/*.cuh) \ + $(wildcard ../gunrock/app/*.cuh) \ + $(wildcard ../gunrock/app/**/*.cuh) #------------------------------------------------------------------------------- # (make simple) Simple example driver for three primitives: CC, BFS and BC @@ -139,9 +139,9 @@ DEPS = ./Makefile \ simple: bin/simple_example_$(NVCC_VERSION)_$(ARCH_SUFFIX) -bin/simple_example_$(NVCC_VERSION)_$(ARCH_SUFFIX) : simple_example.cu ../externals/moderngpu/src/mgpucontext.cu ../externals/moderngpu/src/mgpuutil.cpp $(DEPS) +bin/simple_example_$(NVCC_VERSION)_$(ARCH_SUFFIX) : simple_example.cu cpu_graph_lib.cpp ../gunrock/util/error_utils.cu 
../externals/moderngpu/src/mgpucontext.cu ../externals/moderngpu/src/mgpuutil.cpp $(DEPS) mkdir -p bin - $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/simple_example_$(NVCC_VERSION)_$(ARCH_SUFFIX) simple_example.cu ../externals/moderngpu/src/mgpucontext.cu ../externals/moderngpu/src/mgpuutil.cpp $(NVCCFLAGS) $(ARCH) $(INC) -lcuda -O3 + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/simple_example_$(NVCC_VERSION)_$(ARCH_SUFFIX) simple_example.cu cpu_graph_lib.cpp ../gunrock/util/error_utils.cu ../externals/moderngpu/src/mgpucontext.cu ../externals/moderngpu/src/mgpuutil.cpp $(NVCCFLAGS) $(ARCH) $(INC) -lcuda -O3 #------------------------------------------------------------------------------- # Clean From e01e65c5f9c5ead89d5dc8df5e47c64c7648379a Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Tue, 9 Jun 2015 09:54:57 -0700 Subject: [PATCH 02/36] fix a small bug due to missing one condition check --- gunrock/app/mst/mst_functor.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gunrock/app/mst/mst_functor.cuh b/gunrock/app/mst/mst_functor.cuh index 8cf90fe0e..1f5393571 100644 --- a/gunrock/app/mst/mst_functor.cuh +++ b/gunrock/app/mst/mst_functor.cuh @@ -111,7 +111,8 @@ struct EdgeFunctor VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) { - return problem->d_successors[s_id] == d_id; + return problem->d_successors[s_id] == d_id && + problem->d_reduced_vals[s_id] == problem->d_edge_weights[e_id]; } /** From 9391686be26eb892794cab2760da52e471034d5d Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Tue, 9 Jun 2015 22:27:02 -0700 Subject: [PATCH 03/36] use random value if no input weight values --- gunrock/graphio/market.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunrock/graphio/market.cuh b/gunrock/graphio/market.cuh index 3795d3bca..494200f4b 100644 --- a/gunrock/graphio/market.cuh +++ b/gunrock/graphio/market.cuh @@ -141,7 +141,7 @@ int ReadMarketStream( if (coo) free(coo); return -1; } else if 
(num_input == 2) { - ll_value = 1; + ll_value = rand() % 64; } } else { if (sscanf(line, "%lld %lld", &ll_col, &ll_row) != 2) { From a560c88260b74672c3f26f938facb653e477af7a Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 10 Jun 2015 08:31:45 -0700 Subject: [PATCH 04/36] oops missing one $ sign in makefile --- simple_example/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simple_example/Makefile b/simple_example/Makefile index 842ba0e19..5fade4d66 100644 --- a/simple_example/Makefile +++ b/simple_example/Makefile @@ -125,7 +125,7 @@ endif DEPS = ./Makefile \ $(wildcard ../gunrock/util/*.cuh) \ - (wildcard ../gunrock/util/**/*.cuh) \ + $(wildcard ../gunrock/util/**/*.cuh) \ $(wildcard ../gunrock/*.cuh) \ $(wildcard ../gunrock/graphio/*.cuh) \ $(wildcard ../gunrock/oprtr/*.cuh) \ From 329f4e7521614e8f47db8648f408be7574d71f52 Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Thu, 11 Jun 2015 13:02:40 -0700 Subject: [PATCH 05/36] changed the BC forward phase cache offset index to stack. Still have bugs for single node BC. 
--- gunrock/app/bc/bc_enactor.cuh | 42 +++++++++++++++++------------------ gunrock/app/bc/bc_functor.cuh | 12 +++++----- gunrock/util/test_utils.h | 1 + 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/gunrock/app/bc/bc_enactor.cuh b/gunrock/app/bc/bc_enactor.cuh index 49526a5f2..2a0c621f7 100644 --- a/gunrock/app/bc/bc_enactor.cuh +++ b/gunrock/app/bc/bc_enactor.cuh @@ -242,8 +242,8 @@ class BCEnactor : public EnactorBase frontier_attribute.queue_reset = true; - std::vector forward_queue_offsets(graph_slice->nodes); - forward_queue_offsets.push_back(0); + std::stack forward_queue_offsets; + forward_queue_offsets.push(0); if (AdvanceKernelPolicy::ADVANCE_MODE == gunrock::oprtr::advance::LB) { if (retval = util::GRError(cudaMalloc( @@ -259,12 +259,12 @@ class BCEnactor : public EnactorBase while (done[0] < 0) { if (frontier_attribute.queue_length > 0 && enactor_stats.iteration > 0) { - SizeT cur_offset = forward_queue_offsets.back(); + SizeT cur_offset = forward_queue_offsets.top(); //printf("offset:%d, current length:%d\n", cur_offset, frontier_attribute.queue_length); util::MemsetCopyVectorKernel<<<128, 128>>>(&problem->data_slices[0]->d_forward_output[cur_offset], graph_slice->frontier_queues.d_keys[frontier_attribute.selector], frontier_attribute.queue_length); //util::DisplayDeviceResults(graph_slice->frontier_queues.d_keys[frontier_attribute.selector], frontier_attribute.queue_length); //util::DisplayDeviceResults(&problem->data_slices[0]->d_forward_output[cur_offset], frontier_attribute.queue_length); - forward_queue_offsets.push_back(frontier_attribute.queue_length+cur_offset); + forward_queue_offsets.push(frontier_attribute.queue_length+cur_offset); } // Edge Map @@ -351,15 +351,12 @@ class BCEnactor : public EnactorBase if (/*DEBUG &&*/ (retval = util::GRError(cudaThreadSynchronize(), "filter_forward::Kernel failed", __FILE__, __LINE__))) break; cudaEventQuery(throttle_event); // give host memory mapped visibility to GPU updates - 
frontier_attribute.queue_index++; frontier_attribute.selector ^= 1; - enactor_stats.iteration++; if (AdvanceKernelPolicy::ADVANCE_MODE == gunrock::oprtr::advance::LB) { if (retval = work_progress.GetQueueLength(frontier_attribute.queue_index, frontier_attribute.queue_length)) break; } - if (INSTRUMENT || DEBUG) { if (retval = work_progress.GetQueueLength(frontier_attribute.queue_index, frontier_attribute.queue_length)) break; if (DEBUG) printf(", %lld", (long long) frontier_attribute.queue_length); @@ -373,6 +370,8 @@ class BCEnactor : public EnactorBase // Check if done if (done[0] == 0) break; + enactor_stats.iteration++; + if (DEBUG) printf("\n%lld", (long long) enactor_stats.iteration); } @@ -396,8 +395,12 @@ class BCEnactor : public EnactorBase if (DEBUG) printf("\nStart backward phase\n%lld", (long long) enactor_stats.iteration); // Backward BC iteration - for (int iter = forward_queue_offsets.size()-3; iter >=0; --iter) { - frontier_attribute.queue_length = forward_queue_offsets[iter+1]-forward_queue_offsets[iter]; + SizeT top_offset = forward_queue_offsets.top(); + if (DEBUG) printf("top offsets:%d\n", top_offset); + forward_queue_offsets.pop(); + while (!forward_queue_offsets.empty()) { + frontier_attribute.queue_length = top_offset-forward_queue_offsets.top(); + printf("queue length:%d\n", frontier_attribute.queue_length); /*frontier_attribute.queue_length = graph_slice->nodes; // Fill in the frontier_queues util::MemsetIdxKernel<<<128, 128>>>(graph_slice->frontier_queues.d_keys[0], graph_slice->nodes); @@ -460,7 +463,7 @@ class BCEnactor : public EnactorBase // Check if done if (done[0] == 0) break;*/ // Edge Map - if (iter > 0) { + if (forward_queue_offsets.top() > 0) { gunrock::oprtr::advance::LaunchKernel( d_done, enactor_stats, @@ -470,8 +473,8 @@ class BCEnactor : public EnactorBase (bool*)NULL, (bool*)NULL, d_scanned_edges, - &problem->data_slices[0]->d_forward_output[forward_queue_offsets[iter]], // d_in_queue - 
graph_slice->frontier_queues.d_keys[0], // d_out_queue + &problem->data_slices[0]->d_forward_output[forward_queue_offsets.top()], // d_in_queue + NULL, // d_out_queue (VertexId*)NULL, (VertexId*)NULL, graph_slice->d_row_offsets, @@ -493,8 +496,8 @@ class BCEnactor : public EnactorBase (bool*)NULL, (bool*)NULL, d_scanned_edges, - &problem->data_slices[0]->d_forward_output[forward_queue_offsets[iter]], // d_in_queue - graph_slice->frontier_queues.d_keys[0], // d_out_queue + &problem->data_slices[0]->d_forward_output[0], // d_in_queue + NULL, // d_out_queue (VertexId*)NULL, (VertexId*)NULL, graph_slice->d_row_offsets, @@ -508,7 +511,7 @@ class BCEnactor : public EnactorBase gunrock::oprtr::advance::V2V); } - if (/*DEBUG &&*/ (retval = util::GRError(cudaThreadSynchronize(), "filter_forward::Kernel failed", __FILE__, __LINE__))) break; + if (DEBUG && (retval = util::GRError(cudaThreadSynchronize(), "filter_forward::Kernel failed", __FILE__, __LINE__))) break; cudaEventQuery(throttle_event); // give host memory mapped visibility to GPU updates //frontier_attribute.queue_index++; @@ -518,7 +521,6 @@ class BCEnactor : public EnactorBase if (INSTRUMENT || DEBUG) { if (retval = work_progress.GetQueueLength(frontier_attribute.queue_index, frontier_attribute.queue_length)) break; - if (DEBUG) printf(", %lld", (long long) frontier_attribute.queue_length); if (INSTRUMENT) { if (retval = enactor_stats.filter_kernel_stats.Accumulate( enactor_stats.filter_grid_size, @@ -526,11 +528,9 @@ class BCEnactor : public EnactorBase enactor_stats.total_lifetimes)) break; } } - // Check if done - if (done[0] == 0) break; - - if (DEBUG) printf("\n%lld", (long long) enactor_stats.iteration-1); - + top_offset = forward_queue_offsets.top(); + forward_queue_offsets.pop(); + if (DEBUG) printf("top offsets:%d\n", top_offset); } if (retval) break; diff --git a/gunrock/app/bc/bc_functor.cuh b/gunrock/app/bc/bc_functor.cuh index 4e0fdecfb..6b8e594d6 100644 --- a/gunrock/app/bc/bc_functor.cuh +++ 
b/gunrock/app/bc/bc_functor.cuh @@ -204,12 +204,12 @@ struct BackwardFunctor //Accumulate delta value //Accumulate bc value - //atomicAdd(&problem->d_ebc_values[e_id], result); + atomicAdd(&problem->d_ebc_values[e_id], result); if (s_id != problem->d_src_node[0]) { atomicAdd(&problem->d_deltas[s_id], result); atomicAdd(&problem->d_bc_values[s_id], result); - } + } } /** @@ -305,17 +305,17 @@ struct BackwardFunctor2 util::io::ModifiedLoad::Ld( to_delta, problem->d_deltas + d_id); - //Value result = from_sigma / to_sigma * (1.0 + to_delta); + Value result = from_sigma / to_sigma * (1.0 + to_delta); //Accumulate delta value //Accumulate bc value - //atomicAdd(&problem->d_ebc_values[e_id], result); + atomicAdd(&problem->d_ebc_values[e_id], result); - /*if (s_id != problem->d_src_node[0]) { + if (s_id != problem->d_src_node[0]) { atomicAdd(&problem->d_deltas[s_id], result); atomicAdd(&problem->d_bc_values[s_id], result); - }*/ + } } /** diff --git a/gunrock/util/test_utils.h b/gunrock/util/test_utils.h index b5e291587..c33d27d00 100644 --- a/gunrock/util/test_utils.h +++ b/gunrock/util/test_utils.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include From 01f997c474b4c95758fd027c625aae82e438ca43 Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Fri, 12 Jun 2015 12:42:04 -0700 Subject: [PATCH 06/36] debugging bc bug. Added DisplayDeviceResults with given indices list. 
--- gunrock/app/bc/bc_enactor.cuh | 3 +-- gunrock/util/test_utils.cuh | 38 +++++++++++++++++++++++++++++++++++ gunrock/util/test_utils.h | 1 + 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/gunrock/app/bc/bc_enactor.cuh b/gunrock/app/bc/bc_enactor.cuh index 2a0c621f7..24b4abe99 100644 --- a/gunrock/app/bc/bc_enactor.cuh +++ b/gunrock/app/bc/bc_enactor.cuh @@ -263,7 +263,6 @@ class BCEnactor : public EnactorBase //printf("offset:%d, current length:%d\n", cur_offset, frontier_attribute.queue_length); util::MemsetCopyVectorKernel<<<128, 128>>>(&problem->data_slices[0]->d_forward_output[cur_offset], graph_slice->frontier_queues.d_keys[frontier_attribute.selector], frontier_attribute.queue_length); //util::DisplayDeviceResults(graph_slice->frontier_queues.d_keys[frontier_attribute.selector], frontier_attribute.queue_length); - //util::DisplayDeviceResults(&problem->data_slices[0]->d_forward_output[cur_offset], frontier_attribute.queue_length); forward_queue_offsets.push(frontier_attribute.queue_length+cur_offset); } @@ -400,7 +399,7 @@ class BCEnactor : public EnactorBase forward_queue_offsets.pop(); while (!forward_queue_offsets.empty()) { frontier_attribute.queue_length = top_offset-forward_queue_offsets.top(); - printf("queue length:%d\n", frontier_attribute.queue_length); + util::DisplayDeviceResults(problem->data_slices[0]->d_sigmas, &problem->data_slices[0]->d_forward_output[forward_queue_offsets.top()], graph_slice->nodes, frontier_attribute.queue_length); /*frontier_attribute.queue_length = graph_slice->nodes; // Fill in the frontier_queues util::MemsetIdxKernel<<<128, 128>>>(graph_slice->frontier_queues.d_keys[0], graph_slice->nodes); diff --git a/gunrock/util/test_utils.cuh b/gunrock/util/test_utils.cuh index 91ca03c7e..93f223642 100644 --- a/gunrock/util/test_utils.cuh +++ b/gunrock/util/test_utils.cuh @@ -248,6 +248,44 @@ void DisplayDeviceResults( if (h_data) free(h_data); } +/** + * Verify the contents of a device array match those + * of a 
host array + */ +template +void DisplayDeviceResults( + DATATYPE *d_data, + INDEXTYPE *d_indices, + size_t num_elements, + size_t num_indices) +{ + printf("num_elements:%d\n", num_elements); + printf("num_indices:%d\n", num_indices); + // Allocate array on host + DATATYPE *h_data = (DATATYPE*) malloc(num_elements * sizeof(DATATYPE)); + INDEXTYPE *h_indices = (INDEXTYPE*) malloc(num_indices * sizeof(INDEXTYPE)); + + // Reduction data back + cudaMemcpy(h_data, d_data, sizeof(DATATYPE) * num_elements, cudaMemcpyDeviceToHost); + cudaMemcpy(h_indices, d_indices, sizeof(INDEXTYPE) * num_indices, cudaMemcpyDeviceToHost); + + // Display data + printf("\n\nData:\n"); + for (int i = 0; i < num_indices; i++) + { + PrintValue(h_indices[i]); + printf(":"); + assert(h_indices[i] < num_elements); + PrintValue(h_data[h_indices[i]]); + printf(", "); + } + printf("\n\n"); + + // Cleanup + if (h_data) free(h_data); + if (h_indices) free(h_indices); +} + /****************************************************************************** * Timing ******************************************************************************/ diff --git a/gunrock/util/test_utils.h b/gunrock/util/test_utils.h index c33d27d00..8c5e9d573 100644 --- a/gunrock/util/test_utils.h +++ b/gunrock/util/test_utils.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include From 1d2ff81fdb1515b44bf7033b7076e2a32f33ebf7 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Mon, 15 Jun 2015 12:15:12 -0700 Subject: [PATCH 07/36] add graph connectivity test before running mst --- tests/mst/test_mst.cu | 44 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/tests/mst/test_mst.cu b/tests/mst/test_mst.cu index 6083d9bd5..4e295ad56 100644 --- a/tests/mst/test_mst.cu +++ b/tests/mst/test_mst.cu @@ -28,6 +28,7 @@ #include // MST includes +#include #include #include #include @@ -83,6 +84,11 @@ void Usage() /** * @brief Displays the MST result * + * @tparam VertexId + * 
@tparam Value + * @tparam SizeT + * + * @param[in] graph reference to the CSR graph we process on */ //////////////////////////////////////////////////////////////////////////////// template @@ -122,6 +128,26 @@ void DisplaySolution(const Csr &graph, int *mst_output) if (source) { delete [] source; } } +/** + * @brief A simple connnectivity check utility + * + * @tparam VertexId + * @tparam Value + * @tparam SizeT + * + * @param[in] graph reference to the CSR graph we process on + */ +template +bool IsConnected(const Csr & graph) +{ + // malloc output graph + GunrockGraph *graph_output = + (GunrockGraph*)malloc(sizeof(GunrockGraph)); + unsigned int *components = (unsigned int*)malloc(sizeof(unsigned int)); + run_cc(graph_output, components, graph, 0, 1); + return *components == 1; +} + /** * @brief A simple CPU-based reference MST implementation. * @@ -214,10 +240,10 @@ long long int SimpleReferenceMST( //////////////////////////////////////////////////////////////////////////////// template void RunTests( - const Csr &graph, + const Csr & graph, int max_grid_size, int num_gpus, - mgpu::CudaContext& context) + mgpu::CudaContext & context) { printf("\nMINIMUM SPANNING TREE TEST\n"); @@ -411,13 +437,19 @@ int main(int argc, char** argv) * weight per edge. 
Note it only support FULLY-CONNECTED graphs * ***************************************************************/ - // run GPU tests - RunTests(csr, args, *context); - + // test graph connectivity and run test + if (IsConnected(csr)) + { + RunTests(csr, args, *context); + } + else + { + fprintf(stderr, "Unsupported non-fully connected graph input.\n"); + } } else { - fprintf(stderr, "Unspecified graph type\n"); + fprintf(stderr, "Unspecified graph type.\n"); return 1; } From dd22c01038b47668a03f1c174864e3c52134f222 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Mon, 15 Jun 2015 13:22:31 -0700 Subject: [PATCH 08/36] template type minor change --- tests/mst/test_mst.cu | 45 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/tests/mst/test_mst.cu b/tests/mst/test_mst.cu index 4e295ad56..afefdab23 100644 --- a/tests/mst/test_mst.cu +++ b/tests/mst/test_mst.cu @@ -140,11 +140,10 @@ void DisplaySolution(const Csr &graph, int *mst_output) template bool IsConnected(const Csr & graph) { - // malloc output graph - GunrockGraph *graph_output = - (GunrockGraph*)malloc(sizeof(GunrockGraph)); + GunrockGraph *temp = (GunrockGraph*)malloc(sizeof(GunrockGraph)); unsigned int *components = (unsigned int*)malloc(sizeof(unsigned int)); - run_cc(graph_output, components, graph, 0, 1); + run_cc(temp, components, graph, 0, 1); + if (temp) free(temp); return *components == 1; } @@ -269,7 +268,7 @@ void RunTests( "MST Problem Data Reset Failed", __FILE__, __LINE__); // perform MST - GpuTimer gpu_timer; // record the kernel running time + GpuTimer gpu_timer; // record the kernel running time gpu_timer.Start(); @@ -287,7 +286,7 @@ void RunTests( util::GRError(mst_problem->Extract(h_mst_output), "MST Problem Data Extraction Failed", __FILE__, __LINE__); - if (!g_quick) // run CPU reference test + if (!g_quick) // run CPU reference test { // calculate GPU final number of selected edges int num_selected_gpu = 0; @@ -342,17 +341,16 @@ void 
RunTests( */ template void RunTests( - const Csr &graph, - CommandLineArgs &args, - mgpu::CudaContext& context) + const Csr & graph, + CommandLineArgs & args, + mgpu::CudaContext & context) { - bool instrumented = false; // do not collect instrumentation from kernels - int max_grid_size = 0; // maximum grid size (up to the enactor) - int num_gpus = 1; // number of GPUs for multi-gpu enactor to use - g_quick = false; // Whether or not to skip ref validation + bool instrumented = 0; // do not collect instrumentation from kernels + int max_grid_size = 0; // maximum grid size (up to the enactor) + int num_gpus = 1; // number of GPUs for multi-gpu enactor to use + g_quick = 0; // Whether or not to skip ref validation instrumented = args.CheckCmdLineFlag("instrumented"); - g_quick = args.CheckCmdLineFlag("quick"); g_verbose = args.CheckCmdLineFlag("v"); @@ -405,9 +403,9 @@ int main(int argc, char** argv) // matrix-market coordinate-formatted graph file - typedef int VertexId; // use as the vertex identifier type - typedef int Value; // use as the value type - typedef int SizeT; // use as the graph size type + typedef int VertexId; // use as the vertex identifier type + typedef int Value; // use as the value type + typedef int SizeT; // use as the graph size type // default value for stream_from_host is false if (graph_args < 1) @@ -427,17 +425,14 @@ int main(int argc, char** argv) g_undirected, false) != 0) { return 1; } - // display graph + // display input graph // csr.DisplayGraph(); - /*************************************************************** - * To make sure two graphs have same weight value for each edge * - * we have to change ll_value = rand()%64 in market.cuh file to * - * some NON-RANDOM value if the original graph does NOT contain * - * weight per edge. 
Note it only support FULLY-CONNECTED graphs * - ***************************************************************/ + /************************************************************************** + * Note: Minimum Spanning Tree only supports undirected, connected graphs * + **************************************************************************/ - // test graph connectivity and run test + // test graph connectivity if (IsConnected(csr)) { RunTests(csr, args, *context); From 098df4fe0365efffb243aa5c4af2a96d517d98fb Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Tue, 16 Jun 2015 11:24:17 -0700 Subject: [PATCH 09/36] added test script --- tests/mst/run.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tests/mst/run.sh diff --git a/tests/mst/run.sh b/tests/mst/run.sh new file mode 100644 index 000000000..694a12fc0 --- /dev/null +++ b/tests/mst/run.sh @@ -0,0 +1,36 @@ +#!/bin/sh + +OPTION="--quick" + +# --quick running without CPU reference algorithm, if you want to test CPU +# reference algorithm, delete $OPTION2 in some lines. Warning: for large +# data this can take a long time. 
+ +# get all execution files in ./bin +files=(./bin/*) + +# split file names into arr +arr=$(echo $files | tr " " "\n") +max_ver_num="$" +exe_file=${arr[0]} + +# iterate over all file names to get the largest version number +for x in $arr +do + output=$(grep -o "[0-9]\.[0-9]" <<<"$x") + if [ "$output" \> "$max_ver_num" ]; then + exe_file=$x + fi +done + +# put OS and Device here +SUFFIX="ubuntu12.04.k40c" + +mkdir -p eval/$SUFFIX + +for i in belgium_osm coAuthorsDBLP delaunay_n13 delaunay_n21 +do + echo $exe_file market ../../dataset/large/$i/$i.mtx $OPTION + $exe_file market ../../dataset/large/$i/$i.mtx $OPTION > eval/$SUFFIX/$i.$SUFFIX.txt + sleep 1 +done From 54f5673b1a68231da901bb40cc9405ddb125aef9 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 17 Jun 2015 11:09:39 -0700 Subject: [PATCH 10/36] data type changes, MST now supports float/double weight values --- gunrock/app/mst/mst_enactor.cuh | 299 +++----------------------------- gunrock/app/mst/mst_functor.cuh | 27 +-- gunrock/app/mst/mst_problem.cuh | 69 ++++---- gunrock/util/select_utils.cuh | 216 ++++++++++------------- tests/mst/test_mst.cu | 49 +++--- 5 files changed, 183 insertions(+), 477 deletions(-) diff --git a/gunrock/app/mst/mst_enactor.cuh b/gunrock/app/mst/mst_enactor.cuh index 7d7d7a0a1..4908b71b9 100644 --- a/gunrock/app/mst/mst_enactor.cuh +++ b/gunrock/app/mst/mst_enactor.cuh @@ -297,9 +297,9 @@ public: problem->data_slices[0]->d_keys_array, problem->data_slices[0]->d_edge_weights, graph_slice->edges, - std::numeric_limits::max(), - mgpu::minimum(), - mgpu::equal_to(), + std::numeric_limits::max(), + mgpu::minimum(), + mgpu::equal_to(), problem->data_slices[0]->d_reduced_keys, problem->data_slices[0]->d_reduced_vals, &num_segments, (int*)0, context); @@ -341,8 +341,8 @@ public: util::MemsetKernel<<<128, 128>>>(problem->data_slices[0]->d_successors, std::numeric_limits::max(), graph_slice->nodes); util::MemsetKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, - 
std::numeric_limits::max(), graph_slice->nodes); + problem->data_slices[0]->d_temp_index, + std::numeric_limits::max(), graph_slice->nodes); util::MemsetIdxKernel<<<128, 128>>>( graph_slice->frontier_queues.d_keys[frontier_attribute.selector], graph_slice->nodes); @@ -680,41 +680,41 @@ public: //////////////////////////////////////////////////////////////////////// // filter to remove all -1 in d_col_indices util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, + problem->data_slices[0]->d_temp_index, problem->data_slices[0]->d_col_indices, graph_slice->edges); util::CUBSelect( - problem->data_slices[0]->d_temp_storage, graph_slice->edges, + problem->data_slices[0]->d_temp_index, graph_slice->edges, problem->data_slices[0]->d_col_indices, num_selected); //////////////////////////////////////////////////////////////////////// // filter to remove all -1 in d_edge_weights util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, + problem->data_slices[0]->d_temp_value, problem->data_slices[0]->d_edge_weights, graph_slice->edges); - util::CUBSelect( - problem->data_slices[0]->d_temp_storage, graph_slice->edges, + util::CUBSelect( + problem->data_slices[0]->d_temp_value, graph_slice->edges, problem->data_slices[0]->d_edge_weights, num_selected); //////////////////////////////////////////////////////////////////////// // filter to remove all -1 in d_keys_array util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, + problem->data_slices[0]->d_temp_index, problem->data_slices[0]->d_keys_array, graph_slice->edges); util::CUBSelect( - problem->data_slices[0]->d_temp_storage, graph_slice->edges, + problem->data_slices[0]->d_temp_index, graph_slice->edges, problem->data_slices[0]->d_keys_array, num_selected); //////////////////////////////////////////////////////////////////////// // filter to remove all -1 in d_origin_edges util::MemsetCopyVectorKernel<<<128, 128>>>( - 
problem->data_slices[0]->d_temp_storage, + problem->data_slices[0]->d_temp_index, problem->data_slices[0]->d_origin_edges, graph_slice->edges); util::CUBSelect( - problem->data_slices[0]->d_temp_storage, graph_slice->edges, + problem->data_slices[0]->d_temp_index, graph_slice->edges, problem->data_slices[0]->d_origin_edges, num_selected); if (DEBUG) printf(" * finished remove edges in one super-vertex.\n"); @@ -785,12 +785,13 @@ public: //////////////////////////////////////////////////////////////////////// // bring edges, weights, origin_eids together according to keys util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, + problem->data_slices[0]->d_temp_index, problem->data_slices[0]->d_keys_array, graph_slice->edges); util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_tmp_storage, + //problem->data_slices[0]->d_temp_value, + problem->data_slices[0]->d_super_edges, // used as temp_index problem->data_slices[0]->d_keys_array, graph_slice->edges); @@ -801,276 +802,16 @@ public: util::CUBRadixSort( true, graph_slice->edges, - problem->data_slices[0]->d_temp_storage, + problem->data_slices[0]->d_temp_index, problem->data_slices[0]->d_edge_weights); util::CUBRadixSort( true, graph_slice->edges, - problem->data_slices[0]->d_tmp_storage, + //problem->data_slices[0]->d_temp_value, + problem->data_slices[0]->d_super_edges, // used as temp_index problem->data_slices[0]->d_origin_edges); if (DEBUG) printf(" * finished sort according to new vertex ids.\n"); - - /* - //////////////////////////////////////////////////////////////////////// - // remove duplicated edges between super-vertices (optional operation) - if (false)//(enactor_stats.iteration == 0) - { - ////////////////////////////////////////////////////////////////////// - // generate edge flag array based on source vertices list [1] - // using MarkSegmentFromKeys on d_keys_array - util::MemsetKernel<<<128, 128>>>( - problem->data_slices[0]->d_flags_array, 0, 
graph_slice->edges); - util::MarkSegmentFromKeys<<<128, 128>>>( - problem->data_slices[0]->d_flags_array, - problem->data_slices[0]->d_keys_array, - graph_slice->edges); - - if (debug_info) - { - printf(":: mark segment to generate edge flag array [1] ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_flags_array, graph_slice->edges); - } - - ////////////////////////////////////////////////////////////////////// - // generate edge flag array based on destination vertices list [2] - // create a flags array on the output of segmented sort based on the - // difference in u-v pair using MarkSegmentsFromKeys kernel function - util::MarkSegmentFromKeys<<<128, 128>>>( - problem->data_slices[0]->d_edge_flags, - problem->data_slices[0]->d_col_indices, - graph_slice->edges); - - if (debug_info) - { - printf(":: mark segment to generate edge flag array [2] ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_edge_flags, graph_slice->edges); - } - - ////////////////////////////////////////////////////////////////////// - // do or operation for d_edge_flags and d_flags_array - u-v pair - frontier_attribute.queue_index = 0; - frontier_attribute.selector = 0; - frontier_attribute.queue_length = graph_slice->edges; - frontier_attribute.queue_reset = true; - - gunrock::oprtr::filter::Kernel - - <<>>( - enactor_stats.iteration + 1, - frontier_attribute.queue_reset, - frontier_attribute.queue_index, - enactor_stats.num_gpus, - frontier_attribute.queue_length, - NULL, - graph_slice->frontier_queues.d_values[frontier_attribute.selector], - NULL, - graph_slice->frontier_queues.d_values[frontier_attribute.selector^1], - data_slice, - NULL, - work_progress, - graph_slice->frontier_elements[frontier_attribute.selector], - graph_slice->frontier_elements[frontier_attribute.selector^1], - enactor_stats.filter_kernel_stats); - - if (DEBUG && (retval = util::GRError(cudaDeviceSynchronize(), - "filter::Kernel failed", __FILE__, __LINE__))) break; - - if (DEBUG) printf(" 
* finished edge flags - second edge removal.\n"); - - if (debug_info) - { - printf(":: duplicated edges between super-vertex d_edge_flags ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_edge_flags, graph_slice->edges); - printf(":: edge removal u list (d_keys_array) ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_keys_array, graph_slice->edges); - printf(":: edge removal v list (d_col_indices) ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_col_indices, graph_slice->edges); - printf(":: edge removal w list (d_edge_weights) ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_edge_weights, graph_slice->edges); - } - - ////////////////////////////////////////////////////////////////////// - // scan edge_flags to get edge_keys used for sorting - Scan( - (int*)problem->data_slices[0]->d_edge_flags, graph_slice->edges, - (int)0, mgpu::plus(), (int*)0, (int*)0, - (int*)problem->data_slices[0]->d_temp_storage, context); - - // set first bit of edge_flags back to 1 - util::MemsetKernel<<<1, 1>>>( - problem->data_slices[0]->d_edge_flags, 1, 1); - - ////////////////////////////////////////////////////////////////////// - // calculate the number of segments for edge_offsets - num_segments = Reduce( - problem->data_slices[0]->d_edge_flags, graph_slice->edges, context); - - ////////////////////////////////////////////////////////////////////// - // generate edge_offsets used for SegSortFromIndices - // edge_flags stored in d_row_offsets - frontier_attribute.queue_index = 0; - frontier_attribute.selector = 0; - frontier_attribute.queue_length = graph_slice->edges; - frontier_attribute.queue_reset = true; - - gunrock::oprtr::filter::Kernel - - <<>>( - enactor_stats.iteration + 1, - frontier_attribute.queue_reset, - frontier_attribute.queue_index, - enactor_stats.num_gpus, - frontier_attribute.queue_length, - NULL, - graph_slice->frontier_queues.d_values[frontier_attribute.selector], - NULL, - 
graph_slice->frontier_queues.d_values[frontier_attribute.selector^1], - data_slice, - NULL, - work_progress, - graph_slice->frontier_elements[frontier_attribute.selector], - graph_slice->frontier_elements[frontier_attribute.selector^1], - enactor_stats.filter_kernel_stats); - - if (DEBUG && (retval = util::GRError(cudaDeviceSynchronize(), - "filter::Kernel failed", __FILE__, __LINE__))) break; - - ////////////////////////////////////////////////////////////////////// - // segmented sort d_col_indices, d_edge_weights and d_origin_edges - // copy d_edge_weights to d_temp_storage to use for segmented sort - util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, - problem->data_slices[0]->d_edge_weights, - graph_slice->edges); - - util::SegSortFromIndices( - context, - num_segments, - problem->data_slices[0]->d_row_offsets, - graph_slice->edges, - problem->data_slices[0]->d_edge_weights, - problem->data_slices[0]->d_col_indices); - - util::SegSortFromIndices( - context, - num_segments, - problem->data_slices[0]->d_row_offsets, - graph_slice->edges, - problem->data_slices[0]->d_temp_storage, - problem->data_slices[0]->d_origin_edges); - - if (DEBUG) printf(" * finished segmentedSort for edge reduction.\n"); - - if (debug_info) - { - printf(":: second reduction segmented sort d_col_indices ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_col_indices, graph_slice->edges); - printf(":: second reduction segmented sort d_edge_weights ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_edge_weights, graph_slice->edges); - printf(":: second reduction segmented sort d_origin_edges ::"); - util::DisplayDeviceResults( - problem->data_slices[0]->d_origin_edges, graph_slice->edges); - } - - ////////////////////////////////////////////////////////////////////// - // mark -1 to edges that needed to be removed using advance kernel - frontier_attribute.queue_index = 0; - frontier_attribute.selector = 0; - 
frontier_attribute.queue_length = graph_slice->edges; - frontier_attribute.queue_reset = true; - - gunrock::oprtr::filter::Kernel - - <<>>( - enactor_stats.iteration + 1, - frontier_attribute.queue_reset, - frontier_attribute.queue_index, - enactor_stats.num_gpus, - frontier_attribute.queue_length, - NULL, - graph_slice->frontier_queues.d_values[frontier_attribute.selector], - NULL, - graph_slice->frontier_queues.d_values[frontier_attribute.selector^1], - data_slice, - NULL, - work_progress, - graph_slice->frontier_elements[frontier_attribute.selector], - graph_slice->frontier_elements[frontier_attribute.selector^1], - enactor_stats.filter_kernel_stats); - - if (DEBUG && (retval = util::GRError(cudaDeviceSynchronize(), - "filter::Kernel failed", __FILE__, __LINE__))) break; - - if (DEBUG) printf(" * finished mark -1 for duplicated edges.\n"); - - ////////////////////////////////////////////////////////////////////// - // filter to remove all -1 in d_col_indices - util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, - problem->data_slices[0]->d_col_indices, - graph_slice->edges); - util::CUBSelect( - problem->data_slices[0]->d_temp_storage, - graph_slice->edges, - problem->data_slices[0]->d_col_indices, - num_selected); - - ////////////////////////////////////////////////////////////////////// - // filter to remove all -1 in d_edge_weights - util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, - problem->data_slices[0]->d_edge_weights, - graph_slice->edges); - util::CUBSelect( - problem->data_slices[0]->d_temp_storage, - graph_slice->edges, - problem->data_slices[0]->d_edge_weights, - num_selected); - - ////////////////////////////////////////////////////////////////////// - // filter to remove all -1 in d_keys_array - util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, - problem->data_slices[0]->d_keys_array, - graph_slice->edges); - util::CUBSelect( - 
problem->data_slices[0]->d_temp_storage, - graph_slice->edges, - problem->data_slices[0]->d_keys_array, - num_selected); - - ////////////////////////////////////////////////////////////////////// - // filter to remove all -1 in d_origin_edges - util::MemsetCopyVectorKernel<<<128, 128>>>( - problem->data_slices[0]->d_temp_storage, - problem->data_slices[0]->d_origin_edges, - graph_slice->edges); - util::CUBSelect( - problem->data_slices[0]->d_temp_storage, - graph_slice->edges, - problem->data_slices[0]->d_origin_edges, - num_selected); - - if (DEBUG) - printf(" * finished remove edges between super-vertices.\n"); - - graph_slice->edges = *num_selected; - - if (DEBUG) - printf(" * finished update #edges: %d [2]\n", graph_slice->edges); - - } // end of removing duplicated edges between super-vertices - */ - if (DEBUG) printf(" (d). Constructing the Vertex List.\n"); //////////////////////////////////////////////////////////////////////// diff --git a/gunrock/app/mst/mst_functor.cuh b/gunrock/app/mst/mst_functor.cuh index 1f5393571..7f38e5d31 100644 --- a/gunrock/app/mst/mst_functor.cuh +++ b/gunrock/app/mst/mst_functor.cuh @@ -129,7 +129,7 @@ struct EdgeFunctor VertexId e_id = 0, VertexId e_id_in = 0) { util::io::ModifiedStore::St( - problem->d_origin_edges[e_id], problem->d_temp_storage + s_id); + problem->d_origin_edges[e_id], problem->d_temp_index + s_id); } }; @@ -185,7 +185,7 @@ struct MarkFunctor { // mark minimum spanning tree output edges util::io::ModifiedStore::St( - 1, problem->d_mst_output + problem->d_temp_storage[s_id]); + 1, problem->d_mst_output + problem->d_temp_index[s_id]); } }; @@ -247,7 +247,7 @@ struct CyRmFunctor // remove some edges in the MST output result util::io::ModifiedStore::St( - 0, problem->d_mst_output + problem->d_temp_storage[s_id]); + 0, problem->d_mst_output + problem->d_temp_index[s_id]); } }; @@ -364,13 +364,14 @@ struct EgRmFunctor VertexId e_id = 0, VertexId e_id_in = 0) { util::io::ModifiedStore::St( - -1, 
problem->d_keys_array + e_id); + (VertexId)-1, problem->d_keys_array + e_id); util::io::ModifiedStore::St( - -1, problem->d_col_indices + e_id); + (VertexId)-1, problem->d_col_indices + e_id); + //util::io::ModifiedStore::St( + // (Value)-1, problem->d_edge_weights + e_id); + problem->d_edge_weights[e_id] = (Value) -1; util::io::ModifiedStore::St( - -1, problem->d_edge_weights + e_id); - util::io::ModifiedStore::St( - -1, problem->d_origin_edges + e_id); + (VertexId)-1, problem->d_origin_edges + e_id); } /** @@ -506,7 +507,7 @@ struct EIdxFunctor VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) { util::io::ModifiedStore::St( - node, problem->d_row_offsets + problem->d_temp_storage[node]); + node, problem->d_row_offsets + problem->d_temp_index[node]); } }; @@ -607,13 +608,13 @@ struct SuRmFunctor VertexId node, DataSlice *problem, Value v = 0, SizeT nid=0) { util::io::ModifiedStore::St( - -1, problem->d_keys_array + node); + (VertexId)-1, problem->d_keys_array + node); util::io::ModifiedStore::St( - -1, problem->d_col_indices + node); + (VertexId)-1, problem->d_col_indices + node); util::io::ModifiedStore::St( - -1, problem->d_edge_weights + node); + (Value) -1, problem->d_edge_weights + node); util::io::ModifiedStore::St( - -1, problem->d_origin_edges + node); + (VertexId)-1, problem->d_origin_edges + node); } }; diff --git a/gunrock/app/mst/mst_problem.cuh b/gunrock/app/mst/mst_problem.cuh index b2b8e7f1f..9f4e3db6b 100644 --- a/gunrock/app/mst/mst_problem.cuh +++ b/gunrock/app/mst/mst_problem.cuh @@ -47,7 +47,7 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> typedef _SizeT SizeT; typedef _Value Value; - static const bool MARK_PREDECESSORS = true; + static const bool MARK_PREDECESSORS = true; static const bool ENABLE_IDEMPOTENCE = false; // helper structures @@ -71,10 +71,10 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> VertexId *d_origin_edges; // origin edge list keep track of e_ids VertexId 
*d_super_edges; // super edge list for next iteration VertexId *d_col_indices; // column indices of CSR graph (edges) + VertexId *d_temp_index; // used for storing temp index + Value *d_temp_value; // used for storing temp value Value *d_reduced_vals; // store reduced minimum weights Value *d_edge_weights; // store weights per edge - Value *d_temp_storage; // used for storing temp arrays - Value *d_tmp_storage; // used for storing temp arrays SizeT *d_supervtx_ids; // super vertex ids scanned from flags SizeT *d_row_offsets; // row offsets of CSR graph }; @@ -107,10 +107,7 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> * @brief MSTProblem default constructor */ - MSTProblem(): - nodes(0), - edges(0), - num_gpus(0) {} + MSTProblem(): nodes(0), edges(0), num_gpus(0) {} /** * @brief MSTProblem constructor @@ -153,9 +150,9 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> if (data_slices[i]->d_keys_array) util::GRError(cudaFree(data_slices[i]->d_keys_array), "GpuSlice cudaFree d_keys_array failed", __FILE__, __LINE__); - if (data_slices[i]->d_temp_storage) - util::GRError(cudaFree(data_slices[i]->d_temp_storage), - "GpuSlice cudaFree d_temp_storage failed", __FILE__, __LINE__); + if (data_slices[i]->d_temp_index) + util::GRError(cudaFree(data_slices[i]->d_temp_index), + "GpuSlice cudaFree d_temp_index failed", __FILE__, __LINE__); if (data_slices[i]->d_reduced_keys) util::GRError(cudaFree(data_slices[i]->d_reduced_keys), "GpuSlice cudaFree d_reduced_keys failed", __FILE__, __LINE__); @@ -183,9 +180,9 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> if (data_slices[i]->d_edge_flags) util::GRError(cudaFree(data_slices[i]->d_edge_flags), "GpuSlice cudaFree d_edge_flags failed", __FILE__, __LINE__); - if (data_slices[i]->d_tmp_storage) - util::GRError(cudaFree(data_slices[i]->d_tmp_storage), - "GpuSlice cudaFree d_tmp_storage failed", __FILE__, __LINE__); + if (data_slices[i]->d_temp_value) + 
util::GRError(cudaFree(data_slices[i]->d_temp_value), + "GpuSlice cudaFree d_temp_value failed", __FILE__, __LINE__); if (data_slices[i]->d_super_edges) util::GRError(cudaFree(data_slices[i]->d_super_edges), "GpuSlice cudaFree d_super_edges failed", __FILE__, __LINE__); @@ -348,7 +345,7 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> __FILE__, __LINE__)) return retval; data_slices[0]->d_reduced_vals = d_reduced_vals; util::MemsetKernel<<<128, 128>>>( - data_slices[0]->d_reduced_vals, 0, nodes); + data_slices[0]->d_reduced_vals, (Value)0, nodes); unsigned int *d_flags_array; if (retval = util::GRError(cudaMalloc( @@ -370,15 +367,15 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> util::MemsetKernel<<<128, 128>>>( data_slices[0]->d_keys_array, 0, edges); - SizeT *d_temp_storage; + VertexId *d_temp_index; if (retval = util::GRError(cudaMalloc( - (void**)&d_temp_storage, - edges * sizeof(SizeT)), - "MSTProblem cudaMalloc d_temp_storage Failed", + (void**)&d_temp_index, + edges * sizeof(VertexId)), + "MSTProblem cudaMalloc d_temp_index Failed", __FILE__, __LINE__)) return retval; - data_slices[0]->d_temp_storage = d_temp_storage; + data_slices[0]->d_temp_index = d_temp_index; util::MemsetKernel<<<128, 128>>>( - data_slices[0]->d_temp_storage, 0, edges); + data_slices[0]->d_temp_index, (VertexId)0, edges); VertexId *d_reduced_keys; if (retval = util::GRError(cudaMalloc( @@ -473,15 +470,15 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> util::MemsetKernel<<<128, 128>>>( data_slices[0]->d_edge_flags, 0, edges); - Value *d_tmp_storage; + Value *d_temp_value; if (retval = util::GRError(cudaMalloc( - (void**)&d_tmp_storage, + (void**)&d_temp_value, edges * sizeof(Value)), - "MSTProblem cudaMalloc d_tmp_storage Failed", + "MSTProblem cudaMalloc d_temp_value Failed", __FILE__, __LINE__)) return retval; - data_slices[0]->d_tmp_storage = d_tmp_storage; + data_slices[0]->d_temp_value = d_temp_value; 
util::MemsetKernel<<<128, 128>>>( - data_slices[0]->d_tmp_storage, 0, edges); + data_slices[0]->d_temp_value, (Value)0, edges); data_slices[0]->d_labels = NULL; } @@ -576,14 +573,14 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> data_slices[gpu]->d_keys_array = d_keys_array; } - if (!data_slices[gpu]->d_temp_storage) + if (!data_slices[gpu]->d_temp_index) { - SizeT *d_temp_storage; + VertexId *d_temp_index; if (retval = util::GRError(cudaMalloc( - (void**)&d_temp_storage, edges * sizeof(SizeT)), - "MSTProblem cudaMalloc d_temp_storage Failed", + (void**)&d_temp_index, edges * sizeof(VertexId)), + "MSTProblem cudaMalloc d_temp_index Failed", __FILE__, __LINE__)) return retval; - data_slices[gpu]->d_temp_storage = d_temp_storage; + data_slices[gpu]->d_temp_index = d_temp_index; } if (!data_slices[gpu]->d_successors) @@ -685,14 +682,14 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> data_slices[gpu]->d_edge_flags = d_edge_flags; } - if (!data_slices[gpu]->d_tmp_storage) + if (!data_slices[gpu]->d_temp_value) { - Value *d_tmp_storage; + Value *d_temp_value; if (retval = util::GRError(cudaMalloc( - (void**)&d_tmp_storage, edges * sizeof(Value)), - "MSTProblem cudaMalloc d_tmp_storage Failed", + (void**)&d_temp_value, edges * sizeof(Value)), + "MSTProblem cudaMalloc d_temp_value Failed", __FILE__, __LINE__)) return retval; - data_slices[gpu]->d_tmp_storage = d_tmp_storage; + data_slices[gpu]->d_temp_value = d_temp_value; } data_slices[0]->d_labels = NULL; @@ -727,4 +724,4 @@ struct MSTProblem : ProblemBase<_VertexId, _SizeT, _USE_DOUBLE_BUFFER> // Local Variables: // mode:c++ // c-file-style: "NVIDIA" -// End: \ No newline at end of file +// End: diff --git a/gunrock/util/select_utils.cuh b/gunrock/util/select_utils.cuh index 2db66ca14..1da71e8fa 100644 --- a/gunrock/util/select_utils.cuh +++ b/gunrock/util/select_utils.cuh @@ -18,136 +18,104 @@ namespace gunrock { namespace util { - /** - * \addtogroup PublicInterface 
- * @{ - */ - - //--------------------------------------------------------------------- - // Globals, constants and typedefs - //--------------------------------------------------------------------- - struct GreaterThan - { - int compare; - - __host__ __device__ __forceinline__ - GreaterThan(int compare) : compare(compare) { } - - __host__ __device__ __forceinline__ - bool operator()(const int &a) const { return (a > compare); } - }; - - /** - * @brief selects items from from a sequence of int keys using a - * section functor (greater-than) - * - */ - template - cudaError_t CUBSelect( - VertexId *d_input, - SizeT num_elements, - VertexId *d_output, - unsigned int *num_selected) - { - cudaError_t retval = cudaSuccess; - - /* - VertexId *input = NULL; - VertexId *output = NULL; - - if (util::GRError((retval = cudaMalloc( - &input, sizeof(VertexId)*d_num_elements)), - "CUBSelect input malloc failed", - __FILE__, __LINE__)) return retval; - if (util::GRError((retval = cudaMalloc( - &output, sizeof(VertexId)*d_num_elements)), - "CUBSelect output malloc failed", - __FILE__, __LINE__)) return retval; - - cub::DoubleBuffer d_input_buffer(d_input, input); - cub::DoubleBuffer d_output_buffer(d_output, output); - */ - - unsigned int *d_num_selected = NULL; - if (util::GRError((retval = cudaMalloc( - (void**)&d_num_selected, sizeof(unsigned int))), - "CUBSelect d_num_selected malloc failed", - __FILE__, __LINE__)) return retval; - - void *d_temp_storage = NULL; - size_t temp_storage_bytes = 0; - GreaterThan select_op(-1); - - // determine temporary device storage requirements - if (util::GRError((retval = cub::DeviceSelect::If( - d_temp_storage, - temp_storage_bytes, - d_input, - d_output, - d_num_selected, - num_elements, - select_op)), - "CUBSelect cub::DeviceSelect::If failed", - __FILE__, __LINE__)) return retval; - - // allocate temporary storage - if (util::GRError((retval = cudaMalloc( - &d_temp_storage, temp_storage_bytes)), - "CUBSelect malloc d_temp_storage failed", 
- __FILE__, __LINE__)) return retval; - - // run selection - if (util::GRError((retval = cub::DeviceSelect::If( - d_temp_storage, - temp_storage_bytes, - d_input, - d_output, - d_num_selected, - num_elements, - select_op)), +/** + * \addtogroup PublicInterface + * @{ + */ + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- +struct GreaterThan +{ + int compare; + + __host__ __device__ __forceinline__ + GreaterThan(int compare) : compare(compare) { } + + __host__ __device__ __forceinline__ + bool operator()(const int &a) const { return (a > compare); } +}; + +/** + * @brief selects items from from a sequence of int keys using a + * section functor (greater-than) + * + */ +template +cudaError_t CUBSelect( + T *d_input, + SizeT num_elements, + T *d_output, + unsigned int *num_selected) +{ + cudaError_t retval = cudaSuccess; + unsigned int *d_num_selected = NULL; + + if (util::GRError( + (retval = cudaMalloc((void**)&d_num_selected, sizeof(unsigned int))), + "CUBSelect d_num_selected malloc failed", + __FILE__, __LINE__)) return retval; + + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + GreaterThan select_op(-1); + + // determine temporary device storage requirements + if (util::GRError( + (retval = cub::DeviceSelect::If( + d_temp_storage, + temp_storage_bytes, + d_input, + d_output, + d_num_selected, + num_elements, + select_op)), "CUBSelect cub::DeviceSelect::If failed", __FILE__, __LINE__)) return retval; - /* - // copy back output - if (util::GRError((retval = cudaMemcpy( - d_output, - d_output_buffer.Current(), - sizeof(VertexId)*(*d_num_selected), - cudaMemcpyDeviceToDevice)), - "CUBSelect copy back output failed", - __FILE__, __LINE__)) return retval; - */ - - if (util::GRError((retval = cudaMemcpy( - num_selected, - d_num_selected, - sizeof(unsigned int), - cudaMemcpyDeviceToHost)), - "CUBSelect copy back 
num_selected failed", - __FILE__, __LINE__)) return retval; - - // clean up - if (util::GRError((retval = cudaFree(d_temp_storage)), - "CUBSelect free d_temp_storage failed", - __FILE__, __LINE__)) return retval; - if (util::GRError((retval = cudaFree(d_num_selected)), - "CUBSelect free d_num_selected failed", - __FILE__, __LINE__)) return retval; + // allocate temporary storage + if (util::GRError( + (retval = cudaMalloc(&d_temp_storage, temp_storage_bytes)), + "CUBSelect malloc d_temp_storage failed", + __FILE__, __LINE__)) return retval; + + // run selection + if (util::GRError( + (retval = cub::DeviceSelect::If( + d_temp_storage, + temp_storage_bytes, + d_input, + d_output, + d_num_selected, + num_elements, + select_op)), + "CUBSelect cub::DeviceSelect::If failed", + __FILE__, __LINE__)) return retval; - /* - if (util::GRError((retval = cudaFree(input)), - "CUBSelect free input failed", - __FILE__, __LINE__)) return retval; - if (util::GRError((retval = cudaFree(output)), - "CUBSelect free output failed", - __FILE__, __LINE__)) return retval; - */ + if (util::GRError( + (retval = cudaMemcpy( + num_selected, + d_num_selected, + sizeof(unsigned int), + cudaMemcpyDeviceToHost)), + "CUBSelect copy back num_selected failed", + __FILE__, __LINE__)) return retval; + + // clean up + if (util::GRError( + (retval = cudaFree(d_temp_storage)), + "CUBSelect free d_temp_storage failed", + __FILE__, __LINE__)) return retval; + if (util::GRError( + (retval = cudaFree(d_num_selected)), + "CUBSelect free d_num_selected failed", + __FILE__, __LINE__)) return retval; - return retval; - } + return retval; +} - /** @} */ +/** @} */ } //util } //gunrock diff --git a/tests/mst/test_mst.cu b/tests/mst/test_mst.cu index afefdab23..d74e89233 100644 --- a/tests/mst/test_mst.cu +++ b/tests/mst/test_mst.cu @@ -161,18 +161,18 @@ bool IsConnected(const Csr & graph) */ //////////////////////////////////////////////////////////////////////////////// template -long long int SimpleReferenceMST( 
+Value SimpleReferenceMST( const Value *edge_values, const Csr &graph) { - printf("\nREFERENCE TEST\n"); + printf("\nMST CPU REFERENCE TEST\n"); // Kruskal minimum spanning tree preparations using namespace boost; - typedef adjacency_list < vecS, vecS, undirectedS, - no_property, property < edge_weight_t, int > > Graph; + typedef adjacency_list< vecS, vecS, undirectedS, + no_property, property > Graph; typedef graph_traits < Graph >::edge_descriptor Edge; typedef graph_traits < Graph >::vertex_descriptor Vertex; - typedef std::pair E; + typedef std::pair E; E *edge_pairs = new E[graph.edges]; int idx = 0; @@ -190,16 +190,18 @@ long long int SimpleReferenceMST( CpuTimer cpu_timer; // record the kernel running time cpu_timer.Start(); + // compute reference using kruskal_min_spanning_tree algorithm kruskal_minimum_spanning_tree(g, std::back_inserter(spanning_tree)); + cpu_timer.Stop(); float elapsed_cpu = cpu_timer.ElapsedMillis(); // analyze reference results - SizeT num_selected_cpu = 0; - long long int total_weight_cpu = 0; + SizeT num_selected_cpu = 0; + Value total_weight_cpu = 0; - if (graph.nodes <= 50) printf("CPU Minimum Spanning Tree\n"); + if (graph.nodes <= 50) { printf("CPU Minimum Spanning Tree\n"); } for (std::vector < Edge >::iterator ei = spanning_tree.begin(); ei != spanning_tree.end(); ++ei) { @@ -207,7 +209,7 @@ long long int SimpleReferenceMST( { // print the edge pairs in the minimum spanning tree printf("%ld %ld\n", source(*ei, g), target(*ei, g)); - // printf(" with weight of %d\n", weight[*ei]); + // printf(" with weight of %f\n", weight[*ei]); } ++num_selected_cpu; total_weight_cpu += weight[*ei]; @@ -297,27 +299,27 @@ void RunTests( // printf("\nGPU - Number of Edges in MST: %d\n", num_selected_gpu); // calculate GPU total selected MST weights for validation - long long int total_weight_gpu = 0; + Value total_weight_gpu = 0; for (int iter = 0; iter < graph.edges; ++iter) { total_weight_gpu += h_mst_output[iter] * graph.edge_values[iter]; } 
// correctness validation - long long int total_weight_cpu = - SimpleReferenceMST(graph.edge_values, graph); + Value total_weight_cpu = SimpleReferenceMST(graph.edge_values, graph); if (total_weight_cpu == total_weight_gpu) { // print the edge pairs in the minimum spanning tree DisplaySolution(graph, h_mst_output); printf("\nCORRECT.\n"); + std::cout << "CPU Computed Total Weight = " << total_weight_cpu << std::endl; + std::cout << "GPU Computed Total Weight = " << total_weight_gpu << std::endl; } else { - printf("INCORRECT. \n" - "CPU Computed Total Weight = %lld\n" - "GPU Computed Total Weight = %lld\n", - total_weight_cpu, total_weight_gpu); + printf("INCORRECT.\n"); + std::cout << "CPU Computed Total Weight = " << total_weight_cpu << std::endl; + std::cout << "GPU Computed Total Weight = " << total_weight_gpu << std::endl; } } @@ -400,12 +402,12 @@ int main(int argc, char** argv) if (graph_type == "market") { - // matrix-market coordinate-formatted graph file - typedef int VertexId; // use as the vertex identifier type + // currently support Value type: int, float, double + typedef int VertexId; // use as the vertex identifier typedef int Value; // use as the value type - typedef int SizeT; // use as the graph size type + typedef int SizeT; // use as the graph size // default value for stream_from_host is false if (graph_args < 1) @@ -420,13 +422,10 @@ int main(int argc, char** argv) // template argument = true because the graph has edge values Csr csr(false); if (graphio::BuildMarketGraph( - market_filename, - csr, - g_undirected, - false) != 0) { return 1; } + market_filename, csr, g_undirected, false) != 0) { return 1; } // display input graph - // csr.DisplayGraph(); + // csr.DisplayGraph(true); /************************************************************************** * Note: Minimum Spanning Tree only supports undirected, connected graphs * @@ -455,4 +454,4 @@ int main(int argc, char** argv) // Local Variables: // mode:c++ // c-file-style: "NVIDIA" -// 
End: +// End From 69b922725851d862933d3d3a4da2398a4d901de0 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 17 Jun 2015 11:10:44 -0700 Subject: [PATCH 11/36] minor template type fix for parsing input graph --- gunrock/csr.cuh | 192 ++++++++++++++++--------------------- gunrock/graphio/market.cuh | 68 ++++++------- 2 files changed, 108 insertions(+), 152 deletions(-) diff --git a/gunrock/csr.cuh b/gunrock/csr.cuh index 5e9cc8e4e..d4d5da2ff 100644 --- a/gunrock/csr.cuh +++ b/gunrock/csr.cuh @@ -34,8 +34,7 @@ namespace gunrock { * the graph as a sparse matrix. */ template -struct Csr -{ +struct Csr { SizeT nodes; /**< Number of nodes in the graph. */ SizeT edges; /**< Number of edges in the graph. */ SizeT out_nodes; /**< Number of nodes which have outgoing edges. */ @@ -57,8 +56,7 @@ struct Csr * @param[in] pinned Use pinned memory for CSR data structure * (default: do not use pinned memory) */ - Csr(bool pinned = false) - { + Csr(bool pinned = false) { nodes = 0; edges = 0; average_degree = 0; @@ -79,8 +77,7 @@ struct Csr * @param[in] edges Number of edges in COO-format graph */ template - void FromScratch(SizeT nodes, SizeT edges) - { + void FromScratch(SizeT nodes, SizeT edges) { this->nodes = nodes; this->edges = edges; @@ -89,32 +86,32 @@ struct Csr // Put our graph in pinned memory int flags = cudaHostAllocMapped; if (gunrock::util::GRError( - cudaHostAlloc((void **)&row_offsets, - sizeof(SizeT) * (nodes + 1), flags), - "Csr cudaHostAlloc row_offsets failed", __FILE__, __LINE__)) + cudaHostAlloc((void **)&row_offsets, + sizeof(SizeT) * (nodes + 1), flags), + "Csr cudaHostAlloc row_offsets failed", __FILE__, __LINE__)) exit(1); if (gunrock::util::GRError( - cudaHostAlloc((void **)&column_indices, - sizeof(VertexId) * edges, flags), - "Csr cudaHostAlloc column_indices failed", - __FILE__, __LINE__)) + cudaHostAlloc((void **)&column_indices, + sizeof(VertexId) * edges, flags), + "Csr cudaHostAlloc column_indices failed", + __FILE__, __LINE__)) exit(1); if 
(LOAD_NODE_VALUES) { if (gunrock::util::GRError( - cudaHostAlloc((void **)&node_values, - sizeof(Value) * nodes, flags), - "Csr cudaHostAlloc node_values failed", - __FILE__, __LINE__)) + cudaHostAlloc((void **)&node_values, + sizeof(Value) * nodes, flags), + "Csr cudaHostAlloc node_values failed", + __FILE__, __LINE__)) exit(1); } if (LOAD_EDGE_VALUES) { if (gunrock::util::GRError( - cudaHostAlloc((void **)&edge_values, - sizeof(Value) * edges, flags), - "Csr cudaHostAlloc edge_values failed", - __FILE__, __LINE__)) + cudaHostAlloc((void **)&edge_values, + sizeof(Value) * edges, flags), + "Csr cudaHostAlloc edge_values failed", + __FILE__, __LINE__)) exit(1); } @@ -124,9 +121,9 @@ struct Csr row_offsets = (SizeT*) malloc(sizeof(SizeT) * (nodes + 1)); column_indices = (VertexId*) malloc(sizeof(VertexId) * edges); node_values = (LOAD_NODE_VALUES) ? - (Value*) malloc(sizeof(Value) * nodes) : NULL; + (Value*) malloc(sizeof(Value) * nodes) : NULL; edge_values = (LOAD_EDGE_VALUES) ? - (Value*) malloc(sizeof(Value) * edges) : NULL; + (Value*) malloc(sizeof(Value) * edges) : NULL; } } @@ -143,20 +140,17 @@ struct Csr SizeT num_edges, SizeT *row_offsets, VertexId *col_indices, - Value *edge_values = NULL) - { + Value *edge_values = NULL) { std::ofstream output(file_name); - if (output.is_open()) - { + if (output.is_open()) { output << num_nodes << " " << num_edges << " "; std::copy(row_offsets, row_offsets + num_nodes + 1, - std::ostream_iterator(output, " ")); + std::ostream_iterator(output, " ")); std::copy(column_indices, column_indices + num_edges, - std::ostream_iterator(output, " ")); - if (edge_values != NULL) - { + std::ostream_iterator(output, " ")); + if (edge_values != NULL) { std::copy(edge_values, edge_values + num_edges, - std::ostream_iterator(output, " ")); + std::ostream_iterator(output, " ")); } output.close(); } @@ -168,21 +162,20 @@ struct Csr * */ template - void FromCsr(char *f_in, bool undirected, bool reversed) - { + void FromCsr(char *f_in, bool 
undirected, bool reversed) { printf(" Reading directly from previously stored CSR arrays ...\n"); std::ifstream _file(f_in); - if (_file.is_open()) - { + if (_file.is_open()) { time_t mark1 = time(NULL); - std::istream_iterator start(_file), end; - std::vector v(start, end); + std::istream_iterator start(_file), end; + std::vector v(start, end); + + SizeT csr_nodes = v[0]; + SizeT csr_edges = v[1]; - SizeT csr_nodes = v.at(0); - SizeT csr_edges = v.at(1); FromScratch(csr_nodes, csr_edges); @@ -190,8 +183,7 @@ struct Csr std::copy(v.begin() + 3 + csr_nodes, v.begin() + 3 + csr_nodes + csr_edges, column_indices); - if(LOAD_EDGE_VALUES) - { + if (LOAD_EDGE_VALUES) { std::copy(v.begin() + 3 + csr_nodes + csr_edges, v.end(), edge_values); } @@ -200,27 +192,20 @@ struct Csr printf("Done reading (%ds).\n", (int) (mark2 - mark1)); v.clear(); - } - else - { + } else { perror("Unable To Open The File."); } // compute out_nodes SizeT out_node = 0; - for (SizeT node = 0; node < nodes; node++) - { - if (row_offsets[node+1] - row_offsets[node] > 0) - { + for (SizeT node = 0; node < nodes; node++) { + if (row_offsets[node + 1] - row_offsets[node] > 0) { ++out_node; } } out_nodes = out_node; - - fflush(stdout); } - /** * @brief Build CSR graph from COO graph, sorted or unsorted * @@ -241,8 +226,7 @@ struct Csr SizeT coo_edges, bool ordered_rows = false, bool undirected = false, - bool reversed = false) - { + bool reversed = false) { printf(" Converting %d vertices, %d directed edges (%s tuples) " "to CSR format...\n", coo_nodes, coo_edges, ordered_rows ? 
"ordered" : "unordered"); @@ -259,20 +243,18 @@ struct Csr Tuple *new_coo = (Tuple*) malloc(sizeof(Tuple) * coo_edges); SizeT real_edge = 0; if (coo[0].col != coo[0].row) { - new_coo[0].row = coo[0].row; - new_coo[0].col = coo[0].col; - new_coo[0].val = coo[0].val; - real_edge++; + new_coo[0].row = coo[0].row; + new_coo[0].col = coo[0].col; + new_coo[0].val = coo[0].val; + real_edge++; } - for (int i = 0; i < coo_edges-1; ++i) - { - if (((coo[i+1].col != coo[i].col) || - (coo[i+1].row != coo[i].row)) && - (coo[i+1].col != coo[i+1].row)) - { - new_coo[real_edge].col = coo[i+1].col; - new_coo[real_edge].row = coo[i+1].row; - new_coo[real_edge++].val = coo[i+1].val; + for (int i = 0; i < coo_edges - 1; ++i) { + if (((coo[i + 1].col != coo[i].col) || + (coo[i + 1].row != coo[i].row)) && + (coo[i + 1].col != coo[i + 1].row)) { + new_coo[real_edge].col = coo[i + 1].col; + new_coo[real_edge].row = coo[i + 1].row; + new_coo[real_edge++].val = coo[i + 1].val; } } @@ -303,13 +285,10 @@ struct Csr printf("Done converting (%ds).\n", (int)(mark2 - mark1)); // Write offsets, indices, node, edges etc. into file - if (LOAD_EDGE_VALUES) - { + if (LOAD_EDGE_VALUES) { WriteToFile(output_file, undirected, reversed, nodes, edges, row_offsets, column_indices, edge_values); - } - else - { + } else { WriteToFile(output_file, undirected, reversed, nodes, edges, row_offsets, column_indices); } @@ -320,8 +299,7 @@ struct Csr // Compute out_nodes SizeT out_node = 0; for (SizeT node = 0; node < nodes; node++) { - if (row_offsets[node+1] - row_offsets[node] > 0) - { + if (row_offsets[node + 1] - row_offsets[node] > 0) { ++out_node; } } @@ -336,8 +314,7 @@ struct Csr /** * @brief Print log-scale degree histogram of the graph. */ - void PrintHistogram() - { + void PrintHistogram() { fflush(stdout); // Initialize @@ -380,18 +357,17 @@ struct Csr /** * @brief Display CSR graph to console */ - void DisplayGraph(bool with_edge_value = false) - { - SizeT displayed_node_num = (nodes > 40) ? 
40:nodes; + void DisplayGraph(bool with_edge_value = false) { + SizeT displayed_node_num = (nodes > 40) ? 40 : nodes; printf("First %d nodes's neighbor list of the input graph:\n", displayed_node_num); for (SizeT node = 0; node < displayed_node_num; node++) { util::PrintValue(node); printf(":"); for (SizeT edge = row_offsets[node]; - edge < row_offsets[node + 1]; - edge++) { - printf("["); + edge < row_offsets[node + 1]; + edge++) { + printf("["); util::PrintValue(column_indices[edge]); if (with_edge_value) { printf(","); @@ -403,23 +379,22 @@ struct Csr } } - bool CheckValue() - { + bool CheckValue() { for (SizeT node = 0; node < nodes; ++node) { for (SizeT edge = row_offsets[node]; - edge < row_offsets[node+1]; - ++edge) { - int src_node = node; - int dst_node = column_indices[edge]; - int edge_value = edge_values[edge]; - for (SizeT r_edge = row_offsets[dst_node]; - r_edge < row_offsets[dst_node+1]; - ++r_edge) { + edge < row_offsets[node + 1]; + ++edge) { + int src_node = node; + int dst_node = column_indices[edge]; + int edge_value = edge_values[edge]; + for (SizeT r_edge = row_offsets[dst_node]; + r_edge < row_offsets[dst_node + 1]; + ++r_edge) { if (column_indices[r_edge] == src_node) { if (edge_values[r_edge] != edge_value) return false; } - } + } } } return true; @@ -428,14 +403,12 @@ struct Csr /** * @brief Find node with largest neighbor list */ - int GetNodeWithHighestDegree(int& max_degree) - { + int GetNodeWithHighestDegree(int& max_degree) { int degree = 0; int src = 0; for (SizeT node = 0; node < nodes; node++) { - if (row_offsets[node+1] - row_offsets[node] > degree) - { - degree = row_offsets[node+1]-row_offsets[node]; + if (row_offsets[node + 1] - row_offsets[node] > degree) { + degree = row_offsets[node + 1] - row_offsets[node]; src = node; } } @@ -446,16 +419,15 @@ struct Csr /** * @brief Display the neighbor list of a given node */ - void DisplayNeighborList(VertexId node) - { + void DisplayNeighborList(VertexId node) { if (node < 0 || node >= 
nodes) return; for (SizeT edge = row_offsets[node]; - edge < row_offsets[node + 1]; - edge++) { - util::PrintValue(column_indices[edge]); - printf(", "); - } - printf("\n"); + edge < row_offsets[node + 1]; + edge++) { + util::PrintValue(column_indices[edge]); + printf(", "); + } + printf("\n"); } /** @@ -466,7 +438,7 @@ struct Csr double mean = 0, count = 0; for (SizeT node = 0; node < nodes; ++node) { count += 1; - mean += (row_offsets[node+1]- row_offsets[node] - mean) / count; + mean += (row_offsets[node + 1] - row_offsets[node] - mean) / count; } average_degree = static_cast(mean); } @@ -512,8 +484,7 @@ struct Csr /** * @brief Deallocates CSR graph */ - void Free() - { + void Free() { if (row_offsets) { if (pinned) { gunrock::util::GRError(cudaFreeHost(row_offsets), @@ -544,8 +515,7 @@ struct Csr /** * @brief CSR destructor */ - ~Csr() - { + ~Csr() { Free(); } }; diff --git a/gunrock/graphio/market.cuh b/gunrock/graphio/market.cuh index 494200f4b..74284f065 100644 --- a/gunrock/graphio/market.cuh +++ b/gunrock/graphio/market.cuh @@ -56,8 +56,7 @@ int ReadMarketStream( char *output_file, Csr &csr_graph, bool undirected, - bool reversed) -{ + bool reversed) { typedef Coo EdgeTupleType; SizeT edges_read = -1; @@ -73,7 +72,7 @@ int ReadMarketStream( bool ordered_rows = true; - while(true) { + while (true) { if (fscanf(f_in, "%[^\n]\n", line) <= 0) { break; @@ -110,7 +109,7 @@ int ReadMarketStream( fflush(stdout); // Allocate coo graph - coo = (EdgeTupleType*) malloc(sizeof(EdgeTupleType) * edges); + coo = (EdgeTupleType*)malloc(sizeof(EdgeTupleType) * edges); edges_read++; @@ -122,20 +121,21 @@ int ReadMarketStream( return -1; } if (edges_read >= edges) { - fprintf(stderr, - "Error parsing MARKET graph:" - "encountered more than %d edges\n", - edges); - if (coo) free(coo); - return -1; + fprintf(stderr, + "Error parsing MARKET graph:" + "encountered more than %d edges\n", + edges); + if (coo) free(coo); + return -1; } - long long ll_row, ll_col, ll_value; + long 
long ll_row, ll_col; + Value ll_value; int num_input; if (LOAD_VALUES) { if ((num_input = sscanf( - line, "%lld %lld %lld", - &ll_col, &ll_row, &ll_value)) < 2) { + line, "%lld %lld %d", + &ll_col, &ll_row, &ll_value)) < 2) { fprintf(stderr, "Error parsing MARKET graph: badly formed edge\n"); if (coo) free(coo); @@ -205,7 +205,6 @@ int ReadMarketStream( undirected, reversed); free(coo); - fflush(stdout); return 0; @@ -220,8 +219,7 @@ int ReadCsrArrays( char *f_in, Csr &csr_graph, bool undirected, - bool reversed) -{ + bool reversed) { csr_graph.template FromCsr(f_in, undirected, reversed); return 0; } @@ -249,34 +247,30 @@ int BuildMarketGraph( char *output_file, Csr &csr_graph, bool undirected, - bool reversed) -{ + bool reversed) { FILE *_file = fopen(output_file, "r"); - if (_file) - { + if (_file) { fclose(_file); if (ReadCsrArrays( - output_file, csr_graph, undirected, reversed) != 0) { + output_file, csr_graph, undirected, reversed) != 0) { return -1; } - } - else { + } else { if (mm_filename == NULL) { // Read from stdin printf("Reading from stdin:\n"); if (ReadMarketStream( - stdin, output_file, csr_graph, undirected, reversed) != 0) { + stdin, output_file, csr_graph, undirected, reversed) != 0) { return -1; } - } - else { + } else { // Read from file FILE *f_in = fopen(mm_filename, "r"); if (f_in) { printf("Reading from %s:\n", mm_filename); if (ReadMarketStream( - f_in, output_file, csr_graph, - undirected, reversed) != 0) { + f_in, output_file, csr_graph, + undirected, reversed) != 0) { fclose(f_in); return -1; } @@ -299,37 +293,29 @@ int BuildMarketGraph( char *file_in, Csr &graph, bool undirected, - bool reversed) -{ + bool reversed) { // seperate the graph path and the file name char *temp1 = strdup(file_in); char *temp2 = strdup(file_in); char *file_path = dirname (temp1); char *file_name = basename(temp2); - if (undirected) - { + if (undirected) { char ud[256]; sprintf(ud, "%s/.%s_undirected_csr", file_path, file_name); if (BuildMarketGraph(file_in, 
ud, graph, true, false) != 0) return 1; - } - else if (!undirected && reversed) - { + } else if (!undirected && reversed) { char rv[256]; sprintf(rv, "%s/.%s_reversed_csr", file_path, file_name); if (BuildMarketGraph(file_in, rv, graph, false, true) != 0) return 1; - } - else if (!undirected && !reversed) - { + } else if (!undirected && !reversed) { char nr[256]; sprintf(nr, "%s/.%s_nonreversed_csr", file_path, file_name); if (BuildMarketGraph(file_in, nr, graph, false, false) != 0) return 1; - } - else - { + } else { fprintf(stderr, "Unspecified Graph Type.\n"); } return 0; From a42e70bf2204314e1397303c292ef2aaffd80f60 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 17 Jun 2015 11:14:40 -0700 Subject: [PATCH 12/36] small test mst sample dataset --- dataset/small/test_mst.mtx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 dataset/small/test_mst.mtx diff --git a/dataset/small/test_mst.mtx b/dataset/small/test_mst.mtx new file mode 100644 index 000000000..a6949843f --- /dev/null +++ b/dataset/small/test_mst.mtx @@ -0,0 +1,18 @@ +9 9 17 +1 2 2 +2 3 2 +2 4 17 +3 1 2 +3 4 38 +3 5 10 +4 5 2 +5 1 82 +5 2 11 +6 3 100 +6 4 100 +6 5 210 +6 7 2 +6 8 21 +7 3 120 +7 5 110 +8 9 2 \ No newline at end of file From c5ff9203f12dfadb27d776f73bf61a3125bce878 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 17 Jun 2015 12:02:29 -0700 Subject: [PATCH 13/36] changed some types to template rather than fixed uint --- gunrock/app/sssp/sssp_enactor.cuh | 7 +- gunrock/app/sssp/sssp_functor.cuh | 12 ++-- gunrock/app/sssp/sssp_problem.cuh | 3 +- tests/sssp/test_sssp.cu | 109 ++++++++++++++---------------- 4 files changed, 65 insertions(+), 66 deletions(-) diff --git a/gunrock/app/sssp/sssp_enactor.cuh b/gunrock/app/sssp/sssp_enactor.cuh index 7d9c5da43..855cbf9f6 100644 --- a/gunrock/app/sssp/sssp_enactor.cuh +++ b/gunrock/app/sssp/sssp_enactor.cuh @@ -207,16 +207,19 @@ class SSSPEnactor : public EnactorBase double queue_sizing, int max_grid_size = 0) { - 
typedef typename SSSPProblem::SizeT SizeT; - typedef typename SSSPProblem::VertexId VertexId; + typedef typename SSSPProblem::VertexId VertexId; + typedef typename SSSPProblem::Value Value; + typedef typename SSSPProblem::SizeT SizeT; typedef SSSPFunctor< VertexId, + Value, SizeT, SSSPProblem> SsspFunctor; typedef PQFunctor< VertexId, + Value, SizeT, SSSPProblem> PqFunctor; diff --git a/gunrock/app/sssp/sssp_functor.cuh b/gunrock/app/sssp/sssp_functor.cuh index fa66e1dbd..37652a73f 100644 --- a/gunrock/app/sssp/sssp_functor.cuh +++ b/gunrock/app/sssp/sssp_functor.cuh @@ -32,7 +32,7 @@ namespace sssp { * @tparam ProblemData Problem data type which contains data slice for SSSP problem * */ -template +template struct SSSPFunctor { typedef typename ProblemData::DataSlice DataSlice; @@ -51,7 +51,7 @@ struct SSSPFunctor */ static __device__ __forceinline__ bool CondEdge(VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) { - unsigned int label, weight; + Value label, weight; util::io::ModifiedLoad::Ld( label, problem->d_labels + s_id); @@ -111,7 +111,7 @@ struct SSSPFunctor } }; -template +template struct PQFunctor { typedef typename ProblemData::DataSlice DataSlice; @@ -126,15 +126,15 @@ struct PQFunctor * * \return Whether to load the apply function for the edge and include the destination node in the next frontier. */ - static __device__ __forceinline__ unsigned int ComputePriorityScore(VertexId node_id, DataSlice *problem) + static __device__ __forceinline__ Value ComputePriorityScore(VertexId node_id, DataSlice *problem) { - unsigned int weight; + Value weight; util::io::ModifiedLoad::Ld( weight, problem->d_labels + node_id); float delta; util::io::ModifiedLoad::Ld( delta, problem->d_delta); - return (delta == 0) ? weight : weight/delta; + return (delta == 0) ? 
weight : weight / delta; } }; diff --git a/gunrock/app/sssp/sssp_problem.cuh b/gunrock/app/sssp/sssp_problem.cuh index c3957caf5..01fb49738 100644 --- a/gunrock/app/sssp/sssp_problem.cuh +++ b/gunrock/app/sssp/sssp_problem.cuh @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -322,7 +323,7 @@ struct SSSPProblem : ProblemBase<_VertexId, _SizeT, false> data_slices[gpu]->d_labels = d_labels; } - util::MemsetKernel<<<128, 128>>>(data_slices[gpu]->d_labels, UINT_MAX, nodes); + util::MemsetKernel<<<128, 128>>>(data_slices[gpu]->d_labels, std::numeric_limits::max(), nodes); if (!data_slices[gpu]->d_preds && MARK_PATHS) { VertexId *d_preds; diff --git a/tests/sssp/test_sssp.cu b/tests/sssp/test_sssp.cu index 71c319f68..b084842b6 100644 --- a/tests/sssp/test_sssp.cu +++ b/tests/sssp/test_sssp.cu @@ -155,7 +155,7 @@ template< void DisplayStats( Stats &stats, VertexId src, - unsigned int *h_labels, + Value *h_labels, const Csr &graph, double elapsed, VertexId search_depth, @@ -238,25 +238,24 @@ template< typename SizeT, bool MARK_PREDECESSORS> void SimpleReferenceSssp( - const Csr &graph, - unsigned int *node_values, - unsigned int *node_preds, - VertexId src) + const Csr &graph, + Value *node_values, + VertexId *node_preds, + VertexId src) { using namespace boost; // Prepare Boost Datatype and Data structure typedef adjacency_list > Graph; + property > Graph; typedef graph_traits::vertex_descriptor vertex_descriptor; typedef graph_traits::edge_descriptor edge_descriptor; - typedef std::pair Edge; + typedef std::pair Edge; - Edge* edges = (Edge*)malloc(sizeof(Edge)*graph.edges); - unsigned int *weight = - (unsigned int*)malloc(sizeof(unsigned int)*graph.edges); + Edge *edges = ( Edge*)malloc(sizeof( Edge)*graph.edges); + Value *weight = (Value*)malloc(sizeof(Value)*graph.edges); for (int i = 0; i < graph.nodes; ++i) { @@ -269,7 +268,7 @@ void SimpleReferenceSssp( Graph g(edges, edges + graph.edges, weight, graph.nodes); - std::vector d(graph.nodes); + std::vector 
d(graph.nodes); std::vector p(graph.nodes); vertex_descriptor s = vertex(src, g); @@ -282,28 +281,30 @@ void SimpleReferenceSssp( CpuTimer cpu_timer; cpu_timer.Start(); - if (MARK_PREDECESSORS) - dijkstra_shortest_paths( - g, s, - predecessor_map(boost::make_iterator_property_map(p.begin(), get(boost::vertex_index, g))). - distance_map(boost::make_iterator_property_map(d.begin(), get(boost::vertex_index, g)))); - else - dijkstra_shortest_paths( - g, s, - distance_map(boost::make_iterator_property_map(d.begin(), get(boost::vertex_index, g)))); + if (MARK_PREDECESSORS) { + dijkstra_shortest_paths(g, s, + predecessor_map(boost::make_iterator_property_map( + p.begin(), get(boost::vertex_index, g))).distance_map( + boost::make_iterator_property_map( + d.begin(), get(boost::vertex_index, g)))); + } else { + dijkstra_shortest_paths(g, s, + distance_map(boost::make_iterator_property_map( + d.begin(), get(boost::vertex_index, g)))); + } cpu_timer.Stop(); float elapsed = cpu_timer.ElapsedMillis(); printf("CPU SSSP finished in %lf msec.\n", elapsed); - Coo* sort_dist = NULL; - Coo* sort_pred = NULL; - sort_dist = (Coo*)malloc( - sizeof(Coo) * graph.nodes); - if (MARK_PREDECESSORS) - sort_pred = (Coo*)malloc( - sizeof(Coo) * graph.nodes); - + Coo* sort_dist = NULL; + Coo* sort_pred = NULL; + sort_dist = (Coo*)malloc( + sizeof(Coo) * graph.nodes); + if (MARK_PREDECESSORS) { + sort_pred = (Coo*)malloc( + sizeof(Coo) * graph.nodes); + } graph_traits < Graph >::vertex_iterator vi, vend; for (tie(vi, vend) = vertices(g); vi != vend; ++vi) { @@ -312,7 +313,7 @@ void SimpleReferenceSssp( } std::stable_sort( sort_dist, sort_dist + graph.nodes, - RowFirstTupleCompare >); + RowFirstTupleCompare >); if (MARK_PREDECESSORS) { @@ -323,21 +324,21 @@ void SimpleReferenceSssp( } std::stable_sort( sort_pred, sort_pred + graph.nodes, - RowFirstTupleCompare >); + RowFirstTupleCompare< Coo >); } for (int i = 0; i < graph.nodes; ++i) { node_values[i] = sort_dist[i].col; } - if (MARK_PREDECESSORS) + 
if (MARK_PREDECESSORS) { for (int i = 0; i < graph.nodes; ++i) { node_preds[i] = sort_pred[i].col; } - - free(sort_dist); - if (MARK_PREDECESSORS) free(sort_pred); + } + if (sort_dist) free(sort_dist); + if (sort_pred) free(sort_pred); } /** @@ -382,18 +383,17 @@ void RunTests( Value, MARK_PREDECESSORS> Problem; - // Allocate host-side label array (for both reference and gpu-computed results) - unsigned int *reference_labels = (unsigned int*)malloc(sizeof(unsigned int) * graph.nodes); - unsigned int *h_labels = (unsigned int*)malloc(sizeof(unsigned int) * graph.nodes); - unsigned int *reference_check_label = (g_quick) ? NULL : reference_labels; - unsigned int *reference_preds = NULL; - VertexId *h_preds = NULL; - unsigned int *reference_check_pred = NULL; + // Allocate host-side arrays (for both reference and gpu-computed results) + Value *reference_labels = (Value*)malloc(sizeof(Value) * graph.nodes); + Value *h_labels = (Value*)malloc(sizeof(Value) * graph.nodes); + Value *reference_check_label = (g_quick) ? NULL : reference_labels; + VertexId *reference_preds = NULL; + VertexId *h_preds = NULL; + VertexId *reference_check_pred = NULL; if (MARK_PREDECESSORS) { - reference_preds = - (unsigned int*)malloc(sizeof(unsigned int) * graph.nodes); + reference_preds = (VertexId*)malloc(sizeof(VertexId) * graph.nodes); h_preds = (VertexId*)malloc(sizeof(VertexId) * graph.nodes); reference_check_pred = (g_quick) ? 
NULL : reference_preds; } @@ -453,7 +453,6 @@ void RunTests( } elapsed /= iterations; - sssp_enactor.GetStatistics(total_queued, search_depth, avg_duty); // Copy out results @@ -499,13 +498,13 @@ void RunTests( avg_duty); - // Cleanup - delete stats; - if (csr_problem) delete csr_problem; + // Clean up + if (stats) delete stats; + if (csr_problem) delete csr_problem; if (reference_labels) free(reference_labels); - if (h_labels) free(h_labels); - if (reference_preds) free(reference_preds); - if (h_preds) free(h_preds); + if (h_labels) free(h_labels); + if (reference_preds) free(reference_preds); + if (h_preds) free(h_preds); cudaDeviceSynchronize(); } @@ -540,7 +539,7 @@ void RunTests( int iterations = 1; // Number of runs for testing int delta_factor = 16; // Delta factor for priority queue int traversal_mode = -1; // traversal mode: 0 for LB, 1 for TWC - g_quick = false; // Whether or not to skip ref validation + g_quick = 0; // Whether or not to skip ref validation // source vertex to start args.GetCmdLineArgument("src", src_str); @@ -647,15 +646,10 @@ int main( int argc, char** argv) return 1; } - //DeviceInit(args); - //cudaSetDeviceFlags(cudaDeviceMapHost); int dev = 0; args.GetCmdLineArgument("device", dev); ContextPtr context = mgpu::CreateCudaDevice(dev); - //srand(0); // Presently deterministic - //srand(time(NULL)); - // Parse graph-contruction params g_undirected = args.CheckCmdLineFlag("undirected"); std::string graph_type = argv[1]; @@ -678,6 +672,7 @@ int main( int argc, char** argv) typedef int VertexId; // Use as the node identifier typedef unsigned int Value; // Use as the value type typedef int SizeT; // Use as the graph size type + Csr csr(false); // default for stream_from_host if (graph_args < 1) { Usage(); return 1; } @@ -692,7 +687,7 @@ int main( int argc, char** argv) } csr.PrintHistogram(); - //csr.DisplayGraph(true); //print graph with edge_value + csr.DisplayGraph(true); //print graph with edge_value //csr.GetAverageEdgeValue(); 
//csr.GetAverageDegree(); //int max_degree; From fa477a3192e41574893bc46958a083bf929a320c Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 17 Jun 2015 13:40:32 -0700 Subject: [PATCH 14/36] clean up --- gunrock/app/mst/mst_enactor.cuh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/gunrock/app/mst/mst_enactor.cuh b/gunrock/app/mst/mst_enactor.cuh index 4908b71b9..0c44b617c 100644 --- a/gunrock/app/mst/mst_enactor.cuh +++ b/gunrock/app/mst/mst_enactor.cuh @@ -215,10 +215,8 @@ public: typedef SuRmFunctor SuRmFunctor; typedef EIdxFunctor EIdxFunctor; typedef MarkFunctor MarkFunctor; - //typedef OrFunctor OrFunctor; cudaError_t retval = cudaSuccess; - unsigned int *d_scanned_edges = NULL; do @@ -273,7 +271,7 @@ public: } // generate d_flags_array from d_row_offsets using MarkSegment kernel - util::MarkSegmentFromIndices<<<128, 128>>>( + util::MarkSegmentFromIndices<<<128, 128>>>( problem->data_slices[0]->d_flags_array, graph_slice->d_row_offsets, graph_slice->nodes); @@ -584,7 +582,7 @@ public: //////////////////////////////////////////////////////////////////////// // create a flag to mark the boundaries of representative vertices - util::MarkSegmentFromKeys<<<128, 128>>>( + util::MarkSegmentFromKeys<<<128, 128>>>( problem->data_slices[0]->d_flags_array, problem->data_slices[0]->d_supervtx_ids, graph_slice->nodes); @@ -790,8 +788,7 @@ public: graph_slice->edges); util::MemsetCopyVectorKernel<<<128, 128>>>( - //problem->data_slices[0]->d_temp_value, - problem->data_slices[0]->d_super_edges, // used as temp_index + problem->data_slices[0]->d_super_edges, // used as temp_index problem->data_slices[0]->d_keys_array, graph_slice->edges); @@ -807,7 +804,6 @@ public: util::CUBRadixSort( true, graph_slice->edges, - //problem->data_slices[0]->d_temp_value, problem->data_slices[0]->d_super_edges, // used as temp_index problem->data_slices[0]->d_origin_edges); From 0c375d00e61e67b44d2598d72801405fa6a2a493 Mon Sep 17 00:00:00 2001 From: Yuduo Wu 
Date: Wed, 17 Jun 2015 13:43:31 -0700 Subject: [PATCH 15/36] blah --- gunrock/app/mst/mst_enactor.cuh | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/gunrock/app/mst/mst_enactor.cuh b/gunrock/app/mst/mst_enactor.cuh index 4908b71b9..0c44b617c 100644 --- a/gunrock/app/mst/mst_enactor.cuh +++ b/gunrock/app/mst/mst_enactor.cuh @@ -215,10 +215,8 @@ public: typedef SuRmFunctor SuRmFunctor; typedef EIdxFunctor EIdxFunctor; typedef MarkFunctor MarkFunctor; - //typedef OrFunctor OrFunctor; cudaError_t retval = cudaSuccess; - unsigned int *d_scanned_edges = NULL; do @@ -273,7 +271,7 @@ public: } // generate d_flags_array from d_row_offsets using MarkSegment kernel - util::MarkSegmentFromIndices<<<128, 128>>>( + util::MarkSegmentFromIndices<<<128, 128>>>( problem->data_slices[0]->d_flags_array, graph_slice->d_row_offsets, graph_slice->nodes); @@ -584,7 +582,7 @@ public: //////////////////////////////////////////////////////////////////////// // create a flag to mark the boundaries of representative vertices - util::MarkSegmentFromKeys<<<128, 128>>>( + util::MarkSegmentFromKeys<<<128, 128>>>( problem->data_slices[0]->d_flags_array, problem->data_slices[0]->d_supervtx_ids, graph_slice->nodes); @@ -790,8 +788,7 @@ public: graph_slice->edges); util::MemsetCopyVectorKernel<<<128, 128>>>( - //problem->data_slices[0]->d_temp_value, - problem->data_slices[0]->d_super_edges, // used as temp_index + problem->data_slices[0]->d_super_edges, // used as temp_index problem->data_slices[0]->d_keys_array, graph_slice->edges); @@ -807,7 +804,6 @@ public: util::CUBRadixSort( true, graph_slice->edges, - //problem->data_slices[0]->d_temp_value, problem->data_slices[0]->d_super_edges, // used as temp_index problem->data_slices[0]->d_origin_edges); From 9e78f8bf51ae3a63f0922496e71ecdf56aa6ae37 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 17 Jun 2015 13:45:18 -0700 Subject: [PATCH 16/36] clean up --- gunrock/app/mst/mst_enactor.cuh | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/gunrock/app/mst/mst_enactor.cuh b/gunrock/app/mst/mst_enactor.cuh index 0c44b617c..fa8dac343 100644 --- a/gunrock/app/mst/mst_enactor.cuh +++ b/gunrock/app/mst/mst_enactor.cuh @@ -271,7 +271,7 @@ public: } // generate d_flags_array from d_row_offsets using MarkSegment kernel - util::MarkSegmentFromIndices<<<128, 128>>>( + util::MarkSegmentFromIndices<<<128, 128>>>( problem->data_slices[0]->d_flags_array, graph_slice->d_row_offsets, graph_slice->nodes); @@ -582,7 +582,7 @@ public: //////////////////////////////////////////////////////////////////////// // create a flag to mark the boundaries of representative vertices - util::MarkSegmentFromKeys<<<128, 128>>>( + util::MarkSegmentFromKeys<<<128, 128>>>( problem->data_slices[0]->d_flags_array, problem->data_slices[0]->d_supervtx_ids, graph_slice->nodes); From 127eba3d41a03632a7d3bb1f1c48c00bb61a9c78 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Thu, 18 Jun 2015 08:29:37 -0700 Subject: [PATCH 17/36] Vertex-Induced Subgraph primitive --- gunrock/app/vis/vis_enactor.cuh | 395 ++++++++++++++++++++++++++++++++ gunrock/app/vis/vis_functor.cuh | 108 +++++++++ gunrock/app/vis/vis_problem.cuh | 294 ++++++++++++++++++++++++ tests/vis/Makefile | 108 +++++++++ tests/vis/run.sh | 28 +++ tests/vis/test_vis.cu | 346 ++++++++++++++++++++++++++++ 6 files changed, 1279 insertions(+) create mode 100644 gunrock/app/vis/vis_enactor.cuh create mode 100644 gunrock/app/vis/vis_functor.cuh create mode 100644 gunrock/app/vis/vis_problem.cuh create mode 100644 tests/vis/Makefile create mode 100644 tests/vis/run.sh create mode 100644 tests/vis/test_vis.cu diff --git a/gunrock/app/vis/vis_enactor.cuh b/gunrock/app/vis/vis_enactor.cuh new file mode 100644 index 000000000..590863cb6 --- /dev/null +++ b/gunrock/app/vis/vis_enactor.cuh @@ -0,0 +1,395 @@ +// ---------------------------------------------------------------------------- +// Gunrock -- High-Performance Graph Primitives on GPU +// 
---------------------------------------------------------------------------- +// This source code is distributed under the terms of LICENSE.TXT +// in the root directory of this source distribution. +// ---------------------------------------------------------------------------- + +/** + * @file vis_enactor.cuh + * @brief Primitive problem enactor for Vertex-Induced Subgraph + */ + +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace gunrock { +namespace app { +namespace vis { + +/** + * @brief Primitive enactor class. + * @tparam INSTRUMWENT Boolean indicate collect per-CTA clock-count statistics + */ +template +class VISEnactor : public EnactorBase { + protected: + /** + * A pinned, mapped word that the traversal kernels will signal when done + */ + volatile int *done; + int *d_done; + cudaEvent_t throttle_event; + + /** + * @brief Prepare the enactor for kernel call. + * @param[in] problem Problem object holds both graph and primitive data. + * \return cudaError_t object indicates the success of all CUDA functions. 
+ */ + template + cudaError_t Setup(ProblemData *problem) { + typedef typename ProblemData::SizeT SizeT; + typedef typename ProblemData::VertexId VertexId; + + cudaError_t retval = cudaSuccess; + + // initialize the host-mapped "done" + if (!done) { + int flags = cudaHostAllocMapped; + + // allocate pinned memory for done + if (retval = util::GRError( + cudaHostAlloc((void**)&done, sizeof(int) * 1, flags), + "Enactor cudaHostAlloc done failed", + __FILE__, __LINE__)) return retval; + + // map done into GPU space + if (retval = util::GRError( + cudaHostGetDevicePointer((void**)&d_done, (void*) done, 0), + "Enactor cudaHostGetDevicePointer done failed", + __FILE__, __LINE__)) return retval; + + // create throttle event + if (retval = util::GRError( + cudaEventCreateWithFlags(&throttle_event, cudaEventDisableTiming), + "Enactor cudaEventCreateWithFlags throttle_event failed", + __FILE__, __LINE__)) return retval; + } + + done[0] = -1; + + // graph slice + typename ProblemData::GraphSlice *graph_slice = problem->graph_slices[0]; + // TODO: uncomment if using data_slice to store primitive-specific array + //typename ProblemData::DataSlice *data_slice = problem->data_slices[0]; + + do { + // bind row-offsets and bit-mask texture + cudaChannelFormatDesc row_offsets_desc = cudaCreateChannelDesc(); + oprtr::edge_map_forward::RowOffsetTex::ref.channelDesc = row_offsets_desc; + if (retval = util::GRError( + cudaBindTexture( + 0, + oprtr::edge_map_forward::RowOffsetTex::ref, + graph_slice->d_row_offsets, + (graph_slice->nodes + 1) * sizeof(SizeT)), + "Enactor cudaBindTexture row_offset_tex_ref failed", + __FILE__, __LINE__)) break; + } while (0); + return retval; + } + + public: + /** + * @brief Constructor + */ + explicit VISEnactor(bool DEBUG = false) : + EnactorBase(EDGE_FRONTIERS, DEBUG), done(NULL), d_done(NULL) {} + + /** + * @brief Destructor + */ + virtual ~VISEnactor() { + if (done) { + util::GRError(cudaFreeHost((void*)done), + "Enactor FreeHost done failed", 
__FILE__, __LINE__); + util::GRError(cudaEventDestroy(throttle_event), + "Enactor Destroy throttle_event failed", __FILE__, __LINE__); + } + } + + /** + * \addtogroup PublicInterface + * @{ + */ + + /** + * @brief Obtain statistics the primitive enacted. + * @param[out] num_iterations Number of iterations (BSP super-steps). + */ + template + void GetStatistics(VertexId &num_iterations) { + cudaThreadSynchronize(); + num_iterations = enactor_stats.iteration; + } + + /** @} */ + + /** + * @brief Enacts computing on the specified graph. + * + * @tparam AdvanceKernelPolicy Kernel policy for advance operator. + * @tparam FilterKernelPolicy Kernel policy for filter operator. + * @tparam Problem Problem type. + * + * @param[in] context CudaContext pointer for ModernGPU APIs + * @param[in] problem Problem object. + * @param[in] max_grid_size Max grid size for kernel calls. + * + * \return cudaError_t object indicates the success of all CUDA functions. + */ + template < + typename AdvanceKernelPolicy, + typename FilterKernelPolicy, + typename Problem > + cudaError_t EnactVIS( + CudaContext & context, + Problem * problem, + int max_grid_size = 0) { + typedef typename Problem::VertexId VertexId; + typedef typename Problem::Value Value; + typedef typename Problem::SizeT SizeT; + + typedef VISFunctor Functor; + + cudaError_t retval = cudaSuccess; + + do { + unsigned int *d_scanned_edges = NULL; + + fflush(stdout); + + // lazy initialization + if (retval = Setup(problem)) break; + + if (retval = EnactorBase::Setup( + max_grid_size, + AdvanceKernelPolicy::CTA_OCCUPANCY, + FilterKernelPolicy::CTA_OCCUPANCY)) + break; + + // single-gpu graph slice and data slice + typename Problem::GraphSlice *g_slice = problem->graph_slices[0]; + typename Problem::DataSlice *d_slice = problem->d_data_slices[0]; + + if (AdvanceKernelPolicy::ADVANCE_MODE == oprtr::advance::LB) { + if (retval = util::GRError( + cudaMalloc((void**)&d_scanned_edges, + g_slice->edges * sizeof(unsigned int)), + 
"VISProblem cudaMalloc d_scanned_edges failed", + __FILE__, __LINE__)) return retval; + } + + frontier_attribute.queue_length = g_slice->nodes; + frontier_attribute.queue_index = 0; // work queue index + frontier_attribute.selector = 0; + frontier_attribute.queue_reset = true; + + // filter: intput all vertices in graph, output selected vertices + oprtr::filter::Kernel + <<>>( + enactor_stats.iteration + 1, + frontier_attribute.queue_reset, + frontier_attribute.queue_index, + enactor_stats.num_gpus, + frontier_attribute.queue_length, + d_done, + g_slice->frontier_queues.d_keys[frontier_attribute.selector], + NULL, + g_slice->frontier_queues.d_keys[frontier_attribute.selector^1], + d_slice, + NULL, + work_progress, + g_slice->frontier_elements[frontier_attribute.selector], + g_slice->frontier_elements[frontier_attribute.selector^1], + enactor_stats.filter_kernel_stats); + + if (DEBUG && (retval = util::GRError(cudaThreadSynchronize(), + "filter::Kernel failed", __FILE__, __LINE__))) break; + cudaEventQuery(throttle_event); + + frontier_attribute.queue_index++; + frontier_attribute.selector ^= 1; + + if (retval = work_progress.GetQueueLength( + frontier_attribute.queue_index, + frontier_attribute.queue_length)) break; + if (DEBUG) { + printf("filter queue length: %lld", + (long long) frontier_attribute.queue_length); + util::DisplayDeviceResults( + problem->data_slices[0]->d_bitmask, g_slice->nodes); + printf("input queue for advance:\n"); + util::DisplayDeviceResults( + g_slice->frontier_queues.d_keys[frontier_attribute.selector], + frontier_attribute.queue_length); + } + + oprtr::advance::LaunchKernel( + NULL, + enactor_stats, + frontier_attribute, + d_slice, + (VertexId*)NULL, + (bool*)NULL, + (bool*)NULL, + d_scanned_edges, + g_slice->frontier_queues.d_keys[frontier_attribute.selector], + g_slice->frontier_queues.d_keys[frontier_attribute.selector^1], + (VertexId*)NULL, + (VertexId*)NULL, + g_slice->d_row_offsets, + g_slice->d_column_indices, + (SizeT*)NULL, + 
(VertexId*)NULL, + g_slice->nodes, + g_slice->edges, + this->work_progress, + context, + gunrock::oprtr::advance::V2V); + + if (DEBUG && (retval = util::GRError(cudaThreadSynchronize(), + "advance::Kernel failed", __FILE__, __LINE__))) break; + cudaEventQuery(throttle_event); + + frontier_attribute.queue_index++; + + if (DEBUG) { + if (retval = work_progress.GetQueueLength( + frontier_attribute.queue_index, + frontier_attribute.queue_length)) break; + printf("advance queue length: %lld", + (long long) frontier_attribute.queue_length); + util::DisplayDeviceResults( + g_slice->frontier_queues.d_keys[frontier_attribute.selector^1], + frontier_attribute.queue_length); + } + + // TODO: extract graph with proper format (edge list, csr, etc.) + + if (d_scanned_edges) cudaFree(d_scanned_edges); + + } while (0); + + if (DEBUG) { + printf("\nGPU Vertex-Induced Subgraph Enact Done.\n"); + } + + return retval; + } + + /** + * \addtogroup PublicInterface + * @{ + */ + + /** + * @brief Primitive enact kernel entry. + * + * @tparam Problem Problem type. @see Problem + * + * @param[in] context CudaContext pointer for ModernGPU APIs + * @param[in] problem Pointer to Problem object. + * @param[in] max_grid_size Max grid size for kernel calls. + * @param[in] traversal_mode Traversal Mode for advance operator: + * Load-balanced or Dynamic cooperative + * + * \return cudaError_t object indicates the success of all CUDA functions. 
+ */ + template + cudaError_t Enact( + CudaContext &context, + Problem *problem, + int max_grid_size = 0, + int traversal_mode = 0) { + if (this->cuda_props.device_sm_version >= 300) { + typedef oprtr::filter::KernelPolicy < + Problem, // Problem data type + 300, // CUDA_ARCH + INSTRUMENT, // INSTRUMENT + 0, // SATURATION QUIT + true, // DEQUEUE_PROBLEM_SIZE + 8, // MIN_CTA_OCCUPANCY + 8, // LOG_THREADS + 1, // LOG_LOAD_VEC_SIZE + 0, // LOG_LOADS_PER_TILE + 5, // LOG_RAKING_THREADS + 5, // END_BITMASK_CULL + 8 > // LOG_SCHEDULE_GRANULARITY + FilterKernelPolicy; + + typedef oprtr::advance::KernelPolicy < + Problem, // Problem data type + 300, // CUDA_ARCH + INSTRUMENT, // INSTRUMENT + 1, // MIN_CTA_OCCUPANCY + 7, // LOG_THREADS + 8, // LOG_BLOCKS + 32 * 128, // LIGHT_EDGE_THRESHOLD (used for LB) + 1, // LOG_LOAD_VEC_SIZE + 0, // LOG_LOADS_PER_TILE + 5, // LOG_RAKING_THREADS + 32, // WARP_GATHER_THRESHOLD + 128 * 4, // CTA_GATHER_THRESHOLD + 7, // LOG_SCHEDULE_GRANULARITY + oprtr::advance::TWC_FORWARD > + ForwardAdvanceKernelPolicy; + + typedef oprtr::advance::KernelPolicy < + Problem, // Problem data type + 300, // CUDA_ARCH + INSTRUMENT, // INSTRUMENT + 1, // MIN_CTA_OCCUPANCY + 10, // LOG_THREADS + 8, // LOG_BLOCKS + 32 * 128, // LIGHT_EDGE_THRESHOLD (used for LB) + 1, // LOG_LOAD_VEC_SIZE + 0, // LOG_LOADS_PER_TILE + 5, // LOG_RAKING_THREADS + 32, // WARP_GATHER_THRESHOLD + 128 * 4, // CTA_GATHER_THRESHOLD + 7, // LOG_SCHEDULE_GRANULARITY + oprtr::advance::LB > + LBAdvanceKernelPolicy; + + if (traversal_mode == 0) { + return EnactVIS< + LBAdvanceKernelPolicy, FilterKernelPolicy, Problem>( + context, problem, max_grid_size); + } else { // traversal_mode == 1 + return EnactVIS< + ForwardAdvanceKernelPolicy, FilterKernelPolicy, Problem>( + context, problem, max_grid_size); + } + } + + // to reduce compile time, get rid of other architecture for now + // TODO: add all the kernel policy setting for all architectures + + printf("Not yet tuned for this architecture\n"); 
+ return cudaErrorInvalidDeviceFunction; + } + + /** @} */ +}; + +} // namespace vis +} // namespace app +} // namespace gunrock + +// Leave this at the end of the file +// Local Variables: +// mode:c++ +// c-file-style: "NVIDIA" +// End: diff --git a/gunrock/app/vis/vis_functor.cuh b/gunrock/app/vis/vis_functor.cuh new file mode 100644 index 000000000..7611d42d0 --- /dev/null +++ b/gunrock/app/vis/vis_functor.cuh @@ -0,0 +1,108 @@ +// ---------------------------------------------------------------------------- +// Gunrock -- High-Performance Graph Primitives on GPU +// ---------------------------------------------------------------------------- +// This source code is distributed under the terms of LICENSE.TXT +// in the root directory of this source distribution. +// ---------------------------------------------------------------------------- + +/** + * @file vis_functor.cuh + * @brief Device functions for Vertex-Induced Subgraph + */ + +#pragma once + +#include +#include + +namespace gunrock { +namespace app { +namespace vis { + +/** + * @brief Structure contains device functions + * + * @tparam VertexId Type used for vertex id (e.g., uint32) + * @tparam SizeT Type used for array indexing. (e.g., uint32) + * @tparam Value Type used for calculation values (e.g., float) + * @tparam ProblemData Problem data type which contains data slice + * + */ +template +struct VISFunctor { + typedef typename ProblemData::DataSlice DataSlice; + + /** + * @brief Advance condition function + * + * @param[in] s_id Vertex Id of the edge source node + * @param[in] d_id Vertex Id of the edge destination node + * @param[in] problem Data slice object + * @param[in] e_id Output edge id + * @param[in] e_id_in Input edge id + * + * \return Whether to load the apply function for the edge and + * include the destination node in the next frontier. 
+ */ + static __device__ __forceinline__ bool + CondEdge(VertexId s_id, VertexId d_id, DataSlice *problem, + VertexId e_id = 0, VertexId e_id_in = 0) { + return problem->d_bitmask[d_id]; + } + + /** + * @brief Advance apply function + * + * @param[in] s_id Vertex Id of the edge source node + * @param[in] d_id Vertex Id of the edge destination node + * @param[in] problem Data slice object + * @param[in] e_id Output edge id + * @param[in] e_id_in Input edge id + * + */ + static __device__ __forceinline__ void + ApplyEdge(VertexId s_id, VertexId d_id, DataSlice *problem, + VertexId e_id = 0, VertexId e_id_in = 0) { + printf("select edges: sid: %d, did: %d, eid: %d\n", s_id, d_id, e_id); + } + + /** + * @brief filter condition function + * + * @param[in] node Vertex Id + * @param[in] problem Data slice object + * @param[in] v Auxiliary value + * + * \return Whether to load the apply function for the node and + * include it in the outgoing vertex frontier. + */ + static __device__ __forceinline__ bool + CondFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid = 0) { + return (node % 2) == 0; // TODO: USER-DEFINED FILTER CONDITION HERE + } + + /** + * @brief filter apply function + * + * @param[in] node Vertex Id + * @param[in] problem Data slice object + * @param[in] v Auxiliary value + * + */ + static __device__ __forceinline__ void + ApplyFilter(VertexId node, DataSlice *problem, Value v = 0, SizeT nid = 0) { + util::io::ModifiedStore::St( + true, problem->d_bitmask + node); + } +}; + +} // namespace vis +} // namespace app +} // namespace gunrock + +// Leave this at the end of the file +// Local Variables: +// mode:c++ +// c-file-style: "NVIDIA" +// End: diff --git a/gunrock/app/vis/vis_problem.cuh b/gunrock/app/vis/vis_problem.cuh new file mode 100644 index 000000000..85519391b --- /dev/null +++ b/gunrock/app/vis/vis_problem.cuh @@ -0,0 +1,294 @@ +// ---------------------------------------------------------------------------- +// Gunrock -- 
High-Performance Graph Primitives on GPU +// ---------------------------------------------------------------------------- +// This source code is distributed under the terms of LICENSE.TXT +// in the root directory of this source distribution. +// ---------------------------------------------------------------------------- + +/** + * @file vis_problem.cuh + * @brief GPU storage management structure for Vertex-Induced Subgraph + */ + +#pragma once + +#include +#include + +namespace gunrock { +namespace app { +namespace vis { + +/** + * @brief Problem structure stores device-side vectors + * @tparam _VertexId Type use as vertex id (e.g., uint32) + * @tparam _SizeT Type use for array indexing. (e.g., uint32) + * @tparam _Value Type use for computed value. + */ +template +struct VISProblem : ProblemBase<_VertexId, _SizeT, false> { + typedef _VertexId VertexId; + typedef _SizeT SizeT; + typedef _Value Value; + + static const bool MARK_PREDECESSORS = true; + static const bool ENABLE_IDEMPOTENCE = false; + + /** + * @brief Data slice structure which contains problem specific data. + */ + struct DataSlice { + // device storage arrays + VertexId *d_labels; // used for ... + bool *d_bitmask; // used for indicating if vertex is in subgraph + }; + + int num_gpus; + SizeT nodes; + SizeT edges; + + // data slices (one for each GPU) + DataSlice **data_slices; + + // putting structure on device while keeping the SoA structure + DataSlice **d_data_slices; + + // device index for each data slice + int *gpu_idx; + + /** + * @brief Default constructor + */ + VISProblem(): nodes(0), edges(0), num_gpus(0) {} + + /** + * @brief Constructor + * @param[in] stream_from_host Whether to stream data from host. + * @param[in] graph Reference to the CSR graph object we process on. + * @param[in] num_gpus Number of the GPUs used. 
+ */ + VISProblem(bool stream_from_host, // only meaningful for single-GPU + const Csr &graph, + int num_gpus) : + num_gpus(num_gpus) { + Init(stream_from_host, graph, num_gpus); + } + + /** + * @brief Default destructor + */ + ~VISProblem() { + for (int i = 0; i < num_gpus; ++i) { + if (util::GRError( + cudaSetDevice(gpu_idx[i]), + "~Problem cudaSetDevice failed", __FILE__, __LINE__)) break; + + if (data_slices[i]->d_labels) + util::GRError(cudaFree(data_slices[i]->d_labels), + "GpuSlice cudaFree d_labels failed", __FILE__, __LINE__); + + if (data_slices[i]->d_bitmask) + util::GRError(cudaFree(data_slices[i]->d_bitmask), + "DataSlice cudaFree d_bitmask failed", __FILE__, __LINE__); + + if (d_data_slices[i]) + util::GRError(cudaFree(d_data_slices[i]), + "GpuSlice cudaFree data_slices failed", __FILE__, __LINE__); + } + if (d_data_slices) delete[] d_data_slices; + if (data_slices) delete[] data_slices; + } + + /** + * \addtogroup PublicInterface + * @{ + */ + + /** + * @brief Copy results computed on the GPU back to host-side vectors. + * @param[out] h_labels + *\return cudaError_t object indicates the success of all CUDA functions. + */ + cudaError_t Extract(VertexId *h_labels) { + cudaError_t retval = cudaSuccess; + + do { + if (num_gpus == 1) { + if (util::GRError(cudaSetDevice(gpu_idx[0]), + "Problem cudaSetDevice failed", + __FILE__, __LINE__)) break; + + if (retval = util::GRError( + cudaMemcpy(h_labels, + data_slices[0]->d_labels, + sizeof(VertexId) * nodes, + cudaMemcpyDeviceToHost), + "Problem cudaMemcpy d_labels failed", + __FILE__, __LINE__)) break; + + // TODO: code to extract other results here + + } else { + // multi-GPU extension code + } + } while (0); + + return retval; + } + + /** + * @brief Problem initialization + * + * @param[in] stream_from_host Whether to stream data from host. + * @param[in] graph Reference to the CSR graph object we process on. + * @param[in] _num_gpus Number of the GPUs used. 
+ * + * \return cudaError_t object indicates the success of all CUDA functions. + */ + cudaError_t Init( + bool stream_from_host, // only meaningful for single-GPU + const Csr &graph, + int _num_gpus) { + num_gpus = _num_gpus; + nodes = graph.nodes; + edges = graph.edges; + VertexId *h_row_offsets = graph.row_offsets; + VertexId *h_column_indices = graph.column_indices; + + ProblemBase<_VertexId, _SizeT, false>::Init( + stream_from_host, + nodes, + edges, + h_row_offsets, + h_column_indices, + NULL, + NULL, + num_gpus); + + // no data in DataSlice needs to be copied from host + + /** + * Allocate output labels + */ + cudaError_t retval = cudaSuccess; + data_slices = new DataSlice * [num_gpus]; + d_data_slices = new DataSlice * [num_gpus]; + + do { + if (num_gpus <= 1) { + gpu_idx = (int*)malloc(sizeof(int)); + + // create a single data slice for the currently-set GPU + int gpu; + if (retval = util::GRError( + cudaGetDevice(&gpu), "Problem cudaGetDevice failed", + __FILE__, __LINE__)) break; + gpu_idx[0] = gpu; + + data_slices[0] = new DataSlice; + if (retval = util::GRError( + cudaMalloc((void**)&d_data_slices[0], sizeof(DataSlice)), + "Problem cudaMalloc d_data_slices failed", + __FILE__, __LINE__)) return retval; + + // create SoA on device + VertexId *d_labels; + if (retval = util::GRError( + cudaMalloc((void**)&d_labels, nodes * sizeof(VertexId)), + "Problem cudaMalloc d_labels failed", + __FILE__, __LINE__)) return retval; + data_slices[0]->d_labels = d_labels; + + bool *d_bitmask; + if (retval = util::GRError( + cudaMalloc((void**)&d_bitmask, nodes * sizeof(bool)), + "Problem cudaMalloc d_bitmask failed", + __FILE__, __LINE__)) return retval; + data_slices[0]->d_bitmask = d_bitmask; + util::MemsetKernel<<<128, 128>>>( + data_slices[0]->d_bitmask, (bool)false, nodes); + } + // add multi-GPU allocation code + } while (0); + + return retval; + } + + /** + * @brief Performs any initialization work needed for primitive + * @param[in] frontier_type Frontier type 
(i.e., edge / vertex / mixed) + * @param[in] queue_sizing Size scaling factor for work queue allocation + * \return cudaError_t object indicates the success of all CUDA functions. + */ + cudaError_t Reset( + FrontierType frontier_type, // type (i.e., edge / vertex / mixed) + double queue_sizing) { + // size scaling factor for work queue allocation (e.g., 1.0 creates + // n-element and m-element vertex and edge frontiers, respectively). + // 0.0 is unspecified. + + typedef ProblemBase<_VertexId, _SizeT, false> BaseProblem; + + // load ProblemBase Reset + BaseProblem::Reset(frontier_type, queue_sizing); + + cudaError_t retval = cudaSuccess; + + for (int gpu = 0; gpu < num_gpus; ++gpu) { + // setting device + if (retval = util::GRError( + cudaSetDevice(gpu_idx[gpu]), + "Problem cudaSetDevice failed", + __FILE__, __LINE__)) return retval; + + // allocate output labels if necessary + if (!data_slices[gpu]->d_labels) { + VertexId *d_labels; + if (retval = util::GRError( + cudaMalloc((void**)&d_labels, nodes * sizeof(VertexId)), + "Problem cudaMalloc d_labels failed", + __FILE__, __LINE__)) return retval; + data_slices[gpu]->d_labels = d_labels; + } + + util::MemsetKernel<<< 128, 128>>>( + data_slices[gpu]->d_labels, -1, nodes); + + // allocate the subgraph membership bitmask if necessary + if (!data_slices[gpu]->d_bitmask) { + bool *d_bitmask; + if (retval = util::GRError(cudaMalloc( + (void**)&d_bitmask, nodes * sizeof(bool)), + "Problem cudaMalloc d_bitmask failed", + __FILE__, __LINE__)) return retval; + data_slices[gpu]->d_bitmask = d_bitmask; + } + + // clear the membership flags: a fresh allocation is uninitialized + // and flags set by a previous run must not leak into this one + util::MemsetKernel<<<128, 128>>>( + data_slices[gpu]->d_bitmask, (bool)false, nodes); + + if (retval = util::GRError( + cudaMemcpy(d_data_slices[gpu], + data_slices[gpu], + sizeof(DataSlice), + cudaMemcpyHostToDevice), + "Problem cudaMemcpy data_slices to d_data_slices failed", + __FILE__, __LINE__)) return retval; + } + + // TODO: fill in the initial input_queue for problem + // e.g., put every vertex in frontier queue + util::MemsetIdxKernel<<<128, 128>>>( + BaseProblem::graph_slices[0]->frontier_queues.d_keys[0], nodes); + + return retval; + } + + /** @} */
+}; + +} // namespace vis +} // namespace app +} // namespace gunrock + +// Leave this at the end of the file +// Local Variables: +// mode:c++ +// c-file-style: "NVIDIA" +// End: diff --git a/tests/vis/Makefile b/tests/vis/Makefile new file mode 100644 index 000000000..7931cd948 --- /dev/null +++ b/tests/vis/Makefile @@ -0,0 +1,108 @@ +# ----------------------------------------------------------------------------- +# Gunrock -- High-Performance Graph Primitives on GPU +# ----------------------------------------------------------------------------- +# This source code is distributed under the terms of LICENSE.TXT +# in the root directory of this source distribution. +# ----------------------------------------------------------------------------- +# Build script for project +# ----------------------------------------------------------------------------- + +force64 = 1 +NVCC = "$(shell which nvcc)" +NVCC_VERSION = $(strip $(shell nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//')) + +KERNELS = + +# detect OS +OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:]) + +# ----------------------------------------------------------------------------- +# Gen targets +# ----------------------------------------------------------------------------- + +GEN_SM35 = -gencode=arch=compute_35,code=\"sm_35,compute_35\" +GEN_SM30 = -gencode=arch=compute_30,code=\"sm_30,compute_30\" +SM_TARGETS = $(GEN_SM35) + +# ----------------------------------------------------------------------------- +# Libs +# ----------------------------------------------------------------------------- + + +# ----------------------------------------------------------------------------- +# Includes +# ----------------------------------------------------------------------------- + +CUDA_INC = "$(shell dirname $(NVCC))/../include" +MGPU_INC = "../../externals/moderngpu/include" +INC = -I$(CUDA_INC) -I$(MGPU_INC) -I.. -I../.. 
+ +# ----------------------------------------------------------------------------- +# Defines +# ----------------------------------------------------------------------------- + +DEFINES = + +# ----------------------------------------------------------------------------- +# Compiler Flags +# ----------------------------------------------------------------------------- + +ifneq ($(force64), 1) + # Compile with 32-bit device pointers by default + ARCH_SUFFIX = i386 + ARCH = -m32 +else + ARCH_SUFFIX = x86_64 + ARCH = -m64 +endif + +NVCCFLAGS = -Xcudafe -\# + +ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) + NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500 +endif + + +ifeq ($(verbose), 1) + NVCCFLAGS += -v +endif + +ifeq ($(keep), 1) + NVCCFLAGS += -keep +endif + +ifdef maxregisters + NVCCFLAGS += -maxrregcount $(maxregisters) +endif + +# ----------------------------------------------------------------------------- +# Dependency Lists +# ----------------------------------------------------------------------------- + +DEPS = ./Makefile \ + $(wildcard ../../gunrock/util/*.cuh) \ + $(wildcard ../../gunrock/util/**/*.cuh) \ + $(wildcard ../../gunrock/*.cuh) \ + $(wildcard ../../gunrock/graphio/*.cuh) \ + $(wildcard ../../gunrock/oprtr/*.cuh) \ + $(wildcard ../../gunrock/oprtr/**/*.cuh) \ + $(wildcard ../../gunrock/app/*.cuh) \ + $(wildcard ../../gunrock/app/**/*.cuh) + +# ----------------------------------------------------------------------------- +# (make test) Test driver for +# ----------------------------------------------------------------------------- + +test: bin/test_vis_$(NVCC_VERSION)_$(ARCH_SUFFIX) + +bin/test_vis_$(NVCC_VERSION)_$(ARCH_SUFFIX) : test_vis.cu ../../gunrock/util/test_utils.cu ../../gunrock/util/error_utils.cu ../../externals/moderngpu/src/mgpucontext.cu ../../externals/moderngpu/src/mgpuutil.cpp $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_vis_$(NVCC_VERSION)_$(ARCH_SUFFIX) test_vis.cu ../../gunrock/util/test_utils.cu 
../../gunrock/util/error_utils.cu ../../externals/moderngpu/src/mgpucontext.cu ../../externals/moderngpu/src/mgpuutil.cpp $(NVCCFLAGS) $(ARCH) $(INC) -O3 + +# ----------------------------------------------------------------------------- +# Clean +# ----------------------------------------------------------------------------- + +clean : + rm -f bin/*_$(NVCC_VERSION)_$(ARCH_SUFFIX)* + rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o diff --git a/tests/vis/run.sh b/tests/vis/run.sh new file mode 100644 index 000000000..708cedfec --- /dev/null +++ b/tests/vis/run.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# get all execution files in ./bin +files=(./bin/*) +# largest version number seen so far (empty sorts before any version) +max_ver_num="" +exe_file=${files[0]} +# iterate over all file names to get the largest version number +for x in "${files[@]}" +do + output=$(grep -o "[0-9]\.[0-9]" <<<"$x") + if [ "$output" \> "$max_ver_num" ]; then + max_ver_num=$output + exe_file=$x + fi +done + +# put OS and Device type here +SUFFIX="ubuntu12.04.k40c" + +mkdir -p eval/$SUFFIX + +for i in test_bc +do + echo $exe_file market ../../dataset/small/$i.mtx + $exe_file market ../../dataset/small/$i.mtx > eval/$SUFFIX/$i.$SUFFIX.txt + sleep 1 +done diff --git a/tests/vis/test_vis.cu b/tests/vis/test_vis.cu new file mode 100644 index 000000000..3584b9bff --- /dev/null +++ b/tests/vis/test_vis.cu @@ -0,0 +1,346 @@ +// ---------------------------------------------------------------------------- +// Gunrock -- High-Performance Graph Primitives on GPU +// ---------------------------------------------------------------------------- +// This source code is distributed under the terms of LICENSE.TXT +// in the root directory of this source distribution.
+// ---------------------------------------------------------------------------- + +/** + * @file test_vis.cuh + * @brief Simple test driver program for Vertex-Induced Subgraph + */ + +#include +#include +#include +#include +#include + +// utilities for correctness checking +#include + +// graph construction utilities +#include + +// primitive-specific headers include +#include +#include +#include + +// gunrock abstraction graph operators +#include +#include + +#include + +using namespace gunrock; +using namespace gunrock::util; +using namespace gunrock::oprtr; +using namespace gunrock::app::vis; + +// ---------------------------------------------------------------------------- +// Defines, constants, globals +// ---------------------------------------------------------------------------- + +bool g_verbose; +bool g_undirected; +bool g_quick; +bool g_stream_from_host; + +// ---------------------------------------------------------------------------- +// Housekeeping Routines +// ---------------------------------------------------------------------------- +void Usage() { + printf( + " test_vis [--undirected] [--quick]\n" + " [--device=] [--instrumented] [--iteration-num=]\n" + " [--v] [--traversal-mode=<0|1>] [--queue-sizing=]\n" + "Graph types and arguments:\n" + " market \n" + " Reads a Matrix-Market coordinate-formatted graph,\n" + " edges from STDIN (or from the optionally-specified file)\n" + " --device= Set GPU device to run. 
[Default: 0]\n" + " --undirected Convert the graph to undirected\n" + " --instrumented Keep kernels statics [Default: Disable]\n" + " total_queued, search_depth and avg_duty\n" + " (a relative indicator of load imbalance)\n" + " --quick Skip the CPU validation [Default: false]\n" + " --queue-sizing= Allocates a frontier queue sized at: \n" + " (graph-edges * ) [Default: 1.0]\n" + " --v Print verbose per iteration debug info\n" + " --iteration-num= Number of tests to run [Default: 1]\n" + " --traversal-mode=<0 | 1> Set strategy, 0 for Load-Balanced,\n" + " 1 for Dynamic-Cooperative\n" + " [Default: according to topology]\n"); +} + +/** + * @brief Displays primitive result + * + * @tparam VertexId + * @tparam SizeT + * @tparam Value + */ +template +void DisplaySolution(const Csr &graph) { + // TODO: code to print out results +} + + +/** + * @brief Performance / Evaluation statistics + */ +struct Stats { + const char *name; + Statistic num_iterations; + Stats() : name(NULL), num_iterations() {} + explicit Stats(const char *name) : name(name), num_iterations() {} +}; + +/** + * @brief Displays timing and correctness statistics + * + * @tparam VertexId + * @tparam SizeT + * @tparam Value + * + * @param[in] stats Reference to the Stats object + * @param[in] graph Reference to the CSR graph we process on + */ +template +void DisplayStats(const Stats &stats, const Csr &graph, + const float elapsed, const long long iterations) { + printf("[%s] finished.\n", stats.name); + printf("elapsed: %.4f ms\n", elapsed); +} + +// ---------------------------------------------------------------------------- +// Testing Routines +// ---------------------------------------------------------------------------- + +/** + * @brief A simple CPU-based reference implementation. 
+ * + * @tparam VertexId + * @tparam SizeT + * @tparam Value + * + * @param[in] graph Reference to the CSR graph we process on + */ +template +void SimpleReference(const Csr &graph) { + // initialization + + // perform calculation + + CpuTimer cpu_timer; + cpu_timer.Start(); + + // TODO: CPU validation code here + + cpu_timer.Stop(); + + float cpu_elapsed = cpu_timer.ElapsedMillis(); + printf("CPU reference finished in %lf ms.\n\n", cpu_elapsed); +} + +/** + * @brief Sample test + * + * @tparam VertexId + * @tparam SizeT + * @tparam Value + * + * @param[in] graph Reference to the CSR graph we process on + * @param[in] max_grid_size Maximum CTA occupancy + * @param[in] num_gpus Number of GPUs + * @param[in] max_queue_sizing Scaling factor used in edge mapping + * @param[in] iterations Number of iterations for running the test + * @param[in] traversal_mode Strategy: Load-balanced or Dynamic cooperative + * @param[in] context CudaContext pointer for ModernGPU APIs + * + */ +template +void RunTest( + const Csr &graph, + int max_grid_size, + int num_gpus, + double max_queue_sizing, + int iterations, + int traversal_mode, + CudaContext& context) { + typedef VISProblem Problem; + + // allocate host-side array (for both reference and GPU-computed results) + VertexId *r_labels = (VertexId*)malloc(sizeof(VertexId) * graph.nodes); + VertexId *h_labels = (VertexId*)malloc(sizeof(VertexId) * graph.nodes); + + // allocate primitive enactor map + VISEnactor enactor(g_verbose); + + // allocate primitive problem on GPU + Problem *csr_problem = new Problem; + util::GRError(csr_problem->Init( + g_stream_from_host, + graph, + num_gpus), + "Problem Initialization Failed", __FILE__, __LINE__); + + Stats *stats = new Stats("Vertex-Induced Subgraph"); + + // perform calculation + GpuTimer gpu_timer; + + float elapsed = 0.0f; + + for (int iter = 0; iter < iterations; ++iter) { + util::GRError( + csr_problem->Reset(enactor.GetFrontierType(), + max_queue_sizing), + "Problem Data Reset 
Failed", __FILE__, __LINE__); + gpu_timer.Start(); + util::GRError( + enactor.template Enact(context, csr_problem, + max_grid_size, traversal_mode), + "Problem Enact Failed", __FILE__, __LINE__); + gpu_timer.Stop(); + elapsed += gpu_timer.ElapsedMillis(); + } + + elapsed /= iterations; + + // extract results + util::GRError(csr_problem->Extract(h_labels), + "Problem Data Extraction Failed", __FILE__, __LINE__); + + // compute reference CPU validation solution + if (!g_quick) { + printf("-- computing reference value ... (currently missing)\n"); + SimpleReference(graph); + printf("-- validation: (currently missing)\n"); + } + + // display solution + DisplaySolution(graph); + + // display statistics + VertexId num_iteratios = 0; + enactor.GetStatistics(num_iteratios); + DisplayStats(*stats, graph, elapsed, num_iteratios); + + // clean up + delete stats; + if (csr_problem) delete csr_problem; + if (r_labels) free(r_labels); + if (h_labels) free(h_labels); + + cudaDeviceSynchronize(); +} + +/** + * @brief Test entry + * + * @tparam VertexId + * @tparam SizeT + * @tparam Value + * + * @param[in] graph Reference to the CSR graph we process on + * @param[in] args Reference to the command line arguments + * @param[in] context CudaContext pointer for ModernGPU APIs + */ +template +void RunTest( + Csr &graph, + CommandLineArgs &args, + CudaContext& context) { + bool instrumented = 0; // Collect instrumentation from kernels + int max_grid_size = 0; // Maximum grid size (0: up to the enactor) + int num_gpus = 1; // Number of GPUs for multi-GPU enactor + double max_queue_sizing = 1.0; // Maximum scaling factor for work queues + int iterations = 1; // Number of runs for testing + int traversal_mode = -1; // Load-balanced or Dynamic cooperative + g_quick = 0; // Whether or not to skip CPU validation + + // choose traversal mode + args.GetCmdLineArgument("traversal-mode", traversal_mode); + if (traversal_mode == -1) { + traversal_mode = graph.GetAverageDegree() > 8 ? 
0 : 1; + } + + g_verbose = args.CheckCmdLineFlag("v"); + instrumented = args.CheckCmdLineFlag("instrumented"); + g_quick = args.CheckCmdLineFlag("quick"); + + args.GetCmdLineArgument("iteration-num", iterations); + args.GetCmdLineArgument("grid-size", max_grid_size); + args.GetCmdLineArgument("queue-sizing", max_queue_sizing); + + if (instrumented) { + RunTest( + graph, + max_grid_size, + num_gpus, + max_queue_sizing, + iterations, + traversal_mode, + context); + } else { + RunTest( + graph, + max_grid_size, + num_gpus, + max_queue_sizing, + iterations, + traversal_mode, + context); + } +} + +// ---------------------------------------------------------------------------- +// Main +// ---------------------------------------------------------------------------- +int main(int argc, char** argv) { + CommandLineArgs args(argc, argv); + if ((argc < 2) || (args.CheckCmdLineFlag("help"))) { + Usage(); + return 1; + } + + int device = 0; + args.GetCmdLineArgument("device", device); + ContextPtr context = mgpu::CreateCudaDevice(device); + + // parse graph-construction parameters + g_undirected = args.CheckCmdLineFlag("undirected"); + + std::string graph_type = argv[1]; + int flags = args.ParsedArgc(); + int graph_args = argc - flags - 1; + if (graph_args < 1) { + Usage(); + return 1; + } + + typedef int VertexId; // Use as the vertex identifier + typedef int SizeT; // Use as the graph size type + typedef int Value; // Use as the value type + + if (graph_type == "market") { + // matrix-market coordinate-formatted graph + Csr csr(false); + char *name = (graph_args == 2) ? 
argv[2] : NULL; + if (graphio::BuildMarketGraph( + name, csr, g_undirected, false) != 0) { + return 1; + } + + csr.DisplayGraph(); // display graph adjacent list + csr.PrintHistogram(); // display graph histogram + RunTest(csr, args, *context); // run sample test + + } else { + fprintf(stderr, "Unspecified graph type\n"); + return 1; + } + return 0; +} From b61303a6aa5227be39707a5e214442c9d37db3e7 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Fri, 19 Jun 2015 11:35:23 -0700 Subject: [PATCH 18/36] cache binary csr, way much faster for testing graph i/o --- gunrock/coo.cuh | 9 ++-- gunrock/csr.cuh | 86 ++++++++++++++------------------------ gunrock/graphio/market.cuh | 27 ++++++------ 3 files changed, 46 insertions(+), 76 deletions(-) diff --git a/gunrock/coo.cuh b/gunrock/coo.cuh index e6b585a4c..008dec22b 100644 --- a/gunrock/coo.cuh +++ b/gunrock/coo.cuh @@ -37,8 +37,7 @@ struct Coo { Coo(VertexId row, VertexId col, Value val) : row(row), col(col), val(val) {} - void Val(Value &value) - { + void Val(Value &value) { value = val; } }; @@ -71,8 +70,7 @@ struct Coo { template bool RowFirstTupleCompare ( Coo elem1, - Coo elem2) -{ + Coo elem2) { if (elem1.row < elem2.row) { // Sort edges by source node return true; @@ -97,8 +95,7 @@ bool RowFirstTupleCompare ( template bool ColumnFirstTupleCompare ( Coo elem1, - Coo elem2) -{ + Coo elem2) { if (elem1.col < elem2.col) { // Sort edges by source node return true; diff --git a/gunrock/csr.cuh b/gunrock/csr.cuh index d4d5da2ff..0c2f46373 100644 --- a/gunrock/csr.cuh +++ b/gunrock/csr.cuh @@ -129,30 +129,22 @@ struct Csr { /** * - * @brief Store graph information into files + * @brief Store graph information into a file * */ - void WriteToFile( - char *file_name, - bool undirected, - bool reversed, - SizeT num_nodes, - SizeT num_edges, - SizeT *row_offsets, - VertexId *col_indices, - Value *edge_values = NULL) { - std::ofstream output(file_name); - if (output.is_open()) { - output << num_nodes << " " << num_edges << " "; - 
std::copy(row_offsets, row_offsets + num_nodes + 1, - std::ostream_iterator(output, " ")); - std::copy(column_indices, column_indices + num_edges, - std::ostream_iterator(output, " ")); + void WriteToFile(char *file_name, SizeT v, SizeT e, SizeT *row, + VertexId *col, Value *edge_values = NULL) { + std::ofstream fout(file_name); + if (fout.is_open()) { + fout.write(reinterpret_cast(&v), sizeof(SizeT)); + fout.write(reinterpret_cast(&e), sizeof(SizeT)); + fout.write(reinterpret_cast(row), (v+1)*sizeof(SizeT)); + fout.write(reinterpret_cast(col), e*sizeof(VertexId)); if (edge_values != NULL) { - std::copy(edge_values, edge_values + num_edges, - std::ostream_iterator(output, " ")); + fout.write(reinterpret_cast(edge_values), + e * sizeof(Value)); } - output.close(); + fout.close(); } } @@ -162,40 +154,26 @@ struct Csr { * */ template - void FromCsr(char *f_in, bool undirected, bool reversed) { - printf(" Reading directly from previously stored CSR arrays ...\n"); - - std::ifstream _file(f_in); - - if (_file.is_open()) { - time_t mark1 = time(NULL); - - std::istream_iterator start(_file), end; - std::vector v(start, end); - - SizeT csr_nodes = v[0]; - SizeT csr_edges = v[1]; - + void FromCsr(char *f_in) { + printf(" Reading directly from stored binary CSR arrays ...\n"); + time_t mark1 = time(NULL); - FromScratch(csr_nodes, csr_edges); + std::ifstream input(f_in); + SizeT v, e; + input.read(reinterpret_cast(&v), sizeof(SizeT)); + input.read(reinterpret_cast(&e), sizeof(SizeT)); - std::copy(v.begin() + 2, v.begin() + 3 + csr_nodes, row_offsets); - std::copy(v.begin() + 3 + csr_nodes, - v.begin() + 3 + csr_nodes + csr_edges, - column_indices); - if (LOAD_EDGE_VALUES) { - std::copy(v.begin() + 3 + csr_nodes + csr_edges, - v.end(), edge_values); - } + FromScratch(v, e); - time_t mark2 = time(NULL); - printf("Done reading (%ds).\n", (int) (mark2 - mark1)); - - v.clear(); - } else { - perror("Unable To Open The File."); + input.read(reinterpret_cast(row_offsets), (v + 
1)*sizeof(SizeT)); + input.read(reinterpret_cast(column_indices), e*sizeof(VertexId)); + if (LOAD_EDGE_VALUES) { + input.read(reinterpret_cast(edge_values), e*sizeof(Value)); } + time_t mark2 = time(NULL); + printf("Done reading (%ds).\n", (int) (mark2 - mark1)); + // compute out_nodes SizeT out_node = 0; for (SizeT node = 0; node < nodes; node++) { @@ -286,15 +264,14 @@ struct Csr { // Write offsets, indices, node, edges etc. into file if (LOAD_EDGE_VALUES) { - WriteToFile(output_file, undirected, reversed, nodes, edges, + WriteToFile(output_file, nodes, edges, row_offsets, column_indices, edge_values); } else { - WriteToFile(output_file, undirected, reversed, nodes, edges, + WriteToFile(output_file, nodes, edges, row_offsets, column_indices); } if (new_coo) free(new_coo); - fflush(stdout); // Compute out_nodes SizeT out_node = 0; @@ -342,7 +319,6 @@ struct Csr { } printf("\nDegree Histogram (%lld vertices, %lld edges):\n", (long long) nodes, (long long) edges); - printf(" Degree 0: %d (%.2f%%)\n", log_counts[0], (float) log_counts[0] * 100.0 / nodes); for (int i = 0; i < max_log_length + 1; i++) { @@ -369,7 +345,7 @@ struct Csr { edge++) { printf("["); util::PrintValue(column_indices[edge]); - if (with_edge_value) { + if (with_edge_value && edge_values != NULL) { printf(","); util::PrintValue(edge_values[edge]); } @@ -438,7 +414,7 @@ struct Csr { double mean = 0, count = 0; for (SizeT node = 0; node < nodes; ++node) { count += 1; - mean += (row_offsets[node + 1] - row_offsets[node] - mean) / count; + mean += (row_offsets[node+1]-row_offsets[node]-mean)/count; } average_degree = static_cast(mean); } diff --git a/gunrock/graphio/market.cuh b/gunrock/graphio/market.cuh index 74284f065..2fd7e92a1 100644 --- a/gunrock/graphio/market.cuh +++ b/gunrock/graphio/market.cuh @@ -215,12 +215,9 @@ int ReadMarketStream( * */ template -int ReadCsrArrays( - char *f_in, - Csr &csr_graph, - bool undirected, - bool reversed) { - csr_graph.template FromCsr(f_in, undirected, 
reversed); +int ReadCsrArrays(char *f_in, Csr &csr_graph, + bool undirected, bool reversed) { + csr_graph.template FromCsr(f_in); return 0; } @@ -301,19 +298,19 @@ int BuildMarketGraph( char *file_name = basename(temp2); if (undirected) { - char ud[256]; - sprintf(ud, "%s/.%s_undirected_csr", file_path, file_name); - if (BuildMarketGraph(file_in, ud, graph, true, false) != 0) + char ud[256]; // undirected graph + sprintf(ud, "%s/.%s.ud.bin", file_path, file_name); + if (BuildMarketGraph(file_in, ud, graph, true, false) != 0) return 1; } else if (!undirected && reversed) { - char rv[256]; - sprintf(rv, "%s/.%s_reversed_csr", file_path, file_name); - if (BuildMarketGraph(file_in, rv, graph, false, true) != 0) + char rv[256]; // reversed graph + sprintf(rv, "%s/.%s.rv.bin", file_path, file_name); + if (BuildMarketGraph(file_in, rv, graph, false, true) != 0) return 1; } else if (!undirected && !reversed) { - char nr[256]; - sprintf(nr, "%s/.%s_nonreversed_csr", file_path, file_name); - if (BuildMarketGraph(file_in, nr, graph, false, false) != 0) + char di[256]; // directed graph + sprintf(di, "%s/.%s.di.bin", file_path, file_name); + if (BuildMarketGraph(file_in, di, graph, false, false) != 0) return 1; } else { fprintf(stderr, "Unspecified Graph Type.\n"); From b51ee68af6ecefb060bda96a1357aad64fa9fff5 Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Fri, 19 Jun 2015 14:17:45 -0700 Subject: [PATCH 19/36] Almost fixed the BC bug. still occasionally get early quit in advance kernel. Need to figure out why. 
--- gunrock/app/bc/bc_enactor.cuh | 90 +------------------ gunrock/app/bc/bc_functor.cuh | 1 - gunrock/oprtr/edge_map_partitioned/kernel.cuh | 23 ++--- gunrock/util/test_utils.cuh | 5 +- tests/bc/test_bc.cu | 8 +- 5 files changed, 19 insertions(+), 108 deletions(-) diff --git a/gunrock/app/bc/bc_enactor.cuh b/gunrock/app/bc/bc_enactor.cuh index 24b4abe99..6cc6436c2 100644 --- a/gunrock/app/bc/bc_enactor.cuh +++ b/gunrock/app/bc/bc_enactor.cuh @@ -257,12 +257,9 @@ class BCEnactor : public EnactorBase //util::MemsetAddKernel<<<128, 128>>>(d_scanned_edges, (unsigned int)0, graph_slice->edges); // Forward BC iteration while (done[0] < 0) { - if (frontier_attribute.queue_length > 0 && enactor_stats.iteration > 0) { SizeT cur_offset = forward_queue_offsets.top(); - //printf("offset:%d, current length:%d\n", cur_offset, frontier_attribute.queue_length); util::MemsetCopyVectorKernel<<<128, 128>>>(&problem->data_slices[0]->d_forward_output[cur_offset], graph_slice->frontier_queues.d_keys[frontier_attribute.selector], frontier_attribute.queue_length); - //util::DisplayDeviceResults(graph_slice->frontier_queues.d_keys[frontier_attribute.selector], frontier_attribute.queue_length); forward_queue_offsets.push(frontier_attribute.queue_length+cur_offset); } @@ -374,93 +371,14 @@ class BCEnactor : public EnactorBase if (DEBUG) printf("\n%lld", (long long) enactor_stats.iteration); } - //delete[] sigmas; - //delete[] labels; - //delete[] vids; - //util::DisplayDeviceResults(problem->data_slices[0]->d_forward_output, forward_queue_offsets.back()); - - /*enactor_stats.iteration = enactor_stats.iteration - 2; - - frontier_attribute.queue_length = graph_slice->nodes; - frontier_attribute.queue_index = 0; // Work queue index - frontier_attribute.selector = 0; frontier_attribute.queue_reset = true; - done[0] = -1; - - // Prepare the label array - VertexId label_adjust = -enactor_stats.iteration; - util::MemsetAddKernel<<<128, 128>>>(problem->data_slices[0]->d_labels, label_adjust, 
graph_slice->nodes);*/ - if (DEBUG) printf("\nStart backward phase\n%lld", (long long) enactor_stats.iteration); // Backward BC iteration SizeT top_offset = forward_queue_offsets.top(); - if (DEBUG) printf("top offsets:%d\n", top_offset); forward_queue_offsets.pop(); while (!forward_queue_offsets.empty()) { frontier_attribute.queue_length = top_offset-forward_queue_offsets.top(); - util::DisplayDeviceResults(problem->data_slices[0]->d_sigmas, &problem->data_slices[0]->d_forward_output[forward_queue_offsets.top()], graph_slice->nodes, frontier_attribute.queue_length); - /*frontier_attribute.queue_length = graph_slice->nodes; - // Fill in the frontier_queues - util::MemsetIdxKernel<<<128, 128>>>(graph_slice->frontier_queues.d_keys[0], graph_slice->nodes); - - // Filter - gunrock::oprtr::filter::Kernel - <<>>( - -1, - frontier_attribute.queue_reset, - frontier_attribute.queue_index, - enactor_stats.num_gpus, - frontier_attribute.queue_length, - d_done, - graph_slice->frontier_queues.d_keys[0], // d_in_queue - NULL, - graph_slice->frontier_queues.d_keys[1], // d_out_queue - data_slice, - NULL, - work_progress, - graph_slice->nodes, // max_in_queue - graph_slice->edges, // max_out_queue - enactor_stats.filter_kernel_stats);*/ - - - // Only need to reset queue for once - /*if (frontier_attribute.queue_reset) - frontier_attribute.queue_reset = false; */ - - //if (/*DEBUG &&*/ (retval = util::GRError(cudaThreadSynchronize(), "edge_map_backward::Kernel failed", __FILE__, __LINE__))) break; - /*cudaEventQuery(throttle_event); // give host memory mapped visibility to GPU updates - - frontier_attribute.queue_index++; - frontier_attribute.selector ^= 1; - - if (AdvanceKernelPolicy::ADVANCE_MODE == gunrock::oprtr::advance::LB) { - if (retval = work_progress.GetQueueLength(frontier_attribute.queue_index, frontier_attribute.queue_length)) break; - } - - if (DEBUG) { - if (retval = work_progress.GetQueueLength(frontier_attribute.queue_index, frontier_attribute.queue_length)) break; 
- printf(", %lld", (long long) frontier_attribute.queue_length); - } - - if (INSTRUMENT) { - if (retval = enactor_stats.advance_kernel_stats.Accumulate( - enactor_stats.advance_grid_size, - enactor_stats.total_runtimes, - enactor_stats.total_lifetimes)) break; - } - - // Throttle - if (enactor_stats.iteration & 1) { - if (retval = util::GRError(cudaEventRecord(throttle_event), - "BCEnactor cudaEventRecord throttle_event failed", __FILE__, __LINE__)) break; - } else { - if (retval = util::GRError(cudaEventSynchronize(throttle_event), - "BCEnactor cudaEventSynchronize throttle_event failed", __FILE__, __LINE__)) break; - } - - // Check if done - if (done[0] == 0) break;*/ // Edge Map if (forward_queue_offsets.top() > 0) { gunrock::oprtr::advance::LaunchKernel( @@ -513,11 +431,6 @@ class BCEnactor : public EnactorBase if (DEBUG && (retval = util::GRError(cudaThreadSynchronize(), "filter_forward::Kernel failed", __FILE__, __LINE__))) break; cudaEventQuery(throttle_event); // give host memory mapped visibility to GPU updates - //frontier_attribute.queue_index++; - //frontier_attribute.selector ^= 1; - - //util::MemsetAddKernel<<<128, 128>>>(problem->data_slices[0]->d_labels, 1, graph_slice->nodes); - if (INSTRUMENT || DEBUG) { if (retval = work_progress.GetQueueLength(frontier_attribute.queue_index, frontier_attribute.queue_length)) break; if (INSTRUMENT) { @@ -529,7 +442,6 @@ class BCEnactor : public EnactorBase } top_offset = forward_queue_offsets.top(); forward_queue_offsets.pop(); - if (DEBUG) printf("top offsets:%d\n", top_offset); } if (retval) break; @@ -593,7 +505,7 @@ class BCEnactor : public EnactorBase BCProblem, // Problem data type 300, // CUDA_ARCH INSTRUMENT, // INSTRUMENT - 8, // MIN_CTA_OCCUPANCY + 1, // MIN_CTA_OCCUPANCY 10, // LOG_THREADS 8, // LOG_BLOCKS 32*128, // LIGHT_EDGE_THRESHOLD (used for partitioned advance mode) diff --git a/gunrock/app/bc/bc_functor.cuh b/gunrock/app/bc/bc_functor.cuh index 6b8e594d6..2e8ed3fbc 100644 --- 
a/gunrock/app/bc/bc_functor.cuh +++ b/gunrock/app/bc/bc_functor.cuh @@ -162,7 +162,6 @@ struct BackwardFunctor */ static __device__ __forceinline__ bool CondEdge(VertexId s_id, VertexId d_id, DataSlice *problem, VertexId e_id = 0, VertexId e_id_in = 0) { - VertexId s_label; VertexId d_label; util::io::ModifiedLoad::Ld( diff --git a/gunrock/oprtr/edge_map_partitioned/kernel.cuh b/gunrock/oprtr/edge_map_partitioned/kernel.cuh index 55e2a53b4..ba7653589 100644 --- a/gunrock/oprtr/edge_map_partitioned/kernel.cuh +++ b/gunrock/oprtr/edge_map_partitioned/kernel.cuh @@ -272,15 +272,12 @@ struct Dispatch my_thread_start = bid * partition_size; my_thread_end = (bid+1)*partition_size < output_queue_len ? (bid+1)*partition_size : output_queue_len; - //printf("tid:%d, bid:%d, m_thread_start:%d, m_thread_end:%d\n",tid, bid, my_thread_start, my_thread_end); if (my_thread_start >= output_queue_len) return; int my_start_partition = partition_starts[bid]; int my_end_partition = partition_starts[bid+1] > input_queue_len ? partition_starts[bid+1] : input_queue_len; - //if (tid == 0 && bid == 252) - // printf("bid(%d) < num_partitions-1(%d)?, partition_starts[bid+1]+1:%d\n", bid, num_partitions-1, partition_starts[bid+1]+1); __shared__ typename KernelPolicy::SmemStorage smem_storage; // smem_storage.s_edges[NT] @@ -302,8 +299,6 @@ struct Dispatch __syncthreads(); s_edges[tid] = (my_start_partition + tid < my_end_partition ? d_scanned_edges[my_start_partition + tid] - pre_offset : max_edges); - //if (bid == 252 && tid == 2) - // printf("start_partition+tid:%d < my_end_partition:%d ?, d_queue[%d]:%d\n", my_start_partition+tid, my_end_partition, my_start_partition+tid, d_queue[my_start_partition+tid]); if (ADVANCE_TYPE == gunrock::oprtr::advance::V2V || ADVANCE_TYPE == gunrock::oprtr::advance::V2E) { s_vertices[tid] = my_start_partition + tid < my_end_partition ? 
d_queue[my_start_partition+tid] : -1; s_edge_ids[tid] = 0; @@ -553,12 +548,12 @@ struct Dispatch } // Determine work decomposition - if (blockIdx.x == 0 && threadIdx.x == 0) { + if (blockIdx.x == 0 && threadIdx.x == 0) { // obtain problem size if (queue_reset) { - work_progress.StoreQueueLength(input_queue_len, queue_index); + //work_progress.StoreQueueLength(input_queue_len, queue_index); } else { @@ -568,17 +563,17 @@ struct Dispatch if (input_queue_len == 0) { if (d_done) d_done[0] = input_queue_len; } - } + } work_progress.Enqueue(output_queue_len, queue_index+1); // Reset our next outgoing queue counter to zero work_progress.template StoreQueueLength(0, queue_index + 2); work_progress.template PrepResetSteal(queue_index + 1); - } + } // Barrier to protect work decomposition - __syncthreads(); + __syncthreads(); unsigned int range = input_queue_len; int tid = threadIdx.x; @@ -606,16 +601,16 @@ struct Dispatch else s_vertices[tid] = (my_id < range ? d_column_indices[d_queue[my_id]] : max_vertices); s_edge_ids[tid] = (my_id < range ? d_queue[my_id] : max_vertices); - } + } __syncthreads(); - unsigned int size = s_edges[end_id]; + unsigned int size = s_edges[end_id]; VertexId v, e, e_id; int v_index = BinarySearch(tid, s_edges); v = s_vertices[v_index]; e_id = s_edge_ids[v_index]; - int end_last = (v_index < KernelPolicy::THREADS ? s_edges[v_index] : max_vertices); + int end_last = (v_index < KernelPolicy::THREADS ? 
s_edges[v_index] : max_vertices); for (int i = tid; i < size; i += KernelPolicy::THREADS) { @@ -713,7 +708,7 @@ struct Dispatch } } } - } else { + } else { //v:pre, u:neighbor, outoffset:offset+i if (Functor::CondEdge(v, u, problem, lookup, e_id)) { Functor::ApplyEdge(v, u, problem, lookup, e_id); diff --git a/gunrock/util/test_utils.cuh b/gunrock/util/test_utils.cuh index 93f223642..11eff1ddb 100644 --- a/gunrock/util/test_utils.cuh +++ b/gunrock/util/test_utils.cuh @@ -446,7 +446,8 @@ int CompareResults( is_right = false; } } - if (!is_right && flag == 0) + + if (!is_right) { printf("\nINCORRECT: [%lu]: ", (unsigned long) i); PrintValue(computed[i]); @@ -473,7 +474,7 @@ int CompareResults( flag += 1; //return flag; } - if (!is_right && flag > 0) flag += 1; + //if (!is_right && flag > 0) flag += 1; } printf("\n"); if (!flag) diff --git a/tests/bc/test_bc.cu b/tests/bc/test_bc.cu index 09a3d1f2d..a3d2f5514 100644 --- a/tests/bc/test_bc.cu +++ b/tests/bc/test_bc.cu @@ -287,9 +287,12 @@ void RefCPUBC( for (int iter = search_depth - 2; iter > 0; --iter) { + + int cur_level = 0; for (int node = 0; node < graph.nodes; ++node) { if (source_path[node] == iter) { + ++cur_level; int edges_begin = graph.row_offsets[node]; int edges_end = graph.row_offsets[node+1]; @@ -306,9 +309,7 @@ void RefCPUBC( } for (int i = 0; i < graph.nodes; ++i) - { bc_values[i] *= 0.5f; - } cpu_timer.Stop(); float elapsed = cpu_timer.ElapsedMillis(); @@ -433,6 +434,7 @@ void RunTests( gpu_timer.Start(); for (VertexId i = start_src; i < end_src; ++i) { + printf("src:%d\n", i); util::GRError(csr_problem->Reset(i, bc_enactor.GetFrontierType(), max_queue_sizing), "BC Problem Data Reset Failed", __FILE__, __LINE__); util::GRError(bc_enactor.template Enact(context, csr_problem, i, max_grid_size), "BC Problem Enact Failed", __FILE__, __LINE__); } @@ -615,7 +617,9 @@ int main( int argc, char** argv) csr.PrintHistogram(); //csr.DisplayGraph(); + csr.DisplayNeighborList(1263); fflush(stdout); + printf("1263 
row offsets:%d\n", csr.row_offsets[1263]); // Run tests RunTests(csr, args, *context); From 0f0e19c0cb481e7de51acdb292d1ecaa6c180b5c Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Fri, 19 Jun 2015 17:53:31 -0700 Subject: [PATCH 20/36] go back to prev timer which correct on large dataset --- gunrock/util/test_utils.h | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/gunrock/util/test_utils.h b/gunrock/util/test_utils.h index 8c5e9d573..c2433c00c 100644 --- a/gunrock/util/test_utils.h +++ b/gunrock/util/test_utils.h @@ -19,7 +19,6 @@ #undef small // Windows is terrible for polluting macro namespace #else #include - #include #endif #include @@ -217,34 +216,6 @@ struct CpuTimer return (stop - start) * 1000; } -#elif defined(CLOCK_PROCESS_CPUTIME_ID) - - timespec start; - timespec stop; - - void Start() - { - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start); - } - - void Stop() - { - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &stop); - } - - float ElapsedMillis() - { - timespec temp; - if ((stop.tv_nsec-start.tv_nsec)<0) { - temp.tv_sec = stop.tv_sec-start.tv_sec-1; - temp.tv_nsec = 1000000000+stop.tv_nsec-start.tv_nsec; - } else { - temp.tv_sec = stop.tv_sec-start.tv_sec; - temp.tv_nsec = stop.tv_nsec-start.tv_nsec; - } - return temp.tv_nsec/1000000.0; - } - #else rusage start; From 767ca47649a8db6f9b05185689ba71d1f1ee9085 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Sat, 20 Jun 2015 09:02:49 -0700 Subject: [PATCH 21/36] oops, wrong value type for cpu validation code --- tests/mst/test_mst.cu | 10 +++++----- tests/sssp/test_sssp.cu | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/mst/test_mst.cu b/tests/mst/test_mst.cu index d74e89233..b563517ac 100644 --- a/tests/mst/test_mst.cu +++ b/tests/mst/test_mst.cu @@ -169,7 +169,7 @@ Value SimpleReferenceMST( // Kruskal minimum spanning tree preparations using namespace boost; typedef adjacency_list< vecS, vecS, undirectedS, - no_property, property > Graph; + no_property, 
property > Graph; typedef graph_traits < Graph >::edge_descriptor Edge; typedef graph_traits < Graph >::vertex_descriptor Vertex; typedef std::pair E; @@ -312,14 +312,14 @@ void RunTests( // print the edge pairs in the minimum spanning tree DisplaySolution(graph, h_mst_output); printf("\nCORRECT.\n"); - std::cout << "CPU Computed Total Weight = " << total_weight_cpu << std::endl; - std::cout << "GPU Computed Total Weight = " << total_weight_gpu << std::endl; + std::cout << "CPU Total Weight = " << total_weight_cpu << std::endl; + std::cout << "GPU Total Weight = " << total_weight_gpu << std::endl; } else { printf("INCORRECT.\n"); - std::cout << "CPU Computed Total Weight = " << total_weight_cpu << std::endl; - std::cout << "GPU Computed Total Weight = " << total_weight_gpu << std::endl; + std::cout << "CPU Total Weight = " << total_weight_cpu << std::endl; + std::cout << "GPU Total Weight = " << total_weight_gpu << std::endl; } } diff --git a/tests/sssp/test_sssp.cu b/tests/sssp/test_sssp.cu index b084842b6..2bc204495 100644 --- a/tests/sssp/test_sssp.cu +++ b/tests/sssp/test_sssp.cu @@ -247,7 +247,7 @@ void SimpleReferenceSssp( // Prepare Boost Datatype and Data structure typedef adjacency_list > Graph; + property > Graph; typedef graph_traits::vertex_descriptor vertex_descriptor; typedef graph_traits::edge_descriptor edge_descriptor; From 29ce8a000ce8920583e2ce694555435994aad5ce Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Mon, 22 Jun 2015 08:34:37 -0700 Subject: [PATCH 22/36] BC works correctly now. disabled edge bc for now though. 
--- gunrock/oprtr/edge_map_partitioned/kernel.cuh | 2 +- tests/bc/test_bc.cu | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/gunrock/oprtr/edge_map_partitioned/kernel.cuh b/gunrock/oprtr/edge_map_partitioned/kernel.cuh index ba7653589..bce9a13b2 100644 --- a/gunrock/oprtr/edge_map_partitioned/kernel.cuh +++ b/gunrock/oprtr/edge_map_partitioned/kernel.cuh @@ -553,7 +553,7 @@ struct Dispatch // obtain problem size if (queue_reset) { - //work_progress.StoreQueueLength(input_queue_len, queue_index); + work_progress.StoreQueueLength(input_queue_len, queue_index); } else { diff --git a/tests/bc/test_bc.cu b/tests/bc/test_bc.cu index a3d2f5514..1eb3e6c20 100644 --- a/tests/bc/test_bc.cu +++ b/tests/bc/test_bc.cu @@ -225,7 +225,7 @@ void RefCPUBC( for (idx = 0; idx < graph.edges; ++idx) { //std::cout << coo[idx].row << "," << coo[idx].col << ":" << coo[idx].val << std::endl; - ebc_values[idx] = coo[idx].val; + //ebc_values[idx] = coo[idx].val; } printf("CPU BC finished in %lf msec.", elapsed); @@ -369,7 +369,7 @@ void RunTests( Value *h_bc_values = (Value*)malloc(sizeof(Value) * graph.nodes); Value *h_ebc_values = (Value*)malloc(sizeof(Value) * graph.edges); Value *reference_check_bc_values = (g_quick) ? NULL : reference_bc_values; - Value *reference_check_ebc_values = (g_quick || (src != -1)) ? NULL : reference_ebc_values; + Value *reference_check_ebc_values = NULL;//(g_quick || (src != -1)) ? NULL : reference_ebc_values; Value *reference_check_sigmas = (g_quick || (src == -1)) ? 
NULL : reference_sigmas; // Allocate BC enactor map @@ -434,7 +434,6 @@ void RunTests( gpu_timer.Start(); for (VertexId i = start_src; i < end_src; ++i) { - printf("src:%d\n", i); util::GRError(csr_problem->Reset(i, bc_enactor.GetFrontierType(), max_queue_sizing), "BC Problem Data Reset Failed", __FILE__, __LINE__); util::GRError(bc_enactor.template Enact(context, csr_problem, i, max_grid_size), "BC Problem Enact Failed", __FILE__, __LINE__); } @@ -617,9 +616,6 @@ int main( int argc, char** argv) csr.PrintHistogram(); //csr.DisplayGraph(); - csr.DisplayNeighborList(1263); - fflush(stdout); - printf("1263 row offsets:%d\n", csr.row_offsets[1263]); // Run tests RunTests(csr, args, *context); From 64a12ba2941a3c75978408b352f95e89f59501bb Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Mon, 22 Jun 2015 08:43:13 -0700 Subject: [PATCH 23/36] fix warning --- gunrock/graphio/market.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gunrock/graphio/market.cuh b/gunrock/graphio/market.cuh index 2fd7e92a1..12c9a3235 100644 --- a/gunrock/graphio/market.cuh +++ b/gunrock/graphio/market.cuh @@ -129,12 +129,12 @@ int ReadMarketStream( return -1; } - long long ll_row, ll_col; - Value ll_value; + long long ll_row, ll_col, ll_value; + // Value ll_value; // used for parse float / double int num_input; if (LOAD_VALUES) { if ((num_input = sscanf( - line, "%lld %lld %d", + line, "%lld %lld %lld", &ll_col, &ll_row, &ll_value)) < 2) { fprintf(stderr, "Error parsing MARKET graph: badly formed edge\n"); From fd3f5b9e51aeea82aa1947e540a3c7b0cd756950 Mon Sep 17 00:00:00 2001 From: wyd855 Date: Mon, 22 Jun 2015 10:24:46 -0700 Subject: [PATCH 24/36] fix a warning --- gunrock/app/pr/pr_enactor.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunrock/app/pr/pr_enactor.cuh b/gunrock/app/pr/pr_enactor.cuh index 6217704c6..1ed922bc1 100644 --- a/gunrock/app/pr/pr_enactor.cuh +++ b/gunrock/app/pr/pr_enactor.cuh @@ -243,7 +243,7 @@ public: 
cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - SizeT frontier_attribute_queue_length = graph_slice->nodes; + long long frontier_attribute_queue_length = graph_slice->nodes; // Step through PageRank iterations while (done[0] < 0) { From 260b3e5829fb5778470116d9a88558489b0540a9 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Mon, 22 Jun 2015 12:19:20 -0700 Subject: [PATCH 25/36] adding new primitve into shared library --- gunrock/CMakeLists.txt | 1 + gunrock/app/mst/mst_app.cu | 166 ++++++++++++++++++++++++++++++++ gunrock/gunrock.h | 7 ++ shared_lib_tests/CMakeLists.txt | 17 ++-- shared_lib_tests/test_mst.c | 62 ++++++++++++ 5 files changed, 246 insertions(+), 7 deletions(-) create mode 100644 gunrock/app/mst/mst_app.cu create mode 100644 shared_lib_tests/test_mst.c diff --git a/gunrock/CMakeLists.txt b/gunrock/CMakeLists.txt index 625225964..0f1d8d6ef 100644 --- a/gunrock/CMakeLists.txt +++ b/gunrock/CMakeLists.txt @@ -26,6 +26,7 @@ set(CUFILES app/cc/cc_app.cu app/sssp/sssp_app.cu app/pr/pr_app.cu + app/mst/mst_app.cu util/test_utils.cu util/error_utils.cu ${mgpu_SOURCE_FILES}) diff --git a/gunrock/app/mst/mst_app.cu b/gunrock/app/mst/mst_app.cu new file mode 100644 index 000000000..283345aaa --- /dev/null +++ b/gunrock/app/mst/mst_app.cu @@ -0,0 +1,166 @@ +// ---------------------------------------------------------------------------- +// Gunrock -- Fast and Efficient GPU Graph Library +// ---------------------------------------------------------------------------- +// This source code is distributed under the terms of LICENSE.TXT +// in the root directory of this source distribution. 
+// ---------------------------------------------------------------------------- + +/** + * @file mst_app.cu + * + * @brief minimum spanning tree (MST) problem implementation + */ + +#include +#include + +// Graph construction utils +#include + +// Primitive-specific includes +#include +#include +#include + +// ModernGPU include +#include + +using namespace gunrock; +using namespace gunrock::util; +using namespace gunrock::oprtr; +using namespace gunrock::app::mst; + +/** + * @brief run minimum spanning tree + * + * @tparam VertexId + * @tparam Value + * @tparam SizeT + * + * @param[out] graph_o GunrockGraph type output graph + * @param[in] csr Reference to the CSR graph we process on + * @param[in] max_grid_size Maximum CTA occupancy + * @param[in] num_gpus Number of GPUs + * @param[in] context moderngpu context + */ +template +void run_mst( + GunrockGraph *graph_o, + const Csr &csr, + const int max_grid_size, + const int num_gpus, + CudaContext &context) { + typedef MSTProblem Problem; // preperations + MSTEnactor enactor(false); // enactor map + VertexId *h_mst = new VertexId[csr.edges]; // host array + Problem *problem = new Problem; // problem on GPU + util::GRError(problem->Init(false, csr, num_gpus), + "MST Problem Data Initialization Failed", __FILE__, __LINE__); + + util::GRError(problem->Reset(enactor.GetFrontierType()), + "MST Problem Data Reset Failed", __FILE__, __LINE__); + + CpuTimer gpu_timer; + + gpu_timer.Start(); + util::GRError(enactor.template Enact( + context, problem, max_grid_size), + "MST Problem Enact Failed", __FILE__, __LINE__); + gpu_timer.Stop(); + float elapsed = gpu_timer.ElapsedMillis(); + + util::GRError(problem->Extract(h_mst), + "MST Problem Data Extraction Failed", __FILE__, __LINE__); + + // output mst results: 0 | 1 mask for all edges + graph_o->edge_values = (int*)&h_mst[0]; + + if (problem) { delete problem; } + + cudaDeviceSynchronize(); +} + +/** + * @brief dispatch function to handle data types + * + * @param[out] graph_o 
GunrockGraph type output graph + * @param[in] graph_i GunrockGraph type input graph + * @param[in] configs MST-specific configurations + * @param[in] datatype data type configurations + * @param[in] context moderngpu context parameter + */ +void dispatch_mst( + GunrockGraph *graph_o, + const GunrockGraph *graph_i, + const GunrockConfig configs, + const GunrockDataType datatype, + CudaContext &context) { + switch (datatype.VTXID_TYPE) { + case VTXID_INT: { + switch (datatype.SIZET_TYPE) { + case SIZET_INT: { + switch (datatype.VALUE_TYPE) { + case VALUE_INT: { // template type = + // create a CSR formatted graph + Csr csr(false); + csr.nodes = graph_i->num_nodes; + csr.edges = graph_i->num_edges; + csr.row_offsets = (int*)graph_i->row_offsets; + csr.column_indices = (int*)graph_i->col_indices; + csr.edge_values = (int*)graph_i->edge_values; + // configurations if necessary + int num_gpus = 1; // number of GPU(s) to use + int max_grid_size = 0; // leave it up tp the enactor + run_mst( + graph_o, csr, max_grid_size, num_gpus, context); + // reset for free memory + csr.row_offsets = NULL; + csr.column_indices = NULL; + csr.edge_values = NULL; + break; + } + case VALUE_UINT: { // template type = + printf("Not Yet Support This DataType Combination.\n"); + break; + } + case VALUE_FLOAT: { // template type = + printf("Not Yet Support This DataType Combination.\n"); + break; + } + } + break; + } + } + break; + } + } +} + +/** + * @brief run_mst entry + * + * @tparam VertexId + * @tparam Value + * @tparam SizeT + * + * @param[out] graph_o GunrockGraph type output graph + * @param[in] graph_i GunrockGraph type input graph + * @param[in] configs Gunrock primitive-specific configurations + * @param[in] datatype data type configurations + */ +void gunrock_mst( + GunrockGraph *graph_o, + const GunrockGraph *graph_i, + const GunrockConfig configs, + const GunrockDataType datatype) { + int device = 0; // default use GPU 0 + device = configs.device; + ContextPtr context = 
mgpu::CreateCudaDevice(device); + dispatch_mst(graph_o, graph_i, configs, datatype, *context); +} + +// Leave this at the end of the file +// Local Variables: +// mode:c++ +// c-file-style: "NVIDIA" +// End: diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index ee695951f..96f1ddf41 100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -143,6 +143,13 @@ void gunrock_topk_func( struct GunrockConfig configs, struct GunrockDataType data_type); +// Minimum spanning tree +void gunrock_mst( + struct GunrockGraph *graph_out, + const struct GunrockGraph *graph_in, + struct GunrockConfig configs, + struct GunrockDataType data_type); + // TODO: Add other algorithms #ifdef __cplusplus diff --git a/shared_lib_tests/CMakeLists.txt b/shared_lib_tests/CMakeLists.txt index 3d3f638db..96d2ee37a 100644 --- a/shared_lib_tests/CMakeLists.txt +++ b/shared_lib_tests/CMakeLists.txt @@ -1,20 +1,23 @@ # gunrock test rig cmake file # include_directories(${gunrock_INCLUDE_DIRS}/gunrock) -add_executable (test_topk test_topk.c) +add_executable(test_topk test_topk.c) target_link_libraries(test_topk gunrock) -add_executable (test_bfs test_bfs.c) +add_executable(test_bfs test_bfs.c) target_link_libraries(test_bfs gunrock) -add_executable (test_bc test_bc.c) +add_executable(test_bc test_bc.c) target_link_libraries(test_bc gunrock) -add_executable (test_cc test_cc.c) +add_executable(test_cc test_cc.c) target_link_libraries(test_cc gunrock) -add_executable (test_sssp test_sssp.c) +add_executable(test_sssp test_sssp.c) target_link_libraries(test_sssp gunrock) -add_executable (test_pr test_pr.c) -target_link_libraries(test_pr gunrock) \ No newline at end of file +add_executable(test_pr test_pr.c) +target_link_libraries(test_pr gunrock) + +add_executable(test_mst test_mst.c) +target_link_libraries(test_mst gunrock) \ No newline at end of file diff --git a/shared_lib_tests/test_mst.c b/shared_lib_tests/test_mst.c new file mode 100644 index 000000000..47592a206 --- /dev/null +++ 
b/shared_lib_tests/test_mst.c @@ -0,0 +1,62 @@ +/** + * @brief MST test for shared library + * @file test_mst.c + * + * set input graph, configs and call function gunrock_mst + * return per node or per edge values in graph_out node_values + */ + +#include +#include + +int main(int argc, char* argv[]) +{ + // set problem data types + struct GunrockDataType dt; + dt.VTXID_TYPE = VTXID_INT; + dt.SIZET_TYPE = SIZET_INT; + dt.VALUE_TYPE = VALUE_INT; + + // configurations (optional) + struct GunrockConfig configs; + configs.device = 0; + + // tiny sample graph + size_t num_nodes = 7; + size_t num_edges = 26; + int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; + int col_indices[26] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, + 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; + int edge_values[26] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + // build graph as input + struct GunrockGraph *graph_input = + (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); + graph_input->num_nodes = num_nodes; + graph_input->num_edges = num_edges; + graph_input->row_offsets = (void*)&row_offsets[0]; + graph_input->col_indices = (void*)&col_indices[0]; + graph_input->edge_values = (void*)&edge_values[0]; + + // malloc output graph + struct GunrockGraph *graph_output = + (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); + + // call MST + gunrock_mst(graph_output, graph_input, configs, dt); + + // demo test print + printf("Demo Outputs:\n"); + int *mst_mask = (int*)malloc(sizeof(int) * num_edges); + mst_mask = (int*)graph_output->edge_values; + int edge; + for (edge = 0; edge < num_edges; ++edge) { + printf("Edge ID [%d] : Label [%d]\n", edge, mst_mask[edge]); + } + + if (graph_input) { free(graph_input); } + if (graph_output) { free(graph_output); } + + return 0; +} From 46db6fb5f255ee6764ae023e658ab43ceb20be97 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Tue, 23 Jun 2015 07:53:56 -0700 Subject: [PATCH 26/36] sssp add int support, mst add 
float support, refactor .so --- CMakeLists.txt | 2 +- gunrock/app/bc/bc_app.cu | 188 +++++++++--------------- gunrock/app/bfs/bfs_app.cu | 188 ++++++++++-------------- gunrock/app/cc/cc_app.cu | 142 +++++++----------- gunrock/app/mst/mst_app.cu | 129 +++++++++-------- gunrock/app/pr/pr_app.cu | 211 +++++++++++---------------- gunrock/app/pr/pr_enactor.cuh | 6 +- gunrock/app/sssp/sssp_app.cu | 263 +++++++++++++++++++--------------- gunrock/app/topk/topk_app.cu | 149 +++++++------------ gunrock/gunrock.h | 178 +++++++++++------------ shared_lib_tests/test_bc.c | 107 +++++++------- shared_lib_tests/test_bfs.c | 110 +++++++------- shared_lib_tests/test_cc.c | 103 ++++++------- shared_lib_tests/test_mst.c | 101 +++++++------ shared_lib_tests/test_pr.c | 118 +++++++-------- shared_lib_tests/test_sssp.c | 120 +++++++--------- shared_lib_tests/test_topk.c | 107 +++++++------- tests/hits/CMakeLists.txt | 2 +- tests/mst/test_mst.cu | 2 +- 19 files changed, 998 insertions(+), 1228 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 82a315c20..a58744b37 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,7 +193,7 @@ add_test(NAME TestSSSP COMMAND test_sssp) set_tests_properties(TestSSSP PROPERTIES PASS_REGULAR_EXPRESSION "Node ID.*1.*: Label.*39.*: Predecessor.*0") -add_test(NAME TestPR COMMAND test_pr --undirected) +add_test(NAME TestPR COMMAND test_pr) set_tests_properties(TestPR PROPERTIES PASS_REGULAR_EXPRESSION "Node ID.*2.*: Page Rank.*0.357069.") diff --git a/gunrock/app/bc/bc_app.cu b/gunrock/app/bc/bc_app.cu index fb70e9d11..af413d79b 100644 --- a/gunrock/app/bc/bc_app.cu +++ b/gunrock/app/bc/bc_app.cu @@ -8,16 +8,15 @@ /** * @file bc_app.cu * - * @brief Gunrock Betweeness Centrality Implementation + * @brief Gunrock betweeness centrality (BC) application */ -#include #include -// Graph construction utils +// graph construction utilities #include -// BC includes +// betweeness centrality includes #include #include #include @@ -36,150 +35,112 @@ using 
namespace gunrock::app::bc; * @tparam Value * @tparam SizeT * - * @param[out] ggraph_out Pointer to the output CSR graph object - * @param[in] graph Reference to the CSR graph object defined in main driver + * @param[out] graph_o Pointer to the output CSR graph object + * @param[in] csr Reference to the CSR graph object defined in main driver * @param[in] source * @param[in] max_grid_size * @param[in] num_gpus * @param[in] max_queue_sizing * @param[in] context Reference to CudaContext used by moderngpu functions */ -template < - typename VertexId, - typename Value, - typename SizeT > +template void run_bc( - GunrockGraph *ggraph_out, - const Csr &graph, - VertexId source, - int max_grid_size, - int num_gpus, - double max_queue_sizing, - CudaContext& context) { - typedef BCProblem < - VertexId, - SizeT, - Value, - true, // MARK_PREDECESSORS - false > Problem; //does not use double buffer - + GRGraph *graph_o, + const Csr &csr, + const VertexId source, + const int max_grid_size, + const int num_gpus, + const double max_queue_sizing, + CudaContext &context) { + typedef BCProblem Problem; // Allocate host-side array (for both reference and gpu-computed results) - Value *h_sigmas = (Value*)malloc(sizeof(Value) * graph.nodes); - Value *h_bc_values = (Value*)malloc(sizeof(Value) * graph.nodes); - Value *h_ebc_values = (Value*)malloc(sizeof(Value) * graph.edges); - - // Allocate BC enactor map - BCEnactor bc_enactor(false); + Value *h_sigmas = (Value*)malloc(sizeof(Value) * csr.nodes); + Value *h_bc_values = (Value*)malloc(sizeof(Value) * csr.nodes); + Value *h_ebc_values = (Value*)malloc(sizeof(Value) * csr.edges); + BCEnactor enactor(false); // Allocate BC enactor map + Problem *problem = new Problem; // Allocate problem on GPU - // Allocate problem on GPU - Problem *csr_problem = new Problem; - util::GRError(csr_problem->Init( - false, - graph, - num_gpus), + util::GRError(problem->Init(false, csr, num_gpus), "BC Problem Initialization Failed", __FILE__, __LINE__); - // 
Perform BC - GpuTimer gpu_timer; - VertexId start_source; VertexId end_source; if (source == -1) { start_source = 0; - end_source = graph.nodes; + end_source = csr.nodes; } else { start_source = source; end_source = source + 1; } - gpu_timer.Start(); for (VertexId i = start_source; i < end_source; ++i) { - util::GRError(csr_problem->Reset( - i, bc_enactor.GetFrontierType(), max_queue_sizing), + util::GRError(problem->Reset( + i, enactor.GetFrontierType(), max_queue_sizing), "BC Problem Data Reset Failed", __FILE__, __LINE__); - util::GRError(bc_enactor.template Enact( - context, csr_problem, i, max_grid_size), + util::GRError(enactor.template Enact( + context, problem, i, max_grid_size), "BC Problem Enact Failed", __FILE__, __LINE__); } util::MemsetScaleKernel <<< 128, 128>>>( - csr_problem->data_slices[0]->d_bc_values, (Value)0.5f, (int)graph.nodes); - - gpu_timer.Stop(); - - float elapsed = gpu_timer.ElapsedMillis(); + problem->data_slices[0]->d_bc_values, (Value)0.5f, (int)csr.nodes); - //double avg_duty = 0.0; - //bc_enactor.GetStatistics(avg_duty); - - // Copy out results to Host Device - util::GRError(csr_problem->Extract(h_sigmas, h_bc_values, h_ebc_values), + util::GRError(problem->Extract(h_sigmas, h_bc_values, h_ebc_values), "BC Problem Data Extraction Failed", __FILE__, __LINE__); - // copy h_bc_values per node to GunrockGraph output - ggraph_out->node_values = (float*)&h_bc_values[0]; - // copy h_ebc_values per edge to GunrockGraph output - ggraph_out->edge_values = (float*)&h_ebc_values[0]; - - printf("GPU Betweeness Centrality finished in %lf msec.\n", elapsed); - - // Cleanup - if (csr_problem) delete csr_problem; - //if (h_sigmas) free(h_sigmas); - //if (h_bc_values) free(h_bc_values); + graph_o->node_values = (float*)&h_bc_values[0]; // h_bc_values per node + graph_o->edge_values = (float*)&h_ebc_values[0]; // h_ebc_values per edge + if (problem) { delete problem; } cudaDeviceSynchronize(); } /** * @brief dispatch function to handle data_types * - 
* @param[out] ggraph_out GunrockGraph type output - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] bc_config bc specific configurations - * @param[in] data_type bc data_type configurations - * @param[in] context moderngpu context + * @param[out] graph_o GRGraph type output + * @param[in] graph_i GRGraph type input graph + * @param[in] config Specific configurations + * @param[in] data_t Data type configurations + * @param[in] context ModernGPU context */ void dispatch_bc( - GunrockGraph *ggraph_out, - const GunrockGraph *ggraph_in, - GunrockConfig bc_config, - GunrockDataType data_type, - CudaContext& context) { - switch (data_type.VTXID_TYPE) { + GRGraph *graph_o, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t, + CudaContext &context) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (data_type.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (data_type.VALUE_TYPE) { - case VALUE_INT: { - // template type = + switch (data_t.VALUE_TYPE) { + case VALUE_INT: { // template type = // not support yet printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_UINT: { - // template type = + case VALUE_UINT: { // template type = // not support yet printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_FLOAT: { - // template type = + case VALUE_FLOAT: { // template type = // build input csr format graph Csr csr_graph(false); - csr_graph.nodes = ggraph_in->num_nodes; - csr_graph.edges = ggraph_in->num_edges; - csr_graph.row_offsets = (int*)ggraph_in->row_offsets; - csr_graph.column_indices = (int*)ggraph_in->col_indices; + csr_graph.nodes = graph_i->num_nodes; + csr_graph.edges = graph_i->num_edges; + csr_graph.row_offsets = (int*)graph_i->row_offsets; + csr_graph.column_indices = (int*)graph_i->col_indices; // bc configurations - int src_node = -1; //!< Use whatever the specified graph-type's default is - int max_grid_size = 0; //!< maximum grid size (0: 
leave it up to the enactor) - int num_gpus = 1; //!< Number of GPUs for multi-gpu enactor to use - float max_queue_sizing = 1.0; //!< Maximum size scaling factor for work queues + int src_node = -1; // default source vertex to start + int max_grid_size = 0; // leave it up to the enactor + int num_gpus = 1; // Number of GPUs for multi-gpu + float max_queue_sizing = 1.0; // Maximum size scaling factor // determine source vertex to start bc - switch (bc_config.src_mode) { + switch (config.src_mode) { case randomize: { src_node = graphio::RandomNode(csr_graph.nodes); break; @@ -190,7 +151,7 @@ void dispatch_bc( break; } case manually: { - src_node = bc_config.src_node; + src_node = config.src_node; break; } default: { @@ -198,11 +159,11 @@ void dispatch_bc( break; } } - max_queue_sizing = bc_config.queue_size; + max_queue_sizing = config.queue_size; // lunch bc function run_bc( - ggraph_out, + graph_o, csr_graph, src_node, max_grid_size, @@ -227,29 +188,20 @@ void dispatch_bc( /* * @brief gunrock_bc function * - * @param[out] ggraph_out output of bc problem - * @param[in] ggraph_in input graph need to process on - * @param[in] bc_config gunrock primitive specific configurations - * @param[in] data_type gunrock datatype struct + * @param[out] graph_o output of bc problem + * @param[in] graph_i input graph need to process on + * @param[in] config gunrock primitive specific configurations + * @param[in] data_t gunrock data_t struct */ -void gunrock_bc_func( - GunrockGraph *ggraph_out, - const GunrockGraph *ggraph_in, - GunrockConfig bc_config, - GunrockDataType data_type) { - - // moderngpu preparations - int device = 0; - device = bc_config.device; +void gunrock_bc( + GRGraph *graph_o, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + unsigned int device = 0; + device = config.device; ContextPtr context = mgpu::CreateCudaDevice(device); - - // lunch dispatch function - dispatch_bc( - ggraph_out, - ggraph_in, - bc_config, - data_type, - 
*context); + dispatch_bc(graph_o, graph_i, config, data_t, *context); } // Leave this at the end of the file diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu index 026c7d6fc..1fe0300b5 100644 --- a/gunrock/app/bfs/bfs_app.cu +++ b/gunrock/app/bfs/bfs_app.cu @@ -8,21 +8,19 @@ /** * @file bfs_app.cu * - * @brief Gunrock Breadth-First Search implementation + * @brief Gunrock breadth-first search (BFS) application */ -#include #include -// Graph construction utils +// graph construction utilities #include -// BFS includes +// breadth-first search includes #include #include #include -// MGPU include #include using namespace gunrock; @@ -39,8 +37,8 @@ using namespace gunrock::app::bfs; * @tparam MARK_PREDECESSORS * @tparam ENABLE_IDEMPOTENCE * - * @param[out] ggraph_out Pointer to the output CSR graph - * @param[in] ggraph_in Reference to the CSR graph we process on + * @param[out] graph_o Pointer to the output CSR graph + * @param[in] graph_i Reference to the CSR graph we process on * @param[in] src Source node where BFS starts * @param[in] max_grid_size Maximum CTA occupancy * @param[in] num_gpus Number of GPUs @@ -48,115 +46,87 @@ using namespace gunrock::app::bfs; * @param[in] context Reference to CudaContext used by moderngpu functions * */ -template < - typename VertexId, - typename Value, - typename SizeT, - bool MARK_PREDECESSORS, - bool ENABLE_IDEMPOTENCE > +template void run_bfs( - GunrockGraph *ggraph_out, - const Csr &ggraph_in, - const VertexId src, - int max_grid_size, - int num_gpus, - double max_queue_sizing, - CudaContext& context) { - // Preparations - typedef BFSProblem < - VertexId, - SizeT, - Value, - MARK_PREDECESSORS, - ENABLE_IDEMPOTENCE, - (MARK_PREDECESSORS && ENABLE_IDEMPOTENCE) > Problem; - + GRGraph *graph_o, + const Csr &csr, + const VertexId src, + const int max_grid_size, + const int num_gpus, + const double max_queue_sizing, + CudaContext &context) { + typedef BFSProblem Problem; // Allocate host-side label array for 
gpu-computed results - VertexId *h_labels = (VertexId*)malloc(sizeof(VertexId) * ggraph_in.nodes); + VertexId *h_labels = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); VertexId *h_preds = NULL; if (MARK_PREDECESSORS) { - //h_preds = (VertexId*)malloc(sizeof(VertexId) * ggraph_in.nodes); + //h_preds = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); } - // Allocate BFS enactor map - BFSEnactor bfs_enactor(false); - - // Allocate problem on GPU - Problem *csr_problem = new Problem; - util::GRError(csr_problem->Init( - false, - ggraph_in, - num_gpus), - "Problem BFS Initialization Failed", __FILE__, __LINE__); + BFSEnactor enactor(false); // Allocate BFS enactor map + Problem *problem = new Problem; // Allocate problem on GPU - // Perform BFS - GpuTimer gpu_timer; + util::GRError(problem->Init(false, csr, num_gpus), + "BFS Problem Initialization Failed", __FILE__, __LINE__); - util::GRError(csr_problem->Reset( - src, bfs_enactor.GetFrontierType(), max_queue_sizing), + util::GRError(problem->Reset( + src, enactor.GetFrontierType(), max_queue_sizing), "BFS Problem Data Reset Failed", __FILE__, __LINE__); - gpu_timer.Start(); - util::GRError(bfs_enactor.template Enact( - context, csr_problem, src, max_grid_size), + util::GRError(enactor.template Enact( + context, problem, src, max_grid_size), "BFS Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); - - float elapsed = gpu_timer.ElapsedMillis(); - // Copy out results back to Host - util::GRError(csr_problem->Extract(h_labels, h_preds), + util::GRError(problem->Extract(h_labels, h_preds), "BFS Problem Data Extraction Failed", __FILE__, __LINE__); - // label per node to GunrockGraph struct - ggraph_out->node_values = (int*)&h_labels[0]; + graph_o->node_values = (int*)&h_labels[0]; // label per node to GRGraph struct - // Clean up - if (csr_problem) delete csr_problem; + if (problem) delete problem; //if (h_preds) free(h_preds); - cudaDeviceSynchronize(); } /** * @brief dispatch function to handle data_types * 
- * @param[out] ggraph_out GunrockGraph type output - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] bfs_config bfs specific configurations - * @param[in] data_type bfs data_type configurations - * @param[in] context moderngpu context + * @param[out] graph_o GRGraph type output + * @param[in] graph_i GRGraph type input graph + * @param[in] config Specific configurations + * @param[in] data_t Data type configurations + * @param[in] context ModernGPU context */ void dispatch_bfs( - GunrockGraph *ggraph_out, - const GunrockGraph *ggraph_in, - GunrockConfig bfs_config, - GunrockDataType data_type, - CudaContext& context) { - switch (data_type.VTXID_TYPE) { + GRGraph *graph_o, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t, + CudaContext &context) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (data_type.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (data_type.VALUE_TYPE) { - case VALUE_INT: { - // template type = + switch (data_t.VALUE_TYPE) { + case VALUE_INT: { // template type = // build input csr format graph Csr csr_graph(false); - csr_graph.nodes = ggraph_in->num_nodes; - csr_graph.edges = ggraph_in->num_edges; - csr_graph.row_offsets = (int*)ggraph_in->row_offsets; - csr_graph.column_indices = (int*)ggraph_in->col_indices; + csr_graph.nodes = graph_i->num_nodes; + csr_graph.edges = graph_i->num_edges; + csr_graph.row_offsets = (int*)graph_i->row_offsets; + csr_graph.column_indices = (int*)graph_i->col_indices; // default configurations - int src_node = 0; //!< default source vertex to start - int num_gpus = 1; //!< number of GPUs for multi-gpu enactor to use - int max_grid_size = 0; //!< maximum grid size (0: leave it up to the enactor) - bool mark_pred = false; //!< whether to mark predecessor or not - bool idempotence = false; //!< whether or not to enable idempotence - float max_queue_sizing = 1.0f; //!< maximum size scaling factor for work queues + int src_node = 0; // 
default source vertex to start + int num_gpus = 1; // number of GPUs for multi-gpu + int max_grid_size = 0; // leave it up to the enactor + bool mark_pred = 0; // whether to mark predecessor or not + bool idempotence = 0; // whether or not enable idempotence + float max_queue_sizing = 1.0f; // maximum size scaling factor // determine source vertex to start bfs - switch (bfs_config.src_mode) { + switch (config.src_mode) { case randomize: { src_node = graphio::RandomNode(csr_graph.nodes); break; @@ -167,7 +137,7 @@ void dispatch_bfs( break; } case manually: { - src_node = bfs_config.src_node; + src_node = config.src_node; break; } default: { @@ -175,14 +145,14 @@ void dispatch_bfs( break; } } - mark_pred = bfs_config.mark_pred; - idempotence = bfs_config.idempotence; - max_queue_sizing = bfs_config.queue_size; + mark_pred = config.mark_pred; + idempotence = config.idempotence; + max_queue_sizing = config.queue_size; if (mark_pred) { if (idempotence) { run_bfs( - ggraph_out, + graph_o, csr_graph, src_node, max_grid_size, @@ -191,7 +161,7 @@ void dispatch_bfs( context); } else { run_bfs( - ggraph_out, + graph_o, csr_graph, src_node, max_grid_size, @@ -202,7 +172,7 @@ void dispatch_bfs( } else { if (idempotence) { run_bfs( - ggraph_out, + graph_o, csr_graph, src_node, max_grid_size, @@ -211,7 +181,7 @@ void dispatch_bfs( context); } else { run_bfs( - ggraph_out, + graph_o, csr_graph, src_node, max_grid_size, @@ -225,14 +195,12 @@ void dispatch_bfs( csr_graph.column_indices = NULL; break; } - case VALUE_UINT: { - // template type = + case VALUE_UINT: { // template type = // not yet support printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_FLOAT: { - // template type = + case VALUE_FLOAT: { // template type = // not yet support printf("Not Yet Support This DataType Combination.\n"); break; @@ -249,24 +217,20 @@ void dispatch_bfs( /* * @brief gunrock_bfs function * - * @param[out] ggraph_out output subgraph of bfs problem - * @param[in] 
ggraph_in input graph need to process on - * @param[in] bfs_config gunrock primitive specific configurations - * @param[in] data_type gunrock datatype struct + * @param[out] graph_o output subgraph of bfs problem + * @param[in] graph_i input graph need to process on + * @param[in] config gunrock primitive specific configurations + * @param[in] data_t gunrock data_t struct */ -void gunrock_bfs_func( - GunrockGraph *ggraph_out, - const GunrockGraph *ggraph_in, - GunrockConfig bfs_config, - GunrockDataType data_type) { - - // moderngpu preparations - int device = 0; - device = bfs_config.device; +void gunrock_bfs( + GRGraph *graph_o, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + unsigned int device = 0; + device = config.device; ContextPtr context = mgpu::CreateCudaDevice(device); - - // launch dispatch function - dispatch_bfs(ggraph_out, ggraph_in, bfs_config, data_type, *context); + dispatch_bfs(graph_o, graph_i, config, data_t, *context); } // Leave this at the end of the file diff --git a/gunrock/app/cc/cc_app.cu b/gunrock/app/cc/cc_app.cu index 97723087c..1f49d0e2b 100644 --- a/gunrock/app/cc/cc_app.cu +++ b/gunrock/app/cc/cc_app.cu @@ -8,20 +8,15 @@ /** * @file cc_app.cu * - * @brief connected component implementation. 
+ * @brief connected component (CC) application */ -#include -#include -#include -#include -#include #include -// Graph construction utils +// graph construction utilities #include -// CC includes +// connected component includes #include #include #include @@ -38,112 +33,83 @@ using namespace gunrock::app::cc; * @tparam Value * @tparam SizeT * - * @param[out] ggraph_out Pointer to output CSR graph + * @param[out] graph_o Pointer to output CSR graph * @param[in] csr_graph Reference to the CSR graph we process on * @param[in] max_grid_size Maximum CTA occupancy for CC kernels * @param[in] num_gpus Number of GPUs */ -template < - typename VertexId, - typename Value, - typename SizeT > +template void run_cc( - GunrockGraph *ggraph_out, + GRGraph *graph_o, unsigned int *components, - const Csr &csr_graph, + const Csr &csr, const int max_grid_size, const int num_gpus) { - - // Define CCProblem - typedef CCProblem < - VertexId, - SizeT, - Value, - true > Problem; //use double buffer + typedef CCProblem Problem; // double buffer // Allocate host-side label array for gpu-computed results VertexId *h_component_ids - = (VertexId*)malloc(sizeof(VertexId) * csr_graph.nodes); - - // Allocate CC enactor map - CCEnactor cc_enactor(false); + = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); + CCEnactor cc_enactor(false); // Allocate CC enactor map + Problem *problem = new Problem; // Allocate problem on GPU - // Allocate problem on GPU - Problem *csr_problem = new Problem; - util::GRError(csr_problem->Init( - false, - csr_graph, - num_gpus), + util::GRError(problem->Init(false, csr, num_gpus), "CC Problem Initialization Failed", __FILE__, __LINE__); - // Reset CC Problem Data - util::GRError(csr_problem->Reset( + util::GRError(problem->Reset( cc_enactor.GetFrontierType()), "CC Problem Data Reset Failed", __FILE__, __LINE__); - // Perform Connected Component - GpuTimer gpu_timer; - gpu_timer.Start(); - // Lunch CC Enactor util::GRError(cc_enactor.template Enact( - csr_problem, 
max_grid_size), + problem, max_grid_size), "CC Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); - float elapsed = gpu_timer.ElapsedMillis(); - // Copy out results back to Host Device - util::GRError(csr_problem->Extract(h_component_ids), + util::GRError(problem->Extract(h_component_ids), "CC Problem Data Extraction Failed", __FILE__, __LINE__); // Compute number of components in graph - unsigned int temp = csr_problem->num_components; + unsigned int temp = problem->num_components; *components = temp; - // copy component_id per node to GunrockGraph struct - ggraph_out->node_values = (int*)&h_component_ids[0]; - - printf("GPU Connected Component finished in %lf msec.\n", elapsed); - - // Cleanup - if (csr_problem) delete csr_problem; + // copy component_id per node to GRGraph struct + graph_o->node_values = (int*)&h_component_ids[0]; + if (problem) delete problem; cudaDeviceSynchronize(); } /** * @brief dispatch function to handle data_types * - * @param[out] ggraph_out GunrockGraph type output - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] cc_config cc specific configurations - * @param[in] data_type data type configurations + * @param[out] graph_o GRGraph type output + * @param[in] graph_i GRGraph type input graph + * @param[in] config cc specific configurations + * @param[in] data_t data type configurations */ void dispatch_cc( - GunrockGraph *ggraph_out, - unsigned int *components, - const GunrockGraph *ggraph_in, - const GunrockConfig cc_config, - const GunrockDataType data_type) { - switch (data_type.VTXID_TYPE) { + GRGraph *graph_o, + unsigned int *components, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (data_type.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (data_type.VALUE_TYPE) { - case VALUE_INT: { - // template type = + switch (data_t.VALUE_TYPE) { + case VALUE_INT: { // template type = // build input 
csr format graph Csr csr_graph(false); - csr_graph.nodes = ggraph_in->num_nodes; - csr_graph.edges = ggraph_in->num_edges; - csr_graph.row_offsets = (int*)ggraph_in->row_offsets; - csr_graph.column_indices = (int*)ggraph_in->col_indices; + csr_graph.nodes = graph_i->num_nodes; + csr_graph.edges = graph_i->num_edges; + csr_graph.row_offsets = (int*)graph_i->row_offsets; + csr_graph.column_indices = (int*)graph_i->col_indices; - int max_grid_size = 0; //!< 0: leave it up to the enactor - int num_gpus = 1; //!< number of GPUs + int max_grid_size = 0; // 0: leave it up to the enactor + int num_gpus = 1; // number of GPUs - // lunch cc dispatch function run_cc( - ggraph_out, + graph_o, (unsigned int*)components, csr_graph, max_grid_size, @@ -154,13 +120,11 @@ void dispatch_cc( csr_graph.column_indices = NULL; break; } - case VALUE_UINT: { - // template type = + case VALUE_UINT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_FLOAT: { - // template type = + case VALUE_FLOAT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } @@ -176,20 +140,18 @@ void dispatch_cc( /* * @brief gunrock_cc function * - * @param[out] ggraph_out output subgraph of cc problem - * @param[in] ggraph_in input graph need to process on - * @param[in] cc_configs primitive specific configurations - * @param[in] data_type gunrock data_type struct + * @param[out] graph_o output subgraph of cc problem + * @param[in] graph_i input graph need to process on + * @param[in] config primitive specific configurations + * @param[in] data_t gunrock data_t struct */ -void gunrock_cc_func( - GunrockGraph *ggraph_out, - unsigned int *components, - const GunrockGraph *ggraph_in, - const GunrockConfig cc_configs, - const GunrockDataType data_type) { - - // lunch dispatch function - dispatch_cc(ggraph_out, components, ggraph_in, cc_configs, data_type); +void gunrock_cc( + GRGraph *graph_o, + unsigned int *components, + const GRGraph 
*graph_i, + const GRSetup config, + const GRTypes data_t) { + dispatch_cc(graph_o, components, graph_i, config, data_t); } // Leave this at the end of the file diff --git a/gunrock/app/mst/mst_app.cu b/gunrock/app/mst/mst_app.cu index 283345aaa..55e350471 100644 --- a/gunrock/app/mst/mst_app.cu +++ b/gunrock/app/mst/mst_app.cu @@ -8,21 +8,19 @@ /** * @file mst_app.cu * - * @brief minimum spanning tree (MST) problem implementation + * @brief minimum spanning tree (MST) application */ -#include #include -// Graph construction utils +// graph construction utilities #include -// Primitive-specific includes +// primitive-specific includes #include #include #include -// ModernGPU include #include using namespace gunrock; @@ -37,43 +35,37 @@ using namespace gunrock::app::mst; * @tparam Value * @tparam SizeT * - * @param[out] graph_o GunrockGraph type output graph - * @param[in] csr Reference to the CSR graph we process on - * @param[in] max_grid_size Maximum CTA occupancy - * @param[in] num_gpus Number of GPUs - * @param[in] context moderngpu context + * @param[out] graph_o GRGraph type output graph + * @param[in] csr Reference to the CSR graph we process on + * @param[in] max_grid Maximum CTA occupancy + * @param[in] num_gpus Number of GPUs + * @param[in] context Modern GPU context */ template void run_mst( - GunrockGraph *graph_o, + GRGraph *graph_o, const Csr &csr, - const int max_grid_size, - const int num_gpus, - CudaContext &context) { - typedef MSTProblem Problem; // preperations + const int max_grid, + const int num_gpus, + CudaContext &context) { + typedef MSTProblem Problem; // preparations MSTEnactor enactor(false); // enactor map - VertexId *h_mst = new VertexId[csr.edges]; // host array + VertexId *h_mst = new VertexId[csr.edges]; // results array Problem *problem = new Problem; // problem on GPU + util::GRError(problem->Init(false, csr, num_gpus), - "MST Problem Data Initialization Failed", __FILE__, __LINE__); + "MST Data Initialization Failed", __FILE__, 
__LINE__); util::GRError(problem->Reset(enactor.GetFrontierType()), - "MST Problem Data Reset Failed", __FILE__, __LINE__); - - CpuTimer gpu_timer; + "MST Data Reset Failed", __FILE__, __LINE__); - gpu_timer.Start(); - util::GRError(enactor.template Enact( - context, problem, max_grid_size), - "MST Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); - float elapsed = gpu_timer.ElapsedMillis(); + util::GRError(enactor.template Enact(context, problem, max_grid), + "MST Enact Failed", __FILE__, __LINE__); util::GRError(problem->Extract(h_mst), - "MST Problem Data Extraction Failed", __FILE__, __LINE__); + "MST Data Extraction Failed", __FILE__, __LINE__); - // output mst results: 0 | 1 mask for all edges - graph_o->edge_values = (int*)&h_mst[0]; + graph_o->edge_values = (int*)&h_mst[0]; // output: 0|1 mask for all edges if (problem) { delete problem; } @@ -83,23 +75,23 @@ void run_mst( /** * @brief dispatch function to handle data types * - * @param[out] graph_o GunrockGraph type output graph - * @param[in] graph_i GunrockGraph type input graph - * @param[in] configs MST-specific configurations - * @param[in] datatype data type configurations - * @param[in] context moderngpu context parameter + * @param[out] graph_o GRGraph type output graph + * @param[in] graph_i GRGraph type input graph + * @param[in] config MST-specific configurations + * @param[in] data_t Data type configurations + * @param[in] context Modern GPU context parameter */ void dispatch_mst( - GunrockGraph *graph_o, - const GunrockGraph *graph_i, - const GunrockConfig configs, - const GunrockDataType datatype, + GRGraph *graph_o, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t, CudaContext &context) { - switch (datatype.VTXID_TYPE) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (datatype.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (datatype.VALUE_TYPE) { + switch (data_t.VALUE_TYPE) { case VALUE_INT: { // template type = // 
create a CSR formatted graph Csr csr(false); @@ -108,23 +100,42 @@ void dispatch_mst( csr.row_offsets = (int*)graph_i->row_offsets; csr.column_indices = (int*)graph_i->col_indices; csr.edge_values = (int*)graph_i->edge_values; + // configurations if necessary - int num_gpus = 1; // number of GPU(s) to use - int max_grid_size = 0; // leave it up tp the enactor + int num_gpus = 1; // number of GPU(s) to use + int max_grid = 0; // leave it up to the enactor run_mst( - graph_o, csr, max_grid_size, num_gpus, context); + graph_o, csr, max_grid, num_gpus, context); + // reset for free memory - csr.row_offsets = NULL; + csr.row_offsets = NULL; csr.column_indices = NULL; - csr.edge_values = NULL; + csr.edge_values = NULL; break; } - case VALUE_UINT: { // template type = + case VALUE_UINT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } case VALUE_FLOAT: { // template type = - printf("Not Yet Support This DataType Combination.\n"); + // create a CSR formatted graph + Csr csr(false); + csr.nodes = graph_i->num_nodes; + csr.edges = graph_i->num_edges; + csr.row_offsets = (int*)graph_i->row_offsets; + csr.column_indices = (int*)graph_i->col_indices; + csr.edge_values = (float*)graph_i->edge_values; + + // configurations if necessary + int num_gpus = 1; // number of GPU(s) to use + int max_grid = 0; // leave it up to the enactor + run_mst( + graph_o, csr, max_grid, num_gpus, context); + + // reset for free memory + csr.row_offsets = NULL; + csr.column_indices = NULL; + csr.edge_values = NULL; break; } } @@ -143,20 +154,20 @@ void dispatch_mst( * @tparam Value * @tparam SizeT * - * @param[out] graph_o GunrockGraph type output graph - * @param[in] graph_i GunrockGraph type input graph - * @param[in] configs Gunrock primitive-specific configurations - * @param[in] datatype data type configurations + * @param[out] graph_o GRGraph type output graph + * @param[in] graph_i GRGraph type input graph + * @param[in] config Primitive-specific 
configurations + * @param[in] data_t Data type configurations */ void gunrock_mst( - GunrockGraph *graph_o, - const GunrockGraph *graph_i, - const GunrockConfig configs, - const GunrockDataType datatype) { - int device = 0; // default use GPU 0 - device = configs.device; + GRGraph *graph_o, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + unsigned int device = 0; + device = config.device; ContextPtr context = mgpu::CreateCudaDevice(device); - dispatch_mst(graph_o, graph_i, configs, datatype, *context); + dispatch_mst(graph_o, graph_i, config, data_t, *context); } // Leave this at the end of the file diff --git a/gunrock/app/pr/pr_app.cu b/gunrock/app/pr/pr_app.cu index 8a7200595..47a9e5862 100644 --- a/gunrock/app/pr/pr_app.cu +++ b/gunrock/app/pr/pr_app.cu @@ -8,21 +8,19 @@ /** * @file pr_app.cu * - * @brief Gunrock PageRank Implementation + * @brief Gunrock PageRank application */ -#include #include -// Graph construction utils +// graph construction utilities #include -// Page Rank includes +// page-rank includes #include #include #include -// Moderngpu include #include using namespace gunrock; @@ -31,33 +29,30 @@ using namespace gunrock::oprtr; using namespace gunrock::app::pr; /** - * @brief run page rank + * @brief run page-rank * * @tparam VertexId * @tparam Value * @tparam SizeT * - * @param[out] ggraph_out Pointer to output CSR graph + * @param[out] graph_o Pointer to output CSR graph * @param[out] node_ids Pointer to output node IDs * @param[out] page_rank Pointer to output PageRanks - * @param[in] graph Reference to the CSR graph we process on - * @param[in] source Source ID for personalized PageRank (-1 for general PageRank) - * @param[in] delta Delta value for computing Page Rank, usually set to .85 + * @param[in] csr Reference to the CSR graph we process on + * @param[in] source Source ID for personalized PR (-1 for general PageRank) + * @param[in] delta Delta value for computing PageRank, usually set to 0.85 * @param[in] 
error Error threshold value * @param[in] max_iter Max iteration for Page Rank computing * @param[in] max_grid_size Maximum CTA occupancy * @param[in] num_gpus Number of GPUs * @param[in] context CudaContext for moderngpu to use */ -template < - typename VertexId, - typename Value, - typename SizeT > -void run_page_rank( - GunrockGraph *ggraph_out, +template + void run_pagerank( + GRGraph *graph_o, VertexId *node_ids, Value *page_rank, - const Csr &graph, + const Csr &csr, const VertexId source, const Value delta, const Value error, @@ -65,106 +60,78 @@ void run_page_rank( const int max_grid_size, const int num_gpus, CudaContext& context) { - typedef PRProblem < - VertexId, - SizeT, - Value > Problem; - - // Allocate host-side label array for gpu-computed results - //Value *h_rank = (Value*)malloc(sizeof(Value) * graph.nodes); - //VertexId *h_node_id = (VertexId*)malloc(sizeof(VertexId) * graph.nodes); - - // Allocate Page Rank enactor map - PREnactor pr_enactor(false); - - // Allocate problem on GPU - Problem *csr_problem = new Problem; - util::GRError(csr_problem->Init( - false, - graph, - num_gpus), - "PageRank Problem Initialization Failed", __FILE__, __LINE__); - - // Perform PageRank - GpuTimer gpu_timer; - - util::GRError(csr_problem->Reset( - source, delta, error, pr_enactor.GetFrontierType()), - "PageRank Problem Data Reset Failed", __FILE__, __LINE__); - gpu_timer.Start(); - util::GRError(pr_enactor.template Enact( - context, csr_problem, max_iter, max_grid_size), - "PageRank Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); - - float elapsed = gpu_timer.ElapsedMillis(); - - // Copy out results - util::GRError(csr_problem->Extract(page_rank, node_ids), - "PageRank Problem Data Extraction Failed", - __FILE__, __LINE__); - - // Cleanup - if (csr_problem) delete csr_problem; - //if (h_node_id) free(h_node_id); - //if (h_rank) free(h_rank); + typedef PRProblem Problem; + PREnactor enactor(false); // PageRank enactor map + Problem *problem = new 
Problem; // Allocate problem on GPU + util::GRError(problem->Init(false, csr, num_gpus), + "PR Problem Initialization Failed", __FILE__, __LINE__); + + util::GRError(problem->Reset( + source, delta, error, enactor.GetFrontierType()), + "PR Problem Data Reset Failed", __FILE__, __LINE__); + + util::GRError(enactor.template Enact( + context, problem, max_iter, max_grid_size), + "PR Problem Enact Failed", __FILE__, __LINE__); + + util::GRError(problem->Extract(page_rank, node_ids), + "PR Problem Extraction Failed", __FILE__, __LINE__); + + if (problem) delete problem; cudaDeviceSynchronize(); } /** * @brief dispatch function to handle data_types * - * @param[out] ggraph_out output of pr problem + * @param[out] graph_o output of pr problem * @param[out] node_ids output of pr problem * @param[out] page_rank output of pr problem - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] pr_config pr specific configurations - * @param[in] data_type data type configurations + * @param[in] graph_i GRGraph type input graph + * @param[in] config specific configurations + * @param[in] data_t data type configurations * @param[in] context moderngpu context */ -void dispatch_page_rank( - GunrockGraph *ggraph_out, - void *node_ids, - void *page_rank, - const GunrockGraph *ggraph_in, - const GunrockConfig pr_config, - const GunrockDataType data_type, - CudaContext& context) { - switch (data_type.VTXID_TYPE) { +void dispatch_pagerank( + GRGraph *graph_o, + void *node_ids, + void *pagerank, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t, + CudaContext &context) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (data_type.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (data_type.VALUE_TYPE) { - case VALUE_INT: { - // template type = + switch (data_t.VALUE_TYPE) { + case VALUE_INT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_UINT: { - // template type = + case 
VALUE_UINT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_FLOAT: { - // template type = + case VALUE_FLOAT: { // template type = // build input csr format graph Csr csr_graph(false); - csr_graph.nodes = ggraph_in->num_nodes; - csr_graph.edges = ggraph_in->num_edges; - csr_graph.row_offsets = (int*)ggraph_in->row_offsets; - csr_graph.column_indices = (int*)ggraph_in->col_indices; - - // page rank configurations - float delta = 0.85f; //!< default delta value - float error = 0.01f; //!< error threshold - int max_iter = 20; //!< maximum number of iterations - int max_grid_size = 0; //!< 0: leave it up to the enactor - int num_gpus = 1; //!< for multi-gpu enactor to use - int src_node = -1; //!< source node to start + csr_graph.nodes = graph_i->num_nodes; + csr_graph.edges = graph_i->num_edges; + csr_graph.row_offsets = (int*)graph_i->row_offsets; + csr_graph.column_indices = (int*)graph_i->col_indices; + + // page-rank configurations + float delta = 0.85f; // default delta value + float error = 0.01f; // error threshold + int max_iter = 20; // maximum number of iterations + int max_grid_size = 0; // 0: leave it up to the enactor + int num_gpus = 1; // for multi-gpu enactor to use + int src_node = -1; // source node to start // determine source vertex to start sssp - switch (pr_config.src_mode) { + switch (config.src_mode) { case randomize: { src_node = graphio::RandomNode(csr_graph.nodes); break; @@ -175,7 +142,7 @@ void dispatch_page_rank( break; } case manually: { - src_node = pr_config.src_node; + src_node = config.src_node; break; } default: { @@ -183,14 +150,14 @@ void dispatch_page_rank( break; } } - delta = pr_config.delta; - error = pr_config.error; - max_iter = pr_config.max_iter; + delta = config.delta; + error = config.error; + max_iter = config.max_iter; - run_page_rank( - ggraph_out, + run_pagerank( + graph_o, (int*)node_ids, - (float*)page_rank, + (float*)pagerank, csr_graph, src_node, delta, @@ 
-215,37 +182,27 @@ void dispatch_page_rank( } /** - * @brief run_page_rank entry + * @brief run_pr entry * - * @param[out] ggraph_out output of pr problem + * @param[out] graph_o output of pr problem * @param[out] node_ids output of pr problem * @param[out] page_rank output of pr problem - * @param[in] ggraph_in input graph need to process on - * @param[in] pr_config gunrock primitive specific configurations - * @param[in] data_type gunrock datatype struct + * @param[in] graph_i input graph need to process on + * @param[in] config gunrock primitive specific configurations + * @param[in] data_t gunrock data_t struct */ -void gunrock_pr_func( - GunrockGraph *ggraph_out, - void *node_ids, - void *page_rank, - const GunrockGraph *ggraph_in, - const GunrockConfig pr_config, - const GunrockDataType data_type) { - - // moderngpu preparations - int device = 0; - device = pr_config.device; +void gunrock_pagerank( + GRGraph *graph_o, + void *node_ids, + void *pagerank, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + unsigned int device = 0; + device = config.device; ContextPtr context = mgpu::CreateCudaDevice(device); - - // luanch dispatch function - dispatch_page_rank( - ggraph_out, - node_ids, - page_rank, - ggraph_in, - pr_config, - data_type, - *context); + dispatch_pagerank( + graph_o, node_ids, pagerank, graph_i, config, data_t, *context); } // Leave this at the end of the file diff --git a/gunrock/app/pr/pr_enactor.cuh b/gunrock/app/pr/pr_enactor.cuh index 1ed922bc1..423196b92 100644 --- a/gunrock/app/pr/pr_enactor.cuh +++ b/gunrock/app/pr/pr_enactor.cuh @@ -243,7 +243,7 @@ public: cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - long long frontier_attribute_queue_length = graph_slice->nodes; + SizeT frontier_attribute_queue_length = graph_slice->nodes; // Step through PageRank iterations while (done[0] < 0) { @@ -283,7 +283,7 @@ public: if (retval = work_progress.GetQueueLength( 
frontier_attribute.queue_index+1, frontier_attribute_queue_length)) break; - printf(", %lld", + printf(", %d", (long long) frontier_attribute_queue_length); } @@ -360,7 +360,7 @@ public: enactor_stats.total_queued += frontier_attribute_queue_length; if (DEBUG) { - printf(", %lld", frontier_attribute_queue_length); + printf(", %d", frontier_attribute_queue_length); } if (INSTRUMENT) { if (retval=enactor_stats.filter_kernel_stats.Accumulate( diff --git a/gunrock/app/sssp/sssp_app.cu b/gunrock/app/sssp/sssp_app.cu index fa55888be..fd02d0b3c 100644 --- a/gunrock/app/sssp/sssp_app.cu +++ b/gunrock/app/sssp/sssp_app.cu @@ -8,21 +8,19 @@ /** * @file sssp_app.cu * - * @brief single-source shortest path problem implementation + * @brief single-source shortest path (SSSP) application */ -#include #include -// Graph construction utils +// graph construction utilities #include -// SSSP includes +// single-source shortest path includes #include #include #include -// Moderngpu include #include using namespace gunrock; @@ -38,7 +36,7 @@ using namespace gunrock::app::sssp; * @tparam SizeT * @tparam MARK_PREDECESSORS * - * @param[out] ggraph_out GunrockGraph type output + * @param[out] graph_o GRGraph type output * @param[out] predecessor return predeessor if mark_pred = true * @param[in] graph Reference to the CSR graph we process on * @param[in] source Source node where SSSP starts @@ -48,125 +46,164 @@ using namespace gunrock::app::sssp; * @param[in] delta_factor user set * @param[in] context moderngpu context */ -template < - typename VertexId, - typename Value, - typename SizeT, - bool MARK_PREDECESSORS > +template void run_sssp( - GunrockGraph *ggraph_out, + GRGraph *graph_o, VertexId *predecessor, - const Csr &graph, - const VertexId source, + const Csr &csr, + const VertexId src, const int max_grid_size, const float queue_sizing, const int num_gpus, const int delta_factor, - CudaContext& context) { - // Preparations - typedef SSSPProblem < - VertexId, - SizeT, - Value, - 
MARK_PREDECESSORS > Problem; - + CudaContext &context) { + typedef SSSPProblem Problem; // Allocate host-side label array for gpu-computed results - unsigned int *h_labels - = (unsigned int*)malloc(sizeof(unsigned int) * graph.nodes); + Value *h_labels = (Value*)malloc(sizeof(Value) * csr.nodes); //VertexId *h_preds = NULL; if (MARK_PREDECESSORS) { - //h_preds = (VertexId*)malloc(sizeof(VertexId) * graph.nodes); + //h_preds = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); } - // Allocate SSSP enactor map - SSSPEnactor sssp_enactor(false); - - // Allocate problem on GPU - Problem *csr_problem = new Problem; - util::GRError(csr_problem->Init( - false, - graph, - num_gpus, - delta_factor), - "Problem SSSP Initialization Failed", __FILE__, __LINE__); + SSSPEnactor enactor(false); // enactor map + Problem *problem = new Problem; + util::GRError(problem->Init(false, csr, num_gpus, delta_factor), + "SSSP Problem Initialization Failed", __FILE__, __LINE__); - // Perform SSSP - CpuTimer gpu_timer; - - util::GRError(csr_problem->Reset( - source, sssp_enactor.GetFrontierType(), queue_sizing), + util::GRError(problem->Reset(src, enactor.GetFrontierType(), queue_sizing), "SSSP Problem Data Reset Failed", __FILE__, __LINE__); - gpu_timer.Start(); - util::GRError(sssp_enactor.template Enact( - context, csr_problem, source, - queue_sizing, max_grid_size), + + util::GRError(enactor.template Enact( + context, problem, src, queue_sizing, max_grid_size), "SSSP Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); - float elapsed = gpu_timer.ElapsedMillis(); - // Copy out results - util::GRError(csr_problem->Extract(h_labels, predecessor), + util::GRError(problem->Extract(h_labels, predecessor), "SSSP Problem Data Extraction Failed", __FILE__, __LINE__); - // copy label_values per node to GunrockGraph output - ggraph_out->node_values = (unsigned int*)&h_labels[0]; - - if (csr_problem) delete csr_problem; - //if (h_labels) free(h_labels); - //if (h_preds) free(h_preds); + // 
copy label_values per node to GRGraph output + graph_o->node_values = (Value*)&h_labels[0]; + if (problem) { delete problem; } cudaDeviceSynchronize(); } /** * @brief dispatch function to handle data_types * - * @param[out] ggraph_out GunrockGraph type output - * @param[out] predecessor return predeessor if mark_pred = true - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] sssp_config sssp specific configurations - * @param[in] data_type sssp data_type configurations - * @param[in] context moderngpu context + * @param[out] graph_o GRGraph type output + * @param[out] predecessor Return predeessor if mark_pred = true + * @param[in] graph_i GRGraph type input graph + * @param[in] config Primitive-specific configurations + * @param[in] data_t Data type configurations + * @param[in] context ModernGPU context */ void dispatch_sssp( - GunrockGraph *ggraph_out, - void *predecessor, - const GunrockGraph *ggraph_in, - const GunrockConfig sssp_config, - const GunrockDataType data_type, - CudaContext& context) { - switch (data_type.VTXID_TYPE) { + GRGraph *graph_o, + void *predecessor, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t, + CudaContext &context) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (data_type.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (data_type.VALUE_TYPE) { - case VALUE_INT: { - // template type = - // not support yet - printf("Not Yet Support This DataType Combination.\n"); + switch (data_t.VALUE_TYPE) { + case VALUE_INT: { // template type = + Csr csr_graph(false); + csr_graph.nodes = graph_i->num_nodes; + csr_graph.edges = graph_i->num_edges; + csr_graph.row_offsets = (int*)graph_i->row_offsets; + csr_graph.column_indices = (int*)graph_i->col_indices; + csr_graph.edge_values = (int*)graph_i->edge_values; + + // sssp configurations + bool mark_pred = 0; // whether to mark predecessors + int src_node = 0; // source vertex to start + int num_gpus = 1; // number of 
GPUs + int delta_factor = 1; // default delta_factor = 1 + int max_grid_size = 0; // leave it up to the enactor + float max_queue_sizing = 1.0; // default maximum queue sizing + + // determine source vertex to start sssp + switch (config.src_mode) { + case randomize: { + src_node = graphio::RandomNode(csr_graph.nodes); + break; + } + case largest_degree: { + int max_deg = 0; + src_node = csr_graph.GetNodeWithHighestDegree(max_deg); + break; + } + case manually: { + src_node = config.src_node; + break; + } + default: { + src_node = 0; + break; + } + } + mark_pred = config.mark_pred; + delta_factor = config.delta_factor; + max_queue_sizing = config.queue_size; + + switch (mark_pred) { + case true: { + run_sssp( + graph_o, + (int*)predecessor, + csr_graph, + src_node, + max_grid_size, + max_queue_sizing, + num_gpus, + delta_factor, + context); + break; + } + case false: { + run_sssp( + graph_o, + (int*)predecessor, + csr_graph, + src_node, + max_grid_size, + max_queue_sizing, + num_gpus, + delta_factor, + context); + break; + } + } + // reset for free memory + csr_graph.row_offsets = NULL; + csr_graph.column_indices = NULL; + csr_graph.edge_values = NULL; break; } - case VALUE_UINT: { - // template type = + case VALUE_UINT: { // template type = // build input csr format graph Csr csr_graph(false); - csr_graph.nodes = ggraph_in->num_nodes; - csr_graph.edges = ggraph_in->num_edges; - csr_graph.row_offsets = (int*)ggraph_in->row_offsets; - csr_graph.column_indices = (int*)ggraph_in->col_indices; - csr_graph.edge_values = (unsigned int*)ggraph_in->edge_values; + csr_graph.nodes = graph_i->num_nodes; + csr_graph.edges = graph_i->num_edges; + csr_graph.row_offsets = (int*)graph_i->row_offsets; + csr_graph.column_indices = (int*)graph_i->col_indices; + csr_graph.edge_values = (unsigned int*)graph_i->edge_values; // sssp configurations - bool mark_pred = false; - int src_node = 0; //!< use whatever the specified graph-type's default is - int num_gpus = 1; //!< number of GPUs 
for multi-gpu enactor to use - int delta_factor = 1; //!< default delta_factor = 1 - int max_grid_size = 0; //!< maximum grid size (0: leave it up to the enactor) - float max_queue_sizing = 1.0; //!< default maximum queue sizing + bool mark_pred = 0; // whether to mark predecessors + int src_node = 0; // source vertex to start + int num_gpus = 1; // number of GPUs + int delta_factor = 1; // default delta_factor = 1 + int max_grid_size = 0; // leave it up to the enactor + float max_queue_sizing = 1.0; // default maximum queue sizing // determine source vertex to start sssp - switch (sssp_config.src_mode) { + switch (config.src_mode) { case randomize: { src_node = graphio::RandomNode(csr_graph.nodes); break; @@ -177,7 +214,7 @@ void dispatch_sssp( break; } case manually: { - src_node = sssp_config.src_node; + src_node = config.src_node; break; } default: { @@ -185,14 +222,14 @@ void dispatch_sssp( break; } } - mark_pred = sssp_config.mark_pred; - delta_factor = sssp_config.delta_factor; - max_queue_sizing = sssp_config.queue_size; + mark_pred = config.mark_pred; + delta_factor = config.delta_factor; + max_queue_sizing = config.queue_size; switch (mark_pred) { case true: { run_sssp( - ggraph_out, + graph_o, (int*)predecessor, csr_graph, src_node, @@ -205,7 +242,7 @@ void dispatch_sssp( } case false: { run_sssp( - ggraph_out, + graph_o, (int*)predecessor, csr_graph, src_node, @@ -245,32 +282,22 @@ void dispatch_sssp( * @tparam Value * @tparam SizeT * - * @param[out] ggraph_out GunrockGraph type output - * @param[out] predecessor return predeessor if mark_pred = true - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] sssp_config gunrock primitive specific configurations - * @param[in] data_type data_type configurations + * @param[out] graph_o GRGraph type output + * @param[out] predecessor Return predeessor if mark_pred = true + * @param[in] graph_i GRGraph type input graph + * @param[in] config Primitive specific configurations + * @param[in] data_t 
Data type configurations */ -void gunrock_sssp_func( - GunrockGraph *ggraph_out, - void *predecessor, - const GunrockGraph *ggraph_in, - const GunrockConfig sssp_config, - const GunrockDataType data_type) { - - // moderngpu preparations - int device = 0; - device = sssp_config.device; +void gunrock_sssp( + GRGraph *graph_o, + void *predecessor, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + unsigned int device = 0; + device = config.device; ContextPtr context = mgpu::CreateCudaDevice(device); - - // lunch dispatch function - dispatch_sssp( - ggraph_out, - predecessor, - ggraph_in, - sssp_config, - data_type, - *context); + dispatch_sssp(graph_o, predecessor, graph_i, config, data_t, *context); } // Leave this at the end of the file diff --git a/gunrock/app/topk/topk_app.cu b/gunrock/app/topk/topk_app.cu index 5b2855259..0e38c2fcf 100644 --- a/gunrock/app/topk/topk_app.cu +++ b/gunrock/app/topk/topk_app.cu @@ -1,20 +1,16 @@ -// ---------------------------------------------------------------- +// ---------------------------------------------------------------------------- // Gunrock -- Fast and Efficient GPU Graph Library -// ---------------------------------------------------------------- +// ---------------------------------------------------------------------------- // This source code is distributed under the terms of LICENSE.TXT // in the root directory of this source distribution. 
-// ---------------------------------------------------------------- +// ---------------------------------------------------------------------------- /** * @file topk_app.cu * - * @brief top k degree centralities implementation + * @brief top k degree centralities application */ -#include -#include -#include -#include #include #include #include @@ -77,7 +73,7 @@ template < typename Value, typename SizeT > void build_topk_subgraph( - GunrockGraph *subgraph, + GRGraph *subgraph, const Csr &graph_original, const Csr &graph_reversed, VertexId *node_ids, @@ -173,49 +169,32 @@ template < typename Value, typename SizeT > void run_topk( - GunrockGraph *graph_out, + GRGraph *graph_out, VertexId *node_ids, Value *in_degrees, Value *out_degrees, const Csr &graph_original, const Csr &graph_reversed, SizeT top_nodes) { - // preparations typedef TOPKProblem Problem; - TOPKEnactor topk_enactor(false); - Problem *topk_problem = new Problem; - - // reset top_nodes if necessary + TOPKEnactor enactor(false); + Problem *problem = new Problem; top_nodes = (top_nodes > graph_original.nodes) ? 
graph_original.nodes : top_nodes; - // initialization - util::GRError(topk_problem->Init( - false, - graph_original, - graph_reversed, - 1), + util::GRError(problem->Init(false, graph_original, graph_reversed, 1), "Problem TOPK Initialization Failed", __FILE__, __LINE__); - // reset data slices - util::GRError(topk_problem->Reset(topk_enactor.GetFrontierType()), + util::GRError(problem->Reset(enactor.GetFrontierType()), "TOPK Problem Data Reset Failed", __FILE__, __LINE__); - // launch gpu topk enactor to calculate top k nodes - util::GRError(topk_enactor.template Enact( - topk_problem, - top_nodes), + util::GRError(enactor.template Enact(problem, top_nodes), "TOPK Problem Enact Failed", __FILE__, __LINE__); - // copy out results back to cpu - util::GRError(topk_problem->Extract( - node_ids, - in_degrees, - out_degrees, - top_nodes), + util::GRError(problem->Extract(node_ids, in_degrees, out_degrees, top_nodes), "TOPK Problem Data Extraction Failed", __FILE__, __LINE__); - // build a subgraph contains only top k nodes on cpu + // build vertex-induced subgraph contains only top k nodes build_topk_subgraph( graph_out, graph_original, @@ -223,62 +202,54 @@ void run_topk( (int*)node_ids, top_nodes); - // cleanup if neccessary - if (topk_problem) { delete topk_problem; } - + if (problem) { delete problem; } cudaDeviceSynchronize(); } /** * @brief dispatch function to handle data_types * - * @param[out] ggraph_out GunrockGraph type output + * @param[out] graph_o GRGraph type output * @param[out] node_ids output top k node ids * @param[out] in_degrees output top k in-degree centralities * @param[out] out_degrees output top k out-degree centralities - * @param[in] ggraph_in GunrockGraph type input graph - * @param[in] topk_config topk specific configurations - * @param[in] data_type topk data_type configurations + * @param[in] graph_i GRGraph type input graph + * @param[in] config topk specific configurations + * @param[in] data_t topk data_t configurations */ void 
dispatch_topk( - GunrockGraph *ggraph_out, - void *node_ids, - void *in_degrees, - void *out_degrees, - const GunrockGraph *ggraph_in, - const GunrockConfig topk_config, - const GunrockDataType data_type) { - switch (data_type.VTXID_TYPE) { + GRGraph *graph_o, + void *node_ids, + void *in_degrees, + void *out_degrees, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + switch (data_t.VTXID_TYPE) { case VTXID_INT: { - switch (data_type.SIZET_TYPE) { + switch (data_t.SIZET_TYPE) { case SIZET_INT: { - switch (data_type.VALUE_TYPE) { - case VALUE_INT: { - // template type = - // original graph + switch (data_t.VALUE_TYPE) { + case VALUE_INT: { // template type = Csr graph_original(false); - graph_original.nodes = ggraph_in->num_nodes; - graph_original.edges = ggraph_in->num_edges; - graph_original.row_offsets = (int*)ggraph_in->row_offsets; - graph_original.column_indices = (int*)ggraph_in->col_indices; - - // reversed graph + graph_original.nodes = graph_i->num_nodes; + graph_original.edges = graph_i->num_edges; + graph_original.row_offsets = (int*)graph_i->row_offsets; + graph_original.column_indices = (int*)graph_i->col_indices; Csr graph_reversed(false); - graph_reversed.nodes = ggraph_in->num_nodes; - graph_reversed.edges = ggraph_in->num_edges; - graph_reversed.row_offsets = (int*)ggraph_in->col_offsets; - graph_reversed.column_indices = (int*)ggraph_in->row_indices; - - //graph_original.DisplayGraph(); + graph_reversed.nodes = graph_i->num_nodes; + graph_reversed.edges = graph_i->num_edges; + graph_reversed.row_offsets = (int*)graph_i->col_offsets; + graph_reversed.column_indices = (int*)graph_i->row_indices; run_topk( - ggraph_out, + graph_o, (int*)node_ids, (int*)in_degrees, (int*)out_degrees, graph_original, graph_reversed, - topk_config.top_nodes); + config.top_nodes); // reset for free memory graph_original.row_offsets = NULL; @@ -287,13 +258,11 @@ void dispatch_topk( graph_reversed.column_indices = NULL; break; } - case 
VALUE_UINT: { - // template type = + case VALUE_UINT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } - case VALUE_FLOAT: { - // template type = + case VALUE_FLOAT: { // template type = printf("Not Yet Support This DataType Combination.\n"); break; } @@ -309,32 +278,24 @@ void dispatch_topk( /* * @brief topk dispatch function base on gunrock data types * - * @param[out] ggraph_out output subgraph of topk problem + * @param[out] graph_o output subgraph of topk problem * @param[out] node_ids output top k node_ids * @param[out] in_degrees output associated centrality values * @param[out] out_degrees output associated centrality values - * @param[in] ggraph_in input graph need to process on - * @param[in] topk_config gunrock primitive specific configurations - * @param[in] data_type gunrock datatype struct + * @param[in] graph_i input graph need to process on + * @param[in] config gunrock primitive specific configurations + * @param[in] data_t gunrock data_t struct */ -void gunrock_topk_func( - GunrockGraph *ggraph_out, - void *node_ids, - void *in_degrees, - void *out_degrees, - const GunrockGraph *ggraph_in, - const GunrockConfig topk_config, - const GunrockDataType data_type) { - - // launch topk dispatch function - dispatch_topk( - ggraph_out, - node_ids, - in_degrees, - out_degrees, - ggraph_in, - topk_config, - data_type); +void gunrock_topk( + GRGraph *graph_o, + void *node_ids, + void *in_degrees, + void *out_degrees, + const GRGraph *graph_i, + const GRSetup config, + const GRTypes data_t) { + dispatch_topk(graph_o, node_ids, in_degrees, out_degrees, + graph_i, config, data_t); } // Leave this at the end of the file diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index 96f1ddf41..441d77a85 100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -21,136 +21,136 @@ /** * @brief VertexId data type enumerators. 
*/ -enum VertexIdType { - VTXID_INT, //!< integer type +enum VtxIdType { + VTXID_INT, // integer type }; /** * @brief SizeT data type enumerators. */ enum SizeTType { - SIZET_INT, //!< unsigned integer type + SIZET_INT, // unsigned integer type }; /** * @brief Value data type enumerators. */ enum ValueType { - VALUE_INT, //!< integer type - VALUE_UINT, //!< unsigned int type - VALUE_FLOAT, //!< float type + VALUE_INT, // integer type + VALUE_UINT, // unsigned int type + VALUE_FLOAT, // float type }; /** * @brief data-type configuration used to specify data types */ -struct GunrockDataType { - enum VertexIdType VTXID_TYPE; //!< VertexId data-type - enum SizeTType SIZET_TYPE; //!< SizeT data-type - enum ValueType VALUE_TYPE; //!< Value data-type +struct GRTypes { + enum VtxIdType VTXID_TYPE; // VertexId data type + enum SizeTType SIZET_TYPE; // SizeT data type + enum ValueType VALUE_TYPE; // Value data type }; /** * @brief GunrockGraph as a standard graph interface */ -struct GunrockGraph { - size_t num_nodes; //!< number of nodes in graph - size_t num_edges; //!< number of edges in graph - void *row_offsets; //!< C.S.R. row offsets - void *col_indices; //!< C.S.R. column indices - void *col_offsets; //!< C.S.C. column offsets - void *row_indices; //!< C.S.C. row indices - void *node_values; //!< associated values per node - void *edge_values; //!< associated values per edge +struct GRGraph { + size_t num_nodes; // number of nodes in graph + size_t num_edges; // number of edges in graph + void *row_offsets; // CSR row offsets + void *col_indices; // CSR column indices + void *col_offsets; // CSC column offsets + void *row_indices; // CSC row indices + void *node_values; // associated values per node + void *edge_values; // associated values per edge }; /** * @brief Source Vertex Mode enumerators. 
*/ enum SrcMode { - manually, //!< manually set up source node - randomize, //!< random generate source node - largest_degree, //!< set to largest-degree node + manually, // manually set up source node + randomize, // random generate source node + largest_degree, // set to largest-degree node }; /** * @brief arguments configuration used to specify arguments */ -struct GunrockConfig { - bool mark_pred; //!< whether to mark predecessor or not - bool idempotence; //!< whether or not to enable idempotent - int src_node; //!< source vertex define where to start - int device; //!< setting which gpu device to use - int max_iter; //!< maximum number of iterations allowed - int top_nodes; //!< k value for topk / page_rank problem - int delta_factor; //!< sssp delta-factor parameter - float delta; //!< pagerank specific value - float error; //!< pagerank specific value - float queue_size; //!< setting frontier queue size - enum SrcMode src_mode; //!< source mode rand/largest_degree +struct GRSetup { + bool mark_pred; // whether to mark predecessor or not + bool idempotence; // whether or not to enable idempotent + int src_node; // source vertex define where to start + int device; // setting which device to use + int max_iter; // maximum number of iterations allowed + int top_nodes; // k value for top k / pagerank problem + int delta_factor; // sssp delta-factor parameter + float delta; // pagerank specific value + float error; // pagerank specific value + float queue_size; // setting frontier queue size + enum SrcMode src_mode; // source mode rand/largest_degree }; #ifdef __cplusplus extern "C" { #endif -// BFS Function Define -void gunrock_bfs_func( - struct GunrockGraph *graph_out, - const struct GunrockGraph *graph_in, - struct GunrockConfig configs, - struct GunrockDataType data_type); - -// BC Function Define -void gunrock_bc_func( - struct GunrockGraph *graph_out, - const struct GunrockGraph *graph_in, - struct GunrockConfig configs, - struct GunrockDataType 
data_type); - -// CC Function Define -void gunrock_cc_func( - struct GunrockGraph *graph_out, - unsigned int *components, - const struct GunrockGraph *graph_in, - struct GunrockConfig configs, - struct GunrockDataType data_type); - -// SSSP Function Define -void gunrock_sssp_func( - struct GunrockGraph *graph_out, - void *predecessor, - const struct GunrockGraph *graph_in, - struct GunrockConfig congis, - struct GunrockDataType data_type); - -// PR Function Define -void gunrock_pr_func( - struct GunrockGraph *graph_out, - void *node_ids, - void *page_rank, - const struct GunrockGraph *graph_in, - struct GunrockConfig configs, - struct GunrockDataType data_type); - -// TopK Function Define -void gunrock_topk_func( - struct GunrockGraph *graph_out, - void *node_ids, - void *in_degrees, - void *out_degrees, - const struct GunrockGraph *graph_in, - struct GunrockConfig configs, - struct GunrockDataType data_type); - -// Minimum spanning tree +// breath-first search +void gunrock_bfs( + struct GRGraph *graph_o, + const struct GRGraph *graph_i, + struct GRSetup config, + struct GRTypes data_t); + +// betweenness centrality +void gunrock_bc( + struct GRGraph *graph_o, + const struct GRGraph *graph_i, + struct GRSetup config, + struct GRTypes data_t); + +// connected component +void gunrock_cc( + struct GRGraph *graph_o, + unsigned int *components, + const struct GRGraph *graph_i, + struct GRSetup config, + struct GRTypes data_t); + +// single-source shortest path +void gunrock_sssp( + struct GRGraph *graph_o, + void *predecessor, + const struct GRGraph *graph_i, + struct GRSetup config, + struct GRTypes data_t); + +// page-rank +void gunrock_pagerank( + struct GRGraph *graph_o, + void *node_ids, + void *pagerank, + const struct GRGraph *graph_i, + struct GRSetup config, + struct GRTypes data_t); + +// degree centrality +void gunrock_topk( + struct GRGraph *graph_o, + void *node_ids, + void *in_degrees, + void *out_degrees, + const struct GRGraph *graph_i, + struct GRSetup 
config, + struct GRTypes data_t); + +// minimum spanning tree void gunrock_mst( - struct GunrockGraph *graph_out, - const struct GunrockGraph *graph_in, - struct GunrockConfig configs, - struct GunrockDataType data_type); + struct GRGraph *graph_o, + const struct GRGraph *graph_i, + struct GRSetup config, + struct GRTypes data_t); -// TODO: Add other algorithms +// TODO(ydwu): Add other primitives #ifdef __cplusplus } diff --git a/shared_lib_tests/test_bc.c b/shared_lib_tests/test_bc.c index 0eb4fdf0f..177585a58 100644 --- a/shared_lib_tests/test_bc.c +++ b/shared_lib_tests/test_bc.c @@ -1,76 +1,65 @@ /** * @brief BC test for shared library * @file test_bc.c - * - * set input graph, configs and call function gunrock_bc_func - * return per node label values in graph_out node_values */ #include #include -int main(int argc, char* argv[]) -{ - // define data types - struct GunrockDataType data_type; - data_type.VTXID_TYPE = VTXID_INT; - data_type.SIZET_TYPE = SIZET_INT; - data_type.VALUE_TYPE = VALUE_FLOAT; +int main(int argc, char* argv[]) { + // define data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; + data_t.SIZET_TYPE = SIZET_INT; + data_t.VALUE_TYPE = VALUE_FLOAT; - // bc configurations (optional) - struct GunrockConfig bc_config; - bc_config.device = 0; - bc_config.src_node = -1; //!< source vertex to begin search - bc_config.queue_size = 1.0f; - bc_config.src_mode = manually; + // bc configurations (optional) + struct GRSetup config; + config.device = 0; + config.src_node = -1; // source vertex to begin search + config.queue_size = 1.0f; + config.src_mode = manually; - // define graph (undirected graph) - size_t num_nodes = 7; - size_t num_edges = 26; - int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; - int col_indices[26] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, - 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; + // define graph (undirected graph) + size_t num_nodes = 7; + size_t num_edges = 26; + int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 
26}; + int col_indices[26] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, + 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; + // build graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; - // malloc output graph - struct GunrockGraph *graph_output = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); + // malloc output graph + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); - // run bc calculations - gunrock_bc_func( - graph_output, - graph_input, - bc_config, - data_type); + // run bc calculations + gunrock_bc(graph_o, graph_i, config, data_t); - // test print - int i; - printf("Demo Outputs:\n"); - // print per node betweeness centrality values - float *bc_vals = (float*)malloc(sizeof(float) * graph_input->num_nodes); - bc_vals = (float*)graph_output->node_values; - for (i = 0; i < graph_input->num_nodes; ++i) - { - printf("Node_ID [%d] : BC[%f]\n", i, bc_vals[i]); - } - printf("\n"); - // print per edge betweeness centrality values - float *ebc_vals = (float*)malloc(sizeof(float)*graph_input->num_edges); - ebc_vals = (float*)graph_output->edge_values; - for (i = 0; i < graph_input->num_edges; ++i) - { - printf("Edge_ID [%d] : EBC[%f]\n", i, ebc_vals[i]); - } + // test print + int i; + printf("Demo Outputs:\n"); + // print per node betweeness centrality values + float *bc_vals = (float*)malloc(sizeof(float) * graph_i->num_nodes); + bc_vals = (float*)graph_o->node_values; + for (i = 0; i < graph_i->num_nodes; ++i) { + printf("Node_ID [%d] 
: BC[%f]\n", i, bc_vals[i]); + } + printf("\n"); + // print per edge betweeness centrality values + float *ebc_vals = (float*)malloc(sizeof(float) * graph_i->num_edges); + ebc_vals = (float*)graph_o->edge_values; + for (i = 0; i < graph_i->num_edges; ++i) { + printf("Edge_ID [%d] : EBC[%f]\n", i, ebc_vals[i]); + } - if (graph_input) { free(graph_input); } - if (graph_output) { free(graph_output); } + // clean up + if (graph_i) { free(graph_i); } + if (graph_o) { free(graph_o); } - return 0; + return 0; } diff --git a/shared_lib_tests/test_bfs.c b/shared_lib_tests/test_bfs.c index d3f57b747..11b43b2a5 100644 --- a/shared_lib_tests/test_bfs.c +++ b/shared_lib_tests/test_bfs.c @@ -1,69 +1,59 @@ /** * @brief BFS test for shared library * @file test_bfs.c - * - * set input graph, configs and call function gunrock_bfs_func - * return per node label values in graph_out node_values */ #include #include -int main(int argc, char* argv[]) -{ - // define data types - struct GunrockDataType data_type; - data_type.VTXID_TYPE = VTXID_INT; - data_type.SIZET_TYPE = SIZET_INT; - data_type.VALUE_TYPE = VALUE_INT; - - // bfs configurations (optional) - struct GunrockConfig bfs_config; - bfs_config.device = 0; - bfs_config.src_mode = randomize; - bfs_config.src_node = 1; //!< source vertex to begin search - bfs_config.mark_pred = false; //!< do not mark predecessors - bfs_config.idempotence = false; //!< wether enable idempotence - bfs_config.queue_size = 1.0f; - - // define graph - size_t num_nodes = 7; - size_t num_edges = 15; - int row_offsets[8] = {0,3,6,9,11,14,15,15}; - int col_indices[15] = {1,2,3,0,2,4,3,4,5,5,6,2,5,6,6}; - - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; - - // malloc output graph - struct GunrockGraph 
*graph_output = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - - // run bfs calculations - gunrock_bfs_func( - graph_output, - graph_input, - bfs_config, - data_type); - - // test print - int i; - printf("Demo Outputs:\n"); - int *labels = (int*)malloc(sizeof(int) * graph_input->num_nodes); - labels = (int*)graph_output->node_values; - for (i = 0; i < graph_input->num_nodes; ++i) - { - printf("Node_ID [%d] : Label [%d]\n", i, labels[i]); - } - - if (graph_input) { free(graph_input); } - if (graph_output) { free(graph_output); } - if (labels) { free(labels); } - - return 0; +int main(int argc, char* argv[]) { + // define data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; + data_t.SIZET_TYPE = SIZET_INT; + data_t.VALUE_TYPE = VALUE_INT; + + // bfs configurations (optional) + struct GRSetup config; + config.device = 0; + config.src_mode = randomize; + config.src_node = 1; // source vertex to begin search + config.mark_pred = false; // do not mark predecessors + config.idempotence = false; // wether enable idempotence + config.queue_size = 1.0f; + + // define graph + size_t num_nodes = 7; + size_t num_edges = 15; + int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; + int col_indices[15] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; + + // build graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + + // malloc output graph + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + + // run bfs calculations + gunrock_bfs(graph_o, graph_i, config, data_t); + + // test print + int i; + printf("Demo Outputs:\n"); + int *labels = (int*)malloc(sizeof(int) * graph_i->num_nodes); + labels = (int*)graph_o->node_values; + for (i = 0; i < graph_i->num_nodes; ++i) { + printf("Node_ID [%d] : Label [%d]\n", i, labels[i]); + } 
+ + // clean up + if (graph_i) { free(graph_i); } + if (graph_o) { free(graph_o); } + if (labels) { free(labels); } + + return 0; } diff --git a/shared_lib_tests/test_cc.c b/shared_lib_tests/test_cc.c index a230619b9..0dbd67bc1 100644 --- a/shared_lib_tests/test_cc.c +++ b/shared_lib_tests/test_cc.c @@ -1,66 +1,55 @@ /** * @brief CC test for shared library * @file test_cc.c - * - * set input graph, configs and call function gunrock_cc_func - * return per node label values in graph_out node_values */ #include #include -int main(int argc, char* argv[]) -{ - // define data types - struct GunrockDataType data_type; - data_type.VTXID_TYPE = VTXID_INT; - data_type.SIZET_TYPE = SIZET_INT; - data_type.VALUE_TYPE = VALUE_INT; - - // connected component configurations - struct GunrockConfig configs; - configs.device = 0; - - // define graph - size_t num_nodes = 7; - size_t num_edges = 15; - int row_offsets[8] = {0,3,6,9,11,14,15,15}; - int col_indices[15] = {1,2,3,0,2,4,3,4,5,5,6,2,5,6,6}; - - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; - - // malloc output graph - struct GunrockGraph *graph_output = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - unsigned int *components = (unsigned int*)malloc(sizeof(unsigned int)); - - // run connected component calculations - gunrock_cc_func( - graph_output, - components, - graph_input, - configs, - data_type); - - // test print - int i; - printf("Number of Components: %d\n", components[0]); - printf("Demo Outputs:\n"); - int *component_ids = (int*)malloc(sizeof(int) * graph_input->num_nodes); - component_ids = (int*)graph_output->node_values; - for (i = 0; i < graph_input->num_nodes; ++i) - { - printf("Node_ID [%d] : Component_ID [%d]\n", i, component_ids[i]); - } - 
- if (graph_input) { free(graph_input); } - if (graph_output) { free(graph_output); } - - return 0; +int main(int argc, char* argv[]) { + // define data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; + data_t.SIZET_TYPE = SIZET_INT; + data_t.VALUE_TYPE = VALUE_INT; + + // connected component configurations + struct GRSetup config; + config.device = 0; + + // define graph + size_t num_nodes = 7; + size_t num_edges = 15; + int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; + int col_indices[15] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; + + // build graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + + // malloc output graph + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + unsigned int *components = (unsigned int*)malloc(sizeof(unsigned int)); + + // run connected component calculations + gunrock_cc(graph_o, components, graph_i, config, data_t); + + // demo test print + printf("Number of Components: %d\n", components[0]); + printf("Demo Outputs:\n"); + int *component_ids = (int*)malloc(sizeof(int) * graph_i->num_nodes); + component_ids = (int*)graph_o->node_values; + int node; + for (node = 0; node < graph_i->num_nodes; ++node) { + printf("Node_ID [%d] : Component_ID [%d]\n", node, component_ids[node]); + } + + // clean up + if (graph_i) { free(graph_i); } + if (graph_o) { free(graph_o); } + + return 0; } diff --git a/shared_lib_tests/test_mst.c b/shared_lib_tests/test_mst.c index 47592a206..07fbdb11c 100644 --- a/shared_lib_tests/test_mst.c +++ b/shared_lib_tests/test_mst.c @@ -1,62 +1,57 @@ /** * @brief MST test for shared library * @file test_mst.c - * - * set input graph, configs and call function gunrock_mst - * return per node or per edge values in graph_out node_values */ #include #include -int main(int 
argc, char* argv[]) -{ - // set problem data types - struct GunrockDataType dt; - dt.VTXID_TYPE = VTXID_INT; - dt.SIZET_TYPE = SIZET_INT; - dt.VALUE_TYPE = VALUE_INT; - - // configurations (optional) - struct GunrockConfig configs; - configs.device = 0; - - // tiny sample graph - size_t num_nodes = 7; - size_t num_edges = 26; - int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; - int col_indices[26] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, - 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; - int edge_values[26] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; - graph_input->edge_values = (void*)&edge_values[0]; - - // malloc output graph - struct GunrockGraph *graph_output = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - - // call MST - gunrock_mst(graph_output, graph_input, configs, dt); - - // demo test print - printf("Demo Outputs:\n"); - int *mst_mask = (int*)malloc(sizeof(int) * num_edges); - mst_mask = (int*)graph_output->edge_values; - int edge; - for (edge = 0; edge < num_edges; ++edge) { - printf("Edge ID [%d] : Label [%d]\n", edge, mst_mask[edge]); - } - - if (graph_input) { free(graph_input); } - if (graph_output) { free(graph_output); } - - return 0; +int main(int argc, char* argv[]) { + // set problem data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; + data_t.VALUE_TYPE = VALUE_INT; + data_t.SIZET_TYPE = SIZET_INT; + + // configurations (optional) + struct GRSetup config; + config.device = 0; + + // tiny sample graph + size_t num_nodes = 7; + size_t num_edges = 26; + int row_offsets[8] = {0, 3, 6, 11, 15, 19, 23, 26}; + int col_indices[26] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, + 5, 6, 
1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; + int edge_values[26] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + + // build an graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + graph_i->edge_values = (void*)&edge_values[0]; + + // malloc output graph + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + + // call minimum spanning tree + gunrock_mst(graph_o, graph_i, config, data_t); + + // demo test print + printf("Demo Outputs:\n"); + int *mst_mask = (int*)malloc(sizeof(int) * num_edges); + mst_mask = (int*)graph_o->edge_values; + int edge; + for (edge = 0; edge < num_edges; ++edge) { + printf("Edge ID [%d] : Mask [%d]\n", edge, mst_mask[edge]); + } + + // clean up + if (graph_i) { free(graph_i); } + if (graph_o) { free(graph_o); } + + return 0; } diff --git a/shared_lib_tests/test_pr.c b/shared_lib_tests/test_pr.c index 0b8ceae0c..cb36b4df1 100644 --- a/shared_lib_tests/test_pr.c +++ b/shared_lib_tests/test_pr.c @@ -1,74 +1,62 @@ /** * @brief PR test for shared library * @file test_pr.c - * - * set input graph, configs and call function gunrock_pr_func - * return per node or per edge values in graph_out node_values */ #include #include -int main(int argc, char* argv[]) -{ - // define data types - struct GunrockDataType data_type; - data_type.VTXID_TYPE = VTXID_INT; //!< integer type vertex_ids - data_type.SIZET_TYPE = SIZET_INT; //!< integer type graph size - data_type.VALUE_TYPE = VALUE_FLOAT; //!< float type value for pr - - // pr configurations (optional) - struct GunrockConfig pr_config; - pr_config.device = 0; //!< use device 0 - pr_config.delta = 0.85f; //!< default delta value - pr_config.error = 0.01f; //!< default error threshold - pr_config.max_iter = 20; //!< maximum number of iterations - 
pr_config.top_nodes = 10; //!< number of top nodes - pr_config.src_node = 0; //!< source node to begin page rank - pr_config.src_mode = manually; //!< set source node manually - - // define graph (undirected graph) - size_t num_nodes = 7; - size_t num_edges = 15; - int row_offsets[8] = {0,3,6,9,11,14,15,15}; - int col_indices[15] = {1,2,3,0,2,4,3,4,5,5,6,2,5,6,6}; - - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; - - // malloc output graph - struct GunrockGraph *graph_output = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - int *node_ids = (int*)malloc(sizeof(int) * pr_config.top_nodes); - float *page_rank = (float*)malloc(sizeof(float) * pr_config.top_nodes); - - // run pr calculations - gunrock_pr_func( - graph_output, - node_ids, - page_rank, - graph_input, - pr_config, - data_type); - - // test print - int i; - printf("Demo Outputs:\n"); - if (pr_config.top_nodes > num_nodes) pr_config.top_nodes = num_nodes; - for (i = 0; i < pr_config.top_nodes; ++i) - { - printf("Node ID [%d] : Page Rank [%f] \n", node_ids[i], page_rank[i]); - } - - if (node_ids) { free(node_ids); } - if (page_rank) { free(page_rank); } - if (graph_input) { free(graph_input); } - if (graph_output) { free(graph_output); } - - return 0; +int main(int argc, char* argv[]) { + // define data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; // integer type vertex_ids + data_t.SIZET_TYPE = SIZET_INT; // integer type graph size + data_t.VALUE_TYPE = VALUE_FLOAT; // float type value for pr + + // pr configurations (optional) + struct GRSetup config; + config.device = 0; // use device 0 + config.delta = 0.85f; // default delta value + config.error = 0.01f; // default error threshold + config.max_iter = 20; // maximum 
number of iterations + config.top_nodes = 10; // number of top nodes + config.src_node = 0; // source node to begin page rank + config.src_mode = manually; // set source node manually + + // define graph (undirected graph) + size_t num_nodes = 7; + size_t num_edges = 15; + int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; + int col_indices[15] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; + + // build graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + + // malloc output graph + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + int *node_ids = (int*)malloc(sizeof(int) * config.top_nodes); + float *pagerank = (float*)malloc(sizeof(float) * config.top_nodes); + + // run pr calculations + gunrock_pagerank(graph_o, node_ids, pagerank, graph_i, config, data_t); + + // test print + int i; + printf("Demo Outputs:\n"); + if (config.top_nodes > num_nodes) config.top_nodes = num_nodes; + for (i = 0; i < config.top_nodes; ++i) { + printf("Node ID [%d] : Page Rank [%f] \n", node_ids[i], pagerank[i]); + } + + // clean up + if (node_ids) { free(node_ids); } + if (pagerank) { free(pagerank); } + if (graph_i) { free(graph_i); } + if (graph_o) { free(graph_o); } + + return 0; } diff --git a/shared_lib_tests/test_sssp.c b/shared_lib_tests/test_sssp.c index f4fc0fe5b..e22370a3d 100644 --- a/shared_lib_tests/test_sssp.c +++ b/shared_lib_tests/test_sssp.c @@ -1,75 +1,63 @@ /** * @brief SSSP test for shared library * @file test_sssp.c - * - * set input graph, configs and call function gunrock_sssp_func - * return per node or per edge values in graph_out node_values */ #include #include -int main(int argc, char* argv[]) -{ - // define data types - struct GunrockDataType data_type; - data_type.VTXID_TYPE = VTXID_INT; - data_type.SIZET_TYPE = SIZET_INT; - 
data_type.VALUE_TYPE = VALUE_UINT; - - // pr configurations (optional) - struct GunrockConfig sssp_config; - sssp_config.device = 0; - sssp_config.mark_pred = true; - sssp_config.queue_size = 1.0f; - sssp_config.delta_factor = 1; - sssp_config.src_mode = randomize; - //sssp_config.src_node = 1; - - // define graph - size_t num_nodes = 7; - size_t num_edges = 15; - - int row_offsets[8] = {0,3,6,9,11,14,15,15}; - int col_indices[15] = {1,2,3,0,2,4,3,4,5,5,6,2,5,6,6}; - unsigned int edge_values[15] = {39,6,41,51,63,17,10,44,41,13,58,43,50,59,35}; - - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; - graph_input->edge_values = (void*)&edge_values[0]; - - // malloc output graph - struct GunrockGraph *graph_output = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - int *predecessor = (int*)malloc(sizeof(int) * num_nodes); - - // run sssp calculations - gunrock_sssp_func( - graph_output, - predecessor, - graph_input, - sssp_config, - data_type); - - // test print - int i; - printf("Demo Outputs:\n"); - int *label = (int*)malloc(sizeof(int) * num_nodes); - label = (int*)graph_output->node_values; - for (i = 0; i < num_nodes; ++i) - { - printf("Node ID [%d] : Label [%d] : Predecessor [%d]\n", - i, label[i], predecessor[i]); - } - - if (predecessor) { free(predecessor); } - if (graph_input) { free(graph_input); } - if (graph_output) { free(graph_output); } - - return 0; +int main(int argc, char* argv[]) { + // define data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; + data_t.SIZET_TYPE = SIZET_INT; + data_t.VALUE_TYPE = VALUE_UINT; + + // configurations (optional) + struct GRSetup config; + config.device = 0; + config.mark_pred = true; + config.queue_size = 1.0f; + config.delta_factor = 1; + 
config.src_mode = randomize; + + // define graph + size_t num_nodes = 7; + size_t num_edges = 15; + + int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; + int col_indices[15] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; + unsigned int edge_values[15] = {39, 6, 41, 51, 63, 17, 10, 44, 41, 13, 58, 43, 50, 59, 35}; + + // build graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + graph_i->edge_values = (void*)&edge_values[0]; + + // malloc output graph + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + int *predecessor = (int*)malloc(sizeof(int) * num_nodes); + + // run calculations + gunrock_sssp(graph_o, predecessor, graph_i, config, data_t); + + // demo test print + printf("Demo Outputs:\n"); + int *label = (int*)malloc(sizeof(int) * num_nodes); + label = (int*)graph_o->node_values; + int node; + for (node = 0; node < num_nodes; ++node) { + printf("Node ID [%d] : Label [%d] : Predecessor [%d]\n", + node, label[node], predecessor[node]); + } + + // clean up + if (predecessor) { free(predecessor); } + if (graph_i) { free(graph_i); } + if (graph_o) { free(graph_o); } + + return 0; } diff --git a/shared_lib_tests/test_topk.c b/shared_lib_tests/test_topk.c index 1feea5e97..416fe9f7c 100644 --- a/shared_lib_tests/test_topk.c +++ b/shared_lib_tests/test_topk.c @@ -1,68 +1,65 @@ +/** + * @brief Top K test for shared library + * @file test_topk.c + */ + #include #include -int main(int argc, char* argv[]) -{ - // define data types - struct GunrockDataType data_type; - data_type.VTXID_TYPE = VTXID_INT; - data_type.SIZET_TYPE = SIZET_INT; - data_type.VALUE_TYPE = VALUE_INT; +int main(int argc, char* argv[]) { + // define data types + struct GRTypes data_t; + data_t.VTXID_TYPE = VTXID_INT; + data_t.SIZET_TYPE = SIZET_INT; + data_t.VALUE_TYPE = 
VALUE_INT; + + struct GRSetup config; + config.device = 0; + config.top_nodes = 3; - struct GunrockConfig topk_config; - topk_config.device = 0; - topk_config.top_nodes = 3; + // define graph (directed, reversed and non-reversed) + size_t num_nodes = 7; + size_t num_edges = 15; - // define graph (directed, reversed and non-reversed) - size_t num_nodes = 7; - size_t num_edges = 15; + int row_offsets[8] = {0, 3, 6, 9, 11, 14, 15, 15}; + int col_indices[15] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; - int row_offsets[8] = {0,3,6,9,11,14,15,15}; - int col_indices[15] = {1,2,3,0,2,4,3,4,5,5,6,2,5,6,6}; + int col_offsets[8] = {0, 1, 2, 5, 7, 9, 12, 15}; + int row_indices[15] = {1, 0, 0, 1, 4, 0, 2, 1, 2, 2, 3, 4, 3, 4, 5}; - int col_offsets[8] = {0,1,2,5,7,9,12,15}; - int row_indices[15] = {1,0,0,1,4,0,2,1,2,2,3,4,3,4,5}; + // build graph as input + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + graph_i->col_offsets = (void*)&col_offsets[0]; + graph_i->row_indices = (void*)&row_indices[0]; - // build graph as input - struct GunrockGraph *graph_input = - (struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - graph_input->num_nodes = num_nodes; - graph_input->num_edges = num_edges; - graph_input->row_offsets = (void*)&row_offsets[0]; - graph_input->col_indices = (void*)&col_indices[0]; - graph_input->col_offsets = (void*)&col_offsets[0]; - graph_input->row_indices = (void*)&row_indices[0]; + // malloc output result arrays + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + int *node_ids = (int*)malloc(sizeof(int) * config.top_nodes); + int *in_degrees = (int*)malloc(sizeof(int) * config.top_nodes); + int *out_degrees = (int*)malloc(sizeof(int) * config.top_nodes); - // malloc output result arrays - struct GunrockGraph *graph_output = - 
(struct GunrockGraph*)malloc(sizeof(struct GunrockGraph)); - int *node_ids = (int*)malloc(sizeof(int) * topk_config.top_nodes); - int *in_degrees = (int*)malloc(sizeof(int) * topk_config.top_nodes); - int *out_degrees = (int*)malloc(sizeof(int) * topk_config.top_nodes); + // run topk calculations + gunrock_topk( + graph_o, node_ids, in_degrees, out_degrees, graph_i, config, data_t); - // run topk calculations - gunrock_topk_func( - graph_output, - node_ids, - in_degrees, - out_degrees, - graph_input, - topk_config, - data_type); + // print results for check correctness + printf("Demo Outputs:\n"); + int node; + for (node = 0; node < config.top_nodes; ++node) { + printf("Node ID [%d] : in_degrees [%d] : out_degrees [%d] \n", + node_ids[node], in_degrees[node], out_degrees[node]); + } - // print results for check correctness - int i; - printf("Demo Outputs:\n"); - for (i = 0; i < topk_config.top_nodes; ++i) - { - printf("Node ID [%d] : in_degrees [%d] : out_degrees [%d] \n", - node_ids[i], in_degrees[i], out_degrees[i]); - } + // clean up + if (in_degrees) free(in_degrees); + if (out_degrees) free(out_degrees); + if (node_ids) free(node_ids); + if (graph_i) free(graph_i); + if (graph_o) free(graph_o); - if (in_degrees) free(in_degrees); - if (out_degrees) free(out_degrees); - if (node_ids) free(node_ids); - if (graph_input) free(graph_input); - if (graph_output) free(graph_output); - return 0; + return 0; } \ No newline at end of file diff --git a/tests/hits/CMakeLists.txt b/tests/hits/CMakeLists.txt index 2ba54cb95..ef9e22ff5 100644 --- a/tests/hits/CMakeLists.txt +++ b/tests/hits/CMakeLists.txt @@ -12,7 +12,7 @@ set (mgpu_SOURCE_FILES ${mgpu_SOURCE_DIRS}/mgpucontext.cu ${mgpu_SOURCE_DIRS}/mgpuutil.cpp) -CUDA_ADD_EXECUTABLE(hyperlink_induced_topic_search +CUDA_ADD_EXECUTABLE(HITS test_hits.cu ${CMAKE_SOURCE_DIR}/gunrock/util/test_utils.cu ${CMAKE_SOURCE_DIR}/gunrock/util/error_utils.cu diff --git a/tests/mst/test_mst.cu b/tests/mst/test_mst.cu index 
b563517ac..7c9eb5768 100644 --- a/tests/mst/test_mst.cu +++ b/tests/mst/test_mst.cu @@ -140,7 +140,7 @@ void DisplaySolution(const Csr &graph, int *mst_output) template bool IsConnected(const Csr & graph) { - GunrockGraph *temp = (GunrockGraph*)malloc(sizeof(GunrockGraph)); + GRGraph *temp = (GRGraph*)malloc(sizeof(GRGraph)); unsigned int *components = (unsigned int*)malloc(sizeof(unsigned int)); run_cc(temp, components, graph, 0, 1); if (temp) free(temp); From e75447fe76517d827ae234ec9c17689b2e43cd16 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 24 Jun 2015 11:25:43 -0700 Subject: [PATCH 27/36] a new simpler interface takes in csr --- gunrock/app/bfs/bfs_app.cu | 59 ++++++++++++++++++++---- gunrock/gunrock.h | 9 ++++ shared_lib_tests/simple_interface_test.c | 27 +++++++++++ 3 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 shared_lib_tests/simple_interface_test.c diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu index 1fe0300b5..d02a51551 100644 --- a/gunrock/app/bfs/bfs_app.cu +++ b/gunrock/app/bfs/bfs_app.cu @@ -46,23 +46,23 @@ using namespace gunrock::app::bfs; * @param[in] context Reference to CudaContext used by moderngpu functions * */ -template +template void run_bfs( GRGraph *graph_o, - const Csr &csr, - const VertexId src, + const Csr &csr, + const VertexId src, const int max_grid_size, const int num_gpus, const double max_queue_sizing, CudaContext &context) { - typedef BFSProblem Problem; // Allocate host-side label array for gpu-computed results VertexId *h_labels = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); VertexId *h_preds = NULL; if (MARK_PREDECESSORS) { - //h_preds = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); + // h_preds = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); } BFSEnactor enactor(false); // Allocate BFS enactor map @@ -75,17 +75,23 @@ void run_bfs( src, enactor.GetFrontierType(), max_queue_sizing), "BFS Problem Data Reset Failed", __FILE__, __LINE__); + GpuTimer gpu_timer; + float 
elapsed = 0.0f; + gpu_timer.Start(); util::GRError(enactor.template Enact( context, problem, src, max_grid_size), "BFS Problem Enact Failed", __FILE__, __LINE__); + gpu_timer.Stop(); + elapsed = gpu_timer.ElapsedMillis(); util::GRError(problem->Extract(h_labels, h_preds), "BFS Problem Data Extraction Failed", __FILE__, __LINE__); - graph_o->node_values = (int*)&h_labels[0]; // label per node to GRGraph struct + graph_o->node_values = (int*)&h_labels[0]; // label per node to graph_o + printf(" elapsed time: %.4f ms\n", elapsed); if (problem) delete problem; - //if (h_preds) free(h_preds); + // if (h_preds) free(h_preds); cudaDeviceSynchronize(); } @@ -233,6 +239,43 @@ void gunrock_bfs( dispatch_bfs(graph_o, graph_i, config, data_t, *context); } +/* + * @brief bfs interface take in CSR arrays as input + */ +void bfs( + int *bfs_label, + const int num_nodes, + const int num_edges, + const int *row_offsets, + const int *col_indices, + const int source, + const int device) { + printf("-------------------- setting --------------------\n"); + struct GRTypes data_t; // primitive-specific data types + data_t.VTXID_TYPE = VTXID_INT; // integer + data_t.SIZET_TYPE = SIZET_INT; // integer + data_t.VALUE_TYPE = VALUE_INT; // integer + struct GRSetup config; // primitive-specific configures + config.device = device; // setting device to run + config.src_node = source; // source vertex to begin + config.mark_pred = false; // do not mark predecessors + config.idempotence = false; // wether enable idempotence + config.queue_size = 1.0f; // maximum queue size factor + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + printf(" loaded num nodes: %d, num edges: %d\n", num_nodes, num_edges); + printf("-------------------- 
running --------------------\n"); + gunrock_bfs(graph_o, graph_i, config, data_t); + memcpy(bfs_label, (int*)graph_o->node_values, num_nodes * sizeof(int)); + printf("-------------------- cleanup --------------------\n"); + if (graph_i) free(graph_i); + if (graph_o) free(graph_o); +} + // Leave this at the end of the file // Local Variables: // mode:c++ diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index 441d77a85..87caf4a19 100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -101,6 +101,15 @@ void gunrock_bfs( struct GRSetup config, struct GRTypes data_t); +void bfs( + int *bfs_label, + const int num_nodes, + const int num_edges, + const int *row, + const int *col, + const int src, + const int dev); + // betweenness centrality void gunrock_bc( struct GRGraph *graph_o, diff --git a/shared_lib_tests/simple_interface_test.c b/shared_lib_tests/simple_interface_test.c new file mode 100644 index 000000000..d9d8b3ae0 --- /dev/null +++ b/shared_lib_tests/simple_interface_test.c @@ -0,0 +1,27 @@ +/** + * @brief Simple test for shared library simple interface + * @file simple_interface_test.c + */ + +#include +#include + +int main(int argc, char* argv[]) { + int row_offsets[] = {0, 3, 6, 9, 11, 14, 15, 15}; + int col_indices[] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; + size_t num_nodes = sizeof(row_offsets) / sizeof(row_offsets[0]) - 1; + size_t num_edges = sizeof(col_indices) / sizeof(col_indices[0]); + + int *labels = (int*)malloc(sizeof(int) * num_nodes); + + // test simple breath-first search interface + bfs(labels, num_nodes, num_edges, row_offsets, col_indices, 0, 0); + printf("-------------------- outputs --------------------\n"); + int node; for (node = 0; node < num_nodes; ++node) { + printf(" node: [%d] | label (depth): [%d]\n", node, labels[node]); + } + printf("------------------- completed -------------------\n"); + + if (labels) { free(labels); } + return 0; +} From 16248352adbfa7e0b14e3d79855fb4e9389d04b0 Mon Sep 17 00:00:00 2001 From: 
Yuduo Wu Date: Wed, 24 Jun 2015 11:26:09 -0700 Subject: [PATCH 28/36] update cmakelist --- shared_lib_tests/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared_lib_tests/CMakeLists.txt b/shared_lib_tests/CMakeLists.txt index 96d2ee37a..7d880fae6 100644 --- a/shared_lib_tests/CMakeLists.txt +++ b/shared_lib_tests/CMakeLists.txt @@ -1,6 +1,9 @@ # gunrock test rig cmake file # include_directories(${gunrock_INCLUDE_DIRS}/gunrock) +add_executable(simple_interface_test simple_interface_test.c) +target_link_libraries(simple_interface_test gunrock) + add_executable(test_topk test_topk.c) target_link_libraries(test_topk gunrock) From ac1d96360b9f414bad1a213f94836aa8df887d91 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 24 Jun 2015 20:22:15 -0700 Subject: [PATCH 29/36] update several interfaces --- gunrock/app/bfs/bfs_app.cu | 22 ++-- gunrock/gunrock.h | 124 ++++++++++++----------- shared_lib_tests/simple_interface_test.c | 2 +- 3 files changed, 72 insertions(+), 76 deletions(-) diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu index d02a51551..627e52554 100644 --- a/gunrock/app/bfs/bfs_app.cu +++ b/gunrock/app/bfs/bfs_app.cu @@ -52,7 +52,6 @@ void run_bfs( GRGraph *graph_o, const Csr &csr, const VertexId src, - const int max_grid_size, const int num_gpus, const double max_queue_sizing, CudaContext &context) { @@ -75,14 +74,12 @@ void run_bfs( src, enactor.GetFrontierType(), max_queue_sizing), "BFS Problem Data Reset Failed", __FILE__, __LINE__); - GpuTimer gpu_timer; - float elapsed = 0.0f; - gpu_timer.Start(); - util::GRError(enactor.template Enact( - context, problem, src, max_grid_size), + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start timer + + util::GRError(enactor.template Enact(context, problem, src), "BFS Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); - elapsed = gpu_timer.ElapsedMillis(); + + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed 
util::GRError(problem->Extract(h_labels, h_preds), "BFS Problem Data Extraction Failed", __FILE__, __LINE__); @@ -90,8 +87,8 @@ void run_bfs( graph_o->node_values = (int*)&h_labels[0]; // label per node to graph_o printf(" elapsed time: %.4f ms\n", elapsed); - if (problem) delete problem; - // if (h_preds) free(h_preds); + if (problem) { delete problem; } + if (h_preds) { free(h_preds); } cudaDeviceSynchronize(); } @@ -126,7 +123,6 @@ void dispatch_bfs( // default configurations int src_node = 0; // default source vertex to start int num_gpus = 1; // number of GPUs for multi-gpu - int max_grid_size = 0; // leave it up to the enactor bool mark_pred = 0; // whether to mark predecessor or not bool idempotence = 0; // whether or not enable idempotence float max_queue_sizing = 1.0f; // maximum size scaling factor @@ -161,7 +157,6 @@ void dispatch_bfs( graph_o, csr_graph, src_node, - max_grid_size, num_gpus, max_queue_sizing, context); @@ -170,7 +165,6 @@ void dispatch_bfs( graph_o, csr_graph, src_node, - max_grid_size, num_gpus, max_queue_sizing, context); @@ -181,7 +175,6 @@ void dispatch_bfs( graph_o, csr_graph, src_node, - max_grid_size, num_gpus, max_queue_sizing, context); @@ -190,7 +183,6 @@ void dispatch_bfs( graph_o, csr_graph, src_node, - max_grid_size, num_gpus, max_queue_sizing, context); diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index 87caf4a19..a50309615 100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -12,7 +12,6 @@ * The Gunrock public interface is a C-only interface to enable linking * with code written in other languages. While the internals of Gunrock * are not limited to C. 
- * */ #include @@ -54,14 +53,14 @@ struct GRTypes { * @brief GunrockGraph as a standard graph interface */ struct GRGraph { - size_t num_nodes; // number of nodes in graph - size_t num_edges; // number of edges in graph - void *row_offsets; // CSR row offsets - void *col_indices; // CSR column indices - void *col_offsets; // CSC column offsets - void *row_indices; // CSC row indices - void *node_values; // associated values per node - void *edge_values; // associated values per edge + size_t num_nodes; // number of nodes in graph + size_t num_edges; // number of edges in graph + void *row_offsets; // CSR row offsets + void *col_indices; // CSR column indices + void *col_offsets; // CSC column offsets + void *row_indices; // CSC row indices + void *node_values; // associated values per node + void *edge_values; // associated values per edge }; /** @@ -77,17 +76,17 @@ enum SrcMode { * @brief arguments configuration used to specify arguments */ struct GRSetup { - bool mark_pred; // whether to mark predecessor or not - bool idempotence; // whether or not to enable idempotent - int src_node; // source vertex define where to start - int device; // setting which device to use - int max_iter; // maximum number of iterations allowed - int top_nodes; // k value for top k / pagerank problem - int delta_factor; // sssp delta-factor parameter - float delta; // pagerank specific value - float error; // pagerank specific value - float queue_size; // setting frontier queue size - enum SrcMode src_mode; // source mode rand/largest_degree + bool mark_pred; // whether to mark predecessor or not + bool idempotence; // whether or not to enable idempotent + int src_node; // source vertex define where to start + int device; // setting which device to use + int max_iter; // maximum number of iterations allowed + int top_nodes; // k value for top k / pagerank problem + int delta_factor; // sssp delta-factor parameter + float delta; // pagerank specific value + float error; // pagerank 
specific value + float queue_size; // setting frontier queue size + enum SrcMode src_mode; // source mode rand/largest_degree }; #ifdef __cplusplus @@ -96,68 +95,73 @@ extern "C" { // breath-first search void gunrock_bfs( - struct GRGraph *graph_o, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* graph_o, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); +// simple interface void bfs( - int *bfs_label, + int* bfs_label, const int num_nodes, const int num_edges, - const int *row, - const int *col, - const int src, - const int dev); + const int* row_offsets, + const int* col_indices, + const int source, + const int device); // betweenness centrality void gunrock_bc( - struct GRGraph *graph_o, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* graph_o, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); // connected component void gunrock_cc( - struct GRGraph *graph_o, - unsigned int *components, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* graph_o, + unsigned int* components, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); +/* +int cc(int *component, const int num_nodes, const int num_edges, + const int *offsets, const int *indices, const int device); +*/ // single-source shortest path void gunrock_sssp( - struct GRGraph *graph_o, - void *predecessor, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* graph_o, + void* predecessor, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); // page-rank void gunrock_pagerank( - struct GRGraph *graph_o, - void *node_ids, - void *pagerank, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* 
graph_o, + void* node_ids, + void* pagerank, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); // degree centrality void gunrock_topk( - struct GRGraph *graph_o, - void *node_ids, - void *in_degrees, - void *out_degrees, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* graph_o, + void* node_ids, + void* in_degrees, + void* out_degrees, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); // minimum spanning tree void gunrock_mst( - struct GRGraph *graph_o, - const struct GRGraph *graph_i, - struct GRSetup config, - struct GRTypes data_t); + struct GRGraph* graph_o, + const struct GRGraph* graph_i, + const struct GRSetup config, + const struct GRTypes data_t); // TODO(ydwu): Add other primitives diff --git a/shared_lib_tests/simple_interface_test.c b/shared_lib_tests/simple_interface_test.c index d9d8b3ae0..2d4842e94 100644 --- a/shared_lib_tests/simple_interface_test.c +++ b/shared_lib_tests/simple_interface_test.c @@ -14,7 +14,7 @@ int main(int argc, char* argv[]) { int *labels = (int*)malloc(sizeof(int) * num_nodes); - // test simple breath-first search interface + printf(" testing breath-first search ...\n"); // test bfs bfs(labels, num_nodes, num_edges, row_offsets, col_indices, 0, 0); printf("-------------------- outputs --------------------\n"); int node; for (node = 0; node < num_nodes; ++node) { From fbf72be8cc68ca966966f9fd158f2488fa1dc1c5 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Thu, 25 Jun 2015 08:25:10 -0700 Subject: [PATCH 30/36] added BC, CC simple interface --- gunrock/app/bc/bc_app.cu | 58 +++++++++++++++++- gunrock/app/bfs/bfs_app.cu | 76 ++++++++++++++---------- gunrock/app/cc/cc_app.cu | 63 ++++++++++++++++++-- gunrock/gunrock.h | 64 ++++++++++++++++---- shared_lib_tests/simple_interface_test.c | 29 ++++++--- 5 files changed, 230 insertions(+), 60 deletions(-) diff --git a/gunrock/app/bc/bc_app.cu 
b/gunrock/app/bc/bc_app.cu index af413d79b..6d1379b73 100644 --- a/gunrock/app/bc/bc_app.cu +++ b/gunrock/app/bc/bc_app.cu @@ -45,13 +45,13 @@ using namespace gunrock::app::bc; */ template void run_bc( - GRGraph *graph_o, - const Csr &csr, + GRGraph* graph_o, + const Csr& csr, const VertexId source, const int max_grid_size, const int num_gpus, const double max_queue_sizing, - CudaContext &context) { + CudaContext& context) { typedef BCProblem Problem; // Allocate host-side array (for both reference and gpu-computed results) Value *h_sigmas = (Value*)malloc(sizeof(Value) * csr.nodes); @@ -63,6 +63,8 @@ void run_bc( util::GRError(problem->Init(false, csr, num_gpus), "BC Problem Initialization Failed", __FILE__, __LINE__); + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start timer + VertexId start_source; VertexId end_source; if (source == -1) { @@ -85,6 +87,9 @@ void run_bc( util::MemsetScaleKernel <<< 128, 128>>>( problem->data_slices[0]->d_bc_values, (Value)0.5f, (int)csr.nodes); + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed + printf(" device elapsed time: %.4f ms\n", elapsed); + util::GRError(problem->Extract(h_sigmas, h_bc_values, h_ebc_values), "BC Problem Data Extraction Failed", __FILE__, __LINE__); @@ -204,6 +209,53 @@ void gunrock_bc( dispatch_bc(graph_o, graph_i, config, data_t, *context); } +/* + * @brief Simple interface take in CSR arrays as input + * @param[out] bfs_label Return BC node centrality per nodes + * @param[in] num_nodes Number of nodes of the input graph + * @param[in] num_edges Number of edges of the input graph + * @param[in] row_offsets CSR-formatted graph input row offsets + * @param[in] col_indices CSR-formatted graph input column indices + * @param[in] source Source to begin traverse + */ +void bc( + float* bc_scores, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices, + const int source) { + printf("-------------------- setting 
--------------------\n"); + + struct GRTypes data_t; // primitive-specific data types + data_t.VTXID_TYPE = VTXID_INT; // integer + data_t.SIZET_TYPE = SIZET_INT; // integer + data_t.VALUE_TYPE = VALUE_FLOAT; // float BC scores + + struct GRSetup config; // primitive-specific configures + config.device = 0; // setting device to run + config.src_node = source; // source vertex to begin + config.queue_size = 1.0f; // maximum queue size factor + + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + + printf("-------------------- running --------------------\n"); + gunrock_bc(graph_o, graph_i, config, data_t); + memcpy(bc_scores, (float*)graph_o->node_values, num_nodes * sizeof(float)); + + if (graph_i) free(graph_i); + if (graph_o) free(graph_o); + + printf("------------------- completed -------------------\n"); +} + // Leave this at the end of the file // Local Variables: // mode:c++ diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu index 627e52554..b0421f48b 100644 --- a/gunrock/app/bfs/bfs_app.cu +++ b/gunrock/app/bfs/bfs_app.cu @@ -49,15 +49,15 @@ using namespace gunrock::app::bfs; template void run_bfs( - GRGraph *graph_o, - const Csr &csr, + GRGraph* graph_o, + const Csr& csr, const VertexId src, - const int num_gpus, - const double max_queue_sizing, - CudaContext &context) { + const int num_gpus, + const double max_queue_sizing, + CudaContext& context) { typedef BFSProblem Problem; - // Allocate host-side label array for gpu-computed results + // Allocate host-side label array for GPU-computed results VertexId *h_labels = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); VertexId *h_preds = NULL; if 
(MARK_PREDECESSORS) { @@ -80,12 +80,12 @@ void run_bfs( "BFS Problem Enact Failed", __FILE__, __LINE__); gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed - + printf(" device elapsed time: %.4f ms\n", elapsed); + util::GRError(problem->Extract(h_labels, h_preds), "BFS Problem Data Extraction Failed", __FILE__, __LINE__); graph_o->node_values = (int*)&h_labels[0]; // label per node to graph_o - printf(" elapsed time: %.4f ms\n", elapsed); if (problem) { delete problem; } if (h_preds) { free(h_preds); } @@ -102,11 +102,11 @@ void run_bfs( * @param[in] context ModernGPU context */ void dispatch_bfs( - GRGraph *graph_o, - const GRGraph *graph_i, + GRGraph* graph_o, + const GRGraph* graph_i, const GRSetup config, const GRTypes data_t, - CudaContext &context) { + CudaContext& context) { switch (data_t.VTXID_TYPE) { case VTXID_INT: { switch (data_t.SIZET_TYPE) { @@ -122,12 +122,12 @@ void dispatch_bfs( // default configurations int src_node = 0; // default source vertex to start - int num_gpus = 1; // number of GPUs for multi-gpu + int num_gpus = 1; // number of GPUs for multi-GPU bool mark_pred = 0; // whether to mark predecessor or not - bool idempotence = 0; // whether or not enable idempotence + bool idempotence = 0; // whether or not enable idempotent float max_queue_sizing = 1.0f; // maximum size scaling factor - // determine source vertex to start bfs + // determine source vertex to start switch (config.src_mode) { case randomize: { src_node = graphio::RandomNode(csr_graph.nodes); @@ -215,14 +215,14 @@ void dispatch_bfs( /* * @brief gunrock_bfs function * - * @param[out] graph_o output subgraph of bfs problem + * @param[out] graph_o output subgraph of the problem * @param[in] graph_i input graph need to process on * @param[in] config gunrock primitive specific configurations * @param[in] data_t gunrock data_t struct */ void gunrock_bfs( - GRGraph *graph_o, - const GRGraph *graph_i, + GRGraph* graph_o, + const GRGraph* graph_i, const 
GRSetup config, const GRTypes data_t) { unsigned int device = 0; @@ -232,40 +232,52 @@ void gunrock_bfs( } /* - * @brief bfs interface take in CSR arrays as input + * @brief Simple interface take in CSR arrays as input + * @param[out] bfs_label Return BFS labels per nodes + * @param[in] num_nodes Number of nodes of the input graph + * @param[in] num_edges Number of edges of the input graph + * @param[in] row_offsets CSR-formatted graph input row offsets + * @param[in] col_indices CSR-formatted graph input column indices + * @param[in] source Source to begin traverse */ void bfs( - int *bfs_label, + int* bfs_label, const int num_nodes, const int num_edges, - const int *row_offsets, - const int *col_indices, - const int source, - const int device) { + const int* row_offsets, + const int* col_indices, + const int source) { printf("-------------------- setting --------------------\n"); - struct GRTypes data_t; // primitive-specific data types + + struct GRTypes data_t; // primitive-specific data types data_t.VTXID_TYPE = VTXID_INT; // integer data_t.SIZET_TYPE = SIZET_INT; // integer data_t.VALUE_TYPE = VALUE_INT; // integer - struct GRSetup config; // primitive-specific configures - config.device = device; // setting device to run - config.src_node = source; // source vertex to begin - config.mark_pred = false; // do not mark predecessors - config.idempotence = false; // wether enable idempotence - config.queue_size = 1.0f; // maximum queue size factor + + struct GRSetup config; // primitive-specific configures + config.device = 0; // setting device to run + config.src_node = source; // source vertex to begin + config.mark_pred = false; // do not mark predecessors + config.idempotence = false; // whether enable idempotent + config.queue_size = 1.0f; // maximum queue size factor + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); graph_i->num_nodes = num_nodes; 
graph_i->num_edges = num_edges; graph_i->row_offsets = (void*)&row_offsets[0]; graph_i->col_indices = (void*)&col_indices[0]; - printf(" loaded num nodes: %d, num edges: %d\n", num_nodes, num_edges); + + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + printf("-------------------- running --------------------\n"); gunrock_bfs(graph_o, graph_i, config, data_t); memcpy(bfs_label, (int*)graph_o->node_values, num_nodes * sizeof(int)); - printf("-------------------- cleanup --------------------\n"); + if (graph_i) free(graph_i); if (graph_o) free(graph_o); + + printf("------------------- completed -------------------\n"); } // Leave this at the end of the file diff --git a/gunrock/app/cc/cc_app.cu b/gunrock/app/cc/cc_app.cu index 1f49d0e2b..ccab7ad2a 100644 --- a/gunrock/app/cc/cc_app.cu +++ b/gunrock/app/cc/cc_app.cu @@ -40,9 +40,9 @@ using namespace gunrock::app::cc; */ template void run_cc( - GRGraph *graph_o, - unsigned int *components, - const Csr &csr, + GRGraph* graph_o, + unsigned int* components, + const Csr& csr, const int max_grid_size, const int num_gpus) { typedef CCProblem Problem; // double buffer @@ -60,10 +60,15 @@ void run_cc( cc_enactor.GetFrontierType()), "CC Problem Data Reset Failed", __FILE__, __LINE__); + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start timer + util::GRError(cc_enactor.template Enact( problem, max_grid_size), "CC Problem Enact Failed", __FILE__, __LINE__); + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed + printf(" device elapsed time: %.4f ms\n", elapsed); + util::GRError(problem->Extract(h_component_ids), "CC Problem Data Extraction Failed", __FILE__, __LINE__); @@ -87,9 +92,9 @@ void run_cc( * @param[in] data_t data type configurations */ void dispatch_cc( - GRGraph *graph_o, - unsigned int *components, - const GRGraph *graph_i, + GRGraph* graph_o, + unsigned int* components, + const GRGraph* graph_i, const GRSetup config, const GRTypes data_t) { switch 
(data_t.VTXID_TYPE) { @@ -154,6 +159,52 @@ void gunrock_cc( dispatch_cc(graph_o, components, graph_i, config, data_t); } +/* + * @brief Simple interface take in CSR arrays as input + * @param[out] components Return component ID for each node + * @param[out] num_comps Return number of components calculated + * @param[in] num_nodes Number of nodes of the input graph + * @param[in] num_edges Number of edges of the input graph + * @param[in] row_offsets CSR-formatted graph input row offsets + * @param[in] col_indices CSR-formatted graph input column indices + */ +int cc( + int* components, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices) { + printf("-------------------- setting --------------------\n"); + + struct GRTypes data_t; // primitive-specific data types + data_t.VTXID_TYPE = VTXID_INT; // integer + data_t.SIZET_TYPE = SIZET_INT; // integer + data_t.VALUE_TYPE = VALUE_INT; // integer + + struct GRSetup config; // primitive-specific configures + config.device = 0; // setting device to run + + unsigned int num_components = 0; + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + + printf("-------------------- running --------------------\n"); + gunrock_cc(graph_o, &num_components, graph_i, config, data_t); + memcpy(components, (int*)graph_o->node_values, num_nodes * sizeof(int)); + + if (graph_i) free(graph_i); + if (graph_o) free(graph_o); + + printf("------------------- completed -------------------\n"); + return num_components; +} + // Leave this at the end of the file // Local Variables: // mode:c++ diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index a50309615..2511701fa 
100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -93,43 +93,60 @@ struct GRSetup { extern "C" { #endif -// breath-first search +/** + * breath-first search + */ void gunrock_bfs( struct GRGraph* graph_o, const struct GRGraph* graph_i, const struct GRSetup config, const struct GRTypes data_t); -// simple interface void bfs( int* bfs_label, const int num_nodes, const int num_edges, const int* row_offsets, const int* col_indices, - const int source, - const int device); + const int source); -// betweenness centrality +/** + * betweenness centrality + */ void gunrock_bc( struct GRGraph* graph_o, const struct GRGraph* graph_i, const struct GRSetup config, const struct GRTypes data_t); -// connected component +void bc( + float* bc_scores, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices, + const int source); + +/** + * connected component + */ void gunrock_cc( struct GRGraph* graph_o, unsigned int* components, const struct GRGraph* graph_i, const struct GRSetup config, const struct GRTypes data_t); -/* -int cc(int *component, const int num_nodes, const int num_edges, - const int *offsets, const int *indices, const int device); -*/ -// single-source shortest path +int cc( + int* component, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices); + +/** + * single-source shortest path + */ void gunrock_sssp( struct GRGraph* graph_o, void* predecessor, @@ -137,7 +154,15 @@ void gunrock_sssp( const struct GRSetup config, const struct GRTypes data_t); -// page-rank +void sssp( + int* distances, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices, + const int source); + +// pagerank void gunrock_pagerank( struct GRGraph* graph_o, void* node_ids, @@ -146,6 +171,14 @@ void gunrock_pagerank( const struct GRSetup config, const struct GRTypes data_t); +void pagerank( + int* node_ids, + float* pagerank, + const int num_nodes, + const int 
num_edges, + const int* row_offsets, + const int* col_indices); + // degree centrality void gunrock_topk( struct GRGraph* graph_o, @@ -163,6 +196,13 @@ void gunrock_mst( const struct GRSetup config, const struct GRTypes data_t); +void mst( + bool* edge_mask, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices); + // TODO(ydwu): Add other primitives #ifdef __cplusplus diff --git a/shared_lib_tests/simple_interface_test.c b/shared_lib_tests/simple_interface_test.c index 2d4842e94..baf84ff6d 100644 --- a/shared_lib_tests/simple_interface_test.c +++ b/shared_lib_tests/simple_interface_test.c @@ -7,20 +7,35 @@ #include int main(int argc, char* argv[]) { - int row_offsets[] = {0, 3, 6, 9, 11, 14, 15, 15}; - int col_indices[] = {1, 2, 3, 0, 2, 4, 3, 4, 5, 5, 6, 2, 5, 6, 6}; + int row_offsets[] = {0, 3, 6, 11, 15, 19, 23, 26}; + int col_indices[] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, + 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; size_t num_nodes = sizeof(row_offsets) / sizeof(row_offsets[0]) - 1; size_t num_edges = sizeof(col_indices) / sizeof(col_indices[0]); + printf("\n testing breath-first search ...\n"); int *labels = (int*)malloc(sizeof(int) * num_nodes); - - printf(" testing breath-first search ...\n"); // test bfs - bfs(labels, num_nodes, num_edges, row_offsets, col_indices, 0, 0); - printf("-------------------- outputs --------------------\n"); + bfs(labels, num_nodes, num_edges, row_offsets, col_indices, 0); int node; for (node = 0; node < num_nodes; ++node) { printf(" node: [%d] | label (depth): [%d]\n", node, labels[node]); } - printf("------------------- completed -------------------\n"); + + printf("\n testing betweenness centrality ...\n"); + float *scores = (float*)malloc(sizeof(float) * num_nodes); + bc(scores, num_nodes, num_edges, row_offsets, col_indices, -1); + for (node = 0; node < num_nodes; ++node) { + printf(" node: [%d] | score: [%.4f]\n", node, scores[node]); + } + + printf("\n testing connected 
components ...\n"); + int *components = (int*)malloc(sizeof(int) * num_nodes); + int ret = cc(components, num_nodes, num_edges, row_offsets, col_indices); + printf(" total number of components: %d\n", ret); + for (node = 0; node < num_nodes; ++node) { + printf(" node: [%d] | component: [%d]\n", node, components[node]); + } + + // TODO(ydwu): add other primitive tests if (labels) { free(labels); } return 0; From 495214c187f5de3e583f7b2d9d0577d52607c90a Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Thu, 25 Jun 2015 12:12:40 -0700 Subject: [PATCH 31/36] added pr sssp and cc simple interface --- gunrock/app/bc/bc_app.cu | 13 ++-- gunrock/app/bfs/bfs_app.cu | 13 ++-- gunrock/app/cc/cc_app.cu | 13 ++-- gunrock/app/pr/pr_app.cu | 87 +++++++++++++++-------- gunrock/app/pr/pr_enactor.cuh | 9 ++- gunrock/app/sssp/sssp_app.cu | 90 +++++++++++++++++++----- gunrock/gunrock.h | 13 ++-- shared_lib_tests/simple_interface_test.c | 69 ++++++++++++++---- 8 files changed, 219 insertions(+), 88 deletions(-) diff --git a/gunrock/app/bc/bc_app.cu b/gunrock/app/bc/bc_app.cu index 6d1379b73..353d107ba 100644 --- a/gunrock/app/bc/bc_app.cu +++ b/gunrock/app/bc/bc_app.cu @@ -63,7 +63,7 @@ void run_bc( util::GRError(problem->Init(false, csr, num_gpus), "BC Problem Initialization Failed", __FILE__, __LINE__); - GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start timer + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start VertexId start_source; VertexId end_source; @@ -87,13 +87,13 @@ void run_bc( util::MemsetScaleKernel <<< 128, 128>>>( problem->data_slices[0]->d_bc_values, (Value)0.5f, (int)csr.nodes); - gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // elapsed time printf(" device elapsed time: %.4f ms\n", elapsed); util::GRError(problem->Extract(h_sigmas, h_bc_values, h_ebc_values), "BC Problem Data Extraction Failed", __FILE__, __LINE__); - graph_o->node_values = 
(float*)&h_bc_values[0]; // h_bc_values per node + graph_o->node_values = (float*)&h_bc_values[0]; // h_bc_values per node graph_o->edge_values = (float*)&h_ebc_values[0]; // h_ebc_values per edge if (problem) { delete problem; } @@ -239,17 +239,18 @@ void bc( struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; graph_i->num_edges = num_edges; graph_i->row_offsets = (void*)&row_offsets[0]; graph_i->col_indices = (void*)&col_indices[0]; - printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); - + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + printf("-------------------- running --------------------\n"); gunrock_bc(graph_o, graph_i, config, data_t); memcpy(bc_scores, (float*)graph_o->node_values, num_nodes * sizeof(float)); - + if (graph_i) free(graph_i); if (graph_o) free(graph_o); diff --git a/gunrock/app/bfs/bfs_app.cu b/gunrock/app/bfs/bfs_app.cu index b0421f48b..b6313fffa 100644 --- a/gunrock/app/bfs/bfs_app.cu +++ b/gunrock/app/bfs/bfs_app.cu @@ -74,14 +74,14 @@ void run_bfs( src, enactor.GetFrontierType(), max_queue_sizing), "BFS Problem Data Reset Failed", __FILE__, __LINE__); - GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start timer + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start util::GRError(enactor.template Enact(context, problem, src), "BFS Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // elapsed time printf(" device elapsed time: %.4f ms\n", elapsed); - + util::GRError(problem->Extract(h_labels, h_preds), "BFS Problem Data Extraction Failed", __FILE__, __LINE__); @@ -263,17 +263,18 @@ void bfs( struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graph_i = (struct 
GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; graph_i->num_edges = num_edges; graph_i->row_offsets = (void*)&row_offsets[0]; graph_i->col_indices = (void*)&col_indices[0]; - printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); - + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + printf("-------------------- running --------------------\n"); gunrock_bfs(graph_o, graph_i, config, data_t); memcpy(bfs_label, (int*)graph_o->node_values, num_nodes * sizeof(int)); - + if (graph_i) free(graph_i); if (graph_o) free(graph_o); diff --git a/gunrock/app/cc/cc_app.cu b/gunrock/app/cc/cc_app.cu index ccab7ad2a..b4ac393b6 100644 --- a/gunrock/app/cc/cc_app.cu +++ b/gunrock/app/cc/cc_app.cu @@ -49,7 +49,7 @@ void run_cc( // Allocate host-side label array for gpu-computed results VertexId *h_component_ids - = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); + = (VertexId*)malloc(sizeof(VertexId) * csr.nodes); CCEnactor cc_enactor(false); // Allocate CC enactor map Problem *problem = new Problem; // Allocate problem on GPU @@ -60,13 +60,13 @@ void run_cc( cc_enactor.GetFrontierType()), "CC Problem Data Reset Failed", __FILE__, __LINE__); - GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start timer + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start util::GRError(cc_enactor.template Enact( problem, max_grid_size), "CC Problem Enact Failed", __FILE__, __LINE__); - gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // calculate elapsed + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // elapsed time printf(" device elapsed time: %.4f ms\n", elapsed); util::GRError(problem->Extract(h_component_ids), @@ -187,17 +187,18 @@ int cc( unsigned int num_components = 0; struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + graph_i->num_nodes = num_nodes; graph_i->num_edges = num_edges; 
graph_i->row_offsets = (void*)&row_offsets[0]; graph_i->col_indices = (void*)&col_indices[0]; - printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); - + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + printf("-------------------- running --------------------\n"); gunrock_cc(graph_o, &num_components, graph_i, config, data_t); memcpy(components, (int*)graph_o->node_values, num_nodes * sizeof(int)); - + if (graph_i) free(graph_i); if (graph_o) free(graph_o); diff --git a/gunrock/app/pr/pr_app.cu b/gunrock/app/pr/pr_app.cu index 47a9e5862..2d6d2c376 100644 --- a/gunrock/app/pr/pr_app.cu +++ b/gunrock/app/pr/pr_app.cu @@ -51,9 +51,8 @@ template void run_pagerank( GRGraph *graph_o, VertexId *node_ids, - Value *page_rank, + Value *pagerank, const Csr &csr, - const VertexId source, const Value delta, const Value error, const SizeT max_iter, @@ -67,15 +66,19 @@ template util::GRError(problem->Init(false, csr, num_gpus), "PR Problem Initialization Failed", __FILE__, __LINE__); - util::GRError(problem->Reset( - source, delta, error, enactor.GetFrontierType()), + util::GRError(problem->Reset(0, delta, error, enactor.GetFrontierType()), "PR Problem Data Reset Failed", __FILE__, __LINE__); + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start + util::GRError(enactor.template Enact( context, problem, max_iter, max_grid_size), "PR Problem Enact Failed", __FILE__, __LINE__); - util::GRError(problem->Extract(page_rank, node_ids), + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // elapsed time + printf(" device elapsed time: %.4f ms\n", elapsed); + + util::GRError(problem->Extract(pagerank, node_ids), "PR Problem Extraction Failed", __FILE__, __LINE__); if (problem) delete problem; @@ -122,34 +125,13 @@ void dispatch_pagerank( csr_graph.row_offsets = (int*)graph_i->row_offsets; csr_graph.column_indices = (int*)graph_i->col_indices; - // page-rank configurations + // pagerank configurations float delta = 0.85f; // default delta 
value float error = 0.01f; // error threshold int max_iter = 20; // maximum number of iterations int max_grid_size = 0; // 0: leave it up to the enactor int num_gpus = 1; // for multi-gpu enactor to use - int src_node = -1; // source node to start - - // determine source vertex to start sssp - switch (config.src_mode) { - case randomize: { - src_node = graphio::RandomNode(csr_graph.nodes); - break; - } - case largest_degree: { - int max_node = 0; - src_node = csr_graph.GetNodeWithHighestDegree(max_node); - break; - } - case manually: { - src_node = config.src_node; - break; - } - default: { - src_node = -1; - break; - } - } + delta = config.delta; error = config.error; max_iter = config.max_iter; @@ -159,7 +141,6 @@ void dispatch_pagerank( (int*)node_ids, (float*)pagerank, csr_graph, - src_node, delta, error, max_iter, @@ -205,6 +186,54 @@ void gunrock_pagerank( graph_o, node_ids, pagerank, graph_i, config, data_t, *context); } +/* + * @brief Simple interface take in CSR arrays as input + * @param[out] pagerank Return PageRank scores per node + * @param[in] num_nodes Number of nodes of the input graph + * @param[in] num_edges Number of edges of the input graph + * @param[in] row_offsets CSR-formatted graph input row offsets + * @param[in] col_indices CSR-formatted graph input column indices + * @param[in] source Source to begin traverse + */ +void pagerank( + int* node_ids, + float* pagerank, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices) { + printf("-------------------- setting --------------------\n"); + + struct GRTypes data_t; // primitive-specific data types + data_t.VTXID_TYPE = VTXID_INT; // integer + data_t.SIZET_TYPE = SIZET_INT; // integer + data_t.VALUE_TYPE = VALUE_FLOAT; // float ranks + + struct GRSetup config; // primitive-specific configures + config.device = 0; // setting device to run + config.delta = 0.85f; // default delta value + config.error = 0.01f; // default error threshold + 
config.max_iter = 20; // maximum number of iterations + + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + + printf("-------------------- running --------------------\n"); + gunrock_pagerank(graph_o, node_ids, pagerank, graph_i, config, data_t); + + if (graph_i) free(graph_i); + if (graph_o) free(graph_o); + + printf("------------------- completed -------------------\n"); +} + // Leave this at the end of the file // Local Variables: // mode:c++ diff --git a/gunrock/app/pr/pr_enactor.cuh b/gunrock/app/pr/pr_enactor.cuh index 423196b92..61dbedbd7 100644 --- a/gunrock/app/pr/pr_enactor.cuh +++ b/gunrock/app/pr/pr_enactor.cuh @@ -283,7 +283,7 @@ public: if (retval = work_progress.GetQueueLength( frontier_attribute.queue_index+1, frontier_attribute_queue_length)) break; - printf(", %d", + printf(", %lld", (long long) frontier_attribute_queue_length); } @@ -390,10 +390,9 @@ public: if (retval) break; // sort according to the rank values - util::CUBRadixSort( - false, graph_slice->nodes, - problem->data_slices[0]->d_rank_curr, - problem->data_slices[0]->d_node_ids); + MergesortPairs(problem->data_slices[0]->d_rank_curr, + problem->data_slices[0]->d_node_ids, + graph_slice->nodes, mgpu::greater(), context); if (d_scanned_edges) cudaFree(d_scanned_edges); diff --git a/gunrock/app/sssp/sssp_app.cu b/gunrock/app/sssp/sssp_app.cu index fd02d0b3c..c7621b8a0 100644 --- a/gunrock/app/sssp/sssp_app.cu +++ b/gunrock/app/sssp/sssp_app.cu @@ -46,18 +46,18 @@ using namespace gunrock::app::sssp; * @param[in] delta_factor user set * @param[in] context moderngpu context */ -template void run_sssp( - GRGraph *graph_o, - VertexId *predecessor, - const 
Csr &csr, + GRGraph* graph_o, + VertexId* predecessor, + const Csr& csr, const VertexId src, const int max_grid_size, const float queue_sizing, const int num_gpus, const int delta_factor, - CudaContext &context) { + CudaContext& context) { typedef SSSPProblem Problem; // Allocate host-side label array for gpu-computed results Value *h_labels = (Value*)malloc(sizeof(Value) * csr.nodes); @@ -75,10 +75,15 @@ void run_sssp( util::GRError(problem->Reset(src, enactor.GetFrontierType(), queue_sizing), "SSSP Problem Data Reset Failed", __FILE__, __LINE__); + GpuTimer gpu_timer; float elapsed = 0.0f; gpu_timer.Start(); // start + util::GRError(enactor.template Enact( context, problem, src, queue_sizing, max_grid_size), "SSSP Problem Enact Failed", __FILE__, __LINE__); + gpu_timer.Stop(); elapsed = gpu_timer.ElapsedMillis(); // elapsed time + printf(" device elapsed time: %.4f ms\n", elapsed); + util::GRError(problem->Extract(h_labels, predecessor), "SSSP Problem Data Extraction Failed", __FILE__, __LINE__); @@ -100,12 +105,12 @@ void run_sssp( * @param[in] context ModernGPU context */ void dispatch_sssp( - GRGraph *graph_o, - void *predecessor, - const GRGraph *graph_i, - const GRSetup config, - const GRTypes data_t, - CudaContext &context) { + GRGraph* graph_o, + void* predecessor, + const GRGraph* graph_i, + const GRSetup config, + const GRTypes data_t, + CudaContext& context) { switch (data_t.VTXID_TYPE) { case VTXID_INT: { switch (data_t.SIZET_TYPE) { @@ -289,17 +294,70 @@ void dispatch_sssp( * @param[in] data_t Data type configurations */ void gunrock_sssp( - GRGraph *graph_o, - void *predecessor, - const GRGraph *graph_i, - const GRSetup config, - const GRTypes data_t) { + GRGraph* graph_o, + void* predecessor, + const GRGraph* graph_i, + const GRSetup config, + const GRTypes data_t) { unsigned int device = 0; device = config.device; ContextPtr context = mgpu::CreateCudaDevice(device); dispatch_sssp(graph_o, predecessor, graph_i, config, data_t, *context); } +/* + * 
@brief Simple interface take in CSR arrays as input + * @param[out] distances Return shortest distance to source per nodes + * @param[in] num_nodes Number of nodes of the input graph + * @param[in] num_edges Number of edges of the input graph + * @param[in] row_offsets CSR-formatted graph input row offsets + * @param[in] col_indices CSR-formatted graph input column indices + * @param[in] source Source to begin traverse + */ +void sssp( + unsigned int* distances, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices, + const unsigned int* edge_values, + const int source) { + printf("-------------------- setting --------------------\n"); + + struct GRTypes data_t; // primitive-specific data types + data_t.VTXID_TYPE = VTXID_INT; // integer + data_t.SIZET_TYPE = SIZET_INT; // integer + data_t.VALUE_TYPE = VALUE_UINT; // unsigned integer + + struct GRSetup config; // primitive-specific configures + config.device = 0; // setting device to run + config.src_node = source; // source vertex to begin + config.mark_pred = false; // do not mark predecessors + config.delta_factor = 32; // delta factor for delta-stepping + config.queue_size = 1.0f; // maximum queue size factor + + struct GRGraph *graph_o = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + struct GRGraph *graph_i = (struct GRGraph*)malloc(sizeof(struct GRGraph)); + + graph_i->num_nodes = num_nodes; + graph_i->num_edges = num_edges; + graph_i->row_offsets = (void*)&row_offsets[0]; + graph_i->col_indices = (void*)&col_indices[0]; + graph_i->edge_values = (void*)&edge_values[0]; + + printf(" loaded %d nodes and %d edges\n", num_nodes, num_edges); + + printf("-------------------- running --------------------\n"); + gunrock_sssp(graph_o, (void*)NULL, graph_i, config, data_t); + memcpy(distances, (unsigned int*)graph_o->node_values, + num_nodes * sizeof(unsigned int)); + + if (graph_i) free(graph_i); + if (graph_o) free(graph_o); + + printf("------------------- completed 
-------------------\n"); +} + // Leave this at the end of the file // Local Variables: // mode:c++ diff --git a/gunrock/gunrock.h b/gunrock/gunrock.h index 2511701fa..24d8d421f 100644 --- a/gunrock/gunrock.h +++ b/gunrock/gunrock.h @@ -155,12 +155,13 @@ void gunrock_sssp( const struct GRTypes data_t); void sssp( - int* distances, - const int num_nodes, - const int num_edges, - const int* row_offsets, - const int* col_indices, - const int source); + unsigned int* distances, + const int num_nodes, + const int num_edges, + const int* row_offsets, + const int* col_indices, + const unsigned int* edge_values, + const int source); // pagerank void gunrock_pagerank( diff --git a/shared_lib_tests/simple_interface_test.c b/shared_lib_tests/simple_interface_test.c index baf84ff6d..66cd14c70 100644 --- a/shared_lib_tests/simple_interface_test.c +++ b/shared_lib_tests/simple_interface_test.c @@ -7,36 +7,77 @@ #include int main(int argc, char* argv[]) { - int row_offsets[] = {0, 3, 6, 11, 15, 19, 23, 26}; - int col_indices[] = {1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, - 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; + + /////////////////////////////////////////////////////////////////////////// + // define input graph + int row_offsets[] = { + 0, 3, 6, 11, 15, 19, 23, 26}; + int col_indices[] = { + 1, 2, 3, 0, 2, 4, 0, 1, 3, 4, 5, 0, 2, + 5, 6, 1, 2, 5, 6, 2, 3, 4, 6, 3, 4, 5}; + unsigned int edge_values[] = { + 3, 4, 5, 3, 5, 7, 4, 5, 7, 8, 9, 5, 7, 10, + 11, 7, 8, 11, 12, 9, 10, 11, 13, 11, 12, 13}; + + // nodes = length of row offsets-1, edges = length of column indices size_t num_nodes = sizeof(row_offsets) / sizeof(row_offsets[0]) - 1; size_t num_edges = sizeof(col_indices) / sizeof(col_indices[0]); + /////////////////////////////////////////////////////////////////////////// + // allocate host arrays to store test results + int* bfs_label = ( int*)malloc(sizeof( int) * num_nodes); + float* bc_scores = (float*)malloc(sizeof(float) * num_nodes); + int* conn_comp = ( 
int*)malloc(sizeof( int) * num_nodes); + unsigned int *sssp_dist = + (unsigned int*)malloc(sizeof( unsigned int) * num_nodes); + int* pr_nodes = ( int*)malloc(sizeof( int) * num_nodes); + float* pr_ranks = (float*)malloc(sizeof(float) * num_nodes); + + /////////////////////////////////////////////////////////////////////////// printf("\n testing breath-first search ...\n"); - int *labels = (int*)malloc(sizeof(int) * num_nodes); - bfs(labels, num_nodes, num_edges, row_offsets, col_indices, 0); + bfs(bfs_label, num_nodes, num_edges, row_offsets, col_indices, 0); int node; for (node = 0; node < num_nodes; ++node) { - printf(" node: [%d] | label (depth): [%d]\n", node, labels[node]); + printf(" node: [%d] | label (depth): [%d]\n", node, bfs_label[node]); } + /////////////////////////////////////////////////////////////////////////// printf("\n testing betweenness centrality ...\n"); - float *scores = (float*)malloc(sizeof(float) * num_nodes); - bc(scores, num_nodes, num_edges, row_offsets, col_indices, -1); + bc(bc_scores, num_nodes, num_edges, row_offsets, col_indices, -1); for (node = 0; node < num_nodes; ++node) { - printf(" node: [%d] | score: [%.4f]\n", node, scores[node]); + printf(" node: [%d] | score: [%.4f]\n", node, bc_scores[node]); } + /////////////////////////////////////////////////////////////////////////// printf("\n testing connected components ...\n"); - int *components = (int*)malloc(sizeof(int) * num_nodes); - int ret = cc(components, num_nodes, num_edges, row_offsets, col_indices); - printf(" total number of components: %d\n", ret); + int num_comp = cc(conn_comp, num_nodes, num_edges, row_offsets, col_indices); + printf(" total number of components: %d\n", num_comp); + for (node = 0; node < num_nodes; ++node) { + printf(" node: [%d] | component: [%d]\n", node, conn_comp[node]); + } + + /////////////////////////////////////////////////////////////////////////// + printf("\n testing single-source shortest path ...\n"); + sssp(sssp_dist, num_nodes, 
num_edges, row_offsets, col_indices, edge_values, 0); for (node = 0; node < num_nodes; ++node) { - printf(" node: [%d] | component: [%d]\n", node, components[node]); + printf(" node: [%d] | distance: [%u]\n", node, sssp_dist[node]); + } + + /////////////////////////////////////////////////////////////////////////// + printf("\n testing pagerank ...\n"); + pagerank(pr_nodes, pr_ranks, num_nodes, num_edges, row_offsets, col_indices); + for (node = 0; node < num_nodes; ++node) { + printf(" node: [%d] | rank: [%.4f]\n", pr_nodes[node], pr_ranks[node]); } // TODO(ydwu): add other primitive tests - if (labels) { free(labels); } + // clean ups + if (bfs_label) free(bfs_label); + if (bc_scores) free(bc_scores); + if (conn_comp) free(conn_comp); + if (sssp_dist) free(sssp_dist); + if (pr_nodes) free(pr_nodes); + if (pr_ranks) free(pr_ranks); + return 0; } From e95765cb86ae5364e7b27fcc83d252958ff897d8 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Thu, 25 Jun 2015 13:51:39 -0700 Subject: [PATCH 32/36] added py samples --- python/betweenness_centrality.py | 26 ++++++++++++++++++++++++ python/breath_first_search.py | 26 ++++++++++++++++++++++++ python/connected_components.py | 27 +++++++++++++++++++++++++ python/pagerank.py | 29 +++++++++++++++++++++++++++ python/single_source_shortest_path.py | 28 ++++++++++++++++++++++++++ python/toy_graph/col.txt | 26 ++++++++++++++++++++++++ python/toy_graph/row.txt | 8 ++++++++ python/toy_graph/val.txt | 26 ++++++++++++++++++++++++ 8 files changed, 196 insertions(+) create mode 100644 python/betweenness_centrality.py create mode 100644 python/breath_first_search.py create mode 100644 python/connected_components.py create mode 100644 python/pagerank.py create mode 100644 python/single_source_shortest_path.py create mode 100644 python/toy_graph/col.txt create mode 100644 python/toy_graph/row.txt create mode 100644 python/toy_graph/val.txt diff --git a/python/betweenness_centrality.py b/python/betweenness_centrality.py new file mode 100644
index 000000000..fd27df3be --- /dev/null +++ b/python/betweenness_centrality.py @@ -0,0 +1,26 @@ +### sample python interface - betweenness centrality + +from ctypes import * + +### load gunrock shared library - libgunrock +gunrock = cdll.LoadLibrary('./libgunrock.so') + +### read in input CSR arrays from files +row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] +col_list = [int(x.strip()) for x in open('toy_graph/col.txt')] + +### convert CSR graph inputs for gunrock input +row = pointer((c_int * len(row_list))(*row_list)) +col = pointer((c_int * len(col_list))(*col_list)) +nodes = len(row_list) - 1 +edges = len(col_list) + +### output array +scores = pointer((c_float * nodes)()) + +### call gunrock function on device +gunrock.bc(scores, nodes, edges, row, col, 2) + +### sample results +print ' node bc scores:', +for idx in range(nodes): print scores[0][idx], diff --git a/python/breath_first_search.py b/python/breath_first_search.py new file mode 100644 index 000000000..93c322484 --- /dev/null +++ b/python/breath_first_search.py @@ -0,0 +1,26 @@ +### sample python interface - breath-first search + +from ctypes import * + +### load gunrock shared library - libgunrock +gunrock = cdll.LoadLibrary('./libgunrock.so') + +### read in input CSR arrays from files +row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] +col_list = [int(x.strip()) for x in open('toy_graph/col.txt')] + +### convert CSR graph inputs for gunrock input +row = pointer((c_int * len(row_list))(*row_list)) +col = pointer((c_int * len(col_list))(*col_list)) +nodes = len(row_list) - 1 +edges = len(col_list) + +### output array +labels = pointer((c_int * nodes)()) + +### call gunrock function on device +gunrock.bfs(labels, nodes, edges, row, col, 0) + +### sample results +print ' bfs labels (depth):', +for idx in range(nodes): print labels[0][idx], diff --git a/python/connected_components.py b/python/connected_components.py new file mode 100644 index 000000000..a113a36d8 --- 
/dev/null +++ b/python/connected_components.py @@ -0,0 +1,27 @@ +### sample python interface - connected components + +from ctypes import * + +### load gunrock shared library - libgunrock +gunrock = cdll.LoadLibrary('./libgunrock.so') + +### read in input CSR arrays from files +row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] +col_list = [int(x.strip()) for x in open('toy_graph/col.txt')] + +### convert CSR graph inputs for gunrock input +row = pointer((c_int * len(row_list))(*row_list)) +col = pointer((c_int * len(col_list))(*col_list)) +nodes = len(row_list) - 1 +edges = len(col_list) + +### output array +labels = pointer((c_int * nodes)()) + +### call gunrock function on device +num_components = gunrock.cc(labels, nodes, edges, row, col) + +### sample results +print ' number of components: ' + str(num_components) +print ' component ids:', +for idx in range(nodes): print labels[0][idx], diff --git a/python/pagerank.py b/python/pagerank.py new file mode 100644 index 000000000..6b95a246d --- /dev/null +++ b/python/pagerank.py @@ -0,0 +1,29 @@ +### sample python interface - pagerank + +from ctypes import * + +### load gunrock shared library - libgunrock +gunrock = cdll.LoadLibrary('./libgunrock.so') + +### read in input CSR arrays from files +row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] +col_list = [int(x.strip()) for x in open('toy_graph/col.txt')] + +### convert CSR graph inputs for gunrock input +row = pointer((c_int * len(row_list))(*row_list)) +col = pointer((c_int * len(col_list))(*col_list)) +nodes = len(row_list) - 1 +edges = len(col_list) + +### output array +node = pointer((c_int * nodes)()) +rank = pointer((c_float * nodes)()) + +### call gunrock function on device +gunrock.pagerank(node, rank, nodes, edges, row, col) + +### sample results +print 'top page rank:' +for idx in range(nodes): + print node[0][idx], + print rank[0][idx] diff --git a/python/single_source_shortest_path.py b/python/single_source_shortest_path.py new 
file mode 100644 index 000000000..ca67fcfc9 --- /dev/null +++ b/python/single_source_shortest_path.py @@ -0,0 +1,28 @@ +### sample python interface - single-source shortest path + +from ctypes import * + +### load gunrock shared library - libgunrock +gunrock = cdll.LoadLibrary('./libgunrock.so') + +### read in input CSR arrays from files +row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] +col_list = [int(x.strip()) for x in open('toy_graph/col.txt')] +val_list = [int(x.strip()) for x in open('toy_graph/val.txt')] + +### convert CSR graph inputs for gunrock input +row = pointer((c_int * len(row_list))(*row_list)) +col = pointer((c_int * len(col_list))(*col_list)) +val = pointer((c_uint * len(val_list))(*val_list)) +nodes = len(row_list) - 1 +edges = len(col_list) + +### output array +labels = pointer((c_uint * nodes)()) + +### call gunrock function on device +gunrock.sssp(labels, nodes, edges, row, col, val, 0) + +### sample results +print ' sssp labels (distance):', +for idx in range(nodes): print labels[0][idx], diff --git a/python/toy_graph/col.txt b/python/toy_graph/col.txt new file mode 100644 index 000000000..12c10b45e --- /dev/null +++ b/python/toy_graph/col.txt @@ -0,0 +1,26 @@ +1 +2 +3 +0 +2 +4 +0 +1 +3 +4 +5 +0 +2 +5 +6 +1 +2 +5 +6 +2 +3 +4 +6 +3 +4 +5 diff --git a/python/toy_graph/row.txt b/python/toy_graph/row.txt new file mode 100644 index 000000000..1a84c1d97 --- /dev/null +++ b/python/toy_graph/row.txt @@ -0,0 +1,8 @@ +0 +3 +6 +11 +15 +19 +23 +26 diff --git a/python/toy_graph/val.txt b/python/toy_graph/val.txt new file mode 100644 index 000000000..15282b913 --- /dev/null +++ b/python/toy_graph/val.txt @@ -0,0 +1,26 @@ +3 +4 +5 +3 +5 +7 +4 +5 +7 +8 +9 +5 +7 +10 +11 +7 +8 +11 +12 +9 +10 +11 +13 +11 +12 +13 From 93e7bb976a658e6e1683ace1d232cbe2a3dfbd2b Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Fri, 26 Jun 2015 09:29:52 -0700 Subject: [PATCH 33/36] change lib path --- python/betweenness_centrality.py | 2 +- python/breath_first_search.py | 
2 +- python/connected_components.py | 2 +- python/pagerank.py | 2 +- python/single_source_shortest_path.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/betweenness_centrality.py b/python/betweenness_centrality.py index fd27df3be..6f7031e6b 100644 --- a/python/betweenness_centrality.py +++ b/python/betweenness_centrality.py @@ -3,7 +3,7 @@ from ctypes import * ### load gunrock shared library - libgunrock -gunrock = cdll.LoadLibrary('./libgunrock.so') +gunrock = cdll.LoadLibrary('../../build/lib/libgunrock.so') ### read in input CSR arrays from files row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] diff --git a/python/breath_first_search.py b/python/breath_first_search.py index 93c322484..b67fe80c0 100644 --- a/python/breath_first_search.py +++ b/python/breath_first_search.py @@ -3,7 +3,7 @@ from ctypes import * ### load gunrock shared library - libgunrock -gunrock = cdll.LoadLibrary('./libgunrock.so') +gunrock = cdll.LoadLibrary('../../build/lib/libgunrock.so') ### read in input CSR arrays from files row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] diff --git a/python/connected_components.py b/python/connected_components.py index a113a36d8..89fd824cb 100644 --- a/python/connected_components.py +++ b/python/connected_components.py @@ -3,7 +3,7 @@ from ctypes import * ### load gunrock shared library - libgunrock -gunrock = cdll.LoadLibrary('./libgunrock.so') +gunrock = cdll.LoadLibrary('../../build/lib/libgunrock.so') ### read in input CSR arrays from files row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] diff --git a/python/pagerank.py b/python/pagerank.py index 6b95a246d..642fa2e12 100644 --- a/python/pagerank.py +++ b/python/pagerank.py @@ -3,7 +3,7 @@ from ctypes import * ### load gunrock shared library - libgunrock -gunrock = cdll.LoadLibrary('./libgunrock.so') +gunrock = cdll.LoadLibrary('../../build/lib/libgunrock.so') ### read in input CSR arrays from files row_list = [int(x.strip()) for 
x in open('toy_graph/row.txt')] diff --git a/python/single_source_shortest_path.py b/python/single_source_shortest_path.py index ca67fcfc9..69edc2b39 100644 --- a/python/single_source_shortest_path.py +++ b/python/single_source_shortest_path.py @@ -3,7 +3,7 @@ from ctypes import * ### load gunrock shared library - libgunrock -gunrock = cdll.LoadLibrary('./libgunrock.so') +gunrock = cdll.LoadLibrary('../../build/lib/libgunrock.so') ### read in input CSR arrays from files row_list = [int(x.strip()) for x in open('toy_graph/row.txt')] From fdeb7fbd855f16045c412c8f2becebcdc1cc460d Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Fri, 26 Jun 2015 13:17:53 -0400 Subject: [PATCH 34/36] Update betweenness_centrality.py --- python/betweenness_centrality.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/betweenness_centrality.py b/python/betweenness_centrality.py index 6f7031e6b..e7978d04f 100644 --- a/python/betweenness_centrality.py +++ b/python/betweenness_centrality.py @@ -19,7 +19,7 @@ scores = pointer((c_float * nodes)()) ### call gunrock function on device -gunrock.bc(scores, nodes, edges, row, col, 2) +gunrock.bc(scores, nodes, edges, row, col, -1) ### sample results print ' node bc scores:', From fe385efbf17f97922fe76f72a0c0ba42155d8d26 Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Fri, 26 Jun 2015 10:43:29 -0700 Subject: [PATCH 35/36] only display the first incorrect value, count for others, but do not display. Added WriteToLigraFile function to form Ligra adjlist input file. make LB the default traversal mode in SSSP. 
--- gunrock/csr.cuh | 23 +++++++++++++++++++++++ gunrock/util/test_utils.cuh | 3 +-- tests/sssp/ppopp-test.sh | 4 ++-- tests/sssp/test_sssp.cu | 4 ++-- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/gunrock/csr.cuh b/gunrock/csr.cuh index 0c2f46373..81f2c50e7 100644 --- a/gunrock/csr.cuh +++ b/gunrock/csr.cuh @@ -148,6 +148,27 @@ struct Csr { } } + void WriteToLigraFile(char *file_name, SizeT v, SizeT e, SizeT *row, + VertexId *col, Value *edge_values = NULL) { + char adj_name[256]; + sprintf(adj_name, "%s.adj", file_name); + printf("writing to ligra .adj file.\n"); + + std::ofstream fout3(adj_name); + if (fout3.is_open()) { + fout3 << v << " " << v << " " << e << std::endl; + for (int i = 0; i < v; ++i) + fout3 << row[i] << std::endl; + for (int i = 0; i < e; ++i) + fout3 << col[i] << std::endl; + if (edge_values != NULL) { + for (int i = 0; i < e; ++i) + fout3 << edge_values[i] << std::endl; + } + fout3.close(); + } + } + /** * * @brief Read from stored row_offsets, column_indices arrays @@ -266,6 +287,8 @@ struct Csr { if (LOAD_EDGE_VALUES) { WriteToFile(output_file, nodes, edges, row_offsets, column_indices, edge_values); + WriteToLigraFile(output_file, nodes, edges, + row_offsets, column_indices, edge_values); } else { WriteToFile(output_file, nodes, edges, row_offsets, column_indices); diff --git a/gunrock/util/test_utils.cuh b/gunrock/util/test_utils.cuh index 11eff1ddb..7169c4e50 100644 --- a/gunrock/util/test_utils.cuh +++ b/gunrock/util/test_utils.cuh @@ -472,9 +472,8 @@ int CompareResults( printf("...]"); } flag += 1; - //return flag; } - //if (!is_right && flag > 0) flag += 1; + if (!is_right && flag > 0) flag += 1; } printf("\n"); if (!flag) diff --git a/tests/sssp/ppopp-test.sh b/tests/sssp/ppopp-test.sh index cbc55562d..8934de90b 100644 --- a/tests/sssp/ppopp-test.sh +++ b/tests/sssp/ppopp-test.sh @@ -1,7 +1,7 @@ mkdir -p eval/PPOPP15 for i in 1-soc 2-bitcoin 3-kron 6-roadnet do - echo ./bin/test_sssp_6.5_x86_64 market 
/data/PPOPP15/$i.mtx --src=0 --undirected --iteration-num=10 - ./bin/test_sssp_6.5_x86_64 market /data/PPOPP15/$i.mtx --src=0 --undirected --iteration-num=10 --delta-factor=32 > eval/PPOPP15/$i.txt + echo ./bin/test_sssp_7.0_x86_64 market /data/PPOPP15/$i.mtx --src=0 --undirected --iteration-num=10 + ./bin/test_sssp_7.0_x86_64 market /data/PPOPP15/$i.mtx --src=0 --undirected --iteration-num=10 --delta-factor=32 > eval/PPOPP15/$i.txt sleep 1 done diff --git a/tests/sssp/test_sssp.cu b/tests/sssp/test_sssp.cu index 2bc204495..200c668a3 100644 --- a/tests/sssp/test_sssp.cu +++ b/tests/sssp/test_sssp.cu @@ -566,7 +566,7 @@ void RunTests( args.GetCmdLineArgument("traversal-mode", traversal_mode); if (traversal_mode == -1) { - traversal_mode = graph.GetAverageDegree() > 8 ? 0 : 1; + traversal_mode = 0; } instrumented = args.CheckCmdLineFlag("instrumented"); @@ -687,7 +687,7 @@ int main( int argc, char** argv) } csr.PrintHistogram(); - csr.DisplayGraph(true); //print graph with edge_value + //csr.DisplayGraph(true); //print graph with edge_value //csr.GetAverageEdgeValue(); //csr.GetAverageDegree(); //int max_degree; From 57842ba66aa9eb6b53a91e87c594b182f29b2d52 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Fri, 26 Jun 2015 14:47:06 -0400 Subject: [PATCH 36/36] Delete FAQ.markdown --- FAQ.markdown | 135 --------------------------------------------------- 1 file changed, 135 deletions(-) delete mode 100644 FAQ.markdown diff --git a/FAQ.markdown b/FAQ.markdown deleted file mode 100644 index 1af4bb1a7..000000000 --- a/FAQ.markdown +++ /dev/null @@ -1,135 +0,0 @@ -Gunrock FAQ -=========== - -What does it do? ----------------- - -Gunrock is a fast-and-efficient graph processing library on the GPU which -provides a set of graph algorithms used in big data analytics and visualization -with high performance. 
It also provides a set of operators which abstract the -general operations in graph processing for other developers to build -high-performance graph algorithm prototyes with minimum programming effort. - -How does it do it? ------------------ - -Gunrock takes advantage of the immense computational power available in -commodity-level, off-the-shelf Graphics Processing Units (GPUs), originally -designed to handle the parallel computational tasks in computer graphics, to -perform graph traversal and computation in parallel on thousands of GPU's -computing cores. - -Who should want this? ---------------------- - -Gunrock is built with two kinds of users in mind: The first kind of users are -programmers who build big graph analytics and visualization project and need to -use existing graph primitives provided by Gunrock. The second kind of users -are programmers who want to use Gunrock's high-level, programmable abstraction -to express, develop, and refine their own (and often more complicated) graph -primitives. - -What is the skill set users need to use it? -------------------------------------------- - -for the first kind of users, C/C++ background is sufficient. We are also -building Gunrock as a shared library with C interfaces which can be loaded by -other languages such as Python and Julia. for the second kind of users, they -need to have the C/C++ background and also an understanding of parallel -programming, especially BSP (Bulk-Synchronous Programming) model which Gunrock -uses. - -What platforms/languages do people need to know in order to modify or integrate it with other tools? ----------------------------------------------------------------------------------------------------- - -Using the exposed interface, the users do not need to know CUDA or OpenCL to -modify or integrate Gunrock to their own tools. However, an essential -understanding of parallel programming and BSP model is necessary if one wants -to add/modify graph primitives in Gunrock. 
- -Why would someone want this? ----------------------------- - -The study of social networks, webgraphs, biological networks, and unstructured -meshes in scientific simulation has raised a significant demand for efficient -parallel frameworks for processing and analytics on large-scale graphs. Initial -research efforts in using GPUs for graph processing and anlytics are promising. - -How is it better than the current state of the art? ---------------------------------------------------- - -Most existing CPU large graph processing libraries perform worse on large -graphs with billions of edges. Supercomputer or expensive clusters can achieve -close to real-time feedback with high cost on hardware infrastructure. With -GPUs, we can achieve the same real-time feedback with much lower cost on -hardware. Gunrock has the best performance among the limited research efforts -put on GPU graph processing. With a set of general graph processing operators -exposed to users, it is also more flexible than other GPU/CPU graph library in -terms of programmability. - -How would someone get it? -------------------------- - -Gunrock is an open-sourced library. The code, documentation, and quick start -guide are all on its [github page](gunrock.github.io). - -Is a user account required? ---------------------------- - -No. One can use either git clone or download directly to get the source code -and documentation of Gunrock. - -Are all of its components/dependencies easy to find? ----------------------------------------------------- - -Gunrock has three dependencies. Two of them are also GPU primitive library which -also reside on github. The third one is Boost (Gunrock uses Boost Graph Library -to implement CPU reference testing algorithms). All dependencies do not require -installation. To use, one only needs to download or git clone them and put them -in the according directories. More details in the installation section of this -documentation. - -How would someone install it? 
------------------------------ - -For C/C++ programmer, integrating Gunrock into your projects is easy. Since it -is a template based library, just add the include files in your code. The -simple example and all the testrigs will provide detailed information on how to -do this. - -For programmers who use Python, Julia, or other language and want to call -Gunrock APIs, we are building a shared library with binary compatible -C interfaces. It will be included in the soon-to-arrive next release of -Gunrock. - -Can anyone install it? Do they need IT help? --------------------------------------------- - -Gunrock is targeted at developers who are familiar with basic software -engineering. For non-technical people, IT help might needed. - -Does this process actually work? All the time? On all systems specified? ------------------------------------------------------------------------- -Currently, Gunrock has been tested on two Linux distributions: Linux Mint and -Ubuntu. But we expect it to run correctly on other Linux distributions too. -We are currently building a Cmake solution to port Gunrock to Mac and Windows. -The feature will be included in the soon-to-arrive next release of Gunrock. - -How would someone test that it's working with provided sample data? -------------------------------------------------------------------- - -Testrigs are provided as well as a small simple example for users to test the -correctness and performance of every graph primitive. - -Is the "using" of sample data clear? ------------------------------------- - -On linux, one only needs to go to the dataset directory and run "make", the -script will automatically download all the needed datasets. One can also choose -to download a single dataset in its separated directory. - -How would someone use it with their own data? 
---------------------------------------------- - -Gunrock supports Matrix Market (.mtx) file format, users need to pre-process -the graph data into this format before running Gunrock.