-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
sravan-college
committed
Apr 22, 2023
1 parent
0b8f686
commit 3ce367d
Showing
25 changed files
with
2,796 additions
and
1,179 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
// Build: nvcc <this file>.cu -arch=sm_60 -std=c++14 -rdc=true  # requires compute capability 6.0+ (Pascal or later) | ||
#include "PageRankDSLV2.h" | ||
|
||
/*
 * Host driver for iterative PageRank on the GPU.
 *
 * Copies the graph's forward and reverse CSR arrays to the device, seeds every
 * vertex with rank 1/V, then repeatedly launches Compute_PR_kernel until the
 * residual reported by the kernel is <= beta or maxIter iterations have run.
 *
 * Parameters:
 *   g        - graph providing num_nodes(), num_edges(), getEdgeLen() and the
 *              indexofNodes / rev_indexofNodes / edgeList / srcList arrays.
 *   beta     - convergence threshold compared against the per-iteration residual.
 *   delta    - damping factor; copied into the device symbol ::delta.
 *   maxIter  - upper bound on the number of iterations.
 *   pageRank - host output buffer with room for V floats.
 *
 * Prints the node/edge counts and the elapsed GPU time to stdout. For an empty
 * graph (V <= 0) the function returns right after printing the counts, since a
 * zero-block launch and 1/V are both invalid.
 */
void Compute_PR(graph& g,float beta,float delta,int maxIter,
  float* pageRank)
{
  // CSR BEGIN
  int V = g.num_nodes();
  int E = g.num_edges();

  printf("#nodes:%d\n",V);
  printf("#edges:%d\n",E);
  int* edgeLen = g.getEdgeLen();

  // Nothing to rank; also avoids a 0-block kernel launch and division by zero.
  if (V <= 0) {
    return;
  }

  // Report (but do not abort on) CUDA failures so they are no longer silent.
  auto reportCudaError = [](cudaError_t err, const char* what) {
    if (err != cudaSuccess) {
      fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(err));
    }
  };

  // Host staging copies of the CSR arrays.
  int *h_meta     = (int *)malloc( (V+1)*sizeof(int));
  int *h_data     = (int *)malloc( (E)*sizeof(int));
  int *h_src      = (int *)malloc( (E)*sizeof(int));
  int *h_weight   = (int *)malloc( (E)*sizeof(int));
  int *h_rev_meta = (int *)malloc( (V+1)*sizeof(int));

  for(int i=0; i<= V; i++) {
    h_meta[i]     = g.indexofNodes[i];
    h_rev_meta[i] = g.rev_indexofNodes[i];
  }

  for(int i=0; i< E; i++) {
    h_data[i]   = g.edgeList[i];
    h_src[i]    = g.srcList[i];
    h_weight[i] = edgeLen[i];
  }

  int* d_meta;
  int* d_data;
  int* d_src;
  int* d_weight;
  int* d_rev_meta;
  bool* d_modified_next;

  cudaMalloc(&d_meta, sizeof(int)*(1+V));
  cudaMalloc(&d_data, sizeof(int)*(E));
  cudaMalloc(&d_src, sizeof(int)*(E));
  cudaMalloc(&d_weight, sizeof(int)*(E));
  cudaMalloc(&d_rev_meta, sizeof(int)*(V+1));
  cudaMalloc(&d_modified_next, sizeof(bool)*(V));

  cudaMemcpy(  d_meta,   h_meta, sizeof(int)*(V+1), cudaMemcpyHostToDevice);
  cudaMemcpy(  d_data,   h_data, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(   d_src,    h_src, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_weight, h_weight, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_rev_meta, h_rev_meta, sizeof(int)*((V+1)), cudaMemcpyHostToDevice);

  // The blocking copies above have completed, so the staging buffers are done.
  free(h_meta);
  free(h_data);
  free(h_src);
  free(h_weight);
  free(h_rev_meta);

  // CSR END
  //LAUNCH CONFIG
  const unsigned threadsPerBlock = 512;
  unsigned numBlocks    = (V+threadsPerBlock-1)/threadsPerBlock;

  // TIMER START
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  float milliseconds = 0;
  cudaEventRecord(start,0);

  //DECLARE DEVICE AND HOST vars in params
  float* d_pageRank;
  cudaMalloc(&d_pageRank, sizeof(float)*(V));

  //BEGIN DSL PARSING
  float* d_pageRank_nxt;
  cudaMalloc(&d_pageRank_nxt, sizeof(float)*(V));

  float num_nodes = (float)g.num_nodes( );

  // pageRank[v] = 1/V, pageRank_nxt[v] = 0 for every vertex.
  merged_kernel_1<<<numBlocks,threadsPerBlock>>>(V, d_pageRank, (float)1 / num_nodes, d_pageRank_nxt, (float)0);
  reportCudaError(cudaGetLastError(), "merged_kernel_1 launch");

  // delta and num_nodes do not change between iterations: upload them once.
  // The :: qualifier selects the device symbols, not the same-named locals.
  cudaMemcpyToSymbol(::delta, &delta, sizeof(float), 0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(::num_nodes, &num_nodes, sizeof(float), 0, cudaMemcpyHostToDevice);

  int iterCount = 0;
  float diff;

  do{
    // Reset the device-side residual before every sweep.
    diff = 0.0f;
    cudaMemcpyToSymbol(::diff, &diff, sizeof(float), 0, cudaMemcpyHostToDevice);

    Compute_PR_kernel<<<numBlocks, threadsPerBlock>>>(V,E,d_meta,d_data,d_src,d_weight,d_rev_meta,d_modified_next,d_pageRank,d_pageRank_nxt);
    reportCudaError(cudaGetLastError(), "Compute_PR_kernel launch");
    reportCudaError(cudaDeviceSynchronize(), "Compute_PR_kernel execution");

    // Promote the freshly computed ranks to be the input of the next sweep.
    cudaMemcpy(d_pageRank, d_pageRank_nxt, sizeof(float)*V, cudaMemcpyDeviceToDevice);
    iterCount++;
    cudaMemcpyFromSymbol(&diff, ::diff, sizeof(float), 0, cudaMemcpyDeviceToHost);

  }while((diff > beta) && (iterCount < maxIter));

  //cudaFree up!! all propVars in this BLOCK!
  cudaFree(d_pageRank_nxt);

  //TIMER STOP
  cudaEventRecord(stop,0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&milliseconds, start, stop);
  printf("GPU Time: %.6f ms\n", milliseconds);
  cudaEventDestroy(start);
  cudaEventDestroy(stop);

  cudaMemcpy(pageRank, d_pageRank, sizeof(float)*(V), cudaMemcpyDeviceToHost);

  // Release everything that is still owned by this function.
  cudaFree(d_pageRank);
  cudaFree(d_meta);
  cudaFree(d_data);
  cudaFree(d_src);
  cudaFree(d_weight);
  cudaFree(d_rev_meta);
  cudaFree(d_modified_next);
} //end FUN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Build the including .cu with: nvcc <file>.cu -arch=sm_60 -std=c++14 -rdc=true  # requires compute capability 6.0+ (Pascal or later) | ||
#ifndef GENCPP_PAGERANKDSLV2_H | ||
#define GENCPP_PAGERANKDSLV2_H | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <limits.h> | ||
#include <cuda.h> | ||
#include "../graph.hpp" | ||
#include "../libcuda.cuh" | ||
#include <cooperative_groups.h> | ||
|
||
void Compute_PR(graph& g,float beta,float delta,int maxIter, | ||
float* pageRank); | ||
|
||
// Device-side globals mirroring the parameters of Compute_PR.
// beta and maxIter are declared here but are not referenced by any code
// visible in this file.
__device__ float beta ; | ||
// Damping factor: uploaded by Compute_PR via cudaMemcpyToSymbol and read by
// Compute_PR_kernel when it combines the neighbour sum into the new rank.
__device__ float delta ; | ||
__device__ int maxIter ; | ||
|
||
|
||
// Node count as a float: uploaded by Compute_PR via cudaMemcpyToSymbol.
// Compute_PR_kernel derives the node count from its V argument instead of
// reading this symbol.
__device__ float num_nodes ; // DEVICE ASSTMENT in .h | ||
|
||
// Fills the first V entries of array_1 with val_1 and of array_2 with val_2.
// Each thread handles the single element at its flat 1-D global index, so the
// launch must supply at least V threads; surplus threads exit immediately.
__global__ void merged_kernel_1(unsigned V, float* array_1, float val_1, float* array_2, float val_2){
  const unsigned idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (idx >= V) {
    return;
  }
  array_1[idx] = val_1;
  array_2[idx] = val_2;
}
; // DEVICE ASSTMENT in .h | ||
|
||
// Per-iteration convergence residual. Compute_PR zeroes it with
// cudaMemcpyToSymbol before each launch, Compute_PR_kernel accumulates into it
// with atomicAdd, and Compute_PR reads it back with cudaMemcpyFromSymbol.
__device__ float diff ; // DEVICE ASSTMENT in .h | ||
|
||
; // DEVICE ASSTMENT in .h | ||
|
||
// One PageRank sweep: thread v computes the new rank of vertex v.
//
// Launch layout: 1-D grid of 1-D blocks supplying at least V threads; threads
// with a global index >= V exit immediately. No shared memory is used.
//
// Parameters:
//   V, E            - vertex and edge counts (E is not used here).
//   d_meta          - forward CSR offsets (V+1 entries); d_meta[n+1]-d_meta[n]
//                     is used as the out-degree of vertex n.
//   d_rev_meta      - reverse CSR offsets (V+1 entries) delimiting the
//                     incoming edges of each vertex.
//   d_src           - source vertex of each reverse-CSR edge.
//   d_pageRank      - ranks from the previous sweep (read only).
//   d_pageRank_nxt  - ranks produced by this sweep (written).
//   d_data, d_weight, d_modified_next - accepted for signature compatibility,
//                     not used by this kernel.
//
// Reads the device symbol `delta` and adds |new - old| for every vertex to the
// device symbol `diff`. The absolute value matters: signed differences of
// opposite sign cancel in the sum, which would make the host-side test
// `diff > beta` report convergence while individual ranks are still moving.
__global__ void Compute_PR_kernel(int V, int E, int* d_meta, int* d_data, int* d_src, int* d_weight, int *d_rev_meta,bool *d_modified_next,float* d_pageRank,float* d_pageRank_nxt){ // BEGIN KER FUN via ADDKERNEL
  float num_nodes  = (float)V;
  unsigned v = blockIdx.x * blockDim.x + threadIdx.x;
  if(v >= (unsigned)V) return;

  // Sum rank/out-degree over every in-neighbour of v.
  float sum = 0.0f;
  for (int edge = d_rev_meta[v]; edge < d_rev_meta[v+1]; edge++) {
    int nbr = d_src[edge];
    // NOTE(review): assumes the forward and reverse CSR describe the same edge
    // set, so an in-neighbour always has out-degree >= 1 - confirm.
    sum += d_pageRank[nbr] / (float)(d_meta[nbr+1]-d_meta[nbr]);
  } // end FOR NBR ITR.

  float val = (1.0f - delta) / num_nodes + delta * sum;

  atomicAdd(&diff, fabsf(val - d_pageRank[v]));
  d_pageRank_nxt[v] = val;
} // end KER FUNC
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
// FOR BC: nvcc bc_dsl_v2.cu -arch=sm_60 -std=c++14 -rdc=true # HW must support CC 6.0+ Pascal or after | ||
#include "bc_dslV2.h" | ||
|
||
/*
 * Host driver for Betweenness Centrality on the GPU.
 *
 * Copies the graph's forward and reverse CSR arrays to the device, zeroes the
 * BC accumulator and then, for each vertex in sourceSet, runs a level-by-level
 * forward pass (fwd_pass) until no thread clears the finished flag, followed
 * by backward passes (back_pass) from the deepest level reached.
 *
 * Parameters:
 *   g         - graph providing num_nodes(), num_edges(), getEdgeLen() and the
 *               indexofNodes / rev_indexofNodes / edgeList / srcList arrays.
 *   BC        - host output buffer with room for V floats.
 *   sourceSet - source vertices to process.
 *
 * Prints the node/edge counts and the elapsed GPU time to stdout. For an empty
 * graph (V <= 0) the function returns right after printing the counts, since a
 * zero-block kernel launch is invalid.
 */
void Compute_BC(graph& g,float* BC,std::set<int>& sourceSet)
{
  // CSR BEGIN
  int V = g.num_nodes();
  int E = g.num_edges();

  printf("#nodes:%d\n",V);
  printf("#edges:%d\n",E);
  int* edgeLen = g.getEdgeLen();

  // Nothing to compute; also avoids launching kernels with zero blocks.
  if (V <= 0) {
    return;
  }

  // Report (but do not abort on) CUDA failures so they are no longer silent.
  auto reportCudaError = [](cudaError_t err, const char* what) {
    if (err != cudaSuccess) {
      fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(err));
    }
  };

  // Host staging copies of the CSR arrays.
  int *h_meta     = (int *)malloc( (V+1)*sizeof(int));
  int *h_data     = (int *)malloc( (E)*sizeof(int));
  int *h_src      = (int *)malloc( (E)*sizeof(int));
  int *h_weight   = (int *)malloc( (E)*sizeof(int));
  int *h_rev_meta = (int *)malloc( (V+1)*sizeof(int));

  for(int i=0; i<= V; i++) {
    h_meta[i]     = g.indexofNodes[i];
    h_rev_meta[i] = g.rev_indexofNodes[i];
  }

  for(int i=0; i< E; i++) {
    h_data[i]   = g.edgeList[i];
    h_src[i]    = g.srcList[i];
    h_weight[i] = edgeLen[i];
  }

  int* d_meta;
  int* d_data;
  int* d_src;
  int* d_weight;
  int* d_rev_meta;
  bool* d_modified_next;

  cudaMalloc(&d_meta, sizeof(int)*(1+V));
  cudaMalloc(&d_data, sizeof(int)*(E));
  cudaMalloc(&d_src, sizeof(int)*(E));
  cudaMalloc(&d_weight, sizeof(int)*(E));
  cudaMalloc(&d_rev_meta, sizeof(int)*(V+1));
  cudaMalloc(&d_modified_next, sizeof(bool)*(V));

  cudaMemcpy(  d_meta,   h_meta, sizeof(int)*(V+1), cudaMemcpyHostToDevice);
  cudaMemcpy(  d_data,   h_data, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(   d_src,    h_src, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_weight, h_weight, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_rev_meta, h_rev_meta, sizeof(int)*((V+1)), cudaMemcpyHostToDevice);

  // The blocking copies above have completed, so the staging buffers are done.
  free(h_meta);
  free(h_data);
  free(h_src);
  free(h_weight);
  free(h_rev_meta);

  // CSR END
  //LAUNCH CONFIG
  const unsigned threadsPerBlock = 512;
  unsigned numBlocks    = (V+threadsPerBlock-1)/threadsPerBlock;

  // TIMER START
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  float milliseconds = 0;
  cudaEventRecord(start,0);

  //DECLARE DEVICE AND HOST vars in params
  float* d_BC;
  cudaMalloc(&d_BC, sizeof(float)*(V));

  //BEGIN DSL PARSING
  float* d_sigma;
  cudaMalloc(&d_sigma, sizeof(float)*(V));

  float* d_delta;
  cudaMalloc(&d_delta, sizeof(float)*(V));

  initKernel<float> <<<numBlocks,threadsPerBlock>>>(V,d_BC,(float)0);
  reportCudaError(cudaGetLastError(), "initKernel<float> launch");

  //EXTRA vars for ITBFS AND REVBFS
  // Allocated once and reused for every source instead of being re-allocated
  // (and leaked) on each iteration of the source loop.
  bool finished;
  bool* d_finished;        cudaMalloc(&d_finished, sizeof(bool) *(1));
  int* d_hops_from_source; cudaMalloc(&d_hops_from_source, sizeof(int));
  int* d_level;            cudaMalloc(&d_level, sizeof(int) *(V));

  //FOR SIGNATURE of SET - Assumes set for on .cu only
  for (int src : sourceSet)
  {
    merged_kernel_1<<<numBlocks,threadsPerBlock>>>(V, d_delta, (float)0, d_sigma, (float)0, d_sigma, src, (float)1);

    //EXTRA vars INITIALIZATION
    int hops_from_source = 0;
    cudaMemset(d_hops_from_source, 0, sizeof(int));
    initKernel<int> <<<numBlocks,threadsPerBlock>>>(V,d_level,-1);
    initIndex<int><<<1,1>>>(V,d_level,src, 0);

    // FORWARD PASS: one launch per level until no kernel thread reports work.
    do {
      finished = true;
      cudaMemcpy(d_finished, &finished, sizeof(bool)*(1), cudaMemcpyHostToDevice);

      //Kernel LAUNCH
      fwd_pass<<<numBlocks,threadsPerBlock>>>(V, d_meta, d_data,d_weight, d_delta, d_sigma, d_level, d_hops_from_source, d_finished,d_BC); ///DONE from varList

      incrementDeviceVar<<<1,1>>>(d_hops_from_source);
      reportCudaError(cudaDeviceSynchronize(), "fwd_pass");
      ++hops_from_source; // keep the host copy of the level in step with the device

      cudaMemcpy(&finished, d_finished, sizeof(bool)*(1), cudaMemcpyDeviceToHost);
    }while(!finished);

    hops_from_source--;
    cudaMemcpy(d_hops_from_source, &hops_from_source, sizeof(int)*(1), cudaMemcpyHostToDevice);

    //BACKWARD PASS
    while(hops_from_source > 1) {

      //KERNEL Launch
      back_pass<<<numBlocks,threadsPerBlock>>>(V, d_meta, d_data, d_weight, d_delta, d_sigma, d_level, d_hops_from_source, d_finished
        ,d_BC); ///DONE from varList

      hops_from_source--;
      cudaMemcpy(d_hops_from_source, &hops_from_source, sizeof(int)*(1), cudaMemcpyHostToDevice);
    }
    //accumulate_bc<<<numBlocks,threadsPerBlock>>>(V,d_delta, d_BC, d_level, src);

  }

  reportCudaError(cudaGetLastError(), "Compute_BC kernels");

  //cudaFree up!! all propVars in this BLOCK!
  cudaFree(d_finished);
  cudaFree(d_hops_from_source);
  cudaFree(d_level);
  cudaFree(d_delta);
  cudaFree(d_sigma);

  //TIMER STOP
  cudaEventRecord(stop,0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&milliseconds, start, stop);
  printf("GPU Time: %.6f ms\n", milliseconds);
  cudaEventDestroy(start);
  cudaEventDestroy(stop);

  cudaMemcpy(      BC,       d_BC, sizeof(float)*(V), cudaMemcpyDeviceToHost);

  // Release everything that is still owned by this function.
  cudaFree(d_BC);
  cudaFree(d_meta);
  cudaFree(d_data);
  cudaFree(d_src);
  cudaFree(d_weight);
  cudaFree(d_rev_meta);
  cudaFree(d_modified_next);
} //end FUN
Oops, something went wrong.