Skip to content

Commit

Permalink
merging the analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
sravan-college committed Apr 22, 2023
1 parent 0b8f686 commit 3ce367d
Show file tree
Hide file tree
Showing 25 changed files with 2,796 additions and 1,179 deletions.
132 changes: 132 additions & 0 deletions graphcode/generated_cuda/PageRankDSLV2.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Build example: nvcc PageRankDSLV2.cu -arch=sm_60 -std=c++14 -rdc=true  # hardware must support compute capability 6.0+ (Pascal or later)
#include "PageRankDSLV2.h"

/**
 * Host driver for the generated PageRank computation.
 *
 * Uploads the graph's forward/reverse CSR arrays to the device, initialises
 * every vertex's rank to 1/V, and then repeatedly launches Compute_PR_kernel
 * until the value accumulated in the device symbol ::diff is no longer
 * greater than `beta` or `maxIter` iterations have run.
 *
 * @param g        graph providing num_nodes(), num_edges(), getEdgeLen() and
 *                 the indexofNodes / rev_indexofNodes / edgeList / srcList arrays
 * @param beta     convergence threshold compared against the per-iteration diff
 * @param delta    copied to the device symbol ::delta and used by the kernel
 *                 in (1 - delta) / num_nodes + delta * sum
 * @param maxIter  upper bound on the number of iterations
 * @param pageRank host output buffer; receives V floats
 *
 * All host staging buffers, device buffers and CUDA events created here are
 * released before returning.
 */
void Compute_PR(graph& g,float beta,float delta,int maxIter,
float* pageRank)

{
  // CSR BEGIN
  const int V = g.num_nodes();
  const int E = g.num_edges();

  printf("#nodes:%d\n",V);
  printf("#edges:%d\n",E);
  int* edgeLen = g.getEdgeLen();  // not allocated here, so not freed here

  // Host staging copies of the CSR arrays.
  int* h_meta     = (int *)malloc( (V+1)*sizeof(int));
  int* h_data     = (int *)malloc( (E)*sizeof(int));
  int* h_src      = (int *)malloc( (E)*sizeof(int));
  int* h_weight   = (int *)malloc( (E)*sizeof(int));
  int* h_rev_meta = (int *)malloc( (V+1)*sizeof(int));

  for (int i = 0; i <= V; i++) {
    h_meta[i]     = g.indexofNodes[i];
    h_rev_meta[i] = g.rev_indexofNodes[i];
  }

  for (int i = 0; i < E; i++) {
    h_data[i]   = g.edgeList[i];
    h_src[i]    = g.srcList[i];
    h_weight[i] = edgeLen[i];
  }

  int*  d_meta          = NULL;
  int*  d_data          = NULL;
  int*  d_src           = NULL;
  int*  d_weight        = NULL;
  int*  d_rev_meta      = NULL;
  bool* d_modified_next = NULL;

  cudaMalloc(&d_meta, sizeof(int)*(1+V));
  cudaMalloc(&d_data, sizeof(int)*(E));
  cudaMalloc(&d_src, sizeof(int)*(E));
  cudaMalloc(&d_weight, sizeof(int)*(E));
  cudaMalloc(&d_rev_meta, sizeof(int)*(V+1));
  cudaMalloc(&d_modified_next, sizeof(bool)*(V));

  cudaMemcpy(d_meta, h_meta, sizeof(int)*(V+1), cudaMemcpyHostToDevice);
  cudaMemcpy(d_data, h_data, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_src, h_src, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_weight, h_weight, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_rev_meta, h_rev_meta, sizeof(int)*(V+1), cudaMemcpyHostToDevice);

  // The blocking copies above have completed, so the staging buffers are
  // no longer needed (the original never released them).
  free(h_meta);
  free(h_data);
  free(h_src);
  free(h_weight);
  free(h_rev_meta);

  // CSR END
  // LAUNCH CONFIG
  const unsigned threadsPerBlock = 512;
  const unsigned numBlocks = (V+threadsPerBlock-1)/threadsPerBlock;

  // TIMER START
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  float milliseconds = 0;
  cudaEventRecord(start,0);

  // DECLARE DEVICE AND HOST vars in params
  float* d_pageRank = NULL;
  cudaMalloc(&d_pageRank, sizeof(float)*(V));

  // BEGIN DSL PARSING
  float* d_pageRank_nxt = NULL;
  cudaMalloc(&d_pageRank_nxt, sizeof(float)*(V));

  float num_nodes = (float)g.num_nodes( );

  // pageRank[v] = 1 / num_nodes, pageRank_nxt[v] = 0 for every vertex.
  merged_kernel_1<<<numBlocks,threadsPerBlock>>>(V, d_pageRank, (float)1 / num_nodes, d_pageRank_nxt, (float)0);

  int iterCount = 0;
  float diff;

  // delta and num_nodes never change between iterations, so upload them once
  // instead of on every pass through the loop.
  cudaMemcpyToSymbol(::delta, &delta, sizeof(float), 0, cudaMemcpyHostToDevice);
  cudaMemcpyToSymbol(::num_nodes, &num_nodes, sizeof(float), 0, cudaMemcpyHostToDevice);

  do {
    // Reset the device-side accumulator before each iteration.
    diff = 0.0f;
    cudaMemcpyToSymbol(::diff, &diff, sizeof(float), 0, cudaMemcpyHostToDevice);

    Compute_PR_kernel<<<numBlocks, threadsPerBlock>>>(V,E,d_meta,d_data,d_src,d_weight,d_rev_meta,d_modified_next,d_pageRank,d_pageRank_nxt);
    cudaDeviceSynchronize();

    // The freshly computed ranks become the input of the next iteration.
    cudaMemcpy(d_pageRank, d_pageRank_nxt, sizeof(float)*V, cudaMemcpyDeviceToDevice);
    iterCount++;
    cudaMemcpyFromSymbol(&diff, ::diff, sizeof(float), 0, cudaMemcpyDeviceToHost);

  } while ((diff > beta) && (iterCount < maxIter));

  // cudaFree up!! all propVars in this BLOCK!
  cudaFree(d_pageRank_nxt);

  // TIMER STOP
  cudaEventRecord(stop,0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&milliseconds, start, stop);
  printf("GPU Time: %.6f ms\n", milliseconds);

  cudaMemcpy(pageRank, d_pageRank, sizeof(float)*(V), cudaMemcpyDeviceToHost);

  // Release everything that was previously leaked on every call.
  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  cudaFree(d_pageRank);
  cudaFree(d_meta);
  cudaFree(d_data);
  cudaFree(d_src);
  cudaFree(d_weight);
  cudaFree(d_rev_meta);
  cudaFree(d_modified_next);
} //end FUN
52 changes: 52 additions & 0 deletions graphcode/generated_cuda/PageRankDSLV2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// FOR BC: nvcc bc_dsl_v2.cu -arch=sm_60 -std=c++14 -rdc=true # HW must support CC 6.0+ Pascal or after
#ifndef GENCPP_PAGERANKDSLV2_H
#define GENCPP_PAGERANKDSLV2_H
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <cuda.h>
#include "../graph.hpp"
#include "../libcuda.cuh"
#include <cooperative_groups.h>

// Host entry point for the PageRank computation (defined in PageRankDSLV2.cu).
//   g        - input graph
//   beta     - threshold: iteration continues while the accumulated diff > beta
//   delta    - factor used by the kernel in (1 - delta) / num_nodes + delta * sum
//   maxIter  - maximum number of iterations
//   pageRank - host output buffer; one float per vertex is copied back into it
void Compute_PR(graph& g,float beta,float delta,int maxIter,
float* pageRank);

// Device-side globals mirroring the host parameters of Compute_PR.
// Compute_PR uploads `delta` and `num_nodes` with cudaMemcpyToSymbol before
// launching Compute_PR_kernel; `beta` and `maxIter` are not referenced by any
// device code in this header.
// NOTE(review): these are definitions in a header, so every translation unit
// that includes it gets its own copy — confirm it is included only once.
__device__ float beta ;
__device__ float delta ;
__device__ int maxIter ;


// Vertex count as a float, written from the host by Compute_PR.
// NOTE(review): verify that the kernel actually reads this symbol.
__device__ float num_nodes ; // DEVICE ASSIGNMENT in .h

// Fused initialisation kernel: thread i writes val_1 to array_1[i] and
// val_2 to array_2[i]. Threads whose global index is >= V do nothing, so the
// launch may be rounded up to a whole number of blocks.
__global__ void merged_kernel_1(unsigned V, float* array_1, float val_1, float* array_2, float val_2){
  const unsigned idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= V) {
    return;  // tail threads of the last block
  }
  array_1[idx] = val_1;
  array_2[idx] = val_2;
}
; // DEVICE ASSTMENT in .h

// Per-iteration convergence accumulator: Compute_PR zeroes it from the host
// before each launch, Compute_PR_kernel adds to it with atomicAdd, and
// Compute_PR reads it back afterwards to compare against beta.
__device__ float diff ; // DEVICE ASSIGNMENT in .h

; // DEVICE ASSTMENT in .h

// One PageRank iteration, one thread per vertex.
//
// Thread v walks the range d_rev_meta[v] .. d_rev_meta[v+1] of d_src
// (presumably the in-neighbours of v in reverse-CSR form — confirm against the
// graph class), and for each neighbour nbr adds d_pageRank[nbr] divided by
// d_meta[nbr+1] - d_meta[nbr]. The new rank
//     (1 - delta) / V + delta * sum
// is stored in d_pageRank_nxt[v]; d_pageRank is only read.
//
// The absolute change |new - old| of every vertex is added to the device
// symbol `diff`. The original accumulated the signed change, so increases and
// decreases cancelled each other and the host loop `diff > beta` could stop
// after a single iteration regardless of how far the ranks had moved.
//
// Launch: 1-D grid covering at least V threads; surplus threads exit early.
// E, d_data, d_weight and d_modified_next are unused here but kept so the
// signature matches the launch site.
// Reads the device symbols `delta` and `diff`, which the host must set first.
__global__ void Compute_PR_kernel(int V, int E, int* d_meta, int* d_data, int* d_src, int* d_weight, int *d_rev_meta,bool *d_modified_next,float* d_pageRank,float* d_pageRank_nxt){ // BEGIN KER FUN via ADDKERNEL
  // Local vertex count as float; named so that it no longer shadows the
  // file-level device symbol num_nodes.
  const float nodeCount = V;
  unsigned v = blockIdx.x * blockDim.x + threadIdx.x;
  if(v >= V) return;

  float sum = 0.0f;
  for (int edge = d_rev_meta[v]; edge < d_rev_meta[v+1]; edge++) {
    int nbr = d_src[edge];
    // nbr contributes its current rank split by d_meta[nbr+1]-d_meta[nbr].
    sum = sum + d_pageRank[nbr] / (d_meta[nbr+1]-d_meta[nbr]);
  } // end FOR NBR ITR

  float val = (1 - delta) / nodeCount + delta * sum;

  // L1 contribution of this vertex to the convergence measure.
  atomicAdd(&diff, fabsf(val - d_pageRank[v]));
  d_pageRank_nxt[v] = val;
} // end KER FUNC

#endif
155 changes: 155 additions & 0 deletions graphcode/generated_cuda/bc_dslV2.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// FOR BC: nvcc bc_dsl_v2.cu -arch=sm_60 -std=c++14 -rdc=true # HW must support CC 6.0+ Pascal or after
#include "bc_dslV2.h"

/**
 * Host driver for the generated BC computation.
 *
 * Uploads the graph's CSR arrays to the device, zeroes d_BC, and then, for
 * each vertex in `sourceSet`, runs a level-by-level forward phase (fwd_pass,
 * repeated until the kernel leaves the `finished` flag set) followed by a
 * backward phase (back_pass, once per level from the deepest level down to
 * level 2). The result is finally copied into `BC`.
 *
 * @param g         graph providing num_nodes(), num_edges(), getEdgeLen() and
 *                  the indexofNodes / rev_indexofNodes / edgeList / srcList arrays
 * @param BC        host output buffer; receives V floats
 * @param sourceSet source vertices to process
 *
 * The per-source scratch buffers (d_finished, d_hops_from_source, d_level) are
 * allocated once and reused for every source; the original allocated them
 * inside the loop and never freed them. All host and device buffers and the
 * CUDA events are released before returning.
 */
void Compute_BC(graph& g,float* BC,std::set<int>& sourceSet)

{
  // CSR BEGIN
  const int V = g.num_nodes();
  const int E = g.num_edges();

  printf("#nodes:%d\n",V);
  printf("#edges:%d\n",E);
  int* edgeLen = g.getEdgeLen();  // not allocated here, so not freed here

  // Host staging copies of the CSR arrays.
  int* h_meta     = (int *)malloc( (V+1)*sizeof(int));
  int* h_data     = (int *)malloc( (E)*sizeof(int));
  int* h_src      = (int *)malloc( (E)*sizeof(int));
  int* h_weight   = (int *)malloc( (E)*sizeof(int));
  int* h_rev_meta = (int *)malloc( (V+1)*sizeof(int));

  for (int i = 0; i <= V; i++) {
    h_meta[i]     = g.indexofNodes[i];
    h_rev_meta[i] = g.rev_indexofNodes[i];
  }

  for (int i = 0; i < E; i++) {
    h_data[i]   = g.edgeList[i];
    h_src[i]    = g.srcList[i];
    h_weight[i] = edgeLen[i];
  }

  int*  d_meta          = NULL;
  int*  d_data          = NULL;
  int*  d_src           = NULL;
  int*  d_weight        = NULL;
  int*  d_rev_meta      = NULL;
  bool* d_modified_next = NULL;

  cudaMalloc(&d_meta, sizeof(int)*(1+V));
  cudaMalloc(&d_data, sizeof(int)*(E));
  cudaMalloc(&d_src, sizeof(int)*(E));
  cudaMalloc(&d_weight, sizeof(int)*(E));
  cudaMalloc(&d_rev_meta, sizeof(int)*(V+1));
  cudaMalloc(&d_modified_next, sizeof(bool)*(V));

  cudaMemcpy(d_meta, h_meta, sizeof(int)*(V+1), cudaMemcpyHostToDevice);
  cudaMemcpy(d_data, h_data, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_src, h_src, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_weight, h_weight, sizeof(int)*(E), cudaMemcpyHostToDevice);
  cudaMemcpy(d_rev_meta, h_rev_meta, sizeof(int)*(V+1), cudaMemcpyHostToDevice);

  // The blocking copies above have completed, so the staging buffers are
  // no longer needed.
  free(h_meta);
  free(h_data);
  free(h_src);
  free(h_weight);
  free(h_rev_meta);

  // CSR END
  // LAUNCH CONFIG
  const unsigned threadsPerBlock = 512;
  const unsigned numBlocks = (V+threadsPerBlock-1)/threadsPerBlock;

  // TIMER START
  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);
  float milliseconds = 0;
  cudaEventRecord(start,0);

  // DECLARE DEVICE AND HOST vars in params
  float* d_BC = NULL;
  cudaMalloc(&d_BC, sizeof(float)*(V));

  // BEGIN DSL PARSING
  float* d_sigma = NULL;
  cudaMalloc(&d_sigma, sizeof(float)*(V));

  float* d_delta = NULL;
  cudaMalloc(&d_delta, sizeof(float)*(V));

  initKernel<float> <<<numBlocks,threadsPerBlock>>>(V,d_BC,(float)0);

  // EXTRA vars for ITBFS AND REVBFS: allocated once, re-initialised per source.
  bool* d_finished = NULL;
  int*  d_hops_from_source = NULL;
  int*  d_level = NULL;
  cudaMalloc(&d_finished, sizeof(bool)*(1));
  cudaMalloc(&d_hops_from_source, sizeof(int));
  cudaMalloc(&d_level, sizeof(int)*(V));

  for (int src : sourceSet)
  {
    merged_kernel_1<<<numBlocks,threadsPerBlock>>>(V, d_delta, (float)0, d_sigma, (float)0, d_sigma, src, (float)1);

    bool finished;
    int hops_from_source = 0;
    cudaMemset(d_hops_from_source, 0, sizeof(int));

    // EXTRA vars INITIALIZATION: every level is -1 except the source at 0.
    initKernel<int> <<<numBlocks,threadsPerBlock>>>(V,d_level,-1);
    initIndex<int><<<1,1>>>(V,d_level,src, 0);

    // FORWARD PASS: one launch per level until a launch leaves the flag set.
    do {
      finished = true;
      cudaMemcpy(d_finished, &finished, sizeof(bool)*(1), cudaMemcpyHostToDevice);

      fwd_pass<<<numBlocks,threadsPerBlock>>>(V, d_meta, d_data,d_weight, d_delta, d_sigma, d_level, d_hops_from_source, d_finished,d_BC);

      incrementDeviceVar<<<1,1>>>(d_hops_from_source);
      cudaDeviceSynchronize();
      ++hops_from_source; // keep the host level counter in step with the device one

      cudaMemcpy(&finished, d_finished, sizeof(bool)*(1), cudaMemcpyDeviceToHost);
    } while (!finished);

    hops_from_source--;
    cudaMemcpy(d_hops_from_source, &hops_from_source, sizeof(int)*(1), cudaMemcpyHostToDevice);

    // BACKWARD PASS: walk the levels back towards the source.
    while (hops_from_source > 1) {
      back_pass<<<numBlocks,threadsPerBlock>>>(V, d_meta, d_data, d_weight, d_delta, d_sigma, d_level, d_hops_from_source, d_finished
      ,d_BC);

      hops_from_source--;
      cudaMemcpy(d_hops_from_source, &hops_from_source, sizeof(int)*(1), cudaMemcpyHostToDevice);
    }
    //accumulate_bc<<<numBlocks,threadsPerBlock>>>(V,d_delta, d_BC, d_level, src);
  }

  // cudaFree up!! all propVars in this BLOCK!
  cudaFree(d_delta);
  cudaFree(d_sigma);

  // TIMER STOP
  cudaEventRecord(stop,0);
  cudaEventSynchronize(stop);
  cudaEventElapsedTime(&milliseconds, start, stop);
  printf("GPU Time: %.6f ms\n", milliseconds);

  cudaMemcpy( BC, d_BC, sizeof(float)*(V), cudaMemcpyDeviceToHost);

  // Release everything that was previously leaked on every call.
  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  cudaFree(d_finished);
  cudaFree(d_hops_from_source);
  cudaFree(d_level);
  cudaFree(d_BC);
  cudaFree(d_meta);
  cudaFree(d_data);
  cudaFree(d_src);
  cudaFree(d_weight);
  cudaFree(d_rev_meta);
  cudaFree(d_modified_next);
} //end FUN
Loading

0 comments on commit 3ce367d

Please sign in to comment.