Skip to content

Commit

Permalink
Remove unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
victorapm committed Mar 25, 2023
1 parent 278c3f8 commit 47e763f
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 182 deletions.
188 changes: 18 additions & 170 deletions src/seq_mv/csr_matop_device.c
Expand Up @@ -2001,175 +2001,13 @@ hypre_CSRMatrixPermuteDevice( hypre_CSRMatrix *A,

#endif /* HYPRE_USING_CUDA || defined(HYPRE_USING_HIP) */

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)

__global__ void
hypreGPUKernel_CSRMatrixComputePermutedRowPtrs( hypre_DeviceItem &item,
HYPRE_Int n,
HYPRE_Int *perm,
HYPRE_Int *row_ptrs_in,
HYPRE_Int *row_ptrs_out)
{
HYPRE_Int i = hypre_gpu_get_grid_thread_id<1, 1>(item);
if (i < n)
{
HYPRE_Int p = perm[i];
HYPRE_Int r0 = row_ptrs_in[p];
HYPRE_Int r1 = row_ptrs_in[p+1];
row_ptrs_out[i] = r1 - r0;
}
}

HYPRE_Int
hypreDevice_CSRMatrixComputePermutedRowPtrs(HYPRE_Int n,
HYPRE_Int *perm,
HYPRE_Int *row_ptrs_in,
HYPRE_Int *row_ptrs_out)
{
/* trivial case */
if (n <= 0)
{
return hypre_error_flag;
}

dim3 bDim = hypre_GetDefaultDeviceBlockDimension();
dim3 gDim = hypre_GetDefaultDeviceGridDimension(n, "thread", bDim);

HYPRE_GPU_LAUNCH( hypreGPUKernel_CSRMatrixComputePermutedRowPtrs, gDim, bDim, n,
perm, row_ptrs_in, row_ptrs_out);

return hypre_error_flag;
}



__global__ void
hypreGPUKernel_CSRMatrixPermuteColsVals( hypre_DeviceItem &item,
HYPRE_Int num_rows, HYPRE_Int nnz, HYPRE_Int threads_per_row,
HYPRE_Int * perm, HYPRE_Int * rqperm,
HYPRE_Int * src_i, HYPRE_Int * src_j, HYPRE_Complex * src_data,
HYPRE_Int * dst_i, HYPRE_Int * dst_j, HYPRE_Complex * dst_data)
{
HYPRE_Int tidx = hypre_gpu_get_thread_id<1>(item);
HYPRE_Int tid = tidx%threads_per_row;
HYPRE_Int rowSmall = tidx/threads_per_row;
HYPRE_Int row = (blockDim.x/threads_per_row)*blockIdx.x + rowSmall;

HYPRE_Int src_r0=0, src_r1=0, dst_r0=0, dst_r1=0;
if (row<num_rows) {
/* read the row pointers by the first thread in the warp */
if (tid==0) {
HYPRE_Int p = perm[row];
src_r0 = src_i[p];
src_r1 = src_i[p+1];
dst_r0 = dst_i[row];
dst_r1 = dst_i[row+1];
}
}
/* broadcast across the (sub) warp */
src_r0 = __shfl_sync(0xffffffff, src_r0, 0, threads_per_row);
src_r1 = __shfl_sync(0xffffffff, src_r1, 0, threads_per_row);
dst_r0 = __shfl_sync(0xffffffff, dst_r0, 0, threads_per_row);
dst_r1 = __shfl_sync(0xffffffff, dst_r1, 0, threads_per_row);

if (row<num_rows) {
for (HYPRE_Int t=tid; t<src_r1-src_r0; t+=threads_per_row) {
//if (src_r0+t<nnz && dst_r0+t<nnz) {
dst_j[dst_r0+t] = rqperm[src_j[src_r0+t]]; //]
dst_data[dst_r0+t] = src_data[src_r0+t];
//}
}
}
}

HYPRE_Int nextPowerOfTwo(HYPRE_Int v) {
v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v++;
return v;
}

HYPRE_Int
hypreDevice_CSRMatrixPermuteColsVals(HYPRE_Int nrows, HYPRE_Int nnz,
HYPRE_Int * perm, HYPRE_Int * rqperm,
HYPRE_Int * src_i, HYPRE_Int * src_j, HYPRE_Complex * src_data,
HYPRE_Int * dst_i, HYPRE_Int * dst_j, HYPRE_Complex * dst_data)
{
HYPRE_Int num_threads=128;
HYPRE_Int threads_per_row = (nnz + nrows - 1)/nrows;
threads_per_row = std::min(nextPowerOfTwo(threads_per_row), HYPRE_WARP_SIZE);
HYPRE_Int num_rows_per_block = num_threads/threads_per_row;
HYPRE_Int num_blocks = (nrows + num_rows_per_block - 1)/num_rows_per_block;

/* compute the location of the diagonal in each row */
const dim3 bDim(num_threads,1,1);
const dim3 gDim(num_blocks,1,1);

HYPRE_GPU_LAUNCH( hypreGPUKernel_CSRMatrixPermuteColsVals, gDim, bDim, nrows, nnz, threads_per_row,
perm, rqperm, src_i, src_j, src_data, dst_i, dst_j, dst_data);
return hypre_error_flag;
}

HYPRE_Int
hypre_CSRMatrixApplyRowColPermutation( hypre_CSRMatrix *A,
HYPRE_Int *perm,
HYPRE_Int *rqperm,
hypre_CSRMatrix **B )
{
HYPRE_Int *A_i = hypre_CSRMatrixI(A);
HYPRE_Int *A_j = hypre_CSRMatrixJ(A);
HYPRE_Complex *A_data = hypre_CSRMatrixData(A);
HYPRE_Int n = hypre_CSRMatrixNumRows(A);
HYPRE_Int nnz = hypre_CSRMatrixNumNonzeros(A);

#if defined(HYPRE_USING_CUDA)
cudaEvent_t start, stop;
float time;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
#endif

/* No schur complement makes everything easy :) */
*B = hypre_CSRMatrixCreate(n, n, nnz);
hypre_CSRMatrixInitialize(*B);
HYPRE_Int *B_i = hypre_CSRMatrixI(*B);
HYPRE_Int *B_j = hypre_CSRMatrixJ(*B);
HYPRE_Complex *B_data = hypre_CSRMatrixData(*B);

/* compute permuted row ptrs */
hypreDevice_CSRMatrixComputePermutedRowPtrs(n, perm, A_i, B_i);
hypreDevice_IntegerExclusiveScan(n+1, B_i);

/* permute the cols/values */
hypreDevice_CSRMatrixPermuteColsVals(n, nnz, perm, rqperm, A_i, A_j, A_data, B_i, B_j, B_data);

#if defined(HYPRE_USING_CUDA)
cudaEventRecord( stop, 0 );
cudaEventSynchronize( stop );
cudaEventElapsedTime( &time, start, stop );
//printf("%s %s %d : time=%1.5g\n",__FILE__,__FUNCTION__,__LINE__,time/1000.);
cudaEventDestroy( start );
cudaEventDestroy( stop );
#endif
return hypre_error_flag;
}

#endif /* HYPRE_USING_CUDA || defined(HYPRE_USING_HIP) */

#if defined(HYPRE_USING_GPU)

HYPRE_Int
hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
hypre_CSRMatrix **AT_ptr,
HYPRE_Int data)
{
hypre_GpuProfilingPushRange("CSRMatrixTranspose");

HYPRE_Complex *A_data = hypre_CSRMatrixData(A);
HYPRE_Int *A_i = hypre_CSRMatrixI(A);
HYPRE_Int *A_j = hypre_CSRMatrixJ(A);
Expand All @@ -2181,6 +2019,8 @@ hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
HYPRE_Int *C_j;
hypre_CSRMatrix *C;

HYPRE_ANNOTATE_FUNC_BEGIN;
hypre_GpuProfilingPushRange("CSRMatrixTranspose");

/* trivial case */
if (nnz_A == 0)
Expand Down Expand Up @@ -2222,6 +2062,7 @@ hypre_CSRMatrixTransposeDevice(hypre_CSRMatrix *A,
hypre_SyncComputeStream(hypre_handle());

hypre_GpuProfilingPopRange();
HYPRE_ANNOTATE_FUNC_END;

return hypre_error_flag;
}
Expand All @@ -2248,14 +2089,21 @@ hypre_CSRMatrixSortRow(hypre_CSRMatrix *A)
}

#if defined(HYPRE_USING_CUSPARSE)
/* @brief This functions sorts values and column indices in each row in ascending order INPLACE
* @param[in] n Number of rows
* @param[in] m Number of columns
* @param[in] nnzA Number of nonzeroes
* @param[in] *d_ia (Unsorted) Row indices
* @param[in,out] *d_ja_sorted On Start: Unsorted column indices. On return: Sorted column indices
* @param[in,out] *d_a_sorted On Start: Unsorted values. On Return: Sorted values corresponding with column indices
*/

/*--------------------------------------------------------------------------
* hypre_SortCSRCusparse
*
* Sorts values and column indices in each row in ascending order INPLACE
*
* Parameters:
* n: Number of rows [in]
* m: Number of columns [in]
* nnzA: Number of nonzeros [in]
* d_ia: row pointers [in/out]
* d_ja_sorted: column indices [in/out]
* d_a_sorted: coefficients [in/out]
*--------------------------------------------------------------------------*/

void
hypre_SortCSRCusparse( HYPRE_Int n,
HYPRE_Int m,
Expand Down
6 changes: 0 additions & 6 deletions src/seq_mv/protos.h
Expand Up @@ -344,7 +344,6 @@ HYPRE_Int hypre_CSRMatrixIntSpMVDevice( HYPRE_Int num_rows, HYPRE_Int num_nonzer
HYPRE_Int alpha, HYPRE_Int *d_ia, HYPRE_Int *d_ja,
HYPRE_Int *d_a, HYPRE_Int *d_x, HYPRE_Int beta,
HYPRE_Int *d_y );
HYPRE_Int hypre_CSRMatrixGetMainDiag(hypre_CSRMatrix *A, hypre_Vector *diag);

#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CsrsvData* hypre_CsrsvDataCreate();
Expand All @@ -359,8 +358,3 @@ hypre_GpuMatData* hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix);
#define hypre_CSRMatrixGPUMatSpMVBuffer(matrix) ( hypre_GpuMatDataSpMVBuffer (hypre_CSRMatrixGetGPUMatData(matrix)) )
#endif
void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix);

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_Int
hypre_CSRMatrixApplyRowColPermutation( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix **B );
#endif
6 changes: 0 additions & 6 deletions src/seq_mv/seq_mv.h
Expand Up @@ -616,7 +616,6 @@ HYPRE_Int hypre_CSRMatrixIntSpMVDevice( HYPRE_Int num_rows, HYPRE_Int num_nonzer
HYPRE_Int alpha, HYPRE_Int *d_ia, HYPRE_Int *d_ja,
HYPRE_Int *d_a, HYPRE_Int *d_x, HYPRE_Int beta,
HYPRE_Int *d_y );
HYPRE_Int hypre_CSRMatrixGetMainDiag(hypre_CSRMatrix *A, hypre_Vector *diag);

#if defined(HYPRE_USING_CUSPARSE) || defined(HYPRE_USING_ROCSPARSE) || defined(HYPRE_USING_ONEMKLSPARSE)
hypre_CsrsvData* hypre_CsrsvDataCreate();
Expand All @@ -632,11 +631,6 @@ hypre_GpuMatData* hypre_CSRMatrixGetGPUMatData(hypre_CSRMatrix *matrix);
#endif
void hypre_CSRMatrixGpuSpMVAnalysis(hypre_CSRMatrix *matrix);

#if defined(HYPRE_USING_CUDA) || defined(HYPRE_USING_HIP)
HYPRE_Int
hypre_CSRMatrixApplyRowColPermutation( hypre_CSRMatrix *A, HYPRE_Int *perm, HYPRE_Int *rqperm, hypre_CSRMatrix **B );
#endif

#ifdef __cplusplus
}
#endif
Expand Down

0 comments on commit 47e763f

Please sign in to comment.