diff --git a/source/module_base/blas_connector.cpp b/source/module_base/blas_connector.cpp index 61ea4b390f..69f51b744f 100644 --- a/source/module_base/blas_connector.cpp +++ b/source/module_base/blas_connector.cpp @@ -69,17 +69,17 @@ float BlasConnector::dot( const int n, const float *X, const int incX, const flo { if (device_type == base_device::AbacusDevice_t::CpuDevice) { return sdot_(&n, X, &incX, Y, &incY); + } return sdot_(&n, X, &incX, Y, &incY); } -} double BlasConnector::dot( const int n, const double *X, const int incX, const double *Y, const int incY, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { return ddot_(&n, X, &incX, Y, &incY); + } return ddot_(&n, X, &incX, Y, &incY); } -} // C = a * A.? * B.? + b * C void BlasConnector::gemm(const char transa, const char transb, const int m, const int n, const int k, @@ -196,39 +196,39 @@ float BlasConnector::nrm2( const int n, const float *X, const int incX, base_dev { if (device_type == base_device::AbacusDevice_t::CpuDevice) { return snrm2_( &n, X, &incX ); + } return snrm2_( &n, X, &incX ); } -} double BlasConnector::nrm2( const int n, const double *X, const int incX, base_device::AbacusDevice_t device_type ) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { return dnrm2_( &n, X, &incX ); + } return dnrm2_( &n, X, &incX ); } -} double BlasConnector::nrm2( const int n, const std::complex<double> *X, const int incX, base_device::AbacusDevice_t device_type ) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { return dznrm2_( &n, X, &incX ); + } return dznrm2_( &n, X, &incX ); } -} // copies a into b void BlasConnector::copy(const long n, const double *a, const int incx, double *b, const int incy, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { dcopy_(&n, a, &incx, b, &incy); -} + } } void BlasConnector::copy(const long n, const std::complex<double> *a, const int incx, std::complex<double> *b, const 
int incy, base_device::AbacusDevice_t device_type) { if (device_type == base_device::AbacusDevice_t::CpuDevice) { zcopy_(&n, a, &incx, b, &incy); -} + } } \ No newline at end of file diff --git a/source/module_hsolver/kernels/cuda/math_kernel_op.cu b/source/module_hsolver/kernels/cuda/math_kernel_op.cu index c5a49b85e3..6185433895 100644 --- a/source/module_hsolver/kernels/cuda/math_kernel_op.cu +++ b/source/module_hsolver/kernels/cuda/math_kernel_op.cu @@ -12,7 +12,7 @@ namespace hsolver { const int warp_size = 32; -const unsigned int full_mask = 0xffffffff; +// const unsigned int full_mask = 0xffffffff; const int thread_per_block = 256; } @@ -65,11 +65,11 @@ void destoryBLAShandle(){ } } -template <typename FPTYPE> -__forceinline__ __device__ void warp_reduce(FPTYPE& val) { - for (int offset = 16; offset > 0; offset >>= 1) - val += __shfl_down_sync(full_mask, val, offset); -} +// template <typename FPTYPE> +// __forceinline__ __device__ void warp_reduce(FPTYPE& val) { +// for (int offset = 16; offset > 0; offset >>= 1) +// val += __shfl_down_sync(full_mask, val, offset); +// } template __global__ void line_minimize_with_block(