forked from abacusmodeling/abacus-develop
-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
GPU: Add multi device support for HPsi(veff_pw) (#1456)
* add multi device support for hpsi(veff_pw) * add UTs * fix compilation errors with cuda environment * remove cuda flags * fix CI error * fix Intel compilation error
- Loading branch information
Showing
30 changed files
with
997 additions
and
121 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#ifndef MODULE_HAMILT_VEFF_H | ||
#define MODULE_HAMILT_VEFF_H | ||
|
||
#include "module_psi/psi.h" | ||
#include <complex> | ||
|
||
namespace hamilt { | ||
// Functor interface for the veff_pw operation, parameterized on the | ||
// floating-point type FPTYPE and on a Device tag type. Only operator() is | ||
// declared here; this header does not contain its definition. | ||
//   dev  - pointer to the device tag object selecting the implementation. | ||
//   size - element count passed to the implementation. | ||
//   out  - complex array; non-const, so it is the operand that can be written. | ||
//   in   - read-only real array. | ||
// NOTE(review): presumably computes out[i] *= in[i] for i in [0, size), as the | ||
// GPU implementation does - confirm against the CPU definition. | ||
template <typename FPTYPE, typename Device> | ||
struct veff_pw_op { | ||
void operator() ( | ||
const Device* dev, | ||
const int& size, | ||
std::complex<FPTYPE>* out, | ||
const FPTYPE* in); | ||
}; | ||
|
||
#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM | ||
// Partial specialization of the functor for the psi::DEVICE_GPU device tag. | ||
// It is only declared when one of the CUDA/ROCm build macros tested by the | ||
// enclosing #if is non-zero. The parameter list mirrors the primary template: | ||
//   dev  - pointer to the GPU device tag object. | ||
//   size - element count passed to the implementation. | ||
//   out  - complex array; non-const, so it is the operand that can be written. | ||
//   in   - read-only real array. | ||
template <typename FPTYPE> | ||
struct veff_pw_op<FPTYPE, psi::DEVICE_GPU> { | ||
void operator() ( | ||
const psi::DEVICE_GPU* dev, | ||
const int& size, | ||
std::complex<FPTYPE>* out, | ||
const FPTYPE* in); | ||
}; | ||
#endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM | ||
} // namespace hamilt | ||
#endif //MODULE_HAMILT_VEFF_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#include "module_hamilt/include/veff.h" | ||
#include <complex> | ||
#include <thrust/complex.h> | ||
#include "cuda_runtime.h" | ||
|
||
namespace hamilt{ | ||
|
||
// Threads per block for the veff_pw kernel launch; also the divisor used to | ||
// compute how many blocks are needed to cover `size` elements. | ||
#define THREADS_PER_BLOCK 256 | ||
|
||
// Device kernel: each thread handles at most one element. The thread whose | ||
// global index is tid multiplies out[tid] in place by the real value in[tid]; | ||
// threads with tid >= size do nothing. | ||
//   size - number of elements to process. | ||
//   out  - complex array updated in place. | ||
//   in   - read-only real multipliers. | ||
template <typename FPTYPE> | ||
__global__ void veff_pw( | ||
    const int size, | ||
    thrust::complex<FPTYPE>* out, | ||
    const FPTYPE* in) | ||
{ | ||
    const int tid = threadIdx.x + blockDim.x * blockIdx.x; | ||
    if (tid < size) { | ||
        out[tid] *= in[tid]; | ||
    } | ||
} | ||
|
||
// GPU implementation of veff_pw_op: launches the veff_pw kernel with one | ||
// thread per element so that out[i] *= in[i] for every i in [0, size). | ||
// This matches the host loop `for (ir = 0; ir < size; ++ir) out[ir] *= in[ir];`. | ||
//   dev  - GPU device tag; not used in the body. | ||
//   size - number of elements; nothing is launched when size <= 0. | ||
//   out  - complex array updated in place; reinterpreted as | ||
//          thrust::complex<FPTYPE> for the kernel. | ||
//   in   - read-only real multipliers. | ||
// NOTE(review): out and in are presumably device-accessible pointers, since | ||
// the kernel dereferences them - confirm with callers. The launch is not | ||
// followed by a synchronization or an error check in this function. | ||
template <typename FPTYPE> | ||
void veff_pw_op<FPTYPE, psi::DEVICE_GPU>::operator() ( | ||
    const psi::DEVICE_GPU* dev, | ||
    const int& size, | ||
    std::complex<FPTYPE>* out, | ||
    const FPTYPE* in) | ||
{ | ||
    // A grid with zero blocks is an invalid launch configuration, so an | ||
    // empty (or negative) element count returns without launching. | ||
    if (size <= 0) { return; } | ||
    // Ceiling division written so it cannot overflow int for large size. | ||
    const int block = (size - 1) / THREADS_PER_BLOCK + 1; | ||
    veff_pw<FPTYPE><<<block, THREADS_PER_BLOCK>>>( | ||
        size, // control params | ||
        reinterpret_cast<thrust::complex<FPTYPE>*>(out), // array of data | ||
        in); // array of data | ||
} | ||
|
||
// Explicit instantiation of the GPU functor for double precision; it is the | ||
// only FPTYPE instantiated in this file. | ||
template struct veff_pw_op<double, psi::DEVICE_GPU>; | ||
|
||
} // namespace hamilt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.