Skip to content

Commit

Permalink
pass integer zero to memset
Browse files Browse the repository at this point in the history
  • Loading branch information
njzjz committed Feb 22, 2022
1 parent d9a4a86 commit 90995b6
Show file tree
Hide file tree
Showing 14 changed files with 60 additions and 60 deletions.
12 changes: 6 additions & 6 deletions source/lib/src/cuda/prod_env_mat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -539,9 +539,9 @@ void prod_env_mat_a_gpu_cuda(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_cuda(
nlist,
Expand Down Expand Up @@ -578,9 +578,9 @@ void prod_env_mat_r_gpu_cuda(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 1;
DPErrcheck(cudaMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_cuda(
nlist,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/cuda/prod_force.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void prod_force_a_gpu_cuda(
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

force_deriv_wrt_center_atom<FPTYPE, TPB> <<<nloc, TPB>>>(
force,
Expand Down Expand Up @@ -141,7 +141,7 @@ void prod_force_r_gpu_cuda(
const int ndescrpt = nnei * 1;
DPErrcheck(cudaMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

force_deriv_wrt_center_atom<FPTYPE, TPB> <<<nloc, TPB>>>(
force,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/cuda/prod_force_grad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void prod_force_grad_a_gpu_cuda(
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down Expand Up @@ -122,7 +122,7 @@ void prod_force_grad_r_gpu_cuda(
const int ndescrpt = nnei * 1;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down
8 changes: 4 additions & 4 deletions source/lib/src/cuda/prod_virial.cu
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ void prod_virial_a_gpu_cuda(
{
DPErrcheck(cudaMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(cudaMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN - 1) / LEN;
Expand Down Expand Up @@ -153,10 +153,10 @@ void prod_virial_r_gpu_cuda(
{
DPErrcheck(cudaMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(cudaMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN - 1) / LEN;
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/cuda/prod_virial_grad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void prod_virial_grad_a_gpu_cuda(
const int ndescrpt = nnei * 4;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand All @@ -125,7 +125,7 @@ void prod_virial_grad_r_gpu_cuda(
const int ndescrpt = nnei;
DPErrcheck(cudaMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand Down
16 changes: 8 additions & 8 deletions source/lib/src/cuda/tabulate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -648,10 +648,10 @@ void tabulate_fusion_se_a_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));
DPErrcheck(cudaMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei * 4));
0, sizeof(FPTYPE) * nloc * nnei * 4));

tabulate_fusion_se_a_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size>>>(
dy_dem_x, dy_dem,
Expand All @@ -676,7 +676,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
tabulate_fusion_se_a_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size>>>(
dz_dy,
table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down Expand Up @@ -721,10 +721,10 @@ void tabulate_fusion_se_t_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
DPErrcheck(cudaMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));

tabulate_fusion_se_t_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size>>>(
dy_dem_x, dy_dem,
Expand All @@ -750,7 +750,7 @@ void tabulate_fusion_se_t_grad_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * last_layer_size));
0, sizeof(FPTYPE) * nloc * last_layer_size));

tabulate_fusion_se_t_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size>>>(
dz_dy,
Expand Down Expand Up @@ -791,7 +791,7 @@ void tabulate_fusion_se_r_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));

tabulate_fusion_se_r_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size>>>(
dy_dem,
Expand All @@ -814,7 +814,7 @@ void tabulate_fusion_se_r_grad_grad_gpu_cuda(
if (nloc <= 0) {return;}
DPErrcheck(cudaMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
tabulate_fusion_se_r_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size>>>(
dz_dy,
table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down
2 changes: 1 addition & 1 deletion source/lib/src/prod_force.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ prod_force_a_cpu(
{
const int ndescrpt = 4 * nnei;

memset(force, 0.0, sizeof(FPTYPE) * nall * 3);
memset(force, 0, sizeof(FPTYPE) * nall * 3);
// compute force of a frame
#pragma omp parallel
for (int i_idx = 0; i_idx < nloc; ++i_idx) {
Expand Down
12 changes: 6 additions & 6 deletions source/lib/src/rocm/prod_env_mat.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -537,9 +537,9 @@ void prod_env_mat_a_gpu_rocm(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_rocm(
nlist,
Expand Down Expand Up @@ -576,9 +576,9 @@ void prod_env_mat_r_gpu_rocm(
{
const int nnei = sec.back();
const int ndescrpt = nnei * 1;
DPErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3));
DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));

format_nbor_list_gpu_rocm(
nlist,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/rocm/prod_force.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ namespace deepmd {
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom<FPTYPE, TPB>), nloc, TPB, 0, 0,
force,
Expand Down Expand Up @@ -141,7 +141,7 @@ namespace deepmd {
const int ndescrpt = nnei * 1;
DPErrcheck(hipMemset(
force,
0.0, sizeof(FPTYPE) * nall * 3));
0, sizeof(FPTYPE) * nall * 3));

hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom<FPTYPE, TPB>), nloc, TPB, 0, 0,
force,
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/rocm/prod_force_grad.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ void prod_force_grad_a_gpu_rocm(
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down Expand Up @@ -121,7 +121,7 @@ void prod_force_grad_r_gpu_rocm(
const int ndescrpt = nnei * 1;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int nblock = (ndescrpt + TPB - 1) / TPB;
dim3 block_grid(nloc, nblock);
dim3 thread_grid(TPB, 1);
Expand Down
8 changes: 4 additions & 4 deletions source/lib/src/rocm/prod_virial.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,10 @@ void prod_virial_a_gpu_rocm(
{
DPErrcheck(hipMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(hipMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN -1) / LEN;
Expand Down Expand Up @@ -150,10 +150,10 @@ void prod_virial_r_gpu_rocm(
{
DPErrcheck(hipMemset(
virial,
0.0, sizeof(FPTYPE) * 9));
0, sizeof(FPTYPE) * 9));
DPErrcheck(hipMemset(
atom_virial,
0.0, sizeof(FPTYPE) * 9 * nall));
0, sizeof(FPTYPE) * 9 * nall));

const int LEN = 16;
int nblock = (nnei + LEN -1) / LEN;
Expand Down
4 changes: 2 additions & 2 deletions source/lib/src/rocm/prod_virial_grad.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ void prod_virial_grad_a_gpu_rocm(
const int ndescrpt = nnei * 4;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand All @@ -125,7 +125,7 @@ void prod_virial_grad_r_gpu_rocm(
const int ndescrpt = nnei;
DPErrcheck(hipMemset(
grad_net,
0.0, sizeof(FPTYPE) * nloc * ndescrpt));
0, sizeof(FPTYPE) * nloc * ndescrpt));
const int LEN = 128;
const int nblock = (nloc + LEN -1) / LEN;
dim3 block_grid(nblock, nnei);
Expand Down
16 changes: 8 additions & 8 deletions source/lib/src/rocm/tabulate.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -637,10 +637,10 @@ void tabulate_fusion_se_a_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));
DPErrcheck(hipMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei * 4));
0, sizeof(FPTYPE) * nloc * nnei * 4));

hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0,
dy_dem_x, dy_dem,
Expand All @@ -665,7 +665,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0,
dz_dy,
table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down Expand Up @@ -710,10 +710,10 @@ void tabulate_fusion_se_t_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dy_dem_x,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
DPErrcheck(hipMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));

hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size, 0,
dy_dem_x, dy_dem,
Expand All @@ -739,7 +739,7 @@ void tabulate_fusion_se_t_grad_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * last_layer_size));
0, sizeof(FPTYPE) * nloc * last_layer_size));
hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, 0, 0,
dz_dy,
table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
Expand Down Expand Up @@ -779,7 +779,7 @@ void tabulate_fusion_se_r_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dy_dem,
0.0, sizeof(FPTYPE) * nloc * nnei));
0, sizeof(FPTYPE) * nloc * nnei));

hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0,
dy_dem,
Expand All @@ -802,7 +802,7 @@ void tabulate_fusion_se_r_grad_grad_gpu_rocm(
if(nloc <= 0) {return;}
DPErrcheck(hipMemset(
dz_dy,
0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0,
dz_dy,
table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
Expand Down
Loading

0 comments on commit 90995b6

Please sign in to comment.