diff --git a/source/lib/src/cuda/prod_env_mat.cu b/source/lib/src/cuda/prod_env_mat.cu
index 0832b0c814..b2cd4dcaf7 100644
--- a/source/lib/src/cuda/prod_env_mat.cu
+++ b/source/lib/src/cuda/prod_env_mat.cu
@@ -539,9 +539,9 @@ void prod_env_mat_a_gpu_cuda(
 {
   const int nnei = sec.back();
   const int ndescrpt = nnei * 4;
-  DPErrcheck(cudaMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
-  DPErrcheck(cudaMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
-  DPErrcheck(cudaMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3));
+  DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
+  DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));
 
   format_nbor_list_gpu_cuda(
       nlist, 
@@ -578,9 +578,9 @@ void prod_env_mat_r_gpu_cuda(
 {
   const int nnei = sec.back();
   const int ndescrpt = nnei * 1;
-  DPErrcheck(cudaMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
-  DPErrcheck(cudaMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
-  DPErrcheck(cudaMemset(rij, 0., sizeof(FPTYPE) * nloc * nnei * 3));
+  DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
+  DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));
 
   format_nbor_list_gpu_cuda(
       nlist, 
diff --git a/source/lib/src/cuda/prod_force.cu b/source/lib/src/cuda/prod_force.cu
index 0c8625ee66..4f634b3fb6 100644
--- a/source/lib/src/cuda/prod_force.cu
+++ b/source/lib/src/cuda/prod_force.cu
@@ -109,7 +109,7 @@ void prod_force_a_gpu_cuda(
   const int ndescrpt = nnei * 4;
   DPErrcheck(cudaMemset(
       force, 
-      0.0, sizeof(FPTYPE) * nall * 3));
+      0, sizeof(FPTYPE) * nall * 3));
 
   force_deriv_wrt_center_atom<FPTYPE, TPB> <<<nloc, TPB>>>(
       force, 
@@ -141,7 +141,7 @@ void prod_force_r_gpu_cuda(
   const int ndescrpt = nnei * 1;
   DPErrcheck(cudaMemset(
       force, 
-      0.0, sizeof(FPTYPE) * nall * 3));
+      0, sizeof(FPTYPE) * nall * 3));
 
   force_deriv_wrt_center_atom<FPTYPE, TPB> <<<nloc, TPB>>>(
       force, 
diff --git a/source/lib/src/cuda/prod_force_grad.cu b/source/lib/src/cuda/prod_force_grad.cu
index b852f039d9..33955faade 100644
--- a/source/lib/src/cuda/prod_force_grad.cu
+++ b/source/lib/src/cuda/prod_force_grad.cu
@@ -89,7 +89,7 @@ void prod_force_grad_a_gpu_cuda(
     const int ndescrpt = nnei * 4;
     DPErrcheck(cudaMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int nblock = (ndescrpt + TPB - 1) / TPB;
     dim3 block_grid(nloc, nblock);
     dim3 thread_grid(TPB, 1);
@@ -122,7 +122,7 @@ void prod_force_grad_r_gpu_cuda(
     const int ndescrpt = nnei * 1;
     DPErrcheck(cudaMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int nblock = (ndescrpt + TPB - 1) / TPB;
     dim3 block_grid(nloc, nblock);
     dim3 thread_grid(TPB, 1);
diff --git a/source/lib/src/cuda/prod_virial.cu b/source/lib/src/cuda/prod_virial.cu
index 8761dd93e3..e806af4e57 100644
--- a/source/lib/src/cuda/prod_virial.cu
+++ b/source/lib/src/cuda/prod_virial.cu
@@ -116,10 +116,10 @@ void prod_virial_a_gpu_cuda(
 {
   DPErrcheck(cudaMemset(
       virial, 
-      0.0, sizeof(FPTYPE) * 9));
+      0, sizeof(FPTYPE) * 9));
   DPErrcheck(cudaMemset(
       atom_virial, 
-      0.0, sizeof(FPTYPE) * 9 * nall));
+      0, sizeof(FPTYPE) * 9 * nall));
     
   const int LEN = 16;
   int nblock = (nnei + LEN - 1) / LEN;
@@ -153,10 +153,10 @@ void prod_virial_r_gpu_cuda(
 {
   DPErrcheck(cudaMemset(
       virial, 
-      0.0, sizeof(FPTYPE) * 9));
+      0, sizeof(FPTYPE) * 9));
   DPErrcheck(cudaMemset(
       atom_virial, 
-      0.0, sizeof(FPTYPE) * 9 * nall));
+      0, sizeof(FPTYPE) * 9 * nall));
     
   const int LEN = 16;
   int nblock = (nnei + LEN - 1) / LEN;
diff --git a/source/lib/src/cuda/prod_virial_grad.cu b/source/lib/src/cuda/prod_virial_grad.cu
index 91d06b7fd0..0209ba933a 100644
--- a/source/lib/src/cuda/prod_virial_grad.cu
+++ b/source/lib/src/cuda/prod_virial_grad.cu
@@ -100,7 +100,7 @@ void prod_virial_grad_a_gpu_cuda(
     const int ndescrpt = nnei * 4;
     DPErrcheck(cudaMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int LEN = 128;
     const int nblock = (nloc + LEN -1) / LEN;
     dim3 block_grid(nblock, nnei);
@@ -125,7 +125,7 @@ void prod_virial_grad_r_gpu_cuda(
     const int ndescrpt = nnei;
     DPErrcheck(cudaMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int LEN = 128;
     const int nblock = (nloc + LEN -1) / LEN;
     dim3 block_grid(nblock, nnei);
diff --git a/source/lib/src/cuda/tabulate.cu b/source/lib/src/cuda/tabulate.cu
index feee7268b3..538f750a57 100644
--- a/source/lib/src/cuda/tabulate.cu
+++ b/source/lib/src/cuda/tabulate.cu
@@ -648,10 +648,10 @@ void tabulate_fusion_se_a_grad_gpu_cuda(
   if (nloc <= 0) {return;}
   DPErrcheck(cudaMemset(
       dy_dem_x,
-      0.0, sizeof(FPTYPE) * nloc * nnei));
+      0, sizeof(FPTYPE) * nloc * nnei));
   DPErrcheck(cudaMemset(
       dy_dem,
-      0.0, sizeof(FPTYPE) * nloc * nnei * 4));
+      0, sizeof(FPTYPE) * nloc * nnei * 4));
 
   tabulate_fusion_se_a_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size>>>(
       dy_dem_x, dy_dem,
@@ -676,7 +676,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_cuda(
   if (nloc <= 0) {return;}
   DPErrcheck(cudaMemset(
     dz_dy,
-    0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
+    0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
   tabulate_fusion_se_a_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size>>>(
       dz_dy,
       table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
@@ -721,10 +721,10 @@ void tabulate_fusion_se_t_grad_gpu_cuda(
   if (nloc <= 0) {return;}
   DPErrcheck(cudaMemset(
       dy_dem_x,
-      0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
   DPErrcheck(cudaMemset(
       dy_dem,
-      0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
 
   tabulate_fusion_se_t_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size>>>(
       dy_dem_x, dy_dem,
@@ -750,7 +750,7 @@ void tabulate_fusion_se_t_grad_grad_gpu_cuda(
   if (nloc <= 0) {return;}
   DPErrcheck(cudaMemset(
     dz_dy,
-    0.0, sizeof(FPTYPE) * nloc * last_layer_size));
+    0, sizeof(FPTYPE) * nloc * last_layer_size));
 
   tabulate_fusion_se_t_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size>>>(
       dz_dy,
@@ -791,7 +791,7 @@ void tabulate_fusion_se_r_grad_gpu_cuda(
   if (nloc <= 0) {return;}
   DPErrcheck(cudaMemset(
       dy_dem,
-      0.0, sizeof(FPTYPE) * nloc * nnei));
+      0, sizeof(FPTYPE) * nloc * nnei));
 
   tabulate_fusion_se_r_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size>>>(
       dy_dem,
@@ -814,7 +814,7 @@ void tabulate_fusion_se_r_grad_grad_gpu_cuda(
   if (nloc <= 0) {return;}
   DPErrcheck(cudaMemset(
     dz_dy,
-    0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
+    0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
   tabulate_fusion_se_r_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK> <<<nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size>>>(
       dz_dy,
       table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
diff --git a/source/lib/src/prod_force.cc b/source/lib/src/prod_force.cc
index b1286e7b14..a21d33cb91 100644
--- a/source/lib/src/prod_force.cc
+++ b/source/lib/src/prod_force.cc
@@ -34,7 +34,7 @@ prod_force_a_cpu(
 {
   const int ndescrpt = 4 * nnei;
 
-  memset(force, 0.0, sizeof(FPTYPE) * nall * 3);
+  memset(force, 0, sizeof(FPTYPE) * nall * 3);
   // compute force of a frame
   #pragma omp parallel
   for (int i_idx = 0; i_idx < nloc; ++i_idx) {
diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu
index b724fe7619..45fa0deb41 100644
--- a/source/lib/src/rocm/prod_env_mat.hip.cu
+++ b/source/lib/src/rocm/prod_env_mat.hip.cu
@@ -537,9 +537,9 @@ void prod_env_mat_a_gpu_rocm(
 {
   const int nnei = sec.back();
   const int ndescrpt = nnei * 4;
-  DPErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
-  DPErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
-  DPErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3));
+  DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
+  DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));
 
   format_nbor_list_gpu_rocm(
       nlist, 
@@ -576,9 +576,9 @@ void prod_env_mat_r_gpu_rocm(
 {
   const int nnei = sec.back();
   const int ndescrpt = nnei * 1;
-  DPErrcheck(hipMemset(em, 0.0, sizeof(FPTYPE) * nloc * ndescrpt));
-  DPErrcheck(hipMemset(em_deriv, 0.0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
-  DPErrcheck(hipMemset(rij, 0.0, sizeof(FPTYPE) * nloc * nnei * 3));
+  DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * nloc * ndescrpt * 3));
+  DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * nloc * nnei * 3));
 
   format_nbor_list_gpu_rocm(
       nlist, 
diff --git a/source/lib/src/rocm/prod_force.hip.cu b/source/lib/src/rocm/prod_force.hip.cu
index 258c92ec44..19ca5f0b89 100644
--- a/source/lib/src/rocm/prod_force.hip.cu
+++ b/source/lib/src/rocm/prod_force.hip.cu
@@ -109,7 +109,7 @@ namespace deepmd {
     const int ndescrpt = nnei * 4;
     DPErrcheck(hipMemset(
         force, 
-        0.0, sizeof(FPTYPE) * nall * 3));
+        0, sizeof(FPTYPE) * nall * 3));
   
     hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom<FPTYPE, TPB>), nloc, TPB, 0, 0, 
         force, 
@@ -141,7 +141,7 @@ namespace deepmd {
     const int ndescrpt = nnei * 1;
     DPErrcheck(hipMemset(
         force, 
-        0.0, sizeof(FPTYPE) * nall * 3));
+        0, sizeof(FPTYPE) * nall * 3));
   
     hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom<FPTYPE, TPB>), nloc, TPB, 0, 0, 
         force, 
diff --git a/source/lib/src/rocm/prod_force_grad.hip.cu b/source/lib/src/rocm/prod_force_grad.hip.cu
index 6a78a1dc1d..2329552896 100644
--- a/source/lib/src/rocm/prod_force_grad.hip.cu
+++ b/source/lib/src/rocm/prod_force_grad.hip.cu
@@ -89,7 +89,7 @@ void prod_force_grad_a_gpu_rocm(
     const int ndescrpt = nnei * 4;
     DPErrcheck(hipMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int nblock = (ndescrpt + TPB - 1) / TPB;
     dim3 block_grid(nloc, nblock);
     dim3 thread_grid(TPB, 1);
@@ -121,7 +121,7 @@ void prod_force_grad_r_gpu_rocm(
     const int ndescrpt = nnei * 1;
     DPErrcheck(hipMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int nblock = (ndescrpt + TPB - 1) / TPB;
     dim3 block_grid(nloc, nblock);
     dim3 thread_grid(TPB, 1);
diff --git a/source/lib/src/rocm/prod_virial.hip.cu b/source/lib/src/rocm/prod_virial.hip.cu
index 04e04c242f..5c7cc05721 100644
--- a/source/lib/src/rocm/prod_virial.hip.cu
+++ b/source/lib/src/rocm/prod_virial.hip.cu
@@ -113,10 +113,10 @@ void prod_virial_a_gpu_rocm(
 {
     DPErrcheck(hipMemset(
         virial, 
-        0.0, sizeof(FPTYPE) * 9));
+        0, sizeof(FPTYPE) * 9));
     DPErrcheck(hipMemset(
       atom_virial, 
-      0.0, sizeof(FPTYPE) * 9 * nall));
+      0, sizeof(FPTYPE) * 9 * nall));
     
   const int LEN = 16;
   int nblock = (nnei + LEN -1) / LEN;
@@ -150,10 +150,10 @@ void prod_virial_r_gpu_rocm(
 {
     DPErrcheck(hipMemset(
         virial, 
-        0.0, sizeof(FPTYPE) * 9));
+        0, sizeof(FPTYPE) * 9));
     DPErrcheck(hipMemset(
       atom_virial, 
-      0.0, sizeof(FPTYPE) * 9 * nall));
+      0, sizeof(FPTYPE) * 9 * nall));
     
   const int LEN = 16;
   int nblock = (nnei + LEN -1) / LEN;
diff --git a/source/lib/src/rocm/prod_virial_grad.hip.cu b/source/lib/src/rocm/prod_virial_grad.hip.cu
index 88310b5e10..18c009f0c8 100644
--- a/source/lib/src/rocm/prod_virial_grad.hip.cu
+++ b/source/lib/src/rocm/prod_virial_grad.hip.cu
@@ -100,7 +100,7 @@ void prod_virial_grad_a_gpu_rocm(
     const int ndescrpt = nnei * 4;
     DPErrcheck(hipMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int LEN = 128;
     const int nblock = (nloc + LEN -1) / LEN;
     dim3 block_grid(nblock, nnei);
@@ -125,7 +125,7 @@ void prod_virial_grad_r_gpu_rocm(
     const int ndescrpt = nnei;
     DPErrcheck(hipMemset(
         grad_net, 
-        0.0, sizeof(FPTYPE) * nloc * ndescrpt));
+        0, sizeof(FPTYPE) * nloc * ndescrpt));
     const int LEN = 128;
     const int nblock = (nloc + LEN -1) / LEN;
     dim3 block_grid(nblock, nnei);
diff --git a/source/lib/src/rocm/tabulate.hip.cu b/source/lib/src/rocm/tabulate.hip.cu
index 22dbcae916..4152668dfc 100644
--- a/source/lib/src/rocm/tabulate.hip.cu
+++ b/source/lib/src/rocm/tabulate.hip.cu
@@ -637,10 +637,10 @@ void tabulate_fusion_se_a_grad_gpu_rocm(
   if(nloc <= 0) {return;}
   DPErrcheck(hipMemset(
       dy_dem_x,
-      0.0, sizeof(FPTYPE) * nloc * nnei));
+      0, sizeof(FPTYPE) * nloc * nnei));
   DPErrcheck(hipMemset(
       dy_dem,
-      0.0, sizeof(FPTYPE) * nloc * nnei * 4));
+      0, sizeof(FPTYPE) * nloc * nnei * 4));
 
   hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, 
       dy_dem_x, dy_dem,
@@ -665,7 +665,7 @@ void tabulate_fusion_se_a_grad_grad_gpu_rocm(
   if(nloc <= 0) {return;}
   DPErrcheck(hipMemset(
     dz_dy,
-    0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
+    0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
   hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, 
     dz_dy,
     table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
@@ -710,10 +710,10 @@ void tabulate_fusion_se_t_grad_gpu_rocm(
   if(nloc <= 0) {return;}
   DPErrcheck(hipMemset(
       dy_dem_x,
-      0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
   DPErrcheck(hipMemset(
       dy_dem,
-      0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
 
   hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size, 0, 
       dy_dem_x, dy_dem,
@@ -739,7 +739,7 @@ void tabulate_fusion_se_t_grad_grad_gpu_rocm(
   if(nloc <= 0) {return;}
   DPErrcheck(hipMemset(
     dz_dy,
-    0.0, sizeof(FPTYPE) * nloc * last_layer_size));
+    0, sizeof(FPTYPE) * nloc * last_layer_size));
   hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, 0, 0, 
     dz_dy,
     table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
@@ -779,7 +779,7 @@ void tabulate_fusion_se_r_grad_gpu_rocm(
   if(nloc <= 0) {return;}
   DPErrcheck(hipMemset(
       dy_dem,
-      0.0, sizeof(FPTYPE) * nloc * nnei));
+      0, sizeof(FPTYPE) * nloc * nnei));
 
   hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, 
       dy_dem,
@@ -802,7 +802,7 @@ void tabulate_fusion_se_r_grad_grad_gpu_rocm(
   if(nloc <= 0) {return;}
   DPErrcheck(hipMemset(
     dz_dy,
-    0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
+    0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
   hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_grad_fifth_order_polynomial<FPTYPE, MM, KK>), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, 
     dz_dy,
     table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
diff --git a/source/lib/src/tabulate.cc b/source/lib/src/tabulate.cc
index 29f60684e6..e116711466 100644
--- a/source/lib/src/tabulate.cc
+++ b/source/lib/src/tabulate.cc
@@ -97,7 +97,7 @@ void deepmd::tabulate_fusion_se_a_cpu(
     const int nnei, 
     const int last_layer_size)
 {
-  memset(out, 0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size);
+  memset(out, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size);
   const FPTYPE lower   = table_info[0];
   const FPTYPE upper   = table_info[1];
   const FPTYPE _max    = table_info[2];
@@ -160,8 +160,8 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(
     const int nnei, 
     const int last_layer_size) 
 {
-  memset(dy_dem_x, 0.0, sizeof(FPTYPE) * nloc * nnei);
-  memset(dy_dem, 0.0, sizeof(FPTYPE) * nloc * nnei * 4);
+  memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei);
+  memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei * 4);
   FPTYPE const lower   = table_info[0];
   FPTYPE const upper   = table_info[1];
   FPTYPE const _max    = table_info[2];
@@ -235,7 +235,7 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(
     const int nnei,
     const int last_layer_size)
 {
-  memset(dz_dy, 0.0, sizeof(FPTYPE) * nloc * 4 * last_layer_size);
+  memset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size);
   const FPTYPE lower   = table_info[0];
   const FPTYPE upper   = table_info[1];
   const FPTYPE _max    = table_info[2];
@@ -304,7 +304,7 @@ void deepmd::tabulate_fusion_se_t_cpu(
     const int nnei_j, 
     const int last_layer_size)
 {
-  memset(out, 0.0, sizeof(FPTYPE) * nloc * last_layer_size);
+  memset(out, 0, sizeof(FPTYPE) * nloc * last_layer_size);
   const FPTYPE lower   = table_info[0];
   const FPTYPE upper   = table_info[1];
   const FPTYPE _max    = table_info[2];
@@ -360,8 +360,8 @@ void deepmd::tabulate_fusion_se_t_grad_cpu(
     const int nnei_j, 
     const int last_layer_size) 
 {
-  memset(dy_dem_x, 0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
-  memset(dy_dem,   0.0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
+  memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
+  memset(dy_dem,   0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
   FPTYPE const lower   = table_info[0];
   FPTYPE const upper   = table_info[1];
   FPTYPE const _max    = table_info[2];
@@ -426,7 +426,7 @@ void deepmd::tabulate_fusion_se_t_grad_grad_cpu(
     const int nnei_j,
     const int last_layer_size)
 {
-  memset(dz_dy, 0.0, sizeof(FPTYPE) * nloc * last_layer_size);
+  memset(dz_dy, 0, sizeof(FPTYPE) * nloc * last_layer_size);
   const FPTYPE lower   = table_info[0];
   const FPTYPE upper   = table_info[1];
   const FPTYPE _max    = table_info[2];
@@ -478,7 +478,7 @@ void deepmd::tabulate_fusion_se_r_cpu(
     const int nnei, 
     const int last_layer_size)
 {
-  memset(out, 0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size);
+  memset(out, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size);
   const FPTYPE lower   = table_info[0];
   const FPTYPE upper   = table_info[1];
   const FPTYPE _max    = table_info[2];
@@ -516,7 +516,7 @@ void deepmd::tabulate_fusion_se_r_grad_cpu(
     const int nnei, 
     const int last_layer_size) 
 {
-  memset(dy_dem, 0.0, sizeof(FPTYPE) * nloc * nnei);
+  memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei);
   FPTYPE const lower   = table_info[0];
   FPTYPE const upper   = table_info[1];
   FPTYPE const _max    = table_info[2];
@@ -557,7 +557,7 @@ void deepmd::tabulate_fusion_se_r_grad_grad_cpu(
     const int nnei,
     const int last_layer_size)
 {
-  memset(dz_dy, 0.0, sizeof(FPTYPE) * nloc * nnei * last_layer_size);
+  memset(dz_dy, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size);
   const FPTYPE lower   = table_info[0];
   const FPTYPE upper   = table_info[1];
   const FPTYPE _max    = table_info[2];