diff --git a/source/lib/src/prod_force.cc b/source/lib/src/prod_force.cc index e9784d3409..b1286e7b14 100644 --- a/source/lib/src/prod_force.cc +++ b/source/lib/src/prod_force.cc @@ -36,14 +36,17 @@ prod_force_a_cpu( memset(force, 0.0, sizeof(FPTYPE) * nall * 3); // compute force of a frame + #pragma omp parallel for (int i_idx = 0; i_idx < nloc; ++i_idx) { // deriv wrt center atom + #pragma omp single for (int aa = 0; aa < ndescrpt; ++aa) { force[i_idx * 3 + 0] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0]; force[i_idx * 3 + 1] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1]; force[i_idx * 3 + 2] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2]; } // deriv wrt neighbors + #pragma omp for for (int jj = 0; jj < nnei; ++jj) { int j_idx = nlist[i_idx * nnei + jj]; if (j_idx < 0) continue; @@ -105,15 +108,18 @@ prod_force_r_cpu( } // compute force of a frame + #pragma omp parallel for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii; // deriv wrt center atom + #pragma omp single for (int aa = 0; aa < ndescrpt; ++aa){ force[i_idx * 3 + 0] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0]; force[i_idx * 3 + 1] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1]; force[i_idx * 3 + 2] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2]; } // deriv wrt neighbors + #pragma omp for for (int jj = 0; jj < nnei; ++jj){ int j_idx = nlist[i_idx * nnei + jj]; // if (j_idx > nloc) j_idx = j_idx % nloc; diff --git a/source/lib/src/prod_force_grad.cc b/source/lib/src/prod_force_grad.cc index 110bf790f4..78bad3c9ca 100644 --- a/source/lib/src/prod_force_grad.cc +++ b/source/lib/src/prod_force_grad.cc @@ -42,6 +42,7 @@ prod_force_grad_a_cpu( } // compute grad of one frame + #pragma omp parallel for for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii; @@ -120,6 +121,7 @@ prod_force_grad_r_cpu( } // compute grad of one frame + #pragma omp parallel for for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii; diff --git a/source/lib/src/prod_virial.cc b/source/lib/src/prod_virial.cc index f1c598c807..d715cf9e5b 100644 --- a/source/lib/src/prod_virial.cc +++ b/source/lib/src/prod_virial.cc @@ -44,6 +44,7 @@ prod_virial_a_cpu( } // compute virial of a frame + #pragma omp parallel for for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii; @@ -58,7 +59,9 @@ prod_virial_a_cpu( for (int dd0 = 0; dd0 < 3; ++dd0){ for (int dd1 = 0; dd1 < 3; ++dd1){ FPTYPE tmp_v = pref * rij[i_idx * nnei * 3 + jj * 3 + dd1] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd0]; + #pragma omp atomic virial[dd0 * 3 + dd1] -= tmp_v; + #pragma omp atomic atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v; } } @@ -120,6 +123,7 @@ prod_virial_r_cpu( } // compute virial of a frame + #pragma omp parallel for for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii; @@ -131,7 +135,9 @@ prod_virial_r_cpu( for (int dd0 = 0; dd0 < 3; ++dd0){ for (int dd1 = 0; dd1 < 3; ++dd1){ FPTYPE tmp_v = pref * rij[i_idx * nnei * 3 + jj * 3 + dd1] * env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd0]; + #pragma omp atomic virial[dd0 * 3 + dd1] -= tmp_v; + #pragma omp atomic atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v; } } diff --git a/source/lib/src/prod_virial_grad.cc b/source/lib/src/prod_virial_grad.cc index 8e225c0793..0f8495c90e 100644 --- a/source/lib/src/prod_virial_grad.cc +++ b/source/lib/src/prod_virial_grad.cc @@ -41,6 +41,7 @@ prod_virial_grad_a_cpu( } // compute grad of one frame + #pragma omp parallel for for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii; @@ -117,6 +118,7 @@ prod_virial_grad_r_cpu( } // compute grad of one frame + #pragma omp parallel for for (int ii = 0; ii < nloc; ++ii){ int i_idx = ii;