Skip to content

Commit

Permalink
Merge pull request #15 from ddiakiteaneo/fix_strided_version_rec
Browse files Browse the repository at this point in the history
Fix the strided version, rename the template instantiation macros, and fix the accuracy check loop
  • Loading branch information
ddiakiteaneo committed Jan 12, 2024
2 parents b0b8e08 + 0f82e62 commit d42d881
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 28 deletions.
6 changes: 3 additions & 3 deletions src/alge/cs_gradient.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5557,11 +5557,11 @@ res_cpu = !compute_cuda;
// For now, these lines are for my own use
// They will have to be removed
// compute_cuda = true;
compute_cpu = true;
// compute_cpu = true;
// res_cpu = false;

// Not to be kept in the final version
perf = true;
// perf = false;
// accuracy = false;


Expand Down Expand Up @@ -5777,7 +5777,7 @@ res_cpu = !compute_cuda;
if(compute_cuda){
if(compute_cpu){
for (cs_lnum_t c_id = 0; c_id < n_cells; c_id++) {
for (cs_lnum_t i = 0; i < 3; i++) {
for (cs_lnum_t i = 0; i < stride; i++) {
for (int j =0; j < 3; ++j) {
auto cpu = grad_cpu[c_id][i][j];
auto cuda = grad_gpu[c_id][i][j];
Expand Down
46 changes: 23 additions & 23 deletions src/alge/cs_gradient_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
* Recompute cocg at boundaries, using saved cocgb
*----------------------------------------------------------------------------*/

#define INSTANTIATE(name, stride) template void name <stride> (const cs_mesh_t *m,\
#define INSTANTIATE_LSQ(name, stride) template void name <stride> (const cs_mesh_t *m,\
const cs_mesh_adjacencies_t *madj,\
const cs_mesh_quantities_t *fvq,\
const cs_halo_type_t halo_type,\
Expand All @@ -86,7 +86,7 @@
cs_lnum_t n_c_iter_max,\
cs_real_t c_eps)

#define INSTANTIATE1(name, stride) template void name <stride> (const cs_mesh_t *m, \
#define INSTANTIATE_RECONSTRUCT(name, stride) template void name <stride> (const cs_mesh_t *m, \
const cs_mesh_adjacencies_t *madj, \
const cs_mesh_quantities_t *fvq, \
cs_halo_type_t halo_type, \
Expand Down Expand Up @@ -1496,9 +1496,9 @@ cs_lsq_vector_gradient_strided_cuda(const cs_mesh_t *m,

}

INSTANTIATE(cs_lsq_vector_gradient_strided_cuda, 1);
INSTANTIATE(cs_lsq_vector_gradient_strided_cuda, 3);
INSTANTIATE(cs_lsq_vector_gradient_strided_cuda, 6);
INSTANTIATE_LSQ(cs_lsq_vector_gradient_strided_cuda, 1);
INSTANTIATE_LSQ(cs_lsq_vector_gradient_strided_cuda, 3);
INSTANTIATE_LSQ(cs_lsq_vector_gradient_strided_cuda, 6);



Expand Down Expand Up @@ -1687,7 +1687,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
dofij,
i_f_face_normal);

// _compute_reconstruct_v_i_face_v2<<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_v2<stride><<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// (n_i_faces * 3,
// i_face_cells,
// pvar_d,
Expand All @@ -1699,7 +1699,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// i_f_face_normal);

/*************************************Kernels Scatter conflict free**************************************/
// _compute_reconstruct_v_i_face_cf<<<get_gridsize(n_i_faces, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_cf<stride><<<get_gridsize(n_i_faces, blocksize), blocksize, 0, stream>>>
// (n_i_faces,
// i_face_cells,
// pvar_d,
Expand All @@ -1710,7 +1710,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// dofij,
// i_f_face_normal);

// _compute_reconstruct_v_i_face_v2_cf<<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_v2_cf<stride><<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// (n_i_faces * 3,
// i_face_cells,
// pvar_d,
Expand All @@ -1722,7 +1722,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// i_f_face_normal);

/*************************************Kernels Gather**************************************************/
// _compute_reconstruct_v_i_face_gather<<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather<stride><<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// ( n_cells,
// pvar_d,
// weight,
Expand All @@ -1737,7 +1737,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_i_faces_sgn);


// _compute_reconstruct_v_i_face_gather_v2<<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather_v2<stride><<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// ( n_cells * 3 * 3,
// pvar_d,
// weight,
Expand All @@ -1754,7 +1754,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,


/*************************************Kernels Gather registers memory************************************/
// _compute_reconstruct_v_i_face_gather_v3<<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather_v3<stride><<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// ( n_cells,
// pvar_d,
// weight,
Expand All @@ -1769,7 +1769,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_i_faces_sgn);


// _compute_reconstruct_v_i_face_gather_v4<<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather_v4<stride><<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// ( n_cells * 3 * 3,
// pvar_d,
// weight,
Expand Down Expand Up @@ -1843,7 +1843,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
b_face_cells);


// _compute_reconstruct_v_b_face_v2<<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_v2<stride><<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_faces * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1856,7 +1856,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_face_cells);

/*************************************Kernels Scatter conflict free************************************/
// _compute_reconstruct_v_b_face_cf<<<get_gridsize(n_b_faces, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_cf<stride><<<get_gridsize(n_b_faces, blocksize), blocksize, 0, stream>>>
// ( n_b_faces,
// coefb_d,
// coefa_d,
Expand All @@ -1868,7 +1868,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_f_face_normal,
// b_face_cells);

// _compute_reconstruct_v_b_face_v2_cf<<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_v2_cf<stride><<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_faces * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1881,7 +1881,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_face_cells);

/*************************************Kernels Gather**************************************************/
// _compute_reconstruct_v_b_face_gather<<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather<stride><<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// ( n_b_cells,
// coefb_d,
// coefa_d,
Expand All @@ -1896,7 +1896,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_b_faces_idx);


// _compute_reconstruct_v_b_face_gather_v2<<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather_v2<stride><<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_cells * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1911,7 +1911,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_b_faces_idx);

/*************************************Kernels Gather registers memory***************************************/
// _compute_reconstruct_v_b_face_gather_v3<<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather_v3<stride><<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// ( n_b_cells,
// coefb_d,
// coefa_d,
Expand All @@ -1926,7 +1926,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_b_faces_idx);


// _compute_reconstruct_v_b_face_gather_v4<<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather_v4<stride><<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_cells * 3,
// coefb_d,
// coefa_d,
Expand Down Expand Up @@ -1960,7 +1960,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,

CS_CUDA_CHECK(cudaEventRecord(b_faces_2, stream));

// _compute_reconstruct_correction<<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_correction<stride><<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// ( n_cells,
// has_dc,
// c_disable_flag,
Expand Down Expand Up @@ -2182,6 +2182,6 @@ _gradient_vector_cuda(const cs_mesh_t *mesh,
CS_CUDA_CHECK(cudaFree(_bc_coeff_b_d));
}

INSTANTIATE1(cs_reconstruct_vector_gradient_cuda, 1);
INSTANTIATE1(cs_reconstruct_vector_gradient_cuda, 3);
INSTANTIATE1(cs_reconstruct_vector_gradient_cuda, 6);
INSTANTIATE_RECONSTRUCT(cs_reconstruct_vector_gradient_cuda, 1);
INSTANTIATE_RECONSTRUCT(cs_reconstruct_vector_gradient_cuda, 3);
INSTANTIATE_RECONSTRUCT(cs_reconstruct_vector_gradient_cuda, 6);
2 changes: 1 addition & 1 deletion src/alge/cs_reconstruct_vector_gradient_gather_v3.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ _compute_reconstruct_v_b_face_gather_v3(cs_lnum_t n_b_cells,
}
for(cs_lnum_t i = 0; i < stride; i++){
for(cs_lnum_t j = 0; j < 3; j++){
grad[c_id1][i][j] = _grad[i][j];
grad[c_id][i][j] = _grad[i][j];
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/alge/cs_reconstruct_vector_gradient_gather_v5.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ _compute_reconstruct_v_b_face_gather_v5(cs_lnum_t n_b_cells,

for(cs_lnum_t i = 0; i < stride; i++){
for(cs_lnum_t j = 0; j < 3; j++){
grad[c_id1][i][j] = _grad[lindex][i][j];
grad[c_id][i][j] = _grad[lindex][i][j];
}
}
}

0 comments on commit d42d881

Please sign in to comment.