Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix strided version and fix name and accuracy function #15

Merged
merged 1 commit into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/alge/cs_gradient.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -5557,11 +5557,11 @@ res_cpu = !compute_cuda;
// For now these lines are only for my own use
// They will have to be removed
// compute_cuda = true;
compute_cpu = true;
// compute_cpu = true;
// res_cpu = false;

// Not to be kept in the final version
perf = true;
// perf = false;
// accuracy = false;


Expand Down Expand Up @@ -5777,7 +5777,7 @@ res_cpu = !compute_cuda;
if(compute_cuda){
if(compute_cpu){
for (cs_lnum_t c_id = 0; c_id < n_cells; c_id++) {
for (cs_lnum_t i = 0; i < 3; i++) {
for (cs_lnum_t i = 0; i < stride; i++) {
for (int j =0; j < 3; ++j) {
auto cpu = grad_cpu[c_id][i][j];
auto cuda = grad_gpu[c_id][i][j];
Expand Down
46 changes: 23 additions & 23 deletions src/alge/cs_gradient_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
* Recompute cocg at boundaries, using saved cocgb
*----------------------------------------------------------------------------*/

#define INSTANTIATE(name, stride) template void name <stride> (const cs_mesh_t *m,\
#define INSTANTIATE_LSQ(name, stride) template void name <stride> (const cs_mesh_t *m,\
const cs_mesh_adjacencies_t *madj,\
const cs_mesh_quantities_t *fvq,\
const cs_halo_type_t halo_type,\
Expand All @@ -86,7 +86,7 @@
cs_lnum_t n_c_iter_max,\
cs_real_t c_eps)

#define INSTANTIATE1(name, stride) template void name <stride> (const cs_mesh_t *m, \
#define INSTANTIATE_RECONSTRUCT(name, stride) template void name <stride> (const cs_mesh_t *m, \
const cs_mesh_adjacencies_t *madj, \
const cs_mesh_quantities_t *fvq, \
cs_halo_type_t halo_type, \
Expand Down Expand Up @@ -1496,9 +1496,9 @@ cs_lsq_vector_gradient_strided_cuda(const cs_mesh_t *m,

}

INSTANTIATE(cs_lsq_vector_gradient_strided_cuda, 1);
INSTANTIATE(cs_lsq_vector_gradient_strided_cuda, 3);
INSTANTIATE(cs_lsq_vector_gradient_strided_cuda, 6);
INSTANTIATE_LSQ(cs_lsq_vector_gradient_strided_cuda, 1);
INSTANTIATE_LSQ(cs_lsq_vector_gradient_strided_cuda, 3);
INSTANTIATE_LSQ(cs_lsq_vector_gradient_strided_cuda, 6);



Expand Down Expand Up @@ -1687,7 +1687,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
dofij,
i_f_face_normal);

// _compute_reconstruct_v_i_face_v2<<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_v2<stride><<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// (n_i_faces * 3,
// i_face_cells,
// pvar_d,
Expand All @@ -1699,7 +1699,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// i_f_face_normal);

/*************************************Kernels Scatter conflict free**************************************/
// _compute_reconstruct_v_i_face_cf<<<get_gridsize(n_i_faces, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_cf<stride><<<get_gridsize(n_i_faces, blocksize), blocksize, 0, stream>>>
// (n_i_faces,
// i_face_cells,
// pvar_d,
Expand All @@ -1710,7 +1710,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// dofij,
// i_f_face_normal);

// _compute_reconstruct_v_i_face_v2_cf<<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_v2_cf<stride><<<get_gridsize(n_i_faces, blocksize) * 3, blocksize, 0, stream>>>
// (n_i_faces * 3,
// i_face_cells,
// pvar_d,
Expand All @@ -1722,7 +1722,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// i_f_face_normal);

/*************************************Kernels Gather**************************************************/
// _compute_reconstruct_v_i_face_gather<<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather<stride><<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// ( n_cells,
// pvar_d,
// weight,
Expand All @@ -1737,7 +1737,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_i_faces_sgn);


// _compute_reconstruct_v_i_face_gather_v2<<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather_v2<stride><<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// ( n_cells * 3 * 3,
// pvar_d,
// weight,
Expand All @@ -1754,7 +1754,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,


/*************************************Kernels Gather registers memory************************************/
// _compute_reconstruct_v_i_face_gather_v3<<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather_v3<stride><<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// ( n_cells,
// pvar_d,
// weight,
Expand All @@ -1769,7 +1769,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_i_faces_sgn);


// _compute_reconstruct_v_i_face_gather_v4<<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_i_face_gather_v4<stride><<<get_gridsize(n_cells, blocksize) * 3 * 3, blocksize, 0, stream>>>
// ( n_cells * 3 * 3,
// pvar_d,
// weight,
Expand Down Expand Up @@ -1843,7 +1843,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
b_face_cells);


// _compute_reconstruct_v_b_face_v2<<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_v2<stride><<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_faces * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1856,7 +1856,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_face_cells);

/*************************************Kernels Scatter conflict free************************************/
// _compute_reconstruct_v_b_face_cf<<<get_gridsize(n_b_faces, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_cf<stride><<<get_gridsize(n_b_faces, blocksize), blocksize, 0, stream>>>
// ( n_b_faces,
// coefb_d,
// coefa_d,
Expand All @@ -1868,7 +1868,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_f_face_normal,
// b_face_cells);

// _compute_reconstruct_v_b_face_v2_cf<<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_v2_cf<stride><<<get_gridsize(n_b_faces, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_faces * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1881,7 +1881,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// b_face_cells);

/*************************************Kernels Gather**************************************************/
// _compute_reconstruct_v_b_face_gather<<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather<stride><<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// ( n_b_cells,
// coefb_d,
// coefa_d,
Expand All @@ -1896,7 +1896,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_b_faces_idx);


// _compute_reconstruct_v_b_face_gather_v2<<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather_v2<stride><<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_cells * 3,
// coefb_d,
// coefa_d,
Expand All @@ -1911,7 +1911,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_b_faces_idx);

/*************************************Kernels Gather registers memory***************************************/
// _compute_reconstruct_v_b_face_gather_v3<<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather_v3<stride><<<get_gridsize(n_b_cells, blocksize), blocksize, 0, stream>>>
// ( n_b_cells,
// coefb_d,
// coefa_d,
Expand All @@ -1926,7 +1926,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,
// cell_b_faces_idx);


// _compute_reconstruct_v_b_face_gather_v4<<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// _compute_reconstruct_v_b_face_gather_v4<stride><<<get_gridsize(n_b_cells, blocksize) * 3, blocksize, 0, stream>>>
// ( n_b_cells * 3,
// coefb_d,
// coefa_d,
Expand Down Expand Up @@ -1960,7 +1960,7 @@ cs_reconstruct_vector_gradient_cuda(const cs_mesh_t *m,

CS_CUDA_CHECK(cudaEventRecord(b_faces_2, stream));

// _compute_reconstruct_correction<<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// _compute_reconstruct_correction<stride><<<get_gridsize(n_cells, blocksize), blocksize, 0, stream>>>
// ( n_cells,
// has_dc,
// c_disable_flag,
Expand Down Expand Up @@ -2182,6 +2182,6 @@ _gradient_vector_cuda(const cs_mesh_t *mesh,
CS_CUDA_CHECK(cudaFree(_bc_coeff_b_d));
}

INSTANTIATE1(cs_reconstruct_vector_gradient_cuda, 1);
INSTANTIATE1(cs_reconstruct_vector_gradient_cuda, 3);
INSTANTIATE1(cs_reconstruct_vector_gradient_cuda, 6);
INSTANTIATE_RECONSTRUCT(cs_reconstruct_vector_gradient_cuda, 1);
INSTANTIATE_RECONSTRUCT(cs_reconstruct_vector_gradient_cuda, 3);
INSTANTIATE_RECONSTRUCT(cs_reconstruct_vector_gradient_cuda, 6);
2 changes: 1 addition & 1 deletion src/alge/cs_reconstruct_vector_gradient_gather_v3.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ _compute_reconstruct_v_b_face_gather_v3(cs_lnum_t n_b_cells,
}
for(cs_lnum_t i = 0; i < stride; i++){
for(cs_lnum_t j = 0; j < 3; j++){
grad[c_id1][i][j] = _grad[i][j];
grad[c_id][i][j] = _grad[i][j];
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/alge/cs_reconstruct_vector_gradient_gather_v5.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ _compute_reconstruct_v_b_face_gather_v5(cs_lnum_t n_b_cells,

for(cs_lnum_t i = 0; i < stride; i++){
for(cs_lnum_t j = 0; j < 3; j++){
grad[c_id1][i][j] = _grad[lindex][i][j];
grad[c_id][i][j] = _grad[lindex][i][j];
}
}
}