Skip to content

Commit

Permalink
Tracking down darktable-org#13120
Browse files Browse the repository at this point in the history
Can confirm the issue @kofa73 reported in darktable-org#13120

I could track the problem down to be deep in the gaussian blur code. To get into this, I simply added
debugging messages, all of which are output via -d opencl.
  • Loading branch information
jenshannoschwalm committed Dec 11, 2022
1 parent 839b7f0 commit a1ba7ef
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 41 deletions.
32 changes: 22 additions & 10 deletions src/common/gaussian.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ dt_gaussian_cl_t *dt_gaussian_init_cl(const int devid,

cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
{
cl_int err = -999;
cl_int err = DT_OPENCL_DEFAULT_ERROR;
const int devid = g->devid;

const int width = g->width;
Expand Down Expand Up @@ -670,17 +670,23 @@ cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
CLARG(width), CLARG(height), CLARG(a0), CLARG(a1), CLARG(a2), CLARG(a3), CLARG(b1), CLARG(b2),
CLARG(coefp), CLARG(coefn), CLARG(Labmax), CLARG(Labmin));
err = dt_opencl_enqueue_kernel_2d(devid, kernel_gaussian_column, sizes);
if(err != CL_SUCCESS) return err;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[dt_gaussian_blur_cl] first blur kernel_gaussian_column: %s\n", cl_errstr(err));
return err;
}
// intermediate step: transpose dev_temp2 -> dev_temp1
sizes[0] = bwidth;
sizes[1] = bheight;
sizes[2] = 1;
dt_opencl_set_kernel_args(devid, kernel_gaussian_transpose, 0, CLARG(dev_temp2), CLARG(dev_temp1),
CLARG(width), CLARG(height), CLARG(blocksize), CLLOCAL(bpp * blocksize * (blocksize + 1)));
width, height, CLARG(blocksize), CLLOCAL(bpp * blocksize * (blocksize + 1)));
err = dt_opencl_enqueue_kernel_2d_with_local(devid, kernel_gaussian_transpose, sizes, local);
if(err != CL_SUCCESS) return err;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[dt_gaussian_blur_cl] first kernel_gaussian_transpose: %s\n", cl_errstr(err));
return err;
}

// second blur step: column by column of transposed image with dev_temp1 -> dev_temp2 (!! height <-> width
// !!)
Expand All @@ -691,8 +697,11 @@ cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
CLARG(height), CLARG(width), CLARG(a0), CLARG(a1), CLARG(a2), CLARG(a3), CLARG(b1), CLARG(b2),
CLARG(coefp), CLARG(coefn), CLARG(Labmax), CLARG(Labmin));
err = dt_opencl_enqueue_kernel_2d(devid, kernel_gaussian_column, sizes);
if(err != CL_SUCCESS) return err;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[dt_gaussian_blur_cl] second step kernel_gaussian_column: %s\n", cl_errstr(err));
return err;
}

// transpose back dev_temp2 -> dev_temp1
sizes[0] = bheight;
Expand All @@ -701,8 +710,11 @@ cl_int dt_gaussian_blur_cl(dt_gaussian_cl_t *g, cl_mem dev_in, cl_mem dev_out)
dt_opencl_set_kernel_args(devid, kernel_gaussian_transpose, 0, CLARG(dev_temp2), CLARG(dev_temp1),
CLARG(height), CLARG(width), CLARG(blocksize), CLLOCAL(bpp * blocksize * (blocksize + 1)));
err = dt_opencl_enqueue_kernel_2d_with_local(devid, kernel_gaussian_transpose, sizes, local);
if(err != CL_SUCCESS) return err;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[dt_gaussian_blur_cl] second kernel_gaussian_transpose: %s\n", cl_errstr(err));
return err;
}
// finally produce output in dev_out
err = dt_opencl_enqueue_copy_buffer_to_image(devid, dev_temp1, dev_out, 0, origin, region);
if(err != CL_SUCCESS) return err;
Expand Down
4 changes: 2 additions & 2 deletions src/common/opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2335,7 +2335,7 @@ int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, cons
{
dt_opencl_t *cl = darktable.opencl;
if(!cl->inited || dev < 0) return -1;
if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return -1;
if(kernel < 0 || kernel >= DT_OPENCL_MAX_KERNELS) return CL_INVALID_KERNEL;

char buf[256];
buf[0] = '\0';
Expand All @@ -2346,7 +2346,7 @@ int dt_opencl_enqueue_kernel_2d_with_local(const int dev, const int kernel, cons
2, NULL, sizes, local, 0, NULL, eventp);

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL, "[dt_opencl_enqueue_kernel_2d_with_local] kernel %i on device %d: %s\n", kernel, dev, cl_errstr(err));
dt_print(DT_DEBUG_OPENCL, "[dt_opencl_enqueue_kernel_2d%s] kernel %i on device %d: %s\n", local ? "_with_local" : "", kernel, dev, cl_errstr(err));
_check_clmem_err(dev, err);
return err;
}
Expand Down
104 changes: 75 additions & 29 deletions src/develop/blend.c
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ static void _refine_with_detail_mask_cl(struct dt_iop_module_t *self, struct dt_
cl_mem tmp = NULL;
cl_mem blur = NULL;
cl_mem out = NULL;
cl_int err = DT_OPENCL_DEFAULT_ERROR;

dt_dev_pixelpipe_t *p = piece->pipe;
if(p->rawdetail_mask_data == NULL) return;
Expand All @@ -700,23 +701,33 @@ static void _refine_with_detail_mask_cl(struct dt_iop_module_t *self, struct dt_
blur = dt_opencl_alloc_device_buffer(devid, sizeof(float) * iwidth * iheight);
if(blur == NULL) goto error;

err = dt_opencl_write_host_to_device(devid, p->rawdetail_mask_data, tmp, iwidth, iheight, sizeof(float));
if(err != CL_SUCCESS)
{
const int err = dt_opencl_write_host_to_device(devid, p->rawdetail_mask_data, tmp, iwidth, iheight, sizeof(float));
if(err != CL_SUCCESS) goto error;
dt_print(DT_DEBUG_OPENCL, "[refine_with_detail_mask_cl] write rawdetail_mask_data: %s\n", cl_errstr(err));
goto error;
}

{
const int kernel = darktable.opencl->blendop->kernel_read_mask;
const int err = dt_opencl_enqueue_kernel_2d_args(devid, kernel, iwidth, iheight,
err = dt_opencl_enqueue_kernel_2d_args(devid, kernel, iwidth, iheight,
CLARG(out), CLARG(tmp), CLARG(iwidth), CLARG(iheight));
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[refine_with_detail_mask_cl] kernel_read_mask: %s\n", cl_errstr(err));
goto error;
}
}

{
const int kernel = darktable.opencl->blendop->kernel_calc_blend;
const int err = dt_opencl_enqueue_kernel_2d_args(devid, kernel, iwidth, iheight,
err = dt_opencl_enqueue_kernel_2d_args(devid, kernel, iwidth, iheight,
CLARG(out), CLARG(blur), CLARG(iwidth), CLARG(iheight), CLARG(threshold), CLARG(detail));
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[refine_with_detail_mask_cl] kernel_calc_blend: %s\n", cl_errstr(err));
goto error;
}
}

{
Expand All @@ -727,10 +738,14 @@ static void _refine_with_detail_mask_cl(struct dt_iop_module_t *self, struct dt_
if(dev_blurmat != NULL)
{
const int clkernel = darktable.opencl->blendop->kernel_mask_blur;
const int err = dt_opencl_enqueue_kernel_2d_args(devid, clkernel, iwidth, iheight,
err = dt_opencl_enqueue_kernel_2d_args(devid, clkernel, iwidth, iheight,
CLARG(blur), CLARG(out), CLARG(iwidth), CLARG(iheight), CLARG(dev_blurmat));
dt_opencl_release_mem_object(dev_blurmat);
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[refine_with_detail_mask_cl] kernel_mask_blur: %s\n", cl_errstr(err));
goto error;
}
}
else
{
Expand All @@ -741,15 +756,17 @@ static void _refine_with_detail_mask_cl(struct dt_iop_module_t *self, struct dt_

{
const int kernel = darktable.opencl->blendop->kernel_write_mask;
const int err = dt_opencl_enqueue_kernel_2d_args(devid, kernel, iwidth, iheight,
err = dt_opencl_enqueue_kernel_2d_args(devid, kernel, iwidth, iheight,
CLARG(out), CLARG(tmp), CLARG(iwidth), CLARG(iheight));
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[refine_with_detail_mask_cl] kernel_write_mask: %s\n", cl_errstr(err));
goto error;
}
}

{
const int err = dt_opencl_read_host_from_device(devid, lum, tmp, iwidth, iheight, sizeof(float));
if(err != CL_SUCCESS) goto error;
}
err = dt_opencl_read_host_from_device(devid, lum, tmp, iwidth, iheight, sizeof(float));
if(err != CL_SUCCESS) goto error;

dt_opencl_release_mem_object(tmp);
dt_opencl_release_mem_object(blur);
Expand Down Expand Up @@ -899,7 +916,7 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
const int offs[2] = { xoffs, yoffs };
const size_t sizes[] = { ROUNDUPDWD(owidth, devid), ROUNDUPDHT(oheight, devid), 1 };

cl_int err = -999;
cl_int err = DT_OPENCL_DEFAULT_ERROR;
cl_mem dev_blendif_params = NULL;
cl_mem dev_boost_factors = NULL;
cl_mem dev_mask_1 = NULL;
Expand Down Expand Up @@ -936,8 +953,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe

err = dt_ioppr_build_iccprofile_params_cl(use_profile ? &profile : NULL, devid, &profile_info_cl,
&profile_lut_cl, &dev_profile_info, &dev_profile_lut);
if(err != CL_SUCCESS) goto error;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] profile_info_cl: %s\n", cl_errstr(err));
goto error;
}
if(mask_mode == DEVELOP_MASK_ENABLED || suppress_mask)
{
// blend uniformly (no drawn or parametric mask)
Expand All @@ -946,7 +966,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
dt_opencl_set_kernel_args(devid, kernel_set_mask, 0, CLARG(dev_mask_1), CLARG(owidth), CLARG(oheight),
CLARG(opacity));
err = dt_opencl_enqueue_kernel_2d(devid, kernel_set_mask, sizes);
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] kernel_set_mask: %s\n", cl_errstr(err));
goto error;
}
}
else if(mask_mode & DEVELOP_MASK_RASTER)
{
Expand Down Expand Up @@ -979,7 +1003,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
}

err = dt_opencl_write_host_to_device(devid, mask, dev_mask_1, owidth, oheight, sizeof(float));
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] write raster mask dev_mask_1: %s\n", cl_errstr(err));
goto error;
}
}
else
{
Expand Down Expand Up @@ -1017,8 +1045,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
dev_mask_2 = dt_opencl_alloc_device(devid, owidth, oheight, sizeof(float));
if(dev_mask_2 == NULL) goto error;
err = dt_opencl_write_host_to_device(devid, mask, dev_mask_1, owidth, oheight, sizeof(float));
if(err != CL_SUCCESS) goto error;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] write drawn mask dev_mask_1: %s\n", cl_errstr(err));
goto error;
}
// The following call to clFinish() works around a bug in some OpenCL
// drivers (namely AMD).
// Without this synchronization point, reads to dev_in would often not
Expand All @@ -1037,7 +1068,7 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
err = dt_opencl_enqueue_kernel_2d(devid, kernel_mask, sizes);
if(err != CL_SUCCESS)
{
fprintf(stderr, "[dt_develop_blend_process_cl] error %i enqueue kernel\n", err);
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] apply global opacity: %s\n", cl_errstr(err));
goto error;
}

Expand Down Expand Up @@ -1096,7 +1127,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
if(!g) goto error;
err = dt_gaussian_blur_cl(g, dev_mask_1, dev_mask_2);
dt_gaussian_free_cl(g);
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] DEVELOP_MASK_POST_BLUR: %s\n", cl_errstr(err));
goto error;
}
_blend_process_cl_exchange(&dev_mask_1, &dev_mask_2);
}
else if(operation == DEVELOP_MASK_POST_TONE_CURVE)
Expand All @@ -1106,7 +1141,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
dt_opencl_set_kernel_args(devid, kernel_mask_tone_curve, 0, CLARG(dev_mask_1), CLARG(dev_mask_2),
CLARG(owidth), CLARG(oheight), CLARG(e), CLARG(brightness), CLARG(opacity));
err = dt_opencl_enqueue_kernel_2d(devid, kernel_mask_tone_curve, sizes);
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] DEVELOP_MASK_POST_TONE_CURVE: %s\n", cl_errstr(err));
goto error;
}
_blend_process_cl_exchange(&dev_mask_1, &dev_mask_2);
}
}
Expand Down Expand Up @@ -1137,8 +1176,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe

err = dt_ioppr_build_iccprofile_params_cl(work_profile, devid, &work_profile_info_cl, &work_profile_lut_cl,
&dev_work_profile_info, &dev_work_profile_lut);
if(err != CL_SUCCESS) goto error;

if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] work_profile_info_cl: %s\n", cl_errstr(err));
goto error;
}
// let us display a specific channel
dt_opencl_set_kernel_args(devid, kernel_display_channel, 0, CLARG(dev_in), CLARG(dev_tmp), CLARG(dev_mask_1),
CLARG(dev_out), CLARG(owidth), CLARG(oheight), CLARRAY(2, offs), CLARG(request_mask_display), CLARG(dev_boost_factors),
Expand All @@ -1147,7 +1189,7 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
err = dt_opencl_enqueue_kernel_2d(devid, kernel_display_channel, sizes);
if(err != CL_SUCCESS)
{
fprintf(stderr, "[dt_develop_blend_process_cl] error %i enqueue kernel\n", err);
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] kernel_display_channel: %s\n", cl_errstr(err));
goto error;
}
}
Expand All @@ -1159,7 +1201,11 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
dt_opencl_set_kernel_args(devid, kernel, 0, CLARG(dev_in), CLARG(dev_tmp), CLARG(dev_mask_1), CLARG(dev_out),
CLARG(owidth), CLARG(oheight), CLARG(blend_mode), CLARG(blend_parameter), CLARRAY(2, offs), CLARG(mask_display));
err = dt_opencl_enqueue_kernel_2d(devid, kernel, sizes);
if(err != CL_SUCCESS) goto error;
if(err != CL_SUCCESS)
{
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] blend_parameter: %s\n", cl_errstr(err));
goto error;
}
}

// register if _this_ module should expose mask or display channel
Expand Down Expand Up @@ -1207,7 +1253,7 @@ int dt_develop_blend_process_cl(struct dt_iop_module_t *self, struct dt_dev_pixe
dt_ioppr_free_iccprofile_params_cl(&profile_info_cl, &profile_lut_cl, &dev_profile_info, &dev_profile_lut);
dt_ioppr_free_iccprofile_params_cl(&work_profile_info_cl, &work_profile_lut_cl, &dev_work_profile_info,
&dev_work_profile_lut);
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] couldn't enqueue kernel! %s\n", cl_errstr(err));
dt_print(DT_DEBUG_OPENCL, "[opencl_blendop] error: %s\n", cl_errstr(err));
return FALSE;
}
#endif
Expand Down

0 comments on commit a1ba7ef

Please sign in to comment.