From f475d6f5e1a5c5f64c90b5ca137ac8741c95773c Mon Sep 17 00:00:00 2001 From: "hanno@schwalm-bremen.de" Date: Fri, 15 Mar 2024 06:30:33 +0100 Subject: [PATCH] Refactor OpenCL device locking/unlocking dt_dev_pixelpipe_process() is the central function to process a full pixelpipe run. It tries to claim a CL device due to the chosen strategy via dt_opencl_lock_device(). This PR modifies the callers by adding a parameter 'int devid', if that is a valid OpenCL device id, the called functions - in the end it will be dt_dev_pixelpipe_process() - assume they should **not** lock/unlock the cl device as the caller is doing that instead. The only place where we use the new feature right now is in the overlay/composite module. There we have to process the full pipe of the overlayed image. As we have claimed a CL device for the pipe already we can't reclaim one but use the chosen device and do the dt_dev_image() on that device. --- src/control/jobs/develop_jobs.c | 6 +++--- src/develop/develop.c | 10 ++++++---- src/develop/develop.h | 7 +++++-- src/develop/pixelpipe_hb.c | 29 ++++++++++++++++++++--------- src/develop/pixelpipe_hb.h | 3 ++- src/imageio/imageio.c | 2 +- src/iop/overlay.c | 2 +- src/libs/duplicate.c | 2 +- src/libs/snapshots.c | 2 +- src/views/slideshow.c | 3 ++- 10 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/control/jobs/develop_jobs.c b/src/control/jobs/develop_jobs.c index ccfe6da5e726..db65c4fdc064 100644 --- a/src/control/jobs/develop_jobs.c +++ b/src/control/jobs/develop_jobs.c @@ -23,7 +23,7 @@ static int32_t dt_dev_process_preview_job_run(dt_job_t *job) { dt_develop_t *dev = dt_control_job_get_params(job); dt_dev_process_image_job(dev, NULL, dev->preview_pipe, - DT_SIGNAL_DEVELOP_PREVIEW_PIPE_FINISHED); + DT_SIGNAL_DEVELOP_PREVIEW_PIPE_FINISHED, DT_DEVICE_NONE); return 0; } @@ -31,7 +31,7 @@ static int32_t dt_dev_process_preview2_job_run(dt_job_t *job) { dt_develop_t *dev = dt_control_job_get_params(job); dt_dev_process_image_job(dev, &dev->preview2, dev->preview2.pipe, - DT_SIGNAL_DEVELOP_PREVIEW2_PIPE_FINISHED); + DT_SIGNAL_DEVELOP_PREVIEW2_PIPE_FINISHED, DT_DEVICE_NONE); return 0; } @@ -39,7 +39,7 @@ static int32_t dt_dev_process_image_job_run(dt_job_t *job) { dt_develop_t *dev = dt_control_job_get_params(job); dt_dev_process_image_job(dev, &dev->full, dev->full.pipe, - DT_SIGNAL_DEVELOP_UI_PIPE_FINISHED); + DT_SIGNAL_DEVELOP_UI_PIPE_FINISHED, DT_DEVICE_NONE); return 0; } diff --git a/src/develop/develop.c b/src/develop/develop.c index c147623fa44c..a923130741f3 100644 --- a/src/develop/develop.c +++ b/src/develop/develop.c @@ -274,7 +274,8 @@ void dt_dev_invalidate_preview(dt_develop_t *dev) void dt_dev_process_image_job(dt_develop_t *dev, dt_dev_viewport_t *port, dt_dev_pixelpipe_t *pipe, - dt_signal_t signal) + dt_signal_t signal, + const int devid) { if(dev->full.pipe->loading && pipe != dev->full.pipe) { @@ -431,7 +432,7 @@ void dt_dev_process_image_job(dt_develop_t *dev, dt_dev_pixelpipe_module_enabled(port->pipe, mod, FALSE); } - if(dt_dev_pixelpipe_process(pipe, dev, x, y, wd, ht, scale)) + if(dt_dev_pixelpipe_process(pipe, dev, x, y, wd, ht, scale, devid)) { // interrupted because image changed? if(dev->image_force_reload || pipe->loading || pipe->input_changed) @@ -3462,7 +3463,8 @@ void dt_dev_image(const dt_imgid_t imgid, float *zoom_x, float *zoom_y, const int snapshot_id, - GList *module_filter_out) + GList *module_filter_out, + const int devid) { dt_develop_t dev; dt_dev_init(&dev, TRUE); @@ -3495,7 +3497,7 @@ void dt_dev_image(const dt_imgid_t imgid, dev.module_filter_out = module_filter_out; - dt_dev_process_image_job(&dev, &dev.full, pipe, -1); + dt_dev_process_image_job(&dev, &dev.full, pipe, -1, devid); // record resulting image and dimensions diff --git a/src/develop/develop.h b/src/develop/develop.h index 3bb952bb5b86..25077c8ee322 100644 --- a/src/develop/develop.h +++ b/src/develop/develop.h @@ -360,7 +360,8 @@ float dt_dev_get_preview_downsampling(); void dt_dev_process_image_job(dt_develop_t *dev, dt_dev_viewport_t *port, struct dt_dev_pixelpipe_t *pipe, - dt_signal_t signal); + dt_signal_t signal, + const int devid); // launch jobs above void dt_dev_process_image(dt_develop_t *dev); void dt_dev_process_preview(dt_develop_t *dev); @@ -631,6 +632,7 @@ void dt_dev_undo_end_record(dt_develop_t *dev); * develop an image and returns the buf and processed width / height. * this is done as in the context of the darkroom, meaning that the * final processed sizes will align perfectly on the darkroom view. + * if called with a valid CL devid that device is used without locking/unlocking as the caller is doing that */ void dt_dev_image(const dt_imgid_t imgid, const size_t width, @@ -643,7 +645,8 @@ void dt_dev_image(const dt_imgid_t imgid, float *zoom_x, float *zoom_y, const int32_t snapshot_id, - GList *module_filter_out); + GList *module_filter_out, + const int devid); gboolean dt_dev_equal_chroma(const float *f, const double *d); diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c index 65b28cd3f3e7..8d0218c30c78 100644 --- a/src/develop/pixelpipe_hb.c +++ b/src/develop/pixelpipe_hb.c @@ -2520,7 +2520,7 @@ gboolean dt_dev_pixelpipe_process_no_gamma( } if(gamma) gamma->enabled = FALSE; - const gboolean ret = dt_dev_pixelpipe_process(pipe, dev, x, y, width, height, scale); + const gboolean ret = dt_dev_pixelpipe_process(pipe, dev, x, y, width, height, scale, DT_DEVICE_NONE); if(gamma) gamma->enabled = TRUE; return ret; } @@ -2629,16 +2629,20 @@ gboolean dt_dev_pixelpipe_process( const int y, const int width, const int height, - const float scale) + const float scale, + const int devid) { pipe->processing = TRUE; pipe->nocache = FALSE; pipe->runs++; pipe->opencl_enabled = dt_opencl_running(); - pipe->devid = (pipe->opencl_enabled) ? dt_opencl_lock_device(pipe->type) - : DT_DEVICE_CPU; // try to get/lock opencl resource - dt_dev_pixelpipe_cache_checkmem(pipe); + // if devid is a valid CL device we don't lock it as the caller has done so already + const gboolean claimed = devid > DT_DEVICE_CPU; + pipe->devid = pipe->opencl_enabled ? (claimed ? devid : dt_opencl_lock_device(pipe->type)) : DT_DEVICE_CPU; + + if(!claimed) // don't free cachelines as the caller is using them + dt_dev_pixelpipe_cache_checkmem(pipe); dt_print(DT_DEBUG_MEMORY, "[memory] before pixelpipe process\n"); dt_print_mem_usage(); @@ -2695,7 +2699,7 @@ gboolean dt_dev_pixelpipe_process( &roi, modules, pieces, pos); // get status summary of opencl queue by checking the eventlist - const gboolean oclerr = (pipe->devid >= 0) + const gboolean oclerr = (pipe->devid > DT_DEVICE_CPU) ? (dt_opencl_events_flush(pipe->devid, TRUE) != 0) : FALSE; @@ -2708,7 +2712,10 @@ gboolean dt_dev_pixelpipe_process( { // Well, there were errors -> we might need to free an invalid opencl memory object dt_opencl_release_mem_object(cl_mem_out); - dt_opencl_unlock_device(pipe->devid); // release opencl resource + + if(!claimed) // only unlock if locked above + dt_opencl_unlock_device(pipe->devid); // release opencl resource + dt_pthread_mutex_lock(&pipe->busy_mutex); pipe->opencl_enabled = FALSE; // disable opencl for this pipe pipe->opencl_error = FALSE; // reset error status @@ -2750,11 +2757,14 @@ gboolean dt_dev_pixelpipe_process( g_list_free_full(pipe->forms, (void (*)(void *))dt_masks_free_form); pipe->forms = NULL; } + if(pipe->devid > DT_DEVICE_CPU) { - dt_opencl_unlock_device(pipe->devid); + if(!claimed) // only unlock if locked above + dt_opencl_unlock_device(pipe->devid); pipe->devid = DT_DEVICE_CPU; } + // ... and in case of other errors ... if(err) { @@ -2791,7 +2801,8 @@ gboolean dt_dev_pixelpipe_process( pipe->backbuf_height = height; dt_pthread_mutex_unlock(&pipe->backbuf_mutex); - dt_dev_pixelpipe_cache_report(pipe); + if(!claimed) + dt_dev_pixelpipe_cache_report(pipe); dt_print_pipe(DT_DEBUG_PIPE, "pipe finished", pipe, NULL, old_devid, &roi, &roi, "\n\n"); diff --git a/src/develop/pixelpipe_hb.h b/src/develop/pixelpipe_hb.h index cdfd5807f455..ba240b3335a7 100644 --- a/src/develop/pixelpipe_hb.h +++ b/src/develop/pixelpipe_hb.h @@ -277,7 +277,8 @@ gboolean dt_dev_pixelpipe_process(dt_dev_pixelpipe_t *pipe, const int y, const int width, const int height, - const float scale); + const float scale, + const int devid); // convenience method that does not gamma-compress the image. gboolean dt_dev_pixelpipe_process_no_gamma(dt_dev_pixelpipe_t *pipe, struct dt_develop_t *dev, diff --git a/src/imageio/imageio.c b/src/imageio/imageio.c index 8e06f0901ff4..d538091b1965 100644 --- a/src/imageio/imageio.c +++ b/src/imageio/imageio.c @@ -1062,7 +1062,7 @@ int dt_imageio_export_with_flags(const dt_imgid_t imgid, // we can use openmp further down): if(bpp == 8) dt_dev_pixelpipe_process(&pipe, &dev, 0, 0, - processed_width, processed_height, scale); + processed_width, processed_height, scale, DT_DEVICE_NONE); else dt_dev_pixelpipe_process_no_gamma(&pipe, &dev, 0, 0, processed_width, processed_height, scale); diff --git a/src/iop/overlay.c b/src/iop/overlay.c index 5a5dea2ebf82..7b14d6755b1a 100644 --- a/src/iop/overlay.c +++ b/src/iop/overlay.c @@ -306,7 +306,7 @@ static void _setup_overlay(dt_iop_module_t *self, -1, &buf, NULL, &bw, &bh, NULL, NULL, - -1, disabled_modules); + -1, disabled_modules, piece->pipe->devid); uint8_t *old_buf = *pbuf; diff --git a/src/libs/duplicate.c b/src/libs/duplicate.c index dad7a621796c..3df3b649b8ca 100644 --- a/src/libs/duplicate.c +++ b/src/libs/duplicate.c @@ -223,7 +223,7 @@ void gui_post_expose(dt_lib_module_t *self, dt_dev_image(d->imgid, width, height, -1, &d->buf, &d->scale, &d->buf_width, &d->buf_height, - &d->zoom_x, &d->zoom_y, -1, NULL); + &d->zoom_x, &d->zoom_y, -1, NULL, DT_DEVICE_NONE); d->preview_id = d->imgid; } diff --git a/src/libs/snapshots.c b/src/libs/snapshots.c index 0837e6214a3d..3f2cc9533152 100644 --- a/src/libs/snapshots.c +++ b/src/libs/snapshots.c @@ -199,7 +199,7 @@ void gui_post_expose(dt_lib_module_t *self, &snap->buf, &snap->scale, &snap->width, &snap->height, &snap->zoom_x, &snap->zoom_y, - snap->id, NULL); + snap->id, NULL, DT_DEVICE_NONE); d->snap_requested = FALSE; d->expose_again_timeout_id = 0; } diff --git a/src/views/slideshow.c b/src/views/slideshow.c index 728ff49b3cfe..f2025005f8d0 100644 --- a/src/views/slideshow.c +++ b/src/views/slideshow.c @@ -215,7 +215,8 @@ static int _process_image(dt_slideshow_t *d, NULL, NULL, -1, - NULL); + NULL, + DT_DEVICE_NONE); dt_pthread_mutex_lock(&d->lock);