From b852d2de02ce17c87a570bc5ee6624d7a150f776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20PIERRE?= Date: Tue, 13 Jul 2021 14:07:45 +0200 Subject: [PATCH 1/4] =?UTF-8?q?add=20module=20:=C2=A0scene-referred=20blur?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for motion, lens and gaussian blurs in linear RGB. --- data/kernels/blurs.cl | 55 +++ data/kernels/programs.conf | 1 + src/common/iop_order.c | 3 + src/iop/CMakeLists.txt | 2 + src/iop/blurs.c | 855 +++++++++++++++++++++++++++++++++++++ 5 files changed, 916 insertions(+) create mode 100644 data/kernels/blurs.cl create mode 100644 src/iop/blurs.c diff --git a/data/kernels/blurs.cl b/data/kernels/blurs.cl new file mode 100644 index 000000000000..6af189e07f17 --- /dev/null +++ b/data/kernels/blurs.cl @@ -0,0 +1,55 @@ +/* + This file is part of darktable, + copyright (c) 2021 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ + +#include "common.h" + +// use our own coordinate sampler +const sampler_t samplerA = CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_NONE | + CLK_FILTER_NEAREST; + +kernel void +convolve(read_only image2d_t in, read_only image2d_t kern, write_only image2d_t out, + const int width, const int height, const int radius) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if(x >= width || y >= height) return; + + float4 pix_in = read_imagef(in, samplerA, (int2)(x, y)); + float4 acc = 0.f; + + for(int l = -radius; l <= radius; l++) + for(int m = -radius; m <= radius; m++) + { + const int ii = clamp(y + l, 0, height - 1); + const int jj = clamp(x + m, 0, width - 1); + const float4 pix = read_imagef(in, samplerA, (int2)(jj, ii)); + + const int ik = l + radius; + const int jk = m + radius; + const float k = read_imagef(kern, samplerA, (int2)(jk, ik)).x; + + acc += k * pix; + } + + // copy alpha + acc.w = pix_in.w; + write_imagef(out, (int2)(x, y), acc); +} diff --git a/data/kernels/programs.conf b/data/kernels/programs.conf index 987cc5d83418..9c41bd5761a1 100644 --- a/data/kernels/programs.conf +++ b/data/kernels/programs.conf @@ -34,3 +34,4 @@ negadoctor.cl 30 demosaic_rcd.cl 31 channelmixer.cl 32 diffuse.cl 33 +blurs.cl 34 diff --git a/src/common/iop_order.c b/src/common/iop_order.c index ece1783ed89e..7a824af8a339 100644 --- a/src/common/iop_order.c +++ b/src/common/iop_order.c @@ -110,6 +110,7 @@ const dt_iop_order_entry_t legacy_order[] = { { {27.5f }, "channelmixerrgb", 0}, { {27.5f }, "censorize", 0}, { {27.5f }, "negadoctor", 0}, + { {27.5f }, "blurs", 0}, { {27.5f }, "basicadj", 0}, { {28.0f }, "colorreconstruct", 0}, { {29.0f }, "colorchecker", 0}, @@ -199,6 +200,7 @@ const dt_iop_order_entry_t v30_order[] = { { {28.5f }, "channelmixerrgb", 0}, { {28.5f }, "censorize", 0}, { {28.5f }, "negadoctor", 0}, // Cineon film encoding comes after scanner input color profile + { {28.5f }, "blurs", 0}, // physically-accurate blurs (motion and lens) { {29.0f }, "nlmeans", 0}, // signal processing (denoising) // -> needs a signal as scene-referred as possible (even if it works in Lab) { {30.0f }, "colorchecker", 0}, // calibration to "neutral" exchange colour space @@ -664,6 +666,7 @@ GList *dt_ioppr_get_iop_order_list(int32_t imgid, gboolean sorted) _insert_before(iop_order_list, "ashift", "cacorrectrgb"); _insert_before(iop_order_list, "graduatednd", "crop"); _insert_before(iop_order_list, "channelmixerrgb", "diffuse"); + _insert_before(iop_order_list, "nlmeans", "blurs"); } } else if(version == DT_IOP_ORDER_LEGACY) diff --git a/src/iop/CMakeLists.txt b/src/iop/CMakeLists.txt index dd0ee1fac4ba..bca765d4900f 100644 --- a/src/iop/CMakeLists.txt +++ b/src/iop/CMakeLists.txt @@ -148,6 +148,8 @@ add_iop(censorize "censorize.c") add_iop(colorbalancergb "colorbalancergb.c") add_iop(cacorrectrgb "cacorrectrgb.c") add_iop(diffuse "diffuse.c") +add_iop(blurs "blurs.c") + if(Rsvg2_FOUND) add_iop(watermark "watermark.c") diff --git a/src/iop/blurs.c b/src/iop/blurs.c new file mode 100644 index 000000000000..b11567b87c16 --- /dev/null +++ b/src/iop/blurs.c @@ -0,0 +1,855 @@ +/* + This file is part of darktable, + Copyright (C) 2021 darktable developers. + + darktable is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + darktable is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with darktable. If not, see . +*/ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +// our includes go first: +#include "bauhaus/bauhaus.h" +#include "develop/imageop.h" +#include "develop/imageop_gui.h" +#include "dtgtk/drawingarea.h" +#include "gui/gtk.h" +#include "iop/iop_api.h" + +// #include // one day, include FFT convolution +#include +#include + +DT_MODULE_INTROSPECTION(1, dt_iop_blurs_params_t) + +typedef enum dt_iop_blur_type_t +{ + DT_BLUR_LENS = 0, // $DESCRIPTION: "lens" + DT_BLUR_MOTION = 1, // $DESCRIPTION: "motion" + DT_BLUR_GAUSSIAN = 2, // $DESCRIPTION: "gaussian" +} dt_iop_blur_type_t; + + +typedef struct dt_iop_blurs_params_t +{ + dt_iop_blur_type_t type; // $DEFAULT: DT_BLUR_LENS $DESCRIPTION: "blur type" + int radius; // $MIN: 4 $MAX: 128 $DEFAULT: 8 $DESCRIPTION: "blur radius" + + // lens blur params + int blades; // $MIN: 3 $MAX: 11 $DEFAULT: 5 $DESCRIPTION: "diaphragm blades" + float concavity; // $MIN: 1. $MAX: 9. $DEFAULT: 1. $DESCRIPTION: "concavity" + float linearity; // $MIN: 0. $MAX: 1. $DEFAULT: 1. $DESCRIPTION: "linearity" + float rotation; // $MIN: -1.57 $MAX: 1.57 $DEFAULT: 0. $DESCRIPTION: "rotation" + + // motion blur params + float angle; // $MIN: -3.14 $MAX: 3.14 $DEFAULT: 0. $DESCRIPTION: "direction" + float curvature; // $MIN: -2. $MAX: 2. $DEFAULT: 0. $DESCRIPTION: "curvature" + float offset; // $MIN: -1. $MAX: 1. $DEFAULT: 0 $DESCRIPTION: "offset" + +} dt_iop_blurs_params_t; + + +typedef struct dt_iop_blurs_gui_data_t +{ + GtkWidget *type, *radius, *blades, *concavity, *linearity, *rotation, *angle, *curvature, *offset; + GtkDrawingArea *area; + unsigned char *img; + int img_cached; + float img_width; +} dt_iop_blurs_gui_data_t; + + +typedef struct dt_iop_blurs_global_data_t +{ + int kernel_blurs_convolve; +} dt_iop_blurs_global_data_t; + + +const char *name() +{ + return _("blurs"); +} + +const char *aliases() +{ + return _("blur|lens|motion"); +} + +const char *description(struct dt_iop_module_t *self) +{ + return dt_iop_set_description(self, + _("simulate physically-accurate lens and motion blurs."), + _("creative"), _("linear, RGB, scene-referred"), _("linear, RGB"), + _("linear, RGB, scene-referred")); +} + +int flags() +{ + return IOP_FLAGS_INCLUDE_IN_STYLES | IOP_FLAGS_SUPPORTS_BLENDING; +} + + +int default_group() +{ + return IOP_GROUP_EFFECTS | IOP_GROUP_EFFECT; +} + + +int default_colorspace(dt_iop_module_t *self, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece) +{ + return iop_cs_rgb; +} + + +void commit_params(dt_iop_module_t *self, dt_iop_params_t *p1, dt_dev_pixelpipe_t *pipe, dt_dev_pixelpipe_iop_t *piece) +{ + memcpy(piece->data, p1, self->params_size); +} + +// B spline filter +#define FSIZE 5 + +inline static void blur_2D_Bspline(const float *const restrict in, float *const restrict out, + const size_t width, const size_t height) +{ +#ifdef _OPENMP +#pragma omp parallel for default(none) dt_omp_firstprivate(width, height, in, out) \ + schedule(simd: static) \ + collapse(2) +#endif + for(size_t i = 0; i < height; i++) + { + for(size_t j = 0; j < width; j++) + { + const size_t index = (i * width + j); + float acc = 0.f; + + for(size_t ii = 0; ii < FSIZE; ++ii) + for(size_t jj = 0; jj < FSIZE; ++jj) + { + const size_t row = CLAMP((int)i + (int)(ii - (FSIZE - 1) / 2), (int)0, (int)height - 1); + const size_t col = CLAMP((int)j + (int)(jj - (FSIZE - 1) / 2), (int)0, (int)width - 1); + const size_t k_index = (row * width + col); + + const float DT_ALIGNED_ARRAY filter[FSIZE] + = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f }; + + acc += filter[ii] * filter[jj] * in[k_index]; + } + + out[index] = acc; + } + } +} + + +static inline void init_kernel(float *const restrict buffer, const size_t width, const size_t height) +{ + // init an empty kernel with zeros +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer) \ + schedule(simd: static) aligned(buffer:64) +#endif + for(size_t k = 0; k < height * width; k++) buffer[k] = 0.f; +} + + +static inline void create_lens_kernel(float *const restrict buffer, + const size_t width, const size_t height, + const float n, const float m, const float k, const float rotation) +{ + // n is number of diaphragm blades + // m is the concavity, aka the number of vertices on straight lines (?) + // k is the roundness vs. linearity factor + // see https://math.stackexchange.com/a/4160104/498090 + // buffer sizes need to be odd + + // Spatial coordinates rounding error + const float eps = 1.f / (float)width; + const float radius = (width - 1) / 2.f - 1; + +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer, n, m, k, rotation, eps, radius) \ + schedule(simd: static) aligned(buffer:64) collapse(2) +#endif + for(size_t i = 0; i < height; i++) + for(size_t j = 0; j < width; j++) + { + // get normalized kernel coordinates in [-1 ; 1] + const float x = (i - 1) / radius - 1; + const float y = (j - 1) / radius - 1; + + // get current radial distance from kernel center + const float r = hypotf(x, y); + + // get the radial distance at current angle of the shape envelope + const float M = cosf((2.f * asinf(k) + M_PI_F * m) / (2.f * n)) + / cosf((2.f * asinf(k * cosf(n * (atan2f(y, x) + rotation))) + M_PI_F * m) / (2.f * n)); + + // write 1 if we are inside the envelope of the shape, else 0 + buffer[i * width + j] = (M >= r + eps); + } +} + + +static inline void create_motion_kernel(float *const restrict buffer, + const size_t width, const size_t height, + const float angle, const float curvature, const float offset) +{ + // Compute the polynomial params from user params + const float A = curvature / 2.f; + const float B = 1.f; + const float C = -A * offset * offset + B * offset; + // Note : C ensures the polynomial arc always goes through the central pixel + // so we don't shift pixels. This is meant to allow seamless connection + // with unmasked areas when using masked blur. + + // Spatial coordinates rounding error + const float eps = 1.f / (float)width; + + const float radius = (width - 1) / 2.f - 1; + const float corr_angle = -M_PI_F / 4.f - angle; + + // Matrix of rotation + const float M[2][2] = { { cosf(corr_angle), -sinf(corr_angle) }, + { sinf(corr_angle), cosf(corr_angle) } }; + +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer, A, B, C, radius, offset, M, eps) \ + schedule(simd: static) aligned(buffer:64) +#endif + for(size_t i = 0; i < 8 * width; i++) + { + // Note : for better smoothness of the polynomial discretization, + // we oversample 8 times, meaning we evaluate the polynomial + // every eighth of pixel + + // get normalized kernel coordinates in [-1 ; 1] + const float x = (i / 8.f - 1) / radius - 1; + //const float y = (j - 1) / radius - 1; // not used here + + // build the motion path : 2nd order polynomial + const float X = x - offset; + const float y = X * X * A + X * B + C; + + // rotate the motion path around the kernel center + const float rot_x = x * M[0][0] + y * M[0][1]; + const float rot_y = x * M[1][0] + y * M[1][1]; + + // convert back to kernel absolute coordinates ± eps + const int y_f[2] = { roundf((rot_y + 1) * radius - eps), + roundf((rot_y + 1) * radius + eps) }; + const int x_f[2] = { roundf((rot_x + 1) * radius - eps), + roundf((rot_x + 1) * radius + eps) }; + + // write 1 if we are inside the envelope of the shape, else 0 + // leave 1px padding on each border of the kernel for the anti-aliasing + for(int l = 0; l < 2; l++) + for(int m = 0; m < 2; m++) + { + if(x_f[l] > 0 && x_f[l] < width - 1 && y_f[m] > 0 && y_f[m] < width - 1) + buffer[y_f[m] * width + x_f[l]] = 1.f; + } + } +} + + +static inline void create_gauss_kernel(float *const restrict buffer, + const size_t width, const size_t height) +{ + // This is not optimized. Gauss kernel is separable and can be turned into + // 2 × 1D convolutions. + const float radius = (width - 1) / 2.f - 1; + +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer, radius) \ + schedule(simd: static) aligned(buffer:64) collapse(2) +#endif + for(size_t i = 0; i < height; i++) + for(size_t j = 0; j < width; j++) + { + // get normalized kernel coordinates in [-1 ; 1] + const float x = (i - 1) / radius - 1; + const float y = (j - 1) / radius - 1; + + // get current square radial distance from kernel center + const float r_2 = x * x + y * y; + buffer[i * width + j] = expf(-4.f * r_2); + } +} + + + +static inline void build_gui_kernel(unsigned char *const buffer, const size_t width, const size_t height, + dt_iop_blurs_params_t *p) +{ + float *const restrict kernel_1 = dt_alloc_align_float(width * height); + float *const restrict kernel_2 = dt_alloc_align_float(width * height); + + + if(p->type == DT_BLUR_LENS) + { + create_lens_kernel(kernel_1, width, height, p->blades, p->concavity, p->linearity, p->rotation); + + // anti-aliasing step + blur_2D_Bspline(kernel_1, kernel_2, width, height); + } + else if(p->type == DT_BLUR_MOTION) + { + init_kernel(kernel_1, width, height); + create_motion_kernel(kernel_1, width, height, p->angle, p->curvature, p->offset); + + // anti-aliasing step + blur_2D_Bspline(kernel_1, kernel_2, width, height); + } + else if(p->type == DT_BLUR_GAUSSIAN) + { + create_gauss_kernel(kernel_2, width, height); + } + + // Convert to Gtk/Cairo RGBA 8×4 bits +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer, kernel_2) \ + schedule(simd: static) aligned(buffer, kernel_2:64) +#endif + for(size_t k = 0; k < height * width; k++) + { + buffer[k * 4] = buffer[k * 4 + 1] = buffer[k * 4 + 2] = buffer[k * 4 + 3] = roundf(255.f * kernel_2[k]); + } + + dt_free_align(kernel_1); + dt_free_align(kernel_2); +} + + +static inline float compute_norm(float *const buffer, const size_t width, const size_t height) +{ + float norm = 0.f; + +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer) \ + schedule(simd: static) aligned(buffer:64) reduction(+:norm) +#endif + for(size_t i = 0; i < width * height; i++) + { + norm += buffer[i]; + } + + return norm; +} + + +static inline void normalize(float *const buffer, const size_t width, const size_t height, const float norm) +{ +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer, norm) \ + schedule(simd: static) aligned(buffer:64) +#endif + for(size_t i = 0; i < width * height; i++) + { + buffer[i] /= norm; + } +} + + +static inline void build_pixel_kernel(float *const buffer, const size_t width, const size_t height, + dt_iop_blurs_params_t *p) +{ + float *const restrict kernel_1 = dt_alloc_align_float(width * height); + + if(p->type == DT_BLUR_LENS) + { + create_lens_kernel(kernel_1, width, height, p->blades, p->concavity, p->linearity, p->rotation + M_PI_F); + + // anti-aliasing step + blur_2D_Bspline(kernel_1, buffer, width, height); + } + else if(p->type == DT_BLUR_MOTION) + { + init_kernel(kernel_1, width, height); + create_motion_kernel(kernel_1, width, height, p->angle + M_PI_F, p->curvature, p->offset); + + // anti-aliasing step + blur_2D_Bspline(kernel_1, buffer, width, height); + } + else if(p->type == DT_BLUR_GAUSSIAN) + { + create_gauss_kernel(buffer, width, height); + } + + // normalize to respect the conservation of energy law + float norm = compute_norm(buffer, width, height); + normalize(buffer, width, height, norm); + + dt_free_align(kernel_1); +} + +#if 0 + +// This crashes on the FFT step - not sure why and no time to investigate opaque libs now +// FFT convolution should be faster for large blurs because it is o(N log2(N)) +// where N is the width of the kernel +// TODO + +static void process_fft(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, + const void *const restrict ivoid, void *const restrict ovoid, + const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) +{ + dt_iop_blurs_params_t *p = (dt_iop_blurs_params_t *)piece->data; + const float scale = piece->iscale / roi_in->scale; + + if (!dt_iop_have_required_input_format(4, self, piece->colors, ivoid, ovoid, roi_in, roi_out)) + return; + + const float *const restrict in = __builtin_assume_aligned(ivoid, 64); + //float *const restrict out = __builtin_assume_aligned(ovoid, 64); + + // FFT needs odd buffer sizes, so fix that here + const int is_width_even = (roi_in->width % 2 == 0); + const int is_height_even = (roi_in->height % 2 == 0); + const size_t padded_width = roi_in->width + 1 * is_width_even; + const size_t padded_height = roi_in->height + 1 * is_height_even; + + float *const restrict padded_in = dt_alloc_align_float(padded_width * padded_height * 4); + float *const restrict padded_out = dt_alloc_align_float(padded_width * padded_height * 4); + + // Write the image in the padded buffer +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(padded_width, padded_height, roi_in, in, padded_in) \ + schedule(simd: static) aligned(in, padded_in:64) +#endif + for(size_t i = 0; i < roi_in->height; i++) + for(size_t j = 0; j < roi_in->width; j++) + { + const size_t index_in = (i * roi_in->width + j) * 4; + const size_t index_out = (i * padded_width + j) * 4; + for_four_channels(c, aligned(in, padded_in : 64)) padded_in[index_out + c] = in[index_in + c]; + } + + // Write the padding if needed + if(padded_width > roi_in->width) + { +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(padded_width, padded_height, roi_in, in, padded_in) \ + schedule(simd: static) aligned(in, padded_in:64) +#endif + for(size_t i = 0; i < roi_in->height; i++) + { + const size_t index_in = (i * (roi_in->width - 1)) * 4; + const size_t index_out = (i * (padded_width - 1)) * 4; + for_four_channels(c, aligned(in, padded_in : 64)) padded_in[index_out + c] = in[index_in + c]; + } + } + + if(padded_height > roi_in->height) + { +#ifdef _OPENMP +#pragma omp parallel for simd default(none) dt_omp_firstprivate(padded_width, padded_height, roi_in, in, padded_in) \ + schedule(simd: static) aligned(in, padded_in:64) +#endif + for(size_t j = 0; j < roi_in->width; j++) + { + const size_t index_in = ((roi_in->height - 1) * roi_in->width + j) * 4; + const size_t index_out = ((padded_height - 1) * padded_width + j) * 4; + for_four_channels(c, aligned(in, padded_in : 64)) padded_in[index_out + c] = in[index_in + c]; + } + } + + // Init the blur kernel + const size_t radius = MAX(roundf(p->radius / scale), 1); + const size_t kernel_width = 2 * radius + 1; + + float *const restrict kernel = dt_alloc_align_float(kernel_width * kernel_width); + build_pixel_kernel(kernel, kernel_width, kernel_width, p); + + // Convert to padded kernel - copy kernel in the center + float *const restrict padded_kernel = dt_alloc_align_float(padded_width * padded_height); + const size_t offset_i = (padded_height - 1) / 2 - (kernel_width - 1) / 2; + const size_t offset_j = (padded_width - 1) / 2 - (kernel_width - 1) / 2; + const size_t i_reach = offset_i + kernel_width; + const size_t j_reach = offset_j + kernel_width; + +#ifdef _OPENMP +#pragma omp parallel for simd default(none) \ + dt_omp_firstprivate(padded_width, padded_height, padded_kernel, kernel, offset_i, offset_j, i_reach, j_reach, kernel_width) \ + schedule(simd: static) aligned(kernel, padded_kernel:64) +#endif + for(size_t i = 0; i < padded_width; i++) + for(size_t j = 0; j < padded_width; j++) + { + const size_t padded_idx = (i * padded_width) + j; + + if(i >= offset_i && i < i_reach && j >= offset_j && j < j_reach) + { + const size_t i_kern = i - offset_i; + const size_t j_kern = j - offset_j; + const size_t kern_idx = i_kern * kernel_width + j_kern; + padded_kernel[padded_idx] = kernel[kern_idx]; + } + else + padded_kernel[padded_idx] = 0.f; + } + + // Init the FFT transforms + int threads = fftw_init_threads(); + fftw_plan_with_nthreads(threads); + + // TODO: things go well until this point + + // Plan the dimensions of the FFT + // notice we use fftwf prefix to use the float 32 variant of fftw + int rank = 2; /* 2D FFT */ + int n[2] = { padded_width, padded_height }; + int howmany = 4; /* 4 channels : RGBa */ + int idist = 1; /* channels are distanced by one float in memory */ + int odist = 1; /* channels are distanced by one float in memory */ + int istride = 4; /* array is not contiguous in memory */ + int ostride = 4; /* array is not contiguous in memory */ + int *inembed = n; + int *onembed = n; + + fftwf_complex *const restrict kernel_fft = fftwf_alloc_complex(padded_height * padded_height * 4); + fftwf_complex *const restrict image_fft = fftwf_alloc_complex(padded_height * padded_height * 4); + + // FFT convert the kernel + fftwf_plan kernel_plan = fftwf_plan_many_dft_r2c(rank, n, howmany, + padded_kernel, inembed, + istride, idist, + kernel_fft, onembed, + ostride, odist, + FFTW_ESTIMATE); + fftwf_execute(kernel_plan); + + // Clean FFT + fftwf_destroy_plan(kernel_plan); + fftwf_free(kernel_fft); + fftwf_free(image_fft); + fftw_cleanup_threads(); + + dt_free_align(kernel); + dt_free_align(padded_kernel); + dt_free_align(padded_in); + dt_free_align(padded_out); +} +#endif + +// Spatial convolution should be slower for large blurs because it is o(N²) where N is the width of the kernel +// but code is much simpler and easier to debug + +void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, + const void *const restrict ivoid, void *const restrict ovoid, + const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) +{ + dt_iop_blurs_params_t *p = (dt_iop_blurs_params_t *)piece->data; + const float scale = fmaxf(piece->iscale / roi_in->scale, 1.f); + + if (!dt_iop_have_required_input_format(4, self, piece->colors, ivoid, ovoid, roi_in, roi_out)) + return; + + const float *const restrict in = __builtin_assume_aligned(ivoid, 64); + float *const restrict out = __builtin_assume_aligned(ovoid, 64); + + // Init the blur kernel + const int radius = MAX(roundf(p->radius / scale), 1); + const size_t kernel_width = 2 * radius + 1; + + float *const restrict kernel = dt_alloc_align_float(kernel_width * kernel_width); + build_pixel_kernel(kernel, kernel_width, kernel_width, p); + +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + dt_omp_firstprivate(roi_out, in, out, kernel, kernel_width, radius) \ + schedule(simd: static) collapse(2) +#endif + for(int i = 0; i < roi_out->height; i++) + for(int j = 0; j < roi_out->width; j++) + { + const size_t index = ((i * roi_out->width) + j) * 4; + float DT_ALIGNED_PIXEL acc[4] = { 0.f }; + + if(i >= radius && j >= radius && i < roi_out->height - radius && j < roi_out->width - radius) + { + // We are in the safe area, no need to check for out-of-bounds + for(int l = -radius; l <= radius; l++) + for(int m = -radius; m <= radius; m++) + { + const int ii = i + l; + const int jj = j + m; + const size_t idx_shift = ((ii * roi_out->width) + jj) * 4; + + const int ik = l + radius; + const int jk = m + radius; + const size_t idx_kernel = (ik * kernel_width) + jk; + const float k = kernel[idx_kernel]; + + for_four_channels(c, aligned(in : 64)) acc[c] += k * in[idx_shift + c]; + } + } + else + { + // We are close to borders, we need to clamp indices to bounds + // assume constant boundary conditions + for(int l = -radius; l <= radius; l++) + for(int m = -radius; m <= radius; m++) + { + const int ii = CLAMP((int)i + l, (int)0, (int)roi_out->height - 1); + const int jj = CLAMP((int)j + m, (int)0, (int)roi_out->width - 1); + const size_t idx_shift = ((ii * roi_out->width) + jj) * 4; + + const int ik = l + radius; + const int jk = m + radius; + const size_t idx_kernel = (ik * kernel_width) + jk; + const float k = kernel[idx_kernel]; + + for_four_channels(c, aligned(in : 64)) acc[c] += k * in[idx_shift + c]; + } + } + + for_each_channel(c, aligned(out : 64) aligned(acc : 16)) out[index + c] = acc[c]; + + // copy alpha + out[index + 3] = in[index + 3]; + } + dt_free_align(kernel); +} + + +#if HAVE_OPENCL +int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_mem dev_in, cl_mem dev_out, + const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out) +{ + dt_iop_blurs_params_t *p = (dt_iop_blurs_params_t *)piece->data; + dt_iop_blurs_global_data_t *const gd = (dt_iop_blurs_global_data_t *)self->global_data; + + cl_int err = -999; + + const int devid = piece->pipe->devid; + const int width = roi_in->width; + const int height = roi_in->height; + + size_t sizes[] = { ROUNDUPWD(width), ROUNDUPHT(height), 1 }; + + // Init the blur kernel + const float scale = fmaxf(piece->iscale / roi_in->scale, 1.f); + const int radius = MAX(roundf(p->radius / scale), 1); + const size_t kernel_width = 2 * radius + 1; + + float *const restrict kernel = dt_alloc_align_float(kernel_width * kernel_width); + build_pixel_kernel(kernel, kernel_width, kernel_width, p); + + cl_mem kernel_cl = dt_opencl_copy_host_to_device(devid, kernel, kernel_width, kernel_width, sizeof(float)); + + dt_opencl_set_kernel_arg(devid, gd->kernel_blurs_convolve, 0, sizeof(cl_mem), (void *)&dev_in); + dt_opencl_set_kernel_arg(devid, gd->kernel_blurs_convolve, 1, sizeof(cl_mem), (void *)&kernel_cl); + dt_opencl_set_kernel_arg(devid, gd->kernel_blurs_convolve, 2, sizeof(cl_mem), (void *)&dev_out); + dt_opencl_set_kernel_arg(devid, gd->kernel_blurs_convolve, 3, sizeof(int), (void *)&roi_out->width); + dt_opencl_set_kernel_arg(devid, gd->kernel_blurs_convolve, 4, sizeof(int), (void *)&roi_out->height); + dt_opencl_set_kernel_arg(devid, gd->kernel_blurs_convolve, 5, sizeof(int), (void *)&radius); + + err = dt_opencl_enqueue_kernel_2d(devid, gd->kernel_blurs_convolve, sizes); + if(err != CL_SUCCESS) goto error; + + // cleanup and exit on success + dt_free_align(kernel); + dt_opencl_release_mem_object(kernel_cl); + return TRUE; + +error: + if(kernel) dt_free_align(kernel); + if(kernel_cl) dt_opencl_release_mem_object(kernel_cl); + dt_print(DT_DEBUG_OPENCL, "[opencl_blurs] couldn't enqueue kernel! %d\n", err); + return FALSE; +} + +void init_global(dt_iop_module_so_t *module) +{ + const int program = 34; + dt_iop_blurs_global_data_t *gd = (dt_iop_blurs_global_data_t *)malloc(sizeof(dt_iop_blurs_global_data_t)); + module->data = gd; + gd->kernel_blurs_convolve = dt_opencl_create_kernel(program, "convolve"); +} + + +void cleanup_global(dt_iop_module_so_t *module) +{ + dt_iop_blurs_global_data_t *gd = (dt_iop_blurs_global_data_t *)module->data; + dt_opencl_free_kernel(gd->kernel_blurs_convolve); + free(module->data); + module->data = NULL; +} +#endif + + +void gui_changed(dt_iop_module_t *self, GtkWidget *w, void *previous) +{ + dt_iop_blurs_params_t *p = (dt_iop_blurs_params_t *)self->params; + dt_iop_blurs_gui_data_t *g = (dt_iop_blurs_gui_data_t *)self->gui_data; + + if(!w || w == g->type) + { + if(p->type == DT_BLUR_LENS) + { + gtk_widget_hide(g->angle); + gtk_widget_hide(g->curvature); + gtk_widget_hide(g->offset); + + gtk_widget_show(g->blades); + gtk_widget_show(g->concavity); + gtk_widget_show(g->rotation); + gtk_widget_show(g->linearity); + } + else if(p->type == DT_BLUR_MOTION) + { + gtk_widget_show(g->angle); + gtk_widget_show(g->curvature); + gtk_widget_show(g->offset); + + gtk_widget_hide(g->blades); + gtk_widget_hide(g->concavity); + gtk_widget_hide(g->rotation); + gtk_widget_hide(g->linearity); + } + else if(p->type == DT_BLUR_GAUSSIAN) + { + gtk_widget_hide(g->angle); + gtk_widget_hide(g->curvature); + gtk_widget_hide(g->offset); + + gtk_widget_hide(g->blades); + gtk_widget_hide(g->concavity); + gtk_widget_hide(g->rotation); + gtk_widget_hide(g->linearity); + } + } + + // update kernel view + if(g->img_cached) + { + build_gui_kernel(g->img, g->img_width, g->img_width, p); + gtk_widget_queue_draw(GTK_WIDGET(g->area)); + } +} + +static gboolean dt_iop_tonecurve_draw(GtkWidget *widget, cairo_t *crf, gpointer user_data) +{ + dt_iop_module_t *self = (dt_iop_module_t *)user_data; + dt_iop_blurs_gui_data_t *g = (dt_iop_blurs_gui_data_t *)self->gui_data; + dt_iop_blurs_params_t *p = (dt_iop_blurs_params_t *)self->params; + + GtkAllocation allocation; + GtkStyleContext *context = gtk_widget_get_style_context(widget); + gtk_widget_get_allocation(widget, &allocation); + gtk_render_background(context, crf, 0, 0, allocation.width, allocation.height); + + if(allocation.width != g->img_width) + { + // Widget size changed, flush the cache buffer and restart + g->img_cached = FALSE; + if(g->img) dt_free_align(g->img); + } + + if(!g->img_cached) + { + g->img = dt_alloc_align(64, allocation.width * allocation.width * 4 * sizeof(unsigned char)); + g->img_width = allocation.width; + g->img_cached = TRUE; + build_gui_kernel(g->img, g->img_width, g->img_width, p); + + // Note: if params change, we silently recompute the img in the buffer + // no need to flush the cache. Flush only if buffer size changes, + // aka GUI widget gets resized. + } + + // Paint the kernel + const int stride = cairo_format_stride_for_width(CAIRO_FORMAT_ARGB32, g->img_width); + cairo_surface_t *cst = cairo_image_surface_create_for_data(g->img, CAIRO_FORMAT_ARGB32, + g->img_width, g->img_width, stride); + + cairo_set_source_surface(crf, cst, 0, 0); + cairo_paint(crf); + cairo_surface_destroy(cst); + return TRUE; +} + +void gui_update(dt_iop_module_t *self) +{ + dt_iop_blurs_gui_data_t *g = (dt_iop_blurs_gui_data_t *)self->gui_data; + dt_iop_blurs_params_t *p = (dt_iop_blurs_params_t *)self->params; + + dt_bauhaus_combobox_set_from_value(g->type, p->type); + dt_bauhaus_slider_set(g->radius, p->radius); + + dt_bauhaus_slider_set(g->blades, p->blades); + dt_bauhaus_slider_set(g->concavity, p->concavity); + dt_bauhaus_slider_set(g->linearity, p->linearity); + dt_bauhaus_slider_set(g->rotation, p->rotation); + + dt_bauhaus_slider_set(g->angle, p->angle); + dt_bauhaus_slider_set(g->curvature, p->curvature); + dt_bauhaus_slider_set(g->offset, p->offset); + + gui_changed(self, NULL, NULL); +} + +#define DEG_TO_RAD 180.f / M_PI_F + +void gui_init(dt_iop_module_t *self) +{ + dt_iop_blurs_gui_data_t *g = IOP_GUI_ALLOC(blurs); + + self->widget = gtk_box_new(GTK_ORIENTATION_VERTICAL, DT_BAUHAUS_SPACE); + + // Image buffer to store the kernel look + // Don't recompute it in the drawing function, only when a param is changed + // then serve it from cache to the drawing function. + g->img_cached = FALSE; + g->img = NULL; + g->img_width = 0.f; + + g->area = GTK_DRAWING_AREA(dtgtk_drawing_area_new_with_aspect_ratio(1.f)); + g_signal_connect(G_OBJECT(g->area), "draw", G_CALLBACK(dt_iop_tonecurve_draw), self); + gtk_box_pack_start(GTK_BOX(self->widget), GTK_WIDGET(g->area), TRUE, TRUE, 0); + + g->radius = dt_bauhaus_slider_from_params(self, "radius"); + dt_bauhaus_slider_set_format(g->radius, "%.f px"); + + g->type = dt_bauhaus_combobox_from_params(self, "type"); + + g->blades = dt_bauhaus_slider_from_params(self, "blades"); + g->concavity = dt_bauhaus_slider_from_params(self, "concavity"); + g->linearity = dt_bauhaus_slider_from_params(self, "linearity"); + g->rotation = dt_bauhaus_slider_from_params(self, "rotation"); + dt_bauhaus_slider_set_factor(g->rotation, DEG_TO_RAD); + dt_bauhaus_slider_set_format(g->rotation, "%.f °"); + + g->angle = dt_bauhaus_slider_from_params(self, "angle"); + dt_bauhaus_slider_set_factor(g->angle, DEG_TO_RAD); + dt_bauhaus_slider_set_format(g->angle, "%.f °"); + + + g->curvature = dt_bauhaus_slider_from_params(self, "curvature"); + g->offset = dt_bauhaus_slider_from_params(self, "offset"); + +} + +void gui_cleanup(dt_iop_module_t *self) +{ + dt_iop_blurs_gui_data_t *g = (dt_iop_blurs_gui_data_t *)self->gui_data; + if(g->img) dt_free_align(g->img); + IOP_GUI_FREE; +} + + +// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.sh +// vim: shiftwidth=2 expandtab tabstop=2 cindent +// kate: tab-indents: off; indent-width 2; replace-tabs on; indent-mode cstyle; remove-trailing-spaces modified; From b907ba19e51d0d57bdedacbb89e899b527728f53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20PIERRE?= Date: Tue, 13 Jul 2021 17:28:32 +0200 Subject: [PATCH 2/4] blurs: fix scale clipping when zoomed out --- src/iop/blurs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/iop/blurs.c b/src/iop/blurs.c index b11567b87c16..7e3b2c1ebd62 100644 --- a/src/iop/blurs.c +++ b/src/iop/blurs.c @@ -560,7 +560,7 @@ void process(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, float *const restrict out = __builtin_assume_aligned(ovoid, 64); // Init the blur kernel - const int radius = MAX(roundf(p->radius / scale), 1); + const int radius = MAX(roundf(p->radius / scale), 2); const size_t kernel_width = 2 * radius + 1; float *const restrict kernel = dt_alloc_align_float(kernel_width * kernel_width); @@ -641,7 +641,7 @@ int process_cl(struct dt_iop_module_t *self, dt_dev_pixelpipe_iop_t *piece, cl_m // Init the blur kernel const float scale = fmaxf(piece->iscale / roi_in->scale, 1.f); - const int radius = MAX(roundf(p->radius / scale), 1); + const int radius = MAX(roundf(p->radius / scale), 2); const size_t kernel_width = 2 * radius + 1; float *const restrict kernel = dt_alloc_align_float(kernel_width * kernel_width); From 144da27643062f7ec31a07af1888d05571d54cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20PIERRE?= Date: Thu, 22 Jul 2021 17:28:41 +0200 Subject: [PATCH 3/4] blurs: make code style changes --- src/iop/blurs.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/iop/blurs.c b/src/iop/blurs.c index 7e3b2c1ebd62..d5a27318e4e4 100644 --- a/src/iop/blurs.c +++ b/src/iop/blurs.c @@ -20,6 +20,7 @@ #endif // our includes go first: #include "bauhaus/bauhaus.h" +#include "common/dwt.h" #include "develop/imageop.h" #include "develop/imageop_gui.h" #include "dtgtk/drawingarea.h" @@ -141,7 +142,7 @@ inline static void blur_2D_Bspline(const float *const restrict in, float *const const size_t col = CLAMP((int)j + (int)(jj - (FSIZE - 1) / 2), (int)0, (int)width - 1); const size_t k_index = (row * width + col); - const float DT_ALIGNED_ARRAY filter[FSIZE] + static const float DT_ALIGNED_ARRAY filter[FSIZE] = { 1.0f / 16.0f, 4.0f / 16.0f, 6.0f / 16.0f, 4.0f / 16.0f, 1.0f / 16.0f }; acc += filter[ii] * filter[jj] * in[k_index]; @@ -176,7 +177,7 @@ static inline void create_lens_kernel(float *const restrict buffer, // Spatial coordinates rounding error const float eps = 1.f / (float)width; - const float radius = (width - 1) / 2.f - 1; + const float radius = (float)(width - 1) / 2.f - 1; #ifdef _OPENMP #pragma omp parallel for simd default(none) dt_omp_firstprivate(width, height, buffer, n, m, k, rotation, eps, radius) \ @@ -186,8 +187,8 @@ static inline void create_lens_kernel(float *const restrict buffer, for(size_t j = 0; j < width; j++) { // get normalized kernel coordinates in [-1 ; 1] - const float x = (i - 1) / radius - 1; - const float y = (j - 1) / radius - 1; + const float x = (float)(i - 1) / radius - 1; + const float y = (float)(j - 1) / radius - 1; // get current radial distance from kernel center const float r = hypotf(x, y); @@ -217,7 +218,7 @@ static inline void create_motion_kernel(float *const restrict buffer, // Spatial coordinates rounding error const float eps = 1.f / (float)width; - const float radius = (width - 1) / 2.f - 1; + const float radius = (float)(width - 1) / 2.f - 1; const float corr_angle = -M_PI_F / 4.f - angle; // Matrix of rotation @@ -235,7 +236,7 @@ static inline void create_motion_kernel(float *const restrict buffer, // every eighth of pixel // get normalized kernel coordinates in [-1 ; 1] - const float x = (i / 8.f - 1) / radius - 1; + const float x = (float)(i / 8.f - 1) / radius - 1; //const float y = (j - 1) / radius - 1; // not used here // build the motion path : 2nd order polynomial @@ -279,8 +280,8 @@ static inline void create_gauss_kernel(float *const restrict buffer, for(size_t j = 0; j < width; j++) { // get normalized kernel coordinates in [-1 ; 1] - const float x = (i - 1) / radius - 1; - const float y = (j - 1) / radius - 1; + const float x = (float)(i - 1) / radius - 1; + const float y = (float)(j - 1) / radius - 1; // get current square radial distance from kernel center const float r_2 = x * x + y * y; @@ -388,7 +389,7 @@ static inline void build_pixel_kernel(float *const buffer, const size_t width, c } // normalize to respect the conservation of energy law - float norm = compute_norm(buffer, width, height); + const float norm = compute_norm(buffer, width, height); normalize(buffer, width, height, norm); dt_free_align(kernel_1); From 29343d96778c97e5be354092b4674dab917f4506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20PIERRE?= Date: Thu, 22 Jul 2021 17:50:12 +0200 Subject: [PATCH 4/4] add blurs.c to potfiles --- po/POTFILES.in | 1 + 1 file changed, 1 insertion(+) diff --git a/po/POTFILES.in b/po/POTFILES.in index 4d2160379725..d6b7b92c11db 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -186,6 +186,7 @@ src/iop/basicadj.c src/iop/bilat.c src/iop/bilateral.cc src/iop/bloom.c +src/iop/blurs.c src/iop/borders.c src/iop/cacorrect.c src/iop/cacorrectrgb.c