Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: make sure correct HLR chroma corrections #13654

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 152 additions & 55 deletions data/kernels/basic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
copyright (c) 2009--2013 johannes hanika.
copyright (c) 2014 Ulrich Pegelow.
copyright (c) 2014 LebedevRI.
Copyright (C) 2022-23 darktable developers.

darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -284,10 +285,16 @@ highlights_1f_clip (read_only image2d_t in, write_only image2d_t out, const int
write_imagef (out, (int2)(x, y), pixel);
}

kernel void
highlights_false_color (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
const int rx, const int ry, const int filters, global const unsigned char (*const xtrans)[6],
global const float *clips)
kernel void highlights_false_color(
read_only image2d_t in,
write_only image2d_t out,
const int width,
const int height,
const int rx,
const int ry,
const unsigned int filters,
global const unsigned char (*const xtrans)[6],
global const float *clips)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
Expand All @@ -301,9 +308,14 @@ highlights_false_color (read_only image2d_t in, write_only image2d_t out, const
write_imagef (out, (int2)(x, y), oval);
}

static inline float _calc_refavg(read_only image2d_t in, global const unsigned char (*const xtrans)[6], const unsigned int filters, int row, int col, int width)
static inline float _calc_refavg(
read_only image2d_t in,
global const unsigned char (*const xtrans)[6],
const unsigned int filters,
int row,
int col,
int width)
{
const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
float mean[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
float cnt[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
for(int dy = -1; dy < 2; dy++)
Expand All @@ -317,45 +329,81 @@ static inline float _calc_refavg(read_only image2d_t in, global const unsigned c
}
}
for(int c = 0; c < 3; c++)
mean[c] = pow(mean[c] / cnt[c], 1.0f / 3.0f);
mean[c] = pow(mean[c] / fmax(1.0f, cnt[c]), 1.0f / 3.0f);

float croot_refavg[4] = { 0.5f * (mean[1] + mean[2]), 0.5f * (mean[0] + mean[2]), 0.5f * (mean[0] + mean[1]), 0.0f};
const float croot_refavg[4] = { 0.5f * (mean[1] + mean[2]), 0.5f * (mean[0] + mean[2]), 0.5f * (mean[0] + mean[1]), 0.0f};
const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
return pow(croot_refavg[color], 3.0f);
}

kernel void
highlights_initmask (read_only image2d_t in, global char *inmask,
const int width, const int height, const int psize, const int pwidth,
const unsigned int filters, global const unsigned char (*const xtrans)[6],
global const float *clips)
kernel void highlights_initmask(
read_only image2d_t in,
global char *inmask,
const int msize,
const int mwidth,
const int mheight,
const unsigned int filters,
global const unsigned char (*const xtrans)[6],
global const float *clips)
{
const int col = get_global_id(0);
const int row = get_global_id(1);
if((col < 1) || (row < 1) || (col > width-2) || (row > height-2)) return;
const int mcol = get_global_id(0);
const int mrow = get_global_id(1);

float val = fmax(0.0f, read_imagef(in, sampleri, (int2)(col, row)).x);
const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
const int idx = color*psize + mad24(row/3, pwidth, col/3);
if((mcol >= mwidth) || (mrow >= mheight))
return;

const size_t mdx = mad24(mrow, mwidth, mcol);

if((val >= clips[color]) && (inmask[idx] == 0))
if((mcol < 1) || (mrow < 1) || (mcol > mwidth -2) || (mrow > mheight-2))
{
inmask[idx] = inmask[idx] | 1;
for(int c = 0; c < 3; c++)
inmask[c*msize + mdx] = 0;
return;
}

char mbuff[4] = { 0, 0, 0, 0 };
for(int y = -1; y < 2; y++)
{
for(int x = -1; x < 2; x++)
{
const int color = (filters == 9u) ? FCxtrans(mrow+y, mcol+x, xtrans) : FC(mrow+y, mcol+x, filters);
const float val = fmax(0.0f, read_imagef(in, sampleri, (int2)(3 * mcol + x, 3 * mrow + y)).x);
mbuff[color] += (val >= clips[color]) ? 1 : 0;
}
}

for(int c = 0; c < 3; c++)
inmask[c*msize + mdx] = (mbuff[c] != 0) ? 1 : 0;
}

kernel void
highlights_dilatemask (global char *in, global char *out,
const int w1, const int height, const int psize)
kernel void highlights_dilatemask(
global char *in,
global char *out,
const int mwidth,
const int mheight,
const int msize)
{
const int col = get_global_id(0);
const int row = get_global_id(1);
if((col < 3) || (row < 3) || (col > w1-4) || (row > height-4)) return;

const int w2 = 2 * w1;
const int w3 = 3 * w1;
if((col >= mwidth) || (row >= mheight))
return;

const int w1 = mwidth;
const int w2 = 2 * mwidth;
const int w3 = 3 * mwidth;
const int moff = mad24(row, w1, col);

int i = mad24(row, w1, col);
out[i] = in[i-w1-1] | in[i-w1] | in[i-w1+1] |
if((col < 3) || (row < 3) || (col > mwidth - 4) || (row > mheight - 4))
{
out[moff] = 0;
out[moff + msize] = 0;
out[moff + 2*msize] = 0;
return;
}

int i = moff;
out[i] = (in[i-w1-1] | in[i-w1] | in[i-w1+1] |
in[i-1] | in[i] | in[i+1] |
in[i+w1-1] | in[i+w1] | in[i+w1+1] |
in[i-w2-1] | in[i-w2] | in[i-w2+1] |
Expand All @@ -365,10 +413,10 @@ highlights_dilatemask (global char *in, global char *out,
in[i-w2-3] | in[i-w2-2] | in[i-w2+2] | in[i-w2+3] |
in[i-w1-3] | in[i-w1+3] | in[i-3] | in[i+3] | in[i+w1-3] | in[i+w1+3] |
in[i+w2-3] | in[i+w2-2] | in[i+w2+2] | in[i+w2+3] |
in[i+w3-2] | in[i+w3-1] | in[i+w3] | in[i+w3+1] | in[i+w3+2];
in[i+w3-2] | in[i+w3-1] | in[i+w3] | in[i+w3+1] | in[i+w3+2]) ? 1 : 0;

i = psize + mad24(row, w1, col);
out[i] = in[i-w1-1] | in[i-w1] | in[i-w1+1] |
i = msize + moff;
out[i] = (in[i-w1-1] | in[i-w1] | in[i-w1+1] |
in[i-1] | in[i] | in[i+1] |
in[i+w1-1] | in[i+w1] | in[i+w1+1] |
in[i-w2-1] | in[i-w2] | in[i-w2+1] |
Expand All @@ -378,10 +426,10 @@ highlights_dilatemask (global char *in, global char *out,
in[i-w2-3] | in[i-w2-2] | in[i-w2+2] | in[i-w2+3] |
in[i-w1-3] | in[i-w1+3] | in[i-3] | in[i+3] | in[i+w1-3] | in[i+w1+3] |
in[i+w2-3] | in[i+w2-2] | in[i+w2+2] | in[i+w2+3] |
in[i+w3-2] | in[i+w3-1] | in[i+w3] | in[i+w3+1] | in[i+w3+2];
in[i+w3-2] | in[i+w3-1] | in[i+w3] | in[i+w3+1] | in[i+w3+2]) ? 1 : 0;

i = 2*psize + mad24(row, w1, col);
out[i] = in[i-w1-1] | in[i-w1] | in[i-w1+1] |
i = 2*msize + moff;
out[i] = (in[i-w1-1] | in[i-w1] | in[i-w1+1] |
in[i-1] | in[i] | in[i+1] |
in[i+w1-1] | in[i+w1] | in[i+w1+1] |
in[i-w2-1] | in[i-w2] | in[i-w2+1] |
Expand All @@ -391,49 +439,98 @@ highlights_dilatemask (global char *in, global char *out,
in[i-w2-3] | in[i-w2-2] | in[i-w2+2] | in[i-w2+3] |
in[i-w1-3] | in[i-w1+3] | in[i-3] | in[i+3] | in[i+w1-3] | in[i+w1+3] |
in[i+w2-3] | in[i+w2-2] | in[i+w2+2] | in[i+w2+3] |
in[i+w3-2] | in[i+w3-1] | in[i+w3] | in[i+w3+1] | in[i+w3+2];
in[i+w3-2] | in[i+w3-1] | in[i+w3] | in[i+w3+1] | in[i+w3+2]) ? 1 : 0;
}

kernel void
highlights_chroma (read_only image2d_t in, global char *mask, global float *accu,
const int width, const int height,
const int pwidth, const int psize,
const int filters, global const unsigned char (*const xtrans)[6],
global const float *clips, global const float *dark)
/*
 * Add `delta` to the float stored at `val` in a global buffer, using the
 * 32-bit integer atomics on the float's bit pattern.
 *
 *   val    location of the float accumulator to update
 *   delta  amount added to *val
 *
 * The update is a read / compute / compare-and-exchange cycle that repeats
 * until the exchange observes the same bits that were read at the start of
 * the cycle.
 */
void atomic_add_f(global float *val, const float delta)
{
  // Both scratch values give access to the same 32 bits either as a float
  // (for the addition) or as an unsigned int (for the atomic built-ins).
  union
  {
    float f;
    unsigned int i;
  }
  seen, wanted;

  global volatile unsigned int *const bits = (global volatile unsigned int *)val;

  for(;;)
  {
    // Conceptually this is `seen.f = *val`. OpenCL's relaxed memory
    // consistency gives no guarantee that a plain load of the global buffer
    // is consistently cached, so the value is fetched through an atomic
    // add of zero instead - slower, but consistent.
    seen.i = atomic_add(bits, 0);
    wanted.f = seen.f + delta;

    // Done once the exchange reports the bits we based the sum on; otherwise
    // the stored value differed from what we read, so start over.
    if(atomic_cmpxchg(bits, seen.i, wanted.i) == seen.i)
      break;
  }
}

kernel void highlights_chroma(
read_only image2d_t in,
global char *mask,
global float *accu,
const int width,
const int height,
const int mwidth,
const int msize,
const unsigned int filters,
global const unsigned char (*const xtrans)[6],
global const float *clips)
{
const int row = get_global_id(0);

if((row < 3) || (row > height - 3)) return;
if((row < 3) || (row > height - 4)) return;

float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
float cnt[4] = {0.0f, 0.0f, 0.0f, 0.0f};

for(int col = 3; col < width-3; col++)
{
const int idx = mad24(row, width, col);
const size_t idx = mad24(row, width, col);
const int color = (filters == 9u) ? FCxtrans(row, col, xtrans) : FC(row, col, filters);
const float inval = fmax(0.0f, read_imagef(in, sampleri, (int2)(col, row)).x);
const int px = color * psize + mad24(row/3, pwidth, col/3);
if(mask[px] && (inval > dark[color]) && (inval < clips[color]))
const size_t px = color * msize + mad24(row/3, mwidth, col/3);
if(mask[px] && (inval > 0.2f*clips[color]) && (inval < clips[color]))
{
const float ref = _calc_refavg(in, xtrans, filters, row, col, width);
sum[color] += inval - ref;
cnt[color] += 1.0f;
}
}
for(int c = 0; c < 3; c++)

if(cnt[0] > 0.0f)
{
accu[row*6 + c] = sum[c];
accu[row*6 + 3 + c] = cnt[c];
atomic_add_f(&accu[0], sum[0]);
atomic_add_f(&accu[1], cnt[0]);
}
if(cnt[1] > 0.0f)
{
atomic_add_f(&accu[2], sum[1]);
atomic_add_f(&accu[3], cnt[1]);
}
if(cnt[2] > 0.0f)
{
atomic_add_f(&accu[4], sum[2]);
atomic_add_f(&accu[5], cnt[2]);
}
}

kernel void
highlights_opposed (read_only image2d_t in, write_only image2d_t out,
const int owidth, const int oheight, const int iwidth, const int iheight,
const int dx, const int dy,
const int filters, global const unsigned char (*const xtrans)[6],
global const float *clips, global const float *chroma)
kernel void highlights_opposed(
read_only image2d_t in,
write_only image2d_t out,
const int owidth,
const int oheight,
const int iwidth,
const int iheight,
const int dx,
const int dy,
const unsigned int filters,
global const unsigned char (*const xtrans)[6],
global const float *clips,
global const float *chroma)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
Expand Down
Loading