Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Color balance module : add the ASC CDL mode #1734

Merged
merged 14 commits into from Oct 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
51 changes: 31 additions & 20 deletions data/kernels/colorspace.cl
Expand Up @@ -72,9 +72,9 @@ float lab_f(float x)
float4 XYZ_to_Lab(float4 xyz)
{
float4 lab;

xyz.x *= (1.0f/0.9642f);
xyz.z *= (1.0f/0.8249f);
const float4 d50 = (float4)(0.9642f, 1.0f, 0.8249f, 1.0f);

xyz /= d50;
xyz.x = lab_f(xyz.x);
xyz.y = lab_f(xyz.y);
xyz.z = lab_f(xyz.z);
Expand Down Expand Up @@ -106,14 +106,33 @@ float4 Lab_to_XYZ(float4 Lab)
return XYZ;
}

float4 Lab_to_prophotorgb(float4 Lab)
float4 prophotorgb_to_XYZ(float4 rgb)
{
const float rgb_to_xyz[3][3] = { // prophoto rgb
{0.7976749f, 0.1351917f, 0.0313534f},
{0.2880402f, 0.7118741f, 0.0000857f},
{0.0000000f, 0.0000000f, 0.8252100f},
};
float4 XYZ = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
XYZ.x += rgb_to_xyz[0][0] * rgb.x;
XYZ.x += rgb_to_xyz[0][1] * rgb.y;
XYZ.x += rgb_to_xyz[0][2] * rgb.z;
XYZ.y += rgb_to_xyz[1][0] * rgb.x;
XYZ.y += rgb_to_xyz[1][1] * rgb.y;
XYZ.y += rgb_to_xyz[1][2] * rgb.z;
XYZ.z += rgb_to_xyz[2][0] * rgb.x;
XYZ.z += rgb_to_xyz[2][1] * rgb.y;
XYZ.z += rgb_to_xyz[2][2] * rgb.z;
return XYZ;
}

float4 XYZ_to_prophotorgb(float4 XYZ)
{
const float xyz_to_rgb[3][3] = { // prophoto rgb d50
{ 1.3459433f, -0.2556075f, -0.0511118f},
{-0.5445989f, 1.5081673f, 0.0205351f},
{ 0.0000000f, 0.0000000f, 1.2118128f},
};
float4 XYZ = Lab_to_XYZ(Lab);
float4 rgb = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
rgb.x += xyz_to_rgb[0][0] * XYZ.x;
rgb.x += xyz_to_rgb[0][1] * XYZ.y;
Expand All @@ -127,23 +146,15 @@ float4 Lab_to_prophotorgb(float4 Lab)
return rgb;
}

float4 Lab_to_prophotorgb(float4 Lab)
{
float4 XYZ = Lab_to_XYZ(Lab);
return XYZ_to_prophotorgb(XYZ);
}

float4 prophotorgb_to_Lab(float4 rgb)
{
const float rgb_to_xyz[3][3] = { // prophoto rgb
{0.7976749f, 0.1351917f, 0.0313534f},
{0.2880402f, 0.7118741f, 0.0000857f},
{0.0000000f, 0.0000000f, 0.8252100f},
};
float4 XYZ = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
XYZ.x += rgb_to_xyz[0][0] * rgb.x;
XYZ.x += rgb_to_xyz[0][1] * rgb.y;
XYZ.x += rgb_to_xyz[0][2] * rgb.z;
XYZ.y += rgb_to_xyz[1][0] * rgb.x;
XYZ.y += rgb_to_xyz[1][1] * rgb.y;
XYZ.y += rgb_to_xyz[1][2] * rgb.z;
XYZ.z += rgb_to_xyz[2][0] * rgb.x;
XYZ.z += rgb_to_xyz[2][1] * rgb.y;
XYZ.z += rgb_to_xyz[2][2] * rgb.z;
float4 XYZ = prophotorgb_to_XYZ(rgb);
return XYZ_to_Lab(XYZ);
}

Expand Down
54 changes: 47 additions & 7 deletions data/kernels/extended.cl
Expand Up @@ -580,26 +580,66 @@ colormapping_mapping (read_only image2d_t in, read_only image2d_t tmp, write_onl
/* kernel for the colorbalance module */
kernel void
colorbalance (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
const float4 lift, const float4 gain, const float4 gamma_inv)
const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
const float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
const float4 XYZ = Lab_to_XYZ(Lab);
float4 sRGB = XYZ_to_sRGB(XYZ);
const float4 luma = XYZ.y;
const float4 saturation4 = saturation;
const float4 contrast4 = contrast;
const float4 grey4 = grey;

float4 sRGB = XYZ_to_sRGB(Lab_to_XYZ(Lab));
// saturation
sRGB = luma + saturation4 * (sRGB - luma);

// Lift gamma gain
sRGB = (sRGB <= (float4)0.0031308f) ? 12.92f * sRGB : (1.0f + 0.055f) * pow(sRGB, (float4)1.0f/2.4f) - (float4)0.055f;

sRGB = pow(fmax(((sRGB - (float4)1.0f) * lift + (float4)1.0f) * gain, (float4)0.0f), gamma_inv);

sRGB = (sRGB <= (float4)0.04045f) ? sRGB / 12.92f : pow((sRGB + (float4)0.055f) / (1.0f + 0.055f), (float4)2.4f);

// fulcrum contrast
sRGB = pow(fmax(sRGB, (float4)0.0f) / grey4, contrast4) * grey4;

sRGB = XYZ_to_Lab(sRGB_to_XYZ(sRGB));

write_imagef (out, (int2)(x, y), sRGB);
}

kernel void
colorbalance_cdl (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

const float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
const float4 XYZ = Lab_to_XYZ(Lab);
float4 RGB = XYZ_to_prophotorgb(XYZ);
const float4 luma = XYZ.y;
const float4 saturation4 = saturation;
const float4 contrast4 = contrast;
const float4 grey4 = grey;

// saturation
RGB = luma + saturation4 * (RGB - luma);

// lift power slope
RGB = pow(fmax((RGB * gain + lift), (float4)0.0f), gamma_inv);

// fulcrum contrast
RGB = pow(fmax(RGB, (float4)0.0f) / grey4, contrast4) * grey4;

Lab.xyz = XYZ_to_Lab(sRGB_to_XYZ(sRGB)).xyz;
RGB = prophotorgb_to_Lab(RGB);

write_imagef (out, (int2)(x, y), Lab);
write_imagef (out, (int2)(x, y), RGB);
}

/* helpers and kernel for the colorchecker module */
Expand Down
116 changes: 78 additions & 38 deletions src/common/colorspaces_inline_conversions.h
Expand Up @@ -31,9 +31,9 @@ static inline __m128 lab_f_inv_m(const __m128 x)
const __m128 kappa_rcp_x116 = _mm_set1_ps(116.0f * 27.0f / 24389.0f);

// x > epsilon
const __m128 res_big = _mm_mul_ps(_mm_mul_ps(x, x), x);
const __m128 res_big = x * x * x;
// x <= epsilon
const __m128 res_small = _mm_sub_ps(_mm_mul_ps(kappa_rcp_x116, x), kappa_rcp_x16);
const __m128 res_small = kappa_rcp_x116 * x - kappa_rcp_x16;

// blend results according to whether each component is > epsilon or not
const __m128 mask = _mm_cmpgt_ps(x, epsilon);
Expand All @@ -49,10 +49,9 @@ static inline __m128 dt_Lab_to_XYZ_sse2(const __m128 Lab)

// last component ins shuffle taken from 1st component of Lab to make sure it is not nan, so it will become
// 0.0f in f
const __m128 f = _mm_mul_ps(_mm_shuffle_ps(Lab, Lab, _MM_SHUFFLE(0, 2, 0, 1)), coef);
const __m128 f = _mm_shuffle_ps(Lab, Lab, _MM_SHUFFLE(0, 2, 0, 1)) * coef;

return _mm_mul_ps(d50,
lab_f_inv_m(_mm_add_ps(_mm_add_ps(f, _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 3, 1))), offset)));
return d50 * lab_f_inv_m(f + _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 3, 1)) + offset);
}

static inline __m128 lab_f_m_sse2(const __m128 x)
Expand All @@ -65,12 +64,11 @@ static inline __m128 lab_f_m_sse2(const __m128 x)
const __m128 a = _mm_castsi128_ps(
_mm_add_epi32(_mm_cvtps_epi32(_mm_div_ps(_mm_cvtepi32_ps(_mm_castps_si128(x)), _mm_set1_ps(3.0f))),
_mm_set1_epi32(709921077)));
const __m128 a3 = _mm_mul_ps(_mm_mul_ps(a, a), a);
const __m128 res_big
= _mm_div_ps(_mm_mul_ps(a, _mm_add_ps(a3, _mm_add_ps(x, x))), _mm_add_ps(_mm_add_ps(a3, a3), x));
const __m128 a3 = a * a * a;
const __m128 res_big = a * (a3 + x + x) / (a3 + a3 + x);

// calculate as if x <= epsilon : result = (kappa*x+16)/116
const __m128 res_small = _mm_div_ps(_mm_add_ps(_mm_mul_ps(kappa, x), _mm_set1_ps(16.0f)), _mm_set1_ps(116.0f));
const __m128 res_small = (kappa * x + _mm_set1_ps(16.0f)) / _mm_set1_ps(116.0f);

// blend results according to whether each component is > epsilon or not
const __m128 mask = _mm_cmpgt_ps(x, epsilon);
Expand All @@ -80,12 +78,11 @@ static inline __m128 lab_f_m_sse2(const __m128 x)
/** uses D50 white point. */
static inline __m128 dt_XYZ_to_Lab_sse2(const __m128 XYZ)
{
const __m128 d50_inv = _mm_set_ps(0.0f, 1.0f / 0.8249f, 1.0f, 1.0f / 0.9642f);
const __m128 d50_inv = _mm_set_ps(1.0f, 0.8249f, 1.0f, 0.9642f);
const __m128 coef = _mm_set_ps(0.0f, 200.0f, 500.0f, 116.0f);
const __m128 f = lab_f_m_sse2(_mm_mul_ps(XYZ, d50_inv));
const __m128 f = lab_f_m_sse2(XYZ / d50_inv);
// because d50_inv.z is 0.0f, lab_f(0) == 16/116, so Lab[0] = 116*f[0] - 16 equal to 116*(f[0]-f[3])
return _mm_mul_ps(coef, _mm_sub_ps(_mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 1, 0, 1)),
_mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 2, 1, 3))));
return coef * (_mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 1, 0, 1)) - _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 2, 1, 3)));
}

/** uses D50 white point. */
Expand All @@ -98,14 +95,14 @@ static inline __m128 dt_XYZ_to_sRGB_sse2(__m128 XYZ)
const __m128 xyz_to_srgb_2 = _mm_setr_ps(-0.4906146f, 0.0334540f, 1.4052427f, 0.0f);

__m128 rgb
= _mm_add_ps(_mm_mul_ps(xyz_to_srgb_0, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_add_ps(_mm_mul_ps(xyz_to_srgb_1, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1))),
_mm_mul_ps(xyz_to_srgb_2, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2)))));
= xyz_to_srgb_0 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0)) +
xyz_to_srgb_1 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1)) +
xyz_to_srgb_2 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2));

// linear sRGB -> gamma corrected sRGB
__m128 mask = _mm_cmple_ps(rgb, _mm_set1_ps(0.0031308));
__m128 rgb0 = _mm_mul_ps(_mm_set1_ps(12.92), rgb);
__m128 rgb1 = _mm_sub_ps(_mm_mul_ps(_mm_set1_ps(1.0 + 0.055), _mm_pow_ps1(rgb, 1.0 / 2.4)), _mm_set1_ps(0.055));
__m128 rgb0 = _mm_set1_ps(12.92) * rgb;
__m128 rgb1 = _mm_set1_ps(1.0 + 0.055) * _mm_pow_ps1(rgb, 1.0 / 2.4) - _mm_set1_ps(0.055);
return _mm_or_ps(_mm_and_ps(mask, rgb0), _mm_andnot_ps(mask, rgb1));
}

Expand All @@ -123,9 +120,41 @@ static inline __m128 dt_sRGB_to_XYZ_sse2(__m128 rgb)
rgb = _mm_or_ps(_mm_and_ps(mask, rgb0), _mm_andnot_ps(mask, rgb1));

__m128 XYZ
= _mm_add_ps(_mm_mul_ps(srgb_to_xyz_0, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0))),
_mm_add_ps(_mm_mul_ps(srgb_to_xyz_1, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1))),
_mm_mul_ps(srgb_to_xyz_2, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2)))));
= srgb_to_xyz_0 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0)) +
srgb_to_xyz_1 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1)) +
srgb_to_xyz_2 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2));
return XYZ;
}

/** uses D50 white point. */
// see http://www.brucelindbloom.com/Eqn_RGB_XYZ_Matrix.html for the transformation matrices
static inline __m128 dt_XYZ_to_prophotoRGB_sse2(__m128 XYZ)
{
// XYZ -> prophotoRGB matrix, D50
const __m128 xyz_to_rgb_0 = _mm_setr_ps( 1.3459433f, -0.5445989f, 0.0000000f, 0.0f);
const __m128 xyz_to_rgb_1 = _mm_setr_ps(-0.2556075f, 1.5081673f, 0.0000000f, 0.0f);
const __m128 xyz_to_rgb_2 = _mm_setr_ps(-0.0511118f, 0.0205351f, 1.2118128f, 0.0f);

__m128 rgb
= xyz_to_rgb_0 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0)) +
xyz_to_rgb_1 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1)) +
xyz_to_rgb_2 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2));
return rgb;
}

/** uses D50 white point. */
// see http://www.brucelindbloom.com/Eqn_RGB_XYZ_Matrix.html for the transformation matrices
static inline __m128 dt_prophotoRGB_to_XYZ_sse2(__m128 rgb)
{
// prophotoRGB -> XYZ matrix, D50
const __m128 rgb_to_xyz_0 = _mm_setr_ps(0.7976749f, 0.2880402f, 0.0000000f, 0.0f);
const __m128 rgb_to_xyz_1 = _mm_setr_ps(0.1351917f, 0.7118741f, 0.0000000f, 0.0f);
const __m128 rgb_to_xyz_2 = _mm_setr_ps(0.0313534f, 0.0000857f, 0.8252100f, 0.0f);

__m128 XYZ
= rgb_to_xyz_0 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0)) +
rgb_to_xyz_1 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1)) +
rgb_to_xyz_2 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2));
return XYZ;
}
#endif
Expand Down Expand Up @@ -161,16 +190,16 @@ static inline float lab_f(const float x)
/** uses D50 white point. */
static inline void dt_XYZ_to_Lab(const float *XYZ, float *Lab)
{
const float d50[3] = { 0.9642, 1.0, 0.8249 };
const float f[3] = { lab_f(XYZ[0] / d50[0]), lab_f(XYZ[1] / d50[1]), lab_f(XYZ[2] / d50[2]) };
const float d50[3] = { 0.9642f, 1.0f, 0.8249f };
const float f[3] = { lab_f(XYZ[0] / d50[0]), lab_f(XYZ[1]), lab_f(XYZ[2] / d50[2]) };
Lab[0] = 116.0f * f[1] - 16.0f;
Lab[1] = 500.0f * (f[0] - f[1]);
Lab[2] = 200.0f * (f[1] - f[2]);
}

static inline float lab_f_inv(const float x)
{
const float epsilon = 0.20689655172413796; // cbrtf(216.0f/24389.0f);
const float epsilon = 0.20689655172413796f; // cbrtf(216.0f/24389.0f);
const float kappa = 24389.0f / 27.0f;
if(x > epsilon)
return x * x * x;
Expand All @@ -181,12 +210,12 @@ static inline float lab_f_inv(const float x)
/** uses D50 white point. */
static inline void dt_Lab_to_XYZ(const float *Lab, float *XYZ)
{
const float d50[3] = { 0.9642, 1.0, 0.8249 };
const float d50[3] = { 0.96422f, 1.0f, 0.8249f };
aurelienpierre marked this conversation as resolved.
Show resolved Hide resolved
const float fy = (Lab[0] + 16.0f) / 116.0f;
const float fx = Lab[1] / 500.0f + fy;
const float fz = fy - Lab[2] / 200.0f;
XYZ[0] = d50[0] * lab_f_inv(fx);
XYZ[1] = d50[1] * lab_f_inv(fy);
XYZ[1] = lab_f_inv(fy);
XYZ[2] = d50[2] * lab_f_inv(fz);
}

Expand Down Expand Up @@ -234,33 +263,44 @@ static inline void dt_sRGB_to_XYZ(const float *const sRGB, float *XYZ)
for(int c = 0; c < 3; c++) XYZ[r] += srgb_to_xyz[r][c] * rgb[c];
}

static inline void dt_Lab_to_prophotorgb(const float *const Lab, float *rgb)
static inline void dt_XYZ_to_prophotorgb(const float *const XYZ, float *rgb)
{
const float xyz_to_rgb[3][3] = {
// prophoto rgb d50
{ 1.3459433, -0.2556075, -0.0511118 },
{ -0.5445989, 1.5081673, 0.0205351 },
{ 0.0000000, 0.0000000, 1.2118128 },
{ 1.3459433f, -0.2556075f, -0.0511118f},
{-0.5445989f, 1.5081673f, 0.0205351f},
{ 0.0000000f, 0.0000000f, 1.2118128f},
};

float XYZ[3];
dt_Lab_to_XYZ(Lab, XYZ);
rgb[0] = rgb[1] = rgb[2] = 0.0f;
for(int r = 0; r < 3; r++)
for(int c = 0; c < 3; c++) rgb[r] += xyz_to_rgb[r][c] * XYZ[c];
}

static inline void dt_prophotorgb_to_Lab(const float *const rgb, float *Lab)
static inline void dt_prophotorgb_to_XYZ(const float *const rgb, float *XYZ)
{
const float rgb_to_xyz[3][3] = {
// prophoto rgb
{ 0.7976749, 0.1351917, 0.0313534 },
{ 0.2880402, 0.7118741, 0.0000857 },
{ 0.0000000, 0.0000000, 0.8252100 },
{0.7976749f, 0.1351917f, 0.0313534f},
{0.2880402f, 0.7118741f, 0.0000857f},
{0.0000000f, 0.0000000f, 0.8252100f},
};
float XYZ[3] = { 0.0f };
XYZ[0] = XYZ[1] = XYZ[2] = 0.0f;
for(int r = 0; r < 3; r++)
for(int c = 0; c < 3; c++) XYZ[r] += rgb_to_xyz[r][c] * rgb[c];
}


static inline void dt_Lab_to_prophotorgb(const float *const Lab, float *rgb)
{
float XYZ[3] = { 0.0f };
dt_Lab_to_XYZ(Lab, XYZ);
dt_XYZ_to_prophotorgb(XYZ, rgb);
}

static inline void dt_prophotorgb_to_Lab(const float *const rgb, float *Lab)
{
float XYZ[3] = { 0.0f };
dt_prophotorgb_to_XYZ(rgb, XYZ);
dt_XYZ_to_Lab(XYZ, Lab);
}

Expand Down