darktable-org · TurboGit · Oct 16, 2018 · Oct 13, 2018 · Oct 13, 2018 · Oct 13, 2018
diff --git a/data/kernels/colorspace.cl b/data/kernels/colorspace.cl
@@ -72,9 +72,9 @@ float lab_f(float x)
 float4 XYZ_to_Lab(float4 xyz)
 {
   float4 lab;
-
-  xyz.x *= (1.0f/0.9642f);
-  xyz.z *= (1.0f/0.8249f);
+  const float4 d50 = (float4)(0.9642f, 1.0f, 0.8249f, 1.0f);
+
+  xyz /= d50;
   xyz.x = lab_f(xyz.x);
   xyz.y = lab_f(xyz.y);
   xyz.z = lab_f(xyz.z);
@@ -106,14 +106,33 @@ float4 Lab_to_XYZ(float4 Lab)
   return XYZ;
 }
 
-float4 Lab_to_prophotorgb(float4 Lab)
+float4 prophotorgb_to_XYZ(float4 rgb)
+{
+  const float rgb_to_xyz[3][3] = { // prophoto rgb
+    {0.7976749f, 0.1351917f, 0.0313534f},
+    {0.2880402f, 0.7118741f, 0.0000857f},
+    {0.0000000f, 0.0000000f, 0.8252100f},
+  };
+  float4 XYZ = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+  XYZ.x += rgb_to_xyz[0][0] * rgb.x;
+  XYZ.x += rgb_to_xyz[0][1] * rgb.y;
+  XYZ.x += rgb_to_xyz[0][2] * rgb.z;
+  XYZ.y += rgb_to_xyz[1][0] * rgb.x;
+  XYZ.y += rgb_to_xyz[1][1] * rgb.y;
+  XYZ.y += rgb_to_xyz[1][2] * rgb.z;
+  XYZ.z += rgb_to_xyz[2][0] * rgb.x;
+  XYZ.z += rgb_to_xyz[2][1] * rgb.y;
+  XYZ.z += rgb_to_xyz[2][2] * rgb.z;
+  return XYZ;
+}
+
+float4 XYZ_to_prophotorgb(float4 XYZ)
 {
   const float xyz_to_rgb[3][3] = { // prophoto rgb d50
     { 1.3459433f, -0.2556075f, -0.0511118f},
     {-0.5445989f,  1.5081673f,  0.0205351f},
     { 0.0000000f,  0.0000000f,  1.2118128f},
   };
-  float4 XYZ = Lab_to_XYZ(Lab);
   float4 rgb = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
   rgb.x += xyz_to_rgb[0][0] * XYZ.x;
   rgb.x += xyz_to_rgb[0][1] * XYZ.y;
@@ -127,23 +146,15 @@ float4 Lab_to_prophotorgb(float4 Lab)
   return rgb;
 }
 
+float4 Lab_to_prophotorgb(float4 Lab)
+{
+  float4 XYZ = Lab_to_XYZ(Lab);
+  return XYZ_to_prophotorgb(XYZ);
+}
+
 float4 prophotorgb_to_Lab(float4 rgb)
 {
-  const float rgb_to_xyz[3][3] = { // prophoto rgb
-    {0.7976749f, 0.1351917f, 0.0313534f},
-    {0.2880402f, 0.7118741f, 0.0000857f},
-    {0.0000000f, 0.0000000f, 0.8252100f},
-  };
-  float4 XYZ = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
-  XYZ.x += rgb_to_xyz[0][0] * rgb.x;
-  XYZ.x += rgb_to_xyz[0][1] * rgb.y;
-  XYZ.x += rgb_to_xyz[0][2] * rgb.z;
-  XYZ.y += rgb_to_xyz[1][0] * rgb.x;
-  XYZ.y += rgb_to_xyz[1][1] * rgb.y;
-  XYZ.y += rgb_to_xyz[1][2] * rgb.z;
-  XYZ.z += rgb_to_xyz[2][0] * rgb.x;
-  XYZ.z += rgb_to_xyz[2][1] * rgb.y;
-  XYZ.z += rgb_to_xyz[2][2] * rgb.z;
+  float4 XYZ = prophotorgb_to_XYZ(rgb);
   return XYZ_to_Lab(XYZ);
 }
 

diff --git a/data/kernels/extended.cl b/data/kernels/extended.cl
@@ -580,26 +580,66 @@ colormapping_mapping (read_only image2d_t in, read_only image2d_t tmp, write_onl
 /* kernel for the colorbalance module */
 kernel void
 colorbalance (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
-              const float4 lift, const float4 gain, const float4 gamma_inv)
+              const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey)
 {
   const int x = get_global_id(0);
   const int y = get_global_id(1);
 
   if(x >= width || y >= height) return;
 
-  float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
+  const float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
+  const float4 XYZ = Lab_to_XYZ(Lab);
+  float4 sRGB = XYZ_to_sRGB(XYZ);
+  const float4 luma = XYZ.y;
+  const float4 saturation4 = saturation;
+  const float4 contrast4 = contrast;
+  const float4 grey4 = grey;
 
-  float4 sRGB = XYZ_to_sRGB(Lab_to_XYZ(Lab));
+ // saturation
+  sRGB = luma + saturation4 * (sRGB - luma);
 
+  // Lift gamma gain
   sRGB = (sRGB <= (float4)0.0031308f) ? 12.92f * sRGB : (1.0f + 0.055f) * pow(sRGB, (float4)1.0f/2.4f) - (float4)0.055f;
-
   sRGB = pow(fmax(((sRGB - (float4)1.0f) * lift + (float4)1.0f) * gain, (float4)0.0f), gamma_inv);
-
   sRGB = (sRGB <= (float4)0.04045f) ? sRGB / 12.92f : pow((sRGB + (float4)0.055f) / (1.0f + 0.055f), (float4)2.4f);
+
+  // fulcrum contrast
+  sRGB = pow(fmax(sRGB, (float4)0.0f) / grey4, contrast4) * grey4;
+
+  sRGB = XYZ_to_Lab(sRGB_to_XYZ(sRGB));
+
+  write_imagef (out, (int2)(x, y), sRGB);
+}
+
+kernel void
+colorbalance_cdl (read_only image2d_t in, write_only image2d_t out, const int width, const int height,
+              const float4 lift, const float4 gain, const float4 gamma_inv, const float saturation, const float contrast, const float grey)
+{
+  const int x = get_global_id(0);
+  const int y = get_global_id(1);
+
+  if(x >= width || y >= height) return;
+
+  const float4 Lab = read_imagef(in, sampleri, (int2)(x, y));
+  const float4 XYZ = Lab_to_XYZ(Lab);
+  float4 RGB = XYZ_to_prophotorgb(XYZ);
+  const float4 luma = XYZ.y;
+  const float4 saturation4 = saturation;
+  const float4 contrast4 = contrast;
+  const float4 grey4 = grey;
+
+  // saturation
+  RGB = luma + saturation4 * (RGB - luma);
+
+  // lift power slope
+  RGB = pow(fmax((RGB * gain + lift), (float4)0.0f), gamma_inv);
+
+  // fulcrum contrast
+  RGB = pow(fmax(RGB, (float4)0.0f) / grey4, contrast4) * grey4;
 
-  Lab.xyz = XYZ_to_Lab(sRGB_to_XYZ(sRGB)).xyz;
+  RGB = prophotorgb_to_Lab(RGB);
 
-  write_imagef (out, (int2)(x, y), Lab);
+  write_imagef (out, (int2)(x, y), RGB);
 }
 
 /* helpers and kernel for the colorchecker module */

diff --git a/src/common/colorspaces_inline_conversions.h b/src/common/colorspaces_inline_conversions.h
@@ -31,9 +31,9 @@ static inline __m128 lab_f_inv_m(const __m128 x)
   const __m128 kappa_rcp_x116 = _mm_set1_ps(116.0f * 27.0f / 24389.0f);
 
   // x > epsilon
-  const __m128 res_big = _mm_mul_ps(_mm_mul_ps(x, x), x);
+  const __m128 res_big = x * x * x;
   // x <= epsilon
-  const __m128 res_small = _mm_sub_ps(_mm_mul_ps(kappa_rcp_x116, x), kappa_rcp_x16);
+  const __m128 res_small = kappa_rcp_x116 * x - kappa_rcp_x16;
 
   // blend results according to whether each component is > epsilon or not
   const __m128 mask = _mm_cmpgt_ps(x, epsilon);
@@ -49,10 +49,9 @@ static inline __m128 dt_Lab_to_XYZ_sse2(const __m128 Lab)
 
   // last component ins shuffle taken from 1st component of Lab to make sure it is not nan, so it will become
   // 0.0f in f
-  const __m128 f = _mm_mul_ps(_mm_shuffle_ps(Lab, Lab, _MM_SHUFFLE(0, 2, 0, 1)), coef);
+  const __m128 f = _mm_shuffle_ps(Lab, Lab, _MM_SHUFFLE(0, 2, 0, 1)) * coef;
 
-  return _mm_mul_ps(d50,
-                    lab_f_inv_m(_mm_add_ps(_mm_add_ps(f, _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 3, 1))), offset)));
+  return d50 * lab_f_inv_m(f + _mm_shuffle_ps(f, f, _MM_SHUFFLE(1, 1, 3, 1)) + offset);
 }
 
 static inline __m128 lab_f_m_sse2(const __m128 x)
@@ -65,12 +64,11 @@ static inline __m128 lab_f_m_sse2(const __m128 x)
   const __m128 a = _mm_castsi128_ps(
       _mm_add_epi32(_mm_cvtps_epi32(_mm_div_ps(_mm_cvtepi32_ps(_mm_castps_si128(x)), _mm_set1_ps(3.0f))),
                     _mm_set1_epi32(709921077)));
-  const __m128 a3 = _mm_mul_ps(_mm_mul_ps(a, a), a);
-  const __m128 res_big
-      = _mm_div_ps(_mm_mul_ps(a, _mm_add_ps(a3, _mm_add_ps(x, x))), _mm_add_ps(_mm_add_ps(a3, a3), x));
+  const __m128 a3 = a * a * a;
+  const __m128 res_big = a * (a3 + x + x) / (a3 + a3 + x);
 
   // calculate as if x <= epsilon : result = (kappa*x+16)/116
-  const __m128 res_small = _mm_div_ps(_mm_add_ps(_mm_mul_ps(kappa, x), _mm_set1_ps(16.0f)), _mm_set1_ps(116.0f));
+  const __m128 res_small = (kappa * x + _mm_set1_ps(16.0f)) / _mm_set1_ps(116.0f);
 
   // blend results according to whether each component is > epsilon or not
   const __m128 mask = _mm_cmpgt_ps(x, epsilon);
@@ -80,12 +78,11 @@ static inline __m128 lab_f_m_sse2(const __m128 x)
 /** uses D50 white point. */
 static inline __m128 dt_XYZ_to_Lab_sse2(const __m128 XYZ)
 {
-  const __m128 d50_inv = _mm_set_ps(0.0f, 1.0f / 0.8249f, 1.0f, 1.0f / 0.9642f);
+  const __m128 d50_inv = _mm_set_ps(1.0f, 0.8249f, 1.0f, 0.9642f);
   const __m128 coef = _mm_set_ps(0.0f, 200.0f, 500.0f, 116.0f);
-  const __m128 f = lab_f_m_sse2(_mm_mul_ps(XYZ, d50_inv));
+  const __m128 f = lab_f_m_sse2(XYZ / d50_inv);
   // because d50_inv.z is 0.0f, lab_f(0) == 16/116, so Lab[0] = 116*f[0] - 16 equal to 116*(f[0]-f[3])
-  return _mm_mul_ps(coef, _mm_sub_ps(_mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 1, 0, 1)),
-                                     _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 2, 1, 3))));
+  return coef * (_mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 1, 0, 1)) - _mm_shuffle_ps(f, f, _MM_SHUFFLE(3, 2, 1, 3)));
 }
 
 /** uses D50 white point. */
@@ -98,14 +95,14 @@ static inline __m128 dt_XYZ_to_sRGB_sse2(__m128 XYZ)
   const __m128 xyz_to_srgb_2 = _mm_setr_ps(-0.4906146f, 0.0334540f, 1.4052427f, 0.0f);
 
   __m128 rgb
-      = _mm_add_ps(_mm_mul_ps(xyz_to_srgb_0, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0))),
-                   _mm_add_ps(_mm_mul_ps(xyz_to_srgb_1, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1))),
-                              _mm_mul_ps(xyz_to_srgb_2, _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2)))));
+      = xyz_to_srgb_0 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0)) +
+        xyz_to_srgb_1 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1)) +
+        xyz_to_srgb_2 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2));
 
   // linear sRGB -> gamma corrected sRGB
   __m128 mask = _mm_cmple_ps(rgb, _mm_set1_ps(0.0031308));
-  __m128 rgb0 = _mm_mul_ps(_mm_set1_ps(12.92), rgb);
-  __m128 rgb1 = _mm_sub_ps(_mm_mul_ps(_mm_set1_ps(1.0 + 0.055), _mm_pow_ps1(rgb, 1.0 / 2.4)), _mm_set1_ps(0.055));
+  __m128 rgb0 = _mm_set1_ps(12.92) * rgb;
+  __m128 rgb1 = _mm_set1_ps(1.0 + 0.055) * _mm_pow_ps1(rgb, 1.0 / 2.4) - _mm_set1_ps(0.055);
   return _mm_or_ps(_mm_and_ps(mask, rgb0), _mm_andnot_ps(mask, rgb1));
 }
 
@@ -123,9 +120,41 @@ static inline __m128 dt_sRGB_to_XYZ_sse2(__m128 rgb)
   rgb = _mm_or_ps(_mm_and_ps(mask, rgb0), _mm_andnot_ps(mask, rgb1));
 
   __m128 XYZ
-      = _mm_add_ps(_mm_mul_ps(srgb_to_xyz_0, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0))),
-                   _mm_add_ps(_mm_mul_ps(srgb_to_xyz_1, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1))),
-                              _mm_mul_ps(srgb_to_xyz_2, _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2)))));
+      = srgb_to_xyz_0 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0)) + 
+        srgb_to_xyz_1 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1)) +
+        srgb_to_xyz_2 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2));
+  return XYZ;
+}
+
+/** uses D50 white point. */
+// see http://www.brucelindbloom.com/Eqn_RGB_XYZ_Matrix.html for the transformation matrices
+static inline __m128 dt_XYZ_to_prophotoRGB_sse2(__m128 XYZ)
+{
+  // XYZ -> prophotoRGB matrix, D50
+  const __m128 xyz_to_rgb_0 = _mm_setr_ps( 1.3459433f, -0.5445989f, 0.0000000f, 0.0f);
+  const __m128 xyz_to_rgb_1 = _mm_setr_ps(-0.2556075f,  1.5081673f, 0.0000000f, 0.0f);
+  const __m128 xyz_to_rgb_2 = _mm_setr_ps(-0.0511118f,  0.0205351f,  1.2118128f, 0.0f);
+
+  __m128 rgb
+      = xyz_to_rgb_0 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(0, 0, 0, 0)) +
+        xyz_to_rgb_1 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(1, 1, 1, 1)) +
+        xyz_to_rgb_2 * _mm_shuffle_ps(XYZ, XYZ, _MM_SHUFFLE(2, 2, 2, 2));
+  return rgb;
+}
+
+/** uses D50 white point. */
+// see http://www.brucelindbloom.com/Eqn_RGB_XYZ_Matrix.html for the transformation matrices
+static inline __m128 dt_prophotoRGB_to_XYZ_sse2(__m128 rgb)
+{
+  // prophotoRGB -> XYZ matrix, D50
+  const __m128 rgb_to_xyz_0 = _mm_setr_ps(0.7976749f, 0.2880402f, 0.0000000f, 0.0f);
+  const __m128 rgb_to_xyz_1 = _mm_setr_ps(0.1351917f, 0.7118741f, 0.0000000f, 0.0f);
+  const __m128 rgb_to_xyz_2 = _mm_setr_ps(0.0313534f, 0.0000857f, 0.8252100f, 0.0f);
+
+  __m128 XYZ
+      = rgb_to_xyz_0 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(0, 0, 0, 0)) +
+        rgb_to_xyz_1 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(1, 1, 1, 1)) +
+        rgb_to_xyz_2 * _mm_shuffle_ps(rgb, rgb, _MM_SHUFFLE(2, 2, 2, 2));
   return XYZ;
 }
 #endif
@@ -161,16 +190,16 @@ static inline float lab_f(const float x)
 /** uses D50 white point. */
 static inline void dt_XYZ_to_Lab(const float *XYZ, float *Lab)
 {
-  const float d50[3] = { 0.9642, 1.0, 0.8249 };
-  const float f[3] = { lab_f(XYZ[0] / d50[0]), lab_f(XYZ[1] / d50[1]), lab_f(XYZ[2] / d50[2]) };
+  const float d50[3] = { 0.9642f, 1.0f, 0.8249f };
+  const float f[3] = { lab_f(XYZ[0] / d50[0]), lab_f(XYZ[1]), lab_f(XYZ[2] / d50[2]) };
   Lab[0] = 116.0f * f[1] - 16.0f;
   Lab[1] = 500.0f * (f[0] - f[1]);
   Lab[2] = 200.0f * (f[1] - f[2]);
 }
 
 static inline float lab_f_inv(const float x)
 {
-  const float epsilon = 0.20689655172413796; // cbrtf(216.0f/24389.0f);
+  const float epsilon = 0.20689655172413796f; // cbrtf(216.0f/24389.0f);
   const float kappa = 24389.0f / 27.0f;
   if(x > epsilon)
     return x * x * x;
@@ -181,12 +210,12 @@ static inline float lab_f_inv(const float x)
 /** uses D50 white point. */
 static inline void dt_Lab_to_XYZ(const float *Lab, float *XYZ)
 {
-  const float d50[3] = { 0.9642, 1.0, 0.8249 };
+  const float d50[3] = { 0.96422f, 1.0f, 0.8249f };
   const float fy = (Lab[0] + 16.0f) / 116.0f;
   const float fx = Lab[1] / 500.0f + fy;
   const float fz = fy - Lab[2] / 200.0f;
   XYZ[0] = d50[0] * lab_f_inv(fx);
-  XYZ[1] = d50[1] * lab_f_inv(fy);
+  XYZ[1] = lab_f_inv(fy);
   XYZ[2] = d50[2] * lab_f_inv(fz);
 }
 
@@ -234,33 +263,44 @@ static inline void dt_sRGB_to_XYZ(const float *const sRGB, float *XYZ)
     for(int c = 0; c < 3; c++) XYZ[r] += srgb_to_xyz[r][c] * rgb[c];
 }
 
-static inline void dt_Lab_to_prophotorgb(const float *const Lab, float *rgb)
+static inline void dt_XYZ_to_prophotorgb(const float *const XYZ, float *rgb)
 {
   const float xyz_to_rgb[3][3] = {
     // prophoto rgb d50
-    { 1.3459433, -0.2556075, -0.0511118 },
-    { -0.5445989, 1.5081673, 0.0205351 },
-    { 0.0000000, 0.0000000, 1.2118128 },
+    { 1.3459433f, -0.2556075f, -0.0511118f},
+    {-0.5445989f,  1.5081673f,  0.0205351f},
+    { 0.0000000f,  0.0000000f,  1.2118128f},
   };
-
-  float XYZ[3];
-  dt_Lab_to_XYZ(Lab, XYZ);
   rgb[0] = rgb[1] = rgb[2] = 0.0f;
   for(int r = 0; r < 3; r++)
     for(int c = 0; c < 3; c++) rgb[r] += xyz_to_rgb[r][c] * XYZ[c];
 }
 
-static inline void dt_prophotorgb_to_Lab(const float *const rgb, float *Lab)
+static inline void dt_prophotorgb_to_XYZ(const float *const rgb, float *XYZ)
 {
   const float rgb_to_xyz[3][3] = {
     // prophoto rgb
-    { 0.7976749, 0.1351917, 0.0313534 },
-    { 0.2880402, 0.7118741, 0.0000857 },
-    { 0.0000000, 0.0000000, 0.8252100 },
+    {0.7976749f, 0.1351917f, 0.0313534f},
+    {0.2880402f, 0.7118741f, 0.0000857f},
+    {0.0000000f, 0.0000000f, 0.8252100f},
   };
-  float XYZ[3] = { 0.0f };
+  XYZ[0] = XYZ[1] = XYZ[2] = 0.0f;
   for(int r = 0; r < 3; r++)
     for(int c = 0; c < 3; c++) XYZ[r] += rgb_to_xyz[r][c] * rgb[c];
+}
+
+
+static inline void dt_Lab_to_prophotorgb(const float *const Lab, float *rgb)
+{
+  float XYZ[3] = { 0.0f };
+  dt_Lab_to_XYZ(Lab, XYZ);
+  dt_XYZ_to_prophotorgb(XYZ, rgb);
+}
+
+static inline void dt_prophotorgb_to_Lab(const float *const rgb, float *Lab)
+{
+  float XYZ[3] = { 0.0f };
+  dt_prophotorgb_to_XYZ(rgb, XYZ);
   dt_XYZ_to_Lab(XYZ, Lab);
 }