LJpegDecompressor: support degenerate raws with width % 2 != 0

This is needed e.g for DNG's converted from Panasonic DMC-LX1 raws. I have no words. What were they (Adobe) thinking. They discard padding columns, that were garbage, and end up with such an odd raw, where the tiles (LJPEG-compressed) still have padding, yet the DNG image sizes specify that it has to be discarded. What is even worse, the image width is not multiple of two, and the LJPEG's components-per-pixel is 2, so we have to very-special-handle this case, to not completely discard that last required block (then we'd be producing nondeterministic garbage in those last few columns), but also not fully decode it, because we don't have anywhere to write it, We could, of course, introduce something like "internal padding columnts", in fact we already pad the rows to be 16-byte aligned, but that padding is used for ASAN overflow detection. So right now i'm not seeing any more reasonable fix. :/ Fixes https://redmine.darktable.org/issues/12239
darktable-org · Jun 20, 2018 · 5575910 · 5575910
1 parent a3bf06c
commit 5575910
Show file tree

Hide file tree

Showing 2 changed files with 72 additions and 28 deletions.
diff --git a/src/librawspeed/decompressors/LJpegDecompressor.cpp b/src/librawspeed/decompressors/LJpegDecompressor.cpp
@@ -95,38 +95,58 @@ void LJpegDecompressor::decodeScan()
   if ((mRaw->getCpp() * (mRaw->dim.x - offX)) < frame.cps)
     ThrowRDE("Got less pixels than the components per sample");
 
-  const auto tilePixelBlocks = mRaw->getCpp() * w;
-  if (tilePixelBlocks % frame.cps != 0) {
-    ThrowRDE("Tile component width (%u) is not multiple of LJpeg CPS (%u)",
-             tilePixelBlocks, frame.cps);
-  }
+  // How many output pixels are we expected to produce, as per DNG tiling?
+  const auto tileRequiredWidth = mRaw->getCpp() * w;
 
-  wBlocks = tilePixelBlocks / frame.cps;
-  if (frame.w < wBlocks || frame.h < h) {
+  // How many full pixel blocks do we need to consume for that?
+  const auto blocksToConsume = roundUpDivision(tileRequiredWidth, frame.cps);
+  if (frame.w < blocksToConsume || frame.h < h) {
     ThrowRDE("LJpeg frame (%u, %u) is smaller than expected (%u, %u)",
-             frame.cps * frame.w, frame.h, tilePixelBlocks, h);
+             frame.cps * frame.w, frame.h, tileRequiredWidth, h);
   }
 
-  switch (frame.cps) {
-  case 2:
-    decodeN<2>();
-    break;
-  case 3:
-    decodeN<3>();
-    break;
-  case 4:
-    decodeN<4>();
-    break;
-  default:
-    ThrowRDE("Unsupported number of components: %u", frame.cps);
+  // How many full pixel blocks will we produce?
+  fullBlocks = tileRequiredWidth / frame.cps; // Truncating division!
+  // Do we need to also produce part of a block?
+  trailingPixels = tileRequiredWidth % frame.cps;
+
+  if (trailingPixels == 0) {
+    switch (frame.cps) {
+    case 2:
+      decodeN<2>();
+      break;
+    case 3:
+      decodeN<3>();
+      break;
+    case 4:
+      decodeN<4>();
+      break;
+    default:
+      ThrowRDE("Unsupported number of components: %u", frame.cps);
+    }
+  } else /* trailingPixels != 0 */ {
+    // FIXME: using different function just for one tile likely causes
+    // i-cache misses and whatnot. Need to check how not splitting it into
+    // two different functions affects performance of the normal case.
+    switch (frame.cps) {
+    case 2:
+      decodeN<2, /*WeirdWidth=*/true>();
+      break;
+    case 3:
+      decodeN<3, /*WeirdWidth=*/true>();
+      break;
+    case 4:
+      decodeN<4, /*WeirdWidth=*/true>();
+      break;
+    default:
+      ThrowRDE("Unsupported number of components: %u", frame.cps);
+    }
   }
 }
 
 // N_COMP == number of components (2, 3 or 4)
 
-template <int N_COMP>
-void LJpegDecompressor::decodeN()
-{
+template <int N_COMP, bool WeirdWidth> void LJpegDecompressor::decodeN() {
   assert(mRaw->getCpp() > 0);
   assert(N_COMP > 0);
   assert(N_COMP >= mRaw->getCpp());
@@ -161,14 +181,37 @@ void LJpegDecompressor::decodeN()
     // the predictor for the next line is the start of this line
     predNext = dest;
 
-    // For x, we first process all pixels within the image buffer ...
-    for (unsigned x = 0; x < wBlocks; ++x) {
+    unsigned x = 0;
+
+    // For x, we first process all full pixel blocks within the image buffer ...
+    for (; x < fullBlocks; ++x) {
       unroll_loop<N_COMP>([&](int i) {
         *dest++ = pred[i] += ht[i]->decodeNext(bitStream);
       });
     }
+
+    // Sometimes we also need to consume one more block, and produce part of it.
+    if /*constexpr*/ (WeirdWidth) {
+      // FIXME: evaluate i-cache implications due to this being compile-time.
+      static_assert(N_COMP > 1, "can't want part of 1-pixel-wide block");
+      // Some rather esoteric DNG's have odd dimensions, e.g. width % 2 = 1.
+      // We may end up needing just part of last N_COMP pixels.
+      assert(trailingPixels > 0);
+      assert(trailingPixels < N_COMP);
+      unsigned c = 0;
+      for (; c < trailingPixels; ++c) {
+        *dest++ = pred[c] += ht[c]->decodeNext(bitStream);
+      }
+      // Discard the rest of the block.
+      assert(c < N_COMP);
+      for (; c < N_COMP; ++c) {
+        ht[c]->decodeNext(bitStream);
+      }
+      ++x; // We did just process one more block.
+    }
+
     // ... and discard the rest.
-    for (unsigned x = wBlocks; x < frame.w; ++x) {
+    for (; x < frame.w; ++x) {
       unroll_loop<N_COMP>([&](int i) {
         ht[i]->decodeNext(bitStream);
       });

diff --git a/src/librawspeed/decompressors/LJpegDecompressor.h b/src/librawspeed/decompressors/LJpegDecompressor.h
@@ -33,14 +33,15 @@ class RawImage;
 class LJpegDecompressor final : public AbstractLJpegDecompressor
 {
   void decodeScan() override;
-  template<int N_COMP> void decodeN();
+  template <int N_COMP, bool WeirdWidth = false> void decodeN();
 
   uint32 offX = 0;
   uint32 offY = 0;
   uint32 w = 0;
   uint32 h = 0;
 
-  uint32 wBlocks = 0;
+  uint32 fullBlocks = 0;
+  uint32 trailingPixels = 0;
 
 public:
   LJpegDecompressor(const ByteStream& bs, const RawImage& img);