From ba7f59307f147f3ffde729f960c7f3f5a15db4b7 Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Sat, 7 May 2022 16:47:21 +0200 Subject: [PATCH] recode: strip transparency from images in MRC I don't think we have a way to support transparency in the PDF anyway, as we already overlay images with a mask, so just strip the transparency. Alternatively we could hide this behind an option flag and error unless the flag --flatten-transparent-images (or so) is passed. Resolves issue #45 ( https://github.com/internetarchive/archive-pdf-tools/issues/45 ) --- internetarchivepdf/recode.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/internetarchivepdf/recode.py b/internetarchivepdf/recode.py index c82c508..2b20896 100644 --- a/internetarchivepdf/recode.py +++ b/internetarchivepdf/recode.py @@ -346,6 +346,12 @@ def insert_images_mrc(to_pdf, hocr_file, from_pdf=None, image_files=None, image = Image.open(imgfile) image.load() + if image.mode in ('RGBA', 'LA'): + if image.mode == 'RGBA': + image = image.convert('RGB') + elif image.mode == 'LA': + image = image.convert('L') + if timing_data is not None: timing_data.append(('image_load', time()-t))