Skip to content
This repository has been archived by the owner on Feb 16, 2023. It is now read-only.

Commit

Permalink
Merge 1e28810 into 7bc8325
Browse files: browse the repository at this point in the history
  • Loading branch information
muellermartin committed Feb 21, 2022
2 parents 7bc8325 + 1e28810 commit f8b736b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
17 changes: 17 additions & 0 deletions src/paperless_tesseract/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ def is_image(self, mime_type):
"image/gif",
]

def has_alpha(self, image):
    """Report whether the image file is stored in an alpha-carrying mode.

    The file at ``image`` is opened with ``Image.open`` and its ``mode``
    is compared against the two modes this parser treats as having an
    alpha layer: ``'RGBA'`` and ``'LA'``.

    :param image: path (or other value accepted by ``Image.open``) of the
        image to inspect.
    :return: ``True`` if the image mode is ``'RGBA'`` or ``'LA'``,
        otherwise ``False``.
    """
    alpha_modes = ('RGBA', 'LA')
    with Image.open(image) as opened:
        # Read the mode while the file is open; the membership test
        # itself does not need the image handle.
        mode = opened.mode
    return mode in alpha_modes

def get_dpi(self, image):
try:
with Image.open(image) as im:
Expand Down Expand Up @@ -182,6 +186,19 @@ def construct_ocrmypdf_parameters(self,
if self.is_image(mime_type):
dpi = self.get_dpi(input_file)
a4_dpi = self.calculate_a4_dpi(input_file)

if self.has_alpha(input_file):
self.log(
"info",
f"Removing alpha layer from {input_file} "
"for compatibility with img2pdf"
)
with Image.open(input_file) as im:
background = Image.new('RGBA', im.size, (255, 255, 255))
background.alpha_composite(im)
background = background.convert('RGB')
background.save(input_file, format=im.format)

if dpi:
self.log(
"debug",
Expand Down
9 changes: 5 additions & 4 deletions src/paperless_tesseract/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,14 @@ def test_image_simple(self):

self.assertContainsStrings(parser.get_text(), ["This is a test document."])

def test_image_simple_alpha_fail(self):
def test_image_simple_alpha(self):
parser = RasterisedDocumentParser(None)

def f():
parser.parse(os.path.join(self.SAMPLE_FILES, "simple-alpha.png"), "image/png")
parser.parse(os.path.join(self.SAMPLE_FILES, "simple-alpha.png"), "image/png")

self.assertRaises(ParseError, f)
self.assertTrue(os.path.isfile(parser.archive_path))

self.assertContainsStrings(parser.get_text(), ["This is a test document."])

def test_image_calc_a4_dpi(self):
parser = RasterisedDocumentParser(None)
Expand Down

0 comments on commit f8b736b

Please sign in to comment.