diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e22031e..84750a21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ -## 0.7.4-dev0 +## 0.7.4-dev1 +* Fixed a bug where Chipper annotations were removed when PDFMiner predicted that image text occupied the full page. +* Added random seed to Chipper text generation to avoid differences between calls to Chipper. * Allows user to use super-gradients model if they have a callback predict function, a yaml file with names field corresponding to classes and a path to the model weights ## 0.7.3 diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index b22b05c4..58c8ef56 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.7.4-dev0" # pragma: no cover +__version__ = "0.7.4-dev1" # pragma: no cover diff --git a/unstructured_inference/inference/layoutelement.py b/unstructured_inference/inference/layoutelement.py index 7f7df14a..a602faf3 100644 --- a/unstructured_inference/inference/layoutelement.py +++ b/unstructured_inference/inference/layoutelement.py @@ -125,6 +125,9 @@ def merge_inferred_layout_with_extracted_layout( continue region_matched = False for inferred_region in inferred_layout: + if inferred_region.source in (Source.CHIPPER, Source.CHIPPERV1): + continue + if inferred_region.bbox.intersects(extracted_region.bbox): same_bbox = region_bounding_boxes_are_almost_the_same( inferred_region.bbox, diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index 315b7c57..40df874e 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -5,6 +5,7 @@ import cv2 import numpy as np import torch +import transformers from huggingface_hub import hf_hub_download from PIL.Image import Image from transformers import DonutProcessor, VisionEncoderDecoderModel @@ -134,6 +135,7 @@ def predict_tokens( image: Image, ) -> Tuple[List[int],
Sequence[Sequence[torch.Tensor]]]: """Predict tokens from image.""" + transformers.set_seed(42) with torch.no_grad(): outputs = self.model.generate( self.processor(