From 93ecce52583992e298ab607643c8f20d41aadcbe Mon Sep 17 00:00:00 2001 From: Antonio Jimeno Yepes Date: Tue, 17 Oct 2023 16:32:09 +1100 Subject: [PATCH 1/3] First commit --- unstructured_inference/models/chipper.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index 79fe2029..1d135416 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -138,6 +138,7 @@ def predict(self, image) -> List[LayoutElement]: """Do inference using the wrapped model.""" tokens, decoder_cross_attentions = self.predict_tokens(image) elements = self.postprocess(image, tokens, decoder_cross_attentions) + print(elements) return elements def predict_tokens( @@ -146,13 +147,11 @@ def predict_tokens( ) -> Tuple[List[int], Sequence[Sequence[torch.Tensor]]]: """Predict tokens from image.""" transformers.set_seed(42) + print("page") with torch.no_grad(): encoder_outputs = self.model.encoder( self.processor( - np.array( - image, - np.float32, - ), + image, return_tensors="pt", ).pixel_values.to(self.device), ) @@ -177,9 +176,9 @@ def predict_tokens( encoder_outputs=encoder_outputs, input_ids=self.input_ids, logits_processor=self.logits_processor, - do_sample=False, + do_sample=True, no_repeat_ngram_size=0, - num_beams=5, + num_beams=3, return_dict_in_generate=True, output_attentions=True, output_scores=True, @@ -304,7 +303,7 @@ def postprocess( end = i # If exited before eos is achieved - if start != -1 and start < end and len(parents) > 0: + if start != -1 and start <= end and len(parents) > 0: slicing_end = end + 1 string = self.tokenizer.decode(output_ids[start:slicing_end]) From f61a37a5a97bbeaeef85ef50872379d53e91880b Mon Sep 17 00:00:00 2001 From: Antonio Jimeno Yepes Date: Tue, 17 Oct 2023 16:35:33 +1100 Subject: [PATCH 2/3] Removed print --- unstructured_inference/models/chipper.py | 2 -- 1 file changed, 2 deletions(-) diff 
--git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index 1d135416..8fcd08a3 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -138,7 +138,6 @@ def predict(self, image) -> List[LayoutElement]: """Do inference using the wrapped model.""" tokens, decoder_cross_attentions = self.predict_tokens(image) elements = self.postprocess(image, tokens, decoder_cross_attentions) - print(elements) return elements def predict_tokens( @@ -147,7 +146,6 @@ def predict_tokens( ) -> Tuple[List[int], Sequence[Sequence[torch.Tensor]]]: """Predict tokens from image.""" transformers.set_seed(42) - print("page") with torch.no_grad(): encoder_outputs = self.model.encoder( self.processor( From ce15f8e6718d2d7fab632892cdb539796ab3ca42 Mon Sep 17 00:00:00 2001 From: Antonio Jimeno Yepes Date: Tue, 17 Oct 2023 16:41:11 +1100 Subject: [PATCH 3/3] Version bump --- CHANGELOG.md | 6 ++++++ unstructured_inference/__version__.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 16c0f874..eec18d25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.7.7 + +* Fix a memory leak in DonutProcessor when using large images in numpy format +* Set the right settings for beam search size > 1 +* Fix a bug that in very rare cases caused the last element predicted by Chipper to have a bbox = None + ## 0.7.6 * fix a bug where invalid zoom factor lead to exceptions; now invalid zoom factors results in no scaling of the image diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 11322ac5..bd4e32c6 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.7.6" # pragma: no cover +__version__ = "0.7.7" # pragma: no cover