From 106745f8382424d4f1083b3297d0004737cb762b Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:06:17 -0500 Subject: [PATCH 1/8] chipper should point to chipper latest version --- unstructured_inference/models/chipper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index 3a1d04ea..d5a4b24f 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -18,7 +18,7 @@ from unstructured_inference.utils import LazyDict MODEL_TYPES: Dict[Optional[str], Union[LazyDict, dict]] = { - "chipper": { + "chipperv1": { "pre_trained_model_repo": "unstructuredio/ved-fine-tuning", "swap_head": False, "start_token_prefix": " Date: Wed, 11 Oct 2023 18:06:28 -0500 Subject: [PATCH 2/8] update sources --- unstructured_inference/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_inference/constants.py b/unstructured_inference/constants.py index 3585b077..f6cc7c73 100644 --- a/unstructured_inference/constants.py +++ b/unstructured_inference/constants.py @@ -11,7 +11,7 @@ class Source(Enum): DETECTRON2_ONNX = "detectron2_onnx" DETECTRON2_LP = "detectron2_lp" CHIPPER = "chipper" - CHIPPERV2 = "chipperv2" + CHIPPERV1 = "chipperv1" PDFMINER = "pdfminer" MERGED = "merged" From 6a72e5dba998fa95275e3b82069e6ab6327bf595 Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:06:36 -0500 Subject: [PATCH 3/8] update test --- test_unstructured_inference/models/test_chippermodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_unstructured_inference/models/test_chippermodel.py b/test_unstructured_inference/models/test_chippermodel.py index 17840d03..a0b1a6c1 100644 --- a/test_unstructured_inference/models/test_chippermodel.py +++ b/test_unstructured_inference/models/test_chippermodel.py @@ -237,7 +237,7 @@ def test_postprocess_bbox(decoded_str, expected_classes): def test_run_chipper_v2(): - model = get_model("chipperv2") + model = get_model("chipper") img = Image.open("sample-docs/easy_table.jpg") elements = model(img) tables = [el for el in elements if el.type == "Table"] From 256a0a7a38498fb98245c244a6205671c9cc79b7 Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:07:33 -0500 Subject: [PATCH 4/8] Update sources --- unstructured_inference/models/chipper.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index d5a4b24f..f86bfdaf 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -26,7 +26,7 @@ "max_length": 1200, "heatmap_h": 52, "heatmap_w": 39, - "source": Source.CHIPPER, + "source": Source.CHIPPERV1, }, "chipper": { "pre_trained_model_repo": "unstructuredio/chipper-fast-fine-tuning", @@ -37,7 +37,7 @@ "max_length": 1536, "heatmap_h": 40, "heatmap_w": 30, - "source": Source.CHIPPERV2, + "source": Source.CHIPPER, }, } @@ -309,7 +309,7 @@ def deduplicate_detected_elements( min_text_size: int = 15, ) -> List[LayoutElement]: """For chipper, remove elements from other sources.""" - return [el for el in elements if el.source in (Source.CHIPPER, Source.CHIPPERV2)] + return [el for el in elements if el.source in (Source.CHIPPER, Source.CHIPPERV1)] def adjust_bbox(self, bbox, x_offset, y_offset, ratio, target_size): """Translate bbox by (x_offset, y_offset) and shrink by ratio.""" From da8b6e76e70193702c550e3027074169ea54a542 Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:10:16 -0500 Subject: [PATCH 5/8] update changelog --- CHANGELOG.md | 2 +- unstructured_inference/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e054dfee..cdef1f4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.7.3-dev2 +## 0.7.3-dev3 * Integration of Chipperv2 and additional Chipper functionality, which includes automatic detection of GPU, bounding box prediction and hierarchical representation. diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 7e5ecedb..63e5d507 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.7.3-dev2" # pragma: no cover +__version__ = "0.7.3-dev3" # pragma: no cover From 749c365feba7fade6d753af69d0a60be0d5bb2bb Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:24:00 -0500 Subject: [PATCH 6/8] 2 references to latest version --- unstructured_inference/models/chipper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py index f86bfdaf..315b7c57 100644 --- a/unstructured_inference/models/chipper.py +++ b/unstructured_inference/models/chipper.py @@ -28,7 +28,7 @@ "heatmap_w": 39, "source": Source.CHIPPERV1, }, - "chipper": { + "chipperv2": { "pre_trained_model_repo": "unstructuredio/chipper-fast-fine-tuning", "swap_head": True, "swap_head_hidden_layer_size": 128, @@ -41,6 +41,8 @@ }, } +MODEL_TYPES["chipper"] = MODEL_TYPES["chipperv2"] + class UnstructuredChipperModel(UnstructuredElementExtractionModel): def initialize( From 7eff2bd8e072316bb85a329f2e53ce8f21952cf5 Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:28:11 -0500 Subject: [PATCH 7/8] Add chipperv2 constant --- unstructured_inference/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unstructured_inference/constants.py b/unstructured_inference/constants.py index f6cc7c73..e13e64d1 100644 --- a/unstructured_inference/constants.py +++ b/unstructured_inference/constants.py @@ -12,6 +12,7 @@ class Source(Enum): DETECTRON2_LP = "detectron2_lp" CHIPPER = "chipper" CHIPPERV1 = "chipperv1" + CHIPPERV2 = "chipperv2" PDFMINER = "pdfminer" MERGED = "merged" From 659bc79d75a34fb95d83d471bb6b8bd814d8b7ea Mon Sep 17 00:00:00 2001 From: Alan Bertl Date: Wed, 11 Oct 2023 18:32:15 -0500 Subject: [PATCH 8/8] release version --- CHANGELOG.md | 2 +- unstructured_inference/__version__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdef1f4d..b10a7d88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.7.3-dev3 +## 0.7.3 * Integration of Chipperv2 and additional Chipper functionality, which includes automatic detection of GPU, bounding box prediction and hierarchical representation. diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 63e5d507..26d31cde 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.7.3-dev3" # pragma: no cover +__version__ = "0.7.3" # pragma: no cover