Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 0.7.3-dev2
## 0.7.3

* Integration of Chipperv2 and additional Chipper functionality, which includes automatic detection of GPU,
bounding box prediction and hierarchical representation.
Expand Down
2 changes: 1 addition & 1 deletion test_unstructured_inference/models/test_chippermodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def test_postprocess_bbox(decoded_str, expected_classes):


def test_run_chipper_v2():
model = get_model("chipperv2")
model = get_model("chipper")
img = Image.open("sample-docs/easy_table.jpg")
elements = model(img)
tables = [el for el in elements if el.type == "Table"]
Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.3-dev2" # pragma: no cover
__version__ = "0.7.3" # pragma: no cover
1 change: 1 addition & 0 deletions unstructured_inference/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Source(Enum):
DETECTRON2_ONNX = "detectron2_onnx"
DETECTRON2_LP = "detectron2_lp"
CHIPPER = "chipper"
CHIPPERV1 = "chipperv1"
CHIPPERV2 = "chipperv2"
PDFMINER = "pdfminer"
MERGED = "merged"
Expand Down
10 changes: 6 additions & 4 deletions unstructured_inference/models/chipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
from unstructured_inference.utils import LazyDict

MODEL_TYPES: Dict[Optional[str], Union[LazyDict, dict]] = {
"chipper": {
"chipperv1": {
"pre_trained_model_repo": "unstructuredio/ved-fine-tuning",
"swap_head": False,
"start_token_prefix": "<s_",
"prompt": "<s>",
"max_length": 1200,
"heatmap_h": 52,
"heatmap_w": 39,
"source": Source.CHIPPER,
"source": Source.CHIPPERV1,
},
"chipperv2": {
"pre_trained_model_repo": "unstructuredio/chipper-fast-fine-tuning",
Expand All @@ -37,10 +37,12 @@
"max_length": 1536,
"heatmap_h": 40,
"heatmap_w": 30,
"source": Source.CHIPPERV2,
"source": Source.CHIPPER,
},
}

MODEL_TYPES["chipper"] = MODEL_TYPES["chipperv2"]


class UnstructuredChipperModel(UnstructuredElementExtractionModel):
def initialize(
Expand Down Expand Up @@ -309,7 +311,7 @@ def deduplicate_detected_elements(
min_text_size: int = 15,
) -> List[LayoutElement]:
"""For chipper, remove elements from other sources."""
return [el for el in elements if el.source in (Source.CHIPPER, Source.CHIPPERV2)]
return [el for el in elements if el.source in (Source.CHIPPER, Source.CHIPPERV1)]

def adjust_bbox(self, bbox, x_offset, y_offset, ratio, target_size):
"""Translate bbox by (x_offset, y_offset) and shrink by ratio."""
Expand Down