diff --git a/CHANGELOG.md b/CHANGELOG.md index 81fff82a..63e90a04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.3.0 + +* Fix for text block detection +* Add paddleocr dependency to setup for x86_64 machines + ## 0.2.14 * Suppressed processing progress bars diff --git a/requirements/base.txt b/requirements/base.txt index d488f683..d08b0a8d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -22,7 +22,7 @@ coloredlogs==15.0.1 # via onnxruntime contourpy==1.0.7 # via matplotlib -cryptography==39.0.2 +cryptography==40.0.1 # via pdfminer-six cycler==0.11.0 # via matplotlib @@ -30,14 +30,14 @@ effdet==0.3.0 # via layoutparser fastapi==0.95.0 # via unstructured-inference (setup.py) -filelock==3.10.0 +filelock==3.10.7 # via # huggingface-hub # torch # transformers flatbuffers==23.3.3 # via onnxruntime -fonttools==4.39.2 +fonttools==4.39.3 # via matplotlib h11==0.14.0 # via uvicorn @@ -86,7 +86,7 @@ omegaconf==2.3.0 # via effdet onnxruntime==1.14.1 # via unstructured-inference (setup.py) -opencv-python==4.7.0.72 +opencv-python==4.6.0.66 # via # layoutparser # unstructured-inference (setup.py) @@ -121,7 +121,7 @@ pycocotools==2.0.6 # via effdet pycparser==2.21 # via cffi -pydantic==1.10.6 +pydantic==1.10.7 # via fastapi pyparsing==3.0.9 # via matplotlib @@ -133,7 +133,7 @@ python-dateutil==2.8.2 # pandas python-multipart==0.0.6 # via unstructured-inference (setup.py) -pytz==2022.7.1 +pytz==2023.3 # via pandas pyyaml==6.0 # via @@ -142,7 +142,7 @@ pyyaml==6.0 # omegaconf # timm # transformers -regex==2022.10.31 +regex==2023.3.23 # via transformers requests==2.28.2 # via @@ -161,7 +161,7 @@ sympy==1.11.1 # via # onnxruntime # torch -timm==0.6.12 +timm==0.6.13 # via effdet tokenizers==0.13.2 # via transformers @@ -181,7 +181,7 @@ tqdm==4.65.0 # huggingface-hub # iopath # transformers -transformers==4.27.2 +transformers==4.27.4 # via unstructured-inference (setup.py) typing-extensions==4.5.0 # via diff --git a/requirements/dev.txt b/requirements/dev.txt index 3f580044..4f083cfd 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -6,6 +6,10 @@ # anyio==3.6.2 # via jupyter-server +appnope==0.1.3 + # via + # ipykernel + # ipython argon2-cffi==21.3.0 # via # jupyter-server @@ -31,7 +35,7 @@ cffi==1.15.1 # via argon2-cffi-bindings click==8.1.3 # via pip-tools -comm==0.1.2 +comm==0.1.3 # via ipykernel debugpy==1.6.6 # via ipykernel @@ -57,6 +61,7 @@ importlib-resources==5.12.0 # via jsonschema ipykernel==6.22.0 # via + # ipywidgets # jupyter # jupyter-console # nbclassic @@ -64,7 +69,7 @@ ipykernel==6.22.0 # qtconsole ipython==8.11.0 # via - # -r dev.in + # -r requirements/dev.in # ipykernel # ipywidgets # jupyter-console @@ -73,7 +78,7 @@ ipython-genutils==0.2.0 # nbclassic # notebook # qtconsole -ipywidgets==8.0.5 +ipywidgets==8.0.6 # via jupyter isoduration==20.11.0 # via jsonschema @@ -126,7 +131,7 @@ jupyter-server-terminals==0.4.4 # via jupyter-server jupyterlab-pygments==0.2.2 # via nbconvert -jupyterlab-widgets==3.0.6 +jupyterlab-widgets==3.0.7 # via ipywidgets markupsafe==2.1.2 # via @@ -184,7 +189,7 @@ pip-tools==6.12.3 # via -r requirements/dev.in pkgutil-resolve-name==1.3.10 # via jsonschema -platformdirs==3.1.1 +platformdirs==3.2.0 # via jupyter-core prometheus-client==0.16.0 # via @@ -234,7 +239,7 @@ pyzmq==25.0.2 # qtconsole qtconsole==5.4.1 # via jupyter -qtpy==2.3.0 +qtpy==2.3.1 # via qtconsole rfc3339-validator==0.1.4 # via @@ -269,6 +274,10 @@ terminado==0.17.1 # notebook tinycss2==1.2.1 # via nbconvert +tomli==2.0.1 + # via + # build + # pyproject-hooks tornado==6.2 # via # ipykernel @@ -299,7 +308,7 @@ uri-template==1.2.0 # via jsonschema wcwidth==0.2.6 # via prompt-toolkit -webcolors==1.12 +webcolors==1.13 # via jsonschema webencodings==0.5.1 # via @@ -309,7 +318,7 @@ websocket-client==1.5.1 # via jupyter-server wheel==0.40.0 # via pip-tools -widgetsnbextension==4.0.6 +widgetsnbextension==4.0.7 # via ipywidgets zipp==3.15.0 # via diff --git a/requirements/test.txt b/requirements/test.txt index 367afca5..6093149e 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -10,7 +10,7 @@ appdirs==1.4.4 # via label-studio-tools attrs==22.2.0 # via pytest -black==23.1.0 +black==23.3.0 # via -r requirements/test.in certifi==2022.12.7 # via @@ -29,7 +29,7 @@ coverage[toml]==7.2.2 # pytest-cov exceptiongroup==1.1.1 # via pytest -filelock==3.10.0 +filelock==3.10.7 # via huggingface-hub flake8==6.0.0 # via @@ -82,13 +82,13 @@ pdf2image==1.16.3 # via -r requirements/test.in pillow==9.4.0 # via pdf2image -platformdirs==3.1.1 +platformdirs==3.2.0 # via black pluggy==1.0.0 # via pytest pycodestyle==2.10.0 # via flake8 -pydantic==1.10.6 +pydantic==1.10.7 # via label-studio-sdk pydocstyle==6.3.0 # via flake8-docstrings diff --git a/setup.py b/setup.py index 9a32d82f..7de0e198 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,6 @@ limitations under the License. """ from setuptools import setup, find_packages -from platform import machine from unstructured_inference.__version__ import __version__ @@ -60,6 +59,6 @@ "opencv-python==4.6.0.66", "onnxruntime", "transformers", + 'unstructured.PaddleOCR ; platform_machine=="x86_64"', ], - extras_require={"paddle-ocr": "unstructured.PaddleOCR"}, ) diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py index 9f714b99..fb9998eb 100644 --- a/unstructured_inference/__version__.py +++ b/unstructured_inference/__version__.py @@ -1 +1 @@ -__version__ = "0.2.14" # pragma: no cover +__version__ = "0.3.0" # pragma: no cover diff --git a/unstructured_inference/inference/elements.py b/unstructured_inference/inference/elements.py index 7394c402..a30c49f8 100644 --- a/unstructured_inference/inference/elements.py +++ b/unstructured_inference/inference/elements.py @@ -58,6 +58,11 @@ def is_in(self, other: Rectangle, error_margin: Optional[int] = None): ] ) + @property + def coordinates(self): + """Gets coordinates of the rectangle""" + return ((self.x1, self.y1), (self.x1, self.y2), (self.x2, self.y2), (self.x2, self.y1)) + @dataclass class TextRegion(Rectangle): @@ -77,7 +82,12 @@ class LayoutElement(TextRegion): def to_dict(self) -> dict: """Converts the class instance to dictionary form.""" - return self.__dict__ + out_dict = { + "coordinates": self.coordinates, + "text": self.text, + "type": self.type, + } + return out_dict @classmethod def from_region(cls, region: Rectangle): diff --git a/unstructured_inference/inference/layout.py b/unstructured_inference/inference/layout.py index 36f484d0..e187f17f 100644 --- a/unstructured_inference/inference/layout.py +++ b/unstructured_inference/inference/layout.py @@ -363,12 +363,21 @@ def load_pdf( ) word_objs = [ TextRegion( - x1=word["x0"], y1=word["top"], x2=word["x1"], y2=word["bottom"], text=word["text"] + x1=word["x0"] * dpi / 72, + y1=word["top"] * dpi / 72, + x2=word["x1"] * dpi / 72, + y2=word["bottom"] * dpi / 72, + text=word["text"], ) for word in plumber_words ] image_objs = [ - ImageTextRegion(x1=image["x0"], y1=image["y0"], x2=image["x1"], y2=image["y1"]) + ImageTextRegion( + x1=image["x0"] * dpi / 72, + y1=image["y0"] * dpi / 72, + x2=image["x1"] * dpi / 72, + y2=image["y1"] * dpi / 72, + ) for image in page.images ] layout = word_objs + image_objs