Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 0.3.1

* Pin protobuf to `<3.21` to avoid the "Descriptors cannot not be created directly" TypeError
* Make paddleocr an extra again

## 0.3.0

* Fix for text block detection
Expand Down
6 changes: 4 additions & 2 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ ipykernel==6.22.0
# nbclassic
# notebook
# qtconsole
ipython==8.11.0
ipython==8.12.0
# via
# -r requirements/dev.in
# ipykernel
Expand Down Expand Up @@ -143,7 +143,7 @@ matplotlib-inline==0.1.6
# ipython
mistune==2.0.5
# via nbconvert
nbclassic==0.5.3
nbclassic==0.5.4
# via notebook
nbclient==0.7.2
# via nbconvert
Expand Down Expand Up @@ -304,6 +304,8 @@ traitlets==5.9.0
# nbformat
# notebook
# qtconsole
typing-extensions==4.5.0
# via ipython
uri-template==1.2.0
# via jsonschema
wcwidth==0.2.6
Expand Down
18 changes: 17 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,22 @@
"opencv-python==4.6.0.66",
"onnxruntime",
"transformers",
'unstructured.PaddleOCR ; platform_machine=="x86_64"',
],
extras_require={
"tables": [
'unstructured.PaddleOCR ; platform_machine=="x86_64"',
# NOTE(crag): workaround for the error output below
# ERROR test_unstructured/partition/test_common.py - TypeError: Descriptors cannot not
# be created directly.
# If this call came from a _pb2.py file, your generated code is out of date and must be
# regenerated with protoc >= 3.19.0.
# If you cannot immediately regenerate your protos, some other possible workarounds are:
# 1. Downgrade the protobuf package to 3.20.x or lower.
# 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python
# parsing and will be much slower).
"protobuf<3.21",
# NOTE(alan): Require >=2.4 (a lower bound, not an exact pin) to get around error:
# undefined symbol: _dl_sym, version GLIBC_PRIVATE
"paddlepaddle>=2.4",
]
},
)
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.0" # pragma: no cover
__version__ = "0.3.1" # pragma: no cover
6 changes: 3 additions & 3 deletions unstructured_inference/models/paddle_ocr.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from unstructured_paddleocr import PaddleOCR

paddle_ocr: PaddleOCR = None
paddle_ocr = None # type: ignore


def load_agent():
"""Loads the PaddleOCR agent as a global variable to ensure that we only load it once."""

from unstructured_paddleocr import PaddleOCR

global paddle_ocr
paddle_ocr = PaddleOCR(use_angle_cls=True, lang="en", mkl_dnn=True, show_log=False)

Expand Down