Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Enhancements

* **Refactor image extraction code.** The image extraction code is moved from `unstructured-inference` to `unstructured`.
* **Refactor pdfminer code.** The pdfminer code is moved from `unstructured-inference` to `unstructured`.

### Features
Expand Down
2 changes: 1 addition & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ idna==3.6
# requests
imagesize==1.4.1
# via sphinx
importlib-metadata==6.9.0
importlib-metadata==7.0.0
# via sphinx
jinja2==3.1.2
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements/build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ idna==3.6
# requests
imagesize==1.4.1
# via sphinx
importlib-metadata==6.9.0
importlib-metadata==7.0.0
# via sphinx
jinja2==3.1.2
# via
Expand Down
10 changes: 5 additions & 5 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ idna==3.6
# anyio
# jsonschema
# requests
importlib-metadata==6.9.0
importlib-metadata==7.0.0
# via
# build
# jupyter-client
Expand Down Expand Up @@ -167,7 +167,7 @@ jupyter-events==0.9.0
# via jupyter-server
jupyter-lsp==2.2.1
# via jupyterlab
jupyter-server==2.11.1
jupyter-server==2.11.2
# via
# jupyter-lsp
# jupyterlab
Expand Down Expand Up @@ -198,7 +198,7 @@ mistune==3.0.2
# via nbconvert
nbclient==0.9.0
# via nbconvert
nbconvert==7.11.0
nbconvert==7.12.0
# via
# jupyter
# jupyter-server
Expand Down Expand Up @@ -290,7 +290,7 @@ pyyaml==6.0.1
# -c test.txt
# jupyter-events
# pre-commit
pyzmq==25.1.1
pyzmq==25.1.2
# via
# ipykernel
# jupyter-client
Expand Down Expand Up @@ -405,7 +405,7 @@ webencodings==0.5.1
# via
# bleach
# tinycss2
websocket-client==1.6.4
websocket-client==1.7.0
# via jupyter-server
wheel==0.42.0
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-markdown.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# pip-compile --output-file=extra-markdown.txt extra-markdown.in
#
importlib-metadata==6.9.0
importlib-metadata==7.0.0
# via markdown
markdown==3.5.1
# via -r extra-markdown.in
Expand Down
4 changes: 2 additions & 2 deletions requirements/extra-paddleocr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ flask==3.0.0
# visualdl
flask-babel==4.0.0
# via visualdl
fonttools==4.45.1
fonttools==4.46.0
# via matplotlib
future==0.18.3
# via bce-python-sdk
Expand All @@ -59,7 +59,7 @@ imageio==2.33.0
# scikit-image
imgaug==0.4.0
# via unstructured-paddleocr
importlib-metadata==6.9.0
importlib-metadata==7.0.0
# via flask
importlib-resources==6.1.1
# via matplotlib
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-pdf-image.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ pikepdf
pypdf
# Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
# when the unstructured library is.
unstructured-inference==0.7.17
unstructured-inference==0.7.18
# unstructured fork of pytesseract that provides an interface to allow for multiple output formats
# from one tesseract call
unstructured.pytesseract>=0.3.12
6 changes: 3 additions & 3 deletions requirements/extra-pdf-image.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ filelock==3.13.1
# transformers
flatbuffers==23.5.26
# via onnxruntime
fonttools==4.45.1
fonttools==4.46.0
# via matplotlib
fsspec==2023.9.1
# via
Expand Down Expand Up @@ -134,7 +134,7 @@ pdfminer-six==20221105
# pdfplumber
pdfplumber==0.10.3
# via layoutparser
pikepdf==8.7.1
pikepdf==8.8.0
# via -r extra-pdf-image.in
pillow==10.0.1
# via
Expand Down Expand Up @@ -250,7 +250,7 @@ typing-extensions==4.8.0
# torch
tzdata==2023.3
# via pandas
unstructured-inference==0.7.17
unstructured-inference==0.7.18
# via -r extra-pdf-image.in
unstructured-pytesseract==0.3.12
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements/ingest/azure.txt
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ idna==3.6
# yarl
isodate==0.6.1
# via azure-storage-blob
msal==1.25.0
msal==1.26.0
# via
# azure-datalake-store
# azure-identity
Expand Down
2 changes: 1 addition & 1 deletion requirements/ingest/delta-table.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# pip-compile --output-file=ingest/delta-table.txt ingest/delta-table.in
#
deltalake==0.13.0
deltalake==0.14.0
# via -r ingest/delta-table.in
fsspec==2023.9.1
# via
Expand Down
6 changes: 3 additions & 3 deletions requirements/ingest/embed-aws-bedrock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ jsonpatch==1.33
# langchain-core
jsonpointer==2.4
# via jsonpatch
langchain==0.0.344
langchain==0.0.345
# via -r ingest/embed-aws-bedrock.in
langchain-core==0.0.8
langchain-core==0.0.9
# via langchain
langsmith==0.0.68
langsmith==0.0.69
# via
# langchain
# langchain-core
Expand Down
6 changes: 3 additions & 3 deletions requirements/ingest/embed-huggingface.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@ jsonpatch==1.33
# langchain-core
jsonpointer==2.4
# via jsonpatch
langchain==0.0.344
langchain==0.0.345
# via -r ingest/embed-huggingface.in
langchain-core==0.0.8
langchain-core==0.0.9
# via langchain
langsmith==0.0.68
langsmith==0.0.69
# via
# langchain
# langchain-core
Expand Down
8 changes: 4 additions & 4 deletions requirements/ingest/embed-openai.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ jsonpatch==1.33
# langchain-core
jsonpointer==2.4
# via jsonpatch
langchain==0.0.344
langchain==0.0.345
# via -r ingest/embed-openai.in
langchain-core==0.0.8
langchain-core==0.0.9
# via langchain
langsmith==0.0.68
langsmith==0.0.69
# via
# langchain
# langchain-core
Expand Down Expand Up @@ -125,7 +125,7 @@ tenacity==8.2.3
# via
# langchain
# langchain-core
tiktoken==0.5.1
tiktoken==0.5.2
# via -r ingest/embed-openai.in
tqdm==4.66.1
# via
Expand Down
2 changes: 1 addition & 1 deletion requirements/ingest/onedrive.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ idna==3.6
# via
# -c ingest/../base.txt
# requests
msal==1.25.0
msal==1.26.0
# via
# -r ingest/onedrive.in
# office365-rest-python-client
Expand Down
2 changes: 1 addition & 1 deletion requirements/ingest/outlook.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ idna==3.6
# via
# -c ingest/../base.txt
# requests
msal==1.25.0
msal==1.26.0
# via
# -r ingest/outlook.in
# office365-rest-python-client
Expand Down
2 changes: 1 addition & 1 deletion requirements/ingest/reddit.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ urllib3==1.26.18
# -c ingest/../base.txt
# -c ingest/../constraints.in
# requests
websocket-client==1.6.4
websocket-client==1.7.0
# via praw
2 changes: 1 addition & 1 deletion requirements/ingest/sharepoint.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ idna==3.6
# via
# -c ingest/../base.txt
# requests
msal==1.25.0
msal==1.26.0
# via
# -r ingest/sharepoint.in
# office365-rest-python-client
Expand Down
2 changes: 1 addition & 1 deletion requirements/ingest/slack.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
#
# pip-compile --output-file=ingest/slack.txt ingest/slack.in
#
slack-sdk==3.26.0
slack-sdk==3.26.1
# via -r ingest/slack.in
27 changes: 12 additions & 15 deletions requirements/ingest/weaviate.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,41 @@
# This file is autogenerated by pip-compile with Python 3.8
# by the following command:
#
# pip-compile --constraint=requirements/constraints.in requirements/ingest/weaviate.in
# pip-compile --output-file=ingest/weaviate.txt ingest/weaviate.in
#
authlib==1.2.1
# via weaviate-client
certifi==2023.11.17
# via
# -c requirements/constraints.in
# -c requirements/ingest/../base.txt
# -c requirements/ingest/../constraints.in
# -c ingest/../base.txt
# -c ingest/../constraints.in
# requests
cffi==1.16.0
# via cryptography
charset-normalizer==3.3.2
# via
# -c requirements/ingest/../base.txt
# -c ingest/../base.txt
# requests
cryptography==41.0.5
cryptography==41.0.7
# via authlib
idna==3.4
idna==3.6
# via
# -c requirements/ingest/../base.txt
# -c ingest/../base.txt
# requests
pycparser==2.21
# via cffi
requests==2.31.0
# via
# -c requirements/ingest/../base.txt
# -c ingest/../base.txt
# weaviate-client
urllib3==1.26.18
# via
# -c requirements/constraints.in
# -c requirements/ingest/../base.txt
# -c requirements/ingest/../constraints.in
# -c ingest/../base.txt
# -c ingest/../constraints.in
# requests
validators==0.22.0
# via weaviate-client
weaviate-client==3.25.3
# via
# -c requirements/constraints.in
# -c requirements/ingest/../constraints.in
# -r requirements/ingest/weaviate.in
# -c ingest/../constraints.in
# -r ingest/weaviate.in
4 changes: 2 additions & 2 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ flake8==6.1.0
# flake8-print
flake8-print==5.0.0
# via -r test.in
freezegun==1.2.2
freezegun==1.3.1
# via -r test.in
grpcio==1.59.3
# via -r test.in
Expand Down Expand Up @@ -111,7 +111,7 @@ requests==2.31.0
# via
# -c base.txt
# label-studio-sdk
ruff==0.1.6
ruff==0.1.7
# via -r test.in
six==1.16.0
# via
Expand Down
Loading