From bde92fda67a77f8e30be7b787ee9c62ea5e9169c Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Thu, 6 Jun 2024 15:57:18 +0200 Subject: [PATCH] upgrade transformers and reorganize extras (#7815) --- pyproject.toml | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4ef44f00eac..c40d49ea96e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,23 +100,25 @@ format-check = "black --check ." [tool.hatch.envs.test] extra-dependencies = [ - "transformers[torch,sentencepiece]==4.38.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... - "huggingface_hub>=0.23.0", # TGI Generators and TEI Embedders - "spacy>=3.7,<3.8", # NamedEntityExtractor - "spacy-curated-transformers>=0.2,<=0.3", # NamedEntityExtractor - "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # NamedEntityExtractor + "transformers[torch,sentencepiece]==4.41.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators... + "huggingface_hub>=0.23.0", # Hugging Face API Generators and Embedders + "sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder + "langdetect", # TextLanguageRouter and DocumentLanguageClassifier + "openai-whisper>=20231106", # LocalWhisperTranscriber + + # NamedEntityExtractor + "spacy>=3.7,<3.8", + "spacy-curated-transformers>=0.2,<=0.3", + "en-core-web-trf @ https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.7.3/en_core_web_trf-3.7.3-py3-none-any.whl", # Converters - "pypdf", # PyPDFConverter + "pypdf", # PyPDFToDocument "pdfminer.six", # PDFMinerToDocument "markdown-it-py", # MarkdownToDocument "mdit_plain", # MarkdownToDocument "tika", # TikaDocumentConverter "azure-ai-formrecognizer>=3.2.0b2", # AzureOCRDocumentConverter - "langdetect", # TextLanguageRouter and DocumentLanguageClassifier - "sentence-transformers>=2.2.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder - "openai-whisper>=20231106", # LocalWhisperTranscriber - "trafilatura", # Fulltext extraction from HTML pages + "trafilatura", # HTMLToDocument # OpenAPI "jsonref", # OpenAPIServiceConnector, OpenAPIServiceToFunctions