Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/unstract/sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.11.0"
__version__ = "0.11.1"


def get_sdk_version():
Expand Down
2 changes: 2 additions & 0 deletions src/unstract/sdk/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,13 @@ class ToolSettingsKey:
LLM_ADAPTER_ID (str): The key for the LLM adapter ID.
EMBEDDING_ADAPTER_ID (str): The key for the embedding adapter ID.
VECTOR_DB_ADAPTER_ID (str): The key for the vector DB adapter ID.
X2TEXT_ADAPTER_ID (str): The key for the X2Text adapter ID.
"""

LLM_ADAPTER_ID = "llmAdapterId"
EMBEDDING_ADAPTER_ID = "embeddingAdapterId"
VECTOR_DB_ADAPTER_ID = "vectorDbAdapterId"
X2TEXT_ADAPTER_ID = "x2TextAdapterId"


class FileReaderSettings:
Expand Down
9 changes: 6 additions & 3 deletions src/unstract/sdk/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ def index_file(
self.tool.stream_log("Extracting text from input file")
full_text = []
x2text = X2Text(tool=self.tool)
x2text_adapter: X2TextAdapter = x2text.get_x2text(
x2text_adapter_inst: X2TextAdapter = x2text.get_x2text(
adapter_instance_id=x2text_adapter
)
extracted_text = x2text_adapter.process(input_file_path=file_path)
extracted_text = x2text_adapter_inst.process(input_file_path=file_path)
full_text.append(
{
"section": "full",
Expand All @@ -128,6 +128,7 @@ def index_file(
file_hash=file_hash,
vector_db=vector_db,
embedding=embedding_type,
x2text=x2text_adapter,
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
)
Expand Down Expand Up @@ -257,6 +258,7 @@ def generate_file_id(
file_hash: str,
vector_db: str,
embedding: str,
x2text: str,
chunk_size: str,
chunk_overlap: str,
) -> str:
Expand All @@ -267,13 +269,14 @@ def generate_file_id(
file_hash (str): Hash of the file contents
vector_db (str): UUID of the vector DB adapter
embedding (str): UUID of the embedding adapter
x2text (str): UUID of the X2Text adapter
chunk_size (str): Chunk size for indexing
chunk_overlap (str): Chunk overlap for indexing

Returns:
str: Key representing unique ID for a file
"""
return (
f"{tool_id}|{vector_db}|{embedding}|"
f"{tool_id}|{vector_db}|{embedding}|{x2text}|"
f"{chunk_size}|{chunk_overlap}|{file_hash}"
)
2 changes: 1 addition & 1 deletion src/unstract/sdk/utils/tool_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def json_to_str(json_to_dump: dict[str, Any]) -> str:
return compact_json

@staticmethod
def get_file_mime_type(self, input_file: Path) -> str:
def get_file_mime_type(input_file: Path) -> str:
"""Gets the file MIME type for an input file. Uses libmagic to perform
the same.

Expand Down