Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
15d11f4
Exception handling for Prompt Service
harini-venkataraman May 23, 2024
dca43f7
Merge branch main
harini-venkataraman Jun 9, 2024
cebfaaa
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Jun 18, 2024
9a13a56
Merge branch main
harini-venkataraman Aug 8, 2024
7331f57
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Oct 7, 2024
c6c6698
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Nov 13, 2024
c3b3eb5
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Dec 3, 2024
395746b
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Dec 4, 2024
3986fb9
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Dec 5, 2024
72de20d
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Jan 7, 2025
5fa0023
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Jan 20, 2025
5a1cc4b
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Jan 29, 2025
377193e
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Feb 4, 2025
efd358a
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Feb 7, 2025
273388f
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Feb 13, 2025
5700282
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Feb 24, 2025
a6f7403
Merge branch 'main' of github.com:Zipstack/unstract-sdk
harini-venkataraman Mar 6, 2025
1d43206
refactor: Indexing API segregation
harini-venkataraman Mar 6, 2025
071dcb3
refactor: Indexing API segregation
harini-venkataraman Mar 10, 2025
92ad0bd
refactor: Indexing API segregation
harini-venkataraman Mar 10, 2025
025b0ce
Retrievers - Subquestion & Simple
harini-venkataraman Mar 12, 2025
1a0dbb4
Addressing review comments
harini-venkataraman Mar 17, 2025
431391f
Addressing review comments
harini-venkataraman Mar 17, 2025
6372fc6
Moving helpers to application
harini-venkataraman Mar 17, 2025
dc04135
Remove unused exceptions
harini-venkataraman Mar 18, 2025
5479b15
Merge branch 'main' into fix/indexing-refactor
harini-venkataraman Mar 18, 2025
d2e5ae6
Adding Index util to generate index key
harini-venkataraman Mar 18, 2025
01432e8
Merge branch 'fix/indexing-refactor' of github.com:Zipstack/unstract-…
harini-venkataraman Mar 18, 2025
176aa97
Version bump
harini-venkataraman Mar 19, 2025
aa1d8ce
Version bump
harini-venkataraman Mar 19, 2025
3f4c2a4
Adding headers for API
harini-venkataraman Mar 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/unstract/sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.60.1"
__version__ = "0.61.0"


def get_sdk_version():
Expand Down
34 changes: 34 additions & 0 deletions src/unstract/sdk/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,40 @@ def answer_prompt(
payload=payload,
params=params,
)

@log_elapsed(operation="INDEX")
def index(
    self,
    payload: dict[str, Any],
    params: Optional[dict[str, str]] = None,
    headers: Optional[dict[str, str]] = None,
) -> dict[str, Any]:
    """Send an indexing request through ``self._post_call``.

    Args:
        payload: Request body forwarded unchanged to ``_post_call``.
        params: Optional parameters forwarded unchanged to ``_post_call``.
        headers: Optional headers forwarded unchanged to ``_post_call``.

    Returns:
        Whatever ``self._post_call`` returns for the request.
    """
    # Public calls are routed to the "-public" variant of the endpoint.
    endpoint = "index-public" if self.is_public_call else "index"
    return self._post_call(
        url_path=endpoint,
        payload=payload,
        params=params,
        headers=headers,
    )

@log_elapsed(operation="EXTRACT")
def extract(
    self,
    payload: dict[str, Any],
    params: Optional[dict[str, str]] = None,
    headers: Optional[dict[str, str]] = None,
) -> dict[str, Any]:
    """Send an extraction request through ``self._post_call``.

    Args:
        payload: Request body forwarded unchanged to ``_post_call``.
        params: Optional parameters forwarded unchanged to ``_post_call``.
        headers: Optional headers forwarded unchanged to ``_post_call``.

    Returns:
        Whatever ``self._post_call`` returns for the request.
    """
    # Public calls are routed to the "-public" variant of the endpoint.
    endpoint = "extract-public" if self.is_public_call else "extract"
    return self._post_call(
        url_path=endpoint,
        payload=payload,
        params=params,
        headers=headers,
    )

def single_pass_extraction(
self, payload: dict[str, Any], params: Optional[dict[str, str]] = None
Expand Down
54 changes: 54 additions & 0 deletions src/unstract/sdk/utils/indexing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import json
from typing import Optional

from unstract.sdk.adapter import ToolAdapter
from unstract.sdk.file_storage import FileStorage, FileStorageProvider
from unstract.sdk.tool.base import BaseTool
from unstract.sdk.utils import ToolUtils


class IndexingUtils:
    """Static helpers related to document indexing."""

    @staticmethod
    def generate_index_key(
        vector_db: str,
        embedding: str,
        x2text: str,
        chunk_size: str,
        chunk_overlap: str,
        tool: BaseTool,
        file_path: Optional[str] = None,
        file_hash: Optional[str] = None,
        fs: Optional[FileStorage] = None,
    ) -> str:
        """Generate an index key from the file hash and indexing configuration.

        The key is the hash of a JSON document (keys sorted) containing the
        file hash, the adapter configurations looked up for ``vector_db``,
        ``embedding`` and ``x2text``, and the stringified chunking options.

        Args:
            vector_db: Identifier passed to ``ToolAdapter.get_adapter_config``
                to look up the vector DB configuration.
            embedding: Identifier passed to ``ToolAdapter.get_adapter_config``
                to look up the embedding configuration.
            x2text: Identifier passed to ``ToolAdapter.get_adapter_config``
                to look up the x2text configuration.
            chunk_size: Chunk size; converted with ``str()`` before hashing.
            chunk_overlap: Chunk overlap; converted with ``str()`` before
                hashing.
            tool: Tool instance passed to ``ToolAdapter.get_adapter_config``.
            file_path: Path of the file to hash. Only read when ``file_hash``
                is not supplied.
            file_hash: Precomputed hash of the file. When given, the file is
                not read and ``fs`` is not used.
            fs: File storage used to hash ``file_path``. Defaults to a
                ``FileStorage`` with the ``LOCAL`` provider, created only
                when a hash actually has to be computed.

        Returns:
            str: The hashed index key.

        Raises:
            ValueError: If neither ``file_path`` nor ``file_hash`` is given.
        """
        if not file_path and not file_hash:
            raise ValueError("One of `file_path` or `file_hash` need to be provided")

        if not file_hash:
            # Build the default storage lazily instead of in the signature:
            # a call in a default argument runs once at import time and the
            # resulting instance would be shared by every caller.
            if fs is None:
                fs = FileStorage(provider=FileStorageProvider.LOCAL)
            file_hash = fs.get_hash_from_file(path=file_path)

        # Whole adapter config is used currently even though it contains some keys
        # which might not be relevant to indexing. This is easier for now than
        # marking certain keys of the adapter config as necessary.
        index_key = {
            "file_hash": file_hash,
            "vector_db_config": ToolAdapter.get_adapter_config(tool, vector_db),
            "embedding_config": ToolAdapter.get_adapter_config(tool, embedding),
            "x2text_config": ToolAdapter.get_adapter_config(tool, x2text),
            # Typed and hashed as strings since the final hash is persisted
            # and this is required to be backward compatible
            "chunk_size": str(chunk_size),
            "chunk_overlap": str(chunk_overlap),
        }
        # JSON keys are sorted to ensure that the same key gets hashed even in
        # case where the fields are reordered.
        return ToolUtils.hash_str(json.dumps(index_key, sort_keys=True))