Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/unstract/sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.25.0"
__version__ = "0.25.1"


def get_sdk_version():
Expand Down
2 changes: 1 addition & 1 deletion src/unstract/sdk/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_embedding(self, adapter_instance_id: str) -> BaseEmbedding:
self.tool.stream_log(
log=f"Error getting embedding: {e}", level=LogLevel.ERROR
)
raise ToolEmbeddingError(f"Error getting embedding instance: {e}")
raise ToolEmbeddingError(f"Error getting embedding instance: {e}") from e

def get_embedding_length(self, embedding: BaseEmbedding) -> int:
embedding_list = embedding._get_text_embedding(self.__TEST_SNIPPET)
Expand Down
4 changes: 4 additions & 0 deletions src/unstract/sdk/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,9 @@ class ToolVectorDBError(SdkError):
DEFAULT_MESSAGE = "Error ocurred related to vector DB"


class X2TextError(SdkError):
DEFAULT_MESSAGE = "Error ocurred related to text extractor"


class RateLimitError(SdkError):
DEFAULT_MESSAGE = "Running into rate limit errors, please try again later"
62 changes: 33 additions & 29 deletions src/unstract/sdk/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def index_file(
f"Error deleting nodes for {doc_id}: {e}",
level=LogLevel.ERROR,
)
raise SdkError(f"Error deleting nodes for {doc_id}: {e}")
raise SdkError(f"Error deleting nodes for {doc_id}: {e}") from e
doc_id_found = False

if doc_id_found:
Expand Down Expand Up @@ -246,29 +246,33 @@ def index_file(
document.id_ = doc_id
documents.append(document)
self.tool.stream_log(f"Number of documents: {len(documents)}")
if chunk_size == 0:
parser = SimpleNodeParser.from_defaults(
chunk_size=len(documents[0].text) + 10, chunk_overlap=0
)
nodes = parser.get_nodes_from_documents(documents, show_progress=True)
node = nodes[0]
node.embedding = embedding_li.get_query_embedding(" ")
vector_db_li.add(nodes=[node])
self.tool.stream_log("Added node to vector db")
else:
storage_context = StorageContext.from_defaults(vector_store=vector_db_li)
parser = SimpleNodeParser.from_defaults(
chunk_size=chunk_size, chunk_overlap=chunk_overlap
)

# Set callback_manager to collect Usage stats
callback_manager = UNCallbackManager.set_callback_manager(
platform_api_key=self.tool.get_env_or_die(ToolEnv.PLATFORM_API_KEY),
embedding=embedding_li,
)
try:
if chunk_size == 0:
parser = SimpleNodeParser.from_defaults(
chunk_size=len(documents[0].text) + 10, chunk_overlap=0
)
nodes = parser.get_nodes_from_documents(documents, show_progress=True)
node = nodes[0]
node.embedding = embedding_li.get_query_embedding(" ")
vector_db_li.add(nodes=[node])
self.tool.stream_log("Added node to vector db")
else:
storage_context = StorageContext.from_defaults(
vector_store=vector_db_li
)
parser = SimpleNodeParser.from_defaults(
chunk_size=chunk_size, chunk_overlap=chunk_overlap
)

# Set callback_manager to collect Usage stats
callback_manager = UNCallbackManager.set_callback_manager(
platform_api_key=self.tool.get_env_or_die(ToolEnv.PLATFORM_API_KEY),
embedding=embedding_li,
)

self.tool.stream_log("Adding nodes to vector db...")

self.tool.stream_log("Adding nodes to vector db...")
try:
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
Expand All @@ -277,13 +281,13 @@ def index_file(
node_parser=parser,
callback_manager=callback_manager,
)
except Exception as e:
self.tool.stream_log(
f"Error adding nodes to vector db: {e}",
level=LogLevel.ERROR,
)
raise IndexingError(str(e)) from e
self.tool.stream_log("Added nodes to vector db")
except Exception as e:
self.tool.stream_log(
f"Error adding nodes to vector db: {e}",
level=LogLevel.ERROR,
)
raise IndexingError(str(e)) from e
self.tool.stream_log("Added nodes to vector db")

self.tool.stream_log("File has been indexed successfully")
return doc_id
Expand Down
2 changes: 1 addition & 1 deletion src/unstract/sdk/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def get_llm(self, adapter_instance_id: str) -> LLM:
self.tool.stream_log(
log=f"Unable to get llm instance: {e}", level=LogLevel.ERROR
)
raise ToolLLMError(f"Error getting llm instance: {e}")
raise ToolLLMError(f"Error getting llm instance: {e}") from e

def get_max_tokens(self, reserved_for_output: int = 0) -> int:
"""Returns the maximum number of tokens that can be used for the LLM.
Expand Down
2 changes: 1 addition & 1 deletion src/unstract/sdk/vector_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ def get_vector_db(
log=f"Unable to get vector_db {adapter_instance_id}: {e}",
level=LogLevel.ERROR,
)
raise ToolVectorDBError(f"Error getting vectorDB instance: {e}")
raise ToolVectorDBError(f"Error getting vectorDB instance: {e}") from e
16 changes: 7 additions & 9 deletions src/unstract/sdk/x2txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from unstract.sdk.adapters import ToolAdapter
from unstract.sdk.constants import LogLevel
from unstract.sdk.exceptions import SdkError
from unstract.sdk.exceptions import X2TextError
from unstract.sdk.tool.base import BaseTool


Expand All @@ -28,17 +28,15 @@ def get_x2text(self, adapter_instance_id: str) -> X2TextAdapter:
][Common.ADAPTER]
x2text_metadata = x2text_config.get(Common.ADAPTER_METADATA)
# Add x2text service host, port and platform_service_key
x2text_metadata[
x2text_metadata[X2TextConstants.X2TEXT_HOST] = self.tool.get_env_or_die(
X2TextConstants.X2TEXT_HOST
] = self.tool.get_env_or_die(X2TextConstants.X2TEXT_HOST)
x2text_metadata[
)
x2text_metadata[X2TextConstants.X2TEXT_PORT] = self.tool.get_env_or_die(
X2TextConstants.X2TEXT_PORT
] = self.tool.get_env_or_die(X2TextConstants.X2TEXT_PORT)
)
x2text_metadata[
X2TextConstants.PLATFORM_SERVICE_API_KEY
] = self.tool.get_env_or_die(
X2TextConstants.PLATFORM_SERVICE_API_KEY
)
] = self.tool.get_env_or_die(X2TextConstants.PLATFORM_SERVICE_API_KEY)

x2text_adapter_class = x2text_adapter(x2text_metadata)

Expand All @@ -49,4 +47,4 @@ def get_x2text(self, adapter_instance_id: str) -> X2TextAdapter:
log=f"Unable to get x2text adapter {adapter_instance_id}: {e}",
level=LogLevel.ERROR,
)
raise SdkError(f"Error getting vectorDB instance: {e}")
raise X2TextError(f"Error getting text extractor: {e}") from e