Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 38 additions & 38 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ dependencies = [
"python-magic~=0.4.27",
"python-dotenv==1.0.0",
# LLM Triad
"unstract-adapters~=0.4.0",
"unstract-adapters~=0.4.1",
"llama-index==0.9.28",
"tiktoken~=0.4.0",
"transformers==4.37.0",
Expand Down
2 changes: 1 addition & 1 deletion src/unstract/sdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.14.0"
__version__ = "0.15.0"


def get_sdk_version():
Expand Down
19 changes: 8 additions & 11 deletions src/unstract/sdk/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from typing import Any, Optional
class SdkError(Exception):
    """Base error raised by the SDK, carrying a message meant for display.

    Attributes:
        DEFAULT_MESSAGE: Fallback text used when no message is supplied.
        message: The text given at construction, always stored as ``str``.
    """

    DEFAULT_MESSAGE = "Something went wrong"

    def __init__(self, message: str = DEFAULT_MESSAGE):
        """Create the error.

        Args:
            message: Description of the failure. Defaults to
                ``DEFAULT_MESSAGE``. Non-string values (for example a
                caught exception object) are converted with ``str()``.
        """
        super().__init__(message)
        # Make it user friendly wherever possible
        # Coerce to ``str``: any ``__str__`` that returns this attribute must
        # hand back a real string, otherwise ``str(error)`` raises TypeError.
        self.message = message if isinstance(message, str) else str(message)

class SdkException(Exception):
    """SDK exception that can carry an optional user-facing message.

    The positional and keyword arguments are handed to ``Exception``
    unchanged; ``user_message`` is kept separately and exposed read-only.
    """

    def __init__(
        self, *args: Any, user_message: Optional[str] = None, **kwargs: Any
    ) -> None:
        """Initialise the exception.

        Args:
            *args: Forwarded to ``Exception.__init__``.
            user_message: Optional text stored for later retrieval through
                the ``user_message`` property.
            **kwargs: Forwarded to ``Exception.__init__``.
        """
        super().__init__(*args, **kwargs)
        self._user_message = user_message

    @property
    def user_message(self) -> Optional[str]:
        """Return the ``user_message`` given at construction, or ``None``."""
        return self._user_message
def __str__(self) -> str:
# Return the instance's ``message`` attribute as the string form of the error.
return self.message
36 changes: 21 additions & 15 deletions src/unstract/sdk/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
from llama_index import Document, StorageContext, VectorStoreIndex
from llama_index.node_parser import SimpleNodeParser
from llama_index.vector_stores import VectorStoreQuery, VectorStoreQueryResult
from unstract.adapters.exceptions import AdapterError
from unstract.adapters.x2text.x2text_adapter import X2TextAdapter

from unstract.sdk.constants import LogLevel, ToolEnv
from unstract.sdk.embedding import ToolEmbedding
from unstract.sdk.exceptions import SdkException
from unstract.sdk.exceptions import SdkError
from unstract.sdk.tool.base import BaseTool
from unstract.sdk.utils import ToolUtils
from unstract.sdk.utils.service_context import ServiceContext
Expand All @@ -30,7 +31,7 @@ def get_text_from_index(
self.tool.stream_log(
f"Error loading {embedding_type}", level=LogLevel.ERROR
)
raise SdkException(f"Error loading {embedding_type}")
raise SdkError(f"Error loading {embedding_type}")
embedding_dimension = embedd_helper.get_embedding_length(embedding_li)

vdb_helper = ToolVectorDB(
Expand All @@ -45,7 +46,7 @@ def get_text_from_index(
self.tool.stream_log(
f"Error loading {vector_db}", level=LogLevel.ERROR
)
raise SdkException(f"Error loading {vector_db}")
raise SdkError(f"Error loading {vector_db}")

try:
self.tool.stream_log(f">>> Querying {vector_db}...")
Expand All @@ -59,7 +60,7 @@ def get_text_from_index(
self.tool.stream_log(
f"Error querying {vector_db}: {e}", level=LogLevel.ERROR
)
raise SdkException(f"Error querying {vector_db}: {e}")
raise SdkError(f"Error querying {vector_db}: {e}")

n: VectorStoreQueryResult = vector_db_li.query(query=q)
if len(n.nodes) > 0:
Expand Down Expand Up @@ -134,13 +135,18 @@ def index_file(

self.tool.stream_log("Extracting text from input file")
full_text = []
x2text = X2Text(tool=self.tool)
x2text_adapter_inst: X2TextAdapter = x2text.get_x2text(
adapter_instance_id=x2text_adapter
)
extracted_text = x2text_adapter_inst.process(
input_file_path=file_path, output_file_path=output_file_path
)
extracted_text = ""
try:
x2text = X2Text(tool=self.tool)
x2text_adapter_inst: X2TextAdapter = x2text.get_x2text(
adapter_instance_id=x2text_adapter
)
extracted_text = x2text_adapter_inst.process(
input_file_path=file_path, output_file_path=output_file_path
)
except AdapterError as e:
# Wrapping AdapterErrors with SdkError
raise SdkError(str(e)) from e
full_text.append(
{
"section": "full",
Expand Down Expand Up @@ -173,7 +179,7 @@ def index_file(
self.tool.stream_log(
f"Error loading {embedding_type}", level=LogLevel.ERROR
)
raise SdkException(f"Error loading {embedding_type}")
raise SdkError(f"Error loading {embedding_type}")

embedding_dimension = embedd_helper.get_embedding_length(embedding_li)
vector_db_li = vdb_helper.get_vector_db(
Expand All @@ -184,7 +190,7 @@ def index_file(
self.tool.stream_log(
f"Error loading {vector_db}", level=LogLevel.ERROR
)
raise SdkException(f"Error loading {vector_db}")
raise SdkError(f"Error loading {vector_db}")

q = VectorStoreQuery(
query_embedding=embedding_li.get_query_embedding(" "),
Expand Down Expand Up @@ -214,7 +220,7 @@ def index_file(
f"Error deleting nodes for {doc_id}: {e}",
level=LogLevel.ERROR,
)
raise SdkException(f"Error deleting nodes for {doc_id}: {e}")
raise SdkError(f"Error deleting nodes for {doc_id}: {e}")
doc_id_not_found = True

if doc_id_not_found:
Expand Down Expand Up @@ -271,7 +277,7 @@ def index_file(
f"Error adding nodes to vector db: {e}",
level=LogLevel.ERROR,
)
raise SdkException(f"Error adding nodes to vector db: {e}")
raise SdkError(f"Error adding nodes to vector db: {e}")
self.tool.stream_log("Added nodes to vector db")

self.tool.stream_log("Done indexing file")
Expand Down
5 changes: 3 additions & 2 deletions src/unstract/sdk/vector_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
from unstract.adapters.constants import Common
from unstract.adapters.vectordb import adapters
from unstract.adapters.vectordb.constants import VectorDbConstants

from unstract.sdk.adapters import ToolAdapter
from unstract.sdk.constants import LogLevel, ToolEnv, ToolSettingsKey
from unstract.sdk.exceptions import SdkException
from unstract.sdk.exceptions import SdkError
from unstract.sdk.platform import PlatformHelper
from unstract.sdk.tool.base import BaseTool

Expand All @@ -34,7 +35,7 @@ def __get_org_id(self) -> str:
platform_details = platform_helper.get_platform_details()
if not platform_details:
# Errors are logged by the SDK itself
raise SdkException("Error getting platform details")
raise SdkError("Error getting platform details")
account_id = platform_details.get("organization_id")
return account_id

Expand Down