Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions backend/api_v2/deployment_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,9 +375,7 @@ def _enrich_result_with_workflow_metadata(
if fe_lookup:
first_fe = next(iter(fe_lookup.values()))
workflow_execution = first_fe.workflow_execution
tag_names = list(
workflow_execution.tags.values_list("name", flat=True)
)
tag_names = list(workflow_execution.tags.values_list("name", flat=True))

# 4. Enrich each item
for item in result.result:
Expand Down
6 changes: 5 additions & 1 deletion unstract/sdk1/tests/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import json
import logging
import os
import tempfile
from typing import Any, Self
from unittest.mock import MagicMock

Expand All @@ -19,6 +21,8 @@
from unstract.sdk1.execution.registry import ExecutorRegistry
from unstract.sdk1.execution.result import ExecutionResult

_TEST_FILE_PATH = os.path.join(tempfile.mkdtemp(), "test.pdf")


class TestExecutionContext:
"""Tests for ExecutionContext serialization and validation."""
Expand All @@ -31,7 +35,7 @@ def _make_context(self, **overrides: Any) -> ExecutionContext:
"run_id": "run-001",
"execution_source": "tool",
"organization_id": "org-123",
"executor_params": {"file_path": "/tmp/test.pdf"},
"executor_params": {"file_path": _TEST_FILE_PATH},
"request_id": "req-abc",
}
defaults.update(overrides)
Expand Down
2 changes: 1 addition & 1 deletion workers/executor/executors/legacy_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
from executor.executors.file_utils import FileUtils

from unstract.sdk1.adapters.exceptions import AdapterError
from unstract.sdk1.constants import LogLevel
from unstract.sdk1.adapters.x2text.constants import X2TextConstants
from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer
from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2
from unstract.sdk1.constants import LogLevel
from unstract.sdk1.execution.context import ExecutionContext, Operation
from unstract.sdk1.execution.executor import BaseExecutor
from unstract.sdk1.execution.registry import ExecutorRegistry
Expand Down
15 changes: 9 additions & 6 deletions workers/executor/executors/retrievers/automerging.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def retrieve(self) -> set[str]:
except Exception as e:
logger.error(
"AutoMergingRetriever failed: %s: %s",
type(e).__name__, e,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
Expand All @@ -83,16 +84,18 @@ def retrieve(self) -> set[str]:
except (ValueError, AttributeError, KeyError, ImportError) as e:
logger.error(
"Error during auto-merging retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(f"{type(e).__name__}: {e}") from e
except Exception as e:
logger.error(
"Unexpected error during auto-merging retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
f"Unexpected error: {type(e).__name__}: {e}"
) from e
raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
12 changes: 7 additions & 5 deletions workers/executor/executors/retrievers/fusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,18 @@ def retrieve(self) -> set[str]:
except (ValueError, AttributeError, KeyError, ImportError) as e:
logger.error(
"Error during fusion retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(f"{type(e).__name__}: {e}") from e
except Exception as e:
logger.error(
"Unexpected error during fusion retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
f"Unexpected error: {type(e).__name__}: {e}"
) from e
raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
12 changes: 7 additions & 5 deletions workers/executor/executors/retrievers/keyword_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,18 @@ def retrieve(self) -> set[str]:
except (ValueError, AttributeError, KeyError, ImportError) as e:
logger.error(
"Error during keyword retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(f"{type(e).__name__}: {e}") from e
except Exception as e:
logger.error(
"Unexpected error during keyword retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
f"Unexpected error: {type(e).__name__}: {e}"
) from e
raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
12 changes: 7 additions & 5 deletions workers/executor/executors/retrievers/recursive.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,18 @@ def retrieve(self) -> set[str]:
except (ValueError, AttributeError, KeyError, ImportError) as e:
logger.error(
"Error during recursive retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(f"{type(e).__name__}: {e}") from e
except Exception as e:
logger.error(
"Unexpected error during recursive retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
f"Unexpected error: {type(e).__name__}: {e}"
) from e
raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
12 changes: 7 additions & 5 deletions workers/executor/executors/retrievers/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,16 +149,18 @@ def retrieve(self) -> set[str]:
except (ValueError, AttributeError, KeyError, ImportError) as e:
logger.error(
"Error during router retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(f"{type(e).__name__}: {e}") from e
except Exception as e:
logger.error(
"Unexpected error during router retrieval for %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
f"Unexpected error: {type(e).__name__}: {e}"
) from e
raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
12 changes: 7 additions & 5 deletions workers/executor/executors/retrievers/subquestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,18 @@ def retrieve(self) -> set[str]:
except (ValueError, AttributeError, KeyError, ImportError) as e:
logger.error(
"Error during retrieving chunks %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(f"{type(e).__name__}: {e}") from e
except Exception as e:
logger.error(
"Unexpected error during retrieving chunks %s: %s: %s",
self.doc_id, type(e).__name__, e,
self.doc_id,
type(e).__name__,
e,
exc_info=True,
)
raise RetrievalError(
f"Unexpected error: {type(e).__name__}: {e}"
) from e
raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e
2 changes: 1 addition & 1 deletion workers/file_processing/structure_tool_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,4 +679,4 @@ def _write_tool_result(
data=json.dumps(existing, indent=2),
)
except Exception as e:
logger.warning("Failed to write tool result to METADATA.json: %s", e)
logger.warning("Failed to write tool result to METADATA.json: %s", e)
7 changes: 4 additions & 3 deletions workers/tests/test_legacy_executor_scaffold.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,12 @@ def test_chunking_config_zero_raises(self):
with pytest.raises(ValueError, match="zero chunks"):
ChunkingConfig(chunk_size=0, chunk_overlap=0)

def test_file_info(self):
def test_file_info(self, tmp_path):
from executor.executors.dto import FileInfo

fi = FileInfo(file_path="/tmp/test.pdf", file_hash="abc123")
assert fi.file_path == "/tmp/test.pdf"
test_path = str(tmp_path / "test.pdf")
fi = FileInfo(file_path=test_path, file_hash="abc123")
assert fi.file_path == test_path

def test_instance_identifiers(self):
from executor.executors.dto import InstanceIdentifiers
Expand Down
8 changes: 4 additions & 4 deletions workers/tests/test_phase1_log_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ class TestLegacyExecutorLogPassthrough:
@patch("executor.executors.legacy_executor.X2Text")
@patch("executor.executors.legacy_executor.ExecutorToolShim")
def test_extract_passes_log_info_to_shim(
self, mock_shim_cls, mock_x2text, mock_fs
self, mock_shim_cls, mock_x2text, mock_fs, tmp_path
):
from executor.executors.legacy_executor import LegacyExecutor
from unstract.sdk1.execution.registry import ExecutorRegistry
Expand All @@ -337,7 +337,7 @@ def test_extract_passes_log_info_to_shim(
log_events_id="session-abc",
executor_params={
"x2text_instance_id": "x2t-1",
"file_path": "/tmp/test.pdf",
"file_path": str(tmp_path / "test.pdf"),
"platform_api_key": "sk-test",
},
)
Expand All @@ -357,7 +357,7 @@ def test_extract_passes_log_info_to_shim(
@patch("executor.executors.legacy_executor.X2Text")
@patch("executor.executors.legacy_executor.ExecutorToolShim")
def test_extract_no_log_info_when_absent(
self, mock_shim_cls, mock_x2text, mock_fs
self, mock_shim_cls, mock_x2text, mock_fs, tmp_path
):
from executor.executors.legacy_executor import LegacyExecutor
from unstract.sdk1.execution.registry import ExecutorRegistry
Expand All @@ -380,7 +380,7 @@ def test_extract_no_log_info_when_absent(
execution_source="tool",
executor_params={
"x2text_instance_id": "x2t-1",
"file_path": "/tmp/test.pdf",
"file_path": str(tmp_path / "test.pdf"),
"platform_api_key": "sk-test",
},
)
Expand Down
4 changes: 2 additions & 2 deletions workers/tests/test_sanity_phase6g.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def test_dispatch_sends_to_sps_queue(self):
call_kwargs = mock_app.send_task.call_args
assert call_kwargs.kwargs.get("queue") == "celery_executor_simple_prompt_studio"

def test_dispatch_sps_index_to_correct_queue(self):
def test_dispatch_sps_index_to_correct_queue(self, tmp_path):
mock_app = MagicMock()
mock_result = MagicMock()
mock_result.get.return_value = ExecutionResult(
Expand All @@ -168,7 +168,7 @@ def test_dispatch_sps_index_to_correct_queue(self):
operation="sps_index",
run_id="run-1",
execution_source="tool",
executor_params={"output": {}, "file_path": "/tmp/test.pdf"},
executor_params={"output": {}, "file_path": str(tmp_path / "test.pdf")},
)
result = dispatcher.dispatch(ctx)

Expand Down
6 changes: 3 additions & 3 deletions workers/tests/test_sanity_phase6h.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def test_legacy_returns_failure_for_agentic_summarize(self):
# ---------------------------------------------------------------------------

class TestStructureToolAgenticRouting:
def test_structure_tool_dispatches_agentic_extract(self):
def test_structure_tool_dispatches_agentic_extract(self, tmp_path):
"""Verify _run_agentic_extraction sends executor_name='agentic'."""

from file_processing.structure_tool_task import _run_agentic_extraction
Expand All @@ -210,8 +210,8 @@ def test_structure_tool_dispatches_agentic_extract(self):

result = _run_agentic_extraction(
tool_metadata={"name": "test"},
input_file_path="/tmp/test.pdf",
output_dir_path="/tmp/output",
input_file_path=str(tmp_path / "test.pdf"),
output_dir_path=str(tmp_path / "output"),
tool_instance_metadata={},
dispatcher=mock_dispatcher,
shim=MagicMock(),
Expand Down
6 changes: 3 additions & 3 deletions workers/tests/test_sanity_phase6j.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def test_highlight_plugin_not_installed_no_error(self, _mock_eps):
assert ExecutorPluginLoader.get("highlight-data") is None
# No error — graceful degradation

def test_mock_highlight_plugin_shared_across_executors(self):
def test_mock_highlight_plugin_shared_across_executors(self, tmp_path):
"""Multiple executors can use the same highlight plugin instance."""
from executor.executors.plugins.loader import ExecutorPluginLoader

Expand Down Expand Up @@ -412,8 +412,8 @@ def get_confidence_data(self):
assert cls is FakeHighlight

# Both legacy and agentic contexts can create instances
legacy_hl = cls(file_path="/tmp/doc.txt", execution_source="ide")
agentic_hl = cls(file_path="/tmp/other.txt", execution_source="tool")
legacy_hl = cls(file_path=str(tmp_path / "doc.txt"), execution_source="ide")
agentic_hl = cls(file_path=str(tmp_path / "other.txt"), execution_source="tool")

assert legacy_hl.get_highlight_data() == {"lines": [1, 2, 3]}
assert agentic_hl.get_confidence_data() == {"confidence": 0.95}
Expand Down
4 changes: 2 additions & 2 deletions workers/tests/test_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ class TestMetricsInResult:
)
@patch("executor.executors.legacy_executor.ExecutorToolShim")
def test_answer_prompt_returns_metrics(
self, mock_shim_cls, mock_get_deps, _mock_idx
self, mock_shim_cls, mock_get_deps, _mock_idx, tmp_path
):
"""answer_prompt result includes metrics dict."""
from unstract.sdk1.execution.context import ExecutionContext
Expand Down Expand Up @@ -298,7 +298,7 @@ def test_answer_prompt_returns_metrics(
],
"tool_id": "tool-1",
"file_hash": "hash123",
"file_path": "/tmp/test.txt",
"file_path": str(tmp_path / "test.txt"),
"file_name": "test.txt",
"PLATFORM_SERVICE_API_KEY": "test-key",
},
Expand Down