diff --git a/backend/api_v2/deployment_helper.py b/backend/api_v2/deployment_helper.py index 8b1c540347..8971f21c62 100644 --- a/backend/api_v2/deployment_helper.py +++ b/backend/api_v2/deployment_helper.py @@ -375,9 +375,7 @@ def _enrich_result_with_workflow_metadata( if fe_lookup: first_fe = next(iter(fe_lookup.values())) workflow_execution = first_fe.workflow_execution - tag_names = list( - workflow_execution.tags.values_list("name", flat=True) - ) + tag_names = list(workflow_execution.tags.values_list("name", flat=True)) # 4. Enrich each item for item in result.result: diff --git a/unstract/sdk1/tests/test_execution.py b/unstract/sdk1/tests/test_execution.py index 3839a01073..458c7a8f10 100644 --- a/unstract/sdk1/tests/test_execution.py +++ b/unstract/sdk1/tests/test_execution.py @@ -2,6 +2,8 @@ import json import logging +import os +import tempfile from typing import Any, Self from unittest.mock import MagicMock @@ -19,6 +21,8 @@ from unstract.sdk1.execution.registry import ExecutorRegistry from unstract.sdk1.execution.result import ExecutionResult +_TEST_FILE_PATH = os.path.join(tempfile.mkdtemp(), "test.pdf") + class TestExecutionContext: """Tests for ExecutionContext serialization and validation.""" @@ -31,7 +35,7 @@ def _make_context(self, **overrides: Any) -> ExecutionContext: "run_id": "run-001", "execution_source": "tool", "organization_id": "org-123", - "executor_params": {"file_path": "/tmp/test.pdf"}, + "executor_params": {"file_path": _TEST_FILE_PATH}, "request_id": "req-abc", } defaults.update(overrides) diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index ec44444556..0d8978b431 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -25,10 +25,10 @@ from executor.executors.file_utils import FileUtils from unstract.sdk1.adapters.exceptions import AdapterError -from unstract.sdk1.constants import LogLevel from unstract.sdk1.adapters.x2text.constants import X2TextConstants from unstract.sdk1.adapters.x2text.llm_whisperer.src import LLMWhisperer from unstract.sdk1.adapters.x2text.llm_whisperer_v2.src import LLMWhispererV2 +from unstract.sdk1.constants import LogLevel from unstract.sdk1.execution.context import ExecutionContext, Operation from unstract.sdk1.execution.executor import BaseExecutor from unstract.sdk1.execution.registry import ExecutorRegistry diff --git a/workers/executor/executors/retrievers/automerging.py b/workers/executor/executors/retrievers/automerging.py index c3472964ed..7df911f44f 100644 --- a/workers/executor/executors/retrievers/automerging.py +++ b/workers/executor/executors/retrievers/automerging.py @@ -57,7 +57,8 @@ def retrieve(self) -> set[str]: except Exception as e: logger.error( "AutoMergingRetriever failed: %s: %s", - type(e).__name__, e, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError( @@ -83,16 +84,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during auto-merging retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during auto-merging retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/fusion.py b/workers/executor/executors/retrievers/fusion.py index a9b27e2eb0..82c6c32967 100644 --- a/workers/executor/executors/retrievers/fusion.py +++ b/workers/executor/executors/retrievers/fusion.py @@ -86,16 +86,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during fusion retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during fusion retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/keyword_table.py b/workers/executor/executors/retrievers/keyword_table.py index 2f1d345c02..2a61dfd227 100644 --- a/workers/executor/executors/retrievers/keyword_table.py +++ b/workers/executor/executors/retrievers/keyword_table.py @@ -71,16 +71,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during keyword retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during keyword retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/recursive.py b/workers/executor/executors/retrievers/recursive.py index b49bf298c2..0ad09a6b78 100644 --- a/workers/executor/executors/retrievers/recursive.py +++ b/workers/executor/executors/retrievers/recursive.py @@ -69,16 +69,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during recursive retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during recursive retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/router.py b/workers/executor/executors/retrievers/router.py index 8dae80271c..0bbb424b3e 100644 --- a/workers/executor/executors/retrievers/router.py +++ b/workers/executor/executors/retrievers/router.py @@ -149,16 +149,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during router retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during router retrieval for %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/executor/executors/retrievers/subquestion.py b/workers/executor/executors/retrievers/subquestion.py index de0d5047d3..635fa133ac 100644 --- a/workers/executor/executors/retrievers/subquestion.py +++ b/workers/executor/executors/retrievers/subquestion.py @@ -53,16 +53,18 @@ def retrieve(self) -> set[str]: except (ValueError, AttributeError, KeyError, ImportError) as e: logger.error( "Error during retrieving chunks %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) raise RetrievalError(f"{type(e).__name__}: {e}") from e except Exception as e: logger.error( "Unexpected error during retrieving chunks %s: %s: %s", - self.doc_id, type(e).__name__, e, + self.doc_id, + type(e).__name__, + e, exc_info=True, ) - raise RetrievalError( - f"Unexpected error: {type(e).__name__}: {e}" - ) from e + raise RetrievalError(f"Unexpected error: {type(e).__name__}: {e}") from e diff --git a/workers/file_processing/structure_tool_task.py b/workers/file_processing/structure_tool_task.py index 6775a298a4..82c1962b43 100644 --- a/workers/file_processing/structure_tool_task.py +++ b/workers/file_processing/structure_tool_task.py @@ -679,4 +679,4 @@ def _write_tool_result( data=json.dumps(existing, indent=2), ) except Exception as e: - logger.warning("Failed to write tool result to METADATA.json: %s", e) \ No newline at end of file + logger.warning("Failed to write tool result to METADATA.json: %s", e) diff --git a/workers/tests/test_legacy_executor_scaffold.py b/workers/tests/test_legacy_executor_scaffold.py index f2d9935f9b..48789c218d 100644 --- a/workers/tests/test_legacy_executor_scaffold.py +++ b/workers/tests/test_legacy_executor_scaffold.py @@ -203,11 +203,12 @@ def test_chunking_config_zero_raises(self): with pytest.raises(ValueError, match="zero chunks"): ChunkingConfig(chunk_size=0, chunk_overlap=0) - def test_file_info(self): + def test_file_info(self, tmp_path): from executor.executors.dto import FileInfo - fi = FileInfo(file_path="/tmp/test.pdf", file_hash="abc123") - assert fi.file_path == "/tmp/test.pdf" + test_path = str(tmp_path / "test.pdf") + fi = FileInfo(file_path=test_path, file_hash="abc123") + assert fi.file_path == test_path def test_instance_identifiers(self): from executor.executors.dto import InstanceIdentifiers diff --git a/workers/tests/test_phase1_log_streaming.py b/workers/tests/test_phase1_log_streaming.py index 903449d75a..95de9b21bc 100644 --- a/workers/tests/test_phase1_log_streaming.py +++ b/workers/tests/test_phase1_log_streaming.py @@ -313,7 +313,7 @@ class TestLegacyExecutorLogPassthrough: @patch("executor.executors.legacy_executor.X2Text") @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_extract_passes_log_info_to_shim( - self, mock_shim_cls, mock_x2text, mock_fs + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path ): from executor.executors.legacy_executor import LegacyExecutor from unstract.sdk1.execution.registry import ExecutorRegistry @@ -337,7 +337,7 @@ def test_extract_passes_log_info_to_shim( log_events_id="session-abc", executor_params={ "x2text_instance_id": "x2t-1", - "file_path": "/tmp/test.pdf", + "file_path": str(tmp_path / "test.pdf"), "platform_api_key": "sk-test", }, ) @@ -357,7 +357,7 @@ def test_extract_passes_log_info_to_shim( @patch("executor.executors.legacy_executor.X2Text") @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_extract_no_log_info_when_absent( - self, mock_shim_cls, mock_x2text, mock_fs + self, mock_shim_cls, mock_x2text, mock_fs, tmp_path ): from executor.executors.legacy_executor import LegacyExecutor from unstract.sdk1.execution.registry import ExecutorRegistry @@ -380,7 +380,7 @@ def test_extract_no_log_info_when_absent( execution_source="tool", executor_params={ "x2text_instance_id": "x2t-1", - "file_path": "/tmp/test.pdf", + "file_path": str(tmp_path / "test.pdf"), "platform_api_key": "sk-test", }, ) diff --git a/workers/tests/test_sanity_phase6g.py b/workers/tests/test_sanity_phase6g.py index 73bb738911..8b175f8eec 100644 --- a/workers/tests/test_sanity_phase6g.py +++ b/workers/tests/test_sanity_phase6g.py @@ -154,7 +154,7 @@ def test_dispatch_sends_to_sps_queue(self): call_kwargs = mock_app.send_task.call_args assert call_kwargs.kwargs.get("queue") == "celery_executor_simple_prompt_studio" - def test_dispatch_sps_index_to_correct_queue(self): + def test_dispatch_sps_index_to_correct_queue(self, tmp_path): mock_app = MagicMock() mock_result = MagicMock() mock_result.get.return_value = ExecutionResult( @@ -168,7 +168,7 @@ def test_dispatch_sps_index_to_correct_queue(self): operation="sps_index", run_id="run-1", execution_source="tool", - executor_params={"output": {}, "file_path": "/tmp/test.pdf"}, + executor_params={"output": {}, "file_path": str(tmp_path / "test.pdf")}, ) result = dispatcher.dispatch(ctx) diff --git a/workers/tests/test_sanity_phase6h.py b/workers/tests/test_sanity_phase6h.py index 1c43b3d78b..c4249fb05a 100644 --- a/workers/tests/test_sanity_phase6h.py +++ b/workers/tests/test_sanity_phase6h.py @@ -198,7 +198,7 @@ def test_legacy_returns_failure_for_agentic_summarize(self): # --------------------------------------------------------------------------- class TestStructureToolAgenticRouting: - def test_structure_tool_dispatches_agentic_extract(self): + def test_structure_tool_dispatches_agentic_extract(self, tmp_path): """Verify _run_agentic_extraction sends executor_name='agentic'.""" from file_processing.structure_tool_task import _run_agentic_extraction @@ -210,8 +210,8 @@ def test_structure_tool_dispatches_agentic_extract(self): result = _run_agentic_extraction( tool_metadata={"name": "test"}, - input_file_path="/tmp/test.pdf", - output_dir_path="/tmp/output", + input_file_path=str(tmp_path / "test.pdf"), + output_dir_path=str(tmp_path / "output"), tool_instance_metadata={}, dispatcher=mock_dispatcher, shim=MagicMock(), diff --git a/workers/tests/test_sanity_phase6j.py b/workers/tests/test_sanity_phase6j.py index 2336b65d05..c52dcdf490 100644 --- a/workers/tests/test_sanity_phase6j.py +++ b/workers/tests/test_sanity_phase6j.py @@ -382,7 +382,7 @@ def test_highlight_plugin_not_installed_no_error(self, _mock_eps): assert ExecutorPluginLoader.get("highlight-data") is None # No error — graceful degradation - def test_mock_highlight_plugin_shared_across_executors(self): + def test_mock_highlight_plugin_shared_across_executors(self, tmp_path): """Multiple executors can use the same highlight plugin instance.""" from executor.executors.plugins.loader import ExecutorPluginLoader @@ -412,8 +412,8 @@ def get_confidence_data(self): assert cls is FakeHighlight # Both legacy and agentic contexts can create instances - legacy_hl = cls(file_path="/tmp/doc.txt", execution_source="ide") - agentic_hl = cls(file_path="/tmp/other.txt", execution_source="tool") + legacy_hl = cls(file_path=str(tmp_path / "doc.txt"), execution_source="ide") + agentic_hl = cls(file_path=str(tmp_path / "other.txt"), execution_source="tool") assert legacy_hl.get_highlight_data() == {"lines": [1, 2, 3]} assert agentic_hl.get_confidence_data() == {"confidence": 0.95} diff --git a/workers/tests/test_usage.py b/workers/tests/test_usage.py index 2fecc76713..fc08ac825b 100644 --- a/workers/tests/test_usage.py +++ b/workers/tests/test_usage.py @@ -223,7 +223,7 @@ class TestMetricsInResult: ) @patch("executor.executors.legacy_executor.ExecutorToolShim") def test_answer_prompt_returns_metrics( - self, mock_shim_cls, mock_get_deps, _mock_idx + self, mock_shim_cls, mock_get_deps, _mock_idx, tmp_path ): """answer_prompt result includes metrics dict.""" from unstract.sdk1.execution.context import ExecutionContext @@ -298,7 +298,7 @@ def test_answer_prompt_returns_metrics( ], "tool_id": "tool-1", "file_hash": "hash123", - "file_path": "/tmp/test.txt", + "file_path": str(tmp_path / "test.txt"), "file_name": "test.txt", "PLATFORM_SERVICE_API_KEY": "test-key", },