diff --git a/README.md b/README.md index e30665d..7606821 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ After modifying the `entrypoint.py` as needed, using any dependencies you add in 4. The SDK automatically packages all dependencies when you run `datacustomcode zip` ```zsh +cd my_package datacustomcode scan ./payload/entrypoint.py datacustomcode deploy --path ./payload --name my_custom_script --cpu-size CPU_L ``` @@ -183,7 +184,7 @@ Options: #### `datacustomcode init` -Initialize a new development environment with a template. +Initialize a new development environment with a code package template. Argument: - `DIRECTORY`: Directory to create project in (default: ".") @@ -213,19 +214,19 @@ Options: #### `datacustomcode zip` -Zip a transformation job in preparation to upload to Data Cloud. +Zip a transformation job in preparation to upload to Data Cloud. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command. Options: -- `--path TEXT`: Path to the code directory (default: ".") +- `--path TEXT`: Path to the code directory, i.e., the payload folder (default: ".") - `--network TEXT`: docker network (default: "default") #### `datacustomcode deploy` -Deploy a transformation job to Data Cloud. +Deploy a transformation job to Data Cloud. Note that this command takes care of creating a zip file from the provided path before deployment. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command. Options: - `--profile TEXT`: Credential profile name (default: "default") -- `--path TEXT`: Path to the code directory (default: ".") +- `--path TEXT`: Path to the code directory, i.e., 
the payload folder (default: ".") - `--name TEXT`: Name of the transformation job [required] - `--version TEXT`: Version of the transformation job (default: "0.0.1") - `--description TEXT`: Description of the transformation job (default: "") diff --git a/src/datacustomcode/deploy.py b/src/datacustomcode/deploy.py index 869803e..cecb6ef 100644 --- a/src/datacustomcode/deploy.py +++ b/src/datacustomcode/deploy.py @@ -91,9 +91,29 @@ def _make_api_call( logger.debug(f"Request params: {kwargs}") response = requests.request(method=method, url=url, headers=headers, **kwargs) - json_response = response.json() if response.status_code >= 400: - logger.debug(f"Error Response: {json_response}") + logger.debug(f"Error Response Status: {response.status_code}") + logger.debug(f"Error Response Headers: {response.headers}") + logger.debug(f"Error Response Text: {response.text[:500]}") + + if not response.text or response.text.strip() == "": + response.raise_for_status() + raise ValueError( + f"Received empty response from {method} {url}. " + f"Status code: {response.status_code}" + ) + + try: + json_response = response.json() + except requests.exceptions.JSONDecodeError as e: + logger.error(f"Failed to parse JSON response. Status: {response.status_code}") + logger.error(f"Response text: {response.text[:500]}") + raise ValueError( + f"Invalid JSON response from {method} {url}. 
" + f"Status code: {response.status_code}, " + f"Response: {response.text[:200]}" + ) from e + response.raise_for_status() assert isinstance( json_response, dict diff --git a/src/datacustomcode/scan.py b/src/datacustomcode/scan.py index a2ffd9d..c1e4dc1 100644 --- a/src/datacustomcode/scan.py +++ b/src/datacustomcode/scan.py @@ -15,6 +15,8 @@ from __future__ import annotations import ast +import json +import logging import os import sys from typing import ( @@ -29,12 +31,14 @@ from datacustomcode.version import get_version +logger = logging.getLogger(__name__) + DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"] DATA_TRANSFORM_CONFIG_TEMPLATE = { "sdkVersion": get_version(), "entryPoint": "", - "dataspace": "default", + "dataspace": "", "permissions": { "read": {}, "write": {}, @@ -232,6 +236,40 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]: config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy() config["entryPoint"] = file_path.rpartition("/")[-1] + file_dir = os.path.dirname(file_path) + config_json_path = os.path.join(file_dir, "config.json") + + if os.path.exists(config_json_path) and os.path.isfile(config_json_path): + try: + with open(config_json_path, "r") as f: + existing_config = json.load(f) + + if "dataspace" in existing_config: + dataspace_value = existing_config["dataspace"] + if not dataspace_value or ( + isinstance(dataspace_value, str) and dataspace_value.strip() == "" + ): + logger.warning( + f"dataspace in {config_json_path} is empty or None. " + f"Updating config file to use dataspace 'default'. " + ) + config["dataspace"] = "default" + else: + config["dataspace"] = dataspace_value + else: + raise ValueError( + f"dataspace must be defined in {config_json_path}. " + f"Please add a 'dataspace' field to the config.json file. 
" + ) + except json.JSONDecodeError as e: + raise ValueError( + f"Failed to parse JSON from {config_json_path}: {e}" + ) from e + except OSError as e: + raise OSError(f"Failed to read config file {config_json_path}: {e}") from e + else: + config["dataspace"] = "default" + read: dict[str, list[str]] = {} if output.read_dlo: read["dlo"] = list(output.read_dlo) @@ -244,4 +282,5 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]: write["dmo"] = list(output.write_to_dmo) config["permissions"] = {"read": read, "write": write} + return config diff --git a/tests/test_scan.py b/tests/test_scan.py index c922877..2703173 100644 --- a/tests/test_scan.py +++ b/tests/test_scan.py @@ -358,6 +358,238 @@ def test_dmo_to_dmo_config(self): finally: os.remove(temp_path) + @patch( + "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE", + { + "sdkVersion": "1.2.3", + "entryPoint": "", + "dataspace": "", + "permissions": { + "read": {}, + "write": {}, + }, + }, + ) + def test_preserves_existing_dataspace(self): + """Test that existing dataspace value is preserved when config.json exists.""" + import json + + content = textwrap.dedent( + """ + from datacustomcode.client import Client + + client = Client() + df = client.read_dlo("input_dlo") + client.write_to_dlo("output_dlo", df, "overwrite") + """ + ) + temp_path = create_test_script(content) + file_dir = os.path.dirname(temp_path) + config_path = os.path.join(file_dir, "config.json") + + try: + # Create an existing config.json with a custom dataspace + existing_config = { + "sdkVersion": "1.0.0", + "entryPoint": "test.py", + "dataspace": "my_custom_dataspace", + "permissions": { + "read": {"dlo": ["old_dlo"]}, + "write": {"dlo": ["old_output"]}, + }, + } + with open(config_path, "w") as f: + json.dump(existing_config, f) + + # Generate new config - should preserve dataspace + result = dc_config_json_from_file(temp_path) + assert result["dataspace"] == "my_custom_dataspace" + assert result["permissions"]["read"]["dlo"] == 
["input_dlo"] + assert result["permissions"]["write"]["dlo"] == ["output_dlo"] + finally: + os.remove(temp_path) + if os.path.exists(config_path): + os.remove(config_path) + + @patch( + "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE", + { + "sdkVersion": "1.2.3", + "entryPoint": "", + "dataspace": "", + "permissions": { + "read": {}, + "write": {}, + }, + }, + ) + def test_uses_default_for_empty_dataspace(self, caplog): + """Test that empty dataspace value uses default and logs warning.""" + import json + import logging + + content = textwrap.dedent( + """ + from datacustomcode.client import Client + + client = Client() + df = client.read_dlo("input_dlo") + client.write_to_dlo("output_dlo", df, "overwrite") + """ + ) + temp_path = create_test_script(content) + file_dir = os.path.dirname(temp_path) + config_path = os.path.join(file_dir, "config.json") + + try: + # Create an existing config.json with empty dataspace + existing_config = { + "sdkVersion": "1.0.0", + "entryPoint": "test.py", + "dataspace": "", + "permissions": { + "read": {"dlo": ["old_dlo"]}, + "write": {"dlo": ["old_output"]}, + }, + } + with open(config_path, "w") as f: + json.dump(existing_config, f) + + # Should use "default" for empty dataspace (not raise error) + with caplog.at_level(logging.WARNING): + result = dc_config_json_from_file(temp_path) + + assert result["dataspace"] == "default" + assert result["permissions"]["read"]["dlo"] == ["input_dlo"] + assert result["permissions"]["write"]["dlo"] == ["output_dlo"] + + # Verify that a warning was logged + assert len(caplog.records) > 0 + assert any( + "dataspace" in record.message.lower() + and "empty" in record.message.lower() + for record in caplog.records + ) + finally: + os.remove(temp_path) + if os.path.exists(config_path): + os.remove(config_path) + + @patch( + "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE", + { + "sdkVersion": "1.2.3", + "entryPoint": "", + "dataspace": "", + "permissions": { + "read": {}, + "write": {}, + }, + 
}, + ) + def test_uses_default_dataspace_when_no_config(self): + """Test missing config.json uses default dataspace.""" + content = textwrap.dedent( + """ + from datacustomcode.client import Client + + client = Client() + df = client.read_dlo("input_dlo") + client.write_to_dlo("output_dlo", df, "overwrite") + """ + ) + temp_path = create_test_script(content) + + try: + # No existing config.json - should use "default" dataspace + result = dc_config_json_from_file(temp_path) + assert result["dataspace"] == "default" + assert result["permissions"]["read"]["dlo"] == ["input_dlo"] + assert result["permissions"]["write"]["dlo"] == ["output_dlo"] + finally: + os.remove(temp_path) + + @patch( + "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE", + { + "sdkVersion": "1.2.3", + "entryPoint": "", + "dataspace": "", + "permissions": { + "read": {}, + "write": {}, + }, + }, + ) + def test_rejects_missing_dataspace(self): + """Test that config.json missing dataspace field raises ValueError.""" + import json + + content = textwrap.dedent( + """ + from datacustomcode.client import Client + + client = Client() + df = client.read_dlo("input_dlo") + client.write_to_dlo("output_dlo", df, "overwrite") + """ + ) + temp_path = create_test_script(content) + file_dir = os.path.dirname(temp_path) + config_path = os.path.join(file_dir, "config.json") + + try: + # Create an existing config.json without dataspace field + existing_config = { + "sdkVersion": "1.0.0", + "entryPoint": "test.py", + "permissions": { + "read": {"dlo": ["old_dlo"]}, + "write": {"dlo": ["old_output"]}, + }, + } + with open(config_path, "w") as f: + json.dump(existing_config, f) + + # Should raise ValueError when dataspace field is missing + with pytest.raises( + ValueError, match="dataspace must be defined in.*config.json" + ): + dc_config_json_from_file(temp_path) + finally: + os.remove(temp_path) + if os.path.exists(config_path): + os.remove(config_path) + + def test_raises_error_on_invalid_json(self): + """Test 
that invalid JSON in config.json raises an error.""" + + content = textwrap.dedent( + """ + from datacustomcode.client import Client + + client = Client() + df = client.read_dlo("input_dlo") + client.write_to_dlo("output_dlo", df, "overwrite") + """ + ) + temp_path = create_test_script(content) + file_dir = os.path.dirname(temp_path) + config_path = os.path.join(file_dir, "config.json") + + try: + # Create an invalid JSON file + with open(config_path, "w") as f: + f.write("{ invalid json }") + + # Should raise ValueError for invalid JSON + with pytest.raises(ValueError, match="Failed to parse JSON"): + dc_config_json_from_file(temp_path) + finally: + os.remove(temp_path) + if os.path.exists(config_path): + os.remove(config_path) + class TestDataAccessLayerCalls: """Tests for the DataAccessLayerCalls class directly."""