Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ After modifying the `entrypoint.py` as needed, using any dependencies you add in
4. The SDK automatically packages all dependencies when you run `datacustomcode zip`

```zsh
cd my_package
datacustomcode scan ./payload/entrypoint.py
datacustomcode deploy --path ./payload --name my_custom_script --cpu-size CPU_L
```
Expand Down Expand Up @@ -183,7 +184,7 @@ Options:


#### `datacustomcode init`
Initialize a new development environment with a template.
Initialize a new development environment with a code package template.

Argument:
- `DIRECTORY`: Directory to create project in (default: ".")
Expand Down Expand Up @@ -213,19 +214,19 @@ Options:


#### `datacustomcode zip`
Zip a transformation job in preparation to upload to Data Cloud.
Zip a transformation job in preparation to upload to Data Cloud. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command.

Options:
- `--path TEXT`: Path to the code directory (default: ".")
- `--path TEXT`: Path to the code directory, i.e., the payload folder (default: ".")
- `--network TEXT`: docker network (default: "default")


#### `datacustomcode deploy`
Deploy a transformation job to Data Cloud.
Deploy a transformation job to Data Cloud. Note that this command takes care of creating a zip file from the provided path before deployment. Make sure to change directory into your code package folder (e.g., `my_package`) before running this command.

Options:
- `--profile TEXT`: Credential profile name (default: "default")
- `--path TEXT`: Path to the code directory (default: ".")
- `--path TEXT`: Path to the code directory, i.e., the payload folder (default: ".")
- `--name TEXT`: Name of the transformation job [required]
- `--version TEXT`: Version of the transformation job (default: "0.0.1")
- `--description TEXT`: Description of the transformation job (default: "")
Expand Down
24 changes: 22 additions & 2 deletions src/datacustomcode/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,29 @@ def _make_api_call(
logger.debug(f"Request params: {kwargs}")

response = requests.request(method=method, url=url, headers=headers, **kwargs)
json_response = response.json()
if response.status_code >= 400:
logger.debug(f"Error Response: {json_response}")
logger.debug(f"Error Response Status: {response.status_code}")
logger.debug(f"Error Response Headers: {response.headers}")
logger.debug(f"Error Response Text: {response.text[:500]}")

if not response.text or response.text.strip() == "":
response.raise_for_status()
raise ValueError(
f"Received empty response from {method} {url}. "
f"Status code: {response.status_code}"
)

try:
json_response = response.json()
except requests.exceptions.JSONDecodeError as e:
logger.error(f"Failed to parse JSON response. Status: {response.status_code}")
logger.error(f"Response text: {response.text[:500]}")
raise ValueError(
f"Invalid JSON response from {method} {url}. "
f"Status code: {response.status_code}, "
f"Response: {response.text[:200]}"
) from e

response.raise_for_status()
assert isinstance(
json_response, dict
Expand Down
41 changes: 40 additions & 1 deletion src/datacustomcode/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from __future__ import annotations

import ast
import json
import logging
import os
import sys
from typing import (
Expand All @@ -29,12 +31,14 @@

from datacustomcode.version import get_version

logger = logging.getLogger(__name__)

DATA_ACCESS_METHODS = ["read_dlo", "read_dmo", "write_to_dlo", "write_to_dmo"]

DATA_TRANSFORM_CONFIG_TEMPLATE = {
"sdkVersion": get_version(),
"entryPoint": "",
"dataspace": "default",
"dataspace": "",
"permissions": {
"read": {},
"write": {},
Expand Down Expand Up @@ -232,6 +236,40 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
config = DATA_TRANSFORM_CONFIG_TEMPLATE.copy()
config["entryPoint"] = file_path.rpartition("/")[-1]

file_dir = os.path.dirname(file_path)
config_json_path = os.path.join(file_dir, "config.json")

if os.path.exists(config_json_path) and os.path.isfile(config_json_path):
try:
with open(config_json_path, "r") as f:
existing_config = json.load(f)

if "dataspace" in existing_config:
dataspace_value = existing_config["dataspace"]
if not dataspace_value or (
isinstance(dataspace_value, str) and dataspace_value.strip() == ""
):
logger.warning(
f"dataspace in {config_json_path} is empty or None. "
f"Updating config file to use dataspace 'default'. "
)
config["dataspace"] = "default"
else:
config["dataspace"] = dataspace_value
else:
raise ValueError(
f"dataspace must be defined in {config_json_path}. "
f"Please add a 'dataspace' field to the config.json file. "
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The diff between missing, null, or empty string is subtle in my mind. It might be simpler to just default to "default" value for all of those cases, instead of raising an exception specifically if it's missing?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

as per our internal follow-up discussion, I'm planning to go with throwing an error when dataspace is excluded by customer explicitly for now. I plan to modify this behavior in case there is feedback indicating some need to handle this differently.

except json.JSONDecodeError as e:
raise ValueError(
f"Failed to parse JSON from {config_json_path}: {e}"
) from e
except OSError as e:
raise OSError(f"Failed to read config file {config_json_path}: {e}") from e
else:
config["dataspace"] = "default"

read: dict[str, list[str]] = {}
if output.read_dlo:
read["dlo"] = list(output.read_dlo)
Expand All @@ -244,4 +282,5 @@ def dc_config_json_from_file(file_path: str) -> dict[str, Any]:
write["dmo"] = list(output.write_to_dmo)

config["permissions"] = {"read": read, "write": write}

return config
232 changes: 232 additions & 0 deletions tests/test_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,238 @@ def test_dmo_to_dmo_config(self):
finally:
os.remove(temp_path)

@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {
            "read": {},
            "write": {},
        },
    },
)
def test_preserves_existing_dataspace(self):
    """A user-set dataspace in an existing config.json survives a re-scan."""
    import json

    source = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )
    temp_path = create_test_script(source)
    config_path = os.path.join(os.path.dirname(temp_path), "config.json")

    try:
        # Seed a config.json next to the script carrying a custom dataspace
        # plus stale permissions that the scan should overwrite.
        prior_config = {
            "sdkVersion": "1.0.0",
            "entryPoint": "test.py",
            "dataspace": "my_custom_dataspace",
            "permissions": {
                "read": {"dlo": ["old_dlo"]},
                "write": {"dlo": ["old_output"]},
            },
        }
        with open(config_path, "w") as handle:
            json.dump(prior_config, handle)

        # Regeneration keeps the custom dataspace but rescans permissions.
        generated = dc_config_json_from_file(temp_path)
        assert generated["dataspace"] == "my_custom_dataspace"
        assert generated["permissions"]["read"]["dlo"] == ["input_dlo"]
        assert generated["permissions"]["write"]["dlo"] == ["output_dlo"]
    finally:
        os.remove(temp_path)
        if os.path.exists(config_path):
            os.remove(config_path)

@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {
            "read": {},
            "write": {},
        },
    },
)
def test_uses_default_for_empty_dataspace(self, caplog):
    """An empty dataspace falls back to "default" and emits a warning."""
    import json
    import logging

    source = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )
    temp_path = create_test_script(source)
    config_path = os.path.join(os.path.dirname(temp_path), "config.json")

    try:
        # Seed a config.json whose dataspace is an empty string.
        prior_config = {
            "sdkVersion": "1.0.0",
            "entryPoint": "test.py",
            "dataspace": "",
            "permissions": {
                "read": {"dlo": ["old_dlo"]},
                "write": {"dlo": ["old_output"]},
            },
        }
        with open(config_path, "w") as handle:
            json.dump(prior_config, handle)

        # An empty dataspace is tolerated: "default" is substituted
        # instead of raising.
        with caplog.at_level(logging.WARNING):
            generated = dc_config_json_from_file(temp_path)

        assert generated["dataspace"] == "default"
        assert generated["permissions"]["read"]["dlo"] == ["input_dlo"]
        assert generated["permissions"]["write"]["dlo"] == ["output_dlo"]

        # The fallback must be surfaced to the user via a warning log.
        assert caplog.records
        assert any(
            "dataspace" in record.message.lower()
            and "empty" in record.message.lower()
            for record in caplog.records
        )
    finally:
        os.remove(temp_path)
        if os.path.exists(config_path):
            os.remove(config_path)

@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {
            "read": {},
            "write": {},
        },
    },
)
def test_uses_default_dataspace_when_no_config(self):
    """Without a config.json on disk, the dataspace defaults to "default"."""
    source = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )
    temp_path = create_test_script(source)

    try:
        # No config.json exists alongside the script, so the scanner
        # must fall back to the "default" dataspace.
        generated = dc_config_json_from_file(temp_path)
        assert generated["dataspace"] == "default"
        assert generated["permissions"]["read"]["dlo"] == ["input_dlo"]
        assert generated["permissions"]["write"]["dlo"] == ["output_dlo"]
    finally:
        os.remove(temp_path)

@patch(
    "datacustomcode.scan.DATA_TRANSFORM_CONFIG_TEMPLATE",
    {
        "sdkVersion": "1.2.3",
        "entryPoint": "",
        "dataspace": "",
        "permissions": {
            "read": {},
            "write": {},
        },
    },
)
def test_rejects_missing_dataspace(self):
    """A config.json lacking the dataspace field is a hard ValueError."""
    import json

    source = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )
    temp_path = create_test_script(source)
    config_path = os.path.join(os.path.dirname(temp_path), "config.json")

    try:
        # Seed a config.json with no "dataspace" key at all — distinct
        # from an empty value, which is tolerated with a warning.
        prior_config = {
            "sdkVersion": "1.0.0",
            "entryPoint": "test.py",
            "permissions": {
                "read": {"dlo": ["old_dlo"]},
                "write": {"dlo": ["old_output"]},
            },
        }
        with open(config_path, "w") as handle:
            json.dump(prior_config, handle)

        # An explicitly omitted dataspace must abort the scan.
        with pytest.raises(
            ValueError, match="dataspace must be defined in.*config.json"
        ):
            dc_config_json_from_file(temp_path)
    finally:
        os.remove(temp_path)
        if os.path.exists(config_path):
            os.remove(config_path)

def test_raises_error_on_invalid_json(self):
    """Malformed JSON in config.json surfaces as a ValueError."""
    source = textwrap.dedent(
        """
        from datacustomcode.client import Client

        client = Client()
        df = client.read_dlo("input_dlo")
        client.write_to_dlo("output_dlo", df, "overwrite")
        """
    )
    temp_path = create_test_script(source)
    config_path = os.path.join(os.path.dirname(temp_path), "config.json")

    try:
        # Write syntactically broken JSON next to the script.
        with open(config_path, "w") as handle:
            handle.write("{ invalid json }")

        # The parse failure must be re-raised as a ValueError.
        with pytest.raises(ValueError, match="Failed to parse JSON"):
            dc_config_json_from_file(temp_path)
    finally:
        os.remove(temp_path)
        if os.path.exists(config_path):
            os.remove(config_path)


class TestDataAccessLayerCalls:
"""Tests for the DataAccessLayerCalls class directly."""
Expand Down