Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/specify_cli/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,18 @@ def __init__(self, manifest_path: Path):
def _load_yaml(self, path: Path) -> dict:
"""Load YAML file safely."""
try:
with open(path, 'r') as f:
with open(path, 'r', encoding='utf-8') as f:
data = yaml.safe_load(f)
except yaml.YAMLError as e:
raise ValidationError(f"Invalid YAML in {path}: {e}")
except FileNotFoundError:
raise ValidationError(f"Manifest not found: {path}")
except UnicodeDecodeError as e:
raise ValidationError(
f"Manifest is not valid UTF-8: {path} ({e.reason} at byte {e.start})"
)
except OSError as e:
raise ValidationError(f"Could not read manifest {path}: {e}")
if not isinstance(data, dict):
raise ValidationError(
f"Manifest must be a YAML mapping, got {type(data).__name__}: {path}"
Expand Down
17 changes: 15 additions & 2 deletions src/specify_cli/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,25 @@ def __init__(self, manifest_path: Path):
def _load_yaml(self, path: Path) -> dict:
    """Load a preset manifest from a YAML file and return its root mapping.

    The file is always decoded as UTF-8 instead of relying on the
    platform's default text encoding. An empty document is treated as an
    empty mapping.

    Args:
        path: Location of the manifest file.

    Returns:
        The parsed YAML mapping, or ``{}`` when the document is empty.

    Raises:
        PresetValidationError: If the file is missing or cannot be read,
            is not valid UTF-8, is not valid YAML, or its root node is
            not a mapping.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise PresetValidationError(f"Invalid YAML in {path}: {e}") from e
    except FileNotFoundError as e:
        raise PresetValidationError(f"Manifest not found: {path}") from e
    except UnicodeDecodeError as e:
        raise PresetValidationError(
            f"Manifest is not valid UTF-8: {path} ({e.reason} at byte {e.start})"
        ) from e
    except OSError as e:
        # Kept after FileNotFoundError: that class is an OSError subclass and
        # has its own, more specific message above.
        raise PresetValidationError(f"Could not read manifest {path}: {e}") from e
    if data is None:
        # safe_load returned nothing (empty document): treat as empty manifest.
        return {}
    if not isinstance(data, dict):
        raise PresetValidationError(
            f"Manifest must be a YAML mapping, got {type(data).__name__}: {path}"
        )
    return data

def _validate(self):
"""Validate manifest structure and required fields."""
Expand Down
29 changes: 29 additions & 0 deletions tests/test_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,35 @@ def test_non_mapping_yaml_raises_validation_error(self, temp_dir):
with pytest.raises(ValidationError, match="YAML mapping"):
ExtensionManifest(manifest_path)

def test_utf8_non_ascii_description_loads(self, temp_dir, valid_manifest_data):
    """Regression for #2325: a UTF-8 description with non-ASCII text loads everywhere.

    On Windows the default text-mode encoding is the locale codepage
    (e.g. cp1252/GBK), so reading UTF-8 bytes beyond ASCII raises
    UnicodeDecodeError unless the loader passes encoding='utf-8'.
    """
    import yaml

    description = "中文测试 — émojis 🚀"
    valid_manifest_data["extension"]["description"] = description

    # Dump to text, then encode ourselves: the file is written as raw UTF-8
    # bytes so only the read path depends on the loader's encoding choice.
    dumped = yaml.safe_dump(valid_manifest_data, allow_unicode=True)
    target = temp_dir / "extension.yml"
    target.write_bytes(dumped.encode("utf-8"))

    loaded = ExtensionManifest(target)
    assert loaded.description == description

def test_invalid_utf8_bytes_raises_validation_error(self, temp_dir):
    """Undecodable manifest bytes surface as ValidationError, not a raw UnicodeDecodeError."""
    # Neither 0xFF nor 0xFE can start a UTF-8 sequence.
    garbage = b"\xff\xfe not valid utf-8 \xff\n"
    target = temp_dir / "extension.yml"
    target.write_bytes(garbage)

    with pytest.raises(ValidationError, match="not valid UTF-8"):
        ExtensionManifest(target)
Comment thread
mnriem marked this conversation as resolved.

def test_invalid_extension_id(self, temp_dir, valid_manifest_data):
"""Test manifest with invalid extension ID format."""
import yaml
Expand Down
32 changes: 32 additions & 0 deletions tests/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,38 @@ def test_invalid_yaml(self, temp_dir):
with pytest.raises(PresetValidationError, match="Invalid YAML"):
PresetManifest(bad_file)

def test_utf8_non_ascii_description_loads(self, temp_dir, valid_pack_data):
    """Regression for #2325: a UTF-8 description with non-ASCII text loads everywhere.

    On Windows the default text-mode encoding is the locale codepage
    (e.g. cp1252/GBK), so reading UTF-8 bytes beyond ASCII raises
    UnicodeDecodeError unless the loader passes encoding='utf-8'.
    """
    description = "中文测试 — émojis 🚀"
    valid_pack_data["preset"]["description"] = description

    # Encode explicitly so the bytes on disk are UTF-8 on every platform.
    dumped = yaml.safe_dump(valid_pack_data, allow_unicode=True)
    target = temp_dir / "preset.yml"
    target.write_bytes(dumped.encode("utf-8"))

    loaded = PresetManifest(target)
    assert loaded.description == description

def test_invalid_utf8_bytes_raises_validation_error(self, temp_dir):
    """Undecodable manifest bytes surface as PresetValidationError, not a raw UnicodeDecodeError."""
    garbage = b"\xff\xfe not valid utf-8 \xff\n"
    target = temp_dir / "preset.yml"
    target.write_bytes(garbage)

    with pytest.raises(PresetValidationError, match="not valid UTF-8"):
        PresetManifest(target)
Comment thread
mnriem marked this conversation as resolved.

def test_non_mapping_yaml_raises_validation_error(self, temp_dir):
    """A scalar or list at the YAML root raises PresetValidationError, not TypeError."""
    target = temp_dir / "preset.yml"
    # One scalar document and one sequence document; neither is a mapping.
    non_mappings = ("42\n", "[1, 2]\n")
    for payload in non_mappings:
        target.write_text(payload, encoding="utf-8")
        with pytest.raises(PresetValidationError, match="YAML mapping"):
            PresetManifest(target)

def test_missing_schema_version(self, temp_dir, valid_pack_data):
"""Test missing schema_version field."""
del valid_pack_data["schema_version"]
Expand Down
Loading