Skip to content

Commit

Permalink
Merge pull request #107 from iolanta-tech/html-dedent-needed
Browse files Browse the repository at this point in the history
html dedent needed
  • Loading branch information
anatoly-scherbakov committed Jun 16, 2024
2 parents c82b767 + 876f7c0 commit 89456de
Show file tree
Hide file tree
Showing 15 changed files with 270 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[submodule "specifications/yaml-ld"]
path = specifications/yaml-ld
url = git@github.com:json-ld/yaml-ld.git
branch = 144-no-jldexpandtest-tests-with-a-yaml-ld-context
branch = one-document-from-stream-test-output-incorrect
[submodule "specifications/json-ld-api"]
path = specifications/json-ld-api
url = git@github.com:w3c/json-ld-api.git
Expand Down
14 changes: 14 additions & 0 deletions ldtest/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from yaml_ld.to_rdf import ToRDFOptions


SPECIFICATIONS_ROOT = Path(__file__).parent.parent / "specifications"


@dataclass
class TestCase:
"""JSON-LD Test Case."""
Expand All @@ -22,6 +25,17 @@ class TestCase:
extract_all_scripts: bool = False
base: str | None = None

@property
def specification(self) -> str:
return self.input_path.relative_to(SPECIFICATIONS_ROOT).parts[0]

@property
def input_path(self):
if isinstance(self.input, Path):
return self.input

return Path(self.input.path)

@property
def raw_document(self) -> bytes:
"""Read the raw input document contents."""
Expand Down
2 changes: 1 addition & 1 deletion specifications/yaml-ld
4 changes: 0 additions & 4 deletions tests/test_expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,6 @@ def test_empty_value():
assert not yaml_ld.expand(document)


@pytest.mark.xfail(
raises=JsonLdError,
reason='`pyld` does not handle `file://` paths.',
)
def test_local_context():
document = specifications_root / 'json-ld-api/tests/expand/0127-in.jsonld'
yaml_ld.expand(document)
Expand Down
20 changes: 14 additions & 6 deletions tests/test_specification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import inspect
import json
import operator
Expand All @@ -12,7 +13,7 @@
from documented import Documented, DocumentedError
from pydantic import ValidationError
from pyld import jsonld
from pyld.jsonld import load_document, _is_string
from pyld.jsonld import load_document, _is_string, requests_document_loader
from rdflib import Graph, Namespace
from rdflib_pyld_compat.convert import ( # noqa: WPS450
_rdflib_graph_from_pyld_dataset,
Expand All @@ -23,6 +24,7 @@
from ldtest.models import TestCase
from tests.common import load_tests
from tests.errors import FailureToFail
from yaml_ld.document_loaders.default import DEFAULT_DOCUMENT_LOADER
from yaml_ld.errors import YAMLLDError
from lambdas import _

Expand Down Expand Up @@ -192,7 +194,10 @@ def callable_path(self):
module_name = self.callable.__module__

# Get the object name
obj_name = self.callable.__name__
try:
obj_name = self.callable.__name__
except AttributeError:
obj_name = str(self.callable)

# Construct the import path
if module_name == "__main__":
Expand Down Expand Up @@ -236,7 +241,7 @@ def _test(test_case: TestCase, parse: Callable, expand: Callable) -> None:
**test_case.kwargs,
)

assert actual == expected, test_case.input
assert actual == expected, (test_case.input, test_case.result)

case _:
raise ValueError(f'What to do with this test? {test_case}')
Expand All @@ -252,11 +257,11 @@ def test_expand(
try:
test_against_ld_library(
test_case=test_case,
parse=yaml_ld.parse,
parse=lambda input_: yaml_ld.load_document(input_)['document'],
expand=yaml_ld.expand,
)
except (AssertionError, FailureToFail, YAMLLDError):
if test_case.input.suffix in {'.yamlld', '.yaml'}:
if test_case.specification == 'yaml-ld':
# The source document is in YAML-LD format, and we are failing on it
raise

Expand All @@ -266,7 +271,10 @@ def test_expand(
try:
test_against_ld_library(
test_case=test_case,
parse=_load_json_ld,
parse=lambda input_: jsonld.load_document(
input_,
options={'documentLoader': DEFAULT_DOCUMENT_LOADER},
)['document'],
expand=jsonld.expand,
)
except (AssertionError, FailureToFail):
Expand Down
3 changes: 2 additions & 1 deletion yaml_ld/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from yaml_ld.expand import expand
from yaml_ld.compact import compact
from yaml_ld.parse import parse # noqa: WPS347
from yaml_ld.load_document import load_document # noqa: WPS347
from yaml_ld.to_rdf import to_rdf # noqa: WPS347
from yaml_ld.from_rdf import from_rdf # noqa: WPS347
from yaml_ld.flatten import flatten # noqa: WPS347
from yaml_ld.frame import frame

__all__ = ['parse', 'expand', 'compact', 'to_rdf', 'from_rdf', 'flatten', 'frame'] # noqa: WPS410
__all__ = ['parse', 'expand', 'compact', 'to_rdf', 'from_rdf', 'flatten', 'frame', 'load_document'] # noqa: WPS410
6 changes: 4 additions & 2 deletions yaml_ld/document_loaders/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABC
from abc import ABC, abstractmethod
from pathlib import Path
from typing import TypedDict, Any

PyLDResponse = TypedDict(
Expand All @@ -12,5 +13,6 @@


class DocumentLoader(ABC):
def __call__(self, source: str, options: dict[str, Any]) -> PyLDResponse:
@abstractmethod
def __call__(self, source: str | Path, options: dict[str, Any]) -> PyLDResponse:
raise NotImplementedError()
54 changes: 54 additions & 0 deletions yaml_ld/document_loaders/choice_by_scheme.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable

import funcy
from documented import DocumentedError
from pydantic import validate_call
from urlpath import URL

from yaml_ld.document_loaders.base import DocumentLoader, PyLDResponse


@dataclass
class ProtocolNotFound(DocumentedError):
    """
    Cannot choose the loader by URL protocol.
    * URL: `{self.url}`
    * Scheme: `{self.formatted_scheme}`
    * Available schemes: {self.formatted_schemes}
    """

    # NOTE(review): the docstring above carries `{self...}` placeholders, so it
    # is presumably rendered by `DocumentedError` as the error message — keep
    # its text in sync with the fields and properties below. TODO confirm.

    # The URL for which no loader could be chosen.
    url: URL
    # The schemes that do have a registered loader.
    schemes: Iterable[str]

    @property
    def formatted_scheme(self):
        """Return the URL scheme, or a readable placeholder if it is empty."""
        scheme = self.url.scheme
        return scheme if scheme else '(empty string)'

    @property
    def formatted_schemes(self):
        """Return the available schemes as one comma-separated string."""
        return ', '.join(map(str, self.schemes))


class ChoiceBySchemeDocumentLoader(DocumentLoader):
    """Delegate loading to another loader chosen by the source's URL scheme."""

    # Maps a URL scheme (the keyword names given to `__init__`) to its loader.
    loaders: dict[str, DocumentLoader]

    def __init__(self, **loaders: DocumentLoader) -> None:
        """Register loaders; each keyword name is the scheme it handles."""
        self.loaders = loaders

    @validate_call(config=dict(arbitrary_types_allowed=True))
    def __call__(self, source: str | Path, options: dict[str, Any]) -> PyLDResponse:
        """
        Load `source` with the loader registered for its URL scheme.

        :param source: URL or path of the document to load.
        :param options: Options passed through to the chosen loader.
        :raises ProtocolNotFound: If no loader is registered for the scheme.
        """
        parsed_url = URL(source)
        # A source with an empty scheme is dispatched to the `file` loader.
        scheme = parsed_url.scheme or 'file'

        try:
            chosen_loader = self.loaders[scheme]
        except KeyError:
            raise ProtocolNotFound(
                url=parsed_url,
                schemes=self.loaders.keys(),
            )
        else:
            return chosen_loader(source, options)
14 changes: 14 additions & 0 deletions yaml_ld/document_loaders/default.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import functools

from yaml_ld.document_loaders.choice_by_scheme import (
ChoiceBySchemeDocumentLoader,
)
from yaml_ld.document_loaders.http import HTTPDocumentLoader
from yaml_ld.document_loaders.local_file import LocalFileDocumentLoader


# Document loader that dispatches on the URL scheme of the source.
# Each keyword name is the scheme it serves; `http` and `https` each get
# their own `HTTPDocumentLoader` instance.
DEFAULT_DOCUMENT_LOADER = ChoiceBySchemeDocumentLoader(
    file=LocalFileDocumentLoader(),
    http=HTTPDocumentLoader(),
    https=HTTPDocumentLoader(),
)
92 changes: 92 additions & 0 deletions yaml_ld/document_loaders/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from pathlib import Path
from typing import Any

import more_itertools
import yaml
from urlpath import URL
from yaml.composer import ComposerError
from yaml.constructor import ConstructorError
from yaml.parser import ParserError

from yaml_ld.document_loaders.base import DocumentLoader, PyLDResponse
from yaml_ld.load_html import load_html
from yaml_ld.loader import YAMLLDLoader


class HTTPDocumentLoader(DocumentLoader):
    """Fetch a document by URL and parse it as YAML/JSON or as HTML."""

    def _parse_script_content(self, content: str):
        """
        Parse `content` as a YAML stream and return all its documents.

        Handed to `load_html` as its `parse_script_content` callback.
        """
        return list(
            yaml.load_all(
                content,
                Loader=YAMLLDLoader,
            ),
        )

    def __call__(self, source: str | Path, options: dict[str, Any]) -> PyLDResponse:
        """
        Download `source` and return it as a PyLD-style response dictionary.

        The URL suffix decides how the response body is interpreted:

        * `.yaml`, `.yml`, `.yamlld`, `.json`, `.jsonld`: parsed as YAML, and
          only the first document of the stream is used;
        * `.html`, `.xhtml`: handed to `load_html`.

        :param source: URL of the document to fetch.
        :param options: Loader options, forwarded to `load_html`.
        :raises MappingKeyError: A YAML mapping key is unhashable.
        :raises UndefinedAliasFound: The YAML composer failed.
        :raises InvalidScriptElement: The YAML parser failed.
        :raises DocumentIsScalar: The loaded document is not a dict or a list
            (or, for HTML, `load_html` returned a string).
        :raises LoadingDocumentFailed: The YAML scanner failed, or the URL
            suffix is not one of the supported ones.
        """
        # Kept as function-level imports, as in the original code; presumably
        # this avoids a circular import with `yaml_ld.errors` — TODO confirm.
        from yaml.scanner import ScannerError

        from yaml_ld.errors import (
            DocumentIsScalar,
            InvalidScriptElement,
            LoadingDocumentFailed,
            MappingKeyError,
            UndefinedAliasFound,
        )

        url = URL(source)

        if url.suffix in {'.yaml', '.yml', '.yamlld', '.json', '.jsonld'}:
            content = url.get().text

            try:
                # NOTE(review): `more_itertools.first` raises `ValueError` on
                # an empty stream when no default is given; that case is not
                # converted into a YAML-LD error here — confirm it is intended.
                yaml_document = more_itertools.first(
                    yaml.load_all(  # noqa: S506
                        stream=content,
                        Loader=YAMLLDLoader,
                    ),
                )
            except ConstructorError as err:
                if err.problem == 'found unhashable key':
                    raise MappingKeyError() from err

                raise

            except ScannerError as err:
                raise LoadingDocumentFailed(path=url) from err

            except ComposerError as err:
                raise UndefinedAliasFound() from err

            except ParserError as err:
                raise InvalidScriptElement() from err

            if not isinstance(yaml_document, (dict, list)):
                raise DocumentIsScalar(yaml_document)

            return {
                'document': yaml_document,
                'documentUrl': source,
                'contextUrl': None,
                'contentType': 'application/ld+yaml',
            }

        if url.suffix in {'.html', '.xhtml'}:
            # `.text` is already a decoded string, not a file object, so it is
            # passed as-is; calling `.read()` on it would raise AttributeError.
            content = url.get().text

            loaded_html = load_html(
                input=content,
                url=source,
                profile=None,
                options=options,
                content_type='application/ld+yaml',
                parse_script_content=self._parse_script_content,
            )

            if isinstance(loaded_html, str):
                raise DocumentIsScalar(loaded_html)

            return {
                'document': loaded_html,
                'documentUrl': source,
                'contextUrl': None,
                'contentType': 'application/ld+yaml',
            }

        raise LoadingDocumentFailed(path=url)
25 changes: 17 additions & 8 deletions yaml_ld/document_loaders/local_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
from typing import Any

import more_itertools
import yaml
from urlpath import URL
from yaml.composer import ComposerError
Expand All @@ -14,7 +15,15 @@


class LocalFileDocumentLoader(DocumentLoader):
def __call__(self, source: str, options: dict[str, Any]) -> PyLDResponse:
def _parse_script_content(self, content: str):
return list(
yaml.load_all(
content,
Loader=YAMLLDLoader,
),
)

def __call__(self, source: str | Path, options: dict[str, Any]) -> PyLDResponse:
from yaml_ld.errors import LoadingDocumentFailed, DocumentIsScalar

path = Path(URL(source).path)
Expand All @@ -25,9 +34,11 @@ def __call__(self, source: str, options: dict[str, Any]) -> PyLDResponse:

from yaml.scanner import ScannerError
try:
yaml_document = yaml.load( # noqa: S506
stream=f.read(),
Loader=YAMLLDLoader,
yaml_document = more_itertools.first(
yaml.load_all( # noqa: S506
stream=f.read(),
Loader=YAMLLDLoader,
),
)
except ConstructorError as err:
if err.problem == 'found unhashable key':
Expand Down Expand Up @@ -63,10 +74,8 @@ def __call__(self, source: str, options: dict[str, Any]) -> PyLDResponse:
url=source,
profile=None,
options=options,
parse_script_content=functools.partial(
yaml.load,
Loader=YAMLLDLoader,
),
content_type='application/ld+yaml',
parse_script_content=self._parse_script_content,
)

if isinstance(loaded_html, str):
Expand Down
Loading

0 comments on commit 89456de

Please sign in to comment.