-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #107 from iolanta-tech/html-dedent-needed
html dedent needed
- Loading branch information
Showing
15 changed files
with
270 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule yaml-ld
updated
2 files
+5 −5 | tests/cases/html/stream.yamlld | |
+3 −5 | tests/cases/streams/one-document-out.yamlld |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,10 @@ | ||
from yaml_ld.expand import expand | ||
from yaml_ld.compact import compact | ||
from yaml_ld.parse import parse # noqa: WPS347 | ||
from yaml_ld.load_document import load_document # noqa: WPS347 | ||
from yaml_ld.to_rdf import to_rdf # noqa: WPS347 | ||
from yaml_ld.from_rdf import from_rdf # noqa: WPS347 | ||
from yaml_ld.flatten import flatten # noqa: WPS347 | ||
from yaml_ld.frame import frame | ||
|
||
__all__ = ['parse', 'expand', 'compact', 'to_rdf', 'from_rdf', 'flatten', 'frame'] # noqa: WPS410 | ||
__all__ = ['parse', 'expand', 'compact', 'to_rdf', 'from_rdf', 'flatten', 'frame', 'load_document'] # noqa: WPS410 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from dataclasses import dataclass | ||
from pathlib import Path | ||
from typing import Any, Iterable | ||
|
||
import funcy | ||
from documented import DocumentedError | ||
from pydantic import validate_call | ||
from urlpath import URL | ||
|
||
from yaml_ld.document_loaders.base import DocumentLoader, PyLDResponse | ||
|
||
|
||
@dataclass
class ProtocolNotFound(DocumentedError):
    """
    Cannot choose the loader by URL protocol.
    * URL: `{self.url}`
    * Scheme: `{self.formatted_scheme}`
    * Available schemes: {self.formatted_schemes}
    """

    # NOTE(review): the docstring above contains `{self...}` placeholders, so
    # it is presumably rendered by `DocumentedError` as the error message
    # template — keep its text unchanged and document the class here instead.

    # URL for which no loader could be selected.
    url: URL
    # Scheme names that do have a registered loader.
    schemes: Iterable[str]

    @property
    def formatted_scheme(self):
        # Human-readable scheme; an empty scheme gets an explicit placeholder.
        scheme = self.url.scheme
        if scheme:
            return scheme
        return '(empty string)'

    @property
    def formatted_schemes(self):
        # Comma-separated list of the available schemes.
        return ', '.join(map(str, self.schemes))
|
||
|
||
class ChoiceBySchemeDocumentLoader(DocumentLoader):
    """Delegate loading to one of several loaders, chosen by URL scheme."""

    # Maps a URL scheme name (for example 'file' or 'https') to its loader.
    loaders: dict[str, DocumentLoader]

    def __init__(self, **loaders: DocumentLoader) -> None:
        """Register loaders; each keyword name is the scheme it serves."""
        self.loaders = loaders

    @validate_call(config=dict(arbitrary_types_allowed=True))
    def __call__(self, source: str | Path, options: dict[str, Any]) -> PyLDResponse:
        """
        Load `source` with the loader registered for its URL scheme.

        A source without a scheme is handled by the 'file' loader.

        Raises:
            ProtocolNotFound: no loader is registered for the scheme.
        """
        url = URL(source)
        scheme = url.scheme or 'file'

        try:
            loader = self.loaders[scheme]
        except KeyError as err:
            # Chain explicitly so the traceback shows the failed lookup as
            # the direct cause rather than as an incidental nested error.
            raise ProtocolNotFound(
                schemes=self.loaders.keys(),
                url=url,
            ) from err

        return loader(source, options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import functools | ||
|
||
from yaml_ld.document_loaders.choice_by_scheme import ( | ||
ChoiceBySchemeDocumentLoader, | ||
) | ||
from yaml_ld.document_loaders.http import HTTPDocumentLoader | ||
from yaml_ld.document_loaders.local_file import LocalFileDocumentLoader | ||
|
||
|
||
# Default document loader: one backend per URL scheme. `http` and `https`
# each get their own `HTTPDocumentLoader` instance.
DEFAULT_DOCUMENT_LOADER = ChoiceBySchemeDocumentLoader(
    **{
        'file': LocalFileDocumentLoader(),
        'http': HTTPDocumentLoader(),
        'https': HTTPDocumentLoader(),
    },
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
from pathlib import Path | ||
from typing import Any | ||
|
||
import more_itertools | ||
import yaml | ||
from urlpath import URL | ||
from yaml.composer import ComposerError | ||
from yaml.constructor import ConstructorError | ||
from yaml.parser import ParserError | ||
|
||
from yaml_ld.document_loaders.base import DocumentLoader, PyLDResponse | ||
from yaml_ld.load_html import load_html | ||
from yaml_ld.loader import YAMLLDLoader | ||
|
||
|
||
class HTTPDocumentLoader(DocumentLoader):
    """Fetch a document by URL and parse it according to the URL suffix."""

    def _parse_script_content(self, content: str):
        """Parse text into a list of all YAML documents it contains."""
        return list(
            yaml.load_all(  # noqa: S506
                content,
                Loader=YAMLLDLoader,
            ),
        )

    def __call__(self, source: str | Path, options: dict[str, Any]) -> PyLDResponse:
        """
        Download `source` and return it as a loader response.

        URLs ending in `.yaml`, `.yml`, `.yamlld`, `.json` or `.jsonld` are
        parsed as YAML and only the first document of the stream is used.
        URLs ending in `.html` or `.xhtml` are passed to `load_html`.

        Raises:
            MappingKeyError: a YAML mapping has an unhashable key.
            LoadingDocumentFailed: the YAML scanner failed, or the URL suffix
                is not one of the supported ones.
            UndefinedAliasFound: the YAML composer failed.
            InvalidScriptElement: the YAML parser failed.
            DocumentIsScalar: the loaded document is not a mapping or a list
                (for HTML: `load_html` returned a string).
        """
        # Imported inside the function as in the original code — presumably to
        # avoid a circular import with `yaml_ld.errors`; TODO confirm.
        from yaml.scanner import ScannerError

        from yaml_ld.errors import (
            DocumentIsScalar,
            InvalidScriptElement,
            LoadingDocumentFailed,
            MappingKeyError,
            UndefinedAliasFound,
        )

        url = URL(source)

        if url.suffix in {'.yaml', '.yml', '.yamlld', '.json', '.jsonld'}:
            content = url.get().text

            try:
                # NOTE(review): this parses remote content with a custom
                # loader; confirm that `YAMLLDLoader` is derived from a safe
                # loader so untrusted YAML cannot construct arbitrary objects.
                yaml_document = more_itertools.first(
                    yaml.load_all(  # noqa: S506
                        stream=content,
                        Loader=YAMLLDLoader,
                    ),
                )
            except ConstructorError as err:
                if err.problem == 'found unhashable key':
                    raise MappingKeyError() from err

                raise

            except ScannerError as err:
                raise LoadingDocumentFailed(path=url) from err

            except ComposerError as err:
                raise UndefinedAliasFound() from err

            except ParserError as err:
                raise InvalidScriptElement() from err

            if not isinstance(yaml_document, (dict, list)):
                raise DocumentIsScalar(yaml_document)

            return {
                'document': yaml_document,
                'documentUrl': source,
                'contextUrl': None,
                'contentType': 'application/ld+yaml',
            }

        if url.suffix in {'.html', '.xhtml'}:
            # `.text` is already the decoded response body (a `str`), so it is
            # passed on directly; calling `.read()` on it raises AttributeError.
            content = url.get().text

            loaded_html = load_html(
                input=content,
                url=source,
                profile=None,
                options=options,
                content_type='application/ld+yaml',
                parse_script_content=self._parse_script_content,
            )

            if isinstance(loaded_html, str):
                raise DocumentIsScalar(loaded_html)

            return {
                'document': loaded_html,
                'documentUrl': source,
                'contextUrl': None,
                'contentType': 'application/ld+yaml',
            }

        raise LoadingDocumentFailed(path=url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.