Skip to content

Commit

Permalink
Add content attributes JSON schema validation
Browse files Browse the repository at this point in the history
JSON schema validation is now available from the Snippy
Plugins module. It is used to verify that a parsed tldr
man page meets the requirements for Snippy tool
content.

Signed-off-by: Heikki Laaksonen <laaksonen.heikki.j@gmail.com>
  • Loading branch information
heilaaks committed Jun 23, 2019
1 parent c84b734 commit d4e5ec9
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 76 deletions.
135 changes: 81 additions & 54 deletions snippy_tldr/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,27 @@
try:
from snippy.plugins import Const
from snippy.plugins import Parser
from snippy.plugins import Schema

except ImportError:
from tests.conftest import Const
from tests.conftest import Parser
from tests.conftest import Schema


def snippy_import_hook(logger, uri, validator, parser):
"""Import notes for Snippy.
def snippy_import_hook(logger, uri):
"""Import content for Snippy.
This is an import hook that returns an iterator object. The iterator must
be an iterable class that implements next() and len() methods. The iterator
must return JSON structures that pass the ``validate.note()``.
This is an import hook that must return an iterator object. The iterator
must be an iterable class that implements ``next`` and ``len`` methods. The
returned JSON must pass the ``snippy.plugins.Schema.validate()`` method.
If suitable, the ``parse`` object may be used to parse the source data to
JSON note for Snippy.
The ``snippy.plugins.Parser`` class may be used to parse the source data
to a JSON content for Snippy.
The ``tldr man pages`` project seems use term ``page`` loosely in different
contexts. In order to try to provide a bit more maintainable code, a more
detailed terms have been invented for this project::
The ``tldr man pages`` project seems to use the term ``page`` loosely in
different contexts. To keep the code more maintainable, more detailed
terms have been defined for this project::
# Tldr man pages hierarchical layers.
#
Expand All @@ -69,52 +72,77 @@ def snippy_import_hook(logger, uri, validator, parser):
+ alpine.md
apk.md
================== ======================================================================
Term Decscription
================== ======================================================================
*page* | A single tldr page like ``common``, ``linux`` or ``windows``.
============= ======================================================================
Term Description
============= ======================================================================
*page* | A single tldr page like ``common``, ``linux`` or ``windows``.
*pages* | All tldr pages under one translation.
*pages* | All tldr pages under one translation.
*translation* | A tldr man page page translation like ``pages.it`` or ``pages.zh``.
*translation* | A tldr man page page translation like ``pages.it`` or ``pages.zh``.
*tldr file* | A single tldr man page Markdown file. The term ``tldr man page`` is not used in order to
| avoid confusion with term ``page``.
*tldr file* | A single tldr man page Markdown file. The term ``tldr man page`` is not used in order to
| avoid confusion with term ``page``.
*tldr files* | All tldr man page Markdown files under one page.
================== ======================================================================
*tldr files* | All tldr man page Markdown files under one page.
============= ======================================================================
Args:
validate (obj): A ``SnippyNotesValidator`` object to validate JSON notes.
parse (obj): A ``SnippyNotesParser`` to parse notes attributes.
uri (str): The value of ``-f|--file`` command line option from Snippy tool.
logger (obj): Logger to be used with the plugin.
uri (str): URI or path where the data is imported.
Returns:
obj: Iterator object to store JSON notes.
obj: Iterator object that stores the JSON content from the plugin.
Examples
--------
>>> def __init__(self, validator, parser, uri):
>>> self.validate = Validator
>>> self.parse = parser
>>> self.uri = uri
>>> self._parse_notes()
>>> from snippy.plugins import Const
>>> from snippy.plugins import Parser
>>> from snippy.plugins import Schema
>>>
>>> class SnippyTldr(object):
>>>
>>> def __init__(self, logger, uri):
>>> self._logger = logger
>>> self._uri = uri
>>> self._schema = Schema()
>>> self._content = []
>>> self._i = 0
>>>
>>> self._read_tldr_files()
>>>
>>> def __len__(self):
>>> return len(self._content)
>>>
>>> def __iter__(self):
>>> return self
>>>
>>> def _parse_notes(self):
>>> filename = 'notes-list.md'
>>> with open(filename, 'w') as infile:
>>> note = self._parse_note(infile)
>>> if validate.note(note):
>>> self.notes.append(note)
>>> def next(self):
>>> if self._i < len(self):
>>> content = self._content[self._i]
>>> self._i += 1
>>> else:
>>> raise StopIteration
>>>
>>> def _parse_note(self, infile)
>>> note = {}
>>> note['category'] = self.parse
>>> return content
>>>
>>> __next__ = next # Python 3 compatible iterator.
>>>
>>> def _read_tldr_files(self):
>>> with open('alpine.md', 'r') as infile:
>>> tldr = self._parse_file(infile.read())
>>> if self._schema.validate(tldr):
>>> self._content.append(tldr)
>>>
>>> def _parse_file(self, infile):
>>> content = {}
>>> content['category'] = Const.SNIPPET
>>> content['data'] = Parser.format_data(['first line', 'second line'])
>>>
>>> return content
"""

tldr = SnippyTldr(logger, uri, validator, parser)

return tldr
return SnippyTldr(logger, uri)


class SnippyTldr(object): # pylint: disable=too-many-instance-attributes
Expand Down Expand Up @@ -240,13 +268,12 @@ class SnippyTldr(object): # pylint: disable=too-many-instance-attributes
re.MULTILINE | re.VERBOSE,
)

def __init__(self, logger, uri, validator, parser):
def __init__(self, logger, uri):
self._logger = logger
self._validate = validator
self._parse = parser
self._uri = self._get_uri(uri)
self._uri_scheme = urlparse(self._uri).scheme
self._uri_path = urlparse(self._uri).path
self._schema = Schema()
self._snippets = []
self._i = 0

Expand Down Expand Up @@ -377,15 +404,14 @@ def _read_tldr_page_files(self, uri):
files = []
if "http" in uri:
response = requests.get(uri.strip("/"))
print(response.text)
names = sorted(set(self.RE_CATCH_TLDR_FILENAME.findall(response.text)))
for filename in names:
files.append(self._join_paths(uri, filename))
files = files[:3]
# files = files[:3]
filenames = {tldr_page: files}
else:
print("local files")
print("tldr files: %s" % filenames)
# print("tldr files: %s" % filenames)
return filenames

def _get_tldr_pages(self):
Expand All @@ -406,8 +432,8 @@ def _get_tldr_pages(self):
pages = []
if "http" in self._uri_scheme:
html = requests.get(self._uri.strip("/")).text
print("====")
print("responses %s" % html)
# print("====")
# print("responses %s" % html)
pages = self.RE_CATCH_TLDR_PAGE_HTML.findall(html)
else:
try:
Expand All @@ -424,7 +450,7 @@ def _get_tldr_pages(self):
for page in pages:
pages_.append(self._join_paths(self._uri, page))
self._logger.debug("parsed tldr pages: %s", pages_)
print("pages: %s" % pages_)
# print("pages: %s" % pages_)

return pages_

Expand Down Expand Up @@ -453,20 +479,21 @@ def _read_tldr_file(self, page, uri):
"https://raw.githubusercontent.com/tldr-pages/tldr", uri
).strip("/")
self._logger.debug("request tldr file: %s", uri_)
print("read uri: %s" % uri_)
# print("read uri: %s" % uri_)
tldr_file = requests.get(uri_).text
else:
with open(uri, "r") as infile:
self._logger.debug("read tldr file: %s", uri_)
tldr_file = infile.read()

snippet = self._parse_tldr_page(page, uri, tldr_file)
print("snippet: %s" % snippet)
if snippet:
if snippet and self._schema.validate(snippet):
print("validated")
self._snippets.append(snippet)
else:
print("not validated")
self._logger.debug(
"failed to parse tldr man page: %s :from: %s", uri, snippet
"failed to parse tldr man page: %s :from: %s", uri, tldr_file
)

def _join_paths(self, uri, path_object):
Expand Down
8 changes: 8 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,11 @@ def format_groups(cls, category, value):
@classmethod
def format_tags(cls, category, value):
"""Dummy method for mock."""


class Schema(object):  # pylint: disable=too-few-public-methods
    """Dummy stand-in for ``snippy.plugins.Schema``.

    The plugin module imports this class as a fallback when the
    ``snippy.plugins`` import raises ``ImportError``.
    """

    @classmethod
    def validate(cls, document):
        """Accept every document.

        The plugin only appends a parsed snippet when ``validate`` returns a
        truthy value, so the dummy must return ``True`` explicitly. An
        implicit ``None`` would reject every snippet under the fallback.

        Args:
            document (dict): Parsed content to validate. Not inspected here.

        Returns:
            bool: Always ``True``.
        """

        return True
39 changes: 17 additions & 22 deletions tests/test_snippy_tldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,9 @@ def test_read_github_uri_001():
Read the default tldr man page when URI is not provided.
"""

# Read the default tldr man page when URI is not provided.
responses.add(
responses.GET,
"https://github.com/tldr-pages/tldr/tree/master/pages/linux",
json={},
status=200,
)
_ = SnippyTldr(Logger(), "", None, None)
requests = ["https://github.com/tldr-pages/tldr/tree/master/pages/linux"]
responses.add(responses.GET, requests.pop(0), json={}, status=200)
SnippyTldr(Logger(), "")
assert len(responses.calls) == 1

@staticmethod
Expand Down Expand Up @@ -83,7 +78,7 @@ def test_read_github_uri_002():
responses.add(responses.GET, requests.pop(0), body=body, status=200)
for _ in range(len(requests)):
responses.add(responses.GET, requests.pop(0), body=body, status=200)
_ = SnippyTldr(Logger(), uri, None, None)
_ = SnippyTldr(Logger(), uri)
assert len(responses.calls) == 5

@staticmethod
Expand Down Expand Up @@ -113,7 +108,7 @@ def test_read_github_uri_003():
responses.add(responses.GET, requests.pop(0), body=body, status=200)
for _ in range(len(requests)):
responses.add(responses.GET, requests.pop(0), body=body, status=200)
_ = SnippyTldr(Logger(), uri, None, None)
_ = SnippyTldr(Logger(), uri)
assert len(responses.calls) == 5

@staticmethod
Expand Down Expand Up @@ -143,7 +138,7 @@ def test_read_github_uri_004():
responses.add(responses.GET, requests.pop(0), body=body, status=200)
for _ in range(len(requests)):
responses.add(responses.GET, requests.pop(0), body=body, status=200)
_ = SnippyTldr(Logger(), uri, None, None)
_ = SnippyTldr(Logger(), uri)
assert len(responses.calls) == 5

@staticmethod
Expand Down Expand Up @@ -172,7 +167,7 @@ def test_read_github_uri_005():
responses.add(responses.GET, requests.pop(0), body=body, status=200)
for _ in range(len(requests)):
responses.add(responses.GET, requests.pop(0), body=body, status=200)
_ = SnippyTldr(Logger(), uri, None, None)
_ = SnippyTldr(Logger(), uri)
assert len(responses.calls) == 5

@staticmethod
Expand All @@ -193,7 +188,7 @@ def test_read_github_uri_006():
responses.add(responses.GET, requests.pop(0), body=body, status=200)
for _ in range(len(requests)):
responses.add(responses.GET, requests.pop(0), body=body, status=200)
_ = SnippyTldr(Logger(), uri, None, None)
_ = SnippyTldr(Logger(), uri)
assert len(responses.calls) == 2

@staticmethod
Expand Down Expand Up @@ -221,7 +216,7 @@ def test_read_github_uri_007():
responses.add(responses.GET, requests.pop(0), body="", status=200)
responses.add(responses.GET, requests.pop(0), body="", status=200)
responses.add(responses.GET, requests.pop(0), body="", status=200)
_ = SnippyTldr(Logger(), uri, None, None)
_ = SnippyTldr(Logger(), uri)
assert len(responses.calls) == 4

@staticmethod
Expand All @@ -240,12 +235,12 @@ def test_read_github_file_001():
uri_req = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages.pt-BR/linux/alpine.md"
body = ""
responses.add(responses.GET, uri_req, body=body, status=200)
_ = SnippyTldr(Logger(), uri_cli, None, None)
_ = SnippyTldr(Logger(), uri_cli)

uri_cli = "https://github.com/tldr-pages/tldr/blob/master/pages/osx/alpine.md"
uri_req = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages/osx/alpine.md"
responses.add(responses.GET, uri_req, body=body, status=200)
_ = SnippyTldr(Logger(), uri_cli, None, None)
_ = SnippyTldr(Logger(), uri_cli)

@staticmethod
@responses.activate
Expand All @@ -264,12 +259,12 @@ def test_read_github_file_002():
uri_req = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages.pt-BR/linux/alpine.md"
body = ""
responses.add(responses.GET, uri_req, body=body, status=200)
_ = SnippyTldr(Logger(), uri_cli, None, None)
_ = SnippyTldr(Logger(), uri_cli)

uri_cli = "https://github.com/tldr-pages/tldr/tree/master/pages/osx/alpine.md"
uri_req = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages/osx/alpine.md"
responses.add(responses.GET, uri_req, body=body, status=200)
_ = SnippyTldr(Logger(), uri_cli, None, None)
_ = SnippyTldr(Logger(), uri_cli)

@staticmethod
@responses.activate
Expand All @@ -285,12 +280,12 @@ def test_read_github_file_003():
uri_req = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages.pt-BR/linux/alpine.md"
body = ""
responses.add(responses.GET, uri_req, body=body, status=200)
_ = SnippyTldr(Logger(), uri_cli, None, None)
_ = SnippyTldr(Logger(), uri_cli)

uri_cli = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages/osx/alpine.md"
uri_req = "https://raw.githubusercontent.com/tldr-pages/tldr/master/pages/osx/alpine.md"
responses.add(responses.GET, uri_req, body=body, status=200)
_ = SnippyTldr(Logger(), uri_cli, None, None)
_ = SnippyTldr(Logger(), uri_cli)

@staticmethod
@pytest.mark.skip(reason="no way of currently testing this")
Expand All @@ -316,7 +311,7 @@ def test_999():
# uri = 'file:../tldr/pages/linux/alpine.md'
# uri = 'file:../tld'

# uri = "https://github.com/tldr-pages/tldr/tree/master/pages/linux/"
uri = "https://github.com/tldr-pages/tldr/tree/master/pages/linux/"
# uri = "https://github.com/tldr-pages/tldr/tree/master/pages.zh/"
# uri = "https://github.com/tldr-pages/tldr/tree/master/pages.zh/linux/"
# uri = "https://github.com/tldr-pages/tldr/tree/master/pages"
Expand All @@ -332,7 +327,7 @@ def test_999():
# uri = 'file:../tldr/pages'
# uri = 'file:../tldr/pages/linux/alpine.md'
# uri = 'file:../tld'
SnippyTldr(Logger(), uri, "test", "test")
SnippyTldr(Logger(), uri)


class Logger(object): # pylint: disable=too-few-public-methods
Expand Down

0 comments on commit d4e5ec9

Please sign in to comment.