Test requesting tldr man pages remotely

There are a few basic tests that verify the basic functionality of downloading tldr man pages from GitHub. WIP Signed-off-by: Heikki Laaksonen <laaksonen.heikki.j@gmail.com>
heilaaks · Jun 18, 2019 · 29affb9 · 29affb9
1 parent 5b6d014
commit 29affb9
Show file tree

Hide file tree

Showing 2 changed files with 151 additions and 21 deletions.
diff --git a/snippy_tldr/plugin.py b/snippy_tldr/plugin.py
@@ -83,9 +83,25 @@ def snippy_import_hook(logger, uri, validator, parser):
 class SnippyTldr(object):  # pylint: disable=too-many-instance-attributes
     """Plugin to import tldr man pages for snippy."""
 
-    TLDR_URI = "https://github.com/tldr-pages/tldr/tree/master/pages/linux"
+    TLDR_DEFAULT_URI = "https://github.com/tldr-pages/tldr/tree/master/pages/linux"
     TLDR_PAGES = ("common", "linux", "osx", "sunos", "windows")
 
+    MATCH_TLDR_PAGES = (
+        r"pages(?:\.[a-zA-Z0-9-]+)?"
+    )  # Match examples 'pages', 'pages.pt-BR' and 'pages.it'.
+
+    RE_CATCH_TLDR_PAGE_HTML = re.compile(
+        r"""
+        /tldr-pages/tldr/tree/  # Match part of an URL in HTML page.
+        \S+/                    # Match branch or tag.
+        %s/                     # Match tldr pages and page translations.
+        (?P<page>.*?)           # Catch tldr page ungreedily.
+        ["]{1}                  # Match trailing quotation mark.
+        """
+        % MATCH_TLDR_PAGES,
+        re.VERBOSE,
+    )
+
     RE_CATCH_TLDR_FILENAME = re.compile(
         r"""
         .*[\/]         # Match greedily the last leading forward slash before the filename.
@@ -95,12 +111,11 @@ class SnippyTldr(object):  # pylint: disable=too-many-instance-attributes
         re.VERBOSE,
     )
 
-    RE_CATCH_TLDR_PAGE = re.compile(
+    RE_MATCH_TLDR_PAGES = re.compile(
         r"""
-        /tldr-pages/tldr/tree/\S+/pages/  # Match HTML page to find the pages.
-        (?P<page>.*)                      # Catch tldr page.
-        ["]                               # Match trailing quotation mark.
-        """,
+        ^%s  # Match tldr pages in the beginning of a string.
+        """
+        % MATCH_TLDR_PAGES,
         re.VERBOSE,
     )
 
@@ -176,15 +191,15 @@ def __init__(self, logger, uri, validator, parser):
         self._logger = logger
         self._validate = validator
         self._parse = parser
-        self._uri = uri if uri else self.TLDR_URI
+        self._uri = self._get_uri(uri)
         self._uri_scheme = urlparse(self._uri).scheme
         self._uri_path = urlparse(self._uri).path
         self._snippets = []
         self._i = 0
 
-        print("uri: %s", self._uri)
-        print("scheme: %s", self._uri_scheme)
-        print("path: %s", self._uri_path)
+        print("uri: %s" % self._uri)
+        print("scheme: %s" % self._uri_scheme)
+        print("path: %s" % self._uri_path)
 
         self._read_tldr_pages()
 
@@ -215,6 +230,32 @@ def next(self):
 
         return note
 
+    def _get_uri(self, uri):
+        """Format URI from the user.
+
+        This method makes sure that the URI received from user is in
+        correct format. An empty URI selects ``TLDR_DEFAULT_URI``.
+
+        The trailing slash is added if the URL is not pointing to a
+        Markdown file. The slash allows ``urljoin`` to add e.g. the
+        filenames to URL without removing the last object in the URL
+        path.
+
+        Args:
+            uri (str): URI received from the ``--file`` CLI option.
+
+        Returns:
+            str: Formatted URI for the plugin.
+        """
+
+        uri_ = uri if uri else self.TLDR_DEFAULT_URI
+        _, file_extension = os.path.splitext(urlparse(uri_).path)
+        print("extension: %s" % file_extension)
+        if file_extension != ".md" and not uri_.endswith("/"):
+            uri_ = uri_ + "/"
+
+        return uri_
+
     def _read_tldr_pages(self):
         """Read tldr man pages."""
 
@@ -236,7 +277,8 @@ def _get_tlrd_filenames(self, uri):
 
         filenames = {}
         last_object = os.path.basename(os.path.normpath(uri))
-        if last_object == "pages":
+        if self.RE_MATCH_TLDR_PAGES.search(last_object):
+            print("under pages: %s" % uri)
             self._logger.debug("read all tldr man pages from: %s", uri)
             pages = self._get_tldr_pages()
             for page in pages:
@@ -245,14 +287,42 @@ def _get_tlrd_filenames(self, uri):
         elif ".md" in last_object:
             self._logger.debug("read one tldr man page snippet: %s", uri)
             print("file: %s" % uri)
-        elif uri in ("/tldr/pages", "tldr-pages/tldr"):
+        elif any(s in uri for s in ("/tldr/pages", "tldr-pages/tldr")):
             self._logger.debug("read tldr man page: %s", uri)
-            print("group: %s" % uri)
+            self._read_tldr_page_filenames(uri)
         else:
+            print("unknown: %s" % uri)
             self._logger.debug("unknown tldr man page path: %s", uri)
 
         return filenames
 
+    def _read_tldr_page_filenames(self, uri):
+        """Read tldr page snippet filenames.
+
+        Args:
+            uri (str): URI where the tldr snippets are read.
+
+        Returns:
+            dict: Page name mapped to sorted filenames, or empty dict.
+        """
+
+        filenames = {}
+        print("filenames")
+        tldr_page = os.path.basename(os.path.normpath(uri))
+        if tldr_page not in self.TLDR_PAGES:
+            self._logger.debug("unknown tldr man page: %s", uri)
+            return filenames
+
+        if "http" in uri:
+            response = requests.get(uri.strip("/"))
+            files = sorted(set(self.RE_CATCH_TLDR_FILENAME.findall(response.text)))
+            filenames = {tldr_page: files}
+            print(filenames)
+        else:
+            print("local files")
+
+        return filenames
+
     def _get_tldr_pages(self):
         """Read tldr pages from URI or file path.
 
@@ -270,8 +340,10 @@ def _get_tldr_pages(self):
 
         pages = []
         if "http" in self._uri_scheme:
-            html = requests.get(self._uri).text
-            pages = self.RE_CATCH_TLDR_PAGE.findall(html)
+            html = requests.get(self._uri.strip("/")).text
+            print("html %s" % html)
+            pages = self.RE_CATCH_TLDR_PAGE_HTML.findall(html)
+            print("here: %s" % pages)
         else:
             try:
                 pages = os.listdir(self._uri_path)
@@ -289,7 +361,7 @@ def _get_tldr_pages(self):
                 pages_.append(urljoin(self._uri, page))
             else:
                 pages_.append(os.path.join(self._uri, page))
-        self._logger.debug("parsed tldr pages: %s", pages)
+        self._logger.debug("parsed tldr pages: %s", pages_)
 
         return pages_
 
@@ -316,7 +388,7 @@ def _read_tldr_file(self, uri):
 
         tldr_page = ""
         if "http" in self._uri_scheme:
-            tldr_page = requests.get(uri).text
+            tldr_page = requests.get(uri.strip("/")).text
         else:
             with open(uri, "r") as infile:
                 tldr_page = infile.read()

diff --git a/tests/test_snippy_tldr.py b/tests/test_snippy_tldr.py
@@ -24,6 +24,15 @@
 
 from snippy_tldr.plugin import SnippyTldr
 
+# The responses module does not seem to work when the responses are
+# added in a for loop that iterates over the values of a list. The
+# module appears to take a reference to the value (pointer to list),
+# and that reference gets updated on every iteration of the loop.
+#
+# As a result, the last value of the loop is always used as the first
+# and only response. The loop must therefore iterate over the size of
+# the list instead of iterating over its values.
+
 
 class TestSnippyTldr(object):  # pylint: disable=too-few-public-methods
     """Test snippy-tldr."""
@@ -32,20 +41,67 @@ class TestSnippyTldr(object):  # pylint: disable=too-few-public-methods
     @responses.activate
     @pytest.mark.usefixtures("mock-snippy")
     def test_001():
-        """First test."""
+        """Test reading remote tldr pages."""
 
+        # Read default tldr man page when user did not use ``--file`` option.
         responses.add(
             responses.GET,
             "https://github.com/tldr-pages/tldr/tree/master/pages/linux",
             json={},
             status=200,
         )
-        _ = SnippyTldr(Logger(), "test", "test", "test")
+        _ = SnippyTldr(Logger(), "", None, None)
+        assert len(responses.calls) == 1
+        responses.reset()
+
+        # Read all tldr pages when the URI does not have trailing slash.
+        uri = "https://github.com/tldr-pages/tldr/tree/master/pages"
+        body = (
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="linux" id="e206a54e9f826" href="/tldr-pages/tldr/tree/master/pages/linux">linux</a></span>'  # noqa pylint: disable=line-too-long
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="osx" id="8e4f88e9d55c6" href="/tldr-pages/tldr/tree/master/pages/osx">osx</a></span>'  # noqa pylint: disable=line-too-long
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="sunos" id="cf2aa06853ba7" href="/tldr-pages/tldr/tree/master/pages/sunos">sunos</a></span>'  # noqa pylint: disable=line-too-long
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="windows" id="0f413351f633" href="/tldr-pages/tldr/tree/master/pages/windows">windows</a></span>'  # noqa pylint: disable=line-too-long
+        )
+        requests = [
+            "https://github.com/tldr-pages/tldr/tree/master/pages",
+            "https://github.com/tldr-pages/tldr/tree/master/pages/linux",
+            "https://github.com/tldr-pages/tldr/tree/master/pages/osx",
+            "https://github.com/tldr-pages/tldr/tree/master/pages/sunos",
+            "https://github.com/tldr-pages/tldr/tree/master/pages/windows",
+        ]
+        responses.add(responses.GET, requests.pop(0), body=body, status=200)
+        for _ in range(len(requests)):
+            responses.add(responses.GET, requests.pop(0), body=body, status=200)
+        _ = SnippyTldr(Logger(), uri, None, None)
+        assert len(responses.calls) == 5
+        responses.reset()
+
+        # Test reading all tldr pages under 'pt-BR' translation.
+        uri = "https://github.com/tldr-pages/tldr/tree/master/pages.pt-BR"
+        body = (
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="linux" id="e206a54e9f826" href="/tldr-pages/tldr/tree/master/pages.pt-BR/linux">linux</a></span>'  # noqa pylint: disable=line-too-long
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="osx" id="8e4f88e9d55c6" href="/tldr-pages/tldr/tree/master/pages.pt-BR/osx">osx</a></span>'  # noqa pylint: disable=line-too-long
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="sunos" id="cf2aa06853ba7" href="/tldr-pages/tldr/tree/master/pages.pt-BR/sunos">sunos</a></span>'  # noqa pylint: disable=line-too-long
+            '<span class="css-truncate-target"><a class="js-navigation-open" title="windows" id="0f413351f633" href="/tldr-pages/tldr/tree/master/pages.pt-BR/windows">windows</a></span>'  # noqa pylint: disable=line-too-long
+        )
+        requests = [
+            "https://github.com/tldr-pages/tldr/tree/master/pages.pt-BR",
+            "https://github.com/tldr-pages/tldr/tree/master/pages.pt-BR/linux",
+            "https://github.com/tldr-pages/tldr/tree/master/pages.pt-BR/osx",
+            "https://github.com/tldr-pages/tldr/tree/master/pages.pt-BR/sunos",
+            "https://github.com/tldr-pages/tldr/tree/master/pages.pt-BR/windows",
+        ]
+        responses.add(responses.GET, requests.pop(0), body=body, status=200)
+        for _ in range(len(requests)):
+            responses.add(responses.GET, requests.pop(0), body=body, status=200)
+        _ = SnippyTldr(Logger(), uri, None, None)
+        assert len(responses.calls) == 5
 
         assert 1
 
     @staticmethod
-    def test_002():
+    @pytest.mark.skip(reason="no way of currently testing this")
+    def test_999():
         """First test."""
 
         # Test with
@@ -67,8 +123,10 @@ def test_002():
         #       uri = 'file:../tldr/pages/linux/alpine.md'
         #       uri = 'file:../tld'
 
-        uri = "https://github.com/tldr-pages/tldr/tree/master/pages/linux"
+        # uri = "https://github.com/tldr-pages/tldr/tree/master/pages/linux/"
+        # uri = "https://github.com/tldr-pages/tldr/tree/master/pages.zh/"
         uri = "https://github.com/tldr-pages/tldr/tree/master/pages"
+        # uri = "https://github.com/tldr-pages/tldr/tree/master/pages/"
         # uri = '../tldr/pages/linux/'
         # uri = '../tldr/pages/'
         # uri = 'file:../tldr/pages/linux/'