From bbfb1168edbb3d4b37c11ee2927d35fa627e22ab Mon Sep 17 00:00:00 2001
From: realsumit <sumitsarinofficial@gmail.com>
Date: Tue, 3 Oct 2017 15:25:07 +0530
Subject: [PATCH] HTTPSBear.py: New Bear Added

HTTPSBear detects http links whose https version is reachable and
offers a patch that switches them to https.

Closes https://github.com/coala/coala-bears/issues/1900
---
 bears/general/HTTPSBear.py     |  89 +++++++++++++++++++++++++++
 tests/general/HTTPSBearTest.py | 109 +++++++++++++++++++++++++++++++++
 2 files changed, 198 insertions(+)
 create mode 100644 bears/general/HTTPSBear.py
 create mode 100644 tests/general/HTTPSBearTest.py

diff --git a/bears/general/HTTPSBear.py b/bears/general/HTTPSBear.py
new file mode 100644
index 0000000000..b52a65ea5d
--- /dev/null
+++ b/bears/general/HTTPSBear.py
@@ -0,0 +1,89 @@
+from urllib.parse import urlparse
+
+from bears.general.URLHeadBear import URLHeadBear
+from coalib.bears.LocalBear import LocalBear
+from coalib.results.Result import Result
+from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
+
+from coalib.results.Diff import Diff
+from coalib.settings.Setting import typed_dict
+
+
+class HTTPSBear(LocalBear):
+    DEFAULT_TIMEOUT = 15
+    LANGUAGES = {'All'}
+    AUTHORS = {'The coala developers'}
+    AUTHORS_EMAILS = {'coala-devel@googlegroups.com'}
+    LICENSE = 'AGPL-3.0'
+    CAN_DETECT = {'Documentation'}
+    CAN_FIX = {'HTTP Links'}
+    BEAR_DEPS = {URLHeadBear}
+    HTTPS_PREFIX = 'https'
+    HTTP_PREFIX = 'http'
+
+    def run(self, filename, file, dependency_results=dict(),
+            network_timeout: typed_dict(str, int, DEFAULT_TIMEOUT) = dict()):
+        """
+        Find http links in any text file and check if the https version of
+        the link is valid. If so, an option is provided for replacing them
+        with https.
+
+        An https link is considered valid if the server responds with a 2xx
+        code.
+
+        Warning: This bear will make HEAD requests to all URLs mentioned in
+        your codebase, which can potentially be destructive. As an example,
+        this bear would naively just visit the URL from a line that goes like
+        `do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
+        all your data.
+
+        :param network_timeout:       A dict mapping URLs to the timeout to be
+                                      used for that URL. All the URLs that have
+                                      the same host as that of URLs provided
+                                      will be passed that timeout. It can also
+                                      contain a wildcard timeout entry with key
+                                      '*'. The timeout of all the websites not
+                                      in the dict will be the value of the key
+                                      '*'.
+        """
+        # Normalise keys to hosts once; re-parsing them per link yields ''.
+        timeouts = {
+            url if url == '*' else urlparse(url).netloc: timeout
+            for url, timeout in network_timeout.items()}
+        default_timeout = timeouts.get('*', HTTPSBear.DEFAULT_TIMEOUT)
+
+        for result in dependency_results.get(URLHeadBear.name, []):
+            line_number, link, code, context = result.contents
+            # Only plain http links can be upgraded; https links are done
+            # already and other schemes (e.g. ftp) must not be rewritten.
+            if not link.startswith(self.HTTP_PREFIX + '://'):
+                continue
+
+            https_link = self.HTTPS_PREFIX + link[len(self.HTTP_PREFIX):]
+            host = urlparse(https_link).netloc
+            https_response = URLHeadBear.get_head_response(
+                https_link, timeouts.get(host, default_timeout))
+
+            # The response may lack ``status_code`` (presumably when the
+            # request itself failed); treat that like an invalid link.
+            https_code = getattr(https_response, 'status_code', None)
+            if not https_code or not 200 <= https_code < 300:
+                continue
+
+            current_line = file[line_number - 1]
+            start = current_line.find(link)
+            # Never build a patch for a link that is not on this line.
+            if start == -1:  # pragma: no cover
+                continue
+            replacement = (current_line[:start] + https_link +
+                           current_line[start + len(link):])
+            diff = Diff(file)
+            diff.change_line(line_number, current_line, replacement)
+
+            yield Result.from_values(
+                origin=self,
+                message='https can be used instead of http',
+                diffs={filename: diff},
+                file=filename,
+                line=line_number,
+                severity=RESULT_SEVERITY.NORMAL)
diff --git a/tests/general/HTTPSBearTest.py b/tests/general/HTTPSBearTest.py
new file mode 100644
index 0000000000..f5e24245bb
--- /dev/null
+++ b/tests/general/HTTPSBearTest.py
@@ -0,0 +1,109 @@
+import io
+from queue import Queue
+import requests
+import requests_mock
+import unittest
+import unittest.mock
+
+from bears.general.HTTPSBear import HTTPSBear
+from bears.general.URLHeadBear import URLHeadBear
+from coalib.testing.LocalBearTestHelper import LocalBearTestHelper
+from coalib.settings.Section import Section
+from tests.general.InvalidLinkBearTest import custom_matcher
+
+
+def custom_matcher_https(request):
+    """
+    Mock the response to a request for the https version of a link.
+
+    If the fourth last character of the request path is 'v', the last
+    three characters of the path are used as the status code; if it is
+    'i', the https version answers with a 400 code.
+
+    The connection check URL always answers with 200 (prerequisite checking).
+
+    For URLs with no status codes appended, a ``RequestException`` is raised.
+
+    :param request: The ``request`` that the mocker receives.
+    :return:        A mocked ``Response`` object.
+    """
+    # The connection check url needs to be explicitly set to 200.
+    if request.url == URLHeadBear.check_connection_url:
+        status_code = 200
+    else:
+        # Slicing (not indexing) keeps short paths from raising IndexError.
+        marker = request.path_url[-4:-3]
+        if marker == 'i':
+            status_code = 400
+        elif marker == 'v':
+            try:
+                status_code = int(request.path_url[-3:])
+            except ValueError:
+                raise requests.exceptions.RequestException
+        else:
+            raise requests.exceptions.RequestException
+    resp = requests.Response()
+    resp.raw = io.BytesIO()
+    resp.status_code = status_code
+    return resp
+
+
+class HTTPSBearTestPrerequisites(unittest.TestCase):
+    """Check ``URLHeadBear.check_prerequisites`` online and offline."""
+
+    def test_check_prerequisites(self):
+        with requests_mock.Mocker() as mocker:
+            mocker.add_matcher(custom_matcher)
+            self.assertTrue(URLHeadBear.check_prerequisites())
+
+            mocker.head(URLHeadBear.check_connection_url,
+                        exc=requests.exceptions.RequestException)
+            status = URLHeadBear.check_prerequisites()
+            self.assertEqual(status, 'You are not connected to the internet.')
+
+
+class HTTPSBearTest(LocalBearTestHelper):
+    """
+    Exercise ``HTTPSBear`` against mocked responses.
+
+    The tests are mocked (don't actually connect to the internet). A 'v'
+    marker in the URL makes the mock return the int conversion of the last
+    three chars of the URL as status code, an 'i' marker makes it return
+    400.
+
+    Check ``custom_matcher`` and ``custom_matcher_https`` for more info on
+    implementation.
+    """
+
+    def setUp(self):
+        # Replace the connectivity check so that the tests run offline;
+        # ``tearDown`` puts the original back.
+        self.ub_check_prerequisites = URLHeadBear.check_prerequisites
+        URLHeadBear.check_prerequisites = lambda *args: True
+        self.section = Section('')
+        self.uut = HTTPSBear(self.section, Queue())
+
+    def tearDown(self):
+        # Undo the patch applied in ``setUp``.
+        URLHeadBear.check_prerequisites = self.ub_check_prerequisites
+
+    def test_valid_https(self):
+        """Expect one result when the https version answers with 200."""
+        lines = ['', '        http://httpbin.org/status/v200', '        ']
+        with requests_mock.Mocker() as mocker:
+            mocker.add_matcher(custom_matcher_https)
+            self.check_line_result_count(self.uut, lines, [1])
+
+    def test_invalid_https(self):
+        """An http link whose https version answers with 400 is valid."""
+        lines = ['', '        http://httpbin.org/status/i200', '        ']
+        with requests_mock.Mocker() as mocker:
+            mocker.add_matcher(custom_matcher_https)
+            self.check_validity(self.uut, lines)
+
+    def test_https(self):
+        """A link that already uses https is valid."""
+        lines = ['', '        https://httpbin.org/status/v200', '        ']
+        with requests_mock.Mocker() as mocker:
+            mocker.add_matcher(custom_matcher_https)
+            self.check_validity(self.uut, lines)