From bbfb1168edbb3d4b37c11ee2927d35fa627e22ab Mon Sep 17 00:00:00 2001
From: realsumit
Date: Tue, 3 Oct 2017 15:25:07 +0530
Subject: [PATCH] HTTPSBear.py: New Bear Added

HTTPS bear can detect URL links which can be changed
to https from http and changes them if the user wants.

Closes https://github.com/coala/coala-bears/issues/1900
---
 bears/general/HTTPSBear.py     |  97 +++++++++++++++++++++++++++++++++++++++
 tests/general/HTTPSBearTest.py | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 229 insertions(+)
 create mode 100644 bears/general/HTTPSBear.py
 create mode 100644 tests/general/HTTPSBearTest.py

diff --git a/bears/general/HTTPSBear.py b/bears/general/HTTPSBear.py
new file mode 100644
index 0000000000..b52a65ea5d
--- /dev/null
+++ b/bears/general/HTTPSBear.py
@@ -0,0 +1,97 @@
+from urllib.parse import urlparse
+
+from bears.general.URLHeadBear import URLHeadBear
+from coalib.bears.LocalBear import LocalBear
+from coalib.results.Result import Result
+from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
+
+from coalib.results.Diff import Diff
+from coalib.settings.Setting import typed_dict
+
+
+class HTTPSBear(LocalBear):
+    DEFAULT_TIMEOUT = 15
+    LANGUAGES = {'All'}
+    AUTHORS = {'The coala developers'}
+    AUTHORS_EMAILS = {'coala-devel@googlegroups.com'}
+    LICENSE = 'AGPL-3.0'
+    CAN_DETECT = {'Documentation'}
+    CAN_FIX = {'HTTP Links'}
+    BEAR_DEPS = {URLHeadBear}
+    HTTPS_PREFIX = 'https'
+    HTTP_PREFIX = 'http'
+
+    def run(self, filename, file, dependency_results=dict(),
+            network_timeout: typed_dict(str, int, DEFAULT_TIMEOUT) = dict()):
+        """
+        Find http links in any text file and check if the https version of
+        link is valid. If so, an option is provided for replacing them with
+        https.
+
+        An https link is considered valid if the server responds with a 2xx
+        code.
+
+        Warning: This bear will make HEAD requests to all URLs mentioned in
+        your codebase, which can potentially be destructive. As an example,
+        this bear would naively just visit the URL from a line that goes like
+        `do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
+        all your data.
+
+        :param dependency_results: Results given by URLHeadBear.
+        :param network_timeout:    A dict mapping URLs and timeout to be
+                                   used for that URL. All the URLs that have
+                                   the same host as that of URLs provided
+                                   will be passed that timeout. It can also
+                                   contain a wildcard timeout entry with key
+                                   '*'. The timeout of all the websites not
+                                   in the dict will be the value of the key
+                                   '*'.
+        """
+        # Reduce the configured URLs to their hosts once, before the loop.
+        # Re-normalizing on every iteration would parse the already reduced
+        # host names again, which yields an empty netloc and drops every
+        # host specific timeout after the first link.
+        host_timeouts = {
+            url if url == '*' else urlparse(url).netloc: timeout
+            for url, timeout in network_timeout.items()}
+        default_timeout = host_timeouts.get('*', HTTPSBear.DEFAULT_TIMEOUT)
+        http_scheme = self.HTTP_PREFIX + '://'
+
+        for result in dependency_results.get(URLHeadBear.name, []):
+            line_number, link, code, context = result.contents
+            # Only plain http links can be upgraded; this skips https links
+            # as well as links that use another scheme or none at all.
+            if not link.startswith(http_scheme):
+                continue
+
+            https_link = self.HTTPS_PREFIX + link[len(self.HTTP_PREFIX):]
+            host = urlparse(https_link).netloc
+            https_response = URLHeadBear.get_head_response(
+                https_link, host_timeouts.get(host, default_timeout))
+
+            # A response object without a status code cannot confirm that
+            # the https link works, so such links are left alone.
+            https_code = getattr(https_response, 'status_code', None)
+            if not https_code or not 200 <= https_code < 300:
+                continue
+
+            current_line = file[line_number - 1]
+            start = current_line.find(link)
+            if start == -1:
+                # The link is not present verbatim in the line, so no
+                # reliable replacement can be offered.
+                continue
+
+            end = start + len(link)
+            replacement = (current_line[:start] + https_link +
+                           current_line[end:])
+            diff = Diff(file)
+            diff.change_line(line_number, current_line, replacement)
+
+            yield Result.from_values(
+                origin=self,
+                message='https can be used instead of http',
+                diffs={filename: diff},
+                file=filename,
+                line=line_number,
+                severity=RESULT_SEVERITY.NORMAL)
diff --git a/tests/general/HTTPSBearTest.py b/tests/general/HTTPSBearTest.py
new file mode 100644
index 0000000000..f5e24245bb
--- /dev/null
+++ b/tests/general/HTTPSBearTest.py
@@ -0,0 +1,132 @@
+import io
+from queue import Queue
+import requests
+import requests_mock
+import unittest
+import unittest.mock
+
+from bears.general.HTTPSBear import HTTPSBear
+from bears.general.URLHeadBear import URLHeadBear
+from coalib.testing.LocalBearTestHelper import LocalBearTestHelper
+from coalib.settings.Section import Section
+from tests.general.InvalidLinkBearTest import custom_matcher
+
+
+def custom_matcher_https(request):
+    """
+    Mock the responses for the URLs requested by the tests.
+
+    The fourth last character of the request path selects the response: if
+    it is 'v', the last three characters of the path are used as the status
+    code and if it is 'i', a 400 code is returned.
+
+    For connection checking url, it always passes 200 (prerequisite checking).
+
+    For URLs with neither marker, or with a status code that is not a
+    number, a ``RequestException`` is raised.
+
+    :param request: The ``request`` that the mocker receives.
+    :return:        A mocked ``Response`` object.
+    """
+
+    # the connection check url needs to be explicitly
+    # set to 200
+    if request.url == URLHeadBear.check_connection_url:
+        status_code = 200
+    else:
+        try:
+            marker = request.path_url[-4]
+            if marker == 'v':
+                status_code = int(request.path_url[-3:])
+            elif marker == 'i':
+                status_code = 400
+            else:
+                # Without this branch ``status_code`` would stay unbound.
+                raise ValueError('URL carries no status marker')
+        except (IndexError, ValueError):
+            raise requests.exceptions.RequestException
+    resp = requests.Response()
+    resp.raw = io.BytesIO()
+    resp.status_code = status_code
+    return resp
+
+
+class HTTPSBearTestPrerequisites(unittest.TestCase):
+
+    def test_check_prerequisites(self):
+        with requests_mock.Mocker() as m:
+            m.add_matcher(custom_matcher)
+            self.assertTrue(URLHeadBear.check_prerequisites())
+
+            m.head(URLHeadBear.check_connection_url,
+                   exc=requests.exceptions.RequestException)
+
+            self.assertEqual(URLHeadBear.check_prerequisites(),
+                             'You are not connected to the internet.')
+
+
+class HTTPSBearTest(LocalBearTestHelper):
+    """
+    The tests are mocked (don't actually connect to the internet) and
+    return the int conversion of the last three chars of
+    the URL as status code.
+
+    Check ``custom_matcher`` and ``custom_matcher_https`` for more info on
+    implementation.
+    """
+
+    def setUp(self):
+        self.ub_check_prerequisites = URLHeadBear.check_prerequisites
+        self.section = Section('')
+        URLHeadBear.check_prerequisites = lambda *args: True
+        self.uut = HTTPSBear(self.section, Queue())
+
+    def tearDown(self):
+        URLHeadBear.check_prerequisites = self.ub_check_prerequisites
+
+    def test_valid_https(self):
+        test_link = """
+        http://httpbin.org/status/v200
+        """.splitlines()
+
+        with requests_mock.Mocker() as m:
+            m.add_matcher(custom_matcher_https)
+            self.check_line_result_count(self.uut, test_link, [1])
+
+    def test_invalid_https(self):
+        test_link = """
+        http://httpbin.org/status/i200
+        """.splitlines()
+
+        with requests_mock.Mocker() as m:
+            m.add_matcher(custom_matcher_https)
+            self.check_validity(self.uut, test_link)
+
+    def test_https(self):
+        test_link = """
+        https://httpbin.org/status/v200
+        """.splitlines()
+
+        with requests_mock.Mocker() as m:
+            m.add_matcher(custom_matcher_https)
+            self.check_validity(self.uut, test_link)
+
+    def test_network_timeout_for_every_link(self):
+        # The host specific timeout has to be used for every link of the
+        # file, not only for the first one.
+        links = ['http://httpbin.org/status/v200',
+                 'http://httpbin.org/status/v201']
+        dependency_results = {URLHeadBear.name: [
+            unittest.mock.Mock(contents=(number, link, 200, None))
+            for number, link in enumerate(links, start=1)]}
+        file = [link + '\n' for link in links]
+
+        with unittest.mock.patch.object(
+                URLHeadBear, 'get_head_response',
+                return_value=unittest.mock.Mock(status_code=200)) as head:
+            results = list(self.uut.run(
+                'filename', file,
+                dependency_results=dependency_results,
+                network_timeout={'http://httpbin.org': 20}))
+
+        self.assertEqual(len(results), 2)
+        self.assertEqual([args[1] for args, _ in head.call_args_list],
+                         [20, 20])