Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTTPSBear.py: New Bear Added #2063

Merged
merged 1 commit into from
Apr 22, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
89 changes: 89 additions & 0 deletions bears/general/HTTPSBear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from urllib.parse import urlparse

from bears.general.URLHeadBear import URLHeadBear
from coalib.bears.LocalBear import LocalBear
from coalib.results.Result import Result
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY

from coalib.results.Diff import Diff
from coalib.settings.Setting import typed_dict


class HTTPSBear(LocalBear):
    # Bear that consumes the results of ``URLHeadBear`` and proposes an
    # ``https`` replacement for every ``http`` link whose ``https`` variant
    # answers a HEAD request with a 2xx status code.
    DEFAULT_TIMEOUT = 15
    LANGUAGES = {'All'}
    AUTHORS = {'The coala developers'}
    AUTHORS_EMAILS = {'coala-devel@googlegroups.com'}
    LICENSE = 'AGPL-3.0'
    CAN_DETECT = {'Documentation'}
    CAN_FIX = {'HTTP Links'}
    BEAR_DEPS = {URLHeadBear}
    HTTPS_PREFIX = 'https'
    HTTP_PREFIX = 'http'

    # NOTE: the ``dict()`` defaults are kept as part of the existing
    # signature; they are only read below, never mutated or rebound.
    def run(self, filename, file, dependency_results=dict(),
            network_timeout: typed_dict(str, int, DEFAULT_TIMEOUT) = dict()):
        """
        Find http links in any text file and check if the https version of
        link is valid. If so, an option is provided for replacing them with
        https.

        An https link is considered valid if the server responds with a 2xx
        code.

        Warning: This bear will make HEAD requests to all URLs mentioned in
        your codebase, which can potentially be destructive. As an example,
        this bear would naively just visit the URL from a line that goes like
        `do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
        all your data.

        :param network_timeout: A dict mapping URLs and timeout to be
                                used for that URL. All the URLs that have
                                the same host as that of URLs provided
                                will be passed that timeout. It can also
                                contain a wildcard timeout entry with key
                                '*'. The timeout of all the websites not
                                in the dict will be the value of the key
                                '*'.
        """
        # Reduce the configured URLs to their hosts exactly once. Doing this
        # inside the loop would re-parse keys that are already bare hosts;
        # ``urlparse`` yields an empty ``netloc`` for those, which would drop
        # every host-specific timeout after the first link.
        host_timeouts = {
            '*' if url == '*' else urlparse(url).netloc: timeout
            for url, timeout in network_timeout.items()}
        # Timeout for hosts without an entry of their own: the wildcard
        # entry if given, the bear default otherwise.
        fallback_timeout = host_timeouts.get('*', HTTPSBear.DEFAULT_TIMEOUT)

        for result in dependency_results.get(URLHeadBear.name, []):
            line_number, link, _code, _context = result.contents
            if link.startswith(self.HTTPS_PREFIX):
                # Already an https link, nothing to suggest.
                continue

            https_link = self.HTTPS_PREFIX + link[len(self.HTTP_PREFIX):]
            host = urlparse(https_link).netloc
            https_response = URLHeadBear.get_head_response(
                https_link,
                host_timeouts.get(host, fallback_timeout))

            try:
                https_code = https_response.status_code
            except AttributeError:  # pragma: no cover
                # No usable response object was returned for this link.
                continue

            if not https_code or not 200 <= https_code < 300:
                continue

            current_line = file[line_number - 1]
            start = current_line.find(link)
            if start == -1:  # pragma: no cover
                # The link cannot be located in the reported line, so no
                # trustworthy replacement line can be built.
                continue
            end = start + len(link)
            replacement = current_line[:start] + https_link + current_line[end:]

            diff = Diff(file)
            diff.change_line(line_number, current_line, replacement)

            yield Result.from_values(
                origin=self,
                message='https can be used instead of http',
                diffs={filename: diff},
                file=filename,
                line=line_number,
                severity=RESULT_SEVERITY.NORMAL)
109 changes: 109 additions & 0 deletions tests/general/HTTPSBearTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import io
from queue import Queue
import requests
import requests_mock
import unittest
import unittest.mock

from bears.general.HTTPSBear import HTTPSBear
from bears.general.URLHeadBear import URLHeadBear
from coalib.testing.LocalBearTestHelper import LocalBearTestHelper
from coalib.settings.Section import Section
from tests.general.InvalidLinkBearTest import custom_matcher


def custom_matcher_https(request):
    """
    Build a mocked response whose status code is derived from the request.

    The connection checking URL always gets a 200 (prerequisite checking).

    For every other URL the fourth last character of the request path
    selects the behaviour:

    - ``v``: the last three characters of the path are used as the status
      code of the response.
    - ``i``: the response has a 400 status code.

    For URLs that carry no such marker, are too short to carry one, or whose
    last three characters are not a number, a ``RequestException`` is raised.

    :param request: The ``request`` that the mocker receives.
    :return:        A mocked ``Response`` object.
    """
    # the connection check url needs to be explicitly
    # set to 200
    if request.url == URLHeadBear.check_connection_url:
        status_code = 200
    else:
        path = request.path_url
        # Slicing (instead of indexing) yields '' for paths shorter than
        # four characters rather than raising an ``IndexError``.
        marker = path[-4:-3]
        if marker == 'v':
            try:
                status_code = int(path[-3:])
            except ValueError:
                raise requests.exceptions.RequestException
        elif marker == 'i':
            status_code = 400
        else:
            # Without this branch ``status_code`` would be left unbound.
            raise requests.exceptions.RequestException

    resp = requests.Response()
    resp.raw = io.BytesIO()
    resp.status_code = status_code
    return resp


class HTTPSBearTestPrerequisites(unittest.TestCase):
    """
    Check ``URLHeadBear.check_prerequisites`` against mocked HEAD requests.
    """

    def test_check_prerequisites(self):
        """
        The check is truthy while the mocked requests are answered by
        ``custom_matcher`` and returns an explanatory message once the
        connection check URL raises a ``RequestException``.
        """
        with requests_mock.Mocker() as m:
            m.add_matcher(custom_matcher)
            self.assertTrue(URLHeadBear.check_prerequisites())

            # From here on the connection check request fails.
            m.head(URLHeadBear.check_connection_url,
                   exc=requests.exceptions.RequestException)

            # ``assertEqual`` reports the actual return value on failure,
            # which ``assertTrue(a == b)`` would hide.
            self.assertEqual(URLHeadBear.check_prerequisites(),
                             'You are not connected to the internet.')


class HTTPSBearTest(LocalBearTestHelper):
    """
    Run ``HTTPSBear`` against mocked HTTP traffic; no test connects to the
    internet.

    The status code of every mocked response is derived from the tail of
    the requested URL. Check ``custom_matcher_https`` for more info on
    implementation.
    """

    def setUp(self):
        """
        Create the bear under test with the prerequisite check of
        ``URLHeadBear`` replaced by a stub that always returns ``True``.
        """
        self.section = Section('')
        # Keep the real check so that ``tearDown`` can put it back.
        self.ub_check_prerequisites = URLHeadBear.check_prerequisites
        URLHeadBear.check_prerequisites = lambda *args: True
        self.uut = HTTPSBear(self.section, Queue())

    def tearDown(self):
        """
        Restore the prerequisite check saved in ``setUp``.
        """
        URLHeadBear.check_prerequisites = self.ub_check_prerequisites

    def test_valid_https(self):
        """
        An http link whose mocked responses carry a 200 status code is
        expected to produce exactly one result.
        """
        lines = """
http://httpbin.org/status/v200
""".splitlines()

        with requests_mock.Mocker() as mocker:
            mocker.add_matcher(custom_matcher_https)
            self.check_line_result_count(self.uut, lines, [1])

    def test_invalid_https(self):
        """
        An http link whose mocked responses carry a 400 status code is
        passed to ``check_validity``.
        """
        lines = """
http://httpbin.org/status/i200
""".splitlines()

        with requests_mock.Mocker() as mocker:
            mocker.add_matcher(custom_matcher_https)
            self.check_validity(self.uut, lines)

    def test_https(self):
        """
        A link that already uses https is passed to ``check_validity``.
        """
        lines = """
https://httpbin.org/status/v200
""".splitlines()

        with requests_mock.Mocker() as mocker:
            mocker.add_matcher(custom_matcher_https)
            self.check_validity(self.uut, lines)