Skip to content

Commit

Permalink
Merge pull request #2623 from lunkwill42/test/do-not-generate-validation-tests-for-non-html
Browse files Browse the repository at this point in the history

Stop making validation tests for non HTML content
  • Loading branch information
lunkwill42 committed Jun 5, 2023
2 parents bc2be9d + 40ed7e3 commit 08236b7
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions tests/integration/web/crawler_test.py
Expand Up @@ -113,6 +113,12 @@ def crawl(self):
if page:
yield page

def crawl_only_html(self):
    """Yield the crawled pages that are eligible for HTML validation.

    Every page produced by ``self.crawl()`` is checked with
    ``should_validate``; only pages it accepts (HTML content type and a
    path that is not blacklisted) are passed on to the caller.
    """
    for crawled_page in self.crawl():
        if should_validate(crawled_page):
            yield crawled_page

def _visit_with_error_handling(self, url):
try:
page = self._visit(url)
Expand Down Expand Up @@ -256,14 +262,10 @@ def _content_as_string(content):
"(ADMINUSERNAME, ADMINPASSWORD) , skipping crawler "
"tests!",
)
@pytest.mark.parametrize("page", crawler.crawl(), ids=page_id)
@pytest.mark.parametrize("page", crawler.crawl_only_html(), ids=page_id)
def test_page_should_be_valid_html(page):
if page.response != 200:
pytest.skip("not validating non-reachable page")
if not page.content_type or 'html' not in page.content_type.lower():
pytest.skip("not attempting to validate non-html page")
if not should_validate(page.url):
pytest.skip("skip validation of blacklisted page")
if not page.content:
pytest.skip("page has no content")

Expand All @@ -273,8 +275,11 @@ def test_page_should_be_valid_html(page):
assert not errors, "Found following validation errors:\n" + errors


def should_validate(url):
path = normalize_path(url)
def should_validate(page: Page):
"""Returns True if page is eligible for HTML validation, False if not"""
if not page.content_type or 'html' not in page.content_type.lower():
return False
path = normalize_path(page.url)
for blacklisted_path in TIDY_BLACKLIST:
if path.startswith(blacklisted_path):
return False
Expand Down

0 comments on commit 08236b7

Please sign in to comment.