Skip to content

Commit

Permalink
Add --ignore-query-strings to nikola check
Browse files Browse the repository at this point in the history
  • Loading branch information
Kwpolska committed Jan 15, 2023
1 parent d65f412 commit f9a7043
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ New in master
Features
--------

* Add a ``-q``, ``--ignore-query-strings`` option to the ``check`` plugin
that ignores query strings in internal links, so that they can be used
for cache busting
* Update reST stylesheets (Issue #3657)
* Add a ``--timeout`` parameter to the ``check`` plugin, defaulting to
30s. (Issue #3643)
Expand Down
24 changes: 18 additions & 6 deletions nikola/plugins/command/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ class CommandCheck(Command):
'default': 30,
'help': 'Timeout (in seconds) for HTTP requests in remote checks.',
},
{
'name': 'ignore_query_strings',
'long': 'ignore-query-strings',
'short': 'q',
'type': bool,
'default': False,
'help': 'Ignore query strings for internal links.',
}
]

def _execute(self, options, args):
Expand All @@ -170,7 +178,7 @@ def _execute(self, options, args):
failure = False
self.timeout = options['timeout']
if options['links']:
failure |= self.scan_links(options['find_sources'], options['remote'])
failure |= self.scan_links(options['find_sources'], options['remote'], options['ignore_query_strings'])
if options['files']:
failure |= self.scan_files()
if options['clean']:
Expand All @@ -183,7 +191,7 @@ def _execute(self, options, args):
timeout = None
cache = {}

def analyze(self, fname, find_sources=False, check_remote=False):
def analyze(self, fname, find_sources=False, check_remote=False, ignore_query_strings=False):
"""Analyze links on a page."""
rv = False
self.whitelist = [re.compile(x) for x in self.site.config['LINK_CHECK_WHITELIST']]
Expand Down Expand Up @@ -363,6 +371,10 @@ def analyze(self, fname, find_sources=False, check_remote=False):
else:
target_filename_str = target_filename.decode("utf-8", errors="surrogateescape")

if ignore_query_strings and "?" in target_filename_str:
target_filename, _, _ = target_filename.rpartition("?")
target_filename_str, _, _ = target_filename_str.rpartition("?")

if any(pattern.search(target_filename_str) for pattern in self.whitelist):
continue

Expand All @@ -381,7 +393,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
self.logger.error(u"Error with: {0} {1}".format(filename, exc))
return rv

def scan_links(self, find_sources=False, check_remote=False):
def scan_links(self, find_sources=False, check_remote=False, ignore_query_strings=False):
"""Check links on the site."""
self.logger.debug("Checking Links:")
self.logger.debug("===============\n")
Expand All @@ -397,13 +409,13 @@ def scan_links(self, find_sources=False, check_remote=False):
for fname in _call_nikola_list(self.site, self.cache)[0]:
if fname.startswith(output_folder):
if '.html' == fname[-5:]:
if self.analyze(fname, find_sources, check_remote):
if self.analyze(fname, find_sources, check_remote, ignore_query_strings):
failure = True
if atom_extension == fname[-len(atom_extension):]:
if self.analyze(fname, find_sources, False):
if self.analyze(fname, find_sources, False, ignore_query_strings):
failure = True
if fname.endswith('sitemap.xml') or fname.endswith('sitemapindex.xml'):
if self.analyze(fname, find_sources, False):
if self.analyze(fname, find_sources, False, ignore_query_strings):
failure = True
if not failure:
self.logger.debug("All links checked.")
Expand Down

0 comments on commit f9a7043

Please sign in to comment.