Skip to content
Permalink
Browse files

Merge pull request #1759 from getnikola/smarter-check

Fix #1758
  • Loading branch information...
ralsina committed May 31, 2015
2 parents aebef5e + 66655e5 commit 423f3a7b659ed526e20b43b29db886d03fbb7914
@@ -14,6 +14,7 @@ Features
Bugfixes
--------

* Extract ``nikola check`` target list from actual task list instead of parsing (Issue #1758)
* Treat special-purpose “draft” tag case-insensitively
* Avoid some rebuild loops (Issue #1747)
* Better error if two posts/pages output conflict (Issue #1749)
@@ -25,6 +25,7 @@
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from __future__ import print_function
from collections import defaultdict
import os
import re
import sys
@@ -34,6 +35,7 @@
except ImportError:
from urllib.parse import unquote, urlparse, urljoin, urldefrag # NOQA

from doit.loader import generate_tasks
import lxml.html
try:
import requests
@@ -44,33 +46,28 @@
from nikola.utils import get_logger, req_missing


def _call_nikola_list(l, site, arguments):
class NotReallyAStream(object):
"""A massive hack."""
out = []
def _call_nikola_list(site):
    """Collect all task targets of *site* together with their file dependencies.

    The task generators returned by ``site.gen_tasks`` are expanded with
    doit's ``generate_tasks``, first for the ``"Task"`` stage and then for
    the ``"LateTask"`` stage.

    Returns a ``(files, deps)`` tuple: ``files`` is the list of every task
    target in the order the tasks were generated, and ``deps`` is a
    ``defaultdict(list)`` mapping each target to the accumulated
    ``file_dep`` entries of the task(s) that list it as a target.
    """
    all_targets = []
    sources_by_target = defaultdict(list)
    # (group name handed to generate_tasks, stage name handed to gen_tasks).
    # Regular tasks are walked before late tasks so the order of
    # ``all_targets`` stays the same as with two separate loops.
    stages = (
        ('render_site', "Task"),
        ('post_render', "LateTask"),
    )
    for group_name, stage_name in stages:
        task_source = site.gen_tasks('render_site', stage_name, '')
        for task in generate_tasks(group_name, task_source):
            all_targets.extend(task.targets)
            for target in task.targets:
                sources_by_target[target].extend(task.file_dep)
    return all_targets, sources_by_target

def write(self, t):
self.out.append(t)

oldstream = l.outstream
newstream = NotReallyAStream()
try:
l.outstream = newstream
l.parse_execute(arguments)
return newstream.out
finally:
l.outstream = oldstream


def real_scan_files(l, site):
def real_scan_files(site):
task_fnames = set([])
real_fnames = set([])
output_folder = site.config['OUTPUT_FOLDER']
# First check that all targets are generated in the right places
for task in _call_nikola_list(l, site, ["--all"]):
task = task.strip()
if output_folder in task and ':' in task:
fname = task.split(':', 1)[-1]
for fname in _call_nikola_list(site)[0]:
fname = fname.strip()
if fname.startswith(output_folder):
task_fnames.add(fname)
# And now check that there are no non-target files
for root, dirs, files in os.walk(output_folder, followlinks=True):
@@ -154,7 +151,6 @@ class CommandCheck(Command):
def _execute(self, options, args):
"""Check the generated site."""
self.logger = get_logger('check', self.site.loghandlers)
self.l = self._doitargs['cmds'].get_plugin('list')(config=self.config, **self._doitargs)

if not options['links'] and not options['files'] and not options['clean']:
print(self.help())
@@ -175,21 +171,25 @@ def _execute(self, options, args):
existing_targets = set([])
checked_remote_targets = {}

def analyze(self, task, find_sources=False, check_remote=False):
def analyze(self, fname, find_sources=False, check_remote=False):
rv = False
self.whitelist = [re.compile(x) for x in self.site.config['LINK_CHECK_WHITELIST']]
base_url = urlparse(self.site.config['BASE_URL'])
self.existing_targets.add(self.site.config['SITE_URL'])
self.existing_targets.add(self.site.config['BASE_URL'])
url_type = self.site.config['URL_TYPE']

deps = {}
if find_sources:
deps = _call_nikola_list(self.site)[1]

if check_remote and requests is None:
req_missing(['requests'], 'check remote links')

if url_type in ('absolute', 'full_path'):
url_netloc_to_root = urlparse(self.site.config['BASE_URL']).path
try:
filename = task.split(":")[-1]
filename = fname

if filename.startswith(self.site.config['CACHE_FOLDER']):
# Do not look at links in the cache, which are not parsed by
@@ -262,7 +262,7 @@ def analyze(self, task, find_sources=False, check_remote=False):
self.logger.warn("Broken link in {0}: {1}".format(filename, target))
if find_sources:
self.logger.warn("Possible sources:")
self.logger.warn("\n".join(_call_nikola_list(self.l, self.site, ["--deps", task])))
self.logger.warn("\n".join(deps[filename]))
self.logger.warn("===============================\n")
except Exception as exc:
self.logger.error("Error with: {0} {1}".format(filename, exc))
@@ -273,14 +273,11 @@ def scan_links(self, find_sources=False, check_remote=False):
self.logger.info("===============\n")
self.logger.notice("{0} mode".format(self.site.config['URL_TYPE']))
failure = False
for task in _call_nikola_list(self.l, self.site, ["--all"]):
task = task.strip()
if task.split(':')[0] in (
'render_tags', 'render_archive',
'render_galleries', 'render_indexes',
'render_pages', 'render_posts',
'render_site') and '.html' in task:
if self.analyze(task, find_sources, check_remote):
# Maybe we should just examine all HTML files
output_folder = self.site.config['OUTPUT_FOLDER']
for fname in _call_nikola_list(self.site)[0]:
if fname.startswith(output_folder) and '.html' == fname[-5:]:
if self.analyze(fname, find_sources, check_remote):
failure = True
if not failure:
self.logger.info("All links checked.")
@@ -290,7 +287,7 @@ def scan_files(self):
failure = False
self.logger.info("Checking Files:")
self.logger.info("===============\n")
only_on_output, only_on_input = real_scan_files(self.l, self.site)
only_on_output, only_on_input = real_scan_files(self.site)

# Ignore folders
only_on_output = [p for p in only_on_output if not os.path.isdir(p)]
@@ -312,7 +309,7 @@ def scan_files(self):
return failure

def clean_files(self):
only_on_output, _ = real_scan_files(self.l, self.site)
only_on_output, _ = real_scan_files(self.site)
for f in only_on_output:
os.unlink(f)
return True
@@ -86,8 +86,7 @@ def _execute(self, command, args):
sys.exit(build)

# Clean non-target files
l = self._doitargs['cmds'].get_plugin('list')(config=self.config, **self._doitargs)
only_on_output, _ = real_scan_files(l, self.site)
only_on_output, _ = real_scan_files(self.site)
for f in only_on_output:
os.unlink(f)

@@ -41,6 +41,5 @@ class CommandOrphans(Command):
Output contains filenames only (it is passable to `xargs rm` or the like)."""

def _execute(self, options, args):
l = self._doitargs['cmds'].get_plugin('list')(config=self.config, **self._doitargs)
orphans = real_scan_files(l, self.site)[0]
orphans = real_scan_files(self.site)[0]
print('\n'.join([p for p in orphans if not os.path.isdir(p)]))
@@ -133,7 +133,7 @@ def render_listing(in_name, out_name, input_folder, output_folder, folders=[], f
os.path.join(
self.kw['output_folder'],
output_folder))))
if self.site.config['COPY_SOURCES']:
if self.site.config['COPY_SOURCES'] and in_name:
source_link = permalink[:-5] # remove '.html'
else:
source_link = None

0 comments on commit 423f3a7

Please sign in to comment.
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.