Skip to content
Permalink
Browse files

Merge pull request #1759 from getnikola/smarter-check

Fix #1758
  • Loading branch information
ralsina committed May 31, 2015
2 parents aebef5e + 66655e5 commit 423f3a7b659ed526e20b43b29db886d03fbb7914
@@ -14,6 +14,7 @@ Features
Bugfixes
--------

* Extract ``nikola check`` target list from actual task list instead of parsing (Issue #1758)
* Treat special-purpose “draft” tag case-insensitively
* Avoid some rebuild loops (Issue #1747)
* Better error if the outputs of two posts/pages conflict (Issue #1749)
@@ -25,6 +25,7 @@
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from __future__ import print_function
from collections import defaultdict
import os
import re
import sys
@@ -34,6 +35,7 @@
except ImportError:
from urllib.parse import unquote, urlparse, urljoin, urldefrag # NOQA

from doit.loader import generate_tasks
import lxml.html
try:
import requests
@@ -44,33 +46,28 @@
from nikola.utils import get_logger, req_missing


def _call_nikola_list(l, site, arguments):
class NotReallyAStream(object):
"""A massive hack."""
out = []
def _call_nikola_list(site):
    """Collect the targets of the site's tasks and each target's file dependencies.

    Two passes are made over tasks produced by ``site.gen_tasks``: first the
    "Task" plugins (generated under the name 'render_site'), then the
    "LateTask" plugins (generated under the name 'post_render').

    Returns a ``(files, deps)`` tuple: ``files`` is a list of every task
    target in the order encountered, and ``deps`` is a ``defaultdict(list)``
    mapping each target to the accumulated ``file_dep`` entries of the tasks
    that produce it.
    """
    all_targets = []
    sources_by_target = defaultdict(list)
    # (name handed to generate_tasks, plugin category requested from the site)
    passes = (('render_site', "Task"), ('post_render', "LateTask"))
    for group_name, plugin_kind in passes:
        generated = generate_tasks(group_name, site.gen_tasks('render_site', plugin_kind, ''))
        for tsk in generated:
            all_targets.extend(tsk.targets)
            for tgt in tsk.targets:
                sources_by_target[tgt].extend(tsk.file_dep)
    return all_targets, sources_by_target

def write(self, t):
self.out.append(t)

oldstream = l.outstream
newstream = NotReallyAStream()
try:
l.outstream = newstream
l.parse_execute(arguments)
return newstream.out
finally:
l.outstream = oldstream


def real_scan_files(l, site):
def real_scan_files(site):
task_fnames = set([])
real_fnames = set([])
output_folder = site.config['OUTPUT_FOLDER']
# First check that all targets are generated in the right places
for task in _call_nikola_list(l, site, ["--all"]):
task = task.strip()
if output_folder in task and ':' in task:
fname = task.split(':', 1)[-1]
for fname in _call_nikola_list(site)[0]:
fname = fname.strip()
if fname.startswith(output_folder):
task_fnames.add(fname)
# And now check that there are no non-target files
for root, dirs, files in os.walk(output_folder, followlinks=True):
@@ -154,7 +151,6 @@ class CommandCheck(Command):
def _execute(self, options, args):
"""Check the generated site."""
self.logger = get_logger('check', self.site.loghandlers)
self.l = self._doitargs['cmds'].get_plugin('list')(config=self.config, **self._doitargs)

if not options['links'] and not options['files'] and not options['clean']:
print(self.help())
@@ -175,21 +171,25 @@ def _execute(self, options, args):
existing_targets = set([])
checked_remote_targets = {}

def analyze(self, task, find_sources=False, check_remote=False):
def analyze(self, fname, find_sources=False, check_remote=False):
rv = False
self.whitelist = [re.compile(x) for x in self.site.config['LINK_CHECK_WHITELIST']]
base_url = urlparse(self.site.config['BASE_URL'])
self.existing_targets.add(self.site.config['SITE_URL'])
self.existing_targets.add(self.site.config['BASE_URL'])
url_type = self.site.config['URL_TYPE']

deps = {}
if find_sources:
deps = _call_nikola_list(self.site)[1]

if check_remote and requests is None:
req_missing(['requests'], 'check remote links')

if url_type in ('absolute', 'full_path'):
url_netloc_to_root = urlparse(self.site.config['BASE_URL']).path
try:
filename = task.split(":")[-1]
filename = fname

if filename.startswith(self.site.config['CACHE_FOLDER']):
# Do not look at links in the cache, which are not parsed by
@@ -262,7 +262,7 @@ def analyze(self, task, find_sources=False, check_remote=False):
self.logger.warn("Broken link in {0}: {1}".format(filename, target))
if find_sources:
self.logger.warn("Possible sources:")
self.logger.warn("\n".join(_call_nikola_list(self.l, self.site, ["--deps", task])))
self.logger.warn("\n".join(deps[filename]))
self.logger.warn("===============================\n")
except Exception as exc:
self.logger.error("Error with: {0} {1}".format(filename, exc))
@@ -273,14 +273,11 @@ def scan_links(self, find_sources=False, check_remote=False):
self.logger.info("===============\n")
self.logger.notice("{0} mode".format(self.site.config['URL_TYPE']))
failure = False
for task in _call_nikola_list(self.l, self.site, ["--all"]):
task = task.strip()
if task.split(':')[0] in (
'render_tags', 'render_archive',
'render_galleries', 'render_indexes',
'render_pages', 'render_posts',
'render_site') and '.html' in task:
if self.analyze(task, find_sources, check_remote):
# Maybe we should just examine all HTML files
output_folder = self.site.config['OUTPUT_FOLDER']
for fname in _call_nikola_list(self.site)[0]:
if fname.startswith(output_folder) and '.html' == fname[-5:]:
if self.analyze(fname, find_sources, check_remote):
failure = True
if not failure:
self.logger.info("All links checked.")
@@ -290,7 +287,7 @@ def scan_files(self):
failure = False
self.logger.info("Checking Files:")
self.logger.info("===============\n")
only_on_output, only_on_input = real_scan_files(self.l, self.site)
only_on_output, only_on_input = real_scan_files(self.site)

# Ignore folders
only_on_output = [p for p in only_on_output if not os.path.isdir(p)]
@@ -312,7 +309,7 @@ def scan_files(self):
return failure

def clean_files(self):
only_on_output, _ = real_scan_files(self.l, self.site)
only_on_output, _ = real_scan_files(self.site)
for f in only_on_output:
os.unlink(f)
return True
@@ -86,8 +86,7 @@ def _execute(self, command, args):
sys.exit(build)

# Clean non-target files
l = self._doitargs['cmds'].get_plugin('list')(config=self.config, **self._doitargs)
only_on_output, _ = real_scan_files(l, self.site)
only_on_output, _ = real_scan_files(self.site)
for f in only_on_output:
os.unlink(f)

@@ -41,6 +41,5 @@ class CommandOrphans(Command):
Output contains filenames only (it is passable to `xargs rm` or the like)."""

def _execute(self, options, args):
l = self._doitargs['cmds'].get_plugin('list')(config=self.config, **self._doitargs)
orphans = real_scan_files(l, self.site)[0]
orphans = real_scan_files(self.site)[0]
print('\n'.join([p for p in orphans if not os.path.isdir(p)]))
@@ -133,7 +133,7 @@ def render_listing(in_name, out_name, input_folder, output_folder, folders=[], f
os.path.join(
self.kw['output_folder'],
output_folder))))
if self.site.config['COPY_SOURCES']:
if self.site.config['COPY_SOURCES'] and in_name:
source_link = permalink[:-5] # remove '.html'
else:
source_link = None

0 comments on commit 423f3a7

Please sign in to comment.