
Merge pull request #2037 from getnikola/encodelinks

Use encodelink() everywhere
da2x committed Sep 10, 2015
2 parents 078a8b8 + f15f871 commit ef6af2be62cb04922cd92bc9c8021bbcbcaf1354
Showing with 53 additions and 23 deletions.
  1. +12 −7 nikola/nikola.py
  2. +6 −4 nikola/plugins/command/check.py
  3. +6 −6 nikola/plugins/task/sitemap/__init__.py
  4. +5 −4 nikola/post.py
  5. +24 −2 nikola/utils.py
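
For context: utils.encodelink() (added at the bottom of this diff, in nikola/utils.py) takes an encoded or unencoded link string and returns a form safe to emit in HTML and XML, percent-encoding the path and IDNA-encoding the hostname. A rough sketch of the intended behavior (the example URL is illustrative, not from the commit):

    from nikola.utils import encodelink

    encodelink(u'http://üüü.de/üüü')
    # expected: 'http://xn--tdaaa.de/%C3%BC%C3%BC%C3%BC'

The rest of the diff threads this helper through every place Nikola builds a user-facing URL: RSS/Atom feeds, the link(), abs_link() and rel_link() helpers, sitemaps, and post permalinks.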
nikola/nikola.py
@@ -1252,7 +1252,7 @@ def generic_rss_renderer(self, lang, title, link, description, timeline, output_
"""Take all necessary data, and render a RSS feed in output_path."""
rss_obj = utils.ExtendedRSS2(
title=title,
- link=link,
+ link=utils.encodelink(link),
description=description,
lastBuildDate=datetime.datetime.utcnow(),
generator='https://getnikola.com/',
@@ -1447,7 +1447,9 @@ def register_path_handler(self, kind, f):

def link(self, *args):
"""Create a link."""
- return self.path(*args, is_link=True)
+ url = self.path(*args, is_link=True)
+ url = utils.encodelink(url)
+ return url

def abs_link(self, dst, protocol_relative=False):
"""Get an absolute link."""
@@ -1459,6 +1461,7 @@ def abs_link(self, dst, protocol_relative=False):
url = urlparse(dst).geturl()
if protocol_relative:
url = url.split(":", 1)[1]
+ url = utils.encodelink(url)
return url

def rel_link(self, src, dst):
@@ -1473,7 +1476,7 @@ def rel_link(self, src, dst):
parsed_src = urlsplit(src)
parsed_dst = urlsplit(dst)
if parsed_src[:2] != parsed_dst[:2]:
- return dst
+ return utils.encodelink(dst)
# Now both paths are on the same site and absolute
src_elems = parsed_src.path.split('/')[1:]
dst_elems = parsed_dst.path.split('/')[1:]
@@ -1484,7 +1487,9 @@ def rel_link(self, src, dst):
else:
i += 1
# Now i is the longest common prefix
- return '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
+ url = '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
+ url = utils.encodelink(url)
+ return url
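
As a sketch of the effect on rel_link() (my example values, not from the PR): a destination with non-ASCII path segments now comes back percent-encoded instead of raw:

    site.rel_link('https://example.com/blog/a/', 'https://example.com/tägs/')
    # before this change: '../../tägs/'
    # after this change:  '../../t%C3%A4gs/'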

def file_exists(self, path, not_empty=False):
"""Check if the file exists. If not_empty is True, it also must not be empty."""
@@ -1635,7 +1640,7 @@ def scan_posts(self, really=False, ignore_quit=False, quiet=False):
utils.LOGGER.error('Tag {0} is used in: {1}'.format(other_tag, ', '.join([p.source_path for p in self.posts_per_tag[other_tag]])))
quit = True
else:
- slugged_tags.add(utils.slugify(tag, force=True))
+ slugged_tags.add(utils.slugify(tag))
self.posts_per_tag[tag].append(post)
for lang in self.config['TRANSLATIONS'].keys():
self.tags_per_language[lang].extend(post.tags_for_language(lang))
@@ -1792,7 +1797,7 @@ def atom_link(link_rel, link_type, link_href):
link = lxml.etree.Element("link")
link.set("rel", link_rel)
link.set("type", link_type)
link.set("href", link_href)
link.set("href", utils.encodelink(link_href))
return link

deps = []
@@ -1828,7 +1833,7 @@ def atom_link(link_rel, link_type, link_href):
feed_root = lxml.etree.Element("feed", nsmap=nslist)
feed_root.addprevious(lxml.etree.ProcessingInstruction(
"xml-stylesheet",
'href="' + feed_xsl_link + '" type="text/xsl media="all"'))
'href="' + utils.encodelink(feed_xsl_link) + '" type="text/xsl media="all"'))
feed_root.set("{http://www.w3.org/XML/1998/namespace}lang", lang)
feed_root.set("xmlns", "http://www.w3.org/2005/Atom")
feed_title = lxml.etree.SubElement(feed_root, "title")
nikola/plugins/command/check.py
@@ -212,7 +212,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
# Quietly ignore files that don’t exist; use `nikola check -f` instead (Issue #1831)
return False

- if '.html' == fname[-5:]: # DISABLED
+ if '.html' == fname[-5:]:
d = lxml.html.fromstring(open(filename, 'rb').read())
extra_objs = lxml.html.fromstring('<html/>')

@@ -323,8 +323,9 @@ def analyze(self, fname, find_sources=False, check_remote=False):
target_filename = os.path.abspath(
os.path.join(self.site.config['OUTPUT_FOLDER'], unquote(target.lstrip('/'))))
else: # Relative path
+ unquoted_target = unquote(target).encode('utf-8') if sys.version_info.major >= 3 else unquote(target).decode('utf-8')
target_filename = os.path.abspath(
- os.path.join(os.path.dirname(filename), unquote(target)))
+ os.path.join(os.path.dirname(filename).encode('utf-8'), unquoted_target))

elif url_type in ('full_path', 'absolute'):
if url_type == 'absolute':
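
Why the interpreter check above (illustrative, not part of the diff): unquote() returns text on Python 3 but a byte string on Python 2, so the two branches normalize the target before joining it with the utf-8-encoded directory name:

    unquote('t%C3%A4gs')
    # Python 3: 'tägs'         (str;   .encode('utf-8') -> bytes for the join)
    # Python 2: 't\xc3\xa4gs'  (bytes; .decode('utf-8') -> u'tägs')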
@@ -340,9 +341,10 @@ def analyze(self, fname, find_sources=False, check_remote=False):

if any(re.search(x, target_filename) for x in self.whitelist):
continue

elif target_filename not in self.existing_targets:
if os.path.exists(target_filename):
self.logger.notice("Good link {0} => {1}".format(target, target_filename))
self.logger.notice(u"Good link {0} => {1}".format(target, target_filename))
self.existing_targets.add(target_filename)
else:
rv = True
@@ -352,7 +354,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
self.logger.warn("\n".join(deps[filename]))
self.logger.warn("===============================\n")
except Exception as exc:
self.logger.error("Error with: {0} {1}".format(filename, exc))
self.logger.error(u"Error with: {0} {1}".format(filename, exc))
return rv

def scan_links(self, find_sources=False, check_remote=False):
nikola/plugins/task/sitemap/__init__.py
@@ -40,7 +40,7 @@
import urllib.robotparser as robotparser # NOQA

from nikola.plugin_categories import LateTask
- from nikola.utils import config_changed, apply_filters
+ from nikola.utils import apply_filters, config_changed, encodelink


urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -158,10 +158,10 @@ def scan_locs():
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
for fname in files:
if kw['strip_indexes'] and fname == kw['index_file']:
continue # We already mapped the folder
@@ -201,7 +201,7 @@ def scan_locs():
path = path.replace(os.sep, '/')
lastmod = self.get_lastmod(real_path)
loc = urljoin(base_url, base_path + path)
- sitemapindex[loc] = sitemap_format.format(loc, lastmod)
+ sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
continue
else:
continue # ignores all XML files except those presumed to be RSS
@@ -215,10 +215,10 @@ def scan_locs():
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
- if loc == alt_url:
+ if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
- urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
+ urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))
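
The encodelink() on the comparison is needed because the two sides are now built differently: post.permalink() (changed in nikola/post.py below) already returns an encoded URL, while loc is assembled from raw paths with urljoin(). A sketch with made-up values:

    loc     = 'http://üüü.de/üüü/'                       # raw, from urljoin()
    alt_url = 'http://xn--tdaaa.de/%C3%BC%C3%BC%C3%BC/'  # encoded, from permalink()
    encodelink(loc) == alt_url  # True; a plain loc == alt_url would miss the match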

def robot_fetch(path):
"""Check if robots can fetch a file."""
nikola/post.py
@@ -711,10 +711,9 @@ def remaining_paragraph_count(self):
def source_link(self, lang=None):
"""Return absolute link to the post's source."""
ext = self.source_ext(True)
return "/" + self.destination_path(
lang=lang,
extension=ext,
sep='/')
link = "/" + self.destination_path(lang=lang, extension=ext, sep='/')
link = utils.encodelink(link)
return link

def destination_path(self, lang=None, extension='.html', sep=os.sep):
"""Destination path for this post, relative to output/.
@@ -751,6 +750,7 @@ def section_link(self, lang=None):
link = urljoin('/' + slug + '/', self.index_file)
else:
link = '/' + slug + '/'
+ link = utils.encodelink(link)
return link

def section_name(self, lang=None):
@@ -807,6 +807,7 @@ def permalink(self, lang=None, absolute=False, extension='.html', query=None):
link = link[:-index_len]
if query:
link = link + "?" + query
+ link = utils.encodelink(link)
return link

@property
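
Note the ordering in permalink(): encodelink() runs last, after the query string is appended. That is safe because encodelink() parses the URL first and only re-quotes the path and netloc components, leaving the query untouched (illustrative values, not from the diff):

    encodelink('/posts/tag page.html?x=1')
    # -> '/posts/tag%20page.html?x=1'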
nikola/utils.py
@@ -45,15 +45,24 @@
import dateutil.parser
import dateutil.tz
import logbook
+ try:
+     from urllib import quote as urlquote
+     from urllib import unquote as urlunquote
+     from urlparse import urlparse, urlunparse
+ except ImportError:
+     from urllib.parse import quote as urlquote  # NOQA
+     from urllib.parse import unquote as urlunquote  # NOQA
+     from urllib.parse import urlparse, urlunparse  # NOQA
import warnings
import PyRSS2Gen as rss
- from collections import defaultdict, Callable
+ from collections import defaultdict, Callable, OrderedDict
from logbook.compat import redirect_logging
from logbook.more import ExceptionHandler, ColorizedStderrHandler
from pygments.formatters import HtmlFormatter
from zipfile import ZipFile as zipf
from doit import tools
from unidecode import unidecode
+ from unicodedata import normalize as unicodenormalize
from pkg_resources import resource_filename
from doit.cmdparse import CmdParse

@@ -725,7 +734,7 @@ def remove_file(source):
elif os.path.isfile(source) or os.path.islink(source):
os.remove(source)

- # slugify is copied from
+ # slugify is adopted from
# http://code.activestate.com/recipes/
# 577257-slugify-make-a-string-usable-in-a-url-or-filename/
_slugify_strip_re = re.compile(r'[^+\w\s-]')
@@ -783,9 +792,22 @@ def unslugify(value, discard_numbers=True):
return value


+ def encodelink(iri):
+     """Given an encoded or unencoded link string, return an encoded string suitable for use as a link in HTML and XML."""
+     iri = unicodenormalize('NFC', iri)
+     link = OrderedDict(urlparse(iri)._asdict())
+     link['path'] = urlquote(urlunquote(link['path']).encode('utf-8'))
+     try:
+         link['netloc'] = link['netloc'].encode('utf-8').decode('idna').encode('idna').decode('utf-8')
+     except UnicodeDecodeError:
+         link['netloc'] = link['netloc'].encode('idna').decode('utf-8')
+     encoded_link = urlunparse(link.values())
+     return encoded_link

# A very slightly safer version of zip.extractall that works on
# python < 2.6


class UnsafeZipException(Exception):

"""Exception for unsafe zip files."""
