Skip to content
Permalink
Browse files
Use encodelink() everywhere
  • Loading branch information
da2x committed Sep 8, 2015
1 parent dc1b848 commit 33db32fc95ea82d82d377e1f0f76930400ae642c
Showing with 50 additions and 22 deletions.
  1. +12 −7 nikola/nikola.py
  2. +2 −1 nikola/plugins/command/check.py
  3. +6 −6 nikola/plugins/task/sitemap/__init__.py
  4. +7 −4 nikola/post.py
  5. +23 −4 nikola/utils.py
@@ -1250,7 +1250,7 @@ def generic_rss_renderer(self, lang, title, link, description, timeline, output_
"""Take all necessary data, and render a RSS feed in output_path."""
rss_obj = utils.ExtendedRSS2(
title=title,
link=link,
link=utils.encodelink(link),
description=description,
lastBuildDate=datetime.datetime.utcnow(),
generator='https://getnikola.com/',
@@ -1445,7 +1445,9 @@ def register_path_handler(self, kind, f):

def link(self, *args):
    """Create a link.

    Delegates to ``self.path`` and percent-encodes the result so it is
    safe to embed in HTML/XML output.
    """
    url = self.path(*args, is_link=True)
    # Encode the IRI into a valid URL (percent-encoded path, IDNA host).
    url = utils.encodelink(url)
    return url

def abs_link(self, dst, protocol_relative=False):
"""Get an absolute link."""
@@ -1457,6 +1459,7 @@ def abs_link(self, dst, protocol_relative=False):
url = urlparse(dst).geturl()
if protocol_relative:
url = url.split(":", 1)[1]
url = utils.encodelink(url)
return url

def rel_link(self, src, dst):
@@ -1471,7 +1474,7 @@ def rel_link(self, src, dst):
parsed_src = urlsplit(src)
parsed_dst = urlsplit(dst)
if parsed_src[:2] != parsed_dst[:2]:
return dst
return utils.encodelink(dst)
# Now both paths are on the same site and absolute
src_elems = parsed_src.path.split('/')[1:]
dst_elems = parsed_dst.path.split('/')[1:]
@@ -1482,7 +1485,9 @@ def rel_link(self, src, dst):
else:
i += 1
# Now i is the longest common prefix
return '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
url = '/'.join(['..'] * (len(src_elems) - i - 1) + dst_elems[i:])
url = utils.encodelink(url)
return url

def file_exists(self, path, not_empty=False):
"""Check if the file exists. If not_empty is True, it also must not be empty."""
@@ -1633,7 +1638,7 @@ def scan_posts(self, really=False, ignore_quit=False, quiet=False):
utils.LOGGER.error('Tag {0} is used in: {1}'.format(other_tag, ', '.join([p.source_path for p in self.posts_per_tag[other_tag]])))
quit = True
else:
slugged_tags.add(utils.slugify(tag, force=True))
slugged_tags.add(utils.slugify(tag))
self.posts_per_tag[tag].append(post)
for lang in self.config['TRANSLATIONS'].keys():
self.tags_per_language[lang].extend(post.tags_for_language(lang))
@@ -1790,7 +1795,7 @@ def atom_link(link_rel, link_type, link_href):
link = lxml.etree.Element("link")
link.set("rel", link_rel)
link.set("type", link_type)
link.set("href", link_href)
link.set("href", utils.encodelink(link_href))
return link

deps = []
@@ -1826,7 +1831,7 @@ def atom_link(link_rel, link_type, link_href):
feed_root = lxml.etree.Element("feed", nsmap=nslist)
feed_root.addprevious(lxml.etree.ProcessingInstruction(
"xml-stylesheet",
'href="' + feed_xsl_link + '" type="text/xsl media="all"'))
'href="' + utils.encodelink(feed_xsl_link) + '" type="text/xsl media="all"'))
feed_root.set("{http://www.w3.org/XML/1998/namespace}lang", lang)
feed_root.set("xmlns", "http://www.w3.org/2005/Atom")
feed_title = lxml.etree.SubElement(feed_root, "title")
@@ -212,7 +212,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):
# Quietly ignore files that don’t exist; use `nikola check -f` instead (Issue #1831)
return False

if '.html' == fname[-5:]: # DISABLED
if '.html' == fname[-5:]:
d = lxml.html.fromstring(open(filename, 'rb').read())
extra_objs = lxml.html.fromstring('<html/>')

@@ -340,6 +340,7 @@ def analyze(self, fname, find_sources=False, check_remote=False):

if any(re.search(x, target_filename) for x in self.whitelist):
continue

elif target_filename not in self.existing_targets:
if os.path.exists(target_filename):
self.logger.notice("Good link {0} => {1}".format(target, target_filename))
@@ -40,7 +40,7 @@
import urllib.robotparser as robotparser # NOQA

from nikola.plugin_categories import LateTask
from nikola.utils import config_changed, apply_filters
from nikola.utils import apply_filters, config_changed, encodelink


urlset_header = """<?xml version="1.0" encoding="UTF-8"?>
@@ -158,10 +158,10 @@ def scan_locs():
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
if loc == alt_url:
if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
urlset[loc] = loc_format.format(loc, lastmod, ''.join(alternates))
urlset[loc] = loc_format.format(encodelink(loc), lastmod, ''.join(alternates))
for fname in files:
if kw['strip_indexes'] and fname == kw['index_file']:
continue # We already mapped the folder
@@ -201,7 +201,7 @@ def scan_locs():
path = path.replace(os.sep, '/')
lastmod = self.get_lastmod(real_path)
loc = urljoin(base_url, base_path + path)
sitemapindex[loc] = sitemap_format.format(loc, lastmod)
sitemapindex[loc] = sitemap_format.format(encodelink(loc), lastmod)
continue
else:
continue # ignores all XML files except those presumed to be RSS
@@ -215,10 +215,10 @@ def scan_locs():
if post:
for lang in kw['translations']:
alt_url = post.permalink(lang=lang, absolute=True)
if loc == alt_url:
if encodelink(loc) == alt_url:
continue
alternates.append(alternates_format.format(lang, alt_url))
urlset[loc] = loc_format.format(loc, lastmod, '\n'.join(alternates))
urlset[loc] = loc_format.format(encodelink(loc), lastmod, '\n'.join(alternates))

def robot_fetch(path):
"""Check if robots can fetch a file."""
@@ -707,10 +707,11 @@ def remaining_paragraph_count(self):
def source_link(self, lang=None):
    """Return absolute link to the post's source.

    Builds the destination path for the source file (with its original
    extension) and percent-encodes it for use in HTML/XML.
    """
    ext = self.source_ext(True)
    link = "/" + self.destination_path(lang=lang, extension=ext, sep='/')
    # Encode the IRI into a valid URL before handing it to templates.
    link = utils.encodelink(link)
    return link

def destination_path(self, lang=None, extension='.html', sep=os.sep):
"""Destination path for this post, relative to output/.
@@ -747,6 +748,7 @@ def section_link(self, lang=None):
link = urljoin('/' + slug + '/', self.index_file)
else:
link = '/' + slug + '/'
link = utils.encodelink(link)
return link

def section_name(self, lang=None):
@@ -803,6 +805,7 @@ def permalink(self, lang=None, absolute=False, extension='.html', query=None):
link = link[:-index_len]
if query:
link = link + "?" + query
link = utils.encodelink(link)
return link

@property
@@ -45,9 +45,17 @@
import dateutil.parser
import dateutil.tz
import logbook
try:
from urllib import quote as urlquote
from urllib import unquote as urlunquote
from urlparse import urlparse, urlunparse
except ImportError:
from urllib.parse import quote as urlquote # NOQA
from urllib.parse import unquote as urlunquote # NOQA
from urllib.parse import urlparse, urlunparse # NOQA
import warnings
import PyRSS2Gen as rss
from collections import defaultdict, Callable
from collections import defaultdict, Callable, OrderedDict
from logbook.compat import redirect_logging
from logbook.more import ExceptionHandler, ColorizedStderrHandler
from pygments.formatters import HtmlFormatter
@@ -725,7 +733,7 @@ def remove_file(source):
elif os.path.isfile(source) or os.path.islink(source):
os.remove(source)

# slugify is copied from
# slugify is adopted from
# http://code.activestate.com/recipes/
# 577257-slugify-make-a-string-usable-in-a-url-or-filename/
_slugify_strip_re = re.compile(r'[^+\w\s-]')
@@ -752,7 +760,7 @@ def slugify(value, force=False):
# This is the standard state of slugify, which actually does some work.
# It is the preferred style, especially for Western languages.
value = unicode_str(unidecode(value))
value = _slugify_strip_re.sub('', value, re.UNICODE).strip().lower()
value = _slugify_strip_re.sub('', value, re.UNICODE).strip()
return _slugify_hyphenate_re.sub('-', value, re.UNICODE)
else:
# This is the “disarmed” state of slugify, which lets the user
@@ -767,7 +775,7 @@ def slugify(value, force=False):

for c in rc:
value = value.replace(c, '-')
return value
return value.lower()


def unslugify(value, discard_numbers=True):
@@ -783,6 +791,17 @@ def unslugify(value, discard_numbers=True):
return value


def encodelink(iri):
    """Given an encoded or unencoded link string, return an encoded string suitable for use as a link in HTML and XML.

    The path component is percent-encoded (unquoting first, so an
    already-encoded path is not double-encoded) and the host name is
    converted to its IDNA (punycode) form.
    """
    # NOTE: ParseResult is a namedtuple; use _replace() instead of the
    # __dict__/OrderedDict round-trip, which broke when namedtuple's
    # __dict__ property was removed in Python 3.5.
    link = urlparse(iri)
    link = link._replace(path=urlquote(urlunquote(link.path)))
    try:
        # Round-trip through IDNA to normalize an already-ASCII host name.
        netloc = link.netloc.encode('utf-8').decode('idna').encode('idna').decode('utf-8')
    except UnicodeDecodeError:
        # Host name contains non-ASCII characters; encode it to IDNA directly.
        netloc = link.netloc.encode('idna').decode('utf-8')
    link = link._replace(netloc=netloc)
    return urlunparse(link)

# A very slightly safer version of zip.extractall that works on
# python < 2.6

0 comments on commit 33db32f

Please sign in to comment.