Permalink
Browse files

Merge pull request #229 from getnikola/jsonfeed

Initial JSON Feed implementation
  • Loading branch information...
Kwpolska committed May 22, 2017
2 parents 54b2bec + f4f03a0 commit 953e196128187015aa6e3d70625ca5a4526f0a8b
Showing with 371 additions and 0 deletions.
  1. +14 −0 v7/jsonfeed/README.md
  2. +2 −0 v7/jsonfeed/conf.py.sample
  3. +12 −0 v7/jsonfeed/jsonfeed.plugin
  4. +343 −0 v7/jsonfeed/jsonfeed.py
View
@@ -0,0 +1,14 @@
+An implementation of the [JSON Feed](https://jsonfeed.org/) specification (version 1).
+
+Supported:
+
+* archives (`/archives/2017/feed.json` — only if archives are indexes)
+* blog index (`/feed.json`)
+* author pages (`/authors/john-doe-feed.json`)
+* categories (`/categories/cat_foo-feed.json`)
+* sections (`/section/feed.json`)
+* tags (`/categories/bar-feed.json`)
+
+Unsupported:
+
+* galleries (requires some changes to Nikola core)
@@ -0,0 +1,2 @@
+# Add links to JSON Feeds to page <head>s, where applicable.
+JSONFEED_APPEND_LINKS = True
@@ -0,0 +1,12 @@
+[Core]
+Name = jsonfeed
+Module = jsonfeed
+
+[Nikola]
+PluginCategory = Task
+
+[Documentation]
+Author = Chris Warrick
+Version = 0.1.0
+Website = https://jsonfeed.org/
+Description = Generate JSON Feeds for a Nikola blog.
View
@@ -0,0 +1,343 @@
+# -*- coding: utf-8 -*-
+
+# Copyright © 2017, Chris Warrick and others.
+
+# Permission is hereby granted, free of charge, to any
+# person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the
+# Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the
+# Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice
+# shall be included in all copies or substantial portions of
+# the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
+# OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+"""Generate JSON Feeds."""
+
+from __future__ import unicode_literals
+import json
+import io
+import os
+import lxml
+
+from nikola.plugin_categories import Task
+from nikola import utils
+
+try:
+ from urlparse import urljoin
+except ImportError:
+ from urllib.parse import urljoin # NOQA
+
+
+class JSONFeed(Task):
+ """Generate JSON feeds."""
+
+ name = "jsonfeed"
+ supported_taxonomies = {
+ 'archive': 'archive_jsonfeed',
+ 'author': 'author_jsonfeed',
+ 'category': 'category_jsonfeed',
+ 'section_index': 'section_index_jsonfeed',
+ 'tag': 'tag_jsonfeed',
+ }
+ _section_archive_link_warned = False
+
+ def set_site(self, site):
+ """Set site, which is a Nikola instance."""
+ super(JSONFeed, self).set_site(site)
+
+ self.kw = {
+ 'feed_links_append_query': self.site.config['FEED_LINKS_APPEND_QUERY'],
+ 'feed_length': self.site.config['FEED_LENGTH'],
+ 'feed_plain': self.site.config['FEED_PLAIN'],
+ 'feed_previewimage': self.site.config['FEED_PREVIEWIMAGE'],
+ 'feed_read_more_link': self.site.config['FEED_READ_MORE_LINK'],
+ 'feed_teasers': self.site.config['FEED_TEASERS'],
+ 'jsonfeed_append_links': self.site.config.get('JSONFEED_APPEND_LINKS', True),
+ 'site_url': self.site.config['SITE_URL'],
+ 'blog_title': self.site.config['BLOG_TITLE'],
+ 'blog_description': self.site.config['BLOG_DESCRIPTION'],
+ 'blog_author': self.site.config['BLOG_AUTHOR'],
+ 'tag_pages_titles': self.site.config['TAG_PAGES_TITLES'],
+ 'category_pages_titles': self.site.config['CATEGORY_PAGES_TITLES'],
+ 'posts_section_title': self.site.config['POSTS_SECTION_TITLE'],
+ 'archives_are_indexes': self.site.config['ARCHIVES_ARE_INDEXES'],
+ }
+
+ self.site.register_path_handler("index_jsonfeed", self.index_jsonfeed_path)
+ for t in self.supported_taxonomies.values():
+ self.site.register_path_handler(t, getattr(self, t + '_path'))
+
+ # Add links if desired
+ if self.kw['jsonfeed_append_links']:
+ self.site.template_hooks['extra_head'].append(self.jsonfeed_html_link, True)
+
+ def gen_tasks(self):
+ """Generate JSON feeds."""
+ self.site.scan_posts()
+ yield self.group_task()
+
+ for lang in self.site.translations:
+ # Main feed
+ title = self.kw['blog_title'](lang)
+ link = self.kw['site_url']
+ description = self.kw['blog_description'](lang)
+ timeline = self.site.posts[:self.kw['feed_length']]
+ output_name = os.path.normpath(os.path.join(self.site.config['OUTPUT_FOLDER'], self.site.path("index_jsonfeed", "", lang)))
+ feed_url = self.get_link("index_jsonfeed", "", lang)
+
+ yield self.generate_feed_task(lang, title, link, description,
+ timeline, feed_url, output_name)
+
+ for classification_name, path_handler in self.supported_taxonomies.items():
+ taxonomy = self.site.taxonomy_plugins[classification_name]
+
+ if classification_name == "archive" and not self.kw['archives_are_indexes']:
+ continue
+
+ classification_timelines = {}
+ for tlang, posts_per_classification in self.site.posts_per_classification[taxonomy.classification_name].items():
+ if lang != tlang and not taxonomy.also_create_classifications_from_other_languages:
+ continue
+ classification_timelines.update(posts_per_classification)
+
+ for classification, timeline in classification_timelines.items():
+ if not classification:
+ continue
+ if taxonomy.has_hierarchy:
+ node = self.site.hierarchy_lookup_per_classification[taxonomy.classification_name][lang][classification]
+ taxo_context = taxonomy.provide_context_and_uptodate(classification, lang, node)[0]
+ else:
+ taxo_context = taxonomy.provide_context_and_uptodate(classification, lang)[0]
+ title = taxo_context.get('title', classification)
+ link = self.get_link(classification_name, classification, lang)
+ description = taxo_context.get('description', self.kw['blog_description'](lang))
+ timeline = timeline[:self.kw['feed_length']]
+ output_name = os.path.normpath(os.path.join(self.site.config['OUTPUT_FOLDER'], self.site.path(path_handler, classification, lang)))
+ feed_url = self.get_link(path_handler, classification, lang)
+
+ # Special handling for author pages
+ if classification_name == "author":
+ primary_author = {
+ 'name': classification,
+ 'url': link
+ }
+ else:
+ primary_author = None
+
+ yield self.generate_feed_task(lang, title, link, description,
+ timeline, feed_url, output_name, primary_author)
+
+ def index_jsonfeed_path(self, name, lang, **kwargs):
+ """Return path to main JSON Feed."""
+ return [_f for _f in [self.site.config['TRANSLATIONS'][lang], 'feed.json'] if _f]
+
+ def archive_jsonfeed_path(self, name, lang, **kwargs):
+ """Return path to archive JSON Feed."""
+ return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
+ self.site.config['ARCHIVE_PATH'], name, 'feed.json'] if _f]
+
+ def author_jsonfeed_path(self, name, lang, **kwargs):
+ """Return path to author JSON Feed."""
+ if self.site.config['SLUG_AUTHOR_PATH']:
+ filename = utils.slugify(name, lang) + '-feed.json'
+ else:
+ filename = name + '-feed.json'
+ return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
+ self.site.config['AUTHOR_PATH'](lang), filename] if _f]
+
+ def category_jsonfeed_path(self, name, lang, **kwargs):
+ """Return path to category JSON Feed."""
+ t = self.site.taxonomy_plugins['category']
+ name = t.slugify_category_name(t.extract_hierarchy(name), lang)[0]
+ return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
+ self.site.config['CATEGORY_PATH'](lang), name + '-feed.json'] if _f]
+
+ def section_index_jsonfeed_path(self, name, lang, **kwargs):
+ """Return path to section JSON Feed."""
+ return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
+ self.site.config['SECTION_PATH'](lang), name, 'feed.json'] if _f]
+
+ def tag_jsonfeed_path(self, name, lang, **kwargs):
+ """Return path to tag JSON Feed."""
+ t = self.site.taxonomy_plugins['tag']
+ name = t.slugify_tag_name(name, lang)
+ return [_f for _f in [self.site.config['TRANSLATIONS'][lang],
+ self.site.config['TAG_PATH'](lang), name + '-feed.json'] if _f]
+
+ def get_link(self, path_handler, classification, lang):
+ """Get link for a page."""
+ return urljoin(self.site.config['BASE_URL'], self.site.link(path_handler, classification, lang).lstrip('/'))
+
+ def jsonfeed_html_link(self, site, context):
+ """Generate HTML fragment with link to JSON feed."""
+ pagekind = context['pagekind']
+ lang = context['lang']
+ fragment = '<link rel="alternate" type="application/json" title="{title}" href="{url}">\n'
+ if 'main_index' in pagekind:
+ path_handler = "index_jsonfeed"
+ name = ""
+ elif 'author_page' in pagekind:
+ path_handler = "author_jsonfeed"
+ name = context["author"]
+ elif 'tag_page' in pagekind:
+ path_handler = context["kind"] + "_jsonfeed"
+ name = context[context["kind"]]
+ elif 'archive_page' in pagekind:
+ path_handler = "archive_jsonfeed"
+ if "archive_name" in context:
+ name = context["archive_name"]
+ else:
+ if not self._section_archive_link_warned:
+ utils.LOGGER.warning("To create links for section and archive JSON feeds, you need Nikola >= 7.8.6.")
+ self._section_archive_link_warned = True
+ return ''
+ elif 'section_page' in pagekind:
+ path_handler = "section_index_jsonfeed"
+ if "section" in context:
+ name = context["section"]
+ else:
+ if not self._section_archive_link_warned:
+ utils.LOGGER.warning("To create links for section and archive JSON feeds, you need Nikola >= 7.8.6.")
+ self._section_archive_link_warned = True
+ return ''
+ else:
+ return '' # Do nothing on unsupported pages
+
+ if len(self.site.translations) > 1:
+ out = ""
+ for lang in self.site.translations:
+ title = "JSON Feed ({0})".format(lang)
+ url = self.site.link(path_handler, name, lang)
+ out += fragment.format(title=title, url=url)
+ return out
+ else:
+ title = "JSON Feed"
+ url = self.site.link(path_handler, name, lang)
+ return fragment.format(title=title, url=url)
+
+ def generate_feed_task(self, lang, title, link, description, timeline,
+ feed_url, output_name, primary_author=None):
+ """Generate a task to create a feed."""
+ # Build dependency list
+ deps = []
+ deps_uptodate = []
+ for post in timeline:
+ deps += post.deps(lang)
+ deps_uptodate += post.deps_uptodate(lang)
+
+ task = {
+ 'basename': str(self.name),
+ 'name': str(output_name),
+ 'targets': [output_name],
+ 'file_dep': deps,
+ 'task_dep': ['render_posts', 'render_taxonomies'],
+ 'actions': [(self.generate_feed, (lang, title, link, description,
+ timeline, feed_url, output_name,
+ primary_author))],
+ 'uptodate': [utils.config_changed(self.kw, 'jsonfeed:' + output_name)] + deps_uptodate,
+ 'clean': True
+ }
+
+ yield utils.apply_filters(task, self.site.config['FILTERS'])
+
+ def generate_feed(self, lang, title, link, description, timeline,
+ feed_url, output_name, primary_author=None):
+ """Generate a feed and write it to file."""
+ utils.LocaleBorg().set_locale(lang)
+ items = []
+ for post in timeline:
+ item = {
+ "id": post.guid(lang),
+ "url": post.permalink(lang),
+ "title": post.title(lang),
+ "date_published": post.date.replace(microsecond=0).isoformat(),
+ "date_modified": post.updated.replace(microsecond=0).isoformat(),
+ "author": {
+ "name": post.author(lang),
+ "url": self.site.link("author", post.author(lang), lang)
+ },
+ "tags": post.tags_for_language(lang),
+ }
+
+ if post.updated == post.date:
+ del item["date_modified"]
+
+ link = post.meta[lang].get('link')
+ if link:
+ item['external_url'] = link
+
+ previewimage = post.meta[lang].get('previewimage')
+ if previewimage:
+ item['image'] = self.site.url_replacer(post.permalink(), previewimage, lang, 'absolute')
+
+ if self.kw['feed_plain']:
+ strip_html = True
+ content_tag = "content_text"
+ else:
+ strip_html = False
+ content_tag = "content_html"
+
+ data = post.text(lang, self.kw['feed_teasers'], strip_html, True, True, self.kw['feed_links_append_query'])
+
+ if feed_url is not None and data:
+ # Copied from nikola.py
+ # Massage the post's HTML (unless plain)
+ if not strip_html:
+ if self.kw["feed_previewimage"] and 'previewimage' in post.meta[lang] and post.meta[lang]['previewimage'] not in data:
+ data = "<figure><img src=\"{}\"></figure> {}".format(post.meta[lang]['previewimage'], data)
+ # FIXME: this is duplicated with code in Post.text()
+ try:
+ doc = lxml.html.document_fromstring(data)
+ doc.rewrite_links(lambda dst: self.site.url_replacer(post.permalink(), dst, lang, 'absolute'))
+ try:
+ body = doc.body
+ data = (body.text or '') + ''.join(
+ [lxml.html.tostring(child, encoding='unicode')
+ for child in body.iterchildren()])
+ except IndexError: # No body there, it happens sometimes
+ data = ''
+ except lxml.etree.ParserError as e:
+ if str(e) == "Document is empty":
+ data = ""
+ else: # let other errors raise
+ raise
+
+ item[content_tag] = data
+ items.append(item)
+
+ if not primary_author:
+ # Override for author pages
+ primary_author = {"name": self.kw['blog_author'](lang)}
+
+ feed = {
+ "version": "https://jsonfeed.org/version/1",
+ "user_comment": ("This feed allows you to read the posts from this "
+ "site in any feed reader that supports the JSON "
+ "Feed format. To add " "this feed to your reader, "
+ "copy the following URL — " + feed_url +
+ " — and add it your reader."),
+ "title": title,
+ "home_page_url": self.kw['site_url'],
+ "feed_url": feed_url,
+ "description": description,
+ "author": primary_author,
+ "items": items
+ }
+
+ utils.makedirs(os.path.dirname(output_name))
+
+ with io.open(output_name, 'w', encoding='utf-8') as fh:
+ json.dump(feed, fh, ensure_ascii=False, indent=4)

0 comments on commit 953e196

Please sign in to comment.