Permalink
Browse files

Merge branch 'master' into idea-for-1814

  • Loading branch information...
2 parents 59d3e15 + dc1b848 commit c2b7b7835dc00568df9e06cc655374c12f09dd2c @ralsina ralsina committed Sep 8, 2015
Showing with 37 additions and 1 deletion.
  1. +36 −1 nikola/plugins/command/import_wordpress.py
  2. +1 −0 tests/test_command_import_wordpress.py
@@ -191,6 +191,12 @@ class CommandImportWordpress(Command, ImportMixin):
'type': bool,
'help': "Automatically installs the WordPress page compiler (either locally or in the new site) if required by other options.\nWarning: the compiler is GPL software!",
},
+ {
+ 'name': 'tag_saniziting_strategy',
+ 'long': 'tag-saniziting-strategy',
+ 'default': 'first',
+ 'help': 'lower: Convert all tag and category names to lower case\nfirst: Keep first spelling of tag or category name',
+ },
]
all_tags = set([])
@@ -239,6 +245,8 @@ def _read_options(self, options, args):
self.install_wordpress_compiler = options.get('install_wordpress_compiler', False)
self.wordpress_page_compiler = None
+ self.tag_saniziting_strategy = options.get('tag_saniziting_strategy', 'first')
+
self.auth = None
if options.get('download_auth') is not None:
username_password = options.get('download_auth')
@@ -750,6 +758,24 @@ def _create_metadata(self, status, excerpt, tags, categories, post_name=None):
tags_cats = tags + categories
return tags_cats, other_meta
# Spellings already handed out by _sanitize(), kept separately for categories
# (key True) and tags (key False).  Each inner dict maps a lower-cased name to
# a one-element list holding the first spelling that was seen for it.
# NOTE(review): this is declared next to the methods rather than in __init__,
# so it appears to be shared by every instance of the class -- confirm that
# this is intended.
_tag_sanitize_map = {True: {}, False: {}}

def _sanitize(self, tag, is_category):
    """Return the spelling to use for a tag or category name.

    The behavior depends on ``self.tag_saniziting_strategy``:

    * ``'lower'``: return the name converted to lower case.
    * ``'first'``: return the first spelling seen for this name, compared
      case-insensitively; a warning is logged whenever the spelling passed
      in differs from the one returned.

    Any other strategy is logged as an error and terminates the process
    via ``sys.exit(1)``.

    :param tag: tag or category name as read from the import file.
    :param is_category: True for a category, False for a tag; the two kinds
        are tracked independently of each other.
    :return: the sanitized name.
    """
    strategy = self.tag_saniziting_strategy
    if strategy == 'lower':
        return tag.lower()
    if strategy != 'first':
        # Reject an unknown strategy before looking at the tag at all.
        # Previously this check only ran for names that had been seen
        # before, so a bad setting was silently accepted until the first
        # repeated name showed up.
        LOGGER.error("Unknown tag sanitizing strategy '{0}'!".format(strategy))
        sys.exit(1)

    seen = self._tag_sanitize_map[is_category]
    key = tag.lower()
    if key not in seen:
        # First occurrence: remember this spelling and use it unchanged.
        seen[key] = [tag]
        return tag

    first_spelling = seen[key][0]
    if tag != first_spelling:
        LOGGER.warn("Changing spelling of {0} name '{1}' to {2}.".format('category' if is_category else 'tag', tag, first_spelling))
    return first_spelling
+
def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attachments=None):
"""Take an item from the feed and creates a post file."""
if out_folder is None:
@@ -837,7 +863,6 @@ def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attac
type = tag.attrib['domain']
if text == 'Uncategorized' and type == 'category':
continue
- self.all_tags.add(text)
if type == 'category':
categories.append(text)
else:
@@ -846,6 +871,16 @@ def import_postpage_item(self, item, wordpress_namespace, out_folder=None, attac
if '$latex' in content:
tags.append('mathjax')
+ for i, cat in enumerate(categories[:]):
+ cat = self._sanitize(cat, True)
+ categories[i] = cat
+ self.all_tags.add(cat)
+
+ for i, tag in enumerate(tags[:]):
+ tag = self._sanitize(tag, False)
+ tags[i] = tag
+ self.all_tags.add(tag)
+
# Find post format if it's there
post_format = 'wp'
format_tag = [x for x in item.findall('*//{%s}meta_key' % wordpress_namespace) if x.text == '_tc_post_format']
@@ -196,6 +196,7 @@ def test_importing_posts_and_attachments(self):
self.import_command.export_comments = False
self.import_command.transform_to_html = False
self.import_command.use_wordpress_compiler = False
+ self.import_command.tag_saniziting_strategy = 'first'
self.import_command.context = self.import_command.populate_context(
channel)

0 comments on commit c2b7b78

Please sign in to comment.