getnikola · Kwpolska · May 21, 2017 · May 14, 2017 · May 14, 2017 · May 14, 2017
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -4,6 +4,8 @@ New in master
 Features
 --------
 
+* New ``deduplicate_ids`` filter, for preventing duplication of HTML id
+  attributes (Issue #2570)
 * New ``add_header_permalinks`` filter, for Sphinx-style header links
   (Issue #2636)
 

diff --git a/docs/manual.txt b/docs/manual.txt
@@ -1919,7 +1919,7 @@ add_header_permalinks
       .headerlink { opacity: 0.1; margin-left: 0.2em; }
       .headerlink:hover { opacity: 1; text-decoration: none; }
 
-   Additionally, you can provide a custom list of XPath expressions which should be used for finding headers (``{hx}}`` is replaced by headers h1 through h6).
+   Additionally, you can provide a custom list of XPath expressions which should be used for finding headers (``{hx}`` is replaced by headers h1 through h6).
    This is required if you use a custom theme that does not use ``"e-content entry-content"`` as a class for post and page contents.
 
    .. code:: python
@@ -1928,6 +1928,11 @@ add_header_permalinks
         # Include *every* header (not recommended):
         # HEADER_PERMALINKS_XPATH_LIST = ['*//{hx}']
 
+deduplicate_ids
+   Prevent duplicated IDs in HTML output. An incrementing counter is added to
+   offending IDs. If used alongside ``add_header_permalinks``, it will fix
+   those links (it must run **after** that filter).
+
 You can apply filters to specific posts or pages by using the ``filters`` metadata field:
 
 .. code:: restructuredtext

diff --git a/nikola/conf.py.in b/nikola/conf.py.in
@@ -587,7 +587,7 @@ GITHUB_COMMIT_SOURCE = True
 # HTML_TIDY_EXECUTABLE = 'tidy5'
 
 # List of XPath expressions which should be used for finding headers
-# ({hx}} is replaced by headers h1 through h6).
+# ({hx} is replaced by headers h1 through h6).
 # You must change this if you use a custom theme that does not use
 # "e-content entry-content" as a class for post and page contents.
 

diff --git a/nikola/filters.py b/nikola/filters.py
@@ -437,3 +437,38 @@ def add_header_permalinks(data, xpath_list=None):
             new_node = lxml.html.fragment_fromstring('<a href="#{0}" class="headerlink" title="Permalink to this heading">¶</a>'.format(hid))
             node.append(new_node)
     return lxml.html.tostring(doc, encoding="unicode")
+
+
+@apply_to_text_file
+def deduplicate_ids(data):
+    """Post-process HTML via lxml to deduplicate IDs.
+
+    The first element carrying a duplicated ID keeps it; every later one, in
+    document order, gets ``-2``, ``-3``, ... appended. ``headerlink`` anchors
+    inside a renamed element that pointed at the old ID are updated to match.
+    ``data`` is returned unchanged when no ID occurs more than once.
+    """
+    doc = lxml.html.document_fromstring(data)
+    # Group elements by ID in a single pass; XPath results come back in
+    # document order, so each list is ordered top to bottom.
+    elements_by_id = {}
+    for element in doc.xpath('//*[@id]'):
+        elements_by_id.setdefault(element.attrib['id'], []).append(element)
+    duplicated = {i: els for i, els in elements_by_id.items() if len(els) > 1}
+    if not duplicated:
+        # Nothing to fix: hand back the input untouched (no reserialization).
+        return data
+
+    for i, offending_elements in duplicated.items():
+        # Leave the first element alone so one element keeps the plain ID,
+        # and number the remaining ones starting at 2.
+        for counter, e in enumerate(offending_elements[1:], 2):
+            new_id = '{0}-{1}'.format(i, counter)
+            e.attrib['id'] = new_id
+            # Find headerlinks that we can fix.
+            for hl in e.find_class('headerlink'):
+                # We might get headerlinks of child elements, and anchors
+                # without an href; only rewrite links to the old ID.
+                if hl.attrib.get('href') == '#' + i:
+                    hl.attrib['href'] = '#' + new_id
+    return lxml.html.tostring(doc, encoding='unicode')