Skip to content
Browse files
Fix #2570 -- new deduplicate_ids filter
Signed-off-by: Chris Warrick <>
  • Loading branch information
Kwpolska committed May 14, 2017
1 parent 3b748b0 commit c393e225fc1d37cb910b2475f76641883b78c8c4
Showing 3 changed files with 41 additions and 0 deletions.
@@ -4,6 +4,8 @@ New in master

* New ``deduplicate_ids`` filter, for preventing duplication of HTML id
attributes (Issue #2570)
* New ``add_header_permalinks`` filter, for Sphinx-style header links
(Issue #2636)

@@ -1928,6 +1928,11 @@ add_header_permalinks
# Include *every* header (not recommended):

Prevent duplicated IDs in HTML output. An incrementing counter is added to
offending IDs. If used alongside ``add_header_permalinks``, it will fix
those links (it must run **after** that filter).

You can apply filters to specific posts or pages by using the ``filters`` metadata field:

.. code:: restructuredtext
@@ -436,3 +436,37 @@ def add_header_permalinks(data, xpath_list=None):
new_node = lxml.html.fragment_fromstring('<a href="#{0}" class="headerlink" title="Permalink to this heading">¶</a>'.format(hid))
return lxml.html.tostring(doc, encoding="unicode")

def deduplicate_ids(data):
    """Post-process HTML via lxml to deduplicate IDs.

    The first occurrence of each ``id`` attribute is kept as-is; every
    subsequent occurrence gets an incrementing ``-2``, ``-3``, … suffix.
    Header permalinks (``a.headerlink``) inside a renamed element are
    re-pointed at the new id so ``add_header_permalinks`` output stays
    valid (this filter must run after that one).

    :param data: HTML document as a string.
    :return: the (possibly rewritten) HTML as a string.
    """
    doc = lxml.html.document_fromstring(data)
    elements = doc.xpath('//*')
    all_ids = [element.attrib.get('id') for element in elements]
    seen_ids = set()
    duplicated_ids = set()
    for i in all_ids:
        if i is not None and i in seen_ids:
            duplicated_ids.add(i)
        else:
            seen_ids.add(i)

    if duplicated_ids:
        # Well, that sucks.
        for i in duplicated_ids:
            # Results are ordered the same way they are ordered in document
            offending_elements = doc.xpath('//*[@id="{}"]'.format(i))
            # Leave the first occurrence untouched; rename the rest.
            counter = 2
            for e in offending_elements[1:]:
                new_id = '{0}-{1}'.format(i, counter)
                e.attrib['id'] = new_id
                counter += 1
                # Find headerlinks that we can fix.
                headerlinks = e.find_class('headerlink')
                for hl in headerlinks:
                    # We might get headerlinks of child elements
                    if hl.attrib['href'] == '#' + i:
                        hl.attrib['href'] = '#' + new_id
        return lxml.html.tostring(doc, encoding='unicode')
    # No duplicates: return the input unchanged to avoid a needless
    # reserialization of the document.
    return data

0 comments on commit c393e22

Please sign in to comment.