Skip to content

Commit

Permalink
Merge pull request #886 from aquinzi/interlinks-fixes
Browse files Browse the repository at this point in the history
Interlinks: Improve HTML tag handling. Fixes #885
  • Loading branch information
justinmayer committed May 1, 2017
2 parents a9690c4 + 1d0a7a3 commit e4c2f16
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions interlinks/interlinks.py
Expand Up @@ -9,6 +9,7 @@
"""

from bs4 import BeautifulSoup
from bs4 import SoupStrainer
from pelican import signals
import re

Expand All @@ -23,32 +24,39 @@ def getSettings (generator):
for key, value in generator.settings['INTERLINKS'].items():
interlinks[key] = value

def _expand_interlinks(content, tag_name, attr_name):
    """Return *content* with interlink prefixes expanded on one kind of tag.

    An interlink is an attribute value of the form ``name>rest``. When
    ``name`` is a key of the module-level ``interlinks`` mapping, the
    ``name>`` prefix is replaced by ``interlinks[name]``.

    Args:
        content: HTML markup as a string.
        tag_name: Tag to inspect, e.g. ``"a"`` or ``"img"``.
        attr_name: Attribute holding the URL, e.g. ``"href"`` or ``"src"``.

    Returns:
        The markup with every rewritten tag substituted in place; the input
        string unchanged when nothing matched.
    """
    # Cheap substring test first so documents without the tag are not parsed.
    if "<" + tag_name not in content:
        return content

    prefix_re = re.compile("(.+?)>")
    # Parse only the tags of interest with Python's built-in parser; each tag
    # is then serialized on its own, so no <html>/<body> wrappers are added.
    soup = BeautifulSoup(
        content, "html.parser", parse_only=SoupStrainer(tag_name)
    )
    for tag in soup.find_all(tag_name, **{attr_name: prefix_re}):
        old_tag = str(tag)
        url = tag.get(attr_name)
        match = prefix_re.search(url)
        if match is None:
            continue
        name = match.group(1)
        if name in interlinks:
            tag[attr_name] = url.replace(name + ">", interlinks[name])
            # NOTE: the substitution is textual, so it only takes effect when
            # the serialized tag matches the source markup exactly.
            content = content.replace(old_tag, str(tag))
    return content


def parse_links(instance):
    """Expand interlinks in the ``href`` of <a> and the ``src`` of <img> tags.

    Reads ``instance._content`` and, when it is not ``None``, writes the
    rewritten markup back to ``instance._content``. Only the matching tags are
    substituted; the rest of the markup is left as it was.

    Args:
        instance: Content object passed by the ``content_object_init`` signal.
    """
    if instance._content is None:
        return

    content = instance._content
    content = _expand_interlinks(content, "a", "href")
    # Each tag kind is rewritten from its own tag object; the previous image
    # branch wrote back ``str(link)`` and so emitted an anchor (or raised
    # NameError when the document had no anchors).
    content = _expand_interlinks(content, "img", "src")
    instance._content = content


# Former name of the handler, kept so existing references keep resolving.
content_object_init = parse_links

def register():
    """Connect the plugin's handlers to Pelican's signals.

    ``getSettings`` is connected to ``generator_init`` and ``parse_links`` to
    ``content_object_init``. The handler is registered once, under its
    current name only; the stale registration under the pre-rename name
    ``content_object_init`` is dropped.
    """
    signals.generator_init.connect(getSettings)
    signals.content_object_init.connect(parse_links)

0 comments on commit e4c2f16

Please sign in to comment.