Skip to content

Commit

Permalink
interlinks: Fix unicode issue with py27 #1013
Browse files Browse the repository at this point in the history
Use the `.decode()` method from bs4 rather than `str()` to ensure
consistent Unicode strings.
  • Loading branch information
cscutcher committed Apr 13, 2018
1 parent 00475a0 commit 37aef01
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions interlinks/interlinks.py
Expand Up @@ -34,29 +34,31 @@ def parse_links(instance):
text = BeautifulSoup(
content, "html.parser", parse_only=SoupStrainer("a"))
for link in text.find_all("a", href=re.compile("(.+?)>")):
old_tag = str(link)
old_tag = link.decode()
url = link.get('href')
m = re.search(r"(.+?)>", url).groups()
name = m[0]
if name in interlinks:
hi = url.replace(name + ">", interlinks[name])
link['href'] = hi

content = content.replace(old_tag, str(link))
content = content.replace(old_tag, link.decode())

if '<img' in content:
text = BeautifulSoup(
content, "html.parser", parse_only=SoupStrainer("img"))
for img in text.find_all('img', src=re.compile("(.+?)>")):
old_tag = str(img)
old_tag = img.decode()
url = img.get('src')
m = re.search(r"(.+?)>", url).groups()
name = m[0]
if name in interlinks:
hi = url.replace(name+">", interlinks[name])
img['src'] = hi
content = content.replace(
old_tag.replace("&gt;", ">").replace("/>", ">"), str(img))
old_tag.replace("&gt;", ">").replace("/>", ">"),
img.decode()
)

instance._content = content

Expand Down

0 comments on commit 37aef01

Please sign in to comment.