interlinks: Fix Unicode issue with Python 2.7 (#1013)

Use bs4's `.decode()` method rather than `str()` to ensure consistent Unicode strings.
getpelican · Apr 13, 2018 · 37aef01 · 37aef01
1 parent 00475a0
commit 37aef01
Showing 1 changed file with 6 additions and 4 deletions.
diff --git a/interlinks/interlinks.py b/interlinks/interlinks.py
@@ -34,29 +34,31 @@ def parse_links(instance):
             text = BeautifulSoup(
                 content, "html.parser", parse_only=SoupStrainer("a"))
             for link in text.find_all("a", href=re.compile("(.+?)>")):
-                old_tag = str(link)
+                old_tag = link.decode()
                 url = link.get('href')
                 m = re.search(r"(.+?)>", url).groups()
                 name = m[0]
                 if name in interlinks:
                     hi = url.replace(name + ">", interlinks[name])
                     link['href'] = hi
 
-                content = content.replace(old_tag, str(link))
+                content = content.replace(old_tag, link.decode())
 
         if '<img' in content:
             text = BeautifulSoup(
                 content, "html.parser", parse_only=SoupStrainer("img"))
             for img in text.find_all('img', src=re.compile("(.+?)>")):
-                old_tag = str(img)
+                old_tag = img.decode()
                 url = img.get('src')
                 m = re.search(r"(.+?)>", url).groups()
                 name = m[0]
                 if name in interlinks:
                     hi = url.replace(name+">", interlinks[name])
                     img['src'] = hi
                 content = content.replace(
-                    old_tag.replace("&gt;", ">").replace("/>", ">"), str(img))
+                    old_tag.replace("&gt;", ">").replace("/>", ">"),
+                    img.decode()
+                )
 
         instance._content = content