Skip to content

Commit

Permalink
Merge pull request #83 from dlax/apos
Browse files Browse the repository at this point in the history
Replace &amp;apos; by "'" before parsing HTML
  • Loading branch information
ihabunek committed Jan 2, 2019
2 parents 14a580b + 0f6bd92 commit fc57d26
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 4 deletions.
3 changes: 2 additions & 1 deletion tests/test_console.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_timeline(mock_get, monkeypatch, capsys):
'username': 'fz'
},
'created_at': '2017-04-12T15:53:18.174Z',
'content': "<p>The computer can't tell you the emotional story. It can give you the exact mathematical design, but what's missing is the eyebrows.</p>",
'content': "<p>The computer can&apos;t tell you the emotional story. It can give you the exact mathematical design, but what's missing is the eyebrows.</p>",
'reblog': None,
}])

Expand All @@ -136,6 +136,7 @@ def test_timeline(mock_get, monkeypatch, capsys):

out, err = capsys.readouterr()
assert "The computer can't tell you the emotional story." in out
assert "but what's missing is the eyebrows." in out
assert "Frank Zappa" in out
assert "@fz" in out

Expand Down
4 changes: 2 additions & 2 deletions toot/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ def _parse_item(item):
content = item['reblog']['content'] if item['reblog'] else item['content']
reblogged = item['reblog']['account']['username'] if item['reblog'] else None

soup = BeautifulSoup(content, "html.parser")
text = soup.get_text().replace('&apos;', "'")
soup = BeautifulSoup(content.replace('&apos;', "'"), "html.parser")
text = soup.get_text()
time = datetime.strptime(item['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ")

return {
Expand Down
2 changes: 1 addition & 1 deletion toot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

def get_text(html):
    """Convert HTML to plain text, stripping all tags.

    The literal ``&apos;`` entity is replaced with an apostrophe *before*
    parsing, because Python's built-in ``html.parser`` backend does not
    decode ``&apos;`` (it is an XML entity, absent from the HTML 4 table).

    :param html: HTML markup as a string.
    :return: the tag-free text content, NFKC-normalized so compatibility
             characters (ligatures, full-width forms, etc.) compare equal.
    """
    text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()

    return unicodedata.normalize('NFKC', text)

Expand Down

0 comments on commit fc57d26

Please sign in to comment.