Skip to content

Commit

Permalink
Fix incorrectly chopped excerpts in .awik/.wik
Browse files Browse the repository at this point in the history
Excerpts containing Unicode text were not properly truncated to fit within
a single IRC message, because their length was measured in characters rather than UTF-8 bytes
  • Loading branch information
sushain97 committed Dec 17, 2013
1 parent 062fdab commit 95d837a
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions modules/apertium_wiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ def awik(phenny, input):
else:
text = page.findall(".//*[@id='mw-content-text']")[0]

sentences = text.text_content().split(". ")
sentences = text.text_content().split(". ")
sentence = '"' + sentences[0] + '"'
maxlength = 430 - len(' - ' + wikiuri % (format_term_display(term)))

maxlength = 430 - len((' - ' + wikiuri % (format_term_display(term))).encode('utf-8'))
if len(sentence.encode('utf-8')) > maxlength:
sentence = sentence[:maxlength]
sentence = sentence.encode('utf-8')[:maxlength].decode('utf-8', 'ignore')
words = sentence[:-5].split(' ')
words.pop()
sentence = ' '.join(words) + ' [...]'
Expand Down
4 changes: 2 additions & 2 deletions modules/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ def parse_wiki_page(url, term, section = None):
sentences = text.text_content().split(". ")
sentence = '"' + sentences[0] + '"'

maxlength = 440 - len(' - ' + url)
maxlength = 430 - len((' - ' + url).encode('utf-8'))
if len(sentence.encode('utf-8')) > maxlength:
sentence = sentence[:maxlength]
sentence = sentence.encode('utf-8')[:maxlength].decode('utf-8', 'ignore')
words = sentence[:-5].split(' ')
words.pop()
sentence = ' '.join(words) + ' [...]'
Expand Down

0 comments on commit 95d837a

Please sign in to comment.