Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

175 lines (158 sloc) 7.236 kb
"""
MediaWiki-style markup; from py-wikimarkup
Copyright (C) 2008 David Cramer <dcramer@gmail.com>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import re
# Matches every run of two or more apostrophes.  The capturing group makes
# re.split() keep the runs themselves, so they land on the odd indices of the
# resulting list while the surrounding text lands on the even indices.
_quotePat = re.compile(u"(''+)", re.UNICODE)

# Default HTML emitted for each markup transition.  Pass a mapping with the
# same four keys to parse_one_line() to emit different tags.
default_tags = {
    'bold': '<strong>',
    'bold_close': '</strong>',
    'italic': '<em>',
    'italic_close': '</em>',
}


def parse_one_line(text, tags=default_tags):
    """Convert the apostrophe markup of a single line into tags.

    Runs of apostrophes are interpreted as: two = italic toggle, three =
    bold toggle, five = bold and italic toggle.  A run of four is treated as
    one literal apostrophe followed by a bold toggle, and a run longer than
    five as literal apostrophes followed by a five-apostrophe toggle.

    text -- the line to convert.  It is stripped of surrounding whitespace
            before conversion; however, when it contains no run of two or
            more apostrophes it is returned exactly as passed in.
    tags -- mapping with the keys 'bold', 'bold_close', 'italic' and
            'italic_close' giving the strings to emit.  It is only read,
            never modified.

    Returns the converted line.  Anything still open at the end of the line
    is closed.
    """
    arr = _quotePat.split(text.strip())
    if len(arr) == 1:
        # No markup at all.
        return text

    # Phase 1: normalise over-long runs (shifting the surplus apostrophes
    # into the preceding text) and count the bold / italic toggles.
    num_bold = num_italics = 0
    for i in range(1, len(arr), 2):
        run = len(arr[i])
        if run == 4:
            arr[i - 1] += u"'"
            arr[i] = u"'''"
        elif run > 5:
            arr[i - 1] += u"'" * (run - 5)
            arr[i] = u"'''''"
        # Count by the length *after* normalisation.  Counting by the
        # original length would skip every rewritten run and corrupt the
        # parity check below.
        run = len(arr[i])
        if run == 2:
            num_italics += 1
        elif run == 3:
            num_bold += 1
        elif run == 5:
            num_italics += 1
            num_bold += 1

    # Phase 2: if there is an odd number of both bold and italic toggles, it
    # is likely that one of the bold ones was meant to be an apostrophe
    # followed by italics.  Which one cannot be known for certain, but it is
    # more likely to be one that has a single-letter word before it.
    if num_bold % 2 and num_italics % 2:
        first_single = first_multi = first_space = -1
        for i in range(1, len(arr), 2):
            if len(arr[i]) != 3:
                continue
            before = arr[i - 1]
            last = before[-1:]
            second_last = before[-2:-1]
            if last == u' ':
                if first_space == -1:
                    first_space = i
            elif second_last == u' ':
                if first_single == -1:
                    first_single = i
            elif first_multi == -1:
                first_multi = i
        # Preference order: after a single-letter word, then after a
        # multi-letter word, then after a space.  All three can be -1 (for
        # example when the only toggle is one quintuple apostrophe), in
        # which case nothing is changed.
        for candidate in (first_single, first_multi, first_space):
            if candidate > -1:
                arr[candidate] = u"''"
                arr[candidate - 1] += u"'"
                break

    # Phase 3: walk the pieces and emit tags.  `state` records which tags
    # are open, in opening order ('', 'i', 'b', 'ib', 'bi'), or 'both' after
    # a five-apostrophe opener whose nesting order is not yet known.  While
    # in 'both', text is held back in `pending` until the next toggle shows
    # which tag has to be the outer one.
    output = []
    pending = []
    state = ''
    for i, piece in enumerate(arr):
        if not i % 2:
            if state == 'both':
                pending.append(piece)
            else:
                output.append(piece)
        else:
            if len(piece) == 2:
                if state == 'i':
                    output.append(tags['italic_close'])
                    state = ''
                elif state == 'bi':
                    output.append(tags['italic_close'])
                    state = 'b'
                elif state == 'ib':
                    # Bold is innermost: close it, close italic, reopen bold.
                    output.append(tags['bold_close'] + tags['italic_close']
                                  + tags['bold'])
                    state = 'b'
                elif state == 'both':
                    output.append(tags['bold'] + tags['italic'])
                    output.append(u''.join(pending))
                    output.append(tags['italic_close'])
                    state = 'b'
                else:  # state is '' or 'b'
                    output.append(tags['italic'])
                    state += 'i'
            elif len(piece) == 3:
                if state == 'b':
                    output.append(tags['bold_close'])
                    state = ''
                elif state == 'bi':
                    # Italic is innermost: close it, close bold, reopen
                    # italic.
                    output.append(tags['italic_close'] + tags['bold_close']
                                  + tags['italic'])
                    state = 'i'
                elif state == 'ib':
                    output.append(tags['bold_close'])
                    state = 'i'
                elif state == 'both':
                    output.append(tags['italic'] + tags['bold'])
                    output.append(u''.join(pending))
                    output.append(tags['bold_close'])
                    state = 'i'
                else:  # state is '' or 'i'
                    output.append(tags['bold'])
                    state += 'b'
            elif len(piece) == 5:
                if state == 'b':
                    output.append(tags['bold_close'] + tags['italic'])
                    state = 'i'
                elif state == 'i':
                    output.append(tags['italic_close'] + tags['bold'])
                    state = 'b'
                elif state == 'bi':
                    output.append(tags['italic_close'] + tags['bold_close'])
                    state = ''
                elif state == 'ib':
                    output.append(tags['bold_close'] + tags['italic_close'])
                    state = ''
                elif state == 'both':
                    output.append(tags['italic'] + tags['bold'])
                    output.append(u''.join(pending))
                    output.append(tags['bold_close'] + tags['italic_close'])
                    state = ''
                else:  # state is ''
                    pending = []
                    state = 'both'

    # Close whatever is still open, innermost tag first.
    if state == 'b' or state == 'ib':
        output.append(tags['bold_close'])
    if state == 'i' or state == 'bi' or state == 'ib':
        output.append(tags['italic_close'])
    if state == 'bi':
        output.append(tags['bold_close'])
    # A five-apostrophe opener that was never closed: wrap the held-back
    # text in both tags.
    if state == 'both' and pending:
        output.append(tags['italic'] + tags['bold'])
        output.append(u''.join(pending))
        output.append(tags['bold_close'] + tags['italic_close'])
    return u''.join(output)
def parse(text, tags=default_tags):
    """Convert the apostrophe markup of a multi-line string.

    The text is cut at every u'\\n', each piece is passed to
    parse_one_line() together with `tags`, and the converted pieces are
    joined back together with u'\\n'.
    """
    converted = []
    for single_line in text.split(u'\n'):
        converted.append(parse_one_line(single_line, tags))
    return u'\n'.join(converted)
Jump to Line
Something went wrong with that request. Please try again.