Permalink
Browse files

Handle \t in input; close #37

  • Loading branch information...
1 parent 9b95252 commit 943953f7c00bb234771e929b9f763f6b56eb6c6e @peter17 peter17 committed Oct 24, 2011
View
@@ -3,6 +3,12 @@ wikitext
def replace_by_space(node):
node.value = ' '
+def replace_by_2_spaces(node):
+    node.value = '  '
+
+def replace_by_8_spaces(node):
+    node.value = '        '
+
<definition>
# Codes
@@ -69,14 +75,15 @@ def replace_by_space(node):
# Characters
- ESC_CHAR : L_BRACKET/R_BRACKET/protocol/PIPE/L_BRACE/R_BRACE/LT/GT/SLASH/AMP/SEMICOLON
+ ESC_CHAR : L_BRACKET/R_BRACKET/protocol/PIPE/L_BRACE/R_BRACE/LT/GT/SLASH/AMP/SEMICOLON/TAB
TITLE_END : TITLE6_END/TITLE5_END/TITLE4_END/TITLE3_END/TITLE2_END/TITLE1_END
ESC_SEQ : SPECIAL_TAG / ESC_CHAR / TITLE_END
- raw_char : !ESC_SEQ [\x20..\xff]
+ tab_to_space : TAB+ : replace_by_space
+ raw_char : (!ESC_SEQ [\x20..\xff])
raw_text : raw_char+ : join render_raw_text
alpha_num : [a..zA..Z0..9]
alpha_num_text : alpha_num+ : join
- any_char : [\x20..\xff]
+ any_char : [\x20..\xff] / tab_to_space
any_text : any_char+ : join
# HTML tags
@@ -129,7 +136,8 @@ def replace_by_space(node):
# Pre and nowiki tags
# Preformatted acts like nowiki (disables wikitext parsing)
- pre_text : (!PRE_END any_char)* : join
+ tab_to_2_spaces : TAB : replace_by_2_spaces
+ pre_text : (tab_to_2_spaces / (!PRE_END any_char))* : join
preformatted : PRE_BEGIN pre_text PRE_END : liftValue
# We allow any char without parsing them as long as the tag is not closed
eol_to_space : EOL* : replace_by_space
@@ -141,7 +149,7 @@ def replace_by_space(node):
styled_text : link / inline_url / html_comment / tag / entity
not_styled_text : preformatted / nowiki
allowed_char : ESC_CHAR{1} : restore liftValue
- allowed_text : raw_text / LT / GT / allowed_char
+ allowed_text : raw_text / LT / GT / tab_to_space / allowed_char
clean_inline : (not_styled_text / styled_text / raw_text)+ : @
inline : (not_styled_text / styled_text / allowed_text)+ : @
@@ -188,9 +196,12 @@ def replace_by_space(node):
# Preformatted
EOL_KEEP : EOL : restore
- preformatted_line : SPACE inline EOL_KEEP : liftValue
+ tab_to_8_spaces : TAB : replace_by_8_spaces
+ any_char_but_tab : raw_text / LT / GT / (!TAB ESC_CHAR) : join
+ preformatted_inline : (tab_to_8_spaces / not_styled_text / styled_text / any_char_but_tab)+
+ preformatted_line : SPACE preformatted_inline EOL_KEEP : liftValue
preformatted_lines : preformatted_line+
- preformatted_text : inline EOL? : liftValue
+ preformatted_text : preformatted_inline EOL? : liftValue
preformatted_paragraph : PRE_BEGIN EOL preformatted_text PRE_END EOL
preformatted_group : preformatted_paragraph / preformatted_lines : render_preformatted
View
@@ -11,12 +11,12 @@ def replace_by_space(node):
CR : '
'
EOL : LF / CR
+ TAB : "	"
L_BRACKET : "["
R_BRACKET : "\]"
L_BRACE : "{" : drop
R_BRACE : "}" : drop
SPACE : " " : drop
- TAB : "	" : drop
SPACETAB : SPACE / TAB : drop
SPACETABEOL : SPACE / TAB / EOL : drop
PIPE : "|" : drop
@@ -46,7 +46,7 @@ def replace_by_space(node):
any_char : [\x20..\xff] / '/'
esc_char : L_BRACKET/R_BRACKET/PIPE/L_BRACE/R_BRACE/LT/GT/AMP/SEMICOLON
raw_char : !esc_char any_char
- raw_text : raw_char+ : join
+ raw_text : (raw_char / TAB)+ : join
# HTML comments
# HTML comments are totally ignored and do not appear in the final text
@@ -63,7 +63,7 @@ def replace_by_space(node):
# or by their optional default value in any case
parameter_id : raw_char+ : join
- parameter_value : inline? : keep
+ parameter_value : inline? : keep
optional_default_value : (PIPE SPACETABEOL* parameter_value)? SPACETABEOL* : liftNode
template_parameter : PARAMETER_BEGIN parameter_id optional_default_value PARAMETER_END : substitute_template_parameter
View
@@ -7,12 +7,12 @@
CR : '
'
EOL : LF / CR
+ TAB : "	"
L_BRACKET : "["
R_BRACKET : "\]"
L_BRACE : "{" : drop
R_BRACE : "}" : drop
SPACE : " " : drop
- TAB : "	" : drop
SPACETAB : SPACE / TAB : drop
SPACETABEOL : SPACE / TAB / EOL : drop
PIPE : "|" : drop
@@ -42,7 +42,7 @@
any_char : [\x20..\xff] / '/'
esc_char : L_BRACKET/R_BRACKET/PIPE/L_BRACE/R_BRACE/LT/GT/AMP/SEMICOLON
raw_char : !esc_char any_char
- raw_text : raw_char+ : join
+ raw_text : (raw_char / TAB)+ : join
# HTML comments
# HTML comments are totally ignored and do not appear in the final text
@@ -59,7 +59,7 @@
# or by their optional default value in any case
parameter_id : raw_char+ : join
- parameter_value : inline? : keep
+ parameter_value : inline? : keep
optional_default_value : (PIPE SPACETABEOL* parameter_value)? SPACETABEOL* : liftNode
template_parameter : PARAMETER_BEGIN parameter_id optional_default_value PARAMETER_END : substitute_template_parameter
@@ -153,12 +153,12 @@ def replace_by_space(node):
LF = Char('\n', expression="'\n'", name='LF')
CR = Char('\n', expression="'\n'", name='CR')
EOL = Choice([LF, CR], expression='LF / CR', name='EOL')
+ TAB = Word('\t', expression='"\t"', name='TAB')
L_BRACKET = Word('[', expression='"["', name='L_BRACKET')
R_BRACKET = Word(']', expression='"\\]"', name='R_BRACKET')
L_BRACE = Word('{', expression='"{"', name='L_BRACE')(toolset['drop'])
R_BRACE = Word('}', expression='"}"', name='R_BRACE')(toolset['drop'])
SPACE = Word(' ', expression='" "', name='SPACE')(toolset['drop'])
- TAB = Word('\t', expression='"\t"', name='TAB')(toolset['drop'])
SPACETAB = Choice([SPACE, TAB], expression='SPACE / TAB', name='SPACETAB')(toolset['drop'])
SPACETABEOL = Choice([SPACE, TAB, EOL], expression='SPACE / TAB / EOL', name='SPACETABEOL')(toolset['drop'])
PIPE = Word('|', expression='"|"', name='PIPE')(toolset['drop'])
@@ -188,7 +188,7 @@ def replace_by_space(node):
any_char = Choice([Klass(u' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff', expression='[\\x20..\\xff]'), Char('/', expression="'/'")], expression="[\\x20..\\xff] / '/'", name='any_char')
esc_char = Choice([L_BRACKET, R_BRACKET, PIPE, L_BRACE, R_BRACE, LT, GT, AMP, SEMICOLON], expression='L_BRACKET/R_BRACKET/PIPE/L_BRACE/R_BRACE/LT/GT/AMP/SEMICOLON', name='esc_char')
raw_char = Sequence([NextNot(esc_char, expression='!esc_char'), any_char], expression='!esc_char any_char', name='raw_char')
- raw_text = Repetition(raw_char, numMin=1, numMax=False, expression='raw_char+', name='raw_text')(toolset['join'])
+ raw_text = Repetition(Choice([raw_char, TAB], expression='raw_char / TAB'), numMin=1, numMax=False, expression='(raw_char / TAB)+', name='raw_text')(toolset['join'])
# HTML comments
# HTML comments are totally ignored and do not appear in the final text
@@ -14,7 +14,7 @@ def test_comment_before_preformatted_paragraph(self):
result = """body:
preformatted_lines:
preformatted_line:
- @inline@:
+ preformatted_inline:
raw_text:This is a preformatted paragraph.
EOL_KEEP:
"""
@@ -9,7 +9,7 @@ def test_single_line_paragraph(self):
result = """body:
preformatted_lines:
preformatted_line:
- @inline@:
+ preformatted_inline:
raw_text:This is a preformatted paragraph.
EOL_KEEP:
"""
@@ -22,7 +22,7 @@ def test_preformatted_and_normal_paragraphs(self):
result = """body:
preformatted_lines:
preformatted_line:
- @inline@:
+ preformatted_inline:
raw_text:This is a preformatted paragraph.
EOL_KEEP:
@@ -38,12 +38,12 @@ def test_multiline_paragraph(self):
result = """body:
preformatted_lines:
preformatted_line:
- @inline@:
+ preformatted_inline:
raw_text:This is a multiline
EOL_KEEP:
preformatted_line:
- @inline@:
+ preformatted_inline:
raw_text:preformatted paragraph.
EOL_KEEP:
"""
@@ -55,7 +55,7 @@ def test_style_in_preformatted_paragraph(self):
result = """body:
preformatted_lines:
preformatted_line:
- @inline@:
+ preformatted_inline:
raw_text:Styled text such as ''italic'', '''bold''',
internal_link:
page_name:Template:templates
@@ -64,6 +64,35 @@ def test_style_in_preformatted_paragraph(self):
"""
self.parsed_equal_tree(source, result, None)
+ def test_tabs_in_preformatted_paragraph(self):
+ source = """ Preformatted\tparagraph
+ \twith
+ \t\tmultiple tabs.
+"""
+ result = """body:
+ preformatted_lines:
+ preformatted_line:
+ preformatted_inline:
+ raw_text:Preformatted
+ tab_to_8_spaces:
+ raw_text:paragraph
+ EOL_KEEP:
+
+ preformatted_line:
+ preformatted_inline:
+ tab_to_8_spaces:
+ raw_text:with
+ EOL_KEEP:
+
+ preformatted_line:
+ preformatted_inline:
+ tab_to_8_spaces:
+ tab_to_8_spaces:
+ raw_text:multiple tabs.
+ EOL_KEEP:
+"""
+ self.parsed_equal_tree(source, result, None)
+
def test_html_pre_paragraph(self):
source = """<pre>
Preformatted paragraph.
@@ -4,6 +4,11 @@
class SpecialCharsTests(ParserTestCase):
+ def test_tabs_in_text(self):
+ source = "Some\ttext and\t\ttabs."
+ result = "[raw_text:'Some' tab_to_space:' ' raw_text:'text and' tab_to_space:' ' raw_text:'tabs.']"
+ self.parsed_equal_string(source, result, 'inline')
+
def test_unicode_chars(self):
source = u"Some Unicode characters: 你好."
result = u"[raw_text:'Some Unicode characters: 你好.']"
Oops, something went wrong.

0 comments on commit 943953f

Please sign in to comment.