/
parser.py
executable file
·148 lines (108 loc) · 2.74 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
Parses metadata stored in Django template comments, e.g. {# title = Page Title #}
"""
__author__ = 'ialbert'
import logging
from string import strip
import ply.lex as lex
import ply.yacc as yacc
# Module-level logger used for error reporting.
logger = logging.getLogger(__name__)

# Names of every token type produced by the lexer and used by the grammar.
tokens = (
    'NAME',
    'NUMBER',
    'FLOAT',
    'EQUAL',
    'START',
    'END',
    'LPAREN',
    'RPAREN',
    'COMMA',
)
def DjagnoCommentLexer():
    """
    Builds the PLY lexer for ``{# name = value #}`` template comments.

    The token rules are declared as locals of this function and
    ``lex.lex()`` is called from here so that it collects them.
    Function rules are tried in the order they are defined: START and
    END come before NAME because the NAME character class also contains
    '{', '}' and '#', and FLOAT comes before NUMBER so that '3.1' is not
    split at the dot.

    Returns the lexer object created by ``lex.lex()``.
    """
    # Regular expression rules for simple tokens
    t_EQUAL = r'='
    t_COMMA = r','

    # Lists are delimited by square brackets, despite the PAREN names.
    t_LPAREN = r'\['
    t_RPAREN = r'\]'

    # Blanks and tabs between tokens are skipped.
    t_ignore = ' \t'

    # NOTE: the docstring of each rule function below is its regular
    # expression, so it must not be reworded. '#' is escaped because PLY
    # compiles the rules in verbose mode by default.

    # Opening '{#' of a comment; flags the lexer as being inside one.
    def t_START(t):
        r'{\#'
        t.lexer.inside = True
        return t

    # Closing '#}' of a comment; clears the flag set by t_START.
    def t_END(t):
        r'\#}'
        t.lexer.inside = False
        return t

    # Decimal number with a fractional part, converted to a float.
    def t_FLOAT(t):
        r'\d+\.\d+'
        t.value = float(t.value)
        return t

    # Run of digits, converted to an int.
    def t_NUMBER(t):
        r'\d+'
        t.value = int(t.value)
        return t

    # Any other run of word characters and the listed punctuation.
    def t_NAME(t):
        r'[\w!?+()$@*^#%&`~<>{}\.\-\/]+'
        return t

    # Unrecognised character: report it and resume after skipping it.
    def t_error(t):
        # Reported through the module logger, as p_error does, instead
        # of being written to stdout.
        logger.error("Illegal character '%s'", t.value[0])
        t.lexer.skip(1)

    return lex.lex()
# Grammar definition
def p_expression_factor(p):
    'expression : START NAME EQUAL factor END'
    # A "{# name = value #}" comment: store the pair in the meta
    # dictionary carried by the lexer.
    key, value = p[2], p[4]
    p.lexer.meta[key] = value
def p_expression_empty(p):
    'expression : START factor END'
    # A comment without a "name =" part is accepted and records nothing.
    return None
def p_factor_flt(p):
    'factor : FLOAT'
    # The FLOAT token value is passed through unchanged.
    number = p[1]
    p[0] = number
def p_factor_int(p):
    'factor : NUMBER'
    # The NUMBER token value is passed through unchanged.
    number = p[1]
    p[0] = number
def p_factor_name(p):
    'factor : NAME'
    # The NAME token text is passed through unchanged.
    word = p[1]
    p[0] = word
def p_factor_list(p):
    'factor : list'
    # A bracketed list is a value in its own right; the same list object
    # is passed through.
    items = p[1]
    p[0] = items
def p_factor_factor(p):
    'factor : factor factor'
    # Two adjacent values collapse into one string separated by a single
    # space; non-string values are rendered with str().
    pieces = [str(p[1]), str(p[2])]
    p[0] = ' '.join(pieces)
def p_list_def(p):
    'list : LPAREN elem RPAREN'
    # Drop the surrounding brackets and keep the collected elements.
    elements = p[2]
    p[0] = elements
def p_elem_one(p):
    'elem : factor'
    # A single value starts a new one-element list.
    item = p[1]
    p[0] = [item]
def p_elem_two(p):
    'elem : elem COMMA factor'
    # Left-recursive form: the elements collected so far, a comma, then
    # one more value. The previous 'elem COMMA elem' form was ambiguous
    # (shift/reduce conflict); this one accepts the same input and
    # produces the same list.
    # A new list is built so the left-hand elements are not mutated.
    p[0] = p[1] + [p[3]]
def p_error(p):
    # Syntax-error hook for the parser. `p` is the offending token, or
    # None when the input ended before the comment was complete (for
    # example a line that is missing its closing '#}').
    if p is None:
        # Without a token there is no lexer to read the file name and
        # the input text from, so only a generic message can be logged.
        logger.error("syntax error: unexpected end of input while parsing")
        return
    logger.error("syntax error in %s while parsing: %s", p.lexer.fname, p.lexer.lexdata)
def process(lines, fname="text"):
    """
    Collects the ``name = value`` pairs found in template comments.

    Every line is stripped of surrounding whitespace and only the lines
    that start with '{#' are handed to the parser. `fname` is stored on
    the lexer, where the syntax error message reads it.

    Returns the dictionary of collected values.
    """
    # Only process lines that are comments.
    stripped = (raw.strip() for raw in lines)
    comment_lines = [text for text in stripped if text.startswith("{#")]

    lexer = DjagnoCommentLexer()
    lexer.meta = {}
    lexer.fname = fname

    parser = yacc.yacc(write_tables=0, debug=0)
    for comment in comment_lines:
        parser.parse(comment, lexer=lexer)

    return lexer.meta
def test():
    """
    Runs the parser over a small sample document and prints the result.
    """
    sample = """
This is a test document. Only tags in comments will be parsed.
{# title = Page Title #}
{# name = !@#$%^&* #}
{# x = AAA BBB CCC + some other 34 stuff #}
{# y = zum123 + 234 #}
{# ggg = 3.1 #}
This should raise a sytax error
{# abc #}
aaa = 123
{# value = [ 10, 20, hello world ] #}
<body>Done!</body>
"""
    # One-line alternative input for trying the list rule on its own:
    # sample = "{# value = [ 10, 20, hello world ] #}"
    meta = process(sample.splitlines())
    print(meta)
# Script entry point: install the default logging configuration, then
# run the demo.
if __name__ == "__main__":
    logging.basicConfig()
    test()