forked from smohammadfy/Compiler_PLY
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.py
158 lines (127 loc) · 3.51 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import re
from errors import error
from ply.lex import lex
# Token type names declared for the lexer, grouped by role.
tokens = [
    # Identifier and reserved words
    'ID', 'CONST', 'VAR', 'PRINT', 'IF', 'ELSE', 'ELIF', 'WHILE', 'FUNC',
    'MAIN', 'RETURN', 'FLOAT', 'INTEGER', 'VOID',

    # Arithmetic operators
    'PLUS', 'MINUS', 'MUL', 'DIVIDE', 'MOD',

    # Assignment and punctuation
    'ASSIGN', 'SEMI', 'LPAREN', 'RPAREN', 'COMMA',

    # Literals
    'INTEGERNUMBER', 'FLOATNUMBER', 'STRING', 'BOOL',

    # Comparison operators
    'LT', 'GT', 'LTE', 'GTE', 'EQ', 'NEQ',

    # Logical operators
    'LAND', 'LOR', 'NOT',

    # Braces, square brackets, and ERROR
    # NOTE(review): no rule visible in this file produces ERROR -- confirm
    # whether it is still needed.
    'LCURL', 'RCURL', 'LSB', 'RSB', 'ERROR',
]
# Simple tokens, each defined by a regular-expression string.
# NOTE(review): PLY is documented to try string-defined rules in order of
# decreasing regex length, which is what lets `<=`, `>=`, `==` and `!=` win
# over their one-character prefixes -- confirm against the PLY docs before
# changing any pattern's length.

# Characters skipped between tokens: space, tab and carriage return.
t_ignore = ' \t\r'

# Arithmetic operators
t_PLUS = r'\+'
t_MOD = r'\%'
t_MINUS = r'-'
t_MUL = r'\*'
t_DIVIDE = r'/'

# Assignment, statement terminator and parentheses
t_ASSIGN = r'='
t_SEMI = r';'
t_LPAREN = r'\('
t_RPAREN = r'\)'

# Comparison operators
t_LT = r'\<'
t_GT = r'\>'
t_LTE = r'\<='
t_GTE = r'\>='
t_EQ = r'=='
t_NEQ = r'!='

# Logical operators
t_LAND = r'&&'
t_LOR = r'\|\|'
t_NOT = r'!'

# Braces, square brackets and comma
t_LCURL = r'\{'
t_RCURL = r'\}'
t_LSB = r'\['
t_RSB = r'\]'
t_COMMA = r','
def t_FLOATNUMBER(t):
    r'\d+[eE][-+]?\d+|(\.\d+|\d+\.\d+)([eE][-+]?\d+)?'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.  It accepts
    # digits with an exponent, or a number containing a decimal point with an
    # optional exponent.
    lexeme = t.value
    # Replace the matched text with the corresponding Python float.
    t.value = float(lexeme)
    return t
def t_INTEGERNUMBER(t):
    r'0[xX][0-9a-fA-F]+|\d+'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.
    #
    # The hexadecimal alternative comes first: regex alternation takes the
    # first branch that matches, so a leading `\d+` would consume only the
    # "0" of "0x1F" and the hex branch would never be reached.
    #
    # Conversion to a Python int:
    #   0x.. / 0X..  -> base 16
    #   leading 0    -> base 8
    #   anything else-> base 10
    text = t.value
    if text.startswith(('0x', '0X')):
        t.value = int(text, 16)
    elif text.startswith('0'):
        try:
            t.value = int(text, 8)
        except ValueError:
            # Digits 8/9 after a leading zero (e.g. "09") are not valid
            # octal.  Report it instead of crashing the lexer, and fall back
            # to the decimal value so lexing can continue.
            error(t.lexer.lineno, "Invalid octal literal '%s'" % text)
            t.value = int(text)
    else:
        t.value = int(text)
    return t
def t_BOOL(t):
    r'(true|false)\b'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.
    #
    # The trailing \b stops the rule from matching a prefix of a longer
    # identifier: without it "trueish" would lex as BOOL followed by the
    # identifier "ish", because this rule is defined before t_ID.
    #
    # Replace the matched text with the corresponding Python bool.
    t.value = (t.value == "true")
    return t
# Single-character escape codes and the character each one denotes.
_SIMPLE_ESCAPES = {
    "n": "\n",
    "r": "\r",
    "t": "\t",
    "\\": "\\",
    '"': '"',
}

# A backslash followed by either a byte escape ("b" plus two hex digits) or
# any single character.  The byte form is listed first so that it wins over
# a bare "b".
_ESCAPE_RE = re.compile(r'\\(b[0-9a-fA-F]{2}|.)')


def _replace_escape_codes(t):
    """Decode the escape codes in ``t.value`` in place.

    Recognised codes are ``\\n``, ``\\r``, ``\\t``, ``\\\\``, ``\\"`` and
    ``\\bHH`` (HH being two hex digits giving a character code).  Any other
    backslash sequence is reported through ``error`` and left unchanged.

    The string is decoded in a single left-to-right pass.  Decoding one kind
    of escape at a time is wrong: an escaped backslash followed by ``n``
    would be misread as a newline escape, and several different ``\\bHH``
    escapes would all receive the same value.
    """
    def decode(match):
        code = match.group(1)
        if len(code) == 3:
            # Byte escape: "b" followed by two hex digits.
            return chr(int(code[1:], 16))
        if code in _SIMPLE_ESCAPES:
            return _SIMPLE_ESCAPES[code]
        error(t.lexer.lineno,
              "Bad string escape code '%s'" % match.group(0))
        return match.group(0)

    # A callable replacement is inserted literally, so a decoded backslash is
    # never re-interpreted as a regex replacement escape.
    t.value = _ESCAPE_RE.sub(decode, t.value)


def t_STRING(t):
    r'\"(\\.|[^\\\n"])*\"'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.  A string
    # body is a run of escape pairs (backslash + any character) and ordinary
    # characters; treating the backslash pair as a unit keeps an escaped
    # quote from terminating the literal.
    #
    # Strip the surrounding quotes, then decode escape codes.
    t.value = t.value[1:-1]
    _replace_escape_codes(t)
    return t
# Reserved words of the language.
keywords = {"var", "const", "print",
            "if", "else", "while",
            "func", "return", "float", "int", "void",
            "main", "elif"}

# Keywords whose token type is not simply the upper-cased lexeme.  The token
# list declares INTEGER (there is no INT), so "int" must be mapped explicitly.
_KEYWORD_TOKEN_OVERRIDES = {"int": "INTEGER"}


def t_ID(t):
    r'[_A-Za-z][_A-Za-z0-9]*'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.
    #
    # Reserved words match the identifier pattern too; retag them with their
    # own token type.  Everything else keeps the type assigned by the lexer.
    if t.value in keywords:
        t.type = _KEYWORD_TOKEN_OVERRIDES.get(t.value, t.value.upper())
    return t
def t_newline(t):
    r'\n'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.
    #
    # Advance the line counter by the length of the matched text.  Nothing is
    # returned, so no token is produced for the newline.
    consumed = len(t.value)
    t.lexer.lineno += consumed
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.  It matches
    # a /* ... */ comment, non-greedily, across lines.
    #
    # Keep the line counter in step with the newlines inside the comment.
    # Nothing is returned, so the comment produces no token.
    newline_total = t.value.count('\n')
    t.lexer.lineno += newline_total
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.
    #
    # The trailing newline is optional so that a // comment on the last line
    # of a file with no final newline is still recognised as a comment,
    # rather than being lexed as two DIVIDE tokens followed by its text.
    #
    # Count the newline only when one was actually consumed.  Nothing is
    # returned, so the comment produces no token.
    t.lexer.lineno += t.value.count('\n')
def t_error(t):
    # Handler for input that no rule matches: report the first unmatched
    # character, then skip exactly that one character so lexing resumes with
    # the next one.
    offending = t.value[0]
    error(t.lexer.lineno, "Illegal character %r" % offending)
    t.lexer.skip(1)
def t_COMMENT_UNTERM(t):
    r'/\*(.|\n)*$'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.  It matches
    # a /* that runs to the end of the input.
    #
    # Report the problem at the current line.  Nothing is returned, so the
    # matched text produces no token.
    message = "Unterminated comment"
    error(t.lexer.lineno, message)
def t_STRING_UNTERM(t):
    r'\"[^\n]*\n?'
    # The raw string above is the rule's regular expression (PLY reads it
    # from the docstring), so it must remain the first statement.
    #
    # It matches an opening quote and everything up to the end of the line.
    # The newline is optional so that an unterminated string on the last
    # line of a file with no final newline is still reported here, rather
    # than surfacing as an illegal '"' character followed by stray tokens.
    # This rule is defined after t_STRING, so properly terminated strings
    # are matched by that rule first.
    error(t.lexer.lineno, "Unterminated string literal")
    # Count the newline only when one was actually consumed.
    t.lexer.lineno += t.value.count('\n')
def make_lexer():
    """Build a lexer by calling PLY's ``lex()`` and return it."""
    built = lex()
    return built
if __name__ == '__main__':
    # Manual test driver: tokenize test1.txt .. test3.txt, print every token
    # and write each file's tokens to <name>res.txt.
    import sys
    from errors import subscribe_errors

    tests = ["test1", "test2", "test3"]
    lexer = make_lexer()
    # Route lexer error messages to stderr while the tests run.
    with subscribe_errors(lambda msg: sys.stderr.write(msg + "\n")):
        for test in tests:
            with open(test + ".txt") as source_file:
                lexer.input(source_file.read())
            # Restart line numbering for each input so that reported line
            # numbers refer to the current file, not the running total.
            lexer.lineno = 1

            # Collect this file's tokens only; a list shared across the loop
            # would leak earlier files' tokens into later result files.
            results = list(iter(lexer.token, None))

            print("\n------------------****" + "\t" + test + "\t" + "****------------------\n")
            with open(test + "res.txt", "w+") as out_file:
                for res in results:
                    out_file.write(str(res) + "\n")
                    print(res)