-
Notifications
You must be signed in to change notification settings - Fork 0
/
tokenizerPy.py
88 lines (69 loc) · 2.72 KB
/
tokenizerPy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import sys
#import msgpack
from anoky.common.old_args_parser import SysArgsParser
from anoky.common.errors import CompilerError
#from anoky.parsers.LycParser import LycParser
from anoky.parsers.anoky_parser import AnokyParser
from anoky.streams.string_stream import StringStream
from anoky.common.record import Record
import anoky.syntax.tokens as Tokens
def unicode_encoder(obj):
    """Encode *obj* as one newline-terminated line of bytes.

    The object is converted with ``str()``, a trailing newline is appended,
    and the result is encoded with ``str.encode()``'s default codec (UTF-8).
    """
    line = str(obj) + "\n"
    return line.encode()
def parse_args(argv):
    """Parse command-line arguments into a ``Record`` of options.

    Recognized arguments:

    * ``--verbose`` -- sets ``options.verbose`` to ``True``.
    * ``--binary`` -- sets ``options.binary`` and then raises
      ``NotImplementedError``.
    * ``--lex [TARGET]`` -- selects interval output. Sets
      ``options.encoder`` (and ``options.binary = False``) unless an encoder
      is already present, then sets ``options.output``:
      ``sys.stdout.buffer`` when TARGET is omitted (end of arguments, or the
      next argument starts with ``--``) or is ``stdout``;
      ``sys.stderr.buffer`` when it is ``stderr``; otherwise a file of that
      name opened for binary writing.
    * any argument ending in ``.ly`` or ``.py`` (case-insensitive) -- stored
      as ``options['filename']``; a second such argument is an error.

    Args:
        argv: The argument list handed to ``SysArgsParser`` (``main`` passes
            ``sys.argv``).

    Returns:
        The populated options ``Record``.

    Raises:
        NotImplementedError: If ``--binary`` is given.
        SystemExit: With status -1 on a duplicate filename or an
            unrecognized argument (a message is printed first).
    """
    options = Record({'verbose': False})
    args = SysArgsParser(argv)
    while args.next() is not None:
        arg = args()
        if arg == '--verbose':
            options.verbose = True
        elif arg == '--binary':
            options.binary = True
            # The binary encoder was meant to be msgpack.packb, but that
            # import is disabled in this file, so the flag is rejected.
            raise NotImplementedError()
        elif arg == '--lex':
            if 'encoder' not in options:
                options.binary = False
                options.encoder = unicode_encoder
            upcoming = args.peek()
            if upcoming is None or upcoming.startswith("--"):
                # No explicit target follows: default to standard output.
                options.output = sys.stdout.buffer
            else:
                output_name = args.next()
                # This must be a single if/elif/else chain. With two
                # separate ifs, 'stdout' fell through to the else of the
                # second test and a file named "stdout" was opened instead.
                if output_name == 'stdout':
                    options.output = sys.stdout.buffer
                elif output_name == 'stderr':
                    options.output = sys.stderr.buffer
                else:
                    options.output = open(output_name, "wb")
        elif arg.lower().endswith(('.ly', '.py')):
            if 'filename' in options:
                print("Multiple filenames found!")
                # sys.exit is used because the bare exit() builtin is only
                # injected by the site module.
                sys.exit(-1)
            options['filename'] = arg
        else:
            print("Unexpected option, '%s'" % arg)
            sys.exit(-1)
    return options
def tokenize(options):
    """Tokenize the file named by ``options.filename`` and emit the tokens.

    The file is read as UTF-8, wrapped in a ``StringStream`` and handed to
    an ``AnokyParser``.

    * If ``'output' in options``: for every
      ``(token, first_index, index_after)`` triple produced by
      ``parser.tokenize_with_intervals``, a tuple
      ``(token_id, first_index, index_after)`` is passed through
      ``options.encoder`` and written to ``options.output``. ``token_id`` is
      the token type's ``value`` when ``options.binary`` is truthy and its
      ``name`` otherwise. A ``None`` token is reported using
      ``Tokens.WHITESPACE`` as a filler.
    * Otherwise: every token from ``parser.tokenize`` is printed via
      ``str()``.

    A ``CompilerError`` raised while tokenizing is caught and its ``trace``
    is printed instead of propagating.

    Args:
        options: The options record produced by ``parse_args``.
    """
    filename = options.filename
    # Read inside a context manager so the source file handle is closed
    # promptly instead of being left to the garbage collector.
    with open(filename, encoding='utf-8') as source_file:
        code = source_file.read()

    try:
        stream = StringStream(code)
        parser = AnokyParser()
        if 'output' in options:
            output = options.output
            encoder = options.encoder
            # The output mode does not change mid-run; look it up once.
            binary = options.binary
            filler_token_value = Tokens.WHITESPACE.value if binary else Tokens.WHITESPACE.name
            for token, first_index, index_after in parser.tokenize_with_intervals(stream):
                if token is None:
                    token_value = filler_token_value
                elif binary:
                    token_value = token.type.value
                else:
                    token_value = token.type.name
                output.write(encoder((token_value, first_index, index_after)))
        else:
            for token in parser.tokenize(stream):
                print(str(token))
    except CompilerError as e:
        print(e.trace)
def main():
    """Script entry point: parse ``sys.argv`` and tokenize accordingly."""
    tokenize(parse_args(sys.argv))
# Run the tokenizer only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()