Skip to content

Commit

Permalink
Mostly finish the bespoke parser
Browse files Browse the repository at this point in the history
  • Loading branch information
iafisher committed Jan 30, 2019
1 parent 1e6dfa5 commit a307518
Show file tree
Hide file tree
Showing 19 changed files with 300 additions and 117 deletions.
14 changes: 12 additions & 2 deletions hera/checker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
from contextlib import suppress
from typing import Dict, List, Optional, Tuple

from .data import Constant, DataLabel, Label, Op, Messages, Program, Settings, Token
from .data import (
Constant,
DataLabel,
Label,
Op,
Messages,
Program,
Settings,
Token,
TOKEN,
)
from .op import Operation, resolve_ops
from .utils import (
DATA_STATEMENTS,
Expand Down Expand Up @@ -214,6 +224,6 @@ def convert_ops(
def substitute_label(op: Operation, symbol_table: Dict[str, int]) -> Operation:
    """Substitute any label in the instruction with its concrete value.

    Every argument of `op` that is a `Token` of type `TOKEN.SYMBOL` is
    replaced, in place, by the value stored under it in `symbol_table`.
    The same `op` object is returned for convenience.

    A symbol that is missing from `symbol_table` propagates the lookup
    error (`KeyError` for a plain dict).
    """
    for i, arg in enumerate(op.args):
        # Token types are members of the TOKEN enum; the old string
        # comparison (`== "SYMBOL"`) can never match an enum member.
        if isinstance(arg, Token) and arg.type == TOKEN.SYMBOL:
            op.args[i] = symbol_table[arg]
    return op
31 changes: 30 additions & 1 deletion hera/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Version: December 2018
"""
from collections import namedtuple
from enum import Enum
from typing import Optional, Tuple


Expand Down Expand Up @@ -80,11 +81,39 @@ def __new__(cls, type_, value, loc=None):
return self

def __repr__(self):
    """Return a debugging representation of the token.

    The type is rendered with `!r`, followed by the base class's own
    representation of the value and the token's location.
    """
    return "Token({!r}, {}, loc={})".format(
        self.type, super().__repr__(), self.location
    )


class TOKEN(Enum):
    """Enumeration for the type field of Token objects.

    Each member's value is its own name prefixed with ``TOKEN_``.
    """

    # Values
    INT = "TOKEN_INT"
    REGISTER = "TOKEN_REGISTER"
    SYMBOL = "TOKEN_SYMBOL"
    STRING = "TOKEN_STRING"
    BRACKETED = "TOKEN_BRACKETED"
    CHAR = "TOKEN_CHAR"

    # Operators
    MINUS = "TOKEN_MINUS"
    AT = "TOKEN_AT"
    ASTERISK = "TOKEN_ASTERISK"
    PLUS = "TOKEN_PLUS"
    SLASH = "TOKEN_SLASH"

    # Punctuation
    LPAREN = "TOKEN_LPAREN"
    RPAREN = "TOKEN_RPAREN"
    COMMA = "TOKEN_COMMA"

    # Other token kinds
    FMT = "TOKEN_FMT"
    INCLUDE = "TOKEN_INCLUDE"
    EOF = "TOKEN_EOF"
    UNKNOWN = "TOKEN_UNKNOWN"


class Messages:
def __init__(self, msg=None, loc=None, *, warning=False):
self.errors = []
Expand Down
2 changes: 1 addition & 1 deletion hera/debugger/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def print_flags(self):
def evaluate_node(self, node):
vm = self.debugger.vm
if isinstance(node, IntNode):
if node.value >= 2 ** 16:
if node.value >= 2 ** 16 or node.value < -2 ** 15:
raise HERAError("integer literal exceeds 16 bits")
return node.value
elif isinstance(node, RegisterNode):
Expand Down
140 changes: 83 additions & 57 deletions hera/lexer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from enum import Enum

from hera.data import Location, Token
from hera.data import Location, Token, TOKEN
from hera.utils import is_register


class Lexer:
Expand All @@ -27,21 +26,61 @@ def next_token(self):
else:
ch = self.text[self.position]
if ch.isalpha() or ch == "_":
length = self.read_register()
if length != -1:
length = self.read_symbol()
if is_register(self.text[self.position : self.position + length]):
self.set_token(TOKEN.REGISTER, length=length)
else:
length = self.read_symbol()
self.set_token(TOKEN.SYMBOL, length=length)
elif ch.isdigit():
length = self.read_int()
self.set_token(TOKEN.INT, length=length)
elif ch == '"':
loc = self.get_location()
s = self.consume_str()
self.tkn = Token(TOKEN.STRING, s, loc)
elif ch == "'":
if self.peek_char() == "\\":
if self.peek_char(3) == "'":
ch = self.peek_char(2)
escape = escape_char(ch)
self.next_char() # open quote
self.next_char() # backslash
loc = self.get_location()
self.next_char() # character
self.next_char() # end quote
if len(escape) == 2:
self.tkn = Token(TOKEN.CHAR, escape[1], loc)
else:
self.tkn = Token(TOKEN.CHAR, escape, loc)
else:
if self.peek_char(2) == "'":
ch = self.peek_char()
self.next_char() # open quote
loc = self.get_location()
self.next_char() # character
self.next_char() # end quote
self.tkn = Token(TOKEN.CHAR, ch, loc)
else:
self.set_token(TOKEN.UNKNOWN)
elif self.text[self.position :].startswith("#include"):
self.set_token(TOKEN.INCLUDE, length=len("#include"))
elif ch == "<":
self.next_char()
length = self.read_bracketed()
self.set_token(TOKEN.BRACKETED, length=length)
self.next_char()
elif ch == ":":
self.position += 1
length = self.read_symbol()
self.set_token(TOKEN.FMT, length=length)
elif ch == "-":
self.set_token(TOKEN.MINUS)
if self.peek_char().isdigit():
self.position += 1
length = self.read_int()
self.position -= 1
self.set_token(TOKEN.INT, length=length + 1)
else:
self.set_token(TOKEN.MINUS)
elif ch == "+":
self.set_token(TOKEN.PLUS)
elif ch == "/":
Expand All @@ -61,33 +100,6 @@ def next_token(self):

return self.tkn

def read_register(self):
    """Return the length of the register name at the current position.

    Recognized names (case-insensitive): ``rt``, ``r`` followed by one or
    more digits, ``pc``, ``pc_ret``, ``fp``, ``fp_alt`` and ``sp``.
    Returns -1 when the text at `self.position` does not start with one
    of them.

    NOTE: only a prefix is examined, so an identifier that merely begins
    with a register name (e.g. ``pcx``) still reports a match of length 2.
    """
    ch = self.text[self.position]
    # At most six characters are ever compared, so slice just those
    # instead of lower-casing the whole remainder of the input.
    head = self.text[self.position : self.position + 6].lower()

    if ch in ("r", "R"):
        nxt = self.peek_char()
        # Tuple membership, not substring membership: `"" in "tT"` is True,
        # which would misreport a lone "r" at end of input as a register.
        if nxt in ("t", "T"):
            return 2
        if nxt.isdigit():
            length = 2
            while self.peek_char(length).isdigit():
                length += 1
            return length
    elif ch in ("p", "P"):
        if head.startswith("pc_ret"):
            return 6
        if head.startswith("pc"):
            return 2
    elif ch in ("f", "F"):
        if head.startswith("fp_alt"):
            return 6
        if head.startswith("fp"):
            return 2
    elif ch in ("s", "S"):
        if self.peek_char() in ("p", "P"):
            return 2

    # Default: not a register.
    return -1

def read_int(self):
length = 1
digits = set([str(i) for i in range(10)])
Expand All @@ -111,6 +123,27 @@ def read_symbol(self):
length += 1
return length

def read_bracketed(self):
    """Return how many characters to take for a bracketed token.

    Counting starts at one and grows until either the character that many
    positions ahead is ``>`` or the end of the text is reached.
    """
    limit = len(self.text)
    span = 1
    while True:
        if self.position + span >= limit:
            break
        if self.peek_char(span) == ">":
            break
        span += 1
    return span

def consume_str(self):
    """Consume a double-quoted string literal and return its contents.

    On entry `self.position` indexes the opening quote. Characters are
    consumed with `self.next_char()` up to and including the closing
    quote; backslash escapes are translated with `escape_char`.

    If the input ends before the closing quote, the characters collected
    so far are returned instead of raising `IndexError`. A backslash that
    is the very last character of the input is kept literally.
    """
    pieces = []
    self.next_char()  # opening quote
    while self.position < len(self.text) and self.text[self.position] != '"':
        if self.text[self.position] == "\\":
            if self.position + 1 >= len(self.text):
                # Dangling backslash at end of input: nothing to escape.
                pieces.append("\\")
                self.next_char()
                break
            pieces.append(escape_char(self.text[self.position + 1]))
            self.next_char()  # backslash
            self.next_char()  # escaped character
        else:
            pieces.append(self.text[self.position])
            self.next_char()
    # Only step over the closing quote when there actually is one.
    if self.position < len(self.text):
        self.next_char()
    return "".join(pieces)

def skip(self):
"""Skip past whitespace and comments."""
while True:
Expand Down Expand Up @@ -162,26 +195,19 @@ def set_token(self, typ, *, length=1):
self.tkn = Token(typ, value, loc)


class TOKEN(Enum):
    """Kinds of token, used as the type field of a Token.

    Each member's value is its own name prefixed with ``TOKEN_``.
    """

    # Values
    INT = "TOKEN_INT"
    REGISTER = "TOKEN_REGISTER"
    SYMBOL = "TOKEN_SYMBOL"
    STRING = "TOKEN_STRING"
    CHAR = "TOKEN_CHAR"

    # Operators
    MINUS = "TOKEN_MINUS"
    AT = "TOKEN_AT"
    ASTERISK = "TOKEN_ASTERISK"
    PLUS = "TOKEN_PLUS"
    SLASH = "TOKEN_SLASH"

    # Punctuation
    LPAREN = "TOKEN_LPAREN"
    RPAREN = "TOKEN_RPAREN"
    COMMA = "TOKEN_COMMA"

    # Other token kinds
    FMT = "TOKEN_FMT"
    INCLUDE = "TOKEN_INCLUDE"
    EOF = "TOKEN_EOF"
    UNKNOWN = "TOKEN_UNKNOWN"
def escape_char(c):
    r"""Return the special character that `c` encodes after a backslash.

    The recognized escapes are ``n`` (newline), ``t`` (tab), ``\`` and
    ``"``. Any other character is returned unchanged with the backslash
    put back in front of it, so the result has length two in that case.

    >>> escape_char("n")
    '\n'
    >>> escape_char("q")
    '\\q'
    """
    escapes = {"n": "\n", "t": "\t", "\\": "\\", '"': '"'}
    if c in escapes:
        return escapes[c]
    return "\\" + c
2 changes: 1 addition & 1 deletion hera/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from .checker import check
from .data import HERAError, Messages, Op, Settings
from .parser import parse
from .parser_bespoke import parse
from .utils import handle_messages, read_file


Expand Down
12 changes: 6 additions & 6 deletions hera/op.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from contextlib import suppress
from typing import Dict, List, Optional, Tuple

from hera.data import Constant, DataLabel, Location, Messages, Op, Token
from hera.data import Constant, DataLabel, Location, Messages, Op, Token, TOKEN
from hera.utils import (
format_int,
from_u16,
Expand Down Expand Up @@ -70,7 +70,7 @@ def __str__(self):


def arg_to_string(arg):
    """Return the textual form of an operation argument.

    String-literal tokens (`TOKEN.STRING`) are rendered with `json.dumps`,
    which adds the surrounding quotes and escapes; every other argument
    is rendered with `str`.
    """
    # Token types are TOKEN enum members, so compare against the enum
    # rather than the old bare string "STRING".
    if isinstance(arg, Token) and arg.type == TOKEN.STRING:
        return json.dumps(arg)
    return str(arg)
Expand Down Expand Up @@ -937,7 +937,7 @@ def check_arglist(argtypes, args, symbol_table):


def check_register(arg) -> Optional[str]:
if not isinstance(arg, Token) or arg.type != "REGISTER":
if not isinstance(arg, Token) or arg.type != TOKEN.REGISTER:
return "expected register"

if arg.lower() == "pc":
Expand All @@ -952,10 +952,10 @@ def check_register(arg) -> Optional[str]:


def check_register_or_label(arg, symbol_table: Dict[str, int]) -> Optional[str]:
if not isinstance(arg, Token) or arg.type not in ("REGISTER", "SYMBOL"):
if not isinstance(arg, Token) or arg.type not in (TOKEN.REGISTER, TOKEN.SYMBOL):
return "expected register or label"

if arg.type == "REGISTER":
if arg.type == TOKEN.REGISTER:
return check_register(arg)
else:
try:
Expand All @@ -979,7 +979,7 @@ def check_label(arg) -> Optional[str]:


def check_string(arg):
    """Validate that `arg` is a string-literal token.

    Returns None when `arg` is a `Token` of type `TOKEN.STRING`, and the
    error message "expected string literal" otherwise.
    """
    # Compare against the TOKEN enum member, not the old string "STRING".
    if isinstance(arg, Token) and arg.type == TOKEN.STRING:
        return None
    return "expected string literal"
Expand Down
Loading

0 comments on commit a307518

Please sign in to comment.