In [2]:
import re
from typing import Dict, List, Set, Tuple, Union, Generator
from typing import OrderedDict as OrderedDictType
from collections import OrderedDict
from dataclasses import dataclass
from pathlib import Path
from tqdm import tqdm
from itertools import product
from copy import deepcopy

In [3]:
@dataclass
class Production:
    """
    A single production rule ``left -> right[0] right[1] ...`` of a grammar.

    Two productions are equal when their textual forms (``str(p)``) are equal;
    the hash is derived from the same text, so equal productions always hash
    alike and can be stored in sets and used as dictionary keys.
    """
    left: str  # left-hand side symbol
    right: List[str]  # right-hand side symbols, in order

    def __str__(self):
        return f"{self.left} -> {' '.join(self.right)}"

    def __repr__(self):
        return str(self)

    def __hash__(self):
        return hash(str(self))

    def __eq__(self, other):
        # Only productions can be equal to productions. Returning
        # NotImplemented lets Python fall back to its default comparison
        # instead of calling other.__hash__(), which made a production equal
        # to the plain string "A -> b" and raised TypeError for unhashable
        # objects such as lists.
        if not isinstance(other, Production):
            return NotImplemented
        # Compare the text itself rather than its hash so that a hash
        # collision can never make two different productions equal.
        return str(self) == str(other)

In [4]:
class Grammar:
    """
    A context-free grammar: productions P, terminals V_T, non-terminals V_N
    and a start symbol S.

    Non-terminals are the symbols that occur on the left of a production;
    every other symbol found on a right-hand side is a terminal.
    """

    def __init__(self, productions: Set[Production], start_symbol: str):
        self._productions: Set[Production] = productions  # P
        self._terminals: Set[str] = set()  # V_T
        self._non_terminals: Set[str] = set()  # V_N
        self._start_symbol: str = start_symbol  # S

        # A -> alpha_1 | ... | alpha_n, grouped under the key A
        self._productions_by_key: OrderedDictType[str, List[Production]] = OrderedDict()

        # Terminals are derived from the non-terminal set, so compute that first.
        self._compute_non_terminals()
        self._compute_terminals()
        self._compute_productions_by_key()

    @property
    def productions_by_key(self):
        # Build the index on demand if it is still empty.
        if not self._productions_by_key:
            self._compute_productions_by_key()
        return self._productions_by_key

    @property
    def productions(self):
        return self._productions

    @property
    def terminals(self):
        return self._terminals

    @property
    def non_terminals(self):
        return self._non_terminals

    @property
    def start_symbol(self):
        return self._start_symbol

    def _compute_productions_by_key(self):
        """
        Group the productions by their left-hand side for easy access,
        i.e. [A -> B, ..., A -> C, A -> D] => A: [A -> B, A -> C, A -> D], B: [...]
        """
        index = self._productions_by_key
        for rule in self._productions:
            index.setdefault(rule.left, []).append(rule)

    def _compute_non_terminals(self):
        """
        Collect the non-terminal set: every symbol that is the left side of a production.
        """
        self._non_terminals.update(rule.left for rule in self._productions)

    def _compute_terminals(self):
        """
        Collect the terminal set: every right-hand-side symbol that is not a non-terminal.
        """
        if not self._non_terminals:
            self._compute_non_terminals()
        for rule in self._productions:
            self._terminals.update(
                symbol for symbol in rule.right if symbol not in self._non_terminals
            )

    def __str__(self):
        header = (
            f"\nStart Symbol: {self._start_symbol}\n"
            f"Terminals: {self._terminals}\n"
            f"Non-terminals: {self._non_terminals}\n"
            "Productions:\n"
        )
        return header + "\n".join(map(str, self._productions))

    def __repr__(self):
        return str(self)

    def __hash__(self):
        # The hash is derived from the textual form of the grammar.
        return hash(str(self))

In [5]:
def load_grammar(file_path: Union[Path,str]) -> Grammar:
    """
    Load a grammar from a text file containing one production per line.

    Every non-blank line must have the form ``left -> sym_1 sym_2 ... sym_n``:
    a single left-hand symbol, the arrow surrounded by whitespace, and one or
    more whitespace-separated right-hand symbols. The left-hand side of the
    first production in the file becomes the start symbol.

    Args:
        file_path: path of the grammar file

    Returns: the grammar described by the file

    Raises:
        Exception: a non-blank line is not a valid production
        ValueError: the file contains no production at all
    """
    # The left side is a single token, so a right-hand side that itself
    # contains "->" can no longer be swallowed by a greedy left group.
    production_regex = re.compile(r'(?P<left>\S+)\s+->\s+(?P<right>.+)')
    productions: List[Production] = []
    with open(file_path, "r") as f:
        for line in f:
            line = line.strip()  # remove surrounding whitespace
            if line == "":
                continue
            match = production_regex.fullmatch(line)
            if match is None:
                raise Exception(f"Invalid production: {line}")
            left = match.group("left")
            # split() without an argument collapses runs of whitespace, so
            # double spaces or tabs never produce empty-string symbols.
            right = match.group("right").split()
            productions.append(Production(left, right))

    if not productions:
        # Without this check the start-symbol lookup below fails with a bare IndexError.
        raise ValueError(f"No productions found in {file_path}")

    grammar = Grammar(productions=set(productions),
                      start_symbol=productions[0].left)
    return grammar

In [6]:
# Load the grammar; the left side of the first production in grammar.txt becomes the start symbol.
grammar = load_grammar("grammar.txt")

In [7]:
# Show the start symbol, the terminal / non-terminal sets and all productions (Grammar.__str__).
print(grammar)


Start Symbol: program
Terminals: {')', '-', '=', 'void', '<=', '$', '!=', '+', '/', '*', '%', 'INT', 'IDN', '{', 'return', '==', '<', '>=', ';', '(', '}', ',', '>', 'const', 'int'}
Non-terminals: {'decl', 'funcFParam', 'argConst', 'argFunctionF', 'block', 'argFunctionR', 'addExpAtom', 'argExp', 'bType', 'stmt', 'assignExpAtom', 'compUnit', 'blockItem', 'constDef', 'funcRParam', 'initVal', 'number', 'funcType', 'assignExp', 'argVarDef', 'mulExpAtom', 'exp', 'program', 'eqExp', 'mulExp', 'relExpAtom', 'funcDef', 'callFunc', 'funcRParams', 'eqExpAtom', 'constExp', 'constInitVal', 'relExp', 'funcFParams', 'addExp', 'unaryExp', 'varDef', 'argVarDecl', 'varDecl', 'constDecl'}
Productions:
relExpAtom -> >= addExp relExpAtom
unaryExp -> IDN callFunc
relExpAtom -> <= addExp relExpAtom
addExpAtom -> - mulExp addExpAtom
unaryExp -> number
compUnit -> $
funcRParam -> exp
assignExpAtom -> = eqExp assignExpAtom
number -> INT
argVarDecl -> $
argFunctionR -> , funcRParam argFunctionR
eqExpAtom -> == 

## Left-Recursion

In [8]:
def eliminate_left_recursion(grammar: Grammar) -> Grammar:
    """
    Eliminate left recursion. For every non-terminal P_i (in the iteration
    order of ``grammar.non_terminals``):
        1. substitute the productions of every earlier non-terminal P_j into
           the productions P_i -> P_j gamma (removes indirect left recursion)
        2. remove the direct left recursion of the result by introducing a
           new non-terminal P_i':
               P_i  -> beta P_i'
               P_i' -> gamma P_i' | $

    "$" denotes epsilon. The input grammar is not modified. A message is
    printed for every non-terminal that turns out to be left-recursive.

    Args:
        grammar: the grammar to transform

    Returns: a new grammar with the same start symbol

    """
    EPSILON = "$"
    non_terminals = list(grammar.non_terminals)

    def concat(head: List[str], tail: List[str]) -> List[str]:
        """
        Concatenate two symbol sequences, dropping epsilon markers; an empty
        result is written as the single epsilon symbol.
        """
        symbols = [s for s in list(head) + list(tail) if s != EPSILON]
        return symbols if symbols else [EPSILON]

    # Work on a private copy of the index. grammar.productions_by_key is the
    # grammar's own dictionary: aliasing it (and appending every production
    # to it a second time) would corrupt the input grammar.
    productions_by_key: OrderedDictType[str, List[Production]] = OrderedDict(
        (left, list(rules)) for left, rules in grammar.productions_by_key.items()
    )  # A -> alpha_1 | ... | alpha_n

    for i, P_i in enumerate(non_terminals):
        productions_p_i: List[Production] = list(productions_by_key[P_i])

        # indirect left recursion:
        # change P_i -> P_j gamma to P_i -> delta_1 gamma | ... | delta_n gamma
        # where P_j -> delta_1 | ... | delta_n, for every j < i
        for P_j in non_terminals[:i]:
            expanded: List[Production] = []
            for production_p_i in productions_p_i:
                if production_p_i.right[0] != P_j:  # P_i -> beta, unchanged
                    expanded.append(production_p_i)
                    continue
                gamma = production_p_i.right[1:]
                for production_p_j in productions_by_key[P_j]:
                    # concat() keeps "P_j -> $" from producing "P_i -> $ gamma",
                    # which the rest of the notebook would read as an epsilon production
                    expanded.append(Production(
                        left=P_i,
                        right=concat(production_p_j.right, gamma)
                    ))  # P_i -> delta gamma
            # drop duplicates but keep the order
            productions_p_i = list(dict.fromkeys(expanded))

        # direct left recursion: P_i -> P_i gamma
        recursive = [p for p in productions_p_i if p.right[0] == P_i]
        if not recursive:
            productions_by_key[P_i] = productions_p_i
            continue

        print(f"left-recursion found! {recursive[0]}")
        P_i_prime = P_i + "'"

        # P_i -> beta P_i'   (P_i -> $ becomes P_i -> P_i')
        productions_by_key[P_i] = [
            Production(left=P_i, right=concat(p.right, [P_i_prime]))
            for p in productions_p_i
            if p.right[0] != P_i
        ]

        # P_i' -> gamma P_i' | $
        new_productions_p_i_: List[Production] = []
        for p in recursive:
            gamma = [s for s in p.right[1:] if s != EPSILON]
            if not gamma:
                # P_i -> P_i derives nothing new; it would only yield P_i' -> P_i'
                continue
            new_productions_p_i_.append(Production(
                left=P_i_prime,
                right=gamma + [P_i_prime]
            ))
        new_productions_p_i_.append(Production(
            left=P_i_prime,
            right=[EPSILON]
        ))  # P_i' -> epsilon
        productions_by_key[P_i_prime] = new_productions_p_i_

    new_productions: List[Production] = [
        p for rules in productions_by_key.values() for p in rules
    ]

    return Grammar(
        productions=set(new_productions),
        start_symbol=grammar.start_symbol
    )

In [9]:
# """
# Unit test for eliminate_left_recursion
# """
#
# old_grammar = load_grammar("unittest/grammar_recursion.txt")
# new_grammar = eliminate_left_recursion(old_grammar)
# sorted(list(new_grammar.productions), key=lambda x: x.left)

In [10]:
# NOTE(review): new_grammar is not referenced by any later cell visible in this notebook;
# FIRST, FOLLOW, the parsing table and the parser below are all built from `grammar`.
new_grammar = eliminate_left_recursion(grammar)

## FIRST, FOLLOW

### FIRST

In [11]:
def get_all_first(grammar: Grammar) -> Dict[str, Set[str]]:
    """
    Get FIRST set of all symbols.

    The sets are computed by fixed-point iteration: they start empty and are
    grown until a full pass over the productions adds nothing. Unlike a
    recursive descent over the productions, this always terminates, also for
    left-recursive grammars and for productions such as A -> B A with a
    nullable B.

    "$" denotes epsilon; a production whose first right-hand symbol is "$" is
    treated as an epsilon production.

    Args:
        grammar: a grammar

    Returns: a dictionary of FIRST sets, keyed by terminal and non-terminal symbols

    Raises:
        Exception: a right-hand side uses a symbol that is neither a terminal
            nor a non-terminal of the grammar

    """
    EPSILON = "$"
    terminals = list(grammar.terminals)
    non_terminals = list(grammar.non_terminals)
    productions_by_key = grammar.productions_by_key

    # FIRST(a) = {a} for terminals; non-terminals start empty.
    first: Dict[str, Set[str]] = {terminal: {terminal} for terminal in terminals}
    for non_terminal in non_terminals:
        first[non_terminal] = set()

    changed = True
    while changed:
        changed = False
        for non_terminal in non_terminals:
            first_set = first[non_terminal]
            size_before = len(first_set)
            for production in productions_by_key[non_terminal]:
                if production.right[0] == EPSILON:  # production: symbol -> $
                    first_set.add(EPSILON)
                    continue
                for symbol in production.right:
                    if symbol not in first:
                        raise Exception(f"Invalid symbol: {symbol}")
                    symbol_first = first[symbol]
                    first_set.update(symbol_first - {EPSILON})
                    if EPSILON not in symbol_first:
                        # symbol cannot vanish, later symbols do not contribute
                        break
                else:
                    # every symbol of the right-hand side is nullable
                    first_set.add(EPSILON)
            if len(first_set) != size_before:
                changed = True
    return first

In [12]:
# FIRST sets of every terminal and non-terminal of `grammar`; the bare name below displays them.
FIRST = get_all_first(grammar)
FIRST

{')': {')'},
 '-': {'-'},
 '=': {'='},
 'void': {'void'},
 '<=': {'<='},
 '$': {'$'},
 '!=': {'!='},
 '+': {'+'},
 '/': {'/'},
 '*': {'*'},
 '%': {'%'},
 'INT': {'INT'},
 'IDN': {'IDN'},
 '{': {'{'},
 'return': {'return'},
 '==': {'=='},
 '<': {'<'},
 '>=': {'>='},
 ';': {';'},
 '(': {'('},
 '}': {'}'},
 ',': {','},
 '>': {'>'},
 'const': {'const'},
 'int': {'int'},
 'decl': {'const', 'int'},
 'funcFParam': {'int'},
 'argConst': {'$', ','},
 'argFunctionF': {'$', ','},
 'block': {'{'},
 'argFunctionR': {'$', ','},
 'addExpAtom': {'$', '+', '-'},
 'argExp': {'$', 'IDN', 'INT'},
 'bType': {'int'},
 'stmt': {';', 'IDN', 'INT', 'return', '{'},
 'assignExpAtom': {'$', '='},
 'compUnit': {'$', 'const', 'int', 'void'},
 'blockItem': {'$', ';', 'IDN', 'INT', 'const', 'int', 'return', '{'},
 'constDef': {'IDN'},
 'funcRParam': {'IDN', 'INT'},
 'initVal': {'IDN', 'INT'},
 'number': {'INT'},
 'funcType': {'void'},
 'assignExp': {'IDN', 'INT'},
 'argVarDef': {'$', '='},
 'mulExpAtom': {'$', '%', '

### FOLLOW

In [13]:
def get_all_follow(grammar: Grammar, FIRST: Dict[str, Set[str]]) -> Dict[str, Set[str]]:
    """
    Get FOLLOW set of all non-terminals, iterative implementation.

    For every production A -> alpha B beta:
        * FIRST(beta) - {epsilon} is added to FOLLOW(B), where FIRST(beta)
          covers the whole suffix beta, not only its first symbol
        * FOLLOW(A) is added to FOLLOW(B) if beta is empty or beta =>* epsilon
    The passes are repeated until no set grows any more. "$" denotes epsilon
    and "EOF" is the end-of-input marker placed in FOLLOW(start symbol).

    Args:
        FIRST: first set
        grammar: a grammar

    Returns: a dictionary of FOLLOW sets

    """
    EPSILON = "$"
    non_terminals = set(grammar.non_terminals)
    productions = list(grammar.productions)

    follow: Dict[str, Set[str]] = {non_terminal: set() for non_terminal in grammar.non_terminals}
    follow[grammar.start_symbol] = {"EOF"}  # start symbol

    changed = True
    while changed:
        changed = False
        for p in productions:
            # Scan the right-hand side from back to front. While standing on
            # symbol B, `trailer` is FIRST(beta) - {epsilon} and
            # `beta_has_epsilon` tells whether beta =>* epsilon, where beta is
            # everything to the right of B.
            trailer: Set[str] = set()
            beta_has_epsilon: bool = True
            for symbol in reversed(p.right):
                if symbol in non_terminals:
                    follow_set = follow[symbol]
                    size_before = len(follow_set)
                    follow_set.update(trailer)
                    if beta_has_epsilon:  # A -> alpha B or beta =>* epsilon
                        follow_set.update(follow[p.left])
                    if len(follow_set) != size_before:
                        changed = True
                    symbol_first = FIRST[symbol]
                else:
                    # FIRST of a terminal is the terminal itself
                    symbol_first = FIRST.get(symbol, {symbol})

                # extend the suffix by the current symbol for the next step
                if EPSILON in symbol_first:
                    trailer = trailer | (symbol_first - {EPSILON})
                else:
                    trailer = set(symbol_first)
                    beta_has_epsilon = False
    return follow

In [14]:
# def get_all_follow(grammar: Grammar, FIRST: Dict[str, Set[str]]) -> Dict[str, Set[str]]:
#     """
#     Get FOLLOW set of all symbols, recursive implementation
#     Args:
#         FIRST: first set
#         grammar: a grammar
#
#     Returns: a dictionary of FOLLOW sets
#
#     """
#
#     follow: Dict[str, Set[str]] = {}
#     non_terminals = list(grammar.non_terminals)
#
#     def get_follow(symbol: str) -> Set[str]:
#         """
#         Get FOLLOW set of a symbol
#         Args:
#             symbol: a terminal or non-terminal symbol
#
#         Returns: a set of terminals
#
#         """
#         if follow.get(symbol) is None:
#             follow[symbol] = set()
#
#         for p in grammar.productions:
#             beta_has_epsilon: bool = True
#             for i in reversed(range(len(p.right))):  # for all occurrences of symbol on right hand side
#
#                 if i < len(p.right) - 1 and "$" not in FIRST[p.right[i + 1]]:  # not last, beta =>* epsilon is not possible
#                     beta_has_epsilon = False
#
#                 if p.right[i] != symbol:  # not production: A -> alpha symbol beta
#                     continue
#
#                 if i == len(p.right) - 1: # last symbol, A -> alpha B, add FOLLOW(A) to FOLLOW(B)
#                     if p.left != symbol:  # avoid infinite recursion, A -> alpha A, FOLLOW(A)=FOLLOW(A)
#                         follow[symbol].update(get_follow(p.left))
#                 else:  # not last symbol, FOLLOW(symbol) += FIRST(beta) - {epsilon}
#                     beta = p.right[i + 1]
#                     follow[symbol].update(FIRST[beta] - {"$"})
#                     if beta_has_epsilon and p.left != symbol: # beta =>* epsilon, A -> alpha B beta, add FOLLOW(A) to FOLLOW(B)
#                         follow[symbol].update(get_follow(p.left))
#
#         return follow[symbol]
#
#     follow[grammar.start_symbol] = {"EOF"}
#     for non_terminal in non_terminals:
#         follow[non_terminal] = get_follow(non_terminal)
#     return follow

In [15]:
# FOLLOW sets of every non-terminal of `grammar`; the bare name below displays them.
FOLLOW = get_all_follow(grammar, FIRST)
FOLLOW

{'decl': {';',
  'EOF',
  'IDN',
  'INT',
  'const',
  'int',
  'return',
  'void',
  '{',
  '}'},
 'funcFParam': {')', ','},
 'argConst': {';'},
 'argFunctionF': {')'},
 'block': {';',
  'EOF',
  'IDN',
  'INT',
  'const',
  'int',
  'return',
  'void',
  '{',
  '}'},
 'argFunctionR': {')'},
 'addExpAtom': {'!=', ')', ',', ';', '<', '<=', '=', '==', '>', '>='},
 'argExp': {';'},
 'bType': {'IDN'},
 'stmt': {';', 'IDN', 'INT', 'const', 'int', 'return', '{', '}'},
 'assignExpAtom': {')', ',', ';'},
 'compUnit': {'EOF'},
 'blockItem': {'}'},
 'constDef': {',', ';'},
 'funcRParam': {')', ','},
 'initVal': {',', ';'},
 'number': {'!=',
  '%',
  ')',
  '*',
  '+',
  ',',
  '-',
  '/',
  ';',
  '<',
  '<=',
  '=',
  '==',
  '>',
  '>='},
 'funcType': {'IDN'},
 'assignExp': {')', ',', ';'},
 'argVarDef': {',', ';'},
 'mulExpAtom': {'!=',
  ')',
  '+',
  ',',
  '-',
  ';',
  '<',
  '<=',
  '=',
  '==',
  '>',
  '>='},
 'exp': {')', ',', ';'},
 'program': {'EOF'},
 'eqExp': {')', ',', ';', '='}

In [16]:
# """
# Unit tests for FIRST and FOLLOW
# """
# grammar_first_follow = load_grammar("unittest/grammar_first_follow.txt")
# grammar_first_follow

In [17]:
# FIRST = get_all_first(grammar_first_follow)
# FIRST

In [18]:
# FOLLOW = get_all_follow(grammar_first_follow, FIRST)
# FOLLOW

## LL(1) Parsing Table

In [19]:
def get_parsing_table(productions, FIRST, FOLLOW) -> Dict[Tuple[str, str], Production]:
    """
    Build the LL(1) parsing table M[A, a].

    For every production A -> alpha:
        * M[A, a] = A -> alpha for every terminal a in FIRST(alpha)
        * M[A, b] = A -> alpha for every b in FOLLOW(A) if alpha =>* epsilon
    FIRST(alpha) is computed over the whole right-hand side, and "$" (epsilon)
    itself is never used as a look-ahead. If the grammar is not LL(1), two
    productions can claim the same cell; the one processed last wins.

    Args:
        productions: iterable of productions
        FIRST: dictionary of FIRST sets
        FOLLOW: dictionary of FOLLOW sets

    Returns: dictionary mapping (non-terminal, look-ahead) to the production to apply

    """
    EPSILON = "$"
    parse_table: Dict[Tuple[str, str], Production] = dict()

    for p in productions:  # A -> alpha
        A = p.left

        # get FIRST(alpha) - {epsilon} and whether alpha =>* epsilon
        FIRST_alpha = set()
        alpha_has_epsilon = True
        for symbol in p.right:
            symbol_first = {EPSILON} if symbol == EPSILON else FIRST[symbol]
            FIRST_alpha.update(symbol_first - {EPSILON})
            if EPSILON not in symbol_first:
                alpha_has_epsilon = False
                break

        for a in FIRST_alpha:
            parse_table[(A, a)] = p

        if alpha_has_epsilon:
            # Apply A -> alpha itself on FOLLOW(A); alpha then vanishes through
            # the epsilon productions of its own symbols.
            for b in FOLLOW[A]:
                parse_table[(A, b)] = p
    return parse_table

In [20]:
# LL(1) parsing table of `grammar`, keyed by (non-terminal, look-ahead token).
parsing_table = get_parsing_table(grammar.productions, FIRST, FOLLOW)

In [21]:
from prettytable import PrettyTable
import sys

def print_parsing_table(grammar: Grammar, parsing_table, f=sys.stdout):
    """
    Render the parsing table as a text table: one row per non-terminal, one
    column per terminal plus a final "EOF" column. A cell shows the production
    stored for (non-terminal, column) or stays empty if there is none.

    Args:
        grammar: the grammar the table was built from
        parsing_table: dictionary mapping (non-terminal, look-ahead) to a production
        f: file-like object the table is printed to

    """
    lookaheads = list(grammar.terminals) + ["EOF"]

    def cell(non_terminal, lookahead):
        entry = parsing_table.get((non_terminal, lookahead))
        return "" if entry is None else str(entry)

    table = PrettyTable()
    table.field_names = [""] + lookaheads
    for non_terminal in list(grammar.non_terminals):
        table.add_row(
            [non_terminal] + [cell(non_terminal, lookahead) for lookahead in lookaheads]
        )
    print(table, file=f)

In [22]:
# Write the rendered table to a file. The output/ directory must already exist:
# open(..., "w") creates the file but not its parent directory.
with open("output/parsing_table.txt", "w") as f:
    print_parsing_table(grammar, parsing_table, f)

## LL(1) Parser

In [23]:
class Stack:
    """
    Minimal LIFO stack backed by a list; the end of the list is the top of the stack.
    """

    def __init__(self):
        self.stack = list()

    def push(self, item):
        """Put item on top of the stack."""
        self.stack.append(item)

    def pop(self) -> str:
        """Remove and return the top item (IndexError if the stack is empty)."""
        return self.stack.pop(-1)

    def top(self) -> str:
        """Return the top item without removing it (IndexError if the stack is empty)."""
        return self.stack[len(self.stack) - 1]

    def __len__(self):
        return self.stack.__len__()

    def __str__(self):
        # top of the stack first
        return " ".join(map(str, self.stack[::-1]))

    def __repr__(self):
        return str(self)

In [24]:
def input_generator(infile: Union[Path,str]) -> Generator[Tuple[str,str], None, None]:
    """
    Read the output of the lexer and yield one (token, lexeme) pair per line.

    Every non-blank line must have the form ``lexeme <TYPE>``. For the types
    INT and IDN the token is the type itself; for every other type the token
    is the lexeme (keywords, operators and delimiters stand for themselves).
    Blank lines are skipped. After the last line ("EOF", "EOF") is yielded.

    Args:
        infile: path of the lexer output file

    Returns: a generator of (token, lexeme) pairs

    Raises:
        Exception: a non-blank line does not have the expected form

    """
    lex_regex = re.compile(r"(?P<lexeme>.+)\s+<(?P<type>.+)>")
    with open(infile) as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        if line == "":
            # tolerate blank lines, as load_grammar does for the grammar file
            continue
        match = lex_regex.match(line)
        if match is None:
            raise Exception(f"Invalid input: {line}")
        lexeme = match.group("lexeme")
        token_type = match.group("type")
        if token_type not in ("INT", "IDN"):
            token_type = lexeme
        yield token_type, lexeme
    yield "EOF", "EOF"

In [25]:
# for token, lexeme in input_generator("testcases/00/00_lexical.txt"):
#     print(f"{token} {lexeme}")

In [26]:
class ParseSyntaxError(Exception):
    """Raised by the parser when the input tokens do not match the grammar."""

def parse(grammar: Grammar, input_generator: Generator[Tuple[str,str], None, None]) -> Generator[str, None, None]:
    """
    Parse a token stream with a table-driven LL(1) parser and report every step.

    FIRST, FOLLOW and the parsing table are rebuilt from the grammar on each
    call. Each step is yielded as "<stack top>#<current token>\\t<action>",
    where the action is one of move (terminal matched), reduction (production
    applied), error, or accept. "Accepted" is additionally printed when the
    end marker is reached on both the stack and the input.

    Args:
        grammar: a grammar
        input_generator: a generator of (token, lexeme) pairs

    Returns: a generator of step descriptions

    Raises:
        ParseSyntaxError: the input cannot be parsed; the matching error step
            is yielded right before the exception is raised

    """
    first_sets = get_all_first(grammar)
    follow_sets = get_all_follow(grammar, first_sets)
    table = get_parsing_table(grammar.productions, first_sets, follow_sets)

    symbols = Stack()
    for initial in ("EOF", grammar.start_symbol):  # end marker at the bottom
        symbols.push(initial)

    token, _lexeme = next(input_generator)
    while True:
        top = symbols.top()

        if top in grammar.terminals:
            if top != token:
                yield f"{top}#{token}\terror"
                raise ParseSyntaxError(f"Syntax error: expected {top} but got {token}")
            # match: consume the terminal and advance the input
            yield f"{top}#{token}\tmove"
            symbols.pop()
            try:
                token, _lexeme = next(input_generator)
            except StopIteration:
                break
            continue

        if top in grammar.non_terminals:
            production = table.get((top, token))
            if production is None:
                yield f"{top}#{token}\terror"
                raise ParseSyntaxError(f"Syntax error: no production for {top} {token}")
            yield f"{top}#{token}\treduction"
            # replace the non-terminal by the right-hand side, leftmost symbol on top
            symbols.pop()
            if production.right[0] != "$":  # A -> epsilon pushes nothing
                for symbol in reversed(production.right):
                    symbols.push(symbol)
            continue

        if top != "EOF":
            yield f"{top}#{token}\terror"
            raise ParseSyntaxError(f"Invalid symbol: {top}")

        if top != token:
            yield f"{top}#{token}\terror"
            raise ParseSyntaxError(f"Syntax error: expected {top} but got {token}")

        print("Accepted")
        break

    # reached after acceptance or when the input ran out after a match
    if symbols.top() != "EOF":
        yield f"{top}#{token}\terror"
        raise ParseSyntaxError("Syntax error: input is not fully parsed")

    yield "EOF#EOF\taccept"

In [27]:
# Print the parse trace of a single test case, one step per line; parse() raises
# ParseSyntaxError after yielding the error step when the input is rejected.
for s in parse(grammar, input_generator("testcases/10_编译错误示例/10_lexical.txt")):
    print(s)

program#int	reduction
compUnit#int	reduction
decl#int	reduction
varDecl#int	reduction
bType#int	reduction
int#int	move
varDef#IDN	reduction
IDN#IDN	move
argVarDef#=	reduction
=#=	move
initVal#INT	reduction
exp#INT	reduction
assignExp#INT	reduction
eqExp#INT	reduction
relExp#INT	reduction
addExp#INT	reduction
mulExp#INT	reduction
unaryExp#INT	reduction
number#INT	reduction
INT#INT	move
mulExpAtom#void	error


ParseSyntaxError: Syntax error: no production for mulExpAtom void

In [None]:
# """
# Unit test for parsing
# """
# grammar_parse = load_grammar("unittest/grammar_parse.txt")
# grammar_parse

In [None]:
# for token, lexeme in input_generator("unittest/lexical.txt"):
#     print(f"{token} {lexeme}")

In [None]:
# FIRST = get_all_first(grammar_parse)
# FIRST

In [None]:
# FOLLOW = get_all_follow(grammar_parse, FIRST)
# FOLLOW

In [None]:
# parsing_table = get_parsing_table(grammar_parse.productions, FIRST, FOLLOW)
# with open("parsing_table.txt", "w") as f:
#     print_parsing_table(grammar_parse, parsing_table, f)

In [None]:
# parse(grammar_parse, input_generator("unittest/lexical.txt"))

## Test

In [28]:
# Parse the lexer output of every test case and write the trace to output/<name>_parsing.txt.
# The output/ directory must already exist: open(..., "w") does not create it.
for testcase_dir in Path("testcases").glob("*"):
    # first *_lexical.txt of the case; next() raises StopIteration if there is none
    ans_path = next(testcase_dir.glob("*_lexical.txt"))
    outfile_name = f"{ans_path.name[:-4]}_parsing.txt"  # remove trailing .txt
    with open(f"output/{outfile_name}", "w") as f:
        try:
            for s in parse(grammar, input_generator(ans_path)):
                print(s, file=f)
        except ParseSyntaxError as e:
            # the steps yielded before the error are already written to the file;
            # only the error message goes to the cell output
            print(e)

Accepted
Accepted
Accepted
Accepted
Syntax error: no production for varDef INT
Syntax error: no production for mulExpAtom void


In [36]:
# Compare every generated trace in output/ with the expected trace (*_grammar.txt) of its test case.
for testcase_dir in Path("testcases").glob("*"):
    # first *_grammar.txt of the case; next() raises StopIteration if there is none
    ans_path = next(testcase_dir.glob("*_grammar.txt"))
    # e.g. 00_grammar.txt -> output/00_lexical_parsing.txt, the name used when the trace was written
    res_path = f'output/{ans_path.name.replace("grammar.txt","lexical_parsing.txt")}'

    # compare line by line, ignoring trailing whitespace
    with open(res_path) as f:
        # strip trailing whitespace (including CR / LF)
        output = [line.rstrip() for line in f.readlines()]
    with open(ans_path) as f:
        # expected = f.readlines()
        expected = [line.rstrip() for line in f.readlines()]
    if output == expected:
        print(f"{testcase_dir.name} passed!")
    else:
        print(f"{testcase_dir.name} failed!")
        # IPython shell escape: show the differences with the external diff tool
        !diff {res_path} {ans_path}

00 passed!
01 passed!
02 passed!
07 passed!
08_编译错误示例 passed!
10_编译错误示例 failed!
21c21,26
< mulExpAtom#void	error
---
> mulExpAtom#void	reduction
> addExpAtom#void	reduction
> relExpAtom#void	reduction
> eqExpAtom#void	reduction
> assignExpAtom#void	reduction
> argVarDecl#void	error
