In [49]:
from py010parser import parse_file, parse_string, c_ast

In [50]:
ast = parse_file("gif.bt", cpp_args="-xc++")  # cpp arg is needed on a Mac

In [51]:
import string
import re

In [52]:
def char(c):
    if 32 <= ord(c) <= 96:
        return repr(c)
    return f"'\\0x{ord(c):02x}'"

In [113]:
class BT2FandangoVisitor(c_ast.NodeVisitor):
    START_SYMBOL = "<start>"

    def __init__(self):
        self.defs = {}
        self.forced_bytes = []
        self.seen = set()
        self.context = []
        self.constraints = []
        self.not_handled = []
        self.in_code = False

    def cond(self):
        return " and ".join(self.context)

    def spec(self, symbol: str = START_SYMBOL, root = True) -> str:
        if root:
            self.seen = set()
        if symbol not in self.defs or symbol in self.seen:
            return ""
        self.seen.add(symbol)  # avoid infinite recursion

        expansion = self.defs[symbol]
        s = f"{symbol} ::= {expansion}\n"
        nonterminals = re.findall(r"(<[^>]+>)", expansion)
        for nonterminal in nonterminals:
            s += self.spec(nonterminal, root=False)

        if root:
            for constraint in self.constraints:
                s += f"{constraint};\n"
            for not_handled in self.not_handled:
                s += f"# Not handled: {not_handled}\n"
        return s

    # def visit(self, node):
    #     method_name = 'visit_' + node.__class__.__name__
    #     visitor = getattr(self, method_name, self.generic_visit)
    #     return visitor(node)

    def generic_visit(self, node) -> str:
        print("Ignoring", node.__class__.__name__)
        for _, child in node.children():
            self.visit(child)
        return ""

    def generic_join(self, node, sep: str = " ") -> str:
        s = ""
        for _, child in node.children():
            member = self.visit(child)
            if s and member:
                s += sep
            if member:
                s += member
        return s

    def visit_ID(self, node: c_ast.ID) -> str:
        return node.name

    def visit_BinaryOp(self, node: c_ast.BinaryOp) -> str:
        left = self.visit(node.left)
        right = self.visit(node.right)
        return f"{left} {node.op} {right}"

    def visit_UnaryOp(self, node: c_ast.UnaryOp) -> str:
        expr = self.visit(node.expr)
        return f"{node.op}{expr}"

    def visit_ExprList(self, node: c_ast.ExprList) -> str:
        return self.generic_join(node)

    def visit_TypeDecl(self, node: c_ast.TypeDecl) -> str:
        return self.generic_join(node)

    def visit_FuncCall(self, node: c_ast.FuncCall) -> str:
        if self.in_code:
            return

        self.in_code = True
        name = self.visit(node.name)
        self.not_handled.append(name)
        self.in_code = False
        return ""

    def visit_FuncDef(self, node: c_ast.FuncDef) -> str:
        if self.in_code:
            return ""

        self.in_code = True
        decl = self.visit(node.decl)
        self.not_handled.append(f"def {decl}(): ...")
        self.in_code = False
        return ""

    def visit_Compound(self, node: c_ast.Compound) -> str:
        return self.generic_join(node)

    def visit_StructRef(self, node: c_ast.StructRef) -> str:
        name = self.visit(node.name)
        field = self.visit(node.field)
        return f"<{name}>.<{field}>"

    def visit_FileAST(self, node: c_ast.FileAST) -> str:
        members = self.generic_join(node)
        self.defs[self.START_SYMBOL] = members
        return ""

    def visit_EmptyStatement(self, node: c_ast.EmptyStatement) -> str:
        return ""

    def visit_Typedef(self, node: c_ast.Typedef) -> str:
        members = self.generic_join(node)
        if node.name:
            self.defs[f"<{node.name}>"] = members
        return ""

    def visit_Struct(self, node: c_ast.Struct) -> str:
        members = ""
        for _, child in node.children():
            if self.forced_bytes:
                elem = char(self.forced_bytes[0])
                self.forced_bytes = self.forced_bytes[1:]
            else:
                elem = self.visit(child)

            if elem and members:
                members += " "
            if elem:
                members += elem

        if node.name:
            self.defs[f"<{node.name}>"] = members
            return f"<{node.name}>"
        return members

    def visit_Decl(self, node: c_ast.Decl):
        if self.in_code:
            return node.name

        if node.bitsize:
            bitsize = eval(self.visit(node.bitsize))
            m = f"<bit>{{{bitsize}}}"
        else:
            m = self.visit(node.type)

        self.defs[f"<{node.name}>"] = m
        return f"<{node.name}>"

    def visit_ArrayDecl(self, node: c_ast.ArrayDecl) -> str:
        type_ = self.visit(node.type)
        dim = self.visit(node.dim)
        return f"{type_}{{{dim}}}"

    def visit_Constant(self, node: c_ast.Constant) -> str:
        return f"{node.value}"

    def visit_Return(self, node: c_ast.Return) -> str:
        # We assume an early return, i.e. return -1
        self.constraints.append("not " + self.cond())
        return ""

    def visit_IdentifierType(self, node: c_ast.IdentifierType) -> str:
        name = "_".join(node.names)
        if self.in_code:
            return name
        else:
            return f"<{name}>"

    def force_bytes(self, cond, iftrue=True):
        # Convert lookaheads into expected bytes
        # as in `if (ReadUShort(FTell()) == 0x0121) ...`
        if not isinstance(cond, c_ast.BinaryOp):
            return

        binary_op: c_ast.BinaryOp = cond
        if (binary_op.op == '==' and not iftrue or
            binary_op.op == '!=' and iftrue):
            return

        if (isinstance(binary_op.left, c_ast.FuncCall) and
            isinstance(binary_op.right, c_ast.Constant)):
            funccall: c_ast.FuncCall = binary_op.left
            constant = eval(binary_op.right.value)
        elif (isinstance(binary_op.right, c_ast.FuncCall) and
            isinstance(binary_op.left, c_ast.Constant)):
            funccall: c_ast.FuncCall = binary_op.right
            constant = eval(binary_op.left.value)
        else:
            return

        func = funccall.name
        if not isinstance(func, c_ast.ID):
            return

        name = func.name
        if (name == 'ReadUShort'):
            self.forced_bytes += [
                # assume little endian
                chr(constant % 256),
                chr(constant // 256)
            ]
        if (name == 'ReadUByte'):
            self.forced_bytes += [
                chr(constant),
            ]


    def visit_While(self, node: c_ast.While) -> str:
        if self.in_code:
            return ""

        self.force_bytes(node.cond, iftrue=True)

        cond = self.visit(node.cond)
        self.context.append(node.cond)
        body = self.visit(node.stmt)
        self.context.pop()

        self.force_bytes(node.cond, iftrue=False)
        return f"({body})*"

    def visit_If(self, node: c_ast.If) -> str:
        if self.in_code:
            return ""

        self.force_bytes(node.cond)

        cond = self.visit(node.cond)
        self.context.append(cond)
        iftrue = self.visit(node.iftrue)
        self.context.pop()

        if len(node.children()) <= 2:
            return f"{iftrue}?"
        else:
            self.context.append("not " + cond)
            iffalse = self.visit(node.iffalse)
            self.context.pop()
            return f"{iftrue} | {iffalse}"

visitor = BT2FandangoVisitor()
visitor.visit(ast)
print(visitor.spec())

Ignoring Assignment
Ignoring For
Ignoring Assignment
Ignoring Assignment
Ignoring For
Ignoring Assignment
Ignoring Assignment
<start> ::= <GifHeader> ? <LogicalScreenDescriptor> <GlobalColorTable>? <Data> <Trailer>
<GifHeader> ::= <GIFHEADER>
<GIFHEADER> ::= <Signature> <Version>
<Signature> ::= <char>{3}
<Version> ::= <char>{3}
<LogicalScreenDescriptor> ::= <LOGICALSCREENDESCRIPTOR>
<LOGICALSCREENDESCRIPTOR> ::= <Width> <Height> <PackedFields> <BackgroundColorIndex> <PixelAspectRatio>
<Width> ::= <ushort>
<ushort> ::= <unsigned_short>
<Height> ::= <ushort>
<PackedFields> ::= <GRAPHICCONTROLEXTENSION_DATASUBBLOCK_PACKEDFIELDS>
<GRAPHICCONTROLEXTENSION_DATASUBBLOCK_PACKEDFIELDS> ::= <Reserved> <DisposalMethod> <UserInputFlag> <TransparentColorFlag>
<Reserved> ::= <bit>3
<DisposalMethod> ::= <bit>3
<UserInputFlag> ::= <bit>1
<TransparentColorFlag> ::= <bit>1
<BackgroundColorIndex> ::= <UBYTE>
<UBYTE> ::= <ubyte>
<ubyte> ::= <uchar>
<uchar> ::= <unsigned_char>
<PixelAspectRatio> ::= <UBYT