In [20]:

#exec(open('stuff.py').read(), {'__file__': 'stuff.py'})
from stuff import Grammar, Literal, Identifier, Definition, CharacterClass, Concatenation, Alternatives, ZeroOrMore
import re


# Reserved words: one per line. Blank lines are skipped so that a trailing
# empty line does not add '' to the set.
with open("reserved") as fd:
    reserved = {word for word in map(str.strip, fd) if word}
# Operators: "<operator>\t<NAME>" per line. Blank lines are skipped; without
# that, dict() raises ValueError on the one-element list they split into.
with open("operators") as fd:
    operator_to_name = dict(
        entry.split("\t") for entry in map(str.strip, fd) if entry
    )
# Reverse lookup from token name back to the operator text.
name_to_operator = {v:k for k,v in operator_to_name.items()}
# Rule from which orphan removal starts further down.
start_rule = "start"

# Load the source grammar from its BNF file.
grammar = Grammar.from_bnf(filename="bnf2.txt")
# Names ("$" + identifier) of the system calls found among the grammar literals.
syscalls = set()
# Literals that bundle several tokens, mapped to the equivalent token sequence.
replacements = {
    Literal('-inf'): Definition.from_bnf("'-' 'inf'"),
    Literal('( )'): Definition.from_bnf("'(' ')'"),
    Literal('if ('): Definition.from_bnf("'if' '('"),
    Literal('else if ('): Definition.from_bnf("'else' 'if' '('"),
    Literal('branch ('): Definition.from_bnf("'branch' '('"),
}
for lit in grammar.literals:
    # Raw string: "\$", "\w", "\s" and "\(" are regex escapes, not Python string
    # escapes. Without the r-prefix they are invalid escape sequences, which
    # newer Python versions warn about.
    match = re.match(r"\$(\w+)\s*\(", lit)
    if not match:
        continue
    name = "$" + match.group(1)
    syscalls.add(name)
    # Split a "$name (" literal into the system-call name followed by "(".
    replacements[Literal(lit)] = Concatenation((Literal(name), Literal("(")))
grammar.substitute(replacements)
# Literals to remove from the grammar (see the removal loop below).
# The whitespace-separated part is split into single tokens; the two literals
# that contain a space are appended separately.
bad_literals = '''
@ connectrules wait reg primitive paramset defparam connectmodule macromodule design wreal vectored trireg
scalared signed ground realtime time
=== !== ~ & | ^ ^~  << >> <<< >>>  ~^ ~& ~|
'''.split() + ['# (', "analog initial"]
# Names of rules to remove from the grammar (see the removal loop below).
# The text is split on whitespace, so line breaks and blank lines only group
# the names visually. Some of these names (e.g. analog_filter_function_call)
# are defined again by the replacement BNF passed to grammar.update() later.
bad_rules = '''
module_instantiation
analog_case_statement
system_function_call
conditional_statement 
function_declaration

aliasparam_declaration generate_region specify_block specparam_declaration always_construct 
conditional_generate_construct continuous_assign gate_instantiation initial_construct 
local_parameter_declaration loop_generate_construct  reg_declaration task_declaration 
time_declaration
event_declaration genvar_declaration realtime_declaration 
zi_filter_name
analog_filter_function_call
range_expression
wait_statement task_enable system_task_enable procedural_timing_control_statement procedural_continuous_assignments
nonblocking_assignment event_trigger disable_statement
udp_instantiation
delay_or_event_control
analog_event_control_statement
analog_system_task_enable indirect_contribution_statement
analog_concatenation
concatenation
constant_concatenation
array_analog_variable_assignment
port_probe_function_call
nature_attribute_reference
nature_attribute_override
parent_nature
drive_strength
delay3
constant_range_expression
escaped_identifier
net_type
par_block
analog_loop_generate_statement
analog_function_case_statement
analog_function_conditional_statement
analog_function_loop_statement

analog_conditional_expression
if_else_if_statement
analog_loop_statement
constant_arrayinit
case_statement

analysis_function_call

string
analog_function_declaration
analog_small_signal_function_call
analog_range_expression
range
dimension
loop_statement
analog_built_in_function_call
constant_analog_built_in_function_call

binary_number decimal_base hex_number octal_number

function_call
port_branch_declaration
list_of_net_decl_assignments
'''.split()
# Remove every unwanted literal and rule from the grammar.
for literal in bad_literals:
    grammar.remove(Literal(literal))
for rule in bad_rules:
    grammar.remove(Identifier(rule))
# Give every name listed in grammar.missing_rules a placeholder definition,
# the literal 'MISSING'.
# NOTE(review): presumably these are rules that are referenced but no longer
# defined after the removals above - confirm against Grammar.missing_rules.
for k in grammar.missing_rules:
    #print(f'{k}: {grammar.rule_users[k]}')
    grammar[k] = Literal("MISSING")
# Merge hand-written rules into the grammar:
#   * function-call style rules aliased to function_call / constant_function_call,
#   * number and string tokens (real_number, unsigned_number, STRING_LITERAL),
#   * identifier-like rules aliased to identifier / SYSTEM_IDENTIFIER,
#   * an expression hierarchy expr13 .. expr1 with one level per operator group
#     (binary_operator12 at the top, unary operators at expr1),
#   * start_* entry points, each ending in EOF, collected under `start`.
grammar.update(Grammar.from_bnf(bnf='''
analog_conditional_statement ::= 'if' LPAREN expression RPAREN analog_statement_or_null | 'if' LPAREN expression RPAREN analog_statement_or_null 'else' analog_statement_or_null
analog_event_functions ::= function_call
analog_filter_function_call ::= function_call
analog_function_call ::= function_call
constant_analog_function_call ::= constant_function_call
branch_probe_function_call ::= function_call
indirect_expression ::= function_call
real_number ::= [0-9] { [0-9] } '.' [0-9] { [0-9] } [ [eE] { [+-] } [0-9] { [0-9] } | [TGMKkmunpfa] ]
    | [0-9] { [0-9] } ( [eE] { [+-] } [0-9] { [0-9] } | [TGMKkmunpfa] )
STRING_LITERAL ::= '"' { [^"] } '"'
unsigned_number ::= [0-9] { [0-9] }
nature_attribute_expression ::= real_number | unsigned_number | STRING_LITERAL | identifier
variable_reference ::= identifier
hierarchical_identifier ::= identifier
port_reference ::= identifier
port ::= port_reference
branch_terminal ::= identifier
hierarchical_function_identifier ::= identifier | SYSTEM_IDENTIFIER
system_function_identifier  ::= SYSTEM_IDENTIFIER
system_parameter_identifier  ::= SYSTEM_IDENTIFIER
binary_operator2 ::= RAISED
binary_operator3 ::= TIMES | DIVIDED | MODULUS
binary_operator4 ::= PLUS | MINUS
binary_operator6 ::= SMALLER | SMALLEROREQUAL | GREATER | GREATEROREQUAL
binary_operator7 ::= EQUALS | NOTEQUAL
binary_operator11 ::= LOGICALAND
binary_operator12 ::= LOGICALOR
unary_operator ::= PLUS | MINUS | LOGICALNEGATION
expression ::= expr13
expr13 ::= expr12 '?' expr13 ':' expr13 | expr12
expr12 ::= expr12 binary_operator12 expr11 | expr11
expr11 ::= expr11 binary_operator11 expr7 | expr7
expr7 ::= expr7 binary_operator7 expr6 | expr6
expr6 ::= expr6 binary_operator6 expr4 | expr4
expr4 ::= expr4 binary_operator4 expr3 | expr3
expr3 ::= expr3 binary_operator3 expr2 | expr2
expr2 ::= expr2 binary_operator2 expr1 | expr1
expr1 ::= unary_operator expr_primary | expr_primary
expr_primary ::= number | SIMPLE_IDENTIFIER | SYSTEM_IDENTIFIER | STRING_LITERAL | function_call | parenthesized_expr
parenthesized_expr ::= LPAREN expression RPAREN
constant_expression ::= expression
analog_expression ::= expression
constant_mintypmax_expression ::= constant_expression
scalar_analog_variable_lvalue ::= SIMPLE_IDENTIFIER
start ::= start_nature | start_expression | start_discipline | start_module | start_source_text | start_statement
start_nature ::= nature_declaration EOF
start_expression ::= expression EOF
start_discipline ::= discipline_declaration EOF
start_statement ::= analog_statement EOF
start_module ::= module_declaration EOF
start_source_text ::= source_text EOF
'''))
# Identifier tokens, built directly from definition objects instead of BNF text.
# NOTE(review): presumably written by hand because from_bnf mis-parses a
# character class that ends in a backslash (see the recorded In [9] / In [10]
# outputs further down) - confirm.
grammar['simple_identifier'] = Concatenation(children=(CharacterClass(pattern='a-zA-Z_\\'), ZeroOrMore(children=(CharacterClass(pattern='a-zA-Z0-9_$'),))))
grammar['SYSTEM_IDENTIFIER'] = Concatenation(children=(Literal('$'), CharacterClass(pattern='a-zA-Z0-9_$'), ZeroOrMore(children=(CharacterClass(pattern='a-zA-Z0-9_$'),))))
# Rules that get an upper-case copy; references to the lower-case name are
# then redirected to the upper-case one.
tokenize = '''
unsigned_number
real_number
simple_identifier
'''.split()
for k in tokenize:
    grammar[k.upper()] = grammar[k]
grammar.substitute({Identifier(k): Identifier(k.upper()) for k in tokenize})
# Rules whose references are replaced by their own definition: decimal_number
# plus every rule whose definition equals Identifier('simple_identifier').
# NOTE(review): the substitution just above appears to have already redirected
# simple_identifier references to SIMPLE_IDENTIFIER, and the recorded output of
# the print below lists only decimal_number - confirm whether the comparison
# should target Identifier('SIMPLE_IDENTIFIER') instead.
expand = '''
decimal_number
'''.split() + [k for k,v in grammar.rules.items() if v == Identifier('simple_identifier')]
print({Identifier(k): grammar[k] for k in expand})
grammar.substitute({Identifier(k): grammar[k] for k in expand})
# Drop rules that removeOrphans() considers orphaned with respect to start_rule.
grammar.removeOrphans(start_rule)
# Every literal that appears as a key of grammar.literal_users. The filter that
# would restrict this to literals used outside a lexer rule is commented out at
# the end of the line, so it is currently not applied.
bare_literals = {k for k, v in grammar.literal_users.items()} #if not all(v_==v_.upper() for v_ in v)} #- reserved - set(operator_to_name)
# Maps literal text to the name of the rule that will hold it.
name_literals = {}
# Reserved words are named after themselves, upper-cased.
for literal in bare_literals & reserved:
    name_literals[literal] = literal.upper()
# Operators whose name is already referenced as a rule (keys of grammar.rule_users).
used_operators = {name_to_operator[name] for name in grammar.rule_users if name in name_to_operator}
# Operators take their name from the operators table; this runs after the
# reserved-word loop, so it wins if a literal is in both sets.
for literal in (bare_literals | used_operators) & set(operator_to_name):
    name_literals[literal] = operator_to_name[literal]
# Replace the literals by references to their named rule first, then define
# those rules as the plain literal.
grammar.substitute({Literal(literal): Identifier(name) for literal, name in name_literals.items()})
for literal, name in name_literals.items():
    grammar[name] = Literal(literal)
grammar.removeOrphans(start_rule) 
print(grammar.to_antlr())
import ast

# Parse the visitor module so that its method docstrings can be inspected.
with open('../antlr/parser.py') as fd:
    visitorfile = ast.parse(fd.read())

# Find the top-level `MyVisitor` class. A default is passed to next() so that a
# missing class is reported with a clear message instead of a bare StopIteration.
visitorclass = next(
    (node for node in visitorfile.body
     if isinstance(node, ast.ClassDef) and node.name == 'MyVisitor'),
    None,
)
if visitorclass is None:
    raise LookupError("class 'MyVisitor' not found in ../antlr/parser.py")

def get_tag_names(visitorclass):
    ret = {}
    prefix = 'visit'
    methods = [x for x in visitorclass.body if isinstance(x, ast.FunctionDef) and x.name.startswith(prefix)]
    for method in methods:
        if not isinstance(method.body[0], ast.Expr) or not isinstance(method.body[0].value, ast.Constant):
            continue
        docstring = method.body[0].value.value
        if not isinstance(docstring, str):
            continue
        rule, _, definition = docstring.partition(':')
        ret[rule.strip(), definition.strip()] = method.name[len(prefix):]
    return ret

from pathlib import Path

# Directory that receives the generated files. Kept in one place so that it
# only has to be changed here.
ANTLR_DIR = Path('/home/ignacio/programacion/veriloga/antlr')

# (rule, definition) -> tag name, taken from the visitor docstrings.
tags = get_tag_names(visitorclass)
print(tags)

# Lexer grammar: fixed header and skip rules, then every upper-case rule.
with open(ANTLR_DIR / 'VerilogALexer.g4', 'w') as fd:
    fd.write('lexer grammar VerilogALexer;\n\nimport VerilogAPreprocessor;\n\n')
    fd.write('WHITESPACE: [\\t ]+ -> skip;\n')
    # `*` rather than `+`: an empty comment ("//" directly followed by a line
    # break) must still be matched as a COMMENT.
    fd.write("COMMENT: '//' ~[\\n]* -> skip;\n")
    lexerrules = Grammar({k: v for k,v in grammar.rules.items() if k.isupper()})
    fd.write(lexerrules.canonicalize().to_antlr())

# Parser grammar: every lower-case rule; `tags` is passed on to to_antlr().
with open(ANTLR_DIR / 'VerilogAParser.g4', 'w') as fd:
    fd.write('''\
parser grammar VerilogAParser;
options {
    tokenVocab = VerilogALexer;
}

''')
    parserrules = Grammar({k: v for k,v in grammar.rules.items() if k.islower()})
    fd.write(parserrules.canonicalize().to_antlr(tags))

# Names of parser rules that are a single identifier, a start_* entry point,
# or an alternation made only of identifiers, one name per line.
with open(ANTLR_DIR / 'single_identifier_rules.txt', 'w') as fd:
    fd.write('\n'.join(
        name
        for name, definition in parserrules.rules.items()
        if isinstance(definition, Identifier)
        or name.startswith('start_')
        or (isinstance(definition, Alternatives)
            and all(isinstance(alt, Identifier) for alt in definition.children))
    ))


{Identifier(name='decimal_number'): Identifier(name='UNSIGNED_NUMBER')}
ENDDISCIPLINE:
      'enddiscipline';
CONTINUOUS:
      'continuous';
DDT_NATURE:
      'ddt_nature';
DISCIPLINE:
      'discipline';
IDT_NATURE:
      'idt_nature';
ENDMODULE:
      'endmodule';
ENDNATURE:
      'endnature';
PARAMETER:
      'parameter';
POTENTIAL:
      'potential';
DISCRETE:
      'discrete';
EXCLUDE:
      'exclude';
INTEGER:
      'integer';
ABSTOL:
      'abstol';
ACCESS:
      'access';
ANALOG:
      'analog';
BRANCH:
      'branch';
DOMAIN:
      'domain';
MODULE:
      'module';
NATURE:
      'nature';
OUTPUT:
      'output';
STRING:
      'string';
BEGIN:
      'begin';
INOUT:
      'inout';
INPUT:
      'input';
UNITS:
      'units';
ELSE:
      'else';
FLOW:
      'flow';
FROM:
      'from';
REAL:
      'real';
END:
      'end';
INF:
      'inf';
ANALOGCONTRIBUTION:
      '<+';
EQUALS:
      '==';
GREATEROREQUAL:
      '>=';
IF:
      'if';
LATTR:
      '(*';
LOGICALAND:
      '&&';
LOG

attribute_instance* parameter_declaration ';' | attribute_instance* integer_declaration | attribute_instance* real_declaration


In [9]:
# Check how from_bnf handles a character class that ends in a backslash.
# The first line echoes the BNF text that is parsed; the second prints the
# ANTLR rendering. In the recorded output the class is not closed at its own
# ']' but runs on to the last ']' of the line.
print('simple_identifier ::= [a-zA-Z_\\\\] { [a-zA-Z0-9_$] }')
print(Grammar.from_bnf(bnf='simple_identifier ::= [a-zA-Z_\\\\] { [a-zA-Z0-9_$] }').to_antlr())

simple_identifier ::= [a-zA-Z_\\] { [a-zA-Z0-9_$] }
simple_identifier:
      [a-zA-Z_\] { [a-zA-Z0-9_$];


In [10]:
# Same BNF text as in the check above, but showing the parsed definition
# object: the recorded result is a single CharacterClass covering the rest of
# the line.
Grammar.from_bnf(bnf='simple_identifier ::= [a-zA-Z_\\\\] { [a-zA-Z0-9_$] }')['simple_identifier']

CharacterClass(pattern='a-zA-Z_] { [a-zA-Z0-9_$')

In [7]:
# Print the ANTLR form of the SIMPLE_IDENTIFIER rule. The recorded run raised
# KeyError, i.e. the rule was not in `grammar` at that point of the session.
print(grammar['SIMPLE_IDENTIFIER'].to_antlr())

KeyError: 'SIMPLE_IDENTIFIER'

In [4]:
# Check the ANTLR rendering of a character class containing a backslash;
# the recorded output is [abc\\].
print(CharacterClass('abc\\').to_antlr())

[abc\\]


In [14]:
# Print the canonicalized parser rules in ANTLR syntax (no tags passed here).
print(parserrules.canonicalize().to_antlr())




In [34]:
def is_alternation_of_identifiers(definition):
    """Tell whether a definition is just one identifier or a choice of identifiers.

    The definition is simplified first (``definition.simplify()``).

    Returns:
        True if the simplified definition is an ``Identifier``, or an
        ``Alternatives`` whose children are all ``Identifier`` instances;
        False otherwise.
    """
    simplified = definition.simplify()
    if isinstance(simplified, Identifier):
        return True
    return isinstance(simplified, Alternatives) and all(
        isinstance(option, Identifier) for option in simplified.children
    )
        
# Names of the parser rules that are an identifier or an alternation of identifiers.
{k for k,definition in parserrules.rules.items() if is_alternation_of_identifiers(definition)}

{'constant_primary', 'number'}

In [29]:
from copy import copy

# Source of a class with one default visitor method that delegates to the
# first child of the context.
DEFAULT_VISITOR_SOURCE = '''
class DefaultVisitor:
    def visitNumber(self, ctx):
        return self.visit(ctx.getChild(0))
'''

# ClassDef node used as a template.
method_pattern = ast.parse(DEFAULT_VISITOR_SOURCE).body[0]
# This used to be copy(method); `method` is not defined at notebook scope in
# the visible cells, so it only worked with leftover kernel state.
# NOTE(review): assumes the template was the intended argument - confirm.
copy(method_pattern)
# A second, independent ClassDef node parsed from the same source.
myclass = ast.parse(DEFAULT_VISITOR_SOURCE).body[0]
dir(myclass)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_attributes',
 '_fields',
 'bases',
 'body',
 'col_offset',
 'decorator_list',
 'end_col_offset',
 'end_lineno',
 'keywords',
 'lineno',
 'name']

In [19]:
# Scratch check of the name `copy`; the recorded run raised NameError because
# `copy` had not been imported in the kernel at that point.
copy

NameError: name 'copy' is not defined

In [4]:
# Number of lines in the grammar text `sgrammar`. The closing parenthesis of
# len() used to sit before .split(), which left the line with unbalanced
# parentheses (a syntax error).
len(sgrammar.split('\n'))

92

In [5]:
# Look up the literal_users entry of the ',' literal (recorded result: ['COMMA']).
grammar.literal_users[',']

['COMMA']

In [6]:
# Print the whole grammar in its text form. The orphan removal relative to
# 'expression' is disabled.
#grammar.removeOrphans('expression')
print(grammar)

ABSTOL ::= 'abstol'
ACCESS ::= 'access'
ASSIGN ::= '='
BINARY_OPERATOR ::= '!='
    | '!=='
    | '%'
    | '&'
    | '&&'
    | '*'
    | '**'
    | '+'
    | '-'
    | '/'
    | '<'
    | '<<'
    | '<<<'
    | '<='
    | '=='
    | '==='
    | '>'
    | '>='
    | '>>'
    | '>>>'
    | '^'
    | '^~'
    | '|'
    | '||'
    | '~^'
COLON ::= ':'
COMMA ::= ','
CONTINUOUS ::= 'continuous'
DDT_NATURE ::= 'ddt_nature'
DISCIPLINE ::= 'discipline'
DISCRETE ::= 'discrete'
DOMAIN ::= 'domain'
ENDDISCIPLINE ::= 'enddiscipline'
ENDNATURE ::= 'endnature'
IDT_NATURE ::= 'idt_nature'
LPAREN ::= '('
NATURE ::= 'nature'
POTENTIAL_OR_FLOW ::= 'flow'
    | 'potential'
RPAREN ::= ')'
SEMICOLON ::= ';'
SIMPLE_IDENTIFIER ::= [a-zA-Z_] { [a-zA-Z0-9_$] }
TERNARY ::= '?'
UNARY_OPERATOR ::= '!'
    | '&'
    | '+'
    | '-'
    | '^'
    | '^~'
    | '|'
    | '~'
    | '~&'
    | '~^'
    | '~|'
UNITS ::= 'units'
UNSIGNED_NUMBER ::= [0-9] { [0-9] }
constant_analog_function_call ::= SIMPLE_IDENTIFIER LPAR

In [339]:
# Show the definition of net_reference, then the grammar.rule_users entry for it.
rule = 'net_reference'
print(grammar[rule])
print('')
print(grammar.rule_users[rule])

simple_identifier

['analog_primary']


In [294]:
# Show the definition of analog_primary, then the grammar.rule_users entry for it.
rule = 'analog_primary'
print(grammar[rule])
print('')
print(grammar.rule_users[rule])

analog_built_in_function_call
    | analog_system_function_call
    | function_call
    | net_reference
    | number
    | parameter_reference
    | simple_identifier
    | '(' analog_expression ')'

['analog_expression', 'analog_expression']


In [161]:
# Show the definition of real_type, then the grammar.rule_users entry for it.
rule = 'real_type'
print(grammar[rule])
print('')
print(grammar.rule_users[rule])

simple_identifier '=' constant_expression
    | simple_identifier { dimension } [ '=' constant_arrayinit ]

['list_of_real_identifiers', 'list_of_real_identifiers']


In [165]:
# Show the definition of constant_arrayinit, then the grammar.rule_users entry
# for it. The recorded output is the 'MISSING' placeholder literal.
rule = 'constant_arrayinit'
print(grammar[rule])
print('')
print(grammar.rule_users[rule])

'MISSING'

['real_type', 'variable_type', 'param_assignment', 'noise_table_input_arg']


In [38]:
# Literals that are neither an operator, a reserved word nor a system call.
remaining = grammar.literals - set(operator_to_name) - reserved - syscalls
# Note: this rebinds the notebook-level name `replacements`.
replacements = {}
for lit in remaining:
    parts = set(lit.split())
    # If every whitespace-separated part is itself a known token, replace the
    # literal by the sequence of those parts (in their original order).
    if parts.issubset(set(operator_to_name) | reserved | syscalls):
        replacements[Literal(lit)] = Concatenation(tuple(Literal(part) for part in lit.split()))
grammar.substitute(replacements)

# What is still left over after the substitution (displayed as the cell result).
grammar.literals - set(operator_to_name) - reserved - syscalls

{'#', '# (', 'MISSING'}

In [218]:
# Print the grammar.rule_users entry for module_parameter_port_list.
print(grammar.rule_users['module_parameter_port_list'])

['module_declaration', 'module_declaration']


In [175]:
# Print the grammar.rule_users entry for procedural_timing_control_statement.
print(grammar.rule_users['procedural_timing_control_statement'])

['statement']


In [227]:
# Print the grammar.literal_users entry for the '$root .' literal.
print(grammar.literal_users[
'$root .'
])

['hierarchical_identifier']


In [51]:
# Inspect the `precedence` attribute of the definition stored under
# local_parameter_declaration (recorded result: 2).
grammar['local_parameter_declaration'].precedence

2

In [228]:
# Print the current definition of rule `k` formatted as a
# `name=Definition.from_bnf("""...""")` keyword-argument line.
k = """
hierarchical_identifier
""".strip()
print(f'''
    {k}=Definition.from_bnf("""{grammar[k]}"""),
''')


    hierarchical_identifier=Definition.from_bnf("""[ '$root .' ] { identifier [ '[' constant_expression ']' ] '.' } identifier"""),



In [183]:
# Print the current definition of rule `k` formatted as a
# `name=Definition.from_bnf("""...""")` keyword-argument line.
k = """
seq_block
""".strip()
print(f'''
    {k}=Definition.from_bnf("""{grammar[k]}"""),
''')


    seq_block=Definition.from_bnf("""'begin' [ ':' block_identifier { block_item_declaration } ] { statement } 'end'"""),



In [102]:
# Look up the literal_users entry of the 'z' literal (recorded result: ['z_digit']).
grammar.literal_users['z']

['z_digit']

In [None]:
replacements = {lit: Concatenation([Identifier('system_function_identifier'), Literal('(')])
    for lit in literals if lit.strip()[0] == '$' and lit[-1] == '('}

literals = {node.value for rule in rules.values() for node in rule.descendants(Literal)}
identifiers = {node.name for rule in rules.values() for node in rule.descendants(Identifier)}
weird = literals - reserved - set(operator_to_name)
replacements = {}
for l in weird:
    if re.match(r'\$(\w+)\s*\(', l):
        replacements[l] = 