/
parsing.py
161 lines (130 loc) · 7.09 KB
/
parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# from sphinxcontrib-doxylink
# modified: -> search for carsten-forty2
#import multiprocessing
import itertools
from pyparsing import Word, Literal, alphas, nums, alphanums, OneOrMore
from pyparsing import Optional, SkipTo, ParseException, Group, ZeroOrMore
from pyparsing import Suppress, Combine, delimitedList, quotedString
from pyparsing import nestedExpr, ParseResults, oneOf
# Punctuation literals, created once and shared by the grammar below -
# reuse of expressions helps packratting work better
LPAR, RPAR, LBRACK, RBRACK, COMMA, EQ = [Literal(char) for char in "()[],="]

# One or more qualifier keywords that may precede the type in an argument
# list, e.g. the 'unsigned const' in (unsigned const int foo)
qualifier = OneOrMore(oneOf('const unsigned typename struct enum'))
def turn_parseresults_to_list(s, loc, toks):
    """
    Parse action that collapses a nested '<...>' match into one normalised string.

    :Parameters:
        s : string
            The string being parsed (unused).
        loc : int
            Location of the match (unused).
        toks : ParseResults
            The matched tokens; only the first one is used and it must provide ``asList()``.

    :return:
        a ``ParseResults`` built from the string returned by ``normalise_templates``
    """
    nested_tokens = toks[0].asList()
    template_text = normalise_templates(nested_tokens)
    return ParseResults(template_text)
def normalise_templates(toks, isinstance=isinstance, str=str):
    """
    Render a (possibly nested) list of template tokens as ``'< a b< c > >'``.

    :Parameters:
        toks : list
            Strings and nested lists of strings, e.g. ``['vector', ['int']]``.
        isinstance, str :
            Default to the builtins of the same name; bound as arguments so they are local lookups.

    :return:
        a string starting with ``'<'`` and ending with ``' >'``; every string token is
        preceded by a single space and every nested list is rendered recursively
    """
    rendered = [
        ' ' + item if isinstance(item, str) else normalise_templates(item)
        for item in toks
    ]
    return '<' + ''.join(rendered) + ' >'
#Expressions that skip over a pair of brackets.
#A (possibly nested) '<...>' group is replaced by the single string built by normalise_templates
angle_bracket_pair = nestedExpr(opener='<',closer='>').setParseAction(turn_parseresults_to_list)
#TODO Fix for nesting brackets
#These two skip to the first closing bracket, so they do not handle nested brackets
parentheses_pair = LPAR + SkipTo(RPAR) + RPAR
square_bracket_pair = LBRACK + SkipTo(RBRACK) + RBRACK
#The raw type of the input, i.e. 'int' in (unsigned const int * foo)
#Combine requires the name, an optional '<...>' group and an optional trailing name to be adjacent
#TODO I guess this should be a delimited list (by '::') of name and angle brackets
input_type = Combine(Word(alphanums + ':_') + Optional(angle_bracket_pair + Optional(Word(alphanums + ':_'))))
#A number. e.g. -1, 3.6 or 5 (any run of digits, '-' and '.' is accepted)
number = Word('-.' + nums)
#The name of the argument. We will ignore this but it must be matched anyway.
input_name = OneOrMore(Word(alphanums + '_') | angle_bracket_pair | parentheses_pair | square_bracket_pair)
#Grab the '&', '*' or '**' type bit in (const QString & foo, int ** bar)
#This matches a single '*' or '&'; argument_type uses it twice, which is how '**' is covered
pointer_or_reference = oneOf('* &')
#The '=QString()' or '=false' bit in (int foo = 4, bool bar = false)
default_value = Literal('=') + OneOrMore(number | quotedString | input_type | parentheses_pair | angle_bracket_pair | square_bracket_pair | Word('|&^'))
#A combination building up the interesting bit -- the argument type, e.g. 'const QString &', 'int' or 'char*'
#Each piece gets a results name so that normalise() can read it back as an attribute
argument_type = Optional(qualifier, default='')("qualifier") + \
input_type("input_type") + \
Optional(pointer_or_reference, default='')("pointer_or_reference1") + \
Optional('const')('const_pointer_or_reference') + \
Optional(pointer_or_reference, default='')("pointer_or_reference2")
#Argument + variable name + default. Grouped so that each argument is its own entry in 'arg_list'
argument = Group(argument_type('argument_type') + Optional(input_name) + Optional(default_value))
#List of one or more comma-separated arguments in parentheses, optionally ending in ', ...' (stored as 'var_args').
#A 'const' after the closing parenthesis is not matched here; normalise() looks for it in the text after the closing bracket
arglist = LPAR + delimitedList(argument)('arg_list') + Optional(COMMA + '...')('var_args') + RPAR
def _format_qualifier(qualifier):
    """Return the matched qualifier token(s) as one space-separated string."""
    if isinstance(qualifier, str):
        return qualifier
    #The match may be a pyparsing results object holding several tokens
    #(e.g. 'unsigned', 'const'). str() on that gives a list-style repr rather
    #than the qualifier text, so join the individual tokens instead.
    return ' '.join(str(token) for token in qualifier if token)


def _format_argument(arg):
    """Build the normalised text of one parsed argument, e.g. 'const QString&'."""
    parts = []
    if arg.qualifier:
        qualifier_text = _format_qualifier(arg.qualifier)
        if qualifier_text:
            parts.append(qualifier_text + ' ')
    parts.append(arg.input_type)
    #Functions can have a funny combination of *, & and const between the type and the name
    parts.append(arg.pointer_or_reference1)
    if arg.const_pointer_or_reference:
        parts.append(' ' + arg.const_pointer_or_reference + ' ')
    parts.append(arg.pointer_or_reference2)
    return ''.join(parts)


def normalise(symbol):
    """
    Takes a c++ symbol or function and splits it into symbol and a normalised argument list.

    :Parameters:
        symbol : string
            A C++ symbol or function definition like ``PolyVox::Volume``, ``Volume::printAll() const``

    :return:
        a tuple consisting of two strings: ``(qualified function name or symbol, normalised argument list)``.
        The argument list is ``''`` when ``symbol`` contains no opening bracket.
        If the argument list cannot be parsed, ``(error message, None)`` is returned instead.

    :raise ValueError:
        if ``symbol`` contains an opening bracket but no closing bracket
    """
    try:
        bracket_location = symbol.index('(')
    except ValueError:
        #If there's no brackets, then there's no function signature. This means the passed in symbol is just a type name
        return symbol, ''

    #Split the input string into everything before the opening bracket and everything else
    function_name = symbol[:bracket_location]
    arglist_input_string = symbol[bracket_location:]

    #These are very common signatures so we'll make a special case for them. They require no parsing anyway
    if arglist_input_string in ('()', '()=0'):
        return function_name, arglist_input_string
    if arglist_input_string in ('() const ', '() const', '() const =0'):
        return function_name, '() const'

    #By now we're left with something like "(blah, blah)", "(blah, blah) const" or "(blah, blah) const =0"
    try:
        closing_bracket_location = arglist_input_string.rindex(')')
    except ValueError:
        #This shouldn't happen.
        print(('Could not find closing bracket in %s' % arglist_input_string))
        raise
    arglist_suffix = arglist_input_string[closing_bracket_location + 1:]
    arglist_input_string = arglist_input_string[:closing_bracket_location + 1]
    has_const_suffix = 'const' in arglist_suffix

    #The grammar needs at least one argument, so an empty list followed by any
    #other suffix ("() const override", "() noexcept", ...) is handled here
    #instead of being reported as a parse error.
    if not arglist_input_string[1:-1].strip():
        return function_name, ('() const' if has_const_suffix else '()')

    try:
        result = arglist.parseString(arglist_input_string)
    except ParseException as error:
        return str(error), None

    #Will be a list of normalised string arguments
    #e.g. ['OBMol&', 'vector< int >&', 'OBBitVec&', 'OBBitVec&', 'int', 'int']
    normalised_arg_list = [_format_argument(arg) for arg in result.arg_list]

    #If the function contains a variable number of arguments (int foo, ...) then add them on.
    if result.var_args:
        normalised_arg_list.append('...')

    #Combine all the arguments and put parentheses around it
    normalised_arg_list_string = '(' + ', '.join(normalised_arg_list) + ')'

    #Add a const onto the end
    if has_const_suffix:
        normalised_arg_list_string += ' const'

    return function_name, normalised_arg_list_string
def normalise_list(list_of_symbols):
    """
    Apply ``normalise`` to every symbol, in order.

    :Parameters:
        list_of_symbols : iterable of strings
            The C++ symbols or function definitions to normalise.

    :return:
        a list with one ``normalise`` result per input symbol
    """
    #A multiprocessing.Pool based variant used to be sketched here in comments;
    #the symbols are processed serially.
    return [normalise(symbol) for symbol in list_of_symbols]