-
Notifications
You must be signed in to change notification settings - Fork 48
/
parser.py
368 lines (299 loc) · 12.4 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
'''
A custom robotframework parser that retains line numbers (though
it doesn't (yet!) retain character positions for each cell)
Note: this is only known to work on pipe-separated files (space-separated
files probably work too, but I haven't tried them). It uses a copy of the
deprecated TxtReader robot parser to divide a line into cells.
Performance is pretty spiffy! At the time I write this (where
admittedly I don't fully parse everything) it is about 3x-5x faster
than the official robot parser. It can read a file with 500
test cases and 500 keywords in about 30ms, compared to 150ms
for the robot parser. Sweet.
'''
from __future__ import print_function
import re
import sys
import os.path
from robot.errors import DataError
from robot.utils import FileReader
from .util import timeit, Matcher
from .tables import AbstractContainerTable, DefaultTable, SettingTable, VariableTable, UnknownTable
from .testcase import Testcase
from .rfkeyword import Keyword
from .common import Row, Statement
def RobotFactory(path, parent=None):
    '''Build the object that represents ``path``.

    A directory becomes a SuiteFolder. Anything else is parsed as a
    RobotFile and then re-classed: SuiteFile when at least one of its
    tables is a TestcaseTable, ResourceFile otherwise.
    '''
    if os.path.isdir(path):
        return SuiteFolder(path, parent)

    robot_file = RobotFile(path, parent)
    has_testcases = any(isinstance(table, TestcaseTable)
                        for table in robot_file.tables)
    # The file is already parsed; swap the class on the existing
    # instance instead of parsing it a second time.
    robot_file.__class__ = SuiteFile if has_testcases else ResourceFile
    return robot_file
class SuiteFolder(object):
    '''A directory that may hold robot files and nested folders.

    Attributes:
    - path: absolute path of the directory
    - parent: whatever was passed as ``parent`` (None by default)
    - name: the directory's base name with any extension removed
    - initfile: a RobotFile for ``__init__.robot`` / ``__init__.txt``
      if one of them exists in the directory, otherwise None
    '''

    def __init__(self, path, parent=None):
        self.path = os.path.abspath(path)
        self.parent = parent
        # Use the normalized path: os.path.basename("tests/") is "" and
        # would leave the folder without a name.
        self.name = os.path.splitext(os.path.basename(self.path))[0]
        self.initfile = None

        # see if there's an initialization file. If so,
        # attempt to load it (the first one found wins)
        for filename in ("__init__.robot", "__init__.txt"):
            candidate = os.path.join(self.path, filename)
            if os.path.exists(candidate):
                self.initfile = RobotFile(candidate)
                break

    def walk(self, *types):
        '''
        Iterator which visits all suites and suite files,
        yielding the objects whose class was requested.

        ``types`` are the classes to yield. When none are given,
        SuiteFile, ResourceFile, SuiteFolder, Testcase and Keyword
        objects are all yielded. The comparison is on the exact
        class of each object, not on isinstance().
        '''
        requested = types if len(types) > 0 else [SuiteFile, ResourceFile, SuiteFolder, Testcase, Keyword]

        for thing in self.robot_files:
            if thing.__class__ in requested:
                yield thing
            if isinstance(thing, SuiteFolder):
                # A nested folder is walked unfiltered and the filter is
                # applied here on what it yields.
                for child in thing.walk():
                    if child.__class__ in requested:
                        yield child
            else:
                # Anything else gets the caller's type arguments as-is.
                for child in thing.walk(*types):
                    yield child

    @property
    def robot_files(self):
        '''Return a list of all folders, and test suite files (.txt, .robot)

        Every entry is built with RobotFactory, with this folder as
        its parent. Initialization files (``__init__.txt`` and
        ``__init__.robot``) are left out. The directory is re-read on
        every access.
        '''
        result = []
        for name in os.listdir(self.path):
            fullpath = os.path.join(self.path, name)
            if os.path.isdir(fullpath):
                result.append(RobotFactory(fullpath, parent=self))
            elif (name.endswith((".txt", ".robot")) and
                  name not in ("__init__.txt", "__init__.robot")):
                result.append(RobotFactory(fullpath, parent=self))
        return result
class RobotFile(object):
    '''
    Terminology:
    - A file is a set of tables
    - A table begins with a heading and extends to the next table or EOF
    - Each table may be made up of smaller tables that define test cases
      or keywords
    - Each line of text in a table becomes a "Row".
    - A Row object contains a list of cells.
    - A cell is all of the data between pipes, stripped of leading and
      trailing spaces
    '''

    # Cell splitting rules, copied from
    # https://github.com/robotframework/robotframework/blob/v3.1.2/src/robot/parsing/robotreader.py
    # Raw strings are used so that "\|" reaches the regex engine
    # untouched instead of being an invalid string escape; the regex
    # engine itself understands "\t" and "\xa0".
    _space_splitter = re.compile(r'[ \t\xa0]{2,}|\t+')
    _pipe_splitter = re.compile(r'[ \t\xa0]+\|(?=[ \t\xa0]+)')
    _pipe_starts = ('|', '| ', '|\t', u'|\xa0')
    _pipe_ends = (' |', '\t|', u'\xa0|')

    # A table heading: one or more leading asterisks, the table name
    # (captured as group 1), then optional trailing spaces/asterisks.
    _heading_regex = r'^\s*\*+\s*(.*?)[ *]*$'

    def __init__(self, path, parent=None):
        '''Parse the file at ``path``.

        A failure while loading is reported on stderr rather than
        raised, so the object always gets constructed; in that case
        ``tables`` holds whatever was parsed before the failure.
        '''
        self.parent = parent
        self.name = os.path.splitext(os.path.basename(path))[0]
        self.path = os.path.abspath(path)
        self.tables = []
        # NOTE: nothing in this class adds to self.rows
        self.rows = []
        # Make sure the attribute exists even when the file can't be read
        self.raw_text = ""

        try:
            self._load(path)
        except Exception as e:
            sys.stderr.write("there was a problem reading '%s': %s\n" % (path, str(e)))

    def walk(self, *types):
        '''
        Iterator which can return all test cases and/or keywords

        You can specify which objects to return as parameters; if
        no parameters are given, both tests and keywords will
        be returned.

        For example, to get only test cases, you could call it
        like this:

            robot_file = RobotFactory(...)
            for testcase in robot_file.walk(Testcase): ...

        Test cases are always yielded before keywords.
        '''
        requested = types if len(types) > 0 else [Testcase, Keyword]

        if Testcase in requested:
            for testcase in self.testcases:
                yield testcase

        if Keyword in requested:
            for keyword in self.keywords:
                yield keyword

    def _load(self, path):
        '''
        The general idea is to do a quick parse, creating a list of
        tables. Each table is nothing more than a list of rows, with
        each row being a list of cells. Additional parsing such as
        combining rows into statements is done on demand. This first
        pass is solely to read in the plain text and organize it by table.

        Rows that come before the first heading are collected in a
        DefaultTable which is not added to ``self.tables``.
        '''
        self.tables = []
        current_table = DefaultTable(self)
        matcher = Matcher(re.IGNORECASE)

        with FileReader(path) as f:
            # N.B. the caller should be catching errors
            self.raw_text = f.read()
            # rewind so the same reader can hand the text back line by line
            f.file.seek(0)

            # line numbers start at 1 rather than zero
            for linenumber, raw_text in enumerate(f.readlines(), 1):
                # this mimics what the robot TSV reader does --
                # it replaces non-breaking spaces with regular spaces,
                # and then strips trailing whitespace
                raw_text = raw_text.replace(u'\xA0', ' ')
                raw_text = raw_text.rstrip()

                # FIXME: I'm keeping line numbers but throwing away
                # where each cell starts. I should be preserving that
                # (though to be fair, robot is throwing that away so
                # I'll have to write my own splitter if I want to save
                # the character position)
                cells = self.split_row(raw_text)

                if matcher(self._heading_regex, cells[0]):
                    # we've found the start of a new table
                    table_name = matcher.group(1)
                    current_table = tableFactory(self, linenumber, table_name, raw_text)
                    self.tables.append(current_table)
                else:
                    current_table.append(Row(linenumber, raw_text, cells))

    def split_row(self, row):
        """Split one line of text into a list of cells.

        A line that starts with a pipe is treated as pipe-separated:
        the leading pipe (and a trailing one, if present) is dropped
        and every cell is stripped. Any other line is split on tabs
        or on runs of two or more whitespace characters, and the
        cells are returned unstripped.

        function copied from
        https://github.com/robotframework/robotframework/blob/v3.1.2/src/robot/parsing/robotreader.py
        """
        if row[:2] in self._pipe_starts:
            row = row[1:-1] if row[-2:] in self._pipe_ends else row[1:]
            return [cell.strip()
                    for cell in self._pipe_splitter.split(row)]
        return self._space_splitter.split(row)

    def __repr__(self):
        return "<RobotFile(%s)>" % self.path

    @property
    def type(self):
        '''Return 'suite' or 'resource' or None

        This will return 'suite' if a testcase table is found;
        It will return 'resource' if at least one robot table
        is found. If no tables are found it will return None
        '''
        # Tables with unrecognized headings don't count as robot tables
        if all(isinstance(table, UnknownTable) for table in self.tables):
            return None

        if any(isinstance(table, TestcaseTable) for table in self.tables):
            return "suite"

        return "resource"

    @property
    def keywords(self):
        '''Generator which returns all keywords in the suite'''
        for table in self.tables:
            if isinstance(table, KeywordTable):
                for keyword in table.keywords:
                    yield keyword

    @property
    def testcases(self):
        '''Generator which returns all test cases in the suite'''
        for table in self.tables:
            if isinstance(table, TestcaseTable):
                for testcase in table.testcases:
                    yield testcase

    def dump(self):
        '''Regurgitate the tables and rows'''
        for table in self.tables:
            print("*** %s ***" % table.name)
            table.dump()
def tableFactory(parent, linenumber, name, header):
    '''Create the table object that corresponds to a heading name.

    The name is tested, ignoring case, against the known heading
    patterns in a fixed order and the first hit decides the class.
    A name of None, or a name that fits no pattern, produces an
    UnknownTable. Every table is constructed with the same four
    arguments this function received.
    '''
    match = Matcher(re.IGNORECASE)

    # Order matters: patterns are tried from top to bottom.
    known_tables = (
        (r'settings?|metadata', SettingTable),
        (r'variables?', VariableTable),
        (r'test ?cases?', TestcaseTable),
        (r'(user )?keywords?', KeywordTable),
    )

    if name is not None:
        for pattern, table_class in known_tables:
            if match(pattern, name):
                return table_class(parent, linenumber, name, header)

    return UnknownTable(parent, linenumber, name, header)
class SuiteFile(RobotFile):
    '''A RobotFile flavour that also exposes settings and variables.

    RobotFactory assigns this class to files that contain a test
    case table.
    '''

    def __repr__(self):
        return "<SuiteFile(%s)>" % self.path

    @property
    def settings(self):
        '''Generator which returns all of the statements in all of the settings tables'''
        for table in self.tables:
            if not isinstance(table, SettingTable):
                continue
            for statement in table.statements:
                yield statement

    @property
    def variables(self):
        '''Generator which returns the rows of all of the variables tables

        Rows whose first cell is an empty string are skipped.
        '''
        for table in self.tables:
            if not isinstance(table, VariableTable):
                continue
            # FIXME: setting tables expose statements while variable
            # tables expose rows; the two should be made consistent
            for row in table.rows:
                if row[0] != "":
                    yield row
class ResourceFile(RobotFile):
    '''A RobotFile flavour that also exposes settings.

    RobotFactory assigns this class to files that have no test
    case table.
    '''

    def __repr__(self):
        return "<ResourceFile(%s)>" % self.path

    @property
    def settings(self):
        '''Generator which returns all of the statements in all of the settings tables'''
        for table in self.tables:
            if not isinstance(table, SettingTable):
                continue
            for statement in table.statements:
                yield statement
class TestcaseTable(AbstractContainerTable):
    '''Container table whose child class is Testcase.'''

    _childClass = Testcase

    def __init__(self, parent, *args, **kwargs):
        super(TestcaseTable, self).__init__(parent, *args, **kwargs)
        # Expose the children under a name that says what they are.
        # NOTE(review): _children is presumably created by
        # AbstractContainerTable.__init__ -- confirm in tables.py
        self.testcases = self._children
class KeywordTable(AbstractContainerTable):
    '''Container table whose child class is Keyword.'''

    _childClass = Keyword

    def __init__(self, parent, *args, **kwargs):
        super(KeywordTable, self).__init__(parent, *args, **kwargs)
        # Expose the children under a name that says what they are.
        # NOTE(review): _children is presumably created by
        # AbstractContainerTable.__init__ -- confirm in tables.py
        self.keywords = self._children
@timeit
def dump(suite):
    '''Touch every step of every test case in ``suite``.

    Nothing is printed or collected; the steps of each test case in
    each TestcaseTable are simply iterated over, which forces the
    parsing of the individual steps. The undecorated function
    returns None.
    '''
    testcase_tables = (table for table in suite.tables
                       if isinstance(table, TestcaseTable))
    for table in testcase_tables:
        for tc in table.testcases:
            # force parsing of individual steps
            for _ in tc.steps:
                pass
if __name__ == "__main__":
    # Crude benchmark: parse the file named on the command line with
    # the robot parser and with this parser, to see which is faster.
    # Of course, this parser will be faster :-)

    # Check the arguments before doing anything else
    if len(sys.argv) == 1:
        print("give me a filename on the command line")
        sys.exit(1)

    # robot's ResourceFile is imported under an alias so that it doesn't
    # replace the ResourceFile class this module defines.
    from robot.parsing import TestData
    from robot.parsing import ResourceFile as RobotResourceFile

    @timeit
    def test_robot():
        '''Parse the file with the robot parser'''
        try:
            suite = TestData(parent=None, source=sys.argv[1])
        except DataError:
            # if loading the suite failed, assume it's a resource file
            # (bad assumption, but _whatever_)
            suite = RobotResourceFile(source=sys.argv[1])
        return suite

    @timeit
    def test_mine():
        '''Parse the file with this module's parser'''
        suite = RobotFile(sys.argv[1])
        # force parsing of every line; the values themselves aren't needed
        for tc in suite.testcases:
            _ = tc.statements
            _ = tc.tags
        return suite

    suite1 = test_robot()
    suite2 = test_mine()