-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Processing.py
733 lines (637 loc) · 29.6 KB
/
Processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
from itertools import chain
import multiprocessing
import os
import platform
import queue
import subprocess
from coalib.collecting.Collectors import collect_files
from coala_utils.string_processing.StringConverter import StringConverter
from coalib.output.printers.LOG_LEVEL import LOG_LEVEL
from coalib.processes.BearRunning import run
from coalib.processes.CONTROL_ELEMENT import CONTROL_ELEMENT
from coalib.processes.LogPrinterThread import LogPrinterThread
from coalib.results.Result import Result
from coalib.results.result_actions.ApplyPatchAction import ApplyPatchAction
from coalib.results.result_actions.IgnoreResultAction import IgnoreResultAction
from coalib.results.result_actions.PrintDebugMessageAction import (
PrintDebugMessageAction)
from coalib.results.result_actions.ShowPatchAction import ShowPatchAction
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
from coalib.results.SourceRange import SourceRange
from coalib.settings.Setting import glob_list
from coalib.parsing.Globbing import fnmatch
ACTIONS = [ApplyPatchAction,
PrintDebugMessageAction,
ShowPatchAction,
IgnoreResultAction]
def get_cpu_count():
try:
return multiprocessing.cpu_count()
# cpu_count is not implemented for some CPU architectures/OSes
except NotImplementedError: # pragma: no cover
return 2
def fill_queue(queue_fill, any_list):
"""
Takes element from a list and populates a queue with those elements.
:param queue_fill: The queue to be filled.
:param any_list: List containing the elements.
"""
for elem in any_list:
queue_fill.put(elem)
def get_running_processes(processes):
return sum((1 if process.is_alive() else 0) for process in processes)
def create_process_group(command_array, **kwargs):
if platform.system() == 'Windows': # pragma: no cover
proc = subprocess.Popen(
command_array,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP,
**kwargs)
else:
proc = subprocess.Popen(command_array,
preexec_fn=os.setsid,
**kwargs)
return proc
def get_default_actions(section):
"""
Parses the key ``default_actions`` in the given section.
:param section: The section where to parse from.
:return: A dict with the bearname as keys and their default
actions as values and another dict that contains bears
and invalid action names.
"""
try:
default_actions = dict(section['default_actions'])
except IndexError:
return {}, {}
action_dict = {action.get_metadata().name: action for action in ACTIONS}
invalid_action_set = default_actions.values() - action_dict.keys()
invalid_actions = {}
if len(invalid_action_set) != 0:
invalid_actions = {
bear: action
for bear, action in default_actions.items()
if action in invalid_action_set}
for invalid in invalid_actions.keys():
del default_actions[invalid]
actions = {bearname: action_dict[action_name]
for bearname, action_name in default_actions.items()}
return actions, invalid_actions
def autoapply_actions(results,
file_dict,
file_diff_dict,
section,
log_printer):
"""
Auto-applies actions like defined in the given section.
:param results: A list of results.
:param file_dict: A dictionary containing the name of files and its
contents.
:param file_diff_dict: A dictionary that contains filenames as keys and
diff objects as values.
:param section: The section.
:param log_printer: A log printer instance to log messages on.
:return: A list of unprocessed results.
"""
default_actions, invalid_actions = get_default_actions(section)
no_autoapply_warn = bool(section.get('no_autoapply_warn', False))
for bearname, actionname in invalid_actions.items():
log_printer.warn('Selected default action {!r} for bear {!r} does '
'not exist. Ignoring action.'.format(actionname,
bearname))
if len(default_actions) == 0:
# There's nothing to auto-apply.
return results
not_processed_results = []
for result in results:
try:
# Match full bear names deterministically, prioritized!
action = default_actions[result.origin]
except KeyError:
for bear_glob in default_actions:
if fnmatch(result.origin, bear_glob):
action = default_actions[bear_glob]
break
else:
not_processed_results.append(result)
continue
applicable = action.is_applicable(result, file_dict, file_diff_dict)
if applicable is not True:
if not no_autoapply_warn:
log_printer.warn('{}: {}'.format(result.origin, applicable))
not_processed_results.append(result)
continue
try:
action().apply_from_section(result,
file_dict,
file_diff_dict,
section)
log_printer.info('Applied {!r} on {} from {!r}.'.format(
action.get_metadata().name,
result.location_repr(),
result.origin))
except Exception as ex:
not_processed_results.append(result)
log_printer.log_exception(
'Failed to execute action {!r} with error: {}.'.format(
action.get_metadata().name, ex),
ex)
log_printer.debug('-> for result ' + repr(result) + '.')
return not_processed_results
def check_result_ignore(result, ignore_ranges):
"""
Determines if the result has to be ignored.
Any result will be ignored if its origin matches any bear names and its
SourceRange overlaps with the ignore range.
Note that everything after a space in the origin will be cut away, so the
user can ignore results with an origin like `CSecurityBear (buffer)` with
just `# Ignore CSecurityBear`.
:param result: The result that needs to be checked.
:param ignore_ranges: A list of tuples, each containing a list of lower
cased affected bearnames and a SourceRange to
ignore. If any of the bearname lists is empty, it
is considered an ignore range for all bears.
This may be a list of globbed bear wildcards.
:return: True if the result has to be ignored.
"""
for bears, range in ignore_ranges:
orig = result.origin.lower().split(' ')[0]
if (result.overlaps(range) and
(len(bears) == 0 or orig in bears or fnmatch(orig, bears))):
return True
return False
def print_result(results,
file_dict,
retval,
print_results,
section,
log_printer,
file_diff_dict,
ignore_ranges,
console_printer):
"""
Takes the results produced by each bear and gives them to the print_results
method to present to the user.
:param results: A list of results.
:param file_dict: A dictionary containing the name of files and its
contents.
:param retval: It is True if no results were yielded ever before.
If it is False this function will return False no
matter what happens. Else it depends on if this
invocation yields results.
:param print_results: A function that prints all given results appropriate
to the output medium.
:param file_diff_dict: A dictionary that contains filenames as keys and
diff objects as values.
:param ignore_ranges: A list of SourceRanges. Results that affect code in
any of those ranges will be ignored.
:param console_printer: Object to print messages on the console.
:return: Returns False if any results were yielded. Else
True.
"""
min_severity_str = str(section.get('min_severity', 'INFO')).upper()
min_severity = RESULT_SEVERITY.str_dict.get(min_severity_str, 'INFO')
results = list(filter(lambda result:
type(result) is Result and
result.severity >= min_severity and
not check_result_ignore(result, ignore_ranges),
results))
patched_results = autoapply_actions(results,
file_dict,
file_diff_dict,
section,
log_printer)
print_results(log_printer,
section,
patched_results,
file_dict,
file_diff_dict,
console_printer)
return retval or len(results) > 0, patched_results
def get_file_dict(filename_list, log_printer):
"""
Reads all files into a dictionary.
:param filename_list: List of names of paths to files to get contents of.
:param log_printer: The logger which logs errors.
:return: Reads the content of each file into a dictionary
with filenames as keys.
"""
file_dict = {}
for filename in filename_list:
try:
with open(filename, 'r', encoding='utf-8') as _file:
file_dict[filename] = tuple(_file.readlines())
except UnicodeDecodeError:
log_printer.warn("Failed to read file '{}'. It seems to contain "
'non-unicode characters. Leaving it '
'out.'.format(filename))
except OSError as exception: # pragma: no cover
log_printer.log_exception("Failed to read file '{}' because of "
'an unknown error. Leaving it '
'out.'.format(filename),
exception,
log_level=LOG_LEVEL.WARNING)
log_printer.debug('Files that will be checked:\n' +
'\n'.join(file_dict.keys()))
return file_dict
def filter_raising_callables(it, exception, *args, **kwargs):
"""
Filters all callable items inside the given iterator that raise the
given exceptions.
:param it: The iterator to filter.
:param exception: The (tuple of) exception(s) to filter for.
:param args: Positional arguments to pass to the callable.
:param kwargs: Keyword arguments to pass to the callable.
"""
for elem in it:
try:
yield elem(*args, **kwargs)
except exception:
pass
def instantiate_bears(section,
local_bear_list,
global_bear_list,
file_dict,
message_queue,
console_printer):
"""
Instantiates each bear with the arguments it needs.
:param section: The section the bears belong to.
:param local_bear_list: List of local bear classes to instantiate.
:param global_bear_list: List of global bear classes to instantiate.
:param file_dict: Dictionary containing filenames and their
contents.
:param message_queue: Queue responsible to maintain the messages
delivered by the bears.
:param console_printer: Object to print messages on the console.
:return: The local and global bear instance lists.
"""
local_bear_list = [bear
for bear in filter_raising_callables(
local_bear_list,
RuntimeError,
section,
message_queue,
timeout=0.1)]
global_bear_list = [bear
for bear in filter_raising_callables(
global_bear_list,
RuntimeError,
file_dict,
section,
message_queue,
timeout=0.1)]
return local_bear_list, global_bear_list
def instantiate_processes(section,
local_bear_list,
global_bear_list,
job_count,
cache,
log_printer,
console_printer):
"""
Instantiate the number of processes that will run bears which will be
responsible for running bears in a multiprocessing environment.
:param section: The section the bears belong to.
:param local_bear_list: List of local bears belonging to the section.
:param global_bear_list: List of global bears belonging to the section.
:param job_count: Max number of processes to create.
:param cache: An instance of ``misc.Caching.FileCache`` to use as
a file cache buffer.
:param log_printer: The log printer to warn to.
:param console_printer: Object to print messages on the console.
:return: A tuple containing a list of processes,
and the arguments passed to each process which are
the same for each object.
"""
filename_list = collect_files(
glob_list(section.get('files', '')),
log_printer,
ignored_file_paths=glob_list(section.get('ignore', '')),
limit_file_paths=glob_list(section.get('limit_files', '')))
# This stores all matched files irrespective of whether coala is run
# only on changed files or not. Global bears require all the files
complete_filename_list = filename_list
# Start tracking all the files
if cache:
cache.track_files(set(complete_filename_list))
changed_files = cache.get_uncached_files(
set(filename_list)) if cache else filename_list
# If caching is enabled then the local bears should process only the
# changed files.
log_printer.debug("coala is run only on changed files, bears' log "
'messages from previous runs may not appear. You may '
'use the `--flush-cache` flag to see them.')
filename_list = changed_files
# Note: the complete file dict is given as the file dict to bears and
# the whole project is accessible to every bear. However, local bears are
# run only for the changed files if caching is enabled.
complete_file_dict = get_file_dict(complete_filename_list, log_printer)
file_dict = {filename: complete_file_dict[filename]
for filename in filename_list
if filename in complete_file_dict}
manager = multiprocessing.Manager()
global_bear_queue = multiprocessing.Queue()
filename_queue = multiprocessing.Queue()
local_result_dict = manager.dict()
global_result_dict = manager.dict()
message_queue = multiprocessing.Queue()
control_queue = multiprocessing.Queue()
bear_runner_args = {'file_name_queue': filename_queue,
'local_bear_list': local_bear_list,
'global_bear_list': global_bear_list,
'global_bear_queue': global_bear_queue,
'file_dict': file_dict,
'local_result_dict': local_result_dict,
'global_result_dict': global_result_dict,
'message_queue': message_queue,
'control_queue': control_queue,
'timeout': 0.1}
local_bear_list[:], global_bear_list[:] = instantiate_bears(
section,
local_bear_list,
global_bear_list,
complete_file_dict,
message_queue,
console_printer=console_printer)
fill_queue(filename_queue, file_dict.keys())
fill_queue(global_bear_queue, range(len(global_bear_list)))
return ([multiprocessing.Process(target=run, kwargs=bear_runner_args)
for i in range(job_count)],
bear_runner_args)
def get_ignore_scope(line, keyword):
"""
Retrieves the bears that are to be ignored defined in the given line.
:param line: The line containing the ignore declaration.
:param keyword: The keyword that was found. Everything after the rightmost
occurrence of it will be considered for the scope.
:return: A list of lower cased bearnames or an empty list (-> "all")
"""
toignore = line[line.rfind(keyword) + len(keyword):]
if toignore.startswith('all'):
return []
else:
return list(StringConverter(toignore, list_delimiters=', '))
def yield_ignore_ranges(file_dict):
"""
Yields tuples of affected bears and a SourceRange that shall be ignored for
those.
:param file_dict: The file dictionary.
"""
for filename, file in file_dict.items():
start = None
bears = []
stop_ignoring = False
for line_number, line in enumerate(file, start=1):
# Before lowering all lines ever read, first look for the biggest
# common substring, case sensitive: I*gnor*e, start i*gnor*ing,
# N*oqa*.
if 'gnor' in line or 'oqa' in line:
line = line.lower()
if 'start ignoring ' in line:
start = line_number
bears = get_ignore_scope(line, 'start ignoring ')
elif 'stop ignoring' in line:
stop_ignoring = True
if start:
yield (bears,
SourceRange.from_values(
filename,
start,
1,
line_number,
len(file[line_number-1])))
else:
for ignore_stmt in ['ignore ', 'noqa ', 'noqa']:
if ignore_stmt in line:
end_line = min(line_number + 1, len(file))
yield (get_ignore_scope(line, ignore_stmt),
SourceRange.from_values(
filename,
line_number, 1,
end_line, len(file[end_line-1])))
break
if stop_ignoring is False and start is not None:
yield (bears,
SourceRange.from_values(filename,
start,
1,
len(file),
len(file[-1])))
def get_file_list(results):
"""
Get the set of files that are affected in the given results.
:param results: A list of results from which the list of files is to be
extracted.
:return: A set of file paths containing the mentioned list of
files.
"""
return {code.file for result in results for code in result.affected_code}
def process_queues(processes,
control_queue,
local_result_dict,
global_result_dict,
file_dict,
print_results,
section,
cache,
log_printer,
console_printer):
"""
Iterate the control queue and send the results received to the print_result
method so that they can be presented to the user.
:param processes: List of processes which can be used to run
Bears.
:param control_queue: Containing control elements that indicate
whether there is a result available and which
bear it belongs to.
:param local_result_dict: Dictionary containing results respective to
local bears. It is modified by the processes
i.e. results are added to it by multiple
processes.
:param global_result_dict: Dictionary containing results respective to
global bears. It is modified by the processes
i.e. results are added to it by multiple
processes.
:param file_dict: Dictionary containing file contents with
filename as keys.
:param print_results: Prints all given results appropriate to the
output medium.
:param cache: An instance of ``misc.Caching.FileCache`` to use
as a file cache buffer.
:return: Return True if all bears execute successfully and
Results were delivered to the user. Else False.
"""
file_diff_dict = {}
retval = False
# Number of processes working on local/global bears. They are count down
# when the last queue element of that process is processed which may be
# *after* the process has ended!
local_processes = len(processes)
global_processes = len(processes)
global_result_buffer = []
result_files = set()
ignore_ranges = list(yield_ignore_ranges(file_dict))
# One process is the logger thread
while local_processes > 1:
try:
control_elem, index = control_queue.get(timeout=0.1)
if control_elem == CONTROL_ELEMENT.LOCAL_FINISHED:
local_processes -= 1
elif control_elem == CONTROL_ELEMENT.GLOBAL_FINISHED:
global_processes -= 1
elif control_elem == CONTROL_ELEMENT.LOCAL:
assert local_processes != 0
result_files.update(get_file_list(local_result_dict[index]))
retval, res = print_result(local_result_dict[index],
file_dict,
retval,
print_results,
section,
log_printer,
file_diff_dict,
ignore_ranges,
console_printer=console_printer)
local_result_dict[index] = res
else:
assert control_elem == CONTROL_ELEMENT.GLOBAL
global_result_buffer.append(index)
except queue.Empty:
if get_running_processes(processes) < 2: # pragma: no cover
# Recover silently, those branches are only
# nondeterministically covered.
break
# Flush global result buffer
for elem in global_result_buffer:
result_files.update(get_file_list(global_result_dict[elem]))
retval, res = print_result(global_result_dict[elem],
file_dict,
retval,
print_results,
section,
log_printer,
file_diff_dict,
ignore_ranges,
console_printer=console_printer)
global_result_dict[elem] = res
# One process is the logger thread
while global_processes > 1:
try:
control_elem, index = control_queue.get(timeout=0.1)
if control_elem == CONTROL_ELEMENT.GLOBAL:
result_files.update(get_file_list(global_result_dict[index]))
retval, res = print_result(global_result_dict[index],
file_dict,
retval,
print_results,
section,
log_printer,
file_diff_dict,
ignore_ranges,
console_printer)
global_result_dict[index] = res
else:
assert control_elem == CONTROL_ELEMENT.GLOBAL_FINISHED
global_processes -= 1
except queue.Empty:
if get_running_processes(processes) < 2: # pragma: no cover
# Recover silently, those branches are only
# nondeterministically covered.
break
if cache:
cache.untrack_files(result_files)
return retval
def simplify_section_result(section_result):
"""
Takes in a section's result from ``execute_section`` and simplifies it
for easy usage in other functions.
:param section_result: The result of a section which was executed.
:return: Tuple containing:
- bool - True if results were yielded
- bool - True if unfixed results were yielded
- list - Results from all bears (local and global)
"""
section_yielded_result = section_result[0]
results_for_section = []
for value in chain(section_result[1].values(),
section_result[2].values()):
if value is None:
continue
for result in value:
results_for_section.append(result)
section_yielded_unfixed_results = len(results_for_section) > 0
return (section_yielded_result,
section_yielded_unfixed_results,
results_for_section)
def execute_section(section,
global_bear_list,
local_bear_list,
print_results,
cache,
log_printer,
console_printer):
"""
Executes the section with the given bears.
The execute_section method does the following things:
1. Prepare a Process
- Load files
- Create queues
2. Spawn up one or more Processes
3. Output results from the Processes
4. Join all processes
:param section: The section to execute.
:param global_bear_list: List of global bears belonging to the section.
Dependencies are already resolved.
:param local_bear_list: List of local bears belonging to the section.
Dependencies are already resolved.
:param print_results: Prints all given results appropriate to the
output medium.
:param cache: An instance of ``misc.Caching.FileCache`` to use as
a file cache buffer.
:param log_printer: The log_printer to warn to.
:param console_printer: Object to print messages on the console.
:return: Tuple containing a bool (True if results were
yielded, False otherwise), a Manager.dict
containing all local results(filenames are key)
and a Manager.dict containing all global bear
results (bear names are key) as well as the
file dictionary.
"""
try:
running_processes = int(section['jobs'])
except ValueError:
log_printer.warn("Unable to convert setting 'jobs' into a number. "
'Falling back to CPU count.')
running_processes = get_cpu_count()
except IndexError:
running_processes = get_cpu_count()
processes, arg_dict = instantiate_processes(section,
local_bear_list,
global_bear_list,
running_processes,
cache,
log_printer,
console_printer=console_printer)
logger_thread = LogPrinterThread(arg_dict['message_queue'],
log_printer)
# Start and join the logger thread along with the processes to run bears
processes.append(logger_thread)
for runner in processes:
runner.start()
try:
return (process_queues(processes,
arg_dict['control_queue'],
arg_dict['local_result_dict'],
arg_dict['global_result_dict'],
arg_dict['file_dict'],
print_results,
section,
cache,
log_printer,
console_printer=console_printer),
arg_dict['local_result_dict'],
arg_dict['global_result_dict'],
arg_dict['file_dict'])
finally:
logger_thread.running = False
for runner in processes:
runner.join()