Skip to content

Commit

Permalink
Add option to run coala only on changed files
Browse files Browse the repository at this point in the history
With `--caching` the user can run coala only on those files that
have changed since the last time coala was run. This should improve
the running time of coala.

Fixes #1991
  • Loading branch information
adtac committed May 25, 2016
1 parent ee667e1 commit d234c1e
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 21 deletions.
51 changes: 31 additions & 20 deletions coalib/coala_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from coalib.output.Tagging import delete_tagged_results, tag_results
from coalib.processes.Processing import execute_section, simplify_section_result
from coalib.settings.ConfigurationGathering import gather_configuration
from coalib.misc.Caching import FileCache
from coalib.misc.CachingUtilities import (
settings_changed, update_settings_db, get_settings_hash)

# PEP 8 (E731): prefer a ``def`` over assigning a lambda to a name — the
# function gets a real ``__name__`` for tracebacks and can carry a docstring.
def do_nothing(*args):
    """Default callback: accept any arguments, do nothing, return ``True``."""
    return True

Expand Down Expand Up @@ -71,31 +74,39 @@ def run_coala(log_printer=None,
dtag = str(sections['default'].get('dtag', None))
config_file = os.path.abspath(str(sections["default"].get("config")))

settings_hash = get_settings_hash(sections)
flush_cache = (sections['default'].get('flush_cache', False) or
settings_changed(log_printer, settings_hash))

# Deleting all .orig files, so the latest files are up to date!
coala_delete_orig.main(log_printer, sections["default"])

delete_tagged_results(dtag, config_file, log_printer)

for section_name, section in sections.items():
if not section.is_enabled(targets):
continue

print_section_beginning(section)
section_result = execute_section(
section=section,
global_bear_list=global_bears[section_name],
local_bear_list=local_bears[section_name],
print_results=print_results,
log_printer=log_printer)
yielded, yielded_unfixed, results[section_name] = (
simplify_section_result(section_result))

yielded_results = yielded_results or yielded
yielded_unfixed_results = (
yielded_unfixed_results or yielded_unfixed)
did_nothing = False

file_dicts[section_name] = section_result[3]
with FileCache(log_printer, os.getcwd(), flush_cache) as cache:
for section_name, section in sections.items():
if not section.is_enabled(targets):
continue

print_section_beginning(section)
section_result = execute_section(
section=section,
global_bear_list=global_bears[section_name],
local_bear_list=local_bears[section_name],
print_results=print_results,
cache=cache,
log_printer=log_printer)
yielded, yielded_unfixed, results[section_name] = (
simplify_section_result(section_result))

yielded_results = yielded_results or yielded
yielded_unfixed_results = (
yielded_unfixed_results or yielded_unfixed)
did_nothing = False

file_dicts[section_name] = section_result[3]

update_settings_db(log_printer, settings_hash)

tag_results(tag, config_file, results, log_printer)

Expand Down
1 change: 1 addition & 0 deletions coalib/output/dbus/DbusDocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def Analyze(self):
global_bear_list=global_bears[section_name],
local_bear_list=local_bears[section_name],
print_results=lambda *args: True,
cache=None,
log_printer=log_printer)
yielded_results = yielded_results or section_result[0]

Expand Down
15 changes: 15 additions & 0 deletions coalib/parsing/DefaultArgParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,21 @@ def default_arg_parser(formatter_class=None):
help='Files that will be analyzed will be '
'restricted to those in the globs listed '
'in this argument as well the files setting')
arg_parser.add_argument('-C',
'--caching',
nargs='?',
const=True,
metavar='BOOL',
help='Run coala only on files that have changed '
'since the last time coala was run. Note: '
'Caching is currently experimental and '
'will be enabled by default from the next '
'release (this option will be removed)')
arg_parser.add_argument('--flush-cache',
nargs='?',
const=True,
metavar='BOOL',
help='Rebuild the file cache')
arg_parser.add_argument('-b',
'--bears',
nargs='+',
Expand Down
59 changes: 58 additions & 1 deletion coalib/processes/Processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ def instantiate_processes(section,
local_bear_list,
global_bear_list,
job_count,
cache,
log_printer):
"""
Instantiate the number of processes that will run bears which will be
Expand All @@ -328,6 +329,8 @@ def instantiate_processes(section,
:param local_bear_list: List of local bears belonging to the section.
:param global_bear_list: List of global bears belonging to the section.
:param job_count: Max number of processes to create.
:param cache: An instance of ``misc.Caching.FileCache`` to use as
a file cache buffer.
:param log_printer: The log printer to warn to.
:return: A tuple containing a list of processes,
and the arguments passed to each process which are
Expand All @@ -338,7 +341,31 @@ def instantiate_processes(section,
log_printer,
ignored_file_paths=glob_list(section.get('ignore', "")),
limit_file_paths=glob_list(section.get('limit_files', "")))

# This stores all matched files irrespective of whether coala is run
# only on changed files or not. Global bears require all the files
complete_filename_list = filename_list

# We need to compute the changed files so that the cache gets updated
# even if caching is disabled (in which case all files will be processed
# but the cache will be updated).
changed_files = cache.get_changed_files(
filename_list) if cache else filename_list

# If caching is enabled then the local bears should process only the
# changed files.
if section.get('caching', False):
# FIXME: Log this to the debug channel instead.
log_printer.info("Caching is enabled, bears' log messages from "
"previous runs may not appear. You may use "
"the `--flush-cache` flag to see them.")
filename_list = changed_files

# Note: the complete file dict is given as the file dict to bears and
# the whole project is accessible to every bear. However, local bears are
# run only for the changed files if caching is enabled.
file_dict = get_file_dict(filename_list, log_printer)
complete_file_dict = get_file_dict(complete_filename_list, log_printer)

manager = multiprocessing.Manager()
global_bear_queue = multiprocessing.Queue()
Expand All @@ -363,7 +390,7 @@ def instantiate_processes(section,
section,
local_bear_list,
global_bear_list,
file_dict,
complete_file_dict,
message_queue)

fill_queue(filename_queue, file_dict.keys())
Expand Down Expand Up @@ -431,13 +458,30 @@ def yield_ignore_ranges(file_dict):
len(file[-1])))


def get_file_list(results):
    """
    Collect the paths of all files affected by the given results.

    Duplicates are preserved: a file mentioned by several results (or by
    several affected-code ranges of one result) appears once per mention.

    :param results: An iterable of results whose ``affected_code`` entries
                    are inspected.
    :return:        A list of the file paths referenced by those entries.
    """
    return [code.file
            for result in results
            for code in result.affected_code]


def process_queues(processes,
control_queue,
local_result_dict,
global_result_dict,
file_dict,
print_results,
section,
cache,
log_printer):
"""
Iterate the control queue and send the results received to the print_result
Expand All @@ -460,6 +504,8 @@ def process_queues(processes,
filename as keys.
:param print_results: Prints all given results appropriate to the
output medium.
:param cache: An instance of ``misc.Caching.FileCache`` to use
as a file cache buffer.
:return:                   Return True if all bears execute successfully and
Results were delivered to the user. Else False.
"""
Expand All @@ -471,6 +517,7 @@ def process_queues(processes,
local_processes = len(processes)
global_processes = len(processes)
global_result_buffer = []
result_files = set()
ignore_ranges = list(yield_ignore_ranges(file_dict))

# One process is the logger thread
Expand All @@ -484,6 +531,7 @@ def process_queues(processes,
global_processes -= 1
elif control_elem == CONTROL_ELEMENT.LOCAL:
assert local_processes != 0
result_files.update(get_file_list(local_result_dict[index]))
retval, res = print_result(local_result_dict[index],
file_dict,
retval,
Expand All @@ -504,6 +552,7 @@ def process_queues(processes,

# Flush global result buffer
for elem in global_result_buffer:
result_files.update(get_file_list(global_result_dict[elem]))
retval, res = print_result(global_result_dict[elem],
file_dict,
retval,
Expand All @@ -520,6 +569,7 @@ def process_queues(processes,
control_elem, index = control_queue.get(timeout=0.1)

if control_elem == CONTROL_ELEMENT.GLOBAL:
result_files.update(get_file_list(global_result_dict[index]))
retval, res = print_result(global_result_dict[index],
file_dict,
retval,
Expand All @@ -538,6 +588,8 @@ def process_queues(processes,
# nondeterministically covered.
break

if cache:
cache.add_to_changed_files(result_files)
return retval


Expand Down Expand Up @@ -572,6 +624,7 @@ def execute_section(section,
global_bear_list,
local_bear_list,
print_results,
cache,
log_printer):
"""
Executes the section with the given bears.
Expand All @@ -590,6 +643,8 @@ def execute_section(section,
:param local_bear_list: List of local bears belonging to the section.
:param print_results: Prints all given results appropriate to the
output medium.
:param cache: An instance of ``misc.Caching.FileCache`` to use as
a file cache buffer.
:param log_printer: The log_printer to warn to.
:return: Tuple containing a bool (True if results were
yielded, False otherwise), a Manager.dict
Expand All @@ -614,6 +669,7 @@ def execute_section(section,
local_bear_list,
global_bear_list,
running_processes,
cache,
log_printer)

logger_thread = LogPrinterThread(arg_dict["message_queue"],
Expand All @@ -632,6 +688,7 @@ def execute_section(section,
arg_dict["file_dict"],
print_results,
section,
cache,
log_printer),
arg_dict["local_result_dict"],
arg_dict["global_result_dict"],
Expand Down
8 changes: 8 additions & 0 deletions tests/processes/ProcessingTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from coalib.settings.ConfigurationGathering import gather_configuration
from coalib.settings.Section import Section
from coalib.settings.Setting import Setting
from coalib.misc.Caching import FileCache


process_group_test_code = """
Expand Down Expand Up @@ -90,10 +91,13 @@ def setUp(self):

def test_run(self):
self.sections['default'].append(Setting('jobs', "1"))
self.sections['default'].append(Setting('caching', True))
cache = FileCache(self.log_printer, "coala_test", flush_cache=True)
results = execute_section(self.sections["default"],
self.global_bears["default"],
self.local_bears["default"],
lambda *args: self.result_queue.put(args[2]),
cache,
self.log_printer)
self.assertTrue(results[0])

Expand Down Expand Up @@ -130,6 +134,7 @@ def test_empty_run(self):
[],
[],
lambda *args: self.result_queue.put(args[2]),
None,
self.log_printer)
# No results
self.assertFalse(results[0])
Expand Down Expand Up @@ -200,6 +205,7 @@ def test_process_queues(self):
"seventh"]},
lambda *args: self.queue.put(args[2]),
section,
None,
self.log_printer)

self.assertEqual(self.queue.get(timeout=0), ([first_local,
Expand All @@ -221,6 +227,7 @@ def test_dead_processes(self):
ctrlq, {}, {}, {},
lambda *args: self.queue.put(args[2]),
Section(""),
None,
self.log_printer)
with self.assertRaises(queue.Empty):
self.queue.get(timeout=0)
Expand All @@ -234,6 +241,7 @@ def test_dead_processes(self):
ctrlq, {}, {}, {},
lambda *args: self.queue.put(args[2]),
Section(""),
None,
self.log_printer)
with self.assertRaises(queue.Empty):
self.queue.get(timeout=0)
Expand Down

0 comments on commit d234c1e

Please sign in to comment.