Load target build files in parallel using Python multiprocessing.

This parallelizes the portion of processing that previously took the largest fraction of the runtime. There's more opportunity for parallelization elsewhere, but this seems to have the biggest impact.

Linux & Mac only currently. Raw numbers, measured with:

    GYP_GENERATORS=ninja time build/gyp_chromium

1. Linux, z600, chromium src + src-internal + full WebKit:
   23.14 s -> 16.04 s (30% speedup)
2. MacBook Pro, chromium src only:
   36.83 s -> 27.87 s (25% speedup)
3. Linux, z620, SSD, chromium src + src-internal:
   21.41 s -> 14.14 s (34% speedup)

Review URL: http://codereview.chromium.org/10911082



git-svn-id: http://gyp.googlecode.com/svn/trunk@1508 78cadc50-ecff-11dd-a971-7dbc132099af
commit 9ab98f9c2e167392a6d47baaf3bc430a42dcf4c2 (1 parent: 246e972)
Author: dmazzoni@chromium.org

Showing 2 changed files with 171 additions and 18 deletions.

  1. +12 −3 pylib/gyp/__init__.py
  2. +159 −15 pylib/gyp/input.py
pylib/gyp/__init__.py

@@ -46,7 +46,8 @@ def FindBuildFiles():


def Load(build_files, format, default_variables={},
-         includes=[], depth='.', params=None, check=False, circular_check=True):
+         includes=[], depth='.', params=None, check=False,
+         circular_check=True, parallel=False):
  """
  Loads one or more specified build files.
  default_variables and includes will be copied before use.
@@ -124,7 +125,8 @@ def Load(build_files, format, default_variables={},

  # Process the input specific to this generator.
  result = gyp.input.Load(build_files, default_variables, includes[:],
-                          depth, generator_input_info, check, circular_check)
+                          depth, generator_input_info, check, circular_check,
+                          parallel)
  return [generator] + result

def NameValueListToDict(name_value_list):
@@ -311,6 +313,9 @@ def gyp_main(args):
                    help='do not read options from environment variables')
  parser.add_option('--check', dest='check', action='store_true',
                    help='check format of gyp files')
+  parser.add_option('--parallel', action='store_true',
+                    env_name='GYP_PARALLEL',
+                    help='Use multiprocessing for speed (experimental)')
  parser.add_option('--toplevel-dir', dest='toplevel_dir', action='store',
                    default=None, metavar='DIR', type='path',
                    help='directory to use as the root of the source tree')
@@ -370,6 +375,9 @@ def gyp_main(args):
  if g_o:
    options.generator_output = g_o

+  if not options.parallel and options.use_environment:
+    options.parallel = bool(os.environ.get('GYP_PARALLEL'))
+
  for mode in options.debug:
    gyp.debug[mode] = 1

@@ -487,7 +495,8 @@ def gyp_main(args):
                           cmdline_default_variables,
                           includes, options.depth,
                           params, options.check,
-                           options.circular_check)
+                           options.circular_check,
+                           options.parallel)

  # TODO(mark): Pass |data| for now because the generator needs a list of
  # build files that came in. In the future, maybe it should just accept
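With these changes in place, parallel loading is strictly opt-in: pass --parallel on the gyp command line, or set GYP_PARALLEL=1 in the environment (read only when environment options are enabled, per options.use_environment), e.g. GYP_PARALLEL=1 build/gyp_chromium. All other invocations keep the existing serial code path.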
pylib/gyp/input.py

@@ -12,12 +12,15 @@
import compiler
import copy
import gyp.common
+import multiprocessing
import optparse
import os.path
import re
import shlex
import subprocess
import sys
+import threading
+import time
from gyp.common import GypError


@@ -330,7 +333,7 @@ def ProcessToolsetsInDict(data):
# a build file that contains targets and is expected to provide a targets dict
# that contains the targets...
def LoadTargetBuildFile(build_file_path, data, aux_data, variables, includes,
-                        depth, check):
+                        depth, check, load_dependencies):
  # If depth is set, predefine the DEPTH variable to be a relative path from
  # this build file's directory to the directory identified by depth.
  if depth:
@@ -349,7 +352,7 @@ def LoadTargetBuildFile(build_file_path, data, aux_data, variables, includes,

  if build_file_path in data['target_build_files']:
    # Already loaded.
-    return
+    return False
  data['target_build_files'].add(build_file_path)

  gyp.DebugOutput(gyp.DEBUG_INCLUDES,
@@ -419,22 +422,158 @@ def LoadTargetBuildFile(build_file_path, data, aux_data, variables, includes,
  # in other words, you can't put a "dependencies" section inside a "post"
  # conditional within a target.

+  dependencies = []
  if 'targets' in build_file_data:
    for target_dict in build_file_data['targets']:
      if 'dependencies' not in target_dict:
        continue
      for dependency in target_dict['dependencies']:
-        other_build_file = \
-            gyp.common.ResolveTarget(build_file_path, dependency, None)[0]
-        try:
-          LoadTargetBuildFile(other_build_file, data, aux_data, variables,
-                              includes, depth, check)
-        except Exception, e:
-          gyp.common.ExceptionAppend(
-            e, 'while loading dependencies of %s' % build_file_path)
-          raise
+        dependencies.append(
+            gyp.common.ResolveTarget(build_file_path, dependency, None)[0])
+
+  if load_dependencies:
+    for dependency in dependencies:
+      try:
+        LoadTargetBuildFile(dependency, data, aux_data, variables,
+                            includes, depth, check, load_dependencies)
+      except Exception, e:
+        gyp.common.ExceptionAppend(
+          e, 'while loading dependencies of %s' % build_file_path)
+        raise
+  else:
+    return (build_file_path, dependencies)
+
+
+def CallLoadTargetBuildFile(global_flags,
+                            build_file_path, data,
+                            aux_data, variables,
+                            includes, depth, check):
+  """Wrapper around LoadTargetBuildFile for parallel processing.
+
+  This wrapper is used when LoadTargetBuildFile is executed in
+  a worker process.
+  """
+
+  # Apply globals so that the worker process behaves the same.
+  for key, value in global_flags.iteritems():
+    globals()[key] = value
+
+  # Save the keys so we can return data that changed.
+  data_keys = set(data)
+  aux_data_keys = set(aux_data)

-  return data
+  result = LoadTargetBuildFile(build_file_path, data,
+                               aux_data, variables,
+                               includes, depth, check, False)
+  if not result:
+    return result
+
+  (build_file_path, dependencies) = result
+
+  data_out = {}
+  for key in data:
+    if key == 'target_build_files':
+      continue
+    if key not in data_keys:
+      data_out[key] = data[key]
+  aux_data_out = {}
+  for key in aux_data:
+    if key not in aux_data_keys:
+      aux_data_out[key] = aux_data[key]
+
+  # This gets serialized and sent back to the main process via a pipe.
+  # It's handled in LoadTargetBuildFileCallback.
+  return (build_file_path,
+          data_out,
+          aux_data_out,
+          dependencies)
+
+
+class ParallelState(object):
+  """Class to keep track of state when processing input files in parallel.
+
+  If build files are loaded in parallel, use this to keep track of
+  state during farming out and processing parallel jobs. It's stored
+  in a global so that the callback function can have access to it.
+  """
+
+  def __init__(self):
+    # The multiprocessing pool.
+    self.pool = None
+    # The condition variable used to protect this object and notify
+    # the main loop when there might be more data to process.
+    self.condition = None
+    # The "data" dict that was passed to LoadTargetBuildFileParallel
+    self.data = None
+    # The "aux_data" dict that was passed to LoadTargetBuildFileParallel
+    self.aux_data = None
+    # The number of parallel calls outstanding; decremented when a response
+    # was received.
+    self.pending = 0
+    # The set of all build files that have been scheduled, so we don't
+    # schedule the same one twice.
+    self.scheduled = set()
+    # A list of dependency build file paths that haven't been scheduled yet.
+    self.dependencies = []
+
+  def LoadTargetBuildFileCallback(self, result):
+    """Handle the results of running LoadTargetBuildFile in another process.
+    """
+    (build_file_path0, data0, aux_data0, dependencies0) = result
+    self.condition.acquire()
+    self.data['target_build_files'].add(build_file_path0)
+    for key in data0:
+      self.data[key] = data0[key]
+    for key in aux_data0:
+      self.aux_data[key] = aux_data0[key]
+    for new_dependency in dependencies0:
+      if new_dependency not in self.scheduled:
+        self.scheduled.add(new_dependency)
+        self.dependencies.append(new_dependency)
+    self.pending -= 1
+    self.condition.notify()
+    self.condition.release()
+
+
+def LoadTargetBuildFileParallel(build_file_path, data, aux_data,
+                                variables, includes, depth, check):
+  global parallel_state
+  parallel_state = ParallelState()
+  parallel_state.condition = threading.Condition()
+  parallel_state.dependencies = [build_file_path]
+  parallel_state.scheduled = set([build_file_path])
+  parallel_state.pending = 0
+  parallel_state.data = data
+  parallel_state.aux_data = aux_data
+
+  parallel_state.condition.acquire()
+  while parallel_state.dependencies or parallel_state.pending:
+    if not parallel_state.dependencies:
+      parallel_state.condition.wait()
+      continue
+
+    dependency = parallel_state.dependencies.pop()
+
+    parallel_state.pending += 1
+    data_in = {}
+    data_in['target_build_files'] = data['target_build_files']
+    aux_data_in = {}
+    global_flags = {
+      'path_sections': globals()['path_sections'],
+      'non_configuration_keys': globals()['non_configuration_keys'],
+      'absolute_build_file_paths': globals()['absolute_build_file_paths'],
+      'multiple_toolsets': globals()['multiple_toolsets']}
+
+    if not parallel_state.pool:
+      parallel_state.pool = multiprocessing.Pool(8)
+    parallel_state.pool.apply_async(
+        CallLoadTargetBuildFile,
+        args = (global_flags, dependency,
+                data_in, aux_data_in,
+                variables, includes, depth, check),
+        callback = parallel_state.LoadTargetBuildFileCallback)
+
+  parallel_state.condition.release()


# Look for the bracket that matches the first bracket seen in a
@@ -2335,7 +2474,7 @@ def VerifyNoCollidingTargets(targets):


def Load(build_files, variables, includes, depth, generator_input_info, check,
-         circular_check):
+         circular_check, parallel):
  # Set up path_sections and non_configuration_keys with the default data plus
  # the generator-specifc data.
  global path_sections
@@ -2376,8 +2515,13 @@ def Load(build_files, variables, includes, depth, generator_input_info, check,
    # used as keys to the data dict and for references between input files.
    build_file = os.path.normpath(build_file)
    try:
-      LoadTargetBuildFile(build_file, data, aux_data, variables, includes,
-                          depth, check)
+      if parallel:
+        print >>sys.stderr, 'Using parallel processing (experimental).'
+        LoadTargetBuildFileParallel(build_file, data, aux_data,
+                                    variables, includes, depth, check)
+      else:
+        LoadTargetBuildFile(build_file, data, aux_data,
+                            variables, includes, depth, check, True)
    except Exception, e:
      gyp.common.ExceptionAppend(e, 'while trying to load %s' % build_file)
      raise
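The core of the input.py change is a small fan-out/fan-in scheduler: the main process keeps a work list of build files, hands each one to a multiprocessing.Pool worker via apply_async, and a callback (which runs on a pool-managed thread back in the main process) merges the worker's results and enqueues newly discovered dependencies, with a threading.Condition protecting the shared state. Below is a minimal, self-contained sketch of that pattern only; the names expand and run_scheduler are hypothetical stand-ins, not the gyp code itself:

    import multiprocessing
    import threading

    def expand(item):
      # Hypothetical stand-in for CallLoadTargetBuildFile: process one work
      # item and report any newly discovered items (its "dependencies").
      return (item, [item * 2] if item < 8 else [])

    def run_scheduler(seed):
      condition = threading.Condition()
      state = {'pending': 0, 'scheduled': set([seed]),
               'todo': [seed], 'done': []}

      def callback(result):
        # apply_async callbacks run on a pool-owned thread in the parent
        # process, so they must take the same lock as the main loop
        # (cf. ParallelState.LoadTargetBuildFileCallback above).
        item, new_items = result
        condition.acquire()
        state['done'].append(item)
        for dep in new_items:
          if dep not in state['scheduled']:
            state['scheduled'].add(dep)
            state['todo'].append(dep)
        state['pending'] -= 1
        condition.notify()
        condition.release()

      pool = multiprocessing.Pool(8)  # fixed worker count, as in the patch
      condition.acquire()
      while state['todo'] or state['pending']:
        if not state['todo']:
          condition.wait()  # releases the lock until a callback notifies
          continue
        item = state['todo'].pop()
        state['pending'] += 1
        pool.apply_async(expand, args=(item,), callback=callback)
      condition.release()
      pool.close()
      pool.join()
      return state['done']

    if __name__ == '__main__':
      print(run_scheduler(1))  # expected: [1, 2, 4, 8]

Note two costs the real patch keeps down: each worker receives a data_in dict carrying only the shared 'target_build_files' set, and CallLoadTargetBuildFile returns only the dict keys the worker added, so what gets pickled back across the result pipe stays proportional to the newly loaded files rather than to the whole data dict.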
