Skip to content

Commit

Permalink
setting interpreter turns off boostrap_mrjob (fixes #1041)
Browse files Browse the repository at this point in the history
  • Loading branch information
David Marin committed Jun 18, 2015
1 parent 0b5a0ae commit 835585f
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 10 deletions.
6 changes: 4 additions & 2 deletions docs/guides/configs-all-runners.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ options related to file uploading.
:switch: --bootstrap-mrjob, --no-bootstrap-mrjob
:type: boolean
:set: all
:default: ``True``
:default: (automatic)

Should we automatically tar up the mrjob library and install it when we run
job? Set this to ``False`` if you've already installed ``mrjob`` on your
job? By default, we do unless :mrjob:`interpreter` is set.

Set this to ``False`` if you've already installed ``mrjob`` on your
Hadoop cluster or install it by some other method.

.. mrjob-opt::
Expand Down
10 changes: 5 additions & 5 deletions mrjob/emr.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ def _add_bootstrap_files_for_upload(self, persistent=False):
persistent -- set by make_persistent_job_flow()
"""
# lazily create mrjob.tar.gz
if self._opts['bootstrap_mrjob']:
if self._bootstrap_mrjob():
self._create_mrjob_tar_gz()
self._bootstrap_dir_mgr.add('file', self._mrjob_tar_gz_path)

Expand Down Expand Up @@ -2023,12 +2023,12 @@ def _create_master_bootstrap_script_if_needed(self):
# Also don't bother if we're not bootstrapping
if not (self._bootstrap or self._legacy_bootstrap or
self._opts['bootstrap_files'] or
self._opts['bootstrap_mrjob']):
self._bootstrap_mrjob()):
return

# create mrjob.tar.gz if we need it, and add commands to install it
mrjob_bootstrap = []
if self._opts['bootstrap_mrjob']:
if self._bootstrap_mrjob():
# _add_bootstrap_files_for_upload() should have done this
assert self._mrjob_tar_gz_path
path_dict = {
Expand Down Expand Up @@ -2500,10 +2500,10 @@ def _pool_hash(self):
self._bootstrap,
self._bootstrap_actions,
self._opts['bootstrap_cmds'],
self._opts['bootstrap_mrjob'],
self._bootstrap_mrjob(),
]

if self._opts['bootstrap_mrjob']:
if self._bootstrap_mrjob():
things_to_hash.append(mrjob.__version__)

things_json = json.dumps(things_to_hash, sort_keys=True)
Expand Down
10 changes: 8 additions & 2 deletions mrjob/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def default_options(self):

return combine_dicts(super_opts, {
'base_tmp_dir': tempfile.gettempdir(),
'bootstrap_mrjob': True,
'check_input_paths': True,
'cleanup': ['ALL'],
'cleanup_on_failure': ['NONE'],
Expand Down Expand Up @@ -909,7 +908,7 @@ def _create_setup_wrapper_script(self, dest='setup-wrapper.sh'):

setup = self._setup

if self._opts['bootstrap_mrjob'] and self.BOOTSTRAP_MRJOB_IN_SETUP:
if self._bootstrap_mrjob() and self.BOOTSTRAP_MRJOB_IN_SETUP:
# patch setup to add mrjob.tar.gz to PYTYHONPATH
mrjob_tar_gz = self._create_mrjob_tar_gz()
path_dict = {'type': 'archive', 'name': None, 'path': mrjob_tar_gz}
Expand Down Expand Up @@ -1045,6 +1044,13 @@ def writeln(line=''):

return out

def _bootstrap_mrjob(self):
"""Should we bootstrap mrjob?"""
if self._opts['bootstrap_mrjob'] is None:
return self._opts['interpreter'] is None
else:
return bool(self._opts['bootstrap_mrjob'])

def _get_input_paths(self):
"""Get the paths to input files, dumping STDIN to a local
file if need be."""
Expand Down
2 changes: 1 addition & 1 deletion tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def test_loading_boostrapped_mrjob_library(self):

with mr_job.make_runner() as runner:
# sanity check
self.assertEqual(runner.get_opts()['bootstrap_mrjob'], True)
self.assertEqual(runner._bootstrap_mrjob(), True)
local_tmp_dir = os.path.realpath(runner._get_local_tmp_dir())

runner.run()
Expand Down
21 changes: 21 additions & 0 deletions tests/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -836,3 +836,24 @@ def test_interpreter_overrides_steps_python_bin(self):
steps_python_bin=['python', '-v'])
self.assertEqual(runner._interpreter(), ['ruby'])
self.assertEqual(runner._interpreter(steps=True), ['ruby'])


class BootstrapMRJobTestCase(TestCase):
# this just tests _bootstrap_mrjob() (i.e. whether to bootstrap mrjob);
# actual testing of bootstrapping is in test_local

def test_default(self):
runner = MRJobRunner()
self.assertEqual(runner._bootstrap_mrjob(), True)

def test_no_bootstrap_mrjob(self):
runner = MRJobRunner(bootstrap_mrjob=False)
self.assertEqual(runner._bootstrap_mrjob(), False)

def test_interpreter(self):
runner = MRJobRunner(interpreter=['ruby'])
self.assertEqual(runner._bootstrap_mrjob(), False)

def test_bootstrap_mrjob_overrides_interpreter(self):
runner = MRJobRunner(interpreter=['ruby'], bootstrap_mrjob=True)
self.assertEqual(runner._bootstrap_mrjob(), True)

0 comments on commit 835585f

Please sign in to comment.