Skip to content

Commit

Permalink
Merge pull request #1760 from davidmarin/google-bootstrap-dir
Browse files: browse the repository at this point in the history
run Dataproc bootstrap scripts in /tmp/mrjob (fixes #1601)
  • Loading branch information
David Marin committed Apr 30, 2018
2 parents e472b69 + 647ae76 commit 8f34a7b
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
10 changes: 10 additions & 0 deletions mrjob/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,11 @@ def _master_bootstrap_script_content(self, bootstrap):
out.extend(self._start_of_sh_script())
out.append('')

# for example, create a tmp dir and cd to it
if self._bootstrap_pre_commands():
out.extend(self._bootstrap_pre_commands())
out.append('')

# store $PWD
out.append('# store $PWD')
out.append('__mrjob_PWD=$PWD')
Expand Down Expand Up @@ -370,6 +375,11 @@ def _master_bootstrap_script_content(self, bootstrap):

return out

def _bootstrap_pre_commands(self):
"""A list of hard-coded commands to run at the beginning of the
bootstrap script. Currently used by dataproc to cd into a tmp dir."""
return []

def _start_of_sh_script(self):
"""Return a list of lines (without trailing newlines) containing the
shell script shebang and pre-commands."""
Expand Down
7 changes: 7 additions & 0 deletions mrjob/dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,6 +894,13 @@ def _store_cluster_info(self):
self._hadoop_version = map_version(
self._image_version, _DATAPROC_IMAGE_TO_HADOOP_VERSION)

def _bootstrap_pre_commands(self):
# don't run the bootstrap script in / (see #1601)
return [
'mkdir /tmp/mrjob',
'cd /tmp/mrjob',
]

### Bootstrapping ###

def _bootstrap_python(self):
Expand Down
5 changes: 5 additions & 0 deletions tests/test_dataproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,11 @@ def assertScriptDownloads(path, name=None):
# check scripts get run

# bootstrap

# see #1601
self.assertIn('mkdir /tmp/mrjob', lines)
self.assertIn('cd /tmp/mrjob', lines)

self.assertIn(' ' + PYTHON_BIN + ' $__mrjob_PWD/bar.py', lines)
self.assertIn(' $__mrjob_PWD/ohnoes.sh', lines)

Expand Down

0 comments on commit 8f34a7b

Please sign in to comment.