Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #466 from irskep/mock_ftw

Make tests twice as fast using mock
  • Loading branch information...
commit fdf8ad4a88e7dce336942b716dab295d9b0e388b 2 parents 3f1bacf + a08b691
David Marin authored
View
2  setup.py
@@ -10,7 +10,7 @@
],
'provides': ['mrjob'],
'test_suite': 'tests.suite.load_tests',
- 'tests_require': ['unittest2'],
+ 'tests_require': ['unittest2', 'mock'],
'zip_safe': False, # so that we can bootstrap mrjob
}
except ImportError:
View
4 tests/mr_counting_job.py
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trivial multi-step job for testing counter behavior"""
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRCountingJob(MRJob):
+class MRCountingJob(MRTestingJob):
def steps(self):
return [self.mr(self.mapper),
View
4 tests/mr_exit_42_job.py
@@ -1,10 +1,10 @@
"""Job that exits with return code 42, without creating a traceback"""
import os
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRExit42Job(MRJob):
+class MRExit42Job(MRTestingJob):
def mapper_final(self):
os._exit(42)
View
4 tests/mr_hadoop_format_job.py
@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trivial two-step job which sets hadoop input and output format."""
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRHadoopFormatJob(MRJob):
+class MRHadoopFormatJob(MRTestingJob):
HADOOP_INPUT_FORMAT = 'mapred.FooInputFormat'
HADOOP_OUTPUT_FORMAT = 'mapred.BarOutputFormat'
View
4 tests/mr_job_where_are_you.py
@@ -21,10 +21,10 @@
warnings.simplefilter('ignore')
import mrjob
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRJobWhereAreYou(MRJob):
+class MRJobWhereAreYou(MRTestingJob):
"""Output what directory the mrjob library is in."""
def mapper_final(self):
View
4 tests/mr_nomapper_multistep.py
@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRNoMapper(MRJob):
+class MRNoMapper(MRTestingJob):
def mapper(self, key, value):
yield key, value
View
4 tests/mr_test_cmdenv.py
@@ -13,10 +13,10 @@
# limitations under the License.
import os
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRTestCmdenv(MRJob):
+class MRTestCmdenv(MRTestingJob):
"""cmdenv test."""
def mapper(self, key, value):
# try adding something
View
4 tests/mr_test_jobconf.py
@@ -26,8 +26,8 @@
# limitations under the License.
"""Tests for JobConf Environment Variables
"""
-from mrjob.job import MRJob
from mrjob.compat import get_jobconf_value
+from tests.mr_testing_job import MRTestingJob
JOBCONF_LIST = [
'mapreduce.job.id',
@@ -45,7 +45,7 @@
]
-class MRTestJobConf(MRJob):
+class MRTestJobConf(MRTestingJob):
def mapper(self, _, line):
for jobconf in JOBCONF_LIST:
View
23 tests/mr_testing_job.py
@@ -0,0 +1,23 @@
+# Copyright 2012 Yelp and Contributors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from mrjob.job import MRJob
+
+
+class MRTestingJob(MRJob):
+ """Simple optimization to make our test cases run faster"""
+
+ def make_runner(self, *args, **kwargs):
+ runner = super(MRTestingJob, self).make_runner(*args, **kwargs)
+ runner._steps = self._steps_desc()
+ return runner
View
4 tests/mr_tower_of_powers.py
@@ -19,11 +19,11 @@
import os
-from mrjob.job import MRJob
from mrjob.protocol import JSONValueProtocol
+from tests.mr_testing_job import MRTestingJob
-class MRTowerOfPowers(MRJob):
+class MRTowerOfPowers(MRTestingJob):
INPUT_PROTOCOL = JSONValueProtocol
OUTPUT_PROTOCOL = JSONValueProtocol
View
4 tests/mr_two_step_job.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trivial multi-step job, useful for testing runners."""
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
try:
import simplejson as json # preferred because of C speedups
@@ -46,7 +46,7 @@ def write(self, key, value):
return '%s\t%s' % (json.dumps(key), json.dumps(value))
-class MRTwoStepJob(MRJob):
+class MRTwoStepJob(MRTestingJob):
INPUT_PROTOCOL = CustomRawValueProtocol
INTERNAL_PROTOCOL = CustomJSONProtocol
View
4 tests/mr_verbose_job.py
@@ -15,10 +15,10 @@
"""We use this to test jobs that emit a large amount of stderr."""
import sys
-from mrjob.job import MRJob
+from tests.mr_testing_job import MRTestingJob
-class MRVerboseJob(MRJob):
+class MRVerboseJob(MRTestingJob):
def mapper_final(self):
# the UNIX pipe buffer can hold 65536 bytes, so this should
View
5 tests/mr_word_count.py
@@ -13,14 +13,15 @@
# limitations under the License.
"""Tests for JobConf Environment Variables
"""
-from mrjob.job import MRJob
import re
+
from mrjob.compat import get_jobconf_value
+from tests.mr_testing_job import MRTestingJob
WORD_RE = re.compile(r"[\w']+")
-class MRWordCount(MRJob):
+class MRWordCount(MRTestingJob):
""" Trivial Job that returns the number of words in each input file
"""
def mapper(self, _, line):
View
31 tests/test_emr.py
@@ -29,6 +29,9 @@
import shutil
from StringIO import StringIO
import tempfile
+import time
+
+from mock import patch
try:
import unittest2 as unittest
@@ -80,10 +83,37 @@
class MockEMRAndS3TestCase(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.fake_mrjob_tgz_path = tempfile.mkstemp(
+ prefix='fake_mrjob_', suffix='.tar.gz')[1]
+
+ @classmethod
+ def tearDownClass(cls):
+ if os.path.exists(cls.fake_mrjob_tgz_path):
+ os.remove(cls.fake_mrjob_tgz_path)
+
def setUp(self):
self.make_mrjob_conf()
self.sandbox_boto()
+ def simple_patch(obj, attr, side_effect=None, autospec=False):
+ patcher = patch.object(obj, attr, side_effect=side_effect,
+ autospec=autospec)
+ patcher.start()
+ self.addCleanup(patcher.stop)
+
+ def fake_create_mrjob_tar_gz(mocked_self, *args, **kwargs):
+ mocked_self._mrjob_tar_gz_path = self.fake_mrjob_tgz_path
+ return self.fake_mrjob_tgz_path
+
+ simple_patch(EMRJobRunner, '_create_mrjob_tar_gz',
+ fake_create_mrjob_tar_gz, autospec=True)
+
+ simple_patch(EMRJobRunner, '_wait_for_s3_eventual_consistency')
+ simple_patch(EMRJobRunner, '_wait_for_job_flow_termination')
+ simple_patch(time, 'sleep')
+
def tearDown(self):
self.unsandbox_boto()
self.rm_mrjob_conf()
@@ -250,7 +280,6 @@ def test_end_to_end(self):
# on real EMR.
self.assertEqual(runner._opts['additional_emr_info'],
'{"key": "value"}')
-
runner.run()
for line in runner.stream_output():
View
4 tests/test_inline.py
@@ -28,10 +28,10 @@
import unittest
from mrjob.conf import dump_mrjob_conf
-from mrjob.job import MRJob
from mrjob.inline import InlineMRJobRunner
from mrjob.protocol import JSONValueProtocol
from tests.mr_test_cmdenv import MRTestCmdenv
+from tests.mr_testing_job import MRTestingJob
from tests.mr_two_step_job import MRTwoStepJob
@@ -140,7 +140,7 @@ def test_cmdenv(self):
# this doesn't need to be in its own file because it'll be run inline
-class MRIncrementerJob(MRJob):
+class MRIncrementerJob(MRTestingJob):
"""A terribly silly way to add a positive integer to values."""
INPUT_PROTOCOL = JSONValueProtocol
View
27 tests/test_job.py
@@ -44,6 +44,7 @@
from mrjob.protocol import ReprProtocol
from mrjob.util import log_to_stream
from tests.mr_hadoop_format_job import MRHadoopFormatJob
+from tests.mr_testing_job import MRTestingJob
from tests.mr_tower_of_powers import MRTowerOfPowers
from tests.mr_two_step_job import MRTwoStepJob
from tests.mr_nomapper_multistep import MRNoMapper
@@ -73,7 +74,7 @@ def stepdict(mapper=_IDENTITY_MAPPER, reducer=None, combiner=None,
# These can't be invoked as a separate script, but they don't need to be
-class MRBoringJob(MRJob):
+class MRBoringJob(MRTestingJob):
"""It's a boring job, but somebody had to do it."""
def mapper(self, key, value):
yield(key, value)
@@ -91,7 +92,7 @@ def mapper_final(self):
yield('num_lines', self.num_lines)
-class MRInitJob(MRJob):
+class MRInitJob(MRTestingJob):
def __init__(self, *args, **kwargs):
super(MRInitJob, self).__init__(*args, **kwargs)
@@ -118,7 +119,7 @@ def combiner(self, key, values):
yield(None, sum(values) * self.combiner_multiplier)
-class MRInvisibleMapperJob(MRJob):
+class MRInvisibleMapperJob(MRTestingJob):
def mapper_init(self):
self.things = 0
@@ -130,7 +131,7 @@ def mapper_final(self):
yield None, self.things
-class MRInvisibleReducerJob(MRJob):
+class MRInvisibleReducerJob(MRTestingJob):
def reducer_init(self):
self.things = 0
@@ -142,7 +143,7 @@ def reducer_final(self):
yield None, self.things
-class MRInvisibleCombinerJob(MRJob):
+class MRInvisibleCombinerJob(MRTestingJob):
def mapper(self, key, value):
yield key, 1
@@ -405,7 +406,7 @@ def internal_protocol(self):
class MRBoringJob4(MRBoringJob):
INTERNAL_PROTOCOL = ReprProtocol
- class MRTrivialJob(MRJob):
+ class MRTrivialJob(MRTestingJob):
OUTPUT_PROTOCOL = ReprProtocol
def mapper(self, key, value):
@@ -549,17 +550,17 @@ class MRBoringJob2(MRBoringJob):
class MRBoringJob3(MRBoringJob):
DEFAULT_PROTOCOL = 'repr'
- class MRTrivialJob(MRJob):
+ class MRTrivialJob(MRTestingJob):
DEFAULT_OUTPUT_PROTOCOL = 'repr'
def mapper(self, key, value):
yield key, value
- class MRInconsistentJob(MRJob):
+ class MRInconsistentJob(MRTestingJob):
DEFAULT_INPUT_PROTOCOL = 'json'
INPUT_PROTOCOL = ReprProtocol
- class MRInconsistentJob2(MRJob):
+ class MRInconsistentJob2(MRTestingJob):
DEFAULT_INPUT_PROTOCOL = 'json'
def input_protocol(self):
@@ -753,11 +754,11 @@ def test_undecodable_output_strict(self):
class JobConfTestCase(unittest.TestCase):
- class MRJobConfJob(MRJob):
+ class MRJobConfJob(MRTestingJob):
JOBCONF = {'mapred.foo': 'garply',
'mapred.bar.bar.baz': 'foo'}
- class MRJobConfMethodJob(MRJob):
+ class MRJobConfMethodJob(MRTestingJob):
def jobconf(self):
return {'mapred.baz': 'bar'}
@@ -853,7 +854,7 @@ class HadoopFormatTestCase(unittest.TestCase):
# MRHadoopFormatJob is imported above
- class MRHadoopFormatMethodJob(MRJob):
+ class MRHadoopFormatMethodJob(MRTestingJob):
def hadoop_input_format(self):
return 'mapred.ReasonableInputFormat'
@@ -919,7 +920,7 @@ def test_deprecated_command_line_options_override_attrs(self):
class PartitionerTestCase(unittest.TestCase):
- class MRPartitionerJob(MRJob):
+ class MRPartitionerJob(MRTestingJob):
PARTITIONER = 'org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner'
def test_empty(self):
View
4 tests/test_local.py
@@ -387,6 +387,10 @@ def test_echo_as_steps_python_bin(self):
with mr_job.make_runner() as runner:
assert isinstance(runner, LocalMRJobRunner)
try:
+ # make_runner() populates _steps in the runner, so un-populate
+ # it here so that the runner actually tries to get the steps
+ # via subprocess
+ runner._steps = None
runner._get_steps()
assert False, 'Should throw exception'
except ValueError, ex:
Please sign in to comment.
Something went wrong with that request. Please try again.