Permalink
Browse files

added tests that simulate newer hadoop ls behavior

  • Loading branch information...
1 parent: 5cb2e46 · commit 443b8378da2b10b361b8f06f75c5e60265eac1d3 — @davidmarin (davidmarin) committed Nov 16, 2012
Showing with 28 additions and 6 deletions.
  1. +13 −1 tests/fs/test_hadoop.py
  2. +15 −5 tests/mockhadoop.py
@@ -75,10 +75,14 @@ def test_ls_s3n(self):
self.assertEqual(list(self.fs.ls('s3n://bucket/')),
['s3n://bucket/f'])
def test_single_space(self):
    """A file name containing a single space survives a round trip
    through ls (path comes back as a full hdfs:/// URI)."""
    self.make_mock_file('foo bar')
    paths = list(self.fs.ls('hdfs:///'))
    self.assertEqual(paths, ['hdfs:///foo bar'])
def test_double_space(self):
    """A file name containing TWO consecutive spaces survives ls.

    NOTE(review): the double space in 'foo  bar' is the entire point of
    this test — the rendered diff collapsed it to a single space, which
    made this an exact duplicate of test_single_space. Restored here so
    the two tests actually cover different inputs.
    """
    self.make_mock_file('foo  bar')
    self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///foo  bar'])
def test_cat_uncompressed(self):
# mockhadoop doesn't support compressed files, so we won't test for it.
# this is only a sanity check anyway.
@@ -124,3 +128,11 @@ def test_rm(self):
def test_touchz(self):
    # mockhadoop doesn't implement `fs -touchz`, so there is nothing to
    # exercise; this test is intentionally a no-op.
    pass
class NewerHadoopFSTestCase(HadoopFSTestCase):
    """Re-run all of HadoopFSTestCase's tests against newer Hadoop's
    ``ls`` behavior, where paths come back as fully qualified URIs.

    This works by setting ``MOCK_HADOOP_LS_RETURNS_FULL_URIS`` in the
    mock hadoop binary's environment (see tests/mockhadoop.py).
    """

    def set_up_mock_hadoop(self):
        super(NewerHadoopFSTestCase, self).set_up_mock_hadoop()

        # any non-empty value opts in to the mock's full-URI ls output
        self.env['MOCK_HADOOP_LS_RETURNS_FULL_URIS'] = '1'
View
@@ -22,6 +22,8 @@
fake job output (to add output, use add_mock_output())
MOCK_HADOOP_CMD_LOG -- optional: if this is set, append arguments passed
to the fake hadoop binary to this script, one line per invocation
+MOCK_HADOOP_LS_RETURNS_FULL_URIS -- optional: if true, ls returns full URIs
+when passed URIs.
This is designed to run as: python -m tests.mockhadoop <hadoop args>
@@ -40,6 +42,7 @@
import stat
import sys
+from mrjob.compat import version_gte
from mrjob.parse import urlparse
@@ -198,8 +201,9 @@ def hadoop_fs_lsr(stdout, stderr, environ, *args):
"""Implements hadoop fs -lsr."""
hdfs_path_globs = args or ['']
- def ls_line(real_path, scheme):
+ def ls_line(real_path, scheme, netloc):
hdfs_path = real_path_to_hdfs_path(real_path, environ)
+
# we could actually implement ls here, but mrjob only cares about
# the path
if os.path.isdir(real_path):
@@ -213,13 +217,19 @@ def ls_line(real_path, scheme):
else:
user_and_group = 'dave supergroup'
+ # newer Hadoop returns fully qualified URIs (see Pull Request #577)
+ if scheme and environ.get('MOCK_HADOOP_LS_RETURNS_FULL_URIS'):
+ hdfs_path = '%s://%s%s' % (scheme, netloc, hdfs_path)
+
return (
'%srwxrwxrwx - %s 18321 2010-10-01 15:16 %s' %
(file_type, user_and_group, hdfs_path))
failed = False
for hdfs_path_glob in hdfs_path_globs:
- scheme = urlparse(hdfs_path_glob).scheme
+ parsed = urlparse(hdfs_path_glob)
+ scheme = parsed.scheme
+ netloc = parsed.netloc
real_path_glob = hdfs_path_to_real_path(hdfs_path_glob, environ)
real_paths = glob.glob(real_path_glob)
@@ -232,12 +242,12 @@ def ls_line(real_path, scheme):
for real_path in real_paths:
if os.path.isdir(real_path):
for dirpath, dirnames, filenames in os.walk(real_path):
- print >> stdout, ls_line(dirpath, scheme)
+ print >> stdout, ls_line(dirpath, scheme, netloc)
for filename in filenames:
path = os.path.join(dirpath, filename)
- print >> stdout, ls_line(path, scheme)
+ print >> stdout, ls_line(path, scheme, netloc)
else:
- print >> stdout, ls_line(real_path, scheme)
+ print >> stdout, ls_line(real_path, scheme, netloc)
if failed:
return -1

0 comments on commit 443b837

Please sign in to comment.