Skip to content

Commit

Permalink
added tests that simulate newer hadoop ls behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
David Marin committed Nov 16, 2012
1 parent 5cb2e46 commit 443b837
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 6 deletions.
14 changes: 13 additions & 1 deletion tests/fs/test_hadoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,14 @@ def test_ls_s3n(self):
self.assertEqual(list(self.fs.ls('s3n://bucket/')),
['s3n://bucket/f'])

def test_spaces(self):
def test_single_space(self):
    """A filename containing a single space must round-trip through ls()."""
    self.make_mock_file('foo bar')
    listing = list(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///foo bar'])

def test_double_space(self):
    """A filename containing two consecutive spaces must round-trip
    through ls().

    NOTE(review): as scraped, this test used 'foo bar' (one space),
    making it byte-identical to test_single_space and therefore a no-op
    as a double-space test — consecutive spaces were most likely
    collapsed by HTML rendering. Restored the two-space filename the
    test name calls for.
    """
    self.make_mock_file('foo  bar')
    listing = list(self.fs.ls('hdfs:///'))
    self.assertEqual(listing, ['hdfs:///foo  bar'])

def test_cat_uncompressed(self):
# mockhadoop doesn't support compressed files, so we won't test for it.
# this is only a sanity check anyway.
Expand Down Expand Up @@ -124,3 +128,11 @@ def test_rm(self):
def test_touchz(self):
    """Intentionally a no-op: mockhadoop does not implement touchz."""
    pass


class NewerHadoopFSTestCase(HadoopFSTestCase):
    """Re-run the whole HadoopFSTestCase suite against a mock hadoop
    whose ``fs -lsr`` output echoes fully qualified URIs, emulating
    newer Hadoop versions (see MOCK_HADOOP_LS_RETURNS_FULL_URIS in
    tests/mockhadoop.py).
    """

    def set_up_mock_hadoop(self):
        super(NewerHadoopFSTestCase, self).set_up_mock_hadoop()
        # any true value flips the mock into newer-Hadoop ls mode
        self.env['MOCK_HADOOP_LS_RETURNS_FULL_URIS'] = '1'
20 changes: 15 additions & 5 deletions tests/mockhadoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
fake job output (to add output, use add_mock_output())
MOCK_HADOOP_CMD_LOG -- optional: if this is set, append arguments passed
to the fake hadoop binary to this script, one line per invocation
MOCK_HADOOP_LS_RETURNS_FULL_URIS -- optional: if set to a true value,
ls prints fully qualified URIs (scheme://netloc/path) whenever the
path it was given includes a URI scheme, matching newer Hadoop.
This is designed to run as: python -m tests.mockhadoop <hadoop args>
Expand All @@ -40,6 +42,7 @@
import stat
import sys

from mrjob.compat import version_gte
from mrjob.parse import urlparse


Expand Down Expand Up @@ -198,8 +201,9 @@ def hadoop_fs_lsr(stdout, stderr, environ, *args):
"""Implements hadoop fs -lsr."""
hdfs_path_globs = args or ['']

def ls_line(real_path, scheme):
def ls_line(real_path, scheme, netloc):
hdfs_path = real_path_to_hdfs_path(real_path, environ)

# we could actually implement ls here, but mrjob only cares about
# the path
if os.path.isdir(real_path):
Expand All @@ -213,13 +217,19 @@ def ls_line(real_path, scheme):
else:
user_and_group = 'dave supergroup'

# newer Hadoop returns fully qualified URIs (see Pull Request #577)
if scheme and environ.get('MOCK_HADOOP_LS_RETURNS_FULL_URIS'):
hdfs_path = '%s://%s%s' % (scheme, netloc, hdfs_path)

return (
'%srwxrwxrwx - %s 18321 2010-10-01 15:16 %s' %
(file_type, user_and_group, hdfs_path))

failed = False
for hdfs_path_glob in hdfs_path_globs:
scheme = urlparse(hdfs_path_glob).scheme
parsed = urlparse(hdfs_path_glob)
scheme = parsed.scheme
netloc = parsed.netloc

real_path_glob = hdfs_path_to_real_path(hdfs_path_glob, environ)
real_paths = glob.glob(real_path_glob)
Expand All @@ -232,12 +242,12 @@ def ls_line(real_path, scheme):
for real_path in real_paths:
if os.path.isdir(real_path):
for dirpath, dirnames, filenames in os.walk(real_path):
print >> stdout, ls_line(dirpath, scheme)
print >> stdout, ls_line(dirpath, scheme, netloc)
for filename in filenames:
path = os.path.join(dirpath, filename)
print >> stdout, ls_line(path, scheme)
print >> stdout, ls_line(path, scheme, netloc)
else:
print >> stdout, ls_line(real_path, scheme)
print >> stdout, ls_line(real_path, scheme, netloc)

if failed:
return -1
Expand Down

0 comments on commit 443b837

Please sign in to comment.