Skip to content

Commit

Permalink
Add "is_duplicate" attribute to HashAddress and make HashFS.put() ret…
Browse files Browse the repository at this point in the history
…urn HashAddress with "is_duplicate=True" when file with same hash already exists on disk.

Refs #1.
  • Loading branch information
dgilland committed Jul 1, 2015
1 parent 26de497 commit 244000b
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Expand Up @@ -3,6 +3,8 @@ Changelog


- Rename private method ``HashFS.copy`` to ``HashFS._copy``.
- Add ``is_duplicate`` attribute to ``HashAddress``.
- Make ``HashFS.put()`` return ``HashAddress`` with ``is_duplicate=True`` when a file with the same hash already exists on disk.


v0.4.0 (2015-06-03)
Expand Down
3 changes: 3 additions & 0 deletions README.rst
Expand Up @@ -104,6 +104,9 @@ Add content to the folder using either readable objects (e.g. ``StringIO``) or f
# The path relative to fs.root.
address.relpath
# Whether the file previously existed.
address.is_duplicate
Retrieving File Address
-----------------------
Expand Down
22 changes: 17 additions & 5 deletions hashfs/hashfs.py
Expand Up @@ -64,9 +64,9 @@ def put(self, file, extension=None):

with closing(stream):
id = self.computehash(stream)
filepath = self.copy(stream, id, extension)
filepath, is_duplicate = self._copy(stream, id, extension)

return HashAddress(id, self.relpath(filepath), filepath)
return HashAddress(id, self.relpath(filepath), filepath, is_duplicate)

def _copy(self, stream, id, extension=None):
"""Copy the contents of `stream` onto disk with an optional file
Expand All @@ -77,11 +77,14 @@ def _copy(self, stream, id, extension=None):

# Only copy file if it doesn't already exist.
if not os.path.isfile(filepath):
is_duplicate = False
with tmpfile(stream, self.fmode) as fname:
self.makepath(os.path.dirname(filepath))
shutil.copy(fname, filepath)
else:
is_duplicate = True

return filepath
return (filepath, is_duplicate)

def get(self, file):
"""Return :class:`HashAddress` from given id or path. If `file` does not
Expand Down Expand Up @@ -329,15 +332,24 @@ def __len__(self):
return self.count()


class HashAddress(namedtuple('HashAddress', ['id', 'relpath', 'abspath'])):
class HashAddress(namedtuple('HashAddress',
['id', 'relpath', 'abspath', 'is_duplicate'])):
"""File address containing file's path on disk and its content hash ID.
Attributes:
id (str): Hash ID (hexdigest) of file contents.
relpath (str): Relative path location to :attr:`HashFS.root`.
abspath (str): Absolute path location of file on disk.
is_duplicate (boolean, optional): Whether the hash address created was
a duplicate of a previously existing file. Can only be ``True``
after a put operation. Defaults to ``False``.
"""
pass
def __new__(cls, id, relpath, abspath, is_duplicate=False):
return super(HashAddress, cls).__new__(cls,
id,
relpath,
abspath,
is_duplicate)


class Stream(object):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_hashfs.py
Expand Up @@ -96,6 +96,14 @@ def test_hashfs_put_file(fs, filepath):
assert fileobj.read() == to_bytes(filepath.read())


def test_hashfs_put_duplicate(fs, stringio):
    """Check that storing the same content twice marks only the second
    returned address as a duplicate.
    """
    # Both puts happen before any assertion; the tuple is evaluated left to
    # right, so the first element is the initial store.
    first_put, second_put = fs.put(stringio), fs.put(stringio)

    assert not first_put.is_duplicate
    assert second_put.is_duplicate


@pytest.mark.parametrize('extension', [
'txt',
'.txt',
Expand All @@ -108,6 +116,7 @@ def test_hashfs_put_extension(fs, stringio, extension):
assert_file_put(fs, address)
assert os.path.sep in address.abspath
assert os.path.splitext(address.abspath)[1].endswith(extension)
assert not address.is_duplicate


def test_hashfs_put_error(fs):
Expand All @@ -121,6 +130,7 @@ def test_hashfs_address(fs, stringio):
assert fs.root not in address.relpath
assert os.path.join(fs.root, address.relpath) == address.abspath
assert address.relpath.replace(os.sep, '') == address.id
assert not address.is_duplicate


@pytest.mark.parametrize('extension,address_attr', [
Expand Down Expand Up @@ -166,6 +176,7 @@ def test_hashfs_contains(fs, stringio):
def test_hashfs_get(fs, stringio):
address = fs.put(stringio)

assert not address.is_duplicate
assert fs.get(address.id) == address
assert fs.get(address.relpath) == address
assert fs.get(address.abspath) == address
Expand Down

0 comments on commit 244000b

Please sign in to comment.