Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve file_size flexibility #440

Merged: 5 commits merged on Apr 23, 2019
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
44 changes: 30 additions & 14 deletions great_expectations/data_asset/file_data_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ def expect_file_hash_to_equal(self, value, hash_alg='md5', result_format=None,
return {"success":success}

@DataAsset.expectation(["minsize", "maxsize"])
def expect_file_size_to_be_between(self, minsize, maxsize, result_format=None,
def expect_file_size_to_be_between(self, minsize=0, maxsize=None, result_format=None,
include_config=False, catch_exceptions=None,
meta=None):

Expand Down Expand Up @@ -421,31 +421,47 @@ def expect_file_size_to_be_between(self, minsize, maxsize, result_format=None,
:ref:`include_config`, :ref:`catch_exceptions`, and :ref:`meta`.
"""

success = False
try:
size = os.path.getsize(self._path)
except OSError:
raise

if not isinstance(minsize,int):
raise TypeError('minsize must be an integer')

if not isinstance(maxsize,int):
raise TypeError('maxsize must be an integer')
# We want string or float or int versions of numbers, but
# they must be representable as clean integers.
try:
if not float(minsize).is_integer():
raise ValueError('minsize must be an integer')
minsize = int(float(minsize))

if maxsize is not None and not float(maxsize).is_integer():
raise ValueError('maxsize must be an integer')
elif maxsize is not None:
maxsize = int(float(maxsize))
except TypeError:
raise

if minsize < 0:
raise ValueError('minsize must be greater than of equal to 0')
raise ValueError('minsize must be greater than or equal to 0')

if maxsize < 0:
raise ValueError('maxsize must be greater than of equal to 0')
if maxsize is not None and maxsize < 0:
raise ValueError('maxsize must be greater than or equal to 0')

if minsize > maxsize:
raise ValueError('maxsize must be greater than of equal to minsize')
if maxsize is not None and minsize > maxsize:
raise ValueError('maxsize must be greater than or equal to minsize')

if (size >= minsize) and (size <= maxsize):
if maxsize is None and size >= minsize:
success = True
elif (size >= minsize) and (size <= maxsize):
success = True
else:
success = False

return {"success":success}
return {
"success": success,
"details": {
"filesize": size
}
}

@DataAsset.expectation(["filepath"])
def expect_file_to_exist(self, filepath=None, result_format=None, include_config=False,
Expand Down
8 changes: 4 additions & 4 deletions tests/test_filedata_asset_expectations.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,12 @@ def test_expect_file_size_to_be_between():
titanic_file = ge.data_asset.FileDataAsset(titanic_path)

# Test minsize not an integer
with pytest.raises(TypeError):
titanic_file.expect_file_size_to_be_between('0', 10000)
with pytest.raises(ValueError):
titanic_file.expect_file_size_to_be_between('a', 10000)

# Test maxsize not an integer
with pytest.raises(TypeError):
titanic_file.expect_file_size_to_be_between(0, '10000')
with pytest.raises(ValueError):
titanic_file.expect_file_size_to_be_between(0, '10000a')

# Test minsize less than 0
with pytest.raises(ValueError):
Expand Down