TST: placement of network error catching in s3 tests (pandas-dev#19645)
jreback authored and harisbal committed Feb 28, 2018
1 parent aa97648 commit 49a016b
Showing 14 changed files with 114 additions and 59 deletions.
13 changes: 9 additions & 4 deletions pandas/io/common.py
@@ -183,7 +183,10 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
Returns
-------
a filepath_ or buffer or S3File instance, the encoding, the compression
tuple of ({a filepath_ or buffer or S3File instance},
encoding, str,
compression, str,
should_close, bool)
"""
filepath_or_buffer = _stringify_path(filepath_or_buffer)

@@ -194,7 +197,8 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
# Override compression based on Content-Encoding header
compression = 'gzip'
reader = BytesIO(req.read())
return reader, encoding, compression
req.close()
return reader, encoding, compression, True

if is_s3_url(filepath_or_buffer):
from pandas.io import s3
@@ -206,13 +210,13 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
if isinstance(filepath_or_buffer, (compat.string_types,
compat.binary_type,
mmap.mmap)):
return _expand_user(filepath_or_buffer), None, compression
return _expand_user(filepath_or_buffer), None, compression, False

if not is_file_like(filepath_or_buffer):
msg = "Invalid file path or buffer object type: {_type}"
raise ValueError(msg.format(_type=type(filepath_or_buffer)))

return filepath_or_buffer, None, compression
return filepath_or_buffer, None, compression, False


def file_path_to_url(path):
@@ -309,6 +313,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None,
is_text : boolean, default True
whether file/buffer is in text format (csv, json, etc.), or in binary
mode (pickle, etc.)
Returns
-------
f : file-like
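The net effect of these hunks is that get_filepath_or_buffer now returns a 4-tuple whose last element, should_close, tells the caller whether the function opened the handle itself (the URL and S3 cases) and therefore whether the caller is responsible for closing it. A minimal sketch of the calling convention follows; read_bytes is a hypothetical helper written for illustration, not code from this commit.

from pandas.io.common import get_filepath_or_buffer

def read_bytes(path_or_buf):
    # Hypothetical helper illustrating the new 4-tuple contract: the last
    # element is True only when get_filepath_or_buffer opened the handle
    # (e.g. a URL or an s3fs file), so we close it after reading.
    buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
    if isinstance(buf, str):
        # A plain filesystem path is passed through unchanged; open it here.
        with open(buf, 'rb') as f:
            return f.read()
    try:
        return buf.read()
    finally:
        if should_close:
            try:
                buf.close()
            except:  # noqa: flake8
                pass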
2 changes: 1 addition & 1 deletion pandas/io/excel.py
@@ -381,7 +381,7 @@ def __init__(self, io, **kwds):
if _is_url(self._io):
io = _urlopen(self._io)
elif not isinstance(self.io, (ExcelFile, xlrd.Book)):
io, _, _ = get_filepath_or_buffer(self._io)
io, _, _, _ = get_filepath_or_buffer(self._io)

if engine == 'xlrd' and isinstance(io, xlrd.Book):
self.book = io
10 changes: 8 additions & 2 deletions pandas/io/json/json.py
@@ -404,7 +404,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
"""

compression = _infer_compression(path_or_buf, compression)
filepath_or_buffer, _, compression = get_filepath_or_buffer(
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
path_or_buf, encoding=encoding, compression=compression,
)

@@ -419,7 +419,13 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
if chunksize:
return json_reader

return json_reader.read()
result = json_reader.read()
if should_close:
try:
filepath_or_buffer.close()
except: # noqa: flake8
pass
return result


class JsonReader(BaseIterator):
8 changes: 7 additions & 1 deletion pandas/io/packers.py
@@ -180,14 +180,20 @@ def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
obj : type of object stored in file
"""
path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf)
if iterator:
return Iterator(path_or_buf)

def read(fh):
l = list(unpack(fh, encoding=encoding, **kwargs))
if len(l) == 1:
return l[0]

if should_close:
try:
path_or_buf.close()
except: # noqa: flake8
pass
return l

# see if we have an actual file
30 changes: 19 additions & 11 deletions pandas/io/parquet.py
@@ -107,7 +107,7 @@ def write(self, df, path, compression='snappy',
self.validate_dataframe(df)
if self._pyarrow_lt_070:
self._validate_write_lt_070(df)
path, _, _ = get_filepath_or_buffer(path, mode='wb')
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')

if self._pyarrow_lt_060:
table = self.api.Table.from_pandas(df, timestamps_to_ms=True)
@@ -121,13 +121,21 @@ def write(self, df, path, compression='snappy',
coerce_timestamps=coerce_timestamps, **kwargs)

def read(self, path, columns=None, **kwargs):
path, _, _ = get_filepath_or_buffer(path)
path, _, _, should_close = get_filepath_or_buffer(path)
if self._pyarrow_lt_070:
return self.api.parquet.read_pandas(path, columns=columns,
**kwargs).to_pandas()
kwargs['use_pandas_metadata'] = True
return self.api.parquet.read_table(path, columns=columns,
**kwargs).to_pandas()
result = self.api.parquet.read_pandas(path, columns=columns,
**kwargs).to_pandas()
else:
kwargs['use_pandas_metadata'] = True
result = self.api.parquet.read_table(path, columns=columns,
**kwargs).to_pandas()
if should_close:
try:
path.close()
except: # noqa: flake8
pass

return result

def _validate_write_lt_070(self, df):
# Compatibility shim for pyarrow < 0.7.0
@@ -199,11 +207,11 @@ def write(self, df, path, compression='snappy', **kwargs):
# path is s3:// so we need to open the s3file in 'wb' mode.
# TODO: Support 'ab'

path, _, _ = get_filepath_or_buffer(path, mode='wb')
path, _, _, _ = get_filepath_or_buffer(path, mode='wb')
# And pass the opened s3file to the fastparquet internal impl.
kwargs['open_with'] = lambda path, _: path
else:
path, _, _ = get_filepath_or_buffer(path)
path, _, _, _ = get_filepath_or_buffer(path)

with catch_warnings(record=True):
self.api.write(path, df,
@@ -214,13 +222,13 @@ def read(self, path, columns=None, **kwargs):
# When path is s3:// an S3File is returned.
# We need to retain the original path(str) while also
# pass the S3File().open function to fsatparquet impl.
s3, _, _ = get_filepath_or_buffer(path)
s3, _, _, should_close = get_filepath_or_buffer(path)
try:
parquet_file = self.api.ParquetFile(path, open_with=s3.s3.open)
finally:
s3.close()
else:
path, _, _ = get_filepath_or_buffer(path)
path, _, _, _ = get_filepath_or_buffer(path)
parquet_file = self.api.ParquetFile(path)

return parquet_file.to_pandas(columns=columns, **kwargs)
9 changes: 8 additions & 1 deletion pandas/io/parsers.py
@@ -413,7 +413,7 @@ def _read(filepath_or_buffer, kwds):

compression = kwds.get('compression')
compression = _infer_compression(filepath_or_buffer, compression)
filepath_or_buffer, _, compression = get_filepath_or_buffer(
filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer(
filepath_or_buffer, encoding, compression)
kwds['compression'] = compression

@@ -439,6 +439,13 @@ def _read(filepath_or_buffer, kwds):
data = parser.read(nrows)
finally:
parser.close()

if should_close:
try:
filepath_or_buffer.close()
except: # noqa: flake8
pass

return data


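The should_close cleanup introduced above repeats verbatim in read_json, read_msgpack, the pyarrow parquet reader, and _read. A small sketch of a hypothetical helper that would centralize the pattern (not part of this commit); it swallows close errors the same way the bare except clauses in the diff do.

def _close_if_owned(handle, should_close):
    # Hypothetical helper, not in this commit: close `handle` only when
    # get_filepath_or_buffer reports that it opened the handle itself,
    # ignoring errors raised on close just like the readers above.
    if should_close:
        try:
            handle.close()
        except:  # noqa: flake8
            pass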
4 changes: 2 additions & 2 deletions pandas/io/s3.py
@@ -27,7 +27,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
fs = s3fs.S3FileSystem(anon=False)
try:
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
except (OSError, NoCredentialsError):
except (compat.FileNotFoundError, NoCredentialsError):
# boto3 has troubles when trying to access a public file
# when credentialed...
# An OSError is raised if you have credentials, but they
@@ -36,4 +36,4 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
# for that bucket.
fs = s3fs.S3FileSystem(anon=True)
filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode)
return filepath_or_buffer, None, compression
return filepath_or_buffer, None, compression, True
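The fallback's except clause swaps OSError for compat.FileNotFoundError. A short, hedged illustration of what that name resolves to, assuming the usual aliasing in pandas.compat of this era: the builtin FileNotFoundError on Python 3 and IOError on Python 2, so the anonymous retry still triggers under both interpreters.

from pandas import compat

try:
    open('/no/such/key')
except compat.FileNotFoundError:
    # Python 3: the builtin FileNotFoundError (a subclass of OSError).
    # Python 2: assumed to alias IOError, so the same clause still matches.
    print('would retry with an anonymous S3FileSystem here')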
2 changes: 1 addition & 1 deletion pandas/io/sas/sas7bdat.py
@@ -90,7 +90,7 @@ def __init__(self, path_or_buf, index=None, convert_dates=True,
self._current_row_on_page_index = 0
self._current_row_in_file_index = 0

self._path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf)
if isinstance(self._path_or_buf, compat.string_types):
self._path_or_buf = open(self._path_or_buf, 'rb')
self.handle = self._path_or_buf
3 changes: 2 additions & 1 deletion pandas/io/sas/sas_xport.py
@@ -236,7 +236,8 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
self._chunksize = chunksize

if isinstance(filepath_or_buffer, str):
filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
(filepath_or_buffer, encoding,
compression, should_close) = get_filepath_or_buffer(
filepath_or_buffer, encoding=encoding)

if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
2 changes: 1 addition & 1 deletion pandas/io/stata.py
@@ -988,7 +988,7 @@ def __init__(self, path_or_buf, convert_dates=True,
self._native_byteorder = _set_endianness(sys.byteorder)
path_or_buf = _stringify_path(path_or_buf)
if isinstance(path_or_buf, str):
path_or_buf, encoding, _ = get_filepath_or_buffer(
path_or_buf, encoding, _, should_close = get_filepath_or_buffer(
path_or_buf, encoding=self._default_encoding
)

53 changes: 31 additions & 22 deletions pandas/tests/io/conftest.py
@@ -2,30 +2,34 @@

import pytest
from pandas.io.parsers import read_table
from pandas.util import testing as tm

HERE = os.path.dirname(__file__)

@pytest.fixture
def parser_data(request):
return os.path.join(tm.get_data_path(), '..', 'parser', 'data')

@pytest.fixture(scope='module')
def tips_file():

@pytest.fixture
def tips_file(parser_data):
"""Path to the tips dataset"""
return os.path.join(HERE, 'parser', 'data', 'tips.csv')
return os.path.join(parser_data, 'tips.csv')


@pytest.fixture(scope='module')
def jsonl_file():
@pytest.fixture
def jsonl_file(parser_data):
"""Path a JSONL dataset"""
return os.path.join(HERE, 'parser', 'data', 'items.jsonl')
return os.path.join(parser_data, 'items.jsonl')


@pytest.fixture(scope='module')
def salaries_table():
@pytest.fixture
def salaries_table(parser_data):
"""DataFrame with the salaries dataset"""
path = os.path.join(HERE, 'parser', 'data', 'salaries.csv')
path = os.path.join(parser_data, 'salaries.csv')
return read_table(path)


@pytest.fixture(scope='module')
@pytest.fixture
def s3_resource(tips_file, jsonl_file):
"""Fixture for mocking S3 interaction.
@@ -41,8 +45,8 @@ def s3_resource(tips_file, jsonl_file):
is yielded by the fixture.
"""
pytest.importorskip('s3fs')
boto3 = pytest.importorskip('boto3')
moto = pytest.importorskip('moto')
moto.mock_s3().start()

test_s3_files = [
('tips.csv', tips_file),
@@ -58,17 +62,22 @@ def add_tips_files(bucket_name):
Key=s3_key,
Body=f)

boto3 = pytest.importorskip('boto3')
# see gh-16135
bucket = 'pandas-test'
try:

conn = boto3.resource("s3", region_name="us-east-1")
conn.create_bucket(Bucket=bucket)
add_tips_files(bucket)
s3 = moto.mock_s3()
s3.start()

conn.create_bucket(Bucket='cant_get_it', ACL='private')
add_tips_files('cant_get_it')
# see gh-16135
bucket = 'pandas-test'
conn = boto3.resource("s3", region_name="us-east-1")

yield conn
conn.create_bucket(Bucket=bucket)
add_tips_files(bucket)

moto.mock_s3().stop()
conn.create_bucket(Bucket='cant_get_it', ACL='private')
add_tips_files('cant_get_it')
yield conn
except: # noqa: flake8
pytest.skip("failure to use s3 resource")
finally:
s3.stop()
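With setup wrapped in try/except/finally, a failure to start moto or create the buckets now skips dependent tests instead of erroring during fixture setup, and the mock is always stopped. A hedged sketch of how a test consumes the fixture; the test name is illustrative, and the s3:// key matches the tips.csv uploaded by add_tips_files above.

from pandas import read_csv
from pandas.util import testing as tm

def test_read_csv_from_mocked_s3(s3_resource, tips_file):
    # s3_resource yields a boto3 resource backed by moto; if mocking could
    # not be set up, the fixture calls pytest.skip and this body never runs.
    result = read_csv('s3://pandas-test/tips.csv')
    expected = read_csv(tips_file)
    tm.assert_frame_equal(result, expected)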
1 change: 0 additions & 1 deletion pandas/tests/io/json/test_pandas.py
@@ -1039,7 +1039,6 @@ def test_read_inline_jsonl(self):
assert_frame_equal(result, expected)

def test_read_s3_jsonl(self, s3_resource):
pytest.importorskip('s3fs')
# GH17200

result = read_json('s3n://pandas-test/items.jsonl', lines=True)