Skip to content

Commit

Permalink
TST: Read bz2 files from S3 in PY2
Browse files (browse the repository at this point in the history)
  • Loading branch information
dhimmel committed Dec 16, 2016
1 parent 09dcbff commit 8568aed
Showing 1 changed file with 24 additions and 43 deletions.
67 changes: 24 additions & 43 deletions pandas/io/tests/parser/test_network.py
Expand Up @@ -12,7 +12,6 @@

import pandas.util.testing as tm
from pandas import DataFrame
from pandas import compat
from pandas.io.parsers import read_csv, read_table


Expand Down Expand Up @@ -64,18 +63,12 @@ def setUp(self):
@tm.network
def test_parse_public_s3_bucket(self):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
if comp == 'bz2' and compat.PY2:
# The Python 2 C parser can't read bz2 from S3.
self.assertRaises(ValueError, read_csv,
's3://pandas-test/tips.csv' + ext,
compression=comp)
else:
df = read_csv('s3://pandas-test/tips.csv' +
ext, compression=comp)
self.assertTrue(isinstance(df, DataFrame))
self.assertFalse(df.empty)
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')), df)
df = read_csv('s3://pandas-test/tips.csv' +
ext, compression=comp)
self.assertTrue(isinstance(df, DataFrame))
self.assertFalse(df.empty)
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')), df)

# Read public file from bucket with not-public contents
df = read_csv('s3://cant_get_it/tips.csv')
Expand Down Expand Up @@ -104,43 +97,31 @@ def test_parse_public_s3a_bucket(self):
@tm.network
def test_parse_public_s3_bucket_nrows(self):
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
if comp == 'bz2' and compat.PY2:
# The Python 2 C parser can't read bz2 from S3.
self.assertRaises(ValueError, read_csv,
's3://pandas-test/tips.csv' + ext,
compression=comp)
else:
df = read_csv('s3://pandas-test/tips.csv' +
ext, nrows=10, compression=comp)
self.assertTrue(isinstance(df, DataFrame))
self.assertFalse(df.empty)
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)
df = read_csv('s3://pandas-test/tips.csv' +
ext, nrows=10, compression=comp)
self.assertTrue(isinstance(df, DataFrame))
self.assertFalse(df.empty)
tm.assert_frame_equal(read_csv(
tm.get_data_path('tips.csv')).iloc[:10], df)

@tm.network
def test_parse_public_s3_bucket_chunked(self):
# Read with a chunksize
chunksize = 5
local_tips = read_csv(tm.get_data_path('tips.csv'))
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]:
if comp == 'bz2' and compat.PY2:
# The Python 2 C parser can't read bz2 from S3.
self.assertRaises(ValueError, read_csv,
's3://pandas-test/tips.csv' + ext,
compression=comp)
else:
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
chunksize=chunksize, compression=comp)
self.assertEqual(df_reader.chunksize, chunksize)
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them
# properly.
df = df_reader.get_chunk()
self.assertTrue(isinstance(df, DataFrame))
self.assertFalse(df.empty)
true_df = local_tips.iloc[
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)
df_reader = read_csv('s3://pandas-test/tips.csv' + ext,
chunksize=chunksize, compression=comp)
self.assertEqual(df_reader.chunksize, chunksize)
for i_chunk in [0, 1, 2]:
# Read a couple of chunks and make sure we see them
# properly.
df = df_reader.get_chunk()
self.assertTrue(isinstance(df, DataFrame))
self.assertFalse(df.empty)
true_df = local_tips.iloc[
chunksize * i_chunk: chunksize * (i_chunk + 1)]
tm.assert_frame_equal(true_df, df)

@tm.network
def test_parse_public_s3_bucket_chunked_python(self):
Expand Down

0 comments on commit 8568aed

Please sign in to comment.