Skip to content

Commit

Permalink
Handling empty Pandas DataFrames
Browse files (browse the repository at this point in the history)
  • Loading branch information
igorborgest committed Sep 6, 2019
1 parent 6e1319b commit 6c1fe73
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 2 deletions.
4 changes: 4 additions & 0 deletions awswrangler/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ class InvalidRedshiftSortstyle(Exception):

class InvalidRedshiftSortkey(Exception):
    """Signal an invalid Redshift sort key.

    NOTE(review): the meaning is inferred from the class name only; the code
    that raises this exception is not visible here -- confirm against the
    Redshift module.
    """


class EmptyDataframe(Exception):
    """Raised when an empty Pandas DataFrame is handed to a write operation.

    ``Pandas.to_s3`` checks ``dataframe.empty`` before doing any work and
    raises this exception instead of attempting to write.
    """
5 changes: 4 additions & 1 deletion awswrangler/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import pyarrow
from pyarrow import parquet

from awswrangler.exceptions import UnsupportedWriteMode, UnsupportedFileFormat, AthenaQueryError, EmptyS3Object, LineTerminatorNotFound
from awswrangler.exceptions import UnsupportedWriteMode, UnsupportedFileFormat,\
AthenaQueryError, EmptyS3Object, LineTerminatorNotFound, EmptyDataframe
from awswrangler.utils import calculate_bounders
from awswrangler import s3

Expand Down Expand Up @@ -535,6 +536,8 @@ def to_s3(self,
:param cast_columns: Dictionary of columns indexes and Arrow types to be casted. (E.g. {2: "int64", 5: "int32"}) (Only for "parquet" file_format)
:return: List of objects written on S3
"""
if dataframe.empty:
raise EmptyDataframe()
if not partition_cols:
partition_cols = []
if mode == "overwrite" or (mode == "overwrite_partitions"
Expand Down
13 changes: 12 additions & 1 deletion testing/test_awswrangler/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import numpy

from awswrangler import Session, Pandas
from awswrangler.exceptions import LineTerminatorNotFound
from awswrangler.exceptions import LineTerminatorNotFound, EmptyDataframe

logging.basicConfig(
level=logging.INFO,
Expand Down Expand Up @@ -391,3 +391,14 @@ def test_to_parquet_with_kms(
break
sleep(1)
assert len(dataframe.index) == len(dataframe2.index)


def test_to_parquet_with_empty_dataframe(session, bucket, database):
    """Writing an empty DataFrame as Parquet must raise EmptyDataframe.

    :param session: awswrangler Session fixture
    :param bucket: Name of the S3 bucket fixture used as the write target
    :param database: Name of the database fixture passed to to_parquet
    """
    dataframe = pandas.DataFrame()
    # The call itself is expected to raise, so its return value is never
    # produced and there is nothing to assert on inside the context manager.
    with pytest.raises(EmptyDataframe):
        session.pandas.to_parquet(dataframe=dataframe,
                                  database=database,
                                  path=f"s3://{bucket}/test/",
                                  preserve_index=False,
                                  mode="overwrite",
                                  procs_cpu_bound=1)

0 comments on commit 6c1fe73

Please sign in to comment.