From 1c14dbc792fca146b810dd71e82c7806906f4e9e Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 29 Jan 2020 15:24:37 +0100 Subject: [PATCH 1/2] Default to OpenCSVSerDe when calling csv_table_definition if serde isn't provided in extra_args. --- awswrangler/glue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awswrangler/glue.py b/awswrangler/glue.py index 435441da3..9345f0f9d 100644 --- a/awswrangler/glue.py +++ b/awswrangler/glue.py @@ -292,7 +292,7 @@ def csv_table_definition(table: str, compressed = False if compression is None else True sep = extra_args["sep"] if "sep" in extra_args else "," sep = "," if sep is None else sep - serde = extra_args.get("serde") + serde = extra_args.get("serde", "OpenCSVSerDe") if serde == "OpenCSVSerDe": serde_fullname = "org.apache.hadoop.hive.serde2.OpenCSVSerde" param = { From 14bd814d8bc249491f233637735a88bfceea3344 Mon Sep 17 00:00:00 2001 From: JPFrancoia Date: Wed, 29 Jan 2020 15:30:08 +0100 Subject: [PATCH 2/2] Docstring update for metadata_to_glue method. --- awswrangler/glue.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/awswrangler/glue.py b/awswrangler/glue.py index 9345f0f9d..00903b0b3 100644 --- a/awswrangler/glue.py +++ b/awswrangler/glue.py @@ -68,19 +68,19 @@ def metadata_to_glue(self, description: Optional[str] = None, parameters: Optional[Dict[str, str]] = None, columns_comments: Optional[Dict[str, str]] = None) -> None: - """ + """Create/update a table in the Glue catalog based on a dataframe. :param dataframe: Pandas Dataframe + :param path: AWS S3 path (E.g. s3://bucket-name/folder_name/) :param objects_paths: Files paths on S3 - :param preserve_index: Should preserve index on S3? - :param partition_cols: partitions names - :param mode: "append", "overwrite", "overwrite_partitions" - :param cast_columns: Dictionary of columns names and Athena/Glue types to be casted. (E.g.
{"col name": "bigint", "col2 name": "int"}) (Only for "parquet" file_format) + :param file_format: "csv" or "parquet" :param database: AWS Glue Database name :param table: AWS Glue table name - :param path: AWS S3 path (E.g. s3://bucket-name/folder_name/ - :param file_format: "csv" or "parquet" + :param partition_cols: partitions names + :param preserve_index: Should preserve index on S3? + :param mode: "append", "overwrite", "overwrite_partitions" :param compression: None, gzip, snappy, etc + :param cast_columns: Dictionary of columns names and Athena/Glue types to be casted. (E.g. {"col name": "bigint", "col2 name": "int"}) (Only for "parquet" file_format) :param extra_args: Extra arguments specific for each file formats (E.g. "sep" for CSV) :param description: Table description :param parameters: Key/value pairs to tag the table (Optional[Dict[str, str]])