Skip to content

Commit

Permalink
get schema before removing null parts
Browse files (browse the repository at this point in the history)
  • Loading branch information
rjzamora committed Nov 14, 2020
1 parent 44f3a04 commit 37098ad
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions dask/dataframe/io/parquet/arrow.py
Expand Up @@ -1030,15 +1030,16 @@ def write_partition(

     @staticmethod
     def write_metadata(parts, fmd, fs, path, append=False, **kwargs):
+        schema = parts[0][0].get("schema", None)
         parts = [p for p in parts if p[0]["meta"] is not None]
         if parts:
-            if "schema" in parts[0][0] and not append:
+            if not append:
                 # Get only arguments specified in the function
                 common_metadata_path = fs.sep.join([path, "_common_metadata"])
                 keywords = getargspec(pq.write_metadata).args
                 kwargs_meta = {k: v for k, v in kwargs.items() if k in keywords}
                 with fs.open(common_metadata_path, "wb") as fil:
-                    pq.write_metadata(parts[0][0]["schema"], fil, **kwargs_meta)
+                    pq.write_metadata(schema, fil, **kwargs_meta)

             # Aggregate metadata and write to _metadata file
             metadata_path = fs.sep.join([path, "_metadata"])
Expand Down

0 comments on commit 37098ad

Please sign in to comment.