From 312fb0fca2af636ceb7373d6f6dc06cb7d87a8fc Mon Sep 17 00:00:00 2001
From: Matthew Rocklin <mrocklin@gmail.com>
Date: Fri, 24 Nov 2023 08:38:08 -0600
Subject: [PATCH] Change default parquet compression format from Snappy to LZ4

Snappy's status as the default is probably just historical.  Snappy had
better Java support, and LZ4 wasn't always available in systems like
Spark.  Today Spark and other systems support LZ4 as well, and LZ4
generally performs a bit better, especially on decompression.

This is a significant change, but I think the only reason not to make it
is historical, and I don't think that is a good enough reason these
days.
---
 dask/dataframe/io/parquet/core.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/dask/dataframe/io/parquet/core.py b/dask/dataframe/io/parquet/core.py
index d26a266d63b..d02d048d204 100644
--- a/dask/dataframe/io/parquet/core.py
+++ b/dask/dataframe/io/parquet/core.py
@@ -698,7 +698,7 @@ def to_parquet(
     df,
     path,
     engine="auto",
-    compression="snappy",
+    compression="lz4",
     write_index=True,
     append=False,
     overwrite=False,
@@ -729,10 +729,10 @@ def to_parquet(
     engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
         Parquet library to use. Defaults to 'auto', which uses ``pyarrow`` if
         it is installed, and falls back to ``fastparquet`` otherwise.
-    compression : string or dict, default 'snappy'
-        Either a string like ``"snappy"`` or a dictionary mapping column names
-        to compressors like ``{"name": "gzip", "values": "snappy"}``. Defaults
-        to ``"snappy"``.
+    compression : string or dict, default 'lz4'
+        Either a string like ``"lz4"`` or a dictionary mapping column names
+        to compressors like ``{"name": "gzip", "values": "lz4"}``. Defaults
+        to ``"lz4"``.
     write_index : boolean, default True
         Whether or not to write the index. Defaults to True.
     append : bool, default False