huggingface · cakiki · Nov 1, 2024 · Nov 1, 2024 · May 21, 2025 · May 21, 2025
diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py
@@ -1426,7 +1426,7 @@ def save_to_disk(
                 Path (e.g. `dataset/train`) or remote URI (e.g. `s3://my-bucket/dataset/train`)
                 of the dataset directory where the dataset will be saved to.
             max_shard_size (`int` or `str`, *optional*, defaults to `"500MB"`):
-                The maximum size of the dataset shards to be uploaded to the hub. If expressed as a string, needs to be digits followed by a unit
+                The maximum size of the dataset shards to be saved to the filesystem. If expressed as a string, needs to be digits followed by a unit
                 (like `"50MB"`).
             num_shards (`int`, *optional*):
                 Number of shards to write. By default the number of shards depends on `max_shard_size` and `num_proc`.

diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py
@@ -1240,7 +1240,7 @@ def save_to_disk(
                 Path (e.g. `dataset/train`) or remote URI (e.g. `s3://my-bucket/dataset/train`)
                 of the dataset dict directory where the dataset dict will be saved to.
             max_shard_size (`int` or `str`, *optional*, defaults to `"500MB"`):
-                The maximum size of the dataset shards to be uploaded to the hub. If expressed as a string, needs to be digits followed by a unit
+                The maximum size of the dataset shards to be saved to the filesystem. If expressed as a string, needs to be digits followed by a unit
                 (like `"50MB"`).
             num_shards (`Dict[str, int]`, *optional*):
                 Number of shards to write. By default the number of shards depends on `max_shard_size` and `num_proc`.

diff --git a/src/datasets/utils/py_utils.py b/src/datasets/utils/py_utils.py
@@ -709,7 +709,7 @@ def iflatmap_unordered(
                     pool_changed = True
                     # One of the subprocesses has died. We should not wait forever.
                     raise RuntimeError(
-                        "One of the subprocesses has abruptly died during map operation."
+                        "One of the subprocesses has abruptly died during the map operation. "
                         "To debug the error, disable multiprocessing."
                     )
         finally:

diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py
@@ -1744,7 +1744,7 @@ def do_crash(row):
                 with pytest.raises(RuntimeError) as excinfo:
                     dset.map(do_crash, num_proc=2)
                 assert str(excinfo.value) == (
-                    "One of the subprocesses has abruptly died during map operation."
+                    "One of the subprocesses has abruptly died during the map operation. "
                     "To debug the error, disable multiprocessing."
                 )