Fix typos found by codespell (#10993)
DimitriPapadopoulos committed Mar 13, 2024
1 parent f201f7e commit 5949e54
Showing 48 changed files with 87 additions and 87 deletions.
8 changes: 4 additions & 4 deletions dask/array/core.py
@@ -442,7 +442,7 @@ def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype="dtype", nout=
nout: None or Int
``None`` if function returns single output, integer if many.
- Deafults to ``None``.
+ Defaults to ``None``.
Returns
-------
@@ -1822,7 +1822,7 @@ def to_dask_dataframe(self, columns=None, index=None, meta=None):
The default output index depends on whether the array has any unknown
chunks. If there are any unknown chunks, the output has ``None``
for all the divisions (one per chunk). If all the chunks are known,
- a default index with known divsions is created.
+ a default index with known divisions is created.
Specifying ``index`` can be useful if you're conforming a Dask Array
to an existing dask Series or DataFrame, and you would like the
@@ -4538,7 +4538,7 @@ def asarray(
Reference object to allow the creation of Dask arrays with chunks
that are not NumPy arrays. If an array-like passed in as ``like``
supports the ``__array_function__`` protocol, the chunk type of the
- resulting array will be definde by it. In this case, it ensures the
+ resulting array will be defined by it. In this case, it ensures the
creation of a Dask array compatible with that passed in via this
argument. If ``like`` is a Dask array, the chunk type of the
resulting array will be defined by the chunk type of ``like``.
@@ -4603,7 +4603,7 @@ def asanyarray(a, dtype=None, order=None, *, like=None, inline_array=False):
Reference object to allow the creation of Dask arrays with chunks
that are not NumPy arrays. If an array-like passed in as ``like``
supports the ``__array_function__`` protocol, the chunk type of the
- resulting array will be definde by it. In this case, it ensures the
+ resulting array will be defined by it. In this case, it ensures the
creation of a Dask array compatible with that passed in via this
argument. If ``like`` is a Dask array, the chunk type of the
resulting array will be defined by the chunk type of ``like``.
2 changes: 1 addition & 1 deletion dask/array/einsumfuncs.py
@@ -200,7 +200,7 @@ def einsum(*operands, dtype=None, optimize=False, split_every=None, **kwargs):
split_every: int >= 2 or dict(axis: int), optional
Determines the depth of the recursive aggregation.
- Deafults to ``None`` which would let dask heuristically
+ Defaults to ``None`` which would let dask heuristically
decide a good default.
"""

2 changes: 1 addition & 1 deletion dask/array/gufunc.py
@@ -113,7 +113,7 @@ def _validate_normalize_axes(axes, axis, keepdims, input_coredimss, output_cored
"To use `axis`, all core dimensions have to be equal"
)

- # Expand dafaults or axis
+ # Expand defaults or axis
if axes is None:
if axis is not None:
axes = [(axis,) if cd else tuple() for cd in core_dims]
2 changes: 1 addition & 1 deletion dask/array/ma.py
@@ -78,7 +78,7 @@ def masked_where(condition, a):
cshape = getattr(condition, "shape", ())
if cshape and cshape != a.shape:
raise IndexError(
"Inconsistant shape between the condition and the "
"Inconsistent shape between the condition and the "
"input (got %s and %s)" % (cshape, a.shape)
)
condition = asanyarray(condition)
4 changes: 2 additions & 2 deletions dask/array/overlap.py
@@ -135,7 +135,7 @@ def trim_internal(x, axes, boundary=None):


def _trim(x, axes, boundary, block_info):
"""Similar to dask.array.chunk.trim but requires one to specificy the
"""Similar to dask.array.chunk.trim but requires one to specify the
boundary condition.
``axes``, and ``boundary`` are assumed to have been coerced.
@@ -277,7 +277,7 @@ def _remove_overlap_boundaries(l, r, axis, depth):


def boundaries(x, depth=None, kind=None):
"""Add boundary conditions to an array before overlaping
"""Add boundary conditions to an array before overlapping
See Also
--------
2 changes: 1 addition & 1 deletion dask/array/random.py
@@ -887,7 +887,7 @@ def _choice_validate_params(state, a, size, replace, p, axis, chunks):
size = (size,)

if axis != 0:
raise ValueError("axis must be 0 since a is one dimensinal")
raise ValueError("axis must be 0 since a is one dimensional")

chunks = normalize_chunks(chunks, size, dtype=np.float64)
if not replace and len(chunks[0]) > 1:
2 changes: 1 addition & 1 deletion dask/array/rechunk.py
@@ -669,7 +669,7 @@ def _compute_rechunk(x, chunks):
split_name = "rechunk-split-" + token
split_name_suffixes = count()

- # Pre-allocate old block references, to allow re-use and reduce the
+ # Pre-allocate old block references, to allow reuse and reduce the
# graph's memory footprint a bit.
old_blocks = np.empty([len(c) for c in x.chunks], dtype="O")
for index in np.ndindex(old_blocks.shape):
2 changes: 1 addition & 1 deletion dask/array/reshape.py
@@ -268,7 +268,7 @@ def reshape(x, shape, merge_chunks=True, limit=None):
"To avoid creating the large chunks, set the option\n"
" >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n"
" ... array.reshape(shape)"
"Explictly passing ``limit`` to ``reshape`` will also silence this warning\n"
"Explicitly passing ``limit`` to ``reshape`` will also silence this warning\n"
" >>> array.reshape(shape, limit='128 MiB')"
)
warnings.warn(msg, PerformanceWarning, stacklevel=6)
4 changes: 2 additions & 2 deletions dask/array/routines.py
@@ -1205,7 +1205,7 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
If the sample 0th dimension and weight 0th (row) dimension are
chunked differently, a ``ValueError`` will be raised. If
coordinate groupings ((x, y, z) trios) are separated by a chunk
- boundry, then a ``ValueError`` will be raised. We suggest that you
+ boundary, then a ``ValueError`` will be raised. We suggest that you
rechunk your data if it is of that form.
The chunks property of the data (and optional weights) are used to
@@ -2202,7 +2202,7 @@ def piecewise(x, condlist, funclist, *args, **kw):

def _select(*args, **kwargs):
"""
- This is a version of :func:`numpy.select` that acceptes an arbitrary number of arguments and
+ This is a version of :func:`numpy.select` that accepts an arbitrary number of arguments and
splits them in half to create ``condlist`` and ``choicelist`` params.
"""
split_at = len(args) // 2
18 changes: 9 additions & 9 deletions dask/array/slicing.py
@@ -1303,10 +1303,10 @@ def parse_assignment_indices(indices, shape):
This function is intended to be called by `setitem_array`.
A slice object that is decreasing (i.e. with a negative step), is
- recast as an increasing slice (i.e. with a postive step. For
+ recast as an increasing slice (i.e. with a positive step. For
example ``slice(7,3,-1)`` would be cast as ``slice(4,8,1)``. This
is to facilitate finding which blocks are touched by the
- index. The dimensions for which this has occured are returned by
+ index. The dimensions for which this has occurred are returned by
the function.
Parameters
@@ -1319,7 +1319,7 @@ def parse_assignment_indices(indices, shape):
Returns
-------
parsed_indices : `list`
- The reformated indices that are equivalent to the input
+ The reformatted indices that are equivalent to the input
indices.
implied_shape : `list`
The shape implied by the parsed indices. For instance, indices
@@ -1376,7 +1376,7 @@ def parse_assignment_indices(indices, shape):
f"numpy or dask array index: {index!r}"
)

- # Inititalize output variables
+ # Initialize output variables
implied_shape = []
implied_shape_positions = []
reverse = []
@@ -1524,14 +1524,14 @@ def setitem_array(out_name, array, indices, value):
unchanged.
Each block that overlaps the indices is assigned from the
- approriate part of the assignment value. The dasks of these value
+ appropriate part of the assignment value. The dasks of these value
parts are included in the output dask dictionary, as are the dasks
of any 1-d dask array indices. This ensures that the dask array
assignment value and any dask array indices are not computed until
the `Array.__setitem__` operation is computed.
The part of the assignment value applies to block is created as a
"getitem" slice of the full asignment value.
"getitem" slice of the full assignment value.
Parameters
----------
@@ -1570,7 +1570,7 @@ def setitem_array(out_name, array, indices, value):
overlap the indices. setitem is the chunk assignment function;
v_key is the dask key of the the part of the assignment value
that corresponds to the block; and block_indices are the
- assigment indices that apply to the block.
+ assignment indices that apply to the block.
The dictionary also includes any additional key/value pairs
needed to define v_key, as well as any any additional
@@ -1637,7 +1637,7 @@ def block_index_from_1d_index(dim, loc0, loc1, is_bool):
i = index[i] - loc0

if is_dask_collection(i):
- # Return dask key intead of dask array
+ # Return dask key instead of dask array
i = concatenate_array_chunks(i)
dsk.update(dict(i.dask))
i = next(flatten(i.__dask_keys__()))
@@ -1989,7 +1989,7 @@ def value_indices_from_1d_int_index(dim, vsize, loc0, loc1):
# we can't tell if this block overlaps it, so we
# assume that it does. If it in fact doesn't
# overlap then the part of the assignment value
- # that cooresponds to this block will have zero
+ # that corresponds to this block will have zero
# size which, at compute time, will indicate to
# the `setitem` function to pass the block
# through unchanged.
2 changes: 1 addition & 1 deletion dask/array/tests/test_cupy_routines.py
@@ -37,7 +37,7 @@ def test_compress():
res = da.compress(c, darr, axis=0)

# cupy.compress is not implemented but dask implementation does not
- # rely on np.compress -- move originial data back to host and
+ # rely on np.compress -- move original data back to host and
# compare da.compress with np.compress
assert_eq(np.compress(c.tolist(), carr.tolist(), axis=0), res, check_type=False)

2 changes: 1 addition & 1 deletion dask/array/tests/test_dispatch.py
@@ -227,7 +227,7 @@ def test_is_valid_chunk_type(arr_type, result):


def test_direct_deferral_wrapping_override():
"""Directly test Dask defering to an upcast type and the ability to still wrap it."""
"""Directly test Dask deferring to an upcast type and the ability to still wrap it."""
a = da.from_array(np.arange(4))
b = WrappedArray(np.arange(4))
assert a.__add__(b) is NotImplemented
2 changes: 1 addition & 1 deletion dask/array/tests/test_routines.py
@@ -1957,7 +1957,7 @@ def test_count_nonzero_str():
# We may have behavior differences with NumPy for strings
# with just spaces, depending on the version of NumPy.
# https://github.com/numpy/numpy/issues/9875
- x = np.array(list("Hellow orld"))
+ x = np.array(list("Hello world"))
d = da.from_array(x, chunks=(4,))

x_c = np.count_nonzero(x)
4 changes: 2 additions & 2 deletions dask/base.py
@@ -284,7 +284,7 @@ def visualize(self, filename="mydask", format=None, optimize_graph=False, **kwar
Returns
-------
- result : IPython.diplay.Image, IPython.display.SVG, or None
+ result : IPython.display.Image, IPython.display.SVG, or None
See dask.dot.dot_graph for more information.
See Also
@@ -738,7 +738,7 @@ def visualize(
Returns
-------
- result : IPython.diplay.Image, IPython.display.SVG, or None
+ result : IPython.display.Image, IPython.display.SVG, or None
See dask.dot.dot_graph for more information.
See Also
2 changes: 1 addition & 1 deletion dask/blockwise.py
@@ -165,7 +165,7 @@ def __getitem__(self, idx: tuple[int, ...]) -> Any:
return self.mapping[idx]
except KeyError as err:
# If a DataFrame collection was converted
- # to an Array collection, the dimesion of
+ # to an Array collection, the dimension of
# `idx` may not agree with the keys in
# `self.mapping`. In this case, we can
# use `self.numblocks` to check for a key
2 changes: 1 addition & 1 deletion dask/cli.py
@@ -179,7 +179,7 @@ def _register_command_ep(interface, entry_point):
except Exception as e:
warnings.warn(
f"While registering the command with name '{entry_point.name}', an "
f"exception ocurred; {e}."
f"exception occurred; {e}."
)
return
if not isinstance(command, (click.Command, click.Group)):
2 changes: 1 addition & 1 deletion dask/dataframe/__init__.py
@@ -89,7 +89,7 @@ def _dask_expr_enabled() -> bool:

# trigger loading of dask-expr which will in-turn import dask.dataframe and run remainder
# of this module's init updating attributes to be dask-expr
- # note: needs reload, incase dask-expr imported before dask.dataframe; works fine otherwise
+ # note: needs reload, in case dask-expr imported before dask.dataframe; works fine otherwise
dd = importlib.reload(dd)
except ImportError as e:
msg = (
2 changes: 1 addition & 1 deletion dask/dataframe/backends.py
@@ -307,7 +307,7 @@ def make_meta_object(x, index=None):
)
elif not hasattr(x, "dtype") and x is not None:
# could be a string, a dtype object, or a python type. Skip `None`,
- # because it is implictly converted to `dtype('f8')`, which we don't
+ # because it is implicitly converted to `dtype('f8')`, which we don't
# want here.
try:
dtype = np.dtype(x)
10 changes: 5 additions & 5 deletions dask/dataframe/core.py
@@ -1744,7 +1744,7 @@ def repartition(
should be specified. A ``ValueError`` will be raised when that is
not the case.
- Also note that ``len(divisons)`` is equal to ``npartitions + 1``. This is because ``divisions``
+ Also note that ``len(divisions)`` is equal to ``npartitions + 1``. This is because ``divisions``
represents the upper and lower bounds of each partition. The first item is the
lower bound of the first partition, the second item is the lower bound of the
second partition and the upper bound of the first partition, and so on.
@@ -3548,7 +3548,7 @@ def _cum_agg(
name = f"{self._token_prefix}{op_name}-{suffix}"
cname = f"{self._token_prefix}{op_name}-cum-last-{suffix}"

- # aggregate cumulated partisions and its previous last element
+ # aggregate cumulated partitions and its previous last element
layer = {}
layer[(name, 0)] = (cumpart._name, 0)

@@ -4908,7 +4908,7 @@ def map(self, arg, na_action=None, meta=no_default, is_monotonic=False):
Note that this method clears any known divisions.
If your mapping function is monotonically increasing then use `is_monotonic`
- to apply the maping function to the old divisions and assign the new
+ to apply the mapping function to the old divisions and assign the new
divisions to the output.
"""
@@ -5453,7 +5453,7 @@ def set_index(
'2021-01-05', '2021-01-06', '2021-01-07'],
dtype='datetime64[ns]', freq='D')
- Note that ``len(divisons)`` is equal to ``npartitions + 1``. This is because ``divisions``
+ Note that ``len(divisions)`` is equal to ``npartitions + 1``. This is because ``divisions``
represents the upper and lower bounds of each partition. The first item is the
lower bound of the first partition, the second item is the lower bound of the
second partition and the upper bound of the first partition, and so on.
@@ -8142,7 +8142,7 @@ def repartition_npartitions(df, npartitions):
]
return _repartition_from_boundaries(df, new_partitions_boundaries, new_name)
else:
- # Drop duplcates in case last partition has same
+ # Drop duplicates in case last partition has same
# value for min and max division
original_divisions = divisions = pd.Series(df.divisions).drop_duplicates()
if df.known_divisions and (
2 changes: 1 addition & 1 deletion dask/dataframe/dispatch.py
@@ -111,7 +111,7 @@ def make_meta(x, index=None, parent_meta=None):
determine which back-end to select and dispatch to. To use
utilize this parameter ``make_meta_obj`` has be dispatched.
If ``parent_meta`` is ``None``, a pandas DataFrame is used for
- ``parent_meta`` thats chooses pandas as the backend.
+ ``parent_meta`` that chooses pandas as the backend.
Returns
-------
8 changes: 4 additions & 4 deletions dask/dataframe/groupby.py
@@ -89,7 +89,7 @@
# the ``_determine_levels`` function.
#
# To minimize overhead, any ``by`` that is a series contained within the
- # dataframe is passed as a columnn key. This transformation is implemented as
+ # dataframe is passed as a column key. This transformation is implemented as
# ``_normalize_by``.
#
# #############################################
@@ -452,7 +452,7 @@ def _groupby_aggregate_spec(

def _non_agg_chunk(df, *by, key, dropna=None, observed=None, **kwargs):
"""
- A non-aggregation agg function. This simuates the behavior of an initial
+ A non-aggregation agg function. This simulates the behavior of an initial
partitionwise aggregation, but doesn't actually aggregate or throw away
any data.
"""
@@ -905,7 +905,7 @@ def _build_agg_args(spec):
----------
spec: a list of (result-column, aggregation-function, input-column) triples.
To work with all argument forms understood by pandas use
- ``_normalize_spec`` to normalize the argment before passing it on to
+ ``_normalize_spec`` to normalize the argument before passing it on to
``_build_agg_args``.
Returns
@@ -914,7 +914,7 @@ def _build_agg_args(spec):
that are applied on grouped chunks of the initial dataframe.
agg_funcs: a list of (intermediate-column, functions, keyword) triples that
- are applied on the grouped concatination of the preprocessed chunks.
+ are applied on the grouped concatenation of the preprocessed chunks.
finalizers: a list of (result-column, function, keyword) triples that are
applied after the ``agg_funcs``. They are used to create final results
2 changes: 1 addition & 1 deletion dask/dataframe/io/csv.py
@@ -532,7 +532,7 @@ def read_pandas(
path_converter = None

# If compression is "infer", inspect the (first) path suffix and
- # set the proper compression option if the suffix is recongnized.
+ # set the proper compression option if the suffix is recognized.
if compression == "infer":
# Translate the input urlpath to a simple path list
paths = get_fs_token_paths(urlpath, mode="rb", storage_options=storage_options)[
2 changes: 1 addition & 1 deletion dask/dataframe/io/io.py
@@ -734,7 +734,7 @@ def chunksizes(ind):
# Map current position selection (i)
# to the corresponding division value (div)
div = seq[i]
- # pos is the position of the first occurance of
+ # pos is the position of the first occurrence of
# div (which is i when seq has no duplicates)
if duplicates:
# Note: cupy requires casts to `int` below