Fix typos found by codespell (#10993)
DimitriPapadopoulos committed Mar 13, 2024
1 parent f201f7e commit 5949e54
Showing 48 changed files with 87 additions and 87 deletions.
8 changes: 4 additions & 4 deletions dask/array/core.py
@@ -442,7 +442,7 @@ def apply_infer_dtype(func, args, kwargs, funcname, suggest_dtype="dtype", nout=
nout: None or Int
``None`` if function returns single output, integer if many.
- Deafults to ``None``.
+ Defaults to ``None``.
Returns
-------
@@ -1822,7 +1822,7 @@ def to_dask_dataframe(self, columns=None, index=None, meta=None):
The default output index depends on whether the array has any unknown
chunks. If there are any unknown chunks, the output has ``None``
for all the divisions (one per chunk). If all the chunks are known,
- a default index with known divsions is created.
+ a default index with known divisions is created.
Specifying ``index`` can be useful if you're conforming a Dask Array
to an existing dask Series or DataFrame, and you would like the
@@ -4538,7 +4538,7 @@ def asarray(
Reference object to allow the creation of Dask arrays with chunks
that are not NumPy arrays. If an array-like passed in as ``like``
supports the ``__array_function__`` protocol, the chunk type of the
- resulting array will be definde by it. In this case, it ensures the
+ resulting array will be defined by it. In this case, it ensures the
creation of a Dask array compatible with that passed in via this
argument. If ``like`` is a Dask array, the chunk type of the
resulting array will be defined by the chunk type of ``like``.
@@ -4603,7 +4603,7 @@ def asanyarray(a, dtype=None, order=None, *, like=None, inline_array=False):
Reference object to allow the creation of Dask arrays with chunks
that are not NumPy arrays. If an array-like passed in as ``like``
supports the ``__array_function__`` protocol, the chunk type of the
- resulting array will be definde by it. In this case, it ensures the
+ resulting array will be defined by it. In this case, it ensures the
creation of a Dask array compatible with that passed in via this
argument. If ``like`` is a Dask array, the chunk type of the
resulting array will be defined by the chunk type of ``like``.
2 changes: 1 addition & 1 deletion dask/array/einsumfuncs.py
@@ -200,7 +200,7 @@ def einsum(*operands, dtype=None, optimize=False, split_every=None, **kwargs):
split_every: int >= 2 or dict(axis: int), optional
Determines the depth of the recursive aggregation.
- Deafults to ``None`` which would let dask heuristically
+ Defaults to ``None`` which would let dask heuristically
decide a good default.
"""

2 changes: 1 addition & 1 deletion dask/array/gufunc.py
@@ -113,7 +113,7 @@ def _validate_normalize_axes(axes, axis, keepdims, input_coredimss, output_cored
"To use `axis`, all core dimensions have to be equal"
)

- # Expand dafaults or axis
+ # Expand defaults or axis
if axes is None:
if axis is not None:
axes = [(axis,) if cd else tuple() for cd in core_dims]
2 changes: 1 addition & 1 deletion dask/array/ma.py
@@ -78,7 +78,7 @@ def masked_where(condition, a):
cshape = getattr(condition, "shape", ())
if cshape and cshape != a.shape:
raise IndexError(
"Inconsistant shape between the condition and the "
"Inconsistent shape between the condition and the "
"input (got %s and %s)" % (cshape, a.shape)
)
condition = asanyarray(condition)
4 changes: 2 additions & 2 deletions dask/array/overlap.py
@@ -135,7 +135,7 @@ def trim_internal(x, axes, boundary=None):


def _trim(x, axes, boundary, block_info):
"""Similar to dask.array.chunk.trim but requires one to specificy the
"""Similar to dask.array.chunk.trim but requires one to specify the
boundary condition.
``axes``, and ``boundary`` are assumed to have been coerced.
@@ -277,7 +277,7 @@ def _remove_overlap_boundaries(l, r, axis, depth):


def boundaries(x, depth=None, kind=None):
"""Add boundary conditions to an array before overlaping
"""Add boundary conditions to an array before overlapping
See Also
--------
2 changes: 1 addition & 1 deletion dask/array/random.py
@@ -887,7 +887,7 @@ def _choice_validate_params(state, a, size, replace, p, axis, chunks):
size = (size,)

if axis != 0:
raise ValueError("axis must be 0 since a is one dimensinal")
raise ValueError("axis must be 0 since a is one dimensional")

chunks = normalize_chunks(chunks, size, dtype=np.float64)
if not replace and len(chunks[0]) > 1:
2 changes: 1 addition & 1 deletion dask/array/rechunk.py
@@ -669,7 +669,7 @@ def _compute_rechunk(x, chunks):
split_name = "rechunk-split-" + token
split_name_suffixes = count()

- # Pre-allocate old block references, to allow re-use and reduce the
+ # Pre-allocate old block references, to allow reuse and reduce the
# graph's memory footprint a bit.
old_blocks = np.empty([len(c) for c in x.chunks], dtype="O")
for index in np.ndindex(old_blocks.shape):
2 changes: 1 addition & 1 deletion dask/array/reshape.py
@@ -268,7 +268,7 @@ def reshape(x, shape, merge_chunks=True, limit=None):
"To avoid creating the large chunks, set the option\n"
" >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):\n"
" ... array.reshape(shape)"
"Explictly passing ``limit`` to ``reshape`` will also silence this warning\n"
"Explicitly passing ``limit`` to ``reshape`` will also silence this warning\n"
" >>> array.reshape(shape, limit='128 MiB')"
)
warnings.warn(msg, PerformanceWarning, stacklevel=6)
4 changes: 2 additions & 2 deletions dask/array/routines.py
@@ -1205,7 +1205,7 @@ def histogramdd(sample, bins, range=None, normed=None, weights=None, density=Non
If the sample 0th dimension and weight 0th (row) dimension are
chunked differently, a ``ValueError`` will be raised. If
coordinate groupings ((x, y, z) trios) are separated by a chunk
- boundry, then a ``ValueError`` will be raised. We suggest that you
+ boundary, then a ``ValueError`` will be raised. We suggest that you
rechunk your data if it is of that form.
The chunks property of the data (and optional weights) are used to
@@ -2202,7 +2202,7 @@ def piecewise(x, condlist, funclist, *args, **kw):

def _select(*args, **kwargs):
"""
- This is a version of :func:`numpy.select` that acceptes an arbitrary number of arguments and
+ This is a version of :func:`numpy.select` that accepts an arbitrary number of arguments and
splits them in half to create ``condlist`` and ``choicelist`` params.
"""
split_at = len(args) // 2
18 changes: 9 additions & 9 deletions dask/array/slicing.py
@@ -1303,10 +1303,10 @@ def parse_assignment_indices(indices, shape):
This function is intended to be called by `setitem_array`.
A slice object that is decreasing (i.e. with a negative step), is
- recast as an increasing slice (i.e. with a postive step. For
+ recast as an increasing slice (i.e. with a positive step. For
example ``slice(7,3,-1)`` would be cast as ``slice(4,8,1)``. This
is to facilitate finding which blocks are touched by the
- index. The dimensions for which this has occured are returned by
+ index. The dimensions for which this has occurred are returned by
the function.
Parameters
@@ -1319,7 +1319,7 @@ def parse_assignment_indices(indices, shape):
Returns
-------
parsed_indices : `list`
- The reformated indices that are equivalent to the input
+ The reformatted indices that are equivalent to the input
indices.
implied_shape : `list`
The shape implied by the parsed indices. For instance, indices
@@ -1376,7 +1376,7 @@ def parse_assignment_indices(indices, shape):
f"numpy or dask array index: {index!r}"
)

- # Inititalize output variables
+ # Initialize output variables
implied_shape = []
implied_shape_positions = []
reverse = []
@@ -1524,14 +1524,14 @@ def setitem_array(out_name, array, indices, value):
unchanged.
Each block that overlaps the indices is assigned from the
- approriate part of the assignment value. The dasks of these value
+ appropriate part of the assignment value. The dasks of these value
parts are included in the output dask dictionary, as are the dasks
of any 1-d dask array indices. This ensures that the dask array
assignment value and any dask array indices are not computed until
the `Array.__setitem__` operation is computed.
The part of the assignment value applies to block is created as a
"getitem" slice of the full asignment value.
"getitem" slice of the full assignment value.
Parameters
----------
@@ -1570,7 +1570,7 @@ def setitem_array(out_name, array, indices, value):
overlap the indices. setitem is the chunk assignment function;
v_key is the dask key of the the part of the assignment value
that corresponds to the block; and block_indices are the
- assigment indices that apply to the block.
+ assignment indices that apply to the block.
The dictionary also includes any additional key/value pairs
needed to define v_key, as well as any any additional
@@ -1637,7 +1637,7 @@ def block_index_from_1d_index(dim, loc0, loc1, is_bool):
i = index[i] - loc0

if is_dask_collection(i):
- # Return dask key intead of dask array
+ # Return dask key instead of dask array
i = concatenate_array_chunks(i)
dsk.update(dict(i.dask))
i = next(flatten(i.__dask_keys__()))
@@ -1989,7 +1989,7 @@ def value_indices_from_1d_int_index(dim, vsize, loc0, loc1):
# we can't tell if this block overlaps it, so we
# assume that it does. If it in fact doesn't
# overlap then the part of the assignment value
- # that cooresponds to this block will have zero
+ # that corresponds to this block will have zero
# size which, at compute time, will indicate to
# the `setitem` function to pass the block
# through unchanged.
2 changes: 1 addition & 1 deletion dask/array/tests/test_cupy_routines.py
@@ -37,7 +37,7 @@ def test_compress():
res = da.compress(c, darr, axis=0)

# cupy.compress is not implemented but dask implementation does not
- # rely on np.compress -- move originial data back to host and
+ # rely on np.compress -- move original data back to host and
# compare da.compress with np.compress
assert_eq(np.compress(c.tolist(), carr.tolist(), axis=0), res, check_type=False)

2 changes: 1 addition & 1 deletion dask/array/tests/test_dispatch.py
@@ -227,7 +227,7 @@ def test_is_valid_chunk_type(arr_type, result):


def test_direct_deferral_wrapping_override():
"""Directly test Dask defering to an upcast type and the ability to still wrap it."""
"""Directly test Dask deferring to an upcast type and the ability to still wrap it."""
a = da.from_array(np.arange(4))
b = WrappedArray(np.arange(4))
assert a.__add__(b) is NotImplemented
2 changes: 1 addition & 1 deletion dask/array/tests/test_routines.py
@@ -1957,7 +1957,7 @@ def test_count_nonzero_str():
# We may have behavior differences with NumPy for strings
# with just spaces, depending on the version of NumPy.
# https://github.com/numpy/numpy/issues/9875
- x = np.array(list("Hellow orld"))
+ x = np.array(list("Hello world"))
d = da.from_array(x, chunks=(4,))

x_c = np.count_nonzero(x)
4 changes: 2 additions & 2 deletions dask/base.py
@@ -284,7 +284,7 @@ def visualize(self, filename="mydask", format=None, optimize_graph=False, **kwar
Returns
-------
- result : IPython.diplay.Image, IPython.display.SVG, or None
+ result : IPython.display.Image, IPython.display.SVG, or None
See dask.dot.dot_graph for more information.
See Also
@@ -738,7 +738,7 @@ def visualize(
Returns
-------
- result : IPython.diplay.Image, IPython.display.SVG, or None
+ result : IPython.display.Image, IPython.display.SVG, or None
See dask.dot.dot_graph for more information.
See Also
2 changes: 1 addition & 1 deletion dask/blockwise.py
@@ -165,7 +165,7 @@ def __getitem__(self, idx: tuple[int, ...]) -> Any:
return self.mapping[idx]
except KeyError as err:
# If a DataFrame collection was converted
- # to an Array collection, the dimesion of
+ # to an Array collection, the dimension of
# `idx` may not agree with the keys in
# `self.mapping`. In this case, we can
# use `self.numblocks` to check for a key
2 changes: 1 addition & 1 deletion dask/cli.py
@@ -179,7 +179,7 @@ def _register_command_ep(interface, entry_point):
except Exception as e:
warnings.warn(
f"While registering the command with name '{entry_point.name}', an "
f"exception ocurred; {e}."
f"exception occurred; {e}."
)
return
if not isinstance(command, (click.Command, click.Group)):
2 changes: 1 addition & 1 deletion dask/dataframe/__init__.py
@@ -89,7 +89,7 @@ def _dask_expr_enabled() -> bool:

# trigger loading of dask-expr which will in-turn import dask.dataframe and run remainder
# of this module's init updating attributes to be dask-expr
- # note: needs reload, incase dask-expr imported before dask.dataframe; works fine otherwise
+ # note: needs reload, in case dask-expr imported before dask.dataframe; works fine otherwise
dd = importlib.reload(dd)
except ImportError as e:
msg = (
2 changes: 1 addition & 1 deletion dask/dataframe/backends.py
@@ -307,7 +307,7 @@ def make_meta_object(x, index=None):
)
elif not hasattr(x, "dtype") and x is not None:
# could be a string, a dtype object, or a python type. Skip `None`,
- # because it is implictly converted to `dtype('f8')`, which we don't
+ # because it is implicitly converted to `dtype('f8')`, which we don't
# want here.
try:
dtype = np.dtype(x)
10 changes: 5 additions & 5 deletions dask/dataframe/core.py
@@ -1744,7 +1744,7 @@ def repartition(
should be specified. A ``ValueError`` will be raised when that is
not the case.
- Also note that ``len(divisons)`` is equal to ``npartitions + 1``. This is because ``divisions``
+ Also note that ``len(divisions)`` is equal to ``npartitions + 1``. This is because ``divisions``
represents the upper and lower bounds of each partition. The first item is the
lower bound of the first partition, the second item is the lower bound of the
second partition and the upper bound of the first partition, and so on.
@@ -3548,7 +3548,7 @@ def _cum_agg(
name = f"{self._token_prefix}{op_name}-{suffix}"
cname = f"{self._token_prefix}{op_name}-cum-last-{suffix}"

- # aggregate cumulated partisions and its previous last element
+ # aggregate cumulated partitions and its previous last element
layer = {}
layer[(name, 0)] = (cumpart._name, 0)

@@ -4908,7 +4908,7 @@ def map(self, arg, na_action=None, meta=no_default, is_monotonic=False):
Note that this method clears any known divisions.
If your mapping function is monotonically increasing then use `is_monotonic`
- to apply the maping function to the old divisions and assign the new
+ to apply the mapping function to the old divisions and assign the new
divisions to the output.
"""
@@ -5453,7 +5453,7 @@ def set_index(
'2021-01-05', '2021-01-06', '2021-01-07'],
dtype='datetime64[ns]', freq='D')
- Note that ``len(divisons)`` is equal to ``npartitions + 1``. This is because ``divisions``
+ Note that ``len(divisions)`` is equal to ``npartitions + 1``. This is because ``divisions``
represents the upper and lower bounds of each partition. The first item is the
lower bound of the first partition, the second item is the lower bound of the
second partition and the upper bound of the first partition, and so on.
@@ -8142,7 +8142,7 @@ def repartition_npartitions(df, npartitions):
]
return _repartition_from_boundaries(df, new_partitions_boundaries, new_name)
else:
- # Drop duplcates in case last partition has same
+ # Drop duplicates in case last partition has same
# value for min and max division
original_divisions = divisions = pd.Series(df.divisions).drop_duplicates()
if df.known_divisions and (
2 changes: 1 addition & 1 deletion dask/dataframe/dispatch.py
@@ -111,7 +111,7 @@ def make_meta(x, index=None, parent_meta=None):
determine which back-end to select and dispatch to. To use
utilize this parameter ``make_meta_obj`` has be dispatched.
If ``parent_meta`` is ``None``, a pandas DataFrame is used for
- ``parent_meta`` thats chooses pandas as the backend.
+ ``parent_meta`` that chooses pandas as the backend.
Returns
-------
8 changes: 4 additions & 4 deletions dask/dataframe/groupby.py
@@ -89,7 +89,7 @@
# the ``_determine_levels`` function.
#
# To minimize overhead, any ``by`` that is a series contained within the
- # dataframe is passed as a columnn key. This transformation is implemented as
+ # dataframe is passed as a column key. This transformation is implemented as
# ``_normalize_by``.
#
# #############################################
@@ -452,7 +452,7 @@ def _groupby_aggregate_spec(

def _non_agg_chunk(df, *by, key, dropna=None, observed=None, **kwargs):
"""
- A non-aggregation agg function. This simuates the behavior of an initial
+ A non-aggregation agg function. This simulates the behavior of an initial
partitionwise aggregation, but doesn't actually aggregate or throw away
any data.
"""
@@ -905,7 +905,7 @@ def _build_agg_args(spec):
----------
spec: a list of (result-column, aggregation-function, input-column) triples.
To work with all argument forms understood by pandas use
- ``_normalize_spec`` to normalize the argment before passing it on to
+ ``_normalize_spec`` to normalize the argument before passing it on to
``_build_agg_args``.
Returns
@@ -914,7 +914,7 @@ def _build_agg_args(spec):
that are applied on grouped chunks of the initial dataframe.
agg_funcs: a list of (intermediate-column, functions, keyword) triples that
- are applied on the grouped concatination of the preprocessed chunks.
+ are applied on the grouped concatenation of the preprocessed chunks.
finalizers: a list of (result-column, function, keyword) triples that are
applied after the ``agg_funcs``. They are used to create final results
2 changes: 1 addition & 1 deletion dask/dataframe/io/csv.py
@@ -532,7 +532,7 @@ def read_pandas(
path_converter = None

# If compression is "infer", inspect the (first) path suffix and
- # set the proper compression option if the suffix is recongnized.
+ # set the proper compression option if the suffix is recognized.
if compression == "infer":
# Translate the input urlpath to a simple path list
paths = get_fs_token_paths(urlpath, mode="rb", storage_options=storage_options)[
2 changes: 1 addition & 1 deletion dask/dataframe/io/io.py
@@ -734,7 +734,7 @@ def chunksizes(ind):
# Map current position selection (i)
# to the corresponding division value (div)
div = seq[i]
- # pos is the position of the first occurance of
+ # pos is the position of the first occurrence of
# div (which is i when seq has no duplicates)
if duplicates:
# Note: cupy requires casts to `int` below