Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements and renaming #165

Merged
merged 1 commit into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "redcat"
version = "0.0.1a139"
version = "0.0.1a140"
description = "A library to manipulate batches of examples"
readme = "README.md"
authors = ["Thibaut Durand <durand.tibo+gh@gmail.com>"]
Expand Down
184 changes: 94 additions & 90 deletions src/redcat/basetensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,81 @@ def shuffle_along_dim_(self, dim: int, generator: torch.Generator | None = None)
"""
self.permute_along_dim_(torch.randperm(self._data.shape[dim], generator=generator), dim=dim)

def sort(
    self,
    dim: int = -1,
    descending: bool = False,
    stable: bool = False,
) -> tuple[TBatchedTensor, TBatchedTensor]:
    r"""Sorts the elements of the batch along a given dimension in monotonic
    order by value.

    Args:
        dim (int, optional): Specifies the dimension to sort
            along. Default: ``-1``
        descending (bool, optional): Controls the sorting order.
            If ``True``, the elements are sorted in descending
            order by value. Default: ``False``
        stable (bool, optional): Makes the sorting routine stable,
            which guarantees that the order of equivalent elements
            is preserved. Default: ``False``

    Returns:
        (``BaseBatchedTensor``, ``BaseBatchedTensor``): A tuple
            of two values:
                - The first batch contains the batch values sorted
                    along the given dimension.
                - The second batch contains the indices that sort
                    the batch along the given dimension.

    Example usage:

    .. code-block:: python

        >>> import torch
        >>> from redcat import BatchedTensor
        >>> BatchedTensor(torch.rand(2, 5)).sort()
        (tensor([[0.2274, 0.4843, 0.4932, 0.8583, 0.9154],
                [0.0101, 0.0733, 0.5018, 0.6007, 0.6589]], batch_dim=0),
         tensor([[2, 3, 4, 1, 0], [4, 3, 1, 0, 2]], batch_dim=0))
    """
    # The batch object itself (not its underlying data) is handed to
    # ``torch.sort``. NOTE(review): wrapping the outputs back into batches
    # presumably relies on the class's torch-function override, which is
    # not visible in this hunk -- confirm.
    return torch.sort(self, dim=dim, descending=descending, stable=stable)

@abstractmethod
def sort_along_batch(
    self,
    descending: bool = False,
    stable: bool = False,
) -> tuple[TBatchedTensor, TBatchedTensor]:
    r"""Sorts the elements of the batch along the batch dimension in
    monotonic order by value.

    This method is abstract: concrete batch classes provide the
    implementation.

    Args:
        descending (bool, optional): Controls the sorting order.
            If ``True``, the elements are sorted in descending
            order by value. Default: ``False``
        stable (bool, optional): Makes the sorting routine stable,
            which guarantees that the order of equivalent elements
            is preserved. Default: ``False``

    Returns:
        (``BaseBatchedTensor``, ``BaseBatchedTensor``): A tuple
            of two values:
                - The first batch contains the batch values sorted
                    along the batch dimension.
                - The second batch contains the indices that sort
                    the batch along the batch dimension.

    Example usage:

    .. code-block:: python

        >>> import torch
        >>> from redcat import BatchedTensor
        >>> BatchedTensor(
        ...     torch.tensor([[4, 9], [1, 7], [2, 5], [5, 6], [3, 8]])
        ... ).sort_along_batch()
        (tensor([[1, 5], [2, 6], [3, 7], [4, 8], [5, 9]], batch_dim=0),
         tensor([[1, 2], [2, 3], [4, 1], [0, 4], [3, 0]], batch_dim=0))
    """

################################################
# Mathematical | point-wise operations #
################################################
Expand Down Expand Up @@ -1272,10 +1347,10 @@ def clamp(

Args:
min (int, float or ``None``, optional): Specifies
the lower bound. If ``min_value`` is ``None``,
the lower bound. If ``min`` is ``None``,
there is no lower bound. Default: ``None``
max (int, float or ``None``, optional): Specifies
the upper bound. If ``max_value`` is ``None``,
the upper bound. If ``max`` is ``None``,
there is no upper bound. Default: ``None``

Returns:
Expand All @@ -1302,22 +1377,22 @@ def clamp(

def clamp_(
self,
min_value: int | float | None = None,
max_value: int | float | None = None,
min: int | float | None = None, # noqa: A002
max: int | float | None = None, # noqa: A002
) -> None:
r"""Clamps all elements in ``self`` into the range ``[min_value,
max_value]``.
r"""Clamps all elements in ``self`` into the range ``[min,
max]``.

Inplace version of ``clamp``.

Note: ``min_value`` and ``max_value`` cannot be both ``None``.
Note: ``min`` and ``max`` cannot be both ``None``.

Args:
min_value (int, float or ``None``, optional): Specifies
the lower bound. If ``min_value`` is ``None``,
min (int, float or ``None``, optional): Specifies
the lower bound. If ``min`` is ``None``,
there is no lower bound. Default: ``None``
max_value (int, float or ``None``, optional): Specifies
the upper bound. If ``max_value`` is ``None``,
max (int, float or ``None``, optional): Specifies
the upper bound. If ``max`` is ``None``,
there is no upper bound. Default: ``None``

Example usage:
Expand All @@ -1327,22 +1402,22 @@ def clamp_(
>>> import torch
>>> from redcat import BatchedTensor
>>> batch = BatchedTensor(torch.arange(10).view(2, 5))
>>> batch.clamp_(min_value=2, max_value=5)
>>> batch.clamp_(min=2, max=5)
>>> batch
tensor([[2, 2, 2, 3, 4],
[5, 5, 5, 5, 5]], batch_dim=0)
>>> batch = BatchedTensor(torch.arange(10).view(2, 5))
>>> batch.clamp_(min_value=2)
>>> batch.clamp_(min=2)
>>> batch
tensor([[2, 2, 2, 3, 4],
[5, 6, 7, 8, 9]], batch_dim=0)
>>> batch = BatchedTensor(torch.arange(10).view(2, 5))
>>> batch.clamp_(max_value=7)
>>> batch.clamp_(max=7)
>>> batch
tensor([[0, 1, 2, 3, 4],
[5, 6, 7, 7, 7]], batch_dim=0)
"""
self._data.clamp_(min=min_value, max=max_value)
self._data.clamp_(min=min, max=max)

def exp(self) -> TBatchedTensor:
r"""Computes the exponential of the elements.
Expand Down Expand Up @@ -1747,6 +1822,10 @@ def sqrt_(self) -> None:
"""
self._data.sqrt_()

################################
# Reduction operations #
################################

###########################################
# Mathematical | trigo operations #
###########################################
Expand Down Expand Up @@ -2812,81 +2891,6 @@ def slice_along_dim(
data = self._data.transpose(0, dim)[start:stop:step].transpose(0, dim)
return self.__class__(data, **self._get_kwargs())

def sort(
    self,
    dim: int = -1,
    descending: bool = False,
    stable: bool = False,
) -> tuple[TBatchedTensor, TBatchedTensor]:
    r"""Sorts the elements of the batch along a given dimension in monotonic
    order by value.

    Args:
        dim (int, optional): Specifies the dimension to sort
            along. Default: ``-1``
        descending (bool, optional): Controls the sorting order.
            If ``True``, the elements are sorted in descending
            order by value. Default: ``False``
        stable (bool, optional): Makes the sorting routine stable,
            which guarantees that the order of equivalent elements
            is preserved. Default: ``False``

    Returns:
        (``BaseBatchedTensor``, ``BaseBatchedTensor``): A tuple
            of two values:
                - The first batch contains the batch values sorted
                    along the given dimension.
                - The second batch contains the indices that sort
                    the batch along the given dimension.

    Example usage:

    .. code-block:: python

        >>> import torch
        >>> from redcat import BatchedTensor
        >>> BatchedTensor(torch.rand(2, 5)).sort()
        (tensor([[0.2274, 0.4843, 0.4932, 0.8583, 0.9154],
                [0.0101, 0.0733, 0.5018, 0.6007, 0.6589]], batch_dim=0),
         tensor([[2, 3, 4, 1, 0], [4, 3, 1, 0, 2]], batch_dim=0))
    """
    # The batch object itself (not its underlying data) is handed to
    # ``torch.sort``. NOTE(review): wrapping the outputs back into batches
    # presumably relies on the class's torch-function override, which is
    # not visible in this hunk -- confirm.
    return torch.sort(self, dim=dim, descending=descending, stable=stable)

@abstractmethod
def sort_along_batch(
    self,
    descending: bool = False,
    stable: bool = False,
) -> tuple[TBatchedTensor, TBatchedTensor]:
    r"""Sorts the elements of the batch along the batch dimension in
    monotonic order by value.

    This method is abstract: concrete batch classes provide the
    implementation.

    Args:
        descending (bool, optional): Controls the sorting order.
            If ``True``, the elements are sorted in descending
            order by value. Default: ``False``
        stable (bool, optional): Makes the sorting routine stable,
            which guarantees that the order of equivalent elements
            is preserved. Default: ``False``

    Returns:
        (``BaseBatchedTensor``, ``BaseBatchedTensor``): A tuple
            of two values:
                - The first batch contains the batch values sorted
                    along the batch dimension.
                - The second batch contains the indices that sort
                    the batch along the batch dimension.

    Example usage:

    .. code-block:: python

        >>> import torch
        >>> from redcat import BatchedTensor
        >>> BatchedTensor(
        ...     torch.tensor([[4, 9], [1, 7], [2, 5], [5, 6], [3, 8]])
        ... ).sort_along_batch()
        (tensor([[1, 5], [2, 6], [3, 7], [4, 8], [5, 9]], batch_dim=0),
         tensor([[1, 2], [2, 3], [4, 1], [0, 4], [3, 0]], batch_dim=0))
    """

def split(
self, split_size_or_sections: int | Sequence[int], dim: int = 0
) -> tuple[TBatchedTensor, ...]:
Expand Down
75 changes: 67 additions & 8 deletions tests/unit/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1798,6 +1798,65 @@ def test_batched_tensor_shuffle_along_dim__different_random_seeds() -> None:
assert not batch1.equal(batch2)


def test_batched_tensor_seq_sort_descending_true() -> None:
    """Checks ``sort`` along the default dimension with ``descending=True``."""
    batch = BatchedTensor(torch.tensor([[4, 1, 2, 5, 3], [9, 7, 5, 6, 8]]))
    sorted_values, sorting_indices = batch.sort(descending=True)
    expected_values = BatchedTensor(torch.tensor([[5, 4, 3, 2, 1], [9, 8, 7, 6, 5]]))
    expected_indices = BatchedTensor(torch.tensor([[3, 0, 4, 2, 1], [0, 4, 1, 3, 2]]))
    assert sorted_values.equal(expected_values)
    assert sorting_indices.equal(expected_indices)


def test_batched_tensor_seq_sort_dim_0() -> None:
    """Checks ``sort`` along dimension 0 of a 2-d batch."""
    batch = BatchedTensor(torch.tensor([[4, 9], [1, 7], [2, 5], [5, 6], [3, 8]]))
    sorted_values, sorting_indices = batch.sort(dim=0)
    expected_values = BatchedTensor(torch.tensor([[1, 5], [2, 6], [3, 7], [4, 8], [5, 9]]))
    expected_indices = BatchedTensor(torch.tensor([[1, 2], [2, 3], [4, 1], [0, 4], [3, 0]]))
    assert sorted_values.equal(expected_values)
    assert sorting_indices.equal(expected_indices)


def test_batched_tensor_seq_sort_dim_1() -> None:
    """Checks ``sort`` along dimension 1 of a 3-d batch."""
    data = torch.tensor(
        [
            [[0, 1], [-2, 3], [-4, 5], [-6, 7], [-8, 9]],
            [[10, -11], [12, -13], [14, -15], [16, -17], [18, -19]],
        ]
    )
    expected_values = torch.tensor(
        [
            [[-8, 1], [-6, 3], [-4, 5], [-2, 7], [0, 9]],
            [[10, -19], [12, -17], [14, -15], [16, -13], [18, -11]],
        ]
    )
    expected_indices = torch.tensor(
        [
            [[4, 0], [3, 1], [2, 2], [1, 3], [0, 4]],
            [[0, 4], [1, 3], [2, 2], [3, 1], [4, 0]],
        ]
    )
    sorted_values, sorting_indices = BatchedTensor(data).sort(dim=1)
    assert sorted_values.equal(BatchedTensor(expected_values))
    assert sorting_indices.equal(BatchedTensor(expected_indices))


def test_batched_tensor_seq_sort_custom_dims() -> None:
    """Checks that ``sort`` keeps a non-default ``batch_dim`` on both outputs."""
    batch = BatchedTensor(
        torch.tensor([[4, 9], [1, 7], [2, 5], [5, 6], [3, 8]]), batch_dim=1
    )
    sorted_values, sorting_indices = batch.sort(dim=0)
    expected_values = BatchedTensor(
        torch.tensor([[1, 5], [2, 6], [3, 7], [4, 8], [5, 9]]), batch_dim=1
    )
    expected_indices = BatchedTensor(
        torch.tensor([[1, 2], [2, 3], [4, 1], [0, 4], [3, 0]]), batch_dim=1
    )
    assert sorted_values.equal(expected_values)
    assert sorting_indices.equal(expected_indices)


def test_batched_tensor_sort_along_batch_descending_false() -> None:
values, indices = BatchedTensor(
torch.tensor([[4, 9], [1, 7], [2, 5], [5, 6], [3, 8]])
Expand Down Expand Up @@ -1881,15 +1940,15 @@ def test_batched_tensor_clamp() -> None:
)


def test_batched_tensor_clamp_only_max_value() -> None:
def test_batched_tensor_clamp_only_max() -> None:
assert (
BatchedTensor(torch.arange(10).view(2, 5))
.clamp(max=5)
.equal(BatchedTensor(torch.tensor([[0, 1, 2, 3, 4], [5, 5, 5, 5, 5]])))
)


def test_batched_tensor_clamp_only_min_value() -> None:
def test_batched_tensor_clamp_only_min() -> None:
assert (
BatchedTensor(torch.arange(10).view(2, 5))
.clamp(min=2)
Expand All @@ -1907,25 +1966,25 @@ def test_batched_tensor_clamp_custom_dims() -> None:

def test_batched_tensor_clamp_() -> None:
batch = BatchedTensor(torch.arange(10).view(2, 5))
batch.clamp_(min_value=2, max_value=5)
batch.clamp_(min=2, max=5)
assert batch.equal(BatchedTensor(torch.tensor([[2, 2, 2, 3, 4], [5, 5, 5, 5, 5]])))


def test_batched_tensor_clamp__only_max_value() -> None:
def test_batched_tensor_clamp__only_max() -> None:
batch = BatchedTensor(torch.arange(10).view(2, 5))
batch.clamp_(max_value=5)
batch.clamp_(max=5)
assert batch.equal(BatchedTensor(torch.tensor([[0, 1, 2, 3, 4], [5, 5, 5, 5, 5]])))


def test_batched_tensor_clamp__only_min_value() -> None:
def test_batched_tensor_clamp__only_min() -> None:
batch = BatchedTensor(torch.arange(10).view(2, 5))
batch.clamp_(min_value=2)
batch.clamp_(min=2)
assert batch.equal(BatchedTensor(torch.tensor([[2, 2, 2, 3, 4], [5, 6, 7, 8, 9]])))


def test_batched_tensor_clamp__custom_dims() -> None:
batch = BatchedTensor(torch.arange(10).view(2, 5), batch_dim=1)
batch.clamp_(min_value=2, max_value=5)
batch.clamp_(min=2, max=5)
assert batch.equal(BatchedTensor(torch.tensor([[2, 2, 2, 3, 4], [5, 5, 5, 5, 5]]), batch_dim=1))


Expand Down
Loading