ENH: a simple cov

lucascolley · lucascolley · commit 2edc2ce6fea9 · 2024-10-24T12:03:20.000+01:00
TST: cov: add another test

DOC: cov, mean: WIP docstrings

API: make `mean` private

DOC: cov: finish docstring
diff --git a/docs/api-reference.md b/docs/api-reference.md
@@ -7,6 +7,7 @@
     :toctree: generated
 
     atleast_nd
+    cov
     expand_dims
     kron
 ```
diff --git a/src/array_api_extra/__init__.py b/src/array_api_extra/__init__.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
-from ._funcs import atleast_nd, expand_dims, kron
+from ._funcs import atleast_nd, cov, expand_dims, kron
 
 __version__ = "0.1.2.dev0"
 
-__all__ = ["__version__", "atleast_nd", "expand_dims", "kron"]
+__all__ = ["__version__", "atleast_nd", "cov", "expand_dims", "kron"]
diff --git a/src/array_api_extra/_funcs.py b/src/array_api_extra/_funcs.py
@@ -1,11 +1,12 @@
 from __future__ import annotations
 
+import warnings
 from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from ._typing import Array, ModuleType
 
-__all__ = ["atleast_nd", "expand_dims", "kron"]
+__all__ = ["atleast_nd", "cov", "expand_dims", "kron"]
 
 
 def atleast_nd(x: Array, /, *, ndim: int, xp: ModuleType) -> Array:
@@ -48,6 +49,117 @@ def atleast_nd(x: Array, /, *, ndim: int, xp: ModuleType) -> Array:
     return x
 
 
+def cov(m: Array, /, *, xp: ModuleType) -> Array:
+    """
+    Estimate a covariance matrix.
+
+    Covariance indicates the level to which two variables vary together.
+    If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`,
+    then the covariance matrix element :math:`C_{ij}` is the covariance of
+    :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance
+    of :math:`x_i`.
+
+    This provides a subset of the functionality of ``numpy.cov``.
+
+    Parameters
+    ----------
+    m : array
+        A 1-D or 2-D array containing multiple variables and observations.
+        Each row of `m` represents a variable, and each column a single
+        observation of all those variables.
+    xp : array_namespace
+        The standard-compatible namespace for `m`.
+
+    Returns
+    -------
+    res : array
+        The covariance matrix of the variables.
+
+    Examples
+    --------
+    >>> import array_api_strict as xp
+    >>> import array_api_extra as xpx
+
+    Consider two variables, :math:`x_0` and :math:`x_1`, which
+    correlate perfectly, but in opposite directions:
+
+    >>> x = xp.asarray([[0, 2], [1, 1], [2, 0]]).T
+    >>> x
+    Array([[0, 1, 2],
+           [2, 1, 0]], dtype=array_api_strict.int64)
+
+    Note how :math:`x_0` increases while :math:`x_1` decreases. The covariance
+    matrix shows this clearly:
+
+    >>> xpx.cov(x, xp=xp)
+    Array([[ 1., -1.],
+           [-1.,  1.]], dtype=array_api_strict.float64)
+
+
+    Note that element :math:`C_{0,1}`, which shows the correlation between
+    :math:`x_0` and :math:`x_1`, is negative.
+
+    Further, note how `x` and `y` are combined:
+
+    >>> x = xp.asarray([-2.1, -1,  4.3])
+    >>> y = xp.asarray([3,  1.1,  0.12])
+    >>> X = xp.stack((x, y), axis=0)
+    >>> xpx.cov(X, xp=xp)
+    Array([[11.71      , -4.286     ],
+           [-4.286     ,  2.14413333]], dtype=array_api_strict.float64)
+
+    >>> xpx.cov(x, xp=xp)
+    Array(11.71, dtype=array_api_strict.float64)
+
+    >>> xpx.cov(y, xp=xp)
+    Array(2.14413333, dtype=array_api_strict.float64)
+
+    """
+    m = xp.asarray(m, copy=True)
+    dtype = (
+        xp.float64 if xp.isdtype(m.dtype, "integral") else xp.result_type(m, xp.float64)
+    )
+
+    m = atleast_nd(m, ndim=2, xp=xp)
+    m = xp.astype(m, dtype)
+
+    avg = _mean(m, axis=1, xp=xp)
+    fact = m.shape[1] - 1
+
+    if fact <= 0:
+        warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning, stacklevel=2)
+        fact = 0.0
+
+    m -= avg[:, None]
+    m_transpose = m.T
+    if xp.isdtype(m_transpose.dtype, "complex floating"):
+        m_transpose = xp.conj(m_transpose)
+    c = m @ m_transpose
+    c /= fact
+    axes = tuple(axis for axis, length in enumerate(c.shape) if length == 1)
+    return xp.squeeze(c, axis=axes)
+
+
+def _mean(
+    x: Array,
+    /,
+    *,
+    axis: int | tuple[int, ...] | None = None,
+    keepdims: bool = False,
+    xp: ModuleType,
+) -> Array:
+    """
+    Complex mean, https://github.com/data-apis/array-api/issues/846.
+    """
+    if xp.isdtype(x.dtype, "complex floating"):
+        x_real = xp.real(x)
+        x_imag = xp.imag(x)
+        mean_real = xp.mean(x_real, axis=axis, keepdims=keepdims)
+        mean_imag = xp.mean(x_imag, axis=axis, keepdims=keepdims)
+        return mean_real + (mean_imag * xp.asarray(1j))
+    return xp.mean(x, axis=axis, keepdims=keepdims)
+
+
 def expand_dims(
     a: Array, /, *, axis: int | tuple[int, ...] = (0,), xp: ModuleType
 ) -> Array:
diff --git a/tests/test_funcs.py b/tests/test_funcs.py
@@ -1,14 +1,15 @@
 from __future__ import annotations
 
 import contextlib
+import warnings
 from typing import TYPE_CHECKING, Any
 
 # array-api-strict#6
 import array_api_strict as xp  # type: ignore[import-untyped]
 import pytest
-from numpy.testing import assert_array_equal, assert_equal
+from numpy.testing import assert_allclose, assert_array_equal, assert_equal
 
-from array_api_extra import atleast_nd, expand_dims, kron
+from array_api_extra import atleast_nd, cov, expand_dims, kron
 
 if TYPE_CHECKING:
     Array = Any  # To be changed to a Protocol later (see array-api#589)
@@ -76,6 +77,41 @@ def test_5D(self):
         assert_array_equal(y, xp.ones((1, 1, 1, 1, 1, 1, 1, 1, 1)))
 
 
+class TestCov:
+    def test_basic(self):
+        assert_allclose(
+            cov(xp.asarray([[0, 2], [1, 1], [2, 0]]).T, xp=xp),
+            xp.asarray([[1.0, -1.0], [-1.0, 1.0]]),
+        )
+
+    def test_complex(self):
+        x = xp.asarray([[1, 2, 3], [1j, 2j, 3j]])
+        res = xp.asarray([[1.0, -1.0j], [1.0j, 1.0]])
+        assert_allclose(cov(x, xp=xp), res)
+
+    def test_empty(self):
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("always", RuntimeWarning)
+            assert_array_equal(cov(xp.asarray([]), xp=xp), xp.nan)
+            assert_array_equal(
+                cov(xp.reshape(xp.asarray([]), (0, 2)), xp=xp),
+                xp.reshape(xp.asarray([]), (0, 0)),
+            )
+            assert_array_equal(
+                cov(xp.reshape(xp.asarray([]), (2, 0)), xp=xp),
+                xp.asarray([[xp.nan, xp.nan], [xp.nan, xp.nan]]),
+            )
+
+    def test_combination(self):
+        x = xp.asarray([-2.1, -1, 4.3])
+        y = xp.asarray([3, 1.1, 0.12])
+        X = xp.stack((x, y), axis=0)
+        desired = xp.asarray([[11.71, -4.286], [-4.286, 2.144133]])
+        assert_allclose(cov(X, xp=xp), desired, rtol=1e-6)
+        assert_allclose(cov(x, xp=xp), xp.asarray(11.71))
+        assert_allclose(cov(y, xp=xp), xp.asarray(2.144133), rtol=1e-6)
+
+
 class TestKron:
     def test_basic(self):
         # Using 0-dimensional array