Features/31 outer #596

Merged · 33 commits · Jun 24, 2020
Changes from 28 commits
Commits (33)
8b697c1
implementing outer(): first pass, local outer product
ClaudiaComito Jun 5, 2020
2f678e5
Broken: outer() distributed implementation, out.split=0
ClaudiaComito Jun 6, 2020
1512dec
distributed outer() for out.split = 0
ClaudiaComito Jun 7, 2020
da83730
if outer_split is None, pick smallest tensor for ring communication
ClaudiaComito Jun 7, 2020
478f696
outer(), distributed, out.split = 1
ClaudiaComito Jun 8, 2020
14f4a46
reorganized and added a few conditions to address special cases
ClaudiaComito Jun 8, 2020
4af07ea
Input and output sanitation
ClaudiaComito Jun 8, 2020
a686cec
Modified some variable names for consistency
ClaudiaComito Jun 8, 2020
35fa323
Docs update
ClaudiaComito Jun 9, 2020
652886f
Fixed type promotion problem, set default split to None
ClaudiaComito Jun 11, 2020
e04e8e6
Implemented test_outer()
ClaudiaComito Jun 11, 2020
52818df
removed print statements
ClaudiaComito Jun 11, 2020
7ea6dde
Testing more exceptions
ClaudiaComito Jun 11, 2020
f9e1c14
Updating changelog
ClaudiaComito Jun 11, 2020
5d651b5
Merge branch 'master' into features/31-outer
ClaudiaComito Jun 11, 2020
8e37e5c
Added examples
ClaudiaComito Jun 11, 2020
85a4eea
Reviewed split semantics of returned DNDarray, reviewed docs, tests.
ClaudiaComito Jun 12, 2020
03f3a60
Reviewed tests
ClaudiaComito Jun 12, 2020
3921ede
Default flattening if DNDarrays are larger than 1D, added tests.
ClaudiaComito Jun 12, 2020
44737a9
Docs update
ClaudiaComito Jun 12, 2020
cfa74ab
updated changelog
ClaudiaComito Jun 15, 2020
13e6e59
Updated docs
ClaudiaComito Jun 15, 2020
ac75c7b
Merge branch 'master' into features/31-outer
ClaudiaComito Jun 15, 2020
7913c0a
updated changelog
ClaudiaComito Jun 16, 2020
52fd98e
Merge branch 'master' into features/31-outer
ClaudiaComito Jun 17, 2020
00c758d
Merge branch 'features/31-outer' of github.com:helmholtz-analytics/he…
ClaudiaComito Jun 17, 2020
ce30d9f
Docs update
ClaudiaComito Jun 17, 2020
e6b604a
Docs formatting
ClaudiaComito Jun 17, 2020
90d4076
Docs formatting
ClaudiaComito Jun 18, 2020
24c21bb
More docs formatting
ClaudiaComito Jun 18, 2020
ef2d11d
Buffer initialization for traveling data outside of ranks loop
ClaudiaComito Jun 18, 2020
e15a87e
Typos
ClaudiaComito Jun 18, 2020
93a54a5
Merge branch 'master' into features/31-outer
Markus-Goetz Jun 24, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -11,6 +11,7 @@
- [#593](https://github.com/helmholtz-analytics/heat/pull/593) New feature arctan2()
- [#594](https://github.com/helmholtz-analytics/heat/pull/594) New feature: Advanced indexing
- [#594](https://github.com/helmholtz-analytics/heat/pull/594) Bugfix: getitem and setitem memory consumption heavily reduced
- [#596](https://github.com/helmholtz-analytics/heat/pull/596) New feature: outer()

# v0.4.0

221 changes: 220 additions & 1 deletion heat/core/linalg/basics.py
@@ -9,7 +9,7 @@
from .. import manipulations
from .. import types

__all__ = ["dot", "matmul", "norm", "projection", "transpose", "tril", "triu"]
__all__ = ["dot", "matmul", "norm", "outer", "projection", "transpose", "tril", "triu"]


def dot(a, b, out=None):
@@ -827,6 +827,225 @@ def norm(a):
return exponential.sqrt(d).item()


def outer(a, b, out=None, split=None):
"""
Compute the outer product of two 1-D DNDarrays.

Given two vectors, ``a`` = [a_0, a_1, ..., a_n] and ``b`` = [b_0, b_1, ..., b_m], the outer product is:

[[a_0*b_0 a_0*b_1 ... a_0*b_m]
[a_1*b_0 .
[ ... .
[a_n*b_0 . a_n*b_m]]

Parameters
----------

a(n,): DNDarray
First input DNDarray. Must be 1-dimensional. Will be flattened by default if more than 1-D

b(m,): DNDarray
Second input DNDarray. Must be 1-dimensional. Will be flattened by default if more than 1-D

out(n, m): DNDarray, optional
A location where the result is stored

split: int, optional #TODO check out docstring format
Split dimension of the resulting DNDarray. Can be 0, 1, or None.
This is only relevant if the calculations are memory-distributed,
in which case default is ``split=0`` (see Note).

Note: parallel implementation of outer product, arrays are dense. #TODO sparse #384
In the classical (dense) case, one DNDarray stays put, the other one is passed around the ranks in
ring communication. The slice-by-slice outer product is calculated locally (here via torch.einsum()).
N.B.: if ``b`` is sent around, the resulting outer product is split along the rows dimension (``split = 0``).
if ``a`` is sent around, the resulting outer product is split along the columns (``split = 1``).
So if ``split`` is not None, ``split`` defines which DNDarray stays put and which one is passed around. No
communication is needed beyond ring communication of one of the DNDarrays.
If ``split`` is None or unspecified, the result will be distributed along axis 0, i.e. by default ``b`` is
passed around, ``a`` stays put.

Returns
-------

out(n, m): DNDarray

out[i, j] = a[i] * b[j]

Examples
--------
>>> a = ht.arange(4)
>>> b = ht.arange(3)
>>> ht.outer(a, b)
(3 processes)
(0/3) tensor([[0, 0, 0],
[0, 1, 2],
[0, 2, 4],
[0, 3, 6]], dtype=torch.int32)
(1/3) tensor([[0, 0, 0],
[0, 1, 2],
[0, 2, 4],
[0, 3, 6]], dtype=torch.int32)
(2/3) tensor([[0, 0, 0],
[0, 1, 2],
[0, 2, 4],
[0, 3, 6]], dtype=torch.int32)
>>> a = ht.arange(4, split=0)
>>> b = ht.arange(3, split=0)
>>> ht.outer(a, b)
(0/3) tensor([[0, 0, 0],
[0, 1, 2]], dtype=torch.int32)
(1/3) tensor([[0, 2, 4]], dtype=torch.int32)
(2/3) tensor([[0, 3, 6]], dtype=torch.int32)
>>> ht.outer(a, b, split=1)
(0/3) tensor([[0],
[0],
[0],
[0]], dtype=torch.int32)
(1/3) tensor([[0],
[1],
[2],
[3]], dtype=torch.int32)
(2/3) tensor([[0],
[2],
[4],
[6]], dtype=torch.int32)
>>> a = ht.arange(5, dtype=ht.float32, split=0)
>>> b = ht.arange(4, dtype=ht.float64, split=0)
>>> out = ht.empty((5,4), dtype=ht.float64, split=1)
>>> ht.outer(a, b, split=1, out=out)
>>> out
(0/3) tensor([[0., 0.],
[0., 1.],
[0., 2.],
[0., 3.],
[0., 4.]], dtype=torch.float64)
(1/3) tensor([[0.],
[2.],
[4.],
[6.],
[8.]], dtype=torch.float64)
(2/3) tensor([[ 0.],
[ 3.],
[ 6.],
[ 9.],
[12.]], dtype=torch.float64)
"""
# sanitize input
if not isinstance(a, dndarray.DNDarray) or not isinstance(b, dndarray.DNDarray):
raise TypeError(
"a, b must be of type ht.DNDarray, but were {}, {}".format(type(a), type(b))
)

# sanitize dimensions
# TODO move to sanitation module #468
if a.ndim > 1:
a = manipulations.flatten(a)
if b.ndim > 1:
b = manipulations.flatten(b)
if a.ndim == 0 or b.ndim == 0:
raise RuntimeError(
"a, b must be 1-D DNDarrays, but were {}-D and {}-D".format(a.ndim, b.ndim)
)

outer_gshape = (a.gshape[0], b.gshape[0])
t_a = a._DNDarray__array
t_b = b._DNDarray__array
t_outer_dtype = torch.promote_types(t_a.dtype, t_b.dtype)
t_a, t_b = t_a.type(t_outer_dtype), t_b.type(t_outer_dtype)
outer_dtype = types.canonical_heat_type(t_outer_dtype)

if out is not None:
if not isinstance(out, dndarray.DNDarray):
raise TypeError("out must be of type ht.DNDarray, was {}".format(type(out)))
if out.dtype is not outer_dtype:
raise TypeError(
"Wrong datatype for out: expected {}, got {}".format(outer_dtype, out.dtype)
)
if out.gshape != outer_gshape:
raise ValueError("out must have shape {}, got {}".format(outer_gshape, out.gshape))
if out.split is not split:
raise ValueError(
"Split dimension mismatch for out: expected {}, got {}".format(split, out.split)
)

# distributed outer product (dense, TODO: implement sparse version, #384)
if a.comm.is_distributed() and split is not None or a.split is not None or b.split is not None:
# MPI coordinates
rank = a.comm.rank
size = a.comm.size
t_outer_slice = 2 * [slice(None, None, None)]

if a.split is None:
a.resplit_(axis=0)
t_a = a._DNDarray__array.type(t_outer_dtype)
if b.split is None:
b.resplit_(axis=0)
t_b = b._DNDarray__array.type(t_outer_dtype)
if split is None:
# Split semantics: default out.split = a.split
split = a.split
if out is not None and out.split is None:
out.resplit_(axis=split)

# calculate local slice of outer product
if split == 0:
lshape_map = b.create_lshape_map()
t_outer_shape = (a.lshape[0], b.gshape[0])
_, _, local_slice = b.comm.chunk(b.gshape, b.split)
t_outer_slice[1] = local_slice[0]
elif split == 1:
lshape_map = a.create_lshape_map()
t_outer_shape = (a.gshape[0], b.lshape[0])
_, _, local_slice = a.comm.chunk(a.gshape, a.split)
t_outer_slice[0] = local_slice[0]
t_outer = torch.zeros(t_outer_shape, dtype=t_outer_dtype, device=t_a.device)
if lshape_map[rank] != 0:
t_outer[t_outer_slice] = torch.einsum("i,j->ij", t_a, t_b)

# Ring: fill in missing slices of outer product
for p in range(size - 1):
# prepare for sending
dest_rank = rank + 1 if rank != size - 1 else 0
# prepare for receiving
origin_rank = rank - 1 if rank != 0 else size - 1
actual_origin = origin_rank - p
if origin_rank < p:
actual_origin += size
# blocking send and recv
if split == 0:
b.comm.Send(t_b, dest_rank)
t_b = torch.empty(lshape_map[actual_origin], dtype=t_outer_dtype, device=t_a.device)
Reviewer (Member):
Can we initialize this buffer once outside the loop and then reuse it? Would save up some initialization time (and then slice it via the chunks below)

ClaudiaComito (Contributor, PR author):
That's a good point. I'm initializing a buffer for the "traveling" data now, t_b_run and t_a_run, and slicing them to the correct size afterwards.
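A minimal sketch of the buffer-reuse pattern described in this reply, reusing the variables of the surrounding function. The name t_b_run is taken from the reply; the max-chunk sizing and per-iteration slicing are assumptions, not the code of the later commit:

# Sketch only: allocate the traveling receive buffer once, before the ranks loop,
# sized for the largest remote chunk, then slice it to the sender's chunk length
# on each iteration instead of allocating a fresh tensor.
t_b_run = torch.empty(int(lshape_map.max()), dtype=t_outer_dtype, device=t_a.device)
for p in range(size - 1):
    dest_rank = rank + 1 if rank != size - 1 else 0
    origin_rank = rank - 1 if rank != 0 else size - 1
    actual_origin = origin_rank - p
    if origin_rank < p:
        actual_origin += size
    if split == 0:
        b.comm.Send(t_b, dest_rank)
        # contiguous 1-D view of the pre-allocated buffer, sized to the incoming chunk
        t_b = t_b_run[: int(lshape_map[actual_origin])]
        b.comm.Recv(t_b, origin_rank)
        # remaining loop body unchanged: compute remote_slice and fill t_outer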

b.comm.Recv(t_b, origin_rank)
_, _, remote_slice = b.comm.chunk(
b.gshape, b.split, rank=actual_origin, w_size=size
)
t_outer_slice[1] = remote_slice[0]
elif split == 1:
a.comm.Send(t_a, dest_rank)
t_a = torch.empty(lshape_map[actual_origin], dtype=t_outer_dtype, device=t_a.device)
Reviewer (Member): see initialization above

a.comm.Recv(t_a, origin_rank)
_, _, remote_slice = a.comm.chunk(
a.gshape, a.split, rank=actual_origin, w_size=size
)
t_outer_slice[0] = remote_slice[0]
t_outer[t_outer_slice] = torch.einsum("i,j->ij", t_a, t_b)
else:
# outer product, all local
t_outer = torch.einsum("i,j->ij", t_a, t_b)
split = None

outer = dndarray.DNDarray(
t_outer, gshape=outer_gshape, dtype=outer_dtype, split=split, device=a.device, comm=a.comm
)

if out is not None:
out._DNDarray__array = outer._DNDarray__array
return out

return outer


def projection(a, b):
"""
Projection of vector a onto vector b
75 changes: 75 additions & 0 deletions heat/core/linalg/tests/test_basics.py
@@ -479,6 +479,81 @@ def test_norm(self):
c = np.arange(9) - 4
ht.linalg.norm(c)

def test_outer(self):
# test outer, a and b local, different dtypes
a = ht.arange(3, dtype=ht.int32)
b = ht.arange(8, dtype=ht.float32)
ht_outer = ht.outer(a, b, split=None)
np_outer = np.outer(a.numpy(), b.numpy())
t_outer = torch.einsum("i,j->ij", a._DNDarray__array, b._DNDarray__array)
self.assertTrue((ht_outer.numpy() == np_outer).all())
self.assertTrue(ht_outer._DNDarray__array.dtype is t_outer.dtype)

# test outer, a and b distributed, no data on some ranks
a_split = ht.arange(3, dtype=ht.float32, split=0)
b_split = ht.arange(8, dtype=ht.float32, split=0)
ht_outer_split = ht.outer(a_split, b_split, split=None)

# a and b split 0, outer split 1
ht_outer_split = ht.outer(a_split, b_split, split=1)
self.assertTrue((ht_outer_split.numpy() == np_outer).all())
self.assertTrue(ht_outer_split.split == 1)

# a and b distributed, outer split unspecified
ht_outer_split = ht.outer(a_split, b_split, split=None)
self.assertTrue((ht_outer_split.numpy() == np_outer).all())
self.assertTrue(ht_outer_split.split == 0)

# a not distributed, outer.split = 1
ht_outer_split = ht.outer(a, b_split, split=1)
self.assertTrue((ht_outer_split.numpy() == np_outer).all())
self.assertTrue(ht_outer_split.split == 1)

# b not distributed, outer.split = 0
ht_outer_split = ht.outer(a_split, b, split=0)
self.assertTrue((ht_outer_split.numpy() == np_outer).all())
self.assertTrue(ht_outer_split.split == 0)

# a_split.ndim > 1 and a.split != 0
a_split_3d = ht.random.randn(3, 3, 3, dtype=ht.float64, split=2)
ht_outer_split = ht.outer(a_split_3d, b_split)
np_outer_3d = np.outer(a_split_3d.numpy(), b_split.numpy())
self.assertTrue((ht_outer_split.numpy() == np_outer_3d).all())
self.assertTrue(ht_outer_split.split == 0)

# write to out buffer
ht_out = ht.empty((a.gshape[0], b.gshape[0]), dtype=ht.float32)
ht.outer(a, b, out=ht_out)
self.assertTrue((ht_out.numpy() == np_outer).all())
ht_out_split = ht.empty((a_split.gshape[0], b_split.gshape[0]), dtype=ht.float32, split=1)
ht.outer(a_split, b_split, out=ht_out_split, split=1)
self.assertTrue((ht_out_split.numpy() == np_outer).all())

# test exceptions
t_a = torch.arange(3)
with self.assertRaises(TypeError):
ht.outer(t_a, b)
np_b = np.arange(8)
with self.assertRaises(TypeError):
ht.outer(a, np_b)
a_0d = ht.array(2.3)
with self.assertRaises(RuntimeError):
ht.outer(a_0d, b)
t_out = torch.empty((a.gshape[0], b.gshape[0]), dtype=torch.float32)
with self.assertRaises(TypeError):
ht.outer(a, b, out=t_out)
ht_out_wrong_dtype = ht.empty((a.gshape[0], b.gshape[0]), dtype=ht.float64)
with self.assertRaises(TypeError):
ht.outer(a, b, out=ht_out_wrong_dtype)
ht_out_wrong_shape = ht.empty((7, b.gshape[0]), dtype=ht.float32)
with self.assertRaises(ValueError):
ht.outer(a, b, out=ht_out_wrong_shape)
ht_out_wrong_split = ht.empty(
(a_split.gshape[0], b_split.gshape[0]), dtype=ht.float32, split=1
)
with self.assertRaises(ValueError):
ht.outer(a_split, b_split, out=ht_out_wrong_split, split=0)

def test_projection(self):
a = ht.arange(1, 4, dtype=ht.float32, split=None)
e1 = ht.array([1, 0, 0], dtype=ht.float32, split=None)