Skip to content

Commit

Permalink
support contraction of arbitrary objects
Browse files Browse the repository at this point in the history
  • Loading branch information
jcmgray committed Jul 2, 2020
1 parent e88983f commit dc23938
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 0 deletions.
44 changes: 44 additions & 0 deletions docs/source/backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,47 @@ so again just the ``backend`` keyword needs to be supplied:
[-462.52362 , -121.12659 , -67.84769 , 624.5455 ],
[ 5.2839584, 36.44155 , 81.62852 , 703.15784 ]],
dtype=float32)
Contracting arbitrary objects
=============================

There is one more explicit backend that can handle arbitrary arrays of objects,
so long the *objects themselves* just support multiplication and addition (
``__mul__`` and ``__add__`` dunder methods respectively). Use it by supplying
``backend='object'``.

For example imagine we want to perform a contraction of arrays made up of
`sympy <www.sympy.org>`_ symbols:

.. code-block:: python
>>> import opt_einsum as oe
>>> import numpy as np
>>> import sympy
>>> # define the symbols
>>> a, b, c, d, e, f, g, h, i, j, k, l = [sympy.symbols(oe.get_symbol(i)) for i in range(12)]
>>> a * b + c * d
𝑎𝑏+𝑐𝑑
>>> # define the tensors (you might explicitly specify ``dtype=object``)
>>> X = np.array([[a, b], [c, d]])
>>> Y = np.array([[e, f], [g, h]])
>>> Z = np.array([[i, j], [k, l]])
>>> # contract the tensors!
>>> oe.contract('uv,vw,wu->u', X, Y, Z, backend='object')
array([i*(a*e + b*g) + k*(a*f + b*h), j*(c*e + d*g) + l*(c*f + d*h)],
dtype=object)
There's a few things to note here:

* The returned array is a ``numpy.ndarray`` but since it has ``dtype=object``
it can really hold *any* python objects
* We had to explicitly use ``backend='object'``, since :func:`numpy.einsum`
would have otherwise been dispatched to, which can't handle ``dtype=object``
(though :func:`numpy.tensordot` in fact can)
* Although an optimized pairwise contraction order is used, the looping in each
single contraction is **performed in python so performance will be
drastically lower than for numeric dtypes!**
5 changes: 5 additions & 0 deletions opt_einsum/backends/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy

from . import objects
from . import cupy as _cupy
from . import jax as _jax
from . import tensorflow as _tensorflow
Expand Down Expand Up @@ -49,6 +50,10 @@ def _import_func(func, backend, default=None):
('tensordot', 'numpy'): numpy.tensordot,
('transpose', 'numpy'): numpy.transpose,
('einsum', 'numpy'): numpy.einsum,
# also pre-populate with the arbitrary object backend
('tensordot', 'object'): numpy.tensordot,
('transpose', 'object'): numpy.transpose,
('einsum', 'object'): objects.oinsum,
}


Expand Down
60 changes: 60 additions & 0 deletions opt_einsum/backends/objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""
Functions for performing contractions with array elements which are objects.
"""

import numpy as np
import functools
import operator


def oinsum(eq, *arrays):
"""A ``einsum`` implementation for ``numpy`` arrays with object dtype.
The loop is performed in python, meaning that the objects themselves need
only to implement ``__mul__`` and ``__add__`` for the contraction to be
computed. This may be useful when, for example, computing expressions of
tensors with symbolic elements, but note it will be very slow when compared
to ``numpy.einsum`` and numeric data types!
Parameters
----------
eq : str
The contraction string, should specify output.
arrays : sequence of arrays
These can be any indexable arrays as long as addition and
multiplication is defined on the elements.
Returns
-------
out : numpy.ndarray
The output tensor, with ``dtype=object``.
"""

# when called by ``opt_einsum`` we will always be given a full eq
lhs, output = eq.split('->')
inputs = lhs.split(',')

sizes = {}
for term, array in zip(inputs, arrays):
for k, d in zip(term, array.shape):
sizes[k] = d

out_size = tuple(sizes[k] for k in output)
out = np.empty(out_size, dtype=object)

inner = tuple(k for k in sizes if k not in output)
inner_size = tuple(sizes[k] for k in inner)

for coo_o in np.ndindex(*out_size):

coord = dict(zip(output, coo_o))

def gen_inner_sum():
for coo_i in np.ndindex(*inner_size):
coord.update(dict(zip(inner, coo_i)))
locs = (tuple(coord[k] for k in term) for term in inputs)
elements = (array[loc] for array, loc in zip(arrays, locs))
yield functools.reduce(operator.mul, elements)

out[coo_o] = functools.reduce(operator.add, gen_inner_sum())

return out
22 changes: 22 additions & 0 deletions opt_einsum/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,3 +431,25 @@ def test_auto_backend_custom_array_no_tensordot():
# Shaped is an array-like object defined by opt_einsum - which has no TDOT
assert infer_backend(x) == 'opt_einsum'
assert parse_backend([x], 'auto') == 'numpy'


@pytest.mark.parametrize("string", tests)
def test_objects(string):
views = helpers.build_views(string)
ein = contract(string, *views, optimize=False, use_blas=False)
assert ein.dtype != object

shps = [v.shape for v in views]
expr = contract_expression(string, *shps, optimize=True)

obj_views = [view.astype(object) for view in views]

# try raw contract
obj_opt = contract(string, *obj_views, backend='object')
assert obj_opt.dtype == object
assert np.allclose(ein, obj_opt.astype(float))

# test expression
obj_opt = expr(*obj_views, backend='object')
assert obj_opt.dtype == object
assert np.allclose(ein, obj_opt.astype(float))

0 comments on commit dc23938

Please sign in to comment.