diff --git a/example/numpy/demo.ipynb b/example/numpy/demo.ipynb new file mode 100644 index 000000000000..31c13e97e3dd --- /dev/null +++ b/example/numpy/demo.ipynb @@ -0,0 +1,441 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fundamentals of MXNet-NumPy Module\n", + "\n", + "## Namespaces for Imperative Programming\n", + "- `mxnet.numpy`: Regular NumPy operators\n", + "- `mxnet.numpy.random`: NumPy random operators\n", + "- `mxnet.numpy.linalg`: NumPy linear algebra operators\n", + "- `mxnet.numpy_extension`: Operators implemented in MXNet that do not exist in the official NumPy, plus some utils (e.g. context-related functions).\n", + "\n", + "## Operator Namespaces for Gluon\n", + "`F` can be either `mxnet.ndarray` or `mxnet.symbol`. Note that `np` and `npe` are aliases of `numpy` and `numpy_extension`, respectively.\n", + "- `F.np`: Regular NumPy operators\n", + "- `F.np.random`: NumPy random operators\n", + "- `F.np.linalg`: NumPy linear algebra operators\n", + "- `F.npe`: Operators implemented in MXNet that do not exist in the official NumPy\n", + "\n", + "## New `ndarray` and `symbol`\n", + "`mxnet.numpy.ndarray` (visible to users) and `mxnet.symbol.numpy._Symbol` (not directly visible to users)\n", + "- Same name as in the official NumPy package\n", + "- Dispatch convenience fluent method calls to MXNet NumPy operators\n", + "- Override many convenience fluent methods that do not exist in the official NumPy ndarray\n", + "- Make the behavior of built-in methods consistent with the official NumPy\n", + " - Indexing: `__getitem__` and `__setitem__`\n", + " - Many binary element-wise operators with broadcasting, which are not supported in `mxnet.symbol.Symbol`\n", + " \n", + "## User Experience of Module Importing (In Progress)\n", + "**Legacy**\n", + "```python\n", + "import mxnet as mx\n", + "from mxnet import gluon\n", + "```\n", + "**Numpy**\n", + "```python\n", + "from mxnet import np, npe, gluon\n", + "```\n", + "\n", + " \n", + "## MXNet NumPy in Action\n", + "### Scalar and zero-size tensors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import mxnet as mx\n", + "from mxnet import numpy as np\n", + "\n", + "# create a scalar tensor\n", + "x = np.array(3.14)\n", + "print(x) # x is actually an ndarray, but a scalar value will be printed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s = x.item() # copy the element from the scalar tensor to a python scalar\n", + "print('s = {}'.format(str(s)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a scalar tensor with only one element 1.0\n", + "y = np.ones(())\n", + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a zero-size tensor\n", + "x = np.ones((5, 4, 0, 6))\n", + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# transpose the zero-size tensor\n", + "y = np.transpose(x)\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conversion between classic and numpy ndarrays" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a classic MXNet NDArray\n", + "x = mx.nd.random.uniform(shape=(2, 3))\n", + "print(x)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# convert classic NDArray type to mxnet.numpy.ndarray with zero-copy\n", + "y = x.as_np_ndarray()\n", + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# changing y's content changes x's content too\n", + "y[:] = 1\n", + "print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# convert mxnet.numpy.ndarray to classic NDArray with zero-copy\n", + "z = y.as_classic_ndarray()\n", + "print(z)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# changing z's content changes y's content too\n", + "z[:] = 2\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### There is a line between classic operators and numpy operators...\n", + "- Numpy operators can only accept numpy `ndarray`s/`_Symbol`s as inputs\n", + "- Classic operators can only accept classic `NDArray`s/`Symbol`s as inputs\n", + "- Explicit conversions must be performed if users want to leverage operators on both sides\n", + "- The layer inheriting from `HybridBlock` must have the same type of outputs, i.e., either all classic `NDArray`s or all numpy `ndarray`s, before hybridization\n", + "\n", + "#### Imperative" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = mx.nd.ones((2, 3)) # create a classic NDArray\n", + "print(a)\n", + "out = np.sum(a) # feeding it to a numpy operator would result in failure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "b = a.as_np_ndarray() # convert `a` to a numpy ndarray sharing the same data memory\n", + "print(b)\n", + "out = np.sum(b) # feed the numpy ndarray to a numpy operator\n", + "print('np.sum(b) =', out)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "out = mx.nd.sum(b) # feeding `b` to a classic operator would reuslt in failure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c = b.as_classic_ndarray() # convert `b` to a classic ndarray\n", + "out = mx.nd.sum(c) # feed the classic ndarray to a classic operator\n", + "print('mx.nd.sum(c) =', str(out))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Gluon" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from mxnet import gluon\n", + "class TestMultipleOutputs(gluon.HybridBlock):\n", + " def hybrid_forward(self, F, x):\n", + " ret1 = F.sum(x) # a classic operator produces a classic NDArray\n", + " ret2 = F.np.sum(x) # a numpy operator produces a numpy NDArray\n", + " return ret1, ret2\n", + "\n", + "net = TestMultipleOutputs()\n", + "net.hybridize()\n", + "out = net(a) # `a` is a classic NDArray and will cause an error on `F.np.sum` which is a numpy operator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "net = TestMultipleOutputs() # redefine a net with no pre-built graph\n", + "net.hybridize()\n", + "out = net(b) # `b` is a numpy ndarray and will cause an error on `F.sum` which is a classic operator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, 
+ "outputs": [], + "source": [ + "class TestMultipleOutputs2(gluon.HybridBlock):\n", + " def hybrid_forward(self, F, x): # x is known to be a numpy ndarray\n", + " ret1 = F.sum(x.as_classic_ndarray()) # a classic operator produces a classic NDArray\n", + " ret2 = F.np.sum() # a numpy operator produces a numpy NDArray\n", + " return ret1, ret2 # two outputs of the layer with different types would result in failure in building the graph\n", + "\n", + "net = TestMultipleOutputs2()\n", + "net.hybridize()\n", + "out = net(b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TestMultipleOutputs3(gluon.HybridBlock):\n", + " def hybrid_forward(self, F, x): # x is known to be a numpy ndarray\n", + " ret1 = F.sum(x.as_classic_ndarray()) # a classic operator produces a classic NDArray\n", + " ret2 = F.np.sum(x) # a numpy operator produces a numpy NDArray\n", + " return ret1.as_np_ndarray(), ret2 # two outputs of the layer with different types would result in failure in building the graph\n", + "\n", + "net = TestMultipleOutputs3()\n", + "net.hybridize()\n", + "out = net(b)\n", + "print('classic operator output: ', out[0])\n", + "print('numpy operator output: ', out[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binary element-wise operations with broadcasting in new and old symbols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TestBinaryBroadcast(gluon.HybridBlock):\n", + " def hybrid_forward(self, F, x1, x2):\n", + " print(\"x1 type in hybrid_forward:\", str(type(x1)))\n", + " print(\"x2 type in hybrid_forward:\", str(type(x2)))\n", + " return x1 + x2\n", + "\n", + "net = TestBinaryBroadcast()\n", + "x1 = mx.nd.ones((2, 1))\n", + "x2 = mx.nd.ones((1, 3))\n", + "print('x1 input tensor type: ', str(type(x1)))\n", + "print('x2 input tensor type: ', str(type(x2)))\n", + "out = net(x1, x2) # ok: imperative execution supports broadcasting\n", + "print(out)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "net.hybridize() # mark the block for execution using a computational graph\n", + "try:\n", + " out = net(x1, x2) # error: old symbol `+` operation does not support broadcasting\n", + " assert False # should not reach here\n", + "except mx.MXNetError:\n", + " print(\"ERROR: cannot perform broadcast add for two symbols of type mx.sym.Symbol\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "net = TestBinaryBroadcast() # redefine a net to clear the pre-built graph cache\n", + "net.hybridize()\n", + "\n", + "x1 = x1.as_np_ndarray() # convert x1 to np.ndarray\n", + "x2 = x2.as_np_ndarray() # convert x2 to np.ndarray\n", + "print('x1 input tensor type: ', str(type(x1)))\n", + "print('x2 input tensor type: ', str(type(x2)))\n", + "out = net(x1, x2) # ok: a graph is built with numpy symbols which supports broadcasting, because inputs are np.ndarray's, \n", + "print(out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A Simple Linear Regression Model\n", + "Let's consider a simple linear regression model as the following.\n", + "Given dataset `{x, y}`, where `x`s represent input examples and `y`s represent observed data, find the parameters `w1` and `w2` for the following model.\n", + "```\n", + "y_pred = np.dot(np.maximum(np.dot(x, w1), 0), w2)\n", + "```" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import mxnet as mx\n", + "from mxnet import gluon, autograd, np\n", + "\n", + "\n", + "@np.use_np\n", + "class LinearRegression(gluon.HybridBlock):\n", + " def __init__(self, num_input_dim=1000, num_hidden_dim=100, num_output_dim=10):\n", + " super(LinearRegression, self).__init__()\n", + " with self.name_scope():\n", + " self.w1 = self.params.get('w1', shape=(num_input_dim, num_hidden_dim),\n", + " allow_deferred_init=True)\n", + " self.w2 = self.params.get('w2', shape=(num_hidden_dim, num_output_dim),\n", + " allow_deferred_init=True)\n", + "\n", + " def hybrid_forward(self, F, x, w1, w2):\n", + " h = x.dot(w1) # equivalent to F.np.dot(x, w1)\n", + " h_relu = F.npe.relu(h) # equivalent to F.relu(h) but generating np.ndarray\n", + " y_pred = h_relu.dot(w2) # equivalent to F.np.dot(h_relu, w2)\n", + " return y_pred\n", + "\n", + "\n", + "class TotalLoss(gluon.HybridBlock):\n", + " def hybrid_forward(self, F, pred, label):\n", + " return ((pred - label) ** 2).sum() # equivalent to F.np.sum(F.np.square(pred - label))\n", + "\n", + "\n", + "regressor = LinearRegression()\n", + "regressor.initialize(mx.init.Normal())\n", + "regressor.hybridize()\n", + "\n", + "# Create random input and output data\n", + "x = mx.nd.random.normal(shape=(64, 1000)).as_np_ndarray() # x is of type mxnet.numpy.ndarray\n", + "y = mx.nd.random.normal(shape=(64, 10)).as_np_ndarray() # y is of type mxnet.numpy.ndarray\n", + "\n", + "total_loss = TotalLoss()\n", + "trainer = gluon.Trainer(regressor.collect_params(),\n", + " 'sgd',\n", + " {'learning_rate': 1e-3, 'momentum': 0.9, 'allow_np': True})\n", + "\n", + "for t in range(50):\n", + " with autograd.record():\n", + " output = regressor(x) # output is a type of np.ndarray because np.dot is the last op in the network\n", + " loss = total_loss(output, y) # loss is a scalar np.ndarray\n", + " loss.backward()\n", + " print(t, loss) # note that loss.asnumpy() is called\n", + " trainer.step(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/example/numpy/numpy_semantics.ipynb b/example/numpy/numpy_semantics.ipynb new file mode 100644 index 000000000000..1cec51f95bfd --- /dev/null +++ b/example/numpy/numpy_semantics.ipynb @@ -0,0 +1,308 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to Use NumPy Semantics in MXNet with `mxnet.numpy` Module\n", + "\n", + "## NumPy Shape Semantics\n", + "\n", + "### Example \n", + "\n", + "| Shape Example | MXNet (before) | MXNet/NumPy |\n", + "|:---:|:---:|:---:|\n", + "| `()` | unknown | Scalar tensor |\n", + "| `(2, 0, 1)` | Second dimension unknown | Zero-size tensor |\n", + "| `None`(Python) | N/A | Unknown |\n", + "| `(2, -1, 0)`(C++) | N/A | Second dim uknown|\n", + "\n", + "### Affected modules\n", + "- Shape inference: imperative, symbolic, Gluon\n", + "- Legacy operators (not recommended to use)\n", + "- MXNet/NumPy operators\n", + "\n", + "## NumPy Array Semantics\n", + "**Definition:** The type of created ndarrays is `mxnet.numpy.ndarray`/`mxnet.symbol.numpy._Symbol`, instead of 
`mxnet.ndarray.NDArray`/`mxnet.symbol.Symbol` (only affects Gluon modules).\n", + "- Block/HybridBlock\n", + " - Parameter creation and initialization.\n", + " - Inputs/outputs (symbol/ndarray) of `__call__`/`forward`/`hybrid_forward`.\n", + " - Computational graph construction.\n", + "- Dataloader\n", + "\n", + "## Dependency of Two Types of Semantics\n", + "- It is required to keep NumPy shape semantics active while activating NumPy array semantics.\n", + "- Deactivating NumPy shape semantics while NumPy array semantics is still active is not allowed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import mxnet as mx\n", + "from mxnet import np, npx, gluon\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "\n", + "try:\n", + " npx.set_np(shape=False, array=True)\n", + "except ValueError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to Enable NumPy Shape semantics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " a = mx.nd.random.uniform(shape=())\n", + "except mx.MXNetError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " b = mx.nd.random.uniform(shape=(2, 0, 1))\n", + "except mx.MXNetError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " c = np.random.uniform()\n", + "except mx.MXNetError as e:\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " d = np.random.uniform(size=(2, 0, 1))\n", + "except mx.MXNetError as e:\n", + " print(e) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "npx.set_np(shape=True, array=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "a = mx.nd.random.uniform(shape=())\n", + "b = mx.nd.random.uniform(shape=(2, 0, 1))\n", + "c = np.random.uniform()\n", + "d = np.random.uniform(size=(2, 0, 1))\n", + "\n", + "print('type(a) =', type(a))\n", + "print('a.shape = ', a.shape)\n", + "print('a.size = ', a.size)\n", + "\n", + "print('type(b) =', type(b))\n", + "print('b.shape = ', b.shape)\n", + "print('b.size = ', b.size)\n", + "\n", + "print('type(c) =', type(c))\n", + "print('c.shape = ', c.shape)\n", + "print('c.size = ', c.size)\n", + "\n", + "print('type(d) =', type(d))\n", + "print('d.shape = ', d.shape)\n", + "print('d.size = ', d.size)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to Enable NumPy Array Semantics\n", + "\n", + "### Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "npx.reset_np() # reset two types of semantics to the default state, which is False for both of them\n", + "\n", + "from mxnet.gluon import nn\n", + "class Net(gluon.Block):\n", + " def __init__(self, in_units=0, **kwargs): # 0 means in_units is unknown and must be inferred at runtime\n", + " super(Net, self).__init__(**kwargs)\n", + " with self.name_scope():\n", + " self.dense0 = nn.Dense(5, in_units=in_units)\n", + " self.dense1 = nn.Dense(5, in_units=in_units)\n", + " \n", + " def forward(self, x):\n", + " 
return self.dense1(self.dense0(x))\n", + "\n", + "net1 = Net()\n", + "net1.initialize()\n", + "net1(mx.nd.zeros((3, 10)))\n", + "for k, v in net1.collect_params().items():\n", + " print('parameter {}, type {}'.format(k, str(type(v.data()))))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "npx.set_np()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "net2 = Net()\n", + "net2.initialize()\n", + "net2(np.zeros((3, 10)))\n", + "for k, v in net2.collect_params().items():\n", + " print('parameter {}, type {}'.format(k, str(type(v.data()))))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataloader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "import os\n", + "from mxnet.gluon import data as gdata\n", + "\n", + "\n", + "npx.reset_np()\n", + "\n", + "\n", + "def load_data_fashion_mnist(batch_size, resize=None, root=os.path.join(\n", + " '~', '.mxnet', 'datasets', 'fashion-mnist')):\n", + " \"\"\"Download the Fashion-MNIST dataset and then load into memory.\"\"\"\n", + " root = os.path.expanduser(root)\n", + " transformer = []\n", + " if resize:\n", + " transformer += [gdata.vision.transforms.Resize(resize)]\n", + " transformer += [gdata.vision.transforms.ToTensor()]\n", + " transformer = gdata.vision.transforms.Compose(transformer)\n", + "\n", + " mnist_train = gdata.vision.FashionMNIST(root=root, train=True)\n", + " mnist_test = gdata.vision.FashionMNIST(root=root, train=False)\n", + " num_workers = 0 if sys.platform.startswith('win32') else 4\n", + "\n", + " train_iter = gdata.DataLoader(mnist_train.transform_first(transformer),\n", + " batch_size, shuffle=True,\n", + " num_workers=num_workers)\n", + " test_iter = gdata.DataLoader(mnist_test.transform_first(transformer),\n", + " batch_size, shuffle=False,\n", + " num_workers=num_workers)\n", + " return train_iter, test_iter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_iter, test_iter = load_data_fashion_mnist(16)\n", + "\n", + "for X, y in train_iter:\n", + " print('type(X) = ', type(X))\n", + " print('type(y) = ', type(y))\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "npx.set_np()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_iter, test_iter = load_data_fashion_mnist(16)\n", + "\n", + "for X, y in train_iter:\n", + " print('type(X) = ', type(X))\n", + " print('type(y) = ', type(y))\n", + " break" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/include/mxnet/base.h b/include/mxnet/base.h index b239cb1f7302..5c612fd1df12 100644 --- a/include/mxnet/base.h +++ b/include/mxnet/base.h @@ -421,7 +421,9 @@ inline int32_t Context::GetGPUCount() { #if MXNET_USE_CUDA int32_t count; cudaError_t e = cudaGetDeviceCount(&count); - if (e == cudaErrorNoDevice) { + // TODO(junwu): Remove e == 
35 + // This is skipped for working around wheel build system with older CUDA driver. + if (e == cudaErrorNoDevice || e == 35) { return 0; } CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e); diff --git a/include/mxnet/c_api.h b/include/mxnet/c_api.h index 1b1c10e79fea..e9e9a37ed582 100644 --- a/include/mxnet/c_api.h +++ b/include/mxnet/c_api.h @@ -2811,6 +2811,18 @@ MXNET_DLL int MXEnginePushSync(EngineSyncFunc sync_func, void* func_param, EngineVarHandle mutable_vars_handle, int num_mutable_vars, EngineFnPropertyHandle prop_handle DEFAULT(NULL), int priority DEFAULT(0), const char* opr_name DEFAULT(NULL)); +/*! + * \brief Create an NDArray from source sharing the same data chunk. + * \param src source NDArray + * \param out new NDArray sharing the same data chunck with src + */ +MXNET_DLL int MXShallowCopyNDArray(NDArrayHandle src, NDArrayHandle* out); +/*! + * \brief Create an Symbol from source sharing the same graph structure. + * \param src source Symbol + * \param out new Symbol sharing the same graph structure with src + */ +MXNET_DLL int MXShallowCopySymbol(SymbolHandle src, SymbolHandle * out); #ifdef __cplusplus } diff --git a/include/mxnet/tuple.h b/include/mxnet/tuple.h index bc630f153744..f018c8faabea 100644 --- a/include/mxnet/tuple.h +++ b/include/mxnet/tuple.h @@ -272,6 +272,14 @@ class Tuple { is.get(); if (ch == '(' || ch == '[') break; if (!isspace(ch)) { + if (ch == 'N') { + std::string tmp_val; + is >> tmp_val; + if (tmp_val == "one") { // is stores "None" + t.SetDim(-1); + return is; + } + } is.setstate(std::ios::failbit); return is; } @@ -653,6 +661,13 @@ inline bool shape_is_known(const TShape& x) { return true; } +inline bool shape_is_known(const std::vector& shapes) { + for (const TShape& shape : shapes) { + if (!shape_is_known(shape)) return false; + } + return true; +} + /*! \brief helper function to cast type of container elements */ template inline DstIter ShapeTypeCast(const SrcIter begin, diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index ab4bffde28a9..f288b4c65926 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -25,10 +25,15 @@ from . import engine from .base import MXNetError from .util import is_np_shape, set_np_shape, np_shape, use_np_shape +from .util import is_np_array, np_array, use_np_array, use_np from . import base from . import contrib from . import ndarray from . import ndarray as nd +from . import numpy +from . import numpy_extension +from . import numpy as np +from . import numpy_extension as npx from . import name # use mx.sym as short for symbol from . 
import symbol as sym diff --git a/python/mxnet/_ctypes/ndarray.py b/python/mxnet/_ctypes/ndarray.py index f324545a2352..dd429e6f6c46 100644 --- a/python/mxnet/_ctypes/ndarray.py +++ b/python/mxnet/_ctypes/ndarray.py @@ -55,6 +55,8 @@ def __reduce__(self): _ndarray_cls = None +_np_ndarray_cls = None + def _set_ndarray_class(cls): """Set the symbolic class to be cls""" @@ -62,7 +64,13 @@ def _set_ndarray_class(cls): _ndarray_cls = cls -def _imperative_invoke(handle, ndargs, keys, vals, out): +def _set_np_ndarray_class(cls): + """Set the symbolic class to be cls""" + global _np_ndarray_cls + _np_ndarray_cls = cls + + +def _imperative_invoke(handle, ndargs, keys, vals, out, is_np_op): """ctypes implementation of imperative invoke wrapper""" if out is not None: original_output = out @@ -91,23 +99,27 @@ def _imperative_invoke(handle, ndargs, keys, vals, out): c_str_array([str(s) for s in vals]), ctypes.byref(out_stypes))) + create_ndarray_fn = _np_ndarray_cls if is_np_op else _ndarray_cls if original_output is not None: return original_output if num_output.value == 1: - return _ndarray_cls(ctypes.cast(output_vars[0], NDArrayHandle), - stype=out_stypes[0]) + return create_ndarray_fn(ctypes.cast(output_vars[0], NDArrayHandle), + stype=out_stypes[0]) else: - return [_ndarray_cls(ctypes.cast(output_vars[i], NDArrayHandle), - stype=out_stypes[i]) - for i in range(num_output.value)] + return [create_ndarray_fn(ctypes.cast(output_vars[i], NDArrayHandle), + stype=out_stypes[i]) for i in range(num_output.value)] class CachedOp(object): """Cached operator handle.""" - __slots__ = ["handle"] + __slots__ = ["handle", "is_np_sym"] + def __init__(self, sym, flags=()): self.handle = CachedOpHandle() + from ..symbol.numpy._symbol import _Symbol + self.is_np_sym = bool(isinstance(sym, _Symbol)) + check_call(_LIB.MXCreateCachedOpEx( sym.handle, len(flags), @@ -118,6 +130,13 @@ def __init__(self, sym, flags=()): def __del__(self): check_call(_LIB.MXFreeCachedOp(self.handle)) + def _is_from_np_compat_op(self, idx): + """Check if the CachedOp's idx-th output is directly from a numpy op.""" + is_from_np_op = ctypes.c_int(0) + check_call(_LIB.MXIsCachedOpOutputFromNumpyCompatOp(self.handle, ctypes.c_int(idx), + ctypes.byref(is_from_np_op))) + return is_from_np_op.value != 0 + def __call__(self, *args, **kwargs): """ctypes implementation of imperative invoke wrapper""" out = kwargs.pop('out', None) @@ -151,10 +170,10 @@ def __call__(self, *args, **kwargs): if original_output is not None: return original_output + create_ndarray_fn = _np_ndarray_cls if self.is_np_sym else _ndarray_cls if num_output.value == 1: - return _ndarray_cls(ctypes.cast(output_vars[0], NDArrayHandle), - stype=out_stypes[0]) + return create_ndarray_fn(ctypes.cast(output_vars[0], NDArrayHandle), + stype=out_stypes[0]) else: - return [_ndarray_cls(ctypes.cast(output_vars[i], NDArrayHandle), - stype=out_stypes[i]) - for i in range(num_output.value)] + return [create_ndarray_fn(ctypes.cast(output_vars[i], NDArrayHandle), + stype=out_stypes[i]) for i in range(num_output.value)] diff --git a/python/mxnet/_ctypes/symbol.py b/python/mxnet/_ctypes/symbol.py index fe4cb950ed14..fc159f86854d 100644 --- a/python/mxnet/_ctypes/symbol.py +++ b/python/mxnet/_ctypes/symbol.py @@ -27,6 +27,7 @@ from ..base import check_call _symbol_cls = None +_np_symbol_cls = None class SymbolBase(object): """Symbol is symbolic graph.""" @@ -115,7 +116,13 @@ def _set_symbol_class(cls): _symbol_cls = cls -def _symbol_creator(handle, args, kwargs, keys, vals, name): +def 
_set_np_symbol_class(cls): + """Set the symbolic class to be cls""" + global _np_symbol_cls + _np_symbol_cls = cls + + +def _symbol_creator(handle, args, kwargs, keys, vals, name, is_np_op): sym_handle = SymbolHandle() check_call(_LIB.MXSymbolCreateAtomicSymbol( ctypes.c_void_p(handle), @@ -128,7 +135,8 @@ def _symbol_creator(handle, args, kwargs, keys, vals, name): raise TypeError( 'Operators with variable length input can only accept input' 'Symbols either as positional or keyword arguments, not both') - s = _symbol_cls(sym_handle) + create_symbol_fn = _np_symbol_cls if is_np_op else _symbol_cls + s = create_symbol_fn(sym_handle) if args: s._compose(*args, name=name) elif kwargs: diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py new file mode 100644 index 000000000000..15df473c0e1e --- /dev/null +++ b/python/mxnet/_numpy_op_doc.py @@ -0,0 +1,496 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: skip-file + + +"""Doc placeholder for numpy ops with prefix _np.""" + +def _np_reshape(a, newshape, order='C'): + """ + reshape(a, newshape, order='C') + + Gives a new shape to an array without changing its data. + + Parameters + ---------- + a : ndarray + Array to be reshaped. + newshape : int or tuple of ints + The new shape should be compatible with the original shape. If + an integer, then the result will be a 1-D array of that length. + One shape dimension can be -1. In this case, the value is + inferred from the length of the array and remaining dimensions. + order : {'C'}, optional + Read the elements of `a` using this index order, and place the + elements into the reshaped array using this index order. 'C' + means to read / write the elements using C-like index order, + with the last axis index changing fastest, back to the first + axis index changing slowest. Other order types such as 'F'/'A' + may be added in the future. + + Returns + ------- + reshaped_array : ndarray + It will be always a copy of the original array. This behavior is different + from the official NumPy package where views of the original array may be + generated. + + See Also + -------- + ndarray.reshape : Equivalent method. + """ + pass + + +def _np_ones_like(a): + """Return an array of ones with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. + + Returns + ------- + out : ndarray + Array of ones with the same shape and type as `a`. + """ + pass + + +def _np_zeros_like(a): + r""" + zeros_like(a) + + Return an array of zeros with the same shape and type as a given array. + + Parameters + ---------- + a : ndarray + The shape and data-type of `a` define these same attributes of + the returned array. 
+ + Returns + ------- + out : ndarray + Array of zeros with the same shape and type as `a`. + + + See Also + -------- + ones_like : Return an array of ones with shape and type of input. + zeros : Return a new array setting values to zero. + + Examples + -------- + >>> x = np.arange(6) + >>> x = x.reshape((2, 3)) + >>> x + array([[0., 1., 2.], + [3., 4., 5.]]) + >>> np.zeros_like(x) + array([[0., 0., 0.], + [0., 0., 0.]]) + >>> y = np.arange(3) + >>> y + array([0., 1., 2.]) + >>> np.zeros_like(y) + array([0., 0., 0.]) + + Notes + ----- + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.zeros_like + `_ in + the following aspects: + + - The parameter `dtype` and `subok` are not supported now. + - Only 'C' order is supported. + """ + pass + + +def _np_repeat(a, repeats, axis=None): + """Repeat elements of an array. + + Parameters + ---------- + a : ndarray + Input array. + repeats : int or array of ints + The number of repetitions for each element. `repeats` is broadcasted + to fit the shape of the given axis. + axis : int, optional + The axis along which to repeat values. By default, use the + flattened input array, and return a flat output array. + + Returns + ------- + repeated_array : ndarray + Output array which has the same shape as `a`, except along + the given axis. + """ + pass + + +def _np_cumsum(a, axis=None, dtype=None, out=None): + """cumsum(a, axis=None, dtype=None, out=None) + + Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + a : ndarray + Input array. + axis : int, optional + Axis along which the cumulative sum is computed. The default + (None) is to compute the cumsum over the flattened array. + dtype : dtype, optional + Type of the returned array and of the accumulator in which the + elements are summed. If `dtype` is not specified, it defaults + to the dtype of `a`. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape, type and buffer length as the expected output. + + Returns + ------- + cumsum_along_axis : ndarray. + A new array holding the result is returned unless `out` is + specified, in which case a reference to `out` is returned. The + result has the same size as `a`, and the same shape as `a` if + `axis` is not None or `a` is a 1-d array. + + Examples + -------- + >>> a = np.array([[1,2,3], [4,5,6]]) + >>> a + array([[1., 2., 3.], + [4., 5., 6.]]) + >>> np.cumsum(a) + array([ 1., 3., 6., 10., 15., 21.]) + >>> np.cumsum(a, dtype=float) + array([ 1., 3., 6., 10., 15., 21.], dtype=float64) + >>> np.cumsum(a,axis=0) + array([[1., 2., 3.], + [5., 7., 9.]]) + >>> np.cumsum(a,axis=1) + array([[ 1., 3., 6.], + [ 4., 9., 15.]]) + """ + pass + + +def _np_dot(a, b, out=None): + """dot(a, b, out=None) + + Dot product of two arrays. Specifically, + + - If both `a` and `b` are 1-D arrays, it is inner product of vectors + + - If both `a` and `b` are 2-D arrays, it is matrix multiplication, + + - If either `a` or `b` is 0-D (scalar), it is equivalent to :func:`multiply` + and using ``np.multiply(a, b)`` or ``a * b`` is preferred. + + - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over + the last axis of `a` and `b`. + + - If `a` is an N-D array and `b` is a 2-D array, it is a + sum product over the last axis of `a` and the second-to-last axis of `b`:: + + dot(a, b)[i,j,k] = sum(a[i,j,:] * b[:,k]) + + Parameters + ---------- + a : ndarray + First argument. + b : ndarray + Second argument. 
+ + out : ndarray, optional + Output argument. It must have the same shape and type as the expected output. + + Returns + ------- + output : ndarray + Returns the dot product of `a` and `b`. If `a` and `b` are both + scalars or both 1-D arrays then a scalar is returned; otherwise + an array is returned. + If `out` is given, then it is returned + + Examples + -------- + >>> a = np.array(3) + >>> b = np.array(4) + >>> np.dot(a, b) + array(12.) + + For 2-D arrays it is the matrix product: + + >>> a = np.array([[1, 0], [0, 1]]) + >>> b = np.array([[4, 1], [2, 2]]) + >>> np.dot(a, b) + array([[4., 1.], + [2., 2.]]) + + >>> a = np.arange(3*4*5*6).reshape((3,4,5,6)) + >>> b = np.arange(5*6)[::-1].reshape((6,5)) + >>> np.dot(a, b)[2,3,2,2] + array(29884.) + >>> np.sum(a[2,3,2,:] * b[:,2]) + array(29884.) + """ + pass + + +def _np_sum(a, axis=0, dtype=None, keepdims=None, initial=None, out=None): + r""" + sum(a, axis=None, dtype=None, keepdims=_Null, initial=_Null, out=None) + + Sum of array elements over a given axis. + + Parameters + ---------- + a : ndarray + Input data. + axis : None or int, optional + Axis or axes along which a sum is performed. The default, + axis=None, will sum all of the elements of the input array. If + axis is negative it counts from the last to the first axis. + dtype : dtype, optional + The type of the returned array and of the accumulator in which the + elements are summed. The default type is float32. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `sum` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-classes `sum` method does not implement `keepdims` any + exceptions will be raised. + initial: Currently only supports None as input, optional + Starting value for the sum. + Currently not implemented. Please use ``None`` as input or skip this argument. + out : ndarray or None, optional + Alternative output array in which to place the result. It must have + the same shape and dtype as the expected output. + + Returns + ------- + sum_along_axis : ndarray + An ndarray with the same shape as `a`, with the specified + axis removed. If an output array is specified, a reference to + `out` is returned. + + Notes + ----- + - Input type does not support Python native iterables. + - "out" param: cannot perform auto type change. out ndarray's dtype must be the same as the expected output. + - "initial" param is not supported yet. Please use None as input. + - Arithmetic is modular when using integer types, and no error is raised on overflow. + - The sum of an empty array is the neutral element 0: + + >>> a = np.empty(1) + >>> np.sum(a) + array(0.) + + This function differs from the original `numpy.sum + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). + - "out" param: cannot perform auto type cast. out ndarray's dtype must be the same as the expected output. + - "initial" param is not supported yet. Please use ``None`` as input or skip it. + + Examples + -------- + >>> a = np.array([0.5, 1.5]) + >>> np.sum(a) + array(2.) + >>> a = np.array([0.5, 0.7, 0.2, 1.5]) + >>> np.sum(a, dtype=np.int32) + array(2, dtype=int32) + >>> a = np.array([[0, 1], [0, 5]]) + >>> np.sum(a) + array(6.) 
+ >>> np.sum(a, axis=0) + array([0., 6.]) + >>> np.sum(a, axis=1) + array([1., 5.]) + + With output ndarray: + + >>> a = np.array([[0, 1], [0, 5]]) + >>> b = np.ones((2,), dtype=np.float32) + >>> np.sum(a, axis = 0, out=b) + array([0., 6.]) + >>> b + array([0., 6.]) + + If the accumulator is too small, overflow occurs: + + >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8) + array(-128, dtype=int8) + """ + pass + + +def _np_copy(a, out=None): + """ + copy(a, out=None) + + Return an array copy of the given object. + + Parameters + ---------- + a : ndarray + Input data. + out : ndarray or None, optional + Alternative output array in which to place the result. It must have + the same shape and dtype as the expected output. + + Returns + ------- + arr : ndarray + Array interpretation of `a`. + + Notes + ------- + This function differs from the original `numpy.copy + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - Does not support "order" parameter. + + Examples + -------- + Create an array x, with a reference y and a copy z: + + >>> x = np.array([1, 2, 3]) + >>> y = x + >>> z = np.copy(x) + + Note that, when ``x`` is modified, ``y`` is also modified, but not ``z``: + + >>> x[0] = 10 + >>> x[0] == y[0] + array([1.]) + >>> x[0] == z[0] + array([0.]) + """ + pass + + +def _np_transpose(a, axes=None): + """ + transpose(a, axes=None) + + Permute the dimensions of an array. + + Parameters + ---------- + a : ndarray + Input array. + axes : list of ints, optional + By default, reverse the dimensions, + otherwise permute the axes according to the values given. + + Returns + ------- + p : ndarray + a with its axes permuted. + + Notes + ----- + This function differs from the original `numpy.transpose + `_ in + the following way(s): + + - only ndarray is accepted as valid input, python iterables are not supported + + Examples + -------- + >>> x = np.arange(4).reshape((2,2)) + >>> x + array([[0., 1.], + [2., 3.]]) + >>> np.transpose(x) + array([[0., 2.], + [1., 3.]]) + >>> x = np.ones((1, 2, 3)) + >>> np.transpose(x, (1, 0, 2)).shape + (2, 1, 3) + """ + pass + + +def _np_trace(a, offset=0, axis1=0, axis2=1, out=None): + """trace(a, offset=0, axis1=0, axis2=1, out=None) + + Return the sum along diagonals of the array. + + If `a` is 2-D, the sum along its diagonal with the given offset + is returned, i.e., the sum of elements ``a[i,i+offset]`` for all i. + + If `a` has more than two dimensions, then the axes specified by axis1 and + axis2 are used to determine the 2-D sub-arrays whose traces are returned. + The shape of the resulting array is the same as that of `a` with `axis1` + and `axis2` removed. + + Parameters + ---------- + a : ndarray + Input array, from which the diagonals are taken. + offset : int, optional + Offset of the diagonal from the main diagonal. Can be both positive + and negative. Defaults to 0. + axis1, axis2 : int, optional + Axes to be used as the first and second axis of the 2-D sub-arrays + from which the diagonals should be taken. Defaults are the first two + axes of `a`. + out : ndarray, optional + Array into which the output is placed. It must be of the right shape + and right type to hold the output. 
+ + Returns + ------- + sum_along_diagonals : ndarray + If `a` is 2-D, the sum along the diagonal is returned. If `a` has + larger dimensions, then an array of sums along diagonals is returned. + + Examples + -------- + >>> a = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) + >>> np.trace(a) + array(3.) + >>> a = np.arange(8).reshape((2, 2, 2)) + >>> np.trace(a) + array([6., 8.]) + >>> a = np.arange(24).reshape((2, 2, 2, 3)) + >>> np.trace(a).shape + (2, 3) + """ + pass diff --git a/python/mxnet/base.py b/python/mxnet/base.py index 73fae4876873..545c2ea4eb19 100644 --- a/python/mxnet/base.py +++ b/python/mxnet/base.py @@ -16,7 +16,7 @@ # under the License. # coding: utf-8 -# pylint: disable=invalid-name, no-member, trailing-comma-tuple, bad-mcs-classmethod-argument, unnecessary-pass +# pylint: disable=invalid-name, no-member, trailing-comma-tuple, bad-mcs-classmethod-argument, unnecessary-pass, too-many-lines """ctypes library of mxnet and helper functions.""" from __future__ import absolute_import @@ -598,7 +598,9 @@ def _init_op_module(root_namespace, module_name, make_op_func): ctypes.byref(plist))) op_names = [] for i in range(size.value): - op_names.append(py_str(plist[i])) + op_name = py_str(plist[i]) + if not _is_np_op(op_name): + op_names.append(op_name) module_op = sys.modules["%s.%s.op" % (root_namespace, module_name)] module_internal = sys.modules["%s.%s._internal" % (root_namespace, module_name)] @@ -692,7 +694,9 @@ def write_all_str(module_file, module_all_list): ctypes.byref(plist))) op_names = [] for i in range(size.value): - op_names.append(py_str(plist[i])) + op_name = py_str(plist[i]) + if not _is_np_op(op_name): + op_names.append(op_name) module_op_file = get_module_file("%s.%s.op" % (root_namespace, module_name)) module_op_all = [] @@ -734,3 +738,109 @@ def write_all_str(module_file, module_all_list): ctypes.pythonapi.PyCapsule_New.restype = ctypes.py_object ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p + + +_NP_OP_PREFIX = '_np_' +_NP_OP_SUBMODULE_LIST = ['_random_', '_linalg_'] + +_NP_EXT_OP_PREFIX = '_npx_' +_NP_EXT_OP_SUBMODULE_LIST = ['_image_'] + +_NP_INTERNAL_OP_PREFIX = '_npi_' + + +def _is_np_op(op_name): + return op_name.startswith(_NP_OP_PREFIX) or op_name.startswith(_NP_EXT_OP_PREFIX)\ + or op_name.startswith(_NP_INTERNAL_OP_PREFIX) + + +def _get_op_submodule_name(op_name, op_name_prefix, submodule_name_list): + assert op_name.startswith(op_name_prefix) + for submodule_name in submodule_name_list: + if op_name[len(op_name_prefix):].startswith(submodule_name): + return submodule_name + return "" + + +def _init_np_op_module(root_module_name, np_module_name, mx_module_name, make_op_func): + """ + Register numpy operators in namespaces `mxnet.numpy`, `mxnet.ndarray.numpy` + and `mxnet.symbol.numpy`. They are used in imperative mode, Gluon APIs w/o hybridization, + and Gluon APIs w/ hybridization, respectively. Essentially, operators with the same name + registered in three namespaces, respectively share the same functionality in C++ backend. + Different namespaces are needed for dispatching operator calls in Gluon's `HybridBlock` by `F`. + + Parameters + ---------- + root_module_name : str + Top level module name, `mxnet` in the current cases. + np_module_name : str + Second level module name, `numpy` or `numpy_extension` in the current case. + make_op_func : function + Function for creating op functions. + """ + from . 
import _numpy_op_doc as _np_op_doc + if np_module_name == 'numpy': + op_name_prefix = _NP_OP_PREFIX + submodule_name_list = _NP_OP_SUBMODULE_LIST + elif np_module_name == 'numpy_extension': + op_name_prefix = _NP_EXT_OP_PREFIX + submodule_name_list = _NP_EXT_OP_SUBMODULE_LIST + elif np_module_name == 'numpy._internal': + op_name_prefix = _NP_INTERNAL_OP_PREFIX + submodule_name_list = [] + else: + raise ValueError('unsupported np module name {}'.format(np_module_name)) + + plist = ctypes.POINTER(ctypes.c_char_p)() + size = ctypes.c_uint() + check_call(_LIB.MXListAllOpNames(ctypes.byref(size), ctypes.byref(plist))) + op_names = [] + for i in range(size.value): + name = py_str(plist[i]) + if name.startswith(op_name_prefix): + op_names.append(name) + + if mx_module_name is None: + # register np/npx ops for imperative programming + op_module_name = "%s.%s._op" % (root_module_name, np_module_name) # e.g. mxnet.numpy._op + op_submodule_name = "%s.%s" % (root_module_name, np_module_name) # e.g. mxnet.numpy.random + elif mx_module_name in ('ndarray', 'symbol'): + # register numpy internal ops and np/npx ops for use in Gluon + # np internal ops are registered in mxnet.ndarray/symbol.numpy._internal + # np ops are registered in mxnet.ndarray/symbol.numpy._op + # npx ops are registered in mxnet.ndarray/symbol.numpy_extension._op + op_module_name = "%s.%s.%s" % (root_module_name, mx_module_name, np_module_name) + if op_name_prefix != _NP_INTERNAL_OP_PREFIX: + op_module_name += '._op' + # e.g. mxnet.symbol.numpy.random + op_submodule_name = "%s.%s.%s" % (root_module_name, mx_module_name, np_module_name) + else: + raise ValueError('unsupported mxnet module {}'.format(mx_module_name)) + op_submodule_name += '.%s' + + op_module = sys.modules[op_module_name] + submodule_dict = {} + for submodule_name in submodule_name_list: + submodule_dict[submodule_name] = sys.modules[op_submodule_name % submodule_name[1:-1]] + for name in op_names: + hdl = OpHandle() + check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl))) + submodule_name = _get_op_submodule_name(name, op_name_prefix, submodule_name_list) + if len(submodule_name) > 0: + func_name = name[(len(op_name_prefix) + len(submodule_name)):] + cur_module = submodule_dict[submodule_name] + module_name_local = op_submodule_name % submodule_name[1:-1] + else: + func_name = name[len(op_name_prefix):] + cur_module = op_module + module_name_local =\ + op_module_name[:-len('._op')] if op_module_name.endswith('._op') else op_module_name + + function = make_op_func(hdl, name, func_name) + function.__module__ = module_name_local + setattr(cur_module, function.__name__, function) + cur_module.__all__.append(function.__name__) + + if hasattr(_np_op_doc, name): + function.__doc__ = getattr(_np_op_doc, name).__doc__ diff --git a/python/mxnet/contrib/text/embedding.py b/python/mxnet/contrib/text/embedding.py index b7f3fcbc5c50..030fe9835fed 100644 --- a/python/mxnet/contrib/text/embedding.py +++ b/python/mxnet/contrib/text/embedding.py @@ -35,6 +35,9 @@ from ... import ndarray as nd from ... import registry from ... import base +from ...util import is_np_array +from ... import numpy as _mx_np +from ... 
import numpy_extension as _mx_npx def register(embedding_cls): @@ -295,12 +298,15 @@ def _load_embedding(self, pretrained_file_path, elem_delim, init_unknown_vec, en tokens.add(token) self._vec_len = vec_len - self._idx_to_vec = nd.array(all_elems).reshape((-1, self.vec_len)) + array_fn = _mx_np.array if is_np_array() else nd.array + self._idx_to_vec = array_fn(all_elems).reshape((-1, self.vec_len)) if loaded_unknown_vec is None: - self._idx_to_vec[C.UNKNOWN_IDX] = init_unknown_vec(shape=self.vec_len) + init_val = init_unknown_vec(shape=self.vec_len) + self._idx_to_vec[C.UNKNOWN_IDX] =\ + init_val.as_np_ndarray() if is_np_array() else init_val else: - self._idx_to_vec[C.UNKNOWN_IDX] = nd.array(loaded_unknown_vec) + self._idx_to_vec[C.UNKNOWN_IDX] = array_fn(loaded_unknown_vec) def _index_tokens_from_vocabulary(self, vocabulary): self._token_to_idx = vocabulary.token_to_idx.copy() \ @@ -328,7 +334,8 @@ def _set_idx_to_vec_by_embeddings(self, token_embeddings, vocab_len, vocab_idx_t """ new_vec_len = sum(embed.vec_len for embed in token_embeddings) - new_idx_to_vec = nd.zeros(shape=(vocab_len, new_vec_len)) + zeros_fn = _mx_np.zeros if is_np_array() else nd.zeros + new_idx_to_vec = zeros_fn(shape=(vocab_len, new_vec_len)) col_start = 0 # Concatenate all the embedding vectors in token_embeddings. @@ -397,7 +404,13 @@ def get_vecs_by_tokens(self, tokens, lower_case_backup=False): else self.token_to_idx.get(token.lower(), C.UNKNOWN_IDX) for token in tokens] - vecs = nd.Embedding(nd.array(indices), self.idx_to_vec, self.idx_to_vec.shape[0], + if is_np_array(): + embedding_fn = _mx_npx.embedding + array_fn = _mx_np.array + else: + embedding_fn = nd.Embedding + array_fn = nd.array + vecs = embedding_fn(array_fn(indices), self.idx_to_vec, self.idx_to_vec.shape[0], self.idx_to_vec.shape[1]) return vecs[0] if to_reduce else vecs @@ -425,7 +438,8 @@ def update_token_vectors(self, tokens, new_vectors): if not isinstance(tokens, list): tokens = [tokens] if len(new_vectors.shape) == 1: - new_vectors = new_vectors.expand_dims(0) + expand_dims_fn = _mx_np.expand_dims if is_np_array() else nd.expand_dims + new_vectors = expand_dims_fn(new_vectors, axis=0) else: assert isinstance(new_vectors, nd.NDArray) and len(new_vectors.shape) == 2, \ @@ -444,7 +458,8 @@ def update_token_vectors(self, tokens, new_vectors): '`unknown_token` %s in `tokens`. This is to avoid unintended ' 'updates.' 
% (token, self.idx_to_token[C.UNKNOWN_IDX])) - self._idx_to_vec[nd.array(indices)] = new_vectors + array_fn = _mx_np.array if is_np_array() else nd.array + self._idx_to_vec[array_fn(indices)] = new_vectors @classmethod def _check_pretrained_file_names(cls, pretrained_file_name): diff --git a/python/mxnet/cython/ndarray.pyx b/python/mxnet/cython/ndarray.pyx index f9279889b504..50791e9b9a86 100644 --- a/python/mxnet/cython/ndarray.pyx +++ b/python/mxnet/cython/ndarray.pyx @@ -64,21 +64,27 @@ cdef class NDArrayBase: _ndarray_cls = None +_np_ndarray_cls = None def _set_ndarray_class(cls): global _ndarray_cls _ndarray_cls = cls -cdef NewArray(NDArrayHandle handle, int stype=-1): +def _set_np_ndarray_class(cls): + global _np_ndarray_cls + _np_ndarray_cls = cls + + +cdef NewArray(NDArrayHandle handle, int stype=-1, int is_np_array=0): """Create a new array given handle""" - return _ndarray_cls(_ctypes.cast(handle, _ctypes.c_void_p), stype=stype) + create_array_fn = _np_ndarray_cls if is_np_array else _ndarray_cls + return create_array_fn(_ctypes.cast(handle, _ctypes.c_void_p), stype=stype) cdef class CachedOp: """Cached operator handle.""" cdef CachedOpHandle chandle - cdef _set_handle(self, handle): cdef unsigned long long ptr if handle is None: @@ -96,6 +102,8 @@ cdef class CachedOp: def __set__(self, value): self._set_handle(value) + cdef int is_np_sym + def __init__(self, sym, flags=()): cdef vector[string] s_flag_keys cdef vector[string] s_flag_vals @@ -106,6 +114,9 @@ cdef class CachedOp: cdef vector[const char*] c_flag_keys = SVec2Ptr(s_flag_keys) cdef vector[const char*] c_flag_vals = SVec2Ptr(s_flag_vals) + from ..symbol.numpy._symbol import _Symbol + self.is_np_sym = bool(isinstance(sym, _Symbol)) + CALL(MXCreateCachedOpEx( (sym.handle.value), len(flags), @@ -154,12 +165,12 @@ cdef class CachedOp: if original_output is not None: return original_output if num_output == 1: - return NewArray(p_output_vars[0], p_output_stypes[0]) + return NewArray(p_output_vars[0], p_output_stypes[0], self.is_np_sym) else: - return [NewArray(p_output_vars[i], p_output_stypes[i]) for i in range(num_output)] + return [NewArray(p_output_vars[i], p_output_stypes[i], self.is_np_sym) for i in range(num_output)] -def _imperative_invoke(handle, ndargs, keys, vals, out): +def _imperative_invoke(handle, ndargs, keys, vals, out, is_np_op=0): """cython implementation of imperative invoke wrapper""" cdef unsigned long long ihandle = handle cdef OpHandle chandle = ihandle @@ -211,6 +222,6 @@ def _imperative_invoke(handle, ndargs, keys, vals, out): if original_output is not None: return original_output if num_output == 1: - return NewArray(p_output_vars[0], p_output_stypes[0]) + return NewArray(p_output_vars[0], p_output_stypes[0], is_np_op) else: - return [NewArray(p_output_vars[i], p_output_stypes[i]) for i in range(num_output)] + return [NewArray(p_output_vars[i], p_output_stypes[i], is_np_op) for i in range(num_output)] diff --git a/python/mxnet/cython/symbol.pyx b/python/mxnet/cython/symbol.pyx index 1bdea6c6c547..86fe8ae6db4f 100644 --- a/python/mxnet/cython/symbol.pyx +++ b/python/mxnet/cython/symbol.pyx @@ -84,19 +84,27 @@ cdef SymbolSetAttr(SymbolHandle handle, dict kwargs): _symbol_cls = SymbolBase +_np_symbol_cls = None def _set_symbol_class(cls): global _symbol_cls _symbol_cls = cls -cdef NewSymbol(SymbolHandle handle): + +def _set_np_symbol_class(cls): + global _np_symbol_cls + _np_symbol_cls = cls + + +cdef NewSymbol(SymbolHandle handle, int is_np_sym=0): """Create a new symbol given handle""" - sym = 
_symbol_cls(None) + create_symbol_fn = _np_symbol_cls if is_np_sym else _symbol_cls + sym = create_symbol_fn(None) (sym).chandle = handle return sym -def _symbol_creator(handle, args, kwargs, keys, vals, name): +def _symbol_creator(handle, args, kwargs, keys, vals, name, is_np_op=0): cdef unsigned long long ihandle = handle cdef OpHandle chandle = ihandle cdef vector[string] ckeys @@ -143,4 +151,4 @@ def _symbol_creator(handle, args, kwargs, keys, vals, name): &csym_keys[0] if csym_keys.size() != 0 else NULL, &sym_args[0] if sym_args.size() != 0 else NULL)) - return NewSymbol(ret_handle) + return NewSymbol(ret_handle, is_np_op) diff --git a/python/mxnet/gluon/block.py b/python/mxnet/gluon/block.py index 20f0a32f48f1..db43203b0202 100644 --- a/python/mxnet/gluon/block.py +++ b/python/mxnet/gluon/block.py @@ -26,13 +26,17 @@ import re from collections import OrderedDict -from ..base import mx_real_t +from ..base import mx_real_t, MXNetError from .. import symbol, ndarray, initializer from ..symbol import Symbol from ..ndarray import NDArray from .. import name as _name from .parameter import Parameter, ParameterDict, DeferredInitializationError from .utils import _indent, _brief_print_list, HookHandle +from .utils import _check_same_symbol_type, _check_all_np_ndarrays +from .. import numpy_extension as _mx_npx +from .. import numpy as _mx_np, numpy_extension as _mx_npx +from .. util import is_np_array, np_shape, np_array class _BlockScope(object): @@ -331,7 +335,8 @@ def save_parameters(self, filename): """ params = self._collect_params_with_prefix() arg_dict = {key : val._reduce() for key, val in params.items()} - ndarray.save(filename, arg_dict) + save_fn = _mx_npx.save if is_np_array() else ndarray.save + save_fn(filename, arg_dict) def save_params(self, filename): """[Deprecated] Please use save_parameters. Note that if you want load @@ -374,7 +379,28 @@ def load_parameters(self, filename, ctx=None, allow_missing=False, `Saving and Loading Gluon Models \ `_ """ - loaded = ndarray.load(filename) + if is_np_array(): + # failure may happen when loading parameters saved as NDArrays within + # NumPy semantics. Check the failure type and recover from it if it happens. + try: + loaded = _mx_npx.load(filename) + except MXNetError as e: + err_msg = str(e) + if 'is_np_shape' in err_msg: + # Loading failure due to parameters saved without numpy semantics. + # Temporarily disable numpy semantics and load parameters. After it's + # done, resume the numpy semantics. This is fine because the cases + # numpy ndarray covers is a superset of the legacy ndarray's. 
+ with np_array(False): + with np_shape(False): + loaded_nds = ndarray.load(filename) + assert isinstance(loaded_nds, dict),\ + 'expecting a dict type, got {}'.format(str(type(loaded_nds))) + loaded = {k: loaded_nds[k].as_np_ndarray() for k in loaded_nds} + else: + raise ValueError(err_msg) + else: + loaded = ndarray.load(filename) params = self._collect_params_with_prefix() if not loaded and not params: return @@ -541,7 +567,8 @@ def __call__(self, *args): for hook in self._forward_hooks.values(): hook(self, args, out) - + if _mx_npx.is_np_array(): + _check_all_np_ndarrays(out) return out def forward(self, *args): @@ -731,9 +758,13 @@ def _get_graph(self, *args): if not self._cached_graph: args, self._in_format = _flatten(args, "input") if len(args) > 1: - inputs = [symbol.var('data%d'%i) for i in range(len(args))] + inputs = [symbol.var('data%d' % i).as_np_ndarray() + if isinstance(args[i], _mx_np.ndarray) + else symbol.var('data%d' % i) for i in range(len(args))] else: - inputs = [symbol.var('data')] + inputs = [symbol.var('data').as_np_ndarray() + if isinstance(args[0], _mx_np.ndarray) + else symbol.var('data')] grouped_inputs = _regroup(inputs, self._in_format)[0] params = {i: j.var() for i, j in self._reg_params.items()} @@ -741,7 +772,7 @@ def _get_graph(self, *args): out = self.hybrid_forward(symbol, *grouped_inputs, **params) # pylint: disable=no-value-for-parameter out, self._out_format = _flatten(out, "output") - self._cached_graph = inputs, symbol.Group(out) + self._cached_graph = inputs, symbol.Group(out, _check_same_symbol_type(out)) return self._cached_graph @@ -896,7 +927,8 @@ def export(self, path, epoch=0, remove_amp_cast=True): else: assert name in aux_names arg_dict['aux:%s'%name] = param._reduce() - ndarray.save('%s-%04d.params'%(path, epoch), arg_dict) + save_fn = _mx_npx.save if is_np_array() else ndarray.save + save_fn('%s-%04d.params'%(path, epoch), arg_dict) def forward(self, x, *args): """Defines the forward computation. Arguments can be either @@ -1044,7 +1076,7 @@ def __init__(self, outputs, inputs, params=None): syms, self._in_format = _flatten(inputs, "input") out, self._out_format = _flatten(outputs, "output") - out = symbol.Group(out) + out = symbol.Group(out, _check_same_symbol_type(out)) input_names = set() for i in syms: diff --git a/python/mxnet/gluon/data/dataloader.py b/python/mxnet/gluon/data/dataloader.py index 934f2d5954c1..a0e9da0062e8 100644 --- a/python/mxnet/gluon/data/dataloader.py +++ b/python/mxnet/gluon/data/dataloader.py @@ -18,6 +18,7 @@ # coding: utf-8 # pylint: disable=ungrouped-imports """Dataset generator.""" +from __future__ import absolute_import __all__ = ['DataLoader'] import pickle @@ -37,6 +38,8 @@ from . import sampler as _sampler from ... import nd, context +from ...util import is_np_shape, is_np_array, set_np +from ... 
import numpy as _mx_np # pylint: disable=reimported if sys.platform == 'darwin' or sys.platform == 'win32': def rebuild_ndarray(*args): @@ -124,30 +127,37 @@ def __init__(self, *args, **kwargs): self._send = self._writer.send self._recv = self._reader.recv + def default_batchify_fn(data): """Collate data into batch.""" if isinstance(data[0], nd.NDArray): - return nd.stack(*data) + return _mx_np.stack(data) if is_np_array() else nd.stack(*data) elif isinstance(data[0], tuple): data = zip(*data) return [default_batchify_fn(i) for i in data] else: data = np.asarray(data) - return nd.array(data, dtype=data.dtype) + array_fn = _mx_np.array if is_np_array() else nd.array + return array_fn(data, dtype=data.dtype) def default_mp_batchify_fn(data): """Collate data into batch. Use shared memory for stacking.""" if isinstance(data[0], nd.NDArray): - out = nd.empty((len(data),) + data[0].shape, dtype=data[0].dtype, + empty_fn = _mx_np.empty if is_np_array() else nd.empty + out = empty_fn((len(data),) + data[0].shape, dtype=data[0].dtype, ctx=context.Context('cpu_shared', 0)) - return nd.stack(*data, out=out) + if is_np_array(): + return _mx_np.stack(data, out=out) + else: + return nd.stack(*data, out=out) elif isinstance(data[0], tuple): data = zip(*data) return [default_mp_batchify_fn(i) for i in data] else: data = np.asarray(data) - return nd.array(data, dtype=data.dtype, + array_fn = _mx_np.array if is_np_array() else nd.array + return array_fn(data, dtype=data.dtype, ctx=context.Context('cpu_shared', 0)) @@ -382,14 +392,21 @@ def same_process_iter(): def __len__(self): return len(self._batch_sampler) + +def _thread_worker_initializer(active_shape, active_array): + """Initializer for ThreadPool.""" + set_np(shape=active_shape, array=active_array) + + _worker_dataset = None -def _worker_initializer(dataset): +def _worker_initializer(dataset, active_shape, active_array): """Initialier for processing pool.""" # global dataset is per-process based and only available in worker processes # this is only necessary to handle MXIndexedRecordIO because otherwise dataset # can be passed as argument global _worker_dataset _worker_dataset = dataset + set_np(shape=active_shape, array=active_array) def _worker_fn(samples, batchify_fn, dataset=None): """Function for processing data in worker process.""" @@ -555,10 +572,13 @@ def __init__(self, dataset, batch_size=None, shuffle=False, sampler=None, self._prefetch = max(0, int(prefetch) if prefetch is not None else 2 * self._num_workers) if self._num_workers > 0: if self._thread_pool: - self._worker_pool = ThreadPool(self._num_workers) + self._worker_pool = ThreadPool(self._num_workers, + initializer=_thread_worker_initializer, + initargs=(is_np_shape(), is_np_array())) else: self._worker_pool = multiprocessing.Pool( - self._num_workers, initializer=_worker_initializer, initargs=[self._dataset]) + self._num_workers, initializer=_worker_initializer, + initargs=[self._dataset, is_np_shape(), is_np_array()]) if batchify_fn is None: if num_workers > 0: self._batchify_fn = default_mp_batchify_fn diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py index 12ef7e16ef49..bdcaff52a042 100644 --- a/python/mxnet/gluon/data/vision/datasets.py +++ b/python/mxnet/gluon/data/vision/datasets.py @@ -31,6 +31,8 @@ from .. import dataset from ...utils import download, check_sha1, _get_repo_file_url from .... import nd, image, recordio, base +from .... 
import numpy as _mx_np # pylint: disable=reimported +from ....util import is_np_array class MNIST(dataset._DownloadedDataset): @@ -81,13 +83,16 @@ def _get_data(self): with gzip.open(label_file, 'rb') as fin: struct.unpack(">II", fin.read(8)) label = np.frombuffer(fin.read(), dtype=np.uint8).astype(np.int32) + if is_np_array(): + label = _mx_np.array(label, dtype=label.dtype) with gzip.open(data_file, 'rb') as fin: struct.unpack(">IIII", fin.read(16)) data = np.frombuffer(fin.read(), dtype=np.uint8) data = data.reshape(len(label), 28, 28, 1) - self._data = nd.array(data, dtype=data.dtype) + array_fn = _mx_np.array if is_np_array() else nd.array + self._data = array_fn(data, dtype=data.dtype) self._label = label @@ -183,8 +188,9 @@ def _get_data(self): data = np.concatenate(data) label = np.concatenate(label) - self._data = nd.array(data, dtype=data.dtype) - self._label = label + array_fn = _mx_np.array if is_np_array() else nd.array + self._data = array_fn(data, dtype=data.dtype) + self._label = array_fn(label, dtype=label.dtype) if is_np_array() else label class CIFAR100(CIFAR10): diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py index dff7f66b032d..2714c15e5cec 100644 --- a/python/mxnet/gluon/data/vision/transforms.py +++ b/python/mxnet/gluon/data/vision/transforms.py @@ -23,6 +23,7 @@ from ...nn import Sequential, HybridSequential from .... import image from ....base import numeric_types +from ....util import is_np_array class Compose(Sequential): @@ -92,6 +93,8 @@ def __init__(self, dtype='float32'): self._dtype = dtype def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.cast(x, self._dtype) @@ -134,6 +137,8 @@ def __init__(self): super(ToTensor, self).__init__() def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.to_tensor(x) @@ -187,6 +192,8 @@ def __init__(self, mean=0.0, std=1.0): self._std = std def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.normalize(x, self._mean, self._std) @@ -369,6 +376,8 @@ def __init__(self, size, keep_ratio=False, interpolation=1): self._interpolation = interpolation def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.resize(x, self._size, self._keep, self._interpolation) class RandomFlipLeftRight(HybridBlock): @@ -385,6 +394,8 @@ def __init__(self): super(RandomFlipLeftRight, self).__init__() def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_flip_left_right(x) @@ -402,6 +413,8 @@ def __init__(self): super(RandomFlipTopBottom, self).__init__() def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_flip_top_bottom(x) @@ -427,6 +440,8 @@ def __init__(self, brightness): self._args = (max(0, 1-brightness), 1+brightness) def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_brightness(x, *self._args) @@ -452,6 +467,8 @@ def __init__(self, contrast): self._args = (max(0, 1-contrast), 1+contrast) def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_contrast(x, *self._args) @@ -477,6 +494,8 @@ def __init__(self, saturation): self._args = (max(0, 1-saturation), 1+saturation) def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_saturation(x, *self._args) @@ -502,6 +521,8 @@ def __init__(self, hue): self._args = (max(0, 1-hue), 1+hue) def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_hue(x, *self._args) @@ 
-536,6 +557,8 @@ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0): self._args = (brightness, contrast, saturation, hue) def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_color_jitter(x, *self._args) @@ -559,4 +582,6 @@ def __init__(self, alpha): self._alpha = alpha def hybrid_forward(self, F, x): + if is_np_array(): + F = F.npx return F.image.random_lighting(x, self._alpha) diff --git a/python/mxnet/gluon/loss.py b/python/mxnet/gluon/loss.py index e6d4c5bab852..d634e7922fae 100644 --- a/python/mxnet/gluon/loss.py +++ b/python/mxnet/gluon/loss.py @@ -29,6 +29,8 @@ from .. import ndarray from ..base import numeric_types from .block import HybridBlock +from .utils import _adapt_np_array +from ..util import is_np_array def _apply_weighting(F, loss, weight=None, sample_weight=None): @@ -53,7 +55,10 @@ def _apply_weighting(F, loss, weight=None, sample_weight=None): Weighted loss """ if sample_weight is not None: - loss = F.broadcast_mul(loss, sample_weight) + if is_np_array(): + loss = loss * sample_weight + else: + loss = F.broadcast_mul(loss, sample_weight) if weight is not None: assert isinstance(weight, numeric_types), "weight must be a number" @@ -64,7 +69,11 @@ def _apply_weighting(F, loss, weight=None, sample_weight=None): def _reshape_like(F, x, y): """Reshapes x to the same shape as y.""" - return x.reshape(y.shape) if F is ndarray else F.reshape_like(x, y) + if F is ndarray: + return x.reshape(y.shape) + elif is_np_array(): + F = F.npx + return F.reshape_like(x, y) class Loss(HybridBlock): @@ -136,9 +145,15 @@ def __init__(self, weight=1., batch_axis=0, **kwargs): def hybrid_forward(self, F, pred, label, sample_weight=None): label = _reshape_like(F, label, pred) - loss = F.square(label - pred) + loss = F.np.square(label - pred) if is_np_array() else F.square(label - pred) loss = _apply_weighting(F, loss, self._weight / 2, sample_weight) - return F.mean(loss, axis=self._batch_axis, exclude=True) + if is_np_array(): + if F is ndarray: + return F.np.mean(loss, axis=tuple(range(1, loss.ndim))) + else: + return F.npx.batch_flatten(loss).mean(axis=1) + else: + return F.mean(loss, axis=self._batch_axis, exclude=True) class L1Loss(Loss): @@ -173,6 +188,7 @@ class L1Loss(Loss): def __init__(self, weight=None, batch_axis=0, **kwargs): super(L1Loss, self).__init__(weight, batch_axis, **kwargs) + @_adapt_np_array def hybrid_forward(self, F, pred, label, sample_weight=None): label = _reshape_like(F, label, pred) loss = F.abs(label - pred) @@ -244,27 +260,45 @@ def __init__(self, from_sigmoid=False, weight=None, batch_axis=0, **kwargs): def hybrid_forward(self, F, pred, label, sample_weight=None, pos_weight=None): label = _reshape_like(F, label, pred) + if is_np_array(): + relu_fn = F.npx.relu + act_fn = F.npx.activation + abs_fn = F.np.abs + mul_fn = F.np.multiply + log_fn = F.np.log + else: + relu_fn = F.relu + act_fn = F.Activation + abs_fn = F.abs + mul_fn = F.broadcast_mul + log_fn = F.log if not self._from_sigmoid: if pos_weight is None: # We use the stable formula: max(x, 0) - x * z + log(1 + exp(-abs(x))) - loss = F.relu(pred) - pred * label + \ - F.Activation(-F.abs(pred), act_type='softrelu') + loss = relu_fn(pred) - pred * label + \ + act_fn(-abs_fn(pred), act_type='softrelu') else: # We use the stable formula: x - x * z + (1 + z * pos_weight - z) * \ # (log(1 + exp(-abs(x))) + max(-x, 0)) - log_weight = 1 + F.broadcast_mul(pos_weight - 1, label) - loss = pred - pred * label + log_weight * \ - (F.Activation(-F.abs(pred), 
act_type='softrelu') + F.relu(-pred)) + log_weight = 1 + mul_fn(pos_weight - 1, label) + loss = pred - pred * label + log_weight *\ + (act_fn(-abs_fn(pred), act_type='softrelu') + relu_fn(-pred)) else: eps = 1e-12 if pos_weight is None: - loss = -(F.log(pred + eps) * label - + F.log(1. - pred + eps) * (1. - label)) + loss = -(log_fn(pred + eps) * label + + log_fn(1. - pred + eps) * (1. - label)) else: - loss = -(F.broadcast_mul(F.log(pred + eps) * label, pos_weight) - + F.log(1. - pred + eps) * (1. - label)) + loss = -(mul_fn(log_fn(pred + eps) * label, pos_weight) + + log_fn(1. - pred + eps) * (1. - label)) loss = _apply_weighting(F, loss, self._weight, sample_weight) - return F.mean(loss, axis=self._batch_axis, exclude=True) + if is_np_array(): + if F is ndarray: + return F.np.mean(loss, axis=tuple(range(1, loss.ndim))) + else: + return F.npx.batch_flatten(loss).mean(axis=1) + else: + return F.mean(loss, axis=self._batch_axis, exclude=True) SigmoidBCELoss = SigmoidBinaryCrossEntropyLoss @@ -341,15 +375,27 @@ def __init__(self, axis=-1, sparse_label=True, from_logits=False, weight=None, self._from_logits = from_logits def hybrid_forward(self, F, pred, label, sample_weight=None): + if is_np_array(): + log_softmax = F.npx.log_softmax + pick = F.npx.pick + else: + log_softmax = F.log_softmax + pick = F.pick if not self._from_logits: - pred = F.log_softmax(pred, self._axis) + pred = log_softmax(pred, self._axis) if self._sparse_label: - loss = -F.pick(pred, label, axis=self._axis, keepdims=True) + loss = -pick(pred, label, axis=self._axis, keepdims=True) else: label = _reshape_like(F, label, pred) - loss = -F.sum(pred * label, axis=self._axis, keepdims=True) + loss = -(pred * label).sum(axis=self._axis, keepdims=True) loss = _apply_weighting(F, loss, self._weight, sample_weight) - return F.mean(loss, axis=self._batch_axis, exclude=True) + if is_np_array(): + if F is ndarray: + return loss.mean(axis=tuple(range(1, loss.ndim))) + else: + return F.npx.batch_flatten(loss).mean(axis=1) + else: + return loss.mean(axis=self._batch_axis, exclude=True) SoftmaxCELoss = SoftmaxCrossEntropyLoss diff --git a/python/mxnet/gluon/model_zoo/vision/resnet.py b/python/mxnet/gluon/model_zoo/vision/resnet.py index 48390decb11b..50a65ec8d2da 100644 --- a/python/mxnet/gluon/model_zoo/vision/resnet.py +++ b/python/mxnet/gluon/model_zoo/vision/resnet.py @@ -33,6 +33,7 @@ from ...block import HybridBlock from ... import nn from .... import base +from .... 
util import is_np_array # Helpers def _conv3x3(channels, stride, in_channels): @@ -81,7 +82,8 @@ def hybrid_forward(self, F, x): if self.downsample: residual = self.downsample(residual) - x = F.Activation(residual+x, act_type='relu') + act = F.npx.activation if is_np_array() else F.Activation + x = act(residual+x, act_type='relu') return x @@ -129,7 +131,8 @@ def hybrid_forward(self, F, x): if self.downsample: residual = self.downsample(residual) - x = F.Activation(x + residual, act_type='relu') + act = F.npx.activation if is_np_array() else F.Activation + x = act(x + residual, act_type='relu') return x @@ -165,13 +168,14 @@ def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs): def hybrid_forward(self, F, x): residual = x x = self.bn1(x) - x = F.Activation(x, act_type='relu') + act = F.npx.activation if is_np_array() else F.Activation + x = act(x, act_type='relu') if self.downsample: residual = self.downsample(x) x = self.conv1(x) x = self.bn2(x) - x = F.Activation(x, act_type='relu') + x = act(x, act_type='relu') x = self.conv2(x) return x + residual @@ -211,17 +215,18 @@ def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs): def hybrid_forward(self, F, x): residual = x x = self.bn1(x) - x = F.Activation(x, act_type='relu') + act = F.npx.activation if is_np_array() else F.Activation + x = act(x, act_type='relu') if self.downsample: residual = self.downsample(x) x = self.conv1(x) x = self.bn2(x) - x = F.Activation(x, act_type='relu') + x = act(x, act_type='relu') x = self.conv2(x) x = self.bn3(x) - x = F.Activation(x, act_type='relu') + x = act(x, act_type='relu') x = self.conv3(x) return x + residual diff --git a/python/mxnet/gluon/nn/activations.py b/python/mxnet/gluon/nn/activations.py index 8c51b0a52592..a3baae004311 100644 --- a/python/mxnet/gluon/nn/activations.py +++ b/python/mxnet/gluon/nn/activations.py @@ -22,6 +22,7 @@ from ... import initializer from ..block import HybridBlock +from ...util import is_np_array class Activation(HybridBlock): @@ -48,7 +49,8 @@ def _alias(self): return self._act_type def hybrid_forward(self, F, x): - return F.Activation(x, act_type=self._act_type, name='fwd') + act = F.npx.activation if is_np_array() else F.Activation + return act(x, act_type=self._act_type, name='fwd') def __repr__(self): s = '{name}({_act_type})' @@ -88,7 +90,8 @@ def __init__(self, alpha, **kwargs): self._alpha = alpha def hybrid_forward(self, F, x): - return F.LeakyReLU(x, act_type='leaky', slope=self._alpha, name='fwd') + leaky_relu = F.npx.leaky_relu if is_np_array() else F.LeakyReLU + return leaky_relu(x, act_type='leaky', slope=self._alpha, name='fwd') def __repr__(self): s = '{name}({alpha})' diff --git a/python/mxnet/gluon/nn/basic_layers.py b/python/mxnet/gluon/nn/basic_layers.py index 3d6976c32740..87d6e89a4d99 100644 --- a/python/mxnet/gluon/nn/basic_layers.py +++ b/python/mxnet/gluon/nn/basic_layers.py @@ -25,8 +25,9 @@ from .activations import Activation from ..block import Block, HybridBlock -from ..utils import _indent +from ..utils import _indent, _adapt_np_array from ... 
import nd, sym +from ...util import is_np_array class Sequential(Block): @@ -217,8 +218,9 @@ def __init__(self, units, activation=None, use_bias=True, flatten=True, self.act = None def hybrid_forward(self, F, x, weight, bias=None): - act = F.FullyConnected(x, weight, bias, no_bias=bias is None, num_hidden=self._units, - flatten=self._flatten, name='fwd') + fc = F.npx.fully_connected if is_np_array() else F.FullyConnected + act = fc(x, weight, bias, no_bias=bias is None, num_hidden=self._units, + flatten=self._flatten, name='fwd') if self.act is not None: act = self.act(act) return act @@ -263,9 +265,11 @@ def __init__(self, rate, axes=(), **kwargs): def hybrid_forward(self, F, x): if self._rate > 0: - return F.Dropout(x, p=self._rate, axes=self._axes, name='fwd', cudnn_off=False) + dropout = F.npx.dropout if is_np_array() else F.Dropout + return dropout(x, p=self._rate, axes=self._axes, name='fwd', cudnn_off=False) else: - return F.identity(x) + copy = F.np.copy if is_np_array() else F.identity + return copy(x) def __repr__(self): s = '{name}(p = {_rate}, axes={_axes})' @@ -356,8 +360,9 @@ def cast(self, dtype): super(BatchNorm, self).cast(dtype) def hybrid_forward(self, F, x, gamma, beta, running_mean, running_var): - return F.BatchNorm(x, gamma, beta, running_mean, running_var, - name='fwd', **self._kwargs) + batch_norm = F.npx.batch_norm if is_np_array() else F.BatchNorm + return batch_norm(x, gamma, beta, running_mean, running_var, + name='fwd', **self._kwargs) def __repr__(self): s = '{name}({content}' @@ -409,7 +414,8 @@ def __init__(self, input_dim, output_dim, dtype='float32', allow_deferred_init=True, grad_stype=grad_stype) def hybrid_forward(self, F, x, weight): - return F.Embedding(x, weight, name='fwd', **self._kwargs) + embedding = F.npx.embedding if is_np_array() else F.Embedding + return embedding(x, weight, name='fwd', **self._kwargs) def __repr__(self): s = '{block_name}({input_dim} -> {output_dim}, {dtype})' @@ -430,7 +436,8 @@ def __init__(self, **kwargs): super(Flatten, self).__init__(**kwargs) def hybrid_forward(self, F, x): - return F.Flatten(x) + flatten = F.npx.batch_flatten if is_np_array() else F.flatten + return flatten(x) def __repr__(self): return self.__class__.__name__ @@ -514,6 +521,7 @@ def __init__(self, axis=1, epsilon=1e-5, center=True, scale=False, shape=(in_channels,), init=beta_initializer, allow_deferred_init=True) + @_adapt_np_array def hybrid_forward(self, F, x, gamma, beta): if self._axis == 1: return F.InstanceNorm(x, gamma, beta, @@ -603,8 +611,8 @@ def __init__(self, axis=-1, epsilon=1e-5, center=True, scale=True, allow_deferred_init=True) def hybrid_forward(self, F, data, gamma, beta): - norm_data = F.LayerNorm(data, gamma=gamma, beta=beta, axis=self._axis, eps=self._epsilon) - return norm_data + layer_norm = F.npx.layer_norm if is_np_array() else F.LayerNorm + return layer_norm(data, gamma=gamma, beta=beta, axis=self._axis, eps=self._epsilon) def __repr__(self): s = '{name}({content}' @@ -698,6 +706,7 @@ def __init__(self, function, prefix=None): "Unrecognized function in lambda: {} of type {}" .format(function, type(function))) + @_adapt_np_array def hybrid_forward(self, F, x, *args): return self._func(F, x, *args) diff --git a/python/mxnet/gluon/nn/conv_layers.py b/python/mxnet/gluon/nn/conv_layers.py index 4122a08563fa..4682684662cd 100644 --- a/python/mxnet/gluon/nn/conv_layers.py +++ b/python/mxnet/gluon/nn/conv_layers.py @@ -30,11 +30,17 @@ from ... 
import symbol from ...base import numeric_types from .activations import Activation +from ...util import is_np_array def _infer_weight_shape(op_name, data_shape, kwargs): - op = getattr(symbol, op_name) - sym = op(symbol.var('data', shape=data_shape), **kwargs) + data = symbol.var('data', shape=data_shape) + if is_np_array(): + op = getattr(symbol.npx, op_name) + data = data.as_np_ndarray() + else: + op = getattr(symbol, op_name) + sym = op(data, **kwargs) return sym.infer_shape_partial()[0] @@ -109,7 +115,11 @@ def __init__(self, channels, kernel_size, strides, padding, dilation, if adj is not None: self._kwargs['adj'] = adj - dshape = [0]*(len(kernel_size) + 2) + if is_np_array(): + dshape = [-1]*(len(kernel_size) + 2) + else: + dshape = [0]*(len(kernel_size) + 2) + dshape[layout.find('N')] = 1 dshape[layout.find('C')] = in_channels wshapes = _infer_weight_shape(op_name, dshape, self._kwargs) @@ -129,6 +139,8 @@ def __init__(self, channels, kernel_size, strides, padding, dilation, self.act = None def hybrid_forward(self, F, x, weight, bias=None): + if is_np_array(): + F = F.npx if bias is None: act = getattr(F, self._op_name)(x, weight, name='fwd', **self._kwargs) else: @@ -235,9 +247,13 @@ def __init__(self, channels, kernel_size, strides=1, padding=0, dilation=1, if isinstance(kernel_size, numeric_types): kernel_size = (kernel_size,) assert len(kernel_size) == 1, "kernel_size must be a number or a list of 1 ints" + op_name = kwargs.pop('op_name', 'Convolution') + if is_np_array(): + op_name = 'convolution' super(Conv1D, self).__init__( channels, kernel_size, strides, padding, dilation, groups, layout, - in_channels, activation, use_bias, weight_initializer, bias_initializer, **kwargs) + in_channels, activation, use_bias, weight_initializer, bias_initializer, + op_name, **kwargs) class Conv2D(_Conv): @@ -315,9 +331,13 @@ def __init__(self, channels, kernel_size, strides=(1, 1), padding=(0, 0), if isinstance(kernel_size, numeric_types): kernel_size = (kernel_size,)*2 assert len(kernel_size) == 2, "kernel_size must be a number or a list of 2 ints" + op_name = kwargs.pop('op_name', 'Convolution') + if is_np_array(): + op_name = 'convolution' super(Conv2D, self).__init__( channels, kernel_size, strides, padding, dilation, groups, layout, - in_channels, activation, use_bias, weight_initializer, bias_initializer, **kwargs) + in_channels, activation, use_bias, weight_initializer, bias_initializer, + op_name, **kwargs) class Conv3D(_Conv): @@ -396,9 +416,13 @@ def __init__(self, channels, kernel_size, strides=(1, 1, 1), padding=(0, 0, 0), if isinstance(kernel_size, numeric_types): kernel_size = (kernel_size,)*3 assert len(kernel_size) == 3, "kernel_size must be a number or a list of 3 ints" + op_name = kwargs.pop('op_name', 'Convolution') + if is_np_array(): + op_name = 'convolution' super(Conv3D, self).__init__( channels, kernel_size, strides, padding, dilation, groups, layout, - in_channels, activation, use_bias, weight_initializer, bias_initializer, **kwargs) + in_channels, activation, use_bias, weight_initializer, bias_initializer, + op_name, **kwargs) class Conv1DTranspose(_Conv): @@ -480,10 +504,13 @@ def __init__(self, channels, kernel_size, strides=1, padding=0, output_padding=0 output_padding = (output_padding,) assert len(kernel_size) == 1, "kernel_size must be a number or a list of 1 ints" assert len(output_padding) == 1, "output_padding must be a number or a list of 1 ints" + op_name = kwargs.pop('op_name', 'Deconvolution') + if is_np_array(): + op_name = 'deconvolution' 
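(Editor's aside: every layer change above, from `Activation` and `Dense` through the convolution and deconvolution blocks here, follows one dispatch pattern: check `is_np_array()` inside `hybrid_forward` and pick either the legacy operator or its `npx` counterpart. Below is a toy layer, not part of this patch, showing that pattern in isolation; the class name is made up, and `F.npx.relu`/`F.relu` are assumed to be the numpy and legacy activations used elsewhere in this diff.)

```python
from mxnet.gluon import HybridBlock
from mxnet.util import is_np_array

class ScaledReLU(HybridBlock):
    """Toy layer using the same operator-dispatch pattern as the patched built-in layers."""
    def __init__(self, scale=2.0, **kwargs):
        super(ScaledReLU, self).__init__(**kwargs)
        self._scale = scale

    def hybrid_forward(self, F, x):
        # With numpy semantics on, x is a numpy ndarray/_Symbol, which only the
        # F.np/F.npx namespaces accept; otherwise fall back to the legacy operator.
        act = F.npx.relu if is_np_array() else F.relu
        return act(x) * self._scale
```

Doing the check per forward call, rather than at construction time, is the same choice the patch applies throughout `gluon.nn`.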
super(Conv1DTranspose, self).__init__( channels, kernel_size, strides, padding, dilation, groups, layout, in_channels, activation, use_bias, weight_initializer, - bias_initializer, op_name='Deconvolution', adj=output_padding, **kwargs) + bias_initializer, op_name=op_name, adj=output_padding, **kwargs) self.outpad = output_padding @@ -571,10 +598,13 @@ def __init__(self, channels, kernel_size, strides=(1, 1), padding=(0, 0), output_padding = (output_padding,)*2 assert len(kernel_size) == 2, "kernel_size must be a number or a list of 2 ints" assert len(output_padding) == 2, "output_padding must be a number or a list of 2 ints" + op_name = kwargs.pop('op_name', 'Deconvolution') + if is_np_array(): + op_name = 'deconvolution' super(Conv2DTranspose, self).__init__( channels, kernel_size, strides, padding, dilation, groups, layout, in_channels, activation, use_bias, weight_initializer, - bias_initializer, op_name='Deconvolution', adj=output_padding, **kwargs) + bias_initializer, op_name=op_name, adj=output_padding, **kwargs) self.outpad = output_padding @@ -663,10 +693,13 @@ def __init__(self, channels, kernel_size, strides=(1, 1, 1), padding=(0, 0, 0), output_padding = (output_padding,)*3 assert len(kernel_size) == 3, "kernel_size must be a number or a list of 3 ints" assert len(output_padding) == 3, "output_padding must be a number or a list of 3 ints" + op_name = kwargs.pop('op_name', 'Deconvolution') + if is_np_array(): + op_name = 'deconvolution' super(Conv3DTranspose, self).__init__( channels, kernel_size, strides, padding, dilation, groups, layout, in_channels, activation, use_bias, weight_initializer, bias_initializer, - op_name='Deconvolution', adj=output_padding, **kwargs) + op_name=op_name, adj=output_padding, **kwargs) self.outpad = output_padding @@ -693,7 +726,8 @@ def _alias(self): return 'pool' def hybrid_forward(self, F, x): - return F.Pooling(x, name='fwd', **self._kwargs) + pooling = F.npx.pooling if is_np_array() else F.Pooling + return pooling(x, name='fwd', **self._kwargs) def __repr__(self): s = '{name}(size={kernel}, stride={stride}, padding={pad}, ceil_mode={ceil_mode}' diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py index 83edbaf210a7..df0c179e60dc 100644 --- a/python/mxnet/gluon/parameter.py +++ b/python/mxnet/gluon/parameter.py @@ -18,6 +18,8 @@ # coding: utf-8 # pylint: disable=unnecessary-pass """Neural network parameter.""" +from __future__ import absolute_import + __all__ = ['DeferredInitializationError', 'Parameter', 'Constant', 'ParameterDict', 'tensor_types'] @@ -31,7 +33,8 @@ from ..context import Context, cpu from .. import autograd from .utils import _indent, _brief_print_list, shape_is_known -from .. import is_np_shape +from ..util import is_np_shape, is_np_array +from .. 
import numpy as _mx_np # pylint: disable=reimported # pylint: disable= invalid-name tensor_types = (symbol.Symbol, ndarray.NDArray) @@ -131,7 +134,6 @@ def __init__(self, name, grad_req='write', shape=None, dtype=mx_real_t, self._grad_stype = grad_stype self._stype = stype - def __repr__(self): s = 'Parameter {name} (shape={shape}, dtype={dtype})' return s.format(name=self.name, shape=self.shape, dtype=self.dtype) @@ -179,9 +181,9 @@ def shape(self, new_shape): return assert len(self._shape) == len(new_shape) and \ - all(j in (0, i) for i, j in zip(new_shape, self._shape)), \ + all(j in (-1, 0, i) for i, j in zip(new_shape, self._shape)), \ "Expected shape %s is incompatible with given shape %s."%( - str(new_shape), str(self._shape)) + str(new_shape), str(self._shape)) # -1 means unknown dim size in np_shape mode self._shape = new_shape @@ -244,12 +246,14 @@ def _get_row_sparse(self, arr_list, ctx, row_id): def _load_init(self, data, ctx): """(Re)initializes by loading from data.""" if self.shape: + unknown_dim_size = -1 if is_np_shape() else 0 for self_dim, data_dim in zip(self.shape, data.shape): - assert self_dim in (0, data_dim), \ + assert self_dim in (unknown_dim_size, data_dim), \ "Failed loading Parameter '%s' from saved params: " \ "shape incompatible expected %s vs saved %s"%( self.name, str(self.shape), str(data.shape)) - self.shape = tuple(i if i != 0 else j for i, j in zip(self.shape, data.shape)) + self.shape = tuple(i if i != unknown_dim_size else j + for i, j in zip(self.shape, data.shape)) if self.dtype: assert np.dtype(self.dtype).type == data.dtype, \ "Failed loading Parameter '%s' from saved params: " \ @@ -283,6 +287,7 @@ def _finish_deferred_init(self): return init, ctx, default_init, data = self._deferred_init self._deferred_init = () + assert shape_is_known(self.shape), \ "Cannot initialize Parameter '%s' because it has " \ "invalid shape: %s. 
Please specify in_units, " \ @@ -291,8 +296,16 @@ def _finish_deferred_init(self): with autograd.pause(): if data is None: - data = ndarray.zeros(shape=self.shape, dtype=self.dtype, - ctx=context.cpu(), stype=self._stype) + kwargs = {'shape': self.shape, 'dtype': self.dtype, 'ctx': context.cpu()} + if is_np_array(): + if self._stype != 'default': + raise ValueError("mxnet.numpy.zeros does not support stype = {}" + .format(self._stype)) + zeros_fn = _mx_np.zeros + else: + kwargs['stype'] = self._stype + zeros_fn = ndarray.zeros + data = zeros_fn(**kwargs) initializer.create(default_init)( initializer.InitDesc(self.name, {'__init__': init}), data) @@ -317,8 +330,15 @@ def _init_grad(self): self._grad = None return - self._grad = [ndarray.zeros(shape=i.shape, dtype=i.dtype, ctx=i.context, - stype=self._grad_stype) for i in self._data] + if is_np_array(): + if self._grad_stype != 'default': + raise ValueError("mxnet.numpy.zeros does not support stype = {}" + .format(self._grad_stype)) + self._grad = [_mx_np.zeros(shape=i.shape, dtype=i.dtype, ctx=i.context) + for i in self._data] + else: + self._grad = [ndarray.zeros(shape=i.shape, dtype=i.dtype, ctx=i.context, + stype=self._grad_stype) for i in self._data] autograd.mark_variables(self._check_and_get(self._data, list), self._grad, self.grad_req) @@ -328,7 +348,10 @@ def _reduce(self): ctx = context.cpu() if self._stype == 'default': block = self.list_data() - data = ndarray.add_n(*(w.copyto(ctx) for w in block)) / len(block) + if is_np_array(): + data = sum([w.copyto(ctx) for w in block]) / len(block) + else: + data = ndarray.add_n(*(w.copyto(ctx) for w in block)) / len(block) else: # fetch all rows for 'row_sparse' param all_row_ids = ndarray.arange(0, self.shape[0], dtype='int64', ctx=ctx) @@ -428,7 +451,6 @@ def reset_ctx(self, ctx): raise ValueError("Cannot reset context for Parameter '%s' because it " "has not been initialized."%self.name) - def set_data(self, data): """Sets this parameter's value on all contexts.""" self.shape = data.shape @@ -567,6 +589,8 @@ def var(self): self._var = symbol.var(self.name, shape=self.shape, dtype=self.dtype, lr_mult=self.lr_mult, wd_mult=self.wd_mult, init=self.init, stype=self._stype) + if is_np_array(): + self._var = self._var.as_np_ndarray() return self._var def cast(self, dtype): @@ -728,12 +752,12 @@ def get(self, name, **kwargs): inferred_shape = [] matched = True for dim1, dim2 in zip(v, existing): - if dim1 != dim2 and dim1 * dim2 != 0: + if dim1 != dim2 and dim1 > 0 and dim2 > 0: matched = False break elif dim1 == dim2: inferred_shape.append(dim1) - elif dim1 == 0: + elif dim1 in (0, -1): # -1 means unknown dim size in np_shape mode inferred_shape.append(dim2) else: inferred_shape.append(dim1) diff --git a/python/mxnet/gluon/rnn/rnn_layer.py b/python/mxnet/gluon/rnn/rnn_layer.py index b3cc596282a7..9807c5e33108 100644 --- a/python/mxnet/gluon/rnn/rnn_layer.py +++ b/python/mxnet/gluon/rnn/rnn_layer.py @@ -28,6 +28,8 @@ from ... import ndarray, symbol from .. import HybridBlock, tensor_types from . 
import rnn_cell +from ...util import is_np_array + class _RNNLayer(HybridBlock): """Implementation of recurrent layers.""" @@ -217,7 +219,10 @@ def begin_state(self, batch_size=0, func=ndarray.zeros, **kwargs): info.update(kwargs) else: info = kwargs - states.append(func(name='%sh0_%d'%(self.prefix, i), **info)) + state = func(name='%sh0_%d' % (self.prefix, i), **info) + if is_np_array(): + state = state.as_np_ndarray() + states.append(state) return states def __call__(self, inputs, states=None, sequence_length=None, **kwargs): @@ -236,7 +241,6 @@ def __call__(self, inputs, states=None, sequence_length=None, **kwargs): else: return super(_RNNLayer, self).__call__(inputs, states, **kwargs) - def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs): if F is ndarray: batch_size = inputs.shape[self._layout.find('N')] @@ -254,8 +258,9 @@ def hybrid_forward(self, F, inputs, states, sequence_length=None, **kwargs): def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs): """ forward using CUDNN or CPU kenrel""" + swapaxes = F.np.swapaxes if is_np_array() else F.swapaxes if self._layout == 'NTC': - inputs = F.swapaxes(inputs, dim1=0, dim2=1) + inputs = swapaxes(inputs, 0, 1) if self._projection_size is None: params = (kwargs['{}{}_{}_{}'.format(d, l, g, t)].reshape(-1) for t in ['weight', 'bias'] @@ -270,21 +275,23 @@ def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs): for g in ['i2h', 'h2h', 'h2r'] if g != 'h2r' or t != 'bias') - params = F._internal._rnn_param_concat(*params, dim=0) + rnn_param_concat = F.np._internal.rnn_param_concat if is_np_array()\ + else F._internal._rnn_param_concat + params = rnn_param_concat(*params, dim=0) if self._use_sequence_length: rnn_args = states + [sequence_length] else: rnn_args = states - rnn = F.RNN(inputs, params, *rnn_args, use_sequence_length=self._use_sequence_length, - state_size=self._hidden_size, projection_size=self._projection_size, - num_layers=self._num_layers, bidirectional=self._dir == 2, - p=self._dropout, state_outputs=True, mode=self._mode, - lstm_state_clip_min=self._lstm_state_clip_min, - lstm_state_clip_max=self._lstm_state_clip_max, - lstm_state_clip_nan=self._lstm_state_clip_nan) - + rnn_fn = F.npx.rnn if is_np_array() else F.RNN + rnn = rnn_fn(inputs, params, *rnn_args, use_sequence_length=self._use_sequence_length, + state_size=self._hidden_size, projection_size=self._projection_size, + num_layers=self._num_layers, bidirectional=self._dir == 2, + p=self._dropout, state_outputs=True, mode=self._mode, + lstm_state_clip_min=self._lstm_state_clip_min, + lstm_state_clip_max=self._lstm_state_clip_max, + lstm_state_clip_nan=self._lstm_state_clip_nan) if self._mode == 'lstm': outputs, states = rnn[0], [rnn[1], rnn[2]] @@ -292,7 +299,7 @@ def _forward_kernel(self, F, inputs, states, sequence_length, **kwargs): outputs, states = rnn[0], [rnn[1]] if self._layout == 'NTC': - outputs = F.swapaxes(outputs, dim1=0, dim2=1) + outputs = swapaxes(outputs, 0, 1) return outputs, states diff --git a/python/mxnet/gluon/utils.py b/python/mxnet/gluon/utils.py index 3957b7402688..2822c7019a28 100644 --- a/python/mxnet/gluon/utils.py +++ b/python/mxnet/gluon/utils.py @@ -18,6 +18,8 @@ # coding: utf-8 # pylint: disable= """Parallelization utility optimizer.""" +from __future__ import absolute_import + __all__ = ['split_data', 'split_and_load', 'clip_global_norm', 'check_sha1', 'download'] @@ -38,7 +40,9 @@ class requests_failed_to_import(object): import numpy as np from .. 
import ndarray -from ..util import is_np_shape +from ..util import is_np_shape, is_np_array, wraps_safely +from .. import numpy as _mx_np # pylint: disable=reimported + def split_data(data, num_slice, batch_axis=0, even_split=True): """Splits an NDArray into `num_slice` slices along `batch_axis`. @@ -82,12 +86,19 @@ def split_data(data, num_slice, batch_axis=0, even_split=True): slices = [data[i*step:(i+1)*step] if i < num_slice - 1 else data[i*step:size] for i in range(num_slice)] elif even_split: - slices = ndarray.split(data, num_outputs=num_slice, axis=batch_axis) + if is_np_array(): + slices = _mx_np.split(data, indices_or_sections=num_slice, axis=batch_axis) + else: + slices = ndarray.split(data, num_outputs=num_slice, axis=batch_axis) else: - slices = [ndarray.slice_axis(data, batch_axis, i*step, (i+1)*step) - if i < num_slice - 1 else - ndarray.slice_axis(data, batch_axis, i*step, size) - for i in range(num_slice)] + if is_np_array(): + indices = [step * i for i in range(1, num_slice)] + slices = _mx_np.split(data, indices_or_sections=indices, axis=batch_axis) + else: + slices = [ndarray.slice_axis(data, batch_axis, i*step, (i+1)*step) + if i < num_slice - 1 else + ndarray.slice_axis(data, batch_axis, i*step, size) + for i in range(num_slice)] return slices @@ -97,7 +108,7 @@ def split_and_load(data, ctx_list, batch_axis=0, even_split=True): Parameters ---------- - data : NDArray + data : NDArray or ndarray A batch of data. ctx_list : list of Context A list of Contexts. @@ -108,11 +119,12 @@ def split_and_load(data, ctx_list, batch_axis=0, even_split=True): Returns ------- - list of NDArray + list of NDArrays or ndarrays Each corresponds to a context in `ctx_list`. """ + array_fn = _mx_np.array if is_np_array() else ndarray.array if not isinstance(data, ndarray.NDArray): - data = ndarray.array(data, ctx=ctx_list[0]) + data = array_fn(data, ctx=ctx_list[0]) if len(ctx_list) == 1: return [data.as_in_context(ctx_list[0])] @@ -414,6 +426,7 @@ def __enter__(self): def __exit__(self, ptype, value, trace): self.detach() + def shape_is_known(shape): """Check whether a shape is completely known with or without np semantics. @@ -430,3 +443,94 @@ def shape_is_known(shape): assert dim_size > unknown_dim_size, "shape dimension size cannot be less than {}, while " \ "received {}".format(unknown_dim_size, dim_size) return True + + +def _check_same_symbol_type(symbols): + """Check whether all the symbols in the list are of the same type. + Raise type error if the types are different. Return the class of + the symbols.""" + from ..symbol.numpy import _Symbol as np_symbol + from ..symbol import Symbol as nd_symbol + is_np_sym = bool(isinstance(symbols[0], np_symbol)) + for s in symbols[1:]: + if is_np_sym != isinstance(s, np_symbol): + raise TypeError('Found both classic symbol (mx.sym.Symbol) and numpy symbol ' + '(mx.sym.np._Symbol) in outputs. This will prevent you from building ' + 'a computation graph by grouping them since different types of symbols ' + 'are not allowed to be grouped in Gluon to form a computation graph. 
' + 'You will need to convert them to the same type of symbols, either ' + 'classic or numpy following this rule: if you want numpy ndarray ' + 'output(s) from the computation graph, please convert all the classic ' + 'symbols in the list to numpy symbols by calling `as_np_ndarray()` ' + 'on each of them; if you want classic ndarray output(s) from the ' + 'computation graph, please convert all the numpy symbols in the list ' + 'to classic symbols by calling `as_nd_ndarray()` on each of them.') + return np_symbol if is_np_sym else nd_symbol + + +def _check_all_np_ndarrays(out): + """Check if ndarrays/symbols in out are all np.ndarray/np._Symbol.""" + from ..numpy import ndarray as np_ndarray + from ..symbol.numpy import _Symbol as np_symbol + from ..symbol import Symbol as nd_symbol + from ..ndarray import NDArray as nd_ndarray + + # pylint: disable=no-else-raise + if isinstance(out, (nd_ndarray, nd_symbol)) and not isinstance(out, (np_ndarray, np_symbol)): + raise TypeError("Block's output ndarrays/symbols must be of type `mxnet.numpy.ndarray`" + " or `mxnet.symbol.numpy._Symbol`, while got output type {}" + .format(str(type(out)))) + elif isinstance(out, (list, tuple)): + for i in out: + _check_all_np_ndarrays(i) + # pylint: enable=no-else-raise + + +def _to_classic_arrays(*args, **kwargs): + """Convert arrays to classic arrays. This is used in a Gluon layer for converting + inputs of np arrays to classic arrays so that the layer built with legacy ops can still + be used in np_array semantics.""" + from ..numpy import ndarray as np_ndarray + from ..symbol.numpy import _Symbol as np_symbol + num_inputs = len(args) + assert num_inputs != 0 + if not is_np_array(): + return args, kwargs + in_arrs = [arr if arr is None else arr.as_nd_ndarray() for arr in args] + new_kwargs = {} + for k, v in kwargs.items(): + if isinstance(v, (np_ndarray, np_symbol)): + new_kwargs[k] = v.as_nd_ndarray() + else: + new_kwargs[k] = v + return in_arrs, new_kwargs + + +def _to_np_arrays(*args): + """Convert arrays to np arrays. This is used in a Gluon layer for converting + outputs of classic arrays to np arrays so that the layer built with legacy ops can still + be used in np_array semantics.""" + num_outputs = len(args) + assert num_outputs != 0 + if not is_np_array(): + return args[0] if num_outputs == 1 else args + out = [arr.as_np_ndarray() for arr in args] + return out[0] if num_outputs == 1 else out + + +# TODO(junwu): This is a temp solution for allowing basic layers +# implemented using legacy ops to accept np.ndarrays as inputs and return +# np.ndarrays as outputs. We should remove it after changing all the layers +# to use np ops in np_array semantics in the future. +def _adapt_np_array(func): + @wraps_safely(func) + def _with_np_array(*args, **kwargs): + assert len(args) > 2, "expect at least three arguments in args" + if is_np_array(): + input_args, kwargs = _to_classic_arrays(*args[2:], **kwargs) + input_args = list(args[0:2]) + list(input_args) + out = func(*input_args, **kwargs) + return _to_np_arrays(out) + else: + return func(*args, **kwargs) + return _with_np_array diff --git a/python/mxnet/image/detection.py b/python/mxnet/image/detection.py index a70e5723072f..f3b551b53893 100644 --- a/python/mxnet/image/detection.py +++ b/python/mxnet/image/detection.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
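(Editor's aside, before the image pipeline changes below: `_to_classic_arrays`/`_to_np_arrays` and the `_adapt_np_array` decorator above are the glue that lets layers written against legacy operators keep working under numpy semantics. The same hop can be done by hand; a minimal sketch, assuming numpy semantics are enabled via `set_np` and using `L2Normalization` purely as an example of a legacy-only operator.)

```python
import mxnet as mx
from mxnet import numpy as np      # mxnet.numpy, not the official NumPy
from mxnet.util import set_np

set_np()                           # activate numpy shape + array semantics

x = np.ones((2, 3))                # mxnet.numpy.ndarray
x_nd = x.as_nd_ndarray()           # zero-copy view as a classic NDArray
y_nd = mx.nd.L2Normalization(x_nd) # legacy-only operator, chosen for illustration
y = y_nd.as_np_ndarray()           # back to mxnet.numpy.ndarray, still zero-copy
```

Both conversions share the underlying data, which is why the decorator can wrap every call without copying anything.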
-# pylint: disable=unused-import +# pylint: disable=unused-import, too-many-lines """Read images and perform augmentations for object detection.""" from __future__ import absolute_import, print_function @@ -34,6 +34,8 @@ from .image import RandomOrderAug, ColorJitterAug, LightingAug, ColorNormalizeAug from .image import ResizeAug, ForceResizeAug, CastAug, HueJitterAug, RandomGrayAug from .image import fixed_crop, ImageIter, Augmenter +from ..util import is_np_array +from .. import numpy as _mx_np # pylint: disable=reimported class DetAugmenter(object): @@ -762,6 +764,7 @@ def _batchify(self, batch_data, batch_label, start=0): """Override the helper function for batchifying data""" i = start batch_size = self.batch_size + array_fn = _mx_np.array if is_np_array() else nd.array try: while i < batch_size: label, s = self.next_sample() @@ -778,7 +781,7 @@ def _batchify(self, batch_data, batch_label, start=0): assert i < batch_size, 'Batch size must be multiples of augmenter output length' batch_data[i] = self.postprocess_data(datum) num_object = label.shape[0] - batch_label[i][0:num_object] = nd.array(label) + batch_label[i][0:num_object] = array_fn(label) if num_object < batch_label[i].shape[0]: batch_label[i][num_object:] = -1 i += 1 @@ -801,8 +804,14 @@ def next(self): batch_label = self._cache_label i = self._cache_idx else: - batch_data = nd.zeros((batch_size, c, h, w)) - batch_label = nd.empty(self.provide_label[0][1]) + if is_np_array(): + zeros_fn = _mx_np.zeros + empty_fn = _mx_np.empty + else: + zeros_fn = nd.zeros + empty_fn = nd.empty + batch_data = zeros_fn((batch_size, c, h, w)) + batch_label = empty_fn(self.provide_label[0][1]) batch_label[:] = -1 i = self._batchify(batch_data, batch_label) # calculate the padding diff --git a/python/mxnet/image/image.py b/python/mxnet/image/image.py index f7dc27b72951..8834f4c8d5e8 100644 --- a/python/mxnet/image/image.py +++ b/python/mxnet/image/image.py @@ -28,6 +28,7 @@ import json import warnings import numpy as np +from .. import numpy as _mx_np # pylint: disable=reimported try: @@ -40,6 +41,8 @@ from ..ndarray import _internal from .. import io from .. import recordio +from .. 
util import is_np_array +from ..ndarray.numpy import _internal as _npi def imread(filename, *args, **kwargs): @@ -80,7 +83,11 @@ def imread(filename, *args, **kwargs): >>> mx.img.imread("flower.jpg", to_rgb=0) """ - return _internal._cvimread(filename, *args, **kwargs) + if is_np_array(): + read_fn = _npi.cvimread + else: + read_fn = _internal._cvimread + return read_fn(filename, *args, **kwargs) def imresize(src, w, h, *args, **kwargs): @@ -137,7 +144,8 @@ def imresize(src, w, h, *args, **kwargs): >>> new_image """ - return _internal._cvimresize(src, w, h, *args, **kwargs) + resize_fn = _npi.cvimresize if is_np_array() else _internal._cvimresize + return resize_fn(src, w, h, *args, **kwargs) def imdecode(buf, *args, **kwargs): @@ -193,9 +201,11 @@ def imdecode(buf, *args, **kwargs): if sys.version_info[0] == 3 and not isinstance(buf, (bytes, bytearray, np.ndarray)): raise ValueError('buf must be of type bytes, bytearray or numpy.ndarray,' 'if you would like to input type str, please convert to bytes') - buf = nd.array(np.frombuffer(buf, dtype=np.uint8), dtype=np.uint8) + array_fn = _mx_np.array if is_np_array() else nd.array + buf = array_fn(np.frombuffer(buf, dtype=np.uint8), dtype=np.uint8) - return _internal._cvimdecode(buf, *args, **kwargs) + cvimdecode = _npi.cvimdecode if is_np_array() else _internal._cvimdecode + return cvimdecode(buf, *args, **kwargs) def scale_down(src_size, size): @@ -428,7 +438,7 @@ def fixed_crop(src, x0, y0, w, h, size=None, interp=2): NDArray An `NDArray` containing the cropped image. """ - out = nd.slice(src, begin=(y0, x0, 0), end=(y0 + h, x0 + w, int(src.shape[2]))) + out = src[y0:y0+h, x0:x0+w] if size is not None and (w, h) != size: sizes = (h, w, size[1], size[0]) out = imresize(out, *size, interp=_get_interp_method(interp, sizes)) @@ -1206,6 +1216,7 @@ def __init__(self, batch_size, data_shape, label_width=1, else: self.imgrec = None + array_fn = _mx_np.array if is_np_array() else nd.array if path_imglist: logging.info('%s: loading image list %s...', class_name, path_imglist) with open(path_imglist) as fin: @@ -1213,7 +1224,7 @@ def __init__(self, batch_size, data_shape, label_width=1, imgkeys = [] for line in iter(fin.readline, ''): line = line.strip().split('\t') - label = nd.array(line[1:-1], dtype=dtype) + label = array_fn(line[1:-1], dtype=dtype) key = int(line[0]) imglist[key] = (label, line[-1]) imgkeys.append(key) @@ -1227,11 +1238,11 @@ def __init__(self, batch_size, data_shape, label_width=1, key = str(index) # pylint: disable=redefined-variable-type index += 1 if len(img) > 2: - label = nd.array(img[:-1], dtype=dtype) + label = array_fn(img[:-1], dtype=dtype) elif isinstance(img[0], numeric_types): - label = nd.array([img[0]], dtype=dtype) + label = array_fn([img[0]], dtype=dtype) else: - label = nd.array(img[0], dtype=dtype) + label = array_fn(img[0], dtype=dtype) result[key] = (label, img[-1]) imgkeys.append(str(key)) self.imglist = result @@ -1367,8 +1378,14 @@ def next(self): i = self._cache_idx # clear the cache data else: - batch_data = nd.zeros((batch_size, c, h, w)) - batch_label = nd.empty(self.provide_label[0][1]) + if is_np_array(): + zeros_fn = _mx_np.zeros + empty_fn = _mx_np.empty + else: + zeros_fn = nd.zeros + empty_fn = nd.empty + batch_data = zeros_fn((batch_size, c, h, w)) + batch_label = empty_fn(self.provide_label[0][1]) i = self._batchify(batch_data, batch_label) # calculate the padding pad = batch_size - i @@ -1445,4 +1462,7 @@ def augmentation_transform(self, data): def postprocess_data(self, datum): """Final 
postprocessing step before image is loaded into the batch.""" - return nd.transpose(datum, axes=(2, 0, 1)) + if is_np_array(): + return datum.transpose(2, 0, 1) + else: + return nd.transpose(datum, axes=(2, 0, 1)) diff --git a/python/mxnet/initializer.py b/python/mxnet/initializer.py index aca7c58707e2..06e25e98a91f 100755 --- a/python/mxnet/initializer.py +++ b/python/mxnet/initializer.py @@ -29,6 +29,8 @@ from . import random from . import registry from . import ndarray +from . util import is_np_array +from . import numpy as _mx_np # pylint: disable=reimported # inherit str for backward compatibility class InitDesc(str): @@ -495,7 +497,8 @@ def __init__(self, scale=0.07): self.scale = scale def _init_weight(self, _, arr): - random.uniform(-self.scale, self.scale, out=arr) + uniform_fn = _mx_np.random.uniform if is_np_array() else random.uniform + uniform_fn(-self.scale, self.scale, out=arr) @register class Normal(Initializer): @@ -528,7 +531,8 @@ def __init__(self, sigma=0.01): self.sigma = sigma def _init_weight(self, _, arr): - random.normal(0, self.sigma, out=arr) + normal_fn = _mx_np.random.normal if is_np_array() else random.normal + normal_fn(0, self.sigma, out=arr) @register class Orthogonal(Initializer): @@ -627,9 +631,11 @@ def _init_weight(self, name, arr): raise ValueError("Incorrect factor type") scale = np.sqrt(self.magnitude / factor) if self.rnd_type == "uniform": - random.uniform(-scale, scale, out=arr) + uniform_fn = _mx_np.random.uniform if is_np_array() else random.uniform + uniform_fn(-scale, scale, out=arr) elif self.rnd_type == "gaussian": - random.normal(0, scale, out=arr) + normal_fn = _mx_np.random.normal if is_np_array() else random.normal + normal_fn(0, scale, out=arr) else: raise ValueError("Unknown random type") diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py index f09908e894d5..f6b8712a2513 100644 --- a/python/mxnet/ndarray/__init__.py +++ b/python/mxnet/ndarray/__init__.py @@ -17,7 +17,7 @@ """NDArray API of MXNet.""" -from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray +from . import _internal, contrib, linalg, op, random, sparse, utils, image, ndarray, numpy # pylint: disable=wildcard-import, redefined-builtin try: from .gen_op import * # pylint: disable=unused-wildcard-import @@ -30,6 +30,8 @@ from .utils import load, load_frombuffer, save, zeros, empty, array from .sparse import _ndarray_cls from .ndarray import _GRAD_REQ_MAP, _DTYPE_MX_TO_NP, _DTYPE_NP_TO_MX, _new_empty_handle +from . import numpy as np +from . 
import numpy_extension as npx __all__ = op.__all__ + ndarray.__all__ + utils.__all__ + \ - ['contrib', 'linalg', 'random', 'sparse', 'image'] + ['contrib', 'linalg', 'random', 'sparse', 'image', 'numpy', 'numpy_extension'] diff --git a/python/mxnet/ndarray/_internal.py b/python/mxnet/ndarray/_internal.py index 8045d9bd2b14..d48255647939 100644 --- a/python/mxnet/ndarray/_internal.py +++ b/python/mxnet/ndarray/_internal.py @@ -23,18 +23,18 @@ try: if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0: from .._ctypes.ndarray import NDArrayBase, CachedOp - from .._ctypes.ndarray import _set_ndarray_class, _imperative_invoke + from .._ctypes.ndarray import _set_ndarray_class, _imperative_invoke, _set_np_ndarray_class elif _sys.version_info >= (3, 0): from .._cy3.ndarray import NDArrayBase, CachedOp - from .._cy3.ndarray import _set_ndarray_class, _imperative_invoke + from .._cy3.ndarray import _set_ndarray_class, _imperative_invoke, _set_np_ndarray_class else: from .._cy2.ndarray import NDArrayBase, CachedOp - from .._cy2.ndarray import _set_ndarray_class, _imperative_invoke + from .._cy2.ndarray import _set_ndarray_class, _imperative_invoke, _set_np_ndarray_class except ImportError: if int(_os.environ.get("MXNET_ENFORCE_CYTHON", False)) != 0: raise ImportError("Cython Module cannot be loaded but MXNET_ENFORCE_CYTHON=1") from .._ctypes.ndarray import NDArrayBase, CachedOp - from .._ctypes.ndarray import _set_ndarray_class, _imperative_invoke + from .._ctypes.ndarray import _set_ndarray_class, _imperative_invoke, _set_np_ndarray_class from ..base import _Null try: @@ -42,4 +42,5 @@ except ImportError: pass -__all__ = ['NDArrayBase', 'CachedOp', '_imperative_invoke', '_set_ndarray_class'] +__all__ = ['NDArrayBase', 'CachedOp', '_imperative_invoke', '_set_ndarray_class', + '_set_np_ndarray_class'] diff --git a/python/mxnet/ndarray/ndarray.py b/python/mxnet/ndarray/ndarray.py index 4b717e27efe5..7c0fb504d1f8 100644 --- a/python/mxnet/ndarray/ndarray.py +++ b/python/mxnet/ndarray/ndarray.py @@ -184,6 +184,24 @@ class NDArray(NDArrayBase): # See C++ side of definition(kTVMNDArrayTypeCode) at include/mxmet/tensor_blob.h _tvm_tcode = 19 # pylint: disable= no-member, undefined-variable + + def as_np_ndarray(self): + """Convert mxnet.ndarray.NDArray to mxnet.numpy.ndarray.""" + storage_type = self.stype + if storage_type != 'default': + raise ValueError('cannot convert ndarray of stype {} to numpy ndarray' + .format(str(type(storage_type)))) + from ..numpy import ndarray + hdl = NDArrayHandle() + check_call(_LIB.MXShallowCopyNDArray(self.handle, ctypes.byref(hdl))) + return ndarray(handle=hdl, writable=self.writable) + + def as_nd_ndarray(self): + """A convenience function for creating a classic ndarray from the current + ndarray with zero copy. 
For this class, it just returns itself since it is + already a classic ndarray.""" + return self + @property def _tvm_handle(self): return self.handle.value @@ -207,6 +225,7 @@ def _to_shared_mem(self): def __add__(self, other): """x.__add__(y) <=> x+y <=> mx.nd.add(x, y) """ + # other may be the type of mxnet.numpy.ndarray return add(self, other) def __iadd__(self, other): @@ -225,6 +244,7 @@ def __radd__(self, other): def __sub__(self, other): """x.__sub__(y) <=> x-y <=> mx.nd.subtract(x, y) """ + # other may be the type of mxnet.numpy.ndarray return subtract(self, other) def __isub__(self, other): @@ -908,7 +928,7 @@ def _slice(self, start, stop): check_call(_LIB.MXNDArraySlice( self.handle, mx_uint(start), mx_uint(stop), ctypes.byref(handle))) - return NDArray(handle=handle, writable=self.writable) + return self.__class__(handle=handle, writable=self.writable) def _at(self, idx): """Returns a view of the array sliced at `idx` in the first dim. @@ -942,7 +962,7 @@ def _at(self, idx): % (idx-length, length)) check_call(_LIB.MXNDArrayAt( self.handle, mx_uint(idx), ctypes.byref(handle))) - return NDArray(handle=handle, writable=self.writable) + return self.__class__(handle=handle, writable=self.writable) def reshape(self, *shape, **kwargs): """Returns a **view** of this array with a new shape without altering any data. @@ -1065,7 +1085,7 @@ def reshape(self, *shape, **kwargs): c_array(ctypes.c_int64, shape), reverse, ctypes.byref(handle))) - return NDArray(handle=handle, writable=self.writable) + return self.__class__(handle=handle, writable=self.writable) def reshape_like(self, *args, **kwargs): """Convenience fluent method for :py:func:`reshape_like`. @@ -2384,7 +2404,7 @@ def _get_broadcast_shape(shape1, shape2): for a, b in zip(shape1[::-1], shape2[::-1]): if a != 1 and b != 1 and a != b: raise ValueError('shape1=%s is not broadcastable to shape2=%s' % (shape1, shape2)) - shape[i] = max(a, b) + shape[i] = b if a == 1 else a i -= 1 return tuple(shape) diff --git a/python/mxnet/ndarray/numpy/__init__.py b/python/mxnet/ndarray/numpy/__init__.py new file mode 100644 index 000000000000..7eb478f792f5 --- /dev/null +++ b/python/mxnet/ndarray/numpy/__init__.py @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module for numpy ops under mxnet.ndarray.""" + +from . import random +from . import linalg +from . import _op, _internal +from . 
import _register +from ._op import * # pylint: disable=wildcard-import + +__all__ = _op.__all__ diff --git a/python/mxnet/ndarray/numpy/_internal.py b/python/mxnet/ndarray/numpy/_internal.py new file mode 100644 index 000000000000..c5f292842b3b --- /dev/null +++ b/python/mxnet/ndarray/numpy/_internal.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for numpy internal ops.""" + +__all__ = [] diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py new file mode 100644 index 000000000000..4e8dc3c61d8a --- /dev/null +++ b/python/mxnet/ndarray/numpy/_op.py @@ -0,0 +1,1795 @@ +# pylint: disable=C0302 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=too-many-lines +"""Namespace for numpy operators used in Gluon dispatched by F=ndarray.""" + +# pylint: disable=too-many-lines +from __future__ import absolute_import +import numpy as _np +from ...base import numeric_types +from ...util import _sanity_check_params, set_module +from ...context import current_context +from . import _internal as _npi +from ..ndarray import NDArray + +__all__ = ['zeros', 'ones', 'maximum', 'minimum', 'stack', 'arange', 'argmax', + 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'concatenate', + 'clip', 'split', 'swapaxes', 'expand_dims', 'tile', 'linspace', + 'sin', 'cos', 'sinh', 'cosh', 'log10', 'sqrt', 'abs', 'exp', 'arctan', 'sign', 'log', + 'degrees', 'log2', 'rint', 'radians', 'mean', 'reciprocal', 'square', 'arcsin', + 'argsort', 'identity'] + + +@set_module('mxnet.ndarray.numpy') +def zeros(shape, dtype=_np.float32, **kwargs): + """Return a new array of given shape and type, filled with zeros. + This function currently only supports storing multi-dimensional data + in row-major (C-style). + + Parameters + ---------- + shape : int or tuple of int + The shape of the empty array. + dtype : str or numpy.dtype, optional + An optional value type. Default is `numpy.float32`. 
Note that this + behavior is different from NumPy's `zeros` function where `float64` + is the default value, because `float32` is considered as the default + data type in deep learning. + ctx : Context, optional + An optional device context (default is the current default context). + + Returns + ------- + out : ndarray + Array of zeros with the given shape, dtype, and ctx. + """ + _sanity_check_params('zeros', ['order'], kwargs) + ctx = kwargs.pop('ctx', current_context()) + if ctx is None: + ctx = current_context() + dtype = _np.float32 if dtype is None else dtype + return _npi.zeros(shape=shape, ctx=ctx, dtype=dtype, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def ones(shape, dtype=None, **kwargs): + """Return a new array of given shape and type, filled with ones. + This function currently only supports storing multi-dimensional data + in row-major (C-style). + + Parameters + ---------- + shape : int or tuple of int + The shape of the empty array. + dtype : str or numpy.dtype, optional + An optional value type. Default is `numpy.float32`. Note that this + behavior is different from NumPy's `ones` function where `float64` + is the default value, because `float32` is considered as the default + data type in deep learning. + ctx : Context, optional + An optional device context (default is the current default context). + + Returns + ------- + out : ndarray + Array of ones with the given shape, dtype, and ctx. + """ + _sanity_check_params('ones', ['order'], kwargs) + ctx = kwargs.pop('ctx', current_context()) + if ctx is None: + ctx = current_context() + dtype = _np.float32 if dtype is None else dtype + return _npi.ones(shape=shape, ctx=ctx, dtype=dtype, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def identity(n, dtype=None, **kwargs): + """ + Return the identity array. + + The identity array is a square array with ones on + the main diagonal. + + Parameters + ---------- + n : int + Number of rows (and columns) in `n` x `n` output. + dtype : data-type, optional + Data-type of the output. Defaults to ``numpy.float32``. + ctx : Context, optional + An optional device context (default is the current default context). + + Returns + ------- + out : ndarray + `n` x `n` array with its main diagonal set to one, + and all other elements 0. + + Examples + -------- + >>> np.identity(3) + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + """ + if not isinstance(n, int): + raise TypeError("Input 'n' should be an integer") + if n < 0: + raise ValueError("Input 'n' cannot be negative") + ctx = kwargs.pop('ctx', current_context()) + if ctx is None: + ctx = current_context() + dtype = _np.float32 if dtype is None else dtype + return _npi.identity(shape=(n, n), ctx=ctx, dtype=dtype, **kwargs) + + +#pylint: disable= too-many-arguments, no-member, protected-access +def _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar=None, out=None): + """ Helper function for element-wise operation. + The function will perform numpy-like broadcasting if needed and call different functions. + + Parameters + -------- + lhs : ndarray or numeric value + Left-hand side operand. + + rhs : ndarray or numeric value + Right-hand side operand. + + fn_array : function + Function to be called if both lhs and rhs are of ``ndarray`` type. + + fn_scalar : function + Function to be called if both lhs and rhs are numeric values.
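# --- Editor's note: illustrative sketch, not part of the patch. The creation ops
# defined above (`zeros`, `ones`, `identity`) default to float32 rather than NumPy's
# float64. The front-end import below follows the convention used in the demo notebook
# and is an assumption about how these ops are re-exported through `mxnet.numpy`.
from mxnet import numpy as np

a = np.zeros((2, 3))                  # float32 zeros
b = np.ones((2, 3), dtype='float64')  # explicit dtype overrides the float32 default
c = np.identity(3)                    # 3 x 3 identity matrix
print(a.dtype, b.dtype, c.shape)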
+ + lfn_scalar : function + Function to be called if lhs is ``ndarray`` while rhs is numeric value + + rfn_scalar : function + Function to be called if lhs is numeric value while rhs is ``ndarray``; + if none is provided, then the function is commutative, so rfn_scalar is equal to lfn_scalar + + Returns + -------- + mxnet.numpy.ndarray or scalar + result array or scalar + """ + from ...numpy import ndarray + if isinstance(lhs, numeric_types): + if isinstance(rhs, numeric_types): + return fn_scalar(lhs, rhs, out=out) + else: + if rfn_scalar is None: + # commutative function + return lfn_scalar(rhs, float(lhs), out=out) + else: + return rfn_scalar(rhs, float(lhs), out=out) + elif isinstance(rhs, numeric_types): + return lfn_scalar(lhs, float(rhs), out=out) + elif isinstance(rhs, ndarray): + return fn_array(lhs, rhs, out=out) + else: + raise TypeError('type {} not supported'.format(str(type(rhs)))) +#pylint: enable= too-many-arguments, no-member, protected-access + + +@set_module('mxnet.ndarray.numpy') +def maximum(x1, x2, out=None): + """Returns element-wise maximum of the input arrays with broadcasting. + + Parameters + ---------- + x1, x2 : scalar or mxnet.numpy.ndarray + The arrays holding the elements to be compared. They must have the same shape, + or shapes that can be broadcast to a single shape. + + Returns + ------- + out : mxnet.numpy.ndarray or scalar + The maximum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.""" + return _ufunc_helper(x1, x2, _npi.maximum, _np.maximum, _npi.maximum_scalar, None, out) + + +@set_module('mxnet.ndarray.numpy') +def minimum(x1, x2, out=None): + """Returns element-wise minimum of the input arrays with broadcasting. + + Parameters + ---------- + x1, x2 : scalar or mxnet.numpy.ndarray + The arrays holding the elements to be compared. They must have the same shape, + or shapes that can be broadcast to a single shape. + + Returns + ------- + out : mxnet.numpy.ndarray or scalar + The minimum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.""" + return _ufunc_helper(x1, x2, _npi.minimum, _np.minimum, _npi.minimum_scalar, None, out) + + +@set_module('mxnet.ndarray.numpy') +def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """ + mean(a, axis=None, dtype=None, out=None, keepdims=None) + + Compute the arithmetic mean along the specified axis. + Returns the average of the array elements. + The average is taken over the flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : ndarray + ndarray containing numbers whose mean is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default is None; if provided, + it must have the same shape and type as the expected output + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. 
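# --- Editor's note: illustrative sketch, not part of the patch. It shows how
# `_ufunc_helper` above routes `maximum`/`minimum` calls by operand type; the
# `mxnet.numpy` front end and the `np.array` constructor are assumed to delegate
# to the ndarray-level ops defined in this file.
from mxnet import numpy as np

x = np.array([1.0, 5.0, 3.0])
y = np.array([2.0, 2.0, 2.0])
np.maximum(x, y)      # ndarray, ndarray -> fn_array   (_npi.maximum)
np.maximum(x, 2.0)    # ndarray, scalar  -> lfn_scalar (_npi.maximum_scalar)
np.maximum(1.0, 2.0)  # scalar, scalar   -> fn_scalar  (official NumPy), returns 2.0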
+ If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of ndarray, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + + Returns + ------- + m : ndarray, see dtype parameter above + If out=None, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + Notes + ----- + This function differs from the original `numpy.mean + `_ in + the following way(s): + + - only ndarray is accepted as valid input, python iterables or scalar is not supported + - default data type for integer input is float32 + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + array(2.5) + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0,:] = 1.0 + >>> a[1,:] = 0.1 + >>> np.mean(a) + array(0.55) + >>> np.mean(a, dtype=np.float64) + array(0.55) + """ + return _npi.mean(a, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + +@set_module('mxnet.ndarray.numpy') +def stack(arrays, axis=0, out=None): + """Join a sequence of arrays along a new axis. + + The axis parameter specifies the index of the new axis in the dimensions of the result. + For example, if `axis=0` it will be the first dimension and if `axis=-1` it will be the last dimension. + + Parameters + ---------- + arrays : sequence of ndarrays + Each array must have the same shape. + axis : int, optional + The axis in the result array along which the input arrays are stacked. + out : ndarray, optional + If provided, the destination to place the result. The shape must be correct, + matching that of what stack would have returned if no out argument were specified. + + Returns + ------- + out : ndarray + The stacked array has one more dimension than the input arrays. + + Notes + ----- + This function differs from the original `numpy.stack + `_ in + the following ways: + + - only sequence of ndarray is accepted as valid input + + Examples + -------- + >>> arrays = [np.random.uniform(size=(3, 4)) for _ in range(10)] + >>> np.stack(arrays, axis=0).shape + (10, 3, 4) + >>> np.stack(arrays, axis=1).shape + (3, 10, 4) + >>> np.stack(arrays, axis=2).shape + (3, 4, 10) + >>> a = np.array([1, 2, 3]) + >>> b = np.array([2, 3, 4]) + >>> np.stack((a, b)) + array([[1., 2., 3.], + [2., 3., 4.]]) + >>> np.stack((a, b), axis=-1) + array([[1., 2.], + [2., 3.], + [3., 4.]]) + """ + def get_list(arrays): + if not hasattr(arrays, '__getitem__') and hasattr(arrays, '__iter__'): + raise ValueError("expected iterable for arrays but got {}".format(type(arrays))) + return [arr for arr in arrays] + + arrays = get_list(arrays) + return _npi.stack(*arrays, axis=axis, out=out) + + +@set_module('mxnet.ndarray.numpy') +def arange(start, stop=None, step=1, dtype=None, ctx=None): + """Return evenly spaced values within a given interval. + + Values are generated within the half-open interval ``[start, stop)`` + (in other words, the interval including `start` but excluding `stop`). + For integer arguments the function is equivalent to the Python built-in + `range` function, but returns an ndarray rather than a list. + + Parameters + ---------- + start : number, optional + Start of interval. The interval includes this value. The default + start value is 0. + stop : number + End of interval. The interval does not include this value, except + in some cases where `step` is not an integer and floating point + round-off affects the length of `out`. 
+ step : number, optional + Spacing between values. For any output `out`, this is the distance + between two adjacent values, ``out[i+1] - out[i]``. The default + step size is 1. If `step` is specified as a position argument, + `start` must also be given. + dtype : dtype + The type of the output array. The default is `float32`. + + Returns + ------- + arange : ndarray + Array of evenly spaced values. + + For floating point arguments, the length of the result is + ``ceil((stop - start)/step)``. Because of floating point overflow, + this rule may result in the last element of `out` being greater + than `stop`. + """ + if dtype is None: + dtype = 'float32' + if ctx is None: + ctx = current_context() + if stop is None: + stop = start + start = 0 + if step is None: + step = 1 + if start is None and stop is None: + raise ValueError('start and stop cannot be both None') + if step == 0: + raise ZeroDivisionError('step cannot be 0') + return _npi.arange(start=start, stop=stop, step=step, dtype=dtype, ctx=ctx) + + +@set_module('mxnet.ndarray.numpy') +def argmax(a, axis=None, out=None): + r""" + argmax(a, axis=None, out=None) + + Returns the indices of the maximum values along an axis. + + Parameters + ---------- + a : ndarray + Input array. Only support ndarrays of dtype `float16`, `float32`, and `float64`. + axis : int, optional + By default, the index is into the flattened array, otherwise + along the specified axis. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + index_array : ndarray of indices whose dtype is same as the input ndarray. + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. + + Notes + ----- + In case of multiple occurrences of the maximum values, the indices + corresponding to the first occurrence are returned. + + This function differs from the original `numpy.argmax + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). + - Output has dtype that is same as the input ndarray. + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + Examples + -------- + >>> a = np.arange(6).reshape(2,3) + 10 + >>> a + array([[10., 11., 12.], + [13., 14., 15.]]) + >>> np.argmax(a) + array(5.) + >>> np.argmax(a, axis=0) + array([1., 1., 1.]) + >>> np.argmax(a, axis=1) + array([2., 2.]) + + >>> b = np.arange(6) + >>> b[1] = 5 + >>> b + array([0., 5., 2., 3., 4., 5.]) + >>> np.argmax(b) # Only the first occurrence is returned. + array(1.) + + Specify ``out`` ndarray: + + >>> a = np.arange(6).reshape(2,3) + 10 + >>> b = np.zeros((2,)) + >>> np.argmax(a, axis=1, out=b) + array([2., 2.]) + >>> b + array([2., 2.]) + """ + return _npi.argmax(a, axis=axis, keepdims=False, out=out) + + +@set_module('mxnet.ndarray.numpy') +def argsort(a, axis=-1, kind='quicksort', order=None): + """ + Returns the indices that would sort an input array along the given axis. + This function performs sorting along the given axis and returns an array + of indices having same shape as an input array that index data in sorted order. 
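# --- Editor's note: illustrative sketch, not part of the patch. `arange` above has
# no Examples section; the float32 default dtype is shown here under the same
# `mxnet.numpy` front-end assumption as the other sketches.
from mxnet import numpy as np

print(np.arange(3))         # array([0., 1., 2.]) -- float32 by default
print(np.arange(2, 10, 2))  # array([2., 4., 6., 8.])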
+ + Parameters + ---------- + a : ndarray + Input array. + axis : int, optional + The axis along which to sort the input tensor. + If not given, the last dimension (-1) is used by default. + If None, the flattened array is used. + kind: {'quicksort'} + Currently not supported. + order: None + Currently not supported. + + Returns + ------- + output : ndarray + Array of indices that sort a along the specified axis. + If a is one-dimensional, a[index_array] yields a sorted a. + More generally, np.take_along_axis(a, index_array, axis=axis) always yields the sorted a, + irrespective of dimensionality. + + Examples + -------- + >>> x = np.array([3, 1, 2]) + >>> np.argsort(x) + array([1., 2., 0.]) + >>> x = np.array([[0, 3], [2, 2]]) + >>> x + array([[0., 3.], + [2., 2.]]) + >>> np.argsort(x, axis=0) # sorts along first axis (down) + array([[0., 1.], + [1., 0.]]) + >>> np.argsort(x, axis=1) # sorts along last axis (across) + array([[0., 1.], + [0., 1.]]) + + Notes + ----- + This function differs from the original `numpy.argsort + `_ in + the following way(s): + + - kind and order are currently not supported + """ + if kind != 'quicksort': + raise AttributeError('mxnet.numpy.argsort does not support other sorting methods') + if order is not None: + raise AttributeError('mxnet.numpy.argsort does not support sorting with fields ordering') + return _npi.argsort(a, axis) + + +@set_module('mxnet.ndarray.numpy') +def concatenate(seq, axis=0, out=None): + """Join a sequence of arrays along an existing axis. + + Parameters + ---------- + a1, a2, ... : sequence of array_like + The arrays must have the same shape, except in the dimension + corresponding to `axis` (the first, by default). + axis : int, optional + The axis along which the arrays will be joined. If axis is None, + arrays are flattened before use. Default is 0. + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what concatenate would have returned if no + out argument were specified. + + Returns + ------- + res : ndarray + The concatenated array. + """ + return _npi.concatenate(*seq, dim=axis, out=out) + + +@set_module('mxnet.ndarray.numpy') +def add(x1, x2, out=None): + """Add arguments element-wise. + + Parameters + ---------- + x1, x2 : ndarrays or scalar values + The arrays to be added. If x1.shape != x2.shape, they must be broadcastable to + a common shape (which may be the shape of one or the other). + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + add : ndarray or scalar + The sum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars. + """ + return _ufunc_helper(x1, x2, _npi.add, _np.add, _npi.add_scalar, None, out) + + +@set_module('mxnet.ndarray.numpy') +def subtract(x1, x2, out=None): + """Subtract arguments element-wise. + + Parameters + ---------- + x1, x2 : ndarrays or scalar values + The arrays to be subtracted from each other. If x1.shape != x2.shape, + they must be broadcastable to a common shape (which may be the shape + of one or the other). + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + subtract : ndarray or scalar + The difference of x1 and x2, element-wise.
This is a scalar if both x1 and x2 are scalars. + """ + return _ufunc_helper(x1, x2, _npi.subtract, _np.subtract, _npi.subtract_scalar, + _npi.rsubtract_scalar, out) + + +@set_module('mxnet.ndarray.numpy') +def multiply(x1, x2, out=None): + """Multiply arguments element-wise. + + Parameters + ---------- + x1, x2 : ndarrays or scalar values + The arrays to be multiplied. If x1.shape != x2.shape, they must be broadcastable to + a common shape (which may be the shape of one or the other). + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + The multiplication of x1 and x2, element-wise. This is a scalar if both x1 and x2 + are scalars. + """ + return _ufunc_helper(x1, x2, _npi.multiply, _np.multiply, _npi.multiply_scalar, None, out) + + +@set_module('mxnet.ndarray.numpy') +def divide(x1, x2, out=None): + """Returns a true division of the inputs, element-wise. + + Parameters + ---------- + x1 : ndarray or scalar + Dividend array. + + x2 : ndarray or scalar + Divisor array. + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + This is a scalar if both x1 and x2 are scalars. + """ + return _ufunc_helper(x1, x2, _npi.true_divide, _np.divide, _npi.true_divide_scalar, + _npi.rtrue_divide_scalar, out) + + +@set_module('mxnet.ndarray.numpy') +def mod(x1, x2, out=None): + """Return element-wise remainder of division. + + Parameters + ---------- + x1 : ndarray or scalar + Dividend array. + + x2 : ndarray or scalar + Divisor array. + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + This is a scalar if both x1 and x2 are scalars. + """ + return _ufunc_helper(x1, x2, _npi.mod, _np.mod, _npi.mod_scalar, _npi.rmod_scalar, out) + + +@set_module('mxnet.ndarray.numpy') +def power(x1, x2, out=None): + """First array elements raised to powers from second array, element-wise. + + Parameters + ---------- + x1 : ndarray or scalar + The bases. + + x2 : ndarray or scalar + The exponent. + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + The bases in x1 raised to the exponents in x2. + This is a scalar if both x1 and x2 are scalars. + """ + return _ufunc_helper(x1, x2, _npi.power, _np.power, _npi.power_scalar, _npi.rpower_scalar, out) + + +@set_module('mxnet.ndarray.numpy') +def clip(a, a_min, a_max, out=None): + """clip(a, a_min, a_max, out=None) + + Clip (limit) the values in an array. + Given an interval, values outside the interval are clipped to + the interval edges. For example, if an interval of ``[0, 1]`` + is specified, values smaller than 0 become 0, and values larger + than 1 become 1. + + Parameters + ---------- + a : ndarray + Array containing elements to clip. + a_min : scalar or `None` + Minimum value. If `None`, clipping is not performed on lower + interval edge. Not more than one of `a_min` and `a_max` may be + `None`. 
+ a_max : scalar or `None` + Maximum value. If `None`, clipping is not performed on upper + interval edge. Not more than one of `a_min` and `a_max` may be + `None`. + out : ndarray, optional + The results will be placed in this array. It may be the input + array for in-place clipping. `out` must be of the right shape + to hold the output. Its type is preserved. + + Returns + ------- + clipped_array : ndarray + An array with the elements of `a`, but where values + < `a_min` are replaced with `a_min`, and those > `a_max` + with `a_max`. + + Notes + ----- + array_like `a_min` and `a_max` are not supported. + + Examples + -------- + >>> a = np.arange(10) + >>> np.clip(a, 1, 8) + array([1., 1., 2., 3., 4., 5., 6., 7., 8., 8.], dtype=float32) + >>> a + array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32) + >>> np.clip(a, 3, 6, out=a) + array([3., 3., 3., 3., 4., 5., 6., 6., 6., 6.], dtype=float32) + """ + if a_min is None and a_max is None: + raise ValueError('array_clip: must set either max or min') + if a_min is None: + a_min = float('-inf') + if a_max is None: + a_max = float('inf') + return _npi.clip(a, a_min, a_max, out=out) + + +@set_module('mxnet.ndarray.numpy') +def swapaxes(a, axis1, axis2): + """Interchange two axes of an array. + + Parameters + ---------- + a : ndarray + Input array. + axis1 : int + First axis. + axis2 : int + Second axis. + + Returns + ------- + a_swapped : ndarray + Swapped array. This is always a copy of the input array. + """ + return _npi.swapaxes(a, dim1=axis1, dim2=axis2) + + +@set_module('mxnet.ndarray.numpy') +def expand_dims(a, axis): + """Expand the shape of an array. + + Insert a new axis that will appear at the `axis` position in the expanded array shape. + + Parameters + ---------- + a : ndarray + Input array. + axis : int + Position in the expanded axes where the new axis is placed. + + Returns + ------- + res : ndarray + Output array. The number of dimensions is one greater than that of + the input array. + """ + return _npi.expand_dims(a, axis) + + +# pylint: disable=line-too-long +@set_module('mxnet.ndarray.numpy') +def split(ary, indices_or_sections, axis=0): + """Split an array into multiple sub-arrays. + + Parameters + ---------- + ary : ndarray + Array to be divided into sub-arrays. + indices_or_sections : int or 1-D array + If `indices_or_sections` is an integer, N, the array will be divided + into N equal arrays along `axis`. If such a split is not possible, + an error is raised. + + If `indices_or_sections` is a 1-D array of sorted integers, the entries + indicate where along `axis` the array is split. For example, + ``[2, 3]`` would, for ``axis=0``, result in + + - ary[:2] + - ary[2:3] + - ary[3:] + + The index must be within the dimension of the array along `axis`. + axis : int, optional + The axis along which to split, default is 0. + + Returns + ------- + sub-arrays : list of ndarrays + A list of sub-arrays. + + Raises + ------ + ValueError + If `indices_or_sections` is given as an integer, but + a split does not result in equal division. + + Notes + ----- + This function differs from the original `numpy.split + `_ in + the following ways: + + - Index exceeding the dimension of the array is currently not supported.
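# --- Editor's note: illustrative sketch, not part of the patch. `swapaxes` and
# `expand_dims` above ship without Examples sections; a quick shape check under the
# usual `mxnet.numpy` front-end assumption:
from mxnet import numpy as np

x = np.zeros((2, 3, 4))
print(np.swapaxes(x, 0, 2).shape)       # (4, 3, 2)
print(np.expand_dims(x, axis=1).shape)  # (2, 1, 3, 4)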
+ + Examples + -------- + >>> x = np.arange(9.0) + >>> np.split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] + >>> np.split(x, (3, 5, 6)) + [array([0., 1., 2.]), array([3., 4.]), array([5.]), array([6., 7., 8.])] + """ + indices = [] + axis_size = ary.shape[axis] + if isinstance(indices_or_sections, int): + sections = indices_or_sections + if axis_size % sections: + raise ValueError('array split does not result in an equal division') + section_size = int(axis_size / sections) + indices = [i * section_size for i in range(sections)] + elif isinstance(indices_or_sections, tuple): + indices = [0] + list(indices_or_sections) + else: + raise ValueError('indices_or_sections must be either an int or a tuple of ints') + ret = _npi.split(ary, indices, axis, False) + if not isinstance(ret, list): + raise NotImplementedError('single output from split is not supported yet...') + return ret +# pylint: enable=line-too-long + + +@set_module('mxnet.ndarray.numpy') +def tile(A, reps): + r""" + Construct an array by repeating A the number of times given by reps. + + If `reps` has length ``d``, the result will have dimension of + ``max(d, A.ndim)``. + + If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new + axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication, + or shape (1, 1, 3) for 3-D replication. If this is not the desired + behavior, promote `A` to d-dimensions manually before calling this + function. + + If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. + Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as + (1, 1, 2, 2). + + Parameters + ---------- + A : ndarray or scalar + An input array or a scalar to repeat. + reps : a single integer or tuple of integers + The number of repetitions of `A` along each axis. + + Returns + ------- + c : ndarray + The tiled output array. + + Examples + -------- + >>> a = np.array([0, 1, 2]) + >>> np.tile(a, 2) + array([0., 1., 2., 0., 1., 2.]) + >>> np.tile(a, (2, 2)) + array([[0., 1., 2., 0., 1., 2.], + [0., 1., 2., 0., 1., 2.]]) + >>> np.tile(a, (2, 1, 2)) + array([[[0., 1., 2., 0., 1., 2.]], + [[0., 1., 2., 0., 1., 2.]]]) + + >>> b = np.array([[1, 2], [3, 4]]) + >>> np.tile(b, 2) + array([[1., 2., 1., 2.], + [3., 4., 3., 4.]]) + >>> np.tile(b, (2, 1)) + array([[1., 2.], + [3., 4.], + [1., 2.], + [3., 4.]]) + + >>> c = np.array([1,2,3,4]) + >>> np.tile(c,(4,1)) + array([[1., 2., 3., 4.], + [1., 2., 3., 4.], + [1., 2., 3., 4.], + [1., 2., 3., 4.]]) + + Scalar as input: + + >>> np.tile(2, 3) + array([2, 2, 2]) # repeating integer `2` + + """ + return _unary_func_helper(A, _npi.tile, _np.tile, reps=reps) + + +@set_module('mxnet.ndarray.numpy') +def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0, ctx=None): # pylint: disable=too-many-arguments + r""" + Return evenly spaced numbers over a specified interval. + + Returns num evenly spaced samples, calculated over the interval [start, stop]. + The endpoint of the interval can optionally be excluded. + + Parameters + ---------- + start : real number + The starting value of the sequence. + stop : real number + The end value of the sequence, unless endpoint is set to False. In + that case, the sequence consists of all but the last of num + 1 + evenly spaced samples, so that stop is excluded. Note that the step + size changes when endpoint is False. + num : int, optional + Number of samples to generate. Default is 50. Must be non-negative. + endpoint : bool, optional + If True, stop is the last sample.
Otherwise, it is not included. + Default is True. + retstep : bool, optional + If True, return (samples, step), where step is the spacing between samples. + dtype : dtype, optional + The type of the output array. If dtype is not given, infer the data + type from the other input arguments. + axis : int, optional + The axis in the result to store the samples. Relevant only if start or + stop are array-like. By default (0), the samples will be along a new + axis inserted at the beginning. Use -1 to get an axis at the end. + + Returns + ------- + samples : ndarray + There are num equally spaced samples in the closed interval + `[start, stop]` or the half-open interval `[start, stop)` + (depending on whether endpoint is True or False). + step : float, optional + Only returned if retstep is True + Size of spacing between samples. + + + See Also + -------- + arange : Similar to `linspace`, but uses a step size (instead of the + number of samples). + + Examples + -------- + >>> np.linspace(2.0, 3.0, num=5) + array([2. , 2.25, 2.5 , 2.75, 3. ]) + >>> np.linspace(2.0, 3.0, num=5, endpoint=False) + array([2. , 2.2, 2.4, 2.6, 2.8]) + >>> np.linspace(2.0, 3.0, num=5, retstep=True) + (array([2. , 2.25, 2.5 , 2.75, 3. ]), 0.25) + + Graphical illustration: + + >>> import matplotlib.pyplot as plt + >>> N = 8 + >>> y = np.zeros(N) + >>> x1 = np.linspace(0, 10, N, endpoint=True) + >>> x2 = np.linspace(0, 10, N, endpoint=False) + >>> plt.plot(x1.asnumpy(), y.asnumpy(), 'o') + [] + >>> plt.plot(x2.asnumpy(), (y + 0.5).asnumpy(), 'o') + [] + >>> plt.ylim([-0.5, 1]) + (-0.5, 1) + >>> plt.show() + + Notes + ----- + + This function differs from the original `numpy.linspace + `_ in + the following aspects: + + - `start` and `stop` do not support list, numpy ndarray and mxnet ndarray + - axis could only be 0 + - There could be an additional `ctx` argument to specify the device, e.g. the i-th + GPU. + """ + if isinstance(start, (list, _np.ndarray, NDArray)) or \ + isinstance(stop, (list, _np.ndarray, NDArray)): + raise NotImplementedError('start and stop only support int') + if axis != 0: + raise NotImplementedError("the function only support axis 0") + if ctx is None: + ctx = current_context() + if retstep: + step = (stop - start) / (num - 1) + return _npi.linspace(start=start, stop=stop, num=num, endpoint=endpoint, ctx=ctx, dtype=dtype), step + else: + return _npi.linspace(start=start, stop=stop, num=num, endpoint=endpoint, ctx=ctx, dtype=dtype) + + +def _unary_func_helper(x, fn_array, fn_scalar, out=None, **kwargs): + """Helper function for unary operators. + + Parameters + ---------- + x : ndarray or scalar + Input of the unary operator. + fn_array : function + Function to be called if x is of ``ndarray`` type. + fn_scalar : function + Function to be called if x is a Python scalar. + out : ndarray + The buffer ndarray for storing the result of the unary function. + + Returns + ------- + out : mxnet.numpy.ndarray or scalar + Result array or scalar. + """ + if isinstance(x, numeric_types): + return fn_scalar(x, **kwargs) + elif isinstance(x, NDArray): + return fn_array(x, out=out, **kwargs) + else: + raise TypeError('type {} not supported'.format(str(type(x)))) + + +@set_module('mxnet.ndarray.numpy') +def sin(x, out=None, **kwargs): + r"""Trigonometric sine, element-wise. + + Parameters + ---------- + x : ndarray or scalar + Angle, in radians (:math:`2 \pi` rad equals 360 degrees). + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. 
If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The sine of each element of x. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.sin, _np.sin, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def cos(x, out=None, **kwargs): + r"""Cosine, element-wise. + + Parameters + ---------- + x : ndarray or scalar + Angle, in radians (:math:`2 \pi` rad equals 360 degrees). + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The corresponding cosine values. This is a scalar if x is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.cos, _np.cos, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def sinh(x, out=None, **kwargs): + """Hyperbolic sine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) - np.exp(-x))`` or ``-1j * np.sin(1j*x)``. + + Parameters + ---------- + x : ndarray or scalar + Input array or scalar. + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The corresponding hyperbolic sine values. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.sinh, _np.sinh, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def cosh(x, out=None, **kwargs): + """Hyperbolic cosine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) + np.exp(-x))`` and ``np.cos(1j*x)``. + + + Parameters + ---------- + x : ndarray or scalar + Input array or scalar. + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The corresponding hyperbolic cosine values. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.cosh, _np.cosh, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def log10(x, out=None, **kwargs): + """Return the base 10 logarithm of the input array, element-wise. + + Parameters + ---------- + x : ndarray or scalar + Input array or scalar. + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The logarithm to the base 10 of `x`, element-wise. NaNs are + returned where x is negative. This is a scalar if `x` is a scalar. 
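# --- Editor's note: illustrative sketch, not part of the patch. The unary wrappers
# above (`sin`, `cos`, `sinh`, `cosh`, `log10`) all dispatch through
# `_unary_func_helper`: ndarray inputs go to the MXNet op, while plain Python scalars
# fall back to official NumPy. The front-end import is assumed as in the other sketches.
import math
from mxnet import numpy as np

x = np.array([0.0, math.pi / 2])
print(np.sin(x))    # ndarray input -> _npi.sin, roughly array([0., 1.])
print(np.cos(0.0))  # scalar input  -> _np.cos fallback, returns 1.0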
+ + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.log10, _np.log10, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def sqrt(x, out=None, **kwargs): + """ + Return the non-negative square-root of an array, element-wise. + + Parameters + ---------- + x : ndarray or scalar + The values whose square-roots are required. + out : ndarray, or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + y : ndarray or scalar + An array of the same shape as `x`, containing the positive + square-root of each element in `x`. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.sqrt, _np.sqrt, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def abs(x, out=None, **kwargs): + r"""abs(x, out=None, **kwargs) + + Calculate the absolute value element-wise. + + Parameters + ---------- + x : ndarray or scalar + Input array. + out : ndarray or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + absolute : ndarray + An ndarray containing the absolute value of + each element in `x`. This is a scalar if `x` is a scalar. + + Examples + -------- + >>> x = np.array([-1.2, 1.2]) + >>> np.abs(x) + array([1.2, 1.2]) + """ + return _unary_func_helper(x, _npi.abs, _np.abs, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def sign(x, out=None, **kwargs): + r""" + sign(x, out=None) + + Returns an element-wise indication of the sign of a number. + + The `sign` function returns ``-1 if x < 0, 0 if x==0, 1 if x > 0``. Only supports real number. + + Parameters + ---------- + x : ndarray or a scalar + Input values. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The sign of `x`. + This is a scalar if `x` is a scalar. + + Note + ------- + - Only supports real number as input elements. + - Input type does not support Python native iterables(list, tuple, ...). + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + Examples + -------- + >>> a = np.array([-5., 4.5]) + >>> np.sign(a) + array([-1., 1.]) + + Scalars as input: + + >>> np.sign(4.0) + 1.0 + >>> np.sign(0) + 0 + + Use ``out`` parameter: + + >>> b = np.zeros((2, )) + >>> np.sign(a, out=b) + array([-1., 1.]) + >>> b + array([-1., 1.]) + + """ + return _unary_func_helper(x, _npi.sign, _np.sign, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def exp(x, out=None, **kwargs): + r"""exp(x, out=None, **kwargs) + + Calculate the exponential of all elements in the input array. + + Parameters + ---------- + x : ndarray or scalar + Input values. + out : ndarray or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. 
If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + out : ndarray or scalar + Output array, element-wise exponential of `x`. + This is a scalar if `x` is a scalar. + + Examples + -------- + >>> np.exp(1) + 2.718281828459045 + >>> x = np.array([-1, 1, -2, 2]) + >>> np.exp(x) + array([0.36787945, 2.7182817 , 0.13533528, 7.389056 ]) + """ + return _unary_func_helper(x, _npi.exp, _np.exp, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def arctan(x, out=None, **kwargs): + r"""arctan(x, out=None, **kwargs) + + Trigonometric inverse tangent, element-wise. + + The inverse of tan, so that if ``y = tan(x)`` then ``x = arctan(y)``. + + Parameters + ---------- + x : ndarray or scalar + Input values. + out : ndarray or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + out : ndarray or scalar + Out has the same shape as `x`. It lies in + ``[-pi/2, pi/2]`` (``arctan(+/-inf)`` returns ``+/-pi/2``). + This is a scalar if `x` is a scalar. + + Notes + ----- + `arctan` is a multi-valued function: for each `x` there are infinitely + many numbers `z` such that tan(`z`) = `x`. The convention is to return + the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, `arctan` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + Complex-valued input is not supported yet. + + The inverse tangent is also known as `atan` or tan^{-1}. + + Examples + -------- + We expect the arctan of 0 to be 0, and of 1 to be pi/4: + + >>> x = np.array([0, 1]) + >>> np.arctan(x) + array([0. , 0.7853982]) + + >>> np.pi/4 + 0.7853981633974483 + """ + return _unary_func_helper(x, _npi.arctan, _np.arctan, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def log(x, out=None, **kwargs): + """ + log(x, out=None) + + Natural logarithm, element-wise. + + The natural logarithm `log` is the inverse of the exponential function, + so that `log(exp(x)) = x`. The natural logarithm is logarithm in base + `e`. + + Parameters + ---------- + x : ndarray + Input value. Elements must be of real value. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The natural logarithm of `x`, element-wise. + This is a scalar if `x` is a scalar. + + Notes + ----- + Currently only supports data of real values and ``inf`` as input. Returns data of real value, ``inf``, ``-inf`` and + ``nan`` according to the input. + + This function differs from the original `numpy.log + `_ in + the following aspects: + + - Does not support complex number for now + - Input type does not support Python native iterables(list, tuple, ...). + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case.
+ + Examples + -------- + >>> a = np.array([1, np.exp(1), np.exp(2), 0], dtype=np.float64) + >>> np.log(a) + array([ 0., 1., 2., -inf], dtype=float64) + + + Due to the internal calculation mechanism, the default float32 dtype may yield slightly imprecise results: + + >>> a = np.array([1, np.exp(1), np.exp(2), 0], dtype=np.float32) + >>> np.log(a) + array([ 0., 0.99999994, 2., -inf]) + + Scalar calculation: + + >>> np.log(1) + 0.0 + + """ + return _unary_func_helper(x, _npi.log, _np.log, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def degrees(x, out=None, **kwargs): + """ + degrees(x, out=None) + + Convert angles from radians to degrees. + + Parameters + ---------- + x : ndarray + Input value. Elements must be of real value. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The corresponding degree values; if `out` was supplied this is a + reference to it. + This is a scalar if `x` is a scalar. + + Notes + ------- + This function differs from the original `numpy.degrees + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). Only ndarray is supported. + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + Examples + -------- + Convert a radian array to degrees + + >>> rad = np.arange(12.) * np.pi / 6 + >>> np.degrees(rad) + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., 270., 300., 330.]) + + Use specified ``out`` ndarray: + + >>> out = np.zeros((rad.shape)) + >>> np.degrees(rad, out) + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., 270., 300., 330.]) + >>> out + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., 270., 300., 330.]) + + """ + return _unary_func_helper(x, _npi.degrees, _np.degrees, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def rint(x, out=None, **kwargs): + """ + Round elements of the array to the nearest integer. + + Parameters + ---------- + x : ndarray or scalar + Input array. + out : ndarray or None + A location into which the result is stored. + If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Notes + ----- + This function differs from the original `numpy.rint + `_ in + the following way(s): + + - only ndarray or scalar is accepted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) + >>> np.rint(a) + array([-2., -2., -0., 0., 1., 2., 2.]) + """ + return _unary_func_helper(x, _npi.rint, _np.rint, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def log2(x, out=None, **kwargs): + """ + Base-2 logarithm of x. + + Parameters + ---------- + x : ndarray or scalar + Input values. + out : ndarray or None + A location into which the result is stored.
+ If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The logarithm base two of `x`, element-wise. + This is a scalar if `x` is a scalar. + + Notes + ----- + This function differs from the original `numpy.log2 + `_ in + the following way(s): + + - only ndarray or scalar is accepted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> x = np.array([0, 1, 2, 2**4]) + >>> np.log2(x) + array([-inf, 0., 1., 4.]) + + """ + return _unary_func_helper(x, _npi.log2, _np.log2, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def radians(x, out=None, **kwargs): + """ + Convert angles from degrees to radians. + + Parameters + ---------- + x : ndarray or scalar + Input array in degrees. + out : ndarray or None + A location into which the result is stored. + If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The corresponding radian values. This is a scalar if x is a scalar. + + Notes + ----- + This function differs from the original `numpy.radians + `_ in + the following way(s): + + - only ndarray or scalar is accepted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> deg = np.arange(12.) * 30. + >>> np.radians(deg) + array([0. , 0.5235988, 1.0471976, 1.5707964, 2.0943952, 2.6179938, + 3.1415927, 3.6651914, 4.1887903, 4.712389 , 5.2359877, 5.7595863], + dtype=float32) + + """ + return _unary_func_helper(x, _npi.radians, _np.radians, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def reciprocal(x, out=None, **kwargs): + r""" + reciprocal(x, out=None) + + Return the reciprocal of the argument, element-wise. + + Calculates ``1/x``. + + Parameters + ---------- + x : ndarray or scalar + The values whose reciprocals are required. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Examples + -------- + >>> np.reciprocal(2.) + 0.5 + >>> x = np.array([1, 2., 3.33]) + >>> np.reciprocal(x) + array([1. , 0.5 , 0.3003003]) + + Notes + ----- + .. note:: + This function is not designed to work with integers. + + For integer arguments with absolute value larger than 1 the result is + always zero because of the way Python handles integer division. For + integer zero the result is an overflow. + + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.reciprocal + `_ in + the following aspects: + + - Only support ndarray and scalar now. + - `where` argument is not supported. + """ + return _unary_func_helper(x, _npi.reciprocal, _np.reciprocal, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def square(x, out=None, **kwargs): + r""" + square(x, out=None) + + Return the element-wise square of the input.
+ + Parameters + ---------- + x : ndarray or scalar + The values whose squares are required. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Examples + -------- + >>> np.square(2.) + 4.0 + >>> x = np.array([1, 2., -1]) + >>> np.square(x) + array([1., 4., 1.]) + + Notes + ----- + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.square + `_ in + the following aspects: + + - Only support ndarray and scalar now. + - `where` argument is not supported. + - Complex input is not supported. + """ + return _unary_func_helper(x, _npi.square, _np.square, out=out, **kwargs) + + +@set_module('mxnet.ndarray.numpy') +def arcsin(x, out=None, **kwargs): + r""" + arcsin(x, out=None) + + Inverse sine, element-wise. + + Parameters + ---------- + x : ndarray or scalar + `y`-coordinate on the unit circle. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + angle : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + The inverse sine of each element in `x`, in radians and in the + closed interval ``[-pi/2, pi/2]``. + + Examples + -------- + >>> np.arcsin(1) # pi/2 + 1.5707963267948966 + >>> np.arcsin(-1) # -pi/2 + -1.5707963267948966 + >>> np.arcsin(0) + 0.0 + + Notes + ----- + `arcsin` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that :math:`sin(z) = x`. The convention is to + return the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, *arcsin* always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + The inverse sine is also known as `asin` or sin^{-1}. + + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.arcsin + `_ in + the following aspects: + + - Only support ndarray or scalar now. + - `where` argument is not supported. + - Complex input is not supported. + + References + ---------- + Abramowitz, M. and Stegun, I. A., *Handbook of Mathematical Functions*, + 10th printing, New York: Dover, 1964, pp. 79ff. + http://www.math.sfu.ca/~cbm/aands/ + """ + return _unary_func_helper(x, _npi.arcsin, _np.arcsin, out=out, **kwargs) diff --git a/python/mxnet/ndarray/numpy/_register.py b/python/mxnet/ndarray/numpy/_register.py new file mode 100644 index 000000000000..3ac464e24217 --- /dev/null +++ b/python/mxnet/ndarray/numpy/_register.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Registering numpy ops.""" + +from ...base import _init_np_op_module +from ..register import _make_ndarray_function + + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy', + mx_module_name='ndarray', make_op_func=_make_ndarray_function) + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy._internal', + mx_module_name='ndarray', make_op_func=_make_ndarray_function) diff --git a/python/mxnet/ndarray/numpy/linalg.py b/python/mxnet/ndarray/numpy/linalg.py new file mode 100644 index 000000000000..36f3f21a7588 --- /dev/null +++ b/python/mxnet/ndarray/numpy/linalg.py @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for operators used in Gluon dispatched by F=ndarray.""" + +from __future__ import absolute_import +from . import _op as _mx_nd_np + +__all__ = ['norm'] + + +def norm(x, ord=None, axis=None, keepdims=False): + r"""Matrix or vector norm. + + This function can only support Frobenius norm for now. + The Frobenius norm is given by [1]_: + + :math:`||A||_F = [\sum_{i,j} abs(a_{i,j})^2]^{1/2}` + + Parameters + ---------- + x : ndarray + Input array. + ord : {'fro'}, optional + Order of the norm. + axis : {int, 2-tuple of ints, None}, optional + If `axis` is an integer, it specifies the axis of `x` along which to + compute the vector norms. If `axis` is a 2-tuple, it specifies the + axes that hold 2-D matrices, and the matrix norms of these matrices + are computed. If `axis` is None, the norm of the whole ndarray is + returned. + + keepdims : bool, optional + If this is set to True, the axes which are normed over are left in the + result as dimensions with size one. With this option the result will + broadcast correctly against the original `x`. + + Returns + ------- + n : float or ndarray + Norm of the matrix or vector(s). + + References + ---------- + .. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*, + Baltimore, MD, Johns Hopkins University Press, 1985, pg. 
15 + """ + if ord is not None and ord != 'fro': + raise ValueError('only support Frobenius norm for now, received ord={}'.format(str(ord))) + if isinstance(axis, tuple) and len(axis) > 2: + raise ValueError('Improper number of dimensions to norm') + if ord == 'fro' and x.ndim > 2 and axis is None: + raise ValueError('Improper number of dimensions to norm') + return _mx_nd_np.sqrt(_mx_nd_np.sum(x * x, axis=axis, keepdims=keepdims)) diff --git a/python/mxnet/ndarray/numpy/random.py b/python/mxnet/ndarray/numpy/random.py new file mode 100644 index 000000000000..4522f30dc85e --- /dev/null +++ b/python/mxnet/ndarray/numpy/random.py @@ -0,0 +1,191 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for operators used in Gluon dispatched by F=ndarray.""" +from __future__ import absolute_import +import numpy as np +from ...base import numeric_types +from ...context import current_context +from ..ndarray import NDArray +from . import _internal as _npi + +__all__ = ['uniform', 'normal', 'multinomial'] + + +def _random_helper(random, sampler, params, shape, dtype, ctx, out, kwargs): + """Helper function for random generators.""" + from ...numpy import ndarray as np_ndarray + if isinstance(params[0], np_ndarray): + for i in params[1:]: + assert isinstance(i, np_ndarray), \ + "Distribution parameters must all have the same type, but got " \ + "both %s and %s." % (type(params[0]), type(i)) + return sampler(*params, shape=shape, dtype=dtype, out=out, **kwargs) + elif isinstance(params[0], numeric_types): + if ctx is None: + ctx = current_context() + if shape is None and out is None: + shape = () + for i in params[1:]: + assert isinstance(i, numeric_types), \ + "Distribution parameters must all have the same type, but got " \ + "both %s and %s."%(type(params[0]), type(i)) + return random(*params, shape=shape, dtype=dtype, ctx=ctx, out=out, **kwargs) + + raise ValueError("Distribution parameters must be either mxnet.numpy.ndarray or numbers, " + "but got %s." % type(params[0])) + + +def uniform(low=0.0, high=1.0, size=None, **kwargs): + """Draw samples from a uniform distribution. + + Samples are uniformly distributed over the half-open interval + ``[low, high)`` (includes low, but excludes high). In other words, + any value within the given interval is equally likely to be drawn + by `uniform`. + + Parameters + ---------- + low : float, optional + Lower boundary of the output interval. All values generated will be + greater than or equal to low. The default value is 0. + high : float + Upper boundary of the output interval. All values generated will be + less than high. The default value is 1.0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. 
If size is ``None`` (default), + a scalar tensor containing a single value is returned if + ``low`` and ``high`` are both scalars. + dtype : {'float16', 'float32', 'float64'}, optional + Data type of output samples. Default is 'float32' + ctx : Context, optional + Device context of output. Default is current context. + out : ndarray, optional + Store output to an existing ndarray. + + Returns + ------- + out : ndarray + Drawn samples from the parameterized uniform distribution. + + + Notes + ----- + This function currently does not support ``low`` and ``high`` as ndarrays. + """ + dtype = kwargs.pop('dtype', None) + if dtype is None: + dtype = 'float32' + ctx = kwargs.pop('ctx', None) + out = kwargs.pop('out', None) + return _random_helper(_npi.random_uniform, None, + [low, high], size, dtype, ctx, out, kwargs) + + +def normal(loc=0.0, scale=1.0, size=None, **kwargs): + """Draw random samples from a normal (Gaussian) distribution. + + Samples are distributed according to a normal distribution parametrized + by *loc* (mean) and *scale* (standard deviation). + + + Parameters + ---------- + loc : float, optional + Mean (centre) of the distribution. + scale : float, optional + Standard deviation (spread or "width") of the distribution. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., `(m, n, k)`, then `m * n * k` + samples are drawn. If size is `None` (default), a scalar tensor containing + a single value is returned if loc and scale are both scalars. + dtype : {'float16', 'float32', 'float64'}, optional + Data type of output samples. Default is 'float32' + ctx : Context, optional + Device context of output. Default is current context. + out : ``ndarray``, optional + Store output to an existing ``ndarray``. + + Returns + ------- + out : ndarray + Drawn samples from the parameterized normal distribution. + + Notes + ----- + This function currently does not support ``loc`` and ``scale`` as ndarrays. + """ + dtype = kwargs.pop('dtype', None) + if dtype is None: + dtype = 'float32' + ctx = kwargs.pop('ctx', None) + out = kwargs.pop('out', None) + return _random_helper(_npi.random_normal, None, + [loc, scale], size, dtype, ctx, out, kwargs) + + +def multinomial(n, pvals, size=None): + """multinomial(n, pvals, size=None) + + Draw samples from a multinomial distribution. + + The multinomial distribution is a multivariate generalisation of the binomial distribution. + Take an experiment with one of ``p`` possible outcomes. An example of such an experiment is throwing a dice, + where the outcome can be 1 through 6. Each sample drawn from the distribution represents n such experiments. + Its values, ``X_i = [X_0, X_1, ..., X_p]``, represent the number of times the outcome was ``i``. + + Parameters + ---------- + n : int + Number of experiments. + pvals : sequence of floats, length p + Probabilities of each of the p different outcomes. These should sum to 1. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples + are drawn. Default is None, in which case a single value is returned. + + Returns + ------- + out : ndarray + The drawn samples, of shape size, if that was provided. If not, the shape is ``(N,)``. + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional value drawn from the distribution. 
+ + Examples + -------- + Throw a dice 1000 times, and 1000 times again: + + >>> np.random.multinomial(1000, [1/6.]*6, size=2) + array([[164, 161, 179, 158, 150, 188], + [178, 162, 177, 143, 163, 177]]) + + A loaded die is more likely to land on number 6: + + >>> np.random.multinomial(100, [1/7.]*5 + [2/7.]) + array([19, 14, 12, 11, 21, 23]) + + >>> np.random.multinomial(100, [1.0 / 3, 2.0 / 3]) + array([32, 68]) + """ + if isinstance(pvals, NDArray): + return _npi.multinomial(pvals, pvals=None, n=n, size=size) + else: + if isinstance(pvals, np.ndarray): + pvals = pvals.tolist() + if any(isinstance(i, list) for i in pvals): + raise ValueError('object too deep for desired array') + return _npi.multinomial(n=n, pvals=pvals, size=size) diff --git a/python/mxnet/ndarray/numpy_extension/__init__.py b/python/mxnet/ndarray/numpy_extension/__init__.py new file mode 100644 index 000000000000..5be34ac9b3d5 --- /dev/null +++ b/python/mxnet/ndarray/numpy_extension/__init__.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module for the ops not belonging to the official numpy package.""" + +from . import _op +from . import image +from . import _register +from ._op import * # pylint: disable=wildcard-import + +__all__ = _op.__all__ diff --git a/python/mxnet/ndarray/numpy_extension/_op.py b/python/mxnet/ndarray/numpy_extension/_op.py new file mode 100644 index 000000000000..22738a0f1950 --- /dev/null +++ b/python/mxnet/ndarray/numpy_extension/_op.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for the operators not belonging to the official numpy package +used in Gluon dispatched by F=ndarray module.""" + +__all__ = [] diff --git a/python/mxnet/ndarray/numpy_extension/_register.py b/python/mxnet/ndarray/numpy_extension/_register.py new file mode 100644 index 000000000000..32cd0686551c --- /dev/null +++ b/python/mxnet/ndarray/numpy_extension/_register.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Registering numpy_extension ops.""" + +from ...base import _init_np_op_module +from ..register import _make_ndarray_function + + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy_extension', + mx_module_name='ndarray', make_op_func=_make_ndarray_function) diff --git a/python/mxnet/ndarray/numpy_extension/image.py b/python/mxnet/ndarray/numpy_extension/image.py new file mode 100644 index 000000000000..b3bd27fc503c --- /dev/null +++ b/python/mxnet/ndarray/numpy_extension/image.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Image pre-processing operators.""" + +__all__ = [] diff --git a/python/mxnet/ndarray/register.py b/python/mxnet/ndarray/register.py index 1ccf228698ba..bdbfa1584ca6 100644 --- a/python/mxnet/ndarray/register.py +++ b/python/mxnet/ndarray/register.py @@ -24,12 +24,97 @@ from ._internal import NDArrayBase, _imperative_invoke # pylint: disable=unused-import from ..ndarray_doc import _build_doc -from ..base import mx_uint, check_call, _LIB, py_str, _init_op_module, _Null # pylint: disable=unused-import +from ..base import mx_uint, check_call, _LIB, py_str, _init_op_module, _Null, _is_np_op # pylint: disable=unused-import +from ..util import use_np_shape # pylint: disable=unused-import + + +def _verify_all_np_ndarrays(op_name, func_name, args, out): + """Verify if all the arrays are numpy ndarrays. + + Parameters + ---------- + op_name : str + Operator full name registered in backend. + func_name : str + Operator name exposed to users. This is usually the name by stripping off + the prefix of the full operator names registered in backend. + args : list of arrays + Input ndarray arguments to be checked. + out : ndarray or None or list of ndarrays + User-provided output ndarrays. + """ + from ..numpy import ndarray as np_ndarray + for arr in args: + if (arr is not None) and (not isinstance(arr, np_ndarray)): + raise TypeError('Operator `{}` registered in backend is known as `{}` in Python. ' + 'This is a numpy operator which can only accept ' + 'MXNet numpy ndarrays, while received a legacy ndarray. 
' + 'Please ensure that you have activated numpy semantics by calling ' + '`npx.set_np()` in your code. If you still see this error with numpy ' + 'semantics activated, please call `as_np_ndarray()` upon the legacy ' + 'ndarray to convert it to an MXNet numpy ndarray, and then feed the ' + 'converted array to this operator.' + .format(op_name, func_name)) + if out is None: + return + if not isinstance(out, (list, tuple)): + out = [out] + for arr in out: + if (arr is not None) and (not isinstance(arr, np_ndarray)): + raise TypeError('Operator `{}` registered in backend is known as `{}` in Python. ' + 'This is a numpy operator which can only accept ' + 'MXNet numpy ndarrays, while received a legacy ndarray. ' + 'Please ensure that you have activated numpy semantics by calling ' + '`npx.set_np()` in your code. If you still see this error with numpy ' + 'semantics activated, please call `as_np_ndarray()` upon the legacy ' + 'ndarray to convert it to an MXNet numpy ndarray, and then feed the ' + 'converted array to this operator.' + .format(op_name, func_name)) + + +def _verify_all_legacy_ndarrays(op_name, func_name, args, out): + """Verify if all the arrays are legacy ndarrays. + + Parameters + ---------- + op_name : str + Operator full name registered in backend. + func_name : str + Operator name exposed to users. This is usually the name by stripping off + the prefix of the full operator names registered in backend. + args : list of arrays + Input ndarray arguments to be checked. + out : ndarray or None or list of ndarrays + User-provided output ndarrays. + """ + from ..numpy import ndarray as np_ndarray + for arr in args: + if (arr is not None) and (isinstance(arr, np_ndarray)): + raise TypeError('Operator `{}` registered in backend is known as `{}` in Python. ' + 'This is a legacy operator which can only accept ' + 'legacy ndarrays, while received an MXNet numpy ndarray. ' + 'Please call `as_nd_ndarray()` upon the numpy ndarray to ' + 'convert it to a legacy ndarray, and then feed the converted ' + 'array to this operator.' + .format(op_name, func_name)) + if out is None: + return + if not isinstance(out, (list, tuple)): + out = [out] + for arr in out: + if (arr is not None) and (isinstance(arr, np_ndarray)): + raise TypeError('Operator `{}` registered in backend is known as `{}` in Python. ' + 'This is a legacy operator which can only write to ' + 'legacy ndarrays, while received an MXNet numpy ndarray. ' + 'Please call `as_nd_ndarray()` upon the numpy ndarray to ' + 'convert it to a legacy ndarray, and then feed the converted ' + 'array to this operator.' 
+ .format(op_name, func_name)) # pylint: disable=too-many-locals -def _generate_ndarray_function_code(handle, name, func_name, signature_only=False): - """Generate function for ndarray op by handle and function name.""" +def _generate_ndarray_function_code(handle, op_name, func_name, signature_only=False): + """Generate function for ndarray op by handle and function op_name.""" real_name = ctypes.c_char_p() desc = ctypes.c_char_p() num_args = mx_uint() @@ -52,7 +137,7 @@ def _generate_ndarray_function_code(handle, name, func_name, signature_only=Fals arg_types = [py_str(arg_types[i]) for i in range(narg)] key_var_num_args = py_str(key_var_num_args.value) ret_type = py_str(ret_type.value) if ret_type.value is not None else '' - doc_str = _build_doc(name, + doc_str = _build_doc(op_name, py_str(desc.value), arg_names, arg_types, @@ -90,6 +175,10 @@ def _generate_ndarray_function_code(handle, name, func_name, signature_only=Fals signature = ndsignature + signature code = [] + is_np_op = _is_np_op(op_name) + doc_str_idx = 1 + if is_np_op: + doc_str_idx = 2 if arr_name: code.append(""" def %s(*%s, **kwargs):"""%(func_name, arr_name)) @@ -134,15 +223,26 @@ def %s(%s):"""%(func_name, ', '.join(signature))) vals.append(%s)"""%(name, name, name)) # dtype if dtype_name is not None: - code.append(""" + if is_np_op: + code.append(""" + if %s is not _Null and %s is not None: + keys.append('%s') + vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name, dtype_name)) + else: + code.append(""" if %s is not _Null: keys.append('%s') vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name)) + verify_ndarrays_fn =\ + _verify_all_np_ndarrays.__name__ if is_np_op else _verify_all_legacy_ndarrays.__name__ if not signature_only: code.append(""" - return _imperative_invoke(%d, ndargs, keys, vals, out)"""%( - handle.value)) + {verify_fn}("{op_name}", "{func_name}", ndargs, out) + """.format(verify_fn=verify_ndarrays_fn, op_name=op_name, func_name=func_name)) + code.append(""" + return _imperative_invoke(%d, ndargs, keys, vals, out, %s)"""%( + handle.value, str(is_np_op))) else: code.append(""" return (0,)""") @@ -150,7 +250,7 @@ def %s(%s):"""%(func_name, ', '.join(signature))) doc_str_lines = _os.linesep+''.join([' '+s if s.strip() else s for s in 'r"""{doc_str}"""'.format(doc_str=doc_str) .splitlines(True)]) - code.insert(1, doc_str_lines) + code.insert(doc_str_idx, doc_str_lines) return ''.join(code), doc_str diff --git a/python/mxnet/ndarray/utils.py b/python/mxnet/ndarray/utils.py index ff93d0be6d73..730f2172c4f4 100644 --- a/python/mxnet/ndarray/utils.py +++ b/python/mxnet/ndarray/utils.py @@ -248,6 +248,7 @@ def save(fname, data): >>> mx.nd.load('my_dict') {'y': , 'x': } """ + from ..numpy import ndarray as np_ndarray if isinstance(data, NDArray): data = [data] handles = c_array(NDArrayHandle, []) @@ -257,11 +258,17 @@ def save(fname, data): if any(not isinstance(k, string_types) for k in str_keys) or \ any(not isinstance(v, NDArray) for v in nd_vals): raise TypeError('save only accept dict str->NDArray or list of NDArray') + if any(isinstance(v, np_ndarray) for v in nd_vals): + raise TypeError('cannot save mxnet.numpy.ndarray using mxnet.ndarray.save;' + ' use mxnet.numpy.save instead.') keys = c_str_array(str_keys) handles = c_handle_array(nd_vals) elif isinstance(data, list): if any(not isinstance(v, NDArray) for v in data): raise TypeError('save only accept dict str->NDArray or list of NDArray') + if any(isinstance(v, np_ndarray) for v in data): + raise TypeError('cannot save 
mxnet.numpy.ndarray using mxnet.ndarray.save;' + ' use mxnet.numpy.save instead.') keys = None handles = c_handle_array(data) else: diff --git a/python/mxnet/numpy/__init__.py b/python/mxnet/numpy/__init__.py new file mode 100644 index 000000000000..1994148d14d1 --- /dev/null +++ b/python/mxnet/numpy/__init__.py @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""MXNet NumPy module.""" + +from __future__ import division, absolute_import, print_function + +from . import random +from . import linalg +from .multiarray import * # pylint: disable=wildcard-import +from . import _op +from . import _register +from ._op import * # pylint: disable=wildcard-import +from .utils import * # pylint: disable=wildcard-import +from .function_base import * # pylint: disable=wildcard-import +from .stride_tricks import * # pylint: disable=wildcard-import +from .io import * # pylint: disable=wildcard-import +from .arrayprint import * # pylint: disable=wildcard-import + +__all__ = [] diff --git a/python/mxnet/numpy/_op.py b/python/mxnet/numpy/_op.py new file mode 100644 index 000000000000..8f6f9cc053e4 --- /dev/null +++ b/python/mxnet/numpy/_op.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for registering numpy ops for imperative programming.""" + +__all__ = [] diff --git a/python/mxnet/numpy/_register.py b/python/mxnet/numpy/_register.py new file mode 100644 index 000000000000..8a2d2ea61c24 --- /dev/null +++ b/python/mxnet/numpy/_register.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Registering ops in mxnet.numpy for imperative programming.""" + +from __future__ import absolute_import + +from ..base import _init_np_op_module +from ..ndarray.register import _make_ndarray_function + + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy', + mx_module_name=None, make_op_func=_make_ndarray_function) diff --git a/python/mxnet/numpy/arrayprint.py b/python/mxnet/numpy/arrayprint.py new file mode 100644 index 000000000000..9be7faf1f602 --- /dev/null +++ b/python/mxnet/numpy/arrayprint.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""ndarray print format controller.""" + +from __future__ import absolute_import, print_function + +import numpy as onp +from ..util import set_module + +__all__ = ['set_printoptions'] + + +@set_module('mxnet.numpy') +def set_printoptions(precision=None, threshold=None, **kwarg): + """ + Set printing options. + + These options determine the way floating point numbers and arrays are displayed. + + Parameters + ---------- + precision : int or None, optional + Number of digits of precision for floating point output (default 8). + May be `None` if `floatmode` is not `fixed`, to print as many digits as + necessary to uniquely specify the value. + threshold : int, optional + Total number of array elements which trigger summarization + rather than full repr (default 1000). + + Examples + -------- + Floating point precision can be set: + + >>> np.set_printoptions(precision=4) + >>> print(np.array([1.123456789])) + [ 1.1235] + + Long arrays can be summarised: + + >>> np.set_printoptions(threshold=5) + >>> print(np.arange(10)) + [0. 1. 2. ... 7. 8. 9.] + """ + if kwarg: + raise NotImplementedError('mxnet.numpy.set_printoptions only supports parameters' + ' precision and threshold for now.') + onp.set_printoptions(precision=precision, threshold=threshold, **kwarg) diff --git a/python/mxnet/numpy/function_base.py b/python/mxnet/numpy/function_base.py new file mode 100644 index 000000000000..e8e07c70a167 --- /dev/null +++ b/python/mxnet/numpy/function_base.py @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Numpy basic functions.""" +from __future__ import absolute_import + +from .stride_tricks import broadcast_arrays + +__all__ = ['meshgrid'] + + +def meshgrid(*xi, **kwargs): + """ + Return coordinate matrices from coordinate vectors. + + Make N-D coordinate arrays for vectorized evaluations of + N-D scalar/vector fields over N-D grids, given + one-dimensional coordinate arrays x1, x2,..., xn. + + Parameters + ---------- + x1, x2,..., xn : ndarrays + 1-D arrays representing the coordinates of a grid. + indexing : {'xy', 'ij'}, optional + Cartesian ('xy', default) or matrix ('ij') indexing of output. + See Notes for more details. + + sparse : bool, optional + If True a sparse grid is returned in order to conserve memory. + Default is False. Please note that `sparse=True` is currently + not supported. + + copy : bool, optional + If False, a view into the original arrays are returned in order to + conserve memory. Default is True. Please note that `copy=False` + is currently not supported. + + Returns + ------- + X1, X2,..., XN : ndarray + For vectors `x1`, `x2`,..., 'xn' with lengths ``Ni=len(xi)`` , + return ``(N1, N2, N3,...Nn)`` shaped arrays if indexing='ij' + or ``(N2, N1, N3,...Nn)`` shaped arrays if indexing='xy' + with the elements of `xi` repeated to fill the matrix along + the first dimension for `x1`, the second for `x2` and so on. + + Notes + ----- + This function supports both indexing conventions through the indexing + keyword argument. Giving the string 'ij' returns a meshgrid with + matrix indexing, while 'xy' returns a meshgrid with Cartesian indexing. + In the 2-D case with inputs of length M and N, the outputs are of shape + (N, M) for 'xy' indexing and (M, N) for 'ij' indexing. In the 3-D case + with inputs of length M, N and P, outputs are of shape (N, M, P) for + 'xy' indexing and (M, N, P) for 'ij' indexing. The difference is + illustrated by the following code snippet:: + + xv, yv = np.meshgrid(x, y, sparse=False, indexing='ij') + for i in range(nx): + for j in range(ny): + # treat xv[i,j], yv[i,j] + + xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy') + for i in range(nx): + for j in range(ny): + # treat xv[j,i], yv[j,i] + + In the 1-D and 0-D case, the indexing and sparse keywords have no effect. 
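+ + Examples + -------- + A short sketch of the expected behaviour under the default 'xy' indexing + (added for illustration; the shapes follow from the rules described above): + + >>> x = np.array([0, 1, 2]) + >>> y = np.array([0, 1]) + >>> xv, yv = np.meshgrid(x, y) + >>> xv.shape, yv.shape + ((2, 3), (2, 3))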
+ """ + ndim = len(xi) + + copy_ = kwargs.pop('copy', True) + if not copy_: + raise NotImplementedError('copy=False is not implemented') + sparse = kwargs.pop('sparse', False) + if sparse: + raise NotImplementedError('sparse=False is not implemented') + indexing = kwargs.pop('indexing', 'xy') + + if kwargs: + raise TypeError("meshgrid() got an unexpected keyword argument '%s'" + % (list(kwargs)[0],)) + + if indexing not in ['xy', 'ij']: + raise ValueError( + "Valid values for `indexing` are 'xy' and 'ij'.") + + s0 = (1,) * ndim + output = [x.reshape(s0[:i] + (-1,) + s0[i + 1:]) + for i, x in enumerate(xi)] + + if indexing == 'xy' and ndim > 1: + # switch first and second axis + output[0] = output[0].reshape(1, -1, *s0[2:]) + output[1] = output[1].reshape(-1, 1, *s0[2:]) + + if not sparse: + # Return the full N-D matrix (not only the 1-D vector) + output = broadcast_arrays(*output) + + return output diff --git a/python/mxnet/numpy/io.py b/python/mxnet/numpy/io.py new file mode 100644 index 000000000000..aece13fa1db4 --- /dev/null +++ b/python/mxnet/numpy/io.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +"""I/O functions for ndarrays.""" +from __future__ import absolute_import +import numpy as onp +from ..context import current_context +from .multiarray import array + +__all__ = ['genfromtxt'] + + +# TODO(junwu): Add doc +def genfromtxt(*args, **kwargs): + """This is a wrapper of the official NumPy's `genfromtxt` function. + Please refer to the documentation here + https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html. + + Notes + ----- + This function has added an additional parameter `ctx` which allows to create + ndarrays on the user-specified device. + """ + ctx = kwargs.pop('ctx', current_context()) + if ctx is None: + ctx = current_context() + ret = onp.genfromtxt(*args, **kwargs) + return array(ret, dtype=ret.dtype, ctx=ctx) diff --git a/python/mxnet/numpy/linalg.py b/python/mxnet/numpy/linalg.py new file mode 100644 index 000000000000..9758af47233d --- /dev/null +++ b/python/mxnet/numpy/linalg.py @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for ops used in imperative programming.""" + +from __future__ import absolute_import +from ..ndarray import numpy as _mx_nd_np + +__all__ = ['norm'] + + +def norm(x, ord=None, axis=None, keepdims=False): + r"""Matrix or vector norm. + + This function can only support Frobenius norm for now. + The Frobenius norm is given by [1]_: + + :math:`||A||_F = [\sum_{i,j} abs(a_{i,j})^2]^{1/2}` + + Parameters + ---------- + x : ndarray + Input array. + ord : {'fro'}, optional + Order of the norm. + axis : {int, 2-tuple of ints, None}, optional + If `axis` is an integer, it specifies the axis of `x` along which to + compute the vector norms. If `axis` is a 2-tuple, it specifies the + axes that hold 2-D matrices, and the matrix norms of these matrices + are computed. If `axis` is None, the norm of the whole ndarray is + returned. + + keepdims : bool, optional + If this is set to True, the axes which are normed over are left in the + result as dimensions with size one. With this option the result will + broadcast correctly against the original `x`. + + Returns + ------- + n : float or ndarray + Norm of the matrix or vector(s). + + References + ---------- + .. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*, + Baltimore, MD, Johns Hopkins University Press, 1985, pg. 15 + """ + return _mx_nd_np.linalg.norm(x, ord, axis, keepdims) diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py new file mode 100644 index 000000000000..d48174301d15 --- /dev/null +++ b/python/mxnet/numpy/multiarray.py @@ -0,0 +1,2973 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=too-many-lines +"""numpy ndarray and util functions.""" + +from __future__ import absolute_import +from __future__ import division + +try: + from __builtin__ import slice as py_slice +except ImportError: + from builtins import slice as py_slice + +from array import array as native_array +import sys +import ctypes +import warnings +import numpy as _np +from ..ndarray import NDArray, _DTYPE_NP_TO_MX, _GRAD_REQ_MAP +from ..ndarray._internal import _set_np_ndarray_class +from . 
import _op as _mx_np_op + from ..base import check_call, _LIB, NDArrayHandle + from ..base import mx_real_t, c_array_buf, mx_uint, numeric_types, integer_types + from ..util import _sanity_check_params, set_module + from ..context import current_context + from ..ndarray import numpy as _mx_nd_np + from ..ndarray.numpy import _internal as _npi + + __all__ = ['ndarray', 'empty', 'array', 'zeros', 'ones', 'maximum', 'minimum', 'stack', 'arange', + 'argmax', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'concatenate', + 'clip', 'split', 'swapaxes', 'expand_dims', 'tile', 'linspace', 'sin', 'cos', + 'sinh', 'cosh', 'log10', 'sqrt', 'abs', 'exp', 'arctan', 'sign', 'log', + 'degrees', 'log2', 'rint', 'radians', 'mean', 'reciprocal', 'square', 'arcsin', + 'argsort', 'identity'] + + + # This function is copied from ndarray.py since pylint + # keeps giving false alarm error of undefined-all-variable + def _new_alloc_handle(shape, ctx, delay_alloc, dtype=mx_real_t): + """Return a new handle with specified shape and context. + + Empty handle is only used to hold results. + + Returns + ------- + handle + A new empty `ndarray` handle. + """ + hdl = NDArrayHandle() + check_call(_LIB.MXNDArrayCreateEx( + c_array_buf(mx_uint, native_array('I', shape)), + mx_uint(len(shape)), + ctypes.c_int(ctx.device_typeid), + ctypes.c_int(ctx.device_id), + ctypes.c_int(int(delay_alloc)), + ctypes.c_int(int(_DTYPE_NP_TO_MX[_np.dtype(dtype).type])), + ctypes.byref(hdl))) + return hdl + + + # Have to use 0 as default value for stype since pylint does not allow + # importing _STORAGE_TYPE_DEFAULT from ndarray.py. + def _np_ndarray_cls(handle, writable=True, stype=0): + if stype != 0: + raise ValueError('_np_ndarray_cls currently only supports default storage ' + 'type, while received stype = {}'.format(stype)) + return ndarray(handle, writable=writable) + + + _set_np_ndarray_class(_np_ndarray_cls) + + + def _get_index(idx): + if isinstance(idx, NDArray) and not isinstance(idx, ndarray): + raise TypeError('Cannot have mx.nd.NDArray as index') + if isinstance(idx, ndarray): + return idx._as_nd_ndarray() + elif sys.version_info[0] > 2 and isinstance(idx, range): + return arange(idx.start, idx.stop, idx.step, dtype='int32')._as_nd_ndarray() + else: + return idx + + + @set_module('mxnet.numpy') # pylint: disable=invalid-name + class ndarray(NDArray): + """An array object represents a multidimensional, homogeneous array of fixed-size items. + An associated data-type object describes the format of each element in the array + (its byte-order, how many bytes it occupies in memory, whether it is an integer, a + floating point number, or something else, etc.). Arrays should be constructed using + `array`, `zeros` or `empty`.
Currently, only c-contiguous arrays are supported.""" + + # pylint: disable=too-many-return-statements + def __getitem__(self, key): + # TODO(junwu): calling base class __getitem__ is a temp solution + ndim = self.ndim + shape = self.shape + if ndim == 0: + if key != (): + raise IndexError('scalar tensor can only accept `()` as index') + if isinstance(key, tuple) and len(key) == 0: + return self + elif isinstance(key, tuple) and len(key) == ndim\ + and all(isinstance(idx, integer_types) for idx in key): + out = self + for idx in key: + out = out[idx] + return out + elif isinstance(key, integer_types): + if key > shape[0] - 1: + raise IndexError( + 'index {} is out of bounds for axis 0 with size {}'.format( + key, shape[0])) + return self._at(key) + elif isinstance(key, py_slice): + if key.step is not None and key.step != 1: + if key.step == 0: + raise ValueError("slice step cannot be zero") + return self.as_nd_ndarray()._get_nd_basic_indexing(key).as_np_ndarray() + elif key.start is not None or key.stop is not None: + return self._slice(key.start, key.stop) + else: + return self + + if isinstance(key, ndarray): + key = key._as_nd_ndarray() + elif isinstance(key, tuple): + key = [_get_index(idx) for idx in key] + key = tuple(key) + elif isinstance(key, list): + key = [_get_index(idx) for idx in key] + elif sys.version_info[0] > 2 and isinstance(key, range): + key = _get_index(key) + return self._as_nd_ndarray().__getitem__(key).as_np_ndarray() + # pylint: enable=too-many-return-statements + + def __setitem__(self, key, value): + # TODO(junwu): calling base class __setitem__ is a temp solution + if isinstance(value, NDArray) and not isinstance(value, ndarray): + raise TypeError('Cannot assign mx.nd.NDArray to mxnet.numpy.ndarray') + if self.ndim == 0: + if not isinstance(key, tuple) or len(key) != 0: + raise IndexError('scalar tensor can only accept `()` as index') + if isinstance(value, ndarray): + value = value._as_nd_ndarray() + # TODO(junwu): Better handling of this situation + if isinstance(key, tuple) and len(key) == 0: + self._as_nd_ndarray().__setitem__(slice(None), value) + return + + if isinstance(key, ndarray): + key = key._as_nd_ndarray() + elif isinstance(key, tuple): + key = [_get_index(idx) for idx in key] + key = tuple(key) + elif isinstance(key, list): + key = [_get_index(idx) for idx in key] + elif sys.version_info[0] > 2 and isinstance(key, range): + key = _get_index(key) + self._as_nd_ndarray().__setitem__(key, value) + + def __add__(self, other): + """x.__add__(y) <=> x + y""" + return add(self, other) + + def __iadd__(self, other): + """x.__iadd__(y) <=> x += y""" + if not self.writable: + raise ValueError('trying to add to a readonly ndarray') + return add(self, other, out=self) + + def __sub__(self, other): + """x.__sub__(y) <=> x - y""" + return subtract(self, other) + + def __isub__(self, other): + """x.__isub__(y) <=> x -= y""" + if not self.writable: + raise ValueError('trying to subtract from a readonly ndarray') + return subtract(self, other, out=self) + + def __rsub__(self, other): + """x.__rsub__(y) <=> y - x""" + return subtract(other, self) + + def __mul__(self, other): + """x.__mul__(y) <=> x * y""" + return multiply(self, other) + + def __neg__(self): + return self.__mul__(-1.0) + + def __imul__(self, other): + """x.__imul__(y) <=> x *= y""" + if not self.writable: + raise ValueError('trying to add to a readonly ndarray') + return multiply(self, other, out=self) + + def __rmul__(self, other): + """x.__rmul__(y) <=> y * x""" + return self.__mul__(other) + + 
def __div__(self, other): + raise AttributeError('ndarray.__div__ is replaced by __truediv__. If you are using' + ' Python2, please use the statement from __future__ import division' + ' to change the / operator to mean true division throughout the' + ' module. If you are using Python3, this error should not have' + ' been encountered.') + + def __rdiv__(self, other): + raise AttributeError('ndarray.__rdiv__ is replaced by __rtruediv__. If you are using' + ' Python2, please use the statement from __future__ import division' + ' to change the / operator to mean true division throughout the' + ' module. If you are using Python3, this error should not have' + ' been encountered.') + + def __idiv__(self, other): + raise AttributeError('ndarray.__idiv__ is replaced by __itruediv__. If you are using' + ' Python2, please use the statement from __future__ import division' + ' to change the / operator to mean true division throughout the' + ' module. If you are using Python3, this error should not have' + ' been encountered.') + + def __truediv__(self, other): + """x.__truediv__(y) <=> x / y""" + return divide(self, other) + + def __rtruediv__(self, other): + """x.__rtruediv__(y) <=> y / x""" + return divide(other, self) + + def __itruediv__(self, other): + return divide(self, other, out=self) + + def __mod__(self, other): + """x.__mod__(y) <=> x % y""" + return mod(self, other) + + def __rmod__(self, other): + """x.__rmod__(y) <=> y % x""" + return mod(other, self) + + def __imod__(self, other): + """x.__imod__(y) <=> x %= y""" + return mod(self, other, out=self) + + def __pow__(self, other): + """x.__pow__(y) <=> x ** y""" + return power(self, other) + + def __rpow__(self, other): + """x.__rpow__(y) <=> y ** x""" + return power(other, self) + + def __eq__(self, other): + """x.__eq__(y) <=> x == y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, ndarray): + return _npi.equal(self, other) + elif isinstance(other, numeric_types): + return _npi.equal_scalar(self, float(other)) + else: + raise TypeError("ndarray does not support type {} as operand".format(str(type(other)))) + + def __hash__(self): + raise NotImplementedError + + def __ne__(self, other): + """x.__ne__(y) <=> x != y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, ndarray): + return _npi.not_equal(self, other) + elif isinstance(other, numeric_types): + return _npi.not_equal_scalar(self, float(other)) + else: + raise TypeError("ndarray does not support type {} as operand".format(str(type(other)))) + + def __gt__(self, other): + """x.__gt__(y) <=> x > y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, ndarray): + return _npi.greater(self, other) + elif isinstance(other, numeric_types): + return _npi.greater_scalar(self, float(other)) + else: + raise TypeError("ndarray does not support type {} as operand".format(str(type(other)))) + + def __ge__(self, other): + """x.__ge__(y) <=> x >= y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, ndarray): + return _npi.greater_equal(self, other) + elif isinstance(other, numeric_types): + return _npi.greater_equal_scalar(self, float(other)) + else: + raise TypeError("ndarray does not support type {} as operand".format(str(type(other)))) + + def __lt__(self, other): + """x.__lt__(y) <=> x < y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, ndarray): + return
_npi.less(self, other) + elif isinstance(other, numeric_types): + return _npi.less_scalar(self, float(other)) + else: + raise TypeError("ndarray does not support type {} as operand".format(str(type(other)))) + + def __le__(self, other): + """x.__le__(y) <=> x <= y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, ndarray): + return _npi.less_equal(self, other) + elif isinstance(other, numeric_types): + return _npi.less_equal_scalar(self, float(other)) + else: + raise TypeError("ndarray does not support type {} as operand".format(str(type(other)))) + + def __bool__(self): + num_elements = self.size + if num_elements == 0: + warnings.simplefilter('default') + warnings.warn('The truth value of an empty array is ambiguous. Returning False, but in' + ' future this will result in an error.', DeprecationWarning) + return False + elif num_elements == 1: + return bool(self.item()) + else: + raise ValueError("The truth value of an ndarray with multiple elements is ambiguous.") + + __nonzero__ = __bool__ + + def __float__(self): + num_elements = self.size + if num_elements != 1: + raise TypeError('only size-1 arrays can be converted to Python scalars') + return float(self.item()) + + def __int__(self): + num_elements = self.size + if num_elements != 1: + raise TypeError('only size-1 arrays can be converted to Python scalars') + return int(self.item()) + + def __len__(self): + """Number of elements along the first axis.""" + shape = self.shape + if len(shape) == 0: + raise TypeError('len() of unsized object') + return self.shape[0] + + def __reduce__(self): + return ndarray, (None,), self.__getstate__() + + def item(self, *args): + """Copy an element of an array to a standard Python scalar and return it. + + Parameters + ---------- + *args : Arguments (variable number and type) + none: in this case, the method only works for arrays with one element (a.size == 1), + which element is copied into a standard Python scalar object and returned. + + int_type: this argument is interpreted as a flat index into the array, specifying which + element to copy and return. + + tuple of int_types: functions as does a single int_type argument, except that the + argument is interpreted as an nd-index into the array. + + Returns + ------- + z : Standard Python scalar object + A copy of the specified element of the array as a suitable Python scalar. + """ + # TODO(junwu): no need to call asnumpy() on the whole array. + return self.asnumpy().item(*args) + + @property + # pylint: disable= invalid-name, undefined-variable + def T(self): + """Same as self.transpose(). 
This always returns a copy of self.""" + return self.transpose() + # pylint: enable= invalid-name, undefined-variable + + def all(self, axis=None, out=None, keepdims=False): + raise NotImplementedError + + def any(self, axis=None, out=None, keepdims=False): + raise NotImplementedError + + def _as_nd_ndarray(self): + """This is not a user-facing API.""" + hdl = NDArrayHandle() + check_call(_LIB.MXShallowCopyNDArray(self.handle, ctypes.byref(hdl))) + return NDArray(handle=hdl, writable=self.writable) + + def as_nd_ndarray(self): + """Convert mxnet.numpy.ndarray to mxnet.ndarray.NDArray to use its fluent methods.""" + # TODO(junwu): Uncomment the following lines + # if self.ndim == 0: # TODO(junwu): this costs ~10ns, can be moved to backend + # raise ValueError('cannot convert a scalar np.ndarray to mx.nd.NDArray') + # if self.size == 0: # TODO(junwu): this costs ~10ns, can be moved to backend + # raise ValueError('cannot convert a zero-size np.ndarray to mx.nd.NDArray') + return self._as_nd_ndarray() + + def as_np_ndarray(self): + """A convenience function for creating a numpy ndarray from the current ndarray + with zero copy. For this class, it just returns itself since it's already a + numpy ndarray.""" + return self + + def __repr__(self): + """ + Returns a string representation of the array. The dtype of the ndarray will not + be appended to the string if it is `float32`. The context of the ndarray will + be appended for devices other than CPU. + + Examples + -------- + >>> from mxnet import np, npx + >>> a = np.random.uniform(size=(2, 3)) + >>> a + array([[0.5488135 , 0.5928446 , 0.71518934], + [0.84426576, 0.60276335, 0.8579456 ]]) + >>> print(a) + [[0.5488135 0.5928446 0.71518934] + [0.84426576 0.60276335 0.8579456 ]] + >>> a.dtype + + >>> b = a.astype(np.float64) + >>> b + array([[0.54881352, 0.59284461, 0.71518934], + [0.84426576, 0.60276335, 0.85794562]], dtype=float64) + >>> print(b) + [[0.54881352 0.59284461 0.71518934] + [0.84426576 0.60276335 0.85794562]] + >>> b.dtype + + >>> c = a.copyto(npx.gpu(0)) + >>> c + array([[0.5488135 , 0.5928446 , 0.71518934], + [0.84426576, 0.60276335, 0.8579456 ]], ctx=gpu(0)) + >>> print(c) + [[0.5488135 0.5928446 0.71518934] + [0.84426576 0.60276335 0.8579456 ]] @gpu(0) + >>> d = b.copyto(npx.gpu(0)) + >>> d + array([[0.54881352, 0.59284461, 0.71518934], + [0.84426576, 0.60276335, 0.85794562]], dtype=float64, ctx=gpu(0)) + >>> print(d) + [[0.54881352 0.59284461 0.71518934] + [0.84426576 0.60276335 0.85794562]] @gpu(0) + """ + array_str = self.asnumpy().__repr__() + dtype = self.dtype + if 'dtype=' in array_str: + if dtype == _np.float32: + array_str = array_str[:array_str.rindex(',')] + ')' + elif dtype != _np.float32: + array_str = array_str[:-1] + ', dtype={})'.format(dtype.__name__) + + context = self.context + if context.device_type == 'cpu': + return array_str + return array_str[:-1] + ', ctx={})'.format(str(context)) + + def __str__(self): + """Returns a string representation of the array.""" + array_str = self.asnumpy().__str__() + context = self.context + if context.device_type == 'cpu' or self.ndim == 0: + return array_str + return '{array} @{ctx}'.format(array=array_str, ctx=context) + + def attach_grad(self, grad_req='write'): # pylint: disable=arguments-differ + """Attach a gradient buffer to this ndarray, so that `backward` + can compute gradient with respect to it. + + Parameters + ---------- + grad_req : {'write', 'add', 'null'} + How gradient will be accumulated. + - 'write': gradient will be overwritten on every backward. 
+ - 'add': gradient will be added to existing value on every backward. + - 'null': do not compute gradient for this NDArray. + """ + grad = _mx_np_op.zeros_like(self) # pylint: disable=undefined-variable + grad_req = _GRAD_REQ_MAP[grad_req] + check_call(_LIB.MXAutogradMarkVariables( + 1, ctypes.pointer(self.handle), + ctypes.pointer(mx_uint(grad_req)), + ctypes.pointer(grad.handle))) + + @property + def grad(self): + """Returns gradient buffer attached to this ndarray.""" + hdl = NDArrayHandle() + check_call(_LIB.MXNDArrayGetGrad(self.handle, ctypes.byref(hdl))) + if hdl.value is None: + return None + return _np_ndarray_cls(hdl) + + def detach(self): + """Returns a new ndarray, detached from the current graph.""" + hdl = NDArrayHandle() + check_call(_LIB.MXNDArrayDetach(self.handle, ctypes.byref(hdl))) + return _np_ndarray_cls(hdl) + + def astype(self, dtype, *args, **kwargs): # pylint: disable=arguments-differ,unused-argument + """ + Copy of the array, cast to a specified type. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, optional + Default `True`. By default, astype always returns a newly + allocated ndarray on the same context. If this is set to + `False`, and the dtype requested is the same as the ndarray's + dtype, the ndarray is returned instead of a copy. + + Returns + ------- + arr_t : ndarray + Unless `copy` is False and the other conditions for returning the input + array are satisfied (see description for `copy` input parameter), `arr_t` + is a new array of the same shape as the input array with `dtype`. + """ + _sanity_check_params('astype', ['order', 'casting', 'subok'], kwargs) + copy = kwargs.get('copy', True) + if not copy and _np.dtype(dtype) == self.dtype: + return self + + res = empty(self.shape, dtype=dtype, ctx=self.context) + self.copyto(res) + return res + + def copyto(self, other): + """Copies the value of this array to another array. + + If ``other`` is a ``ndarray`` object, then ``other.shape`` and + ``self.shape`` should be the same. This function copies the value from + ``self`` to ``other``. + + If ``other`` is a context, a new ``NDArray`` will be first created on + the target context, and the value of ``self`` is copied. + + Parameters + ---------- + other : ndarray or Context + The destination array or context. + + Returns + ------- + ndarray + The copied array. If ``other`` is an ``ndarray``, then the return value + and ``other`` will point to the same ``ndarray``. + + Examples + -------- + >>> x = np.ones((2,3)) + >>> y = np.zeros((2,3), mx.gpu(0)) + >>> z = x.copyto(y) + >>> z is y + True + >>> y.asnumpy() + array([[ 1., 1., 1.], + [ 1., 1., 1.]], dtype=float32) + """ + if isinstance(other, ndarray): + other = other._as_nd_ndarray() + return self._as_nd_ndarray().copyto(other).as_np_ndarray() + + def asscalar(self): + raise AttributeError('mxnet.numpy.ndarray object has no attribute asscalar') + + def argmax(self, axis=None, out=None): # pylint: disable=arguments-differ + return _mx_nd_np.argmax(self, axis, out) + + def as_in_context(self, context): + """Returns an array on the target device with the same value as this array. + + If the target context is the same as ``self.context``, then ``self`` is + returned. Otherwise, a copy is made. + + Parameters + ---------- + context : Context + The target context. + + Returns + ------- + ndarray + The target array. 
+ """ + if self.context == context: + return self + return self.copyto(context) + + def copy(self, order='C'): # pylint: disable=arguments-differ + if order != 'C': + raise NotImplementedError('ndarray.copy only supports order=\'C\', while ' + 'received {}'.format(str(order))) + return super(ndarray, self).copy().as_np_ndarray() + + def dot(self, b, out=None): + return _mx_np_op.dot(self, b, out=out) + + def reshape(self, *args, **kwargs): # pylint: disable=arguments-differ + """Returns an array containing the same data with a new shape. + + Notes + ----- + Unlike the free function `numpy.reshape`, this method on `ndarray` allows + the elements of the shape parameter to be passed in as separate arguments. + For example, ``a.reshape(10, 11)`` is equivalent to + ``a.reshape((10, 11))``. + """ + order = 'C' + if len(kwargs) > 1: + raise TypeError('function takes at most 1 keyword argument') + if len(kwargs) == 1: + if 'order' not in kwargs: + raise TypeError('{} is an invalid keyword argument for this function' + .format(kwargs.keys()[0])) + order = kwargs.pop('order', 'C') + if order != 'C': + raise NotImplementedError('only supports C-order,' + ' while received {}'.format(order)) + if len(args) == 0: + raise TypeError('reshape() takes exactly 1 argument (0 given)') + if len(args) == 1 and isinstance(args[0], tuple): + return _mx_np_op.reshape(self, newshape=args[0], order=order) + else: + return _mx_np_op.reshape(self, newshape=args, order=order) + + def reshape_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`reshape_like`. + + The arguments are the same as for :py:func:`reshape_like`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute reshape_like') + + def zeros_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`zeros_like`. + + The arguments are the same as for :py:func:`zeros_like`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute zeros_like') + + def ones_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`ones_like`. + + The arguments are the same as for :py:func:`ones_like`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute ones_like') + + def broadcast_axes(self, *args, **kwargs): + """Convenience fluent method for :py:func:`broadcast_axes`. + + The arguments are the same as for :py:func:`broadcast_axes`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute broadcast_like') + + def repeat(self, repeats, axis=None): # pylint: disable=arguments-differ + """Repeat elements of an array.""" + return _mx_np_op.repeat(self, repeats=repeats, axis=axis) + + def pad(self, *args, **kwargs): + """Convenience fluent method for :py:func:`pad`. + + The arguments are the same as for :py:func:`pad`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute pad') + + def swapaxes(self, axis1, axis2): # pylint: disable=arguments-differ + """Return a copy of the array with axis1 and axis2 interchanged. + Refer to `mxnet.numpy.swapaxes` for full documentation. + """ + return swapaxes(self, axis1, axis2) + + def split(self, *args, **kwargs): + """Convenience fluent method for :py:func:`split`. + + The arguments are the same as for :py:func:`split`, with + this array as data. 
+ """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute split') + + def split_v2(self, *args, **kwargs): + """Convenience fluent method for :py:func:`split_v2`. + + The arguments are the same as for :py:func:`split_v2`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute split_v2') + + def slice(self, *args, **kwargs): + """Convenience fluent method for :py:func:`slice`. + + The arguments are the same as for :py:func:`slice`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute slice') + + def slice_axis(self, *args, **kwargs): + """Convenience fluent method for :py:func:`slice_axis`. + + The arguments are the same as for :py:func:`slice_axis`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute slice_axis') + + def slice_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`slice_like`. + + The arguments are the same as for :py:func:`slice_like`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute slice_like') + + def take(self, *args, **kwargs): + """Convenience fluent method for :py:func:`take`. + + The arguments are the same as for :py:func:`take`, with + this array as data. + """ + raise NotImplementedError + + def one_hot(self, *args, **kwargs): + """Convenience fluent method for :py:func:`one_hot`. + + The arguments are the same as for :py:func:`one_hot`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute one_hot') + + def pick(self, *args, **kwargs): + """Convenience fluent method for :py:func:`pick`. + + The arguments are the same as for :py:func:`pick`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute pick') + + def sort(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sort`. + + The arguments are the same as for :py:func:`sort`, with + this array as data. + """ + raise NotImplementedError + + def topk(self, *args, **kwargs): + """Convenience fluent method for :py:func:`topk`. + + The arguments are the same as for :py:func:`topk`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute topk') + + def argsort(self, axis=-1, kind='quicksort', order=None): # pylint: disable=arguments-differ + """Convenience fluent method for :py:func:`argsort`. + + The arguments are the same as for :py:func:`argsort`, with + this array as data. + """ + if kind != 'quicksort': + raise AttributeError('mxnet.numpy.argsort does not support other sorting methods') + if order is not None: + raise AttributeError('mxnet.numpy.argsort does not support sorting with fields ordering') + return _npi.argsort(self, axis) + + def argmax_channel(self, *args, **kwargs): + """Convenience fluent method for :py:func:`argmax_channel`. + + The arguments are the same as for :py:func:`argmax_channel`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute argmax_channel') + + def argmin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`argmin`. + + The arguments are the same as for :py:func:`argmin`, with + this array as data. + """ + raise NotImplementedError + + def clip(self, min=None, max=None, out=None): # pylint: disable=arguments-differ + """Return an array whose values are limited to [min, max]. + One of max or min must be given. 
+ """ + return clip(self, min, max, out=out) + + def abs(self, *args, **kwargs): + """Convenience fluent method for :py:func:`abs`. + + The arguments are the same as for :py:func:`abs`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute abs') + + def sign(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sign`. + + The arguments are the same as for :py:func:`sign`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute sign') + + def flatten(self, order='C'): # pylint: disable=arguments-differ + """Return a copy of the array collapsed into one dimension.""" + return self.reshape(-1, order=order) + + def shape_array(self, *args, **kwargs): + """Convenience fluent method for :py:func:`shape_array`. + + The arguments are the same as for :py:func:`shape_array`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute shape_array') + + def size_array(self, *args, **kwargs): + """Convenience fluent method for :py:func:`size_array`. + + The arguments are the same as for :py:func:`size_array`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute size_array') + + def expand_dims(self, *args, **kwargs): + """Convenience fluent method for :py:func:`expand_dims`. + + The arguments are the same as for :py:func:`expand_dims`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute expand_dims') + + def tile(self, *args, **kwargs): + """Convenience fluent method for :py:func:`tile`. + + The arguments are the same as for :py:func:`tile`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute tile') + + def transpose(self, *axes): # pylint: disable=arguments-differ + """Permute the dimensions of an array.""" + return _mx_np_op.transpose(self, axes=axes if len(axes) != 0 else None) + + def flip(self, *args, **kwargs): + """Convenience fluent method for :py:func:`flip`. + + The arguments are the same as for :py:func:`flip`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute flip') + + def depth_to_space(self, *args, **kwargs): + """Convenience fluent method for :py:func:`depth_to_space`. + + The arguments are the same as for :py:func:`depth_to_space`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute depth_to_space') + + def space_to_depth(self, *args, **kwargs): + """Convenience fluent method for :py:func:`space_to_depth`. + + The arguments are the same as for :py:func:`space_to_depth`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute space_to_depth') + + def diag(self, k=0, **kwargs): + """Convenience fluent method for :py:func:`diag`. + + The arguments are the same as for :py:func:`diag`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute diag') + + def sum(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Convenience fluent method for :py:func:`sum`. + + The arguments are the same as for :py:func:`sum`, with + this array as data. + """ + return _mx_np_op.sum(self, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + def nansum(self, *args, **kwargs): + """Convenience fluent method for :py:func:`nansum`. 
+ + The arguments are the same as for :py:func:`nansum`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute nansum') + + def prod(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Return the product of the array elements over the given axis.""" + return _mx_np_op.prod(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + def nanprod(self, *args, **kwargs): + """Convenience fluent method for :py:func:`nanprod`. + + The arguments are the same as for :py:func:`nanprod`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute nanprod') + + def mean(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Returns the average of the array elements along given axis.""" + return _npi.mean(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + # TODO(junwu): Use mxnet std op instead of onp.std + def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): # pylint: disable=arguments-differ + """Returns the standard deviation of the array elements along given axis.""" + ret_np = self.asnumpy().std(axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) + return array(ret_np, dtype=ret_np.dtype, ctx=self.context) + + def cumsum(self, axis=None, dtype=None, out=None): + """Return the cumulative sum of the elements along the given axis.""" + return _mx_np_op.cumsum(self, axis=axis, dtype=dtype, out=out) + + def tolist(self): + return self.asnumpy().tolist() + + def max(self, axis=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Return the maximum along a given axis.""" + return _mx_np_op.max(self, axis=axis, keepdims=keepdims, out=out) + + def min(self, *args, **kwargs): + """Convenience fluent method for :py:func:`min`. + + The arguments are the same as for :py:func:`min`, with + this array as data. + """ + raise NotImplementedError + + def norm(self, *args, **kwargs): + """Convenience fluent method for :py:func:`norm`. + + The arguments are the same as for :py:func:`norm`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute norm') + + def round(self, *args, **kwargs): + """Convenience fluent method for :py:func:`round`. + + The arguments are the same as for :py:func:`round`, with + this array as data. + """ + raise NotImplementedError + + def rint(self, *args, **kwargs): + """Convenience fluent method for :py:func:`rint`. + + The arguments are the same as for :py:func:`rint`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute rint') + + def fix(self, *args, **kwargs): + """Convenience fluent method for :py:func:`fix`. + + The arguments are the same as for :py:func:`fix`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute fix') + + def floor(self, *args, **kwargs): + """Convenience fluent method for :py:func:`floor`. + + The arguments are the same as for :py:func:`floor`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute floor') + + def ceil(self, *args, **kwargs): + """Convenience fluent method for :py:func:`ceil`. + + The arguments are the same as for :py:func:`ceil`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute ceil') + + def trunc(self, *args, **kwargs): + """Convenience fluent method for :py:func:`trunc`. 
+ + The arguments are the same as for :py:func:`trunc`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute trunc') + + def sin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sin`. + + The arguments are the same as for :py:func:`sin`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute sin') + + def cos(self, *args, **kwargs): + """Convenience fluent method for :py:func:`cos`. + + The arguments are the same as for :py:func:`cos`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute cos') + + def tan(self, *args, **kwargs): + """Convenience fluent method for :py:func:`tan`. + + The arguments are the same as for :py:func:`tan`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute tan') + + def arcsin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arcsin`. + + The arguments are the same as for :py:func:`arcsin`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute arcsin') + + def arccos(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arccos`. + + The arguments are the same as for :py:func:`arccos`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute arccos') + + def arctan(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arctan`. + + The arguments are the same as for :py:func:`arctan`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute arctan') + + def degrees(self, *args, **kwargs): + """Convenience fluent method for :py:func:`degrees`. + + The arguments are the same as for :py:func:`degrees`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute degrees') + + def radians(self, *args, **kwargs): + """Convenience fluent method for :py:func:`radians`. + + The arguments are the same as for :py:func:`radians`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute radians') + + def sinh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sinh`. + + The arguments are the same as for :py:func:`sinh`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute sinh') + + def cosh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`cosh`. + + The arguments are the same as for :py:func:`cosh`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute cosh') + + def tanh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`tanh`. + + The arguments are the same as for :py:func:`tanh`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute tanh') + + def arcsinh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arcsinh`. + + The arguments are the same as for :py:func:`arcsinh`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute arcsinh') + + def arccosh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arccosh`. + + The arguments are the same as for :py:func:`arccosh`, with + this array as data. 
+ """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute arccosh') + + def arctanh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arctanh`. + + The arguments are the same as for :py:func:`arctanh`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute arctanh') + + def exp(self, *args, **kwargs): + """Convenience fluent method for :py:func:`exp`. + + The arguments are the same as for :py:func:`exp`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute exp') + + def expm1(self, *args, **kwargs): + """Convenience fluent method for :py:func:`expm1`. + + The arguments are the same as for :py:func:`expm1`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute expm1') + + def log(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log`. + + The arguments are the same as for :py:func:`log`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute log') + + def log10(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log10`. + + The arguments are the same as for :py:func:`log10`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute log10') + + def log2(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log2`. + + The arguments are the same as for :py:func:`log2`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute log2') + + def log1p(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log1p`. + + The arguments are the same as for :py:func:`log1p`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute log1p') + + def sqrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sqrt`. + + The arguments are the same as for :py:func:`sqrt`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute sqrt') + + def rsqrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`rsqrt`. + + The arguments are the same as for :py:func:`rsqrt`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute rsqrt') + + def cbrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`cbrt`. + + The arguments are the same as for :py:func:`cbrt`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute cqrt') + + def rcbrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`rcbrt`. + + The arguments are the same as for :py:func:`rcbrt`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute rcqrt') + + def square(self, *args, **kwargs): + """Convenience fluent method for :py:func:`square`. + + The arguments are the same as for :py:func:`square`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute square') + + def reciprocal(self, *args, **kwargs): + """Convenience fluent method for :py:func:`reciprocal`. + + The arguments are the same as for :py:func:`reciprocal`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute reciprocal') + + def relu(self, *args, **kwargs): + """Convenience fluent method for :py:func:`relu`. 
+ + The arguments are the same as for :py:func:`relu`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute relu') + + def sigmoid(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sigmoid`. + + The arguments are the same as for :py:func:`sigmoid`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute sigmoid') + + def softmax(self, *args, **kwargs): + """Convenience fluent method for :py:func:`softmax`. + + The arguments are the same as for :py:func:`softmax`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute softmax') + + def log_softmax(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log_softmax`. + + The arguments are the same as for :py:func:`log_softmax`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute log_softmax') + + def softmin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`softmin`. + + The arguments are the same as for :py:func:`softmin`, with + this array as data. + """ + raise AttributeError('mxnet.numpy.ndarray object has no attribute softmin') + + def squeeze(self, axis=None): # pylint: disable=arguments-differ + """Remove single-dimensional entries from the shape of a. + """ + return _mx_np_op.squeeze(self, axis=axis) + + def broadcast_to(self, shape): + raise AttributeError('mxnet.numpy.ndarray object has no attribute broadcast_to') + + def broadcast_like(self, other): + raise AttributeError('mxnet.numpy.ndarray object has no attribute broadcast_like') + + + @property + def shape(self): + return super(ndarray, self).shape + + @property + def ndim(self): + """Number of array dimensions.""" + return len(self.shape) + + @property + def size(self): + """Number of elements in the array.""" + return super(ndarray, self).size + + def tostype(self, stype): + raise AttributeError('mxnet.numpy.ndarray object has no attribute tostype') + + +@set_module('mxnet.numpy') +def empty(shape, dtype=None, **kwargs): + """Return a new array of given shape and type, without initializing entries. + + Parameters + ---------- + shape : int or tuple of int Shape of the empty array, e.g., ``(2, 3)`` or ``2``. + dtype : data-type, optional + Desired output data-type for the array, e.g, `numpy.int8`. Default is + `numpy.float32`. Note that this behavior is different from NumPy's `empty` + function where `float64` is the default value, because `float32` is + considered as the default data type in deep learning. + ctx : device context, optional + Device context on which the memory is allocated. Default is + `mxnet.context.current_context()`. + + Returns + ------- + out : ndarray + Array of uninitialized (arbitrary) data of the given shape, dtype, and order. + """ + _sanity_check_params('emtpy', ['order'], kwargs) + ctx = kwargs.get('ctx', current_context()) + if ctx is None: + ctx = current_context() + if dtype is None: + dtype = _np.float32 + if isinstance(shape, int): + shape = (shape,) + return ndarray(handle=_new_alloc_handle(shape, ctx, False, dtype)) + + +@set_module('mxnet.numpy') +def array(object, dtype=None, ctx=None): + """ + Create an array. + + Parameters + ---------- + object : array_like or `numpy.ndarray` or `mxnet.numpy.ndarray` + An array, any object exposing the array interface, an object whose + __array__ method returns an array, or any (nested) sequence. + dtype : data-type, optional + The desired data-type for the array. 
Default is `float32`. + ctx : device context, optional + Device context on which the memory is allocated. Default is + `mxnet.context.current_context()`. + + Returns + ------- + out : ndarray + An array object satisfying the specified requirements. + """ + if ctx is None: + ctx = current_context() + if isinstance(object, ndarray): + dtype = object.dtype if dtype is None else dtype + else: + dtype = mx_real_t if dtype is None else dtype + if not isinstance(object, (ndarray, _np.ndarray)): + try: + object = _np.array(object, dtype=dtype) + except Exception as e: + raise TypeError('{}'.format(str(e))) + ret = empty(object.shape, dtype=dtype, ctx=ctx) + if len(object.shape) == 0: + ret[()] = object + else: + ret[:] = object + return ret + + +@set_module('mxnet.numpy') +def zeros(shape, dtype=_np.float32, **kwargs): + """Return a new array of given shape and type, filled with zeros. + This function currently only supports storing multi-dimensional data + in row-major (C-style). + + Parameters + ---------- + shape : int or tuple of int + The shape of the new array. + dtype : str or numpy.dtype, optional + An optional value type (default is `numpy.float32`). Note that this + behavior is different from NumPy's `zeros` function where `float64` + is the default value, because `float32` is considered as the default + data type in deep learning. + ctx : Context, optional + An optional device context (default is the current default context). + + Returns + ------- + out : ndarray + Array of zeros with the given shape, dtype, and ctx. + """ + return _mx_nd_np.zeros(shape, dtype, **kwargs) + + +@set_module('mxnet.numpy') +def ones(shape, dtype=None, **kwargs): + """Return a new array of given shape and type, filled with ones. + This function currently only supports storing multi-dimensional data + in row-major (C-style). + + Parameters + ---------- + shape : int or tuple of int + The shape of the new array. + dtype : str or numpy.dtype, optional + An optional value type. Default is `numpy.float32`. Note that this + behavior is different from NumPy's `ones` function where `float64` + is the default value, because `float32` is considered as the default + data type in deep learning. + ctx : Context, optional + An optional device context (default is the current default context). + + Returns + ------- + out : ndarray + Array of ones with the given shape, dtype, and ctx. + """ + return _mx_nd_np.ones(shape, dtype, **kwargs) + + +@set_module('mxnet.numpy') +def identity(n, dtype=None, **kwargs): + """ + Return the identity array. + + The identity array is a square array with ones on + the main diagonal. + + Parameters + ---------- + n : int + Number of rows (and columns) in `n` x `n` output. + dtype : data-type, optional + Data-type of the output. Defaults to ``numpy.float32``. + ctx : Context, optional + An optional device context (default is the current default context). + + Returns + ------- + out : ndarray + `n` x `n` array with its main diagonal set to one, + and all other elements 0. + + Examples + -------- + >>> np.identity(3) + array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]]) + """ + return _mx_nd_np.identity(n, dtype, **kwargs) + + +@set_module('mxnet.numpy') +def maximum(x1, x2, out=None): + """Returns element-wise maximum of the input arrays with broadcasting. + + Parameters + ---------- + x1, x2 : scalar or mxnet.numpy.ndarray + The arrays holding the elements to be compared. They must have the same shape, + or shapes that can be broadcast to a single shape.
+ + Returns + ------- + out : mxnet.numpy.ndarray or scalar + The maximum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.""" + return _mx_nd_np.maximum(x1, x2, out=out) + + +@set_module('mxnet.numpy') +def minimum(x1, x2, out=None): + """Returns element-wise minimum of the input arrays with broadcasting. + + Parameters + ---------- + x1, x2 : scalar or mxnet.numpy.ndarray + The arrays holding the elements to be compared. They must have the same shape, + or shapes that can be broadcast to a single shape. + + Returns + ------- + out : mxnet.numpy.ndarray or scalar + The minimum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars.""" + return _mx_nd_np.minimum(x1, x2, out=out) + + +@set_module('mxnet.numpy') +def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """ + mean(a, axis=None, dtype=None, out=None, keepdims=None) + + Compute the arithmetic mean along the specified axis. + Returns the average of the array elements. + The average is taken over the flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : ndarray + ndarray containing numbers whose mean is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default is None; if provided, + it must have the same shape and type as the expected output. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. + If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of ndarray, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + + Returns + ------- + m : ndarray, see dtype parameter above + If out=None, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + Notes + ----- + This function differs from the original `numpy.mean + `_ in + the following way(s): + + - only ndarray is accepted as valid input, python iterables or scalar is not supported + - default data type for integer input is float32 + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + array(2.5) + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0,:] = 1.0 + >>> a[1,:] = 0.1 + >>> np.mean(a) + array(0.55) + >>> np.mean(a, dtype=np.float64) + array(0.55) + """ + return _npi.mean(a, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + +@set_module('mxnet.numpy') +def stack(arrays, axis=0, out=None): + """Join a sequence of arrays along a new axis. + + The axis parameter specifies the index of the new axis in the dimensions of the result. + For example, if `axis=0` it will be the first dimension and if `axis=-1` it will be the last dimension. + + Parameters + ---------- + arrays : sequence of ndarrays + Each array must have the same shape. 
+ axis : int, optional + The axis in the result array along which the input arrays are stacked. + out : ndarray, optional + If provided, the destination to place the result. The shape and type must be the + same with that of what stack would have returned if no out argument were specified. + + Returns + ------- + out : ndarray + The stacked array has one more dimension than the input arrays. + + Notes + ----- + This function differs from the original `numpy.stack + `_ in + the following way(s): + + - only sequence of ndarray is accepted as valid input + + Examples + -------- + >>> arrays = [np.random.uniform(size=(3, 4)) for _ in range(10)] + >>> np.stack(arrays, axis=0).shape + (10, 3, 4) + >>> np.stack(arrays, axis=1).shape + (3, 10, 4) + >>> np.stack(arrays, axis=2).shape + (3, 4, 10) + >>> a = np.array([1, 2, 3]) + >>> b = np.array([2, 3, 4]) + >>> np.stack((a, b)) + array([[1., 2., 3.], + [2., 3., 4.]]) + >>> np.stack((a, b), axis=-1) + array([[1., 2.], + [2., 3.], + [3., 4.]]) + """ + return _mx_nd_np.stack(arrays, axis=axis, out=out) + + +@set_module('mxnet.numpy') +def arange(start, stop=None, step=1, dtype=None, ctx=None): + """Return evenly spaced values within a given interval. + + Values are generated within the half-open interval ``[start, stop)`` + (in other words, the interval including `start` but excluding `stop`). + For integer arguments the function is equivalent to the Python built-in + `range` function, but returns an ndarray rather than a list. + + Parameters + ---------- + start : number, optional + Start of interval. The interval includes this value. The default + start value is 0. + stop : number + End of interval. The interval does not include this value, except + in some cases where `step` is not an integer and floating point + round-off affects the length of `out`. + step : number, optional + Spacing between values. For any output `out`, this is the distance + between two adjacent values, ``out[i+1] - out[i]``. The default + step size is 1. If `step` is specified as a position argument, + `start` must also be given. + dtype : dtype + The type of the output array. The default is `float32`. + + Returns + ------- + arange : ndarray + Array of evenly spaced values. + + For floating point arguments, the length of the result is + ``ceil((stop - start)/step)``. Because of floating point overflow, + this rule may result in the last element of `out` being greater + than `stop`. + """ + return _mx_nd_np.arange(start, stop, step, dtype, ctx) + + +@set_module('mxnet.numpy') +def argmax(a, axis=None, out=None): + r""" + argmax(a, axis=None, out=None) + + Returns the indices of the maximum values along an axis. + + Parameters + ---------- + a : ndarray + Input array. Only support ndarrays of dtype `float16`, `float32`, and `float64`. + axis : int, optional + By default, the index is into the flattened array, otherwise + along the specified axis. + out : ndarray or None, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + + Returns + ------- + index_array : ndarray of indices whose dtype is same as the input ndarray. + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. + + Notes + ----- + In case of multiple occurrences of the maximum values, the indices + corresponding to the first occurrence are returned. 
+ + This function differs from the original `numpy.argmax + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). + - Output has dtype that is same as the input ndarray. + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + Examples + -------- + >>> a = np.arange(6).reshape(2,3) + 10 + >>> a + array([[10., 11., 12.], + [13., 14., 15.]]) + >>> np.argmax(a) + array(5.) + >>> np.argmax(a, axis=0) + array([1., 1., 1.]) + >>> np.argmax(a, axis=1) + array([2., 2.]) + + >>> b = np.arange(6) + >>> b[1] = 5 + >>> b + array([0., 5., 2., 3., 4., 5.]) + >>> np.argmax(b) # Only the first occurrence is returned. + array(1.) + + Specify ``out`` ndarray: + + >>> a = np.arange(6).reshape(2,3) + 10 + >>> b = np.zeros((2,)) + >>> np.argmax(a, axis=1, out=b) + array([2., 2.]) + >>> b + array([2., 2.]) + """ + return _mx_nd_np.argmax(a, axis, out) + + +@set_module('mxnet.numpy') +def argsort(a, axis=-1, kind='quicksort', order=None): + """ + Returns the indices that would sort an input array along the given axis. + This function performs sorting along the given axis and returns an array + of indices having same shape as an input array that index data in sorted order. + + Parameters + ---------- + a : ndarray + Input array. + axis : int, optional + The axis along which to sort the input tensor. + If not given, the last dimension (-1) is used by default. + If None, the flattened array is used. + kind: {'quicksort'} + Currently not supported. + order: None + Currently not supported. + + Returns + ------- + output : ndarray + Array of indices that sort a along the specified axis. + If a is one-dimensional, a[index_array] yields a sorted a. + More generally, np.take_along_axis(a, index_array, axis=axis) always yields the sorted a, + irrespective of dimensionality. + + Examples + -------- + >>> x = np.array([3, 1, 2]) + >>> np.argsort(x) + array([1., 2., 0.]) + >>> x = np.array([[0, 3], [2, 2]]) + >>> x + array([[0., 3.], + [2., 2.]]) + >>> np.argsort(x, axis=0) # sorts along first axis (down) + array([[0., 1.], + [1., 0.]]) + >>> np.argsort(x, axis=1) # sorts along last axis (across) + array([[0., 1.], + [0., 1.]]) + + Notes + ----- + This function differs from the original `numpy.argsort + `_ in + the following way(s): + + - kind and order are currently not supported + """ + if kind != 'quicksort': + raise AttributeError('mxnet.numpy.argsort does not support other sorting methods') + if order is not None: + raise AttributeError('mxnet.numpy.argsort does not support sorting with fields ordering') + return _npi.argsort(a, axis) + + +@set_module('mxnet.numpy') +def concatenate(seq, axis=0, out=None): + """Join a sequence of arrays along an existing axis. + + Parameters + ---------- + seq : sequence of ndarrays + The arrays must have the same shape, except in the dimension + corresponding to `axis` (the first, by default). + axis : int, optional + The axis along which the arrays will be joined. If axis is None, + arrays are flattened before use. Default is 0. + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what concatenate would have returned if no + out argument were specified. + + Returns + ------- + res : ndarray + The concatenated array.
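+ + Examples + -------- + A small illustrative example (outputs assume the default float32 dtype): + + >>> a = np.array([[1, 2], [3, 4]]) + >>> b = np.array([[5, 6]]) + >>> np.concatenate((a, b), axis=0) + array([[1., 2.], + [3., 4.], + [5., 6.]])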
+ """ + return _mx_nd_np.concatenate(seq, axis=axis, out=out) + + +@set_module('mxnet.numpy') +def add(x1, x2, out=None): + """Add arguments element-wise. + + Parameters + ---------- + x1, x2 : ndarrays or scalar values + The arrays to be added. If x1.shape != x2.shape, they must be broadcastable to + a common shape (which may be the shape of one or the other). + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + add : ndarray or scalar + The sum of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars. + """ + return _mx_nd_np.add(x1, x2, out) + + +@set_module('mxnet.numpy') +def subtract(x1, x2, out=None): + """Subtract arguments element-wise. + + Parameters + ---------- + x1, x2 : ndarrays or scalar values + The arrays to be subtracted from each other. If x1.shape != x2.shape, + they must be broadcastable to a common shape (which may be the shape + of one or the other). + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + subtract : ndarray or scalar + The difference of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars. + """ + return _mx_nd_np.subtract(x1, x2, out) + + +@set_module('mxnet.numpy') +def multiply(x1, x2, out=None): + """Multiply arguments element-wise. + + Parameters + ---------- + x1, x2 : ndarrays or scalar values + The arrays to be multiplied. If x1.shape != x2.shape, they must be broadcastable to + a common shape (which may be the shape of one or the other). + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + The difference of x1 and x2, element-wise. This is a scalar if both x1 and x2 are scalars. + """ + return _mx_nd_np.multiply(x1, x2, out) + + +@set_module('mxnet.numpy') +def divide(x1, x2, out=None): + """Returns a true division of the inputs, element-wise. + + Parameters + ---------- + x1 : ndarray or scalar + Dividend array. + + x2 : ndarray or scalar + Divisor array. + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + This is a scalar if both x1 and x2 are scalars. + """ + return _mx_nd_np.divide(x1, x2, out=out) + + +@set_module('mxnet.numpy') +def mod(x1, x2, out=None): + """Return element-wise remainder of division. + + Parameters + ---------- + x1 : ndarray or scalar + Dividend array. + + x2 : ndarray or scalar + Divisor array. + + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + This is a scalar if both x1 and x2 are scalars. + """ + return _mx_nd_np.mod(x1, x2, out=out) + + +@set_module('mxnet.numpy') +def power(x1, x2, out=None): + """First array elements raised to powers from second array, element-wise. + + Parameters + ---------- + x1 : ndarray or scalar + The bases. + + x2 : ndarray or scalar + The exponent. 
+ + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + The bases in x1 raised to the exponents in x2. + This is a scalar if both x1 and x2 are scalars. + """ + return _mx_nd_np.power(x1, x2, out=out) + + +@set_module('mxnet.numpy') +def clip(a, a_min, a_max, out=None): + """clip(a, a_min, a_max, out=None) + + Clip (limit) the values in an array. + Given an interval, values outside the interval are clipped to + the interval edges. For example, if an interval of ``[0, 1]`` + is specified, values smaller than 0 become 0, and values larger + than 1 become 1. + + Parameters + ---------- + a : ndarray + Array containing elements to clip. + a_min : scalar or `None` + Minimum value. If `None`, clipping is not performed on lower + interval edge. Not more than one of `a_min` and `a_max` may be + `None`. + a_max : scalar or `None` + Maximum value. If `None`, clipping is not performed on upper + interval edge. Not more than one of `a_min` and `a_max` may be + `None`. + out : ndarray, optional + The results will be placed in this array. It may be the input + array for in-place clipping. `out` must be of the right shape + to hold the output. Its type is preserved. + + Returns + ------- + clipped_array : ndarray + An array with the elements of `a`, but where values + < `a_min` are replaced with `a_min`, and those > `a_max` + with `a_max`. + + Notes + ----- + array_like `a_min` and `a_max` are not supported. + + Examples + -------- + >>> a = np.arange(10) + >>> np.clip(a, 1, 8) + array([1., 1., 2., 3., 4., 5., 6., 7., 8., 8.], dtype=float32) + >>> a + array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32) + >>> np.clip(a, 3, 6, out=a) + array([3., 3., 3., 3., 4., 5., 6., 6., 6., 6.], dtype=float32) + """ + return _mx_nd_np.clip(a, a_min, a_max, out=out) + + +@set_module('mxnet.numpy') +def swapaxes(a, axis1, axis2): + """Interchange two axes of an array. + + Parameters + ---------- + a : ndarray + Input array. + axis1 : int + First axis. + axis2 : int + Second axis. + + Returns + ------- + a_swapped : ndarray + Swapped array. This is always a copy of the input array. + """ + return _npi.swapaxes(a, dim1=axis1, dim2=axis2) + + +@set_module('mxnet.numpy') +def expand_dims(a, axis): + """Expand the shape of an array. + + Insert a new axis that will appear at the `axis` position in the expanded array shape. + + Parameters + ---------- + a : ndarray + Input array. + axis : int + Position in the expanded axes where the new axis is placed. + + Returns + ------- + res : ndarray + Output array. The number of dimensions is one greater than that of + the input array. + """ + return _npi.expand_dims(a, axis) + + +# pylint: disable=line-too-long +@set_module('mxnet.numpy') +def split(ary, indices_or_sections, axis=0): + """Split an array into multiple sub-arrays. + + Parameters + ---------- + ary : ndarray + Array to be divided into sub-arrays. + indices_or_sections : int or 1-D array + If `indices_or_sections` is an integer, N, the array will be divided + into N equal arrays along `axis`. If such a split is not possible, + an error is raised. + + If `indices_or_sections` is a 1-D array of sorted integers, the entries + indicate where along `axis` the array is split. 
For example, + ``[2, 3]`` would, for ``axis=0``, result in + + - ary[:2] + - ary[2:3] + - ary[3:] + + Index must be within the dimension of the array along `axis`. + axis : int, optional + The axis along which to split, default is 0. + + Returns + ------- + sub-arrays : list of ndarrays + A list of sub-arrays. + + Raises + ------ + ValueError + If `indices_or_sections` is given as an integer, but + a split does not result in equal division. + + Notes + ----- + This function differs from the original `numpy.split + `_ in + the following ways: + + - Index exceeding the dimension of the array is currently not supported. + + Examples + -------- + >>> x = np.arange(9.0) + >>> np.split(x, 3) + [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])] + >>> np.split(x, (3, 5, 6)) + [array([0., 1., 2.]), array([3., 4.]), array([5.]), array([6., 7., 8.])] + """ + return _mx_nd_np.split(ary, indices_or_sections, axis=axis) +# pylint: enable=line-too-long + + +@set_module('mxnet.numpy') +def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0, ctx=None): # pylint: disable=too-many-arguments + r""" + Return evenly spaced numbers over a specified interval. + + Returns num evenly spaced samples, calculated over the interval [start, stop]. + The endpoint of the interval can optionally be excluded. + + Parameters + ---------- + start : real number + The starting value of the sequence. + stop : real number + The end value of the sequence, unless endpoint is set to False. In + that case, the sequence consists of all but the last of num + 1 + evenly spaced samples, so that stop is excluded. Note that the step + size changes when endpoint is False. + num : int, optional + Number of samples to generate. Default is 50. Must be non-negative. + endpoint : bool, optional + If True, stop is the last sample. Otherwise, it is not included. + Default is True. + retstep : bool, optional + If True, return (samples, step), where step is the spacing between samples. + dtype : dtype, optional + The type of the output array. If dtype is not given, infer the data + type from the other input arguments. + axis : int, optional + The axis in the result to store the samples. Relevant only if start or + stop are array-like. By default (0), the samples will be along a new + axis inserted at the beginning. Use -1 to get an axis at the end. + + Returns + ------- + samples : ndarray + There are num equally spaced samples in the closed interval + `[start, stop]` or the half-open interval `[start, stop)` + (depending on whether endpoint is True or False). + step : float, optional + Only returned if retstep is True. + Size of spacing between samples. + + + See Also + -------- + arange : Similar to `linspace`, but uses a step size (instead of the + number of samples). + + Examples + -------- + >>> np.linspace(2.0, 3.0, num=5) + array([2. , 2.25, 2.5 , 2.75, 3. ]) + >>> np.linspace(2.0, 3.0, num=5, endpoint=False) + array([2. , 2.2, 2.4, 2.6, 2.8]) + >>> np.linspace(2.0, 3.0, num=5, retstep=True) + (array([2. , 2.25, 2.5 , 2.75, 3.
]), 0.25) + + Graphical illustration: + + >>> import matplotlib.pyplot as plt + >>> N = 8 + >>> y = np.zeros(N) + >>> x1 = np.linspace(0, 10, N, endpoint=True) + >>> x2 = np.linspace(0, 10, N, endpoint=False) + >>> plt.plot(x1.asnumpy(), y.asnumpy(), 'o') + [] + >>> plt.plot(x2.asnumpy(), (y + 0.5).asnumpy(), 'o') + [] + >>> plt.ylim([-0.5, 1]) + (-0.5, 1) + >>> plt.show() + + Notes + ----- + + This function differs from the original `numpy.linspace + `_ in + the following aspects: + + - `start` and `stop` do not support list, numpy ndarray and mxnet ndarray + - axis could only be 0 + - There could be an additional `ctx` argument to specify the device, e.g. the i-th + GPU. + """ + return _mx_nd_np.linspace(start, stop, num, endpoint, retstep, dtype, axis, ctx) + + +@set_module('mxnet.numpy') +def sin(x, out=None, **kwargs): + r"""Trigonometric sine, element-wise. + + Parameters + ---------- + x : ndarray or scalar + Angle, in radians (:math:`2 \pi` rad equals 360 degrees). + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The sine of each element of x. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _mx_nd_np.sin(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def cos(x, out=None, **kwargs): + r"""Cosine, element-wise. + + Parameters + ---------- + x : ndarray or scalar + Angle, in radians (:math:`2 \pi` rad equals 360 degrees). + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The corresponding cosine values. This is a scalar if x is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _mx_nd_np.cos(x, out=out, **kwargs) + + +def sinh(x, out=None, **kwargs): + """Hyperbolic sine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) - np.exp(-x))`` or ``-1j * np.sin(1j*x)``. + + Parameters + ---------- + x : ndarray or scalar + Input array or scalar. + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The corresponding hyperbolic sine values. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _mx_nd_np.sinh(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def cosh(x, out=None, **kwargs): + """Hyperbolic cosine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) + np.exp(-x))`` and ``np.cos(1j*x)``. + + + Parameters + ---------- + x : ndarray or scalar + Input array or scalar. + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. 
+ + Returns + ------- + y : ndarray or scalar + The corresponding hyperbolic cosine values. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _mx_nd_np.cosh(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def log10(x, out=None, **kwargs): + """Return the base 10 logarithm of the input array, element-wise. + + Parameters + ---------- + x : ndarray or scalar + Input array or scalar. + out : ndarray or None + A location into which the result is stored. If provided, it + must have a shape that the inputs broadcast to. If not provided + or None, a freshly-allocated array is returned. The dtype of the + output is the same as that of the input if the input is an ndarray. + + Returns + ------- + y : ndarray or scalar + The logarithm to the base 10 of `x`, element-wise. NaNs are + returned where x is negative. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _mx_nd_np.log10(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def sqrt(x, out=None, **kwargs): + """ + Return the non-negative square-root of an array, element-wise. + + Parameters + ---------- + x : ndarray or scalar + The values whose square-roots are required. + out : ndarray, or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + y : ndarray or scalar + An array of the same shape as `x`, containing the positive + square-root of each element in `x`. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _mx_nd_np.sqrt(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def tile(A, reps): + r""" + Construct an array by repeating A the number of times given by reps. + + If `reps` has length ``d``, the result will have dimension of + ``max(d, A.ndim)``. + + If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new + axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication, + or shape (1, 1, 3) for 3-D replication. If this is not the desired + behavior, promote `A` to d-dimensions manually before calling this + function. + + If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. + Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as + (1, 1, 2, 2). + + Parameters + ---------- + A : ndarray or scalar + An input array or a scalar to repeat. + reps : a single integer or tuple of integers + The number of repetitions of `A` along each axis. + + Returns + ------- + c : ndarray + The tiled output array. 
+ + Examples + -------- + >>> a = np.array([0, 1, 2]) + >>> np.tile(a, 2) + array([0., 1., 2., 0., 1., 2.]) + >>> np.tile(a, (2, 2)) + array([[0., 1., 2., 0., 1., 2.], + [0., 1., 2., 0., 1., 2.]]) + >>> np.tile(a, (2, 1, 2)) + array([[[0., 1., 2., 0., 1., 2.]], + [[0., 1., 2., 0., 1., 2.]]]) + + >>> b = np.array([[1, 2], [3, 4]]) + >>> np.tile(b, 2) + array([[1., 2., 1., 2.], + [3., 4., 3., 4.]]) + >>> np.tile(b, (2, 1)) + array([[1., 2.], + [3., 4.], + [1., 2.], + [3., 4.]]) + + >>> c = np.array([1,2,3,4]) + >>> np.tile(c,(4,1)) + array([[1., 2., 3., 4.], + [1., 2., 3., 4.], + [1., 2., 3., 4.], + [1., 2., 3., 4.]]) + + Scalar as input: + + >>> np.tile(2, 3) + array([2, 2, 2]) # repeating integer `2` + + """ + return _mx_nd_np.tile(A, reps) + + +@set_module('mxnet.numpy') +def abs(x, out=None, **kwargs): + r"""abs(x, out=None, **kwargs) + + Calculate the absolute value element-wise. + + Parameters + ---------- + x : ndarray or scalar + Input array. + out : ndarray or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + absolute : ndarray + An ndarray containing the absolute value of + each element in `x`. This is a scalar if `x` is a scalar. + + Examples + -------- + >>> x = np.array([-1.2, 1.2]) + >>> np.abs(x) + array([1.2, 1.2]) + """ + return _mx_nd_np.abs(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def exp(x, out=None, **kwargs): + r"""exp(x, out=None, **kwargs) + + Calculate the exponential of all elements in the input array. + + Parameters + ---------- + x : ndarray or scalar + Input values. + out : ndarray or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + out : ndarray or scalar + Output array, element-wise exponential of `x`. + This is a scalar if `x` is a scalar. + + Examples + -------- + >>> np.exp(1) + 2.718281828459045 + >>> x = np.array([-1, 1, -2, 2]) + >>> np.exp(x) + array([0.36787945, 2.7182817 , 0.13533528, 7.389056 ]) + """ + return _mx_nd_np.exp(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def arctan(x, out=None, **kwargs): + r"""arctan(x, out=None, **kwargs) + + Trigonometric inverse tangent, element-wise. + + The inverse of tan, so that if ``y = tan(x)`` then ``x = arctan(y)``. + + Parameters + ---------- + x : ndarray or scalar + Input values. + out : ndarray or None, optional + A location into which the result is stored. If provided, it must have + a shape that the inputs broadcast to. If not provided or `None`, + a freshly-allocated array is returned. + + Returns + ------- + out : ndarray or scalar + Out has the same shape as `x`. It lies in + ``[-pi/2, pi/2]`` (``arctan(+/-inf)`` returns ``+/-pi/2``). + This is a scalar if `x` is a scalar. + + Notes + ----- + `arctan` is a multi-valued function: for each `x` there are infinitely + many numbers `z` such that tan(`z`) = `x`. The convention is to return + the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, `arctan` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + Complex-valued input is not supported yet. + + The inverse tangent is also known as `atan` or tan^{-1}.
+ + Examples + -------- + We expect the arctan of 0 to be 0, and of 1 to be pi/4: + + >>> x = np.array([0, 1]) + >>> np.arctan(x) + array([0. , 0.7853982]) + + >>> np.pi/4 + 0.7853981633974483 + """ + return _mx_nd_np.arctan(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def sign(x, out=None): + """ + sign(x, out=None) + + Returns an element-wise indication of the sign of a number. + + The `sign` function returns ``-1 if x < 0, 0 if x==0, 1 if x > 0``. Only supports real number. + + Parameters + ---------- + x : ndarray or a scalar + Input values. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The sign of `x`. + This is a scalar if `x` is a scalar. + + Note + ------- + - Only supports real number as input elements. + - Input type does not support Python native iterables(list, tuple, ...). + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + Examples + -------- + >>> a = np.array([-5., 4.5]) + >>> np.sign(a) + array([-1., 1.]) + + Scalars as input: + + >>> np.sign(4.0) + 1.0 + >>> np.sign(0) + 0 + + Use ``out`` parameter: + + >>> b = np.zeros((2, )) + >>> np.sign(a, out=b) + array([-1., 1.]) + >>> b + array([-1., 1.]) + + """ + return _mx_nd_np.sign(x, out=out) + + +@set_module('mxnet.numpy') +def log(x, out=None, **kwargs): + """ + log(x, out=None) + + Natural logarithm, element-wise. + + The natural logarithm `log` is the inverse of the exponential function, + so that `log(exp(x)) = x`. The natural logarithm is logarithm in base + `e`. + + Parameters + ---------- + x : ndarray + Input value. Elements must be of real value. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The natural logarithm of `x`, element-wise. + This is a scalar if `x` is a scalar. + + Notes + ----- + Currently only supports data of real values and ``inf`` as input. Returns data of real value, ``inf``, ``-inf`` and + ``nan`` according to the input. + + This function differs from the original `numpy.log + `_ in + the following aspects: + + - Does not support complex number for now + - Input type does not support Python native iterables(list, tuple, ...). + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. 
+ + Examples + -------- + >>> a = np.array([1, np.exp(1), np.exp(2), 0], dtype=np.float64) + >>> np.log(a) + array([ 0., 1., 2., -inf], dtype=float64) + + + Due to internal calculation mechanism, using default float32 dtype may cause some special behavior: + + >>> a = np.array([1, np.exp(1), np.exp(2), 0]) + >>> np.log(a) + array([ 0., 0.99999994, 2., -inf]) + + Scalar calculation: + + >>> np.log(1) + 0.0 + + """ + return _mx_nd_np.log(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def rint(x, out=None, **kwargs): + """ + Round elements of the array to the nearest integer. + + Parameters + ---------- + x : ndarray or scalar + Input array. + out : ndarray or None + A location into which the result is stored. + If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + out : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Notes + ----- + This function differs from the original `numpy.rint + `_ in + the following way(s): + + - only ndarray or scalar is accpted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> a = np.array([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) + >>> np.rint(a) + array([-2., -2., -0., 0., 1., 2., 2.]) + """ + return _mx_nd_np.rint(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def log2(x, out=None, **kwargs): + """ + Base-2 logarithm of x. + + Parameters + ---------- + x : ndarray or scalar + Input values. + out : ndarray or None + A location into which the result is stored. + If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The logarithm base two of `x`, element-wise. + This is a scalar if `x` is a scalar. + + Notes + ----- + This function differs from the original `numpy.log2 + `_ in + the following way(s): + + - only ndarray or scalar is accpted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> x = np.array([0, 1, 2, 2**4]) + >>> np.log2(x) + array([-inf, 0., 1., 4.]) + + """ + return _mx_nd_np.log2(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def degrees(x, out=None, **kwargs): + """ + degrees(x, out=None) + + Convert angles from radians to degrees. + + Parameters + ---------- + x : ndarray + Input value. Elements must be of real value. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape and dtype as input ndarray. + If not provided or `None`, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The corresponding degree values; if `out` was supplied this is a + reference to it. + This is a scalar if `x` is a scalar. + + Notes + ------- + This function differs from the original `numpy.degrees + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). Only ndarray is supported. + - ``out`` param: cannot perform auto broadcasting. ``out`` ndarray's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. 
``out`` ndarray's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + Examples + -------- + Convert a radian array to degrees: + + >>> rad = np.arange(12.) * np.pi / 6 + >>> np.degrees(rad) + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., 270., 300., 330.]) + + Use specified ``out`` ndarray: + + >>> out = np.zeros(rad.shape) + >>> np.degrees(rad, out) + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., 270., 300., 330.]) + >>> out + array([ 0., 30., 60., 90., 120., 150., 180., 210., 240., 270., 300., 330.]) + + """ + return _mx_nd_np.degrees(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def radians(x, out=None, **kwargs): + """ + Convert angles from degrees to radians. + + Parameters + ---------- + x : ndarray or scalar + Input array in degrees. + out : ndarray or None + A location into which the result is stored. + If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray + The corresponding radian values. This is a scalar if x is a scalar. + + Notes + ----- + This function differs from the original `numpy.radians + `_ in + the following way(s): + + - only ndarray or scalar is accepted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> deg = np.arange(12.) * 30. + >>> np.radians(deg) + array([0. , 0.5235988, 1.0471976, 1.5707964, 2.0943952, 2.6179938, + 3.1415927, 3.6651914, 4.1887903, 4.712389 , 5.2359877, 5.7595863], + dtype=float32) + + """ + return _mx_nd_np.radians(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def reciprocal(x, out=None, **kwargs): + r""" + reciprocal(x, out=None) + + Return the reciprocal of the argument, element-wise. + + Calculates ``1/x``. + + Parameters + ---------- + x : ndarray or scalar + The values whose reciprocals are required. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Examples + -------- + >>> np.reciprocal(2.) + 0.5 + >>> x = np.array([1, 2., 3.33]) + >>> np.reciprocal(x) + array([1. , 0.5 , 0.3003003]) + + Notes + ----- + .. note:: + This function is not designed to work with integers. + + For integer arguments with absolute value larger than 1 the result is + always zero because of the way Python handles integer division. For + integer zero the result is an overflow. + + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.reciprocal + `_ in + the following aspects: + + - Only supports ndarray and scalar now. + - `where` argument is not supported. + """ + return _mx_nd_np.reciprocal(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def square(x, out=None, **kwargs): + r""" + square(x, out=None) + + Return the element-wise square of the input. + + Parameters + ---------- + x : ndarray or scalar + The values whose squares are required. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape as the input.
+ If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Examples + -------- + >>> np.square(2.) + 4.0 + >>> x = np.array([1, 2., -1]) + >>> np.square(x) + array([1., 4., 1.]) + + Notes + ----- + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.square + `_ in + the following aspects: + + - Only support ndarray and scalar now. + - `where` argument is not supported. + - Complex input is not supported. + """ + return _mx_nd_np.square(x, out=out, **kwargs) + + +@set_module('mxnet.numpy') +def arcsin(x, out=None, **kwargs): + r""" + arcsin(x, out=None) + + Inverse sine, element-wise. + + Parameters + ---------- + x : ndarray or scalar + `y`-coordinate on the unit circle. + out : ndarray or None, optional + A location into which the result is stored. + If provided, it must have the same shape as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + angle : ndarray or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + The inverse sine of each element in `x`, in radians and in the + closed interval ``[-pi/2, pi/2]``. + + Examples + -------- + >>> np.arcsin(1) # pi/2 + 1.5707963267948966 + >>> np.arcsin(-1) # -pi/2 + -1.5707963267948966 + >>> np.arcsin(0) + 0.0 + + Notes + ----- + `arcsin` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that :math:`sin(z) = x`. The convention is to + return the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, *arcsin* always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + The inverse sine is also known as `asin` or sin^{-1}. + + The output `ndarray` has the same `ctx` as the input `ndarray`. + + This function differs from the original `numpy.arcsin + `_ in + the following aspects: + + - Only support ndarray or scalar now. + - `where` argument is not supported. + - Complex input is not supported. + + References + ---------- + Abramowitz, M. and Stegun, I. A., *Handbook of Mathematical Functions*, + 10th printing, New York: Dover, 1964, pp. 79ff. + http://www.math.sfu.ca/~cbm/aands/ + """ + return _mx_nd_np.arcsin(x, out=out, **kwargs) diff --git a/python/mxnet/numpy/random.py b/python/mxnet/numpy/random.py new file mode 100644 index 000000000000..2a4fe0ed98fb --- /dev/null +++ b/python/mxnet/numpy/random.py @@ -0,0 +1,145 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Namespace for ops used in imperative programming.""" + +from __future__ import absolute_import +from ..ndarray import numpy as _mx_nd_np + +__all__ = ['uniform', 'normal'] + + +def uniform(low=0.0, high=1.0, size=None, **kwargs): + """Draw samples from a uniform distribution. + + Samples are uniformly distributed over the half-open interval + ``[low, high)`` (includes low, but excludes high). In other words, + any value within the given interval is equally likely to be drawn + by `uniform`. + + Parameters + ---------- + low : float, optional + Lower boundary of the output interval. All values generated will be + greater than or equal to low. The default value is 0. + high : float + Upper boundary of the output interval. All values generated will be + less than high. The default value is 1.0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a scalar tensor containing a single value is returned if + ``low`` and ``high`` are both scalars. + dtype : {'float16', 'float32', 'float64'}, optional + Data type of output samples. Default is 'float32' + ctx : Context, optional + Device context of output. Default is current context. + out : ndarray, optional + Store output to an existing ndarray. + + Returns + ------- + out : ndarray + Drawn samples from the parameterized uniform distribution. + + + Notes + ----- + This function currently does not support ``low`` and ``high`` as ndarrays. + """ + return _mx_nd_np.random.uniform(low, high, size, **kwargs) + + +def normal(loc=0.0, scale=1.0, size=None, **kwargs): + """Draw random samples from a normal (Gaussian) distribution. + + Samples are distributed according to a normal distribution parametrized + by *loc* (mean) and *scale* (standard deviation). + + + Parameters + ---------- + loc : float, optional + Mean (centre) of the distribution. + scale : float, optional + Standard deviation (spread or "width") of the distribution. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., `(m, n, k)`, then `m * n * k` + samples are drawn. If size is `None` (default), a scalar tensor containing + a single value is returned if loc and scale are both scalars. + dtype : {'float16', 'float32', 'float64'}, optional + Data type of output samples. Default is 'float32' + ctx : Context, optional + Device context of output. Default is current context. + out : ``ndarray``, optional + Store output to an existing ``ndarray``. + + Returns + ------- + out : ndarray + Drawn samples from the parameterized normal distribution. + + Notes + ----- + This function currently does not support ``loc`` and ``scale`` as ndarrays. + """ + return _mx_nd_np.random.normal(loc, scale, size, **kwargs) + + +def multinomial(n, pvals, size=None, **kwargs): + """multinomial(n, pvals, size=None) + + Draw samples from a multinomial distribution. + + The multinomial distribution is a multivariate generalisation of the binomial distribution. + Take an experiment with one of ``p`` possible outcomes. An example of such an experiment is throwing a dice, + where the outcome can be 1 through 6. Each sample drawn from the distribution represents n such experiments. + Its values, ``X_i = [X_0, X_1, ..., X_p]``, represent the number of times the outcome was ``i``. + + Parameters + ---------- + n : int + Number of experiments. + pvals : sequence of floats, length p + Probabilities of each of the p different outcomes. These should sum to 1. 
+ size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples + are drawn. Default is None, in which case a single value is returned. + + Returns + ------- + out : ndarray + The drawn samples, of shape size, if that was provided. If not, the shape is ``(N,)``. + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional value drawn from the distribution. + + Examples + -------- + Throw a dice 1000 times, and 1000 times again: + + >>> np.random.multinomial(1000, [1/6.]*6, size=2) + array([[164, 161, 179, 158, 150, 188], + [178, 162, 177, 143, 163, 177]]) + + A loaded die is more likely to land on number 6: + + >>> np.random.multinomial(100, [1/7.]*5 + [2/7.]) + array([19, 14, 12, 11, 21, 23]) + + >>> np.random.multinomial(100, [1.0 / 3, 2.0 / 3]) + array([32, 68]) + """ + return _mx_nd_np.random.multinomial(n, pvals, size, **kwargs) diff --git a/python/mxnet/numpy/stride_tricks.py b/python/mxnet/numpy/stride_tricks.py new file mode 100644 index 000000000000..1848a292e673 --- /dev/null +++ b/python/mxnet/numpy/stride_tricks.py @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Util functions with broadcast.""" + +from ..ndarray.ndarray import _get_broadcast_shape +from . import _op as _mx_np_op + + +__all__ = ['broadcast_arrays'] + + +def _broadcast_shape(*args): + shape = () + for arr in args: + shape = _get_broadcast_shape(shape, arr.shape) + return shape + + +def broadcast_arrays(*args): + """ + Broadcast any number of arrays against each other. + + Parameters + ---------- + `*args` : a list of ndarrays + The arrays to broadcast. + + Returns + ------- + broadcasted : list of arrays + These arrays are copies of the original arrays unless that all the input + arrays have the same shape, the input list of arrays are returned + instead of a list of copies. + """ + shape = _broadcast_shape(*args) + + if all(array.shape == shape for array in args): + # Common case where nothing needs to be broadcasted. + return args + + return [_mx_np_op.broadcast_to(array, shape) for array in args] diff --git a/python/mxnet/numpy/utils.py b/python/mxnet/numpy/utils.py new file mode 100644 index 000000000000..920897efc80b --- /dev/null +++ b/python/mxnet/numpy/utils.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
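A hedged usage sketch for `broadcast_arrays` defined above, importing it by its module path since its re-export point is not shown here; the shapes are illustrative:

```python
# Illustrative sketch only.
from mxnet import np, npx
from mxnet.numpy.stride_tricks import broadcast_arrays

npx.set_np()

a = np.ones((3, 1))
b = np.ones((1, 4))
x, y = broadcast_arrays(a, b)   # both outputs have shape (3, 4)
u, v = broadcast_arrays(a, a)   # shapes already match, so the inputs are returned unchanged
```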
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Util functions for the numpy module.""" + + +from __future__ import absolute_import + +import numpy as onp + +__all__ = ['float16', 'float32', 'float64', 'uint8', 'int32', 'int8', 'int64', 'pi'] + +float16 = onp.float16 +float32 = onp.float32 +float64 = onp.float64 +uint8 = onp.uint8 +int32 = onp.int32 +int8 = onp.int8 +int64 = onp.int64 + +pi = onp.pi diff --git a/python/mxnet/numpy_extension/__init__.py b/python/mxnet/numpy_extension/__init__.py new file mode 100644 index 000000000000..6e89c004f6a4 --- /dev/null +++ b/python/mxnet/numpy_extension/__init__.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module for ops not belonging to the official numpy package for imperative programming.""" + +from __future__ import absolute_import +from . import _op +from . import image +from . import _register +from ._op import * # pylint: disable=wildcard-import +from ..context import * # pylint: disable=wildcard-import +# TODO(junwu): revisit what functions should be exposed to users +from ..util import use_np_shape, np_shape, is_np_shape +from ..util import use_np_array, np_array, is_np_array +from ..util import set_np, use_np, reset_np +from ..ndarray import waitall +from .utils import * # pylint: disable=wildcard-import +from .random import * # pylint: disable=wildcard-import + +__all__ = [] diff --git a/python/mxnet/numpy_extension/_op.py b/python/mxnet/numpy_extension/_op.py new file mode 100644 index 000000000000..a995e480221a --- /dev/null +++ b/python/mxnet/numpy_extension/_op.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Namespace for registering numpy_extension ops for imperative programming.""" + +__all__ = [] diff --git a/python/mxnet/numpy_extension/_register.py b/python/mxnet/numpy_extension/_register.py new file mode 100644 index 000000000000..8abb7254057c --- /dev/null +++ b/python/mxnet/numpy_extension/_register.py @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Registering ops in mxnet.numpy_extension for imperative programming.""" + +from __future__ import absolute_import + +from ..base import _init_np_op_module +from ..ndarray.register import _make_ndarray_function + + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy_extension', + mx_module_name=None, make_op_func=_make_ndarray_function) diff --git a/python/mxnet/numpy_extension/image.py b/python/mxnet/numpy_extension/image.py new file mode 100644 index 000000000000..00a028b3c18f --- /dev/null +++ b/python/mxnet/numpy_extension/image.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Image pre-processing operators.""" + +from ..image import * # pylint: disable=wildcard-import, unused-wildcard-import + +__all__ = [] diff --git a/python/mxnet/numpy_extension/random.py b/python/mxnet/numpy_extension/random.py new file mode 100644 index 000000000000..bfe2270358bb --- /dev/null +++ b/python/mxnet/numpy_extension/random.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for ops used in imperative programming.""" + +from __future__ import absolute_import +from .. import random as _mx_rand + + +__all__ = ['seed'] + + +def seed(seed, ctx='all'): # pylint: disable=redefined-outer-name + """Seeds the random number generators in MXNet. + + This affects the behavior of modules in MXNet that uses random number generators, + like the dropout operator and `ndarray`'s random sampling operators. + + Parameters + ---------- + seed : int + The random number seed. + + ctx : Context + The device context of the generator. The default is "all" which means seeding random + number generators of all devices. + + Notes + ----- + Random number generators in MXNet are device specific. + `mx.random.seed(seed_state)` sets the state of each generator using `seed_state` and the + device id. Therefore, random numbers generated from different devices can be different + even if they are seeded using the same seed. + + To produce identical random number sequences independent of the device id, + set optional `ctx` argument. This produces the same sequence of random numbers independent + of the device id, but the sequence can be different on different kind of devices as MXNet's + random number generators for CPU and GPU use different algorithms. + + Example + ------- + >>> from mxnet import np, npx + >>> npx.set_np() + >>> npx.random.seed(0) + >>> np.random.uniform() + array(0.5488135) + >>> npx.random.seed(128) + >>> np.random.uniform() + array(0.03812965) + >>> npx.random.seed(128) + >>> np.random.uniform() + array(0.03812965) + >>> npx.random.seed(128) + >>> np.random.uniform(ctx=npx.gpu(0)) + array(0.9894903, ctx=gpu(0)) + >>> npx.random.seed(128) + >>> np.random.uniform(ctx=npx.gpu(0)) + array(0.9894903, ctx=gpu(0)) + """ + _mx_rand.seed(seed_state=seed, ctx=ctx) diff --git a/python/mxnet/numpy_extension/utils.py b/python/mxnet/numpy_extension/utils.py new file mode 100644 index 000000000000..0aa89badbb58 --- /dev/null +++ b/python/mxnet/numpy_extension/utils.py @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Util functions for the numpy module.""" + + +from __future__ import absolute_import + +import ctypes +from .. util import is_np_array, is_np_shape +from .. base import _LIB, check_call, string_types, c_str_array +from .. base import c_handle_array, c_str, mx_uint, NDArrayHandle, py_str +from ..numpy import ndarray + +__all__ = ['save', 'load'] + + +def save(file, arr): + """Saves a list of `ndarray`s or a dict of `str`->`ndarray` to file. 
+ + Examples of filenames: + + - ``/path/to/file`` + - ``s3://my-bucket/path/to/file`` (if compiled with AWS S3 supports) + - ``hdfs://path/to/file`` (if compiled with HDFS supports) + + Parameters + ---------- + file : str + Filename to which the data is saved. + arr : `ndarray` or list of `ndarray`s or dict of `str` to `ndarray` + The data to be saved. + + Notes + ----- + This function can only be called within numpy semantics, i.e., `npx.is_np_shape()` + and `npx.is_np_array()` must both return true. + """ + if not (is_np_shape() and is_np_array()): + raise ValueError('Cannot save `mxnet.numpy.ndarray` in legacy mode. Please activate' + ' numpy semantics by calling `npx.set_np()` in the global scope' + ' before calling this function.') + if isinstance(arr, ndarray): + arr = [arr] + if isinstance(arr, dict): + str_keys = arr.keys() + nd_vals = arr.values() + if any(not isinstance(k, string_types) for k in str_keys) or \ + any(not isinstance(v, ndarray) for v in nd_vals): + raise TypeError('Only accepts dict str->ndarray or list of ndarrays') + keys = c_str_array(str_keys) + handles = c_handle_array(nd_vals) + elif isinstance(arr, list): + if any(not isinstance(v, ndarray) for v in arr): + raise TypeError('Only accepts dict str->ndarray or list of ndarrays') + keys = None + handles = c_handle_array(arr) + else: + raise ValueError("data needs to either be a ndarray, dict of (str, ndarray) pairs " + "or a list of ndarrays.") + check_call(_LIB.MXNDArraySave(c_str(file), + mx_uint(len(handles)), + handles, + keys)) + + +def load(file): + """Loads an array from file. + + See more details in ``save``. + + Parameters + ---------- + file : str + The filename. + + Returns + ------- + result : list of ndarrays or dict of str -> ndarray + Data stored in the file. + + Notes + ----- + This function can only be called within numpy semantics, i.e., `npx.is_np_shape()` + and `npx.is_np_array()` must both return true. + """ + if not (is_np_shape() and is_np_array()): + raise ValueError('Cannot load `mxnet.numpy.ndarray` in legacy mode. 
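The `save` and `load` pair defined here has no usage example; a hedged round-trip sketch, with an illustrative file path and dict keys:

```python
# Illustrative sketch only; the path and dict keys are assumptions.
from mxnet import np, npx

npx.set_np()  # save/load require numpy semantics, per the checks in this module

params = {'weight': np.ones((2, 3)), 'bias': np.zeros((3,))}
npx.save('/tmp/model.params', params)     # also accepts a single ndarray or a list
restored = npx.load('/tmp/model.params')  # returns a dict of str -> ndarray here
```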
Please activate' + ' numpy semantics by calling `npx.set_np()` in the global scope' + ' before calling this function.') + if not isinstance(file, string_types): + raise TypeError('file required to be a string') + out_size = mx_uint() + out_name_size = mx_uint() + handles = ctypes.POINTER(NDArrayHandle)() + names = ctypes.POINTER(ctypes.c_char_p)() + check_call(_LIB.MXNDArrayLoad(c_str(file), + ctypes.byref(out_size), + ctypes.byref(handles), + ctypes.byref(out_name_size), + ctypes.byref(names))) + if out_name_size.value == 0: + return [ndarray(NDArrayHandle(handles[i])) for i in range(out_size.value)] + else: + assert out_name_size.value == out_size.value + return dict( + (py_str(names[i]), ndarray(NDArrayHandle(handles[i]))) + for i in range(out_size.value)) diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py index c2c1aa6a76f4..d953e9247900 100644 --- a/python/mxnet/optimizer/optimizer.py +++ b/python/mxnet/optimizer/optimizer.py @@ -18,6 +18,7 @@ # pylint: disable=too-many-lines """Weight updating functions.""" +from __future__ import absolute_import import logging import math import pickle @@ -33,6 +34,7 @@ multi_mp_sgd_mom_update) from ..ndarray import sparse from ..random import normal +from ..util import is_np_array __all__ = [ 'AdaDelta', 'AdaGrad', 'Adam', 'Adamax', 'DCASGD', 'FTML', 'Ftrl', 'LBSGD', @@ -119,6 +121,7 @@ def __init__(self, rescale_grad=1., param_idx2name=None, wd=0., self.idx2name = param_idx2name.copy() self.sym_info = (sym.attr_dict(), sym.list_arguments()) if sym is not None else () self.param_dict = param_dict if param_dict else {} + self.allow_np_array = is_np_array() self.set_lr_mult({}) self.set_wd_mult({}) @@ -1644,6 +1647,28 @@ def update(self, index, weight, grad, state): # backward compatibility wrapper for Optimizer.CreateOptimizer create = Optimizer.create_optimizer # pylint: disable=invalid-name + +def _as_classic(a, allow_np): + # TODO(junwu): This is a temp solution for allowing converting + # np.ndarray to mx.nd.NDArray to be fed into the optimizer since + # users may have custom optimizers implemented using mx.nd.NDArray ops. + from ..numpy import ndarray as np_ndarray + if isinstance(a, (tuple, list)): + if any(isinstance(x, np_ndarray) for x in a): + if allow_np: + return [x.as_nd_ndarray() for x in a] + else: + raise ValueError('Converting np.ndarray to mx.nd.NDArray is not allowed') + else: + if isinstance(a, np_ndarray): + if allow_np: + return a.as_nd_ndarray() + else: + raise ValueError('Converting np.ndarray to mx.nd.NDArray is not allowed') + return a + + + class Updater(object): """Updater for kvstore.""" def __init__(self, optimizer): @@ -1654,14 +1679,15 @@ def __init__(self, optimizer): def __call__(self, index, grad, weight): """Updates weight given gradient and index.""" + allow_np = self.optimizer.allow_np_array if not isinstance(index, (list, tuple)): indices = [index] - grads = [grad] - weights = [weight] + grads = [_as_classic(grad, allow_np)] + weights = [_as_classic(weight, allow_np)] else: indices = index - grads = grad - weights = weight + grads = _as_classic(grad, allow_np) + weights = _as_classic(weight, allow_np) if weights: self.optimizer._set_current_context(weights[0].context.device_id) for i, idx in enumerate(indices): diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py index f438e4954aa9..2ce395bdd279 100644 --- a/python/mxnet/symbol/__init__.py +++ b/python/mxnet/symbol/__init__.py @@ -17,7 +17,7 @@ """Symbol API of MXNet.""" -from . 
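The `Updater.__call__` change above routes `mxnet.numpy.ndarray` weights and gradients through `_as_classic` so existing optimizers, written against classic `NDArray` ops, keep working. A hedged end-to-end sketch; the optimizer choice and tensor values are illustrative:

```python
# Illustrative sketch only; SGD and the values below are assumptions.
import mxnet as mx
from mxnet import np, npx

npx.set_np()                               # optimizer records allow_np_array=True

weight = np.ones((2, 2))
grad = np.ones((2, 2)) * 0.1
opt = mx.optimizer.SGD(learning_rate=0.1)
updater = mx.optimizer.get_updater(opt)
updater(0, grad, weight)                   # np.ndarrays converted via as_nd_ndarray() internally
```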
import _internal, contrib, linalg, op, random, sparse, image, symbol +from . import _internal, contrib, linalg, op, random, sparse, image, symbol, numpy # pylint: disable=wildcard-import, redefined-builtin try: from .gen_op import * # pylint: disable=unused-wildcard-import @@ -27,5 +27,8 @@ from .op import * from .symbol import * # pylint: enable=wildcard-import +from . import numpy as np +from . import numpy_extension as npx -__all__ = op.__all__ + symbol.__all__ + ['contrib', 'linalg', 'random', 'sparse', 'image'] +__all__ = op.__all__ + symbol.__all__\ + + ['contrib', 'linalg', 'random', 'sparse', 'image', 'numpy', 'numpy_extension'] diff --git a/python/mxnet/symbol/_internal.py b/python/mxnet/symbol/_internal.py index 7e9787e32b1c..d46c0e64e6f1 100644 --- a/python/mxnet/symbol/_internal.py +++ b/python/mxnet/symbol/_internal.py @@ -24,18 +24,18 @@ try: if int(_os.environ.get("MXNET_ENABLE_CYTHON", True)) == 0: - from .._ctypes.symbol import SymbolBase, _set_symbol_class + from .._ctypes.symbol import SymbolBase, _set_symbol_class, _set_np_symbol_class from .._ctypes.symbol import _symbol_creator elif _sys.version_info >= (3, 0): - from .._cy3.symbol import SymbolBase, _set_symbol_class + from .._cy3.symbol import SymbolBase, _set_symbol_class, _set_np_symbol_class from .._cy3.symbol import _symbol_creator else: - from .._cy2.symbol import SymbolBase, _set_symbol_class + from .._cy2.symbol import SymbolBase, _set_symbol_class, _set_np_symbol_class from .._cy2.symbol import _symbol_creator except ImportError: if int(_os.environ.get("MXNET_ENFORCE_CYTHON", False)) != 0: raise ImportError("Cython Module cannot be loaded but MXNET_ENFORCE_CYTHON=1") - from .._ctypes.symbol import SymbolBase, _set_symbol_class + from .._ctypes.symbol import SymbolBase, _set_symbol_class, _set_np_symbol_class from .._ctypes.symbol import _symbol_creator from ..attribute import AttrScope from ..base import _Null @@ -45,4 +45,4 @@ except ImportError: pass -__all__ = ['SymbolBase', '_set_symbol_class', '_symbol_creator'] +__all__ = ['SymbolBase', '_set_symbol_class', '_symbol_creator', '_set_np_symbol_class'] diff --git a/python/mxnet/symbol/numpy/__init__.py b/python/mxnet/symbol/numpy/__init__.py new file mode 100644 index 000000000000..857849c4ae62 --- /dev/null +++ b/python/mxnet/symbol/numpy/__init__.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module for numpy ops under mxnet.symbol.""" + +from . import random +from . import linalg +from . import _op, _symbol, _internal +from ._symbol import _Symbol +from . 
import _register +from ._op import * # pylint: disable=wildcard-import +from ._symbol import * # pylint: disable=wildcard-import + +__all__ = _op.__all__ + _symbol.__all__ diff --git a/python/mxnet/symbol/numpy/_internal.py b/python/mxnet/symbol/numpy/_internal.py new file mode 100644 index 000000000000..c5f292842b3b --- /dev/null +++ b/python/mxnet/symbol/numpy/_internal.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for numpy internal ops.""" + +__all__ = [] diff --git a/python/mxnet/symbol/numpy/_op.py b/python/mxnet/symbol/numpy/_op.py new file mode 100644 index 000000000000..a4a979f30b18 --- /dev/null +++ b/python/mxnet/symbol/numpy/_op.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for operators used in Gluon dispatched by F=symbol module.""" + +__all__ = [] diff --git a/python/mxnet/symbol/numpy/_register.py b/python/mxnet/symbol/numpy/_register.py new file mode 100644 index 000000000000..3245c8d6d638 --- /dev/null +++ b/python/mxnet/symbol/numpy/_register.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Registering numpy ops.""" + +from ...base import _init_np_op_module +from ..register import _make_symbol_function + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy', + mx_module_name='symbol', make_op_func=_make_symbol_function) + + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy._internal', + mx_module_name='symbol', make_op_func=_make_symbol_function) diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py new file mode 100644 index 000000000000..71cd99d42fe9 --- /dev/null +++ b/python/mxnet/symbol/numpy/_symbol.py @@ -0,0 +1,2260 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: disable=too-many-lines +"""numpy namespace for operators used in Gluon APIs dispatched by F=symbol module.""" + +from __future__ import absolute_import +import ctypes +import numpy as _np +from . import _op as _mx_np_op +from ...base import _LIB, SymbolHandle, numeric_types, mx_uint +from ...util import _sanity_check_params, check_call, set_module +from ...context import current_context +from ..symbol import Symbol +from .._internal import _set_np_symbol_class +from . 
import _internal as _npi + +__all__ = ['zeros', 'ones', 'maximum', 'minimum', 'stack', 'concatenate', 'arange', 'argmax', + 'clip', 'add', 'subtract', 'multiply', 'divide', 'mod', 'power', 'split', 'swapaxes', + 'expand_dims', 'tile', 'linspace', 'sin', 'cos', 'sinh', 'cosh', 'log10', 'sqrt', + 'abs', 'exp', 'arctan', 'sign', 'log', 'degrees', 'log2', 'rint', 'radians', 'mean', + 'reciprocal', 'square', 'arcsin', 'argsort', 'identity'] + + +def _num_outputs(sym): + return len(sym.as_nd_ndarray()) + + +@set_module('mxnet.symbol.numpy') +class _Symbol(Symbol): + def __getitem__(self, key): + num_outputs = _num_outputs(self) + if num_outputs == 1: + raise NotImplementedError + if not isinstance(key, int): + raise NotImplementedError + if key >= num_outputs: + # Important, python determines the end by this exception + raise IndexError + handle = SymbolHandle() + check_call(_LIB.MXSymbolGetOutput( + self.handle, mx_uint(key), ctypes.byref(handle))) + return _Symbol(handle=handle) + + def __setitem__(self, key, value): + raise NotImplementedError + + def __iter__(self): + raise AttributeError('_Symbol object has no attribute __iter__') + + def __add__(self, other): + """x.__add__(y) <=> x + y""" + return add(self, other) + + def __sub__(self, other): + """x.__sub__(y) <=> x - y""" + return subtract(self, other) + + def __rsub__(self, other): + """x.__rsub__(y) <=> y - x""" + return subtract(other, self) + + def __mul__(self, other): + """x.__mul__(y) <=> x * y""" + return multiply(self, other) + + def __rmul__(self, other): + """x.__rmul__(y) <=> y * x""" + return multiply(other, self) + + def __div__(self, other): + raise AttributeError('_Symbol.__div__ is replaced by __truediv__. If you are using' + ' Python2, please use the statement from __future__ import division' + ' to change the / operator to mean true division throughout the' + ' module. If you are using Python3, this error should not have' + ' been encountered.') + + def __rdiv__(self, other): + raise AttributeError('_Symbol.__rdiv__ is replaced by __rtruediv__. If you are using' + ' Python2, please use the statement from __future__ import division' + ' to change the / operator to mean true division throughout the' + ' module. 
If you are using Python3, this error should not have' + ' been encountered.') + + def __mod__(self, other): + """x.__mod__(y) <=> x % y""" + return mod(self, other) + + def __rmod__(self, other): + """x.__rmod__(y) <=> y % x""" + return mod(other, self) + + def __idiv__(self, other): + raise NotImplementedError + + def __truediv__(self, other): + """x.__truediv__(y) <=> x / y""" + return divide(self, other) + + def __rtruediv__(self, other): + """x.__rtruediv__(y) <=> y / x""" + return divide(other, self) + + def __itruediv__(self, other): + raise NotImplementedError + + def __pow__(self, other): + """x.__pow__(y) <=> x ** y""" + return power(self, other) + + def __rpow__(self, other): + return power(other, self) + + def __neg__(self): + """x.__neg__() <=> - x""" + return self.__mul__(-1.0) + + def __deepcopy__(self, _): + return super(_Symbol, self).as_np_ndarray() + + def __eq__(self, other): + """x.__eq__(y) <=> x == y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, _Symbol): + return _npi.equal(self, other) + elif isinstance(other, numeric_types): + return _npi.equal_scalar(self, float(other)) + else: + raise TypeError("_Symbol does not support type {} as operand".format(str(type(other)))) + + def __ne__(self, other): + """x.__ne__(y) <=> x != y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, _Symbol): + return _npi.not_equal(self, other) + elif isinstance(other, numeric_types): + return _npi.not_equal_scalar(self, float(other)) + else: + raise TypeError("_Symbol does not support type {} as operand".format(str(type(other)))) + + def __gt__(self, other): + """x.__gt__(y) <=> x > y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, _Symbol): + return _npi.greater(self, other) + elif isinstance(other, numeric_types): + return _npi.greater_scalar(self, float(other)) + else: + raise TypeError("_Symbol does not support type {} as operand".format(str(type(other)))) + + def __ge__(self, other): + """x.__ge__(y) <=> x >= y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, _Symbol): + return _npi.greater_equal(self, other) + elif isinstance(other, numeric_types): + return _npi.greater_equal_scalar(self, float(other)) + else: + raise TypeError("_Symbol does not support type {} as operand".format(str(type(other)))) + + def __lt__(self, other): + """x.__lt__(y) <=> x < y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, _Symbol): + return _npi.less(self, other) + elif isinstance(other, numeric_types): + return _npi.less_scalar(self, float(other)) + else: + raise TypeError("_Symbol does not support type {} as operand".format(str(type(other)))) + + def __le__(self, other): + """x.__le__(y) <=> x <= y""" + # TODO(junwu): Return boolean ndarray when dtype=bool_ is supported + if isinstance(other, _Symbol): + return _npi.less_equal(self, other) + elif isinstance(other, numeric_types): + return _npi.less_equal_scalar(self, float(other)) + else: + raise TypeError("_Symbol does not support type {} as operand".format(str(type(other)))) + + def __len__(self): + raise NotImplementedError + + def as_nd_ndarray(self): + """Convert _Symbol to mxnet.symbol.Symbol to use its convenience fluent methods.""" + hdl = SymbolHandle() + check_call(_LIB.MXShallowCopySymbol(self.handle, ctypes.byref(hdl))) + return Symbol(handle=hdl) + + def as_np_ndarray(self): + """For the convenience of 
conversion between legacy and np symbols.""" + return self + + @property + # pylint: disable= invalid-name, undefined-variable + def T(self): + """Same as self.transpose().""" + return self.transpose() + # pylint: enable= invalid-name, undefined-variable + + def astype(self, dtype, **kwargs): # pylint: disable=arguments-differ + raise NotImplementedError + + def dot(self, b, out=None): + return _mx_np_op.dot(self, b, out=out) + + def reshape(self, *args, **kwargs): # pylint: disable=arguments-differ + """Returns an array containing the same data with a new shape. + + Notes + ----- + Unlike the free function `numpy.reshape`, this method on `ndarray` allows + the elements of the shape parameter to be passed in as separate arguments. + For example, ``a.reshape(10, 11)`` is equivalent to + ``a.reshape((10, 11))``. + """ + order = 'C' + if len(kwargs) > 1: + raise TypeError('function takes at most 1 keyword argument') + if len(kwargs) == 1: + if 'order' not in kwargs: + raise TypeError('{} is an invalid keyword argument for this function' + .format(kwargs.keys()[0])) + order = kwargs.pop('order', 'C') + if order != 'C': + raise NotImplementedError('only supports C-order,' + ' while received {}'.format(order)) + if len(args) == 0: + raise TypeError('reshape() takes exactly 1 argument (0 given)') + if len(args) == 1 and isinstance(args[0], tuple): + return _mx_np_op.reshape(self, newshape=args[0], order=order) + else: + return _mx_np_op.reshape(self, newshape=args, order=order) + + def argmax(self, axis=None, out=None): # pylint: disable=arguments-differ + return _mx_np_op.argmax(self, axis, out) + + def reshape_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`reshape_like`. + + The arguments are the same as for :py:func:`reshape_like`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute reshape_like') + + def zeros_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`zeros_like`. + + The arguments are the same as for :py:func:`zeros_like`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute zeros_like') + + def ones_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`ones_like`. + + The arguments are the same as for :py:func:`ones_like`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute ones_like') + + def broadcast_axes(self, *args, **kwargs): + """Convenience fluent method for :py:func:`broadcast_axes`. + + The arguments are the same as for :py:func:`broadcast_axes`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute broadcast_like') + + def repeat(self, repeats, axis=None): # pylint: disable=arguments-differ + """Repeat elements of an array.""" + return _mx_np_op.repeat(self, repeats=repeats, axis=axis) + + def pad(self, *args, **kwargs): + """Convenience fluent method for :py:func:`pad`. + + The arguments are the same as for :py:func:`pad`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute pad') + + def swapaxes(self, axis1, axis2): # pylint: disable=arguments-differ + """Return a copy of the array with axis1 and axis2 interchanged. + Refer to `mxnet.numpy.swapaxes` for full documentation. + """ + return swapaxes(self, axis1, axis2) + + def split(self, *args, **kwargs): + """Convenience fluent method for :py:func:`split`. + + The arguments are the same as for :py:func:`split`, with + this array as data. 
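The `reshape` override above accepts either separate integers or a single tuple, C-order only. A hedged sketch on the imperative ndarray, which documents the same convention; the shapes are illustrative:

```python
# Illustrative sketch only.
from mxnet import np, npx

npx.set_np()

a = np.ones((2, 6))
b = a.reshape(3, 4)    # separate integer arguments
c = a.reshape((3, 4))  # equivalent tuple form; only C-order is supported
```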
+ """ + raise AttributeError('_Symbol object has no attribute split') + + def split_v2(self, *args, **kwargs): + """Convenience fluent method for :py:func:`split_v2`. + + The arguments are the same as for :py:func:`split_v2`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute split_v2') + + def slice(self, *args, **kwargs): + """Convenience fluent method for :py:func:`slice`. + + The arguments are the same as for :py:func:`slice`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute slice') + + def slice_axis(self, *args, **kwargs): + """Convenience fluent method for :py:func:`slice_axis`. + + The arguments are the same as for :py:func:`slice_axis`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute slice_axis') + + def slice_like(self, *args, **kwargs): + """Convenience fluent method for :py:func:`slice_like`. + + The arguments are the same as for :py:func:`slice_like`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute slice_like') + + def take(self, *args, **kwargs): + """Convenience fluent method for :py:func:`take`. + + The arguments are the same as for :py:func:`take`, with + this array as data. + """ + raise NotImplementedError + + def one_hot(self, *args, **kwargs): + """Convenience fluent method for :py:func:`one_hot`. + + The arguments are the same as for :py:func:`one_hot`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute one_hot') + + def pick(self, *args, **kwargs): + """Convenience fluent method for :py:func:`pick`. + + The arguments are the same as for :py:func:`pick`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute pick') + + def sort(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sort`. + + The arguments are the same as for :py:func:`sort`, with + this array as data. + """ + raise NotImplementedError + + def topk(self, *args, **kwargs): + """Convenience fluent method for :py:func:`topk`. + + The arguments are the same as for :py:func:`topk`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute topk') + + def argsort(self, axis=-1, kind='quicksort', order=None): # pylint: disable=arguments-differ + """ + Returns the indices that would sort an input array along the given axis. + This function performs sorting along the given axis and returns an array + of indices having same shape as an input array that index data in sorted order. + + Parameters + ---------- + a : _Symbol + Input array + axis : int, optional + The axis along which to sort teh input tensor. + If not given, the last, dimension -1 will be used by default. + If None, the flattened array is used. + kind: {'quicksort'} + Currently not supported. + order: None + Currently not supported. + + Returns + ------- + output : ndarray + Array of indices that sort a along the specified axis. + If a is one-dimensional, a[index_array] yields a sorted a. + More generally, np.take_along_axis(a, index_array, axis=a) always yields the sorted a, + irrespective of dimensionality. 
+ + Examples + -------- + >>> x = np.array([3, 1, 2]) + >>> np.argsort(x) + array([1., 2., 0.]) + >>> x = np.array([[0, 3], [2, 2]]) + >>> x + array([[0., 3.], + [2., 2.]]) + >>> np.argsort(x, axis=0) # sorts along first axis (down) + array([[0., 1.], + [1., 0.]]) + >>> np.argsort(x, axis=1) # sorts along last axis (across) + array([[0., 1.], + [0., 1.]]) + + Notes + ----- + This function differs from the original `numpy.argsort + `_ in + the following way(s): + + - kind and order are currently not supported + """ + if kind != 'quicksort': + raise AttributeError('mxnet.numpy.argsort does not support other sorting methods') + if order is not None: + raise AttributeError('mxnet.numpy.argsort does not support sorting with fields ordering') + return _npi.argsort(self, axis) + + def argmax_channel(self, *args, **kwargs): + """Convenience fluent method for :py:func:`argmax_channel`. + + The arguments are the same as for :py:func:`argmax_channel`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute argmax_channel') + + def argmin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`argmin`. + + The arguments are the same as for :py:func:`argmin`, with + this array as data. + """ + raise NotImplementedError + + def clip(self, min=None, max=None, out=None): # pylint: disable=arguments-differ + """Return an array whose values are limited to [min, max]. + One of max or min must be given. + """ + return clip(self, min, max, out=out) + + def abs(self, *args, **kwargs): + """Convenience fluent method for :py:func:`abs`. + + The arguments are the same as for :py:func:`abs`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute abs') + + def sign(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sign`. + + The arguments are the same as for :py:func:`sign`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute sign') + + def flatten(self, order='C'): # pylint: disable=arguments-differ + """Return a copy of the array collapsed into one dimension.""" + return self.reshape(-1, order=order) + + def shape_array(self, *args, **kwargs): + """Convenience fluent method for :py:func:`shape_array`. + + The arguments are the same as for :py:func:`shape_array`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute shape_array') + + def size_array(self, *args, **kwargs): + """Convenience fluent method for :py:func:`size_array`. + + The arguments are the same as for :py:func:`size_array`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute size_array') + + def expand_dims(self, *args, **kwargs): + """Convenience fluent method for :py:func:`expand_dims`. + + The arguments are the same as for :py:func:`expand_dims`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute expand_dims') + + def tile(self, *args, **kwargs): + """Convenience fluent method for :py:func:`tile`. + + The arguments are the same as for :py:func:`tile`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute tile') + + def transpose(self, *axes): # pylint: disable=arguments-differ + """Convenience fluent method for :py:func:`transpose`. + + The arguments are the same as for :py:func:`transpose`, with + this array as data.
+ """ + return _mx_np_op.transpose(self, axes=axes if len(axes) != 0 else None) + + def flip(self, *args, **kwargs): + """Convenience fluent method for :py:func:`flip`. + + The arguments are the same as for :py:func:`flip`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute flip') + + def depth_to_space(self, *args, **kwargs): + """Convenience fluent method for :py:func:`depth_to_space`. + + The arguments are the same as for :py:func:`depth_to_space`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute depth_to_space') + + def space_to_depth(self, *args, **kwargs): + """Convenience fluent method for :py:func:`space_to_depth`. + + The arguments are the same as for :py:func:`space_to_depth`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute space_to_depth') + + def diag(self, k=0, **kwargs): + """Convenience fluent method for :py:func:`diag`. + + The arguments are the same as for :py:func:`diag`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute diag') + + def sum(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Convenience fluent method for :py:func:`sum`. + + The arguments are the same as for :py:func:`sum`, with + this array as data. + """ + return _mx_np_op.sum(self, axis=axis, dtype=dtype, out=out, keepdims=keepdims) + + def nansum(self, *args, **kwargs): + """Convenience fluent method for :py:func:`nansum`. + + The arguments are the same as for :py:func:`nansum`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute nansum') + + def prod(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Return the product of the array elements over the given axis.""" + return _mx_np_op.prod(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + def nanprod(self, *args, **kwargs): + """Convenience fluent method for :py:func:`nanprod`. + + The arguments are the same as for :py:func:`nanprod`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute nanprod') + + def mean(self, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Convenience fluent method for :py:func:`mean`. + + The arguments are the same as for :py:func:`mean`, with + this array as data. + """ + return _npi.mean(self, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + def cumsum(self, axis=None, dtype=None, out=None): + """Return the cumulative sum of the elements along the given axis.""" + return _mx_np_op.cumsum(self, axis=axis, dtype=dtype, out=out) + + def max(self, axis=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """Return the maximum along a given axis.""" + return _mx_np_op.max(self, axis=axis, keepdims=keepdims, out=out) + + def min(self, *args, **kwargs): + """Convenience fluent method for :py:func:`min`. + + The arguments are the same as for :py:func:`min`, with + this array as data. + """ + raise NotImplementedError + + def norm(self, *args, **kwargs): + """Convenience fluent method for :py:func:`norm`. + + The arguments are the same as for :py:func:`norm`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute norm') + + def round(self, *args, **kwargs): + """Convenience fluent method for :py:func:`round`. + + The arguments are the same as for :py:func:`round`, with + this array as data. 
+ """ + raise NotImplementedError + + def rint(self, *args, **kwargs): + """Convenience fluent method for :py:func:`rint`. + + The arguments are the same as for :py:func:`rint`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute rint') + + def fix(self, *args, **kwargs): + """Convenience fluent method for :py:func:`fix`. + + The arguments are the same as for :py:func:`fix`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute fix') + + def floor(self, *args, **kwargs): + """Convenience fluent method for :py:func:`floor`. + + The arguments are the same as for :py:func:`floor`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute floor') + + def ceil(self, *args, **kwargs): + """Convenience fluent method for :py:func:`ceil`. + + The arguments are the same as for :py:func:`ceil`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute ceil') + + def trunc(self, *args, **kwargs): + """Convenience fluent method for :py:func:`trunc`. + + The arguments are the same as for :py:func:`trunc`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute trunc') + + def sin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sin`. + + The arguments are the same as for :py:func:`sin`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute sin') + + def cos(self, *args, **kwargs): + """Convenience fluent method for :py:func:`cos`. + + The arguments are the same as for :py:func:`cos`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute cos') + + def tan(self, *args, **kwargs): + """Convenience fluent method for :py:func:`tan`. + + The arguments are the same as for :py:func:`tan`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute tan') + + def arcsin(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arcsin`. + + The arguments are the same as for :py:func:`arcsin`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute arcsin') + + def arccos(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arccos`. + + The arguments are the same as for :py:func:`arccos`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute arccos') + + def arctan(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arctan`. + + The arguments are the same as for :py:func:`arctan`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute arctan') + + def degrees(self, *args, **kwargs): + """Convenience fluent method for :py:func:`degrees`. + + The arguments are the same as for :py:func:`degrees`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute degrees') + + def radians(self, *args, **kwargs): + """Convenience fluent method for :py:func:`radians`. + + The arguments are the same as for :py:func:`radians`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute radians') + + def sinh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sinh`. + + The arguments are the same as for :py:func:`sinh`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute sinh') + + def cosh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`cosh`. 
+ + The arguments are the same as for :py:func:`cosh`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute cosh') + + def tanh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`tanh`. + + The arguments are the same as for :py:func:`tanh`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute tanh') + + def arcsinh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arcsinh`. + + The arguments are the same as for :py:func:`arcsinh`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute arcsinh') + + def arccosh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arccosh`. + + The arguments are the same as for :py:func:`arccosh`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute arccosh') + + def arctanh(self, *args, **kwargs): + """Convenience fluent method for :py:func:`arctanh`. + + The arguments are the same as for :py:func:`arctanh`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute arctanh') + + def exp(self, *args, **kwargs): + """Convenience fluent method for :py:func:`exp`. + + The arguments are the same as for :py:func:`exp`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute exp') + + def expm1(self, *args, **kwargs): + """Convenience fluent method for :py:func:`expm1`. + + The arguments are the same as for :py:func:`expm1`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute expm1') + + def log(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log`. + + The arguments are the same as for :py:func:`log`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute log') + + def log10(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log10`. + + The arguments are the same as for :py:func:`log10`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute log10') + + def log2(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log2`. + + The arguments are the same as for :py:func:`log2`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute log2') + + def log1p(self, *args, **kwargs): + """Convenience fluent method for :py:func:`log1p`. + + The arguments are the same as for :py:func:`log1p`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute log1p') + + def sqrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`sqrt`. + + The arguments are the same as for :py:func:`sqrt`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute sqrt') + + def rsqrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`rsqrt`. + + The arguments are the same as for :py:func:`rsqrt`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute rsqrt') + + def cbrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`cbrt`. + + The arguments are the same as for :py:func:`cbrt`, with + this array as data. + """ + raise AttributeError('_Symbol object has no attribute cqrt') + + def rcbrt(self, *args, **kwargs): + """Convenience fluent method for :py:func:`rcbrt`. + + The arguments are the same as for :py:func:`rcbrt`, with + this array as data. 
+        """
+        raise AttributeError('_Symbol object has no attribute rcbrt')
+
+    def square(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`square`.
+
+        The arguments are the same as for :py:func:`square`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute square')
+
+    def reciprocal(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`reciprocal`.
+
+        The arguments are the same as for :py:func:`reciprocal`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute reciprocal')
+
+    def relu(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`relu`.
+
+        The arguments are the same as for :py:func:`relu`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute relu')
+
+    def sigmoid(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`sigmoid`.
+
+        The arguments are the same as for :py:func:`sigmoid`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute sigmoid')
+
+    def softmax(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`softmax`.
+
+        The arguments are the same as for :py:func:`softmax`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute softmax')
+
+    def log_softmax(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`log_softmax`.
+
+        The arguments are the same as for :py:func:`log_softmax`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute log_softmax')
+
+    def softmin(self, *args, **kwargs):
+        """Convenience fluent method for :py:func:`softmin`.
+
+        The arguments are the same as for :py:func:`softmin`, with
+        this array as data.
+        """
+        raise AttributeError('_Symbol object has no attribute softmin')
+
+    def squeeze(self, axis=None):  # pylint: disable=arguments-differ
+        """Remove single-dimensional entries from the shape of a.
+        """
+        return _mx_np_op.squeeze(self, axis=axis)
+
+    def broadcast_to(self, *args, **kwargs):
+        raise AttributeError('_Symbol object has no attribute broadcast_to')
+
+    def broadcast_like(self, *args, **kwargs):
+        raise AttributeError('_Symbol object has no attribute broadcast_like')
+
+
+@set_module('mxnet.symbol.numpy')
+def zeros(shape, dtype=_np.float32, **kwargs):
+    """Return a new array of given shape and type, filled with zeros.
+    This function currently only supports storing multi-dimensional data
+    in row-major (C-style).
+
+    Parameters
+    ----------
+    shape : int or tuple of int
+        The shape of the new array.
+    dtype : str or numpy.dtype, optional
+        An optional value type. Default is `numpy.float32`. Note that this
+        behavior is different from NumPy's `zeros` function where `float64`
+        is the default value, because `float32` is considered as the default
+        data type in deep learning.
+    ctx : Context, optional
+        An optional device context (default is the current default context).
+
+    Returns
+    -------
+    out : _Symbol
+        Array of zeros with the given shape, dtype, and ctx.
+    """
+    _sanity_check_params('zeros', ['order'], kwargs)
+    ctx = kwargs.pop('ctx', current_context())
+    if ctx is None:
+        ctx = current_context()
+    dtype = _np.float32 if dtype is None else dtype
+    return _npi.zeros(shape=shape, ctx=ctx, dtype=dtype, **kwargs)
+
+
+@set_module('mxnet.symbol.numpy')
+def ones(shape, dtype=None, **kwargs):
+    """Return a new array of given shape and type, filled with ones.
+    This function currently only supports storing multi-dimensional data
+    in row-major (C-style).
+
+    Parameters
+    ----------
+    shape : int or tuple of int
+        The shape of the new array.
+    dtype : str or numpy.dtype, optional
+        An optional value type. Default is `numpy.float32`. Note that this
+        behavior is different from NumPy's `ones` function where `float64`
+        is the default value, because `float32` is considered as the default
+        data type in deep learning.
+    ctx : Context, optional
+        An optional device context (default is the current default context).
+
+    Returns
+    -------
+    out : _Symbol
+        Array of ones with the given shape, dtype, and ctx.
+    """
+    _sanity_check_params('ones', ['order'], kwargs)
+    ctx = kwargs.pop('ctx', current_context())
+    if ctx is None:
+        ctx = current_context()
+    dtype = _np.float32 if dtype is None else dtype
+    return _npi.ones(shape=shape, ctx=ctx, dtype=dtype, **kwargs)
+
+
+@set_module('mxnet.symbol.numpy')
+def identity(n, dtype=None, **kwargs):
+    """
+    Return the identity array.
+
+    The identity array is a square array with ones on
+    the main diagonal.
+
+    Parameters
+    ----------
+    n : int
+        Number of rows (and columns) in `n` x `n` output.
+    dtype : data-type, optional
+        Data-type of the output. Defaults to ``numpy.float32``.
+    ctx : Context, optional
+        An optional device context (default is the current default context).
+
+    Returns
+    -------
+    out : _Symbol
+        `n` x `n` array with its main diagonal set to one,
+        and all other elements 0.
+    """
+    if not isinstance(n, int):
+        raise TypeError("Input 'n' should be an integer")
+    if n < 0:
+        raise ValueError("Input 'n' cannot be negative")
+    ctx = kwargs.pop('ctx', current_context())
+    if ctx is None:
+        ctx = current_context()
+    dtype = _np.float32 if dtype is None else dtype
+    return _npi.identity(shape=(n, n), ctx=ctx, dtype=dtype, **kwargs)
+
+
+#pylint: disable= too-many-arguments, no-member, protected-access
+def _ufunc_helper(lhs, rhs, fn_array, fn_scalar, lfn_scalar, rfn_scalar=None, out=None):
+    """ Helper function for element-wise operation.
+    The function will perform numpy-like broadcasting if needed and call different functions.
+
+    Parameters
+    ----------
+    lhs : Symbol or numeric value
+        Left-hand side operand.
+
+    rhs : Symbol or numeric value
+        Right-hand side operand.
+
+    fn_array : function
+        Function to be called if both lhs and rhs are of ``Symbol`` type.
+
+    fn_scalar : function
+        Function to be called if both lhs and rhs are numeric values.
+ + lfn_scalar : function + Function to be called if lhs is ``Symbol`` while rhs is numeric value + + rfn_scalar : function + Function to be called if lhs is numeric value while rhs is ``Symbol``; + if none is provided, then the function is commutative, so rfn_scalar is equal to lfn_scalar + + Returns + -------- + mxnet.numpy.ndarray + result array + """ + if isinstance(lhs, numeric_types): + if isinstance(rhs, numeric_types): + return fn_scalar(lhs, rhs, out=out) + else: + if rfn_scalar is None: + # commutative function + return lfn_scalar(rhs, float(lhs), out=out) + else: + return rfn_scalar(rhs, float(lhs), out=out) + elif isinstance(rhs, numeric_types): + return lfn_scalar(lhs, float(rhs), out=out) + elif isinstance(rhs, Symbol): + return fn_array(lhs, rhs, out=out) + else: + raise TypeError('type %s not supported' % str(type(rhs))) +#pylint: enable= too-many-arguments, no-member, protected-access + + +@set_module('mxnet.symbol.numpy') +def maximum(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.maximum, _np.maximum, _npi.maximum_scalar, None, out) + + +@set_module('mxnet.symbol.numpy') +def minimum(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.minimum, _np.minimum, _npi.minimum_scalar, None, out) + + +@set_module('mxnet.symbol.numpy') +def add(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.add, _np.add, _npi.add_scalar, None, out) + + +@set_module('mxnet.symbol.numpy') +def subtract(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.subtract, _np.subtract, _npi.subtract_scalar, + _npi.rsubtract_scalar, out) + + +@set_module('mxnet.symbol.numpy') +def multiply(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.multiply, _np.multiply, _npi.multiply_scalar, None, out) + + +@set_module('mxnet.symbol.numpy') +def divide(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.true_divide, _np.divide, _npi.true_divide_scalar, + _npi.rtrue_divide_scalar, out) + + +@set_module('mxnet.symbol.numpy') +def mod(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.mod, _np.mod, _npi.mod_scalar, _npi.rmod_scalar, out) + + +@set_module('mxnet.symbol.numpy') +def power(x1, x2, out=None): + return _ufunc_helper(x1, x2, _npi.power, _np.power, _npi.power_scalar, _npi.rpower_scalar, out) + + +@set_module('mxnet.symbol.numpy') +def mean(a, axis=None, dtype=None, out=None, keepdims=False): # pylint: disable=arguments-differ + """ + mean(a, axis=None, dtype=None, out=None, keepdims=None) + + Compute the arithmetic mean along the specified axis. + Returns the average of the array elements. + The average is taken over the flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : `_Symbol` + _Symbol containing numbers whose mean is desired. + axis : None or int or tuple of ints, optional + Axis or axes along which the means are computed. The default is to compute the mean of the flattened array. + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default is float32; + for floating point inputs, it is the same as the input dtype. + out : _Symbol, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the result + as dimensions with size one. With this option, the result will broadcast correctly + against the input array. 
+ If the default value is passed, then keepdims will not be passed through to the mean + method of sub-classes of _Symbol, however any non-default value will be. If the sub-class + method does not implement keepdims any exceptions will be raised. + + Returns + ------- + m : _Symbol, see dtype parameter above + If out=None, returns a new array containing the mean values, + otherwise a reference to the output array is returned. + + Notes + ----- + This function differs from the original `numpy.mean + `_ in + the following way(s): + + - only _Symbol is accepted as valid input, python iterables or scalar is not supported + - default data type for integer input is float32 + + Examples + -------- + >>> a = np.array([[1, 2], [3, 4]]) + >>> np.mean(a) + array(2.5) + >>> a = np.zeros((2, 512*512), dtype=np.float32) + >>> a[0,:] = 1.0 + >>> a[1,:] = 0.1 + >>> np.mean(a) + array(0.55) + >>> np.mean(a, dtype=np.float64) + array(0.55) + """ + return _npi.mean(a, axis=axis, dtype=dtype, keepdims=keepdims, out=out) + + +@set_module('mxnet.symbol.numpy') +def stack(arrays, axis=0, out=None): + """ + Join a sequence of arrays along a new axis. + + The axis parameter specifies the index of the new axis in the dimensions of the result. + For example, if `axis=0` it will be the first dimension and if `axis=-1` it will be the last dimension. + + Parameters + ---------- + arrays : sequence of _Symbols + Each array must have the same shape. + axis : int, optional + The axis in the result array along which the input arrays are stacked. + out : _Symbol, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + out : _Symbol + The stacked array has one more dimension than the input arrays. + + Notes + ----- + This function differs from the original `numpy.stack + `_ in + the following ways: + + - only sequence of _Symbol is accepted as valid input + + Examples + -------- + >>> arrays = [np.random.uniform(size=(3, 4)) for _ in range(10)] + >>> np.stack(arrays, axis=0).shape + (10, 3, 4) + >>> np.stack(arrays, axis=1).shape + (3, 10, 4) + >>> np.stack(arrays, axis=2).shape + (3, 4, 10) + >>> a = np.array([1, 2, 3]) + >>> b = np.array([2, 3, 4]) + >>> np.stack((a, b)) + array([[1., 2., 3.], + [2., 3., 4.]]) + >>> np.stack((a, b), axis=-1) + array([[1., 2.], + [2., 3.], + [3., 4.]]) + """ + def get_list(arrays): + if not hasattr(arrays, '__getitem__') and hasattr(arrays, '__iter__'): + raise ValueError("expected iterable for arrays but got {}".format(type(arrays))) + return [arr for arr in arrays] + + arrays = get_list(arrays) + return _npi.stack(*arrays, axis=axis, out=out) + + +@set_module('mxnet.symbol.numpy') +def concatenate(seq, axis=0, out=None): + """Join a sequence of arrays along an existing axis. + + Parameters + ---------- + a1, a2, ... : sequence of array_like + The arrays must have the same shape, except in the dimension + corresponding to `axis` (the first, by default). + axis : int, optional + The axis along which the arrays will be joined. If axis is None, + arrays are flattened before use. Default is 0. + out : ndarray, optional + If provided, the destination to place the result. The shape must be + correct, matching that of what concatenate would have returned if no + out argument were specified. + + Returns + ------- + res : ndarray + The concatenated array. 
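+
+    Examples
+    --------
+    The values below illustrate the behaviour of the imperative counterpart;
+    the symbolic version returns a graph node rather than concrete data:
+
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> b = np.array([[5, 6]])
+    >>> np.concatenate((a, b), axis=0)
+    array([[1., 2.],
+           [3., 4.],
+           [5., 6.]])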
+ """ + return _npi.concatenate(*seq, dim=axis, out=out) + + +@set_module('mxnet.symbol.numpy') +def arange(start, stop=None, step=1, dtype=None, ctx=None): + """Return evenly spaced values within a given interval. + + Values are generated within the half-open interval ``[start, stop)`` + (in other words, the interval including `start` but excluding `stop`). + For integer arguments the function is equivalent to the Python built-in + `range` function, but returns an ndarray rather than a list. + + Parameters + ---------- + start : number, optional + Start of interval. The interval includes this value. The default + start value is 0. + stop : number + End of interval. The interval does not include this value, except + in some cases where `step` is not an integer and floating point + round-off affects the length of `out`. + step : number, optional + Spacing between values. For any output `out`, this is the distance + between two adjacent values, ``out[i+1] - out[i]``. The default + step size is 1. If `step` is specified as a position argument, + `start` must also be given. + dtype : dtype + The type of the output array. The default is `float32`. + + Returns + ------- + arange : ndarray + Array of evenly spaced values. + + For floating point arguments, the length of the result is + ``ceil((stop - start)/step)``. Because of floating point overflow, + this rule may result in the last element of `out` being greater + than `stop`. + """ + if dtype is None: + dtype = 'float32' + if ctx is None: + ctx = current_context() + if stop is None: + stop = start + start = 0 + if step is None: + step = 1 + if start is None and stop is None: + raise ValueError('start and stop cannot be both None') + if step == 0: + raise ZeroDivisionError('step cannot be 0') + return _npi.arange(start=start, stop=stop, step=step, dtype=dtype, ctx=ctx) + + +@set_module('mxnet.symbol.numpy') +def argmax(a, axis=None, out=None): + r""" + argmax(a, axis=None, out=None) + + Returns the indices of the maximum values along an axis. + + Parameters + ---------- + a : _Symbol + Input array. Only support dtype `float16`, `float32`, and `float64`. + axis : int, optional + By default, the index is into the flattened array, otherwise + along the specified axis. + out : _Symbol or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + index_array : _Symbol of indices whose dtype is same as the input ndarray. + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. + + Notes + ----- + In case of multiple occurrences of the maximum values, the indices + corresponding to the first occurrence are returned. + + This function differs from the original `numpy.argmax + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). + - Output has dtype that is same as the input ndarray. + - ``out`` param: cannot perform auto broadcasting. ``out`` symbol's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` symnbol's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + """ + return _npi.argmax(a, axis=axis, keepdims=False, out=out) + + +@set_module('mxnet.symbol.numpy') +def argsort(a, axis=-1, kind='quicksort', order=None): + """ + Returns the indices that would sort an input array along the given axis. 
+    This function performs sorting along the given axis and returns an array
+    of indices having the same shape as the input array that index data in sorted order.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Input array.
+    axis : int, optional
+        The axis along which to sort the input tensor.
+        If not given, the last dimension (-1) is used by default.
+        If None, the flattened array is used.
+    kind : {'quicksort'}
+        Currently not supported.
+    order : None
+        Currently not supported.
+
+    Returns
+    -------
+    output : _Symbol
+        Array of indices that sort a along the specified axis.
+        If a is one-dimensional, a[index_array] yields a sorted a.
+        More generally, np.take_along_axis(a, index_array, axis=axis) always yields the sorted a,
+        irrespective of dimensionality.
+
+    Examples
+    --------
+    >>> x = np.array([3, 1, 2])
+    >>> np.argsort(x)
+    array([1., 2., 0.])
+    >>> x = np.array([[0, 3], [2, 2]])
+    >>> x
+    array([[0., 3.],
+           [2., 2.]])
+    >>> np.argsort(x, axis=0) # sorts along first axis (down)
+    array([[0., 1.],
+           [1., 0.]])
+    >>> np.argsort(x, axis=1) # sorts along last axis (across)
+    array([[0., 1.],
+           [0., 1.]])
+
+    Notes
+    -----
+    This function differs from the original `numpy.argsort
+    `_ in
+    the following way(s):
+
+    - kind and order are currently not supported
+    """
+    if kind != 'quicksort':
+        raise AttributeError('mxnet.numpy.argsort does not support other sorting methods')
+    if order is not None:
+        raise AttributeError('mxnet.numpy.argsort does not support sorting with fields ordering')
+    return _npi.argsort(a, axis)
+
+
+@set_module('mxnet.symbol.numpy')
+def clip(a, a_min, a_max, out=None):
+    """clip(a, a_min, a_max, out=None)
+
+    Clip (limit) the values in an array.
+    Given an interval, values outside the interval are clipped to
+    the interval edges. For example, if an interval of ``[0, 1]``
+    is specified, values smaller than 0 become 0, and values larger
+    than 1 become 1.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Array containing elements to clip.
+    a_min : scalar or `None`
+        Minimum value. If `None`, clipping is not performed on lower
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    a_max : scalar or `None`
+        Maximum value. If `None`, clipping is not performed on upper
+        interval edge. Not more than one of `a_min` and `a_max` may be
+        `None`.
+    out : _Symbol or `None`
+        The results will be placed in this array. It may be the input
+        array for in-place clipping. `out` must be of the right shape
+        to hold the output. Its type is preserved.
+
+    Returns
+    -------
+    clipped_array : _Symbol
+        An array with the elements of `a`, but where values
+        < `a_min` are replaced with `a_min`, and those > `a_max`
+        with `a_max`.
+
+    Notes
+    -----
+    array_like `a_min` and `a_max` are not supported.
+    """
+    if a_min is None and a_max is None:
+        raise ValueError('array_clip: must set either max or min')
+    if a_min is None:
+        a_min = float('-inf')
+    if a_max is None:
+        a_max = float('inf')
+    return _npi.clip(a, a_min, a_max, out=out)
+
+
+@set_module('mxnet.symbol.numpy')
+def swapaxes(a, axis1, axis2):
+    """Interchange two axes of an array.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Input array.
+    axis1 : int
+        First axis.
+    axis2 : int
+        Second axis.
+
+    Returns
+    -------
+    a_swapped : _Symbol
+        Swapped array symbol.
+    """
+    return _npi.swapaxes(a, dim1=axis1, dim2=axis2)
+
+
+@set_module('mxnet.symbol.numpy')
+def expand_dims(a, axis):
+    """Expand the shape of an array.
+
+    Insert a new axis that will appear at the `axis` position in the expanded
+    array shape.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Input array.
+    axis : int
+        Position in the expanded axes where the new axis is placed.
+
+    Returns
+    -------
+    res : _Symbol
+        Output array. The number of dimensions is one greater than that of
+        the input array.
+    """
+    return _npi.expand_dims(a, axis)
+
+
+# pylint: disable=line-too-long
+@set_module('mxnet.symbol.numpy')
+def split(ary, indices_or_sections, axis=0):
+    """Split an array into multiple sub-arrays.
+
+    Parameters
+    ----------
+    ary : _Symbol
+        Array to be divided into sub-arrays.
+    indices_or_sections : int or 1-D array
+        If `indices_or_sections` is an integer, N, the array will be divided
+        into N equal arrays along `axis`. If such a split is not possible,
+        an error is raised.
+
+        If `indices_or_sections` is a 1-D array of sorted integers, the entries
+        indicate where along `axis` the array is split. For example,
+        ``[2, 3]`` would, for ``axis=0``, result in
+
+        - ary[:2]
+        - ary[2:3]
+        - ary[3:]
+
+        An index must be within the dimension of the array along `axis`.
+    axis : int, optional
+        The axis along which to split, default is 0.
+
+    Returns
+    -------
+    sub-arrays : list of _Symbols
+        A list of sub-arrays.
+
+    Raises
+    ------
+    ValueError
+        If `indices_or_sections` is given as an integer, but
+        a split does not result in equal division.
+
+    Notes
+    -----
+    This function differs from the original `numpy.split
+    `_ in
+    the following ways:
+
+    - Indices exceeding the dimension of the array along `axis` are currently not supported.
+
+    Examples
+    --------
+    >>> x = np.arange(9.0)
+    >>> np.split(x, 3)
+    [array([0., 1., 2.]), array([3., 4., 5.]), array([6., 7., 8.])]
+    >>> np.split(x, (3, 5, 6))
+    [array([0., 1., 2.]), array([3., 4.]), array([5.]), array([6., 7.])]
+    """
+    indices = []
+    sections = 0
+    if isinstance(indices_or_sections, int):
+        sections = indices_or_sections
+    elif isinstance(indices_or_sections, tuple):
+        indices = [0] + list(indices_or_sections)
+    else:
+        raise ValueError('indices_or_sections must be either an int or a tuple of ints')
+    ret = _npi.split(ary, indices, axis, False, sections)
+    return ret
+# pylint: enable=line-too-long
+
+
+@set_module('mxnet.symbol.numpy')
+def tile(A, reps):
+    r"""
+    Construct an array by repeating A the number of times given by reps.
+
+    If `reps` has length ``d``, the result will have dimension of
+    ``max(d, A.ndim)``.
+
+    If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new
+    axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication,
+    or shape (1, 1, 3) for 3-D replication. If this is not the desired
+    behavior, promote `A` to d-dimensions manually before calling this
+    function.
+
+    If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it.
+    Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as
+    (1, 1, 2, 2).
+
+    Parameters
+    ----------
+    A : _Symbol or scalar
+        An input array or a scalar to repeat.
+    reps : a single integer or tuple of integers
+        The number of repetitions of `A` along each axis.
+
+    Returns
+    -------
+    c : _Symbol
+        The tiled output array.
+    """
+    return _unary_func_helper(A, _npi.tile, _np.tile, reps=reps)
+
+
+@set_module('mxnet.symbol.numpy')
+def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0, ctx=None):  # pylint: disable=too-many-arguments
+    r"""
+    Return evenly spaced numbers over a specified interval.
+
+    Returns num evenly spaced samples, calculated over the interval [start, stop].
+ The endpoint of the interval can optionally be excluded. + + Parameters + ---------- + start : real number + The starting value of the sequence. + stop : real number + The end value of the sequence, unless endpoint is set to False. In + that case, the sequence consists of all but the last of num + 1 + evenly spaced samples, so that stop is excluded. Note that the step + size changes when endpoint is False. + num : int, optional + Number of samples to generate. Default is 50. Must be non-negative. + endpoint : bool, optional + If True, stop is the last sample. Otherwise, it is not included. + Default is True. + retstep : bool, optional + If True, return (samples, step), where step is the spacing between samples. + dtype : dtype, optional + The type of the output array. If dtype is not given, infer the data + type from the other input arguments. + axis : int, optional + The axis in the result to store the samples. Relevant only if start or + stop are array-like. By default (0), the samples will be along a new + axis inserted at the beginning. Use -1 to get an axis at the end. + + Returns + ------- + samples : _Symbol + There are num equally spaced samples in the closed interval + `[start, stop]` or the half-open interval `[start, stop)` + (depending on whether endpoint is True or False). + step : float, optional + Only returned if retstep is True + Size of spacing between samples. + + + See Also + -------- + arange : Similar to `linspace`, but uses a step size (instead of the + number of samples). + + Notes + ----- + + This function differs from the original `numpy.linspace + `_ in + the following aspects: + + - `start` and `stop` do not support list, numpy ndarray and mxnet ndarray + - axis could only be 0 + - There could be an additional `ctx` argument to specify the device, e.g. the i-th + GPU. + """ + if isinstance(start, (list, _np.ndarray)) or \ + isinstance(stop, (list, _np.ndarray)): + raise NotImplementedError('start and stop only support int') + if axis != 0: + raise NotImplementedError("the function only support axis 0") + if ctx is None: + ctx = current_context() + if retstep: + step = (stop - start) / (num - 1) + return (_npi.linspace(start=start, stop=stop, num=num, endpoint=endpoint, ctx=ctx, dtype=dtype), step) + else: + return _npi.linspace(start=start, stop=stop, num=num, endpoint=endpoint, ctx=ctx, dtype=dtype) + + +def _unary_func_helper(x, fn_array, fn_scalar, out=None, **kwargs): + """Helper function for unary operators. + + Parameters + ---------- + x : _Symbol or scalar + Input of the unary operator. + fn_array : function + Function to be called if x is of ``_Symbol`` type. + fn_scalar : function + Function to be called if x is a Python scalar. + out : _Symbol + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + out : _Symbol or scalar + Result _Symbol or scalar. + """ + if isinstance(x, numeric_types): + return fn_scalar(x, **kwargs) + elif isinstance(x, _Symbol): + return fn_array(x, out=out, **kwargs) + else: + raise TypeError('type {} not supported'.format(str(type(x)))) + + +@set_module('mxnet.symbol.numpy') +def sin(x, out=None, **kwargs): + r"""Trigonometric sine, element-wise. + + Parameters + ---------- + x : _Symbol or scalar + Angle, in radians (:math:`2 \pi` rad equals 360 degrees). + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol + The sine of each element of x. + This is a scalar if `x` is a scalar. 
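+
+    Examples
+    --------
+    The value below illustrates the behaviour of the imperative counterpart;
+    the symbolic version returns a graph node rather than concrete data:
+
+    >>> np.sin(np.array([0.]))
+    array([0.])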
+ + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.sin, _np.sin, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def cos(x, out=None, **kwargs): + r"""Cosine, element-wise. + + Parameters + ---------- + x : _Symbol or scalar + Angle, in radians (:math:`2 \pi` rad equals 360 degrees). + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol + The corresponding cosine values. This is a scalar if x is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.cos, _np.cos, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def sinh(x, out=None, **kwargs): + """Hyperbolic sine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) - np.exp(-x))`` or ``-1j * np.sin(1j*x)``. + + Parameters + ---------- + x : _Symbol or scalar + Input array or scalar. + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol or scalar + The corresponding hyperbolic sine values. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.sinh, _np.sinh, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def cosh(x, out=None, **kwargs): + """Hyperbolic cosine, element-wise. + + Equivalent to ``1/2 * (np.exp(x) + np.exp(-x))`` and ``np.cos(1j*x)``. + + + Parameters + ---------- + x : _Symbol or scalar + Input array or scalar. + out : ndarray or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol or scalar + The corresponding hyperbolic cosine values. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.cosh, _np.cosh, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def log10(x, out=None, **kwargs): + """Return the base 10 logarithm of the input array, element-wise. + + Parameters + ---------- + x : _Symbol or scalar + Input array or scalar. + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol or scalar + The logarithm to the base 10 of `x`, element-wise. NaNs are + returned where x is negative. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.log10, _np.log10, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def sqrt(x, out=None, **kwargs): + """ + Return the non-negative square-root of an array, element-wise. + + Parameters + ---------- + x : _Symbol or scalar + The values whose square-roots are required. + out : _Symbol, or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol or scalar + An array of the same shape as `x`, containing the positive + square-root of each element in `x`. This is a scalar if `x` is a scalar. + + Notes + ---- + This function only supports input type of float. + """ + return _unary_func_helper(x, _npi.sqrt, _np.sqrt, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def abs(x, out=None, **kwargs): + r"""abs(x, out=None, **kwargs) + + Calculate the absolute value element-wise. + + Parameters + ---------- + x : _Symbol or scalar + Input array. 
+ out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + absolute : _Symbol + An ndarray containing the absolute value of + each element in `x`. This is a scalar if `x` is a scalar. + """ + return _unary_func_helper(x, _npi.abs, _np.abs, out=out, **kwargs) + +@set_module('mxnet.symbol.numpy') +def sign(x, out=None, **kwargs): + r""" + sign(x, out=None) + + Returns an element-wise indication of the sign of a number. + + The `sign` function returns ``-1 if x < 0, 0 if x==0, 1 if x > 0``. Only supports real number. + + Parameters + ---------- + x : _Symbol or a scalar + Input values. + out : _Symbol or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol + The sign of `x`. + This is a scalar if `x` is a scalar. + + Note + ------- + - Only supports real number as input elements. + - Input type does not support Python native iterables(list, tuple, ...) + - ``out`` param: cannot perform auto broadcasting. ``out`` symbol's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` symbol's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + """ + return _unary_func_helper(x, _npi.sign, _np.sign, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def exp(x, out=None, **kwargs): + r"""exp(x, out=None, **kwargs) + + Calculate the exponential of all elements in the input array. + + Parameters + ---------- + x : _Symbol or scalar + Input values. + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + out : _Symbol + Output array, element-wise exponential of `x`. + This is a scalar if `x` is a scalar. + """ + return _unary_func_helper(x, _npi.exp, _np.exp, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def arctan(x, out=None, **kwargs): + r"""arctan(x, out=None, **kwargs) + + Trigonometric inverse tangent, element-wise. + + The inverse of tan, so that if ``y = tan(x)`` then ``x = arctan(y)``. + + Parameters + ---------- + x : _Symbol or scalar + Input values. + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + out : _Symbol + Out has the same shape as `x`. It lies is in + ``[-pi/2, pi/2]`` (``arctan(+/-inf)`` returns ``+/-pi/2``). + This is a scalar if `x` is a scalar. + + Notes + ----- + `arctan` is a multi-valued function: for each `x` there are infinitely + many numbers `z` such that tan(`z`) = `x`. The convention is to return + the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, `arctan` always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + For complex-valued input, we do not have support for them yet. + + The inverse tangent is also known as `atan` or tan^{-1}. + """ + return _unary_func_helper(x, _npi.arctan, _np.arctan, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def log(x, out=None, **kwargs): + """ + log(x, out=None) + + Natural logarithm, element-wise. + + The natural logarithm `log` is the inverse of the exponential function, + so that `log(exp(x)) = x`. The natural logarithm is logarithm in base + `e`. + + Parameters + ---------- + x : _Symbol + Input value. Elements must be of real value. 
+ out : _Symbol or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol + The natural logarithm of `x`, element-wise. + This is a scalar if `x` is a scalar. + + Notes + ----- + Currently only supports data of real values and ``inf`` as input. Returns data of real value, ``inf``, ``-inf`` and + ``nan`` according to the input. + + This function differs from the original `numpy.log + `_ in + the following aspects: + + - Does not support complex number for now + - Input type does not support Python native iterables(list, tuple, ...). Only ndarray is supported. + - ``out`` param: cannot perform auto braodcasting. ``out`` symbol's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` symbol's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + """ + return _unary_func_helper(x, _npi.log, _np.log, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def degrees(x, out=None, **kwargs): + """ + degrees(x, out=None) + + Convert angles from radians to degrees. + + Parameters + ---------- + x : _Symbol + Input value. Elements must be of real value. + out : _Symbol or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol of floats + The corresponding degree values; if `out` was supplied this is a + reference to it. + This is a scalar if `x` is a scalar. + + Notes + ------- + This function differs from the original `numpy.degrees + `_ in + the following aspects: + + - Input type does not support Python native iterables(list, tuple, ...). Only ndarray is supported. + - ``out`` param: cannot perform auto broadcasting. ``out`` symbol's shape must be the same as the expected output. + - ``out`` param: cannot perform auto type cast. ``out`` symbol's dtype must be the same as the expected output. + - ``out`` param does not support scalar input case. + + """ + return _unary_func_helper(x, _npi.degrees, _np.degrees, out=out, **kwargs) + + +def rint(x, out=None, **kwargs): + """ + Round elements of the array to the nearest integer. + + Parameters + ---------- + x : _Symbol or scalar + Input array. + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + out : _Symbol or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Notes + ----- + This function differs from the original `numpy.rint + `_ in + the following way(s): + + - only _Symbol or scalar is accpted as valid input, tuple of _Symbol is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + """ + return _unary_func_helper(x, _npi.rint, _np.rint, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def log2(x, out=None, **kwargs): + """ + Base-2 logarithm of x. + + Parameters + ---------- + x : _Symbol + Input values. + out : ndarray or None + A location into which the result is stored. + If provided, it must have the same shape and type as the input. + If not provided or None, a freshly-allocated array is returned. + + Returns + ------- + y : _Symbol + The logarithm base two of `x`, element-wise. + This is a scalar if `x` is a scalar. 
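+
+    Examples
+    --------
+    The values below illustrate the behaviour of the imperative counterpart;
+    the symbolic version returns a graph node rather than concrete data:
+
+    >>> np.log2(np.array([1., 2., 4., 8.]))
+    array([0., 1., 2., 3.])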
+ + Notes + ----- + This function differs from the original `numpy.log2 + `_ in + the following way(s): + + - only ndarray or scalar is accpted as valid input, tuple of ndarray is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + """ + return _unary_func_helper(x, _npi.log2, _np.log2, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def radians(x, out=None, **kwargs): + """ + Convert angles from degrees to radians. + + Parameters + ---------- + x : _Symbol or scalar + Input array in degrees. + out : _Symbol or None + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol + The corresponding radian values. This is a scalar if x is a scalar. + + Notes + ----- + This function differs from the original `numpy.radians + `_ in + the following way(s): + + - only _Symbol or scalar is accpted as valid input, tuple of _Symbol is not supported + - broadcasting to `out` of different shape is currently not supported + - when input is plain python numerics, the result will not be stored in the `out` param + + Examples + -------- + >>> deg = np.arange(12.) * 30. + >>> np.radians(deg) + array([0. , 0.5235988, 1.0471976, 1.5707964, 2.0943952, 2.6179938, + 3.1415927, 3.6651914, 4.1887903, 4.712389 , 5.2359877, 5.7595863], + dtype=float32) + + """ + return _unary_func_helper(x, _npi.radians, _np.radians, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def reciprocal(x, out=None, **kwargs): + r""" + reciprocal(x, out=None) + + Return the reciprocal of the argument, element-wise. + + Calculates ``1/x``. + + Parameters + ---------- + x : _Symbol or scalar + The values whose reciprocals are required. + out : _Symbol, or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Notes + ----- + .. note:: + This function is not designed to work with integers. + + For integer arguments with absolute value larger than 1 the result is + always zero because of the way Python handles integer division. For + integer zero the result is an overflow. + + The output `symbol` has the same `ctx` as the input `symbol`. + + This function differs from the original `numpy.reciprocal + `_ in + the following aspects: + + - Only support _Symbol and scalar now. + - `where` argument is not supported. + """ + return _unary_func_helper(x, _npi.reciprocal, _np.reciprocal, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def square(x, out=None, **kwargs): + r""" + square(x, out=None) + + Return the element-wise square of the input. + + Parameters + ---------- + x : _Symbol or scalar + The values whose reciprocals are required. + out : _Symbol, or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + y : _Symbol or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Notes + ----- + The output `symbol` has the same `ctx` as the input `symbol`. + + This function differs from the original `numpy.square + `_ in + the following aspects: + + - Only support _Symbol and scalar now. + - `where` argument is not supported. 
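+
+    Examples
+    --------
+    The values below illustrate the behaviour of the imperative counterpart;
+    the symbolic version returns a graph node rather than concrete data:
+
+    >>> np.square(np.array([-1., 2., 3.]))
+    array([1., 4., 9.])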
+ """ + return _unary_func_helper(x, _npi.square, _np.square, out=out, **kwargs) + + +@set_module('mxnet.symbol.numpy') +def arcsin(x, out=None, **kwargs): + r""" + arcsin(x, out=None) + + Inverse sine, element-wise. + + Parameters + ---------- + x : _Symbol or scalar + The values whose reciprocals are required. + out : _Symbol, or None, optional + Dummy parameter to keep the consistency with the ndarray counterpart. + + Returns + ------- + angle : _Symbol or scalar + Output array is same shape and type as x. This is a scalar if x is a scalar. + + Notes + ----- + `arcsin` is a multivalued function: for each `x` there are infinitely + many numbers `z` such that :math:`sin(z) = x`. The convention is to + return the angle `z` whose real part lies in [-pi/2, pi/2]. + + For real-valued input data types, *arcsin* always returns real output. + For each value that cannot be expressed as a real number or infinity, + it yields ``nan`` and sets the `invalid` floating point error flag. + + The inverse sine is also known as `asin` or sin^{-1}. + + The output `symbol` has the same `ctx` as the input `symbol`. + + This function differs from the original `numpy.arcsin + `_ in + the following aspects: + + - Only support _Symbol or scalar now. + - `where` argument is not supported. + - Complex input is not supported. + + References + ---------- + Abramowitz, M. and Stegun, I. A., *Handbook of Mathematical Functions*, + 10th printing, New York: Dover, 1964, pp. 79ff. + http://www.math.sfu.ca/~cbm/aands/ + """ + return _unary_func_helper(x, _npi.arcsin, _np.arcsin, out=out, **kwargs) + + +_set_np_symbol_class(_Symbol) diff --git a/python/mxnet/symbol/numpy/linalg.py b/python/mxnet/symbol/numpy/linalg.py new file mode 100644 index 000000000000..d1918ef8b903 --- /dev/null +++ b/python/mxnet/symbol/numpy/linalg.py @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for operators used in Gluon dispatched by F=symbol.""" + +from __future__ import absolute_import +from . import _symbol +from . import _op as _mx_sym_np + +__all__ = ['norm'] + + +def norm(x, ord=None, axis=None, keepdims=False): + r"""Matrix or vector norm. + + This function can only support Frobenius norm for now. + The Frobenius norm is given by [1]_: + + :math:`||A||_F = [\sum_{i,j} abs(a_{i,j})^2]^{1/2}` + + Parameters + ---------- + x : ndarray + Input array. + ord : {'fro'}, optional + Order of the norm. + axis : {int, 2-tuple of ints, None}, optional + If `axis` is an integer, it specifies the axis of `x` along which to + compute the vector norms. If `axis` is a 2-tuple, it specifies the + axes that hold 2-D matrices, and the matrix norms of these matrices + are computed. If `axis` is None, the norm of the whole ndarray is + returned. 
+ + keepdims : bool, optional + If this is set to True, the axes which are normed over are left in the + result as dimensions with size one. With this option the result will + broadcast correctly against the original `x`. + + Returns + ------- + n : float or ndarray + Norm of the matrix or vector(s). + + References + ---------- + .. [1] G. H. Golub and C. F. Van Loan, *Matrix Computations*, + Baltimore, MD, Johns Hopkins University Press, 1985, pg. 15 + """ + if ord is not None and ord != 'fro': + raise ValueError('only support Frobenius norm for now, received ord={}'.format(str(ord))) + if isinstance(axis, tuple) and len(axis) > 2: + raise ValueError('Improper number of dimensions to norm') + # TODO(junwu): When ord = 'fro', axis = None, and x.ndim > 2, raise exception + return _symbol.sqrt(_mx_sym_np.sum(x * x, axis=axis, keepdims=keepdims)) diff --git a/python/mxnet/symbol/numpy/random.py b/python/mxnet/symbol/numpy/random.py new file mode 100644 index 000000000000..fd73478e49eb --- /dev/null +++ b/python/mxnet/symbol/numpy/random.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for operators used in Gluon dispatched by F=symbol.""" + +from __future__ import absolute_import +from ...base import numeric_types +from ...context import current_context +from . import _internal as _npi + +__all__ = ['uniform', 'normal'] + + +def _random_helper(random, sampler, params, shape, dtype, ctx, out, kwargs): + """Helper function for random generators.""" + from ._symbol import _Symbol as np_symbol + if isinstance(params[0], np_symbol): + for i in params[1:]: + assert isinstance(i, np_symbol), \ + "Distribution parameters must all have the same type, but got " \ + "both %s and %s." % (type(params[0]), type(i)) + return sampler(*params, shape=shape, dtype=dtype, out=out, **kwargs) + elif isinstance(params[0], numeric_types): + if ctx is None: + ctx = current_context() + if shape is None and out is None: + shape = () + for i in params[1:]: + assert isinstance(i, numeric_types), \ + "Distribution parameters must all have the same type, but got " \ + "both %s and %s."%(type(params[0]), type(i)) + return random(*params, shape=shape, dtype=dtype, ctx=ctx, out=out, **kwargs) + + raise ValueError("Distribution parameters must be either mxnet.numpy.ndarray or numbers, " + "but got %s." % type(params[0])) + + +def uniform(low=0.0, high=1.0, size=None, **kwargs): + """Draw samples from a uniform distribution. + + Samples are uniformly distributed over the half-open interval + ``[low, high)`` (includes low, but excludes high). In other words, + any value within the given interval is equally likely to be drawn + by `uniform`. + + Parameters + ---------- + low : float, optional + Lower boundary of the output interval. 
All values generated will be + greater than or equal to low. The default value is 0. + high : float + Upper boundary of the output interval. All values generated will be + less than high. The default value is 1.0. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. If size is ``None`` (default), + a scalar tensor containing a single value is returned if + ``low`` and ``high`` are both scalars. + dtype : {'float16', 'float32', 'float64'}, optional + Data type of output samples. Default is 'float32' + ctx : Context, optional + Device context of output. Default is current context. + out : ndarray, optional + Store output to an existing ndarray. + + Returns + ------- + out : _Symbol (symbol representing `mxnet.numpy.ndarray` in computational graphs) + Drawn samples from the parameterized uniform distribution. + + + Notes + ----- + This function currently does not support ``low`` and ``high`` as symbols. + """ + dtype = kwargs.pop('dtype', None) + if dtype is None: + dtype = 'float32' + ctx = kwargs.pop('ctx', None) + out = kwargs.pop('out', None) + return _random_helper(_npi.random_uniform, None, + [low, high], size, dtype, ctx, out, kwargs) + + +def normal(loc=0.0, scale=1.0, size=None, **kwargs): + """Draw random samples from a normal (Gaussian) distribution. + + Samples are distributed according to a normal distribution parametrized + by *loc* (mean) and *scale* (standard deviation). + + + Parameters + ---------- + loc : float, optional + Mean (centre) of the distribution. + scale : float, optional + Standard deviation (spread or "width") of the distribution. + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., `(m, n, k)`, then `m * n * k` + samples are drawn. If size is `None` (default), a scalar tensor containing + a single value is returned if loc and scale are both scalars. + dtype : {'float16', 'float32', 'float64'}, optional + Data type of output samples. Default is 'float32' + ctx : Context, optional + Device context of output. Default is current context. + out : ``ndarray``, optional + Store output to an existing ``ndarray``. + + Returns + ------- + out : _Symbol (symbol representing `mxnet.numpy.ndarray` in computational graphs) + Drawn samples from the parameterized normal distribution. + + Notes + ----- + This function currently does not support ``loc`` and ``scale`` as `_Symbol`s. + """ + dtype = kwargs.pop('dtype', None) + if dtype is None: + dtype = 'float32' + ctx = kwargs.pop('ctx', None) + out = kwargs.pop('out', None) + return _random_helper(_npi.random_normal, None, + [loc, scale], size, dtype, ctx, out, kwargs) diff --git a/python/mxnet/symbol/numpy_extension/__init__.py b/python/mxnet/symbol/numpy_extension/__init__.py new file mode 100644 index 000000000000..5be34ac9b3d5 --- /dev/null +++ b/python/mxnet/symbol/numpy_extension/__init__.py @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module for the ops not belonging to the official numpy package.""" + +from . import _op +from . import image +from . import _register +from ._op import * # pylint: disable=wildcard-import + +__all__ = _op.__all__ diff --git a/python/mxnet/symbol/numpy_extension/_op.py b/python/mxnet/symbol/numpy_extension/_op.py new file mode 100644 index 000000000000..82eaa8e6ec9f --- /dev/null +++ b/python/mxnet/symbol/numpy_extension/_op.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Namespace for operators not belonging to the official numpy package +used in Gluon APIs dispatched by F=symbol module.""" + +__all__ = [] diff --git a/python/mxnet/symbol/numpy_extension/_register.py b/python/mxnet/symbol/numpy_extension/_register.py new file mode 100644 index 000000000000..b118987b1fd3 --- /dev/null +++ b/python/mxnet/symbol/numpy_extension/_register.py @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Registering numpy_extension ops.""" + +from ...base import _init_np_op_module +from ..register import _make_symbol_function + +_init_np_op_module(root_module_name='mxnet', np_module_name='numpy_extension', + mx_module_name='symbol', make_op_func=_make_symbol_function) diff --git a/python/mxnet/symbol/numpy_extension/image.py b/python/mxnet/symbol/numpy_extension/image.py new file mode 100644 index 000000000000..b3bd27fc503c --- /dev/null +++ b/python/mxnet/symbol/numpy_extension/image.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Image pre-processing operators.""" + +__all__ = [] diff --git a/python/mxnet/symbol/register.py b/python/mxnet/symbol/register.py index ac59f8b97f15..a17dd79048d4 100644 --- a/python/mxnet/symbol/register.py +++ b/python/mxnet/symbol/register.py @@ -27,12 +27,60 @@ from ..attribute import AttrScope from ..base import mx_uint, check_call, _LIB, py_str from ..symbol_doc import _build_doc -from ..base import _Null, _init_op_module +from ..base import _Null, _init_op_module, _is_np_op from ..name import NameManager # pylint: enable=unused-import -def _generate_symbol_function_code(handle, name, func_name, signature_only=False): +def _verify_np_symbol(op_name, func_name, sym): + """Verify if the sym is a numpy symbol. + + Parameters + ---------- + op_name : str + Operator full name registered in backend. + func_name : str + Operator name exposed to users. This is usually the name by stripping off + the prefix of the full operator names registered in backend. + sym : symbol to be verified + """ + from .numpy._symbol import _Symbol as np_symbol + if not isinstance(sym, np_symbol): + raise TypeError('Operator `{}` registered in backend is known as `{}` in Python. ' + 'This is a numpy operator which can only accept ' + 'MXNet numpy ndarrays, while received a legacy ndarray. ' + 'Please ensure that you have activated numpy semantics by calling ' + '`npx.set_np()` in your code. If you still see this error with numpy ' + 'semantics activated, please call `as_np_ndarray()` upon the legacy ' + 'ndarray to convert it to an MXNet numpy ndarray, and then feed the ' + 'converted array to this operator.' + .format(op_name, func_name)) + + +def _verify_legacy_symbol(op_name, func_name, sym): + """Verify if the sym is a legacy symbol. + + Parameters + ---------- + op_name : str + Operator full name registered in backend. + func_name : str + Operator name exposed to users. This is usually the name by stripping off + the prefix of the full operator names registered in backend. + sym : symbol to be verified + """ + from .numpy._symbol import _Symbol as np_symbol + if isinstance(sym, np_symbol): + raise TypeError('Operator `{}` registered in backend is known as `{}` in Python. ' + 'This is a legacy operator which can only accept ' + 'legacy ndarrays, while received an MXNet numpy ndarray. ' + 'Please call `as_nd_ndarray()` upon the numpy ndarray to ' + 'convert it to a legacy ndarray, and then feed the converted ' + 'array to this operator.' 
+ .format(op_name, func_name)) + + +def _generate_symbol_function_code(handle, op_name, func_name, signature_only=False): """Generate function for symbol op by handle and function name.""" real_name = ctypes.c_char_p() desc = ctypes.c_char_p() @@ -56,7 +104,7 @@ def _generate_symbol_function_code(handle, name, func_name, signature_only=False arg_types = [py_str(arg_types[i]) for i in range(narg)] key_var_num_args = py_str(key_var_num_args.value) ret_type = py_str(ret_type.value) if ret_type.value is not None else '' - doc_str = _build_doc(name, + doc_str = _build_doc(op_name, py_str(desc.value), arg_names, arg_types, @@ -95,6 +143,8 @@ def _generate_symbol_function_code(handle, name, func_name, signature_only=False signature.append('**kwargs') signature = ndsignature + signature + is_np_op = _is_np_op(op_name) + verify_symbol_fn = _verify_np_symbol.__name__ if is_np_op else _verify_legacy_symbol.__name__ code = [] if arr_name: code.append(""" @@ -106,7 +156,8 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name)) assert isinstance(i, SymbolBase), \\ "Positional arguments must be Symbol instances, " \\ "but got %s"%str(i) - sym_args.append(i)""".format(arr_name)) + {}('{}', '{}', i) + sym_args.append(i)""".format(arr_name, verify_symbol_fn, op_name, func_name)) if dtype_name is not None: code.append(""" if '%s' in kwargs: @@ -128,9 +179,10 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name)) for k, v in kwargs.items(): if isinstance(v, SymbolBase): sym_kwargs[k] = v + %s('%s', '%s', v) else: keys.append(k) - vals.append(v)"""%(func_name.lower())) + vals.append(v)"""%(func_name.lower(), verify_symbol_fn, op_name, func_name)) if key_var_num_args: # pylint: disable=using-constant-test code.append(""" if '%s' not in kwargs: @@ -139,8 +191,8 @@ def %s(*%s, **kwargs):"""%(func_name, arr_name)) key_var_num_args, key_var_num_args)) code.append(""" - return _symbol_creator(%d, sym_args, sym_kwargs, keys, vals, name)"""%( - handle.value)) + return _symbol_creator(%d, sym_args, sym_kwargs, keys, vals, name, %s)"""%( + handle.value, str(is_np_op))) else: code.append(""" def %s(%s):"""%(func_name, ', '.join(signature))) @@ -155,9 +207,10 @@ def %s(%s):"""%(func_name, ', '.join(signature))) for _k, _v in kwargs.items(): if isinstance(_v, SymbolBase): sym_kwargs[_k] = _v + {}('{}', '{}', _v) else: _keys.append(_k) - _vals.append(_v)""") + _vals.append(_v)""".format(verify_symbol_fn, op_name, func_name)) # NDArray args for name in ndarg_names: # pylint: disable=redefined-argument-from-local code.append(""" @@ -165,6 +218,9 @@ def %s(%s):"""%(func_name, ', '.join(signature))) assert isinstance({name}, SymbolBase), \\ "Argument {name} must be Symbol instances, but got %s"%str({name}) sym_kwargs['{name}'] = {name}""".format(name=name)) + code.append(""" + {}('{}', '{}', {name}) + """.format(verify_symbol_fn, op_name, func_name, name=name)) # kwargs for name in kwarg_names: # pylint: disable=redefined-argument-from-local code.append(""" @@ -173,7 +229,13 @@ def %s(%s):"""%(func_name, ', '.join(signature))) _vals.append(%s)"""%(name, name, name)) # dtype if dtype_name is not None: - code.append(""" + if is_np_op: + code.append(""" + if %s is not _Null and %s is not None: + _keys.append('%s') + _vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name, dtype_name)) + else: + code.append(""" if %s is not _Null: _keys.append('%s') _vals.append(_np.dtype(%s).name)"""%(dtype_name, dtype_name, dtype_name)) @@ -182,8 +244,8 @@ def %s(%s):"""%(func_name, ', '.join(signature))) if not 
hasattr(NameManager._current, "value"): NameManager._current.value = NameManager() name = NameManager._current.value.get(name, '%s') - return _symbol_creator(%d, None, sym_kwargs, _keys, _vals, name)"""%( - func_name.lower(), handle.value)) + return _symbol_creator(%d, None, sym_kwargs, _keys, _vals, name, %s)"""%( + func_name.lower(), handle.value, str(is_np_op))) if signature_only: code.append(""" diff --git a/python/mxnet/symbol/symbol.py b/python/mxnet/symbol/symbol.py index d3cd519b9a8c..eb9e7593d8ec 100644 --- a/python/mxnet/symbol/symbol.py +++ b/python/mxnet/symbol/symbol.py @@ -30,7 +30,7 @@ import warnings from numbers import Number -import numpy as _numpy +import numpy as _numpy # pylint: disable=relative-import from ..attribute import AttrScope from ..base import _LIB, numeric_types, c_array, c_array_buf, c_str, c_str_array, c_handle_array @@ -61,6 +61,17 @@ class Symbol(SymbolBase): # Make numpy functions return Symbol instead of numpy object array __array_priority__ = 1000.0 + def as_np_ndarray(self): + """Convert mx.sym.Symbol to mx.sym.np._Symbol.""" + from .numpy import _Symbol + hdl = SymbolHandle() + check_call(_LIB.MXShallowCopySymbol(self.handle, ctypes.byref(hdl))) + return _Symbol(hdl) + + def as_nd_ndarray(self): + """Returns self. For the convenience of conversion between legacy and np symbols.""" + return self + def __repr__(self): """Gets a string representation of the symbol.""" name = self.name @@ -144,6 +155,8 @@ def __rsub__(self, other): array([[-2., -2., -2.], [-2., -2., -2.]], dtype=float32) """ + if isinstance(other, Symbol): + return other.__sub__(self) if isinstance(other, Number): return _internal._RMinusScalar(self, scalar=other) else: @@ -192,6 +205,8 @@ def __rdiv__(self, other): array([[ 0.33333334, 0.33333334, 0.33333334], [ 0.33333334, 0.33333334, 0.33333334]], dtype=float32) """ + if isinstance(other, Symbol): + return other.__truediv__(self) if isinstance(other, Number): return _internal._RDivScalar(self, scalar=other) else: @@ -222,6 +237,8 @@ def __rmod__(self, other): array([[ 1., 1., 1., [ 1., 1., 1., dtype=float32) """ + if isinstance(other, Symbol): + return other.__mod__(self) if isinstance(other, Number): return _internal._RModScalar(self, scalar=other) else: @@ -252,7 +269,13 @@ def __pow__(self, other): raise TypeError('type %s not supported' % str(type(other))) def __rpow__(self, other): - raise NotImplementedForSymbol(self.__rpow__, 'y**x', other) + """x.__rpow__(y) <=> y ** x""" + if isinstance(other, Symbol): + return other.__pow__(self) + elif isinstance(other, Number): + return _internal._rpower_scalar(self, scalar=other) + else: + raise TypeError('type %s not supported' % str(type(other))) def __neg__(self): """x.__neg__() <=> -x @@ -2667,8 +2690,12 @@ def var(name, attr=None, shape=None, lr_mult=None, wd_mult=None, dtype=None, Variable = var -def Group(symbols): +def Group(symbols, create_fn=Symbol): """Creates a symbol that contains a collection of other symbols, grouped together. + A classic symbol (`mx.sym.Symbol`) will be returned if all the symbols in the list + are of that type; a numpy symbol (`mx.sym.np._Symbol`) will be returned if all the + symbols in the list are of that type. A type error will be raised if a list of mixed + classic and numpy symbols are provided. Example ------- @@ -2682,6 +2709,9 @@ def Group(symbols): symbols : list List of symbols to be grouped. + create_fn : mx.sym.Symbol or mx.sym.np._Symbol + Symbol class for creating the grouped symbol. 
+ Returns ------- sym : Symbol @@ -2693,7 +2723,7 @@ def Group(symbols): check_call(_LIB.MXSymbolCreateGroup( mx_uint(len(symbols)), c_handle_array(symbols), ctypes.byref(handle))) - return Symbol(handle) + return create_fn(handle) def load(fname): diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py index bd102412c6e2..7fd2ca2b839f 100644 --- a/python/mxnet/test_utils.py +++ b/python/mxnet/test_utils.py @@ -47,6 +47,7 @@ from .ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID from .ndarray import array from .symbol import Symbol +from .symbol.numpy import _Symbol as np_symbol def default_context(): @@ -88,7 +89,8 @@ def get_etol(etol=None): def random_arrays(*shapes): """Generate some random numpy arrays.""" - arrays = [np.random.randn(*s).astype(default_dtype()) + arrays = [np.array(np.random.randn(), dtype=default_dtype()) + if len(s) == 0 else np.random.randn(*s).astype(default_dtype()) for s in shapes] if len(arrays) == 1: return arrays[0] @@ -407,16 +409,20 @@ def create_sparse_array_zd(shape, stype, density, data_init=None, density=density, shuffle_csr_indices=shuffle_csr_indices) -def rand_shape_2d(dim0=10, dim1=10): - return rnd.randint(1, dim0 + 1), rnd.randint(1, dim1 + 1) +def rand_shape_2d(dim0=10, dim1=10, allow_zero_size=False): + low = 0 if allow_zero_size else 1 + return rnd.randint(low, dim0 + 1), rnd.randint(low, dim1 + 1) -def rand_shape_3d(dim0=10, dim1=10, dim2=10): - return rnd.randint(1, dim0 + 1), rnd.randint(1, dim1 + 1), rnd.randint(1, dim2 + 1) +def rand_shape_3d(dim0=10, dim1=10, dim2=10, allow_zero_size=False): + low = 0 if allow_zero_size else 1 + return rnd.randint(low, dim0 + 1), rnd.randint(low, dim1 + 1), rnd.randint(low, dim2 + 1) -def rand_shape_nd(num_dim, dim=10): - return tuple(rnd.randint(1, dim+1, size=num_dim)) + +def rand_shape_nd(num_dim, dim=10, allow_zero_size=False): + low = 0 if allow_zero_size else 1 + return tuple(rnd.randint(low, dim+1, size=num_dim)) def rand_coord_2d(x_low, x_high, y_low, y_high): @@ -828,7 +834,7 @@ def as_stype(var, stype, dtype): continue stype = executor.arg_dict[k].stype old_value = v.copy() - for i in range(np.prod(v.shape)): + for i in range(int(np.prod(v.shape))): # inplace update v.ravel()[i] += eps/2.0 executor.arg_dict[k][:] = as_stype(v, stype, dtype=dtype) @@ -940,7 +946,12 @@ def random_projection(shape): input_shape = {k: v.shape for k, v in location.items()} _, out_shape, _ = sym.infer_shape(**input_shape) proj = mx.sym.Variable("__random_proj") + is_np_sym = bool(isinstance(sym, np_symbol)) + if is_np_sym: # convert to np symbol for using element-wise multiplication + proj = proj.as_np_ndarray() out = sym * proj + if is_np_sym: # convert to classic symbol so that make_loss can be used + out = out.as_nd_ndarray() out = mx.sym.make_loss(out) location = dict(list(location.items()) + diff --git a/python/mxnet/util.py b/python/mxnet/util.py index 5bc1dc809c88..d4e95e0c0c9c 100644 --- a/python/mxnet/util.py +++ b/python/mxnet/util.py @@ -20,6 +20,9 @@ import os import sys import functools +import itertools +import inspect +import threading from .base import _LIB, check_call @@ -76,14 +79,24 @@ def set_np_shape(active): >>> print(mx.is_np_shape()) True """ + if active: + import logging + logging.info('NumPy-shape semantics has been activated in your code. ' + 'This is required for creating and manipulating scalar and zero-size ' + 'tensors, which were not supported in MXNet before, as in the official ' + 'NumPy library. 
Please DO NOT manually deactivate this semantics while ' + 'using `mxnet.numpy` and `mxnet.numpy_extension` modules.') + elif is_np_array(): + raise ValueError('Deactivating NumPy shape semantics while NumPy array semantics is still' + ' active is not allowed. Please consider calling `npx.reset_np()` to' + ' deactivate both of them.') prev = ctypes.c_int() check_call(_LIB.MXSetIsNumpyShape(ctypes.c_int(active), ctypes.byref(prev))) return bool(prev.value) def is_np_shape(): - """ - Checks whether the NumPy shape semantics is currently turned on. + """Checks whether the NumPy shape semantics is currently turned on. In NumPy shape semantics, `()` represents the shape of scalar tensors, and tuples with `0` elements, for example, `(0,)`, `(1, 0, 2)`, represent the shapes of zero-size tensors. This is turned off by default for keeping @@ -213,39 +226,379 @@ def np_shape(active=True): return _NumpyShapeScope(active) +def wraps_safely(wrapped, assigned=functools.WRAPPER_ASSIGNMENTS): + """This function is safe version of `functools.wraps` in Python2 which skips wrapping functions + for the attributes that do not exist.""" + if sys.version_info[0] > 2: + return functools.wraps(wrapped) + else: + return functools.wraps(wrapped, + assigned=itertools.ifilter( + functools.partial(hasattr, wrapped), assigned)) + + def use_np_shape(func): - """Wraps a function with an activated NumPy-shape scope. This ensures - that the execution of the function is guaranteed with the support of - scalar and zero-size tensors as in NumPy. + """A decorator wrapping a function or class with activated NumPy-shape semantics. + When `func` is a function, this ensures that the execution of the function is scoped with NumPy + shape semantics, such as the support for zero-dim and zero size tensors. When + `func` is a class, it ensures that all the methods, static functions, and properties + of the class are executed with the NumPy shape semantics. + + Example:: + import mxnet as mx + @mx.use_np_shape + def scalar_one(): + return mx.nd.ones(()) + print(scalar_one()) + + @np.use_np_shape + class ScalarTensor(object): + def __init__(self, val=None): + if val is None: + val = ScalarTensor.random().value + self._scalar = mx.nd.ones(()) * val + + def __repr__(self): + print("Is __repr__ in np_shape semantics? {}!".format(str(np.is_np_shape()))) + return str(self._scalar.asnumpy()) + + @staticmethod + def random(): + val = mx.nd.random.uniform().asnumpy().item() + return ScalarTensor(val) + + @property + def value(self): + print("Is value property in np_shape semantics? {}!".format(str(np.is_np_shape()))) + return self._scalar.asnumpy().item() + + + print("Is global scope of np_shape activated? {}!".format(str(np.is_np_shape()))) + scalar_tensor = ScalarTensor() + print(scalar_tensor) + + Parameters + ---------- + func : a user-provided callable function or class to be scoped by the NumPy-shape semantics. + + Returns + ------- + Function or class + A function or class wrapped in the NumPy-shape scope. 
+ """ + + if inspect.isclass(func): + for name, method in inspect.getmembers( + func, + predicate= + lambda f: inspect.isfunction(f) or inspect.ismethod(f) or isinstance(f, property)): + if isinstance(method, property): + setattr(func, name, property(use_np_shape(method.__get__), + method.__set__, + method.__delattr__, + method.__doc__)) + else: + setattr(func, name, use_np_shape(method)) + return func + elif callable(func): + @wraps_safely(func) + def _with_np_shape(*args, **kwargs): + with np_shape(active=True): + return func(*args, **kwargs) + return _with_np_shape + else: + raise TypeError('use_np_shape can only decorate classes and callable objects, ' + 'while received a {}'.format(str(type(func)))) + + +def _sanity_check_params(func_name, unsupported_params, param_dict): + for param_name in unsupported_params: + if param_name in param_dict: + raise NotImplementedError("function {} does not support parameter {}" + .format(func_name, param_name)) + + +def set_module(module): + """Decorator for overriding __module__ on a function or class. + + Example usage:: + + @set_module('mxnet.numpy') + def example(): + pass + + assert example.__module__ == 'numpy' + """ + def decorator(func): + if module is not None: + func.__module__ = module + return func + return decorator + + +class _NumpyArrayScope(object): + """Scope for managing NumPy array creation. This is often used + with `is_np_array=True` in initializer to enforce array creation + as type `mxnet.numpy.ndarray`, instead of `mx.nd.NDArray` in Gluon. + + Do not use this class directly. Use `np_array(active)` instead. + """ + _current = threading.local() + + def __init__(self, is_np_array): # pylint: disable=redefined-outer-name + self._old_scope = None + self._is_np_array = is_np_array + + def __enter__(self): + if not hasattr(_NumpyArrayScope._current, "value"): + _NumpyArrayScope._current.value = _NumpyArrayScope(False) + self._old_scope = _NumpyArrayScope._current.value + _NumpyArrayScope._current.value = self + return self + + def __exit__(self, ptype, value, trace): + assert self._old_scope + _NumpyArrayScope._current.value = self._old_scope + + +def np_array(active=True): + """Returns an activated/deactivated NumPy-array scope to be used in 'with' statement + and captures code that needs the NumPy-array semantics. + + Currently, this is used in Gluon to enforce array creation in `Block`s as type + `mxnet.numpy.ndarray`, instead of `mx.nd.NDArray`. + + It is recommended to use the decorator `use_np_array` to decorate the classes + that need this semantics, instead of using this function in a `with` statement + unless you know exactly what has been scoped by this semantics. Please note that this is designed as an infrastructure for the incoming MXNet-NumPy operators. Legacy operators registered in the modules `mx.nd` and `mx.sym` are not guaranteed to behave like their counterparts in NumPy even within this scope. + Parameters + ---------- + active : bool + Indicates whether to activate NumPy-array semantics. + + Returns + ------- + _NumpyShapeScope + A scope object for wrapping the code w/ or w/o NumPy-shape semantics. + """ + return _NumpyArrayScope(active) + + +def is_np_array(): + """Checks whether the NumPy-array semantics is currently turned on. + This is currently used in Gluon for checking whether an array of type `mxnet.numpy.ndarray` + or `mx.nd.NDArray` should be created. 
For example, at the time when a parameter + is created in a `Block`, an `mxnet.numpy.ndarray` is created if this returns true; else + an `mx.nd.NDArray` is created. + + Normally, users are not recommended to use this API directly unless you known exactly + what is going on under the hood. + + Please note that this is designed as an infrastructure for the incoming + MXNet-NumPy operators. Legacy operators registered in the modules + `mx.nd` and `mx.sym` are not guaranteed to behave like their counterparts + in NumPy within this semantics. + + Returns + ------- + A bool value indicating whether the NumPy-array semantics is currently on. + """ + return _NumpyArrayScope._current.value._is_np_array if hasattr( + _NumpyArrayScope._current, "value") else False + + +def use_np_array(func): + """A decorator wrapping Gluon `Block`s and all its methods, properties, and static functions + with the semantics of NumPy-array, which means that where ndarrays are created, + `mxnet.numpy.ndarray`s should be created, instead of legacy ndarrays of type `mx.nd.NDArray`. + For example, at the time when a parameter is created in a `Block`, an `mxnet.numpy.ndarray` + is created if it's decorated with this decorator. + + Example:: + import mxnet as mx + from mxnet import gluon, np + + + class TestHybridBlock1(gluon.HybridBlock): + def __init__(self): + super(TestHybridBlock1, self).__init__() + self.w = self.params.get('w', shape=(2, 2)) + + def hybrid_forward(self, F, x, w): + return F.dot(x, w) + + + x = mx.nd.ones((2, 2)) + net1 = TestHybridBlock1() + net1.initialize() + out = net1.forward(x) + for _, v in net1.collect_params().items(): + assert type(v.data()) is mx.nd.NDArray + assert type(out) is mx.nd.NDArray + + + @np.use_np_array + class TestHybridBlock2(gluon.HybridBlock): + def __init__(self): + super(TestHybridBlock2, self).__init__() + self.w = self.params.get('w', shape=(2, 2)) + + def hybrid_forward(self, F, x, w): + return F.np.dot(x, w) + + + x = np.ones((2, 2)) + net2 = TestHybridBlock2() + net2.initialize() + out = net2.forward(x) + for _, v in net2.collect_params().items(): + print(type(v.data())) + assert type(v.data()) is np.ndarray + assert type(out) is np.ndarray Parameters ---------- - func : a user-provided callable function to be scoped by the NumPy-shape semantics. + func : a user-provided callable function or class to be scoped by the NumPy-array semantics. Returns ------- - Function - A function for wrapping the user functions in the NumPy-shape semantics. + Function or class + A function or class wrapped in the NumPy-array scope. + """ + if inspect.isclass(func): + for name, method in inspect.getmembers( + func, + predicate= + lambda f: inspect.isfunction(f) or inspect.ismethod(f) or isinstance(f, property)): + if isinstance(method, property): + setattr(func, name, property(use_np_array(method.__get__), + method.__set__, + method.__delattr__, + method.__doc__)) + else: + setattr(func, name, use_np_array(method)) + return func + elif callable(func): + @wraps_safely(func) + def _with_np_array(*args, **kwargs): + with np_array(active=True): + return func(*args, **kwargs) + return _with_np_array + else: + raise TypeError('use_np_array can only decorate classes and callable objects, ' + 'while received a {}'.format(str(type(func)))) - Examples - -------- - >>> import mxnet as mx - >>> @mx.use_np_shape - ... def scalar_one(): - ... return mx.nd.ones(()) - ... 
- >>> print(scalar_one()) +def use_np(func): + """A convenience decorator for wrapping user provided functions and classes in the scope of + both NumPy-shape and NumPy-array semantics, which means that (1) empty tuples `()` and tuples + with zeros, such as `(0, 1)`, `(1, 0, 2)`, will be treated as scalar tensors' shapes and + zero-size tensors' shapes in shape inference functions of operators, instead of as unknown + in legacy mode; (2) ndarrays of type `mxnet.numpy.ndarray` should be created instead of + `mx.nd.NDArray`. + + Example:: + import mxnet as mx + from mxnet import gluon, np + + + class TestHybridBlock1(gluon.HybridBlock): + def __init__(self): + super(TestHybridBlock1, self).__init__() + self.w = self.params.get('w', shape=(2, 2)) + + def hybrid_forward(self, F, x, w): + return F.dot(x, w) + F.ones((1,)) + + + x = mx.nd.ones((2, 2)) + net1 = TestHybridBlock1() + net1.initialize() + out = net1.forward(x) + for _, v in net1.collect_params().items(): + assert type(v.data()) is mx.nd.NDArray + assert type(out) is mx.nd.NDArray + + + @np.use_np + class TestHybridBlock2(gluon.HybridBlock): + def __init__(self): + super(TestHybridBlock2, self).__init__() + self.w = self.params.get('w', shape=(2, 2)) + + def hybrid_forward(self, F, x, w): + return F.np.dot(x, w) + F.np.ones(()) + + + x = np.ones((2, 2)) + net2 = TestHybridBlock2() + net2.initialize() + out = net2.forward(x) + for _, v in net2.collect_params().items(): + print(type(v.data())) + assert type(v.data()) is np.ndarray + assert type(out) is np.ndarray + + Parameters + ---------- + func : a user-provided callable function or class to be scoped by the + NumPy-shape and NumPy-array semantics. + + Returns + ------- + Function or class + A function or class wrapped in the Numpy-shape and NumPy-array scope. """ - @functools.wraps(func) - def _with_np_shape(*args, **kwargs): - with np_shape(active=True): - return func(*args, **kwargs) + return use_np_shape(use_np_array(func)) + + +def _set_np_array(active): + """Turns on/off NumPy array semantics for the current thread in which `mxnet.numpy.ndarray` + is expected to be created, instead of the legacy `mx.nd.NDArray`. + + Parameters + --------- + active : bool + A boolean value indicating whether the NumPy-array semantics should be turned on or off. + + Returns + ------- + A bool value indicating the previous state of NumPy array semantics. + """ + if active: + import logging + logging.info('NumPy array semantics has been activated in your code. This allows you' + ' to use operators from MXNet NumPy and NumPy Extension modules as well' + ' as MXNet NumPy `ndarray`s.') + cur_state = is_np_array() + _NumpyArrayScope._current.value = _NumpyArrayScope(active) + return cur_state + + +def set_np(shape=True, array=True): + """Setting NumPy shape and array semantics at the same time. + It is required to keep NumPy shape semantics active when activating NumPy array semantics. + Deactivating NumPy shape semantics while NumPy array semantics is still active is not allowed. + + Parameters + ---------- + shape : bool + A boolean value indicating whether the NumPy-shape semantics should be turned on or off. + array : bool + A boolean value indicating whether the NumPy-array semantics should be turned on or off. 
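+
+ Example::
+
+ # A minimal usage sketch. It assumes `set_np`, `reset_np`, `is_np_shape` and
+ # `is_np_array` are importable from `mxnet.util`; the final packaging may also
+ # expose them under a shorter alias (e.g. `npx`, as referenced elsewhere in this patch).
+ from mxnet.util import set_np, reset_np, is_np_shape, is_np_array
+
+ set_np() # activate NumPy shape and array semantics together
+ assert is_np_shape() and is_np_array()
+
+ set_np(shape=True, array=False) # shape semantics can stay on while array semantics is off
+ assert is_np_shape() and not is_np_array()
+
+ reset_np() # deactivate both
+ assert not is_np_shape() and not is_np_array()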
+ """ + if not shape and array: + raise ValueError('NumPy Shape semantics is required in using NumPy array semantics.') + _set_np_array(array) + set_np_shape(shape) + - return _with_np_shape +def reset_np(): + """Deactivate NumPy shape and array semantics at the same time.""" + set_np(shape=False, array=False) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f5d72d53d2b7..3795fba040d2 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1535,3 +1535,12 @@ int MXStorageEmptyCache(int dev_type, int dev_id) { Storage::Get()->ReleaseAll(ctx); API_END(); } + +int MXShallowCopyNDArray(NDArrayHandle src_handle, NDArrayHandle* out) { + NDArray* ret = nullptr; + API_BEGIN(); + NDArray* src_array = static_cast(src_handle); + ret = new NDArray(*src_array); + *out = ret; + API_END_HANDLE_ERROR(delete ret); +} diff --git a/src/c_api/c_api_common.h b/src/c_api/c_api_common.h index 013ecab93da8..233acc85f36b 100644 --- a/src/c_api/c_api_common.h +++ b/src/c_api/c_api_common.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/src/c_api/c_api_symbolic.cc b/src/c_api/c_api_symbolic.cc index 4c6229ee29b0..930b03c4d366 100644 --- a/src/c_api/c_api_symbolic.cc +++ b/src/c_api/c_api_symbolic.cc @@ -855,11 +855,20 @@ int MXGenAtomicSymbolFromSymbol(SymbolHandle sym_handle, SymbolHandle *ret_sym_h API_BEGIN(); nnvm::Symbol *source = static_cast(sym_handle); CHECK_EQ(source->outputs.size(), 1U) - << "Generating atomic symbol from other symbol only works for nongrouped symbol."; - const auto& node = source->outputs[0]; + << "Generating atomic symbol from other symbol only works for nongrouped symbol."; + const auto &node = source->outputs[0]; const auto *op = node.node->op(); const auto attrs = source->ListAttrs(nnvm::Symbol::ListAttrOption::kShallow); *s = nnvm::Symbol::CreateFunctor(op, attrs); *ret_sym_handle = s; API_END_HANDLE_ERROR(delete s); } + +int MXShallowCopySymbol(SymbolHandle src, SymbolHandle* out) { + nnvm::Symbol* out_sym = new nnvm::Symbol; + API_BEGIN(); + nnvm::Symbol* src_sym = static_cast(src); + *out_sym = *src_sym; + *out = out_sym; + API_END_HANDLE_ERROR(delete out_sym); +} diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index d8fba1c169ec..de59657c9274 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -313,7 +313,9 @@ std::vector Imperative::Backward( } else { info.outputs.emplace_back(outputs[i]->shape(), outputs[i]->ctx(), true, outputs[i]->dtype()); - info.outputs.back() = static_cast(1.0); + if (info.outputs.back().shape().Size() != 0) { + info.outputs.back() = static_cast(1.0); + } } } diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h index 5cb805c5abcb..1bef5af7d8dd 100644 --- a/src/imperative/imperative_utils.h +++ b/src/imperative/imperative_utils.h @@ -853,7 +853,6 @@ inline std::multimap AllocateMemory( } CHECK_EQ(stypes[i], kDefaultStorage); if (mem_plan[i].root == i) { - CHECK_GT(mem_plan[i].size, 0); auto iter = pool.lower_bound(mem_plan[i].size); if (iter != pool.end()) { *arrays[i] = iter->second.AsArray(shapes[i], dtypes[i]); diff --git a/src/io/image_io.cc b/src/io/image_io.cc index c0357998f31c..db9ac7682287 100644 --- a/src/io/image_io.cc +++ b/src/io/image_io.cc @@ -357,6 +357,7 @@ inline void copyMakeBorder(const nnvm::NodeAttrs& attrs, } NNVM_REGISTER_OP(_cvimdecode) +.add_alias("_npi_cvimdecode") .describe("Decode image with OpenCV. 
\n" "Note: return image in RGB by default, " "instead of OpenCV's default BGR.") @@ -368,6 +369,7 @@ NNVM_REGISTER_OP(_cvimdecode) .add_arguments(ImdecodeParam::__FIELDS__()); NNVM_REGISTER_OP(_cvimread) +.add_alias("_npi_cvimread") .describe("Read and decode image with OpenCV. \n" "Note: return image in RGB by default, " "instead of OpenCV's default BGR.") @@ -378,6 +380,7 @@ NNVM_REGISTER_OP(_cvimread) .add_arguments(ImreadParam::__FIELDS__()); NNVM_REGISTER_OP(_cvimresize) +.add_alias("_npi_cvimresize") .describe("Resize image with OpenCV. \n") .set_num_inputs(1) .set_num_outputs(1) diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc index af9e7a7170d3..d38c01931d0d 100644 --- a/src/ndarray/ndarray.cc +++ b/src/ndarray/ndarray.cc @@ -312,7 +312,7 @@ NDArray NDArray::AtWithRecord(index_t idx) { CHECK(storage_type() == kDefaultStorage) << "Storage type " << storage_type() << " doesn't support At()"; NDArray ret = this->SliceWithRecord(idx, idx+1); - if (shape_.ndim() > 1) { + if (shape_.ndim() > 1 || Imperative::Get()->is_np_shape()) { return ret.ReshapeWithRecord(mxnet::TShape(shape_.data()+1, shape_.data()+shape_.ndim())); } else { return ret; @@ -1200,7 +1200,10 @@ void CopyFromTo(const NDArray& from, const NDArray& to, int priority, bool is_op << "from.shape = " << from.shape() << " to.shape=" << to.shape(); CHECK(!mxnet::op::shape_is_none(from.shape())) << "source operands have undefined shape"; - if (from.shape().Size() == 0U) return; + // zero-size array, no need to copy + if (from.shape().Size() == 0U) { + return; + } // important: callback must always capture by value const Context from_ctx = from.ctx(); const int a = from_ctx.dev_mask(); @@ -1720,7 +1723,7 @@ bool NDArray::Load(dmlc::Stream *strm) { CHECK(!Imperative::Get()->is_np_shape()) << "ndarray was not saved in np shape semantics, but being loaded in np shape semantics." 
" Please turn off np shape semantics in Python using `with np_shape(False)`" - " to scope of the code of loading the ndarray."; + " to scope the code of loading the ndarray."; } if (magic != NDARRAY_V2_MAGIC && magic != NDARRAY_V3_MAGIC) { return LegacyLoad(strm, magic); @@ -1860,6 +1863,10 @@ void NDArray::SyncCopyFromCPU(const void *data, size_t size) const { mxnet::TShape dshape = this->shape(); CHECK_EQ(dshape.Size(), size) << "Memory size do not match"; + // zero-size array, no need to copy + if (size == 0U) { + return; + } TBlob src((void*)data, dshape, cpu::kDevMask, this->dtype_, 0); // NOLINT(*) if (this->ctx().dev_mask() == cpu::kDevMask) { @@ -1991,6 +1998,10 @@ void NDArray::SyncCopyToCPU(void *data, size_t size) const { mxnet::TShape dshape = this->shape(); CHECK_EQ(dshape.Size(), size) << "Memory size do not match"; + // zero-size array, no need to copy + if (size == 0U) { + return; + } TBlob dst(data, dshape, cpu::kDevMask, this->dtype_, 0); // NOLINT(*) if (this->ctx().dev_mask() == cpu::kDevMask) { diff --git a/src/operator/contrib/multibox_detection.cc b/src/operator/contrib/multibox_detection.cc index 65fe5f1208bb..adb254853d2f 100644 --- a/src/operator/contrib/multibox_detection.cc +++ b/src/operator/contrib/multibox_detection.cc @@ -221,5 +221,9 @@ MXNET_REGISTER_OP_PROPERTY(_contrib_MultiBoxDetection, MultiBoxDetectionProp) .add_argument("loc_pred", "NDArray-or-Symbol", "Location regression predictions.") .add_argument("anchor", "NDArray-or-Symbol", "Multibox prior anchor boxes") .add_arguments(MultiBoxDetectionParam::__FIELDS__()); + +NNVM_REGISTER_OP(_contrib_MultiBoxDetection) +.add_alias("_npx_multibox_detection"); + } // namespace op } // namespace mxnet diff --git a/src/operator/contrib/multibox_prior.cc b/src/operator/contrib/multibox_prior.cc index 2ad173a2dd93..66fd2c11517a 100644 --- a/src/operator/contrib/multibox_prior.cc +++ b/src/operator/contrib/multibox_prior.cc @@ -100,5 +100,8 @@ MXNET_REGISTER_OP_PROPERTY(_contrib_MultiBoxPrior, MultiBoxPriorProp) .add_arguments(MultiBoxPriorParam::__FIELDS__()) .describe("Generate prior(anchor) boxes from data, sizes and ratios."); +NNVM_REGISTER_OP(_contrib_MultiBoxPrior) +.add_alias("_npx_multibox_prior"); + } // namespace op } // namespace mxnet diff --git a/src/operator/contrib/multibox_target.cc b/src/operator/contrib/multibox_target.cc index a1808c5a7c81..feab3977f82c 100644 --- a/src/operator/contrib/multibox_target.cc +++ b/src/operator/contrib/multibox_target.cc @@ -307,5 +307,9 @@ MXNET_REGISTER_OP_PROPERTY(_contrib_MultiBoxTarget, MultiBoxTargetProp) .add_argument("label", "NDArray-or-Symbol", "Object detection labels.") .add_argument("cls_pred", "NDArray-or-Symbol", "Class predictions.") .add_arguments(MultiBoxTargetParam::__FIELDS__()); + +NNVM_REGISTER_OP(_contrib_MultiBoxTarget) +.add_alias("_npx_multibox_target"); + } // namespace op } // namespace mxnet diff --git a/src/operator/image/crop.cc b/src/operator/image/crop.cc index 52d2f11a464b..6067f89d7033 100644 --- a/src/operator/image/crop.cc +++ b/src/operator/image/crop.cc @@ -35,6 +35,7 @@ namespace image { DMLC_REGISTER_PARAMETER(CropParam); NNVM_REGISTER_OP(_image_crop) +.add_alias("_npx__image_crop") .describe(R"code(Crop an image NDArray of shape (H x W x C) or (N x H x W x C) to the given size. 
Example: diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc index 0b95b198ae64..8f3f925780b5 100644 --- a/src/operator/image/image_random.cc +++ b/src/operator/image/image_random.cc @@ -39,6 +39,7 @@ DMLC_REGISTER_PARAMETER(RandomLightingParam); DMLC_REGISTER_PARAMETER(RandomColorJitterParam); NNVM_REGISTER_OP(_image_to_tensor) +.add_alias("_npx__image_to_tensor") .describe(R"code(Converts an image NDArray of shape (H x W x C) or (N x H x W x C) with values in the range [0, 255] to a tensor NDArray of shape (C x H x W) or (N x C x H x W) with values in the range [0, 1) @@ -102,6 +103,7 @@ with values in the range [0, 1) .add_argument("data", "NDArray-or-Symbol", "Input ndarray"); NNVM_REGISTER_OP(_image_normalize) +.add_alias("_npx__image_normalize") .describe(R"code(Normalize an tensor of shape (C x H x W) or (N x C x H x W) with mean and standard deviation. @@ -189,28 +191,34 @@ NNVM_REGISTER_OP(_backward_image_normalize) .set_attr("FCompute", NormalizeOpBackward); MXNET_REGISTER_IMAGE_AUG_OP(_image_flip_left_right) +.add_alias("_npx__image_flip_left_right") .describe(R"code()code" ADD_FILELINE) .set_attr("FCompute", FlipLeftRight); MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_flip_left_right) +.add_alias("_npx__image_random_flip_left_right") .describe(R"code()code" ADD_FILELINE) .set_attr("FCompute", RandomFlipLeftRight); MXNET_REGISTER_IMAGE_AUG_OP(_image_flip_top_bottom) +.add_alias("_npx__image_flip_top_bottom") .describe(R"code()code" ADD_FILELINE) .set_attr("FCompute", FlipTopBottom); MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_flip_top_bottom) +.add_alias("_npx__image_random_flip_top_bottom") .describe(R"code()code" ADD_FILELINE) .set_attr("FCompute", RandomFlipTopBottom); MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_brightness) +.add_alias("_npx__image_random_brightness") .describe(R"code()code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", RandomBrightness) .add_arguments(RandomEnhanceParam::__FIELDS__()); MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_contrast) +.add_alias("_npx__image_random_contrast") .describe(R"code()code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", RandomContrast) @@ -218,6 +226,7 @@ MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_contrast) MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_saturation) +.add_alias("_npx__image_random_saturation") .describe(R"code()code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", RandomSaturation) @@ -225,6 +234,7 @@ MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_saturation) MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_hue) +.add_alias("_npx__image_random_hue") .describe(R"code()code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", RandomHue) @@ -232,6 +242,7 @@ MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_hue) MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_color_jitter) +.add_alias("_npx__image_random_color_jitter") .describe(R"code()code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", RandomColorJitter) @@ -239,6 +250,7 @@ MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_color_jitter) MXNET_REGISTER_IMAGE_AUG_OP(_image_adjust_lighting) +.add_alias("_npx__image_adjust_lighting") .describe(R"code(Adjust the lighting level of the input. 
Follow the AlexNet style.)code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", AdjustLighting) @@ -246,6 +258,7 @@ MXNET_REGISTER_IMAGE_AUG_OP(_image_adjust_lighting) MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_lighting) +.add_alias("_npx__image_random_lighting") .describe(R"code(Randomly add PCA noise. Follow the AlexNet style.)code" ADD_FILELINE) .set_attr_parser(ParamParser) .set_attr("FCompute", RandomLighting) diff --git a/src/operator/image/resize.cc b/src/operator/image/resize.cc index d93769faa8b3..d2397ea72685 100644 --- a/src/operator/image/resize.cc +++ b/src/operator/image/resize.cc @@ -34,6 +34,7 @@ namespace image { DMLC_REGISTER_PARAMETER(ResizeParam); NNVM_REGISTER_OP(_image_resize) +.add_alias("_npx__image_resize") .describe(R"code(Resize an image NDArray of shape (H x W x C) or (N x H x W x C) to the given size Example: diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc index 214e41a84611..c25833b799d0 100644 --- a/src/operator/leaky_relu.cc +++ b/src/operator/leaky_relu.cc @@ -71,6 +71,7 @@ The following modified ReLU Activation functions are supported: .add_arguments(LeakyReLUParam::__FIELDS__()); NNVM_REGISTER_OP(LeakyReLU) +.add_alias("_npx_leaky_relu") .set_attr("FSetInputVarAttrOnCompose", [](const nnvm::NodeAttrs& attrs, nnvm::NodePtr var, const int index) { if (index == 1 && var->attrs.dict.find("__init__") == var->attrs.dict.end()) { diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc index 5b6cece4a92e..5abb6670c9b0 100644 --- a/src/operator/nn/activation.cc +++ b/src/operator/nn/activation.cc @@ -154,6 +154,7 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs, MXNET_OPERATOR_REGISTER_UNARY(Activation) +.add_alias("_npx_activation") .describe(R"code(Applies an activation function element-wise to the input. The following activation functions are supported: diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc index 2564609c6b90..6382d46d272d 100644 --- a/src/operator/nn/batch_norm.cc +++ b/src/operator/nn/batch_norm.cc @@ -520,6 +520,7 @@ std::vector BatchNormGrad(const nnvm::NodePtr& n, } NNVM_REGISTER_OP(BatchNorm) +.add_alias("_npx_batch_norm") .describe(R"code(Batch normalization. Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc index 8fb229889332..80469b5385eb 100644 --- a/src/operator/nn/concat.cc +++ b/src/operator/nn/concat.cc @@ -32,9 +32,9 @@ namespace mxnet { namespace op { -static bool ConcatShape(const nnvm::NodeAttrs& attrs, - mxnet::ShapeVector *in_shape, - mxnet::ShapeVector *out_shape) { +bool ConcatShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape) { using namespace mshadow; const ConcatParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), static_cast(param_.num_args)); @@ -138,9 +138,9 @@ static bool RNNParamConcatShape(const nnvm::NodeAttrs& attrs, return shape_is_known(dshape); } -static bool ConcatType(const nnvm::NodeAttrs& attrs, - std::vector *in_type, - std::vector *out_type) { +bool ConcatType(const nnvm::NodeAttrs& attrs, + std::vector *in_type, + std::vector *out_type) { const ConcatParam& param_ = nnvm::get(attrs.parsed); int dtype = -1; @@ -403,6 +403,7 @@ NNVM_REGISTER_OP(_backward_Concat) // which handles the case where the first one or two inputs may have // unknown shape that can be inferred from output shape. 
NNVM_REGISTER_OP(_rnn_param_concat) +.add_alias("_npi_rnn_param_concat") #if MXNET_USE_MKLDNN == 1 .set_attr("FResourceRequest", [](const NodeAttrs& n) { return std::vector{ResourceRequest::kTempSpace}; diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc index 536e9a731171..32ed93e4a463 100644 --- a/src/operator/nn/convolution.cc +++ b/src/operator/nn/convolution.cc @@ -397,6 +397,7 @@ struct ConvolutionGrad { }; NNVM_REGISTER_OP(Convolution) +.add_alias("_npx_convolution") .describe(R"code(Compute *N*-D convolution on *(N+2)*-D input. In the 2-D convolution, given input data with shape *(batch_size, diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc index 09b255d009e0..9f461f4e9de3 100644 --- a/src/operator/nn/deconvolution.cc +++ b/src/operator/nn/deconvolution.cc @@ -408,6 +408,7 @@ struct DeconvolutionGrad { DMLC_REGISTER_PARAMETER(DeconvolutionParam); NNVM_REGISTER_OP(Deconvolution) +.add_alias("_npx_deconvolution") .describe("Computes 1D or 2D transposed convolution (aka fractionally strided convolution) of the " "input tensor. This operation can be seen as the gradient of Convolution operation with " "respect to its input. Convolution usually reduces the size of the input. Transposed " diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc index 63da5613df84..29f13a4ffe97 100644 --- a/src/operator/nn/dropout.cc +++ b/src/operator/nn/dropout.cc @@ -65,6 +65,7 @@ struct DropoutGrad { DMLC_REGISTER_PARAMETER(DropoutParam); NNVM_REGISTER_OP(Dropout) +.add_alias("_npx_dropout") .describe(R"(Applies dropout operation to input array. - During training, each element of the input is set to zero with probability p. diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc index a097357ef5a3..a0e9938f6911 100644 --- a/src/operator/nn/fully_connected.cc +++ b/src/operator/nn/fully_connected.cc @@ -244,6 +244,7 @@ DMLC_REGISTER_PARAMETER(FullyConnectedParam); NNVM_REGISTER_OP(FullyConnected) MXNET_ADD_SPARSE_OP_ALIAS(FullyConnected) +.add_alias("_npx_fully_connected") .describe(R"code(Applies a linear transformation: :math:`Y = XW^T + b`. If ``flatten`` is set to be true, then the shapes are: diff --git a/src/operator/nn/layer_norm.cc b/src/operator/nn/layer_norm.cc index e95f47255d7a..0b53d5091194 100644 --- a/src/operator/nn/layer_norm.cc +++ b/src/operator/nn/layer_norm.cc @@ -127,6 +127,7 @@ void LayerNormGradCompute(const nnvm::NodeAttrs& attrs, } NNVM_REGISTER_OP(LayerNorm) +.add_alias("_npx_layer_norm") .describe(R"code(Layer normalization. Normalizes the channels of the input tensor by mean and variance, and applies a scale ``gamma`` as diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc index 870557756128..8a3e90da3e71 100644 --- a/src/operator/nn/pooling.cc +++ b/src/operator/nn/pooling.cc @@ -364,7 +364,8 @@ inline static bool BackwardPoolingStorageType(const nnvm::NodeAttrs &attrs, DMLC_REGISTER_PARAMETER(PoolingParam); NNVM_REGISTER_OP(Pooling) - .describe(R"code(Performs pooling on the input. +.add_alias("_npx_pooling") +.describe(R"code(Performs pooling on the input. The shapes for 1-D pooling are diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc index e44bbbb6b8f6..8f1b2e06c371 100644 --- a/src/operator/nn/softmax.cc +++ b/src/operator/nn/softmax.cc @@ -68,6 +68,7 @@ inline static bool SoftmaxStorageType(const nnvm::NodeAttrs& attrs, #endif NNVM_REGISTER_OP(softmax) +.add_alias("_npx_softmax") .describe(R"code(Applies the softmax function. 
The resulting array contains elements in the range (0,1) and the elements along the given axis sum up to 1. @@ -182,6 +183,7 @@ NNVM_REGISTER_OP(_backward_softmin) mxnet_op::softmax_bwd, true>); NNVM_REGISTER_OP(log_softmax) +.add_alias("_npx_log_softmax") .describe(R"code(Computes the log softmax of the input. This is equivalent to computing softmax followed by log. diff --git a/src/operator/numpy/np_broadcast_reduce_op.h b/src/operator/numpy/np_broadcast_reduce_op.h new file mode 100644 index 000000000000..3e28f0ad0eca --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op.h @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2015 by Contributors + * \file broadcast_reduce_op.h + * \brief Function definition of broadcast and reduce operators + */ +#ifndef MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ +#define MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ + +#include +#include +#include "../tensor/broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +struct NumpyReduceAxesParam : public dmlc::Parameter { + dmlc::optional> axis; + dmlc::optional dtype; + bool keepdims; + dmlc::optional initial; + DMLC_DECLARE_PARAMETER(NumpyReduceAxesParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional>()) + .describe("Axis or axes along which a sum is performed. The default, axis=None, will sum " + "all of the elements of the input array. If axis is negative it counts from the " + "last to the first axis."); + DMLC_DECLARE_FIELD(dtype) + .add_enum("float16", mshadow::kFloat16) + .add_enum("float32", mshadow::kFloat32) + .add_enum("float64", mshadow::kFloat64) + .add_enum("int8", mshadow::kInt8) + .add_enum("int32", mshadow::kInt32) + .add_enum("int64", mshadow::kInt64) + .set_default(dmlc::optional()) + .describe("The type of the returned array and of the accumulator in which the elements are " + "summed. The dtype of a is used by default unless a has an integer dtype of less " + "precision than the default platform integer. In that case, if a is signed then " + "the platform integer is used while if a is unsigned then an unsigned integer of " + "the same precision as the platform integer is used."); + DMLC_DECLARE_FIELD(keepdims).set_default(false) + .describe("If this is set to `True`, the reduced axes are left " + "in the result as dimension with size one."); + DMLC_DECLARE_FIELD(initial).set_default(dmlc::optional()) + .describe("Starting value for the sum."); + } +}; + +struct NumpyMaxParam : public dmlc::Parameter { + dmlc::optional> axis; + bool keepdims; + dmlc::optional initial; + DMLC_DECLARE_PARAMETER(NumpyMaxParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional>()) + .describe("Axis or axes along which a sum is performed. 
The default, axis=None, will sum " + "all of the elements of the input array. If axis is negative it counts from the " + "last to the first axis."); + DMLC_DECLARE_FIELD(keepdims).set_default(false) + .describe("If this is set to `True`, the reduced axes are left " + "in the result as dimension with size one."); + DMLC_DECLARE_FIELD(initial).set_default(dmlc::optional()) + .describe("Starting value for the sum."); + } +}; + +inline TShape NumpyReduceAxesShapeImpl(const TShape& ishape, + const dmlc::optional>& axis, + bool keepdims) { + // TODO(junwu): improve the logic + // If input is a scalar, output should be a scalar too + if (ishape.ndim() == 0) { + if (axis.has_value()) { + const mxnet::Tuple& axes = axis.value(); + if (axes.ndim() > 0) { + CHECK_EQ(axes.ndim(), 1); + CHECK(axes[0] == 0 || axes[0] == -1); + } + } + return TShape(0, -1); + } + + // axis=None, do global reduction + if (!axis.has_value()) { + if (keepdims) { + return TShape(ishape.ndim(), 1); + } else { + return TShape(0, -1); + } + } + + // axis = (), will return identity(input) + if (axis.value().ndim() == 0) { + return ishape; + } + + // axis has value + mxnet::Tuple axes(axis.value()); + for (index_t i = 0; i < axes.ndim(); i++) { + if (axes[i] < 0) { + axes[i] += ishape.ndim(); + } + } + std::sort(axes.begin(), axes.end()); + + for (index_t i = 1; i < axes.ndim(); i++) { + CHECK_LT(axes[i-1], axes[i]) + << "Reduction axes have duplicates " + << axes; + } + CHECK_LT(axes[axes.ndim()-1], ishape.ndim()) + << "Reduction axis " << axes[axes.ndim()-1] + << " Exceeds input dimensions " << ishape; + CHECK_GE(axes[0], 0) + << "Reduction axis " << axis.value() + << " Exceeds input dimensions " << ishape; + + TShape oshape; + if (keepdims) { + oshape = TShape(ishape); + } else { + oshape = TShape(ishape.ndim() - axes.ndim(), -1); + } + + if (keepdims) { + for (index_t i = 0; i < axes.ndim(); ++i) { + oshape[axes[i]] = 1; + } + } else { + for (index_t i = 0, j = 0, k = 0; i < ishape.ndim(); ++i) { + if (j < axes.ndim() && i == axes[j]) { + ++j; + continue; + } + oshape[k++] = ishape[i]; + } + } + return oshape; +} + +inline bool NumpyReduceAxesShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, + NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims)); + return shape_is_known(out_attrs->at(0)); +} + +inline bool NumpyMaxShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + const NumpyMaxParam& param = nnvm::get(attrs.parsed); + // check the case where the reduction axis should not be zero + bool is_all_reducded_axes_not_zero = true; + const TShape& ishape = (*in_attrs)[0]; + if (param.axis.has_value()) { + const mxnet::Tuple& axes = param.axis.value(); + for (int i = 0; i < axes.ndim(); ++i) { + if (ishape[axes[i]] == 0) { + is_all_reducded_axes_not_zero = false; + break; + } + } + } else { + if (ishape.Size() == 0) { + // global reduction should excuted only when input have size more than 0 + is_all_reducded_axes_not_zero = false; + } + } + CHECK(is_all_reducded_axes_not_zero) + << "zero-size array to reduction operation maximum which has no identity"; + SHAPE_ASSIGN_CHECK(*out_attrs, 
0, + NumpyReduceAxesShapeImpl((*in_attrs)[0], param.axis, param.keepdims)); + return shape_is_known(out_attrs->at(0)); +} + +template +inline bool NeedSafeAcc(int itype, int otype) { + bool rule = (itype != otype) || (itype != mshadow::kFloat32 && itype != mshadow::kFloat64); + return safe_acc_hint && rule; +} + +template +void NumpyReduceAxesCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); + if (param.initial.has_value()) { + LOG(FATAL) << "initial is not supported yet"; + } + if (outputs[0].shape_.Size() == 0U) return; // zero-size tensor + if (param.axis.has_value() && param.axis.value().ndim() == 0) { + UnaryOp::IdentityCompute(attrs, ctx, inputs, req, outputs); + } + TShape small; + if (param.keepdims) { + small = outputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true); + } + + if (NeedSafeAcc(inputs[0].type_flag_, outputs[0].type_flag_)) { + ReduceAxesComputeImpl(ctx, inputs, req, outputs, small); + } else { + ReduceAxesComputeImpl(ctx, inputs, req, outputs, small); + } +} + +template +void NumpyMaxCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + const NumpyMaxParam& param = nnvm::get(attrs.parsed); + if (param.initial.has_value()) { + LOG(FATAL) << "initial is not supported yet"; + } + if (inputs[0].shape_.Size() == 0U || outputs[0].shape_.Size() == 0U) return; // zero-size tensor + if (param.axis.has_value() && param.axis.value().ndim() == 0) { + UnaryOp::IdentityCompute(attrs, ctx, inputs, req, outputs); + } + TShape small; + if (param.keepdims) { + small = outputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(inputs[0].shape_, param.axis, true); + } + ReduceAxesComputeImpl(ctx, inputs, req, outputs, small); +} + +template +inline void NumpyReduceAxesBackwardUseNone(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + const NumpyReduceAxesParam& param = nnvm::get(attrs.parsed); + TShape small; + if (param.keepdims) { + small = inputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true); + } + + BroadcastComputeImpl(attrs, ctx, inputs, req, outputs, small); + if (normalize) { + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, IType, { + Tensor igrad = outputs[0].FlatTo1D(s); + igrad /= scalar(outputs[0].Size()/inputs[0].Size()); + }); + } +} + +template +void NumpyMaxBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + const NumpyMaxParam& param = nnvm::get(attrs.parsed); + TShape small; + if (param.keepdims) { + small = inputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true); + } + ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, req, outputs); +} + +template +void NumpyReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow::expr; + const NumpyReduceAxesParam& param = 
nnvm::get(attrs.parsed); + TShape small; + if (param.keepdims) { + small = inputs[0].shape_; + } else { + small = NumpyReduceAxesShapeImpl(outputs[0].shape_, param.axis, true); + } + ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, req, outputs); +} + +template +void NumpyBroadcastToForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + if (outputs[0].shape_.Size() == 0U) return; // zero-size tensor + TShape expanded_ishape(outputs[0].shape_.ndim(), 1); + const TShape& ishape = inputs[0].shape_; + CHECK_LE(ishape.ndim(), expanded_ishape.ndim()) << "output ndim cannot be less than input ndim"; + const int ndim_delta = expanded_ishape.ndim() - ishape.ndim(); + for (int i = 0; i < ishape.ndim(); ++i) { + expanded_ishape[i + ndim_delta] = ishape[i]; + } + BroadcastComputeImpl(attrs, ctx, {inputs[0].reshape(expanded_ishape)}, + req, outputs, expanded_ishape); +} + +template +void NumpyBroadcastToBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + TShape expanded_igrad_shape(inputs[0].shape_.ndim(), 1); + const TShape& igrad_shape = outputs[0].shape_; + CHECK_LE(igrad_shape.ndim(), expanded_igrad_shape.ndim()) + << "output ndim cannot be less than input ndim"; + const int ndim_delta = expanded_igrad_shape.ndim() - igrad_shape.ndim(); + for (int i = 0; i < igrad_shape.ndim(); ++i) { + expanded_igrad_shape[i + ndim_delta] = igrad_shape[i]; + } + if (NeedSafeAcc(inputs[0].type_flag_, outputs[0].type_flag_)) { + ReduceAxesComputeImpl( + ctx, inputs, req, {outputs[0].reshape(expanded_igrad_shape)}, expanded_igrad_shape); + } else { + ReduceAxesComputeImpl( + ctx, inputs, req, {outputs[0].reshape(expanded_igrad_shape)}, expanded_igrad_shape); + } +} + +} // namespace op +} // namespace mxnet +#endif // MXNET_OPERATOR_NUMPY_NP_BROADCAST_REDUCE_OP_H_ diff --git a/src/operator/numpy/np_broadcast_reduce_op_index.cc b/src/operator/numpy/np_broadcast_reduce_op_index.cc new file mode 100644 index 000000000000..bd6915cc9b27 --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_index.cc @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_broadcast_reduce_op_index.cc + * \brief CPU Implementation of broadcast and reduce functions based on index. 
+ */ +#include "./np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +bool NumpyReduceAxisShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + if (!shape_is_known(in_attrs->at(0))) { + return false; + } + const ReduceAxisParam& param = nnvm::get(attrs.parsed); + dmlc::optional> axes; + if (param.axis.has_value()) { + mxnet::Tuple t({param.axis.value()}); + axes = dmlc::optional>(t); + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, + NumpyReduceAxesShapeImpl((*in_attrs)[0], axes, param.keepdims)); + return shape_is_known(out_attrs->at(0)); +} + +NNVM_REGISTER_OP(_npi_argmax) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReduceAxisShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.add_argument("data", "NDArray-or-Symbol", "The input") +.set_attr("FCompute", SearchAxisCompute) +.set_attr("FGradient", MakeZeroGradNodes) +.add_arguments(ReduceAxisParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_broadcast_reduce_op_index.cu b/src/operator/numpy/np_broadcast_reduce_op_index.cu new file mode 100644 index 000000000000..aae66a6d660a --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_index.cu @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_broadcast_reduce_op_index.cu + * \brief GPU Implementation of reduce functions. + */ +#include "np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npi_argmax) +.set_attr("FCompute", SearchAxisCompute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cc b/src/operator/numpy/np_broadcast_reduce_op_value.cc new file mode 100644 index 000000000000..9cf5c215dee3 --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cc @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_broadcast_reduce_op_value.cc + * \brief CPU Implementation of broadcast and reduce functions based on value. + */ + +#include "np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(NumpyReduceAxesParam); +DMLC_REGISTER_PARAMETER(NumpyMaxParam); + +inline bool NumpySumType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const NumpyReduceAxesParam ¶m = nnvm::get(attrs.parsed); + + if (param.dtype.has_value()) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + } + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +NNVM_REGISTER_OP(_np_sum) +.describe(R"code()code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReduceAxesShape) +.set_attr("FInferType", NumpySumType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyReduceAxesParam::__FIELDS__()) +.set_attr("FCompute", NumpyReduceAxesCompute) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_sum"}); + +NNVM_REGISTER_OP(_backward_np_sum) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_num_inputs(1) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +inline bool IsIntType(const int dtype) { + return (dtype == mshadow::kUint8 || + dtype == mshadow::kInt32 || + dtype == mshadow::kInt8 || + dtype == mshadow::kInt64); +} + +inline bool NumpyMeanType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const NumpyReduceAxesParam ¶m = nnvm::get(attrs.parsed); + + if (param.dtype.has_value()) { + if (IsIntType(in_attrs->at(0)) && !IsIntType(param.dtype.value())) { + LOG(FATAL) << "Output cannot be float type when input is integer type for now"; + } + TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + } + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +NNVM_REGISTER_OP(_npi_mean) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReduceAxesShape) +.set_attr("FInferType", NumpyMeanType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyReduceAxesParam::__FIELDS__()) +.set_attr("FCompute", NumpyReduceAxesCompute) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_mean"}); + +NNVM_REGISTER_OP(_backward_np_mean) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_num_inputs(1) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +inline bool NumpyMaxType(const nnvm::NodeAttrs& attrs, + 
std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +NNVM_REGISTER_OP(_np_max) +.add_alias("_np_amax") +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyMaxShape) +.set_attr("FInferType", NumpyMaxType) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.add_arguments(NumpyMaxParam::__FIELDS__()) +.set_attr("FCompute", NumpyMaxCompute) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ReduceGrad{"_backward_np_max"}); + +NNVM_REGISTER_OP(_backward_np_max) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_num_inputs(3) +.set_attr("FCompute", NumpyMaxBackward); + +NNVM_REGISTER_OP(_np_prod) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReduceAxesShape) +.set_attr("FInferType", NumpySumType) +.add_arguments(NumpyReduceAxesParam::__FIELDS__()) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input") +.set_attr("FCompute", NumpyReduceAxesCompute) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FGradient", ReduceGrad{"_backward_np_prod"}); + +NNVM_REGISTER_OP(_backward_np_prod) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_attr("FCompute", NumpyReduceAxesBackwardUseInOut); + +bool NumpyBroadcastToShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + mxnet::TShape& ishape = (*in_attrs)[0]; + if (!mxnet::shape_is_known(ishape)) return false; + const BroadcastToParam& param = nnvm::get(attrs.parsed); + CHECK(mxnet::shape_is_known(param.shape)) + << "the objective shape for broadcasting array must be known"; + CHECK_LE(ishape.ndim(), param.shape.ndim()) + << "shape " << ishape << " is not broadcastable to " << param.shape; + for (int i = param.shape.ndim() - 1; i >= 0; --i) { + int j = i - param.shape.ndim() + ishape.ndim(); + if (j < 0) break; + CHECK(ishape[j] == param.shape[i] || ishape[j] == 1) + << "shape " << ishape << " is not broadcastable to " << param.shape; + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, param.shape); + return true; +} + +NNVM_REGISTER_OP(_np_broadcast_to) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, + const std::vector& ograds) { + return MakeNonlossGradNode("_backward_np_broadcast_to", n, ograds, {}, n->attrs.dict); + }) +.add_argument("array", "NDArray-or-Symbol", "The input") +.set_attr_parser(ParamParser) +.add_arguments(BroadcastToParam::__FIELDS__()) +.set_attr("FInferShape", NumpyBroadcastToShape) +.set_attr("FCompute", NumpyBroadcastToForward); + +NNVM_REGISTER_OP(_backward_np_broadcast_to) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_attr("FCompute", NumpyBroadcastToBackward) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + 
return std::vector{ResourceRequest::kTempSpace}; + }); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_broadcast_reduce_op_value.cu b/src/operator/numpy/np_broadcast_reduce_op_value.cu new file mode 100644 index 000000000000..6e18ebc68a26 --- /dev/null +++ b/src/operator/numpy/np_broadcast_reduce_op_value.cu @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_broadcast_reduce_op_value.cu + * \brief GPU Implementation of reduce functions based on value. + */ +#include "np_broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_sum) +.set_attr("FCompute", NumpyReduceAxesCompute); + +NNVM_REGISTER_OP(_backward_np_sum) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +NNVM_REGISTER_OP(_npi_mean) +.set_attr("FCompute", NumpyReduceAxesCompute); + +NNVM_REGISTER_OP(_backward_np_mean) +.set_attr("FCompute", NumpyReduceAxesBackwardUseNone); + +NNVM_REGISTER_OP(_np_max) +.set_attr("FCompute", NumpyMaxCompute); + +NNVM_REGISTER_OP(_backward_np_max) +.set_attr("FCompute", NumpyMaxBackward); + +NNVM_REGISTER_OP(_np_prod) +.set_attr("FCompute", NumpyReduceAxesCompute); + +NNVM_REGISTER_OP(_backward_np_prod) +.set_attr("FCompute", NumpyReduceAxesBackwardUseInOut); + +NNVM_REGISTER_OP(_np_broadcast_to) +.set_attr("FCompute", NumpyBroadcastToForward); + +NNVM_REGISTER_OP(_backward_np_broadcast_to) +.set_attr("FCompute", NumpyBroadcastToBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_cumsum-inl.h b/src/operator/numpy/np_cumsum-inl.h new file mode 100644 index 000000000000..a9d2d8b43681 --- /dev/null +++ b/src/operator/numpy/np_cumsum-inl.h @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file np_cumsum-inl.h + * \brief Function definition of numpy-compatible cumsum operator + */ + +#ifndef MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_ + +#include +#include +#include +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" + +namespace mxnet { +namespace op { + +struct CumsumParam : public dmlc::Parameter { + dmlc::optional axis; + dmlc::optional dtype; + DMLC_DECLARE_PARAMETER(CumsumParam) { + DMLC_DECLARE_FIELD(axis) + .set_default(dmlc::optional()) + .describe("Axis along which the cumulative sum is computed." + " The default (None) is to compute the cumsum over the flattened array."); + DMLC_DECLARE_FIELD(dtype) + .add_enum("float16", mshadow::kFloat16) + .add_enum("float32", mshadow::kFloat32) + .add_enum("float64", mshadow::kFloat64) + .add_enum("int8", mshadow::kInt8) + .add_enum("int32", mshadow::kInt32) + .add_enum("int64", mshadow::kInt64) + .set_default(dmlc::optional()) + .describe("Type of the returned array and of the accumulator in which the elements" + " are summed. If dtype is not specified, it defaults to the dtype of a," + " unless a has an integer dtype with a precision less than that of the" + " default platform integer. In that case, the default platform integer is used."); + } +}; + +struct cumsum_forward { + template + MSHADOW_XINLINE static void Map(int i, + OType *out, + const IType *in, + const int middle, + const int trailing) { + int left = i / trailing, right = i % trailing; + int offset = left * middle * trailing + right; + const IType *lane_in = in + offset; + OType *lane_out = out + offset; + lane_out[0] = OType(lane_in[0]); + for (int j = 1; j < middle; ++j) { + lane_out[j * trailing] = lane_out[(j - 1) * trailing] + OType(lane_in[j * trailing]); + } + } +}; + +template +void CumsumForwardImpl(const OpContext& ctx, + const TBlob& in, + const TBlob& out, + const dmlc::optional& axis) { + using namespace mshadow; + using namespace mxnet_op; + + int middle = axis.has_value() ? 
out.shape_[axis.value()] : out.Size(); + if (middle == 0 || out.Size() == 0) return; + int trailing = 1; + if (axis.has_value()) { + for (int i = axis.value() + 1; i < out.shape_.ndim(); ++i) { + trailing *= out.shape_[i]; + } + } + + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(in.type_flag_, IType, { + MSHADOW_TYPE_SWITCH(out.type_flag_, OType, { + Kernel::Launch( + s, out.Size() / middle, out.dptr(), + in.dptr(), middle, trailing); + }); + }); +} + +template +void CumsumForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + CumsumForwardImpl(ctx, inputs[0], outputs[0], param.axis); +} + +struct cumsum_backward { + template + MSHADOW_XINLINE static void Map(int i, + IType *igrad, + const OType *ograd, + const int middle, + const int trailing) { + int left = i / trailing, right = i % trailing; + int offset = left * middle * trailing + right; + const OType *lane_ograd = ograd + offset; + IType *lane_igrad = igrad + offset; + lane_igrad[(middle - 1) * trailing] = IType(lane_ograd[(middle - 1) * trailing]); + for (int j = middle - 2; j >= 0; --j) { + lane_igrad[j * trailing] = lane_igrad[(j + 1) * trailing] + IType(lane_ograd[j * trailing]); + } + } +}; + +template +void CumsumBackwardImpl(const OpContext& ctx, + const TBlob& ograd, + const TBlob& igrad, + const dmlc::optional& axis) { + using namespace mshadow; + using namespace mxnet_op; + int middle = axis.has_value() ? igrad.shape_[axis.value()] : igrad.Size(); + if (middle == 0 || igrad.Size() == 0) return; + int trailing = 1; + if (axis.has_value()) { + for (int i = axis.value() + 1; i < igrad.shape_.ndim(); ++i) { + trailing *= igrad.shape_[i]; + } + } + Stream *s = ctx.get_stream(); + MSHADOW_TYPE_SWITCH(igrad.type_flag_, IType, { + MSHADOW_TYPE_SWITCH(ograd.type_flag_, OType, { + Kernel::Launch( + s, igrad.Size() / middle, igrad.dptr(), + ograd.dptr(), middle, trailing); + }); + }); +} + +template +void CumsumBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + CumsumBackwardImpl(ctx, inputs[0], outputs[0], param.axis); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_CUMSUM_INL_H_ diff --git a/src/operator/numpy/np_cumsum.cc b/src/operator/numpy/np_cumsum.cc new file mode 100644 index 000000000000..8f16f25234ba --- /dev/null +++ b/src/operator/numpy/np_cumsum.cc @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_cumsum.cc + * \brief CPU implementation of numpy-compatible cumsum operator + */ + +#include "./np_cumsum-inl.h" + +namespace mxnet { +namespace op { + +inline bool CumsumShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + if (param.axis.has_value()) { + return ElemwiseShape<1, 1>(attrs, in_attrs, out_attrs); + } else { + TShape out_shape(1, in_attrs->at(0).Size()); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, out_shape); + return shape_is_known(out_attrs->at(0)); + } +} + +inline bool CumsumType(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + const CumsumParam ¶m = nnvm::get(attrs.parsed); + + if (param.dtype.has_value()) { + TYPE_ASSIGN_CHECK(*out_attrs, 0, param.dtype.value()); + } else { + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + } + + return out_attrs->at(0) != -1 && in_attrs->at(0) != -1; +} + +DMLC_REGISTER_PARAMETER(CumsumParam); + +NNVM_REGISTER_OP(_np_cumsum) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FInferShape", CumsumShape) +.set_attr("FInferType", CumsumType) +.set_attr("FCompute", CumsumForward) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_cumsum"}) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.add_argument("a", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(CumsumParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_np_cumsum) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("TIsBackward", true) +.set_attr("FCompute", CumsumBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_cumsum.cu b/src/operator/numpy/np_cumsum.cu new file mode 100644 index 000000000000..cc574ebf72c5 --- /dev/null +++ b/src/operator/numpy/np_cumsum.cu @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file np_cumsum.cu + * \brief GPU implementation of numpy-compatible cumsum operator + */ + +#include "./np_cumsum-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_cumsum) +.set_attr("FCompute", CumsumForward); + +NNVM_REGISTER_OP(_backward_np_cumsum) +.set_attr("FCompute", CumsumBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_dot-inl.h b/src/operator/numpy/np_dot-inl.h new file mode 100644 index 000000000000..fa67c07b45f6 --- /dev/null +++ b/src/operator/numpy/np_dot-inl.h @@ -0,0 +1,275 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_dot-inl.h + * \brief Function definition of matrix numpy-compatible dot operator + */ + +#ifndef MXNET_OPERATOR_NUMPY_NP_DOT_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_DOT_INL_H_ + +#include +#include +#include "../tensor/dot-inl.h" +#include "../tensor/elemwise_binary_op.h" +#include "../tensor/broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +template +inline void MMImpl(const OpContext& ctx, + const TBlob& a, + const TBlob& b, + const TBlob& out, + const OpReqType req, + const bool trans_a = false, + const bool trans_b = false) { + using namespace mshadow; + using namespace mshadow_op; + + Stream *s = ctx.get_stream(); + index_t ma, na, mb, nb; + na = a.size(a.ndim() - 1); + ma = a.Size() / na; + mb = b.size(0); + nb = b.Size() / mb; + MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, { + Tensor input0 = a.get_with_shape(Shape2(ma, na), s); + Tensor input1 = b.get_with_shape(Shape2(mb, nb), s); + Tensor output0; + if (trans_a && trans_b) { + output0 = out.get_with_shape(Shape2(na, mb), s); + ASSIGN_DISPATCH(output0, req, dot(input0.T(), input1.T())); + } else if (!trans_a && trans_b) { + output0 = out.get_with_shape(Shape2(ma, mb), s); + ASSIGN_DISPATCH(output0, req, dot(input0, input1.T())); + } else if (trans_a && !trans_b) { + output0 = out.get_with_shape(Shape2(na, nb), s); + ASSIGN_DISPATCH(output0, req, dot(input0.T(), input1)); + } else { + output0 = out.get_with_shape(Shape2(ma, nb), s); + ASSIGN_DISPATCH(output0, req, dot(input0, input1)); + } + }); +} + +template +struct scalar_mul_kernel { + template + MSHADOW_XINLINE static void Map(int i, DType *out, const DType* tensor, const DType *scalar) { + KERNEL_ASSIGN(out[i], req, tensor[i] * scalar[0]); + } +}; + +template +inline void NumpyDotForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + + if (req[0] == kNullOp) return; + const TBlob& a = inputs[0]; + const TBlob& b = inputs[1]; + const TBlob& out = outputs[0]; + if 
(out.shape_.Size() == 0U) return; // zero-size tensor, no need to launch kernel + const mxnet::TShape a_shape = a.shape_; + const mxnet::TShape b_shape = b.shape_; + + Stream *s = ctx.get_stream(); + CHECK_EQ(out.type_flag_, a.type_flag_) + << "Binary function only support input/output with the same type"; + CHECK_EQ(out.type_flag_, b.type_flag_) + << "Binary function only support input/output with the same type"; + CHECK(out.type_flag_ == kFloat32 || out.type_flag_ == kFloat64 || + (out.type_flag_ == kFloat16 && ctx.run_ctx.ctx.dev_mask() == mshadow::gpu::kDevMask)) + << "dot only supports float32/float64 for CPU, and float16/float32/float64 for GPU"; + MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, DType, { + if (a_shape.Size() == 0U || b_shape.Size() == 0U) { + if (req[0] != kAddTo) { + Tensor out_data = out.get_with_shape( + Shape1(out.shape_.Size()), s); + out_data = static_cast(0); + } + } else if (a_shape.ndim() == 1 && b_shape.ndim() == 1) { + // Case 1: both 1-D arrays, inner product of vectors + if (out.type_flag_ == kFloat16) { + MMImpl(ctx, a, b, out, req[0]); + } else { + CHECK_NE(req[0], kAddTo) << "AddTo not yet supported"; + Tensor mock_1d = out.get_with_shape(Shape1(1), s); + VectorDot(mock_1d, a.get(s), b.get(s)); + } + } else if (a_shape.ndim() == 2 && b_shape.ndim() == 2) { + // Case 2: both 2-D arrays, matrix multiplication + MMImpl(ctx, a, b, out, req[0]); + } else if (a_shape.ndim() == 0 && b_shape.ndim() == 0) { + // Case 3: both 0-D scalars, equivalent to multiply + Tensor a_data = a.get_with_shape(Shape1(1), s); + Tensor b_data = b.get_with_shape(Shape1(1), s); + Tensor out_data = out.get_with_shape(Shape1(1), s); + ASSIGN_DISPATCH(out_data, req[0], a_data * b_data); + } else if (a_shape.ndim() == 0 || b_shape.ndim() == 0) { + const DType* tensor = (a_shape.ndim() == 0) ? b.dptr() : a.dptr(); + const DType* scalar = (a_shape.ndim() == 0) ? a.dptr() : b.dptr(); + // Case 3.5: either of them is a scalar, just scale by one of them + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + Kernel, xpu>::Launch( + s, out.Size(), out.dptr(), tensor, scalar); + }); + } else if (a_shape.ndim() == 1 || b_shape.ndim() == 1) { + // Case 4: a is N-D array and b is 1-D array, sum product over the last axis + MMImpl(ctx, a, b, out, req[0]); + } else { + // Case 5: a is N-D array and b is M-D array, sum product over the last axis + // of a and the 2nd-to-last axis of b + // TODO(haojin2): To be implemented... 
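+      // Illustrative shapes (not from the original patch): with a.shape = (2, 3, 4) and
+      // b.shape = (4, 5), `a` is viewed as a (6, 4) matrix and multiplied by `b`, and the
+      // (6, 5) result fills the (2, 3, 5) output, i.e. out[i, j, m] = sum_k a[i, j, k] * b[k, m].
+      // Only the b.ndim() == 2 special case is handled below; other M-D cases still hit
+      // the LOG(FATAL) branch.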
+ if (b_shape.ndim() != 2) { + LOG(FATAL) << "Only support case 5 when b.ndim = 2"; + } + MMImpl(ctx, a, b, out, req[0]); + } + }); +} + +template +inline void NumpyDotBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mshadow_op; + + CHECK_EQ(inputs.size(), 3U); + CHECK_EQ(outputs.size(), 2U); + + const TBlob& ograd = inputs[0]; + if (ograd.shape_.Size() == 0U) return; + const TBlob& a = inputs[1]; + const TBlob& b = inputs[2]; + const TBlob& grad_a = outputs[0]; + const TBlob& grad_b = outputs[1]; + const mxnet::TShape a_shape = a.shape_; + const mxnet::TShape b_shape = b.shape_; + if (a_shape.Size() == 0U || b_shape.Size() == 0U) return; + + Stream *s = ctx.get_stream(); + MSHADOW_REAL_TYPE_SWITCH(ograd.type_flag_, DType, { + if (a_shape.ndim() == 1 && b_shape.ndim() == 1) { + // Case 1: both 1-D arrays, inner product of vectors + Tensor out_grad = ograd.get_with_shape(Shape1(1), s); + Tensor a_data = a.get(s); + Tensor b_data = b.get(s); + Tensor a_grad = grad_a.get(s); + Tensor b_grad = grad_b.get(s); + ASSIGN_DISPATCH(b_grad, req[1], + broadcast_scalar(out_grad, a_data.shape_) * a_data); + ASSIGN_DISPATCH(a_grad, req[0], + broadcast_scalar(out_grad, a_data.shape_) * b_data); + } else if (a_shape.ndim() == 2 && b_shape.ndim() == 2) { + // Case 2: both 2-D arrays, matrix multiplication + MMImpl(ctx, a, ograd, grad_b, req[1], true, false); + MMImpl(ctx, ograd, b, grad_a, req[0], false, true); + } else if (a_shape.ndim() == 0 && b_shape.ndim() == 0) { + // Case 3: both 0-D scalars, equivalent to multiply + Tensor out_grad = ograd.get_with_shape(Shape1(1), s); + Tensor a_data = a.get_with_shape(Shape1(1), s); + Tensor b_data = b.get_with_shape(Shape1(1), s); + Tensor a_grad = grad_a.get_with_shape(Shape1(1), s); + Tensor b_grad = grad_b.get_with_shape(Shape1(1), s); + ASSIGN_DISPATCH(a_grad, req[0], b_data * out_grad); + ASSIGN_DISPATCH(b_grad, req[1], a_data * out_grad); + } else if (a_shape.ndim() == 0 || b_shape.ndim() == 0) { + // Case 3.5: either of them is a scalar, just scale by one of them + const TBlob& tensor = (a_shape.ndim() == 0) ? b : a; + const TBlob& tensor_grad = (a_shape.ndim() == 0) ? grad_b : grad_a; + const TBlob& scalar = (a_shape.ndim() == 0) ? a : b; + const TBlob& scalar_grad = (a_shape.ndim() == 0) ? grad_a : grad_b; + Tensor scalar_ = scalar.get_with_shape(Shape1(1), s); + Tensor scalar_grad_ = scalar_grad.get_with_shape(Shape1(1), s); + Tensor tensor_ = tensor.FlatTo1D(s); + Tensor tensor_grad_ = tensor_grad.FlatTo1D(s); + Tensor ograd_ = ograd.FlatTo1D(s); + const OpReqType& tensor_req = (a_shape.ndim() == 0) ? req[1] : req[0]; + const OpReqType& scalar_req = (a_shape.ndim() == 0) ? req[0] : req[1]; + ASSIGN_DISPATCH(tensor_grad_, tensor_req, + broadcast_scalar(scalar_, tensor_grad_.shape_) * ograd_); + // TODO(haojin2): Get rid of temporary space. 
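+      // The scalar operand contributes to every output element, so its gradient is
+      // sum(tensor * ograd): the element-wise product is staged in the temporary buffer
+      // below and then sum-reduced into the single-element scalar gradient (hence the
+      // TODO above about removing the extra space).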
+ Tensor temp_space = + ctx.requested[0].get_space_typed(Shape1(ograd.shape_.Size()), s); + ASSIGN_DISPATCH(temp_space, kWriteTo, tensor_ * ograd_); + + ReduceAxesComputeImpl( + ctx, {TBlob(temp_space)}, {scalar_req}, {TBlob(scalar_grad_)}, scalar_grad_.shape_); + } else if (b_shape.ndim() == 1) { + size_t na = a_shape[a_shape.ndim() - 1]; + size_t ma = a_shape.Size() / na; + Tensor a_ = + a.get_with_shape(Shape2(ma, na), s); + Tensor b_ = + b.get_with_shape(Shape2(b_shape.Size(), 1), s); + Tensor grad_a_ = + grad_a.get_with_shape(Shape2(ma, na), s); + Tensor grad_b_ = + grad_b.get_with_shape(Shape2(b_shape.Size(), 1), s); + Tensor ograd_ = + ograd.get_with_shape(Shape2(ograd.shape_.Size(), 1), s); + // Case 4: a is N-D array and b is 1-D array, sum product over the last axis + MMImpl(ctx, TBlob(a_), TBlob(ograd_), TBlob(grad_b_), req[1], true, false); + MMImpl(ctx, TBlob(ograd_), TBlob(b_), TBlob(grad_a_), req[0], false, true); + } else { + // Case 5: a is N-D array and b is M-D array, sum product over the last axis + // of a and the 2nd-to-last axis of b + // TODO(haojin2): To be implemented... + if (b_shape.ndim() != 2) { + LOG(FATAL) << "Only support case 5 when b.ndim = 2"; + } else { // a is N-D, b is 2D + index_t na = a_shape[a_shape.ndim() - 1]; + index_t ma = a_shape.Size() / na; + index_t nograd = ograd.shape_[ograd.shape_.ndim() - 1]; + index_t mograd = ograd.shape_.Size() / nograd; + + Tensor a_2d = + a.get_with_shape(Shape2(ma, na), s); + Tensor grad_a_2d = + grad_a.get_with_shape(Shape2(ma, na), s); + Tensor b_2d = b.FlatTo2D(s); + Tensor grad_b_2d = grad_b.FlatTo2D(s); + Tensor ograd_2d = + ograd.get_with_shape(Shape2(mograd, nograd), s); + + MMImpl(ctx, TBlob(a_2d), TBlob(ograd_2d), TBlob(grad_b_2d), req[1], true, false); + MMImpl(ctx, TBlob(ograd_2d), TBlob(b_2d), TBlob(grad_a_2d), req[0], false, true); + } + } + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_DOT_INL_H_ diff --git a/src/operator/numpy/np_dot.cc b/src/operator/numpy/np_dot.cc new file mode 100644 index 000000000000..627e68877998 --- /dev/null +++ b/src/operator/numpy/np_dot.cc @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file np_dot.cc + * \brief CPU Implementation of numpy-compatible dot + */ + +#include "./np_dot-inl.h" + +namespace mxnet { +namespace op { + +inline bool NumpyDotShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + CHECK_EQ(in_attrs->size(), 2U); + CHECK_EQ(out_attrs->size(), 1U); + + const mxnet::TShape& a_shape = in_attrs->at(0); + const mxnet::TShape& b_shape = in_attrs->at(1); + + if (!ndim_is_known(a_shape) || !ndim_is_known(b_shape)) { + return false; + } + + if (a_shape.ndim() == 1 && b_shape.ndim() == 1) { + // Case 1: both 1-D arrays, inner product of vectors + SHAPE_ASSIGN_CHECK(*in_attrs, 0, in_attrs->at(1)); + SHAPE_ASSIGN_CHECK(*in_attrs, 1, in_attrs->at(0)); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(0, 0)); + } else if (a_shape.ndim() == 2 && b_shape.ndim() == 2) { + // Case 2: both 2-D arrays, matrix multiplication + mxnet::TShape tmp_shape(2, -1); + tmp_shape[1] = b_shape[0]; + SHAPE_ASSIGN_CHECK(*in_attrs, 0, tmp_shape); + + tmp_shape[0] = a_shape[1]; + tmp_shape[1] = -1; + SHAPE_ASSIGN_CHECK(*in_attrs, 1, tmp_shape); + + tmp_shape[0] = a_shape[0]; + tmp_shape[1] = b_shape[1]; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, tmp_shape); + } else if (a_shape.ndim() == 0 || b_shape.ndim() == 0) { + // Case 3 + 3.5: either of them is a scalar, just scale by one of them + mxnet::TShape oshape = (a_shape.ndim() == 0) ? b_shape : a_shape; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); + } else if (b_shape.ndim() == 1) { + // Case 4: a is N-D array and b is 1-D array, sum product over the last axis + TShape tmp_shape(a_shape.ndim(), -1); + tmp_shape[a_shape.ndim() - 1] = b_shape[0]; + SHAPE_ASSIGN_CHECK(*in_attrs, 0, tmp_shape); + + tmp_shape = TShape(1, -1); + tmp_shape[0] = a_shape[a_shape.ndim() - 1]; + SHAPE_ASSIGN_CHECK(*in_attrs, 1, tmp_shape); + + mxnet::TShape out_shape(a_shape.ndim() - 1, -1); + for (int i = 0; i < a_shape.ndim() - 1; ++i) { + out_shape[i] = a_shape[i]; + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, out_shape); + } else { + // Case 5: a is N-D array and b is M-D array, sum product over the last axis + // of a and the 2nd-to-last axis of b + TShape tmp_shape(a_shape.ndim(), -1); + tmp_shape[a_shape.ndim() - 1] = b_shape[b_shape.ndim() - 2]; + SHAPE_ASSIGN_CHECK(*in_attrs, 0, tmp_shape); + + tmp_shape = TShape(b_shape.ndim(), -1); + tmp_shape[b_shape.ndim() - 2] = a_shape[a_shape.ndim() - 1]; + SHAPE_ASSIGN_CHECK(*in_attrs, 1, tmp_shape); + + tmp_shape = TShape(a_shape.ndim() + b_shape.ndim() - 2, -1); + for (int i = 0; i < a_shape.ndim() - 1; ++i) { + tmp_shape[i] = a_shape[i]; + } + for (int i = 0; i < b_shape.ndim() - 2; ++i) { + tmp_shape[i + a_shape.ndim() - 1] = b_shape[i]; + } + tmp_shape[tmp_shape.ndim() - 1] = b_shape[b_shape.ndim() - 1]; + SHAPE_ASSIGN_CHECK(*out_attrs, 0, tmp_shape); + } + return shape_is_known(*in_attrs) && shape_is_known(*out_attrs); +} + +NNVM_REGISTER_OP(_np_dot) +.describe(R"doc(Dot product of two arrays. Specifically, + +- If both a and b are 1-D arrays, it is inner product of vectors. + +- If both a and b are 2-D arrays, it is matrix multiplication. + +- If either a or b is 0-D (scalar), it is equivalent to multiply and using numpy.multiply(a, b) or a * b is preferred. + +- If a is an N-D array and b is a 1-D array, it is a sum product over the last axis of a and b. 
+ +- If a is an N-D array and b is an M-D array (where M>=2), it is a sum product over the last axis of a and the second-to-last axis of b: + + Example :: + + dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m]) + +)doc" ADD_FILELINE) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a", "b"}; + }) +.set_attr("FInferShape", NumpyDotShape) +.set_attr("FInferType", ElemwiseType<2, 1>) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyDotForward) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_np_dot"}) +.add_argument("a", "NDArray-or-Symbol", "First input") +.add_argument("b", "NDArray-or-Symbol", "Second input"); + +NNVM_REGISTER_OP(_backward_np_dot) +.set_num_inputs(3) +.set_num_outputs(2) +.set_attr("TIsBackward", true) +.set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyDotBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_dot.cu b/src/operator/numpy/np_dot.cu new file mode 100644 index 000000000000..9a9c69aa98e5 --- /dev/null +++ b/src/operator/numpy/np_dot.cu @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_dot.cu + * \brief GPU Implementation of numpy-compatible dot + */ + +#include "./np_dot-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_dot) +.set_attr("FCompute", NumpyDotForward); + +NNVM_REGISTER_OP(_backward_np_dot) +.set_attr("FCompute", NumpyDotBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cc b/src/operator/numpy/np_elemwise_broadcast_op.cc new file mode 100644 index 000000000000..fe5aeb0457aa --- /dev/null +++ b/src/operator/numpy/np_elemwise_broadcast_op.cc @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_elemwise_binary_op.cc + * \brief CPU Implementation of basic functions for elementwise numpy binary broadcast operator. + */ + +#include "../tensor/elemwise_binary_broadcast_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" + +namespace mxnet { +namespace op { + +bool NumpyBinaryScalarType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0)); + return in_attrs->at(0) != -1; +} + +#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(name) \ + NNVM_REGISTER_OP(name) \ + .set_num_inputs(1) \ + .set_num_outputs(1) \ + .set_attr_parser([](NodeAttrs* attrs) { \ + attrs->parsed = std::stod(attrs->dict["scalar"]); \ + }) \ + .set_attr("FInferShape", ElemwiseShape<1, 1>) \ + .set_attr("FInferType", NumpyBinaryScalarType) \ + .set_attr("FInplaceOption", \ + [](const NodeAttrs& attrs){ \ + return std::vector >{{0, 0}}; \ + }) \ + .add_argument("data", "NDArray-or-Symbol", "source input") \ + .add_argument("scalar", "float", "scalar input") + + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_add) +.describe(R"code(Add arguments element-wise with broadcasting if necessary. + +Example:: + + x = [[ 1., 1., 1.], + [ 1., 1., 1.]] + + y = [[ 0.], + [ 1.]] + + add(x, y) = [[ 1., 1., 1.], + [ 2., 2., 2.]] + +)code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_broadcast_add"}); + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_subtract) +.describe(R"code(Subtract arguments element-wise with broadcasting if necessary. + +Example:: + + x = [[ 1., 1., 1.], + [ 1., 1., 1.]] + + y = [[ 0.], + [ 1.]] + + subtract(x, y) = [[ 1., 1., 1.], + [ 0., 0., 0.]] + +)code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"}); + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_multiply) +.describe(R"code(Multiply arguments with broadcasting if necessary. + +Example:: + + x = [[ 1., 1., 1.], + [ 1., 1., 1.]] + + y = [[ 0.], + [ 1.]] + + multiply(x, y) = [[ 0., 0., 0.], + [ 1., 1., 1.]] + +)code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mul"}); + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_mod) +.describe(R"code(Return element-wise remainder of division. +It is equivalent to the Python modulus operator``x1 % x2`` and has the same sign as the divisor x2. + +Example:: + + x = [[ 8., 8., 8.], + [ 8., 8., 8.]] + + y = [[ 2.], + [ 3.]] + + mod(x, y) = [[ 0., 0., 0.], + [ 2., 2., 2.]] + +)code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mod"}); + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_power) +.describe(R"code(First array elements raised to powers from second array, element-wise. + +Raise each base in x1 to the positionally-corresponding power in x2. x1 and x2 must be +broadcastable to the same shape. 
+ +Example:: + + x = [[ 1., 1., 1.], + [ 1., 1., 1.]] + + y = [[ 0.], + [ 1.]] + + power(x, y) = [[ 2., 2., 2.], + [ 4., 4., 4.]] + +)code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_power"}); + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_maximum) +.describe(R"code()code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute); + +MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_minimum) +.describe(R"code()code" ADD_FILELINE) +.set_attr("FCompute", BinaryBroadcastCompute); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_add_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseNone{"_copy"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_subtract_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseNone{"_copy"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rsubtract_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_multiply_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_mul_scalar"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_mod_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_mod_scalar"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rmod_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_rmod_scalar"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_power_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_power_scalar"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_maximum_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_maximum_scalar"}); + +MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_minimum_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_minimum_scalar"}); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cu b/src/operator/numpy/np_elemwise_broadcast_op.cu new file mode 100644 index 000000000000..c858b3a4987a --- /dev/null +++ b/src/operator/numpy/np_elemwise_broadcast_op.cu @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_elemwise_broadcast_op.cu + * \brief GPU Implementation of basic functions for elementwise binary broadcast operator. + */ +#include "../tensor/elemwise_binary_broadcast_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" + +namespace mxnet { +namespace op { +NNVM_REGISTER_OP(_npi_add) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_subtract) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_multiply) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_mod) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_power) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_maximum) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_minimum) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_add_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_subtract_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_rsubtract_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_multiply_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_mod_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_rmod_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_power_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_rpower_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_maximum_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_minimum_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_unary_op_basic.cc b/src/operator/numpy/np_elemwise_unary_op_basic.cc new file mode 100644 index 000000000000..7f30de090949 --- /dev/null +++ b/src/operator/numpy/np_elemwise_unary_op_basic.cc @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_elemwise_unary_op_basic.cc + * \brief CPU Implementation of numpy elementwise unary function. + */ +#include +#include "../tensor/elemwise_unary_op.h" + +namespace mxnet { +namespace op { + +MXNET_OPERATOR_REGISTER_UNARY(_npx_relu) +.describe(R"code(Computes rectified linear activation. + +.. math:: + max(features, 0) + +)code" ADD_FILELINE) +.set_attr("FCompute", UnaryOp::Compute) +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_relu"}); + +MXNET_OPERATOR_REGISTER_UNARY(_npx_sigmoid) +.describe(R"code(Computes sigmoid of x element-wise. + +.. 
math:: + y = 1 / (1 + exp(-x)) + +)code" ADD_FILELINE) +.set_attr("FCompute", UnaryOp::Compute) +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_sigmoid"}); + +NNVM_REGISTER_OP(_np_copy) +.describe(R"code(Return an array copy of the given object.)code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 0}}; + }) +.set_attr("FCompute", UnaryOp::IdentityCompute) +.set_attr("FInplaceIdentity", + [](const NodeAttrs& attrs){ + return std::vector{true}; + }) +.set_attr("FGradient", ElemwiseGradUseNone{"_copy"}) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "The input"); + +#define MXNET_OPERATOR_REGISTER_NUMPY_UNARY(__name$, __input_name$, __kernel$) \ +NNVM_REGISTER_OP(__name$) \ +.set_num_inputs(1) \ +.set_num_outputs(1) \ +.set_attr("FInferShape", ElemwiseShape<1, 1>) \ +.set_attr("FInferType", ElemwiseType<1, 1>) \ +.set_attr("FInplaceOption", \ + [](const NodeAttrs& attrs){ \ + return std::vector >{{0, 0}}; \ + }) \ +.set_attr("FListInputNames", \ + [](const NodeAttrs& attrs) { \ + return std::vector{__input_name$}; \ + }) \ +.set_attr("FCompute", UnaryOp::Compute) \ +.add_argument(__input_name$, "NDArray-or-Symbol", "The input array.") + +// negative +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_negative, "x", mshadow_op::negation) +.describe(R"code(Numerical negative, element-wise. +Example:: + negative([1., -1.]) = [-1., 1.] +)code") +.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); + +// reciprocal +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_reciprocal, "x", mshadow_op::reciprocal) +.describe(R"code(Return the reciprocal of the argument, element-wise. +Example:: + reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0] +)code") +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_reciprocal"}); + +// abs +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_absolute, "x", mshadow_op::abs) +.add_alias("_npi_abs") +.describe(R"code(Returns element-wise absolute value of the input. +Example:: + absolute([-2, 0, 3]) = [2, 0, 3] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_abs"}); + +// sign +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_sign, "x", mshadow_op::sign) +.describe(R"code(Returns an element-wise indication of the sign of a number. +The sign function returns -1 if x < 0, 0 if x==0, 1 if x > 0. +Example:: + sign([-2, 0, 3]) = [-1, 0, 1] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_sign"}); + +// rint +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_rint, "x", mshadow_op::rint) +.describe(R"code(Round elements of the array to the nearest integer. +Example:: + rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) = [-2., -2., -0., 0., 2., 2., 2.] +)code" ADD_FILELINE) +.set_attr("FGradient", MakeZeroGradNodes); + +// ceil +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_ceil, "x", mshadow_op::ceil) +.describe(R"code(Return the ceiling of the input, element-wise. +The ceil of the scalar x is the smallest integer i, such that i >= x. +Example:: + ceil([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) = [-1., -1., -0., 1., 2., 2., 2.] +)code" ADD_FILELINE) +.set_attr("FGradient", MakeZeroGradNodes); + +// floor +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_floor, "x", mshadow_op::floor) +.describe(R"code(Return the floor of the input, element-wise. 
+The floor of the scalar x is the largest integer i, such that i <= x. +Example:: + floor([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) = [-2., -2., -1., 0., 1., 1., 2.] +)code" ADD_FILELINE) +.set_attr("FGradient", MakeZeroGradNodes); + +// trunc +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_trunc, "x", mshadow_op::trunc) +.describe(R"code(Return the truncated value of the input, element-wise. +The truncated value of the scalar x is the nearest integer i which is closer to +zero than x is. In short, the fractional part of the signed number x is discarded. +Example:: + trunc([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) = [-1., -1., -0., 0., 1., 1., 2.] +)code" ADD_FILELINE) +.set_attr("FGradient", MakeZeroGradNodes); + +// fix +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_fix, "x", mshadow_op::fix) +.describe(R"code(Round to nearest integer towards zero. +Round an array of floats element-wise to nearest integer towards zero. +The rounded values are returned as floats. +Example:: + fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1., 1., 2.] +)code" ADD_FILELINE) +.set_attr("FGradient", MakeZeroGradNodes); + +// square +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_square, "x", mshadow_op::square) +.describe(R"code(Return the element-wise square of the input. +Example:: + square([2, 3, 4]) = [4, 9, 16] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_square"}); + +// sqrt +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_sqrt, "x", mshadow_op::square_root) +.describe(R"code(Return the non-negative square-root of an array, element-wise. +Example:: + sqrt([4, 9, 16]) = [2, 3, 4] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_sqrt"}); + +// cbrt +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_cbrt, "x", mshadow_op::cube_root) +.describe(R"code(Return the cube-root of an array, element-wise. +Example:: + cbrt([1, 8, -125]) = [1, 2, -5] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_cbrt"}); + +// exp +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_exp, "x", mshadow_op::exp) +.describe(R"code(Calculate the exponential of all elements in the input array. +Example:: + exp([0, 1, 2]) = [1., 2.71828175, 7.38905621] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseOut{"_mul"}); + +// log +NNVM_REGISTER_OP(_npi_log) +.describe(R"code(Returns element-wise Natural logarithmic value of the input. +The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x`` +)code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 0}}; + }) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"x"}; + }) +.set_attr("FCompute", UnaryOp::Compute) +.add_argument("x", "NDArray-or-Symbol", "The input array.") +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log"}); + +// log10 +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_log10, "x", mshadow_op::log10) +.describe(R"code(Returns element-wise Base-10 logarithmic value of the input. +``10**log10(x) = x`` +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log10"}); + +// log2 +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_log2, "x", mshadow_op::log2) +.describe(R"code(Returns element-wise Base-2 logarithmic value of the input. 
+``2**log2(x) = x`` +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log2"}); + +// log1p +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_log1p, "x", mshadow_op::log1p) +.describe(R"code(Return the natural logarithm of one plus the input array, element-wise. +Calculates ``log(1 + x)``. +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_log1p"}); + +// expm1 +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_expm1, "x", mshadow_op::expm1) +.describe(R"code(Calculate ``exp(x) - 1`` for all elements in the array.)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_expm1"}); + + +// logical_not +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_logical_not, "x", mshadow_op::nt) +.describe(R"code(Compute the truth value of NOT x element-wise. +Example:: + logical_not([-2., 0., 1.]) = [0., 1., 0.] +)code") +.set_attr("FGradient", MakeZeroGradNodes); + +// sin +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_sin, "x", mshadow_op::sin) +.describe(R"code(Trigonometric sine, element-wise. +.. math:: + sin([0, \pi/4, \pi/2]) = [0, 0.707, 1] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_sin" }); + +// cos +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_cos, "x", mshadow_op::cos) +.describe(R"code(Computes the element-wise cosine of the input array. +.. math:: + cos([0, \pi/4, \pi/2]) = [1, 0.707, 0] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_cos"}); + +// tan +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_tan, "x", mshadow_op::tan) +.describe(R"code(Computes the element-wise tangent of the input array. +.. math:: + tan([0, \pi/4, \pi/2]) = [0, 1, -inf] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseOut{ "_backward_tan" }); + +// arcsin +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_arcsin, "x", mshadow_op::arcsin) +.describe(R"code(Returns element-wise inverse sine of the input array. +.. math:: + arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arcsin" }); + +// arccos +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_arccos, "x", mshadow_op::arccos) +.describe(R"code(Returns element-wise inverse cosine of the input array. +The input should be in range `[-1, 1]`. +The output is in the closed interval :math:`[0, \pi]` +.. math:: + arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0] +The storage type of ``arccos`` output is always dense +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arccos" }); + +// arctan +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_arctan, "x", mshadow_op::arctan) +.describe(R"code(Returns element-wise inverse tangent of the input array. +.. math:: + arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arctan" }); + +// degrees +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_degrees, "x", mshadow_op::degrees) +.describe(R"code(Converts each element of the input array from radians to degrees. +.. math:: + degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_degrees" }); + +// radians +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_radians, "x", mshadow_op::radians) +.describe(R"code(Converts each element of the input array from degrees to radians. +.. 
math:: + radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi] +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_radians" }); + +// sinh +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_sinh, "x", mshadow_op::sinh) +.describe(R"code(Returns the hyperbolic sine of the input array, computed element-wise. +.. math:: + sinh(x) = 0.5\times(exp(x) - exp(-x)) +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_sinh" }); + +// cosh +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_npi_cosh, "x", mshadow_op::cosh) +.describe(R"code(Returns the hyperbolic cosine of the input array, computed element-wise. +.. math:: + cosh(x) = 0.5\times(exp(x) + exp(-x)) +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_cosh" }); + +// tanh +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_tanh, "x", mshadow_op::tanh) +.describe(R"code(Returns the hyperbolic tangent of the input array, computed element-wise. +.. math:: + tanh(x) = sinh(x) / cosh(x) +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseOut{ "_backward_tanh" }); + +// arcsinh +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_arcsinh, "x", mshadow_op::arcsinh) +.describe(R"code(Returns the element-wise inverse hyperbolic sine of the input array, \ +computed element-wise. +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arcsinh" }); + +// arccosh +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_arccosh, "x", mshadow_op::arccosh) +.describe(R"code(Returns the element-wise inverse hyperbolic cosine of the input array, \ +computed element-wise. +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arccosh" }); + +// arctanh +MXNET_OPERATOR_REGISTER_NUMPY_UNARY(_np_arctanh, "x", mshadow_op::arctanh) +.describe(R"code(Returns the element-wise inverse hyperbolic tangent of the input array, \ +computed element-wise. +)code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_arctanh" }); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_elemwise_unary_op_basic.cu b/src/operator/numpy/np_elemwise_unary_op_basic.cu new file mode 100644 index 000000000000..8fb169226af2 --- /dev/null +++ b/src/operator/numpy/np_elemwise_unary_op_basic.cu @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_elemwise_unary_op_basic.cu + * \brief GPU Implementation of numpy unary functions. 
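+ * Note: this GPU file only re-registers the FCompute kernels; shape/type
+ * inference, gradients, and the operator documentation are defined once in
+ * np_elemwise_unary_op_basic.cc.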
+ */ +#include "../tensor/elemwise_binary_op.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npx_relu) +.set_attr("FCompute", UnaryOp::Compute); + +NNVM_REGISTER_OP(_npx_sigmoid) +.set_attr("FCompute", UnaryOp::Compute); + +NNVM_REGISTER_OP(_np_copy) +.set_attr("FCompute", UnaryOp::IdentityCompute); + +#define MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(__name$, __kernel$) \ +NNVM_REGISTER_OP(__name$) \ +.set_attr("FCompute", UnaryOp::Compute) \ + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_negative, mshadow_op::negation); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_reciprocal, mshadow_op::reciprocal); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_absolute, mshadow_op::abs); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_sign, mshadow_op::sign); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_rint, mshadow_op::rint); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_ceil, mshadow_op::ceil); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_floor, mshadow_op::floor); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_trunc, mshadow_op::trunc); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_fix, mshadow_op::fix); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_square, mshadow_op::square); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_sqrt, mshadow_op::square_root); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_cbrt, mshadow_op::cube_root); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_exp, mshadow_op::exp); + +NNVM_REGISTER_OP(_npi_log) +.set_attr("FCompute", UnaryOp::Compute); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_log10, mshadow_op::log10); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_log2, mshadow_op::log2); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_log1p, mshadow_op::log1p); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_expm1, mshadow_op::expm1); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_logical_not, mshadow_op::nt); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_sin, mshadow_op::sin); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_cos, mshadow_op::cos); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_tan, mshadow_op::tan); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_arcsin, mshadow_op::arcsin); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_arccos, mshadow_op::arccos); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_arctan, mshadow_op::arctan); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_degrees, mshadow_op::degrees); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_radians, mshadow_op::radians); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_sinh, mshadow_op::sinh); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_npi_cosh, mshadow_op::cosh); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_tanh, mshadow_op::tanh); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_arcsinh, mshadow_op::arcsinh); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_arccosh, mshadow_op::arccosh); + +MXNET_OPERATOR_REGISTER_NUMPY_UNARY_GPU(_np_arctanh, mshadow_op::arctanh); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_init_op-inl.h b/src/operator/numpy/np_init_op-inl.h new file mode 100644 index 000000000000..94c663e76e5e --- /dev/null +++ b/src/operator/numpy/np_init_op-inl.h @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_init_op.cc + * \brief Function definition of numpy init op + */ + +#ifndef MXNET_OPERATOR_NUMPY_NP_INIT_OP_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_INIT_OP_INL_H_ + +#include +#include +#include "../mxnet_op.h" +#include "../operator_common.h" + +namespace mxnet { +namespace op { + +template +struct identity { + template + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const int n) { + using namespace mxnet_op; + + auto j = unravel(i, mshadow::Shape2(n, n)); + if (j[0] == j[1]) { + KERNEL_ASSIGN(out_data[i], req, static_cast(1)); + } else { + KERNEL_ASSIGN(out_data[i], req, static_cast(0)); + } + } +}; + +template +void IdentityCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + using namespace mshadow; + CHECK_EQ(inputs.size(), 0U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + Stream *s = ctx.get_stream(); + const TBlob& out_data = outputs[0]; + int n = out_data.shape_[0]; + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + Kernel, xpu>::Launch( + s, out_data.Size(), out_data.dptr(), n); + }); + }); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_INIT_OP_INL_H_ diff --git a/src/operator/numpy/np_init_op.cc b/src/operator/numpy/np_init_op.cc new file mode 100644 index 000000000000..c347f48a2612 --- /dev/null +++ b/src/operator/numpy/np_init_op.cc @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_init_op.cc + * \brief CPU Implementation of numpy init op + */ + +#include "../tensor/init_op.h" +#include "../tensor/elemwise_unary_op.h" +#include "./np_init_op-inl.h" + +namespace mxnet { +namespace op { + +inline bool NumpyRangeShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector* in_shapes, + mxnet::ShapeVector* out_shapes) { + const RangeParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_shapes->size(), 0U); + CHECK_EQ(out_shapes->size(), 1U); + CHECK_NE(param.step, 0) << "_npi_arange does not support step=0"; + CHECK_EQ(param.repeat, 1) << "_npi_arange only supports repeat=1, received " << param.repeat; + CHECK(param.stop.has_value()) << "_npi_arange requires stop to have a value"; + double out_size = std::ceil((param.stop.value() - param.start) / param.step); + if (out_size < 0) { + out_size = 0; + } + SHAPE_ASSIGN_CHECK(*out_shapes, 0, mxnet::TShape({static_cast(out_size)})); + return true; +} + +NNVM_REGISTER_OP(_npi_zeros) +.describe("Return a new array of given shape, type, and context, filled with zeros.") +.set_num_inputs(0) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", InitShape) +.set_attr("FInferType", InitType) +.set_attr("FInferStorageType", InitStorageType) +.set_attr("FCompute", FillCompute) +.add_arguments(InitOpParam::__FIELDS__()); + +NNVM_REGISTER_OP(_npi_ones) +.describe("Return a new array of given shape, type, and context, filled with ones.") +.set_num_inputs(0) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", InitShape) +.set_attr("FInferType", InitType) +.set_attr("FCompute", FillCompute) +.add_arguments(InitOpParam::__FIELDS__()); + +NNVM_REGISTER_OP(_npi_identity) +.describe("Return a new identity array of given shape, type, and context.") +.set_num_inputs(0) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", InitShape) +.set_attr("FInferType", InitType) +.set_attr("FCompute", IdentityCompute) +.add_arguments(InitOpParam::__FIELDS__()); + +NNVM_REGISTER_OP(_np_zeros_like) +.describe(R"code(Return an array of zeros with the same shape and type as a given array. + +Examples:: + + x = [[ 1., 1., 1.], + [ 1., 1., 1.]] + + zeros_like(x) = [[ 0., 0., 0.], + [ 0., 0., 0.]] + +)code") +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FIgnoreInputs", + [](const NodeAttrs& attrs) { + return std::vector(1, 0); + }) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FCompute", FillCompute) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("a", "NDArray-or-Symbol", + "The shape and data-type of a define these same attributes of the returned array."); + +NNVM_REGISTER_OP(_np_ones_like) +.describe(R"code(Return an array of ones with the same shape and type as a given array. 
+ +Examples:: + + x = [[ 0., 0., 0.], + [ 0., 0., 0.]] + + ones_like(x) = [[ 1., 1., 1.], + [ 1., 1., 1.]] + +)code") +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FIgnoreInputs", + [](const NodeAttrs& attrs) { + return std::vector(1, 0); + }) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FCompute", FillCompute) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("a", "NDArray-or-Symbol", + "The shape and data-type of a define these same attributes of the returned array."); + +NNVM_REGISTER_OP(_npi_arange) +.describe("Return evenly spaced values within a given interval.") +.set_num_inputs(0) +.set_num_outputs(1) +.set_attr_parser(RangeParamParser) +.set_attr("FInferShape", NumpyRangeShape) +.set_attr("FInferType", InitType) +.set_attr("FCompute", RangeCompute) +.add_arguments(RangeParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_init_op.cu b/src/operator/numpy/np_init_op.cu new file mode 100644 index 000000000000..e1f11fd9ac37 --- /dev/null +++ b/src/operator/numpy/np_init_op.cu @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_init_op.cu + * \brief GPU Implementation of numpy init op + */ + +#include "../tensor/init_op.h" +#include "./np_init_op-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npi_zeros) +.set_attr("FCompute", FillCompute); + +NNVM_REGISTER_OP(_npi_ones) +.set_attr("FCompute", FillCompute); + +NNVM_REGISTER_OP(_npi_identity) +.set_attr("FCompute", IdentityCompute); + +NNVM_REGISTER_OP(_np_zeros_like) +.set_attr("FCompute", FillCompute); + +NNVM_REGISTER_OP(_np_ones_like) +.set_attr("FCompute", FillCompute); + +NNVM_REGISTER_OP(_npi_arange) +.set_attr("FCompute", RangeCompute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_matrix_op-inl.h b/src/operator/numpy/np_matrix_op-inl.h new file mode 100644 index 000000000000..44a6c909c9cf --- /dev/null +++ b/src/operator/numpy/np_matrix_op-inl.h @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_matrix_op-inl.h + * \brief Function definition of matrix related operators + */ +#ifndef MXNET_OPERATOR_NUMPY_NP_MATRIX_OP_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_MATRIX_OP_INL_H_ + +#include +#include "../tensor/matrix_op-inl.h" + +namespace mxnet { +namespace op { + +struct NumpyTransposeParam : public dmlc::Parameter { + mxnet::TShape axes; + DMLC_DECLARE_PARAMETER(NumpyTransposeParam) { + DMLC_DECLARE_FIELD(axes).set_default(mxnet::TShape(-1, 0)) + .describe("By default, reverse the dimensions, otherwise permute " + "the axes according to the values given."); + } +}; + +template +void NumpyTranspose(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + const NumpyTransposeParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(req[0], kWriteTo) << "Transpose does not support inplace"; + if (ndim_is_known(param.axes)) { + TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], param.axes); + } else { + mxnet::TShape axes(inputs[0].ndim(), -1); + for (int i = 0; i < axes.ndim(); ++i) { + axes[i] = axes.ndim() - 1 - i; + } + TransposeImpl(ctx.run_ctx, inputs[0], outputs[0], axes); + } +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_MATRIX_OP_INL_H_ diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc new file mode 100644 index 000000000000..1323447425d4 --- /dev/null +++ b/src/operator/numpy/np_matrix_op.cc @@ -0,0 +1,377 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_matrix_op.cc + * \brief CPU Implementation of numpy matrix operations + */ + +#include "./np_matrix_op-inl.h" +#include "../nn/concat-inl.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(NumpyTransposeParam); + +bool NumpyTransposeShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + const NumpyTransposeParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + mxnet::TShape& shp = (*in_attrs)[0]; + CHECK_LE(shp.ndim(), 6) << "Transpose support at most 6 dimensions"; + mxnet::TShape ret(shp.ndim(), -1); + if (ndim_is_known(param.axes)) { + CHECK_EQ(shp.ndim(), param.axes.ndim()); + for (int i = 0; i < shp.ndim(); ++i) { + CHECK(param.axes[i] < static_cast(shp.ndim())); + ret[i] = shp[param.axes[i]]; + } + } else { + for (int i = 0; i < shp.ndim(); ++i) { + ret[i] = shp[shp.ndim()-1-i]; + } + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret); + return shape_is_known(ret); +} + +NNVM_REGISTER_OP(_np_transpose) +.describe(R"code(Permute the dimensions of an array. + +Examples:: + + x = [[ 1, 2], + [ 3, 4]] + + transpose(x) = [[ 1., 3.], + [ 2., 4.]] + + x = [[[ 1., 2.], + [ 3., 4.]], + + [[ 5., 6.], + [ 7., 8.]]] + + transpose(x) = [[[ 1., 5.], + [ 3., 7.]], + + [[ 2., 6.], + [ 4., 8.]]] + + transpose(x, axes=(1,0,2)) = [[[ 1., 2.], + [ 5., 6.]], + + [[ 3., 4.], + [ 7., 8.]]] +)code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyTransposeShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + const NumpyTransposeParam& param = nnvm::get(n->attrs.parsed); + if (ndim_is_known(param.axes)) { + mxnet::TShape axes = mxnet::TShape(param.axes.ndim(), -1); + for (int i = 0; i < axes.ndim(); ++i) { + axes[param.axes[i]] = i; + } + std::ostringstream os; + os << axes; + return MakeNonlossGradNode("transpose", n, ograds, {}, {{"axes", os.str()}}); + } else { + return MakeNonlossGradNode("transpose", n, ograds, {}, + std::unordered_map()); + } + }) +.set_attr("FCompute", NumpyTranspose) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "Source input") +.add_arguments(NumpyTransposeParam::__FIELDS__()); + +struct NumpyReshapeParam : public dmlc::Parameter { + mxnet::TShape newshape; + std::string order; + DMLC_DECLARE_PARAMETER(NumpyReshapeParam) { + DMLC_DECLARE_FIELD(newshape) + .describe("The new shape should be compatible with the original shape." + " If an integer, then the result will be a 1-D array of that length." + " One shape dimension can be -1. In this case, the value is inferred" + " from the length of the array and remaining dimensions."); + DMLC_DECLARE_FIELD(order) + .set_default("C") + .describe("Read the elements of a using this index order, and place the elements into" + " the reshaped array using this index order. 'C' means to read/write the elements" + " using C-like index order, with the last axis index changing fastest, back to the" + " first axis index changing slowest. 
Note that currently only C-like order is" + " supported"); + } +}; + +DMLC_REGISTER_PARAMETER(NumpyReshapeParam); + +bool NumpyReshapeInferShape(const mxnet::TShape& src, mxnet::TShape* dst) { + if (shape_is_known(src) && shape_is_known(*dst)) { + CHECK_EQ(src.Size(), dst->Size()) << "Cannot reshape array of size " + << src.Size() << " into shape " << *dst; + return true; + } else if (!shape_is_known(src) || !ndim_is_known(*dst)) { + return false; + } else { + int unknown_axis = -1; + dim_t known_dim_size_prod = 1; + for (int i = 0; i < dst->ndim(); ++i) { + if (!dim_size_is_known(*dst, i)) { + if (unknown_axis == -1) { + unknown_axis = i; + } else { + return false; // more than one unknown dim + } + } else { + known_dim_size_prod *= (*dst)[i]; + } + } + CHECK_NE(known_dim_size_prod, 0) << "Cannot reshape array of size " + << src.Size() << " into shape " << *dst; + CHECK_EQ(src.Size() % known_dim_size_prod, 0) << "Cannot reshape array of size " + << src.Size() << " into shape " << *dst; + (*dst)[unknown_axis] = src.Size() / known_dim_size_prod; + return true; + } +} + +bool NumpyReshapeShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; + CHECK_EQ(out_attrs->size(), 1U); + const NumpyReshapeParam& param = nnvm::get(attrs.parsed); + // sanity check + bool has_unknown_dim_size = false; + for (int i = 0; i < param.newshape.ndim(); ++i) { + if (param.newshape[i] < 0) { + CHECK_EQ(param.newshape[i], -1) << "The shape dimension size to inferred must be -1"; + CHECK(!has_unknown_dim_size) << "Can only specify one unknown dimension"; + has_unknown_dim_size = true; + } + } + + mxnet::TShape target_shape = param.newshape; + bool success = NumpyReshapeInferShape(in_attrs->at(0), &target_shape); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, target_shape); + if (!success) { + success = NumpyReshapeInferShape(out_attrs->at(0), &in_attrs->at(0)); + } + return success; +} + +NNVM_REGISTER_OP(_np_reshape) +.describe(R"code()code" ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyReshapeShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_reshape"}) +.set_attr("FCompute", UnaryOp::IdentityCompute) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.set_attr("FInplaceIdentity", + [](const NodeAttrs& attrs){ + return std::vector{true}; + }) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.add_argument("a", "NDArray-or-Symbol", "Array to be reshaped.") +.add_arguments(NumpyReshapeParam::__FIELDS__()); + +NNVM_REGISTER_OP(_npi_stack) +.describe(R"code(Join a sequence of arrays along a new axis. + +The axis parameter specifies the index of the new axis in the dimensions of the +result. For example, if axis=0 it will be the first dimension and if axis=-1 it +will be the last dimension. 
+ +Examples:: + + x = [1, 2] + y = [3, 4] + + stack(x, y) = [[1, 2], + [3, 4]] + stack(x, y, axis=1) = [[1, 3], + [2, 4]] +)code") +.set_num_inputs([](const nnvm::NodeAttrs& attrs) { + const StackParam& param = dmlc::get(attrs.parsed); + return static_cast(param.num_args); + }) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + uint32_t num_args = dmlc::get(attrs.parsed).num_args; + std::vector ret; + for (uint32_t i = 0; i < num_args; ++i) { + ret.push_back(std::string("arg") + std::to_string(i)); + } + return ret; + }) +.set_attr("key_var_num_args", "num_args") +.set_attr("FInferShape", StackOpShape) +.set_attr("FInferType", ElemwiseType<-1, 1>) +.set_attr("FCompute", StackOpForward) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_stack"}) +.add_argument("data", "NDArray-or-Symbol[]", "List of arrays to stack") +.add_arguments(StackParam::__FIELDS__()); + +bool ConcatShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_shape, + mxnet::ShapeVector *out_shape); + +bool ConcatType(const nnvm::NodeAttrs& attrs, + std::vector *in_type, + std::vector *out_type); + +struct NumpyConcatGrad { + const char *op_name; + std::vector operator()(const nnvm::NodePtr& n, + const std::vector& ograds) const { + CHECK_EQ(ograds.size(), 1); + std::vector heads(ograds.begin(), ograds.end()); + return MakeGradNode(op_name, n, heads, n->attrs.dict); + } +}; + + +NNVM_REGISTER_OP(_npi_concatenate) +.describe(R"code(Join a sequence of arrays along an existing axis.)code" ADD_FILELINE) +.set_num_inputs([](const NodeAttrs& attrs) { + const ConcatParam& params = nnvm::get(attrs.parsed); + return params.num_args; +}) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + const ConcatParam& params = nnvm::get(attrs.parsed); + std::vector ret; + for (int i = 0; i < params.num_args; ++i) { + ret.push_back(std::string("data") + std::to_string(i)); + } + return ret; +}) +.set_attr("FListOutputNames", + [](const NodeAttrs& attrs) { + return std::vector{"out"}; +}) +.set_attr("key_var_num_args", "num_args") +.set_attr("FInferType", ConcatType) +.set_attr("FInferShape", ConcatShape) +.set_attr("FCompute", ConcatCompute) +.set_attr("FGradient", NumpyConcatGrad{"_backward_np_concat"}) +.add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") +.add_arguments(ConcatParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_np_concat) +.set_num_outputs([](const NodeAttrs& attrs) { + const ConcatParam& params = nnvm::get(attrs.parsed); + return params.num_args; +}) +.set_attr_parser(ParamParser) +.set_attr("TIsBackward", true) +.set_attr("FCompute", ConcatGradCompute); + +bool NumpySqueezeShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector *in_attrs, + mxnet::ShapeVector *out_attrs) { + const SqueezeParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), 1U) << "Input: [a]"; + CHECK_EQ(out_attrs->size(), 1U); + const mxnet::TShape& dshape = in_attrs->at(0); + const int dndim = dshape.ndim(); + if (!shape_is_known(dshape)) return false; + mxnet::TShape oshape = dshape; + // special case, scalar tensor + if (dshape.ndim() == 0) { + if (param.axis.has_value()) { + mxnet::Tuple axes = param.axis.value(); + CHECK_EQ(axes.ndim(), 1) << "cannot specify more than one axis for a scalar tensor"; + CHECK(axes[0] == 0 || axes[0] == -1) << "axis " << axes[0] + << " is out of bounds of array of dimension 0"; + } + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(0, -1)); + 
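// A 0-dim (scalar) input squeezes to a 0-dim output, so record the scalar
+ // output shape and return early; the per-axis checks below only apply when ndim > 0.
+ 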
return true; + } + if (param.axis.has_value()) { + // preprocess axis + mxnet::Tuple axes = param.axis.value(); + for (int i = 0; i < axes.ndim(); ++i) { + if (axes[i] < 0) { + axes[i] += dndim; + CHECK_GE(axes[i], 0) + << "axis " << axes[i] - dndim << " is out of bounds for array of dimension " << dndim; + } + CHECK_LT(axes[i], dndim) + << "axis " << axes[i] << " is out of bounds for array of dimension " << dndim; + CHECK_EQ(dshape[axes[i]], 1) + << "cannot select an axis to squeeze out which has size=" + << dshape[axes[i]] << " not equal to one"; + CHECK_NE(oshape[axes[i]], 0) << "duplicate value in axis"; + oshape[axes[i]] = -1; + } + } else { + for (int i = 0; i < oshape.ndim(); ++i) { + if (oshape[i] == 1) oshape[i] = -1; + } + } + size_t oshape_size = SqueezeShapeHelper(&oshape); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, mxnet::TShape(oshape.data(), oshape.data()+oshape_size)); + return true; +} + +NNVM_REGISTER_OP(_np_squeeze) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"a"}; + }) +.set_attr("FInferShape", NumpySqueezeShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCompute", UnaryOp::IdentityCompute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_squeeze"}) +.add_argument("a", "NDArray-or-Symbol[]", "data to squeeze") +.add_arguments(SqueezeParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_matrix_op.cu b/src/operator/numpy/np_matrix_op.cu new file mode 100644 index 000000000000..535482048906 --- /dev/null +++ b/src/operator/numpy/np_matrix_op.cu @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_matrix_op.cu + * \brief GPU Implementation of numpy matrix operations + */ +#include "./np_matrix_op-inl.h" +#include "../nn/concat-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_transpose) +.set_attr("FCompute", NumpyTranspose); + +NNVM_REGISTER_OP(_np_reshape) +.set_attr("FCompute", UnaryOp::IdentityCompute); + +NNVM_REGISTER_OP(_npi_stack) +.set_attr("FCompute", StackOpForward); + +NNVM_REGISTER_OP(_npi_concatenate) +.set_attr("FCompute", ConcatCompute); + +NNVM_REGISTER_OP(_backward_np_concat) +.set_attr("FCompute", ConcatGradCompute); + +NNVM_REGISTER_OP(_np_squeeze) +.set_attr("FCompute", UnaryOp::IdentityCompute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_trace_op-inl.h b/src/operator/numpy/np_trace_op-inl.h new file mode 100644 index 000000000000..741c20b61d80 --- /dev/null +++ b/src/operator/numpy/np_trace_op-inl.h @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_trace_op-inl.h + * \brief Function definition of matrix numpy-compatible trace operator + */ + +#ifndef MXNET_OPERATOR_NUMPY_NP_TRACE_OP_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_TRACE_OP_INL_H_ + +#include +#include +#include +#include +#include +#include "../mxnet_op.h" +#include "../operator_common.h" +#include "../elemwise_op_common.h" +#include "../tensor/broadcast_reduce_op.h" + +namespace mxnet { +namespace op { + +struct NumpyTraceParam: public dmlc::Parameter { + int offset, axis1, axis2; + DMLC_DECLARE_PARAMETER(NumpyTraceParam) { + DMLC_DECLARE_FIELD(offset) + .set_default(0) + .describe("Offset of the diagonal from the main diagonal. " + "Can be both positive and negative. Defaults to 0."); + DMLC_DECLARE_FIELD(axis1) + .set_default(0) + .describe("Axes to be used as the first axis of the 2-D sub-arrays " + "from which the diagonals should be taken. Defaults to 0."); + DMLC_DECLARE_FIELD(axis2) + .set_default(1) + .describe("Axes to be used as the second axis of the 2-D sub-arrays " + "from which the diagonals should be taken. 
Defaults to 1."); + } +}; + +template +struct numpy_trace { + template + MSHADOW_XINLINE static void Map(index_t i, DType* out, const DType* a, + mshadow::Shape oshape, + mshadow::Shape ishape, + index_t stride, index_t offset, int dlength) { + using namespace mxnet_op; + using namespace mshadow; + index_t j = ravel(unravel(i, oshape), ishape) + offset; + if (back) { + for (index_t k = 0; k < dlength; ++k) { + KERNEL_ASSIGN(out[j], req, a[i]); + j += stride; + } + } else { + if (req == kWriteTo) { + out[i] = 0; + for (index_t k = 0; k < dlength; ++k) { + out[i] += a[j]; + j += stride; + } + } else if (req == kAddTo) { + for (index_t k = 0; k < dlength; ++k) { + out[i] += a[j]; + j += stride; + } + } + } + } +}; + +template +void NumpyTraceOpProcess(const TBlob& in_data, + const TBlob& out_data, + const mxnet::TShape& ishape, + const mxnet::TShape& oshape, + index_t dsize, + const NumpyTraceParam& param, + mxnet_op::Stream *s, + const std::vector& req) { + using namespace mxnet_op; + using namespace mshadow; + if (dsize == 0) { + if (back) { + if (out_data.Size() != 0) { + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + if (req_type == kWriteTo) { + out_data.FlatTo1D(s) = 0; + } + }); + }); + } + } + return; + } else if (ishape.Size() == 0) { + if (!back) { + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + if (req_type == kWriteTo) { + out_data.FlatTo1D(s) = 0; + } + }); + }); + } + return; + } + uint32_t x1 = CheckAxis(param.axis1, ishape.ndim()); + uint32_t x2 = CheckAxis(param.axis2, ishape.ndim()); + + uint32_t idim = ishape.ndim(); + + uint32_t minx = x1, maxx = x2; + if (minx > maxx) { + std::swap(minx, maxx); + } + + // merges contiguous axes that are not separated + // by axis1 or axis2 since they can be directly + // mapped to the output and there is no need + // to distinguish them + // (After this the input will have no more than + // three axes, hence improving the rave and + // unravel efficiency) + + index_t oleading = 1, + obody = 1, + otrailing = 1; + + for (uint32_t i = 0; i < minx; ++i) { + oleading *= ishape[i]; + } + for (uint32_t i = minx + 1; i < maxx; ++i) { + obody *= ishape[i]; + } + for (uint32_t i = maxx + 1; i < idim; ++i) { + otrailing *= ishape[i]; + } + + index_t ileading = oleading, + ibody = obody * ishape[minx], + itrailing = otrailing * ishape[maxx]; + + index_t stride1 = itrailing * obody, + stride2 = otrailing; + // stride1 + stride2 is the stride for + // iterating over the diagonal in question + + if (x1 == maxx) { + std::swap(stride1, stride2); + } + + // the extra index offset introduced by offset + index_t offset; + if (param.offset > 0) { + offset = stride2 * param.offset; + } else if (param.offset < 0) { + offset = stride1 * -param.offset; + } else { + offset = 0; + } + + // number of elements in the offset diagonal + // may be negative + int dlength; + if (param.offset > 0) { + dlength = std::min(ishape[x1], ishape[x2] - param.offset); + } else if (param.offset < 0) { + dlength = std::min(ishape[x1] - (-param.offset), ishape[x2]); + } else { + dlength = std::min(ishape[x1], ishape[x2]); + } + + MSHADOW_TYPE_SWITCH(out_data.type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], req_type, { + if (back) { + out_data.FlatTo1D(s) = 0; + } + Kernel, xpu>::Launch(s, dsize, out_data.dptr(), + in_data.dptr(), + Shape3(oleading, obody, otrailing), + Shape3(ileading, ibody, itrailing), + stride1 + stride2, offset, dlength); + }); + }); +} + +template +void 
NumpyTraceOpForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + using namespace mshadow; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + mshadow::Stream *s = ctx.get_stream(); + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; + const NumpyTraceParam& param = nnvm::get(attrs.parsed); + + NumpyTraceOpProcess(in_data, out_data, ishape, oshape, + out_data.Size(), param, s, req); +} + +template +void NumpyTraceOpBackward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + using namespace mshadow; + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(req.size(), 1U); + Stream *s = ctx.get_stream(); + + const TBlob& in_data = inputs[0]; + const TBlob& out_data = outputs[0]; + const mxnet::TShape& ishape = inputs[0].shape_; + const mxnet::TShape& oshape = outputs[0].shape_; + const NumpyTraceParam& param = nnvm::get(attrs.parsed); + + NumpyTraceOpProcess(in_data, out_data, oshape, ishape, + in_data.Size(), param, s, req); +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_TRACE_OP_INL_H_ diff --git a/src/operator/numpy/np_trace_op.cc b/src/operator/numpy/np_trace_op.cc new file mode 100644 index 000000000000..d97ac3040384 --- /dev/null +++ b/src/operator/numpy/np_trace_op.cc @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * Copyright (c) 2019 by Contributors + * \file np_trace_op.cc + * \brief CPU Implementation of numpy-compatible trace operator + */ + +#include "./np_trace_op-inl.h" + +namespace mxnet { +namespace op { + +inline bool NumpyTraceOpShape(const nnvm::NodeAttrs& attrs, + mxnet::ShapeVector* in_attrs, + mxnet::ShapeVector* out_attrs) { + CHECK_EQ(in_attrs->size(), 1); + CHECK_EQ(out_attrs->size(), 1); + const int ndim((*in_attrs)[0].ndim()); + if (ndim < 2) { + return false; + } + std::vector oshape(ndim - 2); + const NumpyTraceParam& param = nnvm::get(attrs.parsed); + int x1 = CheckAxis(param.axis1, (*in_attrs)[0].ndim()); + int x2 = CheckAxis(param.axis2, (*in_attrs)[0].ndim()); + CHECK_NE(x1, x2) << "axis1 and axis2 cannot refer to the same axis " << x1; + for ( int i = 0, j = 0; i < ndim; ++i ) { + if (i != x1 && i != x2) { + oshape[j++] = (*in_attrs)[0][i]; + } + } + mxnet::TShape tshape(oshape.begin(), oshape.end()); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape); + return true; +} + +DMLC_REGISTER_PARAMETER(NumpyTraceParam); + +NNVM_REGISTER_OP(_np_trace) +.describe(R"code(Computes the sum of the diagonal elements of a matrix. +Input is a tensor *A* of dimension *n >= 2*. + +If *n=2*, we sum the diagonal elements. The result has shape (). + +If *n>2*, *trace* is performed separately on the matrix defined by *axis1* and *axis2* for all +inputs (batch mode). + +Examples:: + + // Single matrix reduction + A = [[1.0, 1.0], [1.0, 7.0]] + trace(A) = 8.0 + + // Batch matrix reduction + A = [[[1.0, 1.0], [1.0, 7.0]], [[3.0, 0], [0, 17.0]]] + trace(A) = [8.0, 20.0] +)code" ADD_FILELINE) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"data"}; + }) +.set_attr("FInferShape", NumpyTraceOpShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCompute", NumpyTraceOpForward) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_np_trace"}) +.add_argument("data", "NDArray-or-Symbol", "Input ndarray") +.add_arguments(NumpyTraceParam::__FIELDS__()); + +NNVM_REGISTER_OP(_backward_np_trace) +.set_attr_parser(ParamParser) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("TIsBackward", true) +.set_attr("FCompute", NumpyTraceOpBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_trace_op.cu b/src/operator/numpy/np_trace_op.cu new file mode 100644 index 000000000000..220e4ae62a59 --- /dev/null +++ b/src/operator/numpy/np_trace_op.cu @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file np_trace_op.cu + * \brief GPU Implementation of numpy-compatible trace operator + */ +#include "./np_trace_op-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_np_trace) +.set_attr("FCompute", NumpyTraceOpForward); + +NNVM_REGISTER_OP(_backward_np_trace) +.set_attr("FCompute", NumpyTraceOpBackward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_true_divide.cc b/src/operator/numpy/np_true_divide.cc new file mode 100644 index 000000000000..429762778700 --- /dev/null +++ b/src/operator/numpy/np_true_divide.cc @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_true_divide.cc + * \brief CPU Implementation of true_divide operator. + */ +#include "../tensor/elemwise_binary_broadcast_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" + +namespace mxnet { +namespace op { + +template +bool TrueDivideType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_EQ(in_attrs->size(), static_cast(num_inputs)); + CHECK_EQ(out_attrs->size(), 1U); + for (const int dtype : *in_attrs) { + if (dtype == -1) return false; + } + if (num_inputs == 2) { + const int lhs_dtype = in_attrs->at(0); + const int rhs_dtype = in_attrs->at(1); + CHECK_EQ(lhs_dtype, rhs_dtype) + << "_true_divide currently only supports same dtype for dividend and divisor"; + } + auto is_float = [](const int dtype) { + return dtype == mshadow::kFloat32 || dtype == mshadow::kFloat64 || dtype == mshadow::kFloat16; + }; + + for (const int dtype : *in_attrs) { + CHECK(is_float(dtype)) << "_true_divide currently only supports float dtype"; + } + TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0)); + return true; +} + +NNVM_REGISTER_OP(_npi_true_divide) +.describe(R"code( +Returns a true division of the inputs, element-wise. + +It currently only supports dtype float16, float32, and float64. 
+ +Example:: + + x = [[ 6., 6., 6.], + [ 6., 6., 6.]] + + y = [[ 2.], + [ 3.]] + + _true_divide(x, y) = [[ 3., 3., 3.], + [ 2., 2., 2.]] + +)code" ADD_FILELINE) +.set_num_inputs(2) +.set_num_outputs(1) +.set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"lhs", "rhs"}; + }) +.set_attr("FInferShape", BinaryBroadcastShape) +.set_attr("FInferType", TrueDivideType<2>) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs){ + return std::vector >{{0, 0}, {1, 0}}; + }) +.set_attr("FCompute", BinaryBroadcastCompute) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_broadcast_div"}) +.add_argument("lhs", "NDArray-or-Symbol", "Dividend array") +.add_argument("rhs", "NDArray-or-Symbol", "Divisor array"); + +NNVM_REGISTER_OP(_npi_true_divide_scalar) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser([](NodeAttrs* attrs) { + attrs->parsed = std::stod(attrs->dict["scalar"]); + }) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", TrueDivideType<1>) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_div_scalar"}) +.add_argument("data", "NDArray-or-Symbol", "source input") +.add_argument("scalar", "float", "scalar input"); + +NNVM_REGISTER_OP(_npi_rtrue_divide_scalar) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr_parser([](NodeAttrs* attrs) { + attrs->parsed = std::stod(attrs->dict["scalar"]); + }) +.set_attr("FInferShape", ElemwiseShape<1, 1>) +.set_attr("FInferType", TrueDivideType<1>) +.set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +.set_attr("FCompute", BinaryScalarOp::Compute) +.set_attr("FGradient", ElemwiseGradUseNone{"_backward_rdiv_scalar"}) +.add_argument("data", "NDArray-or-Symbol", "source input") +.add_argument("scalar", "float", "scalar input"); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_true_divide.cu b/src/operator/numpy/np_true_divide.cu new file mode 100644 index 000000000000..be10c44f92a1 --- /dev/null +++ b/src/operator/numpy/np_true_divide.cu @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_true_divide.cu + * \brief GPU Implementation of true_divide operator. 
+ */ +#include "../tensor/elemwise_binary_broadcast_op.h" +#include "../tensor/elemwise_binary_scalar_op.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npi_true_divide) +.set_attr("FCompute", BinaryBroadcastCompute); + +NNVM_REGISTER_OP(_npi_true_divide_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +NNVM_REGISTER_OP(_npi_rtrue_divide_scalar) +.set_attr("FCompute", BinaryScalarOp::Compute); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/random/np_multinomial_op.cc b/src/operator/numpy/random/np_multinomial_op.cc new file mode 100644 index 000000000000..bf4f88c591cf --- /dev/null +++ b/src/operator/numpy/random/np_multinomial_op.cc @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_multinomial_op.h + * \brief Operator for numpy sampling from multinomial distributions + */ +#include "./np_multinomial_op.h" + +namespace mxnet { +namespace op { + +DMLC_REGISTER_PARAMETER(NumpyMultinomialParam); + +NNVM_REGISTER_OP(_npi_multinomial) +.describe(R"code(Draw samples from a multinomial distribution. " +"The multinomial distribution is a multivariate generalisation of the binomial distribution. " +"Take an experiment with one of p possible outcomes. " +"An example of such an experiment is throwing a dice, where the outcome can be 1 through 6. " +"Each sample drawn from the distribution represents n such experiments. " +"Its values, X_i = [X_0, X_1, ..., X_p], represent the number of times the outcome was i. +)code") +.set_num_inputs( + [](const nnvm::NodeAttrs& attrs) { + const NumpyMultinomialParam& param = nnvm::get(attrs.parsed); + return param.pvals.has_value() ? 0U : 1U; + } +) +.set_num_outputs(1) +.set_attr_parser(ParamParser) +.set_attr("FInferShape", NumpyMultinomialOpShape) +.set_attr("FInferType", NumpyMultinomialOpType) +.set_attr("FResourceRequest", + [](const nnvm::NodeAttrs& attrs) { + return std::vector{ + ResourceRequest::kRandom, ResourceRequest::kTempSpace}; + }) +.set_attr("FCompute", NumpyMultinomialForward) +.set_attr("FGradient", MakeZeroGradNodes) +.add_argument("a", "NDArray-or-Symbol", "Source input") +.add_arguments(NumpyMultinomialParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/random/np_multinomial_op.cu b/src/operator/numpy/random/np_multinomial_op.cu new file mode 100644 index 000000000000..a80926024735 --- /dev/null +++ b/src/operator/numpy/random/np_multinomial_op.cu @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_multinomial_op.cu + * \brief Operator for numpy sampling from multinomial distributions + */ +#include "./np_multinomial_op.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npi_multinomial) +.set_attr("FCompute", NumpyMultinomialForward); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/random/np_multinomial_op.h b/src/operator/numpy/random/np_multinomial_op.h new file mode 100644 index 000000000000..39515b4e7824 --- /dev/null +++ b/src/operator/numpy/random/np_multinomial_op.h @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file np_multinomial_op.h + * \brief Operator for sampling from multinomial distributions + */ +#ifndef MXNET_OPERATOR_NUMPY_RANDOM_NP_MULTINOMIAL_OP_H_ +#define MXNET_OPERATOR_NUMPY_RANDOM_NP_MULTINOMIAL_OP_H_ + +#include +#include +#include "../../mshadow_op.h" +#include "../../mxnet_op.h" +#include "../../operator_common.h" +#include "../../elemwise_op_common.h" + +namespace mxnet { +namespace op { + +struct NumpyMultinomialParam : public dmlc::Parameter { + int n; + dmlc::optional> pvals; + dmlc::optional> size; + DMLC_DECLARE_PARAMETER(NumpyMultinomialParam) { + DMLC_DECLARE_FIELD(n) + .describe("Number of experiments."); + DMLC_DECLARE_FIELD(pvals) + .set_default(dmlc::optional>()) + .describe("Probabilities of each of the p different outcomes. " + "These should sum to 1 (however, the last element is always assumed to " + "account for the remaining probability, as long as sum(pvals[:-1]) <= 1)" + "Note that this is for internal usage only. " + "This operator will only have either input mx.ndarray or this list of pvals"); + DMLC_DECLARE_FIELD(size) + .set_default(dmlc::optional>()) + .describe("Output shape. If the given shape is, " + "e.g., (m, n, k), then m * n * k samples are drawn. 
" + "Default is None, in which case a single value is returned."); + } +}; + +inline bool NumpyMultinomialOpShape(const nnvm::NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + const NumpyMultinomialParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(out_attrs->size(), 1U); + + std::vector oshape_vec; + dim_t pvals_length; + if (param.pvals.has_value()) { + CHECK_EQ(in_attrs->size(), 0U); + pvals_length = param.pvals.value().ndim(); + } else { + // pvals is from input ndarray + CHECK_EQ(in_attrs->size(), 1U); + const TShape& ishape = (*in_attrs)[0]; + // check the input shape is only one dimension + CHECK_EQ(ishape.ndim(), 1U) + << "object too deep for desired array"; + pvals_length = ishape[0]; + } + if (param.size.has_value()) { + const mxnet::Tuple& size = param.size.value(); + for (int i = 0; i < size.ndim(); ++i) { + oshape_vec.emplace_back(size[i]); + } + } + oshape_vec.emplace_back(pvals_length); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape(oshape_vec)); + return out_attrs->at(0).ndim() != 0U;; +} + +inline bool NumpyMultinomialOpType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + const NumpyMultinomialParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(in_attrs->size(), (param.pvals.has_value()) ? 0U : 1U); + CHECK_EQ(out_attrs->size(), 1U); + + (*out_attrs)[0] = mshadow::kInt64; + return true; +} + +struct multinomial_kernel { + template + MSHADOW_XINLINE static void Map(int i, + const int num_exp, + const int prob_length, + DType* pvals, + float* uniform, + int64_t* out) { + for (int j = 0; j < num_exp; ++j) { + DType loc = static_cast(uniform[i * num_exp + j]); + DType acc = 0.0; + bool found = false; + for (int k = 0; k < prob_length; ++k) { + acc += pvals[k]; + if (acc > loc) { + found = true; + out[i * prob_length + k] += 1; + break; + } + } + if (!found) { + out[i * prob_length + (prob_length - 1)] += 1; + } + } + } +}; + +template +void NumpyMultinomialForward(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mshadow; + using namespace mxnet_op; + const NumpyMultinomialParam& param = nnvm::get(attrs.parsed); + CHECK_EQ(outputs.size(), 1U); + CHECK_EQ(inputs.size(), (param.pvals.has_value()) ? 0U : 1U); + + int prob_length = (param.pvals.has_value()) + ? 
param.pvals.value().ndim() : inputs[0].shape_[0]; + // if intput is [] or size contains 0 dimension + if (prob_length == 0U || outputs[0].shape_.Size() == 0) return; + int num_output = outputs[0].Size() / prob_length; + int num_exp = param.n; + Stream *s = ctx.get_stream(); + Random *prnd = ctx.requested[0].get_random(s); + Tensor uniform = + ctx.requested[1].get_space_typed(Shape1(num_output * param.n), s); + prnd->SampleUniform(&uniform, 0, 1); + + // set zero for the outputs + Kernel::Launch(s, outputs[0].Size(), outputs[0].dptr()); + + if (param.pvals.has_value()) { + // create a tensor to copy the param.pvals tuple to avoid + // error: calling a __host__ function from a __host__ __device__ function is not allowed + Tensor pvals = + ctx.requested[1].get_space_typed(Shape1(prob_length), s); + double* pvals_ = pvals.dptr_; + // check if sum of input(pvals) > 1.0 + double sum = 0.0; + for (int i = 0; i < prob_length; ++i) { + sum += param.pvals.value()[i]; + // copy the tuple to data for later kernel usage + pvals_[i] = param.pvals.value()[i]; + CHECK_LE(sum, 1.0) + << "sum(pvals[:-1]) > 1.0"; + } + Kernel::Launch( + s, num_output, num_exp, prob_length, pvals_, uniform.dptr_, outputs[0].dptr()); + } else { + MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, { + // check if sum of input(pvals) > 1.0 + DType sum = DType(0); + DType* input = inputs[0].dptr(); + for (int i = 0; i < prob_length; ++i) { + sum += input[i]; + CHECK_LE(sum, 1.0) + << "sum(pvals[:-1]) > 1.0"; + } + Kernel::Launch( + s, num_output, num_exp, prob_length, + inputs[0].dptr(), uniform.dptr_, outputs[0].dptr()); + }); + } +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_RANDOM_NP_MULTINOMIAL_OP_H_ diff --git a/src/operator/quantization/quantized_concat.cc b/src/operator/quantization/quantized_concat.cc index d6aeb41da1f8..8988c17957a2 100644 --- a/src/operator/quantization/quantized_concat.cc +++ b/src/operator/quantization/quantized_concat.cc @@ -28,8 +28,8 @@ namespace mxnet { namespace op { -static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shape, - mxnet::ShapeVector* out_shape) { +static bool QuantizedConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_shape, + mxnet::ShapeVector* out_shape) { const ConcatParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_shape->size(), static_cast(param_.num_args * 3)); CHECK_EQ(out_shape->size(), 3U); @@ -74,8 +74,8 @@ static bool ConcatShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector* in_sha return shape_is_known(dshape); } -static bool ConcatType(const nnvm::NodeAttrs& attrs, std::vector* in_type, - std::vector* out_type) { +static bool QuantizedConcatType(const nnvm::NodeAttrs& attrs, std::vector* in_type, + std::vector* out_type) { const ConcatParam& param_ = nnvm::get(attrs.parsed); CHECK_EQ(in_type->size(), static_cast(param_.num_args * 3)); CHECK_EQ(out_type->size(), 3U); @@ -130,8 +130,8 @@ If any input holds int8, then the output will be int8. Otherwise output will be // TODO(Xinyu): a temp solution to enable GluonCV INT8 flow, // will be reverted after the improvement of CachedOP is done. 
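The multinomial shape function above appends the number of outcomes (the length of `pvals` or of the input probability vector) to the requested `size`, and the kernel then distributes `n` experiments over those outcomes for every output row. A sketch of the intended shapes, written against the official NumPy API that this operator mirrors (the MXNet-side entry point is assumed to live under `mxnet.numpy.random`):

```python
import numpy as np

out = np.random.multinomial(10, [1 / 6.] * 6, size=(2, 3))
print(out.shape)          # (2, 3, 6): `size` plus the length of pvals
print(out.sum(axis=-1))   # every row distributes exactly n=10 counts over the 6 outcomes
```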
.set_attr("FGradient", MakeZeroGradNodes) -.set_attr("FInferType", ConcatType) -.set_attr("FInferShape", ConcatShape) +.set_attr("FInferType", QuantizedConcatType) +.set_attr("FInferShape", QuantizedConcatShape) .set_attr("key_var_num_args", "num_args") .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate") .add_arguments(ConcatParam::__FIELDS__()); diff --git a/src/operator/random/sample_op.cc b/src/operator/random/sample_op.cc index 56a162be5da4..543146257ddf 100644 --- a/src/operator/random/sample_op.cc +++ b/src/operator/random/sample_op.cc @@ -81,6 +81,7 @@ DMLC_REGISTER_PARAMETER(SampleGenNegBinomialLikeParam); MXNET_OPERATOR_REGISTER_SAMPLE(_random_uniform, SampleUniformParam) .add_alias("uniform") .add_alias("random_uniform") +.add_alias("_npi_random_uniform") .describe(R"code(Draw random samples from a uniform distribution. .. note:: The existing alias ``uniform`` is deprecated. @@ -99,6 +100,7 @@ Example:: MXNET_OPERATOR_REGISTER_SAMPLE(_random_normal, SampleNormalParam) .add_alias("normal") .add_alias("random_normal") +.add_alias("_npi_random_normal") .describe(R"code(Draw random samples from a normal (Gaussian) distribution. .. note:: The existing alias ``normal`` is deprecated. diff --git a/src/operator/random/shuffle_op.cc b/src/operator/random/shuffle_op.cc index 70315716dea2..86797c136bab 100644 --- a/src/operator/random/shuffle_op.cc +++ b/src/operator/random/shuffle_op.cc @@ -122,6 +122,7 @@ void ShuffleForwardCPU(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(_shuffle) .add_alias("shuffle") +.add_alias("_np__random_shuffle") .describe(R"code(Randomly shuffle the elements. This shuffles the array along the first axis. diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc index 6a0dbd7a4e23..244e39335a91 100644 --- a/src/operator/rnn.cc +++ b/src/operator/rnn.cc @@ -634,6 +634,7 @@ static void RNNStatefulComputeCPU(const OpStatePtr& state_ptr, #endif NNVM_REGISTER_OP(RNN) +.add_alias("_npx_rnn") .describe(R"code(Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are implemented, with both multi-layer and bidirectional support. diff --git a/src/operator/roi_pooling.cc b/src/operator/roi_pooling.cc index 8862d0db1401..c72b203292fe 100644 --- a/src/operator/roi_pooling.cc +++ b/src/operator/roi_pooling.cc @@ -300,5 +300,9 @@ Example:: "corners of designated region of interest. 
`batch_index` indicates the index of corresponding " "image in the input array") .add_arguments(ROIPoolingParam::__FIELDS__()); + +NNVM_REGISTER_OP(ROIPooling) +.add_alias("_npx_roi_pooling"); + } // namespace op } // namespace mxnet diff --git a/src/operator/sequence_mask.cc b/src/operator/sequence_mask.cc index f4f81a801e70..d7731026ce21 100644 --- a/src/operator/sequence_mask.cc +++ b/src/operator/sequence_mask.cc @@ -191,5 +191,8 @@ Example:: "vector of sequence lengths of the form [batch_size]") .add_arguments(SequenceMaskParam::__FIELDS__()); +NNVM_REGISTER_OP(SequenceMask) +.add_alias("_npx_sequence_mask"); + } // namespace op } // namespace mxnet diff --git a/src/operator/swapaxis-inl.h b/src/operator/swapaxis-inl.h index b17a81f75bc6..fd9872db6ec8 100644 --- a/src/operator/swapaxis-inl.h +++ b/src/operator/swapaxis-inl.h @@ -47,7 +47,7 @@ enum SwapAxisOpOutputs {kOut}; struct SwapAxisParam : public dmlc::Parameter { // use int for enumeration - uint32_t dim1, dim2; + int dim1, dim2; DMLC_DECLARE_PARAMETER(SwapAxisParam) { DMLC_DECLARE_FIELD(dim1) .set_default(0) @@ -106,8 +106,6 @@ class SwapAxisOp : public Operator { const std::vector &req) { using namespace mshadow; using namespace mshadow::expr; - int dim1 = param_.dim1; - int dim2 = param_.dim2; TBlob data_in = in_data[swapaxisenum::kData]; TBlob data_out = out_data[swapaxisenum::kData]; @@ -115,10 +113,27 @@ class SwapAxisOp : public Operator { mxnet::TShape shape_in = data_in.shape_; mxnet::TShape shape_out = data_out.shape_; + int axis1 = param_.dim1; + if (axis1 < 0) { + axis1 += shape_in.ndim(); + } + CHECK(axis1 >= 0 && axis1 < shape_in.ndim()) + << "axis1: axis " << param_.dim1 << " is out of bounds for array of ndim " + << shape_in.ndim(); + + int axis2 = param_.dim2; + if (axis2 < 0) { + axis2 += shape_in.ndim(); + } + CHECK(axis2 >= 0 && axis2 < shape_in.ndim()) + << "axis2: axis " << param_.dim2 << " is out of bounds for array of ndim " + << shape_in.ndim(); + + if (shape_in.Size() == 0U) return; Shape<5> inter_shape; - Reshape2Five(&inter_shape, shape_in, dim1, dim2); + Reshape2Five(&inter_shape, shape_in, axis1, axis2); Tensor inter_data_in = data_in.get_with_shape(inter_shape, s); @@ -187,13 +202,28 @@ class SwapAxisProp : public OperatorProperty { CHECK_EQ(in_shape->size(), 1U); mxnet::TShape &shape0 = (*in_shape)[swapaxisenum::kData]; + if (!ndim_is_known(shape0)) return false; + int axis1 = param_.dim1; + if (axis1 < 0) { + axis1 += shape0.ndim(); + } + CHECK(axis1 >= 0 && axis1 < shape0.ndim()) + << "axis1: axis " << param_.dim1 << " is out of bounds for array of ndim " << shape0.ndim(); + + int axis2 = param_.dim2; + if (axis2 < 0) { + axis2 += shape0.ndim(); + } + CHECK(axis2 >= 0 && axis2 < shape0.ndim()) + << "axis2: axis " << param_.dim2 << " is out of bounds for array of ndim " << shape0.ndim(); + out_shape->clear(); out_shape->push_back(shape0); mxnet::TShape &shape1 = (*out_shape)[swapaxisenum::kOut]; - std::swap(shape1[param_.dim1], shape1[param_.dim2]); + std::swap(shape1[axis1], shape1[axis2]); - return true; + return shape_is_known(*out_shape); } bool InferType(std::vector *in_type, diff --git a/src/operator/swapaxis.cc b/src/operator/swapaxis.cc index 45bcca4db9ae..32b26cc14f0c 100644 --- a/src/operator/swapaxis.cc +++ b/src/operator/swapaxis.cc @@ -69,6 +69,6 @@ Examples:: [ 3, 7]]] )code" ADD_FILELINE); -NNVM_REGISTER_OP(SwapAxis).add_alias("swapaxes"); +NNVM_REGISTER_OP(SwapAxis).add_alias("swapaxes").add_alias("_npi_swapaxes"); } // namespace op } // namespace mxnet diff --git 
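The SwapAxis changes above normalize negative axes, bound-check them, return early on zero-size inputs, and register the `_npi_swapaxes` alias. A minimal sketch of the resulting behaviour, assuming the frontend exposes `np.swapaxes` over that alias:

```python
from mxnet import numpy as np

x = np.ones((2, 3, 4))
print(np.swapaxes(x, 0, -1).shape)               # (4, 3, 2): -1 normalizes to axis 2
print(np.swapaxes(np.ones((0, 5)), 0, 1).shape)  # (5, 0): zero-size inputs skip the kernel
```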
a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h index c7c49937730c..07ce716c22cc 100644 --- a/src/operator/tensor/broadcast_reduce_op.h +++ b/src/operator/tensor/broadcast_reduce_op.h @@ -168,15 +168,24 @@ struct BroadcastLikeParam : public dmlc::Parameter { } }; -inline int CheckAxis(int axis, int ndim) { - CHECK(axis < ndim && axis >= -ndim) - << "axis " << axis << " exceeds the input dimension of " << ndim; - return (axis + ndim)%ndim; +inline int CheckAxis(const int axis, const int ndim) { + if (ndim == 0) { + CHECK(axis == 0 || axis == -1) << "axis " << axis << " is out of bounds for array of" + " dimension 1"; + return 0; + } else { + CHECK(axis < ndim && axis >= -ndim) + << "axis " << axis << " exceeds the input dimension of " << ndim; + return (axis + ndim) % ndim; + } } inline mxnet::TShape AxisShapeCompact(mxnet::TShape shape, int *axis, bool allow_2d) { int ndim = shape.ndim(); - index_t leading = 1, trailing = 1, M = shape[*axis]; + index_t leading = 1, trailing = 1, M = 1; + if (shape.ndim() > *axis) { + M = shape[*axis]; + } for (int i = 0; i < *axis; ++i) leading *= shape[i]; for (int i = *axis + 1; i < ndim; ++i) trailing *= shape[i]; if (allow_2d && trailing == 1) { @@ -553,14 +562,37 @@ void SearchAxisCompute(const nnvm::NodeAttrs& attrs, using namespace mshadow::expr; const ReduceAxisParam& param = nnvm::get(attrs.parsed); Stream *s = ctx.get_stream(); - if (!param.axis) LOG(FATAL) << "Global reduction not supported yet"; + int axis = inputs[0].ndim(); + TBlob input = inputs[0]; + if (param.axis.has_value()) { + axis = param.axis.value(); + } else { + // If global reduction, reshape the input tensor into 2D shape (1, inputs[0].shape_.Size()) + // and search on axis = 1. + mxnet::TShape shape_2d(2, 1); + shape_2d[1] = input.shape_.Size(); + input = TBlob(input.dptr_, shape_2d, input.dev_mask(), input.type_flag_, input.dev_id()); + axis = 1; + } - int axis = CheckAxis(param.axis.value(), inputs[0].shape_.ndim()); - mxnet::TShape shape = AxisShapeCompact(inputs[0].shape_, &axis, false); + axis = CheckAxis(axis, input.shape_.ndim()); + if (inputs[0].shape_.ndim() != 0) { + if (param.axis.has_value()) { + // cannot do argmax in an empty dimension + CHECK_NE(inputs[0].shape_[axis], 0) + << "searching input tensor of shape " << inputs[0].shape_ + << " along axis = " << axis << " of zero dim-size is not allowed"; + } else { + // cannot do argmax on an empty array + CHECK_NE(inputs[0].shape_.Size(), 0U) << "attempt to search an empty sequence"; + } + } + if (input.shape_.Size() == 0U) return; // zero-size tensor + mxnet::TShape shape = AxisShapeCompact(input.shape_, &axis, false); MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { Tensor out = outputs[0].get_with_shape( Shape2(shape[0], shape[2]), s); - Tensor in = inputs[0].get_with_shape( + Tensor in = input.get_with_shape( shape.get<3>(), s); CHECK(req[0] != kAddTo) << "AddTo is not supported"; ASSIGN_DISPATCH(out, req[0], (reduce_with_axis(in, 1))); @@ -914,36 +946,36 @@ void ReduceAxesBackwardUseInOutImpl(const OpContext& ctx, } } if (dst_shape.ndim() == 2) { - Tensor igrad = - outputs[0].get_with_shape(src_shape.get<2>(), s); - Tensor ograd = - inputs[0].get_with_shape(dst_shape.get<2>(), s); - Tensor data = - inputs[1].get_with_shape(src_shape.get<2>(), s); - Tensor out = - inputs[2].get_with_shape(dst_shape.get<2>(), s); + Tensor igrad = + outputs[0].get_with_shape(src_shape.get<2>(), s); + Tensor ograd = + inputs[0].get_with_shape(dst_shape.get<2>(), s); + Tensor data = + 
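`SearchAxisCompute` above now implements the global (axis-less) case by viewing the input as a `(1, input.size)` tensor and searching along axis 1, with explicit errors for empty inputs. In user-facing terms this is NumPy's flattened argmax/argmin; a sketch of the semantics using official NumPy:

```python
import numpy as np

x = np.array([[1., 3., 2.],
              [6., 5., 4.]])
print(np.argmax(x, axis=1))   # [1 0]  -- per-row search
print(np.argmax(x))           # 3      -- no axis: search the flattened array
```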
inputs[1].get_with_shape(src_shape.get<2>(), s); + Tensor out = + inputs[2].get_with_shape(dst_shape.get<2>(), s); MXNET_REQ_TYPE_SWITCH(req[0], Req, { Kernel, xpu>::Launch( s, outputs[0].shape_.Size(), data.dptr_, out.dptr_, igrad.dptr_, ograd.dptr_, in_shape, out_shape, src_shape.ndim()); }); - if (normalize) igrad /= scalar(src_shape.Size()/dst_shape.Size()); + if (normalize) igrad /= scalar(src_shape.Size()/dst_shape.Size()); } else { const int ndim = MXNET_SPECIAL_MAX_NDIM; - Tensor igrad = - outputs[0].get_with_shape(src_shape.get(), s); - Tensor ograd = - inputs[0].get_with_shape(dst_shape.get(), s); - Tensor data = - inputs[1].get_with_shape(src_shape.get(), s); - Tensor out = - inputs[2].get_with_shape(dst_shape.get(), s); + Tensor igrad = + outputs[0].get_with_shape(src_shape.get(), s); + Tensor ograd = + inputs[0].get_with_shape(dst_shape.get(), s); + Tensor data = + inputs[1].get_with_shape(src_shape.get(), s); + Tensor out = + inputs[2].get_with_shape(dst_shape.get(), s); MXNET_REQ_TYPE_SWITCH(req[0], Req, { Kernel, xpu>::Launch( s, outputs[0].shape_.Size(), data.dptr_, out.dptr_, igrad.dptr_, ograd.dptr_, in_shape, out_shape, src_shape.ndim()); }); - if (normalize) igrad /= scalar(src_shape.Size()/dst_shape.Size()); + if (normalize) igrad /= scalar(src_shape.Size()/dst_shape.Size()); } }); }); @@ -968,6 +1000,34 @@ void ReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs, ReduceAxesBackwardUseInOutImpl(ctx, small, inputs, req, outputs); } +template +struct broadcast_kernel { + template + MSHADOW_XINLINE static void Map(index_t i, + IType *input, + OType *output, + mshadow::Shape<5> in_shape, + mshadow::Shape<5> out_shape, + const OpReqType req, + const uint32_t ndim) { + size_t in_stride = 1; + size_t out_stride = 1; + index_t idx = i; + index_t in_idx = i; + for (int iter = ndim - 1; iter >= 0; --iter) { + size_t dim_idx = idx % out_shape[iter]; + in_idx -= dim_idx * out_stride; + if (in_shape[iter] != 1) { + in_idx += dim_idx * in_stride; + } + idx /= out_shape[iter]; + in_stride *= in_shape[iter]; + out_stride *= out_shape[iter]; + } + KERNEL_ASSIGN(output[i], req, OP::Map(input[in_idx])); + } +}; + template inline void BroadcastComputeImpl(const nnvm::NodeAttrs& attrs, const OpContext& ctx, @@ -977,24 +1037,40 @@ inline void BroadcastComputeImpl(const nnvm::NodeAttrs& attrs, const mxnet::TShape& small) { using namespace mshadow; using namespace mshadow::expr; + using namespace mxnet_op; mxnet::TShape src_shape, dst_shape; BroadcastReduceShapeCompact(outputs[0].shape_, small, &dst_shape, &src_shape); Stream *s = ctx.get_stream(); - MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { - if (dst_shape.ndim() == 2) { - Tensor out = - outputs[0].get_with_shape(dst_shape.get<2>(), s); - Tensor data = - inputs[0].get_with_shape(src_shape.get<2>(), s); - ASSIGN_DISPATCH(out, req[0], broadcast_to(data, dst_shape)); - } else { - const int ndim = MXNET_SPECIAL_MAX_NDIM; - Tensor out = - outputs[0].get_with_shape(dst_shape.get(), s); - Tensor data = - inputs[0].get_with_shape(src_shape.get(), s); - ASSIGN_DISPATCH(out, req[0], broadcast_to(data, dst_shape)); - } + MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, IType, { + MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, OType, { + mshadow::Shape<5> in_shape; + mshadow::Shape<5> out_shape; + for (int i = 0; i < 5; ++i) { + if (i < dst_shape.ndim()) { + in_shape[i] = src_shape[i]; + out_shape[i] = dst_shape[i]; + } else { + in_shape[i] = 1; + out_shape[i] = 1; + } + } + if (dst_shape.ndim() == 2) { + Tensor out = + 
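The `broadcast_kernel` above maps each flat output index back to a flat input index by walking the dimensions from last to first and skipping the stride contribution of broadcast (size-1) input dimensions. A Python sketch of the same index arithmetic, handy for sanity-checking it against `numpy.broadcast_to`:

```python
import numpy as np

def broadcast_src_index(i, in_shape, out_shape):
    """Flat input index that feeds flat output index i (mirrors broadcast_kernel)."""
    in_stride, out_stride, idx, in_idx = 1, 1, i, i
    for d in range(len(out_shape) - 1, -1, -1):
        dim_idx = idx % out_shape[d]
        in_idx -= dim_idx * out_stride
        if in_shape[d] != 1:              # only non-broadcast dims advance the input index
            in_idx += dim_idx * in_stride
        idx //= out_shape[d]
        in_stride *= in_shape[d]
        out_stride *= out_shape[d]
    return in_idx

src = np.arange(3).reshape(1, 3)
out = np.broadcast_to(src, (2, 3))
assert all(out.flat[i] == src.flat[broadcast_src_index(i, (1, 3), (2, 3))]
           for i in range(out.size))
```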
outputs[0].get_with_shape(dst_shape.get<2>(), s); + Tensor data = + inputs[0].get_with_shape(src_shape.get<2>(), s); + Kernel, xpu>::Launch( + s, out.shape_.Size(), data.dptr_, out.dptr_, in_shape, out_shape, req[0], 2); + } else { + const int ndim = MXNET_SPECIAL_MAX_NDIM; + Tensor out = + outputs[0].get_with_shape(dst_shape.get(), s); + Tensor data = + inputs[0].get_with_shape(src_shape.get(), s); + Kernel, xpu>::Launch( + s, out.shape_.Size(), data.dptr_, out.dptr_, in_shape, out_shape, req[0], ndim); + } + }); }); } diff --git a/src/operator/tensor/broadcast_reduce_op_index.cc b/src/operator/tensor/broadcast_reduce_op_index.cc index 56af3887c763..52082f759e7a 100644 --- a/src/operator/tensor/broadcast_reduce_op_index.cc +++ b/src/operator/tensor/broadcast_reduce_op_index.cc @@ -110,6 +110,7 @@ Examples:: NNVM_REGISTER_OP(pick) .add_alias("choose_element_0index") +.add_alias("_npx_pick") .describe(R"code(Picks elements from an input array according to the input indices along the given axis. Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be diff --git a/src/operator/tensor/dot.cc b/src/operator/tensor/dot.cc index 7d7b6c06c846..11a056146e1d 100644 --- a/src/operator/tensor/dot.cc +++ b/src/operator/tensor/dot.cc @@ -111,6 +111,7 @@ NNVM_REGISTER_OP(_backward_dot) .add_arguments(DotParam::__FIELDS__()); NNVM_REGISTER_OP(batch_dot) +.add_alias("_npx_batch_dot") .describe(R"doc(Batchwise dot product. ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and diff --git a/src/operator/tensor/elemwise_binary_broadcast_op.h b/src/operator/tensor/elemwise_binary_broadcast_op.h index f84767dd4b2f..8a81bbc1c475 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op.h +++ b/src/operator/tensor/elemwise_binary_broadcast_op.h @@ -292,6 +292,7 @@ void BinaryBroadcastCompute(const nnvm::NodeAttrs& attrs, const std::vector& inputs, const std::vector& req, const std::vector& outputs) { + if (outputs[0].shape_.Size() == 0U) return; mxnet::TShape new_lshape, new_rshape, new_oshape; int ndim = BinaryBroadcastShapeCompact(inputs[0].shape_, inputs[1].shape_, outputs[0].shape_, &new_lshape, &new_rshape, &new_oshape); diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc index cd433e00a770..e3c2e0e898d9 100644 --- a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc +++ b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc @@ -30,6 +30,7 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_equal) +.add_alias("_npi_equal") .describe(R"code(Returns the result of element-wise **equal to** (==) comparison operation with broadcasting. Example:: @@ -48,6 +49,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_not_equal) +.add_alias("_npi_not_equal") .describe(R"code(Returns the result of element-wise **not equal to** (!=) comparison operation with broadcasting. Example:: @@ -66,6 +68,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_greater) +.add_alias("_npi_greater") .describe(R"code(Returns the result of element-wise **greater than** (>) comparison operation with broadcasting. 
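The `_npi_equal` / `_npi_not_equal` / `_npi_greater` (and remaining comparison) aliases above back the Python comparison operators on `mxnet.numpy.ndarray`, as exercised by `test_ndarray_binary_element_wise_ops` later in this diff. A small sketch; the output dtype (0/1 floats vs. booleans) is left to the frontend here:

```python
from mxnet import numpy as np

a = np.array([[1., 2., 3.]])   # shape (1, 3)
b = np.array([[2.], [3.]])     # shape (2, 1)
print(a > b)                   # broadcasts to shape (2, 3)
print(a == 2.0)                # scalar form, _npi_equal_scalar (registered further below)
```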
Example:: @@ -84,6 +87,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_greater_equal) +.add_alias("_npi_greater_equal") .describe(R"code(Returns the result of element-wise **greater than or equal to** (>=) comparison operation with broadcasting. Example:: @@ -102,6 +106,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_lesser) +.add_alias("_npi_less") .describe(R"code(Returns the result of element-wise **lesser than** (<) comparison operation with broadcasting. Example:: @@ -120,6 +125,7 @@ Example:: .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(broadcast_lesser_equal) +.add_alias("_npi_less_equal") .describe(R"code(Returns the result of element-wise **lesser than or equal to** (<=) comparison operation with broadcasting. Example:: diff --git a/src/operator/tensor/elemwise_binary_op.h b/src/operator/tensor/elemwise_binary_op.h index 2fe3fd9919cf..9c1d8b17fdea 100644 --- a/src/operator/tensor/elemwise_binary_op.h +++ b/src/operator/tensor/elemwise_binary_op.h @@ -487,9 +487,11 @@ class ElemwiseBinaryOp : public OpBase { MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { const size_t size = (minthree(outputs[0].Size(), inputs[0].Size(), inputs[1].Size()) + DataType::kLanes - 1) / DataType::kLanes; - Kernel, xpu>::Launch(s, size, - outputs[0].dptr(), - inputs[0].dptr(), inputs[1].dptr()); + if (size != 0) { + Kernel, xpu>::Launch(s, size, + outputs[0].dptr(), + inputs[0].dptr(), inputs[1].dptr()); + } }); }); } @@ -510,9 +512,11 @@ class ElemwiseBinaryOp : public OpBase { MSHADOW_TYPE_SWITCH_WITH_HALF2(outputs[0].type_flag_, DType, { const size_t size = (minthree(outputs[0].Size(), inputs[0].Size(), inputs[1].Size()) + DataType::kLanes - 1) / DataType::kLanes; - Kernel, xpu>::Launch(s, size, - outputs[0].dptr(), - inputs[0].dptr(), inputs[1].dptr()); + if (size != 0) { + Kernel, xpu>::Launch(s, size, + outputs[0].dptr(), + inputs[0].dptr(), inputs[1].dptr()); + } }); }); } diff --git a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc index f027665a549b..3a687c2aa062 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op_extended.cc +++ b/src/operator/tensor/elemwise_binary_scalar_op_extended.cc @@ -84,7 +84,8 @@ MXNET_OPERATOR_REGISTER_BINARY(_backward_hypot_scalar) cpu, mshadow_op::hypot_grad_left>); NNVM_REGISTER_OP(smooth_l1) - .describe(R"code(Calculate Smooth L1 Loss(lhs, scalar) by summing +.add_alias("_npx_smooth_l1") +.describe(R"code(Calculate Smooth L1 Loss(lhs, scalar) by summing .. 
math:: diff --git a/src/operator/tensor/elemwise_binary_scalar_op_logic.cc b/src/operator/tensor/elemwise_binary_scalar_op_logic.cc index 17e76153ebb2..87ba394c99b2 100644 --- a/src/operator/tensor/elemwise_binary_scalar_op_logic.cc +++ b/src/operator/tensor/elemwise_binary_scalar_op_logic.cc @@ -71,26 +71,32 @@ static bool BinaryScalarLogicStorageType(const nnvm::NodeAttrs& attrs, MXNET_OPERATOR_REGISTER_BINARY_SCALAR_LOGIC(_equal_scalar, mshadow_op::eq) +.add_alias("_npi_equal_scalar") .set_attr("FGradient", MakeZeroGradNodes) .add_alias("_EqualScalar"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR_LOGIC(_not_equal_scalar, mshadow_op::ne) +.add_alias("_npi_not_equal_scalar") .set_attr("FGradient", MakeZeroGradNodes) .add_alias("_NotEqualScalar"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR_LOGIC(_greater_scalar, mshadow_op::gt) +.add_alias("_npi_greater_scalar") .set_attr("FGradient", MakeZeroGradNodes) .add_alias("_GreaterScalar"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR_LOGIC(_greater_equal_scalar, mshadow_op::ge) +.add_alias("_npi_greater_equal_scalar") .set_attr("FGradient", MakeZeroGradNodes) .add_alias("_GreaterEqualScalar"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR_LOGIC(_lesser_scalar, mshadow_op::lt) +.add_alias("_npi_less_scalar") .set_attr("FGradient", MakeZeroGradNodes) .add_alias("_LesserScalar"); MXNET_OPERATOR_REGISTER_BINARY_SCALAR_LOGIC(_lesser_equal_scalar, mshadow_op::le) +.add_alias("_npi_less_equal_scalar") .set_attr("FGradient", MakeZeroGradNodes) .add_alias("_LesserEqualScalar"); diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 458106e02671..87964ac246f0 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -243,8 +243,10 @@ class UnaryOp : public OpBase { mshadow::Stream *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, { MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - mxnet_op::Kernel, xpu>::Launch( - s, inputs[0].Size(), outputs[0].dptr(), inputs[0].dptr()); + if (inputs[0].Size() != 0) { + mxnet_op::Kernel, xpu>::Launch( + s, inputs[0].Size(), outputs[0].dptr(), inputs[0].dptr()); + } }); }); } diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index ee77817fcec9..fbacc1e3ac4e 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -465,6 +465,7 @@ Negative indices are supported, and `None` can be used for either `lhs_end` or ` - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None, rhs_begin=1, rhs_end=None, output shape = (30, 2, 2, 3) )code" ADD_FILELINE) +.add_alias("_npx_reshape_like") .set_num_inputs(2) .set_attr_parser(ParamParser) .set_attr("FListInputNames", @@ -603,6 +604,7 @@ Example:: DMLC_REGISTER_PARAMETER(CastParam); NNVM_REGISTER_OP(Cast) .add_alias("cast") +.add_alias("_npx_cast") .describe(R"code(Casts all elements of the input to a new type. .. note:: ``Cast`` is deprecated. Use ``cast`` instead. 
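Several kernels above (the unary element-wise path here, and the binary broadcast and element-wise paths earlier in this diff) gain `Size() != 0` guards so that zero-size tensors flow through without launching empty kernels. A sketch of the behaviour this enables on the numpy ndarray side:

```python
from mxnet import numpy as np

x = np.ones((0, 4))        # zero-size tensor
y = x * 2.0 + 1.0          # element-wise ops skip the kernel launch when Size() == 0
print(y.shape)             # (0, 4)
```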
@@ -1177,6 +1179,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_expm1, unary_bwd *s = ctx.get_stream(); MSHADOW_TYPE_SWITCH(ret.type_flag_, DType, { switch (axes.ndim()) { - case 0: + case 0: { + Tensor in = src.get_with_shape(mshadow::Shape1(1), s); + Tensor out = ret.get_with_shape(mshadow::Shape1(1), s); + Copy(out, in, s); break; + } case 1: { Tensor in = src.get(s); Tensor out = ret.get(s); @@ -1781,9 +1787,6 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs, SHAPE_ASSIGN_CHECK(*out_attrs, 0, ishape); return true; } - for (int i = 0; i < reps.ndim(); ++i) { - CHECK_GT(reps[i], 0) << "invalid reps=" << i << ", dim size must be greater than zero"; - } mxnet::TShape oshape(std::max(ishape.ndim(), reps.ndim()), -1); int i1 = ishape.ndim() - 1; int i2 = reps.ndim() - 1; @@ -1796,6 +1799,11 @@ inline bool TileOpShape(const nnvm::NodeAttrs& attrs, oshape[i] = reps[i2--]; } } + // If reps contains 0s, oshape is a zero-size shape. + // Need to distinguish between np_shape mode and legacy mode. + if (!Imperative::Get()->is_np_shape()) { + common::ConvertToNumpyShape(&oshape); + } SHAPE_ASSIGN_CHECK(*out_attrs, 0, oshape); return shape_is_known(oshape); } @@ -1814,7 +1822,7 @@ inline bool TileOpType(const nnvm::NodeAttrs& attrs, /*! * \brief Reshape the input and output tensors for - * using broadcast_to to achieve the funcitonality + * using broadcast_to to achieve the functionality * of operator tile. * \return a pair of mxnet::TShape's, first is the reshaped * input shape, second is the reshaped output shape. @@ -1822,7 +1830,7 @@ inline bool TileOpType(const nnvm::NodeAttrs& attrs, inline std::pair ReshapeInputOutputForTileOp( const mxnet::TShape& ishape, const mxnet::Tuple& reps) { - if (ishape.ndim() == 0 || reps.ndim() == 0) { + if (reps.ndim() == 0) { return std::make_pair(ishape, ishape); } @@ -2177,7 +2185,7 @@ inline size_t SqueezeShapeHelper(mxnet::TShape* shape) { CHECK(shape != nullptr); size_t count = 0; for (int i = 0; i < shape->ndim(); ++i) { - if ((*shape)[i] == 0) { + if ((*shape)[i] == -1) { ++count; } else { std::swap((*shape)[i], (*shape)[i-count]); @@ -2210,12 +2218,12 @@ inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, CHECK_EQ(dshape[axes[i]], 1) << "cannot select an axis to squeeze out which has size=" << dshape[axes[i]] << " not equal to one"; - CHECK_NE(oshape[axes[i]], 0) << "duplicate value in axis"; - oshape[axes[i]] = 0; + CHECK_NE(oshape[axes[i]], -1) << "duplicate value in axis"; + oshape[axes[i]] = -1; } } else { for (int i = 0; i < oshape.ndim(); ++i) { - if (oshape[i] == 1) oshape[i] = 0; + if (oshape[i] == 1) oshape[i] = -1; } } size_t oshape_size = SqueezeShapeHelper(&oshape); @@ -2631,10 +2639,14 @@ inline bool SplitOpShape(const nnvm::NodeAttrs& attrs, for (int i = 0; i < num_outputs; ++i) { int start = indices[i]; int end = (i < num_outputs - 1) ? 
indices[i + 1] : ishape[real_axis]; - CHECK(start < end) - << "start " << start << " is not less than end " << end << "for subarray " << i; - CHECK(end <= ishape[real_axis]) - << "end " << end << " is no less than the size of the axis " << ishape[real_axis]; + if (ishape[real_axis] == 0U) { + end = start; + } else { + CHECK(start < end) + << "start " << start << " is not less than end " << end << "for subarray " << i; + CHECK(end <= ishape[real_axis]) + << "end " << end << " is no less than the size of the axis " << ishape[real_axis]; + } dshape[real_axis] = (end - start); if (param.squeeze_axis) { CHECK_EQ(end - start, 1U) << "expected axis size of 1 but got " << end - start; diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc index b4abc9f5974a..2705885a9655 100644 --- a/src/operator/tensor/matrix_op.cc +++ b/src/operator/tensor/matrix_op.cc @@ -265,6 +265,7 @@ static inline bool FlattenStorageType(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(Flatten) .add_alias("flatten") +.add_alias("_npx_batch_flatten") .describe(R"code(Flattens the input array into a 2-D array by collapsing the higher dimensions. .. note:: `Flatten` is deprecated. Use `flatten` instead. @@ -410,6 +411,7 @@ Examples:: NNVM_REGISTER_OP(expand_dims) +.add_alias("_npi_expand_dims") .describe(R"code(Inserts a new axis of size 1 into the array shape For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)`` @@ -698,6 +700,7 @@ NNVM_REGISTER_OP(_backward_slice_like) NNVM_REGISTER_OP(clip) MXNET_ADD_SPARSE_OP_ALIAS(clip) +.add_alias("_npi_clip") .describe(R"code(Clips (limits) the values in an array. Given an interval, values outside the interval are clipped to the interval edges. @@ -772,6 +775,7 @@ NNVM_REGISTER_OP(_backward_clip) .set_attr("FCompute", ClipGrad_); NNVM_REGISTER_OP(repeat) +.add_alias("_np_repeat") .describe(R"code(Repeats elements of an array. By default, ``repeat`` flattens the input array into 1-D and then repeats the @@ -822,6 +826,7 @@ NNVM_REGISTER_OP(_backward_repeat) }); NNVM_REGISTER_OP(tile) +.add_alias("_npi_tile") .describe(R"code(Repeats the whole array multiple times. If ``reps`` has length *d*, and input array has dimension of *n*. There are @@ -1123,6 +1128,7 @@ Example:: .add_arguments(DepthToSpaceParam::__FIELDS__()); NNVM_REGISTER_OP(_split_v2) +.add_alias("_npi_split") .describe(R"code(Splits an array along a particular axis into multiple sub-arrays. 
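With the `reps > 0` check removed from `TileOpShape` above, a zero entry in `reps` now yields a zero-size output under numpy shape semantics (legacy shape mode keeps treating zeros as unknown dims, hence the `ConvertToNumpyShape` call). The official NumPy behaviour this targets:

```python
import numpy as np

a = np.ones((2, 3))
print(np.tile(a, (2, 2)).shape)   # (4, 6)
print(np.tile(a, (0, 2)).shape)   # (0, 6): a zero in reps produces a zero-size array
```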
Example:: diff --git a/src/operator/tensor/ordering_op-inl.h b/src/operator/tensor/ordering_op-inl.h index 1dda90104205..74589e52aa09 100644 --- a/src/operator/tensor/ordering_op-inl.h +++ b/src/operator/tensor/ordering_op-inl.h @@ -580,18 +580,38 @@ void ArgSort(const nnvm::NodeAttrs& attrs, const std::vector& req, const std::vector& outputs) { const ArgSortParam& param = nnvm::get(attrs.parsed); - TopKParam topk_param; - topk_param.axis = param.axis; - topk_param.is_ascend = param.is_ascend; - topk_param.k = 0; - topk_param.dtype = param.dtype; - topk_param.ret_typ = topk_enum::kReturnIndices; - MXNET_NO_FLOAT16_TYPE_SWITCH(inputs[0].type_flag_, DType, { - MSHADOW_TYPE_SWITCH(param.dtype, IDType, { - TopKImpl(ctx.run_ctx, - ctx.requested[0], req, inputs[0], outputs, topk_param); + + if (inputs[0].shape_.ndim() == 0) { + // Scalar tensor only accept axis of value 0, -1 or None + CHECK(!static_cast(param.axis) || param.axis.value() == -1 || param.axis.value() == 0) + << "Axis can only be -1 or 0 for scalor tensor"; + MSHADOW_TYPE_SWITCH(param.dtype, DType, { + Stream *s = ctx.get_stream(); + Tensor outdata = outputs[0].get_with_shape(Shape1(1), s); + ASSIGN_DISPATCH(outdata, OpReqType::kWriteTo, 0); }); - }); + } else if (inputs[0].shape_.Size() == 0) { + // If the input tensor is zero size, only a check on axis is needed + if (static_cast(param.axis)) { + int axis = param.axis.value(); + if (axis < 0) axis += inputs[0].shape_.ndim(); + CHECK(axis >= 0 && axis < inputs[0].shape_.ndim()) + << "Axis must be within the range of input tensor's dimension"; + } + } else { + TopKParam topk_param; + topk_param.axis = param.axis; + topk_param.is_ascend = param.is_ascend; + topk_param.k = 0; + topk_param.dtype = param.dtype; + topk_param.ret_typ = topk_enum::kReturnIndices; + MXNET_NO_FLOAT16_TYPE_SWITCH(inputs[0].type_flag_, DType, { + MSHADOW_TYPE_SWITCH(param.dtype, IDType, { + TopKImpl(ctx.run_ctx, + ctx.requested[0], req, inputs[0], outputs, topk_param); + }); + }); + } } template @@ -824,12 +844,21 @@ inline bool ArgSortShape(const nnvm::NodeAttrs& attrs, mxnet::ShapeVector *in_attrs, mxnet::ShapeVector *out_attrs) { const ArgSortParam& param = nnvm::get(attrs.parsed); - TopKParam topk_param; - topk_param.axis = param.axis; - topk_param.is_ascend = param.is_ascend; - topk_param.k = 0; - topk_param.ret_typ = topk_enum::kReturnIndices; - return TopKShapeImpl(topk_param, in_attrs, out_attrs); + CHECK_EQ(in_attrs->size(), 1U); + CHECK_EQ(out_attrs->size(), 1U); + mxnet::TShape& in_shape = (*in_attrs)[0]; + + if (in_shape.ndim() == 0) { + mxnet::TShape target_shape({1}); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, target_shape); + } else if (!static_cast(param.axis)) { + mxnet::TShape target_shape(Shape1(in_shape.Size())); + SHAPE_ASSIGN_CHECK(*out_attrs, 0, target_shape); + } else { + SHAPE_ASSIGN_CHECK(*out_attrs, 0, in_shape); + } + + return true; } } // namespace op } // namespace mxnet diff --git a/src/operator/tensor/ordering_op.cc b/src/operator/tensor/ordering_op.cc index e2f014d1ad41..7b5a14535829 100644 --- a/src/operator/tensor/ordering_op.cc +++ b/src/operator/tensor/ordering_op.cc @@ -34,6 +34,7 @@ DMLC_REGISTER_PARAMETER(SortParam); DMLC_REGISTER_PARAMETER(ArgSortParam); NNVM_REGISTER_OP(topk) +.add_alias("_npx_topk") .describe(R"code(Returns the top *k* elements in an input array along the given axis. The returned elements will be sorted. @@ -175,6 +176,7 @@ Examples:: // flatten and then sort argsort(x) = [ 3., 1., 5., 0., 4., 2.] 
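The ArgSort changes above add two NumPy-style cases: a 0-d input produces a single index 0 with output shape `(1,)`, and omitting `axis` sorts the flattened array, so `ArgSortShape` returns the flattened size. A sketch of the flattened case using official NumPy for reference:

```python
import numpy as np

x = np.array([[0.3, 0.1],
              [0.4, 0.2]])
print(np.argsort(x, axis=-1))    # [[1 0] [1 0]] -- per-row order
print(np.argsort(x, axis=None))  # [1 3 0 2]     -- indices into the flattened array
```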
)code" ADD_FILELINE) +.add_alias("_npi_argsort") .set_num_inputs(1) .set_num_outputs(1) .set_attr_parser(ParamParser) diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 064f783ec6c8..1b33c14f12e9 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -35,6 +35,9 @@ from common import setup_module, with_seed, teardown, assert_raises_cudnn_not_satisfied from common import run_in_spawned_process from test_operator import * +from test_numpy_op import * +from test_numpy_ndarray import * +from test_numpy_gluon import * from test_optimizer import * from test_random import * from test_exc_handling import * diff --git a/tests/python/unittest/test_contrib_amp.py b/tests/python/unittest/test_contrib_amp.py index 13048c35371e..ef3a6d81fb48 100644 --- a/tests/python/unittest/test_contrib_amp.py +++ b/tests/python/unittest/test_contrib_amp.py @@ -21,6 +21,7 @@ import ctypes import mxnet.contrib.amp as amp + def test_amp_coverage(): conditional = [item[0] for item in amp.lists.symbol.CONDITIONAL_FP32_FUNCS] diff --git a/tests/python/unittest/test_numpy_gluon.py b/tests/python/unittest/test_numpy_gluon.py new file mode 100644 index 000000000000..b4db7bfc4ab0 --- /dev/null +++ b/tests/python/unittest/test_numpy_gluon.py @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# pylint: skip-file +from __future__ import absolute_import +from __future__ import division + +import mxnet as mx +from mxnet import gluon, autograd, np, npx + + +def test_create_np_param(): + M, K, N = 10, 9, 20 + + def check_block_params(x, TestBlock, hybridize, expected_type): + net = TestBlock() + net.initialize() + if hybridize: + net.hybridize() + net(x) + params = net.collect_params() + for k, v in params.items(): + assert type(v.data()) is expected_type + + class TestBlock1(gluon.HybridBlock): + def __init__(self): + super(TestBlock1, self).__init__() + with self.name_scope(): + self.w = self.params.get('w', shape=(K, N), allow_deferred_init=True) + + def hybrid_forward(self, F, x, w): + return F.dot(x, w) + + @npx.use_np + class TestBlock2(gluon.HybridBlock): + def __init__(self): + super(TestBlock2, self).__init__() + with self.name_scope(): + self.w = self.params.get('w', shape=(K, N), allow_deferred_init=True) + + def hybrid_forward(self, F, x, w): + return F.np.dot(x, w) + + x = mx.nd.random.uniform(shape=(M, K)) + check_block_params(x, TestBlock1, False, mx.nd.NDArray) + check_block_params(x, TestBlock1, True, mx.nd.NDArray) + check_block_params(x.as_np_ndarray(), TestBlock2, False, np.ndarray) + check_block_params(x.as_np_ndarray(), TestBlock2, True, np.ndarray) + + +@npx.use_np +def test_optimizer_with_np_ndarrays(): + class LinearRegression(gluon.HybridBlock): + def __init__(self, num_input_dim=0, num_hidden_dim=100, num_output_dim=10): + super(LinearRegression, self).__init__() + with self.name_scope(): + self.w1 = self.params.get('w1', shape=(num_input_dim, num_hidden_dim), + allow_deferred_init=True) + self.w2 = self.params.get('w2', shape=(num_hidden_dim, num_output_dim), + allow_deferred_init=True) + + def hybrid_forward(self, F, x, w1, w2): + h = x.dot(w1) # equivalent to F.np.dot(x, w1) + h_relu = F.npx.relu(h) # equivalent to F.relu(h) but generating np.ndarray + y_pred = h_relu.dot(w2) # equivalent to F.np.dot(h_relu, w2) + return y_pred + + class TotalLoss(gluon.HybridBlock): + def hybrid_forward(self, F, pred, label): + return ((pred - label) ** 2).sum() # equivalent to F.np.sum(F.np.square(pred - label)) + + regressor = LinearRegression() + regressor.initialize(mx.init.Normal()) + regressor.hybridize() + + # Create random input and output data + x = mx.nd.random.normal(shape=(64, 1000)).as_np_ndarray() # x is of type mxnet.numpy.ndarray + regressor(x) + y = mx.nd.random.normal(shape=(64, 10)).as_np_ndarray() # y is of type mxnet.numpy.ndarray + + total_loss = TotalLoss() + total_loss.hybridize() + + trainer = gluon.Trainer(regressor.collect_params(), + 'sgd', + {'learning_rate': 1e-3, 'momentum': 0.9}) + + for t in range(5): + with autograd.record(): + output = regressor(x) # output is a type of np.ndarray because np.dot is the last op in the network + loss = total_loss(output, y) # loss is a scalar np.ndarray + loss.backward() + trainer.step(1) + + +if __name__ == '__main__': + import nose + nose.runmodule() diff --git a/tests/python/unittest/test_numpy_ndarray.py b/tests/python/unittest/test_numpy_ndarray.py new file mode 100644 index 000000000000..7b12eae7b546 --- /dev/null +++ b/tests/python/unittest/test_numpy_ndarray.py @@ -0,0 +1,764 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# pylint: skip-file +from __future__ import absolute_import +from __future__ import division +import os +import numpy as _np +import mxnet as mx +from mxnet import np, npx, autograd +from mxnet.gluon import HybridBlock +from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray, retry, assert_exception +from common import with_seed, TemporaryDirectory + + +@with_seed() +@npx.use_np_shape +def test_array_creation(): + dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, None] + objects = [ + [], + (), + [[1, 2], [3, 4]], + _np.random.uniform(size=rand_shape_nd(3)), + _np.random.uniform(size=(3, 0, 4)), + np.random.uniform(size=rand_shape_nd(3)), + np.random.uniform(size=(3, 0, 4)) + ] + for dtype in dtypes: + for src in objects: + mx_arr = np.array(src, dtype=dtype) + assert mx_arr.context == mx.current_context() + if isinstance(src, mx.nd.NDArray): + np_arr = _np.array(src.asnumpy(), dtype=dtype if dtype is not None else _np.float32) + else: + np_arr = _np.array(src, dtype=dtype if dtype is not None else _np.float32) + assert mx_arr.dtype == np_arr.dtype + assert same(mx_arr.asnumpy(), np_arr) + + +@with_seed() +@npx.use_np_shape +def test_zeros(): + # test np.zeros in Gluon + class TestZeros(HybridBlock): + def __init__(self, shape, dtype=None): + super(TestZeros, self).__init__() + self._shape = shape + self._dtype = dtype + + def hybrid_forward(self, F, x, *args, **kwargs): + return x + F.np.zeros(shape, dtype) + + class TestZerosOutputType(HybridBlock): + def hybrid_forward(self, F, x, *args, **kwargs): + return x, F.np.zeros(shape=()) + + # test np.zeros in imperative + def check_zero_array_creation(shape, dtype): + np_out = _np.zeros(shape=shape, dtype=dtype) + mx_out = np.zeros(shape=shape, dtype=dtype) + assert same(mx_out.asnumpy(), np_out) + if dtype is None: + assert mx_out.dtype == _np.float32 + assert np_out.dtype == _np.float64 + + shapes = [(0,), (2, 0, 2), (0, 0, 0, 0), ()] + shapes += [rand_shape_nd(ndim, allow_zero_size=True) for ndim in range(5)] + dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, None] + for shape in shapes: + for dtype in dtypes: + check_zero_array_creation(shape, dtype) + x = np.array(_np.random.uniform(size=shape), dtype=dtype) + if dtype is None: + x = x.astype('float32') + for hybridize in [True, False]: + test_zeros = TestZeros(shape, dtype) + test_zeros_output_type = TestZerosOutputType() + if hybridize: + test_zeros.hybridize() + test_zeros_output_type.hybridize() + y = test_zeros(x) + assert type(y) == np.ndarray + assert same(x.asnumpy(), y.asnumpy()) + y = test_zeros_output_type(x) + assert type(y[1]) == np.ndarray + + +@with_seed() +@npx.use_np_shape +def test_ones(): + # test np.ones in Gluon + class TestOnes(HybridBlock): + def __init__(self, shape, dtype=None): + super(TestOnes, self).__init__() + self._shape = shape + self._dtype = dtype + + def hybrid_forward(self, F, x, *args, **kwargs): + return x * 
F.np.ones(shape, dtype) + + class TestOnesOutputType(HybridBlock): + def hybrid_forward(self, F, x, *args, **kwargs): + return x, F.np.ones(shape=()) + + # test np.ones in imperative + def check_ones_array_creation(shape, dtype): + np_out = _np.ones(shape=shape, dtype=dtype) + mx_out = np.ones(shape=shape, dtype=dtype) + assert same(mx_out.asnumpy(), np_out) + if dtype is None: + assert mx_out.dtype == _np.float32 + assert np_out.dtype == _np.float64 + + shapes = [(0,), (2, 0, 2), (0, 0, 0, 0), ()] + shapes += [rand_shape_nd(ndim, allow_zero_size=True) for ndim in range(5)] + dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, None] + for shape in shapes: + for dtype in dtypes: + check_ones_array_creation(shape, dtype) + x = mx.nd.array(_np.random.uniform(size=shape), dtype=dtype).as_np_ndarray() + if dtype is None: + x = x.astype('float32') + for hybridize in [True, False]: + test_ones = TestOnes(shape, dtype) + test_ones_output_type = TestOnesOutputType() + if hybridize: + test_ones.hybridize() + test_ones_output_type.hybridize() + y = test_ones(x) + assert type(y) == np.ndarray + assert same(x.asnumpy(), y.asnumpy()) + y = test_ones_output_type(x) + assert type(y[1]) == np.ndarray + + +@with_seed() +@npx.use_np_shape +def test_identity(): + class TestIdentity(HybridBlock): + def __init__(self, shape, dtype=None): + super(TestIdentity, self).__init__() + self._n = n + self._dtype = dtype + + def hybrid_forward(self, F, x, *args, **kwargs): + return x * F.np.identity(n, dtype) + + class TestIdentityOutputType(HybridBlock): + def hybrid_forward(self, F, x, *args, **kwargs): + return x, F.np.identity(0) + + def check_identity_array_creation(shape, dtype): + np_out = _np.identity(n=n, dtype=dtype) + mx_out = np.identity(n=n, dtype=dtype) + assert same(mx_out.asnumpy(), np_out) + if dtype is None: + assert mx_out.dtype == _np.float32 + assert np_out.dtype == _np.float64 + + ns = [0, 1, 2, 3, 5, 15, 30, 200] + dtypes = [_np.int8, _np.int32, _np.float16, _np.float32, _np.float64, None] + for n in ns: + for dtype in dtypes: + check_identity_array_creation(n, dtype) + x = mx.nd.array(_np.random.uniform(size=(n, n)), dtype=dtype).as_np_ndarray() + if dtype is None: + x = x.astype('float32') + for hybridize in [True, False]: + test_identity = TestIdentity(n, dtype) + test_identity_output_type = TestIdentityOutputType() + if hybridize: + test_identity.hybridize() + test_identity_output_type.hybridize() + y = test_identity(x) + assert type(y) == np.ndarray + assert same(x.asnumpy() * _np.identity(n, dtype), y.asnumpy()) + y = test_identity_output_type(x) + assert type(y[1]) == np.ndarray + + +@with_seed() +def test_ndarray_binary_element_wise_ops(): + np_op_map = { + '+': _np.add, + '*': _np.multiply, + '-': _np.subtract, + '/': _np.divide, + 'mod': _np.mod, + 'pow': _np.power, + '==': _np.equal, + '>': _np.greater, + '>=': _np.greater_equal, + '<': _np.less, + '<=': _np.less_equal + } + + def get_np_ret(x1, x2, op): + return np_op_map[op](x1, x2) + + @npx.use_np_shape + class TestBinaryElementWiseOp(HybridBlock): + def __init__(self, op, scalar=None, reverse=False): + super(TestBinaryElementWiseOp, self).__init__() + self._op = op + self._scalar = scalar + self._reverse = reverse # if false, scalar is the right operand. 
+ + def hybrid_forward(self, F, x, *args): + if self._op == '+': + if self._scalar is not None: + return x + self._scalar if not self._reverse else self._scalar + x + else: + return x + args[0] if not self._reverse else args[0] + x + elif self._op == '*': + if self._scalar is not None: + return x * self._scalar if not self._reverse else self._scalar * x + else: + return x * args[0] if not self._reverse else args[0] * x + elif self._op == '-': + if self._scalar is not None: + return x - self._scalar if not self._reverse else self._scalar - x + else: + return x - args[0] if not self._reverse else args[0] - x + elif self._op == '/': + if self._scalar is not None: + return x / self._scalar if not self._reverse else self._scalar / x + else: + return x / args[0] if not self._reverse else args[0] / x + elif self._op == 'mod': + if self._scalar is not None: + return x % self._scalar if not self._reverse else self._scalar % x + else: + return x % args[0] if not self._reverse else args[0] % x + elif self._op == 'pow': + if self._scalar is not None: + return x ** self._scalar if not self._reverse else self._scalar ** x + else: + return x ** args[0] if not self._reverse else args[0] ** x + elif self._op == '>': + if self._scalar is not None: + return x > self._scalar if not self._reverse else self._scalar > x + else: + return x > args[0] + elif self._op == '>=': + if self._scalar is not None: + return x >= self._scalar if not self._reverse else self._scalar >= x + else: + return x >= args[0] + elif self._op == '<': + if self._scalar is not None: + return x < self._scalar if not self._reverse else self._scalar < x + else: + return x < args[0] + elif self._op == '<=': + if self._scalar is not None: + return x <= self._scalar if not self._reverse else self._scalar <= x + else: + return x <= args[0] + elif self._op == '==': + if self._scalar is not None: + return x == self._scalar if not self._reverse else self._scalar == x + else: + return x == args[0] + else: + print(self._op) + assert False + + @npx.use_np_shape + def check_binary_op_result(shape1, shape2, op, dtype=None): + if shape1 is None: + mx_input1 = abs(_np.random.uniform()) + 1 + np_input1 = mx_input1 + else: + mx_input1 = rand_ndarray(shape1, dtype=dtype).abs() + 1 + np_input1 = mx_input1.asnumpy() + if shape2 is None: + mx_input2 = abs(_np.random.uniform()) + 1 + np_input2 = mx_input2 + else: + mx_input2 = rand_ndarray(shape2, dtype=dtype).abs() + 1 + np_input2 = mx_input2.asnumpy() + + scalar = None + reverse = False + if isinstance(mx_input1, mx.nd.NDArray) and not isinstance(mx_input2, mx.nd.NDArray): + scalar = mx_input2 + reverse = False + elif isinstance(mx_input2, mx.nd.NDArray) and not isinstance(mx_input1, mx.nd.NDArray): + scalar = mx_input1 + reverse = True + + np_out = get_np_ret(np_input1, np_input2, op) + for hybridize in [True, False]: + if scalar is None: + get_mx_ret_np = TestBinaryElementWiseOp(op) + get_mx_ret_classic = TestBinaryElementWiseOp(op) + if hybridize: + get_mx_ret_np.hybridize() + get_mx_ret_classic.hybridize() + mx_out = get_mx_ret_np(mx_input1.as_np_ndarray(), mx_input2.as_np_ndarray()) + assert type(mx_out) == np.ndarray + assert np_out.shape == mx_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5) + else: + get_mx_ret = TestBinaryElementWiseOp(op, scalar=scalar, reverse=reverse) + if hybridize: + get_mx_ret.hybridize() + if reverse: + mx_out = get_mx_ret(mx_input2.as_np_ndarray()) + assert type(mx_out) == np.ndarray + else: + mx_out = get_mx_ret(mx_input1.as_np_ndarray()) + 
assert type(mx_out) == np.ndarray + assert np_out.shape == mx_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-6, rtol=1e-5) + + dtypes = [_np.float32, _np.float64, None] + ops = np_op_map.keys() + for dtype in dtypes: + for op in ops: + check_binary_op_result((3, 4), (3, 4), op, dtype) + check_binary_op_result(None, (3, 4), op, dtype) + check_binary_op_result((3, 4), None, op, dtype) + check_binary_op_result((1, 4), (3, 1), op, dtype) + check_binary_op_result(None, (3, 1), op, dtype) + check_binary_op_result((1, 4), None, op, dtype) + check_binary_op_result((1, 4), (3, 5, 4), op, dtype) + check_binary_op_result((), (3, 5, 4), op, dtype) + check_binary_op_result((), None, op, dtype) + check_binary_op_result(None, (), op, dtype) + check_binary_op_result((0, 2), (1, 1), op, dtype) + check_binary_op_result((0, 2), None, op, dtype) + check_binary_op_result(None, (0, 2), op, dtype) + + +@with_seed() +def test_hybrid_block_multiple_outputs(): + @npx.use_np_shape + class TestAllNumpyOutputs(HybridBlock): + def hybrid_forward(self, F, x, *args, **kwargs): + return F.npx.relu(x), F.np.sum(x) + + class TestAllClassicOutputs(HybridBlock): + def hybrid_forward(self, F, x, *args, **kwargs): + return F.relu(x.as_nd_ndarray()), F.sum(x.as_nd_ndarray()) + + data_np = np.ones((2, 3)) + for block, expected_out_type in [(TestAllClassicOutputs, mx.nd.NDArray), + (TestAllNumpyOutputs, np.ndarray)]: + net = block() + for hybridize in [True, False]: + if hybridize: + net.hybridize() + out1, out2 = net(data_np) + assert type(out1) is expected_out_type + assert type(out2) is expected_out_type + + @npx.use_np_array + class TestMixedTypeOutputsFailure(HybridBlock): + def hybrid_forward(self, F, x, *args, **kwargs): + return F.relu(x.as_nd_ndarray()), F.np.sum(x) + + net = TestMixedTypeOutputsFailure() + assert_exception(net, TypeError, data_np) + net.hybridize() + assert_exception(net, TypeError, data_np) + + +@with_seed() +@npx.use_np_shape +def test_grad_ndarray_type(): + data = np.array(2, dtype=_np.float32) + data.attach_grad() + assert type(data.grad) == np.ndarray + assert type(data.detach()) == np.ndarray + + +@with_seed() +def test_np_ndarray_astype(): + mx_data = np.array([2, 3, 4, 5], dtype=_np.int32) + np_data = mx_data.asnumpy() + + def check_astype_equal(dtype, copy, expect_zero_copy=False): + mx_ret = mx_data.astype(dtype=dtype, copy=copy) + assert type(mx_ret) is np.ndarray + np_ret = np_data.astype(dtype=dtype, copy=copy) + assert mx_ret.dtype == np_ret.dtype + assert same(mx_ret.asnumpy(), np_ret) + if expect_zero_copy: + assert id(mx_ret) == id(mx_data) + assert id(np_ret) == id(np_data) + + for dtype in [_np.int8, _np.uint8, _np.int32, _np.float16, _np.float32, _np.float64]: + for copy in [True, False]: + check_astype_equal(dtype, copy, copy is False and mx_data.dtype == dtype) + + +@with_seed() +def test_np_ndarray_copy(): + mx_data = np.array([2, 3, 4, 5], dtype=_np.int32) + assert_exception(mx_data.copy, NotImplementedError, order='F') + mx_ret = mx_data.copy() + np_ret = mx_data.asnumpy().copy() + assert same(mx_ret.asnumpy(), np_ret) + + +@with_seed() +@npx.use_np_shape +def test_np_ndarray_indexing(): + def test_getitem(np_array, index): + """`is_scalar` indicates whether we should expect a scalar for the result. 
+ If so, the indexed array of NDArray should call asscalar to compare + with numpy's indexed array.""" + np_index = index + if isinstance(index, np.ndarray): + np_index = index.asnumpy() + if isinstance(index, tuple): + np_index = [] + for idx in index: + if isinstance(idx, np.ndarray): + np_index.append(idx.asnumpy()) + else: + np_index.append(idx) + np_index = tuple(np_index) + + np_indexed_array = np_array[np_index] + mx_array = np.array(np_array, dtype=np_array.dtype) + mx_indexed_array = mx_array[index].asnumpy() + assert same(np_indexed_array, mx_indexed_array), 'Failed with index=%s' % str(index) + + def test_setitem(np_array, index): + def assert_same(np_array, np_index, mx_array, mx_index, mx_value, np_value=None): + if np_value is not None: + np_array[np_index] = np_value + elif isinstance(mx_value, np.ndarray): + np_array[np_index] = mx_value.asnumpy() + else: + np_array[np_index] = mx_value + mx_array[mx_index] = mx_value + assert same(np_array, mx_array.asnumpy()) + + np_index = index + if isinstance(index, np.ndarray): + np_index = index.asnumpy() + if isinstance(index, tuple): + np_index = [] + for idx in index: + if isinstance(idx, np.ndarray): + np_index.append(idx.asnumpy()) + else: + np_index.append(idx) + np_index = tuple(np_index) + + mx_array = np.array(np_array, dtype=np_array.dtype) + np_array = mx_array.asnumpy() + indexed_array_shape = np_array[np_index].shape + np_indexed_array = _np.random.randint(low=-10000, high=0, size=indexed_array_shape) + # test value is a numpy array without broadcast + assert_same(np_array, np_index, mx_array, index, np_indexed_array) + # test value is an numeric_type + assert_same(np_array, np_index, mx_array, index, _np.random.randint(low=-10000, high=0)) + if len(indexed_array_shape) > 1: + # test ndarray with broadcast + assert_same(np_array, np_index, mx_array, index, + np.random.uniform(low=-10000, high=0, size=(indexed_array_shape[-1],))) + # test numpy array with broadcast + assert_same(np_array, np_index, mx_array, index, + _np.random.randint(low=-10000, high=0, size=(indexed_array_shape[-1],))) + # test list with broadcast + assert_same(np_array, np_index, mx_array, index, + [_np.random.randint(low=-10000, high=0)] * indexed_array_shape[-1]) + + def test_getitem_autograd(np_array, index): + x = np.array(np_array, dtype=np_array.dtype) + x.attach_grad() + with autograd.record(): + y = x[index] + y.backward() + value = np.ones_like(y) + x_grad = np.zeros_like(x) + x_grad[index] = value + assert same(x_grad.asnumpy(), x.grad.asnumpy()) + + def test_setitem_autograd(np_array, index): + x = np.array(np_array, dtype=np_array.dtype) + out_shape = x[index].shape + y = np.random.uniform(size=out_shape) + y.attach_grad() + try: + with autograd.record(): + x[index] = y + assert False # should not reach here + except mx.base.MXNetError as err: + assert str(err).find('Inplace operations (+=, -=, x[:]=, etc) are not supported when recording with') != -1 + + def np_int(index, int_type=_np.int32): + def convert(num): + if num is None: + return num + else: + return int_type(num) + + if isinstance(index, slice): + return slice(convert(index.start), convert(index.stop), convert(index.step)) + elif isinstance(index, tuple): # tuple of slices and integers + ret = [] + for elem in index: + if isinstance(elem, slice): + ret.append(slice(convert(elem.start), convert(elem.stop), convert(elem.step))) + else: + ret.append(convert(elem)) + return tuple(ret) + else: + assert False + + shape = (8, 16, 9, 9) + np_array = _np.arange(_np.prod(shape), 
dtype='int32').reshape(shape) + index_list = [ + (), + 0, + _np.int32(0), + _np.int64(0), + 5, + _np.int32(5), + _np.int64(5), + -1, + _np.int32(-1), + _np.int64(-1), + slice(5), + np_int(slice(5), _np.int32), + np_int(slice(5), _np.int64), + slice(1, 5), + np_int(slice(1, 5), _np.int32), + np_int(slice(1, 5), _np.int64), + slice(1, 5, 2), + np_int(slice(1, 5, 2), _np.int32), + np_int(slice(1, 5, 2), _np.int64), + slice(7, 0, -1), + np_int(slice(7, 0, -1)), + np_int(slice(7, 0, -1), _np.int64), + slice(None, 6), + np_int(slice(None, 6)), + np_int(slice(None, 6), _np.int64), + slice(None, 6, 3), + np_int(slice(None, 6, 3)), + np_int(slice(None, 6, 3), _np.int64), + slice(1, None), + np_int(slice(1, None)), + np_int(slice(1, None), _np.int64), + slice(1, None, 3), + np_int(slice(1, None, 3)), + np_int(slice(1, None, 3), _np.int64), + slice(None, None, 2), + np_int(slice(None, None, 2)), + np_int(slice(None, None, 2), _np.int64), + slice(None, None, -1), + np_int(slice(None, None, -1)), + np_int(slice(None, None, -1), _np.int64), + slice(None, None, -2), + np_int(slice(None, None, -2), _np.int32), + np_int(slice(None, None, -2), _np.int64), + (slice(None), slice(None), 1, 8), + (slice(None), slice(None), -1, 8), + (slice(None), slice(None), 1, -8), + (slice(None), slice(None), -1, -8), + np_int((slice(None), slice(None), 1, 8)), + np_int((slice(None), slice(None), 1, 8), _np.int64), + (slice(None), slice(None), 1, 8), + np_int((slice(None), slice(None), -1, -8)), + np_int((slice(None), slice(None), -1, -8), _np.int64), + (slice(None), 2, slice(1, 5), 1), + np_int((slice(None), 2, slice(1, 5), 1)), + np_int((slice(None), 2, slice(1, 5), 1), _np.int64), + (1, 2, 3), + np_int((1, 2, 3)), + np_int((1, 2, 3), _np.int64), + (-1, -2, -3), + np_int((-1, -2, -3)), + np_int((-1, -2, -3), _np.int64), + (1, 2, 3, 4), + np_int((1, 2, 3, 4)), + np_int((1, 2, 3, 4), _np.int64), + (-4, -3, -2, -1), + np_int((-4, -3, -2, -1)), + np_int((-4, -3, -2, -1), _np.int64), + (slice(None, None, -1), 2, slice(1, 5), 1), + np_int((slice(None, None, -1), 2, slice(1, 5), 1)), + np_int((slice(None, None, -1), 2, slice(1, 5), 1), _np.int64), + (slice(None, None, -1), 2, slice(1, 7, 2), 1), + np_int((slice(None, None, -1), 2, slice(1, 7, 2), 1)), + np_int((slice(None, None, -1), 2, slice(1, 7, 2), 1), _np.int64), + (slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3)), + np_int((slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3))), + np_int((slice(1, 8, 2), slice(14, 2, -2), slice(3, 8), slice(0, 7, 3)), _np.int64), + (slice(1, 8, 2), 1, slice(3, 8), 2), + np_int((slice(1, 8, 2), 1, slice(3, 8), 2)), + np_int((slice(1, 8, 2), 1, slice(3, 8), 2), _np.int64), + [1], + [1, 2], + [2, 1, 3], + [7, 5, 0, 3, 6, 2, 1], + _np.array([6, 3], dtype=_np.int32), + _np.array([[3, 4], [0, 6]], dtype=_np.int32), + _np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int32), + _np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int64), + _np.array([[2], [0], [1]], dtype=_np.int32), + _np.array([[2], [0], [1]], dtype=_np.int64), + np.array([4, 7], dtype=_np.int32), + np.array([4, 7], dtype=_np.int64), + np.array([[3, 6], [2, 1]], dtype=_np.int32), + np.array([[3, 6], [2, 1]], dtype=_np.int64), + np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int32), + np.array([[7, 3], [2, 6], [0, 5], [4, 1]], dtype=_np.int64), + (1, [2, 3]), + (1, [2, 3], _np.array([[3], [0]], dtype=_np.int32)), + (1, [2, 3]), + (1, [2, 3], _np.array([[3], [0]], dtype=_np.int64)), + (1, [2], _np.array([[5], [3]], dtype=_np.int32), slice(None)), + 
(1, [2], _np.array([[5], [3]], dtype=_np.int64), slice(None)), + (1, [2, 3], _np.array([[6], [0]], dtype=_np.int32), slice(2, 5)), + (1, [2, 3], _np.array([[6], [0]], dtype=_np.int64), slice(2, 5)), + (1, [2, 3], _np.array([[4], [7]], dtype=_np.int32), slice(2, 5, 2)), + (1, [2, 3], _np.array([[4], [7]], dtype=_np.int64), slice(2, 5, 2)), + (1, [2], _np.array([[3]], dtype=_np.int32), slice(None, None, -1)), + (1, [2], _np.array([[3]], dtype=_np.int64), slice(None, None, -1)), + (1, [2], _np.array([[3]], dtype=_np.int32), np.array([[5, 7], [2, 4]], dtype=_np.int64)), + (1, [2], np.array([[4]], dtype=_np.int32), np.array([[1, 3], [5, 7]], dtype='int64')), + [0], + [0, 1], + [1, 2, 3], + [2, 0, 5, 6], + ([1, 1], [2, 3]), + ([1], [4], [5]), + ([1], [4], [5], [6]), + ([[1]], [[2]]), + ([[1]], [[2]], [[3]], [[4]]), + (slice(0, 2), [[1], [6]], slice(0, 2), slice(0, 5, 2)), + ([[[[1]]]], [[1]], slice(0, 3), [1, 5]), + ([[[[1]]]], 3, slice(0, 3), [1, 3]), + ([[[[1]]]], 3, slice(0, 3), 0), + ([[[[1]]]], [[2], [12]], slice(0, 3), slice(None)), + ([1, 2], slice(3, 5), [2, 3], [3, 4]), + ([1, 2], slice(3, 5), (2, 3), [3, 4]), + range(4), + range(3, 0, -1), + (range(4,), [1]), + # slice(0, 0) does not support output zero-size tensor yet + ] + for index in index_list: + test_getitem(np_array, index) + test_setitem(np_array, index) + test_getitem_autograd(np_array, index) + if not isinstance(index, tuple) or len(index) != 0: + # When index = (), this is same a[()] = b is equivalent to b.copyto(a) + # which should have no problem to do autograd + test_setitem_autograd(np_array, index) + + +@with_seed() +@npx.use_np +def test_np_save_load_ndarrays(): + shapes = [(2, 0, 1), (0,), (), (), (0, 4), (), (3, 0, 0, 0), (2, 1), (0, 5, 0), (4, 5, 6), (0, 0, 0)] + array_list = [_np.random.randint(0, 10, size=shape) for shape in shapes] + array_list = [np.array(arr, dtype=arr.dtype) for arr in array_list] + # test save/load single ndarray + for i, arr in enumerate(array_list): + with TemporaryDirectory() as work_dir: + fname = os.path.join(work_dir, 'dataset.npy') + npx.save(fname, arr) + arr_loaded = npx.load(fname) + assert isinstance(arr_loaded, list) + assert len(arr_loaded) == 1 + assert _np.array_equal(arr_loaded[0].asnumpy(), array_list[i].asnumpy()) + + # test save/load a list of ndarrays + with TemporaryDirectory() as work_dir: + fname = os.path.join(work_dir, 'dataset.npy') + npx.save(fname, array_list) + array_list_loaded = mx.nd.load(fname) + assert isinstance(arr_loaded, list) + assert len(array_list) == len(array_list_loaded) + assert all(isinstance(arr, np.ndarray) for arr in arr_loaded) + for a1, a2 in zip(array_list, array_list_loaded): + assert _np.array_equal(a1.asnumpy(), a2.asnumpy()) + + # test save/load a dict of str->ndarray + arr_dict = {} + keys = [str(i) for i in range(len(array_list))] + for k, v in zip(keys, array_list): + arr_dict[k] = v + with TemporaryDirectory() as work_dir: + fname = os.path.join(work_dir, 'dataset.npy') + npx.save(fname, arr_dict) + arr_dict_loaded = npx.load(fname) + assert isinstance(arr_dict_loaded, dict) + assert len(arr_dict_loaded) == len(arr_dict) + for k, v in arr_dict_loaded.items(): + assert k in arr_dict + assert _np.array_equal(v.asnumpy(), arr_dict[k].asnumpy()) + + +@retry(5) +@with_seed() +@npx.use_np_shape +def test_np_multinomial(): + pvals_list = [[0.0, 0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1, 0.0]] + sizes = [None, (), (3,), (2, 5, 7), (4, 9)] + experiements = 10000 + for pvals_type in [list, _np.ndarray]: + for have_size in [False, True]: + for 
pvals in pvals_list: + if have_size: + for size in sizes: + if pvals_type == mx.nd.NDArray: + pvals = mx.nd.array(pvals).as_np_ndarray() + elif pvals_type == _np.ndarray: + pvals = _np.array(pvals) + freq = mx.np.random.multinomial(experiements, pvals, size=size).asnumpy() / _np.float32(experiements) + # for those cases that didn't need reshape + if size in [None, ()]: + mx.test_utils.assert_almost_equal(freq, pvals, rtol=0.20, atol=1e-1) + else: + # check the shape + assert freq.shape == size + (len(pvals),), 'freq.shape={}, size + (len(pvals))={}'.format(freq.shape, size + (len(pvals))) + freq = freq.reshape((-1, len(pvals))) + # check the value for each row + for i in range(freq.shape[0]): + mx.test_utils.assert_almost_equal(freq[i, :], pvals, rtol=0.20, atol=1e-1) + else: + freq = mx.np.random.multinomial(experiements, pvals).asnumpy() / _np.float32(experiements) + mx.test_utils.assert_almost_equal(freq, pvals, rtol=0.20, atol=1e-1) + # check the zero dimension + sizes = [(0), (0, 2), (4, 0, 2), (3, 0, 1, 2, 0)] + for pvals in pvals_list: + for size in sizes: + freq = mx.np.random.multinomial(experiements, pvals, size=size).asnumpy() + assert freq.size == 0 + # check [] as pvals + for pvals in [[], ()]: + freq = mx.np.random.multinomial(experiements, pvals).asnumpy() + assert freq.size == 0 + for size in sizes: + freq = mx.np.random.multinomial(experiements, pvals, size=size).asnumpy() + assert freq.size == 0 + + +if __name__ == '__main__': + import nose + nose.runmodule() diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py new file mode 100644 index 000000000000..d37341957441 --- /dev/null +++ b/tests/python/unittest/test_numpy_op.py @@ -0,0 +1,1290 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# pylint: skip-file +from __future__ import absolute_import +import numpy as _np +import mxnet as mx +from mxnet import np, npx +from mxnet.base import MXNetError +from mxnet.gluon import HybridBlock +from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray +from mxnet.test_utils import check_numeric_gradient +from common import assertRaises, with_seed +import random + + +@with_seed() +@npx.use_np_shape +def test_np_sum(): + class TestSum(HybridBlock): + def __init__(self, axis=None, dtype=None, keepdims=False): + super(TestSum, self).__init__() + self._axis = axis + self._dtype = dtype + self._keepdims = keepdims + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.sum(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims) + + def is_int(dtype): + return 'int' in dtype + + in_data_dim = random.choice([2, 3, 4]) + shape = rand_shape_nd(in_data_dim, dim=3) + acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64', + 'int8': 'int32', 'int32': 'int64', 'int64': 'int64'} + for hybridize in [False, True]: + for keepdims in [True, False]: + for axis in ([i for i in range(in_data_dim)] + [(), None]): + for itype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']: + for dtype in ['float16', 'float32', 'float64', 'int8', 'int32', 'int64']: + if is_int(dtype) and not is_int(itype): + continue + # test gluon + test_sum = TestSum(axis=axis, dtype=dtype, keepdims=keepdims) + if hybridize: + test_sum.hybridize() + if is_int(itype): + x = _np.random.randint(-128, 128, shape, dtype=itype) + x = mx.nd.array(x) + else: + x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype) + x = x.as_np_ndarray() + x.attach_grad() + expected_ret = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims) + expected_ret = expected_ret.astype(dtype) + with mx.autograd.record(): + y = test_sum(x) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if dtype == 'float16' else 1e-3, + atol=1e-5 if dtype == 'float16' else 1e-5) + + y.backward() + assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype)) + + # test numeric + if itype == 'float32' and dtype == 'float32': + x_sym = mx.sym.Variable("x").as_np_ndarray() + mx_sym = mx.sym.np.sum(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray() + check_numeric_gradient(mx_sym, [x.as_nd_ndarray()], + numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32) + + # test imperative + mx_out = np.sum(x, axis=axis, dtype=dtype, keepdims=keepdims) + np_out = _np.sum(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_dot(): + shapes = [ + ((3, 0), (0, 4)), + ((3,), (3,)), # Case 1 + ((3, 4), (4, 5)), # Case 2 + ((), ()), # Case 3 + ((3, 4, 5), ()), # Case 3.5.1 + ((), (3, 4, 5)), # Case 3.5.2 + ((3, 4, 5), (5, )), # Case 4 + ((3, 4, 5), (5, 2)), + ((5,), (5, 2)) + ] + + eps = 1e-3 + + for shape_a, shape_b in shapes: + np_a = _np.random.uniform(-1.0, 1.0, shape_a) + np_a[abs(np_a) < eps] = 2 * eps; + np_b = _np.random.uniform(-1.0, 1.0, shape_b) + np_b[abs(np_b) < eps] = 2 * eps; + a = mx.nd.array(np_a) + b = mx.nd.array(np_b) + np_res = _np.dot(np_a, np_b) + mx_res = np.dot(a.as_np_ndarray(), b.as_np_ndarray()) + assert mx_res.shape == np_res.shape + assert_almost_equal(np_res, mx_res.asnumpy(), rtol=1e-5, atol=1e-5) + mx_a = mx.sym.Variable("a") + mx_b = 
mx.sym.Variable("b") + mx_sym = mx.sym.np.dot(mx_a.as_np_ndarray(), mx_b.as_np_ndarray()).as_nd_ndarray() + check_numeric_gradient(mx_sym, {"a": a, "b": b}, numeric_eps=eps, rtol=1e-2, atol=1e-3) + + bad_shapes = [((4, 5), (2, 3)), ((3, 4, 5), (6, ))] + + for shape_a, shape_b in bad_shapes: + a = mx.nd.array(random.random()) if len(shape_a) == 0 else rand_ndarray(shape_a) + b = mx.nd.array(random.random()) if len(shape_b) == 0 else rand_ndarray(shape_b) + try: + mx_res = np.dot(a.as_np_ndarray(), b.as_np_ndarray()) + except mx.base.MXNetError: + continue + assert False + + +@with_seed() +@npx.use_np_shape +def test_np_mean(): + @npx.use_np_shape + class TestMean(HybridBlock): + def __init__(self, axis=None, dtype=None, keepdims=False): + super(TestMean, self).__init__() + self._axis = axis + self._dtype = dtype + self._keepdims = keepdims + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.mean(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims) + + def is_int(dtype): + return 'int' in dtype + + in_data_dim = random.choice([2, 3, 4]) + shape = rand_shape_nd(in_data_dim, dim=3) + acc_type = {'float16': 'float32', 'float32': 'float64', 'float64': 'float64', + 'int8': 'int32', 'int32': 'int64', 'int64': 'int64'} + for hybridize in [False, True]: + for keepdims in [True, False]: + for axis in ([i for i in range(in_data_dim)] + [(), None]): + for itype in ['float16', 'float32', 'float64']: + for dtype in ['float16', 'float32', 'float64']: + print(itype, dtype) + if is_int(dtype) and not is_int(itype): + continue + # test gluon + test_mean = TestMean(axis=axis, dtype=dtype, keepdims=keepdims) + if hybridize: + test_mean.hybridize() + if is_int(itype): + x = _np.random.randint(-128, 128, shape, dtype=itype) + x = mx.nd.array(x, dtype=itype) + else: + x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype) + x = x.as_np_ndarray() + x.attach_grad() + expected_ret = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims) + expected_ret = expected_ret.astype(dtype) + with mx.autograd.record(): + y = test_mean(x) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if dtype == 'float16' else 1e-3, + atol=1e-5 if dtype == 'float16' else 1e-5) + + y.backward() + N = x.size / y.size + assert same(x.grad.asnumpy(), _np.ones(shape=x.shape, dtype=x.dtype) / N) + + # test numeric + if itype == 'float32' and dtype == 'float32': + x_sym = mx.sym.Variable("x").as_np_ndarray() + mx_sym = mx.sym.np.mean(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray() + check_numeric_gradient(mx_sym, [x.as_nd_ndarray()], + numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32) + + # test imperative + mx_out = np.mean(x, axis=axis, dtype=dtype, keepdims=keepdims) + np_out = _np.mean(x.asnumpy(), axis=axis, dtype=acc_type[itype], keepdims=keepdims).astype(dtype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_max(): + @npx.use_np_shape + class TestMax(HybridBlock): + def __init__(self, axis=None, keepdims=False): + super(TestMax, self).__init__() + self._axis = axis + self._keepdims = keepdims + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.max(a, axis=self._axis, keepdims=self._keepdims) + + def is_int(dtype): + return 'int' == dtype + + def get_grad(axis): + if axis == (): + return _np.ones((2,3,4,5)) + else: + temp = _np.zeros((2,3,4,5)) + if axis == 0: + temp[-1,:,:,:] = 1 + return temp + elif axis == 1: + temp[:,-1,:,:] = 
1 + return temp + elif axis == 2: + temp[:,:,-1,:] = 1 + return temp + elif axis == 3: + temp[:,:,:,-1] = 1 + return temp + elif not axis: + temp[-1,-1,-1,-1] = 1 + return temp + raise ValueError('axis should be int or None or ()') + + def _test_np_max_exception(shape, dim): + x = _np.random.uniform(-1.0, 1.0, shape) + x = mx.nd.array(x).as_np_ndarray() + out = mx.np.max(x) + assert out.ndim == dim, 'dimension mismatch, output.ndim={}, dim={}'.format(output.ndim, dim) + + in_data_dim = random.choice([2, 3, 4]) + shape = rand_shape_nd(in_data_dim, dim=3) + for hybridize in [False, True]: + for keepdims in [True, False]: + for axis in ([i for i in range(in_data_dim)] + [(), None]): + for itype in ['float16', 'float32', 'float64', 'int']: + # test gluon + test_max = TestMax(axis=axis, keepdims=keepdims) + if hybridize: + test_max.hybridize() + if is_int(itype): + x = mx.nd.arange(120).reshape((2, 3, 4, 5)) + x = mx.nd.array(x) + else: + x = mx.nd.random.uniform(-1.0, 1.0, shape=shape, dtype=itype) + x = x.as_np_ndarray() + x.attach_grad() + expected_ret = _np.amax(x.asnumpy(), axis=axis, keepdims=keepdims) + with mx.autograd.record(): + y = test_max(x) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3 if itype == 'float16' else 1e-3, + atol=1e-5 if itype == 'float16' else 1e-5) + y.backward() + # only check the gradient with hardcoded input + if is_int(itype): + assert same(x.grad.asnumpy(), get_grad(axis)), \ + 'x={}\ny={}\nx.grad={}\nnumpy={}'.format(x.asnumpy(), y.asnumpy(), x.grad.asnumpy(), get_grad(axis)) + + # test imperative + mx_out = np.max(x, axis=axis, keepdims=keepdims) + np_out = _np.amax(x.asnumpy(), axis=axis, keepdims=keepdims) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + # test zero and zero dim + shapes = [(), (0), (2, 0), (0, 2, 1)] + exceptions = [False, True, True, True] + dims = [0] * len(shapes) + for shape, exception, dim in zip(shapes, exceptions, dims): + if exception: + assertRaises(MXNetError, _test_np_max_exception, shape, dim) + else: + _test_np_max_exception(shape, dim) + + +@with_seed() +@npx.use_np_shape +def test_np_transpose(): + # TODO(junwu): Add more test cases + data = mx.sym.var('a').as_np_ndarray() + ret = data.transpose() + assert type(ret) == mx.sym.np._Symbol + + dtypes = ['float32', 'int32'] + for dtype in dtypes: + for ndim in [0, 1, 2, 3, 4, 5, 6]: + shape = rand_shape_nd(ndim, dim=5, allow_zero_size=True) + np_data = _np.random.uniform(low=-100, high=100, size=shape).astype(dtype) + mx_data = np.array(np_data, dtype=dtype) + axes = [None] + if ndim == 0: + axes += [()] + else: + axis = [i for i in range(ndim)] + axes.append(tuple(axis)) + random.shuffle(axis) + axes.append(tuple(axis)) + for axis in axes: + np_out = _np.transpose(np_data, axes=axis) + mx_out = np.transpose(mx_data, axes=axis) + assert np_out.dtype == mx_out.dtype + assert same(mx_out.asnumpy(), np_out) + # TODO(junwu): Add numerical gradient test and Gluon API test. 
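The TODO above asks for a Gluon API test of `np.transpose`. A minimal sketch of what that could look like, following the `HybridBlock` pattern used by the other tests in this file; it assumes `F.np.transpose` is exposed in both the ndarray and symbol numpy namespaces, and the names `TestTranspose` / `check_transpose_gluon` are illustrative only:

```python
import numpy as _np
import mxnet as mx
from mxnet import np, npx
from mxnet.gluon import HybridBlock
from mxnet.test_utils import same


@npx.use_np_shape
class TestTranspose(HybridBlock):
    def __init__(self, axes=None):
        super(TestTranspose, self).__init__()
        self._axes = axes

    def hybrid_forward(self, F, a):
        # dispatch to the numpy transpose operator for both NDArray and Symbol inputs
        return F.np.transpose(a, axes=self._axes)


@npx.use_np_shape
def check_transpose_gluon():
    data = np.random.uniform(size=(2, 3))
    expected = _np.transpose(data.asnumpy(), axes=(1, 0))
    for hybridize in [False, True]:
        net = TestTranspose(axes=(1, 0))
        if hybridize:
            net.hybridize()
        ret = net(data)
        assert same(ret.asnumpy(), expected)
```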
+ + +@with_seed() +@npx.use_np_shape +def test_npx_relu(): + # TODO(junwu): Add more test cases + data = mx.sym.var('data').as_np_ndarray() + ret = mx.sym.npx.relu(data) + assert type(ret) == mx.sym.np._Symbol + + shapes = [(), (0, 2, 0)] + shapes.extend([rand_shape_nd(ndim, allow_zero_size=True) for ndim in range(5)]) + for shape in shapes: + data = np.array(_np.random.uniform(size=shape).astype('float32')) + ret = npx.relu(data) + assert type(ret) == np.ndarray + + +@with_seed() +@npx.use_np_shape +def test_npx_sigmoid(): + # TODO(junwu): Add more test cases + data = mx.sym.var('data').as_np_ndarray() + ret = mx.sym.npx.sigmoid(data) + assert type(ret) == mx.sym.np._Symbol + + shapes = [(), (0, 2, 0)] + shapes.extend([rand_shape_nd(ndim, allow_zero_size=True) for ndim in range(5)]) + for shape in shapes: + data = np.array(_np.random.uniform(size=shape).astype('float32')) + ret = npx.sigmoid(data) + assert type(ret) == np.ndarray + + +@with_seed() +@npx.use_np_shape +def test_np_reshape(): + # TODO(junwu): Add more test cases + data = mx.sym.var('a').as_np_ndarray() + ret = data.reshape(()) + assert type(ret) == mx.sym.np._Symbol + + data = np.ones((1, 1, 1)) + ret = np.reshape(data, ()) + assert ret.shape == () + ret = np.reshape(ret, (1, 1, 1, 1)) + assert ret.shape == (1, 1, 1, 1) + assert type(ret) == np.ndarray + ret2 = ret.reshape(1, 1, -1) + assert ret2.shape == (1, 1, 1) + + +@with_seed() +@npx.use_np_shape +def test_np_maximum(): + # TODO(junwu): Add more test cases + x1, x2 = mx.sym.var('x1').as_np_ndarray(), mx.sym.var('x2').as_np_ndarray() + ret = mx.sym.np.maximum(x1, x2) + assert type(ret) == mx.sym.np._Symbol + + def check_maximum(x1, x2): + mx_out = np.maximum(x1, x2) + if isinstance(x1, np.ndarray) or isinstance(x2, np.ndarray): + assert type(mx_out) == np.ndarray + np_out = _np.maximum(x1.asnumpy() if isinstance(x1, np.ndarray) else x1, + x2.asnumpy() if isinstance(x2, np.ndarray) else x2) + assert same(mx_out.asnumpy() if isinstance(mx_out, np.ndarray) else mx_out, np_out) + + check_maximum(np.zeros((2, 1)), np.ones((5, 1, 4))) + check_maximum(np.zeros((2, 0)), np.ones((5, 1, 1))) + check_maximum(np.zeros(()), np.ones((5, 1, 4))) + + +@with_seed() +@npx.use_np_shape +def test_np_minimum(): + # TODO(junwu): Add more test cases + x1, x2 = mx.sym.var('x1').as_np_ndarray(), mx.sym.var('x2').as_np_ndarray() + ret = mx.sym.np.minimum(x1, x2) + assert type(ret) == mx.sym.np._Symbol + + def check_minimum(x1, x2): + mx_out = np.minimum(x1, x2) + if isinstance(x1, np.ndarray) or isinstance(x2, np.ndarray): + assert type(mx_out) == np.ndarray + np_out = _np.minimum(x1.asnumpy() if isinstance(x1, np.ndarray) else x1, + x2.asnumpy() if isinstance(x2, np.ndarray) else x2) + assert same(mx_out.asnumpy() if isinstance(mx_out, np.ndarray) else mx_out, np_out) + + check_minimum(np.zeros((2, 1)), np.ones((5, 1, 4))) + check_minimum(np.zeros((2, 0)), np.ones((5, 1, 1))) + check_minimum(np.zeros(()), np.ones((5, 1, 4))) + + +@with_seed() +@npx.use_np_shape +def test_np_unary_funcs(): + def check_unary_func(func, ref_grad, shape, low, high): + @npx.use_np_shape + class TestUnary(HybridBlock): + def __init__(self, func): + super(TestUnary, self).__init__() + self._func = func + + def hybrid_forward(self, F, a, *args, **kwargs): + return getattr(F.np, self._func)(a) + + np_func = getattr(_np, func) + mx_func = TestUnary(func) + np_test_data = _np.random.uniform(low, high, shape).astype(_np.float32) + mx_test_data = mx.numpy.array(np_test_data) + for hybridize in [True, False]: + if hybridize: 
+ mx_func.hybridize() + if ref_grad: + mx_test_data.attach_grad() + np_out = np_func(np_test_data) + with mx.autograd.record(): + y = mx_func(mx_test_data) + assert y.shape == np_out.shape + assert_almost_equal(y.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + if ref_grad: + y.backward() + assert_almost_equal(mx_test_data.grad.asnumpy(), ref_grad(np_test_data), rtol=1e-5, atol=1e-6, equal_nan=True) + + funcs = { + 'absolute' : (lambda x: -1. * (x < 0) + (x > 0), -1.0, 1.0), + 'cbrt' : (lambda x: 1. / (3. * _np.cbrt(x) ** 2), -1.0, 1.0), + 'ceil' : (None, -10.0, 10.0), + 'exp' : (lambda x: _np.exp(x), -1.0, 1.0), + 'expm1' : (lambda x: _np.exp(x), -1.0, 1.0), + 'fix' : (None, -10.0, 10.0), + 'floor' : (None, -10.0, 10.0), + 'log' : (lambda x: 1.0 / x, 0.1, 5.0), + 'log10' : (lambda x: 1.0 / (x * _np.log(10)), 0.1, 10.0), + 'log1p' : (lambda x: 1.0 / (1.0 + x), -0.9, 5.0), + 'log2' : (lambda x: 1.0 / (x * _np.log(2)), 0.1, 2.0), + 'logical_not' : (None, -1.0, 1.0), + 'negative' : (lambda x: -1. * _np.ones(x.shape), -1.0, 1.0), + 'reciprocal' : (lambda x: -1. / (x ** 2), 0.01, 1.0), + 'rint' : (None, -5.0, 5.0), + 'sign' : (None, -1.0, 1.0), + 'sqrt' : (lambda x: 0.5 / _np.sqrt(x), 0.001, 10.0), + 'square' : (lambda x: 2.0 * x, -1.0, 1.0), + 'trunc' : (None, -5.0, 5.0), + 'sin' : (lambda x: _np.cos(x), -1.0, 1.0), + 'cos' : (lambda x: -_np.sin(x), -1.0, 1.0), + 'tan' : (lambda x: _np.tan(x) ** 2 + 1.0, -1.0, 1.0), + 'arcsin' : (lambda x: 1. / (1. - x ** 2) ** (1. / 2.), -1.0, 1.0), + 'arccos' : (lambda x: -1. / (1. - x ** 2.) ** (1. / 2.), -1.0, 1.0), + 'arctan' : (lambda x: 1. / (x ** 2. + 1.), -1.0, 1.0), + 'degrees' : (lambda x: 180. / _np.pi * _np.ones(x.shape), -1.0, 1.0), + 'radians' : (lambda x: _np.pi / 180. * _np.ones(x.shape), -1.0, 1.0), + 'sinh' : (lambda x: _np.cosh(x), -1.0, 1.0), + 'cosh' : (lambda x: _np.sinh(x), -1.0, 1.0), + 'tanh' : (lambda x: 1. 
- _np.tanh(x) ** 2, -1.0, 1.0), + 'arcsinh' : (lambda x: 1./(x**2 + 1.)**(1./2.), -1.0, 1.0), + 'arccosh' : (lambda x: 1./(x**2 - 1.)**(1./2.), 2.0, 5.0), + 'arctanh' : (lambda x: -1./(x**2 - 1.), -0.99, 0.99) + } + ndim = random.choice([2, 3, 4]) + shape = random.choice([rand_shape_nd(ndim, dim=3), (1, 0, 2)]) + for shape in [rand_shape_nd(ndim, dim=3), (1, 0, 2)]: + for func, func_data in funcs.items(): + ref_grad, low, high = func_data + check_unary_func(func, ref_grad, shape, low, high) + + +@with_seed() +@npx.use_np_shape +def test_np_stack(): + @npx.use_np_shape + class TestStack(HybridBlock): + def __init__(self, axis=None): + super(TestStack, self).__init__() + self._axis = axis + + def hybrid_forward(self, F, a, *args): + return F.np.stack([a] + list(args), axis=self._axis) + + a, b, c, d = mx.sym.Variable("a"), mx.sym.Variable("b"), mx.sym.Variable("c"), mx.sym.Variable("d") + ret = mx.sym.np.stack([a.as_np_ndarray(), b.as_np_ndarray(), c.as_np_ndarray(), d.as_np_ndarray()]) + assert type(ret) == mx.sym.np._Symbol + + for shape in [(0, 0), (2, 3)]: + for hybridize in [True, False]: + for axis in range(2): + test_stack = TestStack(axis=axis) + if hybridize: + test_stack.hybridize() + np_a = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32) + np_b = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32) + np_c = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32) + np_d = _np.random.uniform(-1.0, 1.0, shape).astype(_np.float32) + + mx_a = np.array(np_a) + mx_a.attach_grad() + mx_b = np.array(np_b) + mx_b.attach_grad() + mx_c = np.array(np_c) + mx_c.attach_grad() + mx_d = np.array(np_d) + mx_d.attach_grad() + expected_ret = _np.stack([np_a, np_b, np_c, np_d], axis=axis) + with mx.autograd.record(): + y = test_stack(mx_a, mx_b, mx_c, mx_d) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5) + + y.backward() + + assert_almost_equal(mx_a.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_b.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_c.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(mx_d.grad.asnumpy(), _np.ones(shape), rtol=1e-3, atol=1e-5) + + np_out = _np.stack([np_a, np_b, np_c, np_d], axis=axis) + mx_out = np.stack([mx_a, mx_b, mx_c, mx_d], axis=axis) + assert same(mx_out.asnumpy(), np_out) + + +@with_seed() +@npx.use_np_shape +def test_np_random(): + shapes = [(), (1,), (2, 3), (4, 0, 5), 6, (7, 8), None] + dtypes = ['float16', 'float32', 'float64'] + op_names = ['uniform', 'normal'] + for shape in shapes: + for dtype in dtypes: + for op_name in op_names: + op = getattr(np.random, op_name, None) + assert op is not None + out = op(size=shape, dtype=dtype) + expected_shape = shape + if not isinstance(shape, tuple): + expected_shape = () if shape is None else (shape,) + assert out.shape == expected_shape + + @npx.use_np + class TestRandom(HybridBlock): + def __init__(self, shape, op_name): + super(TestRandom, self).__init__() + self._shape = shape + self._op_name = op_name + + def hybrid_forward(self, F, x): + op = getattr(F.np.random, self._op_name, None) + assert op is not None + return x + op(size=shape) + + x = np.ones(()) + for op_name in op_names: + for shape in shapes: + for hybridize in [False, True]: + net = TestRandom(shape, op_name) + if hybridize: + net.hybridize() + out = net(x) + expected_shape = shape + if not isinstance(shape, tuple): + expected_shape = () if shape is None else (shape,) + assert out.shape 
== expected_shape + + +@with_seed() +@npx.use_np_shape +def test_np_arange(): + configs = [ + (1, 10, 2), + (1, 10, 4), + (1, -10, 4), + (1, -10, -2), + (1, -10, -4), + (2, 3), + (2, -3), + (-2, -3), + (-2, 3), + (4, 0, 5), + (-4, 0, 5), + (-4, 0, -5), + (0, 0), + (11, 11), + (0, 0, 2), + (0, 0, -2), + (0, 5, None), + (0, -5, None), + 0, + 6, + ] + dtypes = ['int32', 'float16', 'float32', 'float64', None] + for config in configs: + for dtype in dtypes: + if isinstance(config, tuple): + mx_ret = np.arange(*config, dtype=dtype) + np_ret = _np.arange(*config, dtype=dtype) + else: + mx_ret = np.arange(config, dtype=dtype) + np_ret = _np.arange(config, dtype=dtype) + assert same(mx_ret.asnumpy(), np_ret) + + @npx.use_np + class TestRange(HybridBlock): + def __init__(self, start, stop=None, step=None, dtype=None): + super(TestRange, self).__init__() + self._start = start + self._stop = stop + self._step = step + self._dtype = dtype + + def hybrid_forward(self, F, x): + return x + F.np.arange(self._start, self._stop, self._step, dtype=self._dtype) + + for dtype in dtypes: + x = np.zeros(shape=(), dtype=dtype) + for config in configs: + for hybridize in [False, True]: + if isinstance(config, tuple): + net = TestRange(*config, dtype=dtype) + np_out = _np.arange(*config, dtype=dtype) + else: + net = TestRange(config, dtype=dtype) + np_out = _np.arange(config, dtype=dtype) + if hybridize: + net.hybridize() + mx_out = net(x) + assert same(mx_out.asnumpy(), np_out) + + +@with_seed() +@npx.use_np_shape +def test_np_linspace(): + configs = [ + (0.0, 1.0, 10), + (-2, 4, 30), + (5.234324, 8.98324, 324), + (2, 10, 100) + ] + exception_configs = [ + (0, 10, -1), + (0, 1, 2.5) + ] + dtypes = ['int32', 'float16', 'float32', 'float64', None] + for config in configs: + for dtype in dtypes: + for endpoint in [False, True]: + for retstep in [False, True]: + if isinstance(config, tuple): + mx_ret = np.linspace(*config, endpoint=endpoint, retstep=retstep, dtype=dtype) + np_ret = _np.linspace(*config, endpoint=endpoint, retstep=retstep, dtype=dtype) + else: + mx_ret = np.linspace(config, endpoint=endpoint, retstep=retstep, dtype=dtype) + np_ret = _np.linspace(config, endpoint=endpoint, retstep=retstep, dtype=dtype) + if retstep: + assert_almost_equal(mx_ret[0].asnumpy(), np_ret[0], atol=1e-3, rtol=1e-5) + same(mx_ret[1], np_ret[1]) + else: + assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-3, rtol=1e-5) + # check for exception input + for config in exception_configs: + assertRaises(MXNetError, np.linspace, *config) + # check linspace equivalent to arange + for test_index in range(1000): + assert_almost_equal(mx.np.linspace(0, test_index, test_index + 1).asnumpy(), mx.np.arange(test_index + 1).asnumpy()) + @npx.use_np + class TestLinspace(HybridBlock): + def __init__(self, start, stop, num=50, endpoint=None, retstep=False, dtype=None, axis=0): + super(TestLinspace, self).__init__() + self._start = start + self._stop = stop + self._num = num + self._endpoint = endpoint + self._retstep = retstep + self._dtype = dtype + + def hybrid_forward(self, F, x): + if self._retstep: + raise ValueError("linspace didn't support retstep = True inside HybridBlock") + else: + return x + F.np.linspace(self._start, self._stop, self._num, \ + self._endpoint, self._retstep, self._dtype) + + for dtype in dtypes: + x = np.zeros(shape=(), dtype=dtype) + for config in configs: + for hybridize in [False, True]: + for endpoint in [False, True]: + if isinstance(config, tuple): + net = TestLinspace(*config, endpoint=endpoint, dtype=dtype) + 
np_out = _np.linspace(*config, endpoint=endpoint, dtype=dtype) + else: + net = TestLinspace(config, endpoint=endpoint, dtype=dtype) + np_out = _np.linspace(config, endpoint=endpoint, dtype=dtype) + if hybridize: + net.hybridize() + mx_out = net(x) + assert_almost_equal(mx_out.asnumpy(), np_out, atol=1e-3, rtol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_argmax(): + workloads = [ + ((), 0, False), + ((), -1, False), + ((), 1, True), + ((5, 3), None, False), + ((5, 3), -1, False), + ((5, 3), 1, False), + ((5, 3), 3, True), + ((5, 0, 3), 0, False), + ((5, 0, 3), -1, False), + ((5, 0, 3), None, True), + ((5, 0, 3), 1, True), + ] + dtypes = ['float16', 'float32', 'float64'] + + @npx.use_np + class TestArgMax(HybridBlock): + def __init__(self, axis=None): + super(TestArgMax, self).__init__() + self._axis = axis + + def hybrid_forward(self, F, x): + return F.np.argmax(x, self._axis) + + for shape, axis, throw_exception in workloads: + for dtype in dtypes: + a = np.random.uniform(size=shape, dtype=dtype) + if throw_exception: + # Cannot use assert_exception because sometimes the main thread + # proceeds to `assert False` before the exception is thrown + # in the worker thread. Have to use mx.nd.waitall() here + # to block the main thread. + try: + np.argmax(a, axis) + mx.nd.waitall() + assert False + except mx.MXNetError: + pass + else: + mx_ret = np.argmax(a, axis=axis) + np_ret = _np.argmax(a.asnumpy(), axis=axis) + assert same(mx_ret.asnumpy(), np_ret) + + for hybridize in [False, True]: + net = TestArgMax(axis) + if hybridize: + net.hybridize() + if throw_exception: + try: + net(a) + mx.nd.waitall() + assert False + except mx.MXNetError: + pass + else: + mx_ret = net(a) + assert same(mx_ret.asnumpy(), np_ret) + + +@with_seed() +@npx.use_np_shape +def test_np_argsort(): + @npx.use_np_shape + class TestArgsort(HybridBlock): + def __init__(self, axis=-1): + super(TestArgsort, self).__init__() + self._axis = axis + + def hybrid_forward(self, F, a): + return F.np.argsort(a, self._axis) + + shapes = [ + (), + (1,), + (5,4), + (5,0,4), + (5,0,0), + (0,0,5), + (0,0,0), + (5,3,4) + ] + for hybridize in [True, False]: + for shape in shapes: + for ax in list(range(len(shape))) + [-1, None]: + test_argsort = TestArgsort(ax) + if hybridize: + test_argsort.hybridize() + + x = np.random.uniform(size=shape) + np_out = _np.argsort(x.asnumpy(), axis=ax) + mx_out = test_argsort(x) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + # Test imperative once again + mx_out = np.argsort(x, axis=ax) + np_out = _np.argsort(x.asnumpy(), axis=ax) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_linalg_norm(): + @npx.use_np + class TestLinalgNorm(HybridBlock): + def __init__(self, ord=None, axis=None, keepdims=False): + super(TestLinalgNorm, self).__init__() + self._ord = ord + self._axis = axis + self._keepdims = keepdims + + def hybrid_forward(self, F, x): + return F.np.linalg.norm(x, ord=self._ord, axis=self._axis, keepdims=self._keepdims) + + a = np.arange(5 * 6 * 7 * 8).reshape((5, 6, 7, 8)) + ords = [None, 'fro'] + axes = [None, (0, 2), (1, 0), (1, 2)] + for ord in ords: + for axis in axes: + if ord == 'fro' and axis is None and a.ndim > 2: + continue + for keepdims in [False, True]: + for hybridize in [False, True]: + net = TestLinalgNorm(ord, axis, keepdims) + if hybridize: + net.hybridize() + mx_ret = net(a) + np_ret = _np.linalg.norm(a.asnumpy(), ord=ord, axis=axis, 
keepdims=keepdims) + assert_almost_equal(mx_ret.asnumpy(), np_ret, atol=1e-5, rtol=1e-4) + + +@with_seed() +@npx.use_np_shape +def test_np_concat(): + class TestConcat(HybridBlock): + def __init__(self, axis=None): + super(TestConcat, self).__init__() + self._axis = axis + + def hybrid_forward(self, F, a, *args): + return F.np.concatenate([a] + list(args), axis=self._axis) + + def get_new_shape(shape, axis): + shape_lst = list(shape) + shape_lst[axis] = random.randint(0, 3) + return tuple(shape_lst) + + for shape in [(0, 0), (2, 3)]: + for hybridize in [True, False]: + for axis in range(2): + # test gluon + test_concat = TestConcat(axis=axis) + if hybridize: + test_concat.hybridize() + + a = mx.nd.random.uniform(-1.0, 1.0, shape=get_new_shape(shape, axis)).as_np_ndarray() + a.attach_grad() + b = mx.nd.random.uniform(-1.0, 1.0, shape=get_new_shape(shape, axis)).as_np_ndarray() + b.attach_grad() + c = mx.nd.random.uniform(-1.0, 1.0, shape=get_new_shape(shape, axis)).as_np_ndarray() + c.attach_grad() + d = mx.nd.random.uniform(-1.0, 1.0, shape=get_new_shape(shape, axis)).as_np_ndarray() + d.attach_grad() + expected_ret = _np.concatenate([a.asnumpy(), b.asnumpy(), c.asnumpy(), d.asnumpy()], axis=axis) + with mx.autograd.record(): + y = test_concat(a, b, c, d) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5) + + y.backward() + + assert_almost_equal(a.grad.asnumpy(), _np.ones(a.shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(b.grad.asnumpy(), _np.ones(b.shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(c.grad.asnumpy(), _np.ones(c.shape), rtol=1e-3, atol=1e-5) + assert_almost_equal(d.grad.asnumpy(), _np.ones(d.shape), rtol=1e-3, atol=1e-5) + + # test imperative + mx_out = np.concatenate([a, b, c, d], axis=axis) + np_out = _np.concatenate([a.asnumpy(), b.asnumpy(), c.asnumpy(), d.asnumpy()], axis=axis) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_swapaxes(): + config = [((0, 1, 2), 0, 1), + ((0, 1, 2), -1, -2), + ((4, 5, 6, 7), 2, 3), + ((4, 5, 6, 7), -2, -3)] + + class TestSwapaxes(HybridBlock): + def __init__(self, axis1, axis2): + super(TestSwapaxes, self).__init__() + self._axis1 = axis1 + self._axis2 = axis2 + + def hybrid_forward(self, F, x): + return F.np.swapaxes(x, self._axis1, self._axis2) + + for shape, axis1, axis2 in config: + data_np = _np.random.uniform(size=shape) + data_mx = np.array(data_np, dtype=data_np.dtype) + ret_np = _np.swapaxes(data_np, axis1=axis1, axis2=axis2) + ret_mx = np.swapaxes(data_mx, axis1=axis1, axis2=axis2) + assert same(ret_mx.asnumpy(), ret_np) + + net = TestSwapaxes(axis1, axis2) + for hybrid in [False, True]: + if hybrid: + net.hybridize() + ret_mx = net(data_mx) + assert same(ret_mx.asnumpy(), ret_np) + + +@with_seed() +@npx.use_np_shape +def test_np_squeeze(): + config = [((), None), + ((), -1), + ((), 0), + ((4, 1, 2), None), + ((1, 1, 1), None), + ((1, 0, 1, 5), 2), + ((1, 0, 1, 1), (-1, -4))] + + class TestSqueeze(HybridBlock): + def __init__(self, axis): + super(TestSqueeze, self).__init__() + self._axis = axis + + def hybrid_forward(self, F, x): + return F.np.squeeze(x, axis=self._axis) + + for shape, axis in config: + data_np = _np.random.uniform(size=shape) + data_mx = np.array(data_np, dtype=data_np.dtype) + ret_np = _np.squeeze(data_np, axis=axis) + ret_mx = np.squeeze(data_mx, axis=axis) + assert same(ret_mx.asnumpy(), ret_np) + + net = TestSqueeze(axis) + for hybrid in [False, True]: + if hybrid: + 
net.hybridize() + ret_mx = net(data_mx) + assert same(ret_mx.asnumpy(), ret_np) + + +@with_seed() +@npx.use_np_shape +def test_np_split(): + class TestSplit(HybridBlock): + def __init__(self, indices_or_sections, axis=None): + super(TestSplit, self).__init__() + self._axis = axis + self._indices_or_sections = indices_or_sections + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.split(a, indices_or_sections=self._indices_or_sections, + axis=self._axis) + + def get_indices(axis_size): + if axis_size is 0: + axis_size = random.randint(3, 6) + samples = random.randint(1, axis_size - 1) + indices = sorted(random.sample([i for i in range(1, axis_size)], samples)) + indices = tuple(indices) + return indices + + dim = random.randint(0, 3) + shape = [0] + [random.randint(2, 4) for i in range(dim)] + for hybridize in [True, False]: + for axis in range(len(shape)): + indices = get_indices(shape[axis]) + sections = 7 if shape[axis] is 0 else shape[axis] + for indices_or_sections in [indices, sections]: + # test gluon + test_split = TestSplit(axis=axis, indices_or_sections=indices_or_sections) + if hybridize: + test_split.hybridize() + + a = mx.nd.random.uniform(-1.0, 1.0, shape=shape).as_np_ndarray() + a.attach_grad() + expected_ret = _np.split(a.asnumpy(), indices_or_sections=indices_or_sections, axis=axis) + with mx.autograd.record(): + y = test_split(a) + assert len(y) == len(expected_ret) + for mx_out, np_out in zip(y, expected_ret): + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + mx.autograd.backward(y) + + assert_almost_equal(a.grad.asnumpy(), _np.ones(a.shape), rtol=1e-3, atol=1e-5) + + # test imperative + mx_outs = np.split(a, indices_or_sections=indices_or_sections, axis=axis) + np_outs = _np.split(a.asnumpy(), indices_or_sections=indices_or_sections, axis=axis) + for mx_out, np_out in zip(mx_outs, np_outs): + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_cumsum(): + def np_cumsum_backward(ograd, axis=None, dtype=None): + return _np.flip(_np.cumsum(_np.flip(ograd, axis=axis), axis=axis, dtype=dtype), axis=axis) + + @npx.use_np_shape + class TestCumsum(HybridBlock): + def __init__(self, axis=None, dtype=None): + super(TestCumsum, self).__init__() + self._axis = axis + self._dtype = dtype + + def hybrid_forward(self, F, a): + return F.np.cumsum(a, axis=self._axis, dtype=self._dtype) + + shapes = [(2, 3, 4), (2, 0, 3), ()] + for hybridize in [True, False]: + for shape in shapes: + for axis in [None] + [i for i in range(0, len(shape))]: + for otype in [None, _np.float32, _np.float64]: + test_cumsum = TestCumsum(axis=axis, dtype=otype) + if hybridize: + test_cumsum.hybridize() + for itype in [_np.float16, _np.float32, _np.float64]: + x = rand_ndarray(shape).astype(itype).as_np_ndarray() + x.attach_grad() + np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype) + with mx.autograd.record(): + mx_out = test_cumsum(x) + assert mx_out.shape == np_out.shape + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + mx_out.backward() + np_backward = np_cumsum_backward(_np.ones(np_out.shape, dtype=otype), + axis=axis, dtype=otype).reshape(x.shape) + assert_almost_equal(x.grad.asnumpy(), np_backward, rtol=1e-3, atol=1e-5) + + mx_out = np.cumsum(x, axis=axis, dtype=otype) + np_out = _np.cumsum(x.asnumpy(), axis=axis, dtype=otype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np_shape +def test_np_tile(): + config = [ + ((), ()), 
+ ((), 0), + ((), (2, 0)), + ((), (2, 3)), + ((4, 2), (2,)), + ((4, 2), (2, 3)), + ((4, 2), (2, 1, 4)), + ((4, 2), (2, 3, 4)), + ((4, 2), (2, 0)), + ((4, 2), (2, 0, 3)), + ((4, 2), (2, 0, 3)), + ((4, 0), (2, 0, 3)), + ] + + class TestTile(HybridBlock): + def __init__(self, reps): + super(TestTile, self).__init__() + self._reps = reps + + def hybrid_forward(self, F, x): + return F.np.tile(x, reps=self._reps) + + for shape, reps in config: + data_np = _np.random.uniform(size=shape) + data_mx = np.array(data_np, dtype=data_np.dtype) + ret_np = _np.tile(data_np, reps=reps) + ret_mx = np.tile(data_mx, reps=reps) + assert same(ret_mx.asnumpy(), ret_np) + + net = TestTile(reps) + for hybrid in [False, True]: + if hybrid: + net.hybridize() + ret_mx = net(data_mx) + assert same(ret_mx.asnumpy(), ret_np) + + +@with_seed() +@npx.use_np_shape +def test_np_prod(): + class TestProd(HybridBlock): + def __init__(self, axis=None, dtype=None, keepdims=False): + super(TestProd, self).__init__() + self._axis = axis + self._dtype = dtype + self._keepdims = keepdims + + def hybrid_forward(self, F, a, *args, **kwargs): + return F.np.prod(a, axis=self._axis, dtype=self._dtype, keepdims=self._keepdims) + + in_data_dim = random.choice([3, 4]) + shape = rand_shape_nd(in_data_dim, dim=3) + for hybridize in [False, True]: + for keepdims in [True, False]: + for axis in ([i for i in range(in_data_dim)] + [(), None]): + for itype in ['float32', 'float64']: + for dtype in ['float32', 'float64']: + # test gluon + test_prod = TestProd(axis=axis, dtype=dtype, keepdims=keepdims) + if hybridize: + test_prod.hybridize() + x = np.random.uniform(-2.0, 2.0, size=shape, dtype=itype) + x.attach_grad() + print(x.grad.dtype) + expected_ret = _np.prod(x.asnumpy(), axis=axis, keepdims=keepdims) + expected_ret = expected_ret.astype(dtype) + with mx.autograd.record(): + y = test_prod(x) + assert y.shape == expected_ret.shape + assert_almost_equal(y.asnumpy(), expected_ret, rtol=1e-3, atol=1e-5) + y.backward() + # use keepdims=True so that broadcast divide can be used to calculate + # grad of input + expected_ret = _np.prod(x.asnumpy(), axis=axis, keepdims=True) + assert_almost_equal(x.grad.asnumpy(), expected_ret / x.asnumpy(), rtol=1e-3, atol=1e-3) + + # test numeric + if itype == 'float32' and dtype == 'float32': + x_sym = mx.sym.Variable("x").as_np_ndarray() + mx_sym = mx.sym.np.prod(x_sym, axis=axis, dtype=dtype, keepdims=keepdims).as_nd_ndarray() + check_numeric_gradient(mx_sym, [x.as_nd_ndarray()], + numeric_eps=1e-3, rtol=1e-3, atol=1e-4, dtype=_np.float32) + + # test imperative + mx_out = np.prod(x, axis=axis, dtype=dtype, keepdims=keepdims) + np_out = _np.prod(x.asnumpy(), axis=axis, keepdims=keepdims).astype(dtype) + assert_almost_equal(mx_out.asnumpy(), np_out, rtol=1e-3, atol=1e-5) + + +@with_seed() +@npx.use_np +def test_np_flatten(): + # TODO(junwu): Add more test cases + shapes = [(), (2, 0, 1), (3, 4, 5), 6] + for shape in shapes: + a = _np.random.uniform(size=shape).astype('float32') + a_mx = np.array(a, dtype=a.dtype) + expected_ret = a.flatten() + ret_mx = a_mx.flatten() + assert same(expected_ret, ret_mx.asnumpy()) + + +@with_seed() +@npx.use_np +def test_np_broadcast_to(): + # TODO(junwu): Add more test cases and backward test + shapes = [(1, 2, 3, 4, 5), (1, 0, 3, 4, 5)] + for shape in shapes: + a = _np.random.uniform(size=(4, 1)).astype('float32') + a_mx = np.array(a, dtype=a.dtype) + expected_ret = _np.broadcast_to(a, shape) + ret_mx = np.broadcast_to(a_mx, shape) + assert same(expected_ret, ret_mx.asnumpy()) + + 
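The TODO in `test_np_broadcast_to` above leaves the backward pass uncovered. A hypothetical sketch of such a check, assuming the gradient of `np.broadcast_to` is implemented and sums the incoming gradient over the broadcast axes (with `backward()`'s default head gradient of ones, each input element then receives the number of times it was replicated):

```python
import numpy as _np
import mxnet as mx
from mxnet import np, npx


@npx.use_np
def check_broadcast_to_backward():
    x = np.random.uniform(size=(4, 1))
    x.attach_grad()
    with mx.autograd.record():
        y = np.broadcast_to(x, (2, 4, 5))
    y.backward()  # head gradient defaults to ones
    # each element of x is replicated 2 * 5 = 10 times in y
    expected_grad = _np.full(x.shape, 2 * 5, dtype=x.asnumpy().dtype)
    assert _np.allclose(x.grad.asnumpy(), expected_grad)
```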
+@with_seed() +@npx.use_np +def test_np_meshgrid(): + nx, ny = (4, 5) + x = np.linspace(0, 1, nx) + y = np.linspace(0, 1, ny) + z = np.ones(()) + xv, yv, zv = np.meshgrid(x, y, z) + xv_expected, yv_expected, zv_expected = _np.meshgrid(x.asnumpy(), y.asnumpy(), z.asnumpy()) + assert same(xv.asnumpy(), xv_expected) + assert same(yv.asnumpy(), yv_expected) + assert same(zv.asnumpy(), zv_expected) + # TODO(junwu): Add more test + + +@with_seed() +@npx.use_np +def test_np_broadcast_arrays(): + # TODO(junwu): Add test + pass + + +@with_seed() +@npx.use_np +def test_np_trace(): + class TestTrace(HybridBlock): + def __init__(self, axis1, axis2, offset): + super(TestTrace, self).__init__() + self._axis1 = axis1 + self._axis2 = axis2 + self._offset = offset + + def hybrid_forward(self, F, data): + return F.np.trace(data, axis1=self._axis1, axis2=self._axis2, offset=self._offset) + + def g(data, axis1, axis2, offset): + idx = _np.indices(data.shape) + ret = _np.zeros_like(data) + ret[idx[axis1] + offset == idx[axis2]] = 1.0 + return ret + + shapes = [ + (3, 3), + (3, 4), + (0, 0), + (3, 3, 3), + (0, 0, 0), + (2, 2, 4, 3), + (2, 2, 4, 3), + (2, 0, 3, 0), + (2, 0, 2, 3) + ] + offsets = range(-5, 5) + dtypes = ['int32', 'float16', 'float32', 'float64'] + for hybridize in [True, False]: + for shape in shapes: + ndim = len(shape) + for axis1 in range(-ndim, ndim): + for axis2 in range(-ndim, ndim): + if (axis1 + ndim) % ndim != (axis2 + ndim) % ndim: + for offset in offsets: + for dtype in dtypes: + if dtype == 'float16': + rtol = atol = 1e-2 + else: + rtol = atol = 1e-5 + test_trace = TestTrace(axis1, axis2, offset) + if hybridize: + test_trace.hybridize() + data_np = _np.random.uniform(-10.0, 10.0, shape) + data = mx.nd.array(data_np, dtype=dtype) + data_np = data.asnumpy() + data.attach_grad() + expected_np = _np.trace(data_np, axis1=axis1, axis2=axis2, offset=offset) + with mx.autograd.record(): + out_mx = test_trace(data.as_np_ndarray()) + assert out_mx.shape == expected_np.shape + assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol) + out_mx.backward() + backward_expected = g(data_np, axis1=axis1, axis2=axis2, offset=offset) + assert_almost_equal(data.grad.asnumpy(), backward_expected, rtol=rtol, atol=atol) + + # Test imperative once again + data = mx.nd.array(data_np, dtype=dtype) + out_mx = np.trace(data.as_np_ndarray(), axis1=axis1, axis2=axis2, offset=offset) + assert_almost_equal(out_mx.asnumpy(), expected_np, rtol=rtol, atol=atol) + + # bad params + params = [ + ([], 0, 1, 0), + ([2], 0, 1, 0), + ([3, 2, 2], 1, 1, 1), + ([3, 2, 2], 0, -4, 1) + ] + for shape, axis1, axis2, offset in params: + data_np = _np.random.uniform(-1.0, 1.0, shape) + data_mx = mx.nd.array(data_np) + try: + output = np.trace(data_mx.as_np_ndarray(), axis1=axis1, axis2=axis2, offset=offset) + except mx.base.MXNetError: + continue + assert False + + +if __name__ == '__main__': + import nose + nose.runmodule() diff --git a/tests/python/unittest/test_thread_local.py b/tests/python/unittest/test_thread_local.py index b553299ab4d7..ee56ba780a95 100644 --- a/tests/python/unittest/test_thread_local.py +++ b/tests/python/unittest/test_thread_local.py @@ -23,6 +23,7 @@ from mxnet.attribute import AttrScope from mxnet.name import NameManager from mxnet.test_utils import set_default_context +from mxnet.util import _NumpyArrayScope def test_context(): ctx_list = [] @@ -163,6 +164,41 @@ def f(): thread.join() assert status[0], "Failed to execute a symbolic graph within a thread" + +def test_np_array_scope(): + 
np_array_scope_list = [] + _NumpyArrayScope._current = _NumpyArrayScope(False) + np_array_scope_list.append(_NumpyArrayScope._current) + + def f(): + _NumpyArrayScope._current = _NumpyArrayScope(True) + np_array_scope_list.append(_NumpyArrayScope._current) + + thread = threading.Thread(target=f) + thread.start() + thread.join() + assert len(np_array_scope_list) == 2 + assert not np_array_scope_list[0]._is_np_array + assert np_array_scope_list[1]._is_np_array + + event = threading.Event() + status = [False] + + def g(): + with mx.np_array(False): + event.wait() + if not mx.is_np_array(): + status[0] = True + + thread = threading.Thread(target=g) + thread.start() + _NumpyArrayScope._current = _NumpyArrayScope(True) + event.set() + thread.join() + event.clear() + assert status[0], "Spawned thread didn't set status correctly" + + if __name__ == '__main__': import nose nose.runmodule()
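For reference, the thread-local behaviour that `test_np_array_scope` verifies can also be expressed with the public `mx.np_array` / `mx.is_np_array` API alone. A minimal sketch, not part of the patch, with an illustrative `worker` function: the worker explicitly enters the classic scope, and even though the main thread is inside `np_array(True)` at the same time, the worker still observes `is_np_array() == False`, because the scope is per-thread.

```python
import threading
import mxnet as mx

results = []


def worker():
    # each thread manages its own scope; the main thread's scope does not leak in here
    with mx.np_array(False):
        results.append(mx.is_np_array())


with mx.np_array(True):
    t = threading.Thread(target=worker)
    t.start()
    t.join()
    assert mx.is_np_array()   # numpy-array semantics active in the main thread
assert results == [False]     # the worker's own classic scope was not overridden
```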