Skip to content

Commit

Permalink
PI improvements (#6229)
Browse files Browse the repository at this point in the history
* initial commit

* good draft

* additional loadbalance mode

* small javadoc update

* small javadoc update

* OutputAdapter prototype

* couple of tests

* javadoc update
  • Loading branch information
raver119 committed Aug 22, 2018
1 parent 4bd8dc3 commit c8dba00
Show file tree
Hide file tree
Showing 13 changed files with 807 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.deeplearning4j.nn.adapters;

import lombok.val;
import org.deeplearning4j.nn.api.OutputAdapter;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

/**
 * This OutputAdapter implementation is suited for silent conversion of 2D SoftMax output
 *
 * @author raver119@gmail.com
 */
public class ArgmaxAdapter implements OutputAdapter<int[]> {

    /**
     * Converts the network output to an {@code int[]} where each element holds the index of the
     * largest value in the corresponding row of the output INDArray.
     * I.e. an array of {0.25, 0.1, 0.5, 0.15} yields an int array of length 1 with value {2}.
     *
     * @param outputs exactly one 1D or 2D INDArray
     * @return argmax index per row (2D input) or a single argmax index (1D input)
     */
    @Override
    public int[] apply(INDArray... outputs) {
        Preconditions.checkArgument(outputs.length == 1, "Argmax adapter can have only 1 output");
        val array = outputs[0];
        Preconditions.checkArgument(array.rank() < 3, "Argmax adapter requires 2D or 1D output");

        if (array.rank() == 2) {
            // One argmax per row, computed along dimension 1
            val indices = Nd4j.argMax(array, 1);
            val converted = new int[(int) array.size(0)];
            for (int i = 0; i < indices.length(); i++)
                converted[i] = (int) indices.getDouble(i);
            return converted;
        }

        // 1D case: single argmax over the whole array
        return new int[] {(int) Nd4j.argMax(array, Integer.MAX_VALUE).getDouble(0)};
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.deeplearning4j.nn.adapters;

import lombok.extern.slf4j.Slf4j;
import lombok.val;
import org.deeplearning4j.nn.api.OutputAdapter;
import org.nd4j.base.Preconditions;
import org.nd4j.linalg.api.ndarray.INDArray;

/**
 * This OutputAdapter implementation takes single 2D nn output in, and returns JVM double[][] array
 *
 * @author raver119@gmail.com
 */
@Slf4j
public class Regression2dAdapter implements OutputAdapter<double[][]> {

    /**
     * Converts a single 1D or 2D network output INDArray into a JVM {@code double[][]}.
     * A true 2D matrix is copied row-by-row; a 1D array (or row/column vector) is wrapped
     * as a single-row matrix.
     *
     * @param outputs exactly one 1D or 2D INDArray
     * @return matrix view of the output values
     */
    @Override
    public double[][] apply(INDArray... outputs) {
        // Fix: precondition messages previously said "Argmax adapter" (copy-paste from ArgmaxAdapter)
        Preconditions.checkArgument(outputs.length == 1, "Regression2d adapter can have only 1 output");
        val array = outputs[0];
        Preconditions.checkArgument(array.rank() < 3, "Regression2d adapter requires 2D or 1D output");

        if (array.rank() == 2 && !array.isVector()) {
            return array.toDoubleMatrix();
        } else {
            // Vector/1D case: copy element-wise into a single-row matrix
            val result = new double[1][(int) array.length()];

            for (int e = 0; e < array.length(); e++)
                result[0][e] = array.getDouble(e);

            return result;
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.deeplearning4j.nn.api;

import org.nd4j.linalg.api.ndarray.INDArray;

import java.io.Serializable;

/**
 * This interface describes an entity used to convert neural network output to a specified class.
 * I.e. INDArray -> int[] on the fly.
 *
 * PLEASE NOTE: Implementation will be used in workspace environment to avoid additional allocations during inference.
 * This means you shouldn't store or return the INDArrays passed to OutputAdapter.apply(INDArray...) directly.
 * If you need a copy of the output array, use standard network output methods, or use INDArray.detach() before storing the array
 *
 * @param <T> type of the object produced from the raw network output arrays
 */
public interface OutputAdapter<T> extends Serializable {

/**
* This method provides conversion from multiple INDArrays to T
*
* @param outputs raw network output arrays; must not be stored or returned directly (workspace-scoped)
* @return converted result of type T
*/
T apply(INDArray... outputs);
}
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@
import org.nd4j.linalg.dataset.api.MultiDataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.dataset.api.iterator.MultiDataSetIterator;
import org.nd4j.linalg.exception.ND4JIllegalAccessException;
import org.nd4j.linalg.exception.ND4JIllegalStateException;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.heartbeat.Heartbeat;
import org.nd4j.linalg.heartbeat.reports.Environment;
Expand Down Expand Up @@ -130,6 +128,10 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork {
*/
protected static final String WS_RNN_LOOP_WORKING_MEM = "WS_RNN_LOOP_WORKING_MEM";

/**
* Workspace for output methods that use OutputAdapter
*/
protected static final String WS_OUTPUT_MEM = "WS_OUTPUT_MEM";

protected final WorkspaceConfiguration WS_LAYER_WORKING_MEM_CONFIG;

Expand Down Expand Up @@ -1675,6 +1677,24 @@ public INDArray[] output(boolean train, @NonNull INDArray[] input, INDArray[] in
return output(train, input, inputMasks, labelMasks, null);
}

/**
 * This method uses the provided OutputAdapter to return a custom object built from INDArray
 *
 * PLEASE NOTE: This method uses a dedicated Workspace for output generation to avoid redundant allocations
 *
 * @param inputs Input arrays to the network
 * @param inputMasks Optional input mask arrays (may be null)
 * @param labelMasks Optional label mask arrays (may be null)
 * @param outputAdapter OutputAdapter<T> instance
 * @param <T> T extends Object
 * @return T instance produced by OutputAdapter
 */
public synchronized <T> T output(@NonNull INDArray[] inputs, INDArray[] inputMasks, INDArray[] labelMasks, @NonNull OutputAdapter<T> outputAdapter) {
// Activate the dedicated output workspace so intermediate activations are workspace-scoped;
// the adapter must convert (not store) the arrays before the workspace closes
try (val ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM)) {
return outputAdapter.apply(output(false, inputs, inputMasks, labelMasks, ws));
}
}

/**
* Return an array of network outputs (predictions), given the specified network inputs
* Network outputs are for output layers only.<br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ public class MultiLayerNetwork implements Serializable, Classifier, Layer, Neura
*/
protected static final String WS_LAYER_ACT_1 = "WS_LAYER_ACT_1";
protected static final String WS_LAYER_ACT_2 = "WS_LAYER_ACT_2";

/**
* Workspace for output methods that use OutputAdapter
*/
protected static final String WS_OUTPUT_MEM = "WS_OUTPUT_MEM";

/**
* Workspace for working memory in RNNs - opened and closed once per RNN time step
*/
Expand Down Expand Up @@ -2273,6 +2279,24 @@ public synchronized INDArray output(INDArray input, boolean train, INDArray feat
}
}

/**
 * This method uses the provided OutputAdapter to return a custom object built from INDArray
 *
 * PLEASE NOTE: This method uses a dedicated Workspace for output generation to avoid redundant allocations
 *
 * @param inputs Input array to the network
 * @param inputMasks Optional input mask array (may be null)
 * @param labelMasks Optional label mask array (may be null)
 * @param outputAdapter OutputAdapter<T> instance
 * @param <T> T extends Object
 * @return T instance produced by OutputAdapter
 */
public synchronized <T> T output(@NonNull INDArray inputs, INDArray inputMasks, INDArray labelMasks, @NonNull OutputAdapter<T> outputAdapter) {
// Activate the dedicated output workspace so intermediate activations are workspace-scoped;
// the adapter must convert (not store) the arrays before the workspace closes
try (val ws = Nd4j.getWorkspaceManager().getAndActivateWorkspace(WS_ALL_LAYERS_ACT_CONFIG, WS_OUTPUT_MEM)) {
return outputAdapter.apply(output(inputs, false, inputMasks, labelMasks, ws));
}
}

/**
* Label the probabilities of the input
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.deeplearning4j.nn.adapters;

import lombok.val;
import org.junit.Test;
import org.nd4j.linalg.factory.Nd4j;

import static org.junit.Assert.*;

/**
 * Tests for {@link ArgmaxAdapter}: argmax extraction from 2D and 1D inputs.
 */
public class ArgmaxAdapterTest {

    @Test
    public void testSoftmax_2D_1() {
        // Row-wise argmax: {1,3,2} -> 1 and {4,5,6} -> 2
        val matrix = new double[][] {{1, 3, 2}, {4, 5, 6}};

        val converted = new ArgmaxAdapter().apply(Nd4j.create(matrix));

        assertArrayEquals(new int[] {1, 2}, converted);
    }

    @Test
    public void testSoftmax_1D_1() {
        // Single vector: the maximum (3) sits at index 1
        val vector = new double[] {1, 3, 2};

        val converted = new ArgmaxAdapter().apply(Nd4j.create(vector));

        assertArrayEquals(new int[] {1}, converted);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*******************************************************************************
* Copyright (c) 2015-2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

package org.deeplearning4j.nn.adapters;

import lombok.val;
import org.junit.Test;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.util.ArrayUtil;

import static org.junit.Assert.*;

/**
 * Tests for {@link Regression2dAdapter}: conversion of 2D and 1D outputs to double[][].
 */
public class Regression2dAdapterTest {

    @Test
    public void testRegressionAdapter_2D_1() throws Exception {
        // True 2D matrix should round-trip element-for-element
        val matrix = new double[][] {{1, 2, 3}, {4, 5, 6}};

        val converted = new Regression2dAdapter().apply(Nd4j.create(matrix));

        assertArrayEquals(ArrayUtil.flatten(matrix), ArrayUtil.flatten(converted), 1e-5);
    }

    @Test
    public void testRegressionAdapter_2D_2() throws Exception {
        // 1D input should come back as a single-row matrix with identical values
        val vector = new double[] {1, 2, 3};

        val converted = new Regression2dAdapter().apply(Nd4j.create(vector));

        assertArrayEquals(vector, ArrayUtil.flatten(converted), 1e-5);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,9 @@ public void testStore1() throws Exception {

int workers[] = new int[] {2, 4, 8};

EncodingHandler handler = new EncodingHandler(1e-3);


for (int numWorkers : workers) {
EncodingHandler handler = new EncodingHandler(1e-3);

val bufferSize = EncodedGradientsAccumulator.getOptimalBufferSize(numParams, numWorkers, 2);
log.info("Workers: {}; Buffer size: {} bytes", numWorkers, bufferSize);
EncodedGradientsAccumulator accumulator =
Expand Down Expand Up @@ -73,9 +72,9 @@ public void testStore1() throws Exception {
public void testEncodingLimits1() throws Exception {
int numParams = 100000;

EncodingHandler handler = new EncodingHandler(1e-3);

for (int e = 10; e < numParams / 5; e++) {
EncodingHandler handler = new EncodingHandler(1e-3);

INDArray encoded = handler.encodeUpdates(getGradients(numParams, e, 2e-3));

// log.info("enc len: {}", encoded.data().length());
Expand Down
Loading

0 comments on commit c8dba00

Please sign in to comment.