diff --git a/.gitignore b/.gitignore
index 796f2a7c355..3ef13efe3ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,4 @@ project/plugins/project/
# other
*.txt
+*.swp
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala
index c58c9e9b563..dbfd76fed72 100644
--- a/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/example/ImageNetLocal.scala
@@ -21,7 +21,7 @@ import java.awt.color.ColorSpace
import java.util
import com.intel.analytics.sparkdl.nn.ClassNLLCriterion
-import com.intel.analytics.sparkdl.optim.SGD
+import com.intel.analytics.sparkdl.optim.{EvaluateMethods, SGD}
import com.intel.analytics.sparkdl.tensor.Tensor
import com.intel.analytics.sparkdl.utils.{File, T}
@@ -49,160 +49,9 @@ object ImageNetLocal {
println(s"[${(System.nanoTime() - startTime) / 1e9}s] $msg")
}
- def runDouble(donkey: Donkey, dataSet: DataSets, netType: String, classNum: Int,
+ def run(donkey: Donkey, dataSet: DataSets, netType: String, classNum: Int,
labelsMap: Map[String, Double], testInterval: Int, donkeyVal: Donkey,
- dataSetVal: DataSets, batchSize: Int): Unit = {
- // Compute Mean on amount of samples
- val samples = 10000
- log(s"Start to calculate Mean on $samples samples")
- var (meanR, meanG, meanB) = Array.tabulate(samples)(n => {
- print(".")
- val data = donkey.pull
- dataSet.post(data._2)
- ImageNetUtils.computeMean(data._1, data._2.dataOffset)
- }).reduce((a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3))
- meanR /= samples
- meanG /= samples
- meanB /= samples
- println()
-
- // Compute std on amount of samples
- log(s"Start to calculate std on $samples samples")
- var (varR, varG, varB) = Array.tabulate(samples)(n => {
- print(".")
- val data = donkey.pull
- dataSet.post(data._2)
- ImageNetUtils.computeVar(data._1, meanR, meanG, meanB, data._2.dataOffset)
- }).reduce((a, b) => (a._1 + b._1, a._2 + b._2, a._3 + b._3))
- varR /= samples
- varG /= samples
- varB /= samples
-
- val model = netType match {
- case "alexnet" => AlexNet.getModel[Double](classNum)
- case "googlenet" => GoogleNet.getModel[Double](classNum)
- case "googlenet-bn" => GoogleNet.getModel[Double](classNum, "googlenet-bn")
- case "googlenet-cf" => GoogleNet.getModelCaffe[Double](classNum)
- case _ => throw new IllegalArgumentException
- }
- val (weights, grad) = model.getParameters()
- println(s"modelsize ${weights.nElement()}")
- println(model)
- val criterion = new ClassNLLCriterion[Double]()
- val epochNum = 90
- val featureShape = Array(3, 224, 224)
- val targetShape = Array(1)
- val sgd = new SGD[Double]
- val state = T("momentum" -> 0.9, "dampening" -> 0.0)
- val stageImgs = new util.ArrayDeque[Image](batchSize)
- val input = Tensor[Double](batchSize, 3, 224, 224)
- val target = Tensor[Double](batchSize)
- val iter = ImageNetUtils.toTensorDouble(
- donkey.map(d => {
- stageImgs.push(d._2)
- (labelsMap(d._2.label), d._1)
- }),
- featureShape,
- targetShape,
- batchSize,
- (meanR, meanG, meanB),
- (varR, varG, varB),
- input,
- target
- )
-
- val stageImgsVal = new util.ArrayDeque[Image](batchSize)
- val iterVal = ImageNetUtils.toTensorDouble(
- donkeyVal.map(d => {
- stageImgsVal.push(d._2)
- (labelsMap(d._2.label), d._1)
- }),
- featureShape,
- targetShape,
- batchSize,
- (meanR, meanG, meanB),
- (varR, varG, varB),
- input,
- target
- )
-
- log(s"meanR is $meanR meanG is $meanG meanB is $meanB")
- log(s"varR is $varR varG is $varG varB is $varB")
- log("Start to train...")
-
- var wallClockTime = 0L
- for (i <- 1 to epochNum) {
- println(s"Epoch[$i] Train")
-
- for (regime <- regimes(netType)) {
- if (i >= regime._1 && i <= regime._2) {
- state("learningRate") = regime._3
- state("weightDecay") = regime._4
- }
- }
-
- var j = 0
- var c = 0
- model.training()
- while (j < dataSet.getTotal) {
- val start = System.nanoTime()
- val (input, target) = iter.next()
- val readImgTime = System.nanoTime()
- model.zeroGradParameters()
- val output = model.forward(input)
- val loss = criterion.forward(output, target)
- val gradOutput = criterion.backward(output, target)
- model.backward(input, gradOutput)
- sgd.optimize(_ => (loss, grad), weights, state, state)
- val end = System.nanoTime()
- wallClockTime += end - start
- log(s"Epoch[$i][Iteration $c $j/${dataSet.getTotal}][Wall Clock ${wallClockTime / 1e9}s]" +
- s" loss is $loss time ${(end - start) / 1e9}s read " +
- s"time ${(readImgTime - start) / 1e9}s train time ${(end - readImgTime) / 1e9}s." +
- s" Throughput is ${input.size(1).toDouble / (end - start) * 1e9} img / second")
- while (!stageImgs.isEmpty) {
- dataSet.post(stageImgs.poll())
- }
- j += input.size(1)
- c += 1
- }
-
- if (i % testInterval == 0) {
- model.evaluate()
- var correct = 0
- var k = 0
- while (k < dataSetVal.getTotal) {
- val (input, target) = iterVal.next()
- val output = model.forward(input)
- output.max(2)._2.squeeze().map(target, (a, b) => {
- if (a == b) {
- correct += 1
- }
- a
- })
- while (!stageImgsVal.isEmpty) {
- dataSetVal.post(stageImgsVal.poll())
- }
- k += input.size(1)
- }
-
- val accuracy = correct.toDouble / dataSetVal.getTotal
- println(s"[Wall Clock ${wallClockTime / 1e9}s] Accuracy is $accuracy")
-
- // Save model to a file each epoch
- File.save(model, s"${netType}${accuracy}.model${i}", true)
- File.save(state, s"${netType}${accuracy}.state${i}", true)
- }
-
- log("shuffle")
- dataSet.shuffle
- log("shuffle end")
- }
- }
-
- def runFloat(donkey: Donkey, dataSet: DataSets, netType: String, classNum: Int,
- labelsMap: Map[String, Double], testInterval: Int, donkeyVal: Donkey,
- dataSetVal: DataSets, batchSize: Int): Unit = {
+ dataSetVal: DataSets, batchSize: Int, modelPath : String): Unit = {
// Compute Mean on amount of samples
val samples = 10000
log(s"Start to calculate Mean on $samples samples")
@@ -327,25 +176,27 @@ object ImageNetLocal {
if (i % testInterval == 0) {
model.evaluate()
- var correct = 0
+ var top1Correct = 0
+ var top5Correct = 0
var k = 0
while (k < dataSetVal.getTotal) {
val (input, target) = iterVal.next()
val output = model.forward(input)
- output.max(2)._2.squeeze().map(target, (a, b) => {
- if (a == b) {
- correct += 1
- }
- a
- })
+ top1Correct += EvaluateMethods.calcAccuracy(output, target)._1
+ top5Correct += EvaluateMethods.calcTop5Accuracy(output, target)._1
while (!stageImgsVal.isEmpty) {
dataSetVal.post(stageImgsVal.poll())
}
k += input.size(1)
}
- val accuracy = correct.toDouble / dataSetVal.getTotal
- println(s"[Wall Clock ${wallClockTime / 1e9}s] Accuracy is $accuracy")
+ val top1Accuracy = top1Correct.toDouble / dataSetVal.getTotal
+ val top5Accuracy = top5Correct.toDouble / dataSetVal.getTotal
+ println(s"[Wall Clock ${wallClockTime / 1e9}s] Top-1 Accuracy is $top1Accuracy")
+ println(s"[Wall Clock ${wallClockTime / 1e9}s] Top-5 Accuracy is $top5Accuracy")
+ println(s"Save model and state to $modelPath-$i")
+ File.save(model, modelPath + s"-$i.model")
+ File.save(state, modelPath + s"-$i.state")
}
log("shuffle")
@@ -371,8 +222,8 @@ object ImageNetLocal {
val testInterval = args(4).toInt
val netType = args(5)
val classNum = args(6).toInt
- val dataType = args(7)
- val batchSize = args(8).toInt
+ val batchSize = args(7).toInt
+ val modelPath = args(8)
val dataSet = new DataSets(path, classNum, labelsMap)
val donkey = new Donkey(parallelism, dataSet)
@@ -383,12 +234,7 @@ object ImageNetLocal {
dataSet.shuffle
log("shuffle end")
- dataType match {
- case "double" => runDouble(donkey, dataSet, netType, classNum, labelsMap, testInterval,
- donkeyVal, dataSetVal, batchSize)
- case "float" => runFloat(donkey, dataSet, netType, classNum, labelsMap, testInterval,
- donkeyVal, dataSetVal, batchSize)
- case _ => throw new IllegalArgumentException
- }
+ run(donkey, dataSet, netType, classNum, labelsMap, testInterval,
+ donkeyVal, dataSetVal, batchSize, modelPath)
}
}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala
index 12c1a41f100..cec63aefce5 100644
--- a/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/models/GoogleNet.scala
@@ -232,7 +232,7 @@ object GoogleNet_v2 {
val conv3 = new Sequential[D]
conv3.add(new SpatialConvolution[D](inputSize, config[Table](2)(1), 1, 1, 1, 1)
- .setName(namePrefix + "3x3_s2"))
+ .setName(namePrefix + "3x3_reduce"))
conv3.add(new SpatialBatchNormalization(config[Table](2)(1), 1e-3)
.setName(namePrefix + "3x3_reduce/bn"))
conv3.add(new ReLU[D](true). setName(namePrefix + "3x3_reduce/bn/sc/relu"))
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala
index d6be3bdb702..6191e890b2a 100644
--- a/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/models/Perf.scala
@@ -79,7 +79,7 @@ object Perf {
def performance[T: ClassTag](param: Params)(implicit tn: TensorNumeric[T]): Unit = {
val (model, input) = param.module match {
- case "alexnet" => (AlexNet(1000), Tensor[T](param.batchSize, 3, 224, 224))
+ case "alexnet" => (AlexNet(1000), Tensor[T](param.batchSize, 3, 227, 227))
case "alexnetowt" => (AlexNet_OWT(1000), Tensor[T](param.batchSize, 3, 224, 224))
case "googlenet_v1" => (GoogleNet_v1(1000), Tensor[T](param.batchSize, 3, 224, 224))
case "googlenet_v2" => (GoogleNet_v2(1000), Tensor[T](param.batchSize, 3, 224, 224))
@@ -139,8 +139,6 @@ object Perf {
}
}
-case class TestCase[T](input: Tensor[T], target: Tensor[T], model: Module[T])
-
case class Params(
batchSize: Int = 128,
iteration: Int = 10,
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala
index 40b73ac80be..a90cf9b0187 100644
--- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Container.scala
@@ -19,6 +19,7 @@ package com.intel.analytics.sparkdl.nn
import com.intel.analytics.sparkdl.tensor.Tensor
import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.mkl.MKL
import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
@@ -93,4 +94,25 @@ private[nn] abstract class Container[@specialized(Float, Double) T: ClassTag](
})
(result, offset, newIndexes)
}
+
+ // Links adjacent "mkl." children via MKL.SetPrev*; only non-"mkl." children get initMkl().
+ override def initMkl() : Unit = {
+   def containMkl(module : Module[T]) : Boolean = module.toString.startsWith("mkl.")
+   for (i <- modules.indices) {
+     if (containMkl(modules(i))) {
+       if (i >= 1 && containMkl(modules(i - 1))) {
+         ev.getType() match {
+           case "Float" => MKL.SetPrevFloat(modules(i - 1).getClassPtr(),
+             modules(i).getClassPtr())
+           case "Double" => MKL.SetPrevDouble(modules(i - 1).getClassPtr(),
+             modules(i).getClassPtr())
+           case _ =>
+             throw new UnsupportedOperationException(s"Only Float/Double supported")
+         }
+       }
+     } else {
+       modules(i).initMkl()
+     }
+   }
+ }
}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala
index 026cc3e3b69..ebe61457f38 100644
--- a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/Module.scala
@@ -43,6 +43,17 @@ abstract class Module[T: ClassTag](implicit ev: TensorNumeric[T]) extends Serial
if (this.name == null) this.toString else this.name
}
+ private var needComputeBack = true // NOTE(review): presumably "backward is needed" - confirm
+ // Stores the flag and returns this module, so the call can be chained.
+ def setNeedComputeBack(need: Boolean): this.type = {
+ needComputeBack = need
+ this
+ }
+ // Returns the flag; true unless setNeedComputeBack changed it.
+ def isNeedComputeBack(): Boolean = {
+ needComputeBack
+ }
+
// list of sub modules
val modules: ArrayBuffer[Module[T]] = ArrayBuffer[Module[T]]()
@@ -199,6 +210,10 @@ abstract class Module[T: ClassTag](implicit ev: TensorNumeric[T]) extends Serial
def cloneModule(): Module[T] = {
SerializationUtils.clone(this)
}
+
+ // Support for mkl init: base-class defaults, overridden by the mkl.* modules and by Container.
+ def getClassPtr() : Long = {0L} // 0 by default; mkl.* modules return their MKL handle instead
+ def initMkl() : Unit = {} // no-op by default
}
object Module {
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala
new file mode 100644
index 00000000000..6eebabdc02c
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/BatchNormalization.scala
@@ -0,0 +1,220 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.tensor.Tensor
+import com.intel.analytics.sparkdl.utils.RandomGenerator._
+import com.intel.analytics.sparkdl.nn.Module
+import com.intel.analytics.sparkdl.mkl.MKL
+
+import scala.language.implicitConversions
+
+import scala.reflect.ClassTag
+
+class SpatialBatchNormalization[@specialized(Float, Double) T: ClassTag](
+  val nOutput: Int,
+  val eps: Double = 1e-5,
+  val momentum: Double = 0.1,
+  val affine: Boolean = true)(implicit ev: TensorNumeric[T])
+  extends Module[T] {
+  // Batch normalization delegating forward/backward to the native MKL.BatchNorm* routines.
+  require(nOutput > 0,
+    "To set affine=false call SpatialBatchNormalization(nFeature, eps, momentum, false)")
+
+  // NOTE(review): none of the statistics tensors below, nor momentum, is passed to an MKL call
+  // in this class.
+  val nDim = 2
+  val runningMean = Tensor[T](nOutput)
+  val runningVar = Tensor[T](nOutput).fill(ev.fromType[Int](1))
+  val saveMean = Tensor[T](nOutput)
+  val saveStd = Tensor[T](nOutput).fill(ev.fromType[Int](1))
+
+  // Handle returned by MKL.BatchNormInit*; stays 0 until the first updateOutput call.
+  private var classPtr = 0L
+  private var firstPass = true
+
+  override def getClassPtr(): Long = classPtr
+
+  // weight/bias and their gradients are allocated only when affine is true, otherwise null.
+  val weight: Tensor[T] = if (affine) Tensor[T](nOutput) else null
+  val bias: Tensor[T] = if (affine) Tensor[T](nOutput) else null
+  gradWeight = if (affine) Tensor[T](nOutput) else null
+  gradBias = if (affine) Tensor[T](nOutput) else null
+
+  val useWeight: Boolean = weight != null
+  val useBias: Boolean = bias != null
+
+  if (affine) {
+    reset()
+  }
+
+  // Sets weight from RNG.uniform(0, 1), bias to 0, runningMean to 0 and runningVar to 1.
+  override def reset(): Unit = {
+    if (null != weight) {
+      weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1)))
+    }
+
+    if (null != bias) {
+      bias.fill(ev.fromType[Int](0))
+    }
+
+    runningMean.zero()
+    runningVar.fill(ev.fromType[Int](1))
+  }
+
+  // Requires an nDim-D input with size(2) == runningMean.nElement(). Not called in this class.
+  def checkInputDim(input: Tensor[T]): Unit = {
+    require(input.dim() == nDim,
+      s"only mini-batch supported (${nDim}D tensor), got ${input.dim()}D tensor instead")
+    require(input.size(2) == runningMean.nElement(),
+      s"got ${input.size(2)}-feature tensor, expected ${runningMean.nElement()}")
+  }
+
+  // Forward pass. On the first call the native primitive is created from that input's
+  // dimensions (MKL.BatchNormInit*) and the handle is reused afterwards; input, output,
+  // weight and bias storage are then passed to MKL.BatchNormForward*.
+  // NOTE(review): weight/bias are dereferenced unconditionally, so affine = false fails here.
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    output.resizeAs(input)
+
+    val inputOffset = input.storageOffset() - 1
+    val outputOffset = output.storageOffset() - 1
+
+    val inputWidth = input.size(input.dim())
+    val inputHeight = input.size(input.dim() - 1)
+    val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2)
+    val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3)
+    // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3
+
+    val kernelOffset = weight.storageOffset() - 1
+    val biasOffset = bias.storageOffset() - 1
+
+    // Converts the Boolean useWeight/useBias flags for the init calls (1 = true, 0 = false).
+    implicit def bool2int(b: Boolean): Int = if (b) 1 else 0
+    if (firstPass) {
+      ev.getType() match {
+        case "Float" =>
+          classPtr = MKL.BatchNormInitFloat(inputNumber,
+            inputChannel,
+            inputHeight,
+            inputWidth,
+            eps,
+            useWeight,
+            useBias,
+            4)
+        case "Double" =>
+          classPtr = MKL.BatchNormInitDouble(inputNumber,
+            inputChannel,
+            inputHeight,
+            inputWidth,
+            eps,
+            useWeight,
+            useBias,
+            4)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float/Double supported")
+      }
+      firstPass = false
+    }
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.BatchNormForwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+          inputOffset,
+          output.storage().array().asInstanceOf[Array[Float]],
+          outputOffset,
+          weight.storage().array().asInstanceOf[Array[Float]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Float]],
+          biasOffset,
+          classPtr)
+      case "Double" =>
+        MKL.BatchNormForwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+          inputOffset,
+          output.storage().array().asInstanceOf[Array[Double]],
+          outputOffset,
+          weight.storage().array().asInstanceOf[Array[Double]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Double]],
+          biasOffset,
+          classPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+    output
+  }
+
+  // Backward pass; gradInput, gradWeight and gradBias storage are handed to the native call.
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    gradInput.resizeAs(input)
+
+    val inputOffset = input.storageOffset() - 1
+    val kernelDiffOffset = gradWeight.storageOffset() - 1
+    val biasDiffOffset = gradBias.storageOffset() - 1
+    val gradOutputOffset = gradOutput.storageOffset() - 1
+    val gradInputOffset = gradInput.storageOffset() - 1
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.BatchNormBackwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+          inputOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Float]],
+          gradOutputOffset,
+          gradInput.storage().array().asInstanceOf[Array[Float]],
+          gradInputOffset,
+          gradWeight.storage().array().asInstanceOf[Array[Float]],
+          kernelDiffOffset,
+          gradBias.storage().array().asInstanceOf[Array[Float]],
+          biasDiffOffset,
+          classPtr)
+      case "Double" =>
+        MKL.BatchNormBackwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+          inputOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Double]],
+          gradOutputOffset,
+          gradInput.storage().array().asInstanceOf[Array[Double]],
+          gradInputOffset,
+          gradWeight.storage().array().asInstanceOf[Array[Double]],
+          kernelDiffOffset,
+          gradBias.storage().array().asInstanceOf[Array[Double]],
+          biasDiffOffset,
+          classPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    gradInput
+  }
+
+  // No-op here; gradWeight/gradBias storage is passed to the native call in updateGradInput.
+  override def accGradParameters(input: Tensor[T], gradOutput: Tensor[T], scale: Double): Unit = {}
+
+  override def zeroGradParameters(): Unit = {
+    if (null != gradWeight) gradWeight.zero()
+    if (null != gradBias) gradBias.zero()
+  }
+
+  override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = {
+    (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias))
+  }
+
+  override def toString(): String = {
+    s"mkl.BatchNormalization[${ev.getType()}]($nOutput, $eps, $momentum, $affine)"
+  }
+}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala
new file mode 100644
index 00000000000..5ec16d1026f
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Concat.scala
@@ -0,0 +1,305 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * ATTENTION: MKL version. The start and end layer must be MKL version too.
+ * Currently, it supports BatchNormalization, Linear, LRN, Pooling(Avg, Max),
+ * ReLU and SpatialConvolution.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.nn.{Container, Module}
+import com.intel.analytics.sparkdl.tensor.Tensor
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.mkl.MKL
+
+import scala.reflect.ClassTag
+
+class Concat[T: ClassTag](val dimension: Int)(implicit ev: TensorNumeric[T]) extends Container[T] {
+  // Runs every child on the same input and passes the children's outputs to MKL.ConcatForward*.
+  // backward() passes gradOutput to MKL.ConcatBackward*, calls each child's backward and passes
+  // the resulting gradInputs to MKL.SumForward* (native semantics assumed from the names).
+  private var size: Array[Int] = null
+  private var gradouts: Array[Tensor[T]] = null
+
+  // Native handles and their "not created yet" flags (concat: updateOutput, sum: backward).
+  var concatPtr : Long = 0L
+  var concat1Pass: Boolean = true
+
+  var sumPtr : Long = 0L
+  var sum1Pass : Boolean = true
+
+  override def getClassPtr(): Long = concatPtr
+
+  def getSize(): Array[Int] = size
+
+  // Output size = first child's size with the other children's sizes added along `dimension`.
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    // TODO should check the size of every tensor. It must be same as the first tensor
+    val outs = new Array[Tensor[T]](this.modules.length)
+    var i = 0
+    while (i < this.modules.length) {
+      val currentOutput = this.modules(i).updateOutput(input)
+      outs(i) = currentOutput
+      if (i == 0) {
+        this.size = currentOutput.size()
+      } else {
+        this.size(this.dimension - 1) += currentOutput.size(this.dimension)
+      }
+      i += 1
+    }
+
+    this.output.resize(this.size)
+    // TODO call mkl native code to update output
+    // TODO dimension here is different with "dimension" in MKL 2017
+    // TODO check all dimensions of input tensors are same
+    if (concat1Pass) {
+      val nDimension = outs(0).nDimension()
+      val inputSize: Array[Int] = new Array[Int](this.modules.length * nDimension)
+
+      // Each child's sizes are written last dimension first: size(nDimension), ..., size(1).
+      for (i <- 0 until this.modules.length) {
+        for (j <- 0 until nDimension) {
+          inputSize(i * nDimension + j) = outs(i).size(nDimension - j)
+        }
+      }
+
+      ev.getType() match {
+        case "Double" =>
+          concatPtr = MKL.ConcatInitDouble(this.modules.length, nDimension, inputSize)
+        case "Float" =>
+          concatPtr = MKL.ConcatInitFloat(this.modules.length, nDimension, inputSize)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float / Double is supported")
+      }
+      concat1Pass = false
+    }
+
+    // get all of the tensors in outs to float/double array
+    val inputs: Array[Array[T]] = new Array[Array[T]](this.modules.length)
+    val inputsOffset: Array[Int] = new Array[Int](this.modules.length)
+    for (i <- 0 until this.modules.length) {
+      inputs(i) = outs(i).storage().array()
+      inputsOffset(i) = outs(i).storageOffset() - 1
+    }
+
+    ev.getType() match {
+      case "Double" =>
+        MKL.ConcatForwardDouble(inputs.asInstanceOf[Array[Array[Double]]],
+          inputsOffset,
+          output.storage().array().asInstanceOf[Array[Double]],
+          output.storageOffset() - 1,
+          concatPtr)
+      case "Float" =>
+        MKL.ConcatForwardFloat(inputs.asInstanceOf[Array[Array[Float]]],
+          inputsOffset,
+          output.storage().array().asInstanceOf[Array[Float]],
+          output.storageOffset() - 1,
+          concatPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float / Double is supported")
+    }
+
+    this.output
+  }
+
+  // TODO should we implement this function, what's the difference from @backward
+  // NOTE: currently returns gradInput untouched; gradients are only computed in backward().
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+//    this.gradInput.resizeAs(input)
+//
+//    var offset = 1
+//    var i = 0
+//    while (i < this.modules.length) {
+//      val currentOutput = this.modules(i).output
+//      val currentGradInput = this.modules(i).updateGradInput(input,
+//        gradOutput.narrow(dimension, offset, currentOutput.size(dimension)))
+//
+//      if (currentGradInput != null) {
+//        if (i == 0) {
+//          this.gradInput.copy(currentGradInput)
+//        } else {
+//          this.gradInput.add(currentGradInput)
+//        }
+//      }
+//      i += 1
+//      offset += currentOutput.size(dimension)
+//    }
+
+    this.gradInput
+  }
+
+  // Per-child gradient buffers are sized like each child's output before the native call.
+  override def backward(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    // TODO call mkl native code to update gradient input
+    this.gradInput.resizeAs(input)
+    if (gradouts == null || gradouts.length != this.modules.length) {
+      gradouts = new Array[Tensor[T]](this.modules.length)
+    }
+    val gradOutputs: Array[Array[T]] = new Array[Array[T]](this.modules.length)
+    val gradOutputsOffset: Array[Int] = new Array[Int](this.modules.length)
+    for (i <- 0 until this.modules.length) {
+      if (gradouts(i) == null) gradouts(i) = Tensor()
+      gradouts(i).resizeAs(this.modules(i).output)
+      gradOutputs(i) = gradouts(i).storage().array()
+      gradOutputsOffset(i) = gradouts(i).storageOffset() - 1
+    }
+
+    ev.getType() match {
+      case "Double" =>
+        MKL.ConcatBackwardDouble(gradOutputs.asInstanceOf[Array[Array[Double]]],
+          gradOutputsOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Double]],
+          gradOutput.storageOffset() - 1,
+          concatPtr)
+      case "Float" =>
+        MKL.ConcatBackwardFloat(gradOutputs.asInstanceOf[Array[Array[Float]]],
+          gradOutputsOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Float]],
+          gradOutput.storageOffset() - 1,
+          concatPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float / Double is supported")
+    }
+
+    val tmpGradInputs : Array[Tensor[T]] = new Array[Tensor[T]](this.modules.length)
+
+    for (i <- 0 until this.modules.length) {
+      tmpGradInputs(i) = this.modules(i).backward(input, gradouts(i))
+    }
+
+    // It can't be converted to mkl dnn concat forward, because the size of all
+    // gradient input is the same.
+    // copy method here doesn't cost too much
+    // TODO convert to eltwise
+    // if (currentGradInput != null) {
+    //   if (i == 0) {
+    //     this.gradInput.copy(currentGradInput)
+    //   } else {
+    //     this.gradInput.add(currentGradInput)
+    //   }
+    // }
+
+    val subGradInputs: Array[Array[T]] = new Array[Array[T]](this.modules.length)
+    val subGradInputsOffset: Array[Int] = new Array[Int](this.modules.length)
+    for (i <- 0 until this.modules.length) {
+      subGradInputs(i) = tmpGradInputs(i).storage().array()
+      subGradInputsOffset(i) = tmpGradInputs(i).storageOffset() - 1
+    }
+
+    if (sum1Pass) {
+      val nDimension = tmpGradInputs(0).nDimension()
+      val subGradInputSize: Array[Int] = new Array[Int](this.modules.length * nDimension)
+
+      for (i <- 0 until this.modules.length) {
+        for (j <- 0 until nDimension) {
+          subGradInputSize(i * nDimension + j) = tmpGradInputs(i).size(nDimension - j)
+        }
+      }
+
+      ev.getType() match {
+        case "Double" =>
+          sumPtr = MKL.SumInitDouble(this.modules.length, nDimension, subGradInputSize)
+        case "Float" =>
+          sumPtr = MKL.SumInitFloat(this.modules.length, nDimension, subGradInputSize)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float / Double is supported")
+      }
+      sum1Pass = false
+    }
+
+    ev.getType() match {
+      case "Double" =>
+        MKL.SumForwardDouble(subGradInputs.asInstanceOf[Array[Array[Double]]],
+          subGradInputsOffset,
+          gradInput.storage().array().asInstanceOf[Array[Double]],
+          gradInput.storageOffset() - 1,
+          sumPtr)
+      case "Float" =>
+        MKL.SumForwardFloat(subGradInputs.asInstanceOf[Array[Array[Float]]],
+          subGradInputsOffset,
+          gradInput.storage().array().asInstanceOf[Array[Float]],
+          gradInput.storageOffset() - 1,
+          sumPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float / Double is supported")
+    }
+
+    this.gradInput
+  }
+
+  override def equals(obj: Any): Boolean = {
+    if (!super.equals(obj)) {
+      return false
+    }
+
+    if (!obj.isInstanceOf[Concat[T]]) {
+      return false
+    }
+    val other = obj.asInstanceOf[Concat[T]]
+    if (this.eq(other)) {
+      return true
+    }
+    if (dimension != other.dimension) {
+      return false
+    }
+
+    if (this.modules.length != other.modules.length) {
+      return false
+    }
+
+    val moduleLength = modules.length
+    var i = 0
+    while (i < moduleLength) {
+      if (modules(i) != other.modules(i)) {
+        return false
+      }
+      i += 1
+    }
+
+    true
+  }
+  override def hashCode(): Int = {
+    val seed = 37
+    var hash = super.hashCode()
+    var i = 0
+    val moduleLength = modules.length
+    while (i < moduleLength) {
+      hash = hash * seed + modules(i).hashCode()
+      i += 1
+    }
+
+    hash
+  }
+
+  override def toString(): String = {
+    val tab = " "
+    val next = " |`-> "
+    val last = " ... -> "
+    val ext = " | "
+    val extlast = " "
+    s"mkl.Concat {$line${tab}input$line${modules.zipWithIndex.map {
+      case (model: Module[T], index: Int) =>
+        s"$tab$next(${index + 1}): ${if (index == modules.length - 1) {
+          model.setLine(line + tab + extlast)
+        } else {
+          model.setLine(line + tab + ext)
+        }}"
+    }.mkString(line)}$line$tab${last}output$line$tab}"
+  }
+}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala
new file mode 100644
index 00000000000..f049b31cff7
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Linear.scala
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.mkl.MKL
+import com.intel.analytics.sparkdl.nn.{Default, InitializationMethod, Module, Xavier}
+import com.intel.analytics.sparkdl.utils.RandomGenerator._
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.tensor.Tensor
+
+import scala.reflect.ClassTag
+
+/**
+ * Fully connected layer whose forward and backward passes are delegated to the native
+ * primitives exposed through `MKL`.
+ *
+ * Only 2-D (batch mode) input is accepted. The native primitive is created on the first
+ * call to `updateOutput` from the sizes of that first input, and its handle is reused by
+ * every later forward and backward call.
+ *
+ * @param inputSize   size of the second dimension of `weight`
+ * @param outputSize  size of the first dimension of `weight` and length of `bias`
+ * @param needCompute when false, `updateGradInput` skips the backward-data kernel
+ * @param initMethod  initialization scheme used by `reset`; `Default` or `Xavier`
+ */
+class Linear[@specialized(Float, Double) T: ClassTag](
+    inputSize: Int,
+    outputSize: Int,
+    val needCompute: Boolean = true,
+    private var initMethod: InitializationMethod = Default
+)(implicit ev: TensorNumeric[T])
+    extends Module[T] {
+  val weight: Tensor[T] = Tensor[T](outputSize, inputSize)
+  val bias: Tensor[T] = Tensor[T](outputSize)
+  val addBuffer: Tensor[T] = Tensor[T]()
+  this.gradWeight = Tensor[T](outputSize, inputSize)
+  this.gradBias = Tensor[T](outputSize)
+
+  // Handle of the native primitive; stays 0 until the first forward pass creates it.
+  private var classPtr = 0L
+  private var firstPass = true
+
+  override def getClassPtr(): Long = classPtr
+
+  reset()
+
+  /** Replaces the initialization scheme used by the next `reset()`; returns this layer. */
+  def setInitMethod(initMethod: InitializationMethod): this.type = {
+    this.initMethod = initMethod
+    this
+  }
+
+  /**
+   * Re-initializes `weight` and `bias` according to `initMethod`.
+   *
+   * `Default` draws both from a uniform range of +/- 1/sqrt(weight.size(2)).
+   * `Xavier` draws the weights from +/- sqrt(3 / (fanIn + fanOut)) and zeroes the bias.
+   *
+   * @throws UnsupportedOperationException for any other initialization method
+   */
+  override def reset(): Unit = {
+    initMethod match {
+      case Default =>
+        val stdv = 1.0 / math.sqrt(weight.size(2)) // todo, better to support uniform
+        weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv))
+        bias.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv))
+      case Xavier =>
+        val fanIn = weight.size(2)
+        val fanOut = weight.size(1)
+        // 3.0 forces floating-point division. With Int operands the quotient truncated
+        // to 0 whenever fanIn + fanOut > 3, which initialized every weight to zero.
+        val stdv = math.sqrt(3.0 / (fanIn + fanOut)) // todo, better to support uniform
+        weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv))
+        bias.fill(ev.fromType(0))
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Default / Xavier supported")
+    }
+  }
+
+  /**
+   * Forward pass. Resizes `output` to (input.size(1), bias.size(1)), creates the native
+   * primitive on the first call and then runs the native forward kernel.
+   *
+   * @param input 2-D tensor
+   * @return the layer's `output` tensor
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    require(input.dim() == 2, "only batch mode supported")
+
+    val nFrame = input.size(1)
+    val nElement = output.nElement()
+    output.resize(Array(nFrame, bias.size(1)))
+    // Clear the buffer only when the resize actually changed its element count.
+    if (output.nElement() != nElement) { output.zero() }
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val outputOffset = output.storageOffset() - 1
+    val biasOffset = bias.storageOffset() - 1
+    val kernelOffset = weight.storageOffset() - 1
+
+    if (firstPass) {
+      val inputWidth = input.size(input.dim())
+      val inputHeight = input.size(input.dim() - 1)
+      val kernelHeight = outputSize
+      val kernelWidth = inputSize
+      val outputChannels = outputSize
+
+      classPtr = ev.getType() match {
+        case "Double" =>
+          MKL.LinearInitDouble(inputHeight, inputWidth, outputChannels, kernelHeight, kernelWidth)
+        case "Float" =>
+          MKL.LinearInitFloat(inputHeight, inputWidth, outputChannels, kernelHeight, kernelWidth)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float/Double supported")
+      }
+
+      firstPass = false
+    }
+
+    ev.getType() match {
+      case "Double" =>
+        MKL.LinearForwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+                                inputOffset,
+                                output.storage().array().asInstanceOf[Array[Double]],
+                                outputOffset,
+                                weight.storage().array().asInstanceOf[Array[Double]],
+                                kernelOffset,
+                                bias.storage().array().asInstanceOf[Array[Double]],
+                                biasOffset,
+                                classPtr)
+      case "Float" =>
+        MKL.LinearForwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+                               inputOffset,
+                               output.storage().array().asInstanceOf[Array[Float]],
+                               outputOffset,
+                               weight.storage().array().asInstanceOf[Array[Float]],
+                               kernelOffset,
+                               bias.storage().array().asInstanceOf[Array[Float]],
+                               biasOffset,
+                               classPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+    output
+  }
+
+  /**
+   * Backward pass. Resizes `gradInput` like `input`, runs the backward-data kernel unless
+   * `needCompute` is false, and always runs the backward-kernel and backward-bias
+   * primitives, which are handed the `gradWeight` and `gradBias` buffers.
+   *
+   * NOTE(review): `classPtr` is passed as is; it is only set by `updateOutput`, so this
+   * presumably must be called after a forward pass - confirm against the native side.
+   *
+   * @param input      2-D tensor
+   * @param gradOutput gradient with respect to this layer's output
+   * @return the layer's `gradInput` tensor
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    require(input.dim() == 2, "only batch mode supported")
+    val nElement = gradInput.nElement()
+    gradInput.resizeAs(input)
+    if (nElement != gradInput.nElement()) {
+      gradInput.zero()
+    }
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val kernelOffset = weight.storageOffset() - 1
+    val biasOffset = bias.storageOffset() - 1
+    val gradOutputOffset = gradOutput.storageOffset() - 1
+    val gradInputOffset = gradInput.storageOffset() - 1
+    val gradWeightOffset = gradWeight.storageOffset() - 1
+    val gradBiasOffset = gradBias.storageOffset() - 1
+
+    ev.getType() match {
+      case "Double" =>
+        val inputData = input.storage().array().asInstanceOf[Array[Double]]
+        val gradOutputData = gradOutput.storage().array().asInstanceOf[Array[Double]]
+        val weightData = weight.storage().array().asInstanceOf[Array[Double]]
+        val biasData = bias.storage().array().asInstanceOf[Array[Double]]
+
+        if (needCompute) {
+          MKL.LinearBackwardDataDouble(inputData,
+                                       inputOffset,
+                                       gradOutputData,
+                                       gradOutputOffset,
+                                       gradInput.storage().array().asInstanceOf[Array[Double]],
+                                       gradInputOffset,
+                                       weightData,
+                                       kernelOffset,
+                                       biasData,
+                                       biasOffset,
+                                       classPtr)
+        }
+        MKL.LinearBackwardKernelDouble(inputData,
+                                       inputOffset,
+                                       gradOutputData,
+                                       gradOutputOffset,
+                                       gradWeight.storage().array().asInstanceOf[Array[Double]],
+                                       gradWeightOffset,
+                                       weightData,
+                                       kernelOffset,
+                                       biasData,
+                                       biasOffset,
+                                       classPtr)
+        MKL.LinearBackwardBiasDouble(inputData,
+                                     inputOffset,
+                                     gradOutputData,
+                                     gradOutputOffset,
+                                     gradBias.storage().array().asInstanceOf[Array[Double]],
+                                     gradBiasOffset,
+                                     weightData,
+                                     kernelOffset,
+                                     biasData,
+                                     biasOffset,
+                                     classPtr)
+
+      case "Float" =>
+        val inputData = input.storage().array().asInstanceOf[Array[Float]]
+        val gradOutputData = gradOutput.storage().array().asInstanceOf[Array[Float]]
+        val weightData = weight.storage().array().asInstanceOf[Array[Float]]
+        val biasData = bias.storage().array().asInstanceOf[Array[Float]]
+
+        if (needCompute) {
+          MKL.LinearBackwardDataFloat(inputData,
+                                      inputOffset,
+                                      gradOutputData,
+                                      gradOutputOffset,
+                                      gradInput.storage().array().asInstanceOf[Array[Float]],
+                                      gradInputOffset,
+                                      weightData,
+                                      kernelOffset,
+                                      biasData,
+                                      biasOffset,
+                                      classPtr)
+        }
+        MKL.LinearBackwardKernelFloat(inputData,
+                                      inputOffset,
+                                      gradOutputData,
+                                      gradOutputOffset,
+                                      gradWeight.storage().array().asInstanceOf[Array[Float]],
+                                      gradWeightOffset,
+                                      weightData,
+                                      kernelOffset,
+                                      biasData,
+                                      biasOffset,
+                                      classPtr)
+        MKL.LinearBackwardBiasFloat(inputData,
+                                    inputOffset,
+                                    gradOutputData,
+                                    gradOutputOffset,
+                                    gradBias.storage().array().asInstanceOf[Array[Float]],
+                                    gradBiasOffset,
+                                    weightData,
+                                    kernelOffset,
+                                    biasData,
+                                    biasOffset,
+                                    classPtr)
+
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    gradInput
+  }
+
+//  override def accGradParameters(input: Tensor[T],
+//                                 gradOutput: Tensor[T],
+//                                 scale: Double = 1.0): Unit = {
+//    require(input.dim() == 2, "only batch mode supported")
+//    require(input.dim() == 1 || input.dim() == 2, "input must be vector or matrix")
+//    val value = ev.fromType[Double](scale)
+//    if (input.dim() == 1) {
+//      gradWeight.addr(value, gradOutput, input)
+//      gradBias.add(value, gradOutput)
+//    } else if (input.dim() == 2) {
+//      gradWeight.addmm(value, gradOutput.t, input)
+//      gradBias.addmv(value, gradOutput.t, addBuffer)
+//    }
+//  }
+
+  /** Plain gradient step: subtracts `learningRate` times the gradient from each parameter. */
+  override def updateParameters(learningRate: T): Unit = {
+    // weight.map(gradWeight,(a,b)=>a - learningRate*b)
+    weight.add(ev.negative(learningRate), gradWeight)
+    // bias.map(gradBias,(a,b)=>a - learningRate*b)
+    bias.add(ev.negative(learningRate), gradBias)
+  }
+
+  /** Zeroes both gradient buffers. */
+  override def zeroGradParameters(): Unit = {
+    gradWeight.zero()
+    gradBias.zero()
+  }
+
+  /** Returns (parameters, gradients), each ordered as weight then bias. */
+  override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = {
+    (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias))
+  }
+
+  /**
+   * Equal when the parent's `equals` accepts `obj`, `obj` is a Linear, and either both
+   * references are the same object or all four parameter/gradient tensors are equal.
+   */
+  override def equals(obj: Any): Boolean = {
+    if (!super.equals(obj) || !obj.isInstanceOf[Linear[T]]) {
+      false
+    } else {
+      val other = obj.asInstanceOf[Linear[T]]
+      this.eq(other) || (
+        gradWeight == other.gradWeight &&
+          gradBias == other.gradBias &&
+          weight == other.weight &&
+          bias == other.bias)
+    }
+  }
+
+  /** Hash over the same four tensors that `equals` compares, seeded by the parent hash. */
+  override def hashCode(): Int = {
+    val seed = 37
+    var hash = super.hashCode()
+    hash = hash * seed + gradWeight.hashCode()
+    hash = hash * seed + gradBias.hashCode()
+    hash = hash * seed + weight.hashCode()
+    hash = hash * seed + bias.hashCode()
+
+    hash
+  }
+
+  override def toString(): String = {
+    s"nn.mkl.Linear($inputSize -> $outputSize)"
+  }
+
+  /**
+   * Returns this module together with `paramOffset` reduced by this layer's parameter
+   * count (weight elements plus bias elements); `indexes` is passed through unchanged.
+   */
+  override def findModel(paramOffset: Int, indexes: Array[Int]): (Module[T], Int, Array[Int]) = {
+    (this, paramOffset - outputSize * inputSize - outputSize, indexes)
+  }
+
+}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala
new file mode 100644
index 00000000000..30e185c258f
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/LocalNormalizationAcrossChannels.scala
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.mkl.MKL
+import com.intel.analytics.sparkdl.nn.Module
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.tensor._
+import com.intel.analytics.sparkdl.utils.RandomGenerator._
+import scala.reflect.ClassTag
+import scala.language.implicitConversions
+
+/**
+ * Local response normalization across channels, delegated to the native LRN primitives
+ * exposed through `MKL`.
+ *
+ * Input must be a 4-D tensor (batch, channels, height, width). The native primitive is
+ * created on the first `updateOutput` call from the sizes of that first input and is
+ * reused by every later forward and backward call.
+ *
+ * @param size  size argument forwarded to the native initializer; must be odd
+ * @param alpha forwarded to the native initializer
+ * @param beta  forwarded to the native initializer
+ * @param k     forwarded to the native initializer
+ */
+class LocalNormalizationAcrossChannels[@specialized(Float, Double) T: ClassTag](
+    val size: Int = 5,
+    val alpha: Double = 1.0,
+    val beta: Double = 0.75,
+    val k: Double = 1.0)(implicit ev: TensorNumeric[T])
+    extends Module[T] {
+
+  // NOTE(review): none of these buffers is read or written anywhere in this class; they
+  // look like leftovers from a non-MKL implementation - confirm before removing.
+  private val scale = Tensor[T]()
+  private val paddedSquare = Tensor[T]()
+  private val paddedRatio = Tensor[T]()
+  private val accumRatio = Tensor[T]()
+  private val accumRatioTimeInput = Tensor[T]()
+
+  require(size % 2 == 1, "LRN only supports odd values for size")
+  val prePad = (size - 1) / 2
+
+  // Handle of the native primitive; stays 0 until the first forward pass creates it.
+  var classPtr = 0L
+  private var firstPass = true
+
+  override def getClassPtr(): Long = classPtr
+
+  /**
+   * Equal when the parent's `equals` accepts `obj`, `obj` is an LRN module, and either
+   * both references are the same object or `size`, `alpha`, `beta` and `k` all match.
+   */
+  override def equals(obj: Any): Boolean = {
+    if (!super.equals(obj) || !obj.isInstanceOf[LocalNormalizationAcrossChannels[T]]) {
+      false
+    } else {
+      val other = obj.asInstanceOf[LocalNormalizationAcrossChannels[T]]
+      this.eq(other) || (
+        size == other.size &&
+          alpha == other.alpha && beta == other.beta && k == other.k)
+    }
+  }
+
+  /** Hash over the same four hyper-parameters that `equals` compares. */
+  override def hashCode(): Int = {
+    val seed = 37
+    var hash = super.hashCode()
+    hash = hash * seed + size.hashCode()
+    hash = hash * seed + alpha.hashCode()
+    hash = hash * seed + beta.hashCode()
+    hash = hash * seed + k.hashCode()
+
+    hash
+  }
+
+  override def toString(): String = {
+    s"mkl.LocalResponseNormalizationAcrossChannels($size, $alpha, $beta, $k)"
+  }
+
+  /**
+   * Forward pass. Resizes `output` like `input`, creates the native primitive on the
+   * first call and then runs the native forward kernel.
+   *
+   * @param input contiguous 4-D tensor
+   * @return the module's `output` tensor
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    require(input.nDimension() == 4,
+            "Input must have 4 dimensions, corresponding to (batch, channels, height, width)")
+    require(input.isContiguous(), "Input is not contiguous")
+
+    output.resizeAs(input)
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val outputOffset = output.storageOffset() - 1
+
+    if (firstPass) {
+      // The input is known to be 4-D here, so no lower-rank fallback is needed.
+      val inputWidth = input.size(input.dim())
+      val inputHeight = input.size(input.dim() - 1)
+      val inputChannel = input.size(input.dim() - 2)
+      val inputNumber = input.size(input.dim() - 3)
+
+      // The trailing 4 is presumably the tensor rank - TODO confirm against the JNI side.
+      classPtr = ev.getType() match {
+        case "Float" =>
+          MKL.LRNInitFloat(inputNumber,
+                           inputChannel,
+                           inputHeight,
+                           inputWidth,
+                           size,
+                           alpha.toFloat,
+                           beta.toFloat,
+                           k.toFloat,
+                           4)
+        case "Double" =>
+          MKL.LRNInitDouble(inputNumber,
+                            inputChannel,
+                            inputHeight,
+                            inputWidth,
+                            size,
+                            alpha,
+                            beta,
+                            k,
+                            4)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float/Double supported")
+      }
+      firstPass = false
+    }
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.LRNForwardFloat(
+          input.storage().array().asInstanceOf[Array[Float]],
+          inputOffset,
+          output.storage().array().asInstanceOf[Array[Float]],
+          outputOffset,
+          classPtr
+        )
+      case "Double" =>
+        MKL.LRNForwardDouble(
+          input.storage().array().asInstanceOf[Array[Double]],
+          inputOffset,
+          output.storage().array().asInstanceOf[Array[Double]],
+          outputOffset,
+          classPtr
+        )
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    output
+  }
+
+  /**
+   * Backward pass. Resizes `gradInput` like `input` and runs the native backward kernel.
+   *
+   * NOTE(review): `classPtr` is passed as is; it is only set by `updateOutput`, so this
+   * presumably must be called after a forward pass - confirm against the native side.
+   *
+   * @param input      4-D tensor
+   * @param gradOutput contiguous gradient with respect to this module's output
+   * @return the module's `gradInput` tensor
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    require(input.nDimension() == 4,
+            "Input must have 4 dimensions, corresponding to (batch, channels, height, width)")
+    require(gradOutput.isContiguous(), "gradOutput is not contiguous")
+
+    gradInput.resizeAs(input)
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val gradOutputOffset = gradOutput.storageOffset() - 1
+    val gradInputOffset = gradInput.storageOffset() - 1
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.LRNBackwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+                             inputOffset,
+                             gradOutput.storage().array().asInstanceOf[Array[Float]],
+                             gradOutputOffset,
+                             gradInput.storage().array().asInstanceOf[Array[Float]],
+                             gradInputOffset,
+                             classPtr)
+      case "Double" =>
+        MKL.LRNBackwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+                              inputOffset,
+                              gradOutput.storage().array().asInstanceOf[Array[Double]],
+                              gradOutputOffset,
+                              gradInput.storage().array().asInstanceOf[Array[Double]],
+                              gradInputOffset,
+                              classPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    gradInput
+  }
+}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala
new file mode 100644
index 00000000000..796652b7104
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/Pooling.scala
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.mkl.MKL
+import com.intel.analytics.sparkdl.nn.Module
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.utils.RandomGenerator
+import com.intel.analytics.sparkdl.tensor.Tensor
+
+import scala.language.implicitConversions
+
+import scala.reflect.ClassTag
+
+/**
+ * Spatial pooling delegated to the native pooling primitives exposed through `MKL`.
+ *
+ * The last three dimensions of the input are read as (channel, height, width); a 4-D
+ * input adds a leading batch dimension. The native primitive is created on the first
+ * `updateOutput` call from the sizes of that first input, the current `ceil_mode` and
+ * the (overridable) `algorithm`, and is reused by every later call.
+ *
+ * @param kernelWidth  pooling window width
+ * @param kernelHeight pooling window height
+ * @param strideWidth  horizontal stride
+ * @param strideHeight vertical stride
+ * @param padWidth     horizontal padding
+ * @param padHeight    vertical padding
+ */
+class SpatialPooling[@specialized(Float, Double) T: ClassTag](
+    val kernelWidth: Int,
+    val kernelHeight: Int,
+    val strideWidth: Int,
+    val strideHeight: Int,
+    val padWidth: Int = 0,
+    val padHeight: Int = 0)(implicit ev: TensorNumeric[T])
+    extends Module[T] {
+
+  // Lets `ceil_mode` be passed directly to the native initializer, which takes an Int.
+  implicit def bool2int(b: Boolean): Int = if (b) 1 else 0
+
+  // Handle of the native primitive; stays 0 until the first forward pass creates it.
+  var classPtr: Long = 0L
+  private var firstPass = true
+
+  override def getClassPtr(): Long = classPtr
+
+  // algorithm = 0 -> max
+  // algorithm = 1 -> avg
+  // Subclasses override this; it is read when the native primitive is created.
+  val algorithm: Int = 0
+
+  // TODO just for adopt to the testcase
+  var ceil_mode = false
+
+  /** Switches output-size rounding to ceil; returns this module. */
+  def ceil(): SpatialPooling[T] = {
+    ceil_mode = true
+    this
+  }
+
+  /** Switches output-size rounding to floor; returns this module. */
+  def floor(): SpatialPooling[T] = {
+    ceil_mode = false
+    this
+  }
+
+  /** Uses the kernel size as the stride and no padding. */
+  def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]) {
+    this(kernelWidth, kernelHeight, kernelWidth, kernelHeight)
+  }
+
+  /**
+   * Output extent along one axis: (input + 2 * pad - kernel) / stride, rounded up or
+   * down according to `ceil_mode`, plus one.
+   */
+  def computeOut(input: Int, pad: Int, kernel: Int, stride: Int): Int = {
+    val steps = 1.0 * (input + 2 * pad - kernel) / stride
+    val rounded = if (ceil_mode) math.ceil(steps) else math.floor(steps)
+    rounded.toInt + 1
+  }
+
+  /**
+   * Backward pass. Resizes `gradInput` like `input` and runs the native backward kernel.
+   *
+   * NOTE(review): `classPtr` is passed as is; it is only set by `updateOutput`, so this
+   * presumably must be called after a forward pass - confirm against the native side.
+   *
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    gradInput.resizeAs(input)
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val gradInputOffset = gradInput.storageOffset() - 1
+    val gradOutputOffset = gradOutput.storageOffset() - 1
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.PoolingBackwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+                                 inputOffset,
+                                 gradOutput.storage().array().asInstanceOf[Array[Float]],
+                                 gradOutputOffset,
+                                 gradInput.storage().array().asInstanceOf[Array[Float]],
+                                 gradInputOffset,
+                                 classPtr)
+      case "Double" =>
+        // The gradInput buffer must be paired with its own offset (the Float branch
+        // already did this; the Double branch used gradOutput's offset by mistake).
+        MKL.PoolingBackwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+                                  inputOffset,
+                                  gradOutput.storage().array().asInstanceOf[Array[Double]],
+                                  gradOutputOffset,
+                                  gradInput.storage().array().asInstanceOf[Array[Double]],
+                                  gradInputOffset,
+                                  classPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    gradInput
+  }
+
+  /**
+   * Forward pass. Resizes `output` to the pooled shape, creates the native primitive on
+   * the first call and then runs the native forward kernel.
+   *
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    val inputWidth = input.size(input.dim())
+    val inputHeight = input.size(input.dim() - 1)
+    val inputChannel = input.size(input.dim() - 2)
+    val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3)
+    // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3
+
+    val outputHeight =
+      computeOut(inputHeight, padHeight, kernelHeight, strideHeight)
+    val outputWidth =
+      computeOut(inputWidth, padWidth, kernelWidth, strideWidth)
+    val outputChannel = inputChannel
+    val outputNumber = inputNumber
+
+    if (input.dim() == 3) {
+      output.resize(Array(outputChannel, outputHeight, outputWidth))
+    } else {
+      output.resize(Array(outputNumber, outputChannel, outputHeight, outputWidth))
+    }
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets. The output
+    // offset is read after the resize above.
+    val inputOffset = input.storageOffset() - 1
+    val outputOffset = output.storageOffset() - 1
+
+    if (firstPass) {
+      // `algorithm` is the class member, so a subclass override (e.g. average pooling)
+      // reaches the native initializer. The literal 4 is presumably the tensor rank -
+      // TODO confirm against the JNI side.
+      classPtr = ev.getType() match {
+        case "Float" =>
+          MKL.PoolingInitFloat(inputNumber,
+                               inputChannel,
+                               inputHeight,
+                               inputWidth,
+                               kernelHeight,
+                               kernelWidth,
+                               strideHeight,
+                               strideWidth,
+                               padHeight,
+                               padWidth,
+                               4,
+                               ceil_mode,
+                               algorithm)
+        case "Double" =>
+          MKL.PoolingInitDouble(inputNumber,
+                                inputChannel,
+                                inputHeight,
+                                inputWidth,
+                                kernelHeight,
+                                kernelWidth,
+                                strideHeight,
+                                strideWidth,
+                                padHeight,
+                                padWidth,
+                                4,
+                                ceil_mode,
+                                algorithm)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float/Double supported")
+      }
+
+      firstPass = false
+    }
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.PoolingForwardFloat(input.storage().array.asInstanceOf[Array[Float]],
+                                inputOffset,
+                                output.storage().array.asInstanceOf[Array[Float]],
+                                outputOffset,
+                                classPtr)
+      case "Double" =>
+        MKL.PoolingForwardDouble(input.storage().array.asInstanceOf[Array[Double]],
+                                 inputOffset,
+                                 output.storage().array.asInstanceOf[Array[Double]],
+                                 outputOffset,
+                                 classPtr)
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+    output
+  }
+
+  override def toString(): String = {
+    s"mkl.Pooling"
+  }
+
+}
+
+/**
+ * [[SpatialPooling]] with `algorithm` fixed to 0 (max pooling). All constructor
+ * arguments are forwarded unchanged to the base class.
+ */
+class SpatialMaxPooling[T: ClassTag](
+    kernelWidth: Int,
+    kernelHeight: Int,
+    strideWidth: Int,
+    strideHeight: Int,
+    padWidth: Int = 0,
+    padHeight: Int = 0)(implicit ev: TensorNumeric[T])
+    extends SpatialPooling[T](
+      kernelWidth, kernelHeight, strideWidth, strideHeight, padWidth, padHeight) {
+
+  override val algorithm: Int = 0
+
+  /** Uses the kernel size as the stride and no padding. */
+  def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]) =
+    this(kernelWidth, kernelHeight, kernelWidth, kernelHeight)
+
+  override def toString(): String = "mkl.SpatialMaxPooling"
+}
+
+/**
+ * [[SpatialPooling]] with `algorithm` set to 1 (average pooling). All constructor
+ * arguments are forwarded unchanged to the base class.
+ *
+ * NOTE(review): the override only has an effect if the base class reads this member
+ * when it creates the native primitive - confirm in `SpatialPooling.updateOutput`.
+ */
+class SpatialAveragePooling[T: ClassTag](
+    kernelWidth: Int,
+    kernelHeight: Int,
+    strideWidth: Int,
+    strideHeight: Int,
+    padWidth: Int = 0,
+    padHeight: Int = 0)(implicit ev: TensorNumeric[T])
+    extends SpatialPooling[T](
+      kernelWidth, kernelHeight, strideWidth, strideHeight, padWidth, padHeight) {
+
+  override val algorithm: Int = 1
+
+  /** Uses the kernel size as the stride and no padding. */
+  def this(kernelWidth: Int, kernelHeight: Int)(implicit ev: TensorNumeric[T]) =
+    this(kernelWidth, kernelHeight, kernelWidth, kernelHeight)
+
+  override def toString(): String = "mkl.SpatialAvgPooling"
+}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala
new file mode 100644
index 00000000000..77fb16e903d
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/ReLU.scala
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.mkl.MKL
+import com.intel.analytics.sparkdl.nn.Module
+import com.intel.analytics.sparkdl.tensor.Tensor
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+
+import scala.language.implicitConversions
+
+import scala.reflect.ClassTag
+
+/**
+ * ReLU activation delegated to the native primitives exposed through `MKL`.
+ *
+ * The native primitive is created on the first `updateOutput` call from the sizes of
+ * that first input and is reused by every later forward and backward call.
+ *
+ * @param ip not read anywhere in this class (presumably an "in place" flag kept for
+ *           signature compatibility with the non-MKL module - TODO confirm)
+ */
+class ReLU[@specialized(Float, Double) T: ClassTag](ip: Boolean = false)(
+    implicit ev: TensorNumeric[T])
+    extends Module[T] {
+
+  override def toString(): String = {
+    s"mkl.ReLU"
+  }
+
+  private var firstPass = true
+  // Handle of the native primitive; stays 0 until the first forward pass creates it.
+  var classPtr = 0L
+
+  override def getClassPtr(): Long = classPtr
+
+  /**
+   * Backward pass. Resizes `gradInput` like `gradOutput` and runs the native backward
+   * kernel.
+   *
+   * NOTE(review): `classPtr` is passed as is; it is only set by `updateOutput`, so this
+   * presumably must be called after a forward pass - confirm against the native side.
+   *
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    gradInput.resizeAs(gradOutput)
+    // TODO Why does copy in mkl_dnn? Because it costs so much time, I comment is out.
+    // gradInput.copy(gradOutput)
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val gradInputOffset = gradInput.storageOffset() - 1
+    val gradOutputOffset = gradOutput.storageOffset() - 1
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.ReLUBackwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+                              inputOffset,
+                              gradOutput.storage().array().asInstanceOf[Array[Float]],
+                              gradOutputOffset,
+                              gradInput.storage().array().asInstanceOf[Array[Float]],
+                              gradInputOffset,
+                              classPtr)
+
+      case "Double" =>
+        MKL.ReLUBackwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+                               inputOffset,
+                               gradOutput.storage().array().asInstanceOf[Array[Double]],
+                               gradOutputOffset,
+                               gradInput.storage().array().asInstanceOf[Array[Double]],
+                               gradInputOffset,
+                               classPtr)
+
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    gradInput
+  }
+
+  /**
+   * Forward pass. Resizes `output` like `input`, creates the native primitive on the
+   * first call and then runs the native forward kernel.
+   *
+   * @throws UnsupportedOperationException if the numeric type is neither Float nor Double
+   */
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    output.resizeAs(input)
+
+    // storageOffset() is 1-based; the native side receives 0-based offsets.
+    val inputOffset = input.storageOffset() - 1
+    val outputOffset = output.storageOffset() - 1
+
+    if (firstPass) {
+      // Missing leading dimensions are reported to the native side as size 1.
+      val inputWidth = input.size(input.dim())
+      val inputHeight = input.size(input.dim() - 1)
+      val inputChannel = if (input.dim() <= 2) 1 else input.size(input.dim() - 2)
+      val inputNumber = if (input.dim() <= 3) 1 else input.size(input.dim() - 3)
+      // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3
+
+      // The literal 4 is presumably the tensor rank - TODO confirm against the JNI side.
+      classPtr = ev.getType() match {
+        case "Float" =>
+          MKL.ReLUInitFloat(inputNumber, inputChannel, inputHeight, inputWidth, 4)
+        case "Double" =>
+          MKL.ReLUInitDouble(inputNumber, inputChannel, inputHeight, inputWidth, 4)
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float/Double supported")
+      }
+      firstPass = false
+    }
+
+    ev.getType() match {
+      case "Float" =>
+        MKL.ReLUForwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+                             inputOffset,
+                             output.storage().array().asInstanceOf[Array[Float]],
+                             outputOffset,
+                             classPtr)
+
+      case "Double" =>
+        MKL.ReLUForwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+                              inputOffset,
+                              output.storage().array().asInstanceOf[Array[Double]],
+                              outputOffset,
+                              classPtr)
+
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    output
+  }
+}
diff --git a/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala
new file mode 100644
index 00000000000..5e024697109
--- /dev/null
+++ b/dl/src/main/scala/com/intel/analytics/sparkdl/nn/mkl/SpatialConvolution.scala
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.mkl.MKL
+import com.intel.analytics.sparkdl.nn.Module
+import com.intel.analytics.sparkdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.sparkdl.tensor._
+import com.intel.analytics.sparkdl.utils.RandomGenerator._
+
+import scala.language.implicitConversions
+
+import com.intel.analytics.sparkdl.nn.InitializationMethod
+import com.intel.analytics.sparkdl.nn.Default
+import com.intel.analytics.sparkdl.nn.Xavier
+
+import scala.reflect.ClassTag
+
+class SpatialConvolution[@specialized(Float, Double) T: ClassTag](
+ val nInputPlane: Int,
+ val nOutputPlane: Int,
+ val kernelWidth: Int,
+ val kernelHeight: Int,
+ val strideWidth: Int = 1,
+ val strideHeight: Int = 1,
+ val padWidth: Int = 0,
+ val padHeight: Int = 0,
+ val groups: Int = 1,
+ private var initMethod: InitializationMethod = Default
+)(implicit ev: TensorNumeric[T])
+ extends Module[T] {
+ val weight: Tensor[T] =
+ Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth)
+ val bias: Tensor[T] = Tensor[T](nOutputPlane)
+ this.gradInput = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth)
+ this.gradBias = Tensor[T](nOutputPlane)
+ this.gradWeight = Tensor[T](nOutputPlane, nInputPlane, kernelHeight, kernelWidth)
+ val fInput = Tensor[T]()
+ val fGradInput = Tensor[T]()
+ reset()
+
+ private var im2colTime = 0L
+ private var col2imTime = 0L
+
+ var classPtr = 0L
+ private var firstPass = true
+
+ override def getClassPtr(): Long = classPtr
+
+ def getIm2ColTime() : Long = im2colTime
+ def getCol2ImgTime() : Long = col2imTime
+
+ def setInitMethod(initMethod: InitializationMethod): this.type = {
+ this.initMethod = initMethod
+ this
+ }
+
+ override def reset(): Unit = {
+ val stdv = 1.0 / math.sqrt(kernelWidth * kernelHeight * nInputPlane)
+ // todo, better to support uniform
+ weight.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv))
+ bias.apply1(_ => ev.fromType[Double](RNG.uniform(0, 1) * 2 * stdv - stdv))
+ }
+
+  // Forward pass. Takes a contiguous 3D (channel, height, width) or 4D (batch mode) input,
+  // resizes `output` to the computed output geometry and runs the native MKL convolution,
+  // whose primitive is created on the first call from that call's input sizes.
+  override def updateOutput(input: Tensor[T]): Tensor[T] = {
+    require(input.dim() == 3 || input.dim() == 4, "Only support 3D or 4D(batch mode) input")
+    // TODO the requirement of contiguous input may be not necessary for MKL 2017.
+    //      because it supports the api of groups convolution.
+    require(input.isContiguous(), "input is not contiguous")
+
+    // compute the output height and width
+    def computeOut(input: Int, pad: Int, kernel: Int, stride: Int): Int = {
+      (input + 2 * pad - kernel) / stride + 1
+    }
+
+    // +---------+-------+-------+
+    // |         | 3-dim | 4-dim |
+    // +=========+=======+=======+
+    // | Number  | ?     | 1     |
+    // +---------+-------+-------+
+    // | Channel | 1     | 2     |
+    // +---------+-------+-------+
+    // | Height  | 2     | 3     |
+    // +---------+-------+-------+
+    // | Width   | 3     | 4     |
+    // +---------+-------+-------+
+    // Table: Index of 3-dim/4-dim input
+
+    val inputWidth = input.size(input.dim())
+    val inputHeight = input.size(input.dim() - 1)
+    val inputChannel = input.size(input.dim() - 2)
+    // TODO we may set input.size(input.dim() - 3) == 1 if input.dim() == 3
+    val inputNumber = if (input.dim() == 3) 1 else input.size(input.dim() - 3)
+
+    // output number is as same as input number
+    val outputNumber = inputNumber
+    val outputChannel = nOutputPlane
+    val outputWidth =
+      computeOut(inputWidth, padWidth, kernelWidth, strideWidth)
+    val outputHeight =
+      computeOut(inputHeight, padHeight, kernelHeight, strideHeight)
+
+    require(outputWidth >= 1 && outputHeight >= 1, "output size is too small")
+    if (input.dim() == 3) {
+      output.resize(Array(outputChannel, outputHeight, outputWidth))
+    } else {
+      output.resize(Array(outputNumber, outputChannel, outputHeight, outputWidth))
+    }
+
+    // kernel number is the same as nOutputPlane
+    val kernelNumber = nOutputPlane
+    // TODO kernel channel equals to input channel now
+    val kernelChannel = inputChannel
+
+    val inputOffset = input.storageOffset() - 1
+    val outputOffset = output.storageOffset() - 1
+    val biasOffset = bias.storageOffset() - 1
+    val kernelOffset = weight.storageOffset() - 1
+
+    if (firstPass) {
+      ev.getType() match {
+        case "Double" =>
+          classPtr = MKL.ConvolutionInitDouble(inputNumber,
+            inputChannel,
+            inputHeight,
+            inputWidth,
+            kernelNumber,
+            kernelChannel,
+            kernelHeight,
+            kernelWidth,
+            strideHeight,
+            strideWidth,
+            padHeight,
+            padWidth,
+            4,
+            groups)
+        case "Float" =>
+          classPtr = MKL.ConvolutionInitFloat(inputNumber,
+            inputChannel,
+            inputHeight,
+            inputWidth,
+            kernelNumber,
+            kernelChannel,
+            kernelHeight,
+            kernelWidth,
+            strideHeight,
+            strideWidth,
+            padHeight,
+            padWidth,
+            4,
+            groups)
+        case _ =>
+          throw new UnsupportedOperationException("Only Float/Double supported")
+      }
+      firstPass = false
+    }
+
+    ev.getType() match {
+      case "Double" =>
+        MKL.ConvolutionForwardDouble(input.storage().array().asInstanceOf[Array[Double]],
+          inputOffset,
+          output.storage().array().asInstanceOf[Array[Double]],
+          outputOffset,
+          weight.storage().array().asInstanceOf[Array[Double]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Double]],
+          biasOffset,
+          classPtr)
+      case "Float" =>
+        MKL.ConvolutionForwardFloat(input.storage().array().asInstanceOf[Array[Float]],
+          inputOffset,
+          output.storage().array().asInstanceOf[Array[Float]],
+          outputOffset,
+          weight.storage().array().asInstanceOf[Array[Float]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Float]],
+          biasOffset,
+          classPtr)
+
+      case _ =>
+        throw new UnsupportedOperationException("Only Float/Double supported")
+    }
+    output
+  }
+
+  /**
+   * Backward pass through the native MKL convolution primitive.
+   *
+   * Writes `gradInput` (skipped when `isNeedComputeBack()` is false) and also hands
+   * `gradWeight` / `gradBias` to the native kernel- and bias-gradient routines, so
+   * `accGradParameters` has nothing left to do.
+   *
+   * NOTE(review): `gradOutput` is passed to native code by raw offset without a
+   * contiguity check - confirm that callers never pass a strided view.
+   *
+   * @param input      contiguous 3D or 4D tensor
+   * @param gradOutput gradient of the output; its channel dimension (1 for 3D input,
+   *                   2 for 4D input) must have size `nOutputPlane`
+   * @return `gradInput`, resized to the shape of `input`
+   */
+  override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+    require(input.nDimension() == 3 || input.nDimension() == 4, "Only support 3D or 4D input")
+    require(nOutputPlane == (if (input.nDimension() == 3) gradOutput.size(1)
+      else gradOutput.size(2)),
+      "Number of output features is not equal to nOutputPlane")
+    require(input.isContiguous(), "input is not contiguous")
+    require(gradInput.isContiguous(), "gradInput is not contiguous")
+    // Within this class classPtr is only assigned by updateOutput; never hand a zero
+    // handle to the native code.
+    require(classPtr != 0L, "updateOutput must be called before updateGradInput")
+    gradInput.resizeAs(input)
+
+    // The native entry points take each backing array together with
+    // storageOffset() - 1 as the position inside it.
+    val gradInputOffset = gradInput.storageOffset() - 1
+    val gradKernelOffset = gradWeight.storageOffset() - 1
+    val gradOutputOffset = gradOutput.storageOffset() - 1
+    val gradBiasOffset = gradBias.storageOffset() - 1
+    val inputOffset = input.storageOffset() - 1
+    val biasOffset = bias.storageOffset() - 1
+    val kernelOffset = weight.storageOffset() - 1
+
+    // 1. gradient w.r.t. the input
+    if (isNeedComputeBack()) {
+      ev.getType() match {
+        case "Double" =>
+          MKL.ConvolutionBackwardDataDouble(
+            input.storage().array().asInstanceOf[Array[Double]],
+            inputOffset,
+            gradOutput.storage().array().asInstanceOf[Array[Double]],
+            gradOutputOffset,
+            gradInput.storage().array().asInstanceOf[Array[Double]],
+            gradInputOffset,
+            weight.storage().array().asInstanceOf[Array[Double]],
+            kernelOffset,
+            bias.storage().array().asInstanceOf[Array[Double]],
+            biasOffset,
+            classPtr
+          )
+        case "Float" =>
+          MKL.ConvolutionBackwardDataFloat(
+            input.storage().array().asInstanceOf[Array[Float]],
+            inputOffset,
+            gradOutput.storage().array().asInstanceOf[Array[Float]],
+            gradOutputOffset,
+            gradInput.storage().array().asInstanceOf[Array[Float]],
+            gradInputOffset,
+            weight.storage().array().asInstanceOf[Array[Float]],
+            kernelOffset,
+            bias.storage().array().asInstanceOf[Array[Float]],
+            biasOffset,
+            classPtr
+          )
+
+        case _ =>
+          throw new UnsupportedOperationException(s"Only Float/Double supported")
+      }
+    }
+
+    // 2. gradient w.r.t. the kernel
+    ev.getType() match {
+      case "Double" =>
+        MKL.ConvolutionBackwardKernelDouble(
+          input.storage().array().asInstanceOf[Array[Double]],
+          inputOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Double]],
+          gradOutputOffset,
+          gradWeight.storage().array().asInstanceOf[Array[Double]],
+          gradKernelOffset,
+          weight.storage().array().asInstanceOf[Array[Double]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Double]],
+          biasOffset,
+          classPtr
+        )
+      case "Float" =>
+        MKL.ConvolutionBackwardKernelFloat(
+          input.storage().array().asInstanceOf[Array[Float]],
+          inputOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Float]],
+          gradOutputOffset,
+          gradWeight.storage().array().asInstanceOf[Array[Float]],
+          gradKernelOffset,
+          weight.storage().array().asInstanceOf[Array[Float]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Float]],
+          biasOffset,
+          classPtr
+        )
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+
+    // 3. gradient w.r.t. the bias
+    ev.getType() match {
+      case "Double" =>
+        MKL.ConvolutionBackwardBiasDouble(
+          input.storage().array().asInstanceOf[Array[Double]],
+          inputOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Double]],
+          gradOutputOffset,
+          gradBias.storage().array().asInstanceOf[Array[Double]],
+          gradBiasOffset,
+          weight.storage().array().asInstanceOf[Array[Double]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Double]],
+          biasOffset,
+          classPtr
+        )
+
+      case "Float" =>
+        MKL.ConvolutionBackwardBiasFloat(
+          input.storage().array().asInstanceOf[Array[Float]],
+          inputOffset,
+          gradOutput.storage().array().asInstanceOf[Array[Float]],
+          gradOutputOffset,
+          gradBias.storage().array().asInstanceOf[Array[Float]],
+          gradBiasOffset,
+          weight.storage().array().asInstanceOf[Array[Float]],
+          kernelOffset,
+          bias.storage().array().asInstanceOf[Array[Float]],
+          biasOffset,
+          classPtr
+        )
+
+      case _ =>
+        throw new UnsupportedOperationException(s"Only Float/Double supported")
+    }
+    gradInput
+  }
+
+ override def updateParameters(learningRate: T): Unit = {
+ weight.map(gradWeight, (a, b) => ev.minus(a, ev.times(learningRate, b)))
+ bias.map(gradBias, (a, b) => ev.minus(a, ev.times(learningRate, b)))
+ }
+
+ override def zeroGradParameters(): Unit = {
+ gradWeight.zero()
+ gradBias.zero()
+ }
+
+ override def parameters(): (Array[Tensor[T]], Array[Tensor[T]]) = {
+ (Array(this.weight, this.bias), Array(this.gradWeight, this.gradBias))
+ }
+
+  // Equality requires the parent's equals to hold and then compares geometry, parameters
+  // and gradients field by field; `groups` and `initMethod` are not part of the comparison.
+  override def equals(obj: Any): Boolean = obj match {
+    case _ if !super.equals(obj) => false
+    case that: SpatialConvolution[_] =>
+      // the very same instance short-circuits the field-by-field comparison
+      this.eq(that) || (
+        nInputPlane == that.nInputPlane &&
+        nOutputPlane == that.nOutputPlane &&
+        kernelWidth == that.kernelWidth &&
+        kernelHeight == that.kernelHeight &&
+        strideWidth == that.strideWidth &&
+        strideHeight == that.strideHeight &&
+        padWidth == that.padWidth &&
+        padHeight == that.padHeight &&
+        weight == that.weight &&
+        bias == that.bias &&
+        gradWeight == that.gradWeight &&
+        gradBias == that.gradBias
+      )
+    case _ => false
+  }
+
+  // Mixes, with multiplier 37, exactly the fields that equals() compares.
+  override def hashCode(): Int = {
+    val seed = 37
+    var hash = super.hashCode()
+    hash = hash * seed + nInputPlane.hashCode()
+    hash = hash * seed + nOutputPlane.hashCode()
+    hash = hash * seed + kernelWidth.hashCode()
+    hash = hash * seed + kernelHeight.hashCode()
+    hash = hash * seed + strideWidth.hashCode()
+    hash = hash * seed + strideHeight.hashCode()
+    hash = hash * seed + padWidth.hashCode()
+    hash = hash * seed + padHeight.hashCode()
+    hash = hash * seed + weight.hashCode()
+    hash = hash * seed + bias.hashCode()
+    hash = hash * seed + gradWeight.hashCode()
+    hash = hash * seed + gradBias.hashCode()
+    hash
+  }
+
+ override def toString(): String = {
+ s"""mkl.SpatialConvolution($nInputPlane -> $nOutputPlane, $kernelWidth x $kernelHeight, $strideWidth, $strideHeight, $padWidth, $padHeight)"""
+ }
+
+ override def findModel(paramOffset: Int, indexes: Array[Int]): (Module[T], Int, Array[Int]) = {
+ (this,
+ paramOffset - nOutputPlane * nInputPlane * kernelHeight * kernelWidth - nOutputPlane,
+ indexes)
+ }
+
+ // mkl-dnn's convolution_backward has done updateGradInput and accGradParameters,
+ // so accGradParameters does nothing
+ // override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = {
+ // backward(input, gradOutput)
+ // }
+
+ override def accGradParameters(input: Tensor[T],
+ gradOutput: Tensor[T],
+ scale: Double = 1.0): Unit = {}
+}
diff --git a/dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala b/dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala
new file mode 100644
index 00000000000..cc127c24ff3
--- /dev/null
+++ b/dl/src/test/scala/com/intel/analytics/sparkdl/nn/mkl/GoogLeNetSpec.scala
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.sparkdl.nn.mkl
+
+import com.intel.analytics.sparkdl.models._
+import org.scalatest.FlatSpec
+
+class GoogLeNetSpec extends FlatSpec{
+ "GoogLeNet V1 with mkl dnn" should "ends with no segment fault" in {
+ Perf.performance[Float](new Params(batchSize = 32, module = "alexnet"))
+ } // NOTE(review): the spec is titled GoogLeNet V1 but runs module = "alexnet" - confirm
+}
diff --git a/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala b/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala
index 0284d54dff3..599fb1a0021 100644
--- a/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala
+++ b/dl/src/test/scala/com/intel/analytics/sparkdl/optim/EpochOptimizerSpec.scala
@@ -20,7 +20,7 @@ package com.intel.analytics.sparkdl.optim
import com.intel.analytics.sparkdl.nn._
import com.intel.analytics.sparkdl.ps.{AllReduceParameterManager, OneReduceParameterManager}
import com.intel.analytics.sparkdl.tensor.{Storage, Tensor}
-import com.intel.analytics.sparkdl.utils.{Engine, T}
+import com.intel.analytics.sparkdl.utils.{RandomGenerator, Engine, T}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}
@@ -38,6 +38,7 @@ class EpochOptimizerSpec extends FlatSpec with Matchers with BeforeAndAfter {
"An Artificial Neural Network with MSE and LBFGS" should "be trained with good result" in {
Logger.getLogger("org").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
+ RandomGenerator.RNG.setSeed(1000)
sc = new SparkContext("local[1]", "SerialOptimizerSpec")
@@ -98,6 +99,7 @@ class EpochOptimizerSpec extends FlatSpec with Matchers with BeforeAndAfter {
Logger.getLogger("org").setLevel(Level.WARN)
Logger.getLogger("akka").setLevel(Level.WARN)
+ RandomGenerator.RNG.setSeed(1000)
sc = new SparkContext("local[1]", "SerialOptimizerSpec")
// Prepare two kinds of input and their corresponding label
diff --git a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java
index 42e19c689b0..4e2796a95e1 100644
--- a/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java
+++ b/mkl/jni/src/main/java/com/intel/analytics/sparkdl/mkl/MKL.java
@@ -83,4 +83,196 @@ private static File file(String path) throws IOException {
String name = new File(path).getName();
return createTempFile("jniloader", name);
}
+
+ /* Convolution API */
+ public native static long ConvolutionInitFloat(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ int kernelNumber, int kernelChannel, int kernelHeight, int kernelWidth,
+ int strideHeight, int strideWidth, int padHeight, int padWidth,
+ int dimension, int groups);
+ public native static void ConvolutionForwardFloat(
+ float[] input, int inputOffset, float[] output, int outputOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void ConvolutionBackwardDataFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradInput, int gradInputOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void ConvolutionBackwardKernelFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradKernel, int gradKernelOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void ConvolutionBackwardBiasFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradBias, int gradBiasOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+
+ public native static long ConvolutionInitDouble(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ int kernelNumber, int kernelChannel, int kernelHeight, int kernelWidth,
+ int strideHeight, int strideWidth, int padHeight, int padWidth,
+ int dimension, int groups);
+ public native static void ConvolutionForwardDouble(
+ double[] input, int inputOffset, double[] output, int outputOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void ConvolutionBackwardDataDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradInput, int gradInputOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void ConvolutionBackwardKernelDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradKernel, int gradKernelOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void ConvolutionBackwardBiasDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradBias, int gradBiasOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+
+ /* ReLU API */
+ public native static long ReLUInitFloat(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth, int dimension);
+ public native static void ReLUForwardFloat(
+ float[] input, int inputOffset, float[] output, int outputOffset, long classPtr);
+ public native static void ReLUBackwardFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradInput, int gradInputOffset, long classPtr);
+
+ public native static long ReLUInitDouble(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth, int dimension);
+ public native static void ReLUForwardDouble(
+ double[] input, int inputOffset, double[] output, int outputOffset, long classPtr);
+ public native static void ReLUBackwardDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradInput, int gradInputOffset, long classPtr);
+
+ /* Pooling API */
+ public native static long PoolingInitFloat(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ int kernelHeight, int kernelWidth, int strideHeight, int strideWidth,
+ int padHeight, int padWidth, int dimension, int ceilMode,
+ int algorithm);
+ public native static void PoolingForwardFloat(
+ float[] input, int inputOffset, float[] output, int outputOffset,
+ long classPtr);
+ public native static void PoolingBackwardFloat(
+ float[] input, int inputOffset, float[] outputDiff,
+ int outputDiffOffset, float[] inputDiff, int inputDiffOffset,
+ long classPtr);
+
+ public native static long PoolingInitDouble(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ int kernelHeight, int kernelWidth, int strideHeight, int strideWidth,
+ int padHeight, int padWidth, int dimension, int ceilMode,
+ int algorithm);
+ public native static void PoolingForwardDouble(
+ double[] input, int inputOffset, double[] output, int outputOffset,
+ long classPtr);
+ public native static void PoolingBackwardDouble(
+ double[] input, int inputOffset, double[] outputDiff,
+ int outputDiffOffset, double[] inputDiff, int inputDiffOffset,
+ long classPtr);
+
+ /* Batch Normalization */
+ public native static long BatchNormInitFloat(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ double eps, int useKernel, int useBias,
+ int dimension);
+ public native static void BatchNormForwardFloat(
+ float[] input, int inputOffset, float[] output, int outputOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void BatchNormBackwardFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradInput, int gradInputOffset,
+ float[] kernelDiff, int kernelDiffOffset, float[] biasDiff, int biasDiffOffset, long classPtr);
+
+ public native static long BatchNormInitDouble(
+ int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ double eps, int useKernel, int useBias,
+ int dimension);
+ public native static void BatchNormForwardDouble(
+ double[] input, int inputOffset, double[] output, int outputOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void BatchNormBackwardDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradInput, int gradInputOffset,
+ double[] kernelDiff, int kernelDiffOffset, double[] biasDiff, int biasDiffOffset, long classPtr);
+
+ /* LRN API*/
+ public native static long LRNInitFloat(int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ int size, float alpha, float beta, float k, int dimension);
+ public native static void LRNForwardFloat(float[] input, int inputOffset, float[] output, int outputOffset, long classPtr);
+ public native static void LRNBackwardFloat(float[] input, int inputOffset,
+ float[] outputDiff, int outputOffsetDiff,
+ float[] inputDiff, int inputDiffOffset,
+ long classPtr);
+ public native static long LRNInitDouble(int inputNumber, int inputChannel, int inputHeight, int inputWidth,
+ int size, double alpha, double beta, double k, int dimension);
+ public native static void LRNForwardDouble(double[] input, int inputOffset, double[] output, int outputOffset, long classPtr);
+ public native static void LRNBackwardDouble(double[] input, int inputOffset,
+ double[] outputDiff, int outputOffsetDiff,
+ double[] inputDiff, int inputDiffOffset,
+ long classPtr);
+
+
+ /* Init MKL Model */
+ public native static void SetPrevFloat(long prev, long current);
+ public native static void SetPrevDouble(long prev, long current);
+
+ /* Release all allocated memory */
+ public native static void ReleaseAllMemFloat(long classPtr);
+ public native static void ReleaseAllMemDouble(long classPtr);
+
+
+ // TODO
+ /* Linear API */
+ public native static long LinearInitFloat(
+ int inputHeight, int inputWidth, int outputChannel,
+ int kernelHeight, int kernelWidth);
+ public native static void LinearForwardFloat(
+ float[] input, int inputOffset, float[] output, int outputOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void LinearBackwardDataFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradInput, int gradInputOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void LinearBackwardKernelFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradKernel, int gradKernelOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+ public native static void LinearBackwardBiasFloat(
+ float[] input, int inputOffset, float[] gradOutput, int gradOutputOffset,
+ float[] gradBias, int gradBiasOffset,
+ float[] kernel, int kernelOffset, float[] bias, int biasOffset, long classPtr);
+
+ public native static long LinearInitDouble(
+ int inputHeight, int inputWidth, int outputChannel,
+ int kernelHeight, int kernelWidth);
+ public native static void LinearForwardDouble(
+ double[] input, int inputOffset, double[] output, int outputOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void LinearBackwardDataDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradInput, int gradInputOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void LinearBackwardKernelDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradKernel, int gradKernelOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+ public native static void LinearBackwardBiasDouble(
+ double[] input, int inputOffset, double[] gradOutput, int gradOutputOffset,
+ double[] gradBias, int gradBiasOffset,
+ double[] kernel, int kernelOffset, double[] bias, int biasOffset, long classPtr);
+
+ /* Concat API */
+ public native static long ConcatInitFloat(int numChannels, int dimension, int[] size);
+ public native static void ConcatForwardFloat(float[][] input, int[] inputOffset, float[] output, int outputOffset, long classPtr);
+ public native static void ConcatBackwardFloat(float[][] gradInput, int[] gradInputOffset, float[] output, int outputOffset, long classPtr);
+ public native static long ConcatInitDouble(int numChannels, int dimension, int[] size);
+ public native static void ConcatForwardDouble(double[][] input, int[] inputOffset, double[] output, int outputOffset, long classPtr);
+ public native static void ConcatBackwardDouble(double[][] gradInput, int[] gradInputOffset, double[] output, int outputOffset, long classPtr);
+
+ /* Sum API */
+ public native static long SumInitFloat(int numChannels, int dimension, int[] size);
+ public native static void SumForwardFloat(float[][] input, int[] inputOffset, float[] output, int outputOffset, long classPtr);
+ public native static long SumInitDouble(int numChannels, int dimension, int[] size);
+ public native static void SumForwardDouble(double[][] input, int[] inputOffset, double[] output, int outputOffset, long classPtr);
}
diff --git a/mkl/native/pom.xml b/mkl/native/pom.xml
index 3f695449888..bfe1c0bb6e5 100644
--- a/mkl/native/pom.xml
+++ b/mkl/native/pom.xml
@@ -46,7 +46,18 @@
${basedir}/src/main/c/jni
- mkl.c
+ omp_threads.cpp
+ layer.cpp
+ convolution.cpp
+ pooling.cpp
+ lrn.cpp
+ linear.cpp
+ relu.cpp
+ batch_norm.cpp
+ concat.cpp
+ sum.cpp
+ utils.cpp
+ debug.cpp
@@ -63,7 +74,11 @@
-fPIC
-fopenmp
-Wall
- -std=c99
+ -std=c++11
+
-I ${JAVA_HOME}/include/
@@ -73,6 +88,8 @@
-lpthread
-lm
-lrt
+ -lrt
+ -lmkl_rt
-shared
diff --git a/mkl/native/src/main/c/jni/.clang-format b/mkl/native/src/main/c/jni/.clang-format
new file mode 100644
index 00000000000..4c24541ff91
--- /dev/null
+++ b/mkl/native/src/main/c/jni/.clang-format
@@ -0,0 +1,90 @@
+---
+Language: Cpp
+BasedOnStyle: llvm
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlinesLeft: true
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: false
+  AfterNamespace: false
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Linux
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: true
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories:
+  - Regex: '^<.*\.h>'
+    Priority: 1
+  - Regex: '^<.*'
+    Priority: 2
+  - Regex: '.*'
+    Priority: 3
+IndentCaseLabels: true
+IndentWidth: 2
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Auto
+TabWidth: 8
+UseTab: Never
+# AlignConsecutiveAssignments and AlignOperands were repeated here; each key now appears
+# once above, carrying the value of its later occurrence (duplicate keys are ambiguous).
diff --git a/mkl/native/src/main/c/jni/MKLWrapper.h b/mkl/native/src/main/c/jni/MKLWrapper.h
new file mode 100644
index 00000000000..5d75ddd5385
--- /dev/null
+++ b/mkl/native/src/main/c/jni/MKLWrapper.h
@@ -0,0 +1,527 @@
+#ifndef MKL_WRAPPER_H_  // guard renamed: _MKLWARPPER_H was a reserved identifier
+#define MKL_WRAPPER_H_
+#include
+#include
+#include
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnGroupsConvolutionCreateForwardBias(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t srcSize[], const size_t dstSize[], const size_t filterSize[],
+    const size_t convolutionStrides[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnGroupsConvolutionCreateForwardBias_F32(
+      pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize,
+      filterSize, convolutionStrides, inputOffset, borderType);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnGroupsConvolutionCreateForwardBias<double>(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t srcSize[], const size_t dstSize[], const size_t filterSize[],
+    const size_t convolutionStrides[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnGroupsConvolutionCreateForwardBias_F64(
+      pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize,
+      filterSize, convolutionStrides, inputOffset, borderType);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnGroupsConvolutionCreateBackwardData(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t srcSize[], const size_t dstSize[], const size_t filterSize[],
+    const size_t convolutionStrides[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnGroupsConvolutionCreateBackwardData_F32(
+      pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize,
+      filterSize, convolutionStrides, inputOffset, borderType);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnGroupsConvolutionCreateBackwardData<double>(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t srcSize[], const size_t dstSize[], const size_t filterSize[],
+    const size_t convolutionStrides[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnGroupsConvolutionCreateBackwardData_F64(
+      pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize,
+      filterSize, convolutionStrides, inputOffset, borderType);
+}
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnGroupsConvolutionCreateBackwardFilter(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t srcSize[], const size_t dstSize[], const size_t filterSize[],
+    const size_t convolutionStrides[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnGroupsConvolutionCreateBackwardFilter_F32(
+      pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize,
+      filterSize, convolutionStrides, inputOffset, borderType);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnGroupsConvolutionCreateBackwardFilter<double>(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t srcSize[], const size_t dstSize[], const size_t filterSize[],
+    const size_t convolutionStrides[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnGroupsConvolutionCreateBackwardFilter_F64(
+      pConvolution, attributes, algorithm, groups, dimension, srcSize, dstSize,
+      filterSize, convolutionStrides, inputOffset, borderType);
+}
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnGroupsConvolutionCreateBackwardBias(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t dstSize[])
+{
+  return dnnGroupsConvolutionCreateBackwardBias_F32(
+      pConvolution, attributes, algorithm, groups, dimension, dstSize);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnGroupsConvolutionCreateBackwardBias<double>(
+    dnnPrimitive_t *pConvolution, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t algorithm, size_t groups, size_t dimension,
+    const size_t dstSize[])
+{
+  return dnnGroupsConvolutionCreateBackwardBias_F64(
+      pConvolution, attributes, algorithm, groups, dimension, dstSize);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnExecute(dnnPrimitive_t primitive, void *resources[])
+{
+  return dnnExecute_F32(primitive, resources);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnExecute<double>(dnnPrimitive_t primitive, void *resources[])
+{
+  return dnnExecute_F64(primitive, resources);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnReLUCreateForward(dnnPrimitive_t *pRelu,
+                                dnnPrimitiveAttributes_t attributes,
+                                const dnnLayout_t dataLayout,
+                                Type negativeSlope)
+{
+  return dnnReLUCreateForward_F32(pRelu, attributes, dataLayout, negativeSlope);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnReLUCreateForward<double>(
+    dnnPrimitive_t *pRelu, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout,
+    double negativeSlope)
+{
+  return dnnReLUCreateForward_F64(pRelu, attributes, dataLayout, negativeSlope);
+}
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnReLUCreateBackward(dnnPrimitive_t *pRelu,
+                                 dnnPrimitiveAttributes_t attributes,
+                                 const dnnLayout_t diffLayout,
+                                 const dnnLayout_t dataLayout,
+                                 Type negativeSlope)
+{
+  return dnnReLUCreateBackward_F32(pRelu, attributes, diffLayout, dataLayout,
+                                   negativeSlope);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnReLUCreateBackward<double>(
+    dnnPrimitive_t *pRelu, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t diffLayout,
+    const dnnLayout_t dataLayout,
+    double negativeSlope)
+{
+  return dnnReLUCreateBackward_F64(pRelu, attributes, diffLayout, dataLayout,
+                                   negativeSlope);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnLayoutCreate(dnnLayout_t *pLayout, size_t dimension,
+                           const size_t size[], const size_t strides[])
+{
+  return dnnLayoutCreate_F32(pLayout, dimension, size, strides);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnLayoutCreate<double>(
+    dnnLayout_t *pLayout, size_t dimension, const size_t size[],
+    const size_t strides[])
+{
+  return dnnLayoutCreate_F64(pLayout, dimension, size, strides);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnPoolingCreateForward(
+    dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[],
+    const size_t kernelStride[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnPoolingCreateForward_F32(pPooling, attributes, op, srcLayout,
+                                     kernelSize, kernelStride, inputOffset,
+                                     borderType);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnPoolingCreateForward<double>(
+    dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[],
+    const size_t kernelStride[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnPoolingCreateForward_F64(pPooling, attributes, op, srcLayout,
+                                     kernelSize, kernelStride, inputOffset,
+                                     borderType);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnPoolingCreateBackward(
+    dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[],
+    const size_t kernelStride[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnPoolingCreateBackward_F32(pPooling, attributes, op, srcLayout,
+                                      kernelSize, kernelStride, inputOffset,
+                                      borderType);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnPoolingCreateBackward<double>(
+    dnnPrimitive_t *pPooling, dnnPrimitiveAttributes_t attributes,
+    dnnAlgorithm_t op, const dnnLayout_t srcLayout, const size_t kernelSize[],
+    const size_t kernelStride[], const int inputOffset[],
+    const dnnBorder_t borderType)
+{
+  return dnnPoolingCreateBackward_F64(pPooling, attributes, op, srcLayout,
+                                      kernelSize, kernelStride, inputOffset,
+                                      borderType);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnLayoutCreateFromPrimitive(dnnLayout_t *pLayout,
+                                        const dnnPrimitive_t primitive,
+                                        dnnResourceType_t type)
+{
+  return dnnLayoutCreateFromPrimitive_F32(pLayout, primitive, type);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnLayoutCreateFromPrimitive<double>(
+    dnnLayout_t *pLayout, const dnnPrimitive_t primitive,
+    dnnResourceType_t type)
+{
+  return dnnLayoutCreateFromPrimitive_F64(pLayout, primitive, type);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnDelete(dnnPrimitive_t primitive)
+{
+  return dnnDelete_F32(primitive);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnDelete<double>(dnnPrimitive_t primitive)
+{
+  return dnnDelete_F64(primitive);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnLayoutDelete(dnnLayout_t layout)
+{
+  return dnnLayoutDelete_F32(layout);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnLayoutDelete<double>(dnnLayout_t layout)
+{
+  return dnnLayoutDelete_F64(layout);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+int dnnLayoutCompare(const dnnLayout_t L1, const dnnLayout_t L2)
+{
+  return dnnLayoutCompare_F32(L1, L2);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline int dnnLayoutCompare<double>(const dnnLayout_t L1, const dnnLayout_t L2)
+{
+  return dnnLayoutCompare_F64(L1, L2);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+size_t dnnLayoutGetMemorySize(const dnnLayout_t Layout)
+{
+  return dnnLayoutGetMemorySize_F32(Layout);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline size_t dnnLayoutGetMemorySize<double>(const dnnLayout_t Layout)
+{
+  return dnnLayoutGetMemorySize_F64(Layout);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnAllocateBuffer(void **pPtr, dnnLayout_t layout)
+{
+  return dnnAllocateBuffer_F32(pPtr, layout);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnAllocateBuffer<double>(void **pPtr, dnnLayout_t layout)
+{
+  return dnnAllocateBuffer_F64(pPtr, layout);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnConversionCreate(dnnPrimitive_t *pConversion,
+                               const dnnLayout_t from, const dnnLayout_t to)
+{
+  return dnnConversionCreate_F32(pConversion, from, to);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnConversionCreate<double>(dnnPrimitive_t *pConversion,
+                                              const dnnLayout_t from,
+                                              const dnnLayout_t to)
+{
+  return dnnConversionCreate_F64(pConversion, from, to);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnReleaseBuffer(void *pPtr)
+{
+  return dnnReleaseBuffer_F32(pPtr);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnReleaseBuffer<double>(void *pPtr)
+{
+  return dnnReleaseBuffer_F64(pPtr);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnBatchNormalizationCreateForward(
+    dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout, float eps)
+{
+  return dnnBatchNormalizationCreateForward_F32(pBatchNormalization, attributes,
+                                                dataLayout, eps);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnBatchNormalizationCreateForward<double>(
+    dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout, float eps)
+{
+  return dnnBatchNormalizationCreateForward_F64(pBatchNormalization, attributes,
+                                                dataLayout, eps);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnBatchNormalizationCreateBackwardScaleShift(
+    dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout, float eps)
+{
+  return dnnBatchNormalizationCreateBackwardScaleShift_F32(
+      pBatchNormalization, attributes, dataLayout, eps);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnBatchNormalizationCreateBackwardScaleShift<double>(
+    dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout, float eps)
+{
+  return dnnBatchNormalizationCreateBackwardScaleShift_F64(
+      pBatchNormalization, attributes, dataLayout, eps);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnBatchNormalizationCreateBackwardData(
+    dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout, float eps)
+{
+  return dnnBatchNormalizationCreateBackwardData_F32(
+      pBatchNormalization, attributes, dataLayout, eps);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnBatchNormalizationCreateBackwardData<double>(
+    dnnPrimitive_t *pBatchNormalization, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout, float eps)
+{
+  return dnnBatchNormalizationCreateBackwardData_F64(
+      pBatchNormalization, attributes, dataLayout, eps);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnLRNCreateForward(dnnPrimitive_t *pLrn,
+                               dnnPrimitiveAttributes_t attributes,
+                               const dnnLayout_t dataLayout, size_t kernelSize,
+                               float alpha, float beta, float k)
+{
+  return dnnLRNCreateForward_F32(pLrn, attributes, dataLayout, kernelSize,
+                                 alpha, beta, k);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnLRNCreateForward<double>(
+    dnnPrimitive_t *pLrn, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t dataLayout,
+    size_t kernelSize, float alpha,
+    float beta, float k)
+{
+  return dnnLRNCreateForward_F64(pLrn, attributes, dataLayout, kernelSize,
+                                 alpha, beta, k);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnLRNCreateBackward(dnnPrimitive_t *pLrn,
+                                dnnPrimitiveAttributes_t attributes,
+                                const dnnLayout_t diffLayout,
+                                const dnnLayout_t dataLayout, size_t kernelSize,
+                                float alpha, float beta, float k)
+{
+  return dnnLRNCreateBackward_F32(pLrn, attributes, diffLayout, dataLayout,
+                                  kernelSize, alpha, beta, k);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnLRNCreateBackward<double>(
+    dnnPrimitive_t *pLrn, dnnPrimitiveAttributes_t attributes,
+    const dnnLayout_t diffLayout,
+    const dnnLayout_t dataLayout,
+    size_t kernelSize, float alpha,
+    float beta, float k)
+{
+  return dnnLRNCreateBackward_F64(pLrn, attributes, diffLayout, dataLayout,
+                                  kernelSize, alpha, beta, k);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnInnerProductCreateForwardBias(dnnPrimitive_t *pInnerProduct,
+                                            dnnPrimitiveAttributes_t attributes,
+                                            size_t dimensions,
+                                            const size_t srcSize[],
+                                            size_t outputChannels)
+{
+  return dnnInnerProductCreateForwardBias_F32(
+      pInnerProduct, attributes, dimensions, srcSize, outputChannels);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnInnerProductCreateForwardBias<double>(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t srcSize[], size_t outputChannels)
+{
+  return dnnInnerProductCreateForwardBias_F64(
+      pInnerProduct, attributes, dimensions, srcSize, outputChannels);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnInnerProductCreateBackwardData(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t srcSize[], size_t outputChannels)
+{
+  return dnnInnerProductCreateBackwardData_F32(
+      pInnerProduct, attributes, dimensions, srcSize, outputChannels);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnInnerProductCreateBackwardData<double>(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t srcSize[], size_t outputChannels)
+{
+  return dnnInnerProductCreateBackwardData_F64(
+      pInnerProduct, attributes, dimensions, srcSize, outputChannels);
+}
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnInnerProductCreateBackwardFilter(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t srcSize[], size_t outputChannels)
+{
+  return dnnInnerProductCreateBackwardFilter_F32(
+      pInnerProduct, attributes, dimensions, srcSize, outputChannels);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnInnerProductCreateBackwardFilter<double>(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t srcSize[], size_t outputChannels)
+{
+  return dnnInnerProductCreateBackwardFilter_F64(
+      pInnerProduct, attributes, dimensions, srcSize, outputChannels);
+}
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnInnerProductCreateBackwardBias(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t dstSize[])
+{
+  return dnnInnerProductCreateBackwardBias_F32(pInnerProduct, attributes,
+                                               dimensions, dstSize);
+}
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnInnerProductCreateBackwardBias<double>(
+    dnnPrimitive_t *pInnerProduct, dnnPrimitiveAttributes_t attributes,
+    size_t dimensions, const size_t dstSize[])
+{
+  return dnnInnerProductCreateBackwardBias_F64(pInnerProduct, attributes,
+                                               dimensions, dstSize);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnConcatCreate(dnnPrimitive_t *pConcat,
+                           dnnPrimitiveAttributes_t attributes,
+                           size_t nSrcTensors, dnnLayout_t *src)
+{
+  return dnnConcatCreate_F32(pConcat, attributes, nSrcTensors, src);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnConcatCreate<double>(dnnPrimitive_t *pConcat,
+                                          dnnPrimitiveAttributes_t attributes,
+                                          size_t nSrcTensors, dnnLayout_t *src)
+{
+  return dnnConcatCreate_F64(pConcat, attributes, nSrcTensors, src);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnSplitCreate(dnnPrimitive_t *pSplit,
+                          dnnPrimitiveAttributes_t attributes,
+                          const size_t nDstTensors, dnnLayout_t layout,
+                          size_t dstChannelSize[])
+{
+  // all arguments are passed through unchanged
+  return dnnSplitCreate_F32(pSplit, attributes, nDstTensors, layout,
+                            dstChannelSize);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnSplitCreate<double>(
+    dnnPrimitive_t *pSplit, dnnPrimitiveAttributes_t attributes,
+    const size_t nDstTensors, dnnLayout_t layout,
+    size_t dstChannelSize[])
+{
+  // all arguments are passed through unchanged
+  return dnnSplitCreate_F64(pSplit, attributes, nDstTensors, layout,
+                            dstChannelSize);
+}
+
+template <typename Type>  // generic case: forwards to the _F32 routine
+dnnError_t dnnSumCreate(
+    dnnPrimitive_t *pSum,
+    dnnPrimitiveAttributes_t attributes, const size_t nSummands,
+    dnnLayout_t layout, Type *coefficients)
+{
+  return dnnSumCreate_F32(pSum, attributes, nSummands, layout, coefficients);
+}
+
+template <>  // double case: forwards to _F64; inline keeps this header ODR-safe
+inline dnnError_t dnnSumCreate<double>(
+    dnnPrimitive_t *pSum,
+    dnnPrimitiveAttributes_t attributes, const size_t nSummands,
+    dnnLayout_t layout, double *coefficients)
+{
+  return dnnSumCreate_F64(pSum, attributes, nSummands, layout, coefficients);
+}
+#endif
diff --git a/mkl/native/src/main/c/jni/batch_norm.cpp b/mkl/native/src/main/c/jni/batch_norm.cpp
new file mode 100644
index 00000000000..c648e5c5ef1
--- /dev/null
+++ b/mkl/native/src/main/c/jni/batch_norm.cpp
@@ -0,0 +1,428 @@
+#include
+
+#include "debug.h"
+#include "layer.h"
+#include "memory.h"
+#include "utils.h"
+
+template <typename DType>  // DType: element type of the tensors and weights
+class MKLBatchNorm : public MKLLayer<DType>
+{
+ public:
+  MKLBatchNorm();
+  ~MKLBatchNorm();
+
+  void init(size_t inputNumber, size_t inputChannel, size_t inputHeight,
+            size_t inputWidth, double eps, int useKernel, int useBias,
+            int dimension);
+
+  void updateOutput(DType *input, DType *output);
+  void updateGradInput(DType *input, DType *gradOutput, DType *gradInput);
+
+  void setKernel(DType *ptr);      // the setters only store the pointer;
+  void setBias(DType *ptr);        // this class never frees these buffers
+  void setGradKernel(DType *ptr);
+  void setGradBias(DType *ptr);
+
+ private:
+  // this method is not the same as createMklLayout in MKLMemory
+  void firstPass();
+  void preExecute(DType *input);
+
+  std::shared_ptr<MKLData<DType>> scaleShift;  // scale values, then shifts
+  std::shared_ptr<MKLData<DType>> workspace;
+
+  size_t inputSize[4];  // {width, height, channel, number}, set in init()
+  size_t inputStrides[4];
+
+  size_t outputSize[4];  // filled with the same values as inputSize
+  size_t outputStrides[4];
+
+  double eps;      // passed to the dnnBatchNormalizationCreate* calls
+  bool useKernel;  // true when init() received useKernel > 0
+  bool useBias;    // true when init() received useBias > 0
+
+  DType *kernel;
+  DType *bias;
+  DType *gradKernel;
+  DType *gradBias;
+
+  dnnPrimitive_t scaleShiftPrim;  // created in firstPass() only if useKernel
+};
+
+template <typename DType>  // every scalar member gets a defined value here
+MKLBatchNorm<DType>::MKLBatchNorm()
+    : scaleShift(new MKLData<DType>),
+      workspace(new MKLData<DType>),
+      eps(0.00001),
+      useKernel(false), useBias(false),  // were indeterminate until init()
+      kernel(NULL),
+      bias(NULL),
+      gradKernel(NULL),
+      gradBias(NULL),
+      scaleShiftPrim(NULL)
+{}
+
+template <typename DType>
+MKLBatchNorm<DType>::~MKLBatchNorm()
+{  // scaleShiftPrim stays NULL unless firstPass() ran with useKernel set
+  if (scaleShiftPrim != NULL) dnnDelete<DType>(scaleShiftPrim);
+}
+
+template <typename DType>
+void MKLBatchNorm<DType>::setKernel(DType *ptr)
+{  // stores the pointer only; updateOutput() reads it as the scale values
+  kernel = ptr;
+}
+template <typename DType>
+void MKLBatchNorm<DType>::setBias(DType *ptr)
+{  // stores the pointer only; updateOutput() reads it as the shift values
+  bias = ptr;
+}
+template <typename DType>
+void MKLBatchNorm<DType>::setGradKernel(DType *ptr)
+{  // stores the pointer only; updateGradInput() writes the scale gradient
+  gradKernel = ptr;
+}
+template <typename DType>
+void MKLBatchNorm<DType>::setGradBias(DType *ptr)
+{  // stores the pointer only; updateGradInput() writes the shift gradient
+  gradBias = ptr;
+}
+
+template <typename DType>
+void MKLBatchNorm<DType>::init(size_t inputNumber, size_t inputChannel,
+                               size_t inputHeight, size_t inputWidth,
+                               double eps, int useKernel, int useBias,
+                               int dimension)
+{  // records geometry and flags, then creates the four user layouts
+  this->dimension = dimension;
+
+  inputSize[0] = inputWidth;  // stride 1 belongs to width: W, H, C, N order
+  inputSize[1] = inputHeight;
+  inputSize[2] = inputChannel;
+  inputSize[3] = inputNumber;
+
+  inputStrides[0] = 1;
+  for (size_t i = 1; i < 4; i++)
+    inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1];
+
+  // the output geometry is filled with exactly the same values as the input
+  // geometry, so sizes and strides of both sides always agree.
+  outputSize[0] = inputWidth;
+  outputSize[1] = inputHeight;
+  outputSize[2] = inputChannel;
+  outputSize[3] = inputNumber;
+
+  outputStrides[0] = 1;
+  for (size_t i = 1; i < 4; i++)
+    outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1];
+
+  this->eps = eps;
+  this->useKernel = useKernel > 0;  // any positive value enables the flag
+  this->useBias = useBias > 0;
+
+  // create usr layout
+  this->input->createUsrLayout(dimension, inputSize, inputStrides);
+  this->output->createUsrLayout(dimension, outputSize, outputStrides);
+
+  this->gradInput->createUsrLayout(dimension, inputSize, inputStrides);
+  this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides);
+}
+
+template <typename DType>
+void MKLBatchNorm<DType>::firstPass()
+{  // one-time setup: creates the MKL primitives and the MKL-side layouts
+  dnnError_t status = E_UNIMPLEMENTED;
+  dnnLayout_t layout;
+
+  status =
+      dnnLayoutCreate<DType>(&layout, this->dimension, inputSize, inputStrides);
+  CHECK_EQ(status, E_SUCCESS);
+
+  // forward
+  status = dnnBatchNormalizationCreateForward<DType>(&(this->forwardPrim), NULL,
+                                                     layout, eps);
+  CHECK_EQ(status, E_SUCCESS);
+
+  this->input->createMklLayout(this->forwardPrim, dnnResourceSrc);
+  this->output->createMklLayout(this->forwardPrim, dnnResourceDst);
+
+  // backward data
+  status = dnnBatchNormalizationCreateBackwardData<DType>(
+      &(this->backwardPrim), NULL, layout, eps);
+  CHECK_EQ(status, E_SUCCESS);
+
+  this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst);
+  this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc);
+
+  // scaleshift: the backward primitive is only needed when useKernel is set
+  this->scaleShift->createMklLayout(this->forwardPrim, dnnResourceScaleShift);
+  this->scaleShift->createConversion(true);
+  if (useKernel) {
+    status = dnnBatchNormalizationCreateBackwardScaleShift<DType>(
+        &scaleShiftPrim, NULL, layout, eps);
+    CHECK_EQ(status, E_SUCCESS);
+  }
+
+  // workspace
+  this->workspace->createMklLayout(this->forwardPrim, dnnResourceWorkspace);
+  this->workspace->createConversion(true);
+
+  // we create the layout only at the first time
+  this->isFirstPass = false;
+
+  // delete the temporary layout that was only used to create the primitives
+  dnnLayoutDelete<DType>(layout);
+}
+
+template <typename DType>
+void MKLBatchNorm<DType>::preExecute(DType *input)
+{  // the `input` argument is not read; only the member's conversion is rebuilt
+  this->input->createConversion();
+}
+
+template <typename DType>
+void MKLBatchNorm<DType>::updateOutput(DType *input, DType *output)
+{
+  if (this->isFirstPass) firstPass();
+
+  // Because the address will change every time, so we need create conversion
+  // every forward/backward.
+  // TODO Should we set the kernel and bias address every time?
+  preExecute(input);
+  this->output->createConversion();
+
+  DType *ptr = reinterpret_cast<DType *>(scaleShift->getData());
+
+  // Fill the scale-shift buffer: the first `channels` entries receive the
+  // scale and the following `channels` entries receive the shift.
+  //   useKernel            -> scale = kernel[i]
+  //   useKernel && useBias -> shift = bias[i]
+  //   otherwise            -> scale = 1, shift = 0
+  // Both flags are loop invariant, so they are evaluated once, and the
+  // index is size_t because it is compared against an unsigned count.
+  // kernel/bias are only dereferenced when the matching flag is set.
+  const size_t channels = inputSize[2];
+  const bool copyBias = useKernel && useBias;
+  for (size_t i = 0; i < channels; i++) {
+    ptr[i] = useKernel ? kernel[i] : static_cast<DType>(1.0);
+    ptr[i + channels] = copyBias ? bias[i] : static_cast<DType>(0);
+  }
+
+#ifdef DEBUG
+  printData(reinterpret_cast<DType *>(this->input->getUsrData()),
+            this->inputSize[3], this->inputSize[2], this->inputSize[1],
+            this->inputSize[0], "Forward input");
+#endif
+
+  dnnError_t status;
+  void *resources[dnnResourceNumber];
+
+  resources[dnnResourceSrc] = this->input->getConvertedData();
+  resources[dnnResourceDst] = this->output->getData();
+  resources[dnnResourceScaleShift] = scaleShift->getData();
+  resources[dnnResourceWorkspace] = workspace->getData();
+
+  PERFSTART();
+  status = dnnExecute<DType>(this->forwardPrim, resources);
+  PERFEND("main computing");
+  CHECK_EQ(status, E_SUCCESS);
+
+#ifdef DEBUG
+  printData(reinterpret_cast<DType *>(this->output->getData()),
+            outputSize[3], outputSize[2], outputSize[1], outputSize[0],
+            "Forward output");
+#endif
+
+  if (!this->output->isUseNext()) {
+    this->output->backToUsr();
+  }
+}
+
+template <typename DType>
+void MKLBatchNorm<DType>::updateGradInput(DType *input, DType *gradOutput,
+                                          DType *gradInput)
+{  // NOTE(review): no firstPass() here; assumes updateOutput() already ran
+  dnnError_t status;
+  void *resources[dnnResourceNumber];
+
+  preExecute(input);
+
+  this->gradOutput->createConversion();
+  this->gradInput->createConversion();
+
+  resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData();
+  resources[dnnResourceDiffSrc] = this->gradInput->getData();
+  resources[dnnResourceSrc] = this->input->getConvertedData();
+  resources[dnnResourceScaleShift] = scaleShift->getData();
+  resources[dnnResourceWorkspace] = workspace->getData();
+
+  // main computation: run the backward-data primitive
+  PERFSTART();
+  status = dnnExecute<DType>(this->backwardPrim, resources);
+  CHECK_EQ(status, E_SUCCESS);
+  PERFEND("main computing");
+
+  if (useKernel) {
+    void *diffRes[dnnResourceNumber];
+    diffRes[dnnResourceDiffDst] = this->gradOutput->getConvertedData();
+    diffRes[dnnResourceSrc] = this->input->getConvertedData();
+    diffRes[dnnResourceDiffScaleShift] = scaleShift->getData();
+    diffRes[dnnResourceWorkspace] = workspace->getData();
+
+    PERFSTART();
+    status = dnnExecute<DType>(scaleShiftPrim, diffRes);
+    CHECK_EQ(status, E_SUCCESS);
+    PERFEND("weight and bias diff main computing");
+
+    // the scale-shift buffer now holds the gradients: copy them out
+    DType *ptr = reinterpret_cast<DType *>(scaleShift->getData());
+    const size_t channels = inputSize[2];
+    for (size_t i = 0; i < channels; i++) {
+      gradKernel[i] = ptr[i];
+      if (useBias) gradBias[i] = ptr[i + channels];
+    }
+  }
+
+  if (!this->gradInput->isUsePrev()) {
+    this->gradInput->backToUsr();
+  }
+
+#ifdef DEBUG
+  printData(reinterpret_cast<DType *>(this->gradInput->getUsrData()),
+            inputSize[3], inputSize[2], inputSize[1], inputSize[0],
+            "backward gradient input");
+#endif
+}
+
+template <typename ArrayType, typename DType>
+jlong JNIBatchNormInit(JNIEnv *env, jclass thisClass, jint inputNumber,
+                       jint inputChannel, jint inputHeight, jint inputWidth,
+                       double eps, jint useKernel, jint useBias, jint dimension)
+{  // allocates a layer and returns its address as the Java-side handle
+  MKLBatchNorm<DType> *ptr = new MKLBatchNorm<DType>();
+  ptr->init(inputNumber, inputChannel, inputHeight, inputWidth, eps, useKernel,
+            useBias, dimension);
+  // jlong (not C long) so the pointer is not truncated where long is 32-bit
+  return reinterpret_cast<jlong>(ptr);
+}
+
+template <typename ArrayType, typename DType>
+void JNIBatchNormUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input,
+                              jint inputOffset, ArrayType output,
+                              jint outputOffset, ArrayType kernel,
+                              jint kernelOffset, ArrayType bias,
+                              jint biasOffset, jlong classPtr)
+{  // classPtr carries the MKLBatchNorm pointer as an integer (jlong)
+  MKLBatchNorm<DType> *ptr = reinterpret_cast<MKLBatchNorm<DType> *>(classPtr);
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jInput(
+      new ZipArray<ArrayType, DType>(env, input, inputOffset, ptr->input));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jOutput(
+      new ZipArray<ArrayType, DType>(env, output, outputOffset, ptr->output));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jKernel(
+      new ZipArray<ArrayType, DType>(env, kernel, kernelOffset, NULL));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jBias(
+      new ZipArray<ArrayType, DType>(env, bias, biasOffset, NULL));
+
+  ptr->setKernel(jKernel->getPtr());
+  ptr->setBias(jBias->getPtr());
+
+  ptr->updateOutput(jInput->getPtr(), jOutput->getPtr());
+}
+
+template <typename ArrayType, typename DType>
+void JNIBatchNormUpdateGradInput(JNIEnv *env, jclass thisClass, ArrayType input,
+                                 jint inputOffset, ArrayType outputDiff,
+                                 jint outputDiffOffset, ArrayType inputDiff,
+                                 jint inputDiffOffset, ArrayType kernelDiff,
+                                 jint kernelDiffOffset, ArrayType biasDiff,
+                                 jint biasDiffOffset, jlong classPtr)
+{  // classPtr carries the MKLBatchNorm pointer as an integer (jlong)
+  MKLBatchNorm<DType> *ptr = reinterpret_cast<MKLBatchNorm<DType> *>(classPtr);
+  std::shared_ptr<ZipArray<ArrayType, DType>> jInput(
+      new ZipArray<ArrayType, DType>(env, input, inputOffset, ptr->input));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jOutputDiff(
+      new ZipArray<ArrayType, DType>(env, outputDiff, outputDiffOffset,
+                                     ptr->gradOutput));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jInputDiff(
+      new ZipArray<ArrayType, DType>(env, inputDiff, inputDiffOffset,
+                                     ptr->gradInput));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jKernelDiff(
+      new ZipArray<ArrayType, DType>(env, kernelDiff, kernelDiffOffset, NULL));
+
+  std::shared_ptr<ZipArray<ArrayType, DType>> jBiasDiff(
+      new ZipArray<ArrayType, DType>(env, biasDiff, biasDiffOffset, NULL));
+
+  ptr->setGradKernel(jKernelDiff->getPtr());
+  ptr->setGradBias(jBiasDiff->getPtr());
+
+  ptr->updateGradInput(jInput->getPtr(), jOutputDiff->getPtr(),
+                       jInputDiff->getPtr());
+}
+
+// Macro
+#define BatchNormInit(DType, JType, JArrayType) \
+  JNIEXPORT /* defines the exported MKL_BatchNormInit##DType function */ \
+  jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_BatchNormInit##DType( \
+      JNIEnv *env, jclass thisClass, jint inputNumber, jint inputChannel, \
+      jint inputHeight, jint inputWidth, jdouble eps, jint useKernel, \
+      jint useBias, jint dimension) \
+  { \
+    return JNIBatchNormInit<JArrayType, JType>( \
+        env, thisClass, inputNumber, inputChannel, inputHeight, inputWidth, \
+        eps, useKernel, useBias, dimension); \
+  }
+
+#define BatchNormForward(DType, JType, JArrayType) \
+  JNIEXPORT /* classPtr is jlong so the handle keeps its full width */ \
+  void JNICALL \
+  Java_com_intel_analytics_sparkdl_mkl_MKL_BatchNormForward##DType( \
+      JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \
+      JArrayType output, jint outputOffset, JArrayType kernel, \
+      jint kernelOffset, JArrayType bias, jint biasOffset, jlong classPtr) \
+  { \
+    JNIBatchNormUpdateOutput<JArrayType, JType>( \
+        env, thisClass, input, inputOffset, output, outputOffset, kernel, \
+        kernelOffset, bias, biasOffset, classPtr); \
+  }
+
+#define BatchNormBackward(DType, JType, JArrayType) \
+  JNIEXPORT /* classPtr is jlong so the handle keeps its full width */ \
+  void JNICALL \
+  Java_com_intel_analytics_sparkdl_mkl_MKL_BatchNormBackward##DType( \
+      JNIEnv *env, jclass thisClass, JArrayType input, jint inputOffset, \
+      JArrayType outputDiff, jint outputDiffOffset, JArrayType inputDiff, \
+      jint inputDiffOffset, JArrayType kernelDiff, jint kernelDiffOffset, \
+      JArrayType biasDiff, jint biasDiffOffset, jlong classPtr) \
+  { \
+    JNIBatchNormUpdateGradInput<JArrayType, JType>( \
+        env, thisClass, input, inputOffset, outputDiff, outputDiffOffset, \
+        inputDiff, inputDiffOffset, kernelDiff, kernelDiffOffset, biasDiff, \
+        biasDiffOffset, classPtr); \
+  }
+
+#ifdef __cplusplus  // C linkage for the Java_* functions the macros expand to
+extern "C" {
+#endif
+
+// double: expands to the BatchNorm{Init,Forward,Backward}Double entry points
+BatchNormInit(Double, jdouble, jdoubleArray);
+BatchNormForward(Double, jdouble, jdoubleArray);
+BatchNormBackward(Double, jdouble, jdoubleArray);
+
+// float: expands to the BatchNorm{Init,Forward,Backward}Float entry points
+BatchNormInit(Float, jfloat, jfloatArray);
+BatchNormForward(Float, jfloat, jfloatArray);
+BatchNormBackward(Float, jfloat, jfloatArray);
+
+#ifdef __cplusplus  // closes the extern "C" block opened above
+}
+#endif
diff --git a/mkl/native/src/main/c/jni/concat.cpp b/mkl/native/src/main/c/jni/concat.cpp
new file mode 100644
index 00000000000..f3b8fb557f6
--- /dev/null
+++ b/mkl/native/src/main/c/jni/concat.cpp
@@ -0,0 +1,331 @@
+#include
+#include
+
+#include "debug.h"
+#include "layer.h"
+#include "memory.h"
+#include "utils.h"
+
+using namespace std;
+
+template
+class MKLConcat : public MKLLayer
+{ // Layer that joins numConcats inputs into one output (dnnConcat) and splits the gradient back (dnnSplit).
+ public:
+ MKLConcat();
+ ~MKLConcat();
+
+ void init(int numConcats, int dimension, int *size); // size: `dimension` entries per input, stored back to back
+
+ void updateOutput(DType **input, DType *output); // forward: input[i] is the i-th tensor to join
+ void updateGradInput(DType **gradInput, DType *gradOutput); // backward: fills each gradInput[i]
+
+ // attention: these two vectors hide MKLLayer's members of the same name; one entry per concatenated input
+ vector>> input;
+ vector>> gradInput;
+
+ private:
+ // builds the concat/split primitives; not the same as createMklLayout in MKLMemory
+ void firstPass();
+ void preExecute(DType *input); // NOTE(review): declared here but never defined in this file
+
+ int numConcats; // number of inputs being concatenated, set by init()
+ size_t *numSplits; // per-input split sizes passed to dnnSplitCreate; allocated in init(), freed in the destructor
+};
+
+template
+MKLConcat::MKLConcat() : numConcats(0), numSplits(NULL) // listed in declaration order (avoids -Wreorder)
+{ // Leaves the layer empty; init() supplies the real sizes and allocates numSplits.
+ // TODO
+}
+
+template
+MKLConcat::~MKLConcat()
+{ // Releases the split-size array allocated by init().
+ // TODO
+ delete[] numSplits; // still NULL (a no-op delete) when init() was never called
+}
+
+template
+void MKLConcat::init(int numConcats, int dimension, int *size)
+{ // Stores the concat shape and creates the user layouts of every input and of the single output.
+ this->numConcats = numConcats;
+ this->dimension = dimension;
+ this->numSplits = new size_t[numConcats]; // NOTE(review): a repeated init() call would leak the old array
+
+ size_t inputSize[dimension]; // variable-length arrays: a compiler extension, not standard C++
+ size_t inputStrides[dimension];
+ size_t outputSize[dimension];
+ size_t outputStrides[dimension];
+
+ int offset = 0; // index in size where the current input's entries start
+ size_t channels = 0; // running sum of numSplits over all inputs
+
+ for (int i = 0; i < numConcats; i++) {
+ input.push_back(shared_ptr>(new MKLData));
+ gradInput.push_back(shared_ptr>(new MKLData));
+
+ // set the size.
+ // size must provide `dimension` entries for every input, stored back to back.
+ // every input must have the same number of dimensions.
+ inputStrides[0] = 1;
+ inputSize[0] = size[offset];
+ for (int j = 1; j < dimension; j++) {
+ inputSize[j] = size[offset + j];
+ inputStrides[j] = inputStrides[j - 1] * inputSize[j - 1]; // dense stride: product of the lower sizes
+ }
+ offset += dimension;
+
+ // the split size is taken from index 2 (channel, per the original note) when
+ // the tensor has more than two dimensions, otherwise from index 1 (height).
+ // NOTE(review): with dimension == 1 this reads inputSize[1], past the array end.
+ numSplits[i] = dimension <= 2 ? inputSize[1] : inputSize[2];
+ channels += numSplits[i];
+
+ this->input[i]->createUsrLayout(dimension, inputSize, inputStrides);
+ this->gradInput[i]->createUsrLayout(dimension, inputSize, inputStrides);
+ }
+
+ // the output reuses the sizes left in inputSize by the loop above (i.e. the
+ // last input), except index 2, which becomes the sum of all numSplits.
+ // NOTE(review): for dimension <= 2 index 2 does not exist, so the sum is unused.
+ // there is only one output
+ outputStrides[0] = 1;
+ outputSize[0] = inputSize[0];
+ for (int i = 1; i < dimension; i++) {
+ if (i == 2)
+ outputSize[i] = channels;
+ else
+ outputSize[i] = inputSize[i];
+ outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1];
+ }
+
+ this->output->createUsrLayout(dimension, outputSize, outputStrides);
+ this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides);
+}
+
+template
+void MKLConcat::firstPass()
+{ // Creates the concat (forward) and split (backward) primitives and the MKL layouts derived from them.
+ dnnLayout_t *layouts = new dnnLayout_t[numConcats]; // temporary list of the inputs' user layouts, freed below
+
+ for (int i = 0; i < numConcats; i++) {
+ layouts[i] = this->input[i]->getUsrLayout();
+ }
+
+ dnnError_t status = E_UNIMPLEMENTED;
+ status =
+ dnnConcatCreate(&(this->forwardPrim), NULL, numConcats, layouts);
+ CHECK_EQ(status, E_SUCCESS);
+
+ this->output->createMklLayout(this->forwardPrim, dnnResourceDst);
+ this->gradOutput->createMklLayout(this->forwardPrim, dnnResourceDst); // gradOutput shares the forward destination layout
+
+ // backward: split gradOutput into numConcats pieces whose sizes are numSplits[i]
+ status = dnnSplitCreate(&(this->backwardPrim), NULL, numConcats,
+ this->gradOutput->getMklLayout(), numSplits);
+ CHECK_EQ(status, E_SUCCESS);
+
+ for (int i = 0; i < numConcats; i++) {
+ this->input[i]->createMklLayout(
+ this->forwardPrim, (dnnResourceType_t)(dnnResourceMultipleSrc + i));
+
+ // TODO: taken from Caffe; uses MultipleDst here, unlike the DiffSrc/DiffDst resources of other layers
+ this->gradInput[i]->createMklLayout(
+ this->backwardPrim, (dnnResourceType_t)(dnnResourceMultipleDst + i));
+ }
+
+ delete[] layouts;
+
+ this->isFirstPass = false; // updateOutput() will not rebuild the primitives again
+}
+
+template
+void MKLConcat::updateOutput(DType **input, DType *output)
+{ // Forward pass: binds the caller's buffers, runs the concat primitive and checks its status.
+ if (this->isFirstPass) firstPass(); // primitives and MKL layouts are built lazily on the first call
+
+ for (int i = 0; i < numConcats; i++) {
+ this->input[i]->setUsrData(input[i]);
+ this->input[i]->createConversion();
+ }
+ this->output->setUsrData(output);
+ this->output->createConversion();
+
+ dnnError_t status;
+ void *resources[dnnResourceNumber]; // NOTE(review): dnnResourceMultipleSrc + numConcats is not bounds-checked
+
+ for (int i = 0; i < numConcats; i++) {
+ resources[dnnResourceMultipleSrc + i] = this->input[i]->getConvertedData();
+ }
+ resources[dnnResourceDst] = this->output->getData();
+
+ PERFSTART();
+ status = dnnExecute(this->forwardPrim, resources);
+ PERFEND("main computing");
+ CHECK_EQ(status, E_SUCCESS); // the status used to be dropped; checked as in the convolution layer
+ if (!this->output->isUseNext()) this->output->backToUsr(); // convert back only when isUseNext() is false
+}
+
+template
+void MKLConcat::updateGradInput(DType **gradInput, DType *gradOutput)
+{ // Backward pass: splits gradOutput into one gradient per input. Relies on updateOutput() having built backwardPrim.
+ for (int i = 0; i < numConcats; i++) {
+ this->gradInput[i]->setUsrData(gradInput[i]);
+ this->gradInput[i]->createConversion();
+ }
+ this->gradOutput->setUsrData(gradOutput);
+ this->gradOutput->createConversion();
+
+ dnnError_t status;
+ void *resources[dnnResourceNumber]; // NOTE(review): dnnResourceMultipleDst + numConcats is not bounds-checked
+
+ for (int i = 0; i < numConcats; i++) {
+ resources[dnnResourceMultipleDst + i] = this->gradInput[i]->getData();
+ }
+ resources[dnnResourceSrc] = this->gradOutput->getConvertedData();
+
+ PERFSTART();
+ status = dnnExecute(this->backwardPrim, resources);
+ PERFEND("main computing");
+ CHECK_EQ(status, E_SUCCESS); // the status used to be dropped; checked as in the convolution layer
+ for (int i = 0; i < numConcats; i++) {
+ if (!this->gradInput[i]->isUsePrev()) this->gradInput[i]->backToUsr();
+ }
+}
+
+template
+jlong JNIConcatInit(JNIEnv *env, jclass thisClass, int numConcats,
+ int dimension, jintArray size)
+{ // Allocates an MKLConcat, initialises it from the Java size array and returns its address as a jlong.
+ MKLConcat *ptr = new MKLConcat(); // NOTE(review): no entry point in this file deletes this object
+
+ jint *jSize =
+ reinterpret_cast(env->GetPrimitiveArrayCritical(size, 0));
+ ptr->init(numConcats, dimension, jSize); // size is only read here
+ env->ReleasePrimitiveArrayCritical(size, jSize, 0);
+
+ return reinterpret_cast(ptr);
+}
+
+template
+void JNIConcatUpdateOutput(JNIEnv *env, jclass thisClass, jobjectArray input,
+ jintArray inputOffset, ArrayType output,
+ jint outputOffset, jlong classPtr)
+{ // Forward entry point: pins every input array and runs MKLConcat::updateOutput on them.
+ MKLConcat *ptr = reinterpret_cast *>(classPtr);
+
+ // TODO: simplify. Object refs are fetched before pinning: no other JNI call is allowed in a critical region.
+ int len = env->GetArrayLength(input);
+ DType *inputArrStart[len]; // pinned base address of each input array
+ DType *inputArr[len]; // base address plus that input's offset
+ ArrayType jInputArr[len]; // the Java arrays themselves, needed again for the release
+ for (int i = 0; i < len; i++) {
+ jInputArr[i] = (ArrayType)(env->GetObjectArrayElement(input, i));
+ }
+ jint *jInputOffset =
+ reinterpret_cast(env->GetPrimitiveArrayCritical(inputOffset, 0));
+ for (int i = 0; i < len; i++) {
+ inputArrStart[i] = reinterpret_cast(
+ env->GetPrimitiveArrayCritical(jInputArr[i], 0));
+ inputArr[i] = inputArrStart[i] + jInputOffset[i];
+ }
+
+ std::shared_ptr> jOutput(
+ new ZipArray(env, output, outputOffset, ptr->output));
+
+ ptr->updateOutput(inputArr, jOutput->getPtr()); // NOTE(review): reads numConcats inputs; len is not checked against it
+
+ for (int i = 0; i < len; i++) {
+ env->ReleasePrimitiveArrayCritical(jInputArr[i], inputArrStart[i], 0);
+ }
+ env->ReleasePrimitiveArrayCritical(inputOffset, jInputOffset, 0);
+}
+
+template
+void JNIConcatUpdateGradInput(JNIEnv *env, jclass thisClass,
+ jobjectArray inputDiff, jintArray inputDiffOffset,
+ ArrayType outputDiff, jint outputDiffOffset,
+ jlong classPtr)
+{ // Backward entry point: pins every gradient array and runs MKLConcat::updateGradInput on them.
+ MKLConcat *ptr = reinterpret_cast *>(classPtr);
+
+ int len = env->GetArrayLength(inputDiff);
+ DType *inputDiffArrStart[len]; // pinned base address of each gradient array
+ DType *inputDiffArr[len]; // base address plus that array's offset
+ ArrayType jInputDiffArr[len]; // the Java arrays themselves, needed again for the release
+ for (int i = 0; i < len; i++) { // fetch refs before pinning: no other JNI call is allowed in a critical region
+ jInputDiffArr[i] = (ArrayType)(env->GetObjectArrayElement(inputDiff, i));
+ }
+ jint *jInputDiffOffset = reinterpret_cast(
+ env->GetPrimitiveArrayCritical(inputDiffOffset, 0));
+ for (int i = 0; i < len; i++) {
+ inputDiffArrStart[i] = reinterpret_cast(
+ env->GetPrimitiveArrayCritical(jInputDiffArr[i], 0));
+ inputDiffArr[i] = inputDiffArrStart[i] + jInputDiffOffset[i];
+ }
+
+ std::shared_ptr> jOutputDiff(
+ new ZipArray(env, outputDiff, outputDiffOffset,
+ ptr->gradOutput));
+
+ ptr->updateGradInput(inputDiffArr, jOutputDiff->getPtr()); // NOTE(review): len is not checked against numConcats
+
+ for (int i = 0; i < len; i++) {
+ env->ReleasePrimitiveArrayCritical(jInputDiffArr[i], inputDiffArrStart[i],
+ 0); // mode 0: copy back if needed and free the buffer
+ }
+ env->ReleasePrimitiveArrayCritical(inputDiffOffset, jInputDiffOffset, 0);
+}
+
+// Macros that emit the exported JNI symbols; ConcatInit forwards to JNIConcatInit and returns its jlong result
+#define ConcatInit(DType, JType, JArrayType) \
+ JNIEXPORT \
+ jlong JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ConcatInit##DType( \
+ JNIEnv *env, jclass thisClass, jint numConcats, jint dimension, \
+ jintArray size) \
+ { \
+ return JNIConcatInit(env, thisClass, numConcats, \
+ dimension, size); \
+ }
+
+#define ConcatForward(DType, JType, JArrayType) /* emits the JNI export MKL.ConcatForward<DType> */ \
+ JNIEXPORT \
+ void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ConcatForward##DType( \
+ JNIEnv *env, jclass thisClass, jobjectArray input, \
+ jintArray inputOffset, JArrayType output, jint outputOffset, \
+ jlong classPtr) /* jlong is the JNI type of a Java long; plain long is 32-bit on some ABIs */ \
+ { \
+ JNIConcatUpdateOutput( /* input: array of arrays; inputOffset: one offset per input */ \
+ env, thisClass, input, inputOffset, output, outputOffset, classPtr); \
+ }
+
+#define ConcatBackward(DType, JType, JArrayType) /* emits the JNI export MKL.ConcatBackward<DType> */ \
+ JNIEXPORT \
+ void JNICALL Java_com_intel_analytics_sparkdl_mkl_MKL_ConcatBackward##DType( \
+ JNIEnv *env, jclass thisClass, jobjectArray inputDiff, \
+ jintArray inputDiffOffset, JArrayType outputDiff, jint outputDiffOffset, \
+ jlong classPtr) /* jlong is the JNI type of a Java long; plain long is 32-bit on some ABIs */ \
+ { \
+ JNIConcatUpdateGradInput(env, thisClass, inputDiff, /* thin wrapper: arguments forwarded unchanged */ \
+ inputDiffOffset, outputDiff, \
+ outputDiffOffset, classPtr); \
+ }
+
+#ifdef __cplusplus
+extern "C" { // C linkage so the Java_... names below are exported unmangled
+#endif
+
+// Double: instantiate the three Concat exports for jdouble / jdoubleArray
+ConcatInit(Double, jdouble, jdoubleArray);
+ConcatForward(Double, jdouble, jdoubleArray);
+ConcatBackward(Double, jdouble, jdoubleArray);
+
+// Float: instantiate the three Concat exports for jfloat / jfloatArray
+ConcatInit(Float, jfloat, jfloatArray);
+ConcatForward(Float, jfloat, jfloatArray);
+ConcatBackward(Float, jfloat, jfloatArray);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/mkl/native/src/main/c/jni/convolution.cpp b/mkl/native/src/main/c/jni/convolution.cpp
new file mode 100644
index 00000000000..36c821ba7aa
--- /dev/null
+++ b/mkl/native/src/main/c/jni/convolution.cpp
@@ -0,0 +1,580 @@
+#include
+
+#include "debug.h"
+#include "layer.h"
+#include "memory.h"
+#include "utils.h"
+
+// Returns the MKL build field parsed as an integer; the value is cached after the first non-zero result.
+static int getMKLBuildDate()
+{
+  static int cachedBuild = 0;
+  if (cachedBuild != 0) return cachedBuild;
+
+  MKLVersion version;
+  mkl_get_version(&version);
+  return cachedBuild = atoi(version.Build);
+}
+
+template
+class MKLConvolution : public MKLLayer
+{ // Grouped 2-D convolution layer built on the dnnGroupsConvolution* primitives (forward, data, filter and bias gradients).
+ public:
+ MKLConvolution();
+ ~MKLConvolution();
+
+ void init(size_t inputNumber, size_t inputChannel, size_t inputHeight,
+ size_t inputWidth, size_t kernelNumber, size_t kernelChannel,
+ size_t kernelHeight, size_t kernelWidth, size_t strideHeight,
+ size_t strideWidth, int padHeight, int padWidth, int dimension,
+ int groups); // records sizes and creates user layouts; primitives are built later by firstPass()
+
+ void updateOutput(DType *input, DType *output); // forward pass
+ void updateGradInput(DType *input, DType *gradOutput, DType *gradInput); // gradient w.r.t. the input
+ void updateGradKernel(DType *input, DType *gradOutput, DType *gradKernel); // gradient w.r.t. the kernel
+ void updateGradBias(DType *input, DType *gradOutput, DType *gradBias); // gradient w.r.t. the bias
+
+ std::shared_ptr> kernel; // public so the JNI helpers can attach Java arrays to them
+ std::shared_ptr> bias;
+
+ std::shared_ptr> gradKernel;
+ std::shared_ptr> gradBias;
+
+ private:
+ // builds the four primitives and their layouts; not the same as createMklLayout in MKLMemory
+ void firstPass();
+ void preExecute(DType *input); // refreshes the input/kernel/bias conversions
+
+ DType *kernelAdr; // set to NULL by the constructor; no other use in the methods shown here
+ DType *biasAdr;
+
+ dnnPrimitive_t kernelPrim, biasPrim; // backward-filter and backward-bias primitives, deleted in the destructor
+
+ size_t groups;
+
+ size_t inputSize[4]; // order: width, height, channel, number (see init)
+ size_t inputStrides[4];
+
+ size_t outputSize[4]; // same order as inputSize
+ size_t outputStrides[4];
+
+ size_t kernelDimension; // number of leading kernelSize entries given to createUsrLayout
+ size_t kernelSize[5]; // width, height, channel / groups, number / groupsMKL, groupsMKL
+ size_t kernelStrides[5];
+
+ size_t biasSize[1]; // one entry: kernelNumber
+ size_t biasStrides[1];
+
+ size_t stride[2]; // width, height
+ int pad[2]; // width, height; stored negated by init()
+};
+
+template
+MKLConvolution::MKLConvolution()
+ : kernel(new MKLData), // the four MKLData holders are allocated up front
+ bias(new MKLData),
+ gradKernel(new MKLData),
+ gradBias(new MKLData),
+ kernelAdr(NULL),
+ biasAdr(NULL),
+ kernelPrim(NULL), // primitives stay NULL until firstPass() creates them
+ biasPrim(NULL)
+{ // sizes, strides and groups are left unset until init() runs
+}
+
+template
+MKLConvolution::~MKLConvolution()
+{ // Deletes the two primitives this class owns; forwardPrim/backwardPrim are not touched here (presumably MKLLayer's job — confirm).
+ dnnDelete(kernelPrim); // NOTE(review): still NULL if firstPass() never ran
+ dnnDelete(biasPrim);
+}
+
+template
+void MKLConvolution::init(size_t inputNumber, size_t inputChannel,
+ size_t inputHeight, size_t inputWidth,
+ size_t kernelNumber, size_t kernelChannel,
+ size_t kernelHeight, size_t kernelWidth,
+ size_t strideHeight, size_t strideWidth,
+ int padHeight, int padWidth, int dimension,
+ int groups)
+{ // Records every size/stride of the convolution and creates the user layouts; no primitive is built here.
+ this->dimension = dimension;
+ this->groups = groups;
+
+ inputSize[0] = inputWidth; // sizes are stored in the order width, height, channel, number
+ inputSize[1] = inputHeight;
+ inputSize[2] = inputChannel;
+ inputSize[3] = inputNumber;
+
+ inputStrides[0] = 1;
+ for (int i = 1; i < 4; i++)
+ inputStrides[i] = inputStrides[i - 1] * inputSize[i - 1]; // dense stride: product of the lower sizes
+
+ size_t outputWidth =
+ computeOut(inputWidth, padWidth, kernelWidth, strideWidth, false); // computeOut is not defined in the code shown here
+ size_t outputHeight =
+ computeOut(inputHeight, padHeight, kernelHeight, strideHeight, false);
+
+ // the output channel count equals the number of kernels,
+ // and the output number equals the input number.
+ outputSize[0] = outputWidth;
+ outputSize[1] = outputHeight;
+ outputSize[2] = kernelNumber;
+ outputSize[3] = inputNumber;
+
+ outputStrides[0] = 1;
+ for (int i = 1; i < 4; i++)
+ outputStrides[i] = outputStrides[i - 1] * outputSize[i - 1];
+
+ // the group handling below comes from IntelCaffe.
+ size_t groupsMKL = groups;
+ kernelDimension = this->dimension + (groups != 1); // one extra kernel dimension when groups != 1
+ if (getMKLBuildDate() < 20160701) { // builds before 20160701: no extra dimension, kernel sized as one group
+ kernelDimension = this->dimension;
+ groupsMKL = 1;
+ }
+
+ kernelSize[0] = kernelWidth;
+ kernelSize[1] = kernelHeight;
+ kernelSize[2] = kernelChannel / groups; // NOTE(review): divides by groups, not groupsMKL; groups == 0 is not guarded
+ kernelSize[3] = kernelNumber / groupsMKL;
+ kernelSize[4] = groupsMKL; // part of the layout only when kernelDimension reaches index 4
+
+ kernelStrides[0] = 1;
+ for (int i = 1; i < 5; i++)
+ kernelStrides[i] = kernelStrides[i - 1] * kernelSize[i - 1];
+
+ biasSize[0] = kernelNumber; // one bias value per kernel
+ biasStrides[0] = 1;
+
+ stride[0] = strideWidth;
+ stride[1] = strideHeight;
+
+ pad[0] = -padWidth; // stored negated; presumably the dnn API takes an input offset, not a padding — confirm
+ pad[1] = -padHeight;
+
+ // create the user layouts (forward tensors first, then their gradients)
+ this->input->createUsrLayout(dimension, inputSize, inputStrides);
+ this->output->createUsrLayout(dimension, outputSize, outputStrides);
+ this->kernel->createUsrLayout(kernelDimension, kernelSize, kernelStrides);
+ this->bias->createUsrLayout(1, biasSize, biasStrides);
+
+ this->gradInput->createUsrLayout(dimension, inputSize, inputStrides);
+ this->gradOutput->createUsrLayout(dimension, outputSize, outputStrides);
+ this->gradKernel->createUsrLayout(kernelDimension, kernelSize, kernelStrides);
+ // bias dimension is 1
+ this->gradBias->createUsrLayout(1, biasSize, biasStrides);
+}
+
+template
+void MKLConvolution::firstPass()
+{ // Creates the four dnn primitives (forward, backward data, backward filter, backward bias) and their MKL layouts.
+ dnnError_t status = E_UNIMPLEMENTED;
+ // forward (with bias)
+ status = dnnGroupsConvolutionCreateForwardBias(
+ &(this->forwardPrim), NULL, dnnAlgorithmConvolutionDirect, groups, // NOTE(review): passes groups although init() sized the kernel with groupsMKL — confirm for old MKL builds
+ this->dimension, inputSize, outputSize, kernelSize, stride, pad,
+ dnnBorderZeros);
+ CHECK_EQ(status, E_SUCCESS);
+
+ this->input->createMklLayout(this->forwardPrim, dnnResourceSrc);
+ this->output->createMklLayout(this->forwardPrim, dnnResourceDst);
+ this->kernel->createMklLayout(this->forwardPrim, dnnResourceFilter);
+ this->bias->createMklLayout(this->forwardPrim, dnnResourceBias);
+
+ // backward data: gradient with respect to the input
+ status = dnnGroupsConvolutionCreateBackwardData(
+ &(this->backwardPrim), NULL, dnnAlgorithmConvolutionDirect, groups,
+ this->dimension, inputSize, outputSize, kernelSize, stride, pad,
+ dnnBorderZeros);
+ CHECK_EQ(status, E_SUCCESS);
+
+ this->gradOutput->createMklLayout(this->backwardPrim, dnnResourceDiffDst);
+ this->gradInput->createMklLayout(this->backwardPrim, dnnResourceDiffSrc);
+
+ // backward kernel: gradient with respect to the filter
+ status = dnnGroupsConvolutionCreateBackwardFilter(
+ &kernelPrim, NULL, dnnAlgorithmConvolutionDirect, groups, this->dimension,
+ inputSize, outputSize, kernelSize, stride, pad, dnnBorderZeros);
+ CHECK_EQ(status, E_SUCCESS);
+
+ this->gradKernel->createMklLayout(this->kernelPrim, dnnResourceDiffFilter);
+
+ // backward bias: needs only the output size
+ status = dnnGroupsConvolutionCreateBackwardBias(
+ &biasPrim, NULL, dnnAlgorithmConvolutionDirect, groups, this->dimension,
+ outputSize);
+ CHECK_EQ(status, E_SUCCESS);
+
+ this->gradBias->createMklLayout(this->biasPrim, dnnResourceDiffBias);
+
+ // the primitives and layouts are created only once
+ this->isFirstPass = false;
+}
+
+template
+void MKLConvolution::preExecute(DType *input)
+{ // Re-creates the conversions of the three forward operands; the input parameter itself is not used.
+ this->input->createConversion();
+ this->kernel->createConversion();
+ this->bias->createConversion();
+}
+
+template
+void MKLConvolution::updateOutput(DType *input, DType *output)
+{ // Forward pass. The output parameter is not read here; buffers come from the MKLData members.
+ if (this->isFirstPass) firstPass(); // primitives are built lazily on the first forward call
+
+ // The buffer addresses change on every call, so the conversions are
+ // re-created on every forward/backward.
+ // TODO Should we set the kernel and bias address every time?
+ preExecute(input);
+ this->output->createConversion();
+
+#ifdef DEBUG
+ printData(reinterpret_cast(this->input->getUsrData()),
+ this->inputSize[3], this->inputSize[2], this->inputSize[1],
+ this->inputSize[0], "Forward input");
+#endif
+
+ dnnError_t status;
+ void *resources[dnnResourceNumber]; // only the four slots set below are initialised
+
+ resources[dnnResourceFilter] = this->kernel->getConvertedData();
+ resources[dnnResourceBias] = this->bias->getConvertedData();
+ resources[dnnResourceSrc] = this->input->getConvertedData();
+ resources[dnnResourceDst] = this->output->getData();
+
+ PERFSTART();
+ status = dnnExecute(this->forwardPrim, resources);
+ PERFEND("main computing");
+ CHECK_EQ(status, E_SUCCESS);
+
+#ifdef DEBUG
+ printData(reinterpret_cast(this->output->getData()),
+ outputSize[3], outputSize[2], outputSize[1], outputSize[0],
+ "Forward output");
+#endif
+
+ if (!this->output->isUseNext()) { // convert back to the user buffer only when isUseNext() is false
+ this->output->backToUsr();
+ }
+}
+
+template
+void MKLConvolution::updateGradInput(DType *input, DType *gradOutput,
+ DType *gradInput)
+{ // Backward-data pass. Does not call firstPass(), so it relies on updateOutput() having run first.
+ dnnError_t status;
+ void *resources[dnnResourceNumber];
+
+ preExecute(input); // the gradOutput/gradInput parameters are not read; buffers come from the members
+
+ this->gradOutput->createConversion();
+ this->gradInput->createConversion();
+
+ resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData();
+ resources[dnnResourceFilter] = this->kernel->getConvertedData();
+ resources[dnnResourceDiffSrc] = this->gradInput->getData();
+
+ // run the backward-data primitive
+ PERFSTART();
+ status = dnnExecute(this->backwardPrim, resources);
+ CHECK_EQ(status, E_SUCCESS);
+ PERFEND("main computing");
+
+ if (!this->gradInput->isUsePrev()) { // convert back to the user buffer only when isUsePrev() is false
+ this->gradInput->backToUsr();
+ }
+
+#ifdef DEBUG
+ printData(reinterpret_cast(this->gradInput->getUsrData()),
+ inputSize[3], inputSize[2], inputSize[1], inputSize[0],
+ "backward gradient input");
+#endif
+}
+template
+void MKLConvolution::updateGradKernel(DType *input, DType *gradOutput,
+ DType *gradKernel)
+{ // Backward-filter pass. Does not call firstPass(), so it relies on updateOutput() having run first.
+ dnnError_t status;
+ void *resources[dnnResourceNumber];
+
+ preExecute(input); // the gradOutput/gradKernel parameters are not read; buffers come from the members
+
+ this->gradOutput->createConversion();
+ this->gradKernel->createConversion();
+
+ resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData();
+ resources[dnnResourceSrc] = this->input->getConvertedData();
+ resources[dnnResourceDiffFilter] = this->gradKernel->getData();
+
+ // run the backward-filter primitive
+ PERFSTART();
+ status = dnnExecute(this->kernelPrim, resources);
+ CHECK_EQ(status, E_SUCCESS);
+ PERFEND("main computing");
+
+ // the kernel gradient is never passed on to another layer, so always convert it back
+ this->gradKernel->backToUsr();
+}
+
+template
+void MKLConvolution::updateGradBias(DType *input, DType *gradOutput,
+ DType *gradBias)
+{ // Backward-bias pass. Does not call firstPass(), so it relies on updateOutput() having run first.
+ dnnError_t status;
+ void *resources[dnnResourceNumber];
+
+ preExecute(input); // refreshes input/kernel/bias conversions although only DiffDst and DiffBias are used below
+
+ this->gradOutput->createConversion();
+ this->gradBias->createConversion();
+
+ resources[dnnResourceDiffDst] = this->gradOutput->getConvertedData();
+ resources[dnnResourceDiffBias] = this->gradBias->getData();
+
+ // run the backward-bias primitive
+ PERFSTART();
+ status = dnnExecute(this->biasPrim, resources);
+ CHECK_EQ(status, E_SUCCESS);
+ PERFEND("main computing");
+
+ this->gradBias->backToUsr(); // always converted back, like the kernel gradient
+}
+
+template
+jlong JNIConvolutionInit(JNIEnv *env, jclass thisClass, jint inputNumber,
+ jint inputChannel, jint inputHeight, jint inputWidth,
+ jint kernelNumber, jint kernelChannel,
+ jint kernelHeight, jint kernelWidth, jint strideHeight,
+ jint strideWidth, jint padHeight, jint padWidth,
+ jint dimension, jint groups)
+{ // Allocates an MKLConvolution, initialises it and returns its address as a jlong.
+ MKLConvolution *conv = new MKLConvolution(); // NOTE(review): no matching delete appears in the code shown here
+ conv->init(inputNumber, inputChannel, inputHeight, inputWidth, kernelNumber, // jint sizes convert to size_t; negative values would wrap
+ kernelChannel, kernelHeight, kernelWidth, strideHeight,
+ strideWidth, padHeight, padWidth, dimension, groups);
+
+ return reinterpret_cast(conv);
+}
+
+template
+void JNIConvolutionUpdateOutput(JNIEnv *env, jclass thisClass, ArrayType input,
+ jint inputOffset, ArrayType output,
+ jint outputOffset, ArrayType kernel,
+ jint kernelOffset, ArrayType bias,
+ jint biasOffset, long classPtr) // NOTE(review): long is narrower than jlong on some ABIs
+{ // Forward entry point: wraps the four Java arrays in ZipArray objects and runs MKLConvolution::updateOutput.
+ MKLConvolution *ptr =
+ reinterpret_cast *>(classPtr);
+
+ std::shared_ptr> jInput(
+ new ZipArray(env, input, inputOffset, ptr->input)); // presumably pins the array and binds it to ptr->input — confirm in ZipArray
+
+ std::shared_ptr> jOutput(
+ new ZipArray(env, output, outputOffset, ptr->output));
+
+ std::shared_ptr> jKernel(
+ new ZipArray(env, kernel, kernelOffset, ptr->kernel)); // not passed on below; only this constructor sees kernel
+
+ std::shared_ptr> jBias(
+ new ZipArray(env, bias, biasOffset, ptr->bias)); // likewise for bias
+
+ ptr->updateOutput(jInput->getPtr(), jOutput->getPtr());
+}
+
+template
+void JNIConvolutionUpdateGradInput(JNIEnv *env, jclass thisClass,
+ ArrayType input, jint inputOffset,
+ ArrayType outputDiff, jint outputDiffOffset,
+ ArrayType inputDiff, jint inputDiffOffset,
+ ArrayType kernel, jint kernelOffset,
+ ArrayType bias, jint biasOffset,
+ long classPtr)
+{
+ MKLConvolution *ptr =
+ reinterpret_cast *>(classPtr);
+ std::shared_ptr> jInput(
+ new ZipArray