Closed
Changes from all commits
Commits
237 commits
a24c896
add poly lr schedule
yiheng Sep 25, 2016
7d1f622
use larger iterations
yiheng Sep 26, 2016
ce81bcb
set learning rate to 0 if iteration number is greater than maxIteration
yiheng Sep 27, 2016
9d81a12
print learning rate and iteration number in poly
yiheng Sep 27, 2016
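The three commits above describe a "poly" learning-rate schedule: the rate decays as (1 - iteration/maxIteration)^power, and is forced to zero once the iteration count passes maxIteration. A minimal, hypothetical Scala sketch of that rule — the names and signature are illustrative, not the actual spark-dl API:

```scala
// Hypothetical "poly" learning-rate rule as described by the commits above.
// Not the spark-dl implementation; names are illustrative only.
object PolySchedule {
  def learningRate(baseLr: Double, power: Double,
                   maxIteration: Int, iteration: Int): Double = {
    if (iteration > maxIteration) {
      0.0 // the follow-up commit clamps the rate to 0 past maxIteration
    } else {
      baseLr * math.pow(1.0 - iteration.toDouble / maxIteration, power)
    }
  }
}
```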
4dded82
update pom.xml
qiuxin2012 Sep 28, 2016
58e1e3e
Merge pull request #26 from qiuxin2012/master
yiheng-wang-intel Sep 29, 2016
c712595
turn module into evaluation mode when test
yiheng Oct 7, 2016
edafc8b
fix code format issue
yiheng Oct 7, 2016
0e9ca8b
Merge pull request #29 from yiheng/poly_support
yiheng-wang-intel Oct 7, 2016
ecaae66
add Tensor and TensorMath comments
ShuhanYan Oct 10, 2016
5126f34
Merge pull request #32 from yansh0625/comments
psyyz10 Oct 12, 2016
18fadb2
Improve LRN Performance
yiheng Sep 22, 2016
950ad37
Rename LRN layer to SpatialCrossMapLRN
yiheng Sep 25, 2016
e1e48b4
Compare crossmap LRN result with torch
yiheng Sep 25, 2016
2198cc8
Compare crossmap LRN result with torch
yiheng Sep 25, 2016
0098f80
Compare LRN result with torch
yiheng Sep 25, 2016
4af9492
Compare LRN result with torch
yiheng Sep 25, 2016
9a1b2b4
Use random generator in dataset shuffle
yiheng Sep 25, 2016
7ed7e55
change softmax name to loss in alexnet
yiheng Sep 26, 2016
73263c8
use new models in TestModelParallel
yiheng Sep 26, 2016
2999cc1
parallelize batchnormalization
yiheng Sep 26, 2016
e00a0e0
improve concat perf
yiheng Sep 26, 2016
5bb66d6
improve concat perf
yiheng Sep 26, 2016
bae85c0
improve concat perf
yiheng Sep 26, 2016
92a9b43
add Math Operation
ShuhanYan Sep 27, 2016
5195707
add Math perform test
ShuhanYan Sep 27, 2016
cd39d60
add jni func
ShuhanYan Sep 27, 2016
0e28e12
add jni func
ShuhanYan Sep 27, 2016
2911bcf
add Unit Test
ShuhanYan Sep 28, 2016
a8ccf23
remove performtest
ShuhanYan Sep 28, 2016
170063a
improve concat perf
yiheng Sep 26, 2016
ced129a
don't convert when conv kernel is 1x1
yiheng Sep 28, 2016
fdba328
print concat overhead in the perf profiling
yiheng Sep 28, 2016
a1b94e5
don't backward gradient for the first layer
yiheng Sep 28, 2016
4918755
fix break uts
yiheng Sep 28, 2016
4484e95
fix code format issue
yiheng Sep 28, 2016
1ebc2f6
Add multimodel perf test
yiheng Sep 30, 2016
014d151
refactor data preprocess code
zhangxiaoli73 Sep 13, 2016
6fb15ef
fix "too many open files" bug
yiheng Oct 13, 2016
dd5fb30
resolve ImageIO
zhangxiaoli73 Oct 14, 2016
31762a8
Merge pull request #7 from zhangxiaoli73/dataset3
yiheng-wang-intel Oct 14, 2016
0906985
fix a bug that caused CNN MNIST not to converge to 99%
yiheng Oct 14, 2016
1ccc0d9
try to init epoch and neval number with existing value in state
yiheng Oct 14, 2016
7223c9e
adjust cifar10 hyper parameter
yiheng Oct 18, 2016
c0c7128
use double when calculate mean sum
yiheng Oct 18, 2016
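The "use double when calculate mean sum" commit addresses a classic precision issue: accumulating many Float values in a Float accumulator drifts as the running sum grows, while a Double accumulator stays accurate. A standalone illustration of the effect — assumed helper names, not the spark-dl code itself:

```scala
// Why a mean over many Floats should accumulate in Double.
// Hypothetical helpers; not the spark-dl implementation.
object MeanPrecision {
  def meanFloatAcc(xs: Array[Float]): Double = {
    var sum = 0.0f // Float accumulator: loses low-order bits as sum grows
    var i = 0
    while (i < xs.length) { sum += xs(i); i += 1 }
    sum.toDouble / xs.length
  }

  def meanDoubleAcc(xs: Array[Float]): Double = {
    var sum = 0.0 // Double accumulator keeps ~15 significant digits
    var i = 0
    while (i < xs.length) { sum += xs(i); i += 1 }
    sum / xs.length
  }
}
```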
be3ed07
Merge pull request #34 from yiheng/dataset
yiheng-wang-intel Oct 18, 2016
b18c069
bug fix for gemm
qiuxin2012 Oct 19, 2016
a4923a3
1. convert image to seq file 2. add mkl_rt to linkerEndOption
zhangxiaoli73 Oct 19, 2016
57a4c50
1. convert image to seq file 2. add mkl_rt to linkerEndOption
zhangxiaoli73 Oct 19, 2016
ac27813
adjust indent for DenseTensorBLAS
qiuxin2012 Oct 19, 2016
112df42
Unit test for DenseTensorBLAS.gemm
qiuxin2012 Oct 19, 2016
4565473
Merge pull request #35 from qiuxin2012/pvanet
yiheng-wang-intel Oct 20, 2016
9673107
refactor the code to make it support both table and tensor as layer i…
psyyz10 Oct 20, 2016
943db7a
fix code style errors
psyyz10 Oct 20, 2016
6a8cc30
refactor the code to make it support both table and tensor as layer i…
psyyz10 Oct 20, 2016
47144be
fix code style errors
psyyz10 Oct 20, 2016
e4a9935
uncomment getTimes() in Concat.scala
psyyz10 Oct 20, 2016
ceebfab
fix conflicts
psyyz10 Oct 21, 2016
d7512df
merge with qiuxin's code
psyyz10 Oct 21, 2016
31af891
use another property to control dl engine core number
yiheng Oct 21, 2016
238544b
use fixed threading pool in multiple model training
yiheng Oct 21, 2016
e5a6148
refactor the code to make it support both table and tensor as layer i…
psyyz10 Oct 20, 2016
3c51db6
Merge branch 'master' of https://github.com/psyyz10/spark-dl
psyyz10 Oct 21, 2016
7efeedd
fix broken unittest
yiheng Oct 21, 2016
48b3869
Merge pull request #37 from yiheng/master
qiuxin2012 Oct 21, 2016
6f6c055
spatial full convolution
qiuxin2012 Oct 20, 2016
bff2400
spatial full convolution code clean up
qiuxin2012 Oct 20, 2016
006a4c9
add bilinear init method for spatial full convolution
qiuxin2012 Oct 21, 2016
534a5df
Merge pull request #36 from qiuxin2012/pvanet
yiheng-wang-intel Oct 21, 2016
3b676b6
bug fix for DenseTensorMath cmul and cdiv
qiuxin2012 Oct 21, 2016
81439a6
add some missed file for Power
qiuxin2012 Oct 21, 2016
6433de6
add two unit test for power
qiuxin2012 Oct 21, 2016
ed2877c
add some comment
qiuxin2012 Oct 21, 2016
298c41d
some small fix for unit test failed
qiuxin2012 Oct 21, 2016
ecfd892
Merge pull request #42 from qiuxin2012/pvanet
qiuxin2012 Oct 21, 2016
e82ee9a
add some math operation
ShuhanYan Oct 24, 2016
9eeee5f
fix conflict
ShuhanYan Oct 24, 2016
1b0086a
Merge pull request #39 from psyyz10/master
psyyz10 Oct 24, 2016
da05313
checkin abs and add layer
zhangxiaoli73 Oct 21, 2016
82614a4
fix bugs
ShuhanYan Oct 24, 2016
ab26139
fix a scala-style error: a too long line of code
psyyz10 Oct 24, 2016
0c62ffd
1.some changes for abs and add layer according to pr 2. checkin AbsCr…
zhangxiaoli73 Oct 24, 2016
0064028
fix a scala-style error: a too long line of code
psyyz10 Oct 24, 2016
8ed1f42
Make sub-models cast to Model[Activities, Activities, T] in the add p…
psyyz10 Oct 25, 2016
21b3143
Merge branch 'master' of fork origin repository
psyyz10 Oct 25, 2016
7aa1eab
require contiguous
zhangxiaoli73 Oct 26, 2016
84bead8
Merge pull request #47 from psyyz10/master
yiheng-wang-intel Oct 26, 2016
8b37393
CAdd module
qiuxin2012 Oct 25, 2016
46d77d6
CMul module and some other codes
qiuxin2012 Oct 26, 2016
44efa92
change on function equals and hashcode
qiuxin2012 Oct 26, 2016
2a907c6
add SmoothL1Criterion code and test
Oct 26, 2016
e7bf50e
Merge pull request #44 from qiuxin2012/master
qiuxin2012 Oct 26, 2016
9aa5b28
fix conflict
ShuhanYan Oct 27, 2016
d4650f7
Merge pull request #49 from zhangxiaoli73/master
yiheng-wang-intel Oct 27, 2016
7721ae5
remove println
ShuhanYan Oct 27, 2016
7601161
fix conflict
ShuhanYan Oct 27, 2016
9726688
fix code style bug
ShuhanYan Oct 27, 2016
fadce4b
update coding style
Oct 27, 2016
79e955f
Merge pull request #43 from yansh0625/work
yiheng-wang-intel Oct 27, 2016
ce5fe9a
optimise code
Oct 28, 2016
1002f22
fix null bug
Oct 28, 2016
f03ad8f
fix test bug
Oct 28, 2016
648dde0
Support both Table and Tensor as input or output of criterions
psyyz10 Oct 27, 2016
83e7392
Merge with New version code in upstream master
psyyz10 Oct 27, 2016
49c28b1
Add Exp layer
psyyz10 Oct 27, 2016
c11c270
Fix a bug in Criterion interface, the generic type should be [A, T] r…
psyyz10 Oct 27, 2016
4f2c323
Add backward test to exp layer
psyyz10 Oct 31, 2016
ad64ff2
fix conflict with upstream master
psyyz10 Nov 1, 2016
819cd84
Merge pull request #50 from SeaOfOcean/nn
yiheng-wang-intel Nov 1, 2016
88d4717
Merge pull request #53 from psyyz10/master
yiheng-wang-intel Nov 2, 2016
bf21a4c
update Readme
yiheng Nov 2, 2016
a0368ca
Implement and test RReLU layer
psyyz10 Nov 1, 2016
bbdf356
optimize code to meet code review
psyyz10 Nov 2, 2016
5a95975
fix a bug in Container evaluate()
psyyz10 Nov 2, 2016
4c066c4
ConcatTable CAddTable and Identity
qiuxin2012 Nov 1, 2016
c1c5f55
some changes about CAddTable and ConcatTable
qiuxin2012 Nov 3, 2016
f309486
Merge pull request #58 from psyyz10/RReLU
psyyz10 Nov 3, 2016
689c067
add Bilinear layer and convert java.map to scala.map
zhangxiaoli73 Oct 28, 2016
9841c91
some modify of Bilinear
zhangxiaoli73 Nov 1, 2016
419f64d
resolve conflicts
zhangxiaoli73 Nov 2, 2016
e0342d2
delete T() init
zhangxiaoli73 Nov 3, 2016
7ce70b4
change to use select
zhangxiaoli73 Nov 3, 2016
e387c37
add nexus release to pom
ShuhanYan Nov 4, 2016
4d805f1
fix imagenet local code
yiheng Oct 18, 2016
23cf0f5
fix localoptimizer can't handle some trigger correctly
yiheng Oct 20, 2016
7f61cf2
dataset code refactor
yiheng Oct 24, 2016
bf87f6b
fix compile error from code merge
yiheng Nov 2, 2016
0681f9a
code refactor of dataset
yiheng Nov 3, 2016
c790037
add back momentum in ImageNet training
yiheng Nov 4, 2016
0328836
support for mkl dnn api, which is migrated from WebscaleML.
i8run Sep 20, 2016
27c71e9
delete the unused codes
i8run Sep 20, 2016
3886cc3
support for cancel the data conversion between two mkl layers
i8run Sep 20, 2016
0f2bf03
fix the codestyle of scala source code
i8run Sep 20, 2016
90a77ed
add input size and strides to pooling
i8run Sep 24, 2016
8c7aaad
add concat support
i8run Sep 24, 2016
da42265
change the api of convolution to the same as nn
i8run Sep 24, 2016
b9a51bf
add support for sum
i8run Sep 24, 2016
cb1f9aa
migrate the openmp manager from intel caffe
i8run Sep 27, 2016
70c5f65
cancel the conversion between two mkl layers
i8run Oct 6, 2016
dfbc658
fix the error of reset method
i8run Oct 8, 2016
d43c2ab
fix the concat check failed bug and add two testcases for Concat
i8run Oct 8, 2016
d5668e1
Change updateGradInput to backward in concat testcase.
i8run Oct 9, 2016
c3fb8a2
Fix the bug of incorrect result of gradient input of SpatialConvolution.
i8run Oct 10, 2016
6d62c74
testcases for concat
i8run Oct 10, 2016
09bd02c
add a constant initialize method.
i8run Oct 10, 2016
671186b
initialize the layout pointer
i8run Oct 12, 2016
aa4c1ae
Correctness verification.
i8run Oct 19, 2016
f35daa7
Fix some bugs and add some tests compared with IntelCaffe w/ MKL-DNN.
i8run Nov 4, 2016
fe442dc
convergence test with Cifar and AlexNet; currently it cannot converge.
i8run Nov 4, 2016
28de43e
adjust cache path
yiheng Nov 4, 2016
911e273
fix testcase because of new type and openmp for c++11
i8run Nov 8, 2016
b392b10
add static-intel to native pom.xml
qiuxin2012 Nov 8, 2016
7921a8f
Implement and test HardTanh
psyyz10 Nov 3, 2016
fea8450
Make HardTanh update parallel
psyyz10 Nov 3, 2016
9c47ff8
Implement and test Clamp and ReLu6
psyyz10 Nov 8, 2016
c049b19
Implement Copy layer
psyyz10 Oct 28, 2016
7e852fb
fix a style error
psyyz10 Nov 3, 2016
d271ab7
Implement and test Log layer
psyyz10 Oct 31, 2016
b27345d
fix a style error
psyyz10 Nov 3, 2016
73a69f7
fix a test bug
psyyz10 Nov 8, 2016
de881f8
SoftMax
qiuxin2012 Nov 3, 2016
d13f919
parallel SoftMax
qiuxin2012 Nov 3, 2016
32f0891
SoftMin
qiuxin2012 Nov 4, 2016
e6f5e97
add toString to SoftMax and SoftMin
qiuxin2012 Nov 4, 2016
04783f1
add input distribution option for perf.
i8run Nov 10, 2016
a829f66
Implement and test LogSigmoid
psyyz10 Oct 31, 2016
2f6d17e
Optimise some code according to the code review
psyyz10 Nov 8, 2016
33605d2
Add layer comment to LogSigmoid
psyyz10 Nov 8, 2016
e316f4f
Implement and test HardShrink
psyyz10 Nov 6, 2016
4b782b3
Add JavaDoc to HardShrink and meet code reviews
psyyz10 Nov 9, 2016
2269af0
fix for Batchnormalization's gradBias and gradWeight
qiuxin2012 Nov 14, 2016
276f2cc
Implement and test LeakyReLU
psyyz10 Nov 3, 2016
d8cfe0b
Add JavaDoc to LeakyReLU and meet code reviews
psyyz10 Nov 9, 2016
7af0bda
Fix a scala style error
psyyz10 Nov 13, 2016
1090338
add CMul,CMax,CMin,CSub Table layer
zhangxiaoli73 Nov 4, 2016
bd74921
hide apply
zhangxiaoli73 Nov 4, 2016
0c69a60
CosineEmbeddingCriterion
zhangxiaoli73 Nov 7, 2016
b922a2c
add some comments
zhangxiaoli73 Nov 9, 2016
f5a9eaf
add comments for previous layers
zhangxiaoli73 Nov 11, 2016
ebf3868
SoftSign
qiuxin2012 Nov 4, 2016
f5b2da6
add javadoc
qiuxin2012 Nov 14, 2016
839915d
ParallelCriterion
qiuxin2012 Nov 4, 2016
4954adf
add recursiveapply for table
qiuxin2012 Nov 14, 2016
2dc7069
Implement and test ELU layer
psyyz10 Nov 6, 2016
f61ba40
Add todo comment to ELU
psyyz10 Nov 9, 2016
4484175
Fix a scala style error
psyyz10 Nov 13, 2016
f42cc56
SoftPlus and SoftShrink
qiuxin2012 Nov 7, 2016
445397a
add javadoc
qiuxin2012 Nov 14, 2016
94e653b
Implement and test GradientReversal layer
psyyz10 Nov 6, 2016
d99a260
Add JavaDoc to GradientReversal and meet code reviews
psyyz10 Nov 9, 2016
558f184
fix a scala style error
psyyz10 Nov 13, 2016
da31649
Sqrt Square
qiuxin2012 Nov 7, 2016
1294cf7
Replicate
qiuxin2012 Nov 8, 2016
802f271
add javadoc
qiuxin2012 Nov 15, 2016
13e6e0c
add toString to Power
qiuxin2012 Nov 15, 2016
abf9b8d
SpatialDilatedConvolution
qiuxin2012 Nov 2, 2016
c28ca5c
some changes
qiuxin2012 Nov 4, 2016
6b67839
add java doc
qiuxin2012 Nov 14, 2016
41282d0
SpatialFullConv table input support
qiuxin2012 Nov 14, 2016
a39eb8f
delete alpha channel exception and add unit test
zhangxiaoli73 Nov 11, 2016
94c9d23
Implement and test nn.Sum layer
psyyz10 Nov 7, 2016
357c430
Implement and test Mean layer
psyyz10 Nov 7, 2016
86bb188
Add JavaDoc to Sum and Mean layers and meet code reviews
psyyz10 Nov 9, 2016
f409389
Implement and test nn.DotProduct
psyyz10 Nov 8, 2016
01ee95c
Add JavaDoc to DotProduct layer
psyyz10 Nov 9, 2016
8d1b518
Implement and test MapTable
psyyz10 Nov 9, 2016
6e148de
Add class comment as description for the MapTable
psyyz10 Nov 9, 2016
dae1a8c
Add JavaDoc to MapTable
psyyz10 Nov 9, 2016
58fda2f
Implement and test Select
psyyz10 Nov 9, 2016
20b02ca
Implement and test TanhShrink
psyyz10 Nov 9, 2016
27bf2aa
Add JavaDoc to Select and TanhShrink and meet code reviews
psyyz10 Nov 9, 2016
eff2df7
add distribution of input
i8run Nov 16, 2016
0867555
support for mkl dnn api, which is migrated from WebscaleML.
i8run Sep 20, 2016
7a90683
delete the unused codes
i8run Sep 20, 2016
42618cd
support for cancel the data conversion between two mkl layers
i8run Sep 20, 2016
ca26540
fix the codestyle of scala source code
i8run Sep 20, 2016
11b1fff
add input size and strides to pooling
i8run Sep 24, 2016
29b3ce9
add concat support
i8run Sep 24, 2016
0dd1bcc
change the api of convolution to the same as nn
i8run Sep 24, 2016
67d66c8
add support for sum
i8run Sep 24, 2016
7eb5ec2
migrate the openmp manager from intel caffe
i8run Sep 27, 2016
874ae48
cancel the conversion between two mkl layers
i8run Oct 6, 2016
e95fa93
fix the error of reset method
i8run Oct 8, 2016
f0e8a01
fix the concat check failed bug and add two testcases for Concat
i8run Oct 8, 2016
c124364
Change updateGradInput to backward in concat testcase.
i8run Oct 9, 2016
b53d959
Fix the bug of incorrect result of gradient input of SpatialConvolution.
i8run Oct 10, 2016
82d91d7
testcases for concat
i8run Oct 10, 2016
081e611
add a constant initialize method.
i8run Oct 10, 2016
cad3116
initialize the layout pointer
i8run Oct 12, 2016
5f34308
Correctness verification.
i8run Oct 19, 2016
0dfad15
Fix some bugs and add some tests compared with IntelCaffe w/ MKL-DNN.
i8run Nov 4, 2016
1216741
convergence test with Cifar and AlexNet; currently it cannot converge.
i8run Nov 4, 2016
e1fea3b
fix testcase because of new type and openmp for c++11
i8run Nov 8, 2016
b3aa51e
add input distribution option for perf.
i8run Nov 10, 2016
a30f49a
add distribution of input
i8run Nov 16, 2016
a2bdf30
Add GoogLeNet v1 test case. And modify some implementation of AlexNet.
i8run Nov 18, 2016
dd8be7a
Merge branch 'master' of https://github.com/i8run/spark-dl
i8run Nov 20, 2016
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -20,3 +20,5 @@ project/plugins/project/

# other
*.txt
*.csv
*.swp
22 changes: 21 additions & 1 deletion README.md
@@ -1 +1,21 @@
Deep learning library for Apache Spark
#BigDL

A scalable deep learning library for Apache Spark.

Here's the summary of core features:
* a powerful N-dimensional array
* lots of math and data manipulating operations
* rich neural network layers
* efficient distributed numeric optimization routines on Apache Spark
* powered by MKL and MKL DNN, fast and optimized on Intel hardware platforms

##How to build
###Linux
1. Download [Intel MKL](https://software.intel.com/en-us/intel-mkl) and install it in your linux box
2. Prepare MKL build environment<br> <code>source PATH_TO_MKL/bin/mklvars.sh &#60;arch&#62;</code><br> The **&#60;arch&#62;** can be *ia32*, *intel64*, or *mic*, which depends on your system.
3. Build project<br> <code>mvn clean package -DskipTests -P mkl</code>

##Example
* MNIST example
* Cifar10 example
* Imagenet example
9 changes: 7 additions & 2 deletions dl/pom.xml
@@ -5,7 +5,7 @@
<parent>
<artifactId>sparkdl-parent_0.1</artifactId>
<groupId>com.intel.analytics.sparkdl</groupId>
<version>0.1.0-SNAPSHOT</version>
<version>0.1.0-dnn-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

@@ -19,6 +19,11 @@
</properties>

<dependencies>
<dependency>
<groupId>com.twelvemonkeys.imageio</groupId>
<artifactId>imageio-jpeg</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
@@ -32,7 +37,7 @@
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.intel.analytics.dllib.mkl</groupId>
<groupId>com.intel.analytics.sparkdl.mkl</groupId>
<artifactId>mkl-java_0.1</artifactId>
<version>${project.version}</version>
</dependency>
2 changes: 1 addition & 1 deletion dl/scalastyle_config.xml
@@ -183,7 +183,7 @@ You can also disable only one rule, by specifying its rule id, as specified in:

<check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>

<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"></check>
<check customId="noSpaceBeforeLeftBracket" level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="true"></check>

<check customId="methodName" level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true">
72 changes: 72 additions & 0 deletions dl/src/main/scala/com/intel/analytics/sparkdl/dataset/Cifar.scala
@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.intel.analytics.sparkdl.dataset

import java.nio.file.{Files, Path, Paths}

import com.intel.analytics.sparkdl.models.cifar.VggLike
import com.intel.analytics.sparkdl.nn.ClassNLLCriterion
import com.intel.analytics.sparkdl.optim.SGD.EpochStep
import com.intel.analytics.sparkdl.optim.{LocalOptimizer, SGD, Top1Accuracy, Trigger}
import com.intel.analytics.sparkdl.utils.T
import scopt.OptionParser

object Cifar10Local {
case class Cifar10LocalParam(
folder: String = "./",
net: String = "vgg"
)

private val parser = new OptionParser[Cifar10LocalParam]("Spark-DL Cifar10 Local Example") {
head("Spark-DL Cifar10 Local Example")
opt[String]('f', "folder")
.text("where you put the Cifar10 data")
.action((x, c) => c.copy(folder = x))
}

def main(args: Array[String]) {
parser.parse(args, new Cifar10LocalParam()).map(param => {
val trainDataSource = new CifarDataSource(Paths.get(param.folder + "/train"), looped = true)
val validationDataSource = new CifarDataSource(Paths.get(param.folder + "/val"),
looped = false)
val arrayToImage = ArrayByteToRGBImage()
val normalizer = RGBImageNormalizer(trainDataSource -> arrayToImage)
val toTensor = new RGBImageToTensor(batchSize = 128)

val optimizer = new LocalOptimizer[Float](
data = trainDataSource -> arrayToImage -> normalizer -> toTensor,
validationData = validationDataSource -> arrayToImage -> normalizer -> toTensor,
model = VggLike[Float](classNum = 10),
criterion = new ClassNLLCriterion[Float](),
optimMethod = new SGD[Float](),
state = T(
"learningRate" -> 0.01,
"weightDecay" -> 0.0005,
"momentum" -> 0.9,
"dampening" -> 0.0,
"learningRateSchedule" -> EpochStep(25, 0.5)
),
endWhen = Trigger.maxEpoch(90)
)
optimizer.setValidationTrigger(Trigger.everyEpoch)
optimizer.addValidation(new Top1Accuracy[Float])

optimizer.optimize()
})
}
}
121 changes: 121 additions & 0 deletions dl/src/main/scala/com/intel/analytics/sparkdl/dataset/ConvertSeq.scala
@@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.analytics.sparkdl.dataset

import java.io.IOException
import java.nio.ByteBuffer
import java.nio.file.Paths

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{SequenceFile, Text}
import scopt.OptionParser

object ConvertSeq {

case class ConvertSeqParams(
folder: String = "./",
outputSeq: String = "./",
parallel: Int = 1,
buffer : Int = 256,
dataSetType: String = "ImageNet"
)

private val parser = new OptionParser[ConvertSeqParams]("Spark-DL Convert Seq") {
head("Convert Image Files to Hadoop Sequential Files")
opt[String]('f', "folder")
.text("where you put the dataset")
.action((x, c) => c.copy(folder = x))
opt[String]('o', "outputSeq")
.text("outputSeq folder")
.action((x, c) => c.copy(outputSeq = x))
opt[Int]('p', "parallel")
.text("parallel num")
.action((x, c) => c.copy(parallel = x))
opt[Int]('b', "buffer")
.text("buffer size")
.action((x, c) => c.copy(buffer = x))
opt[String]('d', "dataSetType")
.text("dataset type")
.action((x, c) => c.copy(dataSetType = x))
}

def main(args: Array[String]): Unit = {
parser.parse(args, new ConvertSeqParams()).map(param => {
param.dataSetType match {
case "ImageNet" =>
val dataSource = new ImageNetDataSource(Paths.get(param.folder), looped = false)
val pathToImage = PathToRGBImage(256)
val worker = new Worker(dataSource -> pathToImage, param.parallel)
worker.process(param.outputSeq)
case "Cifar-10" =>
val dataSource = new CifarDataSource(Paths.get(param.folder), looped = false)
val arrayToImage = ArrayByteToRGBImage()
val worker = new Worker(dataSource -> arrayToImage, param.parallel)
worker.process(param.outputSeq)
case _ => throw new UnsupportedOperationException(s"Only ImageNet/Cifar-10 supported")
}
})
}
}

class Worker(dataSet: DataSource[RGBImage], parallel: Int) {

def process(target: String): Unit = {
var i = 0
var file = s"${target}-seq"
val writer = new Writer(file)
while(dataSet.hasNext) {
val data = dataSet.next()
val imageKey = s"${data.label()}-${i}"
println(s"write ${imageKey}")
writer.write(imageKey, RGBImage.convertToByte(data.content, data.width(), data.height()),
data.width(), data.height())
i += 1
}
writer.close()
}
}

class Writer @throws[IOException]
(val seqFilePath: String) {
private val conf: Configuration = new Configuration
val path = new Path(seqFilePath)
val writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(path),
SequenceFile.Writer.keyClass(classOf[Text]), SequenceFile.Writer.valueClass(classOf[Text]))
var preBuffer: ByteBuffer = ByteBuffer.allocate(4 * 2)

@throws[Exception]
def write(imageKey: String, img: Array[Byte], width: Int, height: Int) {
preBuffer.putInt(width)
preBuffer.putInt(height)
val data: Array[Byte] = new Array[Byte](preBuffer.capacity + img.length)
System.arraycopy(preBuffer.array, 0, data, 0, preBuffer.capacity)
System.arraycopy(img, 0, data, preBuffer.capacity, img.length)
preBuffer.clear
writer.append(new Text(imageKey), new Text(data))
}

def close() {
try {
writer.close()
} catch {
case e: IOException =>
e.printStackTrace()
}
}
}
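The `Writer` above prefixes each image's bytes with its width and height as two big-endian 4-byte ints before appending the record. A reader of these sequence-file values would peel those eight bytes off the front; a hedged sketch of that decoding step (the layout is inferred from `Writer.write`, and `RecordCodec` is a hypothetical helper, not part of this PR):

```scala
import java.nio.ByteBuffer
import java.util.Arrays

// Decode a value produced by Writer.write: 4 bytes width, 4 bytes height,
// then the raw image bytes. Hypothetical helper, not part of the PR.
object RecordCodec {
  def decode(value: Array[Byte]): (Int, Int, Array[Byte]) = {
    val buf = ByteBuffer.wrap(value)
    val width = buf.getInt()   // first 4 bytes
    val height = buf.getInt()  // next 4 bytes
    (width, height, Arrays.copyOfRange(value, 8, value.length))
  }

  // Mirror of Writer.write's packing, for a round-trip check.
  def encode(img: Array[Byte], width: Int, height: Int): Array[Byte] = {
    val buf = ByteBuffer.allocate(8 + img.length)
    buf.putInt(width).putInt(height).put(img)
    buf.array()
  }
}
```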