# nd4j1

This workbook is intended to provide the "MVP" of a Jupyter-enabled notebook for [deeplearning4j](https://deeplearning4j.org). Its goal is to provide a working example of a DL4J network, and then to use [wisp](https://github.com/quantifind/wisp) to display the results of the network classification output.

This example is based on the classic classification problem using [iris flower data](https://en.wikipedia.org/wiki/Iris_flower_data_set).

In [1]:
// Register every local jar the notebook needs with the kernel's classpath.
// The list keeps the original first-occurrence order; the duplicate guava-18.0 and
// reflections-0.9.10 entries of the earlier version have been dropped.
// NOTE(review): scala-library-2.11.1 is listed next to *_2.10 artifacts
// (jackson-module-scala, json4s-jackson) — confirm which Scala binary version the kernel runs.
Seq(
  "datavec-api-0.5.0.jar",
  "annotations-2.0.1.jar",
  "commons-io-2.4.jar",
  "nd4j-native-0.5.0-linux-x86_64.jar",
  "commons-lang3-3.3.1.jar",
  "commons-math3-3.4.1.jar",
  "guava-18.0.jar",
  "nd4j-native-0.5.0.jar",
  "javacpp-1.2.3.jar",
  "nd4j-native-api-0.5.0.jar",
  "javassist-3.18.2-GA.jar",
  "nd4j-native-platform-0.5.0.jar",
  "lombok-1.16.4.jar",
  "reflections-0.9.10.jar",
  "nd4j-api-0.5.0.jar",
  "scala-library-2.11.1.jar",
  "nd4j-buffer-0.5.0.jar",
  "slf4j-api-1.7.21.jar",
  "nd4j-common-0.5.0.jar",
  "slf4j-simple-1.7.21.jar",
  "nd4j-context-0.5.0.jar",
  "deeplearning4j-core-0.5.0.jar",
  "canova-api-0.0.0.17.jar",
  "hadoop-mapreduce-client-core-2.2.0.jar",
  "datavec-nd4j-common-0.5.0.jar",
  "jackson-dataformat-yaml-2.4.4.jar",
  "jackson-core-2.6.5.jar",
  "jackson-annotations-2.6.5.jar",
  "jackson-databind-2.6.5.jar",
  "jackson-module-scala_2.10-2.6.5.jar",
  "json4s-jackson_2.10-3.2.11.jar",
  "commons-lang-2.5.jar"
).foreach(jar => classpath.addPath("/opt/app/libs/" + jar))



In [2]:
// Add the wisp charting library (group com.quantifind, artifact wisp, version 0.0.4)
// to the session classpath as a managed dependency rather than a local jar.
// NOTE(review): `%%` presumably appends the Scala binary version to the artifact name,
// as in sbt — confirm for this kernel.
classpath.add("com.quantifind" %% "wisp" % "0.0.4")

Adding 27 artifact(s)




In [3]:
import java.text.{DecimalFormat, DecimalFormatSymbols}

import org.apache.commons.lang.StringUtils
import org.nd4j.linalg.api.complex.IComplexNDArray
import org.nd4j.linalg.api.ndarray.INDArray

/**
  * Renders an nd4j `INDArray` as a bracketed string with a configurable element
  * separator and number pattern.
  *
  * Created by claytongraham on 10/29/16 as a local variant of nd4j's own NDArrayStrings:
  * the dl4j implementation does not let the user change the format, which is a problem
  * when the string form is reused for other things, such as building a guava table from
  * it. If nd4j ever offers better data ingress/egress, remove this class and use nd4j.
  *
  * https://github.com/deeplearning4j/nd4j/blob/master/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/string/NDArrayStrings.java
  *
  * The no-argument constructor uses "," as separator and the pattern `#,###,##0`.
  * Both constructors pin '.' as decimal separator and ',' as grouping separator so the
  * output does not vary with the JVM default locale.
  */
class NDArrayStrings {
  private var sep: String = ","
  // Width every vector element is right-aligned to; recomputed on each public format call.
  private var padding: Int = 0
  private var decFormatNum: String = "#,###,##0"
  private var decFormatRest: String = ""
  private var decimalFormat: DecimalFormat = newDecimalFormat(decFormatNum + decFormatRest)

  /**
    * @param sep        separator placed between the elements of a vector
    * @param precisionI number of fraction digits; 0 means no fractional part
    * @param decFormat  `DecimalFormat` pattern for the integer part, e.g. `######0`
    */
  def this(sep: String, precisionI: Int, decFormat: String) = {
    this()
    this.decFormatNum = decFormat
    this.sep = sep
    // "0" * n yields "" for n <= 0, so a negative precision produces a bare "." as before.
    this.decFormatRest = if (precisionI != 0) "." + ("0" * precisionI) else ""
    this.decimalFormat = newDecimalFormat(decFormatNum + decFormatRest)
  }

  /** Builds a formatter for `pattern` with '.' and ',' fixed as decimal/grouping separators. */
  private def newDecimalFormat(pattern: String): DecimalFormat = {
    val symbols: DecimalFormatSymbols = DecimalFormatSymbols.getInstance
    symbols.setDecimalSeparator('.')
    symbols.setGroupingSeparator(',')
    val formatter: DecimalFormat = new DecimalFormat(pattern)
    formatter.setDecimalFormatSymbols(symbols)
    formatter
  }

  /**
    * Format the given ndarray as a string
    *
    * @param arr the array to format
    * @return the formatted array
    */
  def format(arr: INDArray): String = {
    // The formatted maximum value determines the column width used for alignment.
    val widest: String = decimalFormat.format(arr.maxNumber)
    this.padding = widest.length
    format(arr, arr.rank, 0)
  }

  /**
    * Recursive worker: scalars are formatted directly, vectors become `[a,b,c]`, and
    * higher ranks become a bracketed list of formatted slices, one per line.
    *
    * @param arr     the (sub)array to render
    * @param rank    remaining rank to descend through
    * @param offsetI indentation, in spaces, of the enclosing bracket
    */
  private def format(arr: INDArray, rank: Int, offsetI: Int): String = {
    if (arr.isScalar) {
      arr match {
        case complex: IComplexNDArray => complex.getComplex(0).toString
        case _ => decimalFormat.format(arr.getDouble(0))
      }
    }
    else if (rank <= 0) ""
    else if (arr.isVector) {
      val cells = (0 until arr.length).map { i =>
        arr match {
          case complex: IComplexNDArray => complex.getComplex(i).toString
          // Right-align real values to the width computed in the public format call.
          case _ => String.format("%1$" + padding + "s", decimalFormat.format(arr.getDouble(i)))
        }
      }
      cells.mkString("[", sep, "]")
    }
    else {
      val depth: Int = offsetI + 1
      // Slices are separated by ",\n", extra blank lines for ranks above 2, and then
      // enough spaces to line up under the opening bracket.
      val sliceBreak: String =
        ",\n" + StringUtils.repeat("\n", rank - 2) + StringUtils.repeat(" ", depth)
      (0 until arr.slices)
        .map(i => format(arr.slice(i), rank - 1, depth))
        .mkString("[", sliceBreak, "]")
    }
  }
}

[32mimport [36mjava.text.{DecimalFormat, DecimalFormatSymbols}[0m
[32mimport [36morg.apache.commons.lang.StringUtils[0m
[32mimport [36morg.nd4j.linalg.api.complex.IComplexNDArray[0m
[32mimport [36morg.nd4j.linalg.api.ndarray.INDArray[0m
defined [32mclass [36mNDArrayStrings[0m

In [4]:
import java.io.{File, IOException}
import org.apache.commons.io.FileUtils
import java.net.URL
import org.nd4j.linalg.factory.Nd4j
import org.datavec.api.records.reader.RecordReader
import org.datavec.api.records.reader.impl.csv.CSVRecordReader
import org.datavec.api.split.FileSplit
import org.datavec.api.util.ClassPathResource
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.eval.Evaluation
import org.deeplearning4j.nn.conf.MultiLayerConfiguration
import org.deeplearning4j.nn.conf.NeuralNetConfiguration
import org.deeplearning4j.nn.conf.layers.DenseLayer
import org.deeplearning4j.nn.conf.layers.OutputLayer
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.deeplearning4j.nn.weights.WeightInit
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.nd4j.linalg.api.ndarray.INDArray
import org.nd4j.linalg.dataset.DataSet
import org.nd4j.linalg.dataset.SplitTestAndTrain
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import org.nd4j.linalg.dataset.api.preprocessor.DataNormalization
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize
import org.nd4j.linalg.lossfunctions.LossFunctions
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import org.reflections.Reflections
import org.reflections.scanners.SubTypesScanner
import java.util
import org.datavec.common.data.NDArrayWritable
import com.fasterxml.jackson.core.JsonParseException
import com.fasterxml.jackson.databind.ObjectMapper
import com.google.common.collect.{HashBasedTable, Table}

/**
  * Created by claytongraham on 10/29/16.
  *
  * This is our network executor: a classifier that tries to figure out what type of
  * iris each row describes. Every CSV row is expected to hold 4 input features followed
  * by an integer class label (index 4).
  *
  * After [[execute]] has run, `networkOutput` holds the network's output for the test
  * split as a table keyed by (row index, column index).
  *
  * @param filePath path of the CSV data file to load
  */
class DeepLearning4JMultiLayerNetwork(val filePath: String) {

  var dataFile: String = filePath

  // Stays null until execute() has completed.
  var networkOutput: Table[Integer, Integer, Double] = null

  /**
    * Loads the data file, splits it into training and test sets, trains the network,
    * prints the evaluation statistics and stores the test-set output in `networkOutput`.
    */
  def execute(): Unit = {
    println("Hello, deeplearning4j!")

    //First: get the dataset using the record reader. CSVRecordReader handles loading/parsing
    val numLinesToSkip: Int = 0
    val delimiter: String = ","
    val recordReader: RecordReader = new CSVRecordReader(numLinesToSkip, delimiter)
    recordReader.initialize(new FileSplit(new File(dataFile)))

    //Second: the RecordReaderDataSetIterator handles conversion to DataSet objects, ready for use in neural network
    val labelIndex: Int = 4 //5 values in each row of the iris.txt CSV: 4 input features followed by an integer label (class) index. Labels are the 5th value (index 4) in each row
    val numClasses: Int = 3 //3 classes (types of iris flowers) in the iris data set. Classes have integer values 0, 1 or 2
    val batchSize: Int = 150 //Iris data set: 150 examples total. We are loading all of them into one DataSet (not recommended for large data sets)

    val iterator: DataSetIterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numClasses)

    val allData: DataSet = iterator.next
    allData.shuffle()
    val testAndTrain: SplitTestAndTrain = allData.splitTestAndTrain(0.65) //Use 65% of data for training
    val trainingData: DataSet = testAndTrain.getTrain
    val testData: DataSet = testAndTrain.getTest

    //We need to normalize our data. We'll use NormalizeStandardize (which gives us mean 0, unit variance):
    val normalizer: DataNormalization = new NormalizerStandardize
    normalizer.fit(trainingData) //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
    normalizer.transform(trainingData) //Apply normalization to the training data
    normalizer.transform(testData) //Apply normalization to the test data. This is using statistics calculated from the *training* set
    val numInputs: Int = 4
    val outputNum: Int = numClasses //one output unit per class
    val iterations: Int = 1000
    val seed: Long = 6

    println("Build model....")
    val conf: MultiLayerConfiguration =
      new NeuralNetConfiguration.Builder().seed(seed)
        .iterations(iterations).activation("tanh")
        .weightInit(WeightInit.XAVIER).learningRate(0.1)
        .regularization(true).l2(1e-4).list
        .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(3).build)
        .layer(1, new DenseLayer.Builder().nIn(3).nOut(3).build)
        .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
          .activation("softmax")
          .nIn(3).nOut(outputNum).build).backprop(true).pretrain(false).build

    //run the model
    val model: MultiLayerNetwork = new MultiLayerNetwork(conf)
    model.init()
    model.setListeners(new ScoreIterationListener(100))
    model.fit(trainingData)

    //evaluate the model on the test set
    val eval: Evaluation = new Evaluation(numClasses)
    val output: INDArray = model.output(testData.getFeatureMatrix)
    eval.eval(testData.getLabels, output)
    println(eval.stats)

    //number of fraction digits kept when the output is serialized into the table
    val outputPrecision: Int = 3
    networkOutput = makeTableFromArray(output, outputPrecision)
  }

  /**
    * Serializes `source` with [[NDArrayStrings]] (separator ",", pattern `######0` plus
    * `precision` fraction digits) and parses the resulting text as a JSON array.
    *
    * @param source    the array to convert
    * @param precision number of fraction digits to keep
    * @return the parsed rows, or `null` when the text is not valid JSON (the stack trace
    *         is printed in that case)
    */
  @throws[IOException]
  def makeRowsFromNDArray(source: INDArray, precision: Int): util.List[_] = {
    val mapper: ObjectMapper = new ObjectMapper
    val serializedData: String = new NDArrayStrings(",", precision, "######0").format(source)
    try {
      val rows: util.List[_] = mapper.readValue(serializedData.getBytes, classOf[util.List[_]])
      rows
    }
    catch {
      case e: JsonParseException =>
        e.printStackTrace()
        null
    }
  }

  /**
    * Converts `source` into a table keyed by (row index, column index).
    *
    * Cells are read as `Number`, so values parsed as integers (for example with
    * `precision` 0) are accepted as well as doubles.
    *
    * @param source    the array to convert
    * @param precision number of fraction digits to keep
    * @return the populated table; empty when the serialized array could not be parsed
    * @throws ClassCastException if a row is not a list or a cell is not numeric
    */
  @throws[IOException]
  def makeTableFromArray(source: INDArray, precision: Int): Table[Integer, Integer, Double] = {
    val table: Table[Integer, Integer, Double] = HashBasedTable.create[Integer, Integer, Double]
    val rows: util.List[_] = makeRowsFromNDArray(source, precision)
    // makeRowsFromNDArray signals a parse failure with null; leave the table empty then.
    if (rows != null) {
      for (i <- 0 until rows.size) {
        rows.get(i) match {
          case row: util.List[_] =>
            for (j <- 0 until row.size) {
              row.get(j) match {
                case number: Number => table.put(i, j, number.doubleValue)
                case other =>
                  throw new ClassCastException(s"cell ($i, $j) is not numeric: $other")
              }
            }
          case other =>
            throw new ClassCastException(s"row $i is not a list: $other")
        }
      }
    }
    table
  }

}

[32mimport [36mjava.io.{File, IOException}[0m
[32mimport [36morg.apache.commons.io.FileUtils[0m
[32mimport [36mjava.net.URL[0m
[32mimport [36morg.nd4j.linalg.factory.Nd4j[0m
[32mimport [36morg.datavec.api.records.reader.RecordReader[0m
[32mimport [36morg.datavec.api.records.reader.impl.csv.CSVRecordReader[0m
[32mimport [36morg.datavec.api.split.FileSplit[0m
[32mimport [36morg.datavec.api.util.ClassPathResource[0m
[32mimport [36morg.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator[0m
[32mimport [36morg.deeplearning4j.eval.Evaluation[0m
[32mimport [36morg.deeplearning4j.nn.conf.MultiLayerConfiguration[0m
[32mimport [36morg.deeplearning4j.nn.conf.NeuralNetConfiguration[0m
[32mimport [36morg.deeplearning4j.nn.conf.layers.DenseLayer[0m
[32mimport [36morg.deeplearning4j.nn.conf.layers.OutputLayer[0m
[32mimport [36morg.deeplearning4j.nn.multilayer.MultiLayerNetwork[0m
[32mimport [36morg.deeplearning4j.nn.weights.WeightInit[0m
[32mimp

In [5]:
// Build the classifier against the iris CSV file and run it. execute() trains the
// network, prints the evaluation statistics and fills network.networkOutput.
val network = new DeepLearning4JMultiLayerNetwork("data/iris.txt");
network.execute()

Hello, deeplearning4j!


[pool-4-thread-10] INFO org.reflections.Reflections - Reflections took 466 ms to scan 7 urls, producing 109 keys and 364 values 


Build model....


[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 0 is 1.0732334011203644
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 100 is 0.4338571233450501
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 200 is 0.38194244254622567
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 300 is 0.20852557196019797
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 400 is 0.10802983709162767
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 500 is 0.06857784103670342
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreIterationListener - Score at iteration 600 is 0.048369936262854886
[pool-4-thread-10] INFO org.deeplearning4j.optimize.listeners.ScoreItera


Examples labeled as 0 classified by model as 0: 10 times
Examples labeled as 1 classified by model as 1: 15 times
Examples labeled as 2 classified by model as 1: 7 times
Examples labeled as 2 classified by model as 2: 21 times


 Accuracy:  0.8679
 Precision: 0.8939
 Recall:    0.9167
 F1 Score:  0.9052


[36mnetwork[0m: INSTANCE.$ref$DeepLearning4JMultiLayerNetwork = cmd3$$user$DeepLearning4JMultiLayerNetwork@4a3ab4e6

In [6]:
import com.quantifind.charts.highcharts.SeriesType
import com.quantifind.charts.Highcharts._
import com.quantifind.charts.highcharts.Highchart
import com.quantifind.charts.highcharts.Histogram

/**
  * Renders a wisp Highchart inside the notebook output.
  *
  * Emits a `div` whose id is `name`, then a JavaScript snippet that calls
  * `.highcharts(...)` on that element with the chart's JSON configuration.
  * NOTE(review): presumably requires jQuery and the Highcharts scripts to already be
  * loaded in the page — confirm.
  *
  * @param c    the chart to display
  * @param name id given to the generated `div`; also used as the `#name` selector
  */
def show(c:Highchart, name:String) = {
    // `$$` is the s-interpolator escape for a literal `$` in the generated script.
    val json = s"""
$$(function () {
    $$('#$name').highcharts(${c.toJson})
    })
"""
    // Emit the placeholder element before the script that looks it up.
    display.html(<div id={name}>graph</div>)
    display.js(json)
}

[32mimport [36mcom.quantifind.charts.highcharts.SeriesType[0m
[32mimport [36mcom.quantifind.charts.Highcharts._[0m
[32mimport [36mcom.quantifind.charts.highcharts.Highchart[0m
[32mimport [36mcom.quantifind.charts.highcharts.Histogram[0m
defined [32mfunction [36mshow[0m

In [7]:
// Inject script tags for Highstock, its exporting module and the grid theme into the
// notebook output.
// NOTE(review): the scripts are referenced over plain http; a notebook served over
// https may block them as mixed content — confirm and switch to https if available.
display.html(
<div>
  <script src="http://code.highcharts.com/stock/highstock.js"></script>
  <script src="http://code.highcharts.com/stock/modules/exporting.js"></script>
  <script src="http://www.highcharts.com/js/themes/grid.js"></script>
</div>)



In [8]:
import java.util
import com.quantifind.charts.Highcharts._
import collection.mutable._
import scala.collection.JavaConversions._
import scala.collection.mutable

// Scala view over the row keys of the network's output table.
val setrows : mutable.Set[Integer] = asScalaSet(network.networkOutput.rowKeySet())

// Column holding the strictly greatest value of the given row. Starts from column 0
// with value 0.0, so a row with no value above 0.0 yields 0.
def winningColumn(rowKey: Integer): Int = {
  val cells = mapAsScalaMap(network.networkOutput.row(rowKey))
  val best = cells.foldLeft((0, 0.0)) { case ((bestCol, bestVal), (colKey, value)) =>
    if (value > bestVal) (colKey.intValue, value) else (bestCol, bestVal)
  }
  best._1
}

// Both lists are built by prepending while walking setrows, so they end up in reverse
// iteration order and stay aligned index by index.
var listrows: List[Int] =
  setrows.foldLeft(List.empty[Int])((acc, rowKey) => rowKey.intValue :: acc)
var listmax: List[Int] =
  setrows.foldLeft(List.empty[Int])((acc, rowKey) => winningColumn(rowKey) :: acc)

// Plot the winning column against the row index.
val chart = scatter(listrows,listmax)
show(chart,"classifications")

serving resources from: file:/opt/app/src/main/ipynb/index-1477856146070.html
Server started: http://a0973adcac54:41990/index-1477856146070.html
Error while opening window (cause: java.io.IOException: Cannot run program "xdg-open": error=2, No such file or directory)
You can browse the following URL: http://a0973adcac54:41990
Output written to http://a0973adcac54:41990 (CMD + Click link in Mac OSX).


[32mimport [36mjava.util[0m
[32mimport [36mcom.quantifind.charts.Highcharts._[0m
[32mimport [36mcollection.mutable._[0m
[32mimport [36mscala.collection.JavaConversions._[0m
[32mimport [36mscala.collection.mutable[0m
[36mlistmax[0m: List[Int] = [33mList[0m(
  [32m2[0m,
  [32m0[0m,
  [32m2[0m,
  [32m2[0m,
  [32m1[0m,
  [32m1[0m,
  [32m2[0m,
  [32m1[0m,
  [32m1[0m,
  [32m1[0m,
  [32m2[0m,
  [32m0[0m,
  [32m1[0m,
  [32m0[0m,
  [32m1[0m,
  [32m2[0m,
  [32m0[0m,
  [32m2[0m,
  [32m2[0m,
[33m...[0m
[36mlistrows[0m: List[Int] = [33mList[0m(
  [32m52[0m,
  [32m48[0m,
  [32m49[0m,
  [32m50[0m,
  [32m51[0m,
  [32m45[0m,
  [32m44[0m,
  [32m47[0m,
  [32m46[0m,
  [32m41[0m,
  [32m40[0m,
  [32m43[0m,
  [32m42[0m,
  [32m37[0m,
  [32m36[0m,
  [32m39[0m,
  [32m38[0m,
  [32m33[0m,
  [32m32[0m,
[33m...[0m
[36msetrows[0m: collection.mutable.Set[Integer] = [33mSet[0m(
  0,
  1,
  2,
  3,
  4,
  5,
  6,
