From 98365304172afd0ffe08fc606f2f307ddc182b6e Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Fri, 29 Apr 2016 23:26:03 -0400 Subject: [PATCH 1/8] allow for decimal sample percentages. Start basic 3d surface and scatter plots --- examples/bin/spark-shell-plot.mscala | 62 ++++++++++++++++- .../apache/mahout/visualization/mplot2d.scala | 13 ++-- .../apache/mahout/visualization/mplot3d.scala | 50 +++++++++++++- .../apache/mahout/visualization/msurf3d.scala | 69 +++++++++++++++++++ 4 files changed, 184 insertions(+), 10 deletions(-) create mode 100644 math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index 73578f71a2..7864826505 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -11,9 +11,67 @@ val drmSin = drmRand.mapBlock() {case (keys, block) => } keys -> blockB } +new mplot2d(drmRand, samplePercent = .1) +new mplot2d(drmSin, samplePercent = .1) -new mplot2d(drmRand, samplePercent = 1) -new mplot2d(drmSin, samplePercent = 1) + +// surface plot +import org.apache.mahout.visualization.msurf3d + +val mxRnd3d = Matrices.symmetricUniformView(5000, 3, 1234) +val drmRand3d = drmParallelize(mxRnd3d) + +// gaussian +val drmGauss = drmRand3d.mapBlock() {case (keys, block) => + val blockB = block.like() + val m = block.nrow + val n = block.nrow + val sigma = 1.0 + + for (i <- 0 until m) { + + val t = Math.exp(-(blockB(i, 0) * blockB(i, 0) + blockB(i, 1) * blockB(i, 1)) / 2) / Math.sqrt(2*Math.PI) + + blockB(i, 2) = t * (-1 / (Math.PI *math.pow(sigma,4) * (1 -((blockB(i, 0)*(blockB(i, 0)) + ((blockB(i, 1)*(blockB(i, 1)) )/ 2 *(Math.pow(sigma,2)) + // } + } + keys -> blockB +} + +new msurf3d(drmGauss, samplePercent = 1) + +val drmHat = drmRand3d.mapBlock() {case (keys, block) => + val blockB = block.like() + val m = block.nrow + val n = block.nrow + for (i <- 0 until m) { + for (j <- 0 until i) { + blockB(i, 0) = 6.0 * (i 
- m/2) / m + blockB(j, 1) = 6.0 * (j - m/2) / m + blockB(i, 2) = Math.exp((blockB(i, 0) * blockB(j, 0) + blockB(i, 1) * blockB(j, 1)) / -2) / Math.sqrt(2*Math.PI) + } + } + keys -> blockB +} +new msurf3d(drmHat, samplePercent = 10) + + +val mxRnd3d = Matrices.symmetricUniformView(5000, 3, 1234) +val drmRand3d = drmParallelize(mxRnd3d) + +val drmGauss = drmRand3d.mapBlock() {case (keys, block) => + val blockB = block.like() + val m = block.nrow + val n = block.nrow + val sigma = 1.0 + for (i <- 0 until m) { + val x = blockB(0, 0) + val y = blockB(0, 1) + blockB(0, 2) = Math.exp(-((Math.pow(x, 2)) + (Math.pow(y, 2)))/2) + } + keys -> blockB +} +new msurf3d(drmGauss, samplePercent = 1) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala index 20e2743f9d..eb3fd0f530 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala @@ -32,18 +32,21 @@ import scala.collection.JavaConversions._ /** * Create a s scatter plot of a DRM by sampling a given percentage - * and plotting corresponding points of (drmXY(::,0),drmXY(::,2)) + * and plotting corresponding points of (drmXY(::,0),drmXY(::,1)) * * @param drmXY an m x 2 Drm drm to plot * @param samplePercent the percentage the drm to sample * @tparam K */ -class mplot2d[K](drmXY: DrmLike[K], samplePercent: Int = 1, setVisible: Boolean = true) { +class mplot2d[K](drmXY: DrmLike[K], samplePercent: Double, setVisible: Boolean = true) { val drmSize = drmXY.checkpoint().numRows() - val sampleDec: Double = (samplePercent.toDouble / 100.toDouble) + val sampleDec: Double = (samplePercent / 100.toDouble) + + println("Sampldec = " + sampleDec+ " * " + drmSize ) val numSamples: Int = (drmSize * sampleDec).toInt - + + println("Sampldec = " +numSamples) val mPlotMatrix: Matrix = drmSampleKRows(drmXY, numSamples, false) val arrays: 
Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 2) for (i <- 0 until mPlotMatrix.numRows()) { @@ -52,7 +55,7 @@ class mplot2d[K](drmXY: DrmLike[K], samplePercent: Int = 1, setVisible: Boolean } val canvas: PlotCanvas = ScatterPlot.plot(arrays,Color.BLUE) - canvas.setTitle("2d Plot: " + samplePercent + " % sample of " + drmSize +" points") + canvas.setTitle("2d scatter Plot: " + samplePercent + " % sample of " + drmSize +" points") canvas.setAxisLabels("x_0", "x_1") val plotPanel: PlotPanel = new PlotPanel(canvas) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala index 24e2415e67..d508ab3c5f 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala @@ -15,10 +15,54 @@ * limitations under the License. */ - package org.apache.mahout.visualization +import java.awt.{BorderLayout, Color} +import java.io.File +import javax.swing.JFrame + +import org.apache.mahout.math._ +import scalabindings._ +import RLikeOps._ +import drm._ +import smile.plot._ + +import scala.collection.JavaConversions._ + + +/** + * Create a s scatter plot of a DRM by sampling a given percentage + * and plotting corresponding points of (drmXYZ(::,0), drmXYZ(::,1), drmXYZ(::,2)) + * + * @param drmXYZ an m x 3 Drm drm to plot + * @param samplePercent the percentage the drm to sample + * @tparam K + */ +class mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { + val drmSize = drmXYZ.checkpoint().numRows() + val sampleDec: Double = (samplePercent / 100.toDouble) + + val numSamples: Int = (drmSize * sampleDec).toInt + + val mPlotMatrix: Matrix = drmSampleKRows(drmXYZ, numSamples, false) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) + for (i <- 0 until mPlotMatrix.numRows()) { + arrays(i)(0) = mPlotMatrix(i, 
0) + arrays(i)(1) = mPlotMatrix(i, 1) + arrays(i)(2) = mPlotMatrix(i, 2) + } + + val canvas3d: PlotCanvas = ScatterPlot.plot(arrays, Color.RED) + canvas3d.setTitle("3d scatter Plot: " + samplePercent + " % sample of " + drmSize +" points") + + val plotPanel: PlotPanel = new PlotPanel(canvas3d) -class mplot3d { + val plotFrame: JFrame = new JFrame("3d scatter Plot") + plotFrame.setLayout(new BorderLayout()) + plotFrame.add(plotPanel) + plotFrame.setSize(300,300) + if (setVisible) { + plotFrame.setVisible(true) + plotFrame.show() + } -} diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala new file mode 100644 index 0000000000..c0c8d1bc5b --- /dev/null +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.visualization + +import java.awt.{BorderLayout, Color} +import java.io.File +import javax.swing.JFrame + +import org.apache.mahout.math._ +import scalabindings._ +import RLikeOps._ +import drm._ +import smile.plot._ + +import scala.collection.JavaConversions._ + + +/** + * Create a s surface plot of a DRM by sampling a given percentage + * and plotting corresponding points of (drmXYZ(::,0), drmXYZ(::,1), drmXYZ(::,2)) + * + * @param drmXYZ an m x 3 Drm drm to plot + * @param samplePercent the percentage the drm to sample + * @tparam K + */ +class msurf3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { + val drmSize = drmXYZ.checkpoint().numRows() + val sampleDec: Double = (samplePercent / 100.toDouble) + + val numSamples: Int = (drmSize * sampleDec).toInt + + val mPlotMatrix: Matrix = drmSampleKRows(drmXYZ, numSamples, false) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) + for (i <- 0 until mPlotMatrix.numRows()) { + arrays(i)(0) = mPlotMatrix(i, 0) + arrays(i)(1) = mPlotMatrix(i, 1) + arrays(i)(2) = mPlotMatrix(i, 2) + } + + val canvas: PlotCanvas = Surface.plot(arrays, Palette.jet(256, 1.0f)) + canvas.setTitle("Surface Plot: " + samplePercent + " % sample of " + drmSize +" points") + + val plotPanel: PlotPanel = new PlotPanel(canvas) + + val plotFrame: JFrame = new JFrame("Surface Plot") + plotFrame.setLayout(new BorderLayout()) + plotFrame.add(plotPanel) + plotFrame.setSize(300,300) + if (setVisible) { + plotFrame.setVisible(true) + plotFrame.show() + } + +} From 80de3d02bb1f1ff88d5020798ada305e8f969650 Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sat, 30 Apr 2016 00:00:05 -0400 Subject: [PATCH 2/8] small refactoring, simple 3d plot example --- examples/bin/spark-shell-plot.mscala | 63 +++++-------------- .../apache/mahout/visualization/mplot3d.scala | 9 +-- .../{msurf3d.scala => msurf.scala} | 2 +- 3 files changed, 21 insertions(+), 53 deletions(-) 
rename math-scala/src/main/scala/org/apache/mahout/visualization/{msurf3d.scala => msurf.scala} (95%) diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index 7864826505..10a0499024 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -14,64 +14,31 @@ val drmSin = drmRand.mapBlock() {case (keys, block) => new mplot2d(drmRand, samplePercent = .1) new mplot2d(drmSin, samplePercent = .1) - -// surface plot -import org.apache.mahout.visualization.msurf3d - -val mxRnd3d = Matrices.symmetricUniformView(5000, 3, 1234) +// 3d scatter +import org.apache.mahout.visualization.mplot3d +val mxRnd3d = Matrices.symmetricUniformView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) -// gaussian -val drmGauss = drmRand3d.mapBlock() {case (keys, block) => +val drmGauss = drmRand3d.mapBlock() {case (keys, block) => val blockB = block.like() - val m = block.nrow - val n = block.nrow - val sigma = 1.0 - - for (i <- 0 until m) { - - val t = Math.exp(-(blockB(i, 0) * blockB(i, 0) + blockB(i, 1) * blockB(i, 1)) / 2) / Math.sqrt(2*Math.PI) - - blockB(i, 2) = t * (-1 / (Math.PI *math.pow(sigma,4) * (1 -((blockB(i, 0)*(blockB(i, 0)) + ((blockB(i, 1)*(blockB(i, 1)) )/ 2 *(Math.pow(sigma,2)) - // } - } - keys -> blockB -} - -new msurf3d(drmGauss, samplePercent = 1) + for (i <- 0 until block.nrow) { + val x:Double = block(i, 0) + val y:Double = block(i, 1) + val z:Double = block(i, 2) -val drmHat = drmRand3d.mapBlock() {case (keys, block) => - val blockB = block.like() - val m = block.nrow - val n = block.nrow - for (i <- 0 until m) { - for (j <- 0 until i) { - blockB(i, 0) = 6.0 * (i - m/2) / m - blockB(j, 1) = 6.0 * (j - m/2) / m - blockB(i, 2) = Math.exp((blockB(i, 0) * blockB(j, 0) + blockB(i, 1) * blockB(j, 1)) / -2) / Math.sqrt(2*Math.PI) - } + blockB(i, 0) = x + blockB(i, 1) = y + blockB(i, 2) = Math.exp(-((Math.pow(x, 2)) + (Math.pow(y, 2)))/2) } keys -> blockB } -new msurf3d(drmHat, samplePercent = 
10) +new mplot3d(drmGauss, samplePercent = 50) -val mxRnd3d = Matrices.symmetricUniformView(5000, 3, 1234) -val drmRand3d = drmParallelize(mxRnd3d) -val drmGauss = drmRand3d.mapBlock() {case (keys, block) => - val blockB = block.like() - val m = block.nrow - val n = block.nrow - val sigma = 1.0 - for (i <- 0 until m) { - val x = blockB(0, 0) - val y = blockB(0, 1) - blockB(0, 2) = Math.exp(-((Math.pow(x, 2)) + (Math.pow(y, 2)))/2) - } - keys -> blockB -} -new msurf3d(drmGauss, samplePercent = 1) +// 3d Surface needs to be ordered. +import org.apache.mahout.visualization.msurf +new msurf3d(drmGauss, samplePercent = 10) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala index d508ab3c5f..309accf352 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala @@ -38,14 +38,14 @@ import scala.collection.JavaConversions._ * @param samplePercent the percentage the drm to sample * @tparam K */ -class mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { +class mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { val drmSize = drmXYZ.checkpoint().numRows() val sampleDec: Double = (samplePercent / 100.toDouble) val numSamples: Int = (drmSize * sampleDec).toInt val mPlotMatrix: Matrix = drmSampleKRows(drmXYZ, numSamples, false) - val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) for (i <- 0 until mPlotMatrix.numRows()) { arrays(i)(0) = mPlotMatrix(i, 0) arrays(i)(1) = mPlotMatrix(i, 1) @@ -53,16 +53,17 @@ class mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Bool } val canvas3d: PlotCanvas = ScatterPlot.plot(arrays, Color.RED) - canvas3d.setTitle("3d scatter Plot: " + 
samplePercent + " % sample of " + drmSize +" points") + canvas3d.setTitle("3d scatter Plot: " + samplePercent + " % sample of " + drmSize + " points") val plotPanel: PlotPanel = new PlotPanel(canvas3d) val plotFrame: JFrame = new JFrame("3d scatter Plot") plotFrame.setLayout(new BorderLayout()) plotFrame.add(plotPanel) - plotFrame.setSize(300,300) + plotFrame.setSize(300, 300) if (setVisible) { plotFrame.setVisible(true) plotFrame.show() } +} diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/msurf.scala similarity index 95% rename from math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala rename to math-scala/src/main/scala/org/apache/mahout/visualization/msurf.scala index c0c8d1bc5b..7c6671879b 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/msurf3d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/msurf.scala @@ -38,7 +38,7 @@ import scala.collection.JavaConversions._ * @param samplePercent the percentage the drm to sample * @tparam K */ -class msurf3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { +class msurf[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { val drmSize = drmXYZ.checkpoint().numRows() val sampleDec: Double = (samplePercent / 100.toDouble) From bba8e250d4bb39a83ad89dcc636b9b61cca9ab44 Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sat, 30 Apr 2016 19:39:24 -0400 Subject: [PATCH 3/8] move some things into traits, refactor naming per scala/java conventions, add syntatic sugar, and other routines to package, still not wrapping X,Y,Z datapoints correctly for grid --- examples/bin/spark-shell-plot.mscala | 54 +++++++++-- .../apache/mahout/visualization/MGrid.scala | 65 +++++++++++++ .../{mplot2d.scala => MPlot2d.scala} | 49 ++++------ .../{mplot3d.scala => MPlot3d.scala} | 13 ++- .../{msurf.scala => MSurf.scala} | 14 ++- 
.../{mlpot.scala => MahoutPlot.scala} | 12 ++- .../apache/mahout/visualization/package.scala | 97 +++++++++++++++++++ 7 files changed, 253 insertions(+), 51 deletions(-) create mode 100644 math-scala/src/main/scala/org/apache/mahout/visualization/MGrid.scala rename math-scala/src/main/scala/org/apache/mahout/visualization/{mplot2d.scala => MPlot2d.scala} (51%) rename math-scala/src/main/scala/org/apache/mahout/visualization/{mplot3d.scala => MPlot3d.scala} (80%) rename math-scala/src/main/scala/org/apache/mahout/visualization/{msurf.scala => MSurf.scala} (82%) rename math-scala/src/main/scala/org/apache/mahout/visualization/{mlpot.scala => MahoutPlot.scala} (77%) create mode 100644 math-scala/src/main/scala/org/apache/mahout/visualization/package.scala diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index 10a0499024..3ee8014711 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -1,4 +1,22 @@ -import org.apache.mahout.visualization.mplot2d +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + + +import org.apache.mahout.visualization.MPlot2d val mxRnd = Matrices.symmetricUniformView(5000000, 2, 1234) val drmRand = drmParallelize(mxRnd) @@ -11,11 +29,11 @@ val drmSin = drmRand.mapBlock() {case (keys, block) => } keys -> blockB } -new mplot2d(drmRand, samplePercent = .1) -new mplot2d(drmSin, samplePercent = .1) +new MPlot2d(drmRand, samplePercent = .1) +new MPlot2d(drmSin, samplePercent = .1) // 3d scatter -import org.apache.mahout.visualization.mplot3d +import org.apache.mahout.visualization.MPlot3d val mxRnd3d = Matrices.symmetricUniformView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) @@ -33,12 +51,34 @@ val drmGauss = drmRand3d.mapBlock() {case (keys, block) => keys -> blockB } -new mplot3d(drmGauss, samplePercent = 50) +new MPlot3d(drmGauss, samplePercent = 50) // 3d Surface needs to be ordered. -import org.apache.mahout.visualization.msurf -new msurf3d(drmGauss, samplePercent = 10) +import org.apache.mahout.visualization.MSurf +new MSurf(drmGauss, samplePercent = 10) + + +// 3d grid +import org.apache.mahout.visualization.MGrid +val mxRnd3d = Matrices.symmetricUniformView(50000, 3, 1234) +val drmRand3d = drmParallelize(mxRnd3d) + +val drmGauss = drmRand3d.mapBlock() {case (keys, block) => + val blockB = block.like() + for (i <- 0 until block.nrow) { + val x:Double = block(i, 0) + val y:Double = block(i, 1) + val z:Double = block(i, 2) + + blockB(i, 0) = x + blockB(i, 1) = y + blockB(i, 2) = Math.exp(-((Math.pow(x, 2)) + (Math.pow(y, 2)))/2) + } + keys -> blockB +} + +new MGrid(drmGauss, samplePercent = 10) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MGrid.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MGrid.scala new file mode 100644 index 0000000000..3f6e6544bb --- /dev/null +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MGrid.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.visualization + +import java.awt.{BorderLayout, Color} +import javax.swing.JFrame + +import org.apache.mahout.math._ +import org.apache.mahout.math.drm._ +import org.apache.mahout.math.scalabindings.RLikeOps._ +import org.apache.mahout.math.scalabindings._ +import smile.plot._ + + +/** + * Create a grid plot of a DRM by sampling a given percentage + * and plotting corresponding points of (drmXYZ(::,0), drmXYZ(::,1), drmXYZ(::,2)) + * + * @param drmXYZ an m x 3 Drm drm to plot + * @param samplePercent the percentage the drm to sample + * @tparam K + */ +class MGrid[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot{ + + val drmSize = drmXYZ.checkpoint().numRows() + val sampleDec: Double = (samplePercent / 100.toDouble) + val numSamples: Int = (drmSize * sampleDec).toInt + + mPlotMatrix = drmSampleKRows(drmXYZ, numSamples, false) + + // matrix rows + val m = mPlotMatrix.numRows() + + // roll a set of 3d points in an m x 3 drm into a m x m x 3 matrix. 
+ val array3d: Array[Array[Array[Double]]] = mxXYZ2array3d(mPlotMatrix) + + canvas = Grid.plot(array3d) + canvas.setTitle("3d Grid Plot: " + samplePercent + " % sample of " + drmSize + " points") + + plotPanel = new PlotPanel(canvas) + + plotFrame = new JFrame("Grid Plot") + plotFrame.setLayout(new BorderLayout()) + plotFrame.add(plotPanel) + plotFrame.setSize(300, 300) + if (setVisible) { + plotFrame.setVisible(true) + } +} + diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala similarity index 51% rename from math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala rename to math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala index eb3fd0f530..4771f57bc7 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot2d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala @@ -18,7 +18,6 @@ package org.apache.mahout.visualization import java.awt.{BorderLayout, Color} -import java.io.File import javax.swing.JFrame import org.apache.mahout.math._ @@ -27,8 +26,6 @@ import RLikeOps._ import drm._ import smile.plot._ -import scala.collection.JavaConversions._ - /** * Create a s scatter plot of a DRM by sampling a given percentage @@ -38,35 +35,31 @@ import scala.collection.JavaConversions._ * @param samplePercent the percentage the drm to sample * @tparam K */ -class mplot2d[K](drmXY: DrmLike[K], samplePercent: Double, setVisible: Boolean = true) { - val drmSize = drmXY.checkpoint().numRows() - val sampleDec: Double = (samplePercent / 100.toDouble) - - println("Sampldec = " + sampleDec+ " * " + drmSize ) +class MPlot2d[K](drmXY: DrmLike[K], samplePercent: Double, setVisible: Boolean = true) extends MahoutPlot { + val drmSize = drmXY.checkpoint().numRows() + val sampleDec: Double = (samplePercent / 100.toDouble) - val numSamples: Int = (drmSize * sampleDec).toInt + val numSamples: Int = 
(drmSize * sampleDec).toInt - println("Sampldec = " +numSamples) - val mPlotMatrix: Matrix = drmSampleKRows(drmXY, numSamples, false) - val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 2) - for (i <- 0 until mPlotMatrix.numRows()) { - arrays(i)(0) = mPlotMatrix(i, 0) - arrays(i)(1) = mPlotMatrix(i, 1) - } + mPlotMatrix = drmSampleKRows(drmXY, numSamples, false) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 2) + for (i <- 0 until mPlotMatrix.numRows()) { + arrays(i)(0) = mPlotMatrix(i, 0) + arrays(i)(1) = mPlotMatrix(i, 1) + } - val canvas: PlotCanvas = ScatterPlot.plot(arrays,Color.BLUE) - canvas.setTitle("2d scatter Plot: " + samplePercent + " % sample of " + drmSize +" points") - canvas.setAxisLabels("x_0", "x_1") + canvas = ScatterPlot.plot(arrays, Color.BLUE) + canvas.setTitle("2d scatter Plot: " + samplePercent + " % sample of " + drmSize +" points") + canvas.setAxisLabels("x_0", "x_1") - val plotPanel: PlotPanel = new PlotPanel(canvas) + plotPanel = new PlotPanel(canvas) - val plotFrame: JFrame = new JFrame("2d Plot") - plotFrame.setLayout(new BorderLayout()) - plotFrame.add(plotPanel) - plotFrame.setSize(300,300) - if (setVisible) { - plotFrame.setVisible(true) - plotFrame.show() - } + plotFrame = new JFrame("2d scatter Plot") + plotFrame.setLayout(new BorderLayout()) + plotFrame.add(plotPanel) + plotFrame.setSize(300,300) + if (setVisible) { + plotFrame.setVisible(true) + } } diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala similarity index 80% rename from math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala rename to math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala index 309accf352..d4db2f75a3 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/mplot3d.scala +++ 
b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala @@ -38,13 +38,13 @@ import scala.collection.JavaConversions._ * @param samplePercent the percentage the drm to sample * @tparam K */ -class mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { +class MPlot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { val drmSize = drmXYZ.checkpoint().numRows() val sampleDec: Double = (samplePercent / 100.toDouble) val numSamples: Int = (drmSize * sampleDec).toInt - val mPlotMatrix: Matrix = drmSampleKRows(drmXYZ, numSamples, false) + mPlotMatrix = drmSampleKRows(drmXYZ, numSamples, false) val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) for (i <- 0 until mPlotMatrix.numRows()) { arrays(i)(0) = mPlotMatrix(i, 0) @@ -52,18 +52,17 @@ class mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Bool arrays(i)(2) = mPlotMatrix(i, 2) } - val canvas3d: PlotCanvas = ScatterPlot.plot(arrays, Color.RED) - canvas3d.setTitle("3d scatter Plot: " + samplePercent + " % sample of " + drmSize + " points") + canvas = ScatterPlot.plot(arrays, Color.RED) + canvas.setTitle("3d scatter Plot: " + samplePercent + " % sample of " + drmSize + " points") - val plotPanel: PlotPanel = new PlotPanel(canvas3d) + plotPanel = new PlotPanel(canvas) - val plotFrame: JFrame = new JFrame("3d scatter Plot") + plotFrame = new JFrame("3d scatter Plot") plotFrame.setLayout(new BorderLayout()) plotFrame.add(plotPanel) plotFrame.setSize(300, 300) if (setVisible) { plotFrame.setVisible(true) - plotFrame.show() } } diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/msurf.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala similarity index 82% rename from math-scala/src/main/scala/org/apache/mahout/visualization/msurf.scala rename to math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala index 
7c6671879b..cd5e1a2d0a 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/msurf.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala @@ -18,7 +18,6 @@ package org.apache.mahout.visualization import java.awt.{BorderLayout, Color} -import java.io.File import javax.swing.JFrame import org.apache.mahout.math._ @@ -27,7 +26,6 @@ import RLikeOps._ import drm._ import smile.plot._ -import scala.collection.JavaConversions._ /** @@ -38,13 +36,14 @@ import scala.collection.JavaConversions._ * @param samplePercent the percentage the drm to sample * @tparam K */ -class msurf[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) { +class MSurf[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { val drmSize = drmXYZ.checkpoint().numRows() val sampleDec: Double = (samplePercent / 100.toDouble) val numSamples: Int = (drmSize * sampleDec).toInt - val mPlotMatrix: Matrix = drmSampleKRows(drmXYZ, numSamples, false) + mPlotMatrix = drmSampleKRows(drmXYZ, numSamples, false) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) for (i <- 0 until mPlotMatrix.numRows()) { arrays(i)(0) = mPlotMatrix(i, 0) @@ -52,18 +51,17 @@ class msurf[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolea arrays(i)(2) = mPlotMatrix(i, 2) } - val canvas: PlotCanvas = Surface.plot(arrays, Palette.jet(256, 1.0f)) + canvas = Surface.plot(arrays, Palette.jet(256, 1.0f)) canvas.setTitle("Surface Plot: " + samplePercent + " % sample of " + drmSize +" points") - val plotPanel: PlotPanel = new PlotPanel(canvas) + plotPanel = new PlotPanel(canvas) - val plotFrame: JFrame = new JFrame("Surface Plot") + plotFrame = new JFrame("Surface Plot") plotFrame.setLayout(new BorderLayout()) plotFrame.add(plotPanel) plotFrame.setSize(300,300) if (setVisible) { plotFrame.setVisible(true) - plotFrame.show() } } diff --git 
a/math-scala/src/main/scala/org/apache/mahout/visualization/mlpot.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala similarity index 77% rename from math-scala/src/main/scala/org/apache/mahout/visualization/mlpot.scala rename to math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala index 18b692d83d..87b015f2bb 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/mlpot.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala @@ -18,7 +18,17 @@ package org.apache.mahout.visualization +import javax.swing.JFrame -trait mlpot { +import org.apache.mahout.math.Matrix +import smile.plot.{PlotCanvas, PlotPanel} + + +trait MahoutPlot { + + var canvas : PlotCanvas = _ + var plotPanel: PlotPanel =_ + var plotFrame: JFrame = _ + var mPlotMatrix: Matrix = _ } diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala new file mode 100644 index 0000000000..ea4552f009 --- /dev/null +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.mahout + + +import org.apache.mahout.math._ +import org.apache.mahout.math.drm.{DrmLike, _} +import org.apache.mahout.math.scalabindings.RLikeOps._ +import org.apache.mahout.math.scalabindings._ + + +package object visualization { + + /** + * Roll a set of datapoints in a mx3 matrix into a 3D Array()()() + * + * @param mxXYZ Matrix of data points x_0 = mx(i,0), x_1 = mx(i,1), x_2 = mx(i,2) + * @return an Array[Array[Array[Double]]] 3d Array + */ + def mxXYZ2array3d(mxXYZ: Matrix ): Array[Array[Array[Double]]] = { + + // number of datapoints + val m = mxXYZ.numRows() + + // 3d array to return + val array3d: Array[Array[Array[Double]]] = Array.ofDim[Double](m, m, 3) + + // roll a set of 3d points in an m x 3 matrix into a m x m x 3 Array. + for (i <- 0 until (m/2)) { + for (j <- 0 until (m/2)) { + for (k <- 0 until 3) { + array3d(i)(j)(k) = mxXYZ(i + j, k) + } + } + } + array3d + } + + /** + * Syntatic sugar for Msurf class + * @param drmXYZ + * @param samplePercent + * @param setVisible + * @tparam K + * @return + */ + def msurf[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MSurf[K](drmXYZ: DrmLike[K], samplePercent, setVisible) + + /** + * Syntatic sugar for MPlot2d class + * @param drmXYZ + * @param samplePercent + * @param setVisible + * @tparam K + * @return + */ + def mpot2d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MPlot2d[K](drmXYZ: DrmLike[K], samplePercent, setVisible) + + /** + * Syntatic sugar for MPlot3d class + * @param drmXYZ + * @param samplePercent + * @param setVisible + * @tparam K + * @return + */ + def mplot3d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MPlot3d[K](drmXYZ: DrmLike[K], samplePercent, setVisible) + + /** + * Syntatic sugar for MGrid class + * @param drmXYZ + * @param samplePercent + * @param setVisible + * @tparam K + * @return + */ + def 
mgrid[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MGrid[K](drmXYZ: DrmLike[K], samplePercent, setVisible) + +} From 0f45abc8f455cff7cde739daa63e2a0c3e5c0127 Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sat, 30 Apr 2016 20:27:57 -0400 Subject: [PATCH 4/8] histograms --- examples/bin/spark-shell-plot.mscala | 17 ++++- .../apache/mahout/visualization/MHisto.scala | 66 +++++++++++++++++++ .../mahout/visualization/MHisto3d.scala | 66 +++++++++++++++++++ .../apache/mahout/visualization/MPlot2d.scala | 4 +- .../apache/mahout/visualization/MPlot3d.scala | 2 +- .../apache/mahout/visualization/package.scala | 25 +++++++ 6 files changed, 174 insertions(+), 6 deletions(-) create mode 100644 math-scala/src/main/scala/org/apache/mahout/visualization/MHisto.scala create mode 100644 math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index 3ee8014711..be2a83d41e 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -53,13 +53,12 @@ val drmGauss = drmRand3d.mapBlock() {case (keys, block) => new MPlot3d(drmGauss, samplePercent = 50) - -// 3d Surface needs to be ordered. +// 3d Surface needs to be ordered. 
--notworking import org.apache.mahout.visualization.MSurf new MSurf(drmGauss, samplePercent = 10) -// 3d grid +// 3d grid --not working import org.apache.mahout.visualization.MGrid val mxRnd3d = Matrices.symmetricUniformView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) @@ -81,4 +80,16 @@ val drmGauss = drmRand3d.mapBlock() {case (keys, block) => new MGrid(drmGauss, samplePercent = 10) +// 2 and 3d histograms of gaussian data +import org.apache.mahout.visualization.MHisto3d +import org.apache.mahout.visualization.MHisto +val mxRnd3d = Matrices.gaussianView(50000, 3, 1234) +val drmRand3d = drmParallelize(mxRnd3d) + +// check out freguencies of the first column across 10 bins of original data. +new MHisto(drmRand3d(0,::), 10, samplePercent = 10) + +// look at 3d-Histogram +new MHisto3d(drmGauss, 10, samplePercent = 10) + diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto.scala new file mode 100644 index 0000000000..201bdebc37 --- /dev/null +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.visualization + +import java.awt.{BorderLayout, Color} +import javax.swing.JFrame + +import org.apache.mahout.math._ +import org.apache.mahout.math.drm._ +import org.apache.mahout.math.scalabindings.RLikeOps._ +import org.apache.mahout.math.scalabindings._ +import smile.plot._ + + +/** + * Create a Histogram of bins of a DRM by sampling a given percentage + * and plotting corresponding points of (drmXY(::,0),drmXY(::,1)) + * + * @param drmXY an m x 1 Drm Column, drm to plot + * @param numBins: number of bins + * @param samplePercent the percentage of the drm to sample. Default =1 + * @tparam K + */ +class MHisto[K](drmXY: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { + val drmSize = drmXY.checkpoint().numRows() + val sampleDec: Double = (samplePercent / 100.toDouble) + + val numSamples: Int = (drmSize * sampleDec).toInt + + mPlotMatrix = drmSampleKRows(drmXY, numSamples, false) + val arrays = Array.ofDim[Double](mPlotMatrix.numRows()) + for (i <- 0 until mPlotMatrix.numRows()) { + arrays(i) = mPlotMatrix(i, 0) + } + + // just use bins during development, can define ranges etc later + canvas = Histogram.plot(arrays, numBins) + canvas.setTitle("2d Histogram: " + samplePercent + " % sample of " + drmSize +" points") + canvas.setAxisLabels("x_0", "frequency") + + plotPanel = new PlotPanel(canvas) + + plotFrame = new JFrame("2d Histogram") + plotFrame.setLayout(new BorderLayout()) + plotFrame.add(plotPanel) + plotFrame.setSize(300,300) + if (setVisible) { + plotFrame.setVisible(true) + } + +} diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala new file mode 100644 index 0000000000..4b0fb534a0 --- /dev/null +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.visualization + +import java.awt.{BorderLayout, Color} +import javax.swing.JFrame + +import org.apache.mahout.math._ +import org.apache.mahout.math.drm._ +import org.apache.mahout.math.scalabindings.RLikeOps._ +import org.apache.mahout.math.scalabindings._ +import smile.plot._ + + +/** + * Create 3d Histogram of a DRM by sampling a given percentage + * and plotting corresponding points of (drmXYZ(::,0), drmXYZ(::,1), drmXYZ(::,2)) + * + * @param drmXYZ an m x 3 Drm drm to plot + * @param numBins num bins to define histogram on + * @param samplePercent the percentage the drm to sample + * @tparam K + */ +class MHisto3d[K](drmXYZ: DrmLike[K],numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { + val drmSize = drmXYZ.checkpoint().numRows() + val sampleDec: Double = (samplePercent / 100.toDouble) + + val numSamples: Int = (drmSize * sampleDec).toInt + + mPlotMatrix = drmSampleKRows(drmXYZ, numSamples, false) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) + for (i <- 0 until mPlotMatrix.numRows()) { + arrays(i)(0) = mPlotMatrix(i, 0) + arrays(i)(1) = mPlotMatrix(i, 1) + arrays(i)(2) = mPlotMatrix(i, 2) + } + + canvas = Histogram3D.plot(arrays, 
Palette.jet(256, 1.0f)) + canvas.setTitle("3d Histogram: " + samplePercent + " % sample of " + drmSize + " points") + + plotPanel = new PlotPanel(canvas) + + plotFrame = new JFrame("3d Histogram") + plotFrame.setLayout(new BorderLayout()) + plotFrame.add(plotPanel) + plotFrame.setSize(300, 300) + if (setVisible) { + plotFrame.setVisible(true) + } +} + diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala index 4771f57bc7..5b77a2ca4d 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot2d.scala @@ -28,14 +28,14 @@ import smile.plot._ /** - * Create a s scatter plot of a DRM by sampling a given percentage + * Create a scatter plot of a DRM by sampling a given percentage * and plotting corresponding points of (drmXY(::,0),drmXY(::,1)) * * @param drmXY an m x 2 Drm drm to plot * @param samplePercent the percentage the drm to sample * @tparam K */ -class MPlot2d[K](drmXY: DrmLike[K], samplePercent: Double, setVisible: Boolean = true) extends MahoutPlot { +class MPlot2d[K](drmXY: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { val drmSize = drmXY.checkpoint().numRows() val sampleDec: Double = (samplePercent / 100.toDouble) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala index d4db2f75a3..c7e023dfd6 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MPlot3d.scala @@ -31,7 +31,7 @@ import scala.collection.JavaConversions._ /** - * Create a s scatter plot of a DRM by sampling a given percentage + * Create a scatter plot of a DRM by sampling a given percentage * and plotting corresponding points of (drmXYZ(::,0), drmXYZ(::,1), 
drmXYZ(::,2)) * * @param drmXYZ an m x 3 Drm drm to plot diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala index ea4552f009..d0c74f005b 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala @@ -94,4 +94,29 @@ package object visualization { def mgrid[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = new MGrid[K](drmXYZ: DrmLike[K], samplePercent, setVisible) + /** + * + * @param drmXYZ + * @param numBins + * @param samplePercent + * @param setVisible + * @tparam K + * @return + */ + def mhisto[K](drmXYZ: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MHisto[K](drmXYZ: DrmLike[K], numBins, samplePercent, setVisible) + + /** + * + * @param drmXYZ + * @param numBins + * @param samplePercent + * @param setVisible + * @tparam K + * @return + */ + def mhisto3d[K](drmXYZ: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MHisto3d[K](drmXYZ: DrmLike[K],numBins, samplePercent, setVisible) + + } From 6029bd3481452ec73c505c9026ff73a21f22a086 Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sat, 30 Apr 2016 21:31:29 -0400 Subject: [PATCH 5/8] fix up 3d histogram --- examples/bin/spark-shell-plot.mscala | 4 ++-- .../main/scala/org/apache/mahout/visualization/MHisto3d.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index be2a83d41e..a81d2d6fb8 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -87,9 +87,9 @@ val mxRnd3d = Matrices.gaussianView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) // check out freguencies of the first column across 10 bins of original 
data. -new MHisto(drmRand3d(0,::), 10, samplePercent = 10) +new MHisto(drmRand3d, 20, samplePercent = 50) // look at 3d-Histogram -new MHisto3d(drmGauss, 10, samplePercent = 10) +new MHisto3d(drmRand3d, 10, samplePercent = 10) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala index 4b0fb534a0..06c4f6dcc2 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala @@ -43,11 +43,11 @@ class MHisto3d[K](drmXYZ: DrmLike[K],numBins: Int, samplePercent: Double = 1, se val numSamples: Int = (drmSize * sampleDec).toInt mPlotMatrix = drmSampleKRows(drmXYZ, numSamples, false) - val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 3) + val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 2) for (i <- 0 until mPlotMatrix.numRows()) { arrays(i)(0) = mPlotMatrix(i, 0) arrays(i)(1) = mPlotMatrix(i, 1) - arrays(i)(2) = mPlotMatrix(i, 2) + // arrays(i)(2) = mPlotMatrix(i, 2) } canvas = Histogram3D.plot(arrays, Palette.jet(256, 1.0f)) From c3d164a373240d6de6a356a41f894105380ce89b Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sat, 30 Apr 2016 23:50:59 -0400 Subject: [PATCH 6/8] Add exportPNG to MahoutPlot trait --- .../apache/mahout/visualization/MHisto3d.scala | 9 ++++----- .../apache/mahout/visualization/MSurf.scala | 2 +- .../mahout/visualization/MahoutPlot.scala | 17 +++++++++++++++++ .../apache/mahout/visualization/package.scala | 18 +++++++++--------- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala index 06c4f6dcc2..2c871f5128 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala +++ 
b/math-scala/src/main/scala/org/apache/mahout/visualization/MHisto3d.scala @@ -31,23 +31,22 @@ import smile.plot._ * Create 3d Histogram of a DRM by sampling a given percentage * and plotting corresponding points of (drmXYZ(::,0), drmXYZ(::,1), drmXYZ(::,2)) * - * @param drmXYZ an m x 3 Drm drm to plot + * @param drmXY an m x 3 Drm drm to plot * @param numBins num bins to define histogram on * @param samplePercent the percentage the drm to sample * @tparam K */ -class MHisto3d[K](drmXYZ: DrmLike[K],numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { - val drmSize = drmXYZ.checkpoint().numRows() +class MHisto3d[K](drmXY: DrmLike[K],numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true) extends MahoutPlot { + val drmSize = drmXY.checkpoint().numRows() val sampleDec: Double = (samplePercent / 100.toDouble) val numSamples: Int = (drmSize * sampleDec).toInt - mPlotMatrix = drmSampleKRows(drmXYZ, numSamples, false) + mPlotMatrix = drmSampleKRows(drmXY, numSamples, false) val arrays: Array[Array[Double]] = Array.ofDim[Double](mPlotMatrix.numRows(), 2) for (i <- 0 until mPlotMatrix.numRows()) { arrays(i)(0) = mPlotMatrix(i, 0) arrays(i)(1) = mPlotMatrix(i, 1) - // arrays(i)(2) = mPlotMatrix(i, 2) } canvas = Histogram3D.plot(arrays, Palette.jet(256, 1.0f)) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala index cd5e1a2d0a..d07f01ffcb 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MSurf.scala @@ -59,7 +59,7 @@ class MSurf[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolea plotFrame = new JFrame("Surface Plot") plotFrame.setLayout(new BorderLayout()) plotFrame.add(plotPanel) - plotFrame.setSize(300,300) + plotFrame.setSize(300, 300) if (setVisible) { plotFrame.setVisible(true) } diff --git 
a/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala index 87b015f2bb..9b54c808b3 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala @@ -18,6 +18,10 @@ package org.apache.mahout.visualization +import java.awt.Graphics2D +import java.awt.image.BufferedImage +import java.io.File +import javax.imageio.ImageIO import javax.swing.JFrame import org.apache.mahout.math.Matrix @@ -30,5 +34,18 @@ trait MahoutPlot { var plotPanel: PlotPanel =_ var plotFrame: JFrame = _ var mPlotMatrix: Matrix = _ + def contentPane = plotPanel + + // export a PNG of the plot to /tmp/test.png + def exportPNG(path: String ="/tmp/test.png") = { + val bi: BufferedImage = new BufferedImage(contentPane.getWidth, contentPane.getHeight, BufferedImage.TYPE_INT_ARGB) + val g2d: Graphics2D = bi.createGraphics + + contentPane.printAll(g2d) + + val file: File = new File(path) + + ImageIO.write(bi, "PNG", file) + } } diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala index d0c74f005b..a6d10ddd62 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala @@ -63,14 +63,14 @@ package object visualization { /** * Syntatic sugar for MPlot2d class - * @param drmXYZ + * @param drmXY * @param samplePercent * @param setVisible * @tparam K * @return */ - def mpot2d[K](drmXYZ: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = - new MPlot2d[K](drmXYZ: DrmLike[K], samplePercent, setVisible) + def mpot2d[K](drmXY: DrmLike[K], samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MPlot2d[K](drmXY: DrmLike[K], samplePercent, setVisible) /** * 
Syntatic sugar for MPlot3d class @@ -96,27 +96,27 @@ package object visualization { /** * - * @param drmXYZ + * @param drmX * @param numBins * @param samplePercent * @param setVisible * @tparam K * @return */ - def mhisto[K](drmXYZ: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = - new MHisto[K](drmXYZ: DrmLike[K], numBins, samplePercent, setVisible) + def mhisto[K](drmX: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MHisto[K](drmX: DrmLike[K], numBins, samplePercent, setVisible) /** * - * @param drmXYZ + * @param drmXY * @param numBins * @param samplePercent * @param setVisible * @tparam K * @return */ - def mhisto3d[K](drmXYZ: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = - new MHisto3d[K](drmXYZ: DrmLike[K],numBins, samplePercent, setVisible) + def mhisto3d[K](drmXY: DrmLike[K], numBins: Int, samplePercent: Double = 1, setVisible: Boolean = true): MahoutPlot = + new MHisto3d[K](drmXY: DrmLike[K], numBins, samplePercent, setVisible) } From 1275c42d3442a5d6f302cf8f1f8a29fec81bd5c1 Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sun, 1 May 2016 00:19:07 -0400 Subject: [PATCH 7/8] export canvas as PNG not the plot --- examples/bin/spark-shell-plot.mscala | 11 ++++++----- .../org/apache/mahout/visualization/MahoutPlot.scala | 8 +++++--- .../org/apache/mahout/visualization/package.scala | 10 +++++----- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index a81d2d6fb8..b4c5493194 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -66,9 +66,9 @@ val drmRand3d = drmParallelize(mxRnd3d) val drmGauss = drmRand3d.mapBlock() {case (keys, block) => val blockB = block.like() for (i <- 0 until block.nrow) { - val x:Double = block(i, 0) - val y:Double = block(i, 1) - val z:Double = block(i, 2) 
+ val x: Double = block(i, 0) + val y: Double = block(i, 1) + val z: Double = block(i, 2) blockB(i, 0) = x blockB(i, 1) = y @@ -86,10 +86,11 @@ import org.apache.mahout.visualization.MHisto val mxRnd3d = Matrices.gaussianView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) -// check out freguencies of the first column across 10 bins of original data. +// check out frequencies of the first column across 10 bins of original data. new MHisto(drmRand3d, 20, samplePercent = 50) // look at 3d-Histogram -new MHisto3d(drmRand3d, 10, samplePercent = 10) +val h3d = new MHisto3d(drmRand3d, 10, samplePercent = 10) +h3d.exportPNG("/tmp/histo3d.png") diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala index 9b54c808b3..8688a3d965 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/MahoutPlot.scala @@ -31,14 +31,16 @@ import smile.plot.{PlotCanvas, PlotPanel} trait MahoutPlot { var canvas : PlotCanvas = _ - var plotPanel: PlotPanel =_ + var plotPanel: PlotPanel = _ var plotFrame: JFrame = _ var mPlotMatrix: Matrix = _ - def contentPane = plotPanel + def contentPane = canvas // export a PNG of the plot to /tmp/test.png def exportPNG(path: String ="/tmp/test.png") = { - val bi: BufferedImage = new BufferedImage(contentPane.getWidth, contentPane.getHeight, BufferedImage.TYPE_INT_ARGB) + val bi: BufferedImage = + new BufferedImage(contentPane.getWidth, contentPane.getHeight, BufferedImage.TYPE_INT_ARGB) + val g2d: Graphics2D = bi.createGraphics contentPane.printAll(g2d) diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala index a6d10ddd62..d2ee6df42a 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala +++ 
b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala @@ -31,7 +31,7 @@ package object visualization { * @param mxXYZ Matrix of data points x_0 = mx(i,0), x_1 = mx(i,1), x_2 = mx(i,2) * @return an Array[Array[Array[Double]]] 3d Array */ - def mxXYZ2array3d(mxXYZ: Matrix ): Array[Array[Array[Double]]] = { + def mxXYZ2array3d(mxXYZ: Matrix): Array[Array[Array[Double]]] = { // number of datapoints val m = mxXYZ.numRows() @@ -40,10 +40,10 @@ package object visualization { val array3d: Array[Array[Array[Double]]] = Array.ofDim[Double](m, m, 3) // roll a set of 3d points in an m x 3 matrix into a m x m x 3 Array. - for (i <- 0 until (m/2)) { - for (j <- 0 until (m/2)) { + for (i <- 0 until m) { + for (j <- 0 until m) { for (k <- 0 until 3) { - array3d(i)(j)(k) = mxXYZ(i + j, k) + array3d(i)(j)(k) = mxXYZ(i, k) } } } @@ -51,7 +51,7 @@ package object visualization { } /** - * Syntatic sugar for Msurf class + * Syntatic sugar for MSurf class * @param drmXYZ * @param samplePercent * @param setVisible From 55d0d213ee8ab87ac1d4e1333e758b6b9db93bb9 Mon Sep 17 00:00:00 2001 From: Andrew Palumbo Date: Sun, 1 May 2016 00:37:45 -0400 Subject: [PATCH 8/8] fix up to use mplot2d etc --- examples/bin/spark-shell-plot.mscala | 41 ++++++++++--------- .../apache/mahout/visualization/package.scala | 2 +- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/examples/bin/spark-shell-plot.mscala b/examples/bin/spark-shell-plot.mscala index b4c5493194..4efb0c06cc 100644 --- a/examples/bin/spark-shell-plot.mscala +++ b/examples/bin/spark-shell-plot.mscala @@ -16,7 +16,9 @@ */ -import org.apache.mahout.visualization.MPlot2d +// this is the only import needed +import org.apache.mahout.visualization._ +//import org.apache.mahout.visualization.MPlot2d val mxRnd = Matrices.symmetricUniformView(5000000, 2, 1234) val drmRand = drmParallelize(mxRnd) @@ -29,20 +31,20 @@ val drmSin = drmRand.mapBlock() {case (keys, block) => } keys -> blockB } -new MPlot2d(drmRand, 
samplePercent = .1) -new MPlot2d(drmSin, samplePercent = .1) +mplot2d(drmRand, samplePercent = .1) +mplot2d(drmSin, samplePercent = .1) // 3d scatter -import org.apache.mahout.visualization.MPlot3d +//import org.apache.mahout.visualization.MPlot3d val mxRnd3d = Matrices.symmetricUniformView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) val drmGauss = drmRand3d.mapBlock() {case (keys, block) => val blockB = block.like() for (i <- 0 until block.nrow) { - val x:Double = block(i, 0) - val y:Double = block(i, 1) - val z:Double = block(i, 2) + val x: Double = block(i, 0) + val y: Double = block(i, 1) + val z: Double = block(i, 2) blockB(i, 0) = x blockB(i, 1) = y @@ -51,15 +53,15 @@ val drmGauss = drmRand3d.mapBlock() {case (keys, block) => keys -> blockB } -new MPlot3d(drmGauss, samplePercent = 50) +mplot3d(drmGauss, samplePercent = 50) -// 3d Surface needs to be ordered. --notworking -import org.apache.mahout.visualization.MSurf -new MSurf(drmGauss, samplePercent = 10) +// 3d Surface needs to be ordered. --not working correctly as is +//import org.apache.mahout.visualization.MSurf +msurf(drmGauss, samplePercent = 10) -// 3d grid --not working -import org.apache.mahout.visualization.MGrid +// 3d grid --not still not rendering -needs fix +//import org.apache.mahout.visualization.MGrid val mxRnd3d = Matrices.symmetricUniformView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) @@ -77,20 +79,21 @@ val drmGauss = drmRand3d.mapBlock() {case (keys, block) => keys -> blockB } -new MGrid(drmGauss, samplePercent = 10) +mgrid(drmGauss, samplePercent = 10) // 2 and 3d histograms of gaussian data -import org.apache.mahout.visualization.MHisto3d -import org.apache.mahout.visualization.MHisto +//import org.apache.mahout.visualization.MHisto3d +//import org.apache.mahout.visualization.MHisto val mxRnd3d = Matrices.gaussianView(50000, 3, 1234) val drmRand3d = drmParallelize(mxRnd3d) // check out frequencies of the first column across 10 bins of original data. 
-new MHisto(drmRand3d, 20, samplePercent = 50) +mhisto(drmRand3d, 20, samplePercent = 50) -// look at 3d-Histogram -val h3d = new MHisto3d(drmRand3d, 10, samplePercent = 10) +// create a 3d-Histogram +val h3d = mhisto3d(drmRand3d, 10, samplePercent = 10) +// export the canvas to the filesystem h3d.exportPNG("/tmp/histo3d.png") diff --git a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala index d2ee6df42a..fcbadeaec6 100644 --- a/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/visualization/package.scala @@ -38,8 +38,8 @@ package object visualization { // 3d array to return val array3d: Array[Array[Array[Double]]] = Array.ofDim[Double](m, m, 3) - // roll a set of 3d points in an m x 3 matrix into a m x m x 3 Array. + //TODO: FIX this: for (i <- 0 until m) { for (j <- 0 until m) { for (k <- 0 until 3) {