Skip to content

Commit

Permalink
MAHOUT-2019 SparkRow Matrix Speedup and fixing change to scala 2.11 m…
Browse files Browse the repository at this point in the history
…ade by build script
  • Loading branch information
pferrel committed Nov 18, 2017
1 parent d9b32f3 commit 800a9ed
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java
Expand Up @@ -19,7 +19,12 @@

import org.apache.mahout.math.flavor.MatrixFlavor;
import org.apache.mahout.math.flavor.TraversingStructureEnum;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.function.Functions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Iterator;

/**
* sparse matrix with general element values whose rows are accessible quickly. Implemented as a row
Expand All @@ -30,6 +35,8 @@ public class SparseRowMatrix extends AbstractMatrix {

private final boolean randomAccessRows;

private static final Logger log = LoggerFactory.getLogger(SparseRowMatrix.class);

/**
* Construct a sparse matrix starting with the provided row vectors.
*
Expand Down Expand Up @@ -132,6 +139,52 @@ public Matrix viewPart(int[] offset, int[] size) {
return new MatrixView(this, offset, size);
}

/**
 * Combines this matrix element-wise with {@code other}, storing
 * {@code function.apply(thisValue, otherValue)} back into this matrix.
 *
 * <p>When {@code function.isLikeMult()} is true and a row is a
 * {@link SequentialAccessSparseVector}, only the entries returned by that row's
 * {@code iterateNonZero()} are visited; every other entry of the row is left untouched.
 * Presumably this relies on a multiplication-like function mapping a zero in this matrix
 * to zero -- TODO: confirm that {@code isLikeMult()} is a sufficient test for that.
 * In all other cases every column of the row is visited.
 *
 * @param other    the matrix supplying the second argument of {@code function}; must have the
 *                 same number of rows and columns as this matrix
 * @param function the function applied to each pair of corresponding elements
 * @return this matrix, after modification
 * @throws CardinalityException if the row counts or the column counts of the two matrices differ
 */
@Override
public Matrix assign(Matrix other, DoubleDoubleFunction function) {
  int rows = rowSize();
  if (rows != other.rowSize()) {
    throw new CardinalityException(rows, other.rowSize());
  }
  int columns = columnSize();
  if (columns != other.columnSize()) {
    throw new CardinalityException(columns, other.columnSize());
  }

  // Loop-invariant: decide once whether the sparse shortcut is applicable at all.
  boolean likeMult = function.isLikeMult();
  // Ensures the fallback warning is emitted at most once per call instead of once per row.
  boolean warnedAboutFallback = false;

  for (int row = 0; row < rows; row++) {
    // An explicit type check replaces the former catch of ClassCastException. That catch wrapped the
    // whole row body, so an exception raised part-way through a row was swallowed and the function
    // was then applied a second time to the cells that had already been updated.
    if (likeMult && this.rowVectors[row] instanceof SequentialAccessSparseVector) {
      // TODO: can we use iterateNonZero on both rows until the index is the same to get better speedup?

      // TODO: SASVs have an iterateNonZero that returns zeros, this should not hurt but is far from optimal
      // this might perform much better if SparseRowMatrix were backed by RandomAccessSparseVectors, which
      // are backed by fastutil hashmaps and the iterateNonZero actually does only return nonZeros.
      Iterator<Vector.Element> sparseRowIterator =
          ((SequentialAccessSparseVector) this.rowVectors[row]).iterateNonZero();
      while (sparseRowIterator.hasNext()) {
        Vector.Element element = sparseRowIterator.next();
        int col = element.index();
        setQuick(row, col, function.apply(element.get(), other.getQuick(row, col)));
      }
    } else {
      if (likeMult && !warnedAboutFallback) {
        // The shortcut was wanted but this row's type does not support it; warn and use the
        // default implementation.
        log.warn("Row {} is not a SequentialAccessSparseVector, so the sparse shortcut cannot be used. "
            + "Proceeding with non-optimized implementation.", row);
        warnedAboutFallback = true;
      }
      for (int col = 0; col < columns; col++) {
        setQuick(row, col, function.apply(getQuick(row, col), other.getQuick(row, col)));
      }
    }
  }
  return this;
}

@Override
public Matrix assignColumn(int column, Vector other) {
if (rowSize() != other.size()) {
Expand Down

0 comments on commit 800a9ed

Please sign in to comment.